snix_castore/path/
mod.rs

1//! Contains data structures to deal with Paths in the snix-castore model.
2use bstr::ByteSlice;
3use std::{
4    borrow::Borrow,
5    fmt::{self, Debug, Display},
6    mem,
7    ops::Deref,
8    str::FromStr,
9};
10
11mod component;
12pub use component::{PathComponent, PathComponentError};
13
/// Represents a Path in the castore model.
/// These are always relative, and platform-independent, which distinguishes
/// them from the ones provided in the standard library.
///
/// A path is stored as the raw bytes of its components, joined by `/`.
/// The empty byte string is the root path. This is an unsized type (it wraps
/// a bare `[u8]`), so it is only ever handled behind a pointer such as
/// `&Path` or `Box<Path>`.
#[derive(Eq, Hash, PartialEq)]
#[repr(transparent)] // SAFETY: Representation has to match [u8]
pub struct Path {
    // As node names in the castore model cannot contain slashes,
    // we use them as component separators here.
    //
    // The unsafe pointer conversions in this module rely on this being the
    // only field, together with `repr(transparent)` above.
    inner: [u8],
}
24
25#[allow(dead_code)]
26impl Path {
27    // SAFETY: The empty path is valid.
28    pub const ROOT: &'static Path = unsafe { Path::from_bytes_unchecked(&[]) };
29
30    /// Convert a byte slice to a path, without checking validity.
31    const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Path {
32        // SAFETY: &[u8] and &Path have the same representation.
33        unsafe { mem::transmute(bytes) }
34    }
35
36    fn from_bytes(bytes: &[u8]) -> Option<&Path> {
37        if !bytes.is_empty() {
38            // Ensure all components are valid castore node names.
39            for component in bytes.split_str(b"/") {
40                if component::validate_name(component).is_err() {
41                    return None;
42                }
43            }
44        }
45
46        // SAFETY: We have verified that the path contains no empty components.
47        Some(unsafe { Path::from_bytes_unchecked(bytes) })
48    }
49
50    pub fn into_boxed_bytes(self: Box<Path>) -> Box<[u8]> {
51        // SAFETY: Box<Path> and Box<[u8]> have the same representation.
52        unsafe { mem::transmute(self) }
53    }
54
55    /// Returns the path without its final component, if there is one.
56    ///
57    /// Note that the parent of a bare file name is [Path::ROOT].
58    /// [Path::ROOT] is the only path without a parent.
59    pub fn parent(&self) -> Option<&Path> {
60        // The root does not have a parent.
61        if self.inner.is_empty() {
62            return None;
63        }
64
65        Some(
66            if let Some((parent, _file_name)) = self.inner.rsplit_once_str(b"/") {
67                // SAFETY: The parent of a valid Path is a valid Path.
68                unsafe { Path::from_bytes_unchecked(parent) }
69            } else {
70                // The parent of a bare file name is the root.
71                Path::ROOT
72            },
73        )
74    }
75
76    /// Creates a PathBuf with `name` adjoined to self.
77    pub fn try_join(&self, name: &[u8]) -> Result<PathBuf, std::io::Error> {
78        let mut v = PathBuf::with_capacity(self.inner.len() + name.len() + 1);
79        v.inner.extend_from_slice(&self.inner);
80        v.try_push(name)?;
81
82        Ok(v)
83    }
84
85    /// Provides an iterator over the components of the path,
86    /// which are individual [PathComponent]s.
87    /// In case the path is empty, an empty iterator is returned.
88    pub fn components(&self) -> impl Iterator<Item = PathComponent> + '_ {
89        let mut iter = self.inner.split_str(&b"/");
90
91        // We don't want to return an empty element, consume it if it's the only one.
92        if self.inner.is_empty() {
93            let _ = iter.next();
94        }
95
96        iter.map(|b| PathComponent {
97            inner: bytes::Bytes::copy_from_slice(b),
98        })
99    }
100
101    /// Produces an iterator over the components of the path, which are
102    /// individual byte slices.
103    /// In case the path is empty, an empty iterator is returned.
104    pub fn components_bytes(&self) -> impl Iterator<Item = &[u8]> {
105        let mut iter = self.inner.split_str(&b"/");
106
107        // We don't want to return an empty element, consume it if it's the only one.
108        if self.inner.is_empty() {
109            let _ = iter.next();
110        }
111
112        iter
113    }
114
115    /// Returns the final component of the Path, if there is one, as a
116    /// [PathComponent].
117    pub fn file_name(&self) -> Option<PathComponent> {
118        self.components().last()
119    }
120
121    /// Returns the final component of the Path, if there is one, in bytes.
122    pub fn file_name_bytes(&self) -> Option<&[u8]> {
123        self.components_bytes().last()
124    }
125
126    /// Returns the extension (without leading dot) of the Path, if possible.
127    pub fn extension(&self) -> Option<&[u8]> {
128        let file_name = match self.inner[..].rsplit_once_str(b"/") {
129            Some((_, r)) => r,
130            None => &self.inner[..],
131        };
132        let mut iter = file_name.rsplitn(2, |b| *b == b'.');
133        let e = iter.next();
134        // Return None if there's no dot.
135        iter.next()?;
136
137        e
138    }
139
140    pub fn as_bytes(&self) -> &[u8] {
141        &self.inner
142    }
143}
144
145impl Debug for Path {
146    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
147        Debug::fmt(self.inner.as_bstr(), f)
148    }
149}
150
151impl Display for Path {
152    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
153        Display::fmt(self.inner.as_bstr(), f)
154    }
155}
156
/// Reflexive conversion, so functions generic over `AsRef<Path>` accept a
/// `&Path` directly.
impl AsRef<Path> for Path {
    fn as_ref(&self) -> &Path {
        self
    }
}
162
/// Represents an owned PathBuf in the castore model.
/// These are always relative, and platform-independent, which distinguishes
/// them from the ones provided in the standard library.
///
/// The default value is the empty path, which is the root.
#[derive(Clone, Default, Eq, Hash, PartialEq)]
pub struct PathBuf {
    // Invariant: always holds the bytes of a valid Path (components joined
    // by `/`, or empty for the root). The `Deref` impl relies on this.
    inner: Vec<u8>,
}
170
171impl Deref for PathBuf {
172    type Target = Path;
173
174    fn deref(&self) -> &Self::Target {
175        // SAFETY: PathBuf always contains a valid Path.
176        unsafe { Path::from_bytes_unchecked(&self.inner) }
177    }
178}
179
180impl AsRef<Path> for PathBuf {
181    fn as_ref(&self) -> &Path {
182        self
183    }
184}
185
186impl ToOwned for Path {
187    type Owned = PathBuf;
188
189    fn to_owned(&self) -> Self::Owned {
190        PathBuf {
191            inner: self.inner.to_owned(),
192        }
193    }
194}
195
196impl Borrow<Path> for PathBuf {
197    fn borrow(&self) -> &Path {
198        self
199    }
200}
201
202impl From<Box<Path>> for PathBuf {
203    fn from(value: Box<Path>) -> Self {
204        // SAFETY: Box<Path> is always a valid path.
205        unsafe { PathBuf::from_bytes_unchecked(value.into_boxed_bytes().into_vec()) }
206    }
207}
208
209impl From<&Path> for PathBuf {
210    fn from(value: &Path) -> Self {
211        value.to_owned()
212    }
213}
214
215impl FromStr for PathBuf {
216    type Err = std::io::Error;
217
218    fn from_str(s: &str) -> Result<PathBuf, Self::Err> {
219        Ok(Path::from_bytes(s.as_bytes())
220            .ok_or(std::io::ErrorKind::InvalidData)?
221            .to_owned())
222    }
223}
224
225impl Debug for PathBuf {
226    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
227        Debug::fmt(&**self, f)
228    }
229}
230
231impl Display for PathBuf {
232    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
233        Display::fmt(&**self, f)
234    }
235}
236
237impl PathBuf {
238    pub fn new() -> PathBuf {
239        Self::default()
240    }
241
242    pub fn with_capacity(capacity: usize) -> PathBuf {
243        // SAFETY: The empty path is a valid path.
244        Self {
245            inner: Vec::with_capacity(capacity),
246        }
247    }
248
249    /// Adjoins `name` to self.
250    pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> {
251        if component::validate_name(name).is_err() {
252            return Err(std::io::ErrorKind::InvalidData.into());
253        }
254
255        if !self.inner.is_empty() {
256            self.inner.push(b'/');
257        }
258
259        self.inner.extend_from_slice(name);
260
261        Ok(())
262    }
263
264    /// Convert a byte vector to a PathBuf, without checking validity.
265    unsafe fn from_bytes_unchecked(bytes: Vec<u8>) -> PathBuf {
266        PathBuf { inner: bytes }
267    }
268
269    /// Convert from a [std::path::Path] to [PathBuf].
270    ///
271    /// - Self uses `/` as path separator.
272    /// - Absolute paths are always rejected, are are these with custom prefixes.
273    /// - Repeated separators are deduplicated.
274    /// - Occurrences of `.` are normalized away.
275    /// - A trailing slash is normalized away.
276    ///
277    /// A `canonicalize_dotdot` boolean controls whether `..` will get
278    /// canonicalized if possible, or should return an error.
279    ///
280    /// For more exotic paths, this conversion might produce different results
281    /// on different platforms, due to different underlying byte
282    /// representations, which is why it's restricted to unix for now.
283    #[cfg(unix)]
284    pub fn from_host_path(
285        host_path: &std::path::Path,
286        canonicalize_dotdot: bool,
287    ) -> Result<Self, std::io::Error> {
288        let mut p = PathBuf::with_capacity(host_path.as_os_str().len());
289
290        for component in host_path.components() {
291            match component {
292                std::path::Component::Prefix(_) | std::path::Component::RootDir => {
293                    return Err(std::io::Error::new(
294                        std::io::ErrorKind::InvalidData,
295                        "found disallowed prefix or rootdir",
296                    ));
297                }
298                std::path::Component::CurDir => continue, // ignore
299                std::path::Component::ParentDir => {
300                    if canonicalize_dotdot {
301                        // Try popping the last element from the path being constructed.
302                        // FUTUREWORK: pop method?
303                        p = p
304                            .parent()
305                            .ok_or_else(|| {
306                                std::io::Error::new(
307                                    std::io::ErrorKind::InvalidData,
308                                    "found .. going too far up",
309                                )
310                            })?
311                            .to_owned();
312                    } else {
313                        return Err(std::io::Error::new(
314                            std::io::ErrorKind::InvalidData,
315                            "found disallowed ..",
316                        ));
317                    }
318                }
319                std::path::Component::Normal(s) => {
320                    // append the new component to the path being constructed.
321                    p.try_push(s.as_encoded_bytes()).map_err(|_| {
322                        std::io::Error::new(
323                            std::io::ErrorKind::InvalidData,
324                            "encountered invalid node in sub_path component",
325                        )
326                    })?
327                }
328            }
329        }
330
331        Ok(p)
332    }
333
334    pub fn into_boxed_path(self) -> Box<Path> {
335        // SAFETY: Box<[u8]> and Box<Path> have the same representation,
336        // and PathBuf always contains a valid Path.
337        unsafe { mem::transmute(self.inner.into_boxed_slice()) }
338    }
339
340    pub fn into_bytes(self) -> Vec<u8> {
341        self.inner
342    }
343}
344
/// Unit tests for [Path] and [PathBuf].
///
/// Most `#[case]` arguments typed as `PathBuf` are given as string literals;
/// rstest converts them by parsing, so they exercise the `FromStr` impl too.
#[cfg(test)]
mod test {
    use super::{Path, PathBuf};
    use bstr::ByteSlice;
    use rstest::rstest;

    // TODO: add some manual tests including invalid UTF-8 (hard to express
    // with rstest)

    /// Valid strings parse, keep their bytes, and split into the expected
    /// number of components.
    #[rstest]
    #[case::empty("", 0)]
    #[case("a", 1)]
    #[case("a/b", 2)]
    #[case("a/b/c", 3)]
    // add two slightly more cursed variants.
    // Technically nothing prevents us from representing this with castore,
    // but maybe we want to disallow constructing paths like this as it's a
    // bad idea.
    #[case::cursed("C:\\a/b", 2)]
    #[case::cursed("\\\\snix-store", 1)]
    pub fn from_str(#[case] s: &str, #[case] num_components: usize) {
        let p: PathBuf = s.parse().expect("must parse");

        assert_eq!(s.as_bytes(), p.as_bytes(), "inner bytes mismatch");
        assert_eq!(
            num_components,
            p.components_bytes().count(),
            "number of components mismatch"
        );
    }

    /// Strings containing empty, `.`, `..` or otherwise invalid components
    /// are rejected.
    #[rstest]
    #[case::absolute("/a/b")]
    #[case::two_forward_slashes_start("//a/b")]
    #[case::two_forward_slashes_middle("a/b//c/d")]
    #[case::trailing_slash("a/b/")]
    #[case::dot(".")]
    #[case::dotdot("..")]
    #[case::dot_start("./a")]
    #[case::dotdot_start("../a")]
    #[case::dot_middle("a/./b")]
    #[case::dotdot_middle("a/../b")]
    #[case::dot_end("a/b/.")]
    #[case::dotdot_end("a/b/..")]
    #[case::null("fo\0o")]
    pub fn from_str_fail(#[case] s: &str) {
        s.parse::<PathBuf>().expect_err("must fail");
    }

    /// Non-root paths have the expected parent.
    #[rstest]
    #[case("foo", "")]
    #[case("foo/bar", "foo")]
    #[case("foo2/bar2", "foo2")]
    #[case("foo/bar/baz", "foo/bar")]
    pub fn parent(#[case] p: PathBuf, #[case] exp_parent: PathBuf) {
        assert_eq!(Some(&*exp_parent), p.parent());
    }

    /// The root is the only path without a parent.
    #[rstest]
    pub fn no_parent() {
        assert!(Path::ROOT.parent().is_none());
    }

    /// `try_join` and `try_push` agree, both on a non-empty base and on the
    /// root (where no separator must be inserted).
    #[rstest]
    #[case("a", "b", "a/b")]
    #[case::onto_root("", "a", "a")]
    pub fn join_push(#[case] mut p: PathBuf, #[case] name: &str, #[case] exp_p: PathBuf) {
        assert_eq!(exp_p, p.try_join(name.as_bytes()).expect("join failed"));
        p.try_push(name.as_bytes()).expect("push failed");
        assert_eq!(exp_p, p);
    }

    /// `try_join` and `try_push` both reject names that are not a single
    /// valid component.
    #[rstest]
    #[case("a", "/")]
    #[case("a", "")]
    #[case("a", "b/c")]
    #[case("", "/")]
    #[case("", "")]
    #[case("", "b/c")]
    #[case("", ".")]
    #[case("", "..")]
    pub fn join_push_fail(#[case] mut p: PathBuf, #[case] name: &str) {
        p.try_join(name.as_bytes())
            .expect_err("join succeeded unexpectedly");
        p.try_push(name.as_bytes())
            .expect_err("push succeeded unexpectedly");
    }

    /// `components_bytes` yields the components in order, and nothing for the
    /// root.
    #[rstest]
    #[case::empty("", vec![])]
    #[case("a", vec!["a"])]
    #[case("a/b", vec!["a", "b"])]
    #[case("a/b/c", vec!["a","b", "c"])]
    pub fn components_bytes(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) {
        assert_eq!(
            exp_components,
            p.components_bytes()
                .map(|x| x.to_str().unwrap())
                .collect::<Vec<_>>()
        );
    }

    /// Host paths get normalized as documented on `from_host_path`.
    // `PathBuf::from_host_path` only exists on unix, so this test has to be
    // gated the same way for the test module to compile on other platforms.
    #[cfg(unix)]
    #[rstest]
    #[case::empty("", "", false)]
    #[case::path("a", "a", false)]
    #[case::path2("a/b", "a/b", false)]
    #[case::double_slash_middle("a//b", "a/b", false)]
    #[case::dot(".", "", false)]
    #[case::dot_start("./a/b", "a/b", false)]
    #[case::dot_middle("a/./b", "a/b", false)]
    #[case::dot_end("a/b/.", "a/b", false)]
    #[case::trailing_slash("a/b/", "a/b", false)]
    #[case::dotdot_canonicalize("a/..", "", true)]
    #[case::dotdot_canonicalize2("a/../b", "b", true)]
    #[cfg_attr(unix, case::faux_prefix("\\\\nix-store", "\\\\nix-store", false))]
    #[cfg_attr(unix, case::faux_letter("C:\\foo.txt", "C:\\foo.txt", false))]
    pub fn from_host_path(
        #[case] host_path: std::path::PathBuf,
        #[case] exp_path: PathBuf,
        #[case] canonicalize_dotdot: bool,
    ) {
        let p = PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect("must succeed");

        assert_eq!(exp_path, p);
    }

    /// Absolute paths, disallowed or excessive `..`, and invalid names are
    /// rejected.
    // Gated on unix for the same reason as `from_host_path` above.
    #[cfg(unix)]
    #[rstest]
    #[case::absolute("/", false)]
    #[case::dotdot_root("..", false)]
    #[case::dotdot_root_canonicalize("..", true)]
    #[case::dotdot_root_no_canonicalize("a/..", false)]
    #[case::invalid_name("foo/bar\0", false)]
    // #[cfg_attr(windows, case::prefix("\\\\nix-store", false))]
    // #[cfg_attr(windows, case::letter("C:\\foo.txt", false))]
    pub fn from_host_path_fail(
        #[case] host_path: std::path::PathBuf,
        #[case] canonicalize_dotdot: bool,
    ) {
        PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect_err("must fail");
    }

    /// The extension is taken from the final component only.
    // These construct `PathBuf` directly from bytes, bypassing validation.
    #[rstest]
    #[case::without_dot(PathBuf { inner: "foo".into()}, None)]
    #[case::simple(PathBuf { inner: "foo.txt".into()}, Some(&b"txt"[..]))]
    #[case::empty(PathBuf { inner: "foo.".into()}, Some(&b""[..]))]
    #[case::multiple(PathBuf { inner: "foo.bar.txt".into()}, Some(&b"txt"[..]))]
    #[case::with_components(PathBuf { inner: "foo/foo.txt".into()}, Some(&b"txt"[..]))]
    #[case::path(PathBuf { inner: "foo.a/foo".into()}, None)]
    fn extension(#[case] p: PathBuf, #[case] exp_extension: Option<&[u8]>) {
        assert_eq!(exp_extension, p.extension())
    }
}