snix_castore/path/
mod.rs

1//! Contains data structures to deal with Paths in the snix-castore model.
2use bstr::ByteSlice;
3use std::{
4    borrow::Borrow,
5    fmt::{self, Debug, Display},
6    mem,
7    ops::Deref,
8    str::FromStr,
9};
10
11mod component;
12pub use component::{PathComponent, PathComponentError};
13
/// Represents a Path in the castore model.
/// These are always relative, and platform-independent, which distinguishes
/// them from the ones provided in the standard library.
///
/// This is an unsized, borrowed view over raw bytes; its owned counterpart is
/// [PathBuf]. The empty byte string is the root path ([Path::ROOT]).
#[derive(Eq, Hash, PartialEq)]
#[repr(transparent)] // SAFETY: Representation has to match [u8]
pub struct Path {
    // As node names in the castore model cannot contain slashes,
    // we use them as component separators here.
    // The bytes are either empty, or a `/`-separated list of components.
    inner: [u8],
}
24
25#[allow(dead_code)]
26impl Path {
27    // SAFETY: The empty path is valid.
28    pub const ROOT: &'static Path = unsafe { Path::from_bytes_unchecked(&[]) };
29
30    /// Convert a byte slice to a path, without checking validity.
31    const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Path {
32        // SAFETY: &[u8] and &Path have the same representation.
33        unsafe { mem::transmute(bytes) }
34    }
35
36    /// Convert a byte slice to a path, while checking validity.
37    /// Returns None if it contains invalid segments.
38    pub fn from_bytes(bytes: &[u8]) -> Option<&Path> {
39        if !bytes.is_empty() {
40            // Ensure all components are valid castore node names.
41            for component in bytes.split_str(b"/") {
42                if component::validate_name(component).is_err() {
43                    return None;
44                }
45            }
46        }
47
48        // SAFETY: We have verified that the path contains no empty components.
49        Some(unsafe { Path::from_bytes_unchecked(bytes) })
50    }
51
52    pub fn into_boxed_bytes(self: Box<Path>) -> Box<[u8]> {
53        // SAFETY: Box<Path> and Box<[u8]> have the same representation.
54        unsafe { mem::transmute(self) }
55    }
56
57    /// Returns the path without its final component, if there is one.
58    ///
59    /// Note that the parent of a bare file name is [Path::ROOT].
60    /// [Path::ROOT] is the only path without a parent.
61    pub fn parent(&self) -> Option<&Path> {
62        // The root does not have a parent.
63        if self.inner.is_empty() {
64            return None;
65        }
66
67        Some(
68            if let Some((parent, _file_name)) = self.inner.rsplit_once_str(b"/") {
69                // SAFETY: The parent of a valid Path is a valid Path.
70                unsafe { Path::from_bytes_unchecked(parent) }
71            } else {
72                // The parent of a bare file name is the root.
73                Path::ROOT
74            },
75        )
76    }
77
78    /// Creates a PathBuf with `name` adjoined to self.
79    pub fn try_join(&self, name: &[u8]) -> Result<PathBuf, std::io::Error> {
80        let mut v = PathBuf::with_capacity(self.inner.len() + name.len() + 1);
81        v.inner.extend_from_slice(&self.inner);
82        v.try_push(name)?;
83
84        Ok(v)
85    }
86
87    /// Provides an iterator over the components of the path,
88    /// which are individual [PathComponent]s.
89    /// In case the path is empty, an empty iterator is returned.
90    pub fn components(&self) -> impl Iterator<Item = PathComponent> + '_ {
91        let mut iter = self.inner.split_str(&b"/");
92
93        // We don't want to return an empty element, consume it if it's the only one.
94        if self.inner.is_empty() {
95            let _ = iter.next();
96        }
97
98        iter.map(|b| PathComponent {
99            inner: bytes::Bytes::copy_from_slice(b),
100        })
101    }
102
103    /// Produces an iterator over the components of the path, which are
104    /// individual byte slices.
105    /// In case the path is empty, an empty iterator is returned.
106    pub fn components_bytes(&self) -> impl Iterator<Item = &[u8]> {
107        let mut iter = self.inner.split_str(&b"/");
108
109        // We don't want to return an empty element, consume it if it's the only one.
110        if self.inner.is_empty() {
111            let _ = iter.next();
112        }
113
114        iter
115    }
116
117    /// Returns the final component of the Path, if there is one, as a
118    /// [PathComponent].
119    pub fn file_name(&self) -> Option<PathComponent> {
120        self.components().last()
121    }
122
123    /// Returns the final component of the Path, if there is one, in bytes.
124    pub fn file_name_bytes(&self) -> Option<&[u8]> {
125        self.components_bytes().last()
126    }
127
128    /// Returns the extension (without leading dot) of the Path, if possible.
129    pub fn extension(&self) -> Option<&[u8]> {
130        let file_name = match self.inner[..].rsplit_once_str(b"/") {
131            Some((_, r)) => r,
132            None => &self.inner[..],
133        };
134        let mut iter = file_name.rsplitn(2, |b| *b == b'.');
135        let e = iter.next();
136        // Return None if there's no dot.
137        iter.next()?;
138
139        e
140    }
141
142    pub fn as_bytes(&self) -> &[u8] {
143        &self.inner
144    }
145}
146
147impl Debug for Path {
148    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
149        Debug::fmt(self.inner.as_bstr(), f)
150    }
151}
152
153impl Display for Path {
154    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
155        Display::fmt(self.inner.as_bstr(), f)
156    }
157}
158
159impl AsRef<Path> for Path {
160    fn as_ref(&self) -> &Path {
161        self
162    }
163}
164
/// Represents an owned path in the castore model.
/// These are always relative, and platform-independent, which distinguishes
/// them from the ones provided in the standard library.
///
/// This is the owned counterpart of [Path], to which it dereferences.
#[derive(Clone, Default, Eq, Hash, PartialEq)]
pub struct PathBuf {
    // Invariant: these bytes always form a valid Path, which the unsafe
    // conversions to Path rely on.
    inner: Vec<u8>,
}
172
173impl Deref for PathBuf {
174    type Target = Path;
175
176    fn deref(&self) -> &Self::Target {
177        // SAFETY: PathBuf always contains a valid Path.
178        unsafe { Path::from_bytes_unchecked(&self.inner) }
179    }
180}
181
182impl AsRef<Path> for PathBuf {
183    fn as_ref(&self) -> &Path {
184        self
185    }
186}
187
188impl ToOwned for Path {
189    type Owned = PathBuf;
190
191    fn to_owned(&self) -> Self::Owned {
192        PathBuf {
193            inner: self.inner.to_owned(),
194        }
195    }
196}
197
198impl Borrow<Path> for PathBuf {
199    fn borrow(&self) -> &Path {
200        self
201    }
202}
203
204impl From<Box<Path>> for PathBuf {
205    fn from(value: Box<Path>) -> Self {
206        // SAFETY: Box<Path> is always a valid path.
207        unsafe { PathBuf::from_bytes_unchecked(value.into_boxed_bytes().into_vec()) }
208    }
209}
210
211impl From<&Path> for PathBuf {
212    fn from(value: &Path) -> Self {
213        value.to_owned()
214    }
215}
216
217impl FromStr for PathBuf {
218    type Err = std::io::Error;
219
220    fn from_str(s: &str) -> Result<PathBuf, Self::Err> {
221        Ok(Path::from_bytes(s.as_bytes())
222            .ok_or(std::io::ErrorKind::InvalidData)?
223            .to_owned())
224    }
225}
226
227impl Debug for PathBuf {
228    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
229        Debug::fmt(&**self, f)
230    }
231}
232
233impl Display for PathBuf {
234    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
235        Display::fmt(&**self, f)
236    }
237}
238
239impl PathBuf {
240    pub fn new() -> PathBuf {
241        Self::default()
242    }
243
244    pub fn with_capacity(capacity: usize) -> PathBuf {
245        // SAFETY: The empty path is a valid path.
246        Self {
247            inner: Vec::with_capacity(capacity),
248        }
249    }
250
251    /// Adjoins `name` to self.
252    pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> {
253        if component::validate_name(name).is_err() {
254            return Err(std::io::ErrorKind::InvalidData.into());
255        }
256
257        if !self.inner.is_empty() {
258            self.inner.push(b'/');
259        }
260
261        self.inner.extend_from_slice(name);
262
263        Ok(())
264    }
265
266    /// Convert a byte vector to a PathBuf, without checking validity.
267    unsafe fn from_bytes_unchecked(bytes: Vec<u8>) -> PathBuf {
268        PathBuf { inner: bytes }
269    }
270
271    /// Convert from a [std::path::Path] to [PathBuf].
272    ///
273    /// - Self uses `/` as path separator.
274    /// - Absolute paths are always rejected, are are these with custom prefixes.
275    /// - Repeated separators are deduplicated.
276    /// - Occurrences of `.` are normalized away.
277    /// - A trailing slash is normalized away.
278    ///
279    /// A `canonicalize_dotdot` boolean controls whether `..` will get
280    /// canonicalized if possible, or should return an error.
281    ///
282    /// For more exotic paths, this conversion might produce different results
283    /// on different platforms, due to different underlying byte
284    /// representations, which is why it's restricted to unix for now.
285    #[cfg(unix)]
286    pub fn from_host_path(
287        host_path: &std::path::Path,
288        canonicalize_dotdot: bool,
289    ) -> Result<Self, std::io::Error> {
290        let mut p = PathBuf::with_capacity(host_path.as_os_str().len());
291
292        for component in host_path.components() {
293            match component {
294                std::path::Component::Prefix(_) | std::path::Component::RootDir => {
295                    return Err(std::io::Error::new(
296                        std::io::ErrorKind::InvalidData,
297                        "found disallowed prefix or rootdir",
298                    ));
299                }
300                std::path::Component::CurDir => continue, // ignore
301                std::path::Component::ParentDir => {
302                    if canonicalize_dotdot {
303                        // Try popping the last element from the path being constructed.
304                        // FUTUREWORK: pop method?
305                        p = p
306                            .parent()
307                            .ok_or_else(|| {
308                                std::io::Error::new(
309                                    std::io::ErrorKind::InvalidData,
310                                    "found .. going too far up",
311                                )
312                            })?
313                            .to_owned();
314                    } else {
315                        return Err(std::io::Error::new(
316                            std::io::ErrorKind::InvalidData,
317                            "found disallowed ..",
318                        ));
319                    }
320                }
321                std::path::Component::Normal(s) => {
322                    // append the new component to the path being constructed.
323                    p.try_push(s.as_encoded_bytes()).map_err(|_| {
324                        std::io::Error::new(
325                            std::io::ErrorKind::InvalidData,
326                            "encountered invalid node in sub_path component",
327                        )
328                    })?
329                }
330            }
331        }
332
333        Ok(p)
334    }
335
336    pub fn into_boxed_path(self) -> Box<Path> {
337        // SAFETY: Box<[u8]> and Box<Path> have the same representation,
338        // and PathBuf always contains a valid Path.
339        unsafe { mem::transmute(self.inner.into_boxed_slice()) }
340    }
341
342    pub fn into_bytes(self) -> Vec<u8> {
343        self.inner
344    }
345}
346
#[cfg(test)]
mod test {
    use super::{Path, PathBuf};
    use bstr::ByteSlice;
    use rstest::rstest;

    // TODO: add some manual tests including invalid UTF-8 (hard to express
    // with rstest)

    /// Valid paths parse, keep their bytes, and split into the expected
    /// number of components.
    #[rstest]
    #[case::empty("", 0)]
    #[case("a", 1)]
    #[case("a/b", 2)]
    #[case("a/b/c", 3)]
    // add two slightly more cursed variants.
    // Technically nothing prevents us from representing this with castore,
    // but maybe we want to disallow constructing paths like this as it's a
    // bad idea.
    #[case::cursed("C:\\a/b", 2)]
    #[case::cursed("\\\\snix-store", 1)]
    pub fn from_str(#[case] s: &str, #[case] num_components: usize) {
        let p: PathBuf = s.parse().expect("must parse");

        assert_eq!(s.as_bytes(), p.as_bytes(), "inner bytes mismatch");
        assert_eq!(
            num_components,
            p.components_bytes().count(),
            "number of components mismatch"
        );
    }

    /// Paths containing invalid components are rejected when parsing.
    #[rstest]
    #[case::absolute("/a/b")]
    #[case::two_forward_slashes_start("//a/b")]
    #[case::two_forward_slashes_middle("a/b//c/d")]
    #[case::trailing_slash("a/b/")]
    #[case::dot(".")]
    #[case::dotdot("..")]
    #[case::dot_start("./a")]
    #[case::dotdot_start("../a")]
    #[case::dot_middle("a/./b")]
    #[case::dotdot_middle("a/../b")]
    #[case::dot_end("a/b/.")]
    #[case::dotdot_end("a/b/..")]
    #[case::null("fo\0o")]
    pub fn from_str_fail(#[case] s: &str) {
        s.parse::<PathBuf>().expect_err("must fail");
    }

    #[rstest]
    #[case("foo", "")]
    #[case("foo/bar", "foo")]
    #[case("foo2/bar2", "foo2")]
    #[case("foo/bar/baz", "foo/bar")]
    pub fn parent(#[case] p: PathBuf, #[case] exp_parent: PathBuf) {
        assert_eq!(Some(&*exp_parent), p.parent());
    }

    #[rstest]
    pub fn no_parent() {
        assert!(Path::ROOT.parent().is_none());
    }

    /// try_join and try_push must agree with each other.
    #[rstest]
    #[case("a", "b", "a/b")]
    // Adjoining to the root must not introduce a leading separator.
    #[case::root("", "a", "a")]
    pub fn join_push(#[case] mut p: PathBuf, #[case] name: &str, #[case] exp_p: PathBuf) {
        assert_eq!(exp_p, p.try_join(name.as_bytes()).expect("join failed"));
        p.try_push(name.as_bytes()).expect("push failed");
        assert_eq!(exp_p, p);
    }

    #[rstest]
    #[case("a", "/")]
    #[case("a", "")]
    #[case("a", "b/c")]
    #[case("", "/")]
    #[case("", "")]
    #[case("", "b/c")]
    #[case("", ".")]
    #[case("", "..")]
    pub fn join_push_fail(#[case] mut p: PathBuf, #[case] name: &str) {
        p.try_join(name.as_bytes())
            .expect_err("join succeeded unexpectedly");
        p.try_push(name.as_bytes())
            .expect_err("push succeeded unexpectedly");
    }

    #[rstest]
    #[case::empty("", vec![])]
    #[case("a", vec!["a"])]
    #[case("a/b", vec!["a", "b"])]
    #[case("a/b/c", vec!["a","b", "c"])]
    pub fn components_bytes(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) {
        assert_eq!(
            exp_components,
            p.components_bytes()
                .map(|x| x.to_str().unwrap())
                .collect::<Vec<_>>()
        );
    }

    /// The file name is the final component, and the root has none.
    #[rstest]
    #[case::empty("", None)]
    #[case::bare("a", Some(&b"a"[..]))]
    #[case::nested("a/b", Some(&b"b"[..]))]
    #[case::deeply_nested("a/b/c", Some(&b"c"[..]))]
    pub fn file_name_bytes(#[case] p: PathBuf, #[case] exp_file_name: Option<&[u8]>) {
        assert_eq!(exp_file_name, p.file_name_bytes());
    }

    #[rstest]
    #[case::empty("", "", false)]
    #[case::path("a", "a", false)]
    #[case::path2("a/b", "a/b", false)]
    #[case::double_slash_middle("a//b", "a/b", false)]
    #[case::dot(".", "", false)]
    #[case::dot_start("./a/b", "a/b", false)]
    #[case::dot_middle("a/./b", "a/b", false)]
    #[case::dot_end("a/b/.", "a/b", false)]
    #[case::trailing_slash("a/b/", "a/b", false)]
    #[case::dotdot_canonicalize("a/..", "", true)]
    #[case::dotdot_canonicalize2("a/../b", "b", true)]
    #[cfg_attr(unix, case::faux_prefix("\\\\nix-store", "\\\\nix-store", false))]
    #[cfg_attr(unix, case::faux_letter("C:\\foo.txt", "C:\\foo.txt", false))]
    pub fn from_host_path(
        #[case] host_path: std::path::PathBuf,
        #[case] exp_path: PathBuf,
        #[case] canonicalize_dotdot: bool,
    ) {
        let p = PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect("must succeed");

        assert_eq!(exp_path, p);
    }

    #[rstest]
    #[case::absolute("/", false)]
    #[case::dotdot_root("..", false)]
    #[case::dotdot_root_canonicalize("..", true)]
    #[case::dotdot_root_no_canonicalize("a/..", false)]
    #[case::invalid_name("foo/bar\0", false)]
    // #[cfg_attr(windows, case::prefix("\\\\nix-store", false))]
    // #[cfg_attr(windows, case::letter("C:\\foo.txt", false))]
    pub fn from_host_path_fail(
        #[case] host_path: std::path::PathBuf,
        #[case] canonicalize_dotdot: bool,
    ) {
        PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect_err("must fail");
    }

    #[rstest]
    #[case::without_dot(PathBuf { inner: "foo".into()}, None)]
    #[case::simple(PathBuf { inner: "foo.txt".into()}, Some(&b"txt"[..]))]
    #[case::empty(PathBuf { inner: "foo.".into()}, Some(&b""[..]))]
    #[case::multiple(PathBuf { inner: "foo.bar.txt".into()}, Some(&b"txt"[..]))]
    #[case::with_components(PathBuf { inner: "foo/foo.txt".into()}, Some(&b"txt"[..]))]
    #[case::path(PathBuf { inner: "foo.a/foo".into()}, None)]
    fn extension(#[case] p: PathBuf, #[case] exp_extension: Option<&[u8]>) {
        assert_eq!(exp_extension, p.extension())
    }
}
498}