fuse_backend_rs/passthrough/
sync_io.rs

1// Copyright (C) 2020 Alibaba Cloud. All rights reserved.
2// Copyright 2019 The Chromium OS Authors. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE-BSD-3-Clause file.
5
6//! Fuse passthrough file system, mirroring an existing FS hierarchy.
7
8use std::ffi::{CStr, CString};
9use std::fs::File;
10use std::io;
11use std::mem::{self, size_of, ManuallyDrop, MaybeUninit};
12use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
13use std::sync::atomic::Ordering;
14use std::sync::Arc;
15use std::time::Duration;
16
17use super::os_compat::LinuxDirent64;
18use super::util::stat_fd;
19use super::*;
20use crate::abi::fuse_abi::{CreateIn, Opcode, FOPEN_IN_KILL_SUIDGID, WRITE_KILL_PRIV};
21#[cfg(any(feature = "vhost-user-fs", feature = "virtiofs"))]
22use crate::abi::virtio_fs;
23use crate::api::filesystem::{
24    Context, DirEntry, Entry, FileSystem, FsOptions, GetxattrReply, ListxattrReply, OpenOptions,
25    SetattrValid, ZeroCopyReader, ZeroCopyWriter,
26};
27use crate::bytes_to_cstr;
28#[cfg(any(feature = "vhost-user-fs", feature = "virtiofs"))]
29use crate::transport::FsCacheReqHandler;
30
31impl<S: BitmapSlice + Send + Sync> PassthroughFs<S> {
32    fn open_inode(&self, inode: Inode, flags: i32) -> io::Result<File> {
33        let data = self.inode_map.get(inode)?;
34        if !is_safe_inode(data.mode) {
35            Err(ebadf())
36        } else {
37            let new_flags = self.get_writeback_open_flags(flags);
38            data.open_file(new_flags | libc::O_CLOEXEC, &self.proc_self_fd)
39        }
40    }
41
42    /// Check the HandleData flags against the flags from the current request
43    /// if these do not match update the file descriptor flags and store the new
44    /// result in the HandleData entry
45    #[inline(always)]
46    fn check_fd_flags(&self, data: Arc<HandleData>, fd: RawFd, flags: u32) -> io::Result<()> {
47        let open_flags = data.get_flags();
48        if open_flags != flags {
49            let ret = unsafe { libc::fcntl(fd, libc::F_SETFL, flags) };
50            if ret != 0 {
51                return Err(io::Error::last_os_error());
52            }
53            data.set_flags(flags);
54        }
55        Ok(())
56    }
57
58    fn do_readdir(
59        &self,
60        inode: Inode,
61        handle: Handle,
62        size: u32,
63        offset: u64,
64        add_entry: &mut dyn FnMut(DirEntry, RawFd) -> io::Result<usize>,
65    ) -> io::Result<()> {
66        if size == 0 {
67            return Ok(());
68        }
69
70        let mut buf = Vec::<u8>::with_capacity(size as usize);
71        let data = self.get_dirdata(handle, inode, libc::O_RDONLY)?;
72
73        {
74            // Since we are going to work with the kernel offset, we have to acquire the file lock
75            // for both the `lseek64` and `getdents64` syscalls to ensure that no other thread
76            // changes the kernel offset while we are using it.
77            let (guard, dir) = data.get_file_mut();
78
79            // Safe because this doesn't modify any memory and we check the return value.
80            let res =
81                unsafe { libc::lseek64(dir.as_raw_fd(), offset as libc::off64_t, libc::SEEK_SET) };
82            if res < 0 {
83                return Err(io::Error::last_os_error());
84            }
85
86            // Safe because the kernel guarantees that it will only write to `buf` and we check the
87            // return value.
88            let res = unsafe {
89                libc::syscall(
90                    libc::SYS_getdents64,
91                    dir.as_raw_fd(),
92                    buf.as_mut_ptr() as *mut LinuxDirent64,
93                    size as libc::c_int,
94                )
95            };
96            if res < 0 {
97                return Err(io::Error::last_os_error());
98            }
99
100            // Safe because we trust the value returned by kernel.
101            unsafe { buf.set_len(res as usize) };
102
103            // Explicitly drop the lock so that it's not held while we fill in the fuse buffer.
104            mem::drop(guard);
105        }
106
107        let mut rem = &buf[..];
108        let orig_rem_len = rem.len();
109        while !rem.is_empty() {
110            // We only use debug asserts here because these values are coming from the kernel and we
111            // trust them implicitly.
112            debug_assert!(
113                rem.len() >= size_of::<LinuxDirent64>(),
114                "fuse: not enough space left in `rem`"
115            );
116
117            let (front, back) = rem.split_at(size_of::<LinuxDirent64>());
118
119            let dirent64 = LinuxDirent64::from_slice(front)
120                .expect("fuse: unable to get LinuxDirent64 from slice");
121
122            let namelen = dirent64.d_reclen as usize - size_of::<LinuxDirent64>();
123            debug_assert!(
124                namelen <= back.len(),
125                "fuse: back is smaller than `namelen`"
126            );
127
128            let name = &back[..namelen];
129            let res = if name.starts_with(CURRENT_DIR_CSTR) || name.starts_with(PARENT_DIR_CSTR) {
130                // We don't want to report the "." and ".." entries. However, returning `Ok(0)` will
131                // break the loop so return `Ok` with a non-zero value instead.
132                Ok(1)
133            } else {
134                // The Sys_getdents64 in kernel will pad the name with '\0'
135                // bytes up to 8-byte alignment, so @name may contain a few null
136                // terminators.  This causes an extra lookup from fuse when
137                // called by readdirplus, because kernel path walking only takes
138                // name without null terminators, the dentry with more than 1
139                // null terminators added by readdirplus doesn't satisfy the
140                // path walking.
141                let name = bytes_to_cstr(name)
142                    .map_err(|e| {
143                        error!("fuse: do_readdir: {:?}", e);
144                        einval()
145                    })?
146                    .to_bytes();
147
148                add_entry(
149                    DirEntry {
150                        ino: dirent64.d_ino,
151                        offset: dirent64.d_off as u64,
152                        type_: u32::from(dirent64.d_ty),
153                        name,
154                    },
155                    data.borrow_fd().as_raw_fd(),
156                )
157            };
158
159            debug_assert!(
160                rem.len() >= dirent64.d_reclen as usize,
161                "fuse: rem is smaller than `d_reclen`"
162            );
163
164            match res {
165                Ok(0) => break,
166                Ok(_) => rem = &rem[dirent64.d_reclen as usize..],
167                // If there's an error, we can only signal it if we haven't
168                // stored any entries yet - otherwise we'd end up with wrong
169                // lookup counts for the entries that are already in the
170                // buffer. So we return what we've collected until that point.
171                Err(e) if rem.len() == orig_rem_len => return Err(e),
172                Err(_) => return Ok(()),
173            }
174        }
175
176        Ok(())
177    }
178
179    fn do_open(
180        &self,
181        inode: Inode,
182        flags: u32,
183        fuse_flags: u32,
184    ) -> io::Result<(Option<Handle>, OpenOptions, Option<u32>)> {
185        let killpriv = if self.killpriv_v2.load(Ordering::Relaxed)
186            && (fuse_flags & FOPEN_IN_KILL_SUIDGID != 0)
187        {
188            self::drop_cap_fsetid()?
189        } else {
190            None
191        };
192        let file = self.open_inode(inode, flags as i32)?;
193        drop(killpriv);
194
195        let data = HandleData::new(inode, file, flags);
196        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
197        self.handle_map.insert(handle, data);
198
199        let mut opts = OpenOptions::empty();
200        match self.cfg.cache_policy {
201            // We only set the direct I/O option on files.
202            CachePolicy::Never => opts.set(
203                OpenOptions::DIRECT_IO,
204                flags & (libc::O_DIRECTORY as u32) == 0,
205            ),
206            CachePolicy::Metadata => {
207                if flags & (libc::O_DIRECTORY as u32) == 0 {
208                    opts |= OpenOptions::DIRECT_IO;
209                } else {
210                    opts |= OpenOptions::CACHE_DIR | OpenOptions::KEEP_CACHE;
211                }
212            }
213            CachePolicy::Always => {
214                opts |= OpenOptions::KEEP_CACHE;
215                if flags & (libc::O_DIRECTORY as u32) != 0 {
216                    opts |= OpenOptions::CACHE_DIR;
217                }
218            }
219            _ => {}
220        };
221
222        Ok((Some(handle), opts, None))
223    }
224
225    fn do_getattr(
226        &self,
227        inode: Inode,
228        handle: Option<Handle>,
229    ) -> io::Result<(libc::stat64, Duration)> {
230        let st;
231        let data = self.inode_map.get(inode).map_err(|e| {
232            error!("fuse: do_getattr ino {} Not find err {:?}", inode, e);
233            e
234        })?;
235
236        // kernel sends 0 as handle in case of no_open, and it depends on fuse server to handle
237        // this case correctly.
238        if !self.no_open.load(Ordering::Relaxed) && handle.is_some() {
239            // Safe as we just checked handle
240            let hd = self.handle_map.get(handle.unwrap(), inode)?;
241            st = stat_fd(hd.get_file(), None);
242        } else {
243            st = data.handle.stat();
244        }
245
246        let st = st.map_err(|e| {
247            error!("fuse: do_getattr stat failed ino {} err {:?}", inode, e);
248            e
249        })?;
250
251        Ok((st, self.cfg.attr_timeout))
252    }
253
254    fn do_unlink(&self, parent: Inode, name: &CStr, flags: libc::c_int) -> io::Result<()> {
255        let data = self.inode_map.get(parent)?;
256        let file = data.get_file()?;
257        // Safe because this doesn't modify any memory and we check the return value.
258        let res = unsafe { libc::unlinkat(file.as_raw_fd(), name.as_ptr(), flags) };
259        if res == 0 {
260            Ok(())
261        } else {
262            Err(io::Error::last_os_error())
263        }
264    }
265
266    fn get_dirdata(
267        &self,
268        handle: Handle,
269        inode: Inode,
270        flags: libc::c_int,
271    ) -> io::Result<Arc<HandleData>> {
272        let no_open = self.no_opendir.load(Ordering::Relaxed);
273        if !no_open {
274            self.handle_map.get(handle, inode)
275        } else {
276            let file = self.open_inode(inode, flags | libc::O_DIRECTORY)?;
277            Ok(Arc::new(HandleData::new(inode, file, flags as u32)))
278        }
279    }
280
281    fn get_data(
282        &self,
283        handle: Handle,
284        inode: Inode,
285        flags: libc::c_int,
286    ) -> io::Result<Arc<HandleData>> {
287        let no_open = self.no_open.load(Ordering::Relaxed);
288        if !no_open {
289            self.handle_map.get(handle, inode)
290        } else {
291            let file = self.open_inode(inode, flags)?;
292            Ok(Arc::new(HandleData::new(inode, file, flags as u32)))
293        }
294    }
295}
296
297impl<S: BitmapSlice + Send + Sync> FileSystem for PassthroughFs<S> {
298    type Inode = Inode;
299    type Handle = Handle;
300
301    fn init(&self, capable: FsOptions) -> io::Result<FsOptions> {
302        if self.cfg.do_import {
303            self.import()?;
304        }
305
306        let mut opts = FsOptions::DO_READDIRPLUS | FsOptions::READDIRPLUS_AUTO;
307        // !cfg.do_import means we are under vfs, in which case capable is already
308        // negotiated and must be honored.
309        if (!self.cfg.do_import || self.cfg.writeback)
310            && capable.contains(FsOptions::WRITEBACK_CACHE)
311        {
312            opts |= FsOptions::WRITEBACK_CACHE;
313            self.writeback.store(true, Ordering::Relaxed);
314        }
315        if (!self.cfg.do_import || self.cfg.no_open)
316            && capable.contains(FsOptions::ZERO_MESSAGE_OPEN)
317        {
318            opts |= FsOptions::ZERO_MESSAGE_OPEN;
319            // We can't support FUSE_ATOMIC_O_TRUNC with no_open
320            opts.remove(FsOptions::ATOMIC_O_TRUNC);
321            self.no_open.store(true, Ordering::Relaxed);
322        }
323        if (!self.cfg.do_import || self.cfg.no_opendir)
324            && capable.contains(FsOptions::ZERO_MESSAGE_OPENDIR)
325        {
326            opts |= FsOptions::ZERO_MESSAGE_OPENDIR;
327            self.no_opendir.store(true, Ordering::Relaxed);
328        }
329        if (!self.cfg.do_import || self.cfg.killpriv_v2)
330            && capable.contains(FsOptions::HANDLE_KILLPRIV_V2)
331        {
332            opts |= FsOptions::HANDLE_KILLPRIV_V2;
333            self.killpriv_v2.store(true, Ordering::Relaxed);
334        }
335
336        if capable.contains(FsOptions::PERFILE_DAX) {
337            opts |= FsOptions::PERFILE_DAX;
338            self.perfile_dax.store(true, Ordering::Relaxed);
339        }
340
341        Ok(opts)
342    }
343
344    fn destroy(&self) {
345        self.handle_map.clear();
346        self.inode_map.clear();
347
348        if let Err(e) = self.import() {
349            error!("fuse: failed to destroy instance, {:?}", e);
350        };
351    }
352
353    fn statfs(&self, _ctx: &Context, inode: Inode) -> io::Result<libc::statvfs64> {
354        let mut out = MaybeUninit::<libc::statvfs64>::zeroed();
355        let data = self.inode_map.get(inode)?;
356        let file = data.get_file()?;
357
358        // Safe because this will only modify `out` and we check the return value.
359        match unsafe { libc::fstatvfs64(file.as_raw_fd(), out.as_mut_ptr()) } {
360            // Safe because the kernel guarantees that `out` has been initialized.
361            0 => Ok(unsafe { out.assume_init() }),
362            _ => Err(io::Error::last_os_error()),
363        }
364    }
365
366    fn lookup(&self, _ctx: &Context, parent: Inode, name: &CStr) -> io::Result<Entry> {
367        // Don't use is_safe_path_component(), allow "." and ".." for NFS export support
368        if name.to_bytes_with_nul().contains(&SLASH_ASCII) {
369            return Err(einval());
370        }
371        self.do_lookup(parent, name)
372    }
373
374    fn forget(&self, _ctx: &Context, inode: Inode, count: u64) {
375        let mut inodes = self.inode_map.get_map_mut();
376
377        self.forget_one(&mut inodes, inode, count)
378    }
379
380    fn batch_forget(&self, _ctx: &Context, requests: Vec<(Inode, u64)>) {
381        let mut inodes = self.inode_map.get_map_mut();
382
383        for (inode, count) in requests {
384            self.forget_one(&mut inodes, inode, count)
385        }
386    }
387
388    fn opendir(
389        &self,
390        _ctx: &Context,
391        inode: Inode,
392        flags: u32,
393    ) -> io::Result<(Option<Handle>, OpenOptions)> {
394        if self.no_opendir.load(Ordering::Relaxed) {
395            info!("fuse: opendir is not supported.");
396            Err(enosys())
397        } else {
398            self.do_open(inode, flags | (libc::O_DIRECTORY as u32), 0)
399                .map(|(a, b, _)| (a, b))
400        }
401    }
402
403    fn releasedir(
404        &self,
405        _ctx: &Context,
406        inode: Inode,
407        _flags: u32,
408        handle: Handle,
409    ) -> io::Result<()> {
410        if self.no_opendir.load(Ordering::Relaxed) {
411            info!("fuse: releasedir is not supported.");
412            Err(io::Error::from_raw_os_error(libc::ENOSYS))
413        } else {
414            self.do_release(inode, handle)
415        }
416    }
417
418    fn mkdir(
419        &self,
420        ctx: &Context,
421        parent: Inode,
422        name: &CStr,
423        mode: u32,
424        umask: u32,
425    ) -> io::Result<Entry> {
426        self.validate_path_component(name)?;
427
428        let data = self.inode_map.get(parent)?;
429
430        let res = {
431            let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
432
433            let file = data.get_file()?;
434            // Safe because this doesn't modify any memory and we check the return value.
435            unsafe { libc::mkdirat(file.as_raw_fd(), name.as_ptr(), mode & !umask) }
436        };
437        if res < 0 {
438            return Err(io::Error::last_os_error());
439        }
440
441        self.do_lookup(parent, name)
442    }
443
444    fn rmdir(&self, _ctx: &Context, parent: Inode, name: &CStr) -> io::Result<()> {
445        self.validate_path_component(name)?;
446        self.do_unlink(parent, name, libc::AT_REMOVEDIR)
447    }
448
449    fn readdir(
450        &self,
451        _ctx: &Context,
452        inode: Inode,
453        handle: Handle,
454        size: u32,
455        offset: u64,
456        add_entry: &mut dyn FnMut(DirEntry) -> io::Result<usize>,
457    ) -> io::Result<()> {
458        if self.no_readdir.load(Ordering::Relaxed) {
459            return Ok(());
460        }
461        self.do_readdir(inode, handle, size, offset, &mut |mut dir_entry, _dir| {
462            dir_entry.ino = {
463                // Safe because do_readdir() has ensured dir_entry.name is a
464                // valid [u8] generated by CStr::to_bytes().
465                let name = unsafe {
466                    CStr::from_bytes_with_nul_unchecked(std::slice::from_raw_parts(
467                        &dir_entry.name[0],
468                        dir_entry.name.len() + 1,
469                    ))
470                };
471
472                let entry = self.do_lookup(inode, name)?;
473                let mut inodes = self.inode_map.get_map_mut();
474                self.forget_one(&mut inodes, entry.inode, 1);
475                entry.inode
476            };
477
478            add_entry(dir_entry)
479        })
480    }
481
482    fn readdirplus(
483        &self,
484        _ctx: &Context,
485        inode: Inode,
486        handle: Handle,
487        size: u32,
488        offset: u64,
489        add_entry: &mut dyn FnMut(DirEntry, Entry) -> io::Result<usize>,
490    ) -> io::Result<()> {
491        if self.no_readdir.load(Ordering::Relaxed) {
492            return Ok(());
493        }
494        self.do_readdir(inode, handle, size, offset, &mut |mut dir_entry, _dir| {
495            // Safe because do_readdir() has ensured dir_entry.name is a
496            // valid [u8] generated by CStr::to_bytes().
497            let name = unsafe {
498                CStr::from_bytes_with_nul_unchecked(std::slice::from_raw_parts(
499                    &dir_entry.name[0],
500                    dir_entry.name.len() + 1,
501                ))
502            };
503            let entry = self.do_lookup(inode, name)?;
504            let ino = entry.inode;
505            dir_entry.ino = entry.attr.st_ino;
506
507            add_entry(dir_entry, entry).map(|r| {
508                // true when size is not large enough to hold entry.
509                if r == 0 {
510                    // Release the refcount acquired by self.do_lookup().
511                    let mut inodes = self.inode_map.get_map_mut();
512                    self.forget_one(&mut inodes, ino, 1);
513                }
514                r
515            })
516        })
517    }
518
519    fn open(
520        &self,
521        _ctx: &Context,
522        inode: Inode,
523        flags: u32,
524        fuse_flags: u32,
525    ) -> io::Result<(Option<Handle>, OpenOptions, Option<u32>)> {
526        if self.no_open.load(Ordering::Relaxed) {
527            info!("fuse: open is not supported.");
528            Err(enosys())
529        } else {
530            self.do_open(inode, flags, fuse_flags)
531        }
532    }
533
534    fn release(
535        &self,
536        _ctx: &Context,
537        inode: Inode,
538        _flags: u32,
539        handle: Handle,
540        _flush: bool,
541        _flock_release: bool,
542        _lock_owner: Option<u64>,
543    ) -> io::Result<()> {
544        if self.no_open.load(Ordering::Relaxed) {
545            Err(enosys())
546        } else {
547            self.do_release(inode, handle)
548        }
549    }
550
551    fn create(
552        &self,
553        ctx: &Context,
554        parent: Inode,
555        name: &CStr,
556        args: CreateIn,
557    ) -> io::Result<(Entry, Option<Handle>, OpenOptions, Option<u32>)> {
558        self.validate_path_component(name)?;
559
560        let dir = self.inode_map.get(parent)?;
561        let dir_file = dir.get_file()?;
562
563        let new_file = {
564            let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
565
566            let flags = self.get_writeback_open_flags(args.flags as i32);
567            Self::create_file_excl(&dir_file, name, flags, args.mode & !(args.umask & 0o777))?
568        };
569
570        let entry = self.do_lookup(parent, name)?;
571        let file = match new_file {
572            // File didn't exist, now created by create_file_excl()
573            Some(f) => f,
574            // File exists, and args.flags doesn't contain O_EXCL. Now let's open it with
575            // open_inode().
576            None => {
577                // Cap restored when _killpriv is dropped
578                let _killpriv = if self.killpriv_v2.load(Ordering::Relaxed)
579                    && (args.fuse_flags & FOPEN_IN_KILL_SUIDGID != 0)
580                {
581                    self::drop_cap_fsetid()?
582                } else {
583                    None
584                };
585
586                let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
587                self.open_inode(entry.inode, args.flags as i32)?
588            }
589        };
590
591        let ret_handle = if !self.no_open.load(Ordering::Relaxed) {
592            let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
593            let data = HandleData::new(entry.inode, file, args.flags);
594
595            self.handle_map.insert(handle, data);
596            Some(handle)
597        } else {
598            None
599        };
600
601        let mut opts = OpenOptions::empty();
602        match self.cfg.cache_policy {
603            CachePolicy::Never => opts |= OpenOptions::DIRECT_IO,
604            CachePolicy::Metadata => opts |= OpenOptions::DIRECT_IO,
605            CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE,
606            _ => {}
607        };
608
609        Ok((entry, ret_handle, opts, None))
610    }
611
612    fn unlink(&self, _ctx: &Context, parent: Inode, name: &CStr) -> io::Result<()> {
613        self.validate_path_component(name)?;
614        self.do_unlink(parent, name, 0)
615    }
616
617    #[cfg(any(feature = "vhost-user-fs", feature = "virtiofs"))]
618    fn setupmapping(
619        &self,
620        _ctx: &Context,
621        inode: Inode,
622        _handle: Handle,
623        foffset: u64,
624        len: u64,
625        flags: u64,
626        moffset: u64,
627        vu_req: &mut dyn FsCacheReqHandler,
628    ) -> io::Result<()> {
629        debug!(
630            "fuse: setupmapping ino {:?} foffset 0x{:x} len 0x{:x} flags 0x{:x} moffset 0x{:x}",
631            inode, foffset, len, flags, moffset
632        );
633
634        let open_flags = if (flags & virtio_fs::SetupmappingFlags::WRITE.bits()) != 0 {
635            libc::O_RDWR
636        } else {
637            libc::O_RDONLY
638        };
639
640        let file = self.open_inode(inode, open_flags)?;
641        (*vu_req).map(foffset, moffset, len, flags, file.as_raw_fd())
642    }
643
644    #[cfg(any(feature = "vhost-user-fs", feature = "virtiofs"))]
645    fn removemapping(
646        &self,
647        _ctx: &Context,
648        _inode: Inode,
649        requests: Vec<virtio_fs::RemovemappingOne>,
650        vu_req: &mut dyn FsCacheReqHandler,
651    ) -> io::Result<()> {
652        (*vu_req).unmap(requests)
653    }
654
655    fn read(
656        &self,
657        _ctx: &Context,
658        inode: Inode,
659        handle: Handle,
660        w: &mut dyn ZeroCopyWriter,
661        size: u32,
662        offset: u64,
663        _lock_owner: Option<u64>,
664        flags: u32,
665    ) -> io::Result<usize> {
666        let data = self.get_data(handle, inode, libc::O_RDONLY)?;
667
668        // Manually implement File::try_clone() by borrowing fd of data.file instead of dup().
669        // It's safe because the `data` variable's lifetime spans the whole function,
670        // so data.file won't be closed.
671        let f = unsafe { File::from_raw_fd(data.borrow_fd().as_raw_fd()) };
672
673        self.check_fd_flags(data.clone(), f.as_raw_fd(), flags)?;
674
675        let mut f = ManuallyDrop::new(f);
676
677        w.write_from(&mut *f, size as usize, offset)
678    }
679
680    fn write(
681        &self,
682        _ctx: &Context,
683        inode: Inode,
684        handle: Handle,
685        r: &mut dyn ZeroCopyReader,
686        size: u32,
687        offset: u64,
688        _lock_owner: Option<u64>,
689        _delayed_write: bool,
690        flags: u32,
691        fuse_flags: u32,
692    ) -> io::Result<usize> {
693        let data = self.get_data(handle, inode, libc::O_RDWR)?;
694
695        // Manually implement File::try_clone() by borrowing fd of data.file instead of dup().
696        // It's safe because the `data` variable's lifetime spans the whole function,
697        // so data.file won't be closed.
698        let f = unsafe { File::from_raw_fd(data.borrow_fd().as_raw_fd()) };
699
700        self.check_fd_flags(data.clone(), f.as_raw_fd(), flags)?;
701
702        if self.seal_size.load(Ordering::Relaxed) {
703            let st = stat_fd(&f, None)?;
704            self.seal_size_check(Opcode::Write, st.st_size as u64, offset, size as u64, 0)?;
705        }
706
707        let mut f = ManuallyDrop::new(f);
708
709        // Cap restored when _killpriv is dropped
710        let _killpriv =
711            if self.killpriv_v2.load(Ordering::Relaxed) && (fuse_flags & WRITE_KILL_PRIV != 0) {
712                self::drop_cap_fsetid()?
713            } else {
714                None
715            };
716
717        r.read_to(&mut *f, size as usize, offset)
718    }
719
720    fn getattr(
721        &self,
722        _ctx: &Context,
723        inode: Inode,
724        handle: Option<Handle>,
725    ) -> io::Result<(libc::stat64, Duration)> {
726        self.do_getattr(inode, handle)
727    }
728
729    fn setattr(
730        &self,
731        _ctx: &Context,
732        inode: Inode,
733        attr: libc::stat64,
734        handle: Option<Handle>,
735        valid: SetattrValid,
736    ) -> io::Result<(libc::stat64, Duration)> {
737        let inode_data = self.inode_map.get(inode)?;
738
739        enum Data {
740            Handle(Arc<HandleData>),
741            ProcPath(CString),
742        }
743
744        let file = inode_data.get_file()?;
745        let data = if self.no_open.load(Ordering::Relaxed) {
746            let pathname = CString::new(format!("{}", file.as_raw_fd()))
747                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
748            Data::ProcPath(pathname)
749        } else {
750            // If we have a handle then use it otherwise get a new fd from the inode.
751            if let Some(handle) = handle {
752                let hd = self.handle_map.get(handle, inode)?;
753                Data::Handle(hd)
754            } else {
755                let pathname = CString::new(format!("{}", file.as_raw_fd()))
756                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
757                Data::ProcPath(pathname)
758            }
759        };
760
761        if valid.contains(SetattrValid::SIZE) && self.seal_size.load(Ordering::Relaxed) {
762            return Err(io::Error::from_raw_os_error(libc::EPERM));
763        }
764
765        if valid.contains(SetattrValid::MODE) {
766            // Safe because this doesn't modify any memory and we check the return value.
767            let res = unsafe {
768                match data {
769                    Data::Handle(ref h) => libc::fchmod(h.borrow_fd().as_raw_fd(), attr.st_mode),
770                    Data::ProcPath(ref p) => {
771                        libc::fchmodat(self.proc_self_fd.as_raw_fd(), p.as_ptr(), attr.st_mode, 0)
772                    }
773                }
774            };
775            if res < 0 {
776                return Err(io::Error::last_os_error());
777            }
778        }
779
780        if valid.intersects(SetattrValid::UID | SetattrValid::GID) {
781            let uid = if valid.contains(SetattrValid::UID) {
782                attr.st_uid
783            } else {
784                // Cannot use -1 here because these are unsigned values.
785                u32::MAX
786            };
787            let gid = if valid.contains(SetattrValid::GID) {
788                attr.st_gid
789            } else {
790                // Cannot use -1 here because these are unsigned values.
791                u32::MAX
792            };
793
794            // Safe because this is a constant value and a valid C string.
795            let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) };
796
797            // Safe because this doesn't modify any memory and we check the return value.
798            let res = unsafe {
799                libc::fchownat(
800                    file.as_raw_fd(),
801                    empty.as_ptr(),
802                    uid,
803                    gid,
804                    libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW,
805                )
806            };
807            if res < 0 {
808                return Err(io::Error::last_os_error());
809            }
810        }
811
812        if valid.contains(SetattrValid::SIZE) {
813            // Cap restored when _killpriv is dropped
814            let _killpriv = if self.killpriv_v2.load(Ordering::Relaxed)
815                && valid.contains(SetattrValid::KILL_SUIDGID)
816            {
817                self::drop_cap_fsetid()?
818            } else {
819                None
820            };
821
822            // Safe because this doesn't modify any memory and we check the return value.
823            let res = match data {
824                Data::Handle(ref h) => unsafe {
825                    libc::ftruncate(h.borrow_fd().as_raw_fd(), attr.st_size)
826                },
827                _ => {
828                    // There is no `ftruncateat` so we need to get a new fd and truncate it.
829                    let f = self.open_inode(inode, libc::O_NONBLOCK | libc::O_RDWR)?;
830                    unsafe { libc::ftruncate(f.as_raw_fd(), attr.st_size) }
831                }
832            };
833            if res < 0 {
834                return Err(io::Error::last_os_error());
835            }
836        }
837
838        if valid.intersects(SetattrValid::ATIME | SetattrValid::MTIME) {
839            let mut tvs = [
840                libc::timespec {
841                    tv_sec: 0,
842                    tv_nsec: libc::UTIME_OMIT,
843                },
844                libc::timespec {
845                    tv_sec: 0,
846                    tv_nsec: libc::UTIME_OMIT,
847                },
848            ];
849
850            if valid.contains(SetattrValid::ATIME_NOW) {
851                tvs[0].tv_nsec = libc::UTIME_NOW;
852            } else if valid.contains(SetattrValid::ATIME) {
853                tvs[0].tv_sec = attr.st_atime;
854                tvs[0].tv_nsec = attr.st_atime_nsec;
855            }
856
857            if valid.contains(SetattrValid::MTIME_NOW) {
858                tvs[1].tv_nsec = libc::UTIME_NOW;
859            } else if valid.contains(SetattrValid::MTIME) {
860                tvs[1].tv_sec = attr.st_mtime;
861                tvs[1].tv_nsec = attr.st_mtime_nsec;
862            }
863
864            // Safe because this doesn't modify any memory and we check the return value.
865            let res = match data {
866                Data::Handle(ref h) => unsafe {
867                    libc::futimens(h.borrow_fd().as_raw_fd(), tvs.as_ptr())
868                },
869                Data::ProcPath(ref p) => unsafe {
870                    libc::utimensat(self.proc_self_fd.as_raw_fd(), p.as_ptr(), tvs.as_ptr(), 0)
871                },
872            };
873            if res < 0 {
874                return Err(io::Error::last_os_error());
875            }
876        }
877
878        self.do_getattr(inode, handle)
879    }
880
881    fn rename(
882        &self,
883        _ctx: &Context,
884        olddir: Inode,
885        oldname: &CStr,
886        newdir: Inode,
887        newname: &CStr,
888        flags: u32,
889    ) -> io::Result<()> {
890        self.validate_path_component(oldname)?;
891        self.validate_path_component(newname)?;
892
893        let old_inode = self.inode_map.get(olddir)?;
894        let new_inode = self.inode_map.get(newdir)?;
895        let old_file = old_inode.get_file()?;
896        let new_file = new_inode.get_file()?;
897
898        // Safe because this doesn't modify any memory and we check the return value.
899        // TODO: Switch to libc::renameat2 once https://github.com/rust-lang/libc/pull/1508 lands
900        // and we have glibc 2.28.
901        let res = unsafe {
902            libc::syscall(
903                libc::SYS_renameat2,
904                old_file.as_raw_fd(),
905                oldname.as_ptr(),
906                new_file.as_raw_fd(),
907                newname.as_ptr(),
908                flags,
909            )
910        };
911        if res == 0 {
912            Ok(())
913        } else {
914            Err(io::Error::last_os_error())
915        }
916    }
917
918    fn mknod(
919        &self,
920        ctx: &Context,
921        parent: Inode,
922        name: &CStr,
923        mode: u32,
924        rdev: u32,
925        umask: u32,
926    ) -> io::Result<Entry> {
927        self.validate_path_component(name)?;
928
929        let data = self.inode_map.get(parent)?;
930        let file = data.get_file()?;
931
932        let res = {
933            let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
934
935            // Safe because this doesn't modify any memory and we check the return value.
936            unsafe {
937                libc::mknodat(
938                    file.as_raw_fd(),
939                    name.as_ptr(),
940                    (mode & !umask) as libc::mode_t,
941                    u64::from(rdev),
942                )
943            }
944        };
945        if res < 0 {
946            Err(io::Error::last_os_error())
947        } else {
948            self.do_lookup(parent, name)
949        }
950    }
951
952    fn link(
953        &self,
954        _ctx: &Context,
955        inode: Inode,
956        newparent: Inode,
957        newname: &CStr,
958    ) -> io::Result<Entry> {
959        self.validate_path_component(newname)?;
960
961        let data = self.inode_map.get(inode)?;
962        let new_inode = self.inode_map.get(newparent)?;
963        let file = data.get_file()?;
964        let new_file = new_inode.get_file()?;
965
966        // Safe because this is a constant value and a valid C string.
967        let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) };
968
969        // Safe because this doesn't modify any memory and we check the return value.
970        let res = unsafe {
971            libc::linkat(
972                file.as_raw_fd(),
973                empty.as_ptr(),
974                new_file.as_raw_fd(),
975                newname.as_ptr(),
976                libc::AT_EMPTY_PATH,
977            )
978        };
979        if res == 0 {
980            self.do_lookup(newparent, newname)
981        } else {
982            Err(io::Error::last_os_error())
983        }
984    }
985
986    fn symlink(
987        &self,
988        ctx: &Context,
989        linkname: &CStr,
990        parent: Inode,
991        name: &CStr,
992    ) -> io::Result<Entry> {
993        self.validate_path_component(name)?;
994
995        let data = self.inode_map.get(parent)?;
996
997        let res = {
998            let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
999
1000            let file = data.get_file()?;
1001            // Safe because this doesn't modify any memory and we check the return value.
1002            unsafe { libc::symlinkat(linkname.as_ptr(), file.as_raw_fd(), name.as_ptr()) }
1003        };
1004        if res == 0 {
1005            self.do_lookup(parent, name)
1006        } else {
1007            Err(io::Error::last_os_error())
1008        }
1009    }
1010
1011    fn readlink(&self, _ctx: &Context, inode: Inode) -> io::Result<Vec<u8>> {
1012        // Safe because this is a constant value and a valid C string.
1013        let empty = unsafe { CStr::from_bytes_with_nul_unchecked(EMPTY_CSTR) };
1014        let mut buf = Vec::<u8>::with_capacity(libc::PATH_MAX as usize);
1015        let data = self.inode_map.get(inode)?;
1016        let file = data.get_file()?;
1017
1018        // Safe because this will only modify the contents of `buf` and we check the return value.
1019        let res = unsafe {
1020            libc::readlinkat(
1021                file.as_raw_fd(),
1022                empty.as_ptr(),
1023                buf.as_mut_ptr() as *mut libc::c_char,
1024                libc::PATH_MAX as usize,
1025            )
1026        };
1027        if res < 0 {
1028            return Err(io::Error::last_os_error());
1029        }
1030
1031        // Safe because we trust the value returned by kernel.
1032        unsafe { buf.set_len(res as usize) };
1033
1034        Ok(buf)
1035    }
1036
1037    fn flush(
1038        &self,
1039        _ctx: &Context,
1040        inode: Inode,
1041        handle: Handle,
1042        _lock_owner: u64,
1043    ) -> io::Result<()> {
1044        if self.no_open.load(Ordering::Relaxed) {
1045            return Err(enosys());
1046        }
1047
1048        let data = self.handle_map.get(handle, inode)?;
1049
1050        // Since this method is called whenever an fd is closed in the client, we can emulate that
1051        // behavior by doing the same thing (dup-ing the fd and then immediately closing it). Safe
1052        // because this doesn't modify any memory and we check the return values.
1053        unsafe {
1054            let newfd = libc::dup(data.borrow_fd().as_raw_fd());
1055            if newfd < 0 {
1056                return Err(io::Error::last_os_error());
1057            }
1058
1059            if libc::close(newfd) < 0 {
1060                Err(io::Error::last_os_error())
1061            } else {
1062                Ok(())
1063            }
1064        }
1065    }
1066
1067    fn fsync(
1068        &self,
1069        _ctx: &Context,
1070        inode: Inode,
1071        datasync: bool,
1072        handle: Handle,
1073    ) -> io::Result<()> {
1074        let data = self.get_data(handle, inode, libc::O_RDONLY)?;
1075        let fd = data.borrow_fd();
1076
1077        // Safe because this doesn't modify any memory and we check the return value.
1078        let res = unsafe {
1079            if datasync {
1080                libc::fdatasync(fd.as_raw_fd())
1081            } else {
1082                libc::fsync(fd.as_raw_fd())
1083            }
1084        };
1085        if res == 0 {
1086            Ok(())
1087        } else {
1088            Err(io::Error::last_os_error())
1089        }
1090    }
1091
1092    fn fsyncdir(
1093        &self,
1094        ctx: &Context,
1095        inode: Inode,
1096        datasync: bool,
1097        handle: Handle,
1098    ) -> io::Result<()> {
1099        self.fsync(ctx, inode, datasync, handle)
1100    }
1101
1102    fn access(&self, ctx: &Context, inode: Inode, mask: u32) -> io::Result<()> {
1103        let data = self.inode_map.get(inode)?;
1104        let st = stat_fd(&data.get_file()?, None)?;
1105        let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK);
1106
1107        if mode == libc::F_OK {
1108            // The file exists since we were able to call `stat(2)` on it.
1109            return Ok(());
1110        }
1111
1112        if (mode & libc::R_OK) != 0
1113            && ctx.uid != 0
1114            && (st.st_uid != ctx.uid || st.st_mode & 0o400 == 0)
1115            && (st.st_gid != ctx.gid || st.st_mode & 0o040 == 0)
1116            && st.st_mode & 0o004 == 0
1117        {
1118            return Err(io::Error::from_raw_os_error(libc::EACCES));
1119        }
1120
1121        if (mode & libc::W_OK) != 0
1122            && ctx.uid != 0
1123            && (st.st_uid != ctx.uid || st.st_mode & 0o200 == 0)
1124            && (st.st_gid != ctx.gid || st.st_mode & 0o020 == 0)
1125            && st.st_mode & 0o002 == 0
1126        {
1127            return Err(io::Error::from_raw_os_error(libc::EACCES));
1128        }
1129
1130        // root can only execute something if it is executable by one of the owner, the group, or
1131        // everyone.
1132        if (mode & libc::X_OK) != 0
1133            && (ctx.uid != 0 || st.st_mode & 0o111 == 0)
1134            && (st.st_uid != ctx.uid || st.st_mode & 0o100 == 0)
1135            && (st.st_gid != ctx.gid || st.st_mode & 0o010 == 0)
1136            && st.st_mode & 0o001 == 0
1137        {
1138            return Err(io::Error::from_raw_os_error(libc::EACCES));
1139        }
1140
1141        Ok(())
1142    }
1143
1144    fn setxattr(
1145        &self,
1146        _ctx: &Context,
1147        inode: Inode,
1148        name: &CStr,
1149        value: &[u8],
1150        flags: u32,
1151    ) -> io::Result<()> {
1152        if !self.cfg.xattr {
1153            return Err(enosys());
1154        }
1155
1156        let data = self.inode_map.get(inode)?;
1157        let file = data.get_file()?;
1158        let pathname = CString::new(format!("/proc/self/fd/{}", file.as_raw_fd()))
1159            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1160
1161        // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we
1162        // need to use the {set,get,remove,list}xattr variants.
1163        // Safe because this doesn't modify any memory and we check the return value.
1164        let res = unsafe {
1165            libc::setxattr(
1166                pathname.as_ptr(),
1167                name.as_ptr(),
1168                value.as_ptr() as *const libc::c_void,
1169                value.len(),
1170                flags as libc::c_int,
1171            )
1172        };
1173        if res == 0 {
1174            Ok(())
1175        } else {
1176            Err(io::Error::last_os_error())
1177        }
1178    }
1179
1180    fn getxattr(
1181        &self,
1182        _ctx: &Context,
1183        inode: Inode,
1184        name: &CStr,
1185        size: u32,
1186    ) -> io::Result<GetxattrReply> {
1187        if !self.cfg.xattr {
1188            return Err(enosys());
1189        }
1190
1191        let data = self.inode_map.get(inode)?;
1192        let file = data.get_file()?;
1193        let mut buf = Vec::<u8>::with_capacity(size as usize);
1194        let pathname = CString::new(format!("/proc/self/fd/{}", file.as_raw_fd(),))
1195            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1196
1197        // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we
1198        // need to use the {set,get,remove,list}xattr variants.
1199        // Safe because this will only modify the contents of `buf`.
1200        let res = unsafe {
1201            libc::getxattr(
1202                pathname.as_ptr(),
1203                name.as_ptr(),
1204                buf.as_mut_ptr() as *mut libc::c_void,
1205                size as libc::size_t,
1206            )
1207        };
1208        if res < 0 {
1209            return Err(io::Error::last_os_error());
1210        }
1211
1212        if size == 0 {
1213            Ok(GetxattrReply::Count(res as u32))
1214        } else {
1215            // Safe because we trust the value returned by kernel.
1216            unsafe { buf.set_len(res as usize) };
1217            Ok(GetxattrReply::Value(buf))
1218        }
1219    }
1220
1221    fn listxattr(&self, _ctx: &Context, inode: Inode, size: u32) -> io::Result<ListxattrReply> {
1222        if !self.cfg.xattr {
1223            return Err(enosys());
1224        }
1225
1226        let data = self.inode_map.get(inode)?;
1227        let file = data.get_file()?;
1228        let mut buf = Vec::<u8>::with_capacity(size as usize);
1229        let pathname = CString::new(format!("/proc/self/fd/{}", file.as_raw_fd()))
1230            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1231
1232        // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we
1233        // need to use the {set,get,remove,list}xattr variants.
1234        // Safe because this will only modify the contents of `buf`.
1235        let res = unsafe {
1236            libc::listxattr(
1237                pathname.as_ptr(),
1238                buf.as_mut_ptr() as *mut libc::c_char,
1239                size as libc::size_t,
1240            )
1241        };
1242        if res < 0 {
1243            return Err(io::Error::last_os_error());
1244        }
1245
1246        if size == 0 {
1247            Ok(ListxattrReply::Count(res as u32))
1248        } else {
1249            // Safe because we trust the value returned by kernel.
1250            unsafe { buf.set_len(res as usize) };
1251            Ok(ListxattrReply::Names(buf))
1252        }
1253    }
1254
1255    fn removexattr(&self, _ctx: &Context, inode: Inode, name: &CStr) -> io::Result<()> {
1256        if !self.cfg.xattr {
1257            return Err(enosys());
1258        }
1259
1260        let data = self.inode_map.get(inode)?;
1261        let file = data.get_file()?;
1262        let pathname = CString::new(format!("/proc/self/fd/{}", file.as_raw_fd()))
1263            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1264
1265        // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we
1266        // need to use the {set,get,remove,list}xattr variants.
1267        // Safe because this doesn't modify any memory and we check the return value.
1268        let res = unsafe { libc::removexattr(pathname.as_ptr(), name.as_ptr()) };
1269        if res == 0 {
1270            Ok(())
1271        } else {
1272            Err(io::Error::last_os_error())
1273        }
1274    }
1275
1276    fn fallocate(
1277        &self,
1278        _ctx: &Context,
1279        inode: Inode,
1280        handle: Handle,
1281        mode: u32,
1282        offset: u64,
1283        length: u64,
1284    ) -> io::Result<()> {
1285        // Let the Arc<HandleData> in scope, otherwise fd may get invalid.
1286        let data = self.get_data(handle, inode, libc::O_RDWR)?;
1287        let fd = data.borrow_fd();
1288
1289        if self.seal_size.load(Ordering::Relaxed) {
1290            let st = stat_fd(&fd, None)?;
1291            self.seal_size_check(
1292                Opcode::Fallocate,
1293                st.st_size as u64,
1294                offset,
1295                length,
1296                mode as i32,
1297            )?;
1298        }
1299
1300        // Safe because this doesn't modify any memory and we check the return value.
1301        let res = unsafe {
1302            libc::fallocate64(
1303                fd.as_raw_fd(),
1304                mode as libc::c_int,
1305                offset as libc::off64_t,
1306                length as libc::off64_t,
1307            )
1308        };
1309        if res == 0 {
1310            Ok(())
1311        } else {
1312            Err(io::Error::last_os_error())
1313        }
1314    }
1315
1316    fn lseek(
1317        &self,
1318        _ctx: &Context,
1319        inode: Inode,
1320        handle: Handle,
1321        offset: u64,
1322        whence: u32,
1323    ) -> io::Result<u64> {
1324        // Let the Arc<HandleData> in scope, otherwise fd may get invalid.
1325        let data = self.handle_map.get(handle, inode)?;
1326
1327        // Acquire the lock to get exclusive access, otherwise it may break do_readdir().
1328        let (_guard, file) = data.get_file_mut();
1329
1330        // Safe because this doesn't modify any memory and we check the return value.
1331        let res = unsafe {
1332            libc::lseek(
1333                file.as_raw_fd(),
1334                offset as libc::off64_t,
1335                whence as libc::c_int,
1336            )
1337        };
1338        if res < 0 {
1339            Err(io::Error::last_os_error())
1340        } else {
1341            Ok(res as u64)
1342        }
1343    }
1344}