snix_build/oci/
spec.rs

//! Module to create an OCI runtime spec for a given [BuildRequest].
2use crate::buildservice::{BuildConstraints, BuildRequest};
3use oci_spec::runtime::{
4    Capability, LinuxIdMappingBuilder, LinuxNamespace, LinuxNamespaceBuilder, LinuxNamespaceType,
5};
6use std::{collections::HashSet, path::Path};
7
8use super::{
9    scratch_name,
10    subuid::{SubordinateError, SubordinateInfo},
11};
12use thiserror::Error;
13
/// Errors that can occur while assembling the OCI runtime spec.
#[derive(Debug, Error)]
pub enum SpecError {
    /// One of the `oci_spec` builders failed to produce its part of the spec.
    #[error("oci error: {0}")]
    OciError(oci_spec::OciSpecError),
    /// Looking up the subordinate id information via
    /// `SubordinateInfo::for_effective_user` failed.
    #[error("subordinate error: {0}")]
    SubordinateError(SubordinateError),
}
21
22/// For a given [BuildRequest], return an OCI runtime spec.
23///
24/// While there's no IO occuring in this function, the generated spec contains
25/// path references relative to the "bundle location".
26/// Due to overlayfs requiring its layers to be absolute paths, we also need a
27/// bundle_dir parameter, pointing to the location of the bundle dir itself.
28///
29/// The paths used in the spec are the following (relative to a "bundle root"):
30///
31/// - `inputs`, a directory where the castore nodes specified the build request
32///   inputs are supposed to be populated.
33/// - `outputs`, a directory where all writes to the store_dir during the build
34///   are directed to.
35/// - `root`, a minimal skeleton of files that'll be present at /.
36/// - `scratch`, a directory containing other directories which will be
37///   bind-mounted read-write into the container and used as scratch space
38///   during the build.
39///   No assumptions should be made about what's inside this directory.
40///
41/// Generating these paths, and populating contents, like a skeleton root
42/// is up to another function, this function doesn't do filesystem IO.
43pub(crate) fn make_spec(
44    request: &BuildRequest,
45    rootless: bool,
46    sandbox_shell: &str,
47) -> Result<oci_spec::runtime::Spec, SpecError> {
48    let allow_network = request
49        .constraints
50        .contains(&BuildConstraints::NetworkAccess);
51
52    // Assemble ro_host_mounts. Start with constraints.available_ro_paths.
53    let mut ro_host_mounts: Vec<_> = request
54        .constraints
55        .iter()
56        .filter_map(|constraint| match constraint {
57            BuildConstraints::AvailableReadOnlyPath(path) => Some((path.as_path(), path.as_path())),
58            _ => None,
59        })
60        .collect();
61
62    // If provide_bin_sh is set, mount sandbox_shell to /bin/sh
63    if request
64        .constraints
65        .contains(&BuildConstraints::ProvideBinSh)
66    {
67        ro_host_mounts.push((Path::new(sandbox_shell), Path::new("/bin/sh")))
68    }
69
70    oci_spec::runtime::SpecBuilder::default()
71        .process(
72            configure_process(
73                &request.command_args,
74                &request.working_dir,
75                request
76                    .environment_vars
77                    .iter()
78                    .map(|e| {
79                        (
80                            e.key.as_str(),
81                            // TODO: decide what to do with non-bytes env values
82                            String::from_utf8(e.value.to_vec()).expect("invalid string in env"),
83                        )
84                    })
85                    .collect::<Vec<_>>(),
86                rootless,
87            )
88            .map_err(SpecError::OciError)?,
89        )
90        .linux(configure_linux(allow_network, rootless)?)
91        .root(
92            oci_spec::runtime::RootBuilder::default()
93                .path("root")
94                .readonly(true)
95                .build()
96                .map_err(SpecError::OciError)?,
97        )
98        .hostname("localhost")
99        .mounts(
100            configure_mounts(
101                rootless,
102                allow_network,
103                request.scratch_paths.iter().map(|e| e.as_path()),
104                request.inputs.iter(),
105                &request.inputs_dir,
106                ro_host_mounts,
107            )
108            .map_err(SpecError::OciError)?,
109        )
110        .build()
111        .map_err(SpecError::OciError)
112}
113
114/// Return the Process part of the OCI Runtime spec.
115/// This configures the command, it's working dir, env and terminal setup.
116/// It also takes care of setting rlimits and capabilities.
117/// Capabilities are a bit more complicated in case rootless building is requested.
118fn configure_process<'a>(
119    command_args: &[String],
120    cwd: &Path,
121    env: impl IntoIterator<Item = (&'a str, String)>,
122    rootless: bool,
123) -> Result<oci_spec::runtime::Process, oci_spec::OciSpecError> {
124    let spec_builder = oci_spec::runtime::ProcessBuilder::default()
125        .args(command_args)
126        .env(
127            env.into_iter()
128                .map(|(k, v)| format!("{}={}", k, v))
129                .collect::<Vec<_>>(),
130        )
131        .terminal(true)
132        .user(
133            oci_spec::runtime::UserBuilder::default()
134                .uid(1000u32)
135                .gid(100u32)
136                .build()?,
137        )
138        .cwd(Path::new("/").join(cwd)) // relative to the bundle root, but at least runc wants it to also be absolute.
139        .capabilities({
140            let caps: HashSet<Capability> = if !rootless {
141                HashSet::from([Capability::AuditWrite, Capability::Kill])
142            } else {
143                HashSet::from([
144                    Capability::AuditWrite,
145                    Capability::Chown,
146                    Capability::DacOverride,
147                    Capability::Fowner,
148                    Capability::Fsetid,
149                    Capability::Kill,
150                    Capability::Mknod,
151                    Capability::NetBindService,
152                    Capability::NetRaw,
153                    Capability::Setfcap,
154                    Capability::Setgid,
155                    Capability::Setpcap,
156                    Capability::Setuid,
157                    Capability::SysChroot,
158                ])
159            };
160
161            oci_spec::runtime::LinuxCapabilitiesBuilder::default()
162                .bounding(caps.clone())
163                .effective(caps.clone())
164                .inheritable(caps.clone())
165                .permitted(caps.clone())
166                .ambient(caps)
167                .build()?
168        })
169        .rlimits([oci_spec::runtime::PosixRlimitBuilder::default()
170            .typ(oci_spec::runtime::PosixRlimitType::RlimitNofile)
171            .hard(1024_u64)
172            .soft(1024_u64)
173            .build()?])
174        .no_new_privileges(true);
175
176    spec_builder.build()
177}
178
179/// Return the Linux part of the OCI Runtime spec.
180/// This configures various namespaces, masked and read-only paths.
181fn configure_linux(
182    allow_network: bool,
183    rootless: bool,
184) -> Result<oci_spec::runtime::Linux, SpecError> {
185    let mut linux = oci_spec::runtime::Linux::default();
186
187    // explicitly set namespaces, depending on allow_network.
188    linux.set_namespaces(Some({
189        let mut namespace_types = vec![
190            LinuxNamespaceType::Pid,
191            LinuxNamespaceType::Ipc,
192            LinuxNamespaceType::Uts,
193            LinuxNamespaceType::Mount,
194            // We want to create a cgroup namespace in the future to be able to trace resource usage
195            // For now it's disabled as it causes issues in cases where the host machine is running in a
196            // messed up cgroup
197            // LinuxNamespaceType::Cgroup,
198        ];
199        if !allow_network {
200            namespace_types.push(LinuxNamespaceType::Network)
201        }
202        if rootless {
203            namespace_types.push(LinuxNamespaceType::User)
204        }
205
206        namespace_types
207            .into_iter()
208            .map(|e| LinuxNamespaceBuilder::default().typ(e).build())
209            .collect::<Result<Vec<LinuxNamespace>, _>>()
210            .map_err(SpecError::OciError)?
211    }));
212
213    linux.set_masked_paths(Some(
214        [
215            "/proc/kcore",
216            "/proc/latency_stats",
217            "/proc/timer_list",
218            "/proc/timer_stats",
219            "/proc/sched_debug",
220            "/sys/firmware",
221        ]
222        .into_iter()
223        .map(|e| e.to_string())
224        .collect::<Vec<_>>(),
225    ));
226
227    linux.set_readonly_paths(Some(
228        [
229            "/proc/asound",
230            "/proc/bus",
231            "/proc/fs",
232            "/proc/irq",
233            "/proc/sys",
234            "/proc/sysrq-trigger",
235        ]
236        .into_iter()
237        .map(|e| e.to_string())
238        .collect::<Vec<_>>(),
239    ));
240    let info = SubordinateInfo::for_effective_user().map_err(SpecError::SubordinateError)?;
241    linux.set_uid_mappings(Some(vec![
242        LinuxIdMappingBuilder::default()
243            .host_id(info.uid)
244            .container_id(0_u32)
245            .size(1_u32)
246            .build()
247            .unwrap(),
248        LinuxIdMappingBuilder::default()
249            .host_id(info.subuid)
250            .container_id(1000_u32)
251            .size(1_u32)
252            .build()
253            .unwrap(),
254    ]));
255    linux.set_gid_mappings(Some(vec![
256        LinuxIdMappingBuilder::default()
257            .host_id(info.gid)
258            .container_id(0_u32)
259            .size(1_u32)
260            .build()
261            .unwrap(),
262        LinuxIdMappingBuilder::default()
263            .host_id(info.subgid)
264            .container_id(100_u32)
265            .size(1_u32)
266            .build()
267            .unwrap(),
268    ]));
269
270    Ok(linux)
271}
272
273/// Return the Mounts part of the OCI Runtime spec.
274/// It first sets up the standard mounts, then scratch paths, bind mounts for
275/// all inputs, and finally read-only paths from the hosts.
276fn configure_mounts<'a>(
277    rootless: bool,
278    allow_network: bool,
279    scratch_paths: impl IntoIterator<Item = &'a Path>,
280    inputs: impl Iterator<Item = (&'a snix_castore::PathComponent, &'a snix_castore::Node)>,
281
282    inputs_dir: &Path,
283    ro_host_mounts: impl IntoIterator<Item = (&'a Path, &'a Path)>,
284) -> Result<Vec<oci_spec::runtime::Mount>, oci_spec::OciSpecError> {
285    let mut mounts: Vec<_> = if rootless {
286        oci_spec::runtime::get_rootless_mounts()
287    } else {
288        oci_spec::runtime::get_default_mounts()
289    };
290
291    mounts.push(configure_mount(
292        Path::new("tmpfs"),
293        Path::new("/tmp"),
294        "tmpfs",
295        &["nosuid", "noatime", "mode=700"],
296    )?);
297
298    // For each scratch path, create a bind mount entry.
299    let scratch_root = Path::new("scratch"); // relative path
300    for scratch_path in scratch_paths.into_iter() {
301        let src = scratch_root.join(scratch_name(scratch_path));
302        mounts.push(configure_mount(
303            &src,
304            &Path::new("/").join(scratch_path),
305            "none",
306            &["rbind", "rw"],
307        )?);
308    }
309
310    // For each input, create a bind mount from inputs/$name into $inputs_dir/$name.
311    for (input_name, _input) in inputs {
312        let input_name = std::str::from_utf8(input_name.as_ref()).expect("invalid input name");
313        mounts.push(configure_mount(
314            &Path::new("inputs").join(input_name),
315            &Path::new("/").join(inputs_dir).join(input_name),
316            "none",
317            &[
318                "rbind", "ro",
319                // "nosuid" is required, otherwise mounting will just fail with
320                // a generic permission error.
321                // See https://github.com/wllenyj/containerd/commit/42a386c8164bef16d59590c61ab00806f854d8fd
322                "nosuid", "nodev",
323            ],
324        )?);
325    }
326
327    // Process ro_host_mounts
328    for (src, dst) in ro_host_mounts.into_iter() {
329        mounts.push(configure_mount(src, dst, "none", &["rbind", "ro"])?);
330    }
331
332    // In case network is enabled, also mount in /etc/{resolv.conf,services,hosts}
333    if allow_network {
334        for p in [
335            Path::new("/etc/resolv.conf"),
336            Path::new("/etc/services"),
337            Path::new("/etc/hosts"),
338        ] {
339            mounts.push(configure_mount(p, p, "none", &["rbind", "ro"])?);
340        }
341    }
342
343    Ok(mounts)
344}
345
346/// Helper function to produce a mount.
347fn configure_mount(
348    source: &Path,
349    destination: &Path,
350    typ: &str,
351    options: &[&str],
352) -> Result<oci_spec::runtime::Mount, oci_spec::OciSpecError> {
353    oci_spec::runtime::MountBuilder::default()
354        .destination(destination)
355        .typ(typ.to_string())
356        .source(source)
357        .options(options.iter().map(|e| e.to_string()).collect::<Vec<_>>())
358        .build()
359}