snix_eval/
chunk.rs

1use crate::opcode::{CodeIdx, ConstantIdx, Op, OpArg};
2use crate::value::Value;
3use crate::{CoercionKind, SourceCode};
4use std::io::Write;
5
6/// Maximum size of a u64 encoded in the vu128 varint encoding.
7const U64_VARINT_SIZE: usize = 9;
8
9/// Represents a source location from which one or more operations
10/// were compiled.
11///
12/// The span itself is an index into a [codemap::CodeMap], and the
13/// structure tracks the number of operations that were yielded from
14/// the same span.
15///
16/// At error reporting time, it becomes possible to either just fetch
17/// the textual representation of that span from the codemap, or to
18/// even re-parse the AST using rnix to create more semantically
19/// interesting errors.
20#[derive(Clone, Debug, PartialEq)]
21struct SourceSpan {
22    /// Span into the [codemap::CodeMap].
23    span: codemap::Span,
24
25    /// Index of the first operation covered by this span.
26    start: usize,
27}
28
29/// A chunk is a representation of a sequence of bytecode
30/// instructions, associated constants and additional metadata as
31/// emitted by the compiler.
32#[derive(Debug, Default)]
33pub struct Chunk {
34    pub code: Vec<u8>,
35    pub constants: Vec<Value>,
36    spans: Vec<SourceSpan>,
37
38    /// Index of the last operation (i.e. not data) written to the code vector.
39    /// Some operations (e.g. jump patching) need to know this.
40    last_op: usize,
41}
42
43impl Chunk {
44    pub fn push_op(&mut self, op: Op, span: codemap::Span) -> usize {
45        self.last_op = self.code.len();
46        self.code.push(op as u8);
47        self.push_span(span, self.last_op);
48        self.last_op
49    }
50
51    pub fn push_uvarint(&mut self, data: u64) {
52        let mut encoded = [0u8; U64_VARINT_SIZE];
53        let bytes_written = vu128::encode_u64(&mut encoded, data);
54        self.code.extend_from_slice(&encoded[..bytes_written]);
55    }
56
57    pub fn read_uvarint(&self, idx: usize) -> (u64, usize) {
58        debug_assert!(
59            idx < self.code.len(),
60            "invalid bytecode (missing varint operand)",
61        );
62
63        if self.code.len() - idx >= U64_VARINT_SIZE {
64            vu128::decode_u64(
65                &self.code[idx..idx + U64_VARINT_SIZE]
66                    .try_into()
67                    .expect("size statically checked"),
68            )
69        } else {
70            let mut tmp = [0u8; U64_VARINT_SIZE];
71            tmp[..self.code.len() - idx].copy_from_slice(&self.code[idx..]);
72            vu128::decode_u64(&tmp)
73        }
74    }
75
76    pub fn push_u16(&mut self, data: u16) {
77        self.code.extend_from_slice(&data.to_le_bytes())
78    }
79
80    /// Patches the argument to the jump operand of the jump at the given index
81    /// to point to the *next* instruction that will be emitted.
82    pub fn patch_jump(&mut self, idx: usize) {
83        let offset = (self.code.len() - idx - /* arg idx = */ 1 - /* jump arg size = */ 2) as u16;
84        self.code[idx + 1..idx + 3].copy_from_slice(&offset.to_le_bytes())
85    }
86
87    pub fn read_u16(&self, idx: usize) -> u16 {
88        if idx + 2 > self.code.len() {
89            panic!("Snix bug: invalid bytecode (expected u16 operand not found)")
90        }
91
92        let byte_array: &[u8; 2] = &self.code[idx..idx + 2]
93            .try_into()
94            .expect("fixed-size slice can not fail to convert to array");
95
96        u16::from_le_bytes(*byte_array)
97    }
98
99    /// Get the first span of a chunk, no questions asked.
100    pub fn first_span(&self) -> codemap::Span {
101        self.spans[0].span
102    }
103
104    /// Return the last op in the chunk together with its index, if any.
105    pub fn last_op(&self) -> Option<(Op, usize)> {
106        if self.code.is_empty() {
107            return None;
108        }
109
110        Some((self.code[self.last_op].into(), self.last_op))
111    }
112
113    pub fn push_constant(&mut self, data: Value) -> ConstantIdx {
114        let idx = self.constants.len();
115        self.constants.push(data);
116        ConstantIdx(idx)
117    }
118
119    /// Return a reference to the constant at the given [`ConstantIdx`]
120    pub fn get_constant(&self, constant: ConstantIdx) -> Option<&Value> {
121        self.constants.get(constant.0)
122    }
123
124    fn push_span(&mut self, span: codemap::Span, start: usize) {
125        match self.spans.last_mut() {
126            // We do not need to insert the same span again, as this
127            // instruction was compiled from the same span as the last
128            // one.
129            Some(last) if last.span == span => {}
130
131            // In all other cases, this is a new source span.
132            _ => self.spans.push(SourceSpan { span, start }),
133        }
134    }
135
136    /// Retrieve the [codemap::Span] from which the instruction at
137    /// `offset` was compiled.
138    pub fn get_span(&self, offset: CodeIdx) -> codemap::Span {
139        let position = self
140            .spans
141            .binary_search_by(|span| span.start.cmp(&offset.0));
142
143        let span = match position {
144            Ok(index) => &self.spans[index],
145            Err(index) => {
146                if index == 0 {
147                    &self.spans[0]
148                } else {
149                    &self.spans[index - 1]
150                }
151            }
152        };
153
154        span.span
155    }
156
157    /// Write the disassembler representation of the operation at
158    /// `idx` to the specified writer, and return how many bytes in the code to
159    /// skip for the next op.
160    ///
161    /// # Format
162    ///
163    /// The format of the disassembly is shown in the diagram below.
164    ///
165    /// ```text
166    /// 0x0  1  OpClosure(BP @ 0, 1 upvalues)
167    /// ^    ^  ^        ^
168    /// |    |  |        |
169    /// |    |  |        \ One of the following, depending on the operation:
170    /// |    |  |          * Nothing
171    /// |    |  |          * Single argument
172    /// |    |  |          * Blueprint's index, whether `with`
173    /// |    |  |            was captured and number of upvalues
174    /// |    |  |
175    /// |    |  \ Operation identifier
176    /// |    \ Line number in source code, or continuation character (`|`)
177    /// \ Index of the instruction in the code chunk
178    /// ```
179    pub fn disassemble_op<W: Write>(
180        &self,
181        writer: &mut W,
182        source: &SourceCode,
183        width: usize,
184        idx: CodeIdx,
185    ) -> Result<usize, std::io::Error> {
186        write!(writer, "{:#width$x}\t ", idx.0, width = width)?;
187
188        // Print continuation character if the previous operation was at
189        // the same line, otherwise print the line.
190        let line = source.get_line(self.get_span(idx));
191        if idx.0 > 0 && source.get_line(self.get_span(idx - 1)) == line {
192            write!(writer, "   |\t")?;
193        } else {
194            write!(writer, "{line:4}\t")?;
195        }
196
197        let _fmt_constant = |idx: ConstantIdx| match &self.constants[idx.0] {
198            Value::Thunk(t) => t.debug_repr(),
199            Value::Closure(c) => format!("closure({:p})", c.lambda),
200            Value::Blueprint(b) => format!("blueprint({b:p})"),
201            val => format!("{val}"),
202        };
203
204        let op: Op = self.code[idx.0].into();
205
206        match op.arg_type() {
207            OpArg::None => {
208                writeln!(writer, "Op{op:?}")?;
209                Ok(1)
210            }
211
212            OpArg::Fixed => {
213                let arg = self.read_u16(idx.0 + 1);
214                writeln!(writer, "Op{op:?}({arg})")?;
215                Ok(3)
216            }
217
218            OpArg::Uvarint => {
219                let (arg, size) = self.read_uvarint(idx.0 + 1);
220                write!(writer, "Op{op:?}({arg})")?;
221                if let Op::Constant = &op {
222                    write!(writer, " (={})", self.constants[arg as usize])?;
223                }
224                writeln!(writer)?;
225                Ok(1 + size)
226            }
227
228            OpArg::Custom => match op {
229                Op::CoerceToString => {
230                    let kind: CoercionKind = self.code[idx.0 + 1].into();
231                    writeln!(writer, "Op{op:?}({kind:?})")?;
232                    Ok(2)
233                }
234
235                Op::Closure | Op::ThunkClosure | Op::ThunkSuspended => {
236                    let mut cidx = idx.0 + 1;
237
238                    let (bp_idx, size) = self.read_uvarint(cidx);
239                    cidx += size;
240
241                    let (packed_count, size) = self.read_uvarint(cidx);
242                    cidx += size;
243
244                    let captures_with = packed_count & 0b1 == 1;
245                    let count = packed_count >> 1;
246
247                    write!(writer, "Op{op:?}(BP @ {bp_idx}, ")?;
248                    if captures_with {
249                        write!(writer, "captures with, ")?;
250                    }
251                    writeln!(writer, "{count} upvalues)")?;
252
253                    for _ in 0..count {
254                        let (_, size) = self.read_uvarint(cidx);
255                        cidx += size;
256                    }
257
258                    Ok(cidx - idx.0)
259                }
260                _ => panic!("Snix bug: don't know how to format argument for Op{op:?}"),
261            },
262        }
263    }
264
265    /// Count the number of Opcodes. This function has O(n) time complexity.
266    pub fn op_count(&self) -> usize {
267        if self.code.is_empty() {
268            return 0;
269        }
270
271        let mut idx = 0;
272        let mut count = 0;
273        while idx <= self.last_op {
274            let op: Op = self.code[idx].into();
275            let op_len = match op.arg_type() {
276                OpArg::None => 1,
277                OpArg::Uvarint => {
278                    let (_, len) = self.read_uvarint(idx + 1);
279                    len + 1
280                }
281                OpArg::Fixed => 3,
282                OpArg::Custom => match op {
283                    Op::CoerceToString => 2,
284
285                    Op::Closure | Op::ThunkClosure | Op::ThunkSuspended => {
286                        let mut len = 1;
287
288                        let (_, size) = self.read_uvarint(idx + len);
289                        len += size;
290
291                        let (packed_count, size) = self.read_uvarint(idx + len);
292                        len += size;
293
294                        let count = packed_count >> 1;
295
296                        for _ in 0..count {
297                            let (_, size) = self.read_uvarint(idx + len);
298                            len += size;
299                        }
300
301                        len
302                    }
303                    _ => panic!("Snix bug: arg_type returned Custom for wrong Op"),
304                },
305            };
306            idx += op_len;
307            count += 1;
308        }
309        count
310    }
311}
312
313#[cfg(test)]
314mod tests {
315    use super::*;
316    use crate::test_utils::dummy_span;
317
318    // Note: These tests are about the functionality of the `Chunk` type, the
319    // opcodes used below do *not* represent valid, executable Snix code (and
320    // don't need to).
321
322    #[test]
323    fn push_op() {
324        let mut chunk = Chunk::default();
325        let idx = chunk.push_op(Op::Add, dummy_span());
326        assert_eq!(*chunk.code.last().unwrap(), Op::Add as u8);
327        assert_eq!(chunk.code[idx], Op::Add as u8);
328    }
329
330    #[test]
331    fn push_op_with_arg() {
332        let mut chunk = Chunk::default();
333        let mut idx = chunk.push_op(Op::Constant, dummy_span());
334        chunk.push_uvarint(42);
335
336        assert_eq!(chunk.code[idx], Op::Constant as u8);
337
338        idx += 1;
339        let (arg, size) = chunk.read_uvarint(idx);
340        assert_eq!(idx + size, chunk.code.len());
341        assert_eq!(arg, 42);
342    }
343
344    #[test]
345    fn push_jump() {
346        let mut chunk = Chunk::default();
347
348        chunk.push_op(Op::Constant, dummy_span());
349        chunk.push_uvarint(0);
350
351        let idx = chunk.push_op(Op::Jump, dummy_span());
352        chunk.push_u16(0);
353
354        chunk.push_op(Op::Constant, dummy_span());
355        chunk.push_uvarint(1);
356
357        chunk.patch_jump(idx);
358        chunk.push_op(Op::Return, dummy_span());
359
360        #[rustfmt::skip]
361        let expected: Vec<u8> = vec![
362            Op::Constant as u8, 0,
363            Op::Jump as u8, 2, 0,
364            Op::Constant as u8, 1,
365            Op::Return as u8,
366        ];
367
368        assert_eq!(chunk.code, expected);
369    }
370
371    #[test]
372    fn op_count() {
373        let mut chunk = Chunk::default();
374        assert_eq!(chunk.op_count(), 0);
375
376        chunk.push_op(Op::Constant, dummy_span());
377        chunk.push_uvarint(0);
378
379        let idx = chunk.push_op(Op::Jump, dummy_span());
380        chunk.push_u16(0);
381
382        chunk.push_op(Op::Constant, dummy_span());
383        chunk.push_uvarint(1);
384
385        chunk.patch_jump(idx);
386        chunk.push_op(Op::Return, dummy_span());
387
388        assert_eq!(chunk.op_count(), 4);
389        assert_eq!(chunk.code.len(), 8);
390    }
391}