codemap/
lib.rs

1//! A data structure for tracking source positions in language implementations, inspired by the
2//! [CodeMap type in rustc's libsyntax](https://github.com/rust-lang/rust/blob/master/src/libsyntax/codemap.rs).
3//!
4//! The `CodeMap` tracks all source files and maps positions within them to linear indexes as if all
5//! source files were concatenated. This allows a source position to be represented by a small
6//! 32-bit `Pos` indexing into the `CodeMap`, under the assumption that the total amount of parsed
7//! source code will not exceed 4GiB. The `CodeMap` can look up the source file, line, and column
8//! of a `Pos` or `Span`, as well as provide source code snippets for error reporting.
9//!
10//! # Example
11//! ```
12//! use codemap::CodeMap;
13//! let mut codemap = CodeMap::new();
14//! let file = codemap.add_file("test.rs".to_string(), "fn test(){\n    println!(\"Hello\");\n}\n".to_string());
15//! let string_literal_span = file.span.subspan(24, 31);
16//!
17//! let location = codemap.look_up_span(string_literal_span);
18//! assert_eq!(location.file.name(), "test.rs");
19//! assert_eq!(location.begin.line, 1);
20//! assert_eq!(location.begin.column, 13);
21//! assert_eq!(location.end.line, 1);
22//! assert_eq!(location.end.column, 20);
23//! ```
24
25use std::cmp::{self, Ordering};
26use std::fmt;
27use std::hash::{Hash, Hasher};
28use std::ops::{Add, Deref, Sub};
29use std::sync::Arc;
30
31/// A small, `Copy`, value representing a position in a `CodeMap`'s file.
32#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)]
33pub struct Pos(u32);
34
35impl Add<u64> for Pos {
36    type Output = Pos;
37    fn add(self, other: u64) -> Pos {
38        Pos(self.0 + other as u32)
39    }
40}
41
42impl Sub<Pos> for Pos {
43    type Output = u64;
44    fn sub(self, other: Pos) -> u64 {
45        (self.0 - other.0) as u64
46    }
47}
48
49/// A range of text within a CodeMap.
50#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
51pub struct Span {
52    /// The position in the codemap representing the first byte of the span.
53    low: Pos,
54
55    /// The position after the last byte of the span.
56    high: Pos,
57}
58
59impl Span {
60    /// Makes a span from offsets relative to the start of this span.
61    ///
62    /// # Panics
63    ///   * If `end < begin`
64    ///   * If `end` is beyond the length of the span
65    pub fn subspan(&self, begin: u64, end: u64) -> Span {
66        assert!(end >= begin);
67        assert!(self.low + end <= self.high);
68        Span {
69            low: self.low + begin,
70            high: self.low + end,
71        }
72    }
73
74    /// Checks if a span is contained within this span.
75    pub fn contains(&self, other: Span) -> bool {
76        self.low <= other.low && self.high >= other.high
77    }
78
79    /// The position in the codemap representing the first byte of the span.
80    pub fn low(&self) -> Pos {
81        self.low
82    }
83
84    /// The position after the last byte of the span.
85    pub fn high(&self) -> Pos {
86        self.high
87    }
88
89    /// The length in bytes of the text of the span
90    pub fn len(&self) -> u64 {
91        self.high - self.low
92    }
93
94    /// Create a span that encloses both `self` and `other`.
95    pub fn merge(&self, other: Span) -> Span {
96        Span {
97            low: cmp::min(self.low, other.low),
98            high: cmp::max(self.high, other.high),
99        }
100    }
101}
102
103/// Associate a Span with a value of arbitrary type (e.g. an AST node).
104#[derive(Clone, PartialEq, Eq, Hash, Debug, Copy)]
105pub struct Spanned<T> {
106    pub node: T,
107    pub span: Span,
108}
109
110impl<T> Spanned<T> {
111    /// Maps a `Spanned<T>` to `Spanned<U>` by applying the function to the node,
112    /// leaving the span untouched.
113    pub fn map_node<U, F: FnOnce(T) -> U>(self, op: F) -> Spanned<U> {
114        Spanned {
115            node: op(self.node),
116            span: self.span
117        }
118    }
119}
120
121impl<T> Deref for Spanned<T> {
122    type Target = T;
123
124    fn deref(&self) -> &T {
125        &self.node
126    }
127}
128
129/// A data structure recording source code files for position lookup.
130#[derive(Default, Debug)]
131pub struct CodeMap {
132    files: Vec<Arc<File>>,
133}
134
135impl CodeMap {
136    /// Creates an empty `CodeMap`.
137    pub fn new() -> CodeMap {
138        Default::default()
139    }
140
141    /// Adds a file with the given name and contents.
142    ///
143    /// Use the returned `File` and its `.span` property to create `Spans`
144    /// representing substrings of the file.
145    pub fn add_file(&mut self, name: String, source: String) -> Arc<File> {
146        let low = self.end_pos() + 1;
147        let high = low + source.len() as u64;
148        let mut lines = vec![low];
149        lines.extend(
150            source
151                .match_indices('\n')
152                .map(|(p, _)| low + (p + 1) as u64),
153        );
154
155        let file = Arc::new(File {
156            span: Span { low, high },
157            name,
158            source,
159            lines,
160        });
161
162        self.files.push(file.clone());
163        file
164    }
165
166    fn end_pos(&self) -> Pos {
167        self.files.last().map(|x| x.span.high).unwrap_or(Pos(0))
168    }
169
170    /// Looks up the `File` that contains the specified position.
171    pub fn find_file(&self, pos: Pos) -> &Arc<File> {
172        self.files
173            .binary_search_by(|file| {
174                if file.span.high < pos {
175                    Ordering::Less
176                } else if file.span.low > pos {
177                    Ordering::Greater
178                } else {
179                    Ordering::Equal
180                }
181            })
182            .ok()
183            .map(|i| &self.files[i])
184            .expect("Mapping unknown source location")
185    }
186
187    /// Gets the file, line, and column represented by a `Pos`.
188    pub fn look_up_pos(&self, pos: Pos) -> Loc {
189        let file = self.find_file(pos);
190        let position = file.find_line_col(pos);
191        Loc {
192            file: file.clone(),
193            position,
194        }
195    }
196
197    /// Gets the file and its line and column ranges represented by a `Span`.
198    pub fn look_up_span(&self, span: Span) -> SpanLoc {
199        let file = self.find_file(span.low);
200        let begin = file.find_line_col(span.low);
201        let end = file.find_line_col(span.high);
202        SpanLoc {
203            file: file.clone(),
204            begin,
205            end,
206        }
207    }
208}
209
210/// A `CodeMap`'s record of a source file.
211pub struct File {
212    /// The span representing the entire file.
213    pub span: Span,
214
215    /// The filename as it would be displayed in an error message.
216    name: String,
217
218    /// Contents of the file.
219    source: String,
220
221    /// Byte positions of line beginnings.
222    lines: Vec<Pos>,
223}
224
225impl File {
226    /// Gets the name of the file
227    pub fn name(&self) -> &str {
228        &self.name
229    }
230
231    /// Gets the line number of a Pos.
232    ///
233    /// The lines are 0-indexed (first line is numbered 0)
234    ///
235    /// # Panics
236    ///
237    ///  * If `pos` is not within this file's span
238    pub fn find_line(&self, pos: Pos) -> usize {
239        assert!(pos >= self.span.low);
240        assert!(pos <= self.span.high);
241        match self.lines.binary_search(&pos) {
242            Ok(i) => i,
243            Err(i) => i - 1,
244        }
245    }
246
247    /// Gets the line and column of a Pos.
248    ///
249    /// # Panics
250    ///
251    /// * If `pos` is not with this file's span
252    /// * If `pos` points to a byte in the middle of a UTF-8 character
253    pub fn find_line_col(&self, pos: Pos) -> LineCol {
254        let line = self.find_line(pos);
255        let line_span = self.line_span(line);
256        let byte_col = pos - line_span.low;
257        let column = self.source_slice(line_span)[..byte_col as usize]
258            .chars()
259            .count();
260
261        LineCol { line, column }
262    }
263
264    /// Gets the full source text of the file
265    pub fn source(&self) -> &str {
266        &self.source
267    }
268
269    /// Gets the source text of a Span.
270    ///
271    /// # Panics
272    ///
273    ///   * If `span` is not entirely within this file.
274    pub fn source_slice(&self, span: Span) -> &str {
275        assert!(self.span.contains(span));
276        &self.source[((span.low - self.span.low) as usize)..((span.high - self.span.low) as usize)]
277    }
278
279    /// Gets the span representing a line by line number.
280    ///
281    /// The line number is 0-indexed (first line is numbered 0). The returned span includes the
282    /// line terminator.
283    ///
284    /// # Panics
285    ///
286    ///  * If the line number is out of range
287    pub fn line_span(&self, line: usize) -> Span {
288        assert!(line < self.lines.len());
289        Span {
290            low: self.lines[line],
291            high: *self.lines.get(line + 1).unwrap_or(&self.span.high),
292        }
293    }
294
295    /// Gets the source text of a line.
296    ///
297    /// The string returned does not include the terminating \r or \n characters.
298    ///
299    /// # Panics
300    ///
301    ///  * If the line number is out of range
302    pub fn source_line(&self, line: usize) -> &str {
303        self.source_slice(self.line_span(line))
304            .trim_end_matches(&['\n', '\r'][..])
305    }
306
307    /// Gets the number of lines in the file
308    pub fn num_lines(&self) -> usize {
309        self.lines.len()
310    }
311}
312
313impl fmt::Debug for File {
314    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
315        write!(f, "File({:?})", self.name)
316    }
317}
318
319impl PartialEq for File {
320    /// Compares by identity
321    fn eq(&self, other: &File) -> bool {
322        self as *const _ == other as *const _
323    }
324}
325
326impl Eq for File {}
327
328impl Hash for File {
329    fn hash<H: Hasher>(&self, hasher: &mut H) {
330        self.span.hash(hasher);
331    }
332}
333
334/// A line and column.
335#[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
336pub struct LineCol {
337    /// The line number within the file (0-indexed).
338    pub line: usize,
339
340    /// The column within the line (0-indexed).
341    pub column: usize,
342}
343
344/// A file, and a line and column within it.
345#[derive(Clone, Eq, PartialEq, Debug)]
346pub struct Loc {
347    pub file: Arc<File>,
348    pub position: LineCol,
349}
350
351impl fmt::Display for Loc {
352    /// Formats the location as `filename:line:column`, with a 1-indexed
353    /// line and column.
354    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
355        write!(
356            f,
357            "{}:{}:{}",
358            self.file.name,
359            self.position.line + 1,
360            self.position.column + 1
361        )
362    }
363}
364
365/// A file, and a line and column range within it.
366#[derive(Clone, Eq, PartialEq, Debug)]
367pub struct SpanLoc {
368    pub file: Arc<File>,
369    pub begin: LineCol,
370    pub end: LineCol,
371}
372
373impl fmt::Display for SpanLoc {
374    /// Formats the span as `filename:start_line:start_column: end_line:end_column`,
375    /// or if the span is zero-length, `filename:line:column`, with a 1-indexed line and column.
376    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
377        if self.begin == self.end {
378            write!(
379                f,
380                "{}:{}:{}",
381                self.file.name,
382                self.begin.line + 1,
383                self.begin.column + 1
384            )
385        } else {
386            write!(
387                f,
388                "{}:{}:{}: {}:{}",
389                self.file.name,
390                self.begin.line + 1,
391                self.begin.column + 1,
392                self.end.line + 1,
393                self.end.column + 1
394            )
395        }
396    }
397}
398
399#[test]
400fn test_codemap() {
401    let mut codemap = CodeMap::new();
402    let f1 = codemap.add_file("test1.rs".to_string(), "abcd\nefghij\nqwerty".to_string());
403    let f2 = codemap.add_file("test2.rs".to_string(), "foo\nbar".to_string());
404
405    assert_eq!(codemap.find_file(f1.span.low()).name(), "test1.rs");
406    assert_eq!(codemap.find_file(f1.span.high()).name(), "test1.rs");
407    assert_eq!(codemap.find_file(f2.span.low()).name(), "test2.rs");
408    assert_eq!(codemap.find_file(f2.span.high()).name(), "test2.rs");
409
410    let x = f1.span.subspan(5, 10);
411    let f = codemap.find_file(x.low);
412    assert_eq!(f.name, "test1.rs");
413    assert_eq!(
414        f.find_line_col(f.span.low()),
415        LineCol { line: 0, column: 0 }
416    );
417    assert_eq!(
418        f.find_line_col(f.span.low() + 4),
419        LineCol { line: 0, column: 4 }
420    );
421    assert_eq!(
422        f.find_line_col(f.span.low() + 5),
423        LineCol { line: 1, column: 0 }
424    );
425    assert_eq!(
426        f.find_line_col(f.span.low() + 16),
427        LineCol { line: 2, column: 4 }
428    );
429
430    let x = f2.span.subspan(4, 7);
431    assert_eq!(codemap.find_file(x.low()).name(), "test2.rs");
432    assert_eq!(codemap.find_file(x.high()).name(), "test2.rs");
433}
434
435#[test]
436fn test_issue2() {
437    let mut codemap = CodeMap::new();
438    let content = "a \nxyz\r\n";
439    let file = codemap.add_file("<test>".to_owned(), content.to_owned());
440
441    let span = file.span.subspan(2, 3);
442    assert_eq!(
443        codemap.look_up_span(span),
444        SpanLoc {
445            file: file.clone(),
446            begin: LineCol { line: 0, column: 2 },
447            end: LineCol { line: 1, column: 0 }
448        }
449    );
450
451    assert_eq!(file.source_line(0), "a ");
452    assert_eq!(file.source_line(1), "xyz");
453    assert_eq!(file.source_line(2), "");
454}
455
456#[test]
457fn test_multibyte() {
458    let mut codemap = CodeMap::new();
459    let content = "65°00′N 18°00′W 汉语\n🔬";
460    let file = codemap.add_file("<test>".to_owned(), content.to_owned());
461
462    assert_eq!(
463        codemap.look_up_pos(file.span.low() + 21),
464        Loc {
465            file: file.clone(),
466            position: LineCol {
467                line: 0,
468                column: 15
469            }
470        }
471    );
472    assert_eq!(
473        codemap.look_up_pos(file.span.low() + 28),
474        Loc {
475            file: file.clone(),
476            position: LineCol {
477                line: 0,
478                column: 18
479            }
480        }
481    );
482    assert_eq!(
483        codemap.look_up_pos(file.span.low() + 33),
484        Loc {
485            file: file.clone(),
486            position: LineCol { line: 1, column: 1 }
487        }
488    );
489}