1use std::{collections::VecDeque, fmt};
4
5use rowan::{Checkpoint, GreenNode, GreenNodeBuilder, Language, TextRange, TextSize};
6
7use crate::{
8 tokenizer::Token,
9 NixLanguage,
10 SyntaxKind::{self, *},
11 TokenSet,
12};
13
14#[derive(Clone, Debug, PartialEq)]
16#[non_exhaustive]
17pub enum ParseError {
18 Unexpected(TextRange),
20 UnexpectedExtra(TextRange),
22 UnexpectedWanted(SyntaxKind, TextRange, Box<[SyntaxKind]>),
24 UnexpectedDoubleBind(TextRange),
26 UnexpectedEOF,
28 UnexpectedEOFWanted(Box<[SyntaxKind]>),
30 DuplicatedArgs(TextRange, String),
32 RecursionLimitExceeded,
35}
36
37impl fmt::Display for ParseError {
38 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
39 match self {
40 ParseError::Unexpected(range) => {
41 write!(
42 f,
43 "error node at {}..{}",
44 usize::from(range.start()),
45 usize::from(range.end())
46 )
47 }
48 ParseError::UnexpectedExtra(range) => {
49 write!(
50 f,
51 "unexpected token at {}..{}",
52 usize::from(range.start()),
53 usize::from(range.end())
54 )
55 }
56 ParseError::UnexpectedWanted(got, range, kinds) => write!(
57 f,
58 "unexpected {:?} at {}..{}, wanted any of {:?}",
59 got,
60 usize::from(range.start()),
61 usize::from(range.end()),
62 kinds
63 ),
64 ParseError::UnexpectedDoubleBind(range) => {
65 write!(
66 f,
67 "unexpected double bind at {}..{}",
68 usize::from(range.start()),
69 usize::from(range.end())
70 )
71 }
72 ParseError::UnexpectedEOF => write!(f, "unexpected end of file"),
73 ParseError::UnexpectedEOFWanted(kinds) => {
74 write!(f, "unexpected end of file, wanted any of {:?}", kinds)
75 }
76 ParseError::DuplicatedArgs(range, ident) => {
77 write!(
78 f,
79 "argument `{}` is duplicated in {}..{}",
80 ident,
81 usize::from(range.start()),
82 usize::from(range.end())
83 )
84 }
85 ParseError::RecursionLimitExceeded => write!(f, "recursion limit exceeded"),
86 }
87 }
88}
89
90impl std::error::Error for ParseError {}
91
92struct Parser<'a, I>
93where
94 I: Iterator<Item = Token<'a>>,
95{
96 builder: GreenNodeBuilder<'static>,
97 errors: Vec<ParseError>,
98
99 trivia_buffer: Vec<Token<'a>>,
100 buffer: VecDeque<Token<'a>>,
101 iter: I,
102 consumed: TextSize,
103
104 depth: u32,
107}
108impl<'a, I> Parser<'a, I>
109where
110 I: Iterator<Item = Token<'a>>,
111{
112 fn new(iter: I) -> Self {
113 Self {
114 builder: GreenNodeBuilder::new(),
115 errors: Vec::new(),
116
117 trivia_buffer: Vec::with_capacity(1),
118 buffer: VecDeque::with_capacity(1),
119 iter,
120 consumed: TextSize::from(0),
121
122 depth: 0,
123 }
124 }
125
126 fn get_text_position(&self) -> TextSize {
127 self.consumed
128 }
129
130 fn peek_raw(&mut self) -> Option<&Token<'a>> {
131 if self.buffer.is_empty() {
132 if let Some(token) = self.iter.next() {
133 self.buffer.push_back(token);
134 }
135 }
136 self.buffer.front()
137 }
138 fn drain_trivia_buffer(&mut self) {
139 for (t, s) in self.trivia_buffer.drain(..) {
140 self.consumed += TextSize::of(s);
141 self.builder.token(NixLanguage::kind_to_raw(t), s);
142 }
143 }
144 fn eat_trivia(&mut self) {
145 self.peek();
146 self.drain_trivia_buffer();
147 }
148 fn start_node(&mut self, kind: SyntaxKind) {
149 self.eat_trivia();
150 self.builder.start_node(NixLanguage::kind_to_raw(kind));
151 }
152 fn checkpoint(&mut self) -> Checkpoint {
153 self.eat_trivia();
154 self.builder.checkpoint()
155 }
156 fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
157 self.builder.start_node_at(checkpoint, NixLanguage::kind_to_raw(kind));
158 }
159 fn finish_node(&mut self) {
160 self.builder.finish_node();
161 }
162 fn start_error_node(&mut self) -> TextSize {
163 self.start_node(NODE_ERROR);
164 self.get_text_position()
165 }
166 fn finish_error_node(&mut self) -> TextSize {
167 self.finish_node();
168 self.get_text_position()
169 }
170 fn bump(&mut self) {
171 match self.try_next() {
172 Some((token, s)) => {
173 if token.is_trivia() {
174 self.trivia_buffer.push((token, s))
175 } else {
176 self.drain_trivia_buffer();
177 self.manual_bump(s, token);
178 }
179 }
180 None => self.errors.push(ParseError::UnexpectedEOF),
181 }
182 }
183 fn try_next(&mut self) -> Option<Token<'a>> {
184 self.buffer.pop_front().or_else(|| self.iter.next())
185 }
186 fn manual_bump(&mut self, s: &str, token: SyntaxKind) {
187 self.consumed += TextSize::of(s);
188 self.builder.token(NixLanguage::kind_to_raw(token), s)
189 }
190 fn peek_data(&mut self) -> Option<&Token<'a>> {
191 while self.peek_raw().map(|&(t, _)| t.is_trivia()).unwrap_or(false) {
192 self.bump();
193 }
194 self.peek_raw()
195 }
196 fn peek(&mut self) -> Option<SyntaxKind> {
197 self.peek_data().map(|&(t, _)| t)
198 }
199 fn expect_peek_any(&mut self, allowed_slice: &[SyntaxKind]) -> Option<SyntaxKind> {
200 let allowed = TokenSet::from_slice(allowed_slice);
201
202 let next = match self.peek() {
203 None => None,
204 Some(kind) if allowed.contains(kind) => Some(kind),
205 Some(kind) => {
206 let start = self.start_error_node();
207 loop {
208 self.bump();
209 if self.peek().map(|kind| allowed.contains(kind)).unwrap_or(true) {
210 break;
211 }
212 }
213 let end = self.finish_error_node();
214 self.errors.push(ParseError::UnexpectedWanted(
215 kind,
216 TextRange::new(start, end),
217 allowed_slice.to_vec().into_boxed_slice(),
218 ));
219
220 self.peek()
221 }
222 };
223 if next.is_none() {
224 self.errors
225 .push(ParseError::UnexpectedEOFWanted(allowed_slice.to_vec().into_boxed_slice()));
226 }
227 next
228 }
229 fn expect(&mut self, expected: SyntaxKind) {
230 if self.expect_peek_any(&[expected]).is_some() {
231 self.bump();
232 }
233 }
234
235 fn expect_ident(&mut self) {
236 if self.expect_peek_any(&[TOKEN_IDENT]).is_some() {
237 self.start_node(NODE_IDENT);
238 self.bump();
239 self.finish_node()
240 }
241 }
242
243 fn parse_dynamic(&mut self) {
244 self.start_node(NODE_DYNAMIC);
245 self.bump();
246 while self.peek().map(|t| t != TOKEN_INTERPOL_END).unwrap_or(false) {
247 self.parse_expr();
248 }
249 self.bump();
250 self.finish_node();
251 }
252
253 fn parse_string(&mut self) {
254 self.start_node(NODE_STRING);
255 self.expect(TOKEN_STRING_START);
256
257 loop {
258 match self.expect_peek_any(&[
259 TOKEN_STRING_END,
260 TOKEN_STRING_CONTENT,
261 TOKEN_INTERPOL_START,
262 ]) {
263 Some(TOKEN_STRING_CONTENT) => self.bump(),
264 Some(TOKEN_INTERPOL_START) => {
265 self.start_node(NODE_INTERPOL);
266 self.bump();
267 self.parse_expr();
268 self.expect(TOKEN_INTERPOL_END);
269 self.finish_node();
270 }
271 _ => break,
273 }
274 }
275 self.expect(TOKEN_STRING_END);
276
277 self.finish_node();
278 }
279 fn parse_attr(&mut self) {
280 match self.peek() {
281 Some(TOKEN_INTERPOL_START) => self.parse_dynamic(),
282 Some(TOKEN_STRING_START) => self.parse_string(),
283 _ => {
284 if self.expect_peek_any(&[TOKEN_IDENT, TOKEN_OR]).is_some() {
285 self.start_node(NODE_IDENT);
286 let (_, s) = self.try_next().unwrap();
287 self.manual_bump(s, TOKEN_IDENT);
288 self.finish_node()
289 }
290 }
291 }
292 }
293 fn parse_attrpath(&mut self) {
294 self.start_node(NODE_ATTRPATH);
295 loop {
296 self.parse_attr();
297
298 if self.peek() == Some(T![.]) {
299 self.bump();
300 } else {
301 break;
302 }
303 }
304 self.finish_node();
305 }
306 fn parse_pattern(&mut self, bound: bool) {
307 if self.peek().map(|t| t == T!['}']).unwrap_or(true) {
308 self.bump();
309 } else {
310 loop {
311 match self.expect_peek_any(&[T!['}'], T![...], TOKEN_IDENT]) {
312 Some(T!['}']) => {
313 self.bump();
314 break;
315 }
316 Some(T![...]) => {
317 self.bump();
318 self.expect(T!['}']);
319 break;
320 }
321 Some(TOKEN_IDENT) => {
322 self.start_node(NODE_PAT_ENTRY);
323 self.expect_ident();
324 if let Some(T![?]) = self.peek() {
325 self.bump();
326 self.parse_expr();
327 }
328 self.finish_node();
329 match self.peek() {
330 Some(T![,]) => self.bump(),
331 _ => {
332 self.expect(T!['}']);
333 break;
334 }
335 }
336 }
337 _ => break,
339 }
340 }
341 }
342
343 if self.peek() == Some(T![@]) {
344 let kind = if bound { NODE_ERROR } else { NODE_PAT_BIND };
345 self.start_node(kind);
346 let start = self.get_text_position();
347 self.bump();
348 self.expect_ident();
349 let end = self.finish_error_node();
350 if bound {
351 self.errors.push(ParseError::UnexpectedDoubleBind(TextRange::new(start, end)));
352 }
353 }
354 }
355 fn parse_set(&mut self, until: SyntaxKind) {
356 loop {
357 match self.peek() {
358 None => break,
359 token if token == Some(until) => break,
360 Some(T![inherit]) => {
361 self.start_node(NODE_INHERIT);
362 self.bump();
363
364 if self.peek() == Some(T!['(']) {
365 self.start_node(NODE_INHERIT_FROM);
366 self.bump();
367 self.parse_expr();
368 self.expect(T![')']);
369 self.finish_node();
370 }
371
372 loop {
373 match self.peek() {
374 Some(t) if t != T![;] => {
375 self.parse_attr();
376 }
377 Some(_) => {
378 break;
379 }
380 None => {
381 self.errors.push(ParseError::UnexpectedEOF);
382 break;
383 }
384 }
385 }
386
387 self.expect(T![;]);
388 self.finish_node();
389 }
390 Some(_) => {
391 self.start_node(NODE_ATTRPATH_VALUE);
392 self.parse_attrpath();
393 self.expect(T![=]);
394 self.parse_expr();
395 self.expect(T![;]);
396 self.finish_node();
397 }
398 }
399 }
400 self.bump(); }
402
403 fn parse_simple(&mut self) -> Checkpoint {
404 let peek = match self.peek() {
405 Some(it) => it,
406 None => {
407 self.errors.push(ParseError::UnexpectedEOF);
408 return self.builder.checkpoint();
414 }
415 };
416 let checkpoint = self.checkpoint();
417 match peek {
418 T!['('] => {
419 self.start_node(NODE_PAREN);
420 self.bump();
421 self.parse_expr();
422 self.bump();
423 self.finish_node();
424 }
425 T![rec] => {
426 self.start_node(NODE_ATTR_SET);
427 self.bump();
428 self.expect(T!['{']);
429 self.parse_set(T!['}']);
430 self.finish_node();
431 }
432 T!['{'] => {
433 let mut peek = [None, None];
435 for i in &mut peek {
436 let mut token;
437 *i = loop {
438 token = self.iter.next();
439 let kind = token.as_ref().map(|&(t, _)| t);
440 if let Some(token) = token {
441 self.buffer.push_back(token);
442 }
443 if kind.map(|t| !t.is_trivia()).unwrap_or(true) {
444 break kind;
445 }
446 };
447 }
448
449 match peek {
450 [Some(TOKEN_IDENT), Some(T![,])]
451 | [Some(TOKEN_IDENT), Some(T![?])]
452 | [Some(TOKEN_IDENT), Some(T!['}'])]
453 | [Some(T![...]), Some(T!['}'])]
454 | [Some(T!['}']), Some(T![:])]
455 | [Some(T!['}']), Some(T![@])] => {
456 self.start_node(NODE_LAMBDA);
458
459 self.start_node(NODE_PATTERN);
460 self.bump();
461 self.parse_pattern(false);
462 self.finish_node();
463
464 self.expect(T![:]);
465 self.parse_expr();
466
467 self.finish_node();
468 }
469 _ => {
470 self.start_node(NODE_ATTR_SET);
472 self.bump();
473 self.parse_set(T!['}']);
474 self.finish_node();
475 }
476 }
477 }
478 T!['['] => {
479 self.start_node(NODE_LIST);
480 self.bump();
481 while self.peek().map(|t| t != T![']']).unwrap_or(false) {
482 self.parse_simple();
483 }
484 self.bump();
485 self.finish_node();
486 }
487 TOKEN_STRING_START => self.parse_string(),
488 TOKEN_PATH => {
489 self.start_node(NODE_PATH);
490 self.bump();
491 let is_complex_path = self.peek().map_or(false, |t| t == TOKEN_INTERPOL_START);
492 if is_complex_path {
493 loop {
494 match self.peek_raw().map(|(t, _)| t) {
495 Some(TOKEN_PATH) => self.bump(),
496 Some(TOKEN_INTERPOL_START) => {
497 self.start_node(NODE_INTERPOL);
498 self.bump();
499 self.parse_expr();
500 self.expect(TOKEN_INTERPOL_END);
501 self.finish_node();
502 }
503 _ => break,
504 }
505 }
506 }
507 self.finish_node();
508 }
509 t if t.is_literal() => {
510 self.start_node(NODE_LITERAL);
511 self.bump();
512 self.finish_node();
513 }
514 TOKEN_IDENT => {
515 self.expect_ident();
516
517 match self.peek() {
518 Some(T![:]) => {
519 self.start_node_at(checkpoint, NODE_LAMBDA);
520 self.start_node_at(checkpoint, NODE_IDENT_PARAM);
521 self.finish_node();
522 self.bump();
523 self.parse_expr();
524 self.finish_node();
525 }
526 Some(T![@]) => {
527 self.start_node_at(checkpoint, NODE_LAMBDA);
528 self.start_node_at(checkpoint, NODE_PATTERN);
529 self.start_node_at(checkpoint, NODE_PAT_BIND);
530 self.bump();
531 self.finish_node(); self.expect(T!['{']);
534 self.parse_pattern(true);
535 self.finish_node(); self.expect(T![:]);
538 self.parse_expr();
539 self.finish_node(); }
541 _ => (),
542 }
543 }
544 kind => {
545 let start = self.start_error_node();
546 self.bump();
547 let end = self.finish_error_node();
548 self.errors.push(ParseError::UnexpectedWanted(
549 kind,
550 TextRange::new(start, end),
551 [T!['('], T![rec], T!['{'], T!['['], TOKEN_STRING_START, TOKEN_IDENT]
552 .to_vec()
553 .into_boxed_slice(),
554 ));
555 }
556 };
557
558 if self.peek() == Some(T![.]) {
559 self.start_node_at(checkpoint, NODE_SELECT);
560 self.bump();
561 self.parse_attrpath();
562 if self.peek() == Some(T![or]) {
563 self.bump();
564 self.parse_simple();
565 }
566 self.finish_node();
567
568 } else if self.peek() == Some(T![or]) {
576 self.start_node_at(checkpoint, NODE_APPLY);
577 self.start_node(NODE_IDENT);
578 let (_, s) = self.try_next().unwrap();
579 self.manual_bump(s, TOKEN_IDENT);
580 self.finish_node();
581 self.finish_node();
582 }
583
584 checkpoint
585 }
586 fn parse_fn(&mut self) -> Checkpoint {
587 let checkpoint = self.parse_simple();
588
589 while self.peek().map(|t| t.is_fn_arg()).unwrap_or(false) {
590 self.start_node_at(checkpoint, NODE_APPLY);
591 self.parse_simple();
592 self.finish_node();
593 }
594 checkpoint
595 }
596 fn parse_negate(&mut self) -> Checkpoint {
597 if self.peek() == Some(T![-]) {
598 let checkpoint = self.checkpoint();
599 self.start_node(NODE_UNARY_OP);
600 self.bump();
601 self.parse_negate();
602 self.finish_node();
603 checkpoint
604 } else {
605 self.parse_fn()
606 }
607 }
608 fn parse_non_assoc(&mut self, next: fn(&mut Self) -> Checkpoint, ops: TokenSet) -> Checkpoint {
609 let checkpoint = next(self);
610 if self.peek().map(|t| ops.contains(t)).unwrap_or(false) {
611 self.start_node_at(checkpoint, NODE_BIN_OP);
612 self.bump();
613 next(self);
614 self.finish_node();
615 }
616 checkpoint
617 }
618 fn parse_left_assoc(&mut self, next: fn(&mut Self) -> Checkpoint, ops: TokenSet) -> Checkpoint {
619 let checkpoint = next(self);
620 while self.peek().map(|t| ops.contains(t)).unwrap_or(false) {
621 self.start_node_at(checkpoint, NODE_BIN_OP);
622 self.bump();
623 next(self);
624 self.finish_node();
625 }
626 checkpoint
627 }
628 fn parse_right_assoc(
629 &mut self,
630 next: fn(&mut Self) -> Checkpoint,
631 ops: TokenSet,
632 ) -> Checkpoint {
633 let checkpoint = next(self);
634 if self.peek().map(|t| ops.contains(t)).unwrap_or(false) {
635 self.start_node_at(checkpoint, NODE_BIN_OP);
636 self.bump();
637 self.parse_right_assoc(next, ops);
638 self.finish_node();
639 }
640 checkpoint
641 }
642 fn parse_hasattr(&mut self) -> Checkpoint {
643 let checkpoint = self.parse_negate();
644 while self.peek().map(|t| t == T![?]).unwrap_or(false) {
645 self.start_node_at(checkpoint, NODE_HAS_ATTR);
646 self.bump();
647 self.parse_attrpath();
648 self.finish_node();
649 }
650 checkpoint
651 }
652 fn parse_concat(&mut self) -> Checkpoint {
653 self.parse_right_assoc(Self::parse_hasattr, T![++] | ())
654 }
655 fn parse_mul(&mut self) -> Checkpoint {
656 self.parse_left_assoc(Self::parse_concat, T![*] | T![/])
657 }
658 fn parse_add(&mut self) -> Checkpoint {
659 self.parse_left_assoc(Self::parse_mul, T![+] | T![-])
660 }
661 fn parse_invert(&mut self) -> Checkpoint {
662 if self.peek() == Some(TOKEN_INVERT) {
663 let checkpoint = self.checkpoint();
664 self.start_node(NODE_UNARY_OP);
665 self.bump();
666 self.parse_invert();
667 self.finish_node();
668 checkpoint
669 } else {
670 self.parse_add()
671 }
672 }
673 fn parse_merge(&mut self) -> Checkpoint {
674 self.parse_right_assoc(Self::parse_invert, T!["//"] | ())
675 }
676 fn parse_compare(&mut self) -> Checkpoint {
677 self.parse_non_assoc(Self::parse_merge, T![<] | T![<=] | T![>] | T![>=])
678 }
679 fn parse_equal(&mut self) -> Checkpoint {
680 self.parse_non_assoc(Self::parse_compare, T![==] | T![!=])
681 }
682 fn parse_and(&mut self) -> Checkpoint {
683 self.parse_left_assoc(Self::parse_equal, T![&&] | ())
684 }
685 fn parse_or(&mut self) -> Checkpoint {
686 self.parse_left_assoc(Self::parse_and, T![||] | ())
687 }
688 fn parse_implication(&mut self) -> Checkpoint {
689 self.parse_right_assoc(Self::parse_or, T![->] | ())
690 }
691 #[inline(always)]
692 fn parse_math(&mut self) -> Checkpoint {
693 self.parse_implication()
695 }
696 pub fn parse_expr(&mut self) -> Checkpoint {
698 if self.depth >= 512 {
700 self.errors.push(ParseError::RecursionLimitExceeded);
701 self.start_error_node();
704 while self.peek().is_some() {
705 self.bump()
706 }
707 self.finish_error_node();
708 return self.checkpoint();
709 }
710 self.depth += 1;
711 let out = match self.peek() {
712 Some(T![let]) => {
713 let checkpoint = self.checkpoint();
714 self.bump();
715
716 if self.peek() == Some(T!['{']) {
717 self.start_node_at(checkpoint, NODE_LEGACY_LET);
718 self.bump();
719 self.parse_set(T!['}']);
720 self.finish_node();
721 } else {
722 self.start_node_at(checkpoint, NODE_LET_IN);
723 self.parse_set(T![in]);
724 self.parse_expr();
725 self.finish_node();
726 }
727 checkpoint
728 }
729 Some(T![with]) => {
730 let checkpoint = self.checkpoint();
731 self.start_node(NODE_WITH);
732 self.bump();
733 self.parse_expr();
734 self.expect(T![;]);
735 self.parse_expr();
736 self.finish_node();
737 checkpoint
738 }
739 Some(T![if]) => {
740 let checkpoint = self.checkpoint();
741 self.start_node(NODE_IF_ELSE);
742 self.bump();
743 self.parse_expr();
744 self.expect(T![then]);
745 self.parse_expr();
746 self.expect(TOKEN_ELSE);
747 self.parse_expr();
748 self.finish_node();
749 checkpoint
750 }
751 Some(T![assert]) => {
752 let checkpoint = self.checkpoint();
753 self.start_node(NODE_ASSERT);
754 self.bump();
755 self.parse_expr();
756 self.expect(T![;]);
757 self.parse_expr();
758 self.finish_node();
759 checkpoint
760 }
761 _ => self.parse_math(),
762 };
763 self.depth -= 1;
764 out
765 }
766}
767
768pub fn parse<'s, I>(iter: I) -> (GreenNode, Vec<ParseError>)
770where
771 I: Iterator<Item = Token<'s>>,
772{
773 let mut parser = Parser::new(iter);
774 parser.builder.start_node(NixLanguage::kind_to_raw(NODE_ROOT));
775 parser.parse_expr();
776 parser.eat_trivia();
777 if parser.peek().is_some() {
778 let start = parser.start_error_node();
779 while parser.peek().is_some() {
780 parser.bump();
781 }
782 let end = parser.finish_error_node();
783 parser.errors.push(ParseError::UnexpectedExtra(TextRange::new(start, end)));
784 parser.eat_trivia();
785 }
786 parser.builder.finish_node();
787 (parser.builder.finish(), parser.errors)
788}