• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! See [`Parser`].
2 
3 use std::cell::Cell;
4 
5 use drop_bomb::DropBomb;
6 use limit::Limit;
7 
8 use crate::{
9     event::Event,
10     input::Input,
11     SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
12     TokenSet, T,
13 };
14 
15 /// `Parser` struct provides the low-level API for
16 /// navigating through the stream of tokens and
17 /// constructing the parse tree. The actual parsing
18 /// happens in the [`grammar`](super::grammar) module.
19 ///
20 /// However, the result of this `Parser` is not a real
21 /// tree, but rather a flat stream of events of the form
22 /// "start expression, consume number literal,
23 /// finish expression". See `Event` docs for more.
24 pub(crate) struct Parser<'t> {
25     inp: &'t Input,
26     pos: usize,
27     events: Vec<Event>,
28     steps: Cell<u32>,
29 }
30 
31 static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000);
32 
33 impl<'t> Parser<'t> {
new(inp: &'t Input) -> Parser<'t>34     pub(super) fn new(inp: &'t Input) -> Parser<'t> {
35         Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0) }
36     }
37 
finish(self) -> Vec<Event>38     pub(crate) fn finish(self) -> Vec<Event> {
39         self.events
40     }
41 
42     /// Returns the kind of the current token.
43     /// If parser has already reached the end of input,
44     /// the special `EOF` kind is returned.
current(&self) -> SyntaxKind45     pub(crate) fn current(&self) -> SyntaxKind {
46         self.nth(0)
47     }
48 
49     /// Lookahead operation: returns the kind of the next nth
50     /// token.
nth(&self, n: usize) -> SyntaxKind51     pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
52         assert!(n <= 3);
53 
54         let steps = self.steps.get();
55         assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck");
56         self.steps.set(steps + 1);
57 
58         self.inp.kind(self.pos + n)
59     }
60 
61     /// Checks if the current token is `kind`.
at(&self, kind: SyntaxKind) -> bool62     pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
63         self.nth_at(0, kind)
64     }
65 
nth_at(&self, n: usize, kind: SyntaxKind) -> bool66     pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
67         match kind {
68             T![-=] => self.at_composite2(n, T![-], T![=]),
69             T![->] => self.at_composite2(n, T![-], T![>]),
70             T![::] => self.at_composite2(n, T![:], T![:]),
71             T![!=] => self.at_composite2(n, T![!], T![=]),
72             T![..] => self.at_composite2(n, T![.], T![.]),
73             T![*=] => self.at_composite2(n, T![*], T![=]),
74             T![/=] => self.at_composite2(n, T![/], T![=]),
75             T![&&] => self.at_composite2(n, T![&], T![&]),
76             T![&=] => self.at_composite2(n, T![&], T![=]),
77             T![%=] => self.at_composite2(n, T![%], T![=]),
78             T![^=] => self.at_composite2(n, T![^], T![=]),
79             T![+=] => self.at_composite2(n, T![+], T![=]),
80             T![<<] => self.at_composite2(n, T![<], T![<]),
81             T![<=] => self.at_composite2(n, T![<], T![=]),
82             T![==] => self.at_composite2(n, T![=], T![=]),
83             T![=>] => self.at_composite2(n, T![=], T![>]),
84             T![>=] => self.at_composite2(n, T![>], T![=]),
85             T![>>] => self.at_composite2(n, T![>], T![>]),
86             T![|=] => self.at_composite2(n, T![|], T![=]),
87             T![||] => self.at_composite2(n, T![|], T![|]),
88 
89             T![...] => self.at_composite3(n, T![.], T![.], T![.]),
90             T![..=] => self.at_composite3(n, T![.], T![.], T![=]),
91             T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
92             T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
93 
94             _ => self.inp.kind(self.pos + n) == kind,
95         }
96     }
97 
98     /// Consume the next token if `kind` matches.
eat(&mut self, kind: SyntaxKind) -> bool99     pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
100         if !self.at(kind) {
101             return false;
102         }
103         let n_raw_tokens = match kind {
104             T![-=]
105             | T![->]
106             | T![::]
107             | T![!=]
108             | T![..]
109             | T![*=]
110             | T![/=]
111             | T![&&]
112             | T![&=]
113             | T![%=]
114             | T![^=]
115             | T![+=]
116             | T![<<]
117             | T![<=]
118             | T![==]
119             | T![=>]
120             | T![>=]
121             | T![>>]
122             | T![|=]
123             | T![||] => 2,
124 
125             T![...] | T![..=] | T![<<=] | T![>>=] => 3,
126             _ => 1,
127         };
128         self.do_bump(kind, n_raw_tokens);
129         true
130     }
131 
at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool132     fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
133         self.inp.kind(self.pos + n) == k1
134             && self.inp.kind(self.pos + n + 1) == k2
135             && self.inp.is_joint(self.pos + n)
136     }
137 
at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool138     fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
139         self.inp.kind(self.pos + n) == k1
140             && self.inp.kind(self.pos + n + 1) == k2
141             && self.inp.kind(self.pos + n + 2) == k3
142             && self.inp.is_joint(self.pos + n)
143             && self.inp.is_joint(self.pos + n + 1)
144     }
145 
146     /// Checks if the current token is in `kinds`.
at_ts(&self, kinds: TokenSet) -> bool147     pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
148         kinds.contains(self.current())
149     }
150 
151     /// Checks if the current token is contextual keyword `kw`.
at_contextual_kw(&self, kw: SyntaxKind) -> bool152     pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool {
153         self.inp.contextual_kind(self.pos) == kw
154     }
155 
156     /// Checks if the nth token is contextual keyword `kw`.
nth_at_contextual_kw(&self, n: usize, kw: SyntaxKind) -> bool157     pub(crate) fn nth_at_contextual_kw(&self, n: usize, kw: SyntaxKind) -> bool {
158         self.inp.contextual_kind(self.pos + n) == kw
159     }
160 
161     /// Starts a new node in the syntax tree. All nodes and tokens
162     /// consumed between the `start` and the corresponding `Marker::complete`
163     /// belong to the same node.
start(&mut self) -> Marker164     pub(crate) fn start(&mut self) -> Marker {
165         let pos = self.events.len() as u32;
166         self.push_event(Event::tombstone());
167         Marker::new(pos)
168     }
169 
170     /// Consume the next token. Panics if the parser isn't currently at `kind`.
bump(&mut self, kind: SyntaxKind)171     pub(crate) fn bump(&mut self, kind: SyntaxKind) {
172         assert!(self.eat(kind));
173     }
174 
175     /// Advances the parser by one token
bump_any(&mut self)176     pub(crate) fn bump_any(&mut self) {
177         let kind = self.nth(0);
178         if kind == EOF {
179             return;
180         }
181         self.do_bump(kind, 1);
182     }
183 
184     /// Advances the parser by one token
split_float(&mut self, mut marker: Marker) -> (bool, Marker)185     pub(crate) fn split_float(&mut self, mut marker: Marker) -> (bool, Marker) {
186         assert!(self.at(SyntaxKind::FLOAT_NUMBER));
187         // we have parse `<something>.`
188         // `<something>`.0.1
189         // here we need to insert an extra event
190         //
191         // `<something>`. 0. 1;
192         // here we need to change the follow up parse, the return value will cause us to emulate a dot
193         // the actual splitting happens later
194         let ends_in_dot = !self.inp.is_joint(self.pos);
195         if !ends_in_dot {
196             let new_marker = self.start();
197             let idx = marker.pos as usize;
198             match &mut self.events[idx] {
199                 Event::Start { forward_parent, kind } => {
200                     *kind = SyntaxKind::FIELD_EXPR;
201                     *forward_parent = Some(new_marker.pos - marker.pos);
202                 }
203                 _ => unreachable!(),
204             }
205             marker.bomb.defuse();
206             marker = new_marker;
207         };
208         self.pos += 1;
209         self.push_event(Event::FloatSplitHack { ends_in_dot });
210         (ends_in_dot, marker)
211     }
212 
213     /// Advances the parser by one token, remapping its kind.
214     /// This is useful to create contextual keywords from
215     /// identifiers. For example, the lexer creates a `union`
216     /// *identifier* token, but the parser remaps it to the
217     /// `union` keyword, and keyword is what ends up in the
218     /// final tree.
bump_remap(&mut self, kind: SyntaxKind)219     pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
220         if self.nth(0) == EOF {
221             // FIXME: panic!?
222             return;
223         }
224         self.do_bump(kind, 1);
225     }
226 
227     /// Emit error with the `message`
228     /// FIXME: this should be much more fancy and support
229     /// structured errors with spans and notes, like rustc
230     /// does.
error<T: Into<String>>(&mut self, message: T)231     pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
232         let msg = message.into();
233         self.push_event(Event::Error { msg });
234     }
235 
236     /// Consume the next token if it is `kind` or emit an error
237     /// otherwise.
expect(&mut self, kind: SyntaxKind) -> bool238     pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
239         if self.eat(kind) {
240             return true;
241         }
242         self.error(format!("expected {kind:?}"));
243         false
244     }
245 
246     /// Create an error node and consume the next token.
err_and_bump(&mut self, message: &str)247     pub(crate) fn err_and_bump(&mut self, message: &str) {
248         self.err_recover(message, TokenSet::EMPTY);
249     }
250 
251     /// Create an error node and consume the next token.
err_recover(&mut self, message: &str, recovery: TokenSet)252     pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
253         match self.current() {
254             T!['{'] | T!['}'] => {
255                 self.error(message);
256                 return;
257             }
258             _ => (),
259         }
260 
261         if self.at_ts(recovery) {
262             self.error(message);
263             return;
264         }
265 
266         let m = self.start();
267         self.error(message);
268         self.bump_any();
269         m.complete(self, ERROR);
270     }
271 
do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8)272     fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
273         self.pos += n_raw_tokens as usize;
274         self.steps.set(0);
275         self.push_event(Event::Token { kind, n_raw_tokens });
276     }
277 
push_event(&mut self, event: Event)278     fn push_event(&mut self, event: Event) {
279         self.events.push(event);
280     }
281 }
282 
283 /// See [`Parser::start`].
284 pub(crate) struct Marker {
285     pos: u32,
286     bomb: DropBomb,
287 }
288 
289 impl Marker {
new(pos: u32) -> Marker290     fn new(pos: u32) -> Marker {
291         Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") }
292     }
293 
294     /// Finishes the syntax tree node and assigns `kind` to it,
295     /// and mark the create a `CompletedMarker` for possible future
296     /// operation like `.precede()` to deal with forward_parent.
complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker297     pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker {
298         self.bomb.defuse();
299         let idx = self.pos as usize;
300         match &mut p.events[idx] {
301             Event::Start { kind: slot, .. } => {
302                 *slot = kind;
303             }
304             _ => unreachable!(),
305         }
306         p.push_event(Event::Finish);
307         CompletedMarker::new(self.pos, kind)
308     }
309 
310     /// Abandons the syntax tree node. All its children
311     /// are attached to its parent instead.
abandon(mut self, p: &mut Parser<'_>)312     pub(crate) fn abandon(mut self, p: &mut Parser<'_>) {
313         self.bomb.defuse();
314         let idx = self.pos as usize;
315         if idx == p.events.len() - 1 {
316             match p.events.pop() {
317                 Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (),
318                 _ => unreachable!(),
319             }
320         }
321     }
322 }
323 
324 pub(crate) struct CompletedMarker {
325     pos: u32,
326     kind: SyntaxKind,
327 }
328 
329 impl CompletedMarker {
new(pos: u32, kind: SyntaxKind) -> Self330     fn new(pos: u32, kind: SyntaxKind) -> Self {
331         CompletedMarker { pos, kind }
332     }
333 
334     /// This method allows to create a new node which starts
335     /// *before* the current one. That is, parser could start
336     /// node `A`, then complete it, and then after parsing the
337     /// whole `A`, decide that it should have started some node
338     /// `B` before starting `A`. `precede` allows to do exactly
339     /// that. See also docs about
340     /// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent).
341     ///
342     /// Given completed events `[START, FINISH]` and its corresponding
343     /// `CompletedMarker(pos: 0, _)`.
344     /// Append a new `START` events as `[START, FINISH, NEWSTART]`,
345     /// then mark `NEWSTART` as `START`'s parent with saving its relative
346     /// distance to `NEWSTART` into forward_parent(=2 in this case);
precede(self, p: &mut Parser<'_>) -> Marker347     pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker {
348         let new_pos = p.start();
349         let idx = self.pos as usize;
350         match &mut p.events[idx] {
351             Event::Start { forward_parent, .. } => {
352                 *forward_parent = Some(new_pos.pos - self.pos);
353             }
354             _ => unreachable!(),
355         }
356         new_pos
357     }
358 
359     /// Extends this completed marker *to the left* up to `m`.
extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker360     pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker {
361         m.bomb.defuse();
362         let idx = m.pos as usize;
363         match &mut p.events[idx] {
364             Event::Start { forward_parent, .. } => {
365                 *forward_parent = Some(self.pos - m.pos);
366             }
367             _ => unreachable!(),
368         }
369         self
370     }
371 
kind(&self) -> SyntaxKind372     pub(crate) fn kind(&self) -> SyntaxKind {
373         self.kind
374     }
375 }
376