1 //! See [`Parser`]. 2 3 use std::cell::Cell; 4 5 use drop_bomb::DropBomb; 6 use limit::Limit; 7 8 use crate::{ 9 event::Event, 10 input::Input, 11 SyntaxKind::{self, EOF, ERROR, TOMBSTONE}, 12 TokenSet, T, 13 }; 14 15 /// `Parser` struct provides the low-level API for 16 /// navigating through the stream of tokens and 17 /// constructing the parse tree. The actual parsing 18 /// happens in the [`grammar`](super::grammar) module. 19 /// 20 /// However, the result of this `Parser` is not a real 21 /// tree, but rather a flat stream of events of the form 22 /// "start expression, consume number literal, 23 /// finish expression". See `Event` docs for more. 24 pub(crate) struct Parser<'t> { 25 inp: &'t Input, 26 pos: usize, 27 events: Vec<Event>, 28 steps: Cell<u32>, 29 } 30 31 static PARSER_STEP_LIMIT: Limit = Limit::new(15_000_000); 32 33 impl<'t> Parser<'t> { new(inp: &'t Input) -> Parser<'t>34 pub(super) fn new(inp: &'t Input) -> Parser<'t> { 35 Parser { inp, pos: 0, events: Vec::new(), steps: Cell::new(0) } 36 } 37 finish(self) -> Vec<Event>38 pub(crate) fn finish(self) -> Vec<Event> { 39 self.events 40 } 41 42 /// Returns the kind of the current token. 43 /// If parser has already reached the end of input, 44 /// the special `EOF` kind is returned. current(&self) -> SyntaxKind45 pub(crate) fn current(&self) -> SyntaxKind { 46 self.nth(0) 47 } 48 49 /// Lookahead operation: returns the kind of the next nth 50 /// token. nth(&self, n: usize) -> SyntaxKind51 pub(crate) fn nth(&self, n: usize) -> SyntaxKind { 52 assert!(n <= 3); 53 54 let steps = self.steps.get(); 55 assert!(PARSER_STEP_LIMIT.check(steps as usize).is_ok(), "the parser seems stuck"); 56 self.steps.set(steps + 1); 57 58 self.inp.kind(self.pos + n) 59 } 60 61 /// Checks if the current token is `kind`. at(&self, kind: SyntaxKind) -> bool62 pub(crate) fn at(&self, kind: SyntaxKind) -> bool { 63 self.nth_at(0, kind) 64 } 65 nth_at(&self, n: usize, kind: SyntaxKind) -> bool66 pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool { 67 match kind { 68 T![-=] => self.at_composite2(n, T![-], T![=]), 69 T![->] => self.at_composite2(n, T![-], T![>]), 70 T![::] => self.at_composite2(n, T![:], T![:]), 71 T![!=] => self.at_composite2(n, T![!], T![=]), 72 T![..] => self.at_composite2(n, T![.], T![.]), 73 T![*=] => self.at_composite2(n, T![*], T![=]), 74 T![/=] => self.at_composite2(n, T![/], T![=]), 75 T![&&] => self.at_composite2(n, T![&], T![&]), 76 T![&=] => self.at_composite2(n, T![&], T![=]), 77 T![%=] => self.at_composite2(n, T![%], T![=]), 78 T![^=] => self.at_composite2(n, T![^], T![=]), 79 T![+=] => self.at_composite2(n, T![+], T![=]), 80 T![<<] => self.at_composite2(n, T![<], T![<]), 81 T![<=] => self.at_composite2(n, T![<], T![=]), 82 T![==] => self.at_composite2(n, T![=], T![=]), 83 T![=>] => self.at_composite2(n, T![=], T![>]), 84 T![>=] => self.at_composite2(n, T![>], T![=]), 85 T![>>] => self.at_composite2(n, T![>], T![>]), 86 T![|=] => self.at_composite2(n, T![|], T![=]), 87 T![||] => self.at_composite2(n, T![|], T![|]), 88 89 T![...] => self.at_composite3(n, T![.], T![.], T![.]), 90 T![..=] => self.at_composite3(n, T![.], T![.], T![=]), 91 T![<<=] => self.at_composite3(n, T![<], T![<], T![=]), 92 T![>>=] => self.at_composite3(n, T![>], T![>], T![=]), 93 94 _ => self.inp.kind(self.pos + n) == kind, 95 } 96 } 97 98 /// Consume the next token if `kind` matches. eat(&mut self, kind: SyntaxKind) -> bool99 pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { 100 if !self.at(kind) { 101 return false; 102 } 103 let n_raw_tokens = match kind { 104 T![-=] 105 | T![->] 106 | T![::] 107 | T![!=] 108 | T![..] 109 | T![*=] 110 | T![/=] 111 | T![&&] 112 | T![&=] 113 | T![%=] 114 | T![^=] 115 | T![+=] 116 | T![<<] 117 | T![<=] 118 | T![==] 119 | T![=>] 120 | T![>=] 121 | T![>>] 122 | T![|=] 123 | T![||] => 2, 124 125 T![...] | T![..=] | T![<<=] | T![>>=] => 3, 126 _ => 1, 127 }; 128 self.do_bump(kind, n_raw_tokens); 129 true 130 } 131 at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool132 fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool { 133 self.inp.kind(self.pos + n) == k1 134 && self.inp.kind(self.pos + n + 1) == k2 135 && self.inp.is_joint(self.pos + n) 136 } 137 at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool138 fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool { 139 self.inp.kind(self.pos + n) == k1 140 && self.inp.kind(self.pos + n + 1) == k2 141 && self.inp.kind(self.pos + n + 2) == k3 142 && self.inp.is_joint(self.pos + n) 143 && self.inp.is_joint(self.pos + n + 1) 144 } 145 146 /// Checks if the current token is in `kinds`. at_ts(&self, kinds: TokenSet) -> bool147 pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool { 148 kinds.contains(self.current()) 149 } 150 151 /// Checks if the current token is contextual keyword `kw`. at_contextual_kw(&self, kw: SyntaxKind) -> bool152 pub(crate) fn at_contextual_kw(&self, kw: SyntaxKind) -> bool { 153 self.inp.contextual_kind(self.pos) == kw 154 } 155 156 /// Checks if the nth token is contextual keyword `kw`. nth_at_contextual_kw(&self, n: usize, kw: SyntaxKind) -> bool157 pub(crate) fn nth_at_contextual_kw(&self, n: usize, kw: SyntaxKind) -> bool { 158 self.inp.contextual_kind(self.pos + n) == kw 159 } 160 161 /// Starts a new node in the syntax tree. All nodes and tokens 162 /// consumed between the `start` and the corresponding `Marker::complete` 163 /// belong to the same node. start(&mut self) -> Marker164 pub(crate) fn start(&mut self) -> Marker { 165 let pos = self.events.len() as u32; 166 self.push_event(Event::tombstone()); 167 Marker::new(pos) 168 } 169 170 /// Consume the next token. Panics if the parser isn't currently at `kind`. bump(&mut self, kind: SyntaxKind)171 pub(crate) fn bump(&mut self, kind: SyntaxKind) { 172 assert!(self.eat(kind)); 173 } 174 175 /// Advances the parser by one token bump_any(&mut self)176 pub(crate) fn bump_any(&mut self) { 177 let kind = self.nth(0); 178 if kind == EOF { 179 return; 180 } 181 self.do_bump(kind, 1); 182 } 183 184 /// Advances the parser by one token split_float(&mut self, mut marker: Marker) -> (bool, Marker)185 pub(crate) fn split_float(&mut self, mut marker: Marker) -> (bool, Marker) { 186 assert!(self.at(SyntaxKind::FLOAT_NUMBER)); 187 // we have parse `<something>.` 188 // `<something>`.0.1 189 // here we need to insert an extra event 190 // 191 // `<something>`. 0. 1; 192 // here we need to change the follow up parse, the return value will cause us to emulate a dot 193 // the actual splitting happens later 194 let ends_in_dot = !self.inp.is_joint(self.pos); 195 if !ends_in_dot { 196 let new_marker = self.start(); 197 let idx = marker.pos as usize; 198 match &mut self.events[idx] { 199 Event::Start { forward_parent, kind } => { 200 *kind = SyntaxKind::FIELD_EXPR; 201 *forward_parent = Some(new_marker.pos - marker.pos); 202 } 203 _ => unreachable!(), 204 } 205 marker.bomb.defuse(); 206 marker = new_marker; 207 }; 208 self.pos += 1; 209 self.push_event(Event::FloatSplitHack { ends_in_dot }); 210 (ends_in_dot, marker) 211 } 212 213 /// Advances the parser by one token, remapping its kind. 214 /// This is useful to create contextual keywords from 215 /// identifiers. For example, the lexer creates a `union` 216 /// *identifier* token, but the parser remaps it to the 217 /// `union` keyword, and keyword is what ends up in the 218 /// final tree. bump_remap(&mut self, kind: SyntaxKind)219 pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { 220 if self.nth(0) == EOF { 221 // FIXME: panic!? 222 return; 223 } 224 self.do_bump(kind, 1); 225 } 226 227 /// Emit error with the `message` 228 /// FIXME: this should be much more fancy and support 229 /// structured errors with spans and notes, like rustc 230 /// does. error<T: Into<String>>(&mut self, message: T)231 pub(crate) fn error<T: Into<String>>(&mut self, message: T) { 232 let msg = message.into(); 233 self.push_event(Event::Error { msg }); 234 } 235 236 /// Consume the next token if it is `kind` or emit an error 237 /// otherwise. expect(&mut self, kind: SyntaxKind) -> bool238 pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { 239 if self.eat(kind) { 240 return true; 241 } 242 self.error(format!("expected {kind:?}")); 243 false 244 } 245 246 /// Create an error node and consume the next token. err_and_bump(&mut self, message: &str)247 pub(crate) fn err_and_bump(&mut self, message: &str) { 248 self.err_recover(message, TokenSet::EMPTY); 249 } 250 251 /// Create an error node and consume the next token. err_recover(&mut self, message: &str, recovery: TokenSet)252 pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) { 253 match self.current() { 254 T!['{'] | T!['}'] => { 255 self.error(message); 256 return; 257 } 258 _ => (), 259 } 260 261 if self.at_ts(recovery) { 262 self.error(message); 263 return; 264 } 265 266 let m = self.start(); 267 self.error(message); 268 self.bump_any(); 269 m.complete(self, ERROR); 270 } 271 do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8)272 fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { 273 self.pos += n_raw_tokens as usize; 274 self.steps.set(0); 275 self.push_event(Event::Token { kind, n_raw_tokens }); 276 } 277 push_event(&mut self, event: Event)278 fn push_event(&mut self, event: Event) { 279 self.events.push(event); 280 } 281 } 282 283 /// See [`Parser::start`]. 284 pub(crate) struct Marker { 285 pos: u32, 286 bomb: DropBomb, 287 } 288 289 impl Marker { new(pos: u32) -> Marker290 fn new(pos: u32) -> Marker { 291 Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") } 292 } 293 294 /// Finishes the syntax tree node and assigns `kind` to it, 295 /// and mark the create a `CompletedMarker` for possible future 296 /// operation like `.precede()` to deal with forward_parent. complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker297 pub(crate) fn complete(mut self, p: &mut Parser<'_>, kind: SyntaxKind) -> CompletedMarker { 298 self.bomb.defuse(); 299 let idx = self.pos as usize; 300 match &mut p.events[idx] { 301 Event::Start { kind: slot, .. } => { 302 *slot = kind; 303 } 304 _ => unreachable!(), 305 } 306 p.push_event(Event::Finish); 307 CompletedMarker::new(self.pos, kind) 308 } 309 310 /// Abandons the syntax tree node. All its children 311 /// are attached to its parent instead. abandon(mut self, p: &mut Parser<'_>)312 pub(crate) fn abandon(mut self, p: &mut Parser<'_>) { 313 self.bomb.defuse(); 314 let idx = self.pos as usize; 315 if idx == p.events.len() - 1 { 316 match p.events.pop() { 317 Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), 318 _ => unreachable!(), 319 } 320 } 321 } 322 } 323 324 pub(crate) struct CompletedMarker { 325 pos: u32, 326 kind: SyntaxKind, 327 } 328 329 impl CompletedMarker { new(pos: u32, kind: SyntaxKind) -> Self330 fn new(pos: u32, kind: SyntaxKind) -> Self { 331 CompletedMarker { pos, kind } 332 } 333 334 /// This method allows to create a new node which starts 335 /// *before* the current one. That is, parser could start 336 /// node `A`, then complete it, and then after parsing the 337 /// whole `A`, decide that it should have started some node 338 /// `B` before starting `A`. `precede` allows to do exactly 339 /// that. See also docs about 340 /// [`Event::Start::forward_parent`](crate::event::Event::Start::forward_parent). 341 /// 342 /// Given completed events `[START, FINISH]` and its corresponding 343 /// `CompletedMarker(pos: 0, _)`. 344 /// Append a new `START` events as `[START, FINISH, NEWSTART]`, 345 /// then mark `NEWSTART` as `START`'s parent with saving its relative 346 /// distance to `NEWSTART` into forward_parent(=2 in this case); precede(self, p: &mut Parser<'_>) -> Marker347 pub(crate) fn precede(self, p: &mut Parser<'_>) -> Marker { 348 let new_pos = p.start(); 349 let idx = self.pos as usize; 350 match &mut p.events[idx] { 351 Event::Start { forward_parent, .. } => { 352 *forward_parent = Some(new_pos.pos - self.pos); 353 } 354 _ => unreachable!(), 355 } 356 new_pos 357 } 358 359 /// Extends this completed marker *to the left* up to `m`. extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker360 pub(crate) fn extend_to(self, p: &mut Parser<'_>, mut m: Marker) -> CompletedMarker { 361 m.bomb.defuse(); 362 let idx = m.pos as usize; 363 match &mut p.events[idx] { 364 Event::Start { forward_parent, .. } => { 365 *forward_parent = Some(self.pos - m.pos); 366 } 367 _ => unreachable!(), 368 } 369 self 370 } 371 kind(&self) -> SyntaxKind372 pub(crate) fn kind(&self) -> SyntaxKind { 373 self.kind 374 } 375 } 376