1 use std::char;
2 use std::convert::TryFrom;
3 use std::f64;
4 use std::fmt;
5 use std::num::ParseFloatError;
6 use std::num::ParseIntError;
7
8 use super::float;
9 use super::loc::Loc;
10 use super::loc::FIRST_COL;
11 use super::str_lit::StrLit;
12 use super::str_lit::StrLitDecodeError;
13 use super::token::Token;
14 use super::token::TokenWithLocation;
15 use super::ParserLanguage;
16 use crate::text_format::lexer::JsonNumberLit;
17
/// Errors reported by the lexer while tokenizing input.
#[derive(Debug)]
pub enum LexerError {
    /// Input does not form a valid token or literal at the current position.
    IncorrectInput, // TODO: something better than this
    /// Input ended (or a `/* ... */` comment was not closed) while more
    /// characters were required.
    UnexpectedEof,
    /// The next character was required to be the contained one, but was not.
    ExpectChar(char),
    /// Produced when a `std::num::ParseIntError` is converted into a lexer error.
    ParseIntError,
    /// Produced when a `std::num::ParseFloatError` is converted into a lexer error.
    ParseFloatError,
    /// A float literal was malformed; also produced when a
    /// `float::ProtobufFloatParseError` is converted into a lexer error.
    IncorrectFloatLit, // TODO: how it is different from ParseFloatError?
    /// A backslash in a JSON string was followed by an unsupported escape character.
    IncorrectJsonEscape,
    /// A JSON number literal was malformed.
    IncorrectJsonNumber,
    /// A numeric code could not be converted to a `char`.
    IncorrectUnicodeChar,
    /// A hexadecimal digit was required.
    ExpectHexDigit,
    /// An octal digit was required.
    ExpectOctDigit,
    /// A decimal digit was required.
    ExpectDecDigit,
    /// Decoding of a string literal failed; wraps the underlying error.
    StrLitDecodeError(StrLitDecodeError),
    /// An identifier was required.
    /// NOTE(review): not produced by the lexer code in this file; presumably
    /// raised by callers — confirm.
    ExpectedIdent,
}
35
36 impl fmt::Display for LexerError {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result37 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
38 match self {
39 LexerError::IncorrectInput => write!(f, "Incorrect input"),
40 LexerError::UnexpectedEof => write!(f, "Unexpected EOF"),
41 LexerError::ExpectChar(c) => write!(f, "Expecting char: {}", c),
42 LexerError::ParseIntError => write!(f, "Parse int error"),
43 LexerError::ParseFloatError => write!(f, "Parse float error"),
44 LexerError::IncorrectFloatLit => write!(f, "Incorrect float literal"),
45 LexerError::IncorrectJsonEscape => write!(f, "Incorrect JSON escape"),
46 LexerError::IncorrectJsonNumber => write!(f, "Incorrect JSON number"),
47 LexerError::IncorrectUnicodeChar => write!(f, "Incorrect Unicode char"),
48 LexerError::ExpectHexDigit => write!(f, "Expecting hex digit"),
49 LexerError::ExpectOctDigit => write!(f, "Expecting oct digit"),
50 LexerError::ExpectDecDigit => write!(f, "Expecting dec digit"),
51 LexerError::StrLitDecodeError(e) => write!(f, "{}", e),
52 LexerError::ExpectedIdent => write!(f, "Expecting identifier"),
53 }
54 }
55 }
56
// Lets `LexerError` be used as a standard error type; the empty body keeps
// every trait method at its default implementation.
impl std::error::Error for LexerError {}
58
/// Result type returned by lexer operations, with `LexerError` as the error.
pub type LexerResult<T> = Result<T, LexerError>;
60
61 impl From<StrLitDecodeError> for LexerError {
from(e: StrLitDecodeError) -> Self62 fn from(e: StrLitDecodeError) -> Self {
63 LexerError::StrLitDecodeError(e)
64 }
65 }
66
67 impl From<ParseIntError> for LexerError {
from(_: ParseIntError) -> Self68 fn from(_: ParseIntError) -> Self {
69 LexerError::ParseIntError
70 }
71 }
72
73 impl From<ParseFloatError> for LexerError {
from(_: ParseFloatError) -> Self74 fn from(_: ParseFloatError) -> Self {
75 LexerError::ParseFloatError
76 }
77 }
78
79 impl From<float::ProtobufFloatParseError> for LexerError {
from(_: float::ProtobufFloatParseError) -> Self80 fn from(_: float::ProtobufFloatParseError) -> Self {
81 LexerError::IncorrectFloatLit
82 }
83 }
84
/// Cursor over a borrowed input string that produces tokens.
///
/// The type is `Copy`; methods in this file copy the lexer to look ahead or
/// to attempt a parse, and assign the copy back only on success.
#[derive(Copy, Clone)]
pub struct Lexer<'a> {
    /// Input language; selects which comment syntax is skipped and which
    /// literal forms are attempted.
    language: ParserLanguage,
    /// The complete text being tokenized.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    pos: usize,
    /// Line/column of the next unread character, updated as characters are consumed.
    pub loc: Loc,
}
92
/// Returns `true` if `c` may start an identifier: an ASCII letter or `_`.
///
/// The protobuf grammar defines `letter = "A" … "Z" | "a" … "z"`, and the
/// identifier-continuation check in this file is ASCII-only as well, so
/// non-ASCII alphabetic characters (for example `é`) are not letters here.
fn is_letter(c: char) -> bool {
    c.is_ascii_alphabetic() || c == '_'
}
96
97 impl<'a> Lexer<'a> {
new(input: &'a str, language: ParserLanguage) -> Lexer<'a>98 pub fn new(input: &'a str, language: ParserLanguage) -> Lexer<'a> {
99 Lexer {
100 language,
101 input,
102 pos: 0,
103 loc: Loc::start(),
104 }
105 }
106
107 /// No more chars
eof(&self) -> bool108 pub fn eof(&self) -> bool {
109 self.pos == self.input.len()
110 }
111
112 /// Remaining chars
rem_chars(&self) -> &'a str113 fn rem_chars(&self) -> &'a str {
114 &self.input[self.pos..]
115 }
116
lookahead_char_is<P: FnOnce(char) -> bool>(&self, p: P) -> bool117 pub fn lookahead_char_is<P: FnOnce(char) -> bool>(&self, p: P) -> bool {
118 self.lookahead_char().map_or(false, p)
119 }
120
lookahead_char_is_in(&self, alphabet: &str) -> bool121 fn lookahead_char_is_in(&self, alphabet: &str) -> bool {
122 self.lookahead_char_is(|c| alphabet.contains(c))
123 }
124
next_char_opt(&mut self) -> Option<char>125 fn next_char_opt(&mut self) -> Option<char> {
126 let rem = self.rem_chars();
127 if rem.is_empty() {
128 None
129 } else {
130 let mut char_indices = rem.char_indices();
131 let (_, c) = char_indices.next().unwrap();
132 let c_len = char_indices.next().map(|(len, _)| len).unwrap_or(rem.len());
133 self.pos += c_len;
134 if c == '\n' {
135 self.loc.line += 1;
136 self.loc.col = FIRST_COL;
137 } else {
138 self.loc.col += 1;
139 }
140 Some(c)
141 }
142 }
143
next_char(&mut self) -> LexerResult<char>144 fn next_char(&mut self) -> LexerResult<char> {
145 self.next_char_opt().ok_or(LexerError::UnexpectedEof)
146 }
147
148 /// Skip whitespaces
skip_whitespaces(&mut self)149 fn skip_whitespaces(&mut self) {
150 self.take_while(|c| c.is_whitespace());
151 }
152
skip_c_comment(&mut self) -> LexerResult<()>153 fn skip_c_comment(&mut self) -> LexerResult<()> {
154 if self.skip_if_lookahead_is_str("/*") {
155 let end = "*/";
156 match self.rem_chars().find(end) {
157 None => Err(LexerError::UnexpectedEof),
158 Some(len) => {
159 let new_pos = self.pos + len + end.len();
160 self.skip_to_pos(new_pos);
161 Ok(())
162 }
163 }
164 } else {
165 Ok(())
166 }
167 }
168
skip_cpp_comment(&mut self)169 fn skip_cpp_comment(&mut self) {
170 if self.skip_if_lookahead_is_str("//") {
171 loop {
172 match self.next_char_opt() {
173 Some('\n') | None => break,
174 _ => {}
175 }
176 }
177 }
178 }
179
skip_sh_comment(&mut self)180 fn skip_sh_comment(&mut self) {
181 if self.skip_if_lookahead_is_str("#") {
182 loop {
183 match self.next_char_opt() {
184 Some('\n') | None => break,
185 _ => {}
186 }
187 }
188 }
189 }
190
skip_comment(&mut self) -> LexerResult<()>191 fn skip_comment(&mut self) -> LexerResult<()> {
192 match self.language {
193 ParserLanguage::Proto => {
194 self.skip_c_comment()?;
195 self.skip_cpp_comment();
196 }
197 ParserLanguage::TextFormat => {
198 self.skip_sh_comment();
199 }
200 ParserLanguage::Json => {}
201 }
202 Ok(())
203 }
204
skip_ws(&mut self) -> LexerResult<()>205 pub fn skip_ws(&mut self) -> LexerResult<()> {
206 loop {
207 let pos = self.pos;
208 self.skip_whitespaces();
209 self.skip_comment()?;
210 if pos == self.pos {
211 // Did not advance
212 return Ok(());
213 }
214 }
215 }
216
take_while<F>(&mut self, f: F) -> &'a str where F: Fn(char) -> bool,217 pub fn take_while<F>(&mut self, f: F) -> &'a str
218 where
219 F: Fn(char) -> bool,
220 {
221 let start = self.pos;
222 while self.lookahead_char().map(&f) == Some(true) {
223 self.next_char_opt().unwrap();
224 }
225 let end = self.pos;
226 &self.input[start..end]
227 }
228
lookahead_char(&self) -> Option<char>229 fn lookahead_char(&self) -> Option<char> {
230 self.clone().next_char_opt()
231 }
232
lookahead_is_str(&self, s: &str) -> bool233 fn lookahead_is_str(&self, s: &str) -> bool {
234 self.rem_chars().starts_with(s)
235 }
236
skip_if_lookahead_is_str(&mut self, s: &str) -> bool237 fn skip_if_lookahead_is_str(&mut self, s: &str) -> bool {
238 if self.lookahead_is_str(s) {
239 let new_pos = self.pos + s.len();
240 self.skip_to_pos(new_pos);
241 true
242 } else {
243 false
244 }
245 }
246
next_char_if<P>(&mut self, p: P) -> Option<char> where P: FnOnce(char) -> bool,247 fn next_char_if<P>(&mut self, p: P) -> Option<char>
248 where
249 P: FnOnce(char) -> bool,
250 {
251 let mut clone = self.clone();
252 match clone.next_char_opt() {
253 Some(c) if p(c) => {
254 *self = clone;
255 Some(c)
256 }
257 _ => None,
258 }
259 }
260
next_char_if_eq(&mut self, expect: char) -> bool261 pub fn next_char_if_eq(&mut self, expect: char) -> bool {
262 self.next_char_if(|c| c == expect) != None
263 }
264
next_char_if_in(&mut self, alphabet: &str) -> Option<char>265 fn next_char_if_in(&mut self, alphabet: &str) -> Option<char> {
266 for c in alphabet.chars() {
267 if self.next_char_if_eq(c) {
268 return Some(c);
269 }
270 }
271 None
272 }
273
next_char_expect_eq(&mut self, expect: char) -> LexerResult<()>274 fn next_char_expect_eq(&mut self, expect: char) -> LexerResult<()> {
275 if self.next_char_if_eq(expect) {
276 Ok(())
277 } else {
278 Err(LexerError::ExpectChar(expect))
279 }
280 }
281
next_char_expect<P>(&mut self, expect: P, err: LexerError) -> LexerResult<char> where P: FnOnce(char) -> bool,282 fn next_char_expect<P>(&mut self, expect: P, err: LexerError) -> LexerResult<char>
283 where
284 P: FnOnce(char) -> bool,
285 {
286 self.next_char_if(expect).ok_or(err)
287 }
288
289 // str functions
290
291 /// properly update line and column
skip_to_pos(&mut self, new_pos: usize) -> &'a str292 fn skip_to_pos(&mut self, new_pos: usize) -> &'a str {
293 assert!(new_pos >= self.pos);
294 assert!(new_pos <= self.input.len());
295 let pos = self.pos;
296 while self.pos != new_pos {
297 self.next_char_opt().unwrap();
298 }
299 &self.input[pos..new_pos]
300 }
301
302 // Protobuf grammar
303
304 // char functions
305
306 // letter = "A" … "Z" | "a" … "z"
307 // https://github.com/google/protobuf/issues/4565
next_letter_opt(&mut self) -> Option<char>308 fn next_letter_opt(&mut self) -> Option<char> {
309 self.next_char_if(is_letter)
310 }
311
312 // capitalLetter = "A" … "Z"
_next_capital_letter_opt(&mut self) -> Option<char>313 fn _next_capital_letter_opt(&mut self) -> Option<char> {
314 self.next_char_if(|c| c >= 'A' && c <= 'Z')
315 }
316
next_ident_part(&mut self) -> Option<char>317 fn next_ident_part(&mut self) -> Option<char> {
318 self.next_char_if(|c| c.is_ascii_alphanumeric() || c == '_')
319 }
320
321 // Identifiers
322
323 // ident = letter { letter | decimalDigit | "_" }
next_ident_opt(&mut self) -> LexerResult<Option<String>>324 fn next_ident_opt(&mut self) -> LexerResult<Option<String>> {
325 if let Some(c) = self.next_letter_opt() {
326 let mut ident = String::new();
327 ident.push(c);
328 while let Some(c) = self.next_ident_part() {
329 ident.push(c);
330 }
331 Ok(Some(ident))
332 } else {
333 Ok(None)
334 }
335 }
336
337 // Integer literals
338
339 // hexLit = "0" ( "x" | "X" ) hexDigit { hexDigit }
next_hex_lit_opt(&mut self) -> LexerResult<Option<u64>>340 fn next_hex_lit_opt(&mut self) -> LexerResult<Option<u64>> {
341 Ok(
342 if self.skip_if_lookahead_is_str("0x") || self.skip_if_lookahead_is_str("0X") {
343 let s = self.take_while(|c| c.is_ascii_hexdigit());
344 Some(u64::from_str_radix(s, 16)? as u64)
345 } else {
346 None
347 },
348 )
349 }
350
351 // decimalLit = ( "1" … "9" ) { decimalDigit }
352 // octalLit = "0" { octalDigit }
next_decimal_octal_lit_opt(&mut self) -> LexerResult<Option<u64>>353 fn next_decimal_octal_lit_opt(&mut self) -> LexerResult<Option<u64>> {
354 // do not advance on number parse error
355 let mut clone = self.clone();
356
357 let pos = clone.pos;
358
359 Ok(if clone.next_char_if(|c| c.is_ascii_digit()) != None {
360 clone.take_while(|c| c.is_ascii_digit());
361 let value = clone.input[pos..clone.pos].parse()?;
362 *self = clone;
363 Some(value)
364 } else {
365 None
366 })
367 }
368
369 // hexDigit = "0" … "9" | "A" … "F" | "a" … "f"
next_hex_digit(&mut self) -> LexerResult<u32>370 fn next_hex_digit(&mut self) -> LexerResult<u32> {
371 let mut clone = self.clone();
372 let r = match clone.next_char()? {
373 c if c >= '0' && c <= '9' => c as u32 - b'0' as u32,
374 c if c >= 'A' && c <= 'F' => c as u32 - b'A' as u32 + 10,
375 c if c >= 'a' && c <= 'f' => c as u32 - b'a' as u32 + 10,
376 _ => return Err(LexerError::ExpectHexDigit),
377 };
378 *self = clone;
379 Ok(r)
380 }
381
382 // octalDigit = "0" … "7"
next_octal_digit(&mut self) -> LexerResult<u32>383 fn next_octal_digit(&mut self) -> LexerResult<u32> {
384 self.next_char_expect(|c| c >= '0' && c <= '9', LexerError::ExpectOctDigit)
385 .map(|c| c as u32 - '0' as u32)
386 }
387
388 // decimalDigit = "0" … "9"
next_decimal_digit(&mut self) -> LexerResult<u32>389 fn next_decimal_digit(&mut self) -> LexerResult<u32> {
390 self.next_char_expect(|c| c >= '0' && c <= '9', LexerError::ExpectDecDigit)
391 .map(|c| c as u32 - '0' as u32)
392 }
393
394 // decimals = decimalDigit { decimalDigit }
next_decimal_digits(&mut self) -> LexerResult<()>395 fn next_decimal_digits(&mut self) -> LexerResult<()> {
396 self.next_decimal_digit()?;
397 self.take_while(|c| c >= '0' && c <= '9');
398 Ok(())
399 }
400
401 // intLit = decimalLit | octalLit | hexLit
next_int_lit_opt(&mut self) -> LexerResult<Option<u64>>402 pub fn next_int_lit_opt(&mut self) -> LexerResult<Option<u64>> {
403 assert_ne!(ParserLanguage::Json, self.language);
404
405 self.skip_ws()?;
406 if let Some(i) = self.next_hex_lit_opt()? {
407 return Ok(Some(i));
408 }
409 if let Some(i) = self.next_decimal_octal_lit_opt()? {
410 return Ok(Some(i));
411 }
412 Ok(None)
413 }
414
415 // Floating-point literals
416
417 // exponent = ( "e" | "E" ) [ "+" | "-" ] decimals
next_exponent_opt(&mut self) -> LexerResult<Option<()>>418 fn next_exponent_opt(&mut self) -> LexerResult<Option<()>> {
419 if self.next_char_if_in("eE") != None {
420 self.next_char_if_in("+-");
421 self.next_decimal_digits()?;
422 Ok(Some(()))
423 } else {
424 Ok(None)
425 }
426 }
427
428 // floatLit = ( decimals "." [ decimals ] [ exponent ] | decimals exponent | "."decimals [ exponent ] ) | "inf" | "nan"
next_float_lit(&mut self) -> LexerResult<()>429 fn next_float_lit(&mut self) -> LexerResult<()> {
430 assert_ne!(ParserLanguage::Json, self.language);
431
432 // "inf" and "nan" are handled as part of ident
433 if self.next_char_if_eq('.') {
434 self.next_decimal_digits()?;
435 self.next_exponent_opt()?;
436 } else {
437 self.next_decimal_digits()?;
438 if self.next_char_if_eq('.') {
439 self.next_decimal_digits()?;
440 self.next_exponent_opt()?;
441 } else {
442 if self.next_exponent_opt()? == None {
443 return Err(LexerError::IncorrectFloatLit);
444 }
445 }
446 }
447 Ok(())
448 }
449
450 // String literals
451
452 // charValue = hexEscape | octEscape | charEscape | /[^\0\n\\]/
453 // hexEscape = '\' ( "x" | "X" ) hexDigit hexDigit
454 // https://github.com/google/protobuf/issues/4560
455 // octEscape = '\' octalDigit octalDigit octalDigit
456 // charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' )
457 // quote = "'" | '"'
next_byte_value(&mut self) -> LexerResult<u8>458 pub fn next_byte_value(&mut self) -> LexerResult<u8> {
459 match self.next_char()? {
460 '\\' => {
461 match self.next_char()? {
462 '\'' => Ok(b'\''),
463 '"' => Ok(b'"'),
464 '\\' => Ok(b'\\'),
465 'a' => Ok(b'\x07'),
466 'b' => Ok(b'\x08'),
467 'f' => Ok(b'\x0c'),
468 'n' => Ok(b'\n'),
469 'r' => Ok(b'\r'),
470 't' => Ok(b'\t'),
471 'v' => Ok(b'\x0b'),
472 'x' => {
473 let d1 = self.next_hex_digit()? as u8;
474 let d2 = self.next_hex_digit()? as u8;
475 Ok(((d1 << 4) | d2) as u8)
476 }
477 d if d >= '0' && d <= '7' => {
478 let mut r = d as u8 - b'0';
479 for _ in 0..2 {
480 match self.next_octal_digit() {
481 Err(_) => break,
482 Ok(d) => r = (r << 3) + d as u8,
483 }
484 }
485 Ok(r)
486 }
487 // https://github.com/google/protobuf/issues/4562
488 // TODO: overflow
489 c => Ok(c as u8),
490 }
491 }
492 '\n' | '\0' => Err(LexerError::IncorrectInput),
493 // TODO: check overflow
494 c => Ok(c as u8),
495 }
496 }
497
char_try_from(i: u32) -> LexerResult<char>498 fn char_try_from(i: u32) -> LexerResult<char> {
499 char::try_from(i).map_err(|_| LexerError::IncorrectUnicodeChar)
500 }
501
next_json_char_value(&mut self) -> LexerResult<char>502 pub fn next_json_char_value(&mut self) -> LexerResult<char> {
503 match self.next_char()? {
504 '\\' => match self.next_char()? {
505 '"' => Ok('"'),
506 '\'' => Ok('\''),
507 '\\' => Ok('\\'),
508 '/' => Ok('/'),
509 'b' => Ok('\x08'),
510 'f' => Ok('\x0c'),
511 'n' => Ok('\n'),
512 'r' => Ok('\r'),
513 't' => Ok('\t'),
514 'u' => {
515 let mut v = 0;
516 for _ in 0..4 {
517 let digit = self.next_hex_digit()?;
518 v = v * 16 + digit;
519 }
520 Self::char_try_from(v)
521 }
522 _ => Err(LexerError::IncorrectJsonEscape),
523 },
524 c => Ok(c),
525 }
526 }
527
528 // https://github.com/google/protobuf/issues/4564
529 // strLit = ( "'" { charValue } "'" ) | ( '"' { charValue } '"' )
next_str_lit_raw(&mut self) -> LexerResult<String>530 fn next_str_lit_raw(&mut self) -> LexerResult<String> {
531 let mut raw = String::new();
532
533 let mut first = true;
534 loop {
535 if !first {
536 self.skip_ws()?;
537 }
538
539 let start = self.pos;
540
541 let q = match self.next_char_if_in("'\"") {
542 Some(q) => q,
543 None if !first => break,
544 None => return Err(LexerError::IncorrectInput),
545 };
546 first = false;
547 while self.lookahead_char() != Some(q) {
548 self.next_byte_value()?;
549 }
550 self.next_char_expect_eq(q)?;
551
552 raw.push_str(&self.input[start + 1..self.pos - 1]);
553 }
554 Ok(raw)
555 }
556
next_str_lit_raw_opt(&mut self) -> LexerResult<Option<String>>557 fn next_str_lit_raw_opt(&mut self) -> LexerResult<Option<String>> {
558 if self.lookahead_char_is_in("'\"") {
559 Ok(Some(self.next_str_lit_raw()?))
560 } else {
561 Ok(None)
562 }
563 }
564
565 /// Parse next token as JSON number
next_json_number_opt(&mut self) -> LexerResult<Option<JsonNumberLit>>566 fn next_json_number_opt(&mut self) -> LexerResult<Option<JsonNumberLit>> {
567 assert_eq!(ParserLanguage::Json, self.language);
568
569 fn is_digit(c: char) -> bool {
570 c >= '0' && c <= '9'
571 }
572
573 fn is_digit_1_9(c: char) -> bool {
574 c >= '1' && c <= '9'
575 }
576
577 if !self.lookahead_char_is_in("-0123456789") {
578 return Ok(None);
579 }
580
581 let mut s = String::new();
582 if self.next_char_if_eq('-') {
583 s.push('-');
584 }
585
586 if self.next_char_if_eq('0') {
587 s.push('0');
588 } else {
589 s.push(self.next_char_expect(is_digit_1_9, LexerError::IncorrectJsonNumber)?);
590 while let Some(c) = self.next_char_if(is_digit) {
591 s.push(c);
592 }
593 }
594
595 if self.next_char_if_eq('.') {
596 s.push('.');
597 s.push(self.next_char_expect(is_digit, LexerError::IncorrectJsonNumber)?);
598 while let Some(c) = self.next_char_if(is_digit) {
599 s.push(c);
600 }
601 }
602
603 if let Some(c) = self.next_char_if_in("eE") {
604 s.push(c);
605 if let Some(c) = self.next_char_if_in("+-") {
606 s.push(c);
607 }
608 s.push(self.next_char_expect(is_digit_1_9, LexerError::IncorrectJsonNumber)?);
609 while let Some(c) = self.next_char_if(is_digit) {
610 s.push(c);
611 }
612 }
613
614 Ok(Some(JsonNumberLit(s)))
615 }
616
next_token_inner(&mut self) -> LexerResult<Token>617 fn next_token_inner(&mut self) -> LexerResult<Token> {
618 if self.language == ParserLanguage::Json {
619 if let Some(v) = self.next_json_number_opt()? {
620 return Ok(Token::JsonNumber(v));
621 }
622 }
623
624 if let Some(ident) = self.next_ident_opt()? {
625 let token = if self.language != ParserLanguage::Json && ident == float::PROTOBUF_NAN {
626 Token::FloatLit(f64::NAN)
627 } else if self.language != ParserLanguage::Json && ident == float::PROTOBUF_INF {
628 Token::FloatLit(f64::INFINITY)
629 } else {
630 Token::Ident(ident.to_owned())
631 };
632 return Ok(token);
633 }
634
635 if self.language != ParserLanguage::Json {
636 let mut clone = self.clone();
637 let pos = clone.pos;
638 if let Ok(_) = clone.next_float_lit() {
639 let f = float::parse_protobuf_float(&self.input[pos..clone.pos])?;
640 *self = clone;
641 return Ok(Token::FloatLit(f));
642 }
643
644 if let Some(lit) = self.next_int_lit_opt()? {
645 return Ok(Token::IntLit(lit));
646 }
647 }
648
649 if let Some(escaped) = self.next_str_lit_raw_opt()? {
650 return Ok(Token::StrLit(StrLit { escaped }));
651 }
652
653 // This branch must be after str lit
654 if let Some(c) = self.next_char_if(|c| c.is_ascii_punctuation()) {
655 return Ok(Token::Symbol(c));
656 }
657
658 if let Some(ident) = self.next_ident_opt()? {
659 return Ok(Token::Ident(ident));
660 }
661
662 Err(LexerError::IncorrectInput)
663 }
664
next_token(&mut self) -> LexerResult<Option<TokenWithLocation>>665 pub fn next_token(&mut self) -> LexerResult<Option<TokenWithLocation>> {
666 self.skip_ws()?;
667 let loc = self.loc;
668
669 Ok(if self.eof() {
670 None
671 } else {
672 let token = self.next_token_inner()?;
673 // Skip whitespace here to update location
674 // to the beginning of the next token
675 self.skip_ws()?;
676 Some(TokenWithLocation { token, loc })
677 })
678 }
679 }
680
#[cfg(test)]
mod test {
    use super::*;

    /// Runs `parse_what` on a proto-language lexer over `input` and returns
    /// its result. Panics if lexing fails or input is left unconsumed.
    fn lex<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Lexer) -> LexerResult<R>,
    {
        let mut lexer = Lexer::new(input, ParserLanguage::Proto);
        let parsed = match parse_what(&mut lexer) {
            Ok(parsed) => parsed,
            Err(e) => panic!("lexer failed at {}: {:?}", lexer.loc, e),
        };
        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
        parsed
    }

    /// Like `lex`, for parsers returning an `Option`; additionally panics
    /// when the parser yields `None`.
    fn lex_opt<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Lexer) -> LexerResult<Option<R>>,
    {
        let mut lexer = Lexer::new(input, ParserLanguage::Proto);
        let maybe = match parse_what(&mut lexer) {
            Ok(maybe) => maybe,
            Err(e) => panic!("lexer failed at {}: {:?}", lexer.loc, e),
        };
        let parsed = match maybe {
            Some(parsed) => parsed,
            None => panic!("lexer returned none at {}", lexer.loc),
        };
        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
        parsed
    }

    #[test]
    fn test_lexer_int_lit() {
        let input = r#"10"#;
        let value = lex_opt(input, |lexer| lexer.next_int_lit_opt());
        assert_eq!(10, value);
    }

    #[test]
    fn test_lexer_float_lit() {
        let input = r#"12.3"#;
        let token = lex(input, |lexer| lexer.next_token_inner());
        assert_eq!(Token::FloatLit(12.3), token);
    }
}
720