• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef TOOLS_GN_TOKENIZER_H_
6 #define TOOLS_GN_TOKENIZER_H_
7 
8 #include <vector>
9 
10 #include "base/basictypes.h"
11 #include "base/strings/string_piece.h"
12 #include "base/strings/string_util.h"
13 #include "tools/gn/err.h"
14 #include "tools/gn/token.h"
15 
16 class InputFile;
17 
18 class Tokenizer {
19  public:
20   static std::vector<Token> Tokenize(const InputFile* input_file, Err* err);
21 
22   // Counts lines in the given buffer (the first line is "1") and returns
23   // the byte offset of the beginning of that line, or (size_t)-1 if there
24   // aren't that many lines in the file. Note that this will return the byte
25   // one past the end of the input if the last character is a newline.
26   //
27   // This is a helper function for error output so that the tokenizer's
28   // notion of lines can be used elsewhere.
29   static size_t ByteOffsetOfNthLine(const base::StringPiece& buf, int n);
30 
31   // Returns true if the given offset of the string piece counts as a newline.
32   // The offset must be in the buffer.
33   static bool IsNewline(const base::StringPiece& buffer, size_t offset);
34 
IsIdentifierFirstChar(char c)35   static bool IsIdentifierFirstChar(char c) {
36     return IsAsciiAlpha(c) || c == '_';
37   }
38 
IsIdentifierContinuingChar(char c)39   static bool IsIdentifierContinuingChar(char c) {
40     // Also allow digits after the first char.
41     return IsIdentifierFirstChar(c) || IsAsciiDigit(c);
42   }
43 
44  private:
45   // InputFile must outlive the tokenizer and all generated tokens.
46   explicit Tokenizer(const InputFile* input_file, Err* err);
47   ~Tokenizer();
48 
49   std::vector<Token> Run();
50 
51   void AdvanceToNextToken();
52   Token::Type ClassifyCurrent() const;
53   void AdvanceToEndOfToken(const Location& location, Token::Type type);
54 
55   // Whether from this location back to the beginning of the line is only
56   // whitespace. |location| should be the first character of the token to be
57   // checked.
58   bool AtStartOfLine(size_t location) const;
59 
60   bool IsCurrentWhitespace() const;
61   bool IsCurrentNewline() const;
62   bool IsCurrentStringTerminator(char quote_char) const;
63 
CanIncrement()64   bool CanIncrement() const { return cur_ < input_.size(); }
65 
66   // Increments the current location by one.
67   void Advance();
68 
69   // Returns the current character in the file as a location.
70   Location GetCurrentLocation() const;
71 
72   Err GetErrorForInvalidToken(const Location& location) const;
73 
done()74   bool done() const { return at_end() || has_error(); }
75 
at_end()76   bool at_end() const { return cur_ == input_.size(); }
cur_char()77   char cur_char() const { return input_[cur_]; }
78 
has_error()79   bool has_error() const { return err_->has_error(); }
80 
81   std::vector<Token> tokens_;
82 
83   const InputFile* input_file_;
84   const base::StringPiece input_;
85   Err* err_;
86   size_t cur_;  // Byte offset into input buffer.
87 
88   int line_number_;
89   int char_in_line_;
90 
91   DISALLOW_COPY_AND_ASSIGN(Tokenizer);
92 };
93 
94 #endif  // TOOLS_GN_TOKENIZER_H_
95