• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef TOOLS_GN_TOKENIZER_H_
6 #define TOOLS_GN_TOKENIZER_H_
7 
8 #include <stddef.h>
9 
10 #include <string_view>
11 #include <vector>
12 
13 #include "base/macros.h"
14 #include "gn/err.h"
15 #include "gn/token.h"
16 
17 class InputFile;
18 
// Selects how the tokenizer treats whitespace characters that GN does not
// allow in its input: tab (0x09), vertical tab (0x0B), and formfeed (0x0C).
enum class WhitespaceTransform {
  // Leave the input untouched. The illegal characters are almost always
  // genuine errors, so this is the mode Tokenizer::Tokenize() defaults to.
  kMaintainOriginalInput,

  // Convert the illegal whitespace characters to spaces when encountered.
  // Intended for the formatter, so that such input can still be parsed and
  // then rewritten correctly.
  kInvalidToSpace,
};
27 
28 class Tokenizer {
29  public:
30   static std::vector<Token> Tokenize(
31       const InputFile* input_file,
32       Err* err,
33       WhitespaceTransform whitespace_transform =
34           WhitespaceTransform::kMaintainOriginalInput);
35 
36   // Counts lines in the given buffer (the first line is "1") and returns
37   // the byte offset of the beginning of that line, or (size_t)-1 if there
38   // aren't that many lines in the file. Note that this will return the byte
39   // one past the end of the input if the last character is a newline.
40   //
41   // This is a helper function for error output so that the tokenizer's
42   // notion of lines can be used elsewhere.
43   static size_t ByteOffsetOfNthLine(const std::string_view& buf, int n);
44 
45   // Returns true if the given offset of the string piece counts as a newline.
46   // The offset must be in the buffer.
47   static bool IsNewline(const std::string_view& buffer, size_t offset);
48 
49   static bool IsIdentifierFirstChar(char c);
50 
51   static bool IsIdentifierContinuingChar(char c);
52 
53  private:
54   // InputFile must outlive the tokenizer and all generated tokens.
55   Tokenizer(const InputFile* input_file,
56             Err* err,
57             WhitespaceTransform whitespace_transform);
58   ~Tokenizer();
59 
60   std::vector<Token> Run();
61 
62   void AdvanceToNextToken();
63   Token::Type ClassifyCurrent() const;
64   void AdvanceToEndOfToken(const Location& location, Token::Type type);
65 
66   // Whether from this location back to the beginning of the line is only
67   // whitespace. |location| should be the first character of the token to be
68   // checked.
69   bool AtStartOfLine(size_t location) const;
70 
71   bool IsCurrentWhitespace() const;
72   bool IsCurrentNewline() const;
73   bool IsCurrentStringTerminator(char quote_char) const;
74 
CanIncrement()75   bool CanIncrement() const { return cur_ < input_.size() - 1; }
76 
77   // Increments the current location by one.
78   void Advance();
79 
80   // Returns the current character in the file as a location.
81   Location GetCurrentLocation() const;
82 
83   Err GetErrorForInvalidToken(const Location& location) const;
84 
done()85   bool done() const { return at_end() || has_error(); }
86 
at_end()87   bool at_end() const { return cur_ == input_.size(); }
cur_char()88   char cur_char() const { return input_[cur_]; }
89 
has_error()90   bool has_error() const { return err_->has_error(); }
91 
92   std::vector<Token> tokens_;
93 
94   const InputFile* input_file_;
95   const std::string_view input_;
96   Err* err_;
97   WhitespaceTransform whitespace_transform_;
98   size_t cur_ = 0;  // Byte offset into input buffer.
99 
100   int line_number_ = 1;
101   int column_number_ = 1;
102 
103   DISALLOW_COPY_AND_ASSIGN(Tokenizer);
104 };
105 
106 #endif  // TOOLS_GN_TOKENIZER_H_
107