/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/sksl/lex/NFAtoDFA.h"
#include "src/sksl/lex/RegexParser.h"
#include "src/sksl/lex/TransitionTable.h"

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>

/**
 * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
 * file is a text file with one token definition per line. Each line is of the form:
 * <TOKEN_NAME> = <pattern>
 * where <pattern> is either a regular expression (e.g. [0-9]) or a double-quoted literal string.
 */
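/*
 * A hypothetical .lex fragment, for illustration only (the real token definitions live in the
 * .lex file passed to this tool and may differ):
 *
 *   WHILE       = "while"
 *   INT_LITERAL = [0-9]+
 *   WHITESPACE  = [ \t\r\n]+
 */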

static constexpr const char* HEADER =
    "/*\n"
    " * Copyright 2017 Google Inc.\n"
    " *\n"
    " * Use of this source code is governed by a BSD-style license that can be\n"
    " * found in the LICENSE file.\n"
    " */\n"
    "/*****************************************************************************************\n"
    " ******************** This file was generated by sksllex. Do not edit. *******************\n"
    " *****************************************************************************************/\n";

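// Writes the generated lexer header: the token struct (whose Kind enum has one value per token
// definition, plus TK_NONE) and the lexer class declaration.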
static void writeH(const DFA& dfa, const char* lexer, const char* token,
                   const std::vector<std::string>& tokens, const char* hPath) {
    std::ofstream out(hPath);
    SkASSERT(out.good());
    out << HEADER;
    out << "#ifndef SKSL_" << lexer << "\n";
    out << "#define SKSL_" << lexer << "\n";
    out << "#include \"include/core/SkStringView.h\"\n";
    out << "#include <cstddef>\n";
    out << "#include <cstdint>\n";
    out << "namespace SkSL {\n";
    out << "\n";
    out << "struct " << token << " {\n";
    out << "    enum class Kind {\n";
    for (const std::string& t : tokens) {
        out << "        TK_" << t << ",\n";
    }
    out << "        TK_NONE,";
    out << R"(
    };

    )" << token << "() {}";

    out << token << R"((Kind kind, int32_t offset, int32_t length, int32_t line)
    : fKind(kind)
    , fOffset(offset)
    , fLength(length)
    , fLine(line) {}

    Kind fKind      = Kind::TK_NONE;
    int32_t fOffset = -1;
    int32_t fLength = -1;
    int32_t fLine   = -1;
};

class )" << lexer << R"( {
public:
    void start(skstd::string_view text) {
        fText = text;
        fOffset = 0;
        fLine = 1;
    }

    )" << token << R"( next();

    struct Checkpoint {
        int32_t fOffset;
        int32_t fLine;
    };

    Checkpoint getCheckpoint() const {
        return {fOffset, fLine};
    }

    void rewindToCheckpoint(Checkpoint checkpoint) {
        fOffset = checkpoint.fOffset;
        fLine = checkpoint.fLine;
    }

private:
    skstd::string_view fText;
    int32_t fOffset;
    int32_t fLine;
};

} // namespace
#endif
)";
}

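// Writes the generated lexer implementation: the character-class mapping table, the DFA transition
// table, the accept-state table, and the lexer's next() method, which walks the DFA.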
static void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
                     const char* cppPath) {
    std::ofstream out(cppPath);
    SkASSERT(out.good());
    out << HEADER;
    out << "#include \"" << include << "\"\n";
    out << "\n";
    out << "namespace SkSL {\n";
    out << "\n";

    size_t states = 0;
    for (const auto& row : dfa.fTransitions) {
        states = std::max(states, row.size());
    }
    out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n";
    // arbitrarily-chosen character which is greater than START_CHAR and should not appear in actual
    // input
    out << "static const uint8_t INVALID_CHAR = 18;";
    out << "static const int8_t kMappings[" << dfa.fCharMappings.size() << "] = {\n    ";
    const char* separator = "";
    for (int m : dfa.fCharMappings) {
        out << separator << std::to_string(m);
        separator = ", ";
    }
    out << "\n};\n";

    WriteTransitionTable(out, dfa, states);

    out << "static const int8_t kAccepts[" << states << "] = {";
    for (size_t i = 0; i < states; ++i) {
        if (i < dfa.fAccepts.size()) {
            out << " " << dfa.fAccepts[i] << ",";
        } else {
            out << " " << INVALID << ",";
        }
    }
    out << " };\n";
    out << "\n";

    out << token << " " << lexer << "::next() {";
    out << R"(
    // note that we cheat here: normally a lexer needs to worry about the case
    // where a token has a prefix which is not itself a valid token - for instance,
    // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid
    // tokens. Our grammar doesn't have this property, so we can simplify the logic
    // a bit.
    int32_t startOffset = fOffset;
    if (startOffset == (int32_t)fText.length()) {
        return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0, fLine);
    }
    State state = 1;
    for (;;) {
        if (fOffset >= (int32_t)fText.length()) {
            if (kAccepts[state] == -1) {
                return Token(Token::Kind::TK_END_OF_FILE, startOffset, 0, fLine);
            }
            break;
        }
        uint8_t c = (uint8_t) fText[fOffset];
        if (c <= 8 || c >= )" << dfa.fCharMappings.size() << R"() {
            c = INVALID_CHAR;
        }
        State newState = get_transition(kMappings[c], state);
        if (!newState) {
            break;
        }
        state = newState;
        ++fOffset;
        if (c == '\n') {
            ++fLine;
        }
    }
    Token::Kind kind = ()" << token << R"(::Kind) kAccepts[state];
    return )" << token << R"((kind, startOffset, fOffset - startOffset, fLine);
}

} // namespace
)";
}

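// Reads the token definitions from inPath, builds an NFA from their patterns, converts it to a
// DFA, and emits the generated header and implementation files.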
static void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
                    const char* cppPath) {
    NFA nfa;
    std::vector<std::string> tokens;
    tokens.push_back("END_OF_FILE");
    std::string line;
    std::ifstream in(inPath);
    while (std::getline(in, line)) {
        if (line.length() == 0) {
            continue;
        }
        if (line.length() >= 2 && line[0] == '/' && line[1] == '/') {
            continue;
        }
        std::istringstream split(line);
        std::string name, delimiter, pattern;
        if (split >> name >> delimiter >> pattern) {
            SkASSERT(split.eof());
            SkASSERT(name != "");
            SkASSERT(delimiter == "=");
            SkASSERT(pattern != "");
            tokens.push_back(name);
            if (pattern[0] == '"') {
                SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
                RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
                for (size_t i = 2; i < pattern.size() - 1; ++i) {
                    node = RegexNode(RegexNode::kConcat_Kind, node,
                                     RegexNode(RegexNode::kChar_Kind, pattern[i]));
                }
                nfa.addRegex(node);
            }
            else {
                nfa.addRegex(RegexParser().parse(pattern));
            }
        }
    }
    NFAtoDFA converter(&nfa);
    DFA dfa = converter.convert();
    writeH(dfa, lexer, token, tokens, hPath);
    writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), cppPath);
}

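// Example invocation; the file and class names below are illustrative, as the actual arguments
// are supplied by the build rule that runs sksllex:
//
//   sksllex sksl.lex Lexer Token SkSLLexer.h SkSLLexer.cpp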
int main(int argc, const char** argv) {
    if (argc != 6) {
        printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
        exit(1);
    }
    process(argv[1], argv[2], argv[3], argv[4], argv[5]);
    return 0;
}