/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/sksl/lex/NFAtoDFA.h"
#include "src/sksl/lex/RegexParser.h"
#include "src/sksl/lex/TransitionTable.h"

#include <fstream>
#include <sstream>
#include <string>

/**
 * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
 * file is a text file with one token definition per line. Each line is of the form:
 *     <TOKEN_NAME> = <pattern>
 * where <pattern> is either a regular expression (e.g [0-9]) or a double-quoted literal string.
 */
// Banner emitted verbatim at the top of every generated .h/.cpp file so that
// readers know the output is machine-produced and covered by the same license.
static constexpr const char* HEADER = R"(/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
/*****************************************************************************************
 ******************** This file was generated by sksllex. Do not edit. *******************
 *****************************************************************************************/
)";

writeH(const DFA & dfa,const char * lexer,const char * token,const std::vector<std::string> & tokens,const char * hPath)34 static void writeH(const DFA& dfa, const char* lexer, const char* token,
35 const std::vector<std::string>& tokens, const char* hPath) {
36 std::ofstream out(hPath);
37 SkASSERT(out.good());
38 out << HEADER;
39 out << "#ifndef SKSL_" << lexer << "\n";
40 out << "#define SKSL_" << lexer << "\n";
41 out << "#include \"include/core/SkStringView.h\"\n";
42 out << "#include <cstddef>\n";
43 out << "#include <cstdint>\n";
44 out << "namespace SkSL {\n";
45 out << "\n";
46 out << "struct " << token << " {\n";
47 out << " enum class Kind {\n";
48 for (const std::string& t : tokens) {
49 out << " TK_" << t << ",\n";
50 }
51 out << " TK_NONE,";
52 out << R"(
53 };
54
55 )" << token << "() {}";
56
57 out << token << R"((Kind kind, int32_t offset, int32_t length, int32_t line)
58 : fKind(kind)
59 , fOffset(offset)
60 , fLength(length)
61 , fLine(line) {}
62
63 Kind fKind = Kind::TK_NONE;
64 int32_t fOffset = -1;
65 int32_t fLength = -1;
66 int32_t fLine = -1;
67 };
68
69 class )" << lexer << R"( {
70 public:
71 void start(skstd::string_view text) {
72 fText = text;
73 fOffset = 0;
74 fLine = 1;
75 }
76
77 )" << token << R"( next();
78
79 struct Checkpoint {
80 int32_t fOffset;
81 int32_t fLine;
82 };
83
84 Checkpoint getCheckpoint() const {
85 return {fOffset, fLine};
86 }
87
88 void rewindToCheckpoint(Checkpoint checkpoint) {
89 fOffset = checkpoint.fOffset;
90 fLine = checkpoint.fLine;
91 }
92
93 private:
94 skstd::string_view fText;
95 int32_t fOffset;
96 int32_t fLine;
97 };
98
99 } // namespace
100 #endif
101 )";
102 }
103
writeCPP(const DFA & dfa,const char * lexer,const char * token,const char * include,const char * cppPath)104 static void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
105 const char* cppPath) {
106 std::ofstream out(cppPath);
107 SkASSERT(out.good());
108 out << HEADER;
109 out << "#include \"" << include << "\"\n";
110 out << "\n";
111 out << "namespace SkSL {\n";
112 out << "\n";
113
114 size_t states = 0;
115 for (const auto& row : dfa.fTransitions) {
116 states = std::max(states, row.size());
117 }
118 out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n";
119 // arbitrarily-chosen character which is greater than START_CHAR and should not appear in actual
120 // input
121 out << "static const uint8_t INVALID_CHAR = 18;";
122 out << "static const int8_t kMappings[" << dfa.fCharMappings.size() << "] = {\n ";
123 const char* separator = "";
124 for (int m : dfa.fCharMappings) {
125 out << separator << std::to_string(m);
126 separator = ", ";
127 }
128 out << "\n};\n";
129
130 WriteTransitionTable(out, dfa, states);
131
132 out << "static const int8_t kAccepts[" << states << "] = {";
133 for (size_t i = 0; i < states; ++i) {
134 if (i < dfa.fAccepts.size()) {
135 out << " " << dfa.fAccepts[i] << ",";
136 } else {
137 out << " " << INVALID << ",";
138 }
139 }
140 out << " };\n";
141 out << "\n";
142
143 out << token << " " << lexer << "::next() {";
144 out << R"(
145 // note that we cheat here: normally a lexer needs to worry about the case
146 // where a token has a prefix which is not itself a valid token - for instance,
147 // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid
148 // tokens. Our grammar doesn't have this property, so we can simplify the logic
149 // a bit.
150 int32_t startOffset = fOffset;
151 if (startOffset == (int32_t)fText.length()) {
152 return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0, fLine);
153 }
154 State state = 1;
155 for (;;) {
156 if (fOffset >= (int32_t)fText.length()) {
157 if (kAccepts[state] == -1) {
158 return Token(Token::Kind::TK_END_OF_FILE, startOffset, 0, fLine);
159 }
160 break;
161 }
162 uint8_t c = (uint8_t) fText[fOffset];
163 if (c <= 8 || c >= )" << dfa.fCharMappings.size() << R"() {
164 c = INVALID_CHAR;
165 }
166 State newState = get_transition(kMappings[c], state);
167 if (!newState) {
168 break;
169 }
170 state = newState;
171 ++fOffset;
172 if (c == '\n') {
173 ++fLine;
174 }
175 }
176 Token::Kind kind = ()" << token << R"(::Kind) kAccepts[state];
177 return )" << token << R"((kind, startOffset, fOffset - startOffset, fLine);
178 }
179
180 } // namespace
181 )";
182 }
183
process(const char * inPath,const char * lexer,const char * token,const char * hPath,const char * cppPath)184 static void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
185 const char* cppPath) {
186 NFA nfa;
187 std::vector<std::string> tokens;
188 tokens.push_back("END_OF_FILE");
189 std::string line;
190 std::ifstream in(inPath);
191 while (std::getline(in, line)) {
192 if (line.length() == 0) {
193 continue;
194 }
195 if (line.length() >= 2 && line[0] == '/' && line[1] == '/') {
196 continue;
197 }
198 std::istringstream split(line);
199 std::string name, delimiter, pattern;
200 if (split >> name >> delimiter >> pattern) {
201 SkASSERT(split.eof());
202 SkASSERT(name != "");
203 SkASSERT(delimiter == "=");
204 SkASSERT(pattern != "");
205 tokens.push_back(name);
206 if (pattern[0] == '"') {
207 SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
208 RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
209 for (size_t i = 2; i < pattern.size() - 1; ++i) {
210 node = RegexNode(RegexNode::kConcat_Kind, node,
211 RegexNode(RegexNode::kChar_Kind, pattern[i]));
212 }
213 nfa.addRegex(node);
214 }
215 else {
216 nfa.addRegex(RegexParser().parse(pattern));
217 }
218 }
219 }
220 NFAtoDFA converter(&nfa);
221 DFA dfa = converter.convert();
222 writeH(dfa, lexer, token, tokens, hPath);
223 writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), cppPath);
224 }
225
main(int argc,const char ** argv)226 int main(int argc, const char** argv) {
227 if (argc != 6) {
228 printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
229 exit(1);
230 }
231 process(argv[1], argv[2], argv[3], argv[4], argv[5]);
232 return 0;
233 }
234