/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/sksl/lex/NFAtoDFA.h"
#include "src/sksl/lex/RegexParser.h"
#include "src/sksl/lex/TransitionTable.h"

#include <fstream>
#include <sstream>
#include <string>

/**
 * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
 * file is a text file with one token definition per line. Each line is of the form:
 * <TOKEN_NAME> = <pattern>
 * where <pattern> is either a regular expression (e.g [0-9]) or a double-quoted literal string.
 */

// Boilerplate copyright banner emitted at the top of every generated file.
static constexpr const char* HEADER =
    "/*\n"
    " * Copyright 2017 Google Inc.\n"
    " *\n"
    " * Use of this source code is governed by a BSD-style license that can be\n"
    " * found in the LICENSE file.\n"
    " */\n"
    "/*****************************************************************************************\n"
    " ******************** This file was generated by sksllex. Do not edit. *******************\n"
    " *****************************************************************************************/\n";

/**
 * Writes the generated header file: a `token` struct holding a Kind enum (one TK_ entry per token
 * name, plus TK_NONE) and a `lexer` class exposing start()/next() plus checkpoint/rewind support.
 *
 * @param dfa     the computed DFA (unused here; kept for signature symmetry with writeCPP)
 * @param lexer   name of the generated lexer class
 * @param token   name of the generated token struct
 * @param tokens  token names, in declaration order (index 0 is END_OF_FILE)
 * @param hPath   output path for the generated header
 */
static void writeH(const DFA& dfa, const char* lexer, const char* token,
                   const std::vector<std::string>& tokens, const char* hPath) {
    std::ofstream out(hPath);
    SkASSERT(out.good());
    out << HEADER;
    out << "#ifndef SKSL_" << lexer << "\n";
    out << "#define SKSL_" << lexer << "\n";
    out << "#include \"include/core/SkStringView.h\"\n";
    // NOTE(review): the original include targets were lost to text mangling; the generated code
    // below uses int32_t, so <cstddef>/<cstdint> are the presumed originals — confirm upstream.
    out << "#include <cstddef>\n";
    out << "#include <cstdint>\n";
    out << "namespace SkSL {\n";
    out << "\n";
    out << "struct " << token << " {\n";
    out << "    enum class Kind {\n";
    for (const std::string& t : tokens) {
        out << "        TK_" << t << ",\n";
    }
    out << "        TK_NONE,";
    out << R"(
    };

    )" << token << "() {}";

    out << token << R"((Kind kind, int32_t offset, int32_t length, int32_t line)
        : fKind(kind)
        , fOffset(offset)
        , fLength(length)
        , fLine(line) {}

    Kind fKind = Kind::TK_NONE;
    int32_t fOffset = -1;
    int32_t fLength = -1;
    int32_t fLine = -1;
};

class )" << lexer << R"( {
public:
    void start(skstd::string_view text) {
        fText = text;
        fOffset = 0;
        fLine = 1;
    }

    )" << token << R"( next();

    struct Checkpoint {
        int32_t fOffset;
        int32_t fLine;
    };

    Checkpoint getCheckpoint() const {
        return {fOffset, fLine};
    }

    void rewindToCheckpoint(Checkpoint checkpoint) {
        fOffset = checkpoint.fOffset;
        fLine = checkpoint.fLine;
    }

private:
    skstd::string_view fText;
    int32_t fOffset;
    int32_t fLine;
};

} // namespace
#endif
)";
}

/**
 * Writes the generated implementation file: the DFA's character-mapping table, transition table,
 * accept table, and the `lexer`::next() method which walks the DFA over the input text.
 *
 * @param dfa      the computed DFA
 * @param lexer    name of the generated lexer class
 * @param token    name of the generated token struct
 * @param include  header path to #include from the generated .cpp
 * @param cppPath  output path for the generated implementation
 */
static void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
                     const char* cppPath) {
    std::ofstream out(cppPath);
    SkASSERT(out.good());
    out << HEADER;
    out << "#include \"" << include << "\"\n";
    out << "\n";
    out << "namespace SkSL {\n";
    out << "\n";
    // `states` is the widest transition row, i.e. the number of DFA states; state indices must fit
    // in the State type (indices 0..states-1, so uint8_t suffices up to 256 states).
    size_t states = 0;
    for (const auto& row : dfa.fTransitions) {
        states = std::max(states, row.size());
    }
    out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n";
    // arbitrarily-chosen character which is greater than START_CHAR and should not appear in actual
    // input
    out << "static const uint8_t INVALID_CHAR = 18;";
    out << "static const int8_t kMappings[" << dfa.fCharMappings.size() << "] = {\n    ";
    const char* separator = "";
    for (int m : dfa.fCharMappings) {
        out << separator << std::to_string(m);
        separator = ", ";
    }
    out << "\n};\n";
    WriteTransitionTable(out, dfa, states);
    // Accept table: -1 (INVALID) marks non-accepting states; otherwise the entry is the token Kind.
    out << "static const int8_t kAccepts[" << states << "] = {";
    for (size_t i = 0; i < states; ++i) {
        if (i < dfa.fAccepts.size()) {
            out << " " << dfa.fAccepts[i] << ",";
        } else {
            out << " " << INVALID << ",";
        }
    }
    out << " };\n";
    out << "\n";
    out << token << " " << lexer << "::next() {";
    out << R"(
    // note that we cheat here: normally a lexer needs to worry about the case
    // where a token has a prefix which is not itself a valid token - for instance,
    // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid
    // tokens. Our grammar doesn't have this property, so we can simplify the logic
    // a bit.
    int32_t startOffset = fOffset;
    if (startOffset == (int32_t)fText.length()) {
        return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0, fLine);
    }
    State state = 1;
    for (;;) {
        if (fOffset >= (int32_t)fText.length()) {
            if (kAccepts[state] == -1) {
                return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0, fLine);
            }
            break;
        }
        uint8_t c = (uint8_t) fText[fOffset];
        if (c <= 8 || c >= )" << dfa.fCharMappings.size() << R"() {
            c = INVALID_CHAR;
        }
        State newState = get_transition(kMappings[c], state);
        if (!newState) {
            break;
        }
        state = newState;
        ++fOffset;
        if (c == '\n') {
            ++fLine;
        }
    }
    )" << token << R"(::Kind kind = ()" << token << R"(::Kind) kAccepts[state];
    return )" << token << R"((kind, startOffset, fOffset - startOffset, fLine);
}

} // namespace
)";
    // Fix: the generated body previously hard-coded `Token`/`Token::Kind` in two spots instead of
    // interpolating `token`; output is byte-identical for the existing token=="Token" caller, but
    // the generator is now actually parameterized on the token name.
}

/**
 * Reads a .lex definition file, builds an NFA from each token's pattern (quoted literals become
 * chains of kChar nodes; anything else is parsed as a regex), converts it to a DFA, and emits the
 * generated header and implementation files.
 *
 * Blank lines and lines starting with "//" are skipped. Each remaining line must be exactly
 * `NAME = pattern` (asserted). Token 0 is always END_OF_FILE.
 */
static void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
                    const char* cppPath) {
    NFA nfa;
    std::vector<std::string> tokens;
    tokens.push_back("END_OF_FILE");
    std::string line;
    std::ifstream in(inPath);
    while (std::getline(in, line)) {
        if (line.length() == 0) {
            continue;
        }
        if (line.length() >= 2 && line[0] == '/' && line[1] == '/') {
            continue;
        }
        std::istringstream split(line);
        std::string name, delimiter, pattern;
        if (split >> name >> delimiter >> pattern) {
            SkASSERT(split.eof());
            SkASSERT(name != "");
            SkASSERT(delimiter == "=");
            SkASSERT(pattern != "");
            tokens.push_back(name);
            if (pattern[0] == '"') {
                // Quoted literal: build a left-leaning concat chain of single-character nodes
                // (must contain at least one character between the quotes).
                SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
                RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
                for (size_t i = 2; i < pattern.size() - 1; ++i) {
                    node = RegexNode(RegexNode::kConcat_Kind, node,
                                     RegexNode(RegexNode::kChar_Kind, pattern[i]));
                }
                nfa.addRegex(node);
            } else {
                nfa.addRegex(RegexParser().parse(pattern));
            }
        }
    }
    NFAtoDFA converter(&nfa);
    DFA dfa = converter.convert();
    writeH(dfa, lexer, token, tokens, hPath);
    writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), cppPath);
}

int main(int argc, const char** argv) {
    if (argc != 6) {
        // NOTE(review): the argument placeholders were lost to text mangling; restored from the
        // five parameters process() takes — confirm exact wording against upstream.
        printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
        exit(1);
    }
    process(argv[1], argv[2], argv[3], argv[4], argv[5]);
    return 0;
}