• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2002, 2020 Rene Rivera.
2 ** Distributed under the Boost Software License, Version 1.0.
3 ** (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
4 */
5 
6 #include <cstdio>
7 #include <string>
8 #include <algorithm>
9 #include <cctype>
10 #include <set>
11 #include <cstring>
12 
13 /*
14 # yyacc - yacc wrapper
15 #
16 # Allows tokens to be written as `literal` and then automatically
17 # substituted with #defined tokens.
18 #
19 # Usage:
20 #    yyacc file.y filetab.h file.yy
21 #
22 # inputs:
23 #    file.yy        yacc grammar with ` literals
24 #
25 # outputs:
26 #    file.y        yacc grammar
27 #    filetab.h    array of string <-> token mappings
28 #
29 # 3-13-93
30 #    Documented and p moved in sed command (for some reason,
31 #    s/x/y/p doesn't work).
32 # 10-12-93
33 #    Take basename as second argument.
34 # 12-31-96
35 #    reversed order of args to be compatible with GenFile rule
36 # 11-20-2002
37 #    Reimplemented as a C program for portability. (Rene Rivera)
38 # 05-xx-2020
39 #    Reimplement yet again, in C++. (Rene Rivera)
40 */
41 
// Command-line synopsis, one array element per line of help text.
static const std::string usage[] {
    "yyacc <grammar output.y> <token table output.h> <grammar source.yy>",
};
45 
print_usage()46 void print_usage()
47 {
48     for (auto u: usage)
49     {
50         std::printf("%s\n", u.c_str());
51     }
52 }
53 
// Build the token identifier for a grammar literal.
//
// Punctuation literals listed in the table below are first replaced by
// their spelled-out name; any other literal is used as given. The result
// is then upper-cased and suffixed with "_t", e.g. ":" -> "_COLON_t" and
// "actions" -> "ACTIONS_t".
std::string tokenize_string(std::string s)
{
    struct symbol_name
    {
        const char * symbol;
        const char * name;
    };
    static const symbol_name symbol_names[] = {
        { ":", "_colon" },
        { "!", "_bang" },
        { "!=", "_bang_equals" },
        { "&&", "_amperamper" },
        { "&", "_amper" },
        { "+", "_plus" },
        { "+=", "_plus_equals" },
        { "||", "_barbar" },
        { "|", "_bar" },
        { ";", "_semic" },
        { "-", "_minus" },
        { "<", "_langle" },
        { "<=", "_langle_equals" },
        { ">", "_rangle" },
        { ">=", "_rangle_equals" },
        { ".", "_period" },
        { "?", "_question" },
        { "?=", "_question_equals" },
        { "=", "_equals" },
        { ",", "_comma" },
        { "[", "_lbracket" },
        { "]", "_rbracket" },
        { "{", "_lbrace" },
        { "}", "_rbrace" },
        { "(", "_lparen" },
        { ")", "_rparen" },
    };

    // Default to the literal itself; only exact matches are renamed.
    std::string name = s;
    for (const symbol_name & entry : symbol_names)
    {
        if (s == entry.symbol)
        {
            name = entry.name;
            break;
        }
    }

    // Go through unsigned char so std::toupper never sees a negative value.
    for (char & ch : name)
    {
        ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
    }

    name += "_t";
    return name;
}
89 
// A grammar literal paired with a token name.
struct literal
{
    std::string string; // the literal text
    std::string token;  // the token name associated with it

    // Order literals by their text only; `token` does not take part in the
    // comparison, so two entries with the same text are equivalent.
    bool operator<(const literal & x) const
    {
        return string.compare(x.string) < 0;
    }
};
100 
main(int argc,char ** argv)101 int main(int argc, char ** argv)
102 {
103     int result = 0;
104     if (argc != 4)
105     {
106         print_usage();
107         result = 1;
108     }
109     else
110     {
111         FILE * token_output_f = 0;
112         FILE * grammar_output_f = 0;
113         FILE * grammar_source_f = 0;
114 
115         grammar_source_f = fopen(argv[3],"r");
116         if (grammar_source_f == 0) { result = 1; }
117         if (result == 0)
118         {
119             std::set<literal> literals;
120             char l[2048];
121             while (1)
122             {
123                 if (fgets(l,2048,grammar_source_f) != 0)
124                 {
125                     char * c = l;
126                     while (1)
127                     {
128                         char * c1 = std::strchr(c,'`');
129                         if (c1 != 0)
130                         {
131                             char * c2 = std::strchr(c1+1,'`');
132                             if (c2 != 0)
133                             {
134                                 auto l = std::string(c1+1,c2-c1-1);
135                                 literals.insert({ l, tokenize_string(l) });
136                                 c = c2+1;
137                             }
138                             else
139                                 break;
140                         }
141                         else
142                             break;
143                     }
144                 }
145                 else
146                 {
147                     break;
148                 }
149             }
150             token_output_f = std::fopen(argv[2],"w");
151             if (token_output_f != 0)
152             {
153                 for (const literal & l: literals)
154                 {
155                     std::fprintf(token_output_f,"    { \"%s\", %s },\n",l.string.c_str(), l.token.c_str());
156                 }
157                 std::fclose(token_output_f);
158             }
159             else
160                 result = 1;
161             if (result == 0)
162             {
163                 grammar_output_f = std::fopen(argv[1],"w");
164                 if (grammar_output_f != 0)
165                 {
166                     for (const literal & l: literals)
167                     {
168                         fprintf(grammar_output_f,"%%token %s\n",l.token.c_str());
169                     }
170                     rewind(grammar_source_f);
171                     while (1)
172                     {
173                         if (fgets(l,2048,grammar_source_f) != 0)
174                         {
175                             char * c = l;
176                             while (1)
177                             {
178                                 char * c1 = strchr(c,'`');
179                                 if (c1 != 0)
180                                 {
181                                     char * c2 = strchr(c1+1,'`');
182                                     if (c2 != 0)
183                                     {
184                                         auto replacement = literals.find({std::string(c1+1,c2-c1-1), ""});
185                                         *c1 = 0;
186                                         std::fprintf(grammar_output_f,"%s%s",c,replacement->token.c_str());
187                                         c = c2+1;
188                                     }
189                                     else
190                                     {
191                                         std::fprintf(grammar_output_f,"%s",c);
192                                         break;
193                                     }
194                                 }
195                                 else
196                                 {
197                                     std::fprintf(grammar_output_f,"%s",c);
198                                     break;
199                                 }
200                             }
201                         }
202                         else
203                         {
204                             break;
205                         }
206                     }
207                     std::fclose(grammar_output_f);
208                 }
209                 else
210                     result = 1;
211             }
212         }
213         if (result != 0)
214         {
215             perror("yyacc");
216         }
217     }
218     return result;
219 }
220