• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.codegen;
29 
30 import org.antlr.Tool;
31 import org.stringtemplate.v4.ST;
32 import org.antlr.tool.Grammar;
33 
34 import java.io.IOException;
35 import java.util.ArrayList;
36 
37 public class CTarget extends Target {
38 
39     ArrayList strings = new ArrayList();
40 
41     @Override
genRecognizerFile(Tool tool, CodeGenerator generator, Grammar grammar, ST outputFileST)42     protected void genRecognizerFile(Tool tool,
43             CodeGenerator generator,
44             Grammar grammar,
45             ST outputFileST)
46             throws IOException {
47 
48         // Before we write this, and cause it to generate its string,
49         // we need to add all the string literals that we are going to match
50         //
51         outputFileST.add("literals", strings);
52         String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
53         generator.write(outputFileST, fileName);
54     }
55 
56     @Override
genRecognizerHeaderFile(Tool tool, CodeGenerator generator, Grammar grammar, ST headerFileST, String extName)57     protected void genRecognizerHeaderFile(Tool tool,
58             CodeGenerator generator,
59             Grammar grammar,
60             ST headerFileST,
61             String extName)
62             throws IOException {
63         // Pick up the file name we are generating. This method will return a
64         // a file suffixed with .c, so we must substring and add the extName
65         // to it as we cannot assign into strings in Java.
66         ///
67         String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
68         fileName = fileName.substring(0, fileName.length() - 2) + extName;
69 
70         generator.write(headerFileST, fileName);
71     }
72 
chooseWhereCyclicDFAsGo(Tool tool, CodeGenerator generator, Grammar grammar, ST recognizerST, ST cyclicDFAST)73     protected ST chooseWhereCyclicDFAsGo(Tool tool,
74             CodeGenerator generator,
75             Grammar grammar,
76             ST recognizerST,
77             ST cyclicDFAST) {
78         return recognizerST;
79     }
80 
81     /** Is scope in @scope::name {action} valid for this kind of grammar?
82      *  Targets like C++ may want to allow new scopes like headerfile or
83      *  some such.  The action names themselves are not policed at the
84      *  moment so targets can add template actions w/o having to recompile
85      *  ANTLR.
86      */
87     @Override
isValidActionScope(int grammarType, String scope)88     public boolean isValidActionScope(int grammarType, String scope) {
89         switch (grammarType) {
90             case Grammar.LEXER:
91                 if (scope.equals("lexer")) {
92                     return true;
93                 }
94                 if (scope.equals("header")) {
95                     return true;
96                 }
97                 if (scope.equals("includes")) {
98                     return true;
99                 }
100                 if (scope.equals("preincludes")) {
101                     return true;
102                 }
103                 if (scope.equals("overrides")) {
104                     return true;
105                 }
106                 break;
107             case Grammar.PARSER:
108                 if (scope.equals("parser")) {
109                     return true;
110                 }
111                 if (scope.equals("header")) {
112                     return true;
113                 }
114                 if (scope.equals("includes")) {
115                     return true;
116                 }
117                 if (scope.equals("preincludes")) {
118                     return true;
119                 }
120                 if (scope.equals("overrides")) {
121                     return true;
122                 }
123                 break;
124             case Grammar.COMBINED:
125                 if (scope.equals("parser")) {
126                     return true;
127                 }
128                 if (scope.equals("lexer")) {
129                     return true;
130                 }
131                 if (scope.equals("header")) {
132                     return true;
133                 }
134                 if (scope.equals("includes")) {
135                     return true;
136                 }
137                 if (scope.equals("preincludes")) {
138                     return true;
139                 }
140                 if (scope.equals("overrides")) {
141                     return true;
142                 }
143                 break;
144             case Grammar.TREE_PARSER:
145                 if (scope.equals("treeparser")) {
146                     return true;
147                 }
148                 if (scope.equals("header")) {
149                     return true;
150                 }
151                 if (scope.equals("includes")) {
152                     return true;
153                 }
154                 if (scope.equals("preincludes")) {
155                     return true;
156                 }
157                 if (scope.equals("overrides")) {
158                     return true;
159                 }
160                 break;
161         }
162         return false;
163     }
164 
165     @Override
getTargetCharLiteralFromANTLRCharLiteral( CodeGenerator generator, String literal)166     public String getTargetCharLiteralFromANTLRCharLiteral(
167             CodeGenerator generator,
168             String literal) {
169 
170         if (literal.startsWith("'\\u")) {
171             literal = "0x" + literal.substring(3, 7);
172         } else {
173             int c = literal.charAt(1);
174 
175             if (c < 32 || c > 127) {
176                 literal = "0x" + Integer.toHexString(c);
177             }
178         }
179 
180         return literal;
181     }
182 
183     /** Convert from an ANTLR string literal found in a grammar file to
184      *  an equivalent string literal in the C target.
185      *  Because we must support Unicode character sets and have chosen
186      *  to have the lexer match UTF32 characters, then we must encode
187      *  string matches to use 32 bit character arrays. Here then we
188      *  must produce the C array and cater for the case where the
189      *  lexer has been encoded with a string such as 'xyz\n',
190      */
191     @Override
getTargetStringLiteralFromANTLRStringLiteral( CodeGenerator generator, String literal)192     public String getTargetStringLiteralFromANTLRStringLiteral(
193             CodeGenerator generator,
194             String literal) {
195         int index;
196         String bytes;
197         StringBuffer buf = new StringBuffer();
198 
199         buf.append("{ ");
200 
201         // We need ot lose any escaped characters of the form \x and just
202         // replace them with their actual values as well as lose the surrounding
203         // quote marks.
204         //
205         for (int i = 1; i < literal.length() - 1; i++) {
206             buf.append("0x");
207 
208             if (literal.charAt(i) == '\\') {
209                 i++; // Assume that there is a next character, this will just yield
210                 // invalid strings if not, which is what the input would be of course - invalid
211                 switch (literal.charAt(i)) {
212                     case 'u':
213                     case 'U':
214                         buf.append(literal.substring(i + 1, i + 5));  // Already a hex string
215                         i = i + 5;                                // Move to next string/char/escape
216                         break;
217 
218                     case 'n':
219                     case 'N':
220 
221                         buf.append("0A");
222                         break;
223 
224                     case 'r':
225                     case 'R':
226 
227                         buf.append("0D");
228                         break;
229 
230                     case 't':
231                     case 'T':
232 
233                         buf.append("09");
234                         break;
235 
236                     case 'b':
237                     case 'B':
238 
239                         buf.append("08");
240                         break;
241 
242                     case 'f':
243                     case 'F':
244 
245                         buf.append("0C");
246                         break;
247 
248                     default:
249 
250                         // Anything else is what it is!
251                         //
252                         buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase());
253                         break;
254                 }
255             } else {
256                 buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase());
257             }
258             buf.append(", ");
259         }
260         buf.append(" ANTLR3_STRING_TERMINATOR}");
261 
262         bytes = buf.toString();
263         index = strings.indexOf(bytes);
264 
265         if (index == -1) {
266             strings.add(bytes);
267             index = strings.indexOf(bytes);
268         }
269 
270         String strref = "lit_" + String.valueOf(index + 1);
271 
272         return strref;
273     }
274 
275     /**
276      * Overrides the standard grammar analysis so we can prepare the analyser
277      * a little differently from the other targets.
278      *
279      * In particular we want to influence the way the code generator makes assumptions about
280      * switchs vs ifs, vs table driven DFAs. In general, C code should be generated that
281      * has the minimum use of tables, and tha meximum use of large switch statements. This
282      * allows the optimizers to generate very efficient code, it can reduce object code size
283      * by about 30% and give about a 20% performance improvement over not doing this. Hence,
284      * for the C target only, we change the defaults here, but only if they are still set to the
285      * defaults.
286      *
287      * @param generator An instance of the generic code generator class.
288      * @param grammar The grammar that we are currently analyzing
289      */
290     @Override
performGrammarAnalysis(CodeGenerator generator, Grammar grammar)291     protected void performGrammarAnalysis(CodeGenerator generator, Grammar grammar) {
292 
293         // Check to see if the maximum inline DFA states is still set to
294         // the default size. If it is then whack it all the way up to the maximum that
295         // we can sensibly get away with.
296         //
297         if (CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE == CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE ) {
298 
299             CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE = 65535;
300         }
301 
302         // Check to see if the maximum switch size is still set to the default
303         // and bring it up much higher if it is. Modern C compilers can handle
304         // much bigger switch statements than say Java can and if anyone finds a compiler
305         // that cannot deal with such big switches, all the need do is generate the
306         // code with a reduced -Xmaxswitchcaselabels nnn
307         //
308         if  (CodeGenerator.MAX_SWITCH_CASE_LABELS == CodeGenerator.MSCL_DEFAULT) {
309 
310             CodeGenerator.MAX_SWITCH_CASE_LABELS = 3000;
311         }
312 
313         // Check to see if the number of transitions considered a miminum for using
314         // a switch is still at the default. Because a switch is still generally faster than
315         // an if even with small sets, and given that the optimizer will do the best thing with it
316         // anyway, then we simply want to generate a switch for any number of states.
317         //
318         if (CodeGenerator.MIN_SWITCH_ALTS == CodeGenerator.MSA_DEFAULT) {
319 
320             CodeGenerator.MIN_SWITCH_ALTS = 1;
321         }
322 
323         // Now we allow the superclass implementation to do whatever it feels it
324         // must do.
325         //
326         super.performGrammarAnalysis(generator, grammar);
327     }
328 }
329 
330