/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.codegen;

import org.antlr.Tool;
import org.stringtemplate.v4.ST;
import org.antlr.tool.Grammar;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Code generation target for C.
 *
 * <p>String literals found in the grammar are converted to C array
 * initializers and collected in {@link #strings}; generated code refers to
 * them by the symbolic name {@code lit_N}, where N is the one-based position
 * of the literal in that list.
 */
public class CTarget extends Target {

    /**
     * Distinct encoded string literals, in the order they were first seen.
     * Handed to the recognizer template as the {@code literals} attribute.
     */
    List<String> strings = new ArrayList<String>();

    /**
     * Writes the recognizer source file.
     *
     * @param tool         unused by this implementation
     * @param generator    supplies the output file name and performs the write
     * @param grammar      grammar whose name and type determine the file name
     * @param outputFileST template for the recognizer file
     * @throws IOException if the generator fails to write the file
     */
    @Override
    protected void genRecognizerFile(Tool tool,
                                     CodeGenerator generator,
                                     Grammar grammar,
                                     ST outputFileST)
            throws IOException {
        // The template must receive every collected string literal before it
        // is written, because writing is what renders it to text.
        outputFileST.add("literals", strings);
        String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
        generator.write(outputFileST, fileName);
    }

    /**
     * Writes the recognizer header file.
     *
     * @param tool         unused by this implementation
     * @param generator    supplies the base file name and performs the write
     * @param grammar      grammar whose name and type determine the file name
     * @param headerFileST template for the header file
     * @param extName      extension (including the dot) that replaces the
     *                     last two characters of the recognizer file name
     * @throws IOException if the generator fails to write the file
     */
    @Override
    protected void genRecognizerHeaderFile(Tool tool,
                                           CodeGenerator generator,
                                           Grammar grammar,
                                           ST headerFileST,
                                           String extName)
            throws IOException {
        // The generator hands back a name that is expected to end in ".c";
        // drop those two characters and append the requested extension.
        String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
        fileName = fileName.substring(0, fileName.length() - 2) + extName;

        generator.write(headerFileST, fileName);
    }

    /**
     * Returns {@code recognizerST} unchanged; every other argument is ignored.
     *
     * @return the {@code recognizerST} argument
     */
    protected ST chooseWhereCyclicDFAsGo(Tool tool,
                                         CodeGenerator generator,
                                         Grammar grammar,
                                         ST recognizerST,
                                         ST cyclicDFAST) {
        return recognizerST;
    }

    /**
     * Is scope in @scope::name {action} valid for this kind of grammar?
     * Targets like C++ may want to allow new scopes like headerfile or
     * some such.  The action names themselves are not policed at the
     * moment so targets can add template actions w/o having to recompile
     * ANTLR.
     *
     * <p>Each grammar type accepts its own scope name(s) plus the scopes
     * {@code header}, {@code includes}, {@code preincludes} and
     * {@code overrides}.
     *
     * @param grammarType one of the {@code Grammar} type constants
     * @param scope       scope name to check; {@code null} is never valid
     * @return {@code true} if the scope is accepted for that grammar type
     */
    @Override
    public boolean isValidActionScope(int grammarType, String scope) {
        switch (grammarType) {
            case Grammar.LEXER:
                return "lexer".equals(scope) || isCommonActionScope(scope);
            case Grammar.PARSER:
                return "parser".equals(scope) || isCommonActionScope(scope);
            case Grammar.COMBINED:
                return "parser".equals(scope)
                        || "lexer".equals(scope)
                        || isCommonActionScope(scope);
            case Grammar.TREE_PARSER:
                return "treeparser".equals(scope) || isCommonActionScope(scope);
            default:
                return false;
        }
    }

    /** Tells whether {@code scope} is one of the scopes accepted by every grammar type. */
    private static boolean isCommonActionScope(String scope) {
        return "header".equals(scope)
                || "includes".equals(scope)
                || "preincludes".equals(scope)
                || "overrides".equals(scope);
    }

    /**
     * Converts an ANTLR character literal (including its surrounding single
     * quotes) to the form emitted into C code.
     *
     * <p>A literal written as a four-digit Unicode escape becomes {@code 0x}
     * followed by those four digits. Otherwise, if the character right after
     * the opening quote has a code below 32 or above 127, the literal becomes
     * {@code 0x} followed by that code in hexadecimal. Anything else is
     * returned untouched.
     *
     * @param generator unused by this implementation
     * @param literal   the quoted ANTLR character literal
     * @return the text to emit for that character
     */
    @Override
    public String getTargetCharLiteralFromANTLRCharLiteral(
            CodeGenerator generator,
            String literal) {

        if (literal.startsWith("'\\u")) {
            // The four hex digits sit right after the quote, backslash and 'u'.
            literal = "0x" + literal.substring(3, 7);
        } else {
            int c = literal.charAt(1);

            if (c < 32 || c > 127) {
                literal = "0x" + Integer.toHexString(c);
            }
        }

        return literal;
    }

    /**
     * Convert from an ANTLR string literal found in a grammar file to
     * an equivalent string literal in the C target.
     * Because we must support Unicode character sets and have chosen
     * to have the lexer match UTF32 characters, then we must encode
     * string matches to use 32 bit character arrays. Here then we
     * must produce the C array and cater for the case where the
     * lexer has been encoded with a string such as 'xyz\n'.
     *
     * <p>The encoded array is stored in {@link #strings} the first time it
     * is seen; identical literals share one entry.
     *
     * @param generator unused by this implementation
     * @param literal   the quoted ANTLR string literal
     * @return {@code lit_N}, where N is the one-based position of the
     *         encoded literal in {@link #strings}
     */
    @Override
    public String getTargetStringLiteralFromANTLRStringLiteral(
            CodeGenerator generator,
            String literal) {

        String bytes = encodeLiteralAsCArray(literal);

        int index = strings.indexOf(bytes);
        if (index == -1) {
            // Not seen before: it becomes the last entry of the table.
            strings.add(bytes);
            index = strings.size() - 1;
        }

        return "lit_" + String.valueOf(index + 1);
    }

    /**
     * Builds the C array initializer for a quoted ANTLR string literal:
     * one {@code 0x..} element per character, with escape sequences replaced
     * by the value they stand for and the surrounding quotes dropped.
     */
    private static String encodeLiteralAsCArray(String literal) {
        StringBuilder buf = new StringBuilder();
        buf.append("{ ");

        // Index 0 and the last index hold the quote marks, so skip both.
        final int end = literal.length() - 1;
        for (int i = 1; i < end; i++) {
            buf.append("0x");

            char ch = literal.charAt(i);
            if (ch != '\\') {
                buf.append(Integer.toHexString(ch).toUpperCase());
            } else {
                // Assume that there is a next character; if not the input was
                // invalid and so is the string produced from it.
                i++;
                switch (literal.charAt(i)) {
                    case 'u':
                    case 'U':
                        // The four digits after the 'u' are already hex.
                        buf.append(literal.substring(i + 1, i + 5));
                        // Leave i on the last hex digit: the loop increment
                        // then lands on the character right after the escape.
                        // (Advancing by 5 here used to skip that character.)
                        i += 4;
                        break;

                    case 'n':
                    case 'N':
                        buf.append("0A");
                        break;

                    case 'r':
                    case 'R':
                        buf.append("0D");
                        break;

                    case 't':
                    case 'T':
                        buf.append("09");
                        break;

                    case 'b':
                    case 'B':
                        buf.append("08");
                        break;

                    case 'f':
                    case 'F':
                        buf.append("0C");
                        break;

                    default:
                        // Anything else is what it is!
                        buf.append(Integer.toHexString(literal.charAt(i)).toUpperCase());
                        break;
                }
            }
            buf.append(", ");
        }
        buf.append(" ANTLR3_STRING_TERMINATOR}");

        return buf.toString();
    }

    /**
     * Overrides the standard grammar analysis so we can prepare the analyser
     * a little differently from the other targets.
     *
     * In particular we want to influence the way the code generator makes assumptions about
     * switches vs ifs, vs table driven DFAs. In general, C code should be generated that
     * has the minimum use of tables, and the maximum use of large switch statements. This
     * allows the optimizers to generate very efficient code, it can reduce object code size
     * by about 30% and give about a 20% performance improvement over not doing this. Hence,
     * for the C target only, we change the defaults here, but only if they are still set to the
     * defaults.
     *
     * @param generator An instance of the generic code generator class.
     * @param grammar The grammar that we are currently analyzing
     */
    @Override
    protected void performGrammarAnalysis(CodeGenerator generator, Grammar grammar) {

        // Check to see if the maximum inline DFA states is still set to
        // the default size. If it is then whack it all the way up to the maximum that
        // we can sensibly get away with.
        //
        if (CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE == CodeGenerator.MADSI_DEFAULT) {
            CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE = 65535;
        }

        // Check to see if the maximum switch size is still set to the default
        // and bring it up much higher if it is. Modern C compilers can handle
        // much bigger switch statements than say Java can and if anyone finds a compiler
        // that cannot deal with such big switches, all they need do is generate the
        // code with a reduced -Xmaxswitchcaselabels nnn
        //
        if (CodeGenerator.MAX_SWITCH_CASE_LABELS == CodeGenerator.MSCL_DEFAULT) {
            CodeGenerator.MAX_SWITCH_CASE_LABELS = 3000;
        }

        // Check to see if the number of transitions considered a minimum for using
        // a switch is still at the default. Because a switch is still generally faster than
        // an if even with small sets, and given that the optimizer will do the best thing with it
        // anyway, then we simply want to generate a switch for any number of states.
        //
        if (CodeGenerator.MIN_SWITCH_ALTS == CodeGenerator.MSA_DEFAULT) {
            CodeGenerator.MIN_SWITCH_ALTS = 1;
        }

        // Now we allow the superclass implementation to do whatever it feels it
        // must do.
        //
        super.performGrammarAnalysis(generator, grammar);
    }
}