• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.codegen;
29 
30 import org.antlr.Tool;
31 import org.antlr.analysis.Label;
32 import org.antlr.runtime.Token;
33 import org.stringtemplate.v4.ST;
34 import org.antlr.tool.Grammar;
35 
36 import java.io.IOException;
37 import java.util.List;
38 
39 /** The code generator for ANTLR can usually be retargeted just by providing
40  *  a new X.stg file for language X, however, sometimes the files that must
41  *  be generated vary enough that some X-specific functionality is required.
42  *  For example, in C, you must generate header files whereas in Java you do not.
43  *  Other languages may want to keep DFA separate from the main
44  *  generated recognizer file.
45  *
46  *  The notion of a Code Generator target abstracts out the creation
47  *  of the various files.  As new language targets get added to the ANTLR
48  *  system, this target class may have to be altered to handle more
49  *  functionality.  Eventually, just about all language generation issues
50  *  will be expressible in terms of these methods.
51  *
52  *  If org.antlr.codegen.XTarget class exists, it is used else
53  *  Target base class is used.  I am using a superclass rather than an
54  *  interface for this target concept because I can add functionality
55  *  later without breaking previously written targets (extra interface
56  *  methods would force adding dummy functions to all code generator
57  *  target classes).
58  *
59  */
60 public class Target {
61 
62 	/** For pure strings of Java 16-bit unicode char, how can we display
63 	 *  it in the target language as a literal.  Useful for dumping
64 	 *  predicates and such that may refer to chars that need to be escaped
65 	 *  when represented as strings.  Also, templates need to be escaped so
66 	 *  that the target language can hold them as a string.
67 	 *
68 	 *  I have defined (via the constructor) the set of typical escapes,
69 	 *  but your Target subclass is free to alter the translated chars or
70 	 *  add more definitions.  This is nonstatic so each target can have
71 	 *  a different set in memory at same time.
72 	 */
73 	protected String[] targetCharValueEscape = new String[255];
74 
Target()75 	public Target() {
76 		targetCharValueEscape['\n'] = "\\n";
77 		targetCharValueEscape['\r'] = "\\r";
78 		targetCharValueEscape['\t'] = "\\t";
79 		targetCharValueEscape['\b'] = "\\b";
80 		targetCharValueEscape['\f'] = "\\f";
81 		targetCharValueEscape['\\'] = "\\\\";
82 		targetCharValueEscape['\''] = "\\'";
83 		targetCharValueEscape['"'] = "\\\"";
84 	}
85 
genRecognizerFile(Tool tool, CodeGenerator generator, Grammar grammar, ST outputFileST)86 	protected void genRecognizerFile(Tool tool,
87 									 CodeGenerator generator,
88 									 Grammar grammar,
89 									 ST outputFileST)
90 		throws IOException
91 	{
92 		String fileName =
93 			generator.getRecognizerFileName(grammar.name, grammar.type);
94 		generator.write(outputFileST, fileName);
95 	}
96 
genRecognizerHeaderFile(Tool tool, CodeGenerator generator, Grammar grammar, ST headerFileST, String extName)97 	protected void genRecognizerHeaderFile(Tool tool,
98 										   CodeGenerator generator,
99 										   Grammar grammar,
100 										   ST headerFileST,
101 										   String extName) // e.g., ".h"
102 		throws IOException
103 	{
104 		// no header file by default
105 	}
106 
performGrammarAnalysis(CodeGenerator generator, Grammar grammar)107 	protected void performGrammarAnalysis(CodeGenerator generator,
108 										  Grammar grammar)
109 	{
110 		// Build NFAs from the grammar AST
111 		grammar.buildNFA();
112 
113 		// Create the DFA predictors for each decision
114 		grammar.createLookaheadDFAs();
115 	}
116 
117 	/** Is scope in @scope::name {action} valid for this kind of grammar?
118 	 *  Targets like C++ may want to allow new scopes like headerfile or
119 	 *  some such.  The action names themselves are not policed at the
120 	 *  moment so targets can add template actions w/o having to recompile
121 	 *  ANTLR.
122 	 */
isValidActionScope(int grammarType, String scope)123 	public boolean isValidActionScope(int grammarType, String scope) {
124 		switch (grammarType) {
125 			case Grammar.LEXER :
126 				if ( scope.equals("lexer") ) {return true;}
127 				break;
128 			case Grammar.PARSER :
129 				if ( scope.equals("parser") ) {return true;}
130 				break;
131 			case Grammar.COMBINED :
132 				if ( scope.equals("parser") ) {return true;}
133 				if ( scope.equals("lexer") ) {return true;}
134 				break;
135 			case Grammar.TREE_PARSER :
136 				if ( scope.equals("treeparser") ) {return true;}
137 				break;
138 		}
139 		return false;
140 	}
141 
142 	/** Target must be able to override the labels used for token types */
getTokenTypeAsTargetLabel(CodeGenerator generator, int ttype)143 	public String getTokenTypeAsTargetLabel(CodeGenerator generator, int ttype) {
144 		String name = generator.grammar.getTokenDisplayName(ttype);
145 		// If name is a literal, return the token type instead
146 		if ( name.charAt(0)=='\'' ) {
147 			return String.valueOf(ttype);
148 		}
149 		return name;
150 	}
151 
152 	/** Convert from an ANTLR char literal found in a grammar file to
153 	 *  an equivalent char literal in the target language.  For most
154 	 *  languages, this means leaving 'x' as 'x'.  Actually, we need
155 	 *  to escape '\u000A' so that it doesn't get converted to \n by
156 	 *  the compiler.  Convert the literal to the char value and then
157 	 *  to an appropriate target char literal.
158 	 *
159 	 *  Expect single quotes around the incoming literal.
160 	 */
getTargetCharLiteralFromANTLRCharLiteral( CodeGenerator generator, String literal)161 	public String getTargetCharLiteralFromANTLRCharLiteral(
162 		CodeGenerator generator,
163 		String literal)
164 	{
165 		StringBuffer buf = new StringBuffer();
166 		buf.append('\'');
167 		int c = Grammar.getCharValueFromGrammarCharLiteral(literal);
168 		if ( c<Label.MIN_CHAR_VALUE ) {
169 			return "'\u0000'";
170 		}
171 		if ( c<targetCharValueEscape.length &&
172 			 targetCharValueEscape[c]!=null )
173 		{
174 			buf.append(targetCharValueEscape[c]);
175 		}
176 		else if ( Character.UnicodeBlock.of((char)c)==
177 				  Character.UnicodeBlock.BASIC_LATIN &&
178 				  !Character.isISOControl((char)c) )
179 		{
180 			// normal char
181 			buf.append((char)c);
182 		}
183 		else {
184 			// must be something unprintable...use \\uXXXX
185 			// turn on the bit above max "\\uFFFF" value so that we pad with zeros
186 			// then only take last 4 digits
187 			String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
188 			buf.append("\\u");
189 			buf.append(hex);
190 		}
191 
192 		buf.append('\'');
193 		return buf.toString();
194 	}
195 
196 	/** Convert from an ANTLR string literal found in a grammar file to
197 	 *  an equivalent string literal in the target language.  For Java, this
198 	 *  is the translation 'a\n"' -> "a\n\"".  Expect single quotes
199 	 *  around the incoming literal.  Just flip the quotes and replace
200 	 *  double quotes with \"
201      *
202      *  Note that we have decided to allow poeple to use '\"' without
203      *  penalty, so we must build the target string in a loop as Utils.replae
204      *  cannot handle both \" and " without a lot of messing around.
205      *
206 	 */
getTargetStringLiteralFromANTLRStringLiteral( CodeGenerator generator, String literal)207 	public String getTargetStringLiteralFromANTLRStringLiteral(
208 		CodeGenerator generator,
209 		String literal)
210 	{
211         StringBuilder sb = new StringBuilder();
212         StringBuffer is = new StringBuffer(literal);
213 
214         // Opening quote
215         //
216         sb.append('"');
217 
218         for (int i = 1; i < is.length() -1; i++) {
219             if  (is.charAt(i) == '\\') {
220                 // Anything escaped is what it is! We assume that
221                 // people know how to escape characters correctly. However
222                 // we catch anything that does not need an escape in Java (which
223                 // is what the default implementation is dealing with and remove
224                 // the escape. The C target does this for instance.
225                 //
226                 switch (is.charAt(i+1)) {
227                     // Pass through any escapes that Java also needs
228                     //
229                     case    '"':
230                     case    'n':
231                     case    'r':
232                     case    't':
233                     case    'b':
234                     case    'f':
235                     case    '\\':
236                     case    'u':    // Assume unnnn
237                         sb.append('\\');    // Pass the escape through
238                         break;
239                     default:
240                         // Remove the escape by virtue of not adding it here
241                         // Thus \' becomes ' and so on
242                         //
243                         break;
244                 }
245 
246                 // Go past the \ character
247                 //
248                 i++;
249             } else {
250                 // Chracters that don't need \ in ANTLR 'strings' but do in Java
251                 //
252                 if (is.charAt(i) == '"') {
253                     // We need to escape " in Java
254                     //
255                     sb.append('\\');
256                 }
257             }
258             // Add in the next character, which may have been escaped
259             //
260             sb.append(is.charAt(i));
261         }
262 
263         // Append closing " and return
264         //
265         sb.append('"');
266 
267 		return sb.toString();
268 	}
269 
270 	/** Given a random string of Java unicode chars, return a new string with
271 	 *  optionally appropriate quote characters for target language and possibly
272 	 *  with some escaped characters.  For example, if the incoming string has
273 	 *  actual newline characters, the output of this method would convert them
274 	 *  to the two char sequence \n for Java, C, C++, ...  The new string has
275 	 *  double-quotes around it as well.  Example String in memory:
276 	 *
277 	 *     a"[newlinechar]b'c[carriagereturnchar]d[tab]e\f
278 	 *
279 	 *  would be converted to the valid Java s:
280 	 *
281 	 *     "a\"\nb'c\rd\te\\f"
282 	 *
283 	 *  or
284 	 *
285 	 *     a\"\nb'c\rd\te\\f
286 	 *
287 	 *  depending on the quoted arg.
288 	 */
getTargetStringLiteralFromString(String s, boolean quoted)289 	public String getTargetStringLiteralFromString(String s, boolean quoted) {
290 		if ( s==null ) {
291 			return null;
292 		}
293 
294 		StringBuffer buf = new StringBuffer();
295 		if ( quoted ) {
296 			buf.append('"');
297 		}
298 		for (int i=0; i<s.length(); i++) {
299 			int c = s.charAt(i);
300 			if ( c!='\'' && // don't escape single quotes in strings for java
301 				 c<targetCharValueEscape.length &&
302 				 targetCharValueEscape[c]!=null )
303 			{
304 				buf.append(targetCharValueEscape[c]);
305 			}
306 			else {
307 				buf.append((char)c);
308 			}
309 		}
310 		if ( quoted ) {
311 			buf.append('"');
312 		}
313 		return buf.toString();
314 	}
315 
getTargetStringLiteralFromString(String s)316 	public String getTargetStringLiteralFromString(String s) {
317 		return getTargetStringLiteralFromString(s, false);
318 	}
319 
320 	/** Convert long to 0xNNNNNNNNNNNNNNNN by default for spitting out
321 	 *  with bitsets.  I.e., convert bytes to hex string.
322 	 */
getTarget64BitStringFromValue(long word)323 	public String getTarget64BitStringFromValue(long word) {
324 		int numHexDigits = 8*2;
325 		StringBuffer buf = new StringBuffer(numHexDigits+2);
326 		buf.append("0x");
327 		String digits = Long.toHexString(word);
328 		digits = digits.toUpperCase();
329 		int padding = numHexDigits - digits.length();
330 		// pad left with zeros
331 		for (int i=1; i<=padding; i++) {
332 			buf.append('0');
333 		}
334 		buf.append(digits);
335 		return buf.toString();
336 	}
337 
encodeIntAsCharEscape(int v)338 	public String encodeIntAsCharEscape(int v) {
339 		if ( v<=127 ) {
340 			return "\\"+Integer.toOctalString(v);
341 		}
342 		String hex = Integer.toHexString(v|0x10000).substring(1,5);
343 		return "\\u"+hex;
344 	}
345 
346 	/** Some targets only support ASCII or 8-bit chars/strings.  For example,
347 	 *  C++ will probably want to return 0xFF here.
348 	 */
getMaxCharValue(CodeGenerator generator)349 	public int getMaxCharValue(CodeGenerator generator) {
350 		return Label.MAX_CHAR_VALUE;
351 	}
352 
353 	/** Give target a chance to do some postprocessing on actions.
354 	 *  Python for example will have to fix the indention.
355 	 */
postProcessAction(List chunks, Token actionToken)356 	public List postProcessAction(List chunks, Token actionToken) {
357 		return chunks;
358 	}
359 
360 }
361