/*
 * Copyright (C) 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package com.google.clearsilver.jsilver.autoescape;
18 
19 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR;
20 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_CSS;
21 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_JS;
22 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_UNQUOTED_JS;
23 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI;
24 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_ATTR_URI_START;
25 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_HTML;
26 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS;
27 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_JS_UNQUOTED;
28 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_STYLE;
29 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR;
30 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_CSS;
31 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_JS;
32 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS;
33 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI;
34 import static com.google.clearsilver.jsilver.autoescape.EscapeMode.ESCAPE_AUTO_UNQUOTED_ATTR_URI_START;
35 import com.google.clearsilver.jsilver.exceptions.JSilverAutoEscapingException;
36 import com.google.streamhtmlparser.ExternalState;
37 import com.google.streamhtmlparser.HtmlParser;
38 import com.google.streamhtmlparser.HtmlParserFactory;
39 import com.google.streamhtmlparser.ParseException;
40 
41 import java.util.HashMap;
42 import java.util.HashSet;
43 import java.util.Map;
44 
45 /**
46  * Encapsulates auto escaping logic.
47  */
48 public class AutoEscapeContext {
49   /**
50    * Map of content-type to corresponding {@code HtmlParser.Mode}, used by {@code setContentType} to
51    * specify the content type of provided input. Valid values and the corresponding mode are: <br>
52    * <table>
53    * <tr>
54    * <td>text/html</td>
55    * <td>HtmlParser.Mode.HTML</td>
56    * </tr>
57    * <tr>
58    * <td>text/plain</td>
59    * <td>HtmlParser.Mode.HTML</td>
60    * </tr>
61    * <tr>
62    * <td>application/javascript</td>
63    * <td>HtmlParser.Mode.JS</td>
64    * </tr>
65    * <tr>
66    * <td>application/json</td>
67    * <td>HtmlParser.Mode.JS</td>
68    * </tr>
69    * <tr>
70    * <td>text/javascript</td>
71    * <td>HtmlParser.Mode.JS</td>
72    * </tr>
73    * <tr>
74    * <td>text/css</td>
75    * <td>HtmlParser.Mode.CSS</td>
76    * </tr>
77    * </table>
78    *
79    * @see #setContentType
80    */
81   public static final Map<String, HtmlParser.Mode> CONTENT_TYPE_LIST;
82 
83   // These options are used to provide extra information to HtmlParserFactory.createParserInMode or
84   // HtmlParserFactory.createParserInAttribute, which is required for certain modes.
85   private static final HashSet<HtmlParserFactory.AttributeOptions> quotedJsAttributeOption;
86   private static final HashSet<HtmlParserFactory.AttributeOptions> partialUrlAttributeOption;
87   private static final HashSet<HtmlParserFactory.ModeOptions> jsModeOption;
88 
89   private HtmlParser htmlParser;
90 
91   static {
92     quotedJsAttributeOption = new HashSet<HtmlParserFactory.AttributeOptions>();
93     quotedJsAttributeOption.add(HtmlParserFactory.AttributeOptions.JS_QUOTED);
94 
95     partialUrlAttributeOption = new HashSet<HtmlParserFactory.AttributeOptions>();
96     partialUrlAttributeOption.add(HtmlParserFactory.AttributeOptions.URL_PARTIAL);
97 
98     jsModeOption = new HashSet<HtmlParserFactory.ModeOptions>();
99     jsModeOption.add(HtmlParserFactory.ModeOptions.JS_QUOTED);
100 
101     CONTENT_TYPE_LIST = new HashMap<String, HtmlParser.Mode>();
102     CONTENT_TYPE_LIST.put("text/html", HtmlParser.Mode.HTML);
103     CONTENT_TYPE_LIST.put("text/plain", HtmlParser.Mode.HTML);
104     CONTENT_TYPE_LIST.put("application/javascript", HtmlParser.Mode.JS);
105     CONTENT_TYPE_LIST.put("application/json", HtmlParser.Mode.JS);
106     CONTENT_TYPE_LIST.put("text/javascript", HtmlParser.Mode.JS);
107     CONTENT_TYPE_LIST.put("text/css", HtmlParser.Mode.CSS);
108   }
109 
110   /**
111    * Name of resource being auto escaped. Will be used in error and display messages.
112    */
113   private String resourceName;
114 
AutoEscapeContext()115   public AutoEscapeContext() {
116     this(EscapeMode.ESCAPE_AUTO, null);
117   }
118 
119   /**
120    * Create a new context in the state represented by mode.
121    *
122    * @param mode EscapeMode object.
123    */
AutoEscapeContext(EscapeMode mode)124   public AutoEscapeContext(EscapeMode mode) {
125     this(mode, null);
126   }
127 
128   /**
129    * Create a new context in the state represented by mode. If a non-null resourceName is provided,
130    * it will be used in displaying error messages.
131    *
132    * @param mode The initial EscapeMode for this context
133    * @param resourceName Name of the resource being auto escaped.
134    */
AutoEscapeContext(EscapeMode mode, String resourceName)135   public AutoEscapeContext(EscapeMode mode, String resourceName) {
136     this.resourceName = resourceName;
137     htmlParser = createHtmlParser(mode);
138   }
139 
140   /**
141    * Create a new context that is a copy of the current state of this context.
142    *
143    * @return New {@code AutoEscapeContext} that is a snapshot of the current state of this context.
144    */
cloneCurrentEscapeContext()145   public AutoEscapeContext cloneCurrentEscapeContext() {
146     AutoEscapeContext autoEscapeContext = new AutoEscapeContext();
147     autoEscapeContext.resourceName = resourceName;
148     autoEscapeContext.htmlParser = HtmlParserFactory.createParser(htmlParser);
149     return autoEscapeContext;
150   }
151 
152   /**
153    * Sets the current position in the resource being auto escaped. Useful for generating detailed
154    * error messages.
155    *
156    * @param line line number.
157    * @param column column number within line.
158    */
setCurrentPosition(int line, int column)159   public void setCurrentPosition(int line, int column) {
160     htmlParser.setLineNumber(line);
161     htmlParser.setColumnNumber(column);
162   }
163 
164   /**
165    * Returns the name of the resource currently being auto escaped.
166    */
getResourceName()167   public String getResourceName() {
168     return resourceName;
169   }
170 
171   /**
172    * Returns the current line number within the resource being auto escaped.
173    */
getLineNumber()174   public int getLineNumber() {
175     return htmlParser.getLineNumber();
176   }
177 
178   /**
179    * Returns the current column number within the resource being auto escaped.
180    */
getColumnNumber()181   public int getColumnNumber() {
182     return htmlParser.getColumnNumber();
183   }
184 
createHtmlParser(EscapeMode mode)185   private HtmlParser createHtmlParser(EscapeMode mode) {
186     switch (mode) {
187       case ESCAPE_AUTO:
188       case ESCAPE_AUTO_HTML:
189         return HtmlParserFactory.createParser();
190 
191       case ESCAPE_AUTO_JS_UNQUOTED:
192         // <script>START HERE
193         return HtmlParserFactory.createParserInMode(HtmlParser.Mode.JS, null);
194 
195       case ESCAPE_AUTO_JS:
196         // <script> var a = 'START HERE
197         return HtmlParserFactory.createParserInMode(HtmlParser.Mode.JS, jsModeOption);
198 
199       case ESCAPE_AUTO_STYLE:
200         // <style>START HERE
201         return HtmlParserFactory.createParserInMode(HtmlParser.Mode.CSS, null);
202 
203       case ESCAPE_AUTO_ATTR:
204         // <input text="START HERE
205         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.REGULAR, true, null);
206 
207       case ESCAPE_AUTO_UNQUOTED_ATTR:
208         // <input text=START HERE
209         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.REGULAR, false, null);
210 
211       case ESCAPE_AUTO_ATTR_URI:
212         // <a href="http://www.google.com/a?START HERE
213         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, true,
214             partialUrlAttributeOption);
215 
216       case ESCAPE_AUTO_UNQUOTED_ATTR_URI:
217         // <a href=http://www.google.com/a?START HERE
218         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, false,
219             partialUrlAttributeOption);
220 
221       case ESCAPE_AUTO_ATTR_URI_START:
222         // <a href="START HERE
223         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, true, null);
224 
225       case ESCAPE_AUTO_UNQUOTED_ATTR_URI_START:
226         // <a href=START HERE
227         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.URI, false, null);
228 
229       case ESCAPE_AUTO_ATTR_JS:
230         // <input onclick="doClick('START HERE
231         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, true,
232             quotedJsAttributeOption);
233 
234       case ESCAPE_AUTO_ATTR_UNQUOTED_JS:
235         // <input onclick="doClick(START HERE
236         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, true, null);
237 
238       case ESCAPE_AUTO_UNQUOTED_ATTR_JS:
239         // <input onclick=doClick('START HERE
240         throw new JSilverAutoEscapingException(
241             "Attempting to start HTML parser in unsupported mode" + mode, resourceName);
242 
243       case ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS:
244         // <input onclick=doClick(START HERE
245         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.JS, false, null);
246 
247       case ESCAPE_AUTO_ATTR_CSS:
248         // <input style="START HERE
249         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.STYLE, true, null);
250 
251       case ESCAPE_AUTO_UNQUOTED_ATTR_CSS:
252         // <input style=START HERE
253         return HtmlParserFactory.createParserInAttribute(HtmlParser.ATTR_TYPE.STYLE, false, null);
254 
255       default:
256         throw new JSilverAutoEscapingException("Attempting to start HTML parser in invalid mode"
257             + mode, resourceName);
258     }
259   }
260 
261   /**
262    * Parse the given data and update internal state accordingly.
263    *
264    * @param data Input to parse, usually the contents of a template.
265    */
parseData(String data)266   public void parseData(String data) {
267     try {
268       htmlParser.parse(data);
269     } catch (ParseException e) {
270       // ParseException displays the proper position, so do not store line and column
271       // number here.
272       throw new JSilverAutoEscapingException("Error in HtmlParser: " + e, resourceName);
273     }
274   }
275 
276   /**
277    * Lets the AutoEscapeContext know that some input was skipped.
278    *
279    * This method will usually be called for variables in the input stream. The AutoEscapeContext is
280    * told that the input stream contained some additional data but does not get to see the data. It
281    * can adjust its internal state accordingly.
282    */
insertText()283   public void insertText() {
284     try {
285       htmlParser.insertText();
286     } catch (ParseException e) {
287       throw new JSilverAutoEscapingException("Error during insertText(): " + e, resourceName,
288           htmlParser.getLineNumber(), htmlParser.getColumnNumber());
289     }
290   }
291 
292   /**
293    * Determines whether an included template that begins in state {@code start} is allowed to end in
294    * state {@code end}. Usually included templates are only allowed to end in the same context they
295    * begin in. This lets auto escaping parse the remainder of the parent template without needing to
296    * know the ending context of the included template. However, there is one exception where auto
297    * escaping will allow a different ending context: if the included template is a URI attribute
298    * value, it is allowed to change context from {@code ATTR_URI_START} to {@code ATTR_URI}. This
299    * does not cause any issues because the including template will call {@code insertText} when it
300    * encounters the include command, and {@code insertText} will cause the HTML parser to switch its
301    * internal state in the same way.
302    */
isPermittedStateChangeForIncludes(AutoEscapeState start, AutoEscapeState end)303   public boolean isPermittedStateChangeForIncludes(AutoEscapeState start, AutoEscapeState end) {
304     return start.equals(end)
305         || (start.equals(AutoEscapeState.ATTR_URI_START) && end.equals(AutoEscapeState.ATTR_URI))
306         || (start.equals(AutoEscapeState.UNQUOTED_ATTR_URI_START) && end
307             .equals(AutoEscapeState.UNQUOTED_ATTR_URI));
308   }
309 
310   /**
311    * Determine the correct escaping to apply for a variable.
312    *
313    * Looks at the current state of the htmlParser, and determines what escaping to apply to a
314    * variable in this state.
315    *
316    * @return Name of escaping function to use in this state.
317    */
getEscapingFunctionForCurrentState()318   public String getEscapingFunctionForCurrentState() {
319     return getCurrentState().getFunctionName();
320   }
321 
322   /**
323    * Returns the EscapeMode which will bring AutoEscapeContext into this state.
324    *
325    * Initializing a new AutoEscapeContext with this EscapeMode will bring it into the state that the
326    * current AutoEscapeContext object is in.
327    *
328    * @return An EscapeMode object.
329    */
getEscapeModeForCurrentState()330   public EscapeMode getEscapeModeForCurrentState() {
331     return getCurrentState().getEscapeMode();
332   }
333 
334   /**
335    * Calls the HtmlParser API to determine current state.
336    *
337    * This function is mostly a wrapper around the HtmlParser API. It gathers all the necessary
338    * information using that API and returns a single enum representing the current state.
339    *
340    * @return AutoEscapeState enum representing the current state.
341    */
getCurrentState()342   public AutoEscapeState getCurrentState() {
343     ExternalState state = htmlParser.getState();
344     String tag = htmlParser.getTag();
345 
346     // Currently we do not do any escaping inside CSS blocks, so ignore them.
347     if (state.equals(HtmlParser.STATE_CSS_FILE) || tag.equals("style")) {
348 
349       return AutoEscapeState.STYLE;
350     }
351 
352     // Handle variables inside <script> tags.
353     if (htmlParser.inJavascript() && !state.equals(HtmlParser.STATE_VALUE)) {
354       if (htmlParser.isJavascriptQuoted()) {
355         // <script> var a = "<?cs var: Blah ?>"; </script>
356         return AutoEscapeState.JS;
357       } else {
358         // <script> var a = <?cs var: Blah ?>; </script>
359         // No quotes around the variable, hence it can inject arbitrary javascript.
360         // So severely restrict the values it may contain.
361         return AutoEscapeState.JS_UNQUOTED;
362       }
363     }
364 
365     // Inside an HTML tag or attribute name
366     if (state.equals(HtmlParser.STATE_ATTR) || state.equals(HtmlParser.STATE_TAG)) {
367       return AutoEscapeState.ATTR;
368       // TODO: Need a strict validation function for tag and attribute names.
369     } else if (state.equals(HtmlParser.STATE_VALUE)) {
370       // Inside an HTML attribute value
371       return getCurrentAttributeState();
372     } else if (state.equals(HtmlParser.STATE_COMMENT) || state.equals(HtmlParser.STATE_TEXT)) {
373       // Default is assumed to be HTML body
374       // <b>Hello <?cs var: UserName ?></b> :
375       return AutoEscapeState.HTML;
376     }
377 
378     throw new JSilverAutoEscapingException("Invalid state received from HtmlParser: "
379         + state.toString(), resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
380   }
381 
getCurrentAttributeState()382   private AutoEscapeState getCurrentAttributeState() {
383     HtmlParser.ATTR_TYPE type = htmlParser.getAttributeType();
384     boolean attrQuoted = htmlParser.isAttributeQuoted();
385 
386     switch (type) {
387       case REGULAR:
388         // <input value="<?cs var: Blah ?>"> :
389         if (attrQuoted) {
390           return AutoEscapeState.ATTR;
391         } else {
392           return AutoEscapeState.UNQUOTED_ATTR;
393         }
394 
395       case URI:
396         if (htmlParser.isUrlStart()) {
397           // <a href="<?cs var: X ?>">
398           if (attrQuoted) {
399             return AutoEscapeState.ATTR_URI_START;
400           } else {
401             return AutoEscapeState.UNQUOTED_ATTR_URI_START;
402           }
403         } else {
404           // <a href="http://www.google.com/a?x=<?cs var: X ?>">
405           if (attrQuoted) {
406             // TODO: Html escaping because that is what Clearsilver does right now.
407             // May change this to url escaping soon.
408             return AutoEscapeState.ATTR_URI;
409           } else {
410             return AutoEscapeState.UNQUOTED_ATTR_URI;
411           }
412         }
413 
414       case JS:
415         if (htmlParser.isJavascriptQuoted()) {
416           /*
417            * Note: js_escape() hex encodes all html metacharacters. Therefore it is safe to not do
418            * an HTML escape around this.
419            */
420           if (attrQuoted) {
421             // <input onclick="alert('<?cs var:Blah ?>');">
422             return AutoEscapeState.ATTR_JS;
423           } else {
424             // <input onclick=alert('<?cs var: Blah ?>');>
425             return AutoEscapeState.UNQUOTED_ATTR_JS;
426           }
427         } else {
428           if (attrQuoted) {
429             /* <input onclick="alert(<?cs var:Blah ?>);"> */
430             return AutoEscapeState.ATTR_UNQUOTED_JS;
431           } else {
432 
433             /* <input onclick=alert(<?cs var:Blah ?>);> */
434             return AutoEscapeState.UNQUOTED_ATTR_UNQUOTED_JS;
435           }
436         }
437 
438       case STYLE:
439         // <input style="border:<?cs var: FancyBorder ?>"> :
440         if (attrQuoted) {
441           return AutoEscapeState.ATTR_CSS;
442         } else {
443           return AutoEscapeState.UNQUOTED_ATTR_CSS;
444         }
445 
446       default:
447         throw new JSilverAutoEscapingException("Invalid attribute type in HtmlParser: " + type,
448             resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
449     }
450   }
451 
452   /**
453    * Resets the state of the underlying html parser to a state consistent with the {@code
454    * contentType} provided. This method should be used when the starting auto escaping context of a
455    * resource cannot be determined from its contents - for example, a CSS stylesheet or a javascript
456    * source file.
457    *
458    * @param contentType MIME type header representing the content being parsed.
459    * @see #CONTENT_TYPE_LIST
460    */
setContentType(String contentType)461   public void setContentType(String contentType) {
462     HtmlParser.Mode mode = CONTENT_TYPE_LIST.get(contentType);
463     if (mode == null) {
464       throw new JSilverAutoEscapingException("Invalid content type specified: " + contentType,
465           resourceName, htmlParser.getLineNumber(), htmlParser.getColumnNumber());
466 
467     }
468     htmlParser.resetMode(mode);
469   }
470 
471   /**
472    * Enum representing states of the data being parsed.
473    *
474    * This enumeration lists all the states in which autoescaping would have some effect.
475    *
476    */
477   public static enum AutoEscapeState {
478     HTML("html", ESCAPE_AUTO_HTML), JS("js", ESCAPE_AUTO_JS), STYLE("css", ESCAPE_AUTO_STYLE), JS_UNQUOTED(
479         "js_check_number", ESCAPE_AUTO_JS_UNQUOTED), ATTR("html", ESCAPE_AUTO_ATTR), UNQUOTED_ATTR(
480         "html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR), ATTR_URI("html", ESCAPE_AUTO_ATTR_URI), UNQUOTED_ATTR_URI(
481         "html_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_URI), ATTR_URI_START("url_validate",
482         ESCAPE_AUTO_ATTR_URI_START), UNQUOTED_ATTR_URI_START("url_validate_unquoted",
483         ESCAPE_AUTO_UNQUOTED_ATTR_URI_START), ATTR_JS("js", ESCAPE_AUTO_ATTR_JS), ATTR_UNQUOTED_JS(
484         "js_check_number", ESCAPE_AUTO_ATTR_UNQUOTED_JS), UNQUOTED_ATTR_JS("js_attr_unquoted",
485         ESCAPE_AUTO_UNQUOTED_ATTR_JS), UNQUOTED_ATTR_UNQUOTED_JS("js_check_number",
486         ESCAPE_AUTO_UNQUOTED_ATTR_UNQUOTED_JS), ATTR_CSS("css", ESCAPE_AUTO_ATTR_CSS), UNQUOTED_ATTR_CSS(
487         "css_unquoted", ESCAPE_AUTO_UNQUOTED_ATTR_CSS);
488 
489     private final String functionName;
490     private final EscapeMode escapeMode;
491 
AutoEscapeState(String functionName, EscapeMode mode)492     private AutoEscapeState(String functionName, EscapeMode mode) {
493       this.functionName = functionName;
494       this.escapeMode = mode;
495     }
496 
getFunctionName()497     public String getFunctionName() {
498       return functionName;
499     }
500 
getEscapeMode()501     public EscapeMode getEscapeMode() {
502       return escapeMode;
503     }
504   }
505 }
506