• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1<HTML>
2<BODY BGCOLOR="white">
3<PRE>
4<FONT color="green">001</FONT>    // Copyright (c) 2011, Mike Samuel<a name="line.1"></a>
5<FONT color="green">002</FONT>    // All rights reserved.<a name="line.2"></a>
6<FONT color="green">003</FONT>    //<a name="line.3"></a>
7<FONT color="green">004</FONT>    // Redistribution and use in source and binary forms, with or without<a name="line.4"></a>
8<FONT color="green">005</FONT>    // modification, are permitted provided that the following conditions<a name="line.5"></a>
9<FONT color="green">006</FONT>    // are met:<a name="line.6"></a>
10<FONT color="green">007</FONT>    //<a name="line.7"></a>
11<FONT color="green">008</FONT>    // Redistributions of source code must retain the above copyright<a name="line.8"></a>
12<FONT color="green">009</FONT>    // notice, this list of conditions and the following disclaimer.<a name="line.9"></a>
13<FONT color="green">010</FONT>    // Redistributions in binary form must reproduce the above copyright<a name="line.10"></a>
14<FONT color="green">011</FONT>    // notice, this list of conditions and the following disclaimer in the<a name="line.11"></a>
15<FONT color="green">012</FONT>    // documentation and/or other materials provided with the distribution.<a name="line.12"></a>
16<FONT color="green">013</FONT>    // Neither the name of the OWASP nor the names of its contributors may<a name="line.13"></a>
17<FONT color="green">014</FONT>    // be used to endorse or promote products derived from this software<a name="line.14"></a>
18<FONT color="green">015</FONT>    // without specific prior written permission.<a name="line.15"></a>
19<FONT color="green">016</FONT>    // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS<a name="line.16"></a>
20<FONT color="green">017</FONT>    // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT<a name="line.17"></a>
21<FONT color="green">018</FONT>    // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS<a name="line.18"></a>
22<FONT color="green">019</FONT>    // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE<a name="line.19"></a>
23<FONT color="green">020</FONT>    // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,<a name="line.20"></a>
24<FONT color="green">021</FONT>    // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,<a name="line.21"></a>
25<FONT color="green">022</FONT>    // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;<a name="line.22"></a>
26<FONT color="green">023</FONT>    // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER<a name="line.23"></a>
27<FONT color="green">024</FONT>    // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT<a name="line.24"></a>
28<FONT color="green">025</FONT>    // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN<a name="line.25"></a>
29<FONT color="green">026</FONT>    // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE<a name="line.26"></a>
30<FONT color="green">027</FONT>    // POSSIBILITY OF SUCH DAMAGE.<a name="line.27"></a>
31<FONT color="green">028</FONT>    <a name="line.28"></a>
32<FONT color="green">029</FONT>    package org.owasp.html;<a name="line.29"></a>
33<FONT color="green">030</FONT>    <a name="line.30"></a>
34<FONT color="green">031</FONT>    import com.google.common.collect.ImmutableMap;<a name="line.31"></a>
35<FONT color="green">032</FONT>    <a name="line.32"></a>
36<FONT color="green">033</FONT>    /**<a name="line.33"></a>
37<FONT color="green">034</FONT>     * From section 8.1.2.6 of http://www.whatwg.org/specs/web-apps/current-work/<a name="line.34"></a>
38<FONT color="green">035</FONT>     * &lt;p&gt;<a name="line.35"></a>
39<FONT color="green">036</FONT>     * The text in CDATA and RCDATA elements must not contain any<a name="line.36"></a>
40<FONT color="green">037</FONT>     * occurrences of the string "&lt;/" (U+003C LESS-THAN SIGN, U+002F<a name="line.37"></a>
41<FONT color="green">038</FONT>     * SOLIDUS) followed by characters that case-insensitively match the<a name="line.38"></a>
42<FONT color="green">039</FONT>     * tag name of the element followed by one of U+0009 CHARACTER<a name="line.39"></a>
43<FONT color="green">040</FONT>     * TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C<a name="line.40"></a>
44<FONT color="green">041</FONT>     * FORM FEED (FF), U+0020 SPACE, U+003E GREATER-THAN SIGN (&gt;), or<a name="line.41"></a>
45<FONT color="green">042</FONT>     * U+002F SOLIDUS (/), unless that string is part of an escaping<a name="line.42"></a>
46<FONT color="green">043</FONT>     * text span.<a name="line.43"></a>
47<FONT color="green">044</FONT>     * &lt;/p&gt;<a name="line.44"></a>
48<FONT color="green">045</FONT>     *<a name="line.45"></a>
49<FONT color="green">046</FONT>     * &lt;p&gt;<a name="line.46"></a>
50<FONT color="green">047</FONT>     * See also<a name="line.47"></a>
51<FONT color="green">048</FONT>     * http://www.whatwg.org/specs/web-apps/current-work/#cdata-rcdata-restrictions<a name="line.48"></a>
52<FONT color="green">049</FONT>     * for the elements which fall in each category.<a name="line.49"></a>
53<FONT color="green">050</FONT>     * &lt;/p&gt;<a name="line.50"></a>
54<FONT color="green">051</FONT>     *<a name="line.51"></a>
55<FONT color="green">052</FONT>     * @author Mike Samuel &lt;mikesamuel@gmail.com&gt;<a name="line.52"></a>
56<FONT color="green">053</FONT>     */<a name="line.53"></a>
57<FONT color="green">054</FONT>    public enum HtmlTextEscapingMode {<a name="line.54"></a>
58<FONT color="green">055</FONT>      /**<a name="line.55"></a>
59<FONT color="green">056</FONT>       * Normally escaped character data that breaks around comments and tags.<a name="line.56"></a>
60<FONT color="green">057</FONT>       */<a name="line.57"></a>
61<FONT color="green">058</FONT>      PCDATA,<a name="line.58"></a>
62<FONT color="green">059</FONT>      /**<a name="line.59"></a>
63<FONT color="green">060</FONT>       * A span of text where HTML special characters are interpreted literally,<a name="line.60"></a>
64<FONT color="green">061</FONT>       * as in a SCRIPT tag.<a name="line.61"></a>
65<FONT color="green">062</FONT>       */<a name="line.62"></a>
66<FONT color="green">063</FONT>      CDATA,<a name="line.63"></a>
67<FONT color="green">064</FONT>      /**<a name="line.64"></a>
68<FONT color="green">065</FONT>       * Like {@link #CDATA} but only for certain browsers.<a name="line.65"></a>
69<FONT color="green">066</FONT>       */<a name="line.66"></a>
70<FONT color="green">067</FONT>      CDATA_SOMETIMES,<a name="line.67"></a>
71<FONT color="green">068</FONT>      /**<a name="line.68"></a>
72<FONT color="green">069</FONT>       * A span of text and character entity references where HTML special<a name="line.69"></a>
73<FONT color="green">070</FONT>       * characters are interpreted literally, as in a TITLE tag.<a name="line.70"></a>
74<FONT color="green">071</FONT>       */<a name="line.71"></a>
75<FONT color="green">072</FONT>      RCDATA,<a name="line.72"></a>
76<FONT color="green">073</FONT>      /**<a name="line.73"></a>
77<FONT color="green">074</FONT>       * A span of text where HTML special characters are interpreted literally,<a name="line.74"></a>
78<FONT color="green">075</FONT>       * where there is no end tag.  PLAIN_TEXT runs until the end of the file.<a name="line.75"></a>
79<FONT color="green">076</FONT>       */<a name="line.76"></a>
80<FONT color="green">077</FONT>      PLAIN_TEXT,<a name="line.77"></a>
81<FONT color="green">078</FONT>    <a name="line.78"></a>
82<FONT color="green">079</FONT>      /**<a name="line.79"></a>
83<FONT color="green">080</FONT>       * Cannot contain data.<a name="line.80"></a>
84<FONT color="green">081</FONT>       */<a name="line.81"></a>
85<FONT color="green">082</FONT>      VOID,<a name="line.82"></a>
86<FONT color="green">083</FONT>      ;<a name="line.83"></a>
87<FONT color="green">084</FONT>    <a name="line.84"></a>
88<FONT color="green">085</FONT>      private static final ImmutableMap&lt;String, HtmlTextEscapingMode&gt; ESCAPING_MODES<a name="line.85"></a>
89<FONT color="green">086</FONT>          = ImmutableMap.&lt;String, HtmlTextEscapingMode&gt;builder()<a name="line.86"></a>
90<FONT color="green">087</FONT>          .put("iframe", CDATA)<a name="line.87"></a>
91<FONT color="green">088</FONT>          // HTML5 does not treat listing as CDATA and treats XMP as deprecated,<a name="line.88"></a>
92<FONT color="green">089</FONT>          // but HTML2 does treat listing as CDATA; see<a name="line.89"></a>
93<FONT color="green">090</FONT>          // http://www.w3.org/MarkUp/1995-archive/NonStandard.html<a name="line.90"></a>
94<FONT color="green">091</FONT>          // Listing is not supported by browsers.<a name="line.91"></a>
95<FONT color="green">092</FONT>          .put("listing", CDATA_SOMETIMES)<a name="line.92"></a>
96<FONT color="green">093</FONT>          .put("xmp", CDATA)<a name="line.93"></a>
97<FONT color="green">094</FONT>    <a name="line.94"></a>
98<FONT color="green">095</FONT>          // Technically, noembed, noscript and noframes are CDATA_SOMETIMES but<a name="line.95"></a>
99<FONT color="green">096</FONT>          // we can only be hurt by allowing tag content that looks like text so<a name="line.96"></a>
100<FONT color="green">097</FONT>          // we treat them as regular PCDATA.<a name="line.97"></a>
101<FONT color="green">098</FONT>          //.put("noembed", CDATA_SOMETIMES)<a name="line.98"></a>
102<FONT color="green">099</FONT>          //.put("noframes", CDATA_SOMETIMES)<a name="line.99"></a>
103<FONT color="green">100</FONT>          //.put("noscript", CDATA_SOMETIMES)<a name="line.100"></a>
104<FONT color="green">101</FONT>          .put("comment", CDATA_SOMETIMES)  // IE only<a name="line.101"></a>
105<FONT color="green">102</FONT>    <a name="line.102"></a>
106<FONT color="green">103</FONT>          // Runs till end of file.<a name="line.103"></a>
107<FONT color="green">104</FONT>          .put("plaintext", PLAIN_TEXT)<a name="line.104"></a>
108<FONT color="green">105</FONT>    <a name="line.105"></a>
109<FONT color="green">106</FONT>          .put("script", CDATA)<a name="line.106"></a>
110<FONT color="green">107</FONT>          .put("style", CDATA)<a name="line.107"></a>
111<FONT color="green">108</FONT>    <a name="line.108"></a>
112<FONT color="green">109</FONT>          // Textarea and Title are RCDATA, not CDATA, so decode entity references.<a name="line.109"></a>
113<FONT color="green">110</FONT>          .put("textarea", RCDATA)<a name="line.110"></a>
114<FONT color="green">111</FONT>          .put("title", RCDATA)<a name="line.111"></a>
115<FONT color="green">112</FONT>    <a name="line.112"></a>
116<FONT color="green">113</FONT>          // Nodes that can't contain content.<a name="line.113"></a>
117<FONT color="green">114</FONT>          // http://www.w3.org/TR/html-markup/syntax.html#void-elements<a name="line.114"></a>
118<FONT color="green">115</FONT>          .put("area", VOID)<a name="line.115"></a>
119<FONT color="green">116</FONT>          .put("base", VOID)<a name="line.116"></a>
120<FONT color="green">117</FONT>          .put("br", VOID)<a name="line.117"></a>
121<FONT color="green">118</FONT>          .put("col", VOID)<a name="line.118"></a>
122<FONT color="green">119</FONT>          .put("command", VOID)<a name="line.119"></a>
123<FONT color="green">120</FONT>          .put("embed", VOID)<a name="line.120"></a>
124<FONT color="green">121</FONT>          .put("hr", VOID)<a name="line.121"></a>
125<FONT color="green">122</FONT>          .put("img", VOID)<a name="line.122"></a>
126<FONT color="green">123</FONT>          .put("input", VOID)<a name="line.123"></a>
127<FONT color="green">124</FONT>          .put("keygen", VOID)<a name="line.124"></a>
128<FONT color="green">125</FONT>          .put("link", VOID)<a name="line.125"></a>
129<FONT color="green">126</FONT>          .put("meta", VOID)<a name="line.126"></a>
130<FONT color="green">127</FONT>          .put("param", VOID)<a name="line.127"></a>
131<FONT color="green">128</FONT>          .put("source", VOID)<a name="line.128"></a>
132<FONT color="green">129</FONT>          .put("track", VOID)<a name="line.129"></a>
133<FONT color="green">130</FONT>          .put("wbr", VOID)<a name="line.130"></a>
134<FONT color="green">131</FONT>    <a name="line.131"></a>
135<FONT color="green">132</FONT>           // EMPTY per http://www.w3.org/TR/REC-html32#basefont<a name="line.132"></a>
136<FONT color="green">133</FONT>          .put("basefont", VOID)<a name="line.133"></a>
137<FONT color="green">134</FONT>          .build();<a name="line.134"></a>
138<FONT color="green">135</FONT>    <a name="line.135"></a>
139<FONT color="green">136</FONT>    <a name="line.136"></a>
140<FONT color="green">137</FONT>      /**<a name="line.137"></a>
141<FONT color="green">138</FONT>       * The mode used for content following a start tag with the given name.<a name="line.138"></a>
142<FONT color="green">139</FONT>       */<a name="line.139"></a>
143<FONT color="green">140</FONT>      public static HtmlTextEscapingMode getModeForTag(String canonTagName) {<a name="line.140"></a>
144<FONT color="green">141</FONT>        HtmlTextEscapingMode mode = ESCAPING_MODES.get(canonTagName);<a name="line.141"></a>
145<FONT color="green">142</FONT>        return mode != null ? mode : PCDATA;<a name="line.142"></a>
146<FONT color="green">143</FONT>      }<a name="line.143"></a>
147<FONT color="green">144</FONT>    <a name="line.144"></a>
148<FONT color="green">145</FONT>      /**<a name="line.145"></a>
149<FONT color="green">146</FONT>       * True iff the content following the given tag allows escaping text<a name="line.146"></a>
150<FONT color="green">147</FONT>       * spans: {@code &lt;!--&amp;hellip;--&gt;} that escape even things that might<a name="line.147"></a>
151<FONT color="green">148</FONT>       * be an end tag for the corresponding open tag.<a name="line.148"></a>
152<FONT color="green">149</FONT>       */<a name="line.149"></a>
153<FONT color="green">150</FONT>      public static boolean allowsEscapingTextSpan(String canonTagName) {<a name="line.150"></a>
154<FONT color="green">151</FONT>        // &lt;xmp&gt; and &lt;plaintext&gt; do not admit escaping text spans.<a name="line.151"></a>
155<FONT color="green">152</FONT>        return "style".equals(canonTagName) || "script".equals(canonTagName)<a name="line.152"></a>
156<FONT color="green">153</FONT>            || "noembed".equals(canonTagName) || "noscript".equals(canonTagName)<a name="line.153"></a>
157<FONT color="green">154</FONT>            || "noframes".equals(canonTagName);<a name="line.154"></a>
158<FONT color="green">155</FONT>      }<a name="line.155"></a>
159<FONT color="green">156</FONT>    <a name="line.156"></a>
160<FONT color="green">157</FONT>      /**<a name="line.157"></a>
161<FONT color="green">158</FONT>       * True if content immediately following the start tag must be treated as<a name="line.158"></a>
162<FONT color="green">159</FONT>       * special CDATA so that &amp;lt;'s are not treated as starting tags, comments<a name="line.159"></a>
163<FONT color="green">160</FONT>       * or directives.<a name="line.160"></a>
164<FONT color="green">161</FONT>       */<a name="line.161"></a>
165<FONT color="green">162</FONT>      public static boolean isTagFollowedByLiteralContent(String canonTagName) {<a name="line.162"></a>
166<FONT color="green">163</FONT>        HtmlTextEscapingMode mode = getModeForTag(canonTagName);<a name="line.163"></a>
167<FONT color="green">164</FONT>        return mode != PCDATA &amp;&amp; mode != VOID;<a name="line.164"></a>
168<FONT color="green">165</FONT>      }<a name="line.165"></a>
169<FONT color="green">166</FONT>    <a name="line.166"></a>
170<FONT color="green">167</FONT>      /**<a name="line.167"></a>
171<FONT color="green">168</FONT>       * True iff the tag cannot contain any content -- that is, iff an HTML parser<a name="line.168"></a>
172<FONT color="green">169</FONT>       * will consider the element to have ended immediately after the start tag.<a name="line.169"></a>
173<FONT color="green">170</FONT>       */<a name="line.170"></a>
174<FONT color="green">171</FONT>      public static boolean isVoidElement(String canonTagName) {<a name="line.171"></a>
175<FONT color="green">172</FONT>        return getModeForTag(canonTagName) == VOID;<a name="line.172"></a>
176<FONT color="green">173</FONT>      }<a name="line.173"></a>
177<FONT color="green">174</FONT>    }<a name="line.174"></a>
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238</PRE>
239</BODY>
240</HTML>
241