// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package org.owasp.html;

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import javax.annotation.Nullable;

/**
 * Sits between the HTML parser, and the policy, and the renderer so that it
 * can report dropped elements and attributes to an {@link HtmlChangeListener}.
 *
 * <pre>
 * HtmlChangeReporter&lt;T&gt; hcr = new HtmlChangeReporter&lt;T&gt;(
 *     renderer, htmlChangeListener, context);
 * hcr.setPolicy(policyFactory.apply(hcr.getWrappedRenderer()));
 * HtmlSanitizer.sanitize(html, hcr.getWrappedPolicy());
 * </pre>
 *
 * The renderer receives events from the policy unchanged, but the reporter
 * notices differences between the events from the lexer and those from the
 * policy.
 *
 * @param <T> The type of context value passed to the
 *     {@link HtmlChangeListener}.
 */
public final class HtmlChangeReporter<T> {
  private final OutputChannel output;
  private final InputChannel<T> input;

  /**
   * @param renderer receives, unchanged, every event that the policy emits
   *     to {@link #getWrappedRenderer()}.
   * @param listener told about elements and attributes that were handed to
   *     the policy but that the policy did not hand on to the renderer.
   * @param context passed as-is to every call made on {@code listener}.
   */
  public HtmlChangeReporter(
      HtmlStreamEventReceiver renderer,
      HtmlChangeListener<? super T> listener, @Nullable T context) {
    this.output = new OutputChannel(renderer);
    this.input = new InputChannel<T>(output, listener, context);
  }

  /**
   * Sets the policy to which {@link #getWrappedPolicy()} forwards the events
   * it receives.  This must be called before any event is sent to the
   * wrapped policy.
   */
  public void setPolicy(HtmlSanitizer.Policy policy) {
    this.input.policy = policy;
  }

  /**
   * The receiver that the policy should write to.  It records which opened
   * elements and attributes the policy lets through and then forwards each
   * event to the renderer supplied at construction time.
   */
  public HtmlStreamEventReceiver getWrappedRenderer() { return output; }

  /**
   * The policy that should be handed the parser's events.  It forwards each
   * event to the policy set via {@link #setPolicy} and notifies the listener
   * about whatever that policy dropped from an open tag.
   */
  public HtmlSanitizer.Policy getWrappedPolicy() { return input; }

  /**
   * Receives events on their way into the policy.  For open tags it records
   * what the policy was given, lets the policy run, and then reports
   * whatever the {@link OutputChannel} did not see come out the other side.
   */
  private static final class InputChannel<T> implements HtmlSanitizer.Policy {
    /** The delegate; null until {@link HtmlChangeReporter#setPolicy}. */
    @Nullable HtmlStreamEventReceiver policy;
    final OutputChannel output;
    @Nullable final T context;
    final HtmlChangeListener<? super T> listener;

    InputChannel(
        OutputChannel output, HtmlChangeListener<? super T> listener,
        @Nullable T context) {
      this.output = output;
      this.context = context;
      this.listener = listener;
    }

    /**
     * Returns the delegate policy, failing with a descriptive message
     * instead of an anonymous null dereference when none has been set.
     *
     * @throws NullPointerException if no policy has been set.
     */
    private HtmlStreamEventReceiver requirePolicy() {
      HtmlStreamEventReceiver delegate = policy;
      if (delegate == null) {
        throw new NullPointerException(
            "HtmlChangeReporter.setPolicy must be called before events are"
            + " sent to the wrapped policy");
      }
      return delegate;
    }

    public void openDocument() {
      requirePolicy().openDocument();
    }

    public void closeDocument() {
      requirePolicy().closeDocument();
    }

    public void openTag(String elementName, List<String> attrs) {
      // Check for a delegate before touching the shared expectation state.
      HtmlStreamEventReceiver delegate = requirePolicy();

      // Record what the policy is about to be given.  attrs alternates
      // names and values, so the names are at the even indices.
      output.expectedElementName = elementName;
      output.expectedAttrNames.clear();
      for (int i = 0, n = attrs.size(); i < n; i += 2) {
        output.expectedAttrNames.add(attrs.get(i));
      }

      // While the policy runs, the output channel strikes off the element
      // name and each attribute name that it sees passed to the renderer.
      delegate.openTag(elementName, attrs);

      {
        // Gather the notification details to avoid any problems with the
        // listener re-entering the stream event receiver.  This shouldn't
        // occur, but if it does it will be a source of subtle confusing bugs.
        String discardedElementName = output.expectedElementName;
        output.expectedElementName = null;
        int nExpected = output.expectedAttrNames.size();
        // Attributes are only reported for an element that made it through;
        // when the whole element was dropped only the tag is reported.
        String[] discardedAttrNames =
            nExpected != 0 && discardedElementName == null
            ? output.expectedAttrNames.toArray(new String[nExpected])
            : ZERO_STRINGS;
        output.expectedAttrNames.clear();
        // Dispatch notifications to the listener.
        if (discardedElementName != null) {
          listener.discardedTag(context, discardedElementName);
        }
        if (discardedAttrNames.length != 0) {
          listener.discardedAttributes(
              context, elementName, discardedAttrNames);
        }
      }
    }

    public void closeTag(String elementName) {
      requirePolicy().closeTag(elementName);
    }

    public void text(String textChunk) {
      requirePolicy().text(textChunk);
    }
  }

  /**
   * Receives events on their way out of the policy.  It strikes off the
   * element and attribute names that {@link InputChannel} said to expect as
   * they are seen, and forwards every event to the renderer.
   */
  private static final class OutputChannel implements HtmlStreamEventReceiver {
    private final HtmlStreamEventReceiver renderer;
    /**
     * The element name most recently given to the policy, or null once an
     * open tag with that name has been seen or the report has been made.
     */
    @Nullable String expectedElementName;
    /**
     * Names of the attributes most recently given to the policy that have
     * not yet been seen on an open tag, in the order they were given.
     */
    final Set<String> expectedAttrNames = new LinkedHashSet<String>();

    OutputChannel(HtmlStreamEventReceiver renderer) {
      this.renderer = renderer;
    }

    public void openDocument() {
      renderer.openDocument();
    }

    public void closeDocument() {
      renderer.closeDocument();
    }

    public void openTag(String elementName, List<String> attrs) {
      if (elementName.equals(expectedElementName)) {
        expectedElementName = null;
      }
      // attrs alternates names and values; strike off each name seen.
      for (int i = 0, n = attrs.size(); i < n; i += 2) {
        expectedAttrNames.remove(attrs.get(i));
      }
      renderer.openTag(elementName, attrs);
    }

    public void closeTag(String elementName) {
      renderer.closeTag(elementName);
    }

    public void text(String text) {
      renderer.text(text);
    }
  }

  private static final String[] ZERO_STRINGS = new String[0];
}