• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011, Mike Samuel
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions
6 // are met:
7 //
8 // Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // Neither the name of the OWASP nor the names of its contributors may
14 // be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
28 
29 package org.owasp.html;
30 
31 import java.io.File;
32 import java.io.StringReader;
33 import java.util.List;
34 import java.util.ListIterator;
35 
36 import org.w3c.dom.Node;
37 import org.xml.sax.InputSource;
38 
39 import com.google.common.base.Charsets;
40 import com.google.common.io.Files;
41 
42 import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
43 
44 public class Benchmark {
45 
main(String[] args)46   public static void main(String[] args) throws Exception {
47     String html = Files.toString(new File(args[0]), Charsets.UTF_8);
48 
49     boolean timeLibhtmlparser = true;
50     boolean timeSanitize = true;
51     boolean timePolicyBuilder = true;
52 
53     if (args.length > 1) {
54       String s = args[1];
55       timeLibhtmlparser = s.contains("h");
56       timeSanitize = s.contains("s");
57       timePolicyBuilder = s.contains("p");
58     }
59 
60     int n = 0;  // Defeat optimizations.
61 
62     if (timeLibhtmlparser) {
63       for (int i = 100; --i >= 0;) {
64         n += parseUsingLibhtmlparser(html);
65       }
66     }
67 
68     if (timeSanitize) {
69       for (int i = 100; --i >= 0;) {
70         n += sanitize(html).length();
71       }
72     }
73 
74     if (timePolicyBuilder) {
75       for (int i = 100; --i >= 0;) {
76         n += sanitizeUsingPolicyBuilder(html).length();
77       }
78     }
79 
80     long t0 = 0, t1 = -1;
81     if (timeLibhtmlparser) {
82       t0 = System.nanoTime();
83       for (int i = 100; --i >= 0;) {
84         n += parseUsingLibhtmlparser(html);
85       }
86       t1 = System.nanoTime();
87     }
88 
89     long t2 = 0, t3 = -1;
90     if (timeSanitize) {
91       t2 = System.nanoTime();
92       for (int i = 100; --i >= 0;) {
93         n += sanitize(html).length();
94       }
95       t3 = System.nanoTime();
96     }
97 
98     long t4 = 0, t5 = -1;
99     if (timePolicyBuilder) {
100       t4 = System.nanoTime();
101       for (int i = 100; --i >= 0;) {
102         n += sanitizeUsingPolicyBuilder(html).length();
103       }
104       t5 = System.nanoTime();
105     }
106 
107     // Defeat optimization by using n.
108     if (n < 0) {
109       throw new AssertionError("Oh noes underflow");
110     }
111 
112     if (timeLibhtmlparser) {
113       System.err.println(String.format(
114           "Tree parse           : %12d", (t1 - t0)));
115     }
116     if (timeSanitize) {
117       System.err.println(String.format(
118           "Full sanitize custom : %12d", (t3 - t2)));
119     }
120     if (timePolicyBuilder) {
121       System.err.println(String.format(
122           "Full sanitize w/ PB  : %12d", (t5 - t4)));
123     }
124   }
125 
parseUsingLibhtmlparser(String html)126   private static int parseUsingLibhtmlparser(String html) throws Exception {
127     HtmlDocumentBuilder parser = new HtmlDocumentBuilder();
128     Node node = parser.parse(new InputSource(new StringReader(html)));
129     return System.identityHashCode(node) >> 24;
130   }
131 
sanitize(String html)132   private static String sanitize(String html) throws Exception {
133     StringBuilder sb = new StringBuilder(html.length());
134 
135     final HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
136         sb, new Handler<String>() {
137 
138           public void handle(String x) {
139             throw new AssertionError(x);
140           }
141         });
142 
143     HtmlSanitizer.sanitize(html, new HtmlSanitizer.Policy() {
144 
145       public void openDocument() {
146         renderer.openDocument();
147       }
148 
149       public void closeDocument() {
150         renderer.closeDocument();
151       }
152 
153       public void text(String textChunk) {
154         renderer.text(textChunk);
155       }
156 
157       public void openTag(String elementName, List<String> attrs) {
158         if ("a".equals(elementName)) {
159           for (ListIterator<String> it = attrs.listIterator(); it.hasNext();) {
160             String name = it.next();
161             if ("href".equals(name)) {
162               it.next();
163             } else {
164               it.remove();
165               it.next();
166               it.remove();
167             }
168           }
169           renderer.openTag(elementName, attrs);
170         }
171       }
172 
173       public void closeTag(String elementName) {
174         if ("a".equals(elementName)) {
175           renderer.closeTag(elementName);
176         }
177       }
178     });
179     return sb.toString();
180   }
181 
182   private static HtmlPolicyBuilder policyBuilder;
183 
sanitizeUsingPolicyBuilder(String html)184   private static String sanitizeUsingPolicyBuilder(String html)
185       throws Exception {
186     if (policyBuilder == null) {
187       policyBuilder = new HtmlPolicyBuilder()
188           .allowStandardUrlProtocols()
189           .allowElements("a")
190           .allowAttributes("href").onElements("a");
191     }
192 
193     StringBuilder sb = new StringBuilder(html.length());
194 
195     HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
196         sb, new Handler<String>() {
197           public void handle(String x) {
198             throw new AssertionError(x);
199           }
200         });
201 
202     HtmlSanitizer.sanitize(html, policyBuilder.build(renderer));
203     return sb.toString();
204   }
205 
206 }
207