// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package org.owasp.html;

import java.io.File;
import java.io.StringReader;
import java.util.List;
import java.util.ListIterator;

import org.w3c.dom.Node;
import org.xml.sax.InputSource;

import com.google.common.base.Charsets;
import com.google.common.io.Files;

import nu.validator.htmlparser.dom.HtmlDocumentBuilder;

/**
 * Command-line micro-benchmark that times three ways of processing one HTML
 * file:
 * <ul>
 *   <li>a DOM parse using {@link HtmlDocumentBuilder},
 *   <li>{@link HtmlSanitizer#sanitize} driven by a hand-written policy that
 *       forwards only {@code <a>} elements and their {@code href} attributes,
 *   <li>{@link HtmlSanitizer#sanitize} driven by a policy produced by
 *       {@link HtmlPolicyBuilder} with the same allowances.
 * </ul>
 *
 * <p>Usage: {@code Benchmark <html-file> [modes]} where {@code modes} is a
 * string that may contain {@code h} (tree parse), {@code s} (custom policy)
 * and/or {@code p} (policy builder).  When {@code modes} is omitted all three
 * are run.  Each selected mode is first run {@link #ITERATIONS} times untimed
 * as a warm-up, then {@link #ITERATIONS} times timed; the elapsed
 * {@link System#nanoTime()} difference for the whole timed batch is written
 * to {@code System.err}.
 */
public class Benchmark {

  /** Number of passes over the input in each warm-up batch and each timed batch. */
  private static final int ITERATIONS = 100;

  /**
   * Handler handed to {@link HtmlStreamRenderer#create} by both sanitize
   * variants.  It aborts the benchmark by throwing an {@link AssertionError}
   * carrying whatever string it is given.
   * NOTE(review): presumably the renderer calls this to report output it
   * refuses to emit -- confirm against HtmlStreamRenderer.
   */
  private static final Handler<String> FAIL_FAST_HANDLER =
      new Handler<String>() {
        public void handle(String x) {
          throw new AssertionError(x);
        }
      };

  /**
   * Runs the benchmark.
   *
   * @param args {@code args[0]} is the path of a UTF-8 HTML file;
   *     the optional {@code args[1]} selects which modes to time (see the
   *     class comment).
   * @throws Exception if the file cannot be read or a parser fails.
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 1) {
      // Without this guard a missing argument surfaces as a bare
      // ArrayIndexOutOfBoundsException.
      System.err.println("Usage: Benchmark <html-file> [hsp]");
      System.exit(2);
      return;
    }

    String html = Files.toString(new File(args[0]), Charsets.UTF_8);

    boolean timeLibhtmlparser = true;
    boolean timeSanitize = true;
    boolean timePolicyBuilder = true;

    if (args.length > 1) {
      String s = args[1];
      timeLibhtmlparser = s.contains("h");
      timeSanitize = s.contains("s");
      timePolicyBuilder = s.contains("p");
    }

    // Defeat optimizations.  A long is used so that summing 2 * ITERATIONS
    // output lengths cannot wrap around for large inputs.
    long n = 0;

    // Warm-up: run every selected mode untimed before any timed batch.
    if (timeLibhtmlparser) {
      n += repeatParse(html);
    }
    if (timeSanitize) {
      n += repeatSanitize(html);
    }
    if (timePolicyBuilder) {
      n += repeatSanitizeUsingPolicyBuilder(html);
    }

    long t0 = 0, t1 = -1;
    if (timeLibhtmlparser) {
      t0 = System.nanoTime();
      n += repeatParse(html);
      t1 = System.nanoTime();
    }

    long t2 = 0, t3 = -1;
    if (timeSanitize) {
      t2 = System.nanoTime();
      n += repeatSanitize(html);
      t3 = System.nanoTime();
    }

    long t4 = 0, t5 = -1;
    if (timePolicyBuilder) {
      t4 = System.nanoTime();
      n += repeatSanitizeUsingPolicyBuilder(html);
      t5 = System.nanoTime();
    }

    // Defeat optimization by using n.  Every contribution to n is
    // non-negative, so this branch only exists to keep n live.
    if (n < 0) {
      throw new AssertionError("Oh noes underflow");
    }

    if (timeLibhtmlparser) {
      System.err.println(String.format(
          "Tree parse           : %12d", (t1 - t0)));
    }
    if (timeSanitize) {
      System.err.println(String.format(
          "Full sanitize custom : %12d", (t3 - t2)));
    }
    if (timePolicyBuilder) {
      System.err.println(String.format(
          "Full sanitize w/ PB  : %12d", (t5 - t4)));
    }
  }

  /** Calls {@link #parseUsingLibhtmlparser} ITERATIONS times and sums the results. */
  private static long repeatParse(String html) throws Exception {
    long sink = 0;
    for (int i = ITERATIONS; --i >= 0;) {
      sink += parseUsingLibhtmlparser(html);
    }
    return sink;
  }

  /** Calls {@link #sanitize} ITERATIONS times and sums the output lengths. */
  private static long repeatSanitize(String html) throws Exception {
    long sink = 0;
    for (int i = ITERATIONS; --i >= 0;) {
      sink += sanitize(html).length();
    }
    return sink;
  }

  /**
   * Calls {@link #sanitizeUsingPolicyBuilder} ITERATIONS times and sums the
   * output lengths.
   */
  private static long repeatSanitizeUsingPolicyBuilder(String html)
      throws Exception {
    long sink = 0;
    for (int i = ITERATIONS; --i >= 0;) {
      sink += sanitizeUsingPolicyBuilder(html).length();
    }
    return sink;
  }

  /**
   * Parses {@code html} into a DOM tree with a fresh
   * {@link HtmlDocumentBuilder}.
   *
   * @return a small non-negative value (0..255) derived from the identity
   *     hash of the resulting node, so that the caller has a data dependency
   *     on the parse result.
   */
  private static int parseUsingLibhtmlparser(String html) throws Exception {
    HtmlDocumentBuilder parser = new HtmlDocumentBuilder();
    Node node = parser.parse(new InputSource(new StringReader(html)));
    // Unsigned shift: identityHashCode is not documented to be non-negative,
    // and the caller relies on a non-negative contribution.
    return System.identityHashCode(node) >>> 24;
  }

  /**
   * Sanitizes {@code html} with a hand-written policy that forwards text,
   * plus {@code <a>} open/close tags carrying only their {@code href}
   * attributes, to an {@link HtmlStreamRenderer}.
   *
   * @return the rendered output.
   */
  private static String sanitize(String html) throws Exception {
    StringBuilder sb = new StringBuilder(html.length());

    final HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
        sb, FAIL_FAST_HANDLER);

    HtmlSanitizer.sanitize(html, new HtmlSanitizer.Policy() {

      public void openDocument() {
        renderer.openDocument();
      }

      public void closeDocument() {
        renderer.closeDocument();
      }

      public void text(String textChunk) {
        renderer.text(textChunk);
      }

      public void openTag(String elementName, List<String> attrs) {
        // Elements other than <a> are dropped; their text still flows
        // through text().
        if ("a".equals(elementName)) {
          // attrs is walked as alternating name, value entries; every pair
          // whose name is not "href" is removed in place.
          for (ListIterator<String> it = attrs.listIterator(); it.hasNext();) {
            String name = it.next();
            if ("href".equals(name)) {
              it.next();  // Keep the value that follows the name.
            } else {
              it.remove();  // Drop the name ...
              it.next();
              it.remove();  // ... and the value that follows it.
            }
          }
          renderer.openTag(elementName, attrs);
        }
      }

      public void closeTag(String elementName) {
        if ("a".equals(elementName)) {
          renderer.closeTag(elementName);
        }
      }
    });
    return sb.toString();
  }

  /**
   * Builder shared by all calls to {@link #sanitizeUsingPolicyBuilder};
   * created lazily on the first call.  Not synchronized: the benchmark is
   * single-threaded.
   */
  private static HtmlPolicyBuilder policyBuilder;

  /**
   * Sanitizes {@code html} with a policy built by {@link HtmlPolicyBuilder}
   * that allows standard URL protocols, the {@code a} element, and the
   * {@code href} attribute on {@code a}.
   *
   * @return the rendered output.
   */
  private static String sanitizeUsingPolicyBuilder(String html)
      throws Exception {
    if (policyBuilder == null) {
      policyBuilder = new HtmlPolicyBuilder()
          .allowStandardUrlProtocols()
          .allowElements("a")
          .allowAttributes("href").onElements("a");
    }

    StringBuilder sb = new StringBuilder(html.length());

    HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
        sb, FAIL_FAST_HANDLER);

    // A new policy is built for every call because it is bound to this
    // call's renderer.
    HtmlSanitizer.sanitize(html, policyBuilder.build(renderer));
    return sb.toString();
  }

}
