/* * Copyright (C) 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.streamhtmlparser; import com.google.streamhtmlparser.impl.HtmlParserImpl; import java.util.Set; import java.util.logging.Logger; /** * A factory class to obtain instances of an {@link HtmlParser}. * Currently each instance is a new object given these are fairly * light-weight. * *
In the unlikely case that this class fails to initialize properly
* (a developer error), an error is emitted to the error console and the logs
* and the specialized parser creation methods will throw
* an {@link AssertionError} on all invocations.
*/
public class HtmlParserFactory {
private static final Logger logger =
Logger.getLogger(HtmlParserFactory.class.getName());
/**
 * Additional options that may be supplied when creating an
 * {@code HtmlParser} using
 * {@link HtmlParserFactory#createParserInAttribute(HtmlParser.ATTR_TYPE,
 * boolean, Set)}.
 */
public enum AttributeOptions {
  /**
   * Indicates that the attribute value is Javascript-quoted. Only takes
   * effect for Javascript-accepting attributes - as identified by
   * {@link HtmlParser.ATTR_TYPE#JS} - and only when the attribute is also
   * HTML quoted.
   */
  JS_QUOTED,
  /**
   * Indicates that the attribute value is only a part of a URL as opposed
   * to a full URL. In particular, the value is not at the start of a URL
   * and hence does not necessitate validation of the URL scheme.
   * Only valid for URI-accepting attributes - as identified by
   * {@link HtmlParser.ATTR_TYPE#URI}.
   */
  URL_PARTIAL,
}
/**
 * Additional options that may be supplied when creating an
 * {@code HtmlParser} using
 * {@link HtmlParserFactory#createParserInMode(HtmlParser.Mode, Set)}.
 */
public enum ModeOptions {
  /**
   * Indicates that the parser is inside a quoted {@code String}. Only
   * valid in the {@link HtmlParser.Mode#JS} mode.
   */
  JS_QUOTED
}
// Shared parser instances. Each field holds its own parser obtained from
// createParser(); the static initializer below then calls
// initializeParsers(), which feeds input to them.
// NOTE(review): presumably each field name describes the state its parser
// is left in after initialization (a "Q" after "Attr" looking like "inside a
// quoted attribute value"), and the createParserInXXX methods hand out
// parsers based on these instances -- confirm against initializeParsers().
private static final HtmlParser parserInDefaultAttr = createParser();
private static final HtmlParser parserInDefaultAttrQ = createParser();
private static final HtmlParser parserInUriAttrComplete = createParser();
private static final HtmlParser parserInUriAttrQComplete = createParser();
private static final HtmlParser parserInUriAttrPartial = createParser();
private static final HtmlParser parserInUriAttrQPartial = createParser();
private static final HtmlParser parserInJsAttr = createParser();
private static final HtmlParser parserInJsAttrQ = createParser();
private static final HtmlParser parserInQJsAttr = createParser();
private static final HtmlParser parserInStyleAttr = createParser();
private static final HtmlParser parserInStyleAttrQ = createParser();
private static final HtmlParser parserInJsQ = createParser();
/**
* Protects all the createParserXXX methods by throwing a run-time exception
* if this class failed to initialize properly.
*/
private static boolean initSuccess = false;
static {
try {
initializeParsers();
initSuccess = true;
} catch (ParseException e) {
// Log a severe error and print it to stderr along with a stack trace.
String error = HtmlParserFactory.class.getName() +
" Failed initialization: " + e.getMessage();
logger.severe(error);
System.err.println(error);
e.printStackTrace();
}
}
// Static class: the private constructor prevents instantiation, since the
// factory methods of this class are all static.
private HtmlParserFactory() {
} // COV_NF_LINE
/**
 * Returns an {@code HtmlParser} object ready to parse HTML input.
 * Every call constructs a new {@link HtmlParserImpl}; no instance is shared
 * between callers.
 *
 * @return a newly constructed {@code HtmlParser}
 */
public static HtmlParser createParser() {
return new HtmlParserImpl();
}
/**
* Returns an {@code HtmlParser} object initialized with the
* requested Mode. Provide non {@code null} options to provide
* a more precise initialization with the desired Mode.
*
* @param mode the mode to reset the parser with
* @param options additional options or {@code null} for none
* @return an {@code HtmlParser} in the provided mode
* @throws AssertionError when this class failed to initialize
*/
public static HtmlParser createParserInMode(HtmlParser.Mode mode,
Set For example, to create a parser in a state akin to that
* after the parser has parsed "<a href=\"", invoke:
* You must provide the proper value of quoting or the parser
* will go into an unexpected state.
* As a special-case, when called with the {@code HtmlParser.ATTR_TYPE}
* of {@code HtmlParser.ATTR_TYPE.NONE}, the parser is created in a state
* inside an HTML tag where it expects an attribute name not an attribute
* value. It becomes equivalent to a parser initialized in the
* {@code HTML_IN_TAG} mode.
*
* @param attrtype the attribute type which the parser should be in
* @param quoted whether the attribute value is enclosed in double quotes
* @param options additional options or {@code null} for none
* @return an {@code HtmlParser} initialized in the given attribute type
* and quoting
* @throws AssertionError when this class failed to initialize
*/
public static HtmlParser createParserInAttribute(
HtmlParser.ATTR_TYPE attrtype,
boolean quoted, Set In the very unexpected case of the parsing failing (developer error),
* this class will fail to initialize properly.
*
* In addition:
*
* createParserInAttribute(HtmlParser.ATTR_TYPE.URI, true)}
*
*
*
*
*
* @throws ParseException if parsing failed.
*/
private static void initializeParsers() throws ParseException {
parserInDefaultAttr.parse("