/* * Copyright (C) 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.streamhtmlparser.impl; import com.google.common.base.Preconditions; import com.google.streamhtmlparser.ExternalState; import com.google.streamhtmlparser.Parser; import com.google.streamhtmlparser.ParseException; import com.google.streamhtmlparser.util.HtmlUtils; import java.util.Map; /** * An implementation of the {@code Parser} interface that is common to both * {@code HtmlParser} and {@code JavascriptParser}. * *
<p>Provides methods for parsing input and ensuring that all in-state, * entering-a-state and exiting-a-state callbacks are invoked as appropriate. * *
<p>This class started as abstract but it was found better for testing to
* make it instantiable so that the parsing logic can be tested with dummy
* state transitions.
*/
public class GenericParser implements Parser {
protected final ParserStateTable parserStateTable;
protected final Map Absent any callbacks defined, this function simply determines the
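* next state to switch to based on the {@code ParserStateTable}, which is
* derived from a state-machine configuration file in the original C++ parser.
*
* <p>However some states have specific callbacks defined which when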
* receiving specific characters may decide to overwrite the next state to
* go to. Hence the next state is a function both of the main state table
* in {@code ParserStateTable} as well as specific run-time information
* from the callback functions.
*
* Also note that the callbacks are called in a proper sequence,
* first the exit-state one then the enter-state one and finally the
* in-state one. Changing the order may result in a functional change.
*
* @param input the input character to parse (process)
* @throws ParseException if an unrecoverable error occurred during parsing
*/
@Override
public void parse(char input) throws ParseException {
  // Ask the state table where this character leads from the current state.
  InternalState target = parserStateTable.getNextState(currentState, input);

  if (target == InternalState.INTERNAL_ERROR_STATE) {
    // The message is built first, while currentState still names the state
    // in which the bad character was seen; only then is the parser moved
    // into the error state.
    final String errorMsg = String.format(
        "Unexpected character '%s' in int_state '%s' " +
        "(ext_state '%s')",
        HtmlUtils.encodeCharForAscii(input),
        currentState.getName(),
        getState().getName());
    currentState = InternalState.INTERNAL_ERROR_STATE;
    throw new ParseException(this, errorMsg);
  }

  // Callback order is exit -> enter -> in-state. Each callback may override
  // the target, so the comparison is repeated before the enter callback.
  if (currentState != target) {
    target = handleExitState(currentState, target, input);
    if (currentState != target) {
      // The state being entered is passed as both the "current" and the
      // "expected next" argument.
      target = handleEnterState(target, target, input);
    }
  }

  // The in-state callback runs for every character and has the final say
  // on the state the parser ends up in.
  currentState = handleInState(target, input);
  record(input);

  // Position tracking: a newline starts a new line at column 1, any other
  // character advances the column by one.
  if (input == '\n') {
    lineNumber++;
    columnNumber = 1;
  } else {
    columnNumber++;
  }
}
/**
 * Returns the external state that the parser's current internal state
 * maps to.
 *
 * @return the entry stored in the internal-to-external state table for the
 *     current internal state
 * @throws NullPointerException if the table has no entry for the current
 *     internal state
 */
@Override
public ExternalState getState() {
  if (intToExtStateTable.containsKey(currentState)) {
    return intToExtStateTable.get(currentState);
  }
  throw new NullPointerException("Did not find external state mapping " +
      "For internal state: " + currentState);
}
/**
 * Restores the parser to its initial state and rewinds the position
 * counters to line 1, column 1.
 */
@Override
public void reset() {
  this.columnNumber = 1;
  this.lineNumber = 1;
  this.currentState = this.initialState;
}
/**
 * Overrides the line number tracked by this parser; the new value is what
 * {@link #getLineNumber()} reports until further input advances it.
 *
 * @param lineNumber the line number to continue counting from
 */
@Override
public void setLineNumber(int lineNumber) {
  this.lineNumber = lineNumber;
}
/**
 * Reports the line number currently tracked by this parser.
 *
 * @return the current line number
 */
@Override
public int getLineNumber() {
  return this.lineNumber;
}
/**
 * Overrides the column number tracked by this parser; the new value is what
 * {@link #getColumnNumber()} reports until further input advances it.
 *
 * @param columnNumber the column number to continue counting from
 */
@Override
public void setColumnNumber(int columnNumber) {
  this.columnNumber = columnNumber;
}
/**
 * Reports the column number currently tracked by this parser.
 *
 * @return the current column number
 */
@Override
public int getColumnNumber() {
  return this.columnNumber;
}
/**
 * Exposes the parser's current internal state to code in the same package.
 *
 * @return the internal state the parser is currently in
 */
InternalState getCurrentInternalState() {
  return this.currentState;
}
/**
 * Forces the parser into the given state without consuming any input,
 * invoking the exit-state and enter-state callbacks if the state changes.
 *
 * @param nextState the state to move to; must not be {@code null}
 * @throws ParseException if one of the callbacks reports an unrecoverable
 *     error
 */
protected void setNextState(InternalState nextState) throws ParseException {
  Preconditions.checkNotNull(nextState); // Developer error if it triggers.

  // No character is being parsed here, so the callbacks receive a NUL
  // placeholder instead of real input.
  // TODO: Complicated logic to follow in C++ but clean it up.
  final char placeholder = '\0';

  InternalState resolved = nextState;
  if (currentState != resolved) {
    // The exit callback may redirect the transition.
    resolved = handleExitState(currentState, resolved, placeholder);
    if (currentState != resolved) {
      // NOTE(review): the enter callback's return value is discarded here,
      // whereas parse(char) honours it. Kept as-is to preserve behavior;
      // confirm whether the difference is intentional.
      handleEnterState(resolved, resolved, placeholder);
    }
  }
  currentState = resolved;
}
/**
 * Hook called when the parser is about to enter a state that differs from
 * the one it is in. This default implementation accepts the transition
 * unchanged; subclasses may override it to redirect the parser.
 *
 * @param currentState as invoked from this class, the state being entered
 *     (the same value that is passed as {@code expectedNextState})
 * @param expectedNextState the state the parser is about to enter
 * @param input the character that triggered the transition, or
 *     {@code '\0'} when no character is being parsed
 * @return the state to change to; this implementation returns
 *     {@code expectedNextState}
 * @throws ParseException if an unrecoverable error occurred during parsing
 */
protected InternalState handleEnterState(
    InternalState currentState,
    InternalState expectedNextState,
    char input) throws ParseException {
  return expectedNextState;
}
/**
 * Hook called when the parser is about to leave its current state for a
 * different one. This default implementation accepts the transition
 * unchanged; subclasses may override it to redirect the parser.
 *
 * @param currentState the state the parser is leaving
 * @param expectedNextState the state the parser is expected to move to
 * @param input the character that triggered the transition, or
 *     {@code '\0'} when no character is being parsed
 * @return the state to change to; this implementation returns
 *     {@code expectedNextState}
 * @throws ParseException if an unrecoverable error occurred during parsing
 */
protected InternalState handleExitState(
    InternalState currentState,
    InternalState expectedNextState,
    char input) throws ParseException {
  return expectedNextState;
}
/**
 * Hook called by {@code parse} for every character, after any exit-state
 * and enter-state callbacks have run. This default implementation leaves
 * the state untouched; subclasses may override it to redirect the parser.
 *
 * @param currentState the state the parser is about to settle in for this
 *     character
 * @param input the last character parsed
 * @return the state to change to; this implementation returns
 *     {@code currentState}
 * @throws ParseException if an unrecoverable error occurred during parsing
 */
protected InternalState handleInState(
    InternalState currentState,
    char input) throws ParseException {
  return currentState;
}
/**
 * Per-character hook invoked by {@code parse} once the state for the
 * character has been settled and before the line/column counters are
 * advanced. The default implementation does nothing; derived classes may
 * override it to perform extra processing on every character beyond what
 * the state transitions define.
 *
 * @param input the input character to operate on
 */
protected void record(char input) {
  // Intentionally empty: no per-character processing by default.
}
}
// Displaced Javadoc fragment (apparently part of the parse(char) description):
// "{@code ParserStateTable} which is derived from a state-machine configuration
// file in the original C++ parser."