/** XML parser by Oliver Zeigermann October 10, 2005 */
//
// Lexer-only XML recognizer. The single non-fragment rule DOCUMENT matches a
// whole document as one token; every other rule is a fragment invoked from it.
// While matching, the embedded actions append a human-readable trace
// ("Start Tag: ...", "Attr: ...", ...) to the `lout` array declared in
// @lexer::members.
lexer grammar t012lexerXML;

options {
    language = JavaScript;
}

@lexer::members {
this.lout = [];
this.output = function(line) {
    this.lout.push(line);
};
}

// Entry point: optional XML declaration, optional DOCTYPE, then exactly one
// root element, with optional whitespace between the parts.
DOCUMENT
    : XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
    ;

// <!DOCTYPE root [SYSTEM "sys" | PUBLIC "pub" "sys"] [internal subset]>
// NOTE(review): the body of this rule was an empty literal ('') in the
// reviewed text; it is restored here using the XML 1.0 doctypedecl layout.
fragment DOCTYPE
    : '<!DOCTYPE' WS rootElementName=GENERIC_ID
          {this.output("ROOTELEMENT: "+$rootElementName.text)}
      WS
      (
          ( 'SYSTEM' WS sys1=VALUE
                {this.output("SYSTEM: "+$sys1.text)}
          | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                {this.output("PUBLIC: "+$pub.text)}
                {this.output("SYSTEM: "+$sys2.text)}
          )
          ( WS )?
      )?
      ( dtd=INTERNAL_DTD
            {this.output("INTERNAL DTD: "+$dtd.text)}
      )?
      '>'
    ;

// Internal DTD subset: everything from '[' up to the first ']' (non-greedy).
fragment INTERNAL_DTD
    : '[' (options {greedy=false;} : .)* ']'
    ;

// Processing instruction: <?target attr="value" ... ?>
// NOTE(review): body restored; it was an empty literal in the reviewed text.
fragment PI
    : '<?' target=GENERIC_ID WS?
          {this.output("PI: "+$target.text)}
      ( ATTRIBUTE WS? )* '?>'
    ;

// XML declaration: <?xml version="1.0" ... ?> with a case-insensitive "xml".
// NOTE(review): body restored; it was an empty literal in the reviewed text.
fragment XMLDECL
    : '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
          {this.output("XML declaration")}
      ( ATTRIBUTE WS? )* '?>'
    ;

// An element is either a start tag, any mix of nested elements, character
// data, CDATA sections, comments and processing instructions, then an end
// tag -- or a single self-closing tag.
fragment ELEMENT
    : ( START_TAG
            ( ELEMENT
            | t=PCDATA
                {this.output("PCDATA: \""+$t.text+"\"")}
            | t=CDATA
                {this.output("CDATA: \""+$t.text+"\"")}
            | t=COMMENT
                {this.output("Comment: \""+$t.text+"\"")}
            | pi=PI
            )*
        END_TAG
      | EMPTY_ELEMENT
      )
    ;

// <name attr="value" ...>
fragment START_TAG
    : '<' WS? name=GENERIC_ID WS?
          {this.output("Start Tag: "+$name.text)}
      ( ATTRIBUTE WS? )* '>'
    ;

// <name attr="value" .../>
fragment EMPTY_ELEMENT
    : '<' WS? name=GENERIC_ID WS?
          {this.output("Empty Element: "+$name.text)}
      ( ATTRIBUTE WS? )* '/>'
    ;

// name = "value"  (the reported value text includes its surrounding quotes)
fragment ATTRIBUTE
    : name=GENERIC_ID WS? '=' WS? value=VALUE
          {this.output("Attr: "+$name.text+"="+$value.text)}
    ;

// </name>
// The `name` label must be defined here because the action reads $name.text;
// the reviewed text had lost the matching part of this rule.
fragment END_TAG
    : '</' WS? name=GENERIC_ID WS? '>'
          {this.output("End Tag: "+$name.text)}
    ;

// Comment: non-greedy match from '<!--' up to the first '-->'.
// NOTE(review): body restored; it was an empty literal in the reviewed text.
fragment COMMENT
    : '<!--' (options {greedy=false;} : .)* '-->'
    ;

// CDATA section: non-greedy match from '<![CDATA[' up to the first ']]>'.
// NOTE(review): body restored; it was an empty literal in the reviewed text.
fragment CDATA
    : '<![CDATA[' (options {greedy=false;} : .)* ']]>'
    ;

// Character data: one or more characters other than '<'.
fragment PCDATA
    : (~'<')+
    ;

// Attribute value in double or single quotes; the other quote kind may
// appear inside.
fragment VALUE
    : ( '\"' (~'\"')* '\"'
      | '\'' (~'\'')* '\''
      )
    ;

// Name token: starts with a letter, '_' or ':'; continues greedily with
// letters, digits, '.', '-', '_' or ':'.
fragment GENERIC_ID
    : ( LETTER | '_' | ':' )
      ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
    ;

// ASCII letters only.
fragment LETTER
    : 'a'..'z'
    | 'A'..'Z'
    ;

// One or more spaces, tabs or line terminators (LF, CRLF or CR).
fragment WS
    : ( ' '
      | '\t'
      | ( '\n'
        | '\r\n'
        | '\r'
        )
      )+
    ;