// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
//
// TagSoup is licensed under the Apache License,
// Version 2.0.  You may obtain a copy of this license at
// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
// additional legal rights not granted by this license.
//
// TagSoup is distributed in the hope that it will be useful, but
// unless required by applicable law or agreed to in writing, TagSoup
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
// OF ANY KIND, either express or implied; not even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
//
// PYX Scanner

package org.ccil.cowan.tagsoup;
import java.io.*;
import org.xml.sax.SAXException;

/**
A Scanner that accepts PYX format instead of HTML.
Useful primarily for debugging.
<p>
Input is read one line at a time; the first character of each line
selects the event reported to the ScanHandler and the rest of the
line is its payload:
<ul>
<li><code>(</code> element name (start-tag)</li>
<li><code>)</code> element name (end-tag)</li>
<li><code>A</code> attribute name, a space, then the attribute value</li>
<li><code>-</code> character data</li>
<li><code>?</code> processing instruction</li>
<li><code>E</code> entity</li>
</ul>
Blank lines and lines starting with any other character are ignored.
**/
public class PYXScanner implements Scanner {

	/**
	Does nothing.
	Need this method for interface compatibility, but note
	that PyxScanner does not implement Locator.
	@param publicid ignored
	@param systemid ignored
	**/
	public void resetDocumentLocator(String publicid, String systemid) {
		// Intentionally empty.
		}

	/**
	Reads PYX lines from the reader until end of input and reports each
	one to the handler, finishing with a single <code>eof</code> call.
	The reader is wrapped in a BufferedReader but is not closed here.
	@param r source of PYX text
	@param h handler that receives the scan events
	@throws IOException if reading from <code>r</code> fails
	@throws SAXException if the handler rejects an event
	**/
	public void scan(Reader r, ScanHandler h) throws IOException, SAXException {
		BufferedReader br = new BufferedReader(r);
		String s;
		// Start with an empty (non-null) buffer so that eof() is never
		// handed a null array, even when the input has no lines at all.
		char[] buff = new char[0];
		// True between a '(' line and the stagc() call that closes it;
		// 'A' lines are the only ones that leave the start-tag open.
		boolean instag = false;
		while ((s = br.readLine()) != null) {
			int size = s.length();
			if (size == 0) {
				// A blank line has no type character.  Looking at buff[0]
				// here would either run off an empty array or re-dispatch
				// the stale first character of the previous line.
				continue;
				}
			if (buff.length < size) {
				buff = new char[size];
				}
			s.getChars(0, size, buff, 0);
			switch (buff[0]) {
			case '(':
				closeStartTag(h, buff, instag);
				h.gi(buff, 1, size - 1);
				instag = true;
				break;
			case ')':
				closeStartTag(h, buff, instag);
				instag = false;
				h.etag(buff, 1, size - 1);
				break;
			case '?':
				closeStartTag(h, buff, instag);
				instag = false;
				h.pi(buff, 1, size - 1);
				break;
			case 'A': {
				int sp = s.indexOf(' ');
				if (sp < 0) {
					// No separator: report the rest of the line as the
					// name with an empty value instead of passing
					// negative lengths to the handler.
					h.aname(buff, 1, size - 1);
					h.aval(buff, size, 0);
					}
				else {
					h.aname(buff, 1, sp - 1);
					h.aval(buff, sp + 1, size - sp - 1);
					}
				break;
				}
			case '-': {
				closeStartTag(h, buff, instag);
				instag = false;
				// Decoding happens in place; the result is never longer
				// than the escaped text, so it still starts at index 1.
				int decoded = decodeEscapes(buff, 1, size);
				h.pcdata(buff, 1, decoded);
				break;
				}
			case 'E':
				closeStartTag(h, buff, instag);
				instag = false;
				h.entity(buff, 1, size - 1);
				break;
			default:
				// Unrecognized line types are silently skipped.
				break;
				}
			}
		h.eof(buff, 0, 0);
		}

	/**
	Reports the end of a pending start-tag, if there is one.
	The caller is responsible for clearing its own in-tag flag.
	@param h handler to notify
	@param buff current line buffer, passed with a zero-length range
	@param instag whether a start-tag is currently open
	**/
	private static void closeStartTag(ScanHandler h, char[] buff, boolean instag)
			throws IOException, SAXException {
		if (instag) {
			h.stagc(buff, 0, 0);
			}
		}

	/**
	Decodes the escapes <code>\n</code>, <code>\t</code> and
	<code>\\</code> in place within <code>buff[start..end)</code>.
	Any other backslash sequence, and a trailing lone backslash, is
	left exactly as written.
	@param buff buffer holding the escaped text; overwritten from
		<code>start</code> with the decoded text
	@param start index of the first escaped character
	@param end index one past the last escaped character
	@return number of decoded characters now stored at <code>start</code>
	**/
	private static int decodeEscapes(char[] buff, int start, int end) {
		int out = start;
		for (int in = start; in < end; in++) {
			char c = buff[in];
			if (c == '\\' && in + 1 < end) {
				char next = buff[in + 1];
				if (next == 'n') {
					c = '\n';
					in++;
					}
				else if (next == 't') {
					c = '\t';
					in++;
					}
				else if (next == '\\') {
					// c is already a single backslash; just skip the pair's
					// second character.
					in++;
					}
				}
			buff[out++] = c;
			}
		return out - start;
		}

	/**
	Does nothing; present only because the Scanner interface requires it.
	**/
	public void startCDATA() { }

	/**
	Reads PYX as UTF-8 from standard input, scans it, and passes the
	events to a PYXWriter that writes UTF-8 to standard output.
	@param argv ignored
	@throws IOException if reading or writing fails
	@throws SAXException if the writer rejects an event
	**/
	public static void main(String[] argv) throws IOException, SAXException {
		Scanner s = new PYXScanner();
		Reader r = new InputStreamReader(System.in, "UTF-8");
		Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8"));
		s.scan(r, new PYXWriter(w));
		}
	}