1 // This file is part of TagSoup and is Copyright 2002-2008 by John Cowan. 2 // 3 // TagSoup is licensed under the Apache License, 4 // Version 2.0. You may obtain a copy of this license at 5 // http://www.apache.org/licenses/LICENSE-2.0 . You may also have 6 // additional legal rights not granted by this license. 7 // 8 // TagSoup is distributed in the hope that it will be useful, but 9 // unless required by applicable law or agreed to in writing, TagSoup 10 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 11 // OF ANY KIND, either express or implied; not even the implied warranty 12 // of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 13 // 14 // 15 // This file is part of TagSoup. 16 // 17 // This program is free software; you can redistribute it and/or modify 18 // it under the terms of the GNU General Public License as published by 19 // the Free Software Foundation; either version 2 of the License, or 20 // (at your option) any later version. You may also distribute 21 // and/or modify it under version 2.1 of the Academic Free License. 22 // 23 // This program is distributed in the hope that it will be useful, 24 // but WITHOUT ANY WARRANTY; without even the implied warranty of 25 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 26 // 27 // 28 // PYX Scanner 29 30 package org.ccil.cowan.tagsoup; 31 import java.io.*; 32 import org.xml.sax.SAXException; 33 34 /** 35 A Scanner that accepts PYX format instead of HTML. 36 Useful primarily for debugging. 37 **/ 38 public class PYXScanner implements Scanner { 39 resetDocumentLocator(String publicid, String systemid)40 public void resetDocumentLocator(String publicid, String systemid) { 41 // Need this method for interface compatibility, but note 42 // that PyxScanner does not implement Locator. 43 } 44 scan(Reader r, ScanHandler h)45 public void scan(Reader r, ScanHandler h) throws IOException, SAXException { 46 BufferedReader br = new BufferedReader(r); 47 String s; 48 char[] buff = null; 49 boolean instag = false; 50 while ((s = br.readLine()) != null) { 51 int size = s.length(); 52 if (buff == null || buff.length < size) { 53 buff = new char[size]; 54 } 55 s.getChars(0, size, buff, 0); 56 switch (buff[0]) { 57 case '(': 58 if (instag) { 59 h.stagc(buff, 0, 0); 60 instag = false; 61 } 62 h.gi(buff, 1, size - 1); 63 instag = true; 64 break; 65 case ')': 66 if (instag) { 67 h.stagc(buff, 0, 0); 68 instag = false; 69 } 70 h.etag(buff, 1, size - 1); 71 break; 72 case '?': 73 if (instag) { 74 h.stagc(buff, 0, 0); 75 instag = false; 76 } 77 h.pi(buff, 1, size - 1); 78 break; 79 case 'A': 80 int sp = s.indexOf(' '); 81 h.aname(buff, 1, sp - 1); 82 h.aval(buff, sp + 1, size - sp - 1); 83 break; 84 case '-': 85 if (instag) { 86 h.stagc(buff, 0, 0); 87 instag = false; 88 } 89 if (s.equals("-\\n")) { 90 buff[0] = '\n'; 91 h.pcdata(buff, 0, 1); 92 } 93 else { 94 // FIXME: 95 // Does not decode \t and \\ in input 96 h.pcdata(buff, 1, size - 1); 97 } 98 break; 99 case 'E': 100 if (instag) { 101 h.stagc(buff, 0, 0); 102 instag = false; 103 } 104 h.entity(buff, 1, size - 1); 105 break; 106 default: 107 // System.err.print("Gotcha "); 108 // System.err.print(s); 109 // System.err.print('\n'); 110 break; 111 } 112 } 113 h.eof(buff, 0, 0); 114 } 115 startCDATA()116 public void startCDATA() { } 117 main(String[] argv)118 public static void main(String[] argv) throws IOException, SAXException { 119 Scanner s = new PYXScanner(); 120 Reader r = new InputStreamReader(System.in, "UTF-8"); 121 Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8")); 122 s.scan(r, new PYXWriter(w)); 123 } 124 } 125