• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
2 //
3 // TagSoup is licensed under the Apache License,
4 // Version 2.0.  You may obtain a copy of this license at
5 // http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
6 // additional legal rights not granted by this license.
7 //
8 // TagSoup is distributed in the hope that it will be useful, but
9 // unless required by applicable law or agreed to in writing, TagSoup
10 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
11 // OF ANY KIND, either express or implied; not even the implied warranty
12 // of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 //
14 //
15 // PYX Scanner
16 
17 package org.ccil.cowan.tagsoup;
18 import java.io.*;
19 import org.xml.sax.SAXException;
20 
21 /**
22 A Scanner that accepts PYX format instead of HTML.
23 Useful primarily for debugging.
24 **/
25 public class PYXScanner implements Scanner {
26 
resetDocumentLocator(String publicid, String systemid)27         public void resetDocumentLocator(String publicid, String systemid) {
28 	// Need this method for interface compatibility, but note
29 	// that PyxScanner does not implement Locator.
30         }
31 
scan(Reader r, ScanHandler h)32 	public void scan(Reader r, ScanHandler h) throws IOException, SAXException {
33 		BufferedReader br = new BufferedReader(r);
34 		String s;
35 		char[] buff = null;
36 		boolean instag = false;
37 		while ((s = br.readLine()) != null) {
38 			int size = s.length();
39 			if (buff == null || buff.length < size) {
40 				buff = new char[size];
41 				}
42 			s.getChars(0, size, buff, 0);
43 			switch (buff[0]) {
44 			case '(':
45 				if (instag) {
46 					h.stagc(buff, 0, 0);
47 					instag = false;
48 					}
49 				h.gi(buff, 1, size - 1);
50 				instag = true;
51 				break;
52 			case ')':
53 				if (instag) {
54 					h.stagc(buff, 0, 0);
55 					instag = false;
56 					}
57 				h.etag(buff, 1, size - 1);
58 				break;
59 			case '?':
60 				if (instag) {
61 					h.stagc(buff, 0, 0);
62 					instag = false;
63 					}
64 				h.pi(buff, 1, size - 1);
65 				break;
66 			case 'A':
67 				int sp = s.indexOf(' ');
68 				h.aname(buff, 1, sp - 1);
69 				h.aval(buff, sp + 1, size - sp - 1);
70 				break;
71 			case '-':
72 				if (instag) {
73 					h.stagc(buff, 0, 0);
74 					instag = false;
75 					}
76 				if (s.equals("-\\n")) {
77 					buff[0] = '\n';
78 					h.pcdata(buff, 0, 1);
79 					}
80 				else {
81 					// FIXME:
82 					// Does not decode \t and \\ in input
83 					h.pcdata(buff, 1, size - 1);
84 					}
85 				break;
86 			case 'E':
87 				if (instag) {
88 					h.stagc(buff, 0, 0);
89 					instag = false;
90 					}
91 				h.entity(buff, 1, size - 1);
92 				break;
93 			default:
94 //				System.err.print("Gotcha ");
95 //				System.err.print(s);
96 //				System.err.print('\n');
97 				break;
98 				}
99 			}
100 		h.eof(buff, 0, 0);
101 		}
102 
startCDATA()103 	public void startCDATA() { }
104 
main(String[] argv)105 	public static void main(String[] argv) throws IOException, SAXException {
106 		Scanner s = new PYXScanner();
107 		Reader r = new InputStreamReader(System.in, "UTF-8");
108 		Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8"));
109 		s.scan(r, new PYXWriter(w));
110 		}
111 	}
112