1 package com.google.polo.json; 2 3 /* 4 Copyright (c) 2002 JSON.org 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy 7 of this software and associated documentation files (the "Software"), to deal 8 in the Software without restriction, including without limitation the rights 9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 copies of the Software, and to permit persons to whom the Software is 11 furnished to do so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 The Software shall be used for Good, not Evil. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 SOFTWARE. 25 */ 26 27 /** 28 * The XMLTokener extends the JSONTokener to provide additional methods 29 * for the parsing of XML texts. 30 * @author JSON.org 31 * @version 2008-09-18 32 */ 33 public class XMLTokener extends JSONTokener { 34 35 36 /** The table of entity values. It initially contains Character values for 37 * amp, apos, gt, lt, quot. 38 */ 39 public static final java.util.HashMap entity; 40 41 static { 42 entity = new java.util.HashMap(8); 43 entity.put("amp", XML.AMP); 44 entity.put("apos", XML.APOS); 45 entity.put("gt", XML.GT); 46 entity.put("lt", XML.LT); 47 entity.put("quot", XML.QUOT); 48 } 49 50 /** 51 * Construct an XMLTokener from a string. 52 * @param s A source string. 53 */ XMLTokener(String s)54 public XMLTokener(String s) { 55 super(s); 56 } 57 58 /** 59 * Get the text in the CDATA block. 60 * @return The string up to the <code>]]></code>. 61 * @throws JSONException If the <code>]]></code> is not found. 62 */ nextCDATA()63 public String nextCDATA() throws JSONException { 64 char c; 65 int i; 66 StringBuffer sb = new StringBuffer(); 67 for (;;) { 68 c = next(); 69 if (c == 0) { 70 throw syntaxError("Unclosed CDATA"); 71 } 72 sb.append(c); 73 i = sb.length() - 3; 74 if (i >= 0 && sb.charAt(i) == ']' && 75 sb.charAt(i + 1) == ']' && sb.charAt(i + 2) == '>') { 76 sb.setLength(i); 77 return sb.toString(); 78 } 79 } 80 } 81 82 83 /** 84 * Get the next XML outer token, trimming whitespace. There are two kinds 85 * of tokens: the '<' character which begins a markup tag, and the content 86 * text between markup tags. 87 * 88 * @return A string, or a '<' Character, or null if there is no more 89 * source text. 90 * @throws JSONException 91 */ nextContent()92 public Object nextContent() throws JSONException { 93 char c; 94 StringBuffer sb; 95 do { 96 c = next(); 97 } while (Character.isWhitespace(c)); 98 if (c == 0) { 99 return null; 100 } 101 if (c == '<') { 102 return XML.LT; 103 } 104 sb = new StringBuffer(); 105 for (;;) { 106 if (c == '<' || c == 0) { 107 back(); 108 return sb.toString().trim(); 109 } 110 if (c == '&') { 111 sb.append(nextEntity(c)); 112 } else { 113 sb.append(c); 114 } 115 c = next(); 116 } 117 } 118 119 120 /** 121 * Return the next entity. These entities are translated to Characters: 122 * <code>& ' > < "</code>. 123 * @param a An ampersand character. 124 * @return A Character or an entity String if the entity is not recognized. 125 * @throws JSONException If missing ';' in XML entity. 126 */ nextEntity(char a)127 public Object nextEntity(char a) throws JSONException { 128 StringBuffer sb = new StringBuffer(); 129 for (;;) { 130 char c = next(); 131 if (Character.isLetterOrDigit(c) || c == '#') { 132 sb.append(Character.toLowerCase(c)); 133 } else if (c == ';') { 134 break; 135 } else { 136 throw syntaxError("Missing ';' in XML entity: &" + sb); 137 } 138 } 139 String s = sb.toString(); 140 Object e = entity.get(s); 141 return e != null ? e : a + s + ";"; 142 } 143 144 145 /** 146 * Returns the next XML meta token. This is used for skipping over <!...> 147 * and <?...?> structures. 148 * @return Syntax characters (<code>< > / = ! ?</code>) are returned as 149 * Character, and strings and names are returned as Boolean. We don't care 150 * what the values actually are. 151 * @throws JSONException If a string is not properly closed or if the XML 152 * is badly structured. 153 */ nextMeta()154 public Object nextMeta() throws JSONException { 155 char c; 156 char q; 157 do { 158 c = next(); 159 } while (Character.isWhitespace(c)); 160 switch (c) { 161 case 0: 162 throw syntaxError("Misshaped meta tag"); 163 case '<': 164 return XML.LT; 165 case '>': 166 return XML.GT; 167 case '/': 168 return XML.SLASH; 169 case '=': 170 return XML.EQ; 171 case '!': 172 return XML.BANG; 173 case '?': 174 return XML.QUEST; 175 case '"': 176 case '\'': 177 q = c; 178 for (;;) { 179 c = next(); 180 if (c == 0) { 181 throw syntaxError("Unterminated string"); 182 } 183 if (c == q) { 184 return Boolean.TRUE; 185 } 186 } 187 default: 188 for (;;) { 189 c = next(); 190 if (Character.isWhitespace(c)) { 191 return Boolean.TRUE; 192 } 193 switch (c) { 194 case 0: 195 case '<': 196 case '>': 197 case '/': 198 case '=': 199 case '!': 200 case '?': 201 case '"': 202 case '\'': 203 back(); 204 return Boolean.TRUE; 205 } 206 } 207 } 208 } 209 210 211 /** 212 * Get the next XML Token. These tokens are found inside of angle 213 * brackets. It may be one of these characters: <code>/ > = ! ?</code> or it 214 * may be a string wrapped in single quotes or double quotes, or it may be a 215 * name. 216 * @return a String or a Character. 217 * @throws JSONException If the XML is not well formed. 218 */ nextToken()219 public Object nextToken() throws JSONException { 220 char c; 221 char q; 222 StringBuffer sb; 223 do { 224 c = next(); 225 } while (Character.isWhitespace(c)); 226 switch (c) { 227 case 0: 228 throw syntaxError("Misshaped element"); 229 case '<': 230 throw syntaxError("Misplaced '<'"); 231 case '>': 232 return XML.GT; 233 case '/': 234 return XML.SLASH; 235 case '=': 236 return XML.EQ; 237 case '!': 238 return XML.BANG; 239 case '?': 240 return XML.QUEST; 241 242 // Quoted string 243 244 case '"': 245 case '\'': 246 q = c; 247 sb = new StringBuffer(); 248 for (;;) { 249 c = next(); 250 if (c == 0) { 251 throw syntaxError("Unterminated string"); 252 } 253 if (c == q) { 254 return sb.toString(); 255 } 256 if (c == '&') { 257 sb.append(nextEntity(c)); 258 } else { 259 sb.append(c); 260 } 261 } 262 default: 263 264 // Name 265 266 sb = new StringBuffer(); 267 for (;;) { 268 sb.append(c); 269 c = next(); 270 if (Character.isWhitespace(c)) { 271 return sb.toString(); 272 } 273 switch (c) { 274 case 0: 275 return sb.toString(); 276 case '>': 277 case '/': 278 case '=': 279 case '!': 280 case '?': 281 case '[': 282 case ']': 283 back(); 284 return sb.toString(); 285 case '<': 286 case '"': 287 case '\'': 288 throw syntaxError("Bad character in a name"); 289 } 290 } 291 } 292 } 293 294 295 /** 296 * Skip characters until past the requested string. 297 * If it is not found, we are left at the end of the source with a result of false. 298 * @param to A string to skip past. 299 * @throws JSONException 300 */ skipPast(String to)301 public boolean skipPast(String to) throws JSONException { 302 boolean b; 303 char c; 304 int i; 305 int j; 306 int offset = 0; 307 int n = to.length(); 308 char[] circle = new char[n]; 309 310 /* 311 * First fill the circle buffer with as many characters as are in the 312 * to string. If we reach an early end, bail. 313 */ 314 315 for (i = 0; i < n; i += 1) { 316 c = next(); 317 if (c == 0) { 318 return false; 319 } 320 circle[i] = c; 321 } 322 /* 323 * We will loop, possibly for all of the remaining characters. 324 */ 325 for (;;) { 326 j = offset; 327 b = true; 328 /* 329 * Compare the circle buffer with the to string. 330 */ 331 for (i = 0; i < n; i += 1) { 332 if (circle[j] != to.charAt(i)) { 333 b = false; 334 break; 335 } 336 j += 1; 337 if (j >= n) { 338 j -= n; 339 } 340 } 341 /* 342 * If we exit the loop with b intact, then victory is ours. 343 */ 344 if (b) { 345 return true; 346 } 347 /* 348 * Get the next character. If there isn't one, then defeat is ours. 349 */ 350 c = next(); 351 if (c == 0) { 352 return false; 353 } 354 /* 355 * Shove the character in the circle buffer and advance the 356 * circle offset. The offset is mod n. 357 */ 358 circle[offset] = c; 359 offset += 1; 360 if (offset >= n) { 361 offset -= n; 362 } 363 } 364 } 365 } 366