/*
 * Copyright (C) 2008-2009 Marc Blank
 * Licensed to The Android Open Source Project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.exchange.adapter;

import android.content.Context;
import android.util.Log;

import com.android.exchange.Eas;
import com.android.exchange.EasException;
import com.android.exchange.utility.FileLogger;
import com.google.common.annotations.VisibleForTesting;

import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;

/**
 * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that
 * EAS uses (as defined in the EAS specification)
 *
 */
public abstract class Parser {
    private static final boolean LOG_VERBOSE = false;

    // The following constants are Wbxml standard
    public static final int START_DOCUMENT = 0;
    public static final int DONE = 1;
    public static final int START = 2;
    public static final int END = 3;
    public static final int TEXT = 4;
    // NOTE(review): END_DOCUMENT shares the value 3 with END, so the two cannot be told apart
    // by value. Left unchanged because callers may compare against either constant.
    public static final int END_DOCUMENT = 3;
    private static final int NOT_FETCHED = Integer.MIN_VALUE;
    private static final int NOT_ENDED = Integer.MIN_VALUE;
    private static final int EOF_BYTE = -1;
    private boolean logging = false;
    private boolean capture = false;
    private String logTag = "EAS Parser";

    // Where tags start in a page
    private static final int TAG_BASE = 5;

    // Every byte read while capture is on (see captureOn/captureOff)
    private ArrayList<Integer> captureArray;

    // The input stream for this parser
    private InputStream in;

    // The current tag depth
    private int depth;

    // The upcoming (saved) id from the stream
    private int nextId = NOT_FETCHED;

    // The current tag table (i.e. the tag table for the current page)
    private String[] tagTable;

    // An array of tag tables, as defined in EasTags
    static private String[][] tagTables = new String[Tags.pages.length + 1][];

    // The stack of names of tags being processed; used when debug = true
    private String[] nameArray = new String[32];

    // The stack of tags being processed
    private int[] startTagArray = new int[32];

    // The following vars are available to all to avoid method calls that represent the state of
    // the parser at any given time
    public int endTag = NOT_ENDED;

    public int startTag;

    // The type of the last token read
    public int type;

    // The current page, already shifted left by Tags.PAGE_SHIFT so it can be OR'd with a tag
    public int page;

    // The current tag
    public int tag;

    // The name of the current tag
    public String name;

    // Whether the current tag is associated with content (a value)
    private boolean noContent;

    // The value read, as a String.  Only one of text or num will be valid, depending on whether the
    // value was requested as a String or an int (to avoid wasted effort in parsing)
    public String text;

    // The value read, as an int
    public int num;

    // The value read, as bytes
    public byte[] bytes;

    /**
     * Generated when the parser comes to EOF prematurely during parsing (i.e. in error)
     */
    public class EofException extends IOException {
        private static final long serialVersionUID = 1L;
    }

    /**
     * An EmptyStreamException is an EofException that occurs reading the first byte in the parser's
     * input stream; in other words, the stream had no content.
     */
    public class EmptyStreamException extends EofException {
        private static final long serialVersionUID = 1L;
    }

    /**
     * Thrown by {@link #nextTag(int)} when the end of the document is reached while a tag other
     * than START_DOCUMENT was being processed.
     */
    public class EodException extends IOException {
        private static final long serialVersionUID = 1L;
    }

    /**
     * Signals that the stream content is not well-formed WBXML as understood by this parser.
     */
    public class EasParserException extends IOException {
        private static final long serialVersionUID = 1L;

        EasParserException() {
            super("WBXML format error");
        }

        EasParserException(String reason) {
            super(reason);
        }
    }

    /**
     * Default parse implementation; does nothing and returns false.
     *
     * @return false in this base implementation
     * @throws IOException
     * @throws EasException
     */
    public boolean parse() throws IOException, EasException {
        return false;
    }

    /**
     * Initialize the tag tables; they are constant, so this is done once when the class is
     * loaded rather than every time a parser instance is created. Empty pages are left null.
     */
    static {
        String[][] pages = Tags.pages;
        for (int i = 0; i < pages.length; i++) {
            String[] page = pages[i];
            if (page.length > 0) {
                tagTables[i] = page;
            }
        }
    }

    /**
     * Create a parser on a fresh input stream; the WBXML header is consumed immediately.
     *
     * @param in the InputStream to parse
     * @throws IOException if the header cannot be read (EmptyStreamException if the stream has no
     * content at all)
     */
    public Parser(InputStream in) throws IOException {
        setInput(in, true);
        logging = Eas.PARSER_LOG;
    }

    /**
     * Constructor for use when switching parsers within a input stream
     * @param parser an existing, initialized parser
     * @throws IOException
     */
    public Parser(Parser parser) throws IOException {
        setInput(parser.in, false);
        logging = Eas.PARSER_LOG;
    }

    /**
     * Set the debug state of the parser.  When debugging is on, every token is logged (Log.v) to
     * the console.
     *
     * @param val the desired state for debug output
     */
    public void setDebug(boolean val) {
        logging = val;
    }

    /**
     * @return the InputStream this parser is reading from
     */
    protected InputStream getInput() {
        return in;
    }

    /**
     * Set the tag used for logging.  When debugging is on, every token is logged (Log.v) to
     * the console.
     *
     * @param val the logging tag
     */
    public void setLoggingTag(String val) {
        logTag = val;
    }

    /**
     * Turns on data capture; this is used to create test streams that represent "live" data and
     * can be used against the various parsers.
     */
    public void captureOn() {
        capture = true;
        captureArray = new ArrayList<Integer>();
    }

    /**
     * Turns off data capture; writes the captured data to a specified file.  This is debug
     * support, so failures to write the file are silently ignored.  Calling this without a prior
     * captureOn() is a no-op.
     *
     * @param context the Context used to open the output file
     * @param file the name of the file to write the captured data to
     */
    public void captureOff(Context context, String file) {
        // Stop capturing whether or not the dump below succeeds
        capture = false;
        if (captureArray == null) {
            // captureOn() was never called; there is nothing to write
            return;
        }
        FileOutputStream out = null;
        try {
            out = context.openFileOutput(file, Context.MODE_WORLD_WRITEABLE);
            out.write(captureArray.toString().getBytes());
        } catch (FileNotFoundException e) {
            // This is debug code; exceptions aren't interesting.
        } catch (IOException e) {
            // This is debug code; exceptions aren't interesting.
        } finally {
            // Always release the file handle, even if the write failed
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    // This is debug code; exceptions aren't interesting.
                }
            }
        }
    }

    /**
     * Return the value of the current tag, as a byte array.  Note that the result of this call
     * is indeterminate, and possibly null, if the value of the tag is not a byte array
     *
     * @return the byte array value of the current tag
     * @throws IOException
     */
    public byte[] getValueBytes() throws IOException {
        getValue();
        return bytes;
    }

    /**
     * Return the value of the current tag, as a String.  Note that the result of this call is
     * indeterminate, and possibly null, if the value of the tag is not an immediate string
     *
     * @return the String value of the current tag
     * @throws IOException
     */
    public String getValue() throws IOException {
        // The false argument tells getNext to return the value as a String
        getNext(false);
        // This means there was no value given, just <Foo/>; we'll return empty string for now
        if (type == END) {
            if (logging) {
                log("No value for tag: " + tagTable[startTag - TAG_BASE]);
            }
            return "";
        }
        // Save the value
        String val = text;
        // Read the next token; it had better be the end of the current tag
        getNext(false);
        // If not, throw an exception
        if (type != END) {
            throw new IOException("No END found!");
        }
        return val;
    }

    /**
     * Return the value of the current tag, as an integer.  Note that the value of this call is
     * indeterminate if the value of this tag is not an immediate string parsed as an integer
     *
     * @return the integer value of the current tag
     * @throws IOException
     */
    public int getValueInt() throws IOException {
        // The true argument to getNext indicates the desire for an integer return value
        getNext(true);
        if (type == END) {
            return 0;
        }
        // Save the value
        int val = num;
        // Read the next token; it had better be the end of the current tag
        getNext(false);
        // If not, throw an exception
        if (type != END) {
            throw new IOException("No END found!");
        }
        return val;
    }

    /**
     * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to
     * mark the end of the current tag and end of document.  If we hit end of document without
     * looking for it, generate an EodException.  The tag returned consists of the page number
     * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream.  Thus, all tags returned
     * are unique.
     *
     * @param endingTag the tag that would represent the end of the tag we're processing
     * @return the next tag found
     * @throws IOException
     */
    public int nextTag(int endingTag) throws IOException {
        // Lose the page information
        endTag = endingTag & Tags.PAGE_MASK;
        while (getNext(false) != DONE) {
            // If we're a start, set tag to include the page and return it
            if (type == START) {
                tag = page | startTag;
                return tag;
            // If we're at the ending tag we're looking for, return the END signal.
            // Note that pop() assigns the popped tag to both startTag and endTag, so an END read
            // from the stream always satisfies this comparison.
            } else if (type == END && startTag == endTag) {
                return END;
            }
        }
        // We're at end of document here.  If we're looking for it, return END_DOCUMENT
        if (endTag == START_DOCUMENT) {
            return END_DOCUMENT;
        }
        // Otherwise, we've prematurely hit end of document, so exception out
        // EodException is a subclass of IOException; this will be treated as an IO error by
        // ExchangeService
        throw new EodException();
    }

    /**
     * Skip anything found in the stream until the end of the current tag is reached.  This can be
     * used to ignore stretches of xml that aren't needed by the parser.
     *
     * @throws IOException
     */
    public void skipTag() throws IOException {
        int thisTag = startTag;
        // Just loop until we hit the end of the current tag
        while (getNext(false) != DONE) {
            if (type == END && startTag == thisTag) {
                return;
            }
        }

        // If we're at end of document, that's bad
        throw new EofException();
    }

    /**
     * Retrieve the next token from the input stream
     *
     * @return the token found
     * @throws IOException
     */
    public int nextToken() throws IOException {
        getNext(false);
        return type;
    }

    /**
     * Initializes the parser with an input stream.  When initialize is true, the stream header
     * (a version byte followed by three multi-byte integers) is read and discarded.  In either
     * case the tag table is then set to point to page 0 (by definition, the starting page).
     *
     * @param in the InputStream associated with this parser
     * @param initialize whether the header should be consumed from the stream
     * @throws IOException if the header is truncated; EmptyStreamException if the stream has no
     * content at all
     */
    public void setInput(InputStream in, boolean initialize) throws IOException {
        this.in = in;
        if (initialize) {
            // If we fail on the very first byte, report an empty stream
            try {
                readByte(); // version
            } catch (EofException e) {
                throw new EmptyStreamException();
            }
            readInt();  // ?
            readInt();  // 106 (UTF-8)
            readInt();  // string table length
        }
        tagTable = tagTables[0];
    }

    /**
     * Point the parser at a new stream and consume its first byte.  Test support only.
     *
     * @param in the replacement InputStream
     */
    @VisibleForTesting
    void resetInput(InputStream in) {
        this.in = in;
        try {
            // Read leading zero
            read();
        } catch (IOException e) {
            // Deliberately ignored: this is best-effort test support
        }
    }

    /**
     * Log a string via Log.v (and the file logger when Eas.FILE_LOG is set).  Only the text up to
     * the first newline is logged.
     *
     * @param str the text to log
     */
    void log(String str) {
        int cr = str.indexOf('\n');
        if (cr > 0) {
            str = str.substring(0, cr);
        }
        Log.v(logTag, str);
        if (Eas.FILE_LOG) {
            FileLogger.log(logTag, str);
        }
    }

    /**
     * Push a tag (including its page information) onto the tag stack as though it had just been
     * read from the stream, making its page the current page.
     *
     * @param id the tag, with the page number in the bits above Tags.PAGE_SHIFT
     */
    protected void pushTag(int id) {
        int pg = id >> Tags.PAGE_SHIFT;
        // Keep page in the same (shifted) form that getNext stores, since nextTag ORs it
        // directly with startTag
        page = pg << Tags.PAGE_SHIFT;
        tagTable = tagTables[pg];
        push(id);
    }

    /**
     * Pop the current tag off the stack, making it both startTag and endTag.
     */
    private void pop() {
        if (logging) {
            name = nameArray[depth];
            log("</" + name + '>');
        }
        // Retrieve the now-current startTag from our stack
        startTag = endTag = startTagArray[depth];
        depth--;
    }

    /**
     * Push a tag id, as read from the stream, onto the tag stack.
     *
     * @param id the raw id; the low 6 bits are the tag and bit 0x40 flags the presence of content
     */
    private void push(int id) {
        // The tag is in the low 6 bits
        startTag = id & 0x3F;
        // If the 0x40 bit is set, there is content (a value) to be read
        noContent = (id & 0x40) == 0;
        depth++;
        if (logging) {
            name = tagTable[startTag - TAG_BASE];
            nameArray[depth] = name;
            log("<" + name + (noContent ? "/" : "") + '>');
        }
        // Save the startTag to our stack
        startTagArray[depth] = startTag;
    }

    /**
     * Return the next piece of data from the stream.  The return value indicates the type of data
     * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream), or
     * TEXT (the value of a tag, either an inline string or opaque data)
     *
     * @param asInt whether a TEXT value should be parsed as a String or an int.
     * @return the type of data retrieved
     * @throws IOException
     */
    private final int getNext(boolean asInt) throws IOException {
        // A tag without content has no END in the stream, so synthesize one here
        if (noContent) {
            nameArray[depth--] = null;
            type = END;
            noContent = false;
            return type;
        }

        text = null;
        name = null;

        int id = nextId();
        while (id == Wbxml.SWITCH_PAGE) {
            nextId = NOT_FETCHED;
            // Get the new page number
            int pg = readByte();
            // The page comes from the stream; reject one we have no table slot for rather than
            // failing with an unchecked ArrayIndexOutOfBoundsException
            if (pg >= tagTables.length) {
                throw new EasParserException("Invalid page: " + pg);
            }
            // Save the shifted page to add into the startTag in nextTag
            page = pg << Tags.PAGE_SHIFT;
            if (LOG_VERBOSE) {
                log("Page: " + page);
            }
            // Retrieve the current tag table
            tagTable = tagTables[pg];
            id = nextId();
        }
        nextId = NOT_FETCHED;

        switch (id) {
            case EOF_BYTE:
                // End of document
                type = DONE;
                break;

            case Wbxml.END:
                type = END;
                pop();
                break;

            case Wbxml.STR_I:
                // Inline string
                type = TEXT;
                if (asInt) {
                    num = readInlineInt();
                } else {
                    text = readInlineString();
                }
                if (logging) {
                    name = tagTable[startTag - TAG_BASE];
                    log(name + ": " + (asInt ? Integer.toString(num) : text));
                }
                break;

            case Wbxml.OPAQUE:
                // Integer length + opaque data.  This is a value, so report it as one; leaving
                // type untouched would return the previous token's type (e.g. START) again
                type = TEXT;
                int length = readInt();
                if (length < 0) {
                    // Only possible if the encoded length overflowed an int
                    throw new EasParserException("Invalid opaque length: " + length);
                }
                bytes = new byte[length];
                for (int i = 0; i < length; i++) {
                    bytes[i] = (byte)readByte();
                }
                if (logging) {
                    name = tagTable[startTag - TAG_BASE];
                    log(name + ": (opaque:" + length + ") ");
                }
                break;

            default:
                type = START;
                push(id);
        }

        // Return the type of data we're dealing with
        return type;
    }

    /**
     * Read an int from the input stream, and capture it if necessary for debugging.  Seems a small
     * price to pay...
     *
     * @return the int read, or EOF_BYTE (-1) at end of stream
     * @throws IOException
     */
    private int read() throws IOException {
        int i;
        i = in.read();
        if (capture) {
            captureArray.add(i);
        }
        if (LOG_VERBOSE) {
            log("Byte: " + i);
        }
        return i;
    }

    /**
     * Return the upcoming id from the stream, reading it only if one isn't already saved.
     *
     * @return the saved or newly read id (EOF_BYTE at end of stream)
     * @throws IOException
     */
    private int nextId() throws IOException {
        if (nextId == NOT_FETCHED) {
            nextId = read();
        }
        return nextId;
    }

    /**
     * Read a single byte that is required to be present.
     *
     * @return the byte read
     * @throws IOException EofException if the stream has ended
     */
    private int readByte() throws IOException {
        int i = read();
        if (i == EOF_BYTE) {
            throw new EofException();
        }
        return i;
    }

    /**
     * Read an integer from the stream; this is called when the parser knows that what follows is
     * an inline string representing an integer (e.g. the Read tag in Email has a value known to
     * be either "0" or "1")
     *
     * @return the integer as parsed from the stream
     * @throws IOException if a non-digit is found or the stream ends before the terminator
     */
    private int readInlineInt() throws IOException {
        int result = 0;

        while (true) {
            int i = readByte();
            // Inline strings are always terminated with a zero byte
            if (i == 0) {
                return result;
            }
            if (i >= '0' && i <= '9') {
                result = (result * 10) + (i - '0');
            } else {
                throw new IOException("Non integer");
            }
        }
    }

    /**
     * Read a multi-byte integer: 7 bits of payload per byte, most significant group first, with
     * the high bit set on every byte except the last.
     *
     * @return the integer read
     * @throws IOException EofException if the stream ends mid-value
     */
    private int readInt() throws IOException {
        int result = 0;
        int i;

        do {
            i = readByte();
            result = (result << 7) | (i & 0x7f);
        } while ((i & 0x80) != 0);

        return result;
    }

    /**
     * Read an inline string from the stream; the bytes up to (not including) the terminating
     * zero byte are decoded as UTF-8.
     *
     * @return the String as parsed from the stream
     * @throws IOException EofException if the stream ends before the terminator
     */
    private String readInlineString() throws IOException {
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256);
        // readByte throws EofException at end of stream, so only the terminator ends the loop
        for (int i = readByte(); i != 0; i = readByte()) {
            outputStream.write(i);
        }
        return outputStream.toString("UTF-8");
    }
}