1 /* 2 * Copyright (C) 2008-2009 Marc Blank 3 * Licensed to The Android Open Source Project. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package com.android.exchange.adapter; 19 20 import android.content.Context; 21 22 import com.android.exchange.Eas; 23 import com.android.exchange.EasException; 24 import com.android.exchange.utility.FileLogger; 25 import com.android.mail.utils.LogUtils; 26 import com.google.common.annotations.VisibleForTesting; 27 28 import java.io.ByteArrayOutputStream; 29 import java.io.FileNotFoundException; 30 import java.io.FileOutputStream; 31 import java.io.IOException; 32 import java.io.InputStream; 33 import java.util.ArrayDeque; 34 import java.util.ArrayList; 35 import java.util.Arrays; 36 import java.util.Deque; 37 38 /** 39 * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that 40 * EAS uses (as defined in the EAS specification). 41 * 42 * Supports: 43 * WBXML tokens to encode XML tags 44 * WBXML code pages to support multiple XML namespaces 45 * Inline strings 46 * Opaque data 47 * 48 * Does not support: (throws EasParserException) 49 * String tables 50 * Entities 51 * Processing instructions 52 * Attribute encoding 53 * 54 */ 55 public abstract class Parser { 56 private static final boolean LOG_VERBOSE = false; 57 58 private static final String LOG_TAG = Eas.LOG_TAG; 59 60 // The following constants are Wbxml standard 61 public static final int START_DOCUMENT = 0; 62 public static final int END_DOCUMENT = 1; 63 private static final int DONE = 1; 64 private static final int START = 2; 65 public static final int END = 3; 66 private static final int TEXT = 4; 67 private static final int OPAQUE = 5; 68 private static final int NOT_ENDED = Integer.MIN_VALUE; 69 private static final int EOF_BYTE = -1; 70 71 private boolean logging = false; 72 private boolean capture = false; 73 74 private ArrayList<Integer> captureArray; 75 76 // The input stream for this parser 77 private InputStream in; 78 79 // The current tag depth 80 private int depth; 81 82 // The stack of names of tags being processed; used when debug = true 83 private String[] nameArray = new String[32]; 84 85 public class Tag { 86 private final int mPage; 87 private final int mIndex; 88 // Whether the tag is associated with content (a value) 89 public final boolean mNoContent; 90 private final String mName; 91 Tag(final int page, final int id)92 public Tag(final int page, final int id) { 93 mPage = page; 94 // The tag is in the low 6 bits 95 mIndex = id & Tags.PAGE_MASK; 96 // If the high bit is set, there is content (a value) to be read 97 mNoContent = (id & Wbxml.WITH_CONTENT) == 0; 98 if (Tags.isGlobalTag(mIndex)) { 99 mName = "unsupported-WBXML"; 100 } else if (!Tags.isValidTag(mPage, mIndex)) { 101 mName = "unknown"; 102 } else { 103 mName = Tags.getTagName(mPage, mIndex); 104 } 105 } 106 getTagNum()107 public int getTagNum() { 108 if (Tags.isGlobalTag(mIndex)) { 109 return mIndex; 110 } 111 return (mPage << Tags.PAGE_SHIFT) | mIndex; 112 } 113 114 @Override toString()115 public String toString() { 116 return mName; 117 } 118 } 119 120 // The stack of tags being processed 121 private final Deque<Tag> startTagArray = new ArrayDeque<Tag>(); 122 123 private Tag startTag; 124 125 // The type of the last token read (eg, TEXT, OPAQUE, END, etc). 126 private int type; 127 128 // The current page. As of EAS 14.1, this is a value 0-24. 129 private int page; 130 131 // The current tag. The low order 6 bits contain the tag index and the 132 // higher order bits the page number. The format matches that used for 133 // the tag enums defined in Tags.java. 134 public int tag; 135 136 // Whether the current tag is associated with content (a value) 137 public boolean noContent; 138 139 // The value read, as a String 140 private String text; 141 142 // The value read, as bytes 143 private byte[] bytes; 144 145 // TODO: Define a new parse exception type rather than lumping these in as IOExceptions. 146 147 /** 148 * Generated when the parser comes to EOF prematurely during parsing (i.e. in error) 149 */ 150 public class EofException extends IOException { 151 private static final long serialVersionUID = 1L; 152 } 153 154 /** 155 * An EmptyStreamException is an EofException that occurs reading the first byte in the parser's 156 * input stream; in other words, the stream had no content. 157 */ 158 public class EmptyStreamException extends EofException { 159 private static final long serialVersionUID = 1L; 160 } 161 162 public class EodException extends IOException { 163 private static final long serialVersionUID = 1L; 164 } 165 166 public class EasParserException extends IOException { 167 private static final long serialVersionUID = 1L; 168 EasParserException()169 EasParserException() { 170 super("WBXML format error"); 171 } 172 EasParserException(final String reason)173 EasParserException(final String reason) { 174 super(reason); 175 } 176 } 177 parse()178 public boolean parse() throws IOException, EasException { 179 return false; 180 } 181 Parser(final InputStream in)182 public Parser(final InputStream in) throws IOException { 183 setInput(in, true); 184 logging = Eas.PARSER_LOG; 185 } 186 187 /** 188 * Constructor for use when switching parsers within a input stream 189 * @param parser an existing, initialized parser 190 * @throws IOException 191 */ Parser(final Parser parser)192 public Parser(final Parser parser) throws IOException { 193 setInput(parser.in, false); 194 logging = Eas.PARSER_LOG; 195 } 196 197 /** 198 * Set the debug state of the parser. When debugging is on, every token is logged (LogUtils.v) 199 * to the console. 200 * 201 * @param val the desired state for debug output 202 */ 203 @VisibleForTesting setDebug(final boolean val)204 public void setDebug(final boolean val) { 205 logging = val; 206 } 207 getInput()208 protected InputStream getInput() { 209 return in; 210 } 211 212 /** 213 * Turns on data capture; this is used to create test streams that represent "live" data and 214 * can be used against the various parsers. 215 */ captureOn()216 public void captureOn() { 217 capture = true; 218 captureArray = new ArrayList<Integer>(); 219 } 220 221 /** 222 * Turns off data capture; writes the captured data to a specified file. 223 */ captureOff(final Context context, final String file)224 public void captureOff(final Context context, final String file) { 225 try { 226 final FileOutputStream out = context.openFileOutput(file, 227 Context.MODE_WORLD_WRITEABLE); 228 out.write(captureArray.toString().getBytes()); 229 out.close(); 230 } catch (FileNotFoundException e) { 231 // This is debug code; exceptions aren't interesting. 232 } catch (IOException e) { 233 // This is debug code; exceptions aren't interesting. 234 } 235 } 236 237 /** 238 * Return the value of the current tag, as a byte array. Throws EasParserException 239 * if neither opaque nor text data is present. Never returns null--returns 240 * an empty byte[] array for empty data. 241 * 242 * @return the byte array value of the current tag 243 * @throws IOException 244 */ getValueBytes()245 public byte[] getValueBytes() throws IOException { 246 final String name = startTag.toString(); 247 248 getNext(); 249 // This means there was no value given, just <Foo/>; we'll return empty array 250 if (type == END) { 251 log("No value for tag: " + name); 252 return new byte[0]; 253 } else if (type != OPAQUE && type != TEXT) { 254 throw new EasParserException("Expected OPAQUE or TEXT data for tag " + name); 255 } 256 257 // Save the value 258 final byte[] val = type == OPAQUE ? bytes : text.getBytes("UTF-8"); 259 // Read the next token; it had better be the end of the current tag 260 getNext(); 261 // If not, throw an exception 262 if (type != END) { 263 throw new EasParserException("No END found for tag " + name); 264 } 265 return val; 266 } 267 268 /** 269 * Return the value of the current tag, as a String. Throws EasParserException 270 * for non-text data. Never returns null--returns an empty string if no data. 271 * 272 * @return the String value of the current tag 273 * @throws IOException 274 */ getValue()275 public String getValue() throws IOException { 276 final String name = startTag.toString(); 277 278 getNext(); 279 // This means there was no value given, just <Foo/>; we'll return empty string for now 280 if (type == END) { 281 log("No value for tag: " + name); 282 return ""; 283 } else if (type != TEXT) { 284 throw new EasParserException("Expected TEXT data for tag " + name); 285 } 286 287 // Save the value 288 final String val = text; 289 // Read the next token; it had better be the end of the current tag 290 getNext(); 291 // If not, throw an exception 292 if (type != END) { 293 throw new EasParserException("No END found for tag " + name); 294 } 295 return val; 296 } 297 298 /** 299 * Return the value of the current tag, as an integer. Throws EasParserException 300 * for non text data, and text data that doesn't parse as an integer. Returns 301 * 0 for empty data. 302 * 303 * @return the integer value of the current tag 304 * @throws IOException 305 */ getValueInt()306 public int getValueInt() throws IOException { 307 final String val = getValue(); 308 if (val.length() == 0) { 309 return 0; 310 } 311 312 int num; 313 try { 314 num = Integer.parseInt(val); 315 } catch (NumberFormatException e) { 316 throw new EasParserException("Tag " + startTag + ": " + e.getMessage()); 317 } 318 return num; 319 } 320 321 /** 322 * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to 323 * mark the end of the current tag and end of document. If we hit end of document without 324 * looking for it, generate an EodException. The tag returned consists of the page number 325 * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream. Thus, all tags returned 326 * are unique. 327 * 328 * @param endingTag the tag that would represent the end of the tag we're processing 329 * @return the next tag found 330 * @throws IOException 331 */ nextTag(final int endingTag)332 public int nextTag(final int endingTag) throws IOException { 333 while (getNext() != DONE) { 334 // If we're a start, set tag to include the page and return it 335 if (type == START) { 336 tag = startTag.getTagNum(); 337 return tag; 338 // If we're at the ending tag we're looking for, return the END signal 339 } else if (type == END && startTag.getTagNum() == endingTag) { 340 return END; 341 } 342 } 343 // We're at end of document here. If we're looking for it, return END_DOCUMENT 344 if (endingTag == START_DOCUMENT) { 345 return END_DOCUMENT; 346 } 347 // Otherwise, we've prematurely hit end of document, so exception out 348 // EodException is a subclass of IOException; this will be treated as an IO error by 349 // EasService 350 throw new EodException(); 351 } 352 353 /** 354 * Skip anything found in the stream until the end of the current tag is reached. This can be 355 * used to ignore stretches of xml that aren't needed by the parser. 356 * 357 * @throws IOException 358 */ skipTag()359 public void skipTag() throws IOException { 360 final int thisTag = startTag.getTagNum(); 361 // Just loop until we hit the end of the current tag 362 while (getNext() != DONE) { 363 if (type == END && startTag.getTagNum() == thisTag) { 364 return; 365 } 366 } 367 368 // If we're at end of document, that's bad 369 throw new EofException(); 370 } 371 372 /** 373 * Initializes the parser with an input stream; reads the first 4 bytes (which are always the 374 * same in EAS, and then sets the tag table to point to page 0 (by definition, the starting 375 * page). 376 * 377 * @param in the InputStream associated with this parser 378 * @throws IOException 379 */ setInput(final InputStream in, final boolean initialize)380 public void setInput(final InputStream in, final boolean initialize) throws IOException { 381 this.in = in; 382 if ((in != null) && initialize) { 383 // If we fail on the very first byte, report an empty stream 384 try { 385 final int version = readByte(); // version 386 } catch (EofException e) { 387 throw new EmptyStreamException(); 388 } 389 readInt(); // public identifier 390 readInt(); // 106 (UTF-8) 391 final int stringTableLength = readInt(); // string table length 392 if (stringTableLength != 0) { 393 throw new EasParserException("WBXML string table unsupported"); 394 } 395 } 396 } 397 398 @VisibleForTesting resetInput(final InputStream in)399 void resetInput(final InputStream in) { 400 this.in = in; 401 try { 402 // Read leading zero 403 read(); 404 } catch (IOException e) { 405 } 406 } 407 log(final String str)408 void log(final String str) { 409 if (!logging) { 410 return; 411 } 412 final String logStr; 413 int cr = str.indexOf('\n'); 414 if (cr > 0) { 415 logStr = str.substring(0, cr); 416 } else { 417 logStr = str; 418 } 419 final char [] charArray = new char[startTagArray.size() * 2]; 420 Arrays.fill(charArray, ' '); 421 final String indent = new String(charArray); 422 LogUtils.v(LOG_TAG, "%s", indent + logStr); 423 if (Eas.FILE_LOG) { 424 FileLogger.log(LOG_TAG, logStr); 425 } 426 } 427 logVerbose(final String str)428 void logVerbose(final String str) { 429 if (LOG_VERBOSE) { 430 log(str); 431 } 432 } 433 pushTag(final int id)434 protected void pushTag(final int id) { 435 page = id >>> Tags.PAGE_SHIFT; 436 push(id); 437 } 438 pop()439 private void pop() { 440 // Retrieve the now-current startTag from our stack 441 startTag = startTagArray.removeFirst(); 442 log("</" + startTag + '>'); 443 } 444 push(final int id)445 private void push(final int id) { 446 startTag = new Tag(page, id); 447 noContent = startTag.mNoContent; 448 log("<" + startTag + (noContent ? '/' : "") + '>'); 449 // Save the startTag to our stack 450 startTagArray.addFirst(startTag); 451 } 452 453 /** 454 * Return the next piece of data from the stream. The return value indicates the type of data 455 * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream), or 456 * TEXT (the value of a tag) 457 * 458 * @return the type of data retrieved 459 * @throws IOException 460 */ getNext()461 private final int getNext() throws IOException { 462 bytes = null; 463 text = null; 464 465 if (noContent) { 466 startTagArray.removeFirst(); 467 type = END; 468 noContent = false; 469 return type; 470 } 471 472 int id = read(); 473 while (id == Wbxml.SWITCH_PAGE) { 474 // Get the new page number 475 page = readByte(); 476 // Retrieve the current tag table 477 if (!Tags.isValidPage(page)) { 478 // Unknown code page. These seem to happen mostly because of 479 // invalid data from the server so throw an exception here. 480 throw new EasParserException("Unknown code page " + page); 481 } 482 logVerbose("Page: " + page); 483 id = read(); 484 } 485 486 switch (id) { 487 case EOF_BYTE: 488 // End of document 489 type = DONE; 490 break; 491 492 case Wbxml.END: 493 type = END; 494 pop(); 495 break; 496 497 case Wbxml.STR_I: 498 // Inline string 499 type = TEXT; 500 text = readInlineString(); 501 log(startTag + ": " + text); 502 break; 503 504 case Wbxml.OPAQUE: 505 // Integer length + opaque data 506 type = OPAQUE; 507 final int length = readInt(); 508 bytes = new byte[length]; 509 for (int i = 0; i < length; i++) { 510 bytes[i] = (byte)readByte(); 511 } 512 log(startTag + ": (opaque:" + length + ") "); 513 break; 514 515 default: 516 if (Tags.isGlobalTag(id & Tags.PAGE_MASK)) { 517 throw new EasParserException(String.format( 518 "Unhandled WBXML global token 0x%02X", id)); 519 } 520 if ((id & Wbxml.WITH_ATTRIBUTES) != 0) { 521 throw new EasParserException(String.format( 522 "Attributes unsupported, tag 0x%02X", id)); 523 } 524 type = START; 525 push(id); 526 } 527 528 // Return the type of data we're dealing with 529 return type; 530 } 531 532 /** 533 * Read an int from the input stream, and capture it if necessary for debugging. Seems a small 534 * price to pay... 535 * 536 * @return the int read 537 * @throws IOException 538 */ read()539 private int read() throws IOException { 540 int i; 541 i = in.read(); 542 if (capture) { 543 captureArray.add(i); 544 } 545 logVerbose("Byte: " + i); 546 return i; 547 } 548 readByte()549 private int readByte() throws IOException { 550 int i = read(); 551 if (i == EOF_BYTE) { 552 throw new EofException(); 553 } 554 return i; 555 } 556 557 /** 558 * Throws EasParserException if detects integer encoded with more than 5 559 * bytes. A uint_32 needs 5 bytes to fully encode 32 bits so if the high 560 * bit is set for more than 4 bytes, something is wrong with the data 561 * stream. 562 */ readInt()563 private int readInt() throws IOException { 564 int result = 0; 565 int i; 566 int numBytes = 0; 567 568 do { 569 if (++numBytes > 5) { 570 throw new EasParserException("Invalid integer encoding, too many bytes"); 571 } 572 i = readByte(); 573 result = (result << 7) | (i & 0x7f); 574 } while ((i & 0x80) != 0); 575 576 return result; 577 } 578 579 /** 580 * Read an inline string from the stream 581 * 582 * @return the String as parsed from the stream 583 * @throws IOException 584 */ readInlineString()585 private String readInlineString() throws IOException { 586 final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256); 587 while (true) { 588 final int i = read(); 589 if (i == 0) { 590 break; 591 } else if (i == EOF_BYTE) { 592 throw new EofException(); 593 } 594 outputStream.write(i); 595 } 596 outputStream.flush(); 597 final String res = outputStream.toString("UTF-8"); 598 outputStream.close(); 599 return res; 600 } 601 } 602