/*
 * Copyright (C) 2008-2009 Marc Blank
 * Licensed to The Android Open Source Project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 
package com.android.exchange.adapter;

import android.content.Context;

import com.android.exchange.Eas;
import com.android.exchange.EasException;
import com.android.exchange.utility.FileLogger;
import com.android.mail.utils.LogUtils;
import com.google.common.annotations.VisibleForTesting;

import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
37 
/**
 * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that
 * EAS uses (as defined in the EAS specification).
 *
 * Supports:
 *      WBXML tokens to encode XML tags
 *      WBXML code pages to support multiple XML namespaces
 *      Inline strings
 *      Opaque data
 *
 * Does not support (an EasParserException is thrown when encountered):
 *      String tables
 *      Entities
 *      Processing instructions
 *      Attribute encoding
 *
 */
55 public abstract class Parser {
56     private static final boolean LOG_VERBOSE = false;
57 
58     private static final String LOG_TAG = Eas.LOG_TAG;
59 
60     // The following constants are Wbxml standard
61     public static final int START_DOCUMENT = 0;
62     public static final int END_DOCUMENT = 1;
63     private static final int DONE = 1;
64     private static final int START = 2;
65     public static final int END = 3;
66     private static final int TEXT = 4;
67     private static final int OPAQUE = 5;
68     private static final int NOT_ENDED = Integer.MIN_VALUE;
69     private static final int EOF_BYTE = -1;
70 
71     private boolean logging = false;
72     private boolean capture = false;
73 
74     private ArrayList<Integer> captureArray;
75 
76     // The input stream for this parser
77     private InputStream in;
78 
79     // The current tag depth
80     private int depth;
81 
82     // The stack of names of tags being processed; used when debug = true
83     private String[] nameArray = new String[32];
84 
85     public class Tag {
86         private final int mPage;
87         private final int mIndex;
88         // Whether the tag is associated with content (a value)
89         public final boolean mNoContent;
90         private final String mName;
91 
Tag(final int page, final int id)92         public Tag(final int page, final int id) {
93             mPage = page;
94             // The tag is in the low 6 bits
95             mIndex = id & Tags.PAGE_MASK;
96             // If the high bit is set, there is content (a value) to be read
97             mNoContent = (id & Wbxml.WITH_CONTENT) == 0;
98             if (Tags.isGlobalTag(mIndex)) {
99                 mName = "unsupported-WBXML";
100             } else if (!Tags.isValidTag(mPage, mIndex)) {
101                 mName = "unknown";
102             } else {
103                 mName = Tags.getTagName(mPage, mIndex);
104             }
105         }
106 
getTagNum()107         public int getTagNum() {
108             if (Tags.isGlobalTag(mIndex)) {
109                 return mIndex;
110             }
111             return (mPage << Tags.PAGE_SHIFT) | mIndex;
112         }
113 
114         @Override
toString()115         public String toString() {
116             return mName;
117         }
118     }
119 
120     // The stack of tags being processed
121     private final Deque<Tag> startTagArray = new ArrayDeque<Tag>();
122 
123     private Tag startTag;
124 
125     // The type of the last token read (eg, TEXT, OPAQUE, END, etc).
126     private int type;
127 
128     // The current page. As of EAS 14.1, this is a value 0-24.
129     private int page;
130 
131     // The current tag. The low order 6 bits contain the tag index and the
132     // higher order bits the page number. The format matches that used for
133     // the tag enums defined in Tags.java.
134     public int tag;
135 
136     // Whether the current tag is associated with content (a value)
137     public boolean noContent;
138 
139     // The value read, as a String
140     private String text;
141 
142     // The value read, as bytes
143     private byte[] bytes;
144 
145     // TODO: Define a new parse exception type rather than lumping these in as IOExceptions.
146 
147     /**
148      * Generated when the parser comes to EOF prematurely during parsing (i.e. in error)
149      */
150     public class EofException extends IOException {
151         private static final long serialVersionUID = 1L;
152     }
153 
154     /**
155      * An EmptyStreamException is an EofException that occurs reading the first byte in the parser's
156      * input stream; in other words, the stream had no content.
157      */
158     public class EmptyStreamException extends EofException {
159         private static final long serialVersionUID = 1L;
160     }
161 
162     public class EodException extends IOException {
163         private static final long serialVersionUID = 1L;
164     }
165 
166     public class EasParserException extends IOException {
167         private static final long serialVersionUID = 1L;
168 
EasParserException()169         EasParserException() {
170             super("WBXML format error");
171         }
172 
EasParserException(final String reason)173         EasParserException(final String reason) {
174             super(reason);
175         }
176     }
177 
parse()178     public boolean parse() throws IOException, EasException {
179         return false;
180     }
181 
Parser(final InputStream in)182     public Parser(final InputStream in) throws IOException {
183         setInput(in, true);
184         logging = Eas.PARSER_LOG;
185     }
186 
187     /**
188      * Constructor for use when switching parsers within a input stream
189      * @param parser an existing, initialized parser
190      * @throws IOException
191      */
Parser(final Parser parser)192     public Parser(final Parser parser) throws IOException {
193         setInput(parser.in, false);
194         logging = Eas.PARSER_LOG;
195     }
196 
197     /**
198      * Set the debug state of the parser.  When debugging is on, every token is logged (LogUtils.v)
199      * to the console.
200      *
201      * @param val the desired state for debug output
202      */
203     @VisibleForTesting
setDebug(final boolean val)204     public void setDebug(final boolean val) {
205         logging = val;
206     }
207 
getInput()208     protected InputStream getInput() {
209         return in;
210     }
211 
212     /**
213      * Turns on data capture; this is used to create test streams that represent "live" data and
214      * can be used against the various parsers.
215      */
captureOn()216     public void captureOn() {
217         capture = true;
218         captureArray = new ArrayList<Integer>();
219     }
220 
221     /**
222      * Turns off data capture; writes the captured data to a specified file.
223      */
captureOff(final Context context, final String file)224     public void captureOff(final Context context, final String file) {
225         try {
226             final FileOutputStream out = context.openFileOutput(file,
227                     Context.MODE_WORLD_WRITEABLE);
228             out.write(captureArray.toString().getBytes());
229             out.close();
230         } catch (FileNotFoundException e) {
231             // This is debug code; exceptions aren't interesting.
232         } catch (IOException e) {
233             // This is debug code; exceptions aren't interesting.
234         }
235     }
236 
237     /**
238      * Return the value of the current tag, as a byte array. Throws EasParserException
239      * if neither opaque nor text data is present. Never returns null--returns
240      * an empty byte[] array for empty data.
241      *
242      * @return the byte array value of the current tag
243      * @throws IOException
244      */
getValueBytes()245     public byte[] getValueBytes() throws IOException {
246         final String name = startTag.toString();
247 
248         getNext();
249         // This means there was no value given, just <Foo/>; we'll return empty array
250         if (type == END) {
251             log("No value for tag: " + name);
252             return new byte[0];
253         } else if (type != OPAQUE && type != TEXT) {
254             throw new EasParserException("Expected OPAQUE or TEXT data for tag " + name);
255         }
256 
257         // Save the value
258         final byte[] val = type == OPAQUE ? bytes : text.getBytes("UTF-8");
259         // Read the next token; it had better be the end of the current tag
260         getNext();
261         // If not, throw an exception
262         if (type != END) {
263             throw new EasParserException("No END found for tag " + name);
264         }
265         return val;
266     }
267 
268     /**
269      * Return the value of the current tag, as a String. Throws EasParserException
270      * for non-text data. Never returns null--returns an empty string if no data.
271      *
272      * @return the String value of the current tag
273      * @throws IOException
274      */
getValue()275     public String getValue() throws IOException {
276         final String name = startTag.toString();
277 
278         getNext();
279         // This means there was no value given, just <Foo/>; we'll return empty string for now
280         if (type == END) {
281             log("No value for tag: " + name);
282             return "";
283         } else if (type != TEXT) {
284             throw new EasParserException("Expected TEXT data for tag " + name);
285         }
286 
287         // Save the value
288         final String val = text;
289         // Read the next token; it had better be the end of the current tag
290         getNext();
291         // If not, throw an exception
292         if (type != END) {
293             throw new EasParserException("No END found for tag " + name);
294         }
295         return val;
296     }
297 
298     /**
299      * Return the value of the current tag, as an integer. Throws EasParserException
300      * for non text data, and text data that doesn't parse as an integer. Returns
301      * 0 for empty data.
302      *
303      * @return the integer value of the current tag
304      * @throws IOException
305      */
getValueInt()306     public int getValueInt() throws IOException {
307         final String val = getValue();
308         if (val.length() == 0) {
309             return 0;
310         }
311 
312         int num;
313         try {
314             num = Integer.parseInt(val);
315         } catch (NumberFormatException e) {
316             throw new EasParserException("Tag " + startTag + ": " + e.getMessage());
317         }
318         return num;
319     }
320 
321     /**
322      * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to
323      * mark the end of the current tag and end of document.  If we hit end of document without
324      * looking for it, generate an EodException.  The tag returned consists of the page number
325      * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream.  Thus, all tags returned
326      * are unique.
327      *
328      * @param endingTag the tag that would represent the end of the tag we're processing
329      * @return the next tag found
330      * @throws IOException
331      */
nextTag(final int endingTag)332     public int nextTag(final int endingTag) throws IOException {
333         while (getNext() != DONE) {
334             // If we're a start, set tag to include the page and return it
335             if (type == START) {
336                 tag = startTag.getTagNum();
337                 return tag;
338             // If we're at the ending tag we're looking for, return the END signal
339             } else if (type == END && startTag.getTagNum() == endingTag) {
340                 return END;
341             }
342         }
343         // We're at end of document here.  If we're looking for it, return END_DOCUMENT
344         if (endingTag == START_DOCUMENT) {
345             return END_DOCUMENT;
346         }
347         // Otherwise, we've prematurely hit end of document, so exception out
348         // EodException is a subclass of IOException; this will be treated as an IO error by
349         // EasService
350         throw new EodException();
351     }
352 
353     /**
354      * Skip anything found in the stream until the end of the current tag is reached.  This can be
355      * used to ignore stretches of xml that aren't needed by the parser.
356      *
357      * @throws IOException
358      */
skipTag()359     public void skipTag() throws IOException {
360         final int thisTag = startTag.getTagNum();
361         // Just loop until we hit the end of the current tag
362         while (getNext() != DONE) {
363             if (type == END && startTag.getTagNum() == thisTag) {
364                 return;
365             }
366         }
367 
368         // If we're at end of document, that's bad
369         throw new EofException();
370     }
371 
372     /**
373      * Initializes the parser with an input stream; reads the first 4 bytes (which are always the
374      * same in EAS, and then sets the tag table to point to page 0 (by definition, the starting
375      * page).
376      *
377      * @param in the InputStream associated with this parser
378      * @throws IOException
379      */
setInput(final InputStream in, final boolean initialize)380     public void setInput(final InputStream in, final boolean initialize) throws IOException {
381         this.in = in;
382         if ((in != null) && initialize) {
383             // If we fail on the very first byte, report an empty stream
384             try {
385                 final int version = readByte(); // version
386             } catch (EofException e) {
387                 throw new EmptyStreamException();
388             }
389             readInt();  // public identifier
390             readInt();  // 106 (UTF-8)
391             final int stringTableLength = readInt();  // string table length
392             if (stringTableLength != 0) {
393                 throw new EasParserException("WBXML string table unsupported");
394             }
395         }
396     }
397 
398     @VisibleForTesting
resetInput(final InputStream in)399     void resetInput(final InputStream in) {
400         this.in = in;
401         try {
402             // Read leading zero
403             read();
404         } catch (IOException e) {
405         }
406     }
407 
log(final String str)408     void log(final String str) {
409         if (!logging) {
410             return;
411         }
412         final String logStr;
413         int cr = str.indexOf('\n');
414         if (cr > 0) {
415             logStr = str.substring(0, cr);
416         } else {
417             logStr = str;
418         }
419         final char [] charArray = new char[startTagArray.size() * 2];
420         Arrays.fill(charArray, ' ');
421         final String indent = new String(charArray);
422         LogUtils.v(LOG_TAG, "%s", indent + logStr);
423         if (Eas.FILE_LOG) {
424             FileLogger.log(LOG_TAG, logStr);
425         }
426     }
427 
logVerbose(final String str)428     void logVerbose(final String str) {
429         if (LOG_VERBOSE) {
430             log(str);
431         }
432     }
433 
pushTag(final int id)434     protected void pushTag(final int id) {
435         page = id >>> Tags.PAGE_SHIFT;
436         push(id);
437     }
438 
pop()439     private void pop() {
440         // Retrieve the now-current startTag from our stack
441         startTag = startTagArray.removeFirst();
442         log("</" + startTag + '>');
443     }
444 
push(final int id)445     private void push(final int id) {
446         startTag = new Tag(page, id);
447         noContent = startTag.mNoContent;
448         log("<" + startTag + (noContent ? '/' : "") + '>');
449         // Save the startTag to our stack
450         startTagArray.addFirst(startTag);
451     }
452 
453     /**
454      * Return the next piece of data from the stream.  The return value indicates the type of data
455      * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream), or
456      * TEXT (the value of a tag)
457      *
458      * @return the type of data retrieved
459      * @throws IOException
460      */
getNext()461     private final int getNext() throws IOException {
462         bytes = null;
463         text = null;
464 
465         if (noContent) {
466             startTagArray.removeFirst();
467             type = END;
468             noContent = false;
469             return type;
470         }
471 
472         int id = read();
473         while (id == Wbxml.SWITCH_PAGE) {
474             // Get the new page number
475             page = readByte();
476             // Retrieve the current tag table
477             if (!Tags.isValidPage(page)) {
478                 // Unknown code page. These seem to happen mostly because of
479                 // invalid data from the server so throw an exception here.
480                 throw new EasParserException("Unknown code page " + page);
481             }
482             logVerbose("Page: " + page);
483             id = read();
484         }
485 
486         switch (id) {
487             case EOF_BYTE:
488                 // End of document
489                 type = DONE;
490                 break;
491 
492             case Wbxml.END:
493                 type = END;
494                 pop();
495                 break;
496 
497             case Wbxml.STR_I:
498                 // Inline string
499                 type = TEXT;
500                 text = readInlineString();
501                 log(startTag + ": " + text);
502                 break;
503 
504             case Wbxml.OPAQUE:
505                 // Integer length + opaque data
506                 type = OPAQUE;
507                 final int length = readInt();
508                 bytes = new byte[length];
509                 for (int i = 0; i < length; i++) {
510                     bytes[i] = (byte)readByte();
511                 }
512                 log(startTag + ": (opaque:" + length + ") ");
513                 break;
514 
515             default:
516                 if (Tags.isGlobalTag(id & Tags.PAGE_MASK)) {
517                     throw new EasParserException(String.format(
518                                     "Unhandled WBXML global token 0x%02X", id));
519                 }
520                 if ((id & Wbxml.WITH_ATTRIBUTES) != 0) {
521                     throw new EasParserException(String.format(
522                                     "Attributes unsupported, tag 0x%02X", id));
523                 }
524                 type = START;
525                 push(id);
526         }
527 
528         // Return the type of data we're dealing with
529         return type;
530     }
531 
532     /**
533      * Read an int from the input stream, and capture it if necessary for debugging.  Seems a small
534      * price to pay...
535      *
536      * @return the int read
537      * @throws IOException
538      */
read()539     private int read() throws IOException {
540         int i;
541         i = in.read();
542         if (capture) {
543             captureArray.add(i);
544         }
545         logVerbose("Byte: " + i);
546         return i;
547     }
548 
readByte()549     private int readByte() throws IOException {
550         int i = read();
551         if (i == EOF_BYTE) {
552             throw new EofException();
553         }
554         return i;
555     }
556 
557     /**
558      * Throws EasParserException if detects integer encoded with more than 5
559      * bytes. A uint_32 needs 5 bytes to fully encode 32 bits so if the high
560      * bit is set for more than 4 bytes, something is wrong with the data
561      * stream.
562      */
readInt()563     private int readInt() throws IOException {
564         int result = 0;
565         int i;
566         int numBytes = 0;
567 
568         do {
569             if (++numBytes > 5) {
570                 throw new EasParserException("Invalid integer encoding, too many bytes");
571             }
572             i = readByte();
573             result = (result << 7) | (i & 0x7f);
574         } while ((i & 0x80) != 0);
575 
576         return result;
577     }
578 
579     /**
580      * Read an inline string from the stream
581      *
582      * @return the String as parsed from the stream
583      * @throws IOException
584      */
readInlineString()585     private String readInlineString() throws IOException {
586         final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256);
587         while (true) {
588             final int i = read();
589             if (i == 0) {
590                 break;
591             } else if (i == EOF_BYTE) {
592                 throw new EofException();
593             }
594             outputStream.write(i);
595         }
596         outputStream.flush();
597         final String res = outputStream.toString("UTF-8");
598         outputStream.close();
599         return res;
600     }
601 }
602