• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package android.pim.vcard;
17 
18 import android.pim.vcard.exception.VCardAgentNotSupportedException;
19 import android.pim.vcard.exception.VCardException;
20 import android.pim.vcard.exception.VCardInvalidCommentLineException;
21 import android.pim.vcard.exception.VCardInvalidLineException;
22 import android.pim.vcard.exception.VCardNestedException;
23 import android.pim.vcard.exception.VCardVersionException;
24 import android.text.TextUtils;
25 import android.util.Log;
26 
27 import java.io.BufferedReader;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.io.InputStreamReader;
31 import java.io.Reader;
32 import java.util.ArrayList;
33 import java.util.HashSet;
34 import java.util.List;
35 import java.util.Set;
36 
37 /**
38  * <p>
39  * Basic implementation achieving vCard parsing. Based on vCard 2.1,
40  * </p>
41  * @hide
42  */
43 /* package */ class VCardParserImpl_V21 {
44     private static final String LOG_TAG = "VCardParserImpl_V21";
45 
46     private static final class EmptyInterpreter implements VCardInterpreter {
47         @Override
end()48         public void end() {
49         }
50         @Override
endEntry()51         public void endEntry() {
52         }
53         @Override
endProperty()54         public void endProperty() {
55         }
56         @Override
propertyGroup(String group)57         public void propertyGroup(String group) {
58         }
59         @Override
propertyName(String name)60         public void propertyName(String name) {
61         }
62         @Override
propertyParamType(String type)63         public void propertyParamType(String type) {
64         }
65         @Override
propertyParamValue(String value)66         public void propertyParamValue(String value) {
67         }
68         @Override
propertyValues(List<String> values)69         public void propertyValues(List<String> values) {
70         }
71         @Override
start()72         public void start() {
73         }
74         @Override
startEntry()75         public void startEntry() {
76         }
77         @Override
startProperty()78         public void startProperty() {
79         }
80     }
81 
82     protected static final class CustomBufferedReader extends BufferedReader {
83         private long mTime;
84 
85         /**
86          * Needed since "next line" may be null due to end of line.
87          */
88         private boolean mNextLineIsValid;
89         private String mNextLine;
90 
CustomBufferedReader(Reader in)91         public CustomBufferedReader(Reader in) {
92             super(in);
93         }
94 
95         @Override
readLine()96         public String readLine() throws IOException {
97             if (mNextLineIsValid) {
98                 final String ret = mNextLine;
99                 mNextLine = null;
100                 mNextLineIsValid = false;
101                 return ret;
102             }
103 
104             long start = System.currentTimeMillis();
105             final String line = super.readLine();
106             long end = System.currentTimeMillis();
107             mTime += end - start;
108             return line;
109         }
110 
111         /**
112          * Read one line, but make this object store it in its queue.
113          */
peekLine()114         public String peekLine() throws IOException {
115             if (!mNextLineIsValid) {
116                 long start = System.currentTimeMillis();
117                 final String line = super.readLine();
118                 long end = System.currentTimeMillis();
119                 mTime += end - start;
120 
121                 mNextLine = line;
122                 mNextLineIsValid = true;
123             }
124 
125             return mNextLine;
126         }
127 
getTotalmillisecond()128         public long getTotalmillisecond() {
129             return mTime;
130         }
131     }
132 
133     private static final String DEFAULT_ENCODING = "8BIT";
134 
135     protected boolean mCanceled;
136     protected VCardInterpreter mInterpreter;
137 
138     protected final String mIntermediateCharset;
139 
140     /**
141      * <p>
142      * The encoding type for decoding byte streams. This member variable is
143      * reset to a default encoding every time when a new item comes.
144      * </p>
145      * <p>
146      * "Encoding" in vCard is different from "Charset". It is mainly used for
147      * addresses, notes, images. "7BIT", "8BIT", "BASE64", and
148      * "QUOTED-PRINTABLE" are known examples.
149      * </p>
150      */
151     protected String mCurrentEncoding;
152 
153     /**
154      * <p>
155      * The reader object to be used internally.
156      * </p>
157      * <p>
158      * Developers should not directly read a line from this object. Use
159      * getLine() unless there is some reason not to.
160      * </p>
161      */
162     protected CustomBufferedReader mReader;
163 
164     /**
165      * <p>
166      * Set for storing unknown TYPE attributes, which are not acceptable in the vCard
167      * specification, but happen to be seen in real world vCard.
168      * </p>
169      */
170     protected final Set<String> mUnknownTypeSet = new HashSet<String>();
171 
172     /**
173      * <p>
174      * Set for storing unknown VALUE attributes, which are not acceptable in the
175      * vCard specification, but happen to be seen in real world vCard.
176      * </p>
177      */
178     protected final Set<String> mUnknownValueSet = new HashSet<String>();
179 
180 
181     // In some cases, vCard is nested. Currently, we only consider the most
182     // interior vCard data.
183     // See v21_foma_1.vcf in test directory for more information.
184     // TODO: Don't ignore by using count, but read all of information outside vCard.
185     private int mNestCount;
186 
187     // Used only for parsing END:VCARD.
188     private String mPreviousLine;
189 
190     // For measuring performance.
191     private long mTimeTotal;
192     private long mTimeReadStartRecord;
193     private long mTimeReadEndRecord;
194     private long mTimeStartProperty;
195     private long mTimeEndProperty;
196     private long mTimeParseItems;
197     private long mTimeParseLineAndHandleGroup;
198     private long mTimeParsePropertyValues;
199     private long mTimeParseAdrOrgN;
200     private long mTimeHandleMiscPropertyValue;
201     private long mTimeHandleQuotedPrintable;
202     private long mTimeHandleBase64;
203 
VCardParserImpl_V21()204     public VCardParserImpl_V21() {
205         this(VCardConfig.VCARD_TYPE_DEFAULT);
206     }
207 
VCardParserImpl_V21(int vcardType)208     public VCardParserImpl_V21(int vcardType) {
209         if ((vcardType & VCardConfig.FLAG_TORELATE_NEST) != 0) {
210             mNestCount = 1;
211         }
212 
213         mIntermediateCharset =  VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
214     }
215 
216     /**
217      * <p>
218      * Parses the file at the given position.
219      * </p>
220      */
221     // <pre class="prettyprint">vcard_file = [wsls] vcard [wsls]</pre>
parseVCardFile()222     protected void parseVCardFile() throws IOException, VCardException {
223         boolean readingFirstFile = true;
224         while (true) {
225             if (mCanceled) {
226                 break;
227             }
228             if (!parseOneVCard(readingFirstFile)) {
229                 break;
230             }
231             readingFirstFile = false;
232         }
233 
234         if (mNestCount > 0) {
235             boolean useCache = true;
236             for (int i = 0; i < mNestCount; i++) {
237                 readEndVCard(useCache, true);
238                 useCache = false;
239             }
240         }
241     }
242 
243     /**
244      * @return true when a given property name is a valid property name.
245      */
isValidPropertyName(final String propertyName)246     protected boolean isValidPropertyName(final String propertyName) {
247         if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) ||
248                 propertyName.startsWith("X-"))
249                 && !mUnknownTypeSet.contains(propertyName)) {
250             mUnknownTypeSet.add(propertyName);
251             Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName);
252         }
253         return true;
254     }
255 
256     /**
257      * @return String. It may be null, or its length may be 0
258      * @throws IOException
259      */
getLine()260     protected String getLine() throws IOException {
261         return mReader.readLine();
262     }
263 
peekLine()264     protected String peekLine() throws IOException {
265         return mReader.peekLine();
266     }
267 
268     /**
269      * @return String with it's length > 0
270      * @throws IOException
271      * @throws VCardException when the stream reached end of line
272      */
getNonEmptyLine()273     protected String getNonEmptyLine() throws IOException, VCardException {
274         String line;
275         while (true) {
276             line = getLine();
277             if (line == null) {
278                 throw new VCardException("Reached end of buffer.");
279             } else if (line.trim().length() > 0) {
280                 return line;
281             }
282         }
283     }
284 
285     /*
286      * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF
287      *         items *CRLF
288      *         "END" [ws] ":" [ws] "VCARD"
289      */
parseOneVCard(boolean firstRead)290     private boolean parseOneVCard(boolean firstRead) throws IOException, VCardException {
291         boolean allowGarbage = false;
292         if (firstRead) {
293             if (mNestCount > 0) {
294                 for (int i = 0; i < mNestCount; i++) {
295                     if (!readBeginVCard(allowGarbage)) {
296                         return false;
297                     }
298                     allowGarbage = true;
299                 }
300             }
301         }
302 
303         if (!readBeginVCard(allowGarbage)) {
304             return false;
305         }
306         final long beforeStartEntry = System.currentTimeMillis();
307         mInterpreter.startEntry();
308         mTimeReadStartRecord += System.currentTimeMillis() - beforeStartEntry;
309 
310         final long beforeParseItems = System.currentTimeMillis();
311         parseItems();
312         mTimeParseItems += System.currentTimeMillis() - beforeParseItems;
313 
314         readEndVCard(true, false);
315 
316         final long beforeEndEntry = System.currentTimeMillis();
317         mInterpreter.endEntry();
318         mTimeReadEndRecord += System.currentTimeMillis() - beforeEndEntry;
319         return true;
320     }
321 
322     /**
323      * @return True when successful. False when reaching the end of line
324      * @throws IOException
325      * @throws VCardException
326      */
readBeginVCard(boolean allowGarbage)327     protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException {
328         String line;
329         do {
330             while (true) {
331                 line = getLine();
332                 if (line == null) {
333                     return false;
334                 } else if (line.trim().length() > 0) {
335                     break;
336                 }
337             }
338             final String[] strArray = line.split(":", 2);
339             final int length = strArray.length;
340 
341             // Although vCard 2.1/3.0 specification does not allow lower cases,
342             // we found vCard file emitted by some external vCard expoter have such
343             // invalid Strings.
344             // So we allow it.
345             // e.g.
346             // BEGIN:vCard
347             if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN")
348                     && strArray[1].trim().equalsIgnoreCase("VCARD")) {
349                 return true;
350             } else if (!allowGarbage) {
351                 if (mNestCount > 0) {
352                     mPreviousLine = line;
353                     return false;
354                 } else {
355                     throw new VCardException("Expected String \"BEGIN:VCARD\" did not come "
356                             + "(Instead, \"" + line + "\" came)");
357                 }
358             }
359         } while (allowGarbage);
360 
361         throw new VCardException("Reached where must not be reached.");
362     }
363 
364     /**
365      * <p>
366      * The arguments useCache and allowGarbase are usually true and false
367      * accordingly when this function is called outside this function itself.
368      * </p>
369      *
370      * @param useCache When true, line is obtained from mPreviousline.
371      *            Otherwise, getLine() is used.
372      * @param allowGarbage When true, ignore non "END:VCARD" line.
373      * @throws IOException
374      * @throws VCardException
375      */
readEndVCard(boolean useCache, boolean allowGarbage)376     protected void readEndVCard(boolean useCache, boolean allowGarbage) throws IOException,
377             VCardException {
378         String line;
379         do {
380             if (useCache) {
381                 // Though vCard specification does not allow lower cases,
382                 // some data may have them, so we allow it.
383                 line = mPreviousLine;
384             } else {
385                 while (true) {
386                     line = getLine();
387                     if (line == null) {
388                         throw new VCardException("Expected END:VCARD was not found.");
389                     } else if (line.trim().length() > 0) {
390                         break;
391                     }
392                 }
393             }
394 
395             String[] strArray = line.split(":", 2);
396             if (strArray.length == 2 && strArray[0].trim().equalsIgnoreCase("END")
397                     && strArray[1].trim().equalsIgnoreCase("VCARD")) {
398                 return;
399             } else if (!allowGarbage) {
400                 throw new VCardException("END:VCARD != \"" + mPreviousLine + "\"");
401             }
402             useCache = false;
403         } while (allowGarbage);
404     }
405 
406     /*
407      * items = *CRLF item / item
408      */
parseItems()409     protected void parseItems() throws IOException, VCardException {
410         boolean ended = false;
411 
412         final long beforeBeginProperty = System.currentTimeMillis();
413         mInterpreter.startProperty();
414         mTimeStartProperty += System.currentTimeMillis() - beforeBeginProperty;
415         ended = parseItem();
416         if (!ended) {
417             final long beforeEndProperty = System.currentTimeMillis();
418             mInterpreter.endProperty();
419             mTimeEndProperty += System.currentTimeMillis() - beforeEndProperty;
420         }
421 
422         while (!ended) {
423             final long beforeStartProperty = System.currentTimeMillis();
424             mInterpreter.startProperty();
425             mTimeStartProperty += System.currentTimeMillis() - beforeStartProperty;
426             try {
427                 ended = parseItem();
428             } catch (VCardInvalidCommentLineException e) {
429                 Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
430                 ended = false;
431             }
432 
433             if (!ended) {
434                 final long beforeEndProperty = System.currentTimeMillis();
435                 mInterpreter.endProperty();
436                 mTimeEndProperty += System.currentTimeMillis() - beforeEndProperty;
437             }
438         }
439     }
440 
441     /*
442      * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR"
443      * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts
444      * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."]
445      * "AGENT" [params] ":" vcard CRLF
446      */
parseItem()447     protected boolean parseItem() throws IOException, VCardException {
448         mCurrentEncoding = DEFAULT_ENCODING;
449 
450         final String line = getNonEmptyLine();
451         long start = System.currentTimeMillis();
452 
453         String[] propertyNameAndValue = separateLineAndHandleGroup(line);
454         if (propertyNameAndValue == null) {
455             return true;
456         }
457         if (propertyNameAndValue.length != 2) {
458             throw new VCardInvalidLineException("Invalid line \"" + line + "\"");
459         }
460         String propertyName = propertyNameAndValue[0].toUpperCase();
461         String propertyValue = propertyNameAndValue[1];
462 
463         mTimeParseLineAndHandleGroup += System.currentTimeMillis() - start;
464 
465         if (propertyName.equals("ADR") || propertyName.equals("ORG") || propertyName.equals("N")) {
466             start = System.currentTimeMillis();
467             handleMultiplePropertyValue(propertyName, propertyValue);
468             mTimeParseAdrOrgN += System.currentTimeMillis() - start;
469             return false;
470         } else if (propertyName.equals("AGENT")) {
471             handleAgent(propertyValue);
472             return false;
473         } else if (isValidPropertyName(propertyName)) {
474             if (propertyName.equals("BEGIN")) {
475                 if (propertyValue.equals("VCARD")) {
476                     throw new VCardNestedException("This vCard has nested vCard data in it.");
477                 } else {
478                     throw new VCardException("Unknown BEGIN type: " + propertyValue);
479                 }
480             } else if (propertyName.equals("VERSION") && !propertyValue.equals(getVersionString())) {
481                 throw new VCardVersionException("Incompatible version: " + propertyValue + " != "
482                         + getVersionString());
483             }
484             start = System.currentTimeMillis();
485             handlePropertyValue(propertyName, propertyValue);
486             mTimeParsePropertyValues += System.currentTimeMillis() - start;
487             return false;
488         }
489 
490         throw new VCardException("Unknown property name: \"" + propertyName + "\"");
491     }
492 
493     // For performance reason, the states for group and property name are merged into one.
494     static private final int STATE_GROUP_OR_PROPERTY_NAME = 0;
495     static private final int STATE_PARAMS = 1;
496     // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not.
497     static private final int STATE_PARAMS_IN_DQUOTE = 2;
498 
separateLineAndHandleGroup(String line)499     protected String[] separateLineAndHandleGroup(String line) throws VCardException {
500         final String[] propertyNameAndValue = new String[2];
501         final int length = line.length();
502         if (length > 0 && line.charAt(0) == '#') {
503             throw new VCardInvalidCommentLineException();
504         }
505 
506         int state = STATE_GROUP_OR_PROPERTY_NAME;
507         int nameIndex = 0;
508 
509         // This loop is developed so that we don't have to take care of bottle neck here.
510         // Refactor carefully when you need to do so.
511         for (int i = 0; i < length; i++) {
512             final char ch = line.charAt(i);
513             switch (state) {
514                 case STATE_GROUP_OR_PROPERTY_NAME: {
515                     if (ch == ':') {  // End of a property name.
516                         final String propertyName = line.substring(nameIndex, i);
517                         if (propertyName.equalsIgnoreCase("END")) {
518                             mPreviousLine = line;
519                             return null;
520                         }
521                         mInterpreter.propertyName(propertyName);
522                         propertyNameAndValue[0] = propertyName;
523                         if (i < length - 1) {
524                             propertyNameAndValue[1] = line.substring(i + 1);
525                         } else {
526                             propertyNameAndValue[1] = "";
527                         }
528                         return propertyNameAndValue;
529                     } else if (ch == '.') {  // Each group is followed by the dot.
530                         final String groupName = line.substring(nameIndex, i);
531                         if (groupName.length() == 0) {
532                             Log.w(LOG_TAG, "Empty group found. Ignoring.");
533                         } else {
534                             mInterpreter.propertyGroup(groupName);
535                         }
536                         nameIndex = i + 1;  // Next should be another group or a property name.
537                     } else if (ch == ';') {  // End of property name and beginneng of parameters.
538                         final String propertyName = line.substring(nameIndex, i);
539                         if (propertyName.equalsIgnoreCase("END")) {
540                             mPreviousLine = line;
541                             return null;
542                         }
543                         mInterpreter.propertyName(propertyName);
544                         propertyNameAndValue[0] = propertyName;
545                         nameIndex = i + 1;
546                         state = STATE_PARAMS;  // Start parameter parsing.
547                     }
548                     // TODO: comma support (in vCard 3.0 and 4.0).
549                     break;
550                 }
551                 case STATE_PARAMS: {
552                     if (ch == '"') {
553                         if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
554                             Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
555                                     "Silently allow it");
556                         }
557                         state = STATE_PARAMS_IN_DQUOTE;
558                     } else if (ch == ';') {  // Starts another param.
559                         handleParams(line.substring(nameIndex, i));
560                         nameIndex = i + 1;
561                     } else if (ch == ':') {  // End of param and beginenning of values.
562                         handleParams(line.substring(nameIndex, i));
563                         if (i < length - 1) {
564                             propertyNameAndValue[1] = line.substring(i + 1);
565                         } else {
566                             propertyNameAndValue[1] = "";
567                         }
568                         return propertyNameAndValue;
569                     }
570                     break;
571                 }
572                 case STATE_PARAMS_IN_DQUOTE: {
573                     if (ch == '"') {
574                         if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
575                             Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
576                                     "Silently allow it");
577                         }
578                         state = STATE_PARAMS;
579                     }
580                     break;
581                 }
582             }
583         }
584 
585         throw new VCardInvalidLineException("Invalid line: \"" + line + "\"");
586     }
587 
588     /*
589      * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param /
590      * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws]
591      * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "="
592      * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "="
593      * [ws] word / knowntype
594      */
handleParams(String params)595     protected void handleParams(String params) throws VCardException {
596         final String[] strArray = params.split("=", 2);
597         if (strArray.length == 2) {
598             final String paramName = strArray[0].trim().toUpperCase();
599             String paramValue = strArray[1].trim();
600             if (paramName.equals("TYPE")) {
601                 handleType(paramValue);
602             } else if (paramName.equals("VALUE")) {
603                 handleValue(paramValue);
604             } else if (paramName.equals("ENCODING")) {
605                 handleEncoding(paramValue);
606             } else if (paramName.equals("CHARSET")) {
607                 handleCharset(paramValue);
608             } else if (paramName.equals("LANGUAGE")) {
609                 handleLanguage(paramValue);
610             } else if (paramName.startsWith("X-")) {
611                 handleAnyParam(paramName, paramValue);
612             } else {
613                 throw new VCardException("Unknown type \"" + paramName + "\"");
614             }
615         } else {
616             handleParamWithoutName(strArray[0]);
617         }
618     }
619 
620     /**
621      * vCard 3.0 parser implementation may throw VCardException.
622      */
623     @SuppressWarnings("unused")
handleParamWithoutName(final String paramValue)624     protected void handleParamWithoutName(final String paramValue) throws VCardException {
625         handleType(paramValue);
626     }
627 
628     /*
629      * ptypeval = knowntype / "X-" word
630      */
handleType(final String ptypeval)631     protected void handleType(final String ptypeval) {
632         if (!(getKnownTypeSet().contains(ptypeval.toUpperCase())
633                 || ptypeval.startsWith("X-"))
634                 && !mUnknownTypeSet.contains(ptypeval)) {
635             mUnknownTypeSet.add(ptypeval);
636             Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval));
637         }
638         mInterpreter.propertyParamType("TYPE");
639         mInterpreter.propertyParamValue(ptypeval);
640     }
641 
642     /*
643      * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word
644      */
handleValue(final String pvalueval)645     protected void handleValue(final String pvalueval) {
646         if (!(getKnownValueSet().contains(pvalueval.toUpperCase())
647                 || pvalueval.startsWith("X-")
648                 || mUnknownValueSet.contains(pvalueval))) {
649             mUnknownValueSet.add(pvalueval);
650             Log.w(LOG_TAG, String.format(
651                     "The value unsupported by TYPE of %s: ", getVersion(), pvalueval));
652         }
653         mInterpreter.propertyParamType("VALUE");
654         mInterpreter.propertyParamValue(pvalueval);
655     }
656 
657     /*
658      * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word
659      */
handleEncoding(String pencodingval)660     protected void handleEncoding(String pencodingval) throws VCardException {
661         if (getAvailableEncodingSet().contains(pencodingval) ||
662                 pencodingval.startsWith("X-")) {
663             mInterpreter.propertyParamType("ENCODING");
664             mInterpreter.propertyParamValue(pencodingval);
665             mCurrentEncoding = pencodingval;
666         } else {
667             throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
668         }
669     }
670 
671     /**
672      * <p>
673      * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521),
674      * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc.
675      * We allow any charset.
676      * </p>
677      */
handleCharset(String charsetval)678     protected void handleCharset(String charsetval) {
679         mInterpreter.propertyParamType("CHARSET");
680         mInterpreter.propertyParamValue(charsetval);
681     }
682 
683     /**
684      * See also Section 7.1 of RFC 1521
685      */
handleLanguage(String langval)686     protected void handleLanguage(String langval) throws VCardException {
687         String[] strArray = langval.split("-");
688         if (strArray.length != 2) {
689             throw new VCardException("Invalid Language: \"" + langval + "\"");
690         }
691         String tmp = strArray[0];
692         int length = tmp.length();
693         for (int i = 0; i < length; i++) {
694             if (!isAsciiLetter(tmp.charAt(i))) {
695                 throw new VCardException("Invalid Language: \"" + langval + "\"");
696             }
697         }
698         tmp = strArray[1];
699         length = tmp.length();
700         for (int i = 0; i < length; i++) {
701             if (!isAsciiLetter(tmp.charAt(i))) {
702                 throw new VCardException("Invalid Language: \"" + langval + "\"");
703             }
704         }
705         mInterpreter.propertyParamType(VCardConstants.PARAM_LANGUAGE);
706         mInterpreter.propertyParamValue(langval);
707     }
708 
isAsciiLetter(char ch)709     private boolean isAsciiLetter(char ch) {
710         if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
711             return true;
712         }
713         return false;
714     }
715 
716     /**
717      * Mainly for "X-" type. This accepts any kind of type without check.
718      */
handleAnyParam(String paramName, String paramValue)719     protected void handleAnyParam(String paramName, String paramValue) {
720         mInterpreter.propertyParamType(paramName);
721         mInterpreter.propertyParamValue(paramValue);
722     }
723 
handlePropertyValue(String propertyName, String propertyValue)724     protected void handlePropertyValue(String propertyName, String propertyValue)
725             throws IOException, VCardException {
726         final String upperEncoding = mCurrentEncoding.toUpperCase();
727         if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) {
728             final long start = System.currentTimeMillis();
729             final String result = getQuotedPrintable(propertyValue);
730             final ArrayList<String> v = new ArrayList<String>();
731             v.add(result);
732             mInterpreter.propertyValues(v);
733             mTimeHandleQuotedPrintable += System.currentTimeMillis() - start;
734         } else if (upperEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64)
735                 || upperEncoding.equals(VCardConstants.PARAM_ENCODING_B)) {
736             final long start = System.currentTimeMillis();
737             // It is very rare, but some BASE64 data may be so big that
738             // OutOfMemoryError occurs. To ignore such cases, use try-catch.
739             try {
740                 final ArrayList<String> arrayList = new ArrayList<String>();
741                 arrayList.add(getBase64(propertyValue));
742                 mInterpreter.propertyValues(arrayList);
743             } catch (OutOfMemoryError error) {
744                 Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!");
745                 mInterpreter.propertyValues(null);
746             }
747             mTimeHandleBase64 += System.currentTimeMillis() - start;
748         } else {
749             if (!(upperEncoding.equals("7BIT") || upperEncoding.equals("8BIT") ||
750                     upperEncoding.startsWith("X-"))) {
751                 Log.w(LOG_TAG,
752                         String.format("The encoding \"%s\" is unsupported by vCard %s",
753                                 mCurrentEncoding, getVersionString()));
754             }
755 
756             // Some device uses line folding defined in RFC 2425, which is not allowed
757             // in vCard 2.1 (while needed in vCard 3.0).
758             //
759             // e.g.
760             // BEGIN:VCARD
761             // VERSION:2.1
762             // N:;Omega;;;
763             // EMAIL;INTERNET:"Omega"
764             //   <omega@example.com>
765             // FN:Omega
766             // END:VCARD
767             //
768             // The vCard above assumes that email address should become:
769             // "Omega" <omega@example.com>
770             //
771             // But vCard 2.1 requires Quote-Printable when a line contains line break(s).
772             //
773             // For more information about line folding,
774             // see "5.8.1. Line delimiting and folding" in RFC 2425.
775             //
776             // We take care of this case more formally in vCard 3.0, so we only need to
777             // do this in vCard 2.1.
778             if (getVersion() == VCardConfig.VERSION_21) {
779                 StringBuilder builder = null;
780                 while (true) {
781                     final String nextLine = peekLine();
782                     // We don't need to care too much about this exceptional case,
783                     // but we should not wrongly eat up "END:VCARD", since it critically
784                     // breaks this parser's state machine.
785                     // Thus we roughly look over the next line and confirm it is at least not
786                     // "END:VCARD". This extra fee is worth paying. This is exceptional
787                     // anyway.
788                     if (!TextUtils.isEmpty(nextLine) &&
789                             nextLine.charAt(0) == ' ' &&
790                             !"END:VCARD".contains(nextLine.toUpperCase())) {
791                         getLine();  // Drop the next line.
792 
793                         if (builder == null) {
794                             builder = new StringBuilder();
795                             builder.append(propertyValue);
796                         }
797                         builder.append(nextLine.substring(1));
798                     } else {
799                         break;
800                     }
801                 }
802                 if (builder != null) {
803                     propertyValue = builder.toString();
804                 }
805             }
806 
807             final long start = System.currentTimeMillis();
808             ArrayList<String> v = new ArrayList<String>();
809             v.add(maybeUnescapeText(propertyValue));
810             mInterpreter.propertyValues(v);
811             mTimeHandleMiscPropertyValue += System.currentTimeMillis() - start;
812         }
813     }
814 
815     /**
816      * <p>
817      * Parses and returns Quoted-Printable.
818      * </p>
819      *
820      * @param firstString The string following a parameter name and attributes.
821      *            Example: "string" in
822      *            "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r".
823      * @return whole Quoted-Printable string, including a given argument and
824      *         following lines. Excludes the last empty line following to Quoted
825      *         Printable lines.
826      * @throws IOException
827      * @throws VCardException
828      */
getQuotedPrintable(String firstString)829     private String getQuotedPrintable(String firstString) throws IOException, VCardException {
830         // Specifically, there may be some padding between = and CRLF.
831         // See the following:
832         //
833         // qp-line := *(qp-segment transport-padding CRLF)
834         // qp-part transport-padding
835         // qp-segment := qp-section *(SPACE / TAB) "="
836         // ; Maximum length of 76 characters
837         //
838         // e.g. (from RFC 2045)
839         // Now's the time =
840         // for all folk to come=
841         // to the aid of their country.
842         if (firstString.trim().endsWith("=")) {
843             // remove "transport-padding"
844             int pos = firstString.length() - 1;
845             while (firstString.charAt(pos) != '=') {
846             }
847             StringBuilder builder = new StringBuilder();
848             builder.append(firstString.substring(0, pos + 1));
849             builder.append("\r\n");
850             String line;
851             while (true) {
852                 line = getLine();
853                 if (line == null) {
854                     throw new VCardException("File ended during parsing a Quoted-Printable String");
855                 }
856                 if (line.trim().endsWith("=")) {
857                     // remove "transport-padding"
858                     pos = line.length() - 1;
859                     while (line.charAt(pos) != '=') {
860                     }
861                     builder.append(line.substring(0, pos + 1));
862                     builder.append("\r\n");
863                 } else {
864                     builder.append(line);
865                     break;
866                 }
867             }
868             return builder.toString();
869         } else {
870             return firstString;
871         }
872     }
873 
getBase64(String firstString)874     protected String getBase64(String firstString) throws IOException, VCardException {
875         StringBuilder builder = new StringBuilder();
876         builder.append(firstString);
877 
878         while (true) {
879             String line = getLine();
880             if (line == null) {
881                 throw new VCardException("File ended during parsing BASE64 binary");
882             }
883             if (line.length() == 0) {
884                 break;
885             }
886             builder.append(line);
887         }
888 
889         return builder.toString();
890     }
891 
892     /**
893      * <p>
894      * Mainly for "ADR", "ORG", and "N"
895      * </p>
896      */
897     /*
898      * addressparts = 0*6(strnosemi ";") strnosemi ; PO Box, Extended Addr,
899      * Street, Locality, Region, Postal Code, Country Name orgparts =
900      * *(strnosemi ";") strnosemi ; First is Organization Name, remainder are
901      * Organization Units. nameparts = 0*4(strnosemi ";") strnosemi ; Family,
902      * Given, Middle, Prefix, Suffix. ; Example:Public;John;Q.;Reverend Dr.;III,
903      * Esq. strnosemi = *(*nonsemi ("\;" / "\" CRLF)) *nonsemi ; To include a
904      * semicolon in this string, it must be escaped ; with a "\" character. We
905      * do not care the number of "strnosemi" here. We are not sure whether we
906      * should add "\" CRLF to each value. We exclude them for now.
907      */
handleMultiplePropertyValue(String propertyName, String propertyValue)908     protected void handleMultiplePropertyValue(String propertyName, String propertyValue)
909             throws IOException, VCardException {
910         // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some
911         // softwares/devices
912         // emit such data.
913         if (mCurrentEncoding.equalsIgnoreCase("QUOTED-PRINTABLE")) {
914             propertyValue = getQuotedPrintable(propertyValue);
915         }
916 
917         mInterpreter.propertyValues(VCardUtils.constructListFromValue(propertyValue,
918                 getVersion()));
919     }
920 
921     /*
922      * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an
923      * error toward the AGENT property.
924      * // TODO: Support AGENT property.
925      * item =
926      * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws]
927      * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"
928      */
handleAgent(final String propertyValue)929     protected void handleAgent(final String propertyValue) throws VCardException {
930         if (!propertyValue.toUpperCase().contains("BEGIN:VCARD")) {
931             // Apparently invalid line seen in Windows Mobile 6.5. Ignore them.
932             return;
933         } else {
934             throw new VCardAgentNotSupportedException("AGENT Property is not supported now.");
935         }
936     }
937 
938     /**
939      * For vCard 3.0.
940      */
maybeUnescapeText(final String text)941     protected String maybeUnescapeText(final String text) {
942         return text;
943     }
944 
945     /**
946      * Returns unescaped String if the character should be unescaped. Return
947      * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";"
948      * while "\x" should not be.
949      */
maybeUnescapeCharacter(final char ch)950     protected String maybeUnescapeCharacter(final char ch) {
951         return unescapeCharacter(ch);
952     }
953 
unescapeCharacter(final char ch)954     /* package */ static String unescapeCharacter(final char ch) {
955         // Original vCard 2.1 specification does not allow transformation
956         // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous
957         // implementation of
958         // this class allowed them, so keep it as is.
959         if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') {
960             return String.valueOf(ch);
961         } else {
962             return null;
963         }
964     }
965 
showPerformanceInfo()966     private void showPerformanceInfo() {
967         Log.d(LOG_TAG, "Total parsing time:  " + mTimeTotal + " ms");
968         Log.d(LOG_TAG, "Total readLine time: " + mReader.getTotalmillisecond() + " ms");
969         Log.d(LOG_TAG, "Time for handling the beggining of the record: " + mTimeReadStartRecord
970                 + " ms");
971         Log.d(LOG_TAG, "Time for handling the end of the record: " + mTimeReadEndRecord + " ms");
972         Log.d(LOG_TAG, "Time for parsing line, and handling group: " + mTimeParseLineAndHandleGroup
973                 + " ms");
974         Log.d(LOG_TAG, "Time for parsing ADR, ORG, and N fields:" + mTimeParseAdrOrgN + " ms");
975         Log.d(LOG_TAG, "Time for parsing property values: " + mTimeParsePropertyValues + " ms");
976         Log.d(LOG_TAG, "Time for handling normal property values: " + mTimeHandleMiscPropertyValue
977                 + " ms");
978         Log.d(LOG_TAG, "Time for handling Quoted-Printable: " + mTimeHandleQuotedPrintable + " ms");
979         Log.d(LOG_TAG, "Time for handling Base64: " + mTimeHandleBase64 + " ms");
980     }
981 
982     /**
983      * @return {@link VCardConfig#VERSION_21}
984      */
getVersion()985     protected int getVersion() {
986         return VCardConfig.VERSION_21;
987     }
988 
989     /**
990      * @return {@link VCardConfig#VERSION_30}
991      */
getVersionString()992     protected String getVersionString() {
993         return VCardConstants.VERSION_V21;
994     }
995 
getKnownPropertyNameSet()996     protected Set<String> getKnownPropertyNameSet() {
997         return VCardParser_V21.sKnownPropertyNameSet;
998     }
999 
getKnownTypeSet()1000     protected Set<String> getKnownTypeSet() {
1001         return VCardParser_V21.sKnownTypeSet;
1002     }
1003 
getKnownValueSet()1004     protected Set<String> getKnownValueSet() {
1005         return VCardParser_V21.sKnownValueSet;
1006     }
1007 
getAvailableEncodingSet()1008     protected Set<String> getAvailableEncodingSet() {
1009         return VCardParser_V21.sAvailableEncoding;
1010     }
1011 
getDefaultEncoding()1012     protected String getDefaultEncoding() {
1013         return DEFAULT_ENCODING;
1014     }
1015 
1016 
parse(InputStream is, VCardInterpreter interpreter)1017     public void parse(InputStream is, VCardInterpreter interpreter)
1018             throws IOException, VCardException {
1019         if (is == null) {
1020             throw new NullPointerException("InputStream must not be null.");
1021         }
1022 
1023         final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
1024         mReader = new CustomBufferedReader(tmpReader);
1025 
1026         mInterpreter = (interpreter != null ? interpreter : new EmptyInterpreter());
1027 
1028         final long start = System.currentTimeMillis();
1029         if (mInterpreter != null) {
1030             mInterpreter.start();
1031         }
1032         parseVCardFile();
1033         if (mInterpreter != null) {
1034             mInterpreter.end();
1035         }
1036         mTimeTotal += System.currentTimeMillis() - start;
1037 
1038         if (VCardConfig.showPerformanceLog()) {
1039             showPerformanceInfo();
1040         }
1041     }
1042 
cancel()1043     public final void cancel() {
1044         mCanceled = true;
1045     }
1046 }
1047