• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2007 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.net;
18 
19 import java.util.ArrayList;
20 import java.util.HashMap;
21 import java.util.List;
22 import java.util.Set;
23 import java.util.StringTokenizer;
24 
/**
 *
 * Sanitizes the Query portion of a URL. Simple example:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe_User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized, which
 * // converts the ' ' to an '_'. The ' ' is converted because the default
 * // unregistered parameter sanitizer does not allow any special characters,
 * // and ' ' is a special character.)
 * </code>
 *
 * Register ValueSanitizers to customize the way individual
 * parameters are sanitized:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User". (The string is first decoded, which
 * // converts the '+' to a ' '. The space-legal sanitizer then leaves the
 * // ' ' in place, because ' ' is a legal character for it.)
 * </code>
 *
 * There are several ways to create ValueSanitizers. In order of increasing
 * sophistication:
 * <ol>
 * <li>Call one of the UrlQuerySanitizer.getXXX() methods.
 * <li>Construct your own instance of
 * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
 * <li>Implement UrlQuerySanitizer.ValueSanitizer to define your own value
 * sanitizer.
 * </ol>
 *
 */
61 public class UrlQuerySanitizer {
62 
63     /**
64      * A simple tuple that holds parameter-value pairs.
65      *
66      */
67     public class ParameterValuePair {
68         /**
69          * Construct a parameter-value tuple.
70          * @param parameter an unencoded parameter
71          * @param value an unencoded value
72          */
ParameterValuePair(String parameter, String value)73         public ParameterValuePair(String parameter,
74                 String value) {
75             mParameter = parameter;
76             mValue = value;
77         }
78         /**
79          * The unencoded parameter
80          */
81         public String mParameter;
82         /**
83          * The unencoded value
84          */
85         public String mValue;
86     }
87 
88     final private HashMap<String, ValueSanitizer> mSanitizers =
89         new HashMap<String, ValueSanitizer>();
90     final private HashMap<String, String> mEntries =
91         new HashMap<String, String>();
92     final private ArrayList<ParameterValuePair> mEntriesList =
93         new ArrayList<ParameterValuePair>();
94     private boolean mAllowUnregisteredParamaters;
95     private boolean mPreferFirstRepeatedParameter;
96     private ValueSanitizer mUnregisteredParameterValueSanitizer =
97         getAllIllegal();
98 
99     /**
100      * A functor used to sanitize a single query value.
101      *
102      */
103     public static interface ValueSanitizer {
104         /**
105          * Sanitize an unencoded value.
106          * @param value
107          * @return the sanitized unencoded value
108          */
sanitize(String value)109         public String sanitize(String value);
110     }
111 
112     /**
113      * Sanitize values based on which characters they contain. Illegal
114      * characters are replaced with either space or '_', depending upon
115      * whether space is a legal character or not.
116      */
117     public static class IllegalCharacterValueSanitizer implements
118         ValueSanitizer {
119         private int mFlags;
120 
121         /**
122          * Allow space (' ') characters.
123          */
124         public final static int SPACE_OK =              1 << 0;
125         /**
126          * Allow whitespace characters other than space. The
127          * other whitespace characters are
128          * '\t' '\f' '\n' '\r' and '\0x000b' (vertical tab)
129          */
130         public final static int OTHER_WHITESPACE_OK =  1 << 1;
131         /**
132          * Allow characters with character codes 128 to 255.
133          */
134         public final static int NON_7_BIT_ASCII_OK =    1 << 2;
135         /**
136          * Allow double quote characters. ('"')
137          */
138         public final static int DQUOTE_OK =             1 << 3;
139         /**
140          * Allow single quote characters. ('\'')
141          */
142         public final static int SQUOTE_OK =             1 << 4;
143         /**
144          * Allow less-than characters. ('<')
145          */
146         public final static int LT_OK =                 1 << 5;
147         /**
148          * Allow greater-than characters. ('>')
149          */
150         public final static int GT_OK =                 1 << 6;
151         /**
152          * Allow ampersand characters ('&')
153          */
154         public final static int AMP_OK =                1 << 7;
155         /**
156          * Allow percent-sign characters ('%')
157          */
158         public final static int PCT_OK =                1 << 8;
159         /**
160          * Allow nul characters ('\0')
161          */
162         public final static int NUL_OK =                1 << 9;
163         /**
164          * Allow text to start with a script URL
165          * such as "javascript:" or "vbscript:"
166          */
167         public final static int SCRIPT_URL_OK =         1 << 10;
168 
169         /**
170          * Mask with all fields set to OK
171          */
172         public final static int ALL_OK =                0x7ff;
173 
174         /**
175          * Mask with both regular space and other whitespace OK
176          */
177         public final static int ALL_WHITESPACE_OK =
178             SPACE_OK | OTHER_WHITESPACE_OK;
179 
180 
181         // Common flag combinations:
182 
183         /**
184          * <ul>
185          * <li>Deny all special characters.
186          * <li>Deny script URLs.
187          * </ul>
188          */
189         public final static int ALL_ILLEGAL =
190             0;
191         /**
192          * <ul>
193          * <li>Allow all special characters except Nul. ('\0').
194          * <li>Allow script URLs.
195          * </ul>
196          */
197         public final static int ALL_BUT_NUL_LEGAL =
198             ALL_OK & ~NUL_OK;
199         /**
200          * <ul>
201          * <li>Allow all special characters except for:
202          * <ul>
203          *  <li>whitespace characters
204          *  <li>Nul ('\0')
205          * </ul>
206          * <li>Allow script URLs.
207          * </ul>
208          */
209         public final static int ALL_BUT_WHITESPACE_LEGAL =
210             ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
211         /**
212          * <ul>
213          * <li>Allow characters used by encoded URLs.
214          * <li>Deny script URLs.
215          * </ul>
216          */
217         public final static int URL_LEGAL =
218             NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
219         /**
220          * <ul>
221          * <li>Allow characters used by encoded URLs.
222          * <li>Allow spaces.
223          * <li>Deny script URLs.
224          * </ul>
225          */
226         public final static int URL_AND_SPACE_LEGAL =
227             URL_LEGAL | SPACE_OK;
228         /**
229          * <ul>
230          * <li>Allow ampersand.
231          * <li>Deny script URLs.
232          * </ul>
233          */
234         public final static int AMP_LEGAL =
235             AMP_OK;
236         /**
237          * <ul>
238          * <li>Allow ampersand.
239          * <li>Allow space.
240          * <li>Deny script URLs.
241          * </ul>
242          */
243         public final static int AMP_AND_SPACE_LEGAL =
244             AMP_OK | SPACE_OK;
245         /**
246          * <ul>
247          * <li>Allow space.
248          * <li>Deny script URLs.
249          * </ul>
250          */
251         public final static int SPACE_LEGAL =
252             SPACE_OK;
253         /**
254          * <ul>
255          * <li>Allow all but.
256          * <ul>
257          *  <li>Nul ('\0')
258          *  <li>Angle brackets ('<', '>')
259          * </ul>
260          * <li>Deny script URLs.
261          * </ul>
262          */
263         public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
264             ALL_OK & ~(NUL_OK | LT_OK | GT_OK);
265 
266         /**
267          *  Script URL definitions
268          */
269 
270         private final static String JAVASCRIPT_PREFIX = "javascript:";
271 
272         private final static String VBSCRIPT_PREFIX = "vbscript:";
273 
274         private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
275                 JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());
276 
277         /**
278          * Construct a sanitizer. The parameters set the behavior of the
279          * sanitizer.
280          * @param flags some combination of the XXX_OK flags.
281          */
IllegalCharacterValueSanitizer( int flags)282         public IllegalCharacterValueSanitizer(
283             int flags) {
284             mFlags = flags;
285         }
286         /**
287          * Sanitize a value.
288          * <ol>
289          * <li>If script URLs are not OK, the will be removed.
290          * <li>If neither spaces nor other white space is OK, then
291          * white space will be trimmed from the beginning and end of
292          * the URL. (Just the actual white space characters are trimmed, not
293          * other control codes.)
294          * <li> Illegal characters will be replaced with
295          * either ' ' or '_', depending on whether a space is itself a
296          * legal character.
297          * </ol>
298          * @param value
299          * @return the sanitized value
300          */
sanitize(String value)301         public String sanitize(String value) {
302             if (value == null) {
303                 return null;
304             }
305             int length = value.length();
306             if ((mFlags & SCRIPT_URL_OK) != 0) {
307                 if (length >= MIN_SCRIPT_PREFIX_LENGTH) {
308                     String asLower = value.toLowerCase();
309                     if (asLower.startsWith(JAVASCRIPT_PREFIX)  ||
310                         asLower.startsWith(VBSCRIPT_PREFIX)) {
311                         return "";
312                     }
313                 }
314             }
315 
316             // If whitespace isn't OK, get rid of whitespace at beginning
317             // and end of value.
318             if ( (mFlags & ALL_WHITESPACE_OK) == 0) {
319                 value = trimWhitespace(value);
320                 // The length could have changed, so we need to correct
321                 // the length variable.
322                 length = value.length();
323             }
324 
325             StringBuilder stringBuilder = new StringBuilder(length);
326             for(int i = 0; i < length; i++) {
327                 char c = value.charAt(i);
328                 if (!characterIsLegal(c)) {
329                     if ((mFlags & SPACE_OK) != 0) {
330                         c = ' ';
331                     }
332                     else {
333                         c = '_';
334                     }
335                 }
336                 stringBuilder.append(c);
337             }
338             return stringBuilder.toString();
339         }
340 
341         /**
342          * Trim whitespace from the beginning and end of a string.
343          * <p>
344          * Note: can't use {@link String#trim} because {@link String#trim} has a
345          * different definition of whitespace than we want.
346          * @param value the string to trim
347          * @return the trimmed string
348          */
trimWhitespace(String value)349         private String trimWhitespace(String value) {
350             int start = 0;
351             int last = value.length() - 1;
352             int end = last;
353             while (start <= end && isWhitespace(value.charAt(start))) {
354                 start++;
355             }
356             while (end >= start && isWhitespace(value.charAt(end))) {
357                 end--;
358             }
359             if (start == 0 && end == last) {
360                 return value;
361             }
362             return value.substring(start, end + 1);
363         }
364 
365         /**
366          * Check if c is whitespace.
367          * @param c character to test
368          * @return true if c is a whitespace character
369          */
isWhitespace(char c)370         private boolean isWhitespace(char c) {
371             switch(c) {
372             case ' ':
373             case '\t':
374             case '\f':
375             case '\n':
376             case '\r':
377             case 11: /* VT */
378                 return true;
379             default:
380                 return false;
381             }
382         }
383 
384         /**
385          * Check whether an individual character is legal. Uses the
386          * flag bit-set passed into the constructor.
387          * @param c
388          * @return true if c is a legal character
389          */
characterIsLegal(char c)390         private boolean characterIsLegal(char c) {
391             switch(c) {
392             case ' ' : return (mFlags & SPACE_OK) != 0;
393             case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
394               return (mFlags & OTHER_WHITESPACE_OK) != 0;
395             case '\"': return (mFlags & DQUOTE_OK) != 0;
396             case '\'': return (mFlags & SQUOTE_OK) != 0;
397             case '<' : return (mFlags & LT_OK) != 0;
398             case '>' : return (mFlags & GT_OK) != 0;
399             case '&' : return (mFlags & AMP_OK) != 0;
400             case '%' : return (mFlags & PCT_OK) != 0;
401             case '\0': return (mFlags & NUL_OK) != 0;
402             default  : return (c >= 32 && c < 127) ||
403                 ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
404             }
405         }
406     }
407 
408     /**
409      * Get the current value sanitizer used when processing
410      * unregistered parameter values.
411      * <p>
412      * <b>Note:</b> The default unregistered parameter value sanitizer is
413      * one that doesn't allow any special characters, similar to what
414      * is returned by calling createAllIllegal.
415      *
416      * @return the current ValueSanitizer used to sanitize unregistered
417      * parameter values.
418      */
getUnregisteredParameterValueSanitizer()419     public ValueSanitizer getUnregisteredParameterValueSanitizer() {
420         return mUnregisteredParameterValueSanitizer;
421     }
422 
423     /**
424      * Set the value sanitizer used when processing unregistered
425      * parameter values.
426      * @param sanitizer set the ValueSanitizer used to sanitize unregistered
427      * parameter values.
428      */
setUnregisteredParameterValueSanitizer( ValueSanitizer sanitizer)429     public void setUnregisteredParameterValueSanitizer(
430             ValueSanitizer sanitizer) {
431         mUnregisteredParameterValueSanitizer = sanitizer;
432     }
433 
434 
435     // Private fields for singleton sanitizers:
436 
437     private static final ValueSanitizer sAllIllegal =
438         new IllegalCharacterValueSanitizer(
439                 IllegalCharacterValueSanitizer.ALL_ILLEGAL);
440 
441     private static final ValueSanitizer sAllButNulLegal =
442         new IllegalCharacterValueSanitizer(
443                 IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);
444 
445     private static final ValueSanitizer sAllButWhitespaceLegal =
446         new IllegalCharacterValueSanitizer(
447                 IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);
448 
449     private static final ValueSanitizer sURLLegal =
450         new IllegalCharacterValueSanitizer(
451                 IllegalCharacterValueSanitizer.URL_LEGAL);
452 
453     private static final ValueSanitizer sUrlAndSpaceLegal =
454         new IllegalCharacterValueSanitizer(
455                 IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);
456 
457     private static final ValueSanitizer sAmpLegal =
458         new IllegalCharacterValueSanitizer(
459                 IllegalCharacterValueSanitizer.AMP_LEGAL);
460 
461     private static final ValueSanitizer sAmpAndSpaceLegal =
462         new IllegalCharacterValueSanitizer(
463                 IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);
464 
465     private static final ValueSanitizer sSpaceLegal =
466         new IllegalCharacterValueSanitizer(
467                 IllegalCharacterValueSanitizer.SPACE_LEGAL);
468 
469     private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
470         new IllegalCharacterValueSanitizer(
471                 IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);
472 
473     /**
474      * Return a value sanitizer that does not allow any special characters,
475      * and also does not allow script URLs.
476      * @return a value sanitizer
477      */
getAllIllegal()478     public static final ValueSanitizer getAllIllegal() {
479         return sAllIllegal;
480     }
481 
482     /**
483      * Return a value sanitizer that allows everything except Nul ('\0')
484      * characters. Script URLs are allowed.
485      * @return a value sanitizer
486      */
getAllButNulLegal()487     public static final ValueSanitizer getAllButNulLegal() {
488         return sAllButNulLegal;
489     }
490     /**
491      * Return a value sanitizer that allows everything except Nul ('\0')
492      * characters, space (' '), and other whitespace characters.
493      * Script URLs are allowed.
494      * @return a value sanitizer
495      */
getAllButWhitespaceLegal()496     public static final ValueSanitizer getAllButWhitespaceLegal() {
497         return sAllButWhitespaceLegal;
498     }
499     /**
500      * Return a value sanitizer that allows all the characters used by
501      * encoded URLs. Does not allow script URLs.
502      * @return a value sanitizer
503      */
getUrlLegal()504     public static final ValueSanitizer getUrlLegal() {
505         return sURLLegal;
506     }
507     /**
508      * Return a value sanitizer that allows all the characters used by
509      * encoded URLs and allows spaces, which are not technically legal
510      * in encoded URLs, but commonly appear anyway.
511      * Does not allow script URLs.
512      * @return a value sanitizer
513      */
getUrlAndSpaceLegal()514     public static final ValueSanitizer getUrlAndSpaceLegal() {
515         return sUrlAndSpaceLegal;
516     }
517     /**
518      * Return a value sanitizer that does not allow any special characters
519      * except ampersand ('&'). Does not allow script URLs.
520      * @return a value sanitizer
521      */
getAmpLegal()522     public static final ValueSanitizer getAmpLegal() {
523         return sAmpLegal;
524     }
525     /**
526      * Return a value sanitizer that does not allow any special characters
527      * except ampersand ('&') and space (' '). Does not allow script URLs.
528      * @return a value sanitizer
529      */
getAmpAndSpaceLegal()530     public static final ValueSanitizer getAmpAndSpaceLegal() {
531         return sAmpAndSpaceLegal;
532     }
533     /**
534      * Return a value sanitizer that does not allow any special characters
535      * except space (' '). Does not allow script URLs.
536      * @return a value sanitizer
537      */
getSpaceLegal()538     public static final ValueSanitizer getSpaceLegal() {
539         return sSpaceLegal;
540     }
541     /**
542      * Return a value sanitizer that allows any special characters
543      * except angle brackets ('<' and '>') and Nul ('\0').
544      * Allows script URLs.
545      * @return a value sanitizer
546      */
getAllButNulAndAngleBracketsLegal()547     public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
548         return sAllButNulAndAngleBracketsLegal;
549     }
550 
551     /**
552      * Constructs a UrlQuerySanitizer.
553      * <p>
554      * Defaults:
555      * <ul>
556      * <li>unregistered parameters are not allowed.
557      * <li>the last instance of a repeated parameter is preferred.
558      * <li>The default value sanitizer is an AllIllegal value sanitizer.
559      * <ul>
560      */
UrlQuerySanitizer()561     public UrlQuerySanitizer() {
562     }
563 
564     /**
565      * Constructs a UrlQuerySanitizer and parse a URL.
566      * This constructor is provided for convenience when the
567      * default parsing behavior is acceptable.
568      * <p>
569      * Because the URL is parsed before the constructor returns, there isn't
570      * a chance to configure the sanitizer to change the parsing behavior.
571      * <p>
572      * <code>
573      * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
574      * String name = sanitizer.getValue("name");
575      * </code>
576      * <p>
577      * Defaults:
578      * <ul>
579      * <li>unregistered parameters <em>are</em> allowed.
580      * <li>the last instance of a repeated parameter is preferred.
581      * <li>The default value sanitizer is an AllIllegal value sanitizer.
582      * <ul>
583      */
UrlQuerySanitizer(String url)584     public UrlQuerySanitizer(String url) {
585         setAllowUnregisteredParamaters(true);
586         parseUrl(url);
587     }
588 
589     /**
590      * Parse the query parameters out of an encoded URL.
591      * Works by extracting the query portion from the URL and then
592      * calling parseQuery(). If there is no query portion it is
593      * treated as if the query portion is an empty string.
594      * @param url the encoded URL to parse.
595      */
parseUrl(String url)596     public void parseUrl(String url) {
597         int queryIndex = url.indexOf('?');
598         String query;
599         if (queryIndex >= 0) {
600             query = url.substring(queryIndex + 1);
601         }
602         else {
603             query = "";
604         }
605         parseQuery(query);
606     }
607 
608     /**
609      * Parse a query. A query string is any number of parameter-value clauses
610      * separated by any non-zero number of ampersands. A parameter-value clause
611      * is a parameter followed by an equal sign, followed by a value. If the
612      * equal sign is missing, the value is assumed to be the empty string.
613      * @param query the query to parse.
614      */
parseQuery(String query)615     public void parseQuery(String query) {
616         clear();
617         // Split by '&'
618         StringTokenizer tokenizer = new StringTokenizer(query, "&");
619         while(tokenizer.hasMoreElements()) {
620             String attributeValuePair = tokenizer.nextToken();
621             if (attributeValuePair.length() > 0) {
622                 int assignmentIndex = attributeValuePair.indexOf('=');
623                 if (assignmentIndex < 0) {
624                     // No assignment found, treat as if empty value
625                     parseEntry(attributeValuePair, "");
626                 }
627                 else {
628                     parseEntry(attributeValuePair.substring(0, assignmentIndex),
629                             attributeValuePair.substring(assignmentIndex + 1));
630                 }
631             }
632         }
633     }
634 
635     /**
636      * Get a set of all of the parameters found in the sanitized query.
637      * <p>
638      * Note: Do not modify this set. Treat it as a read-only set.
639      * @return all the parameters found in the current query.
640      */
getParameterSet()641     public Set<String> getParameterSet() {
642         return mEntries.keySet();
643     }
644 
645     /**
646      * An array list of all of the parameter value pairs in the sanitized
647      * query, in the order they appeared in the query. May contain duplicate
648      * parameters.
649      * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
650      */
getParameterList()651     public List<ParameterValuePair> getParameterList() {
652         return mEntriesList;
653     }
654 
655     /**
656      * Check if a parameter exists in the current sanitized query.
657      * @param parameter the unencoded name of a parameter.
658      * @return true if the paramater exists in the current sanitized queary.
659      */
hasParameter(String parameter)660     public boolean hasParameter(String parameter) {
661         return mEntries.containsKey(parameter);
662     }
663 
664     /**
665      * Get the value for a parameter in the current sanitized query.
666      * Returns null if the parameter does not
667      * exit.
668      * @param parameter the unencoded name of a parameter.
669      * @return the sanitized unencoded value of the parameter,
670      * or null if the parameter does not exist.
671      */
getValue(String parameter)672     public String getValue(String parameter) {
673         return mEntries.get(parameter);
674     }
675 
676     /**
677      * Register a value sanitizer for a particular parameter. Can also be used
678      * to replace or remove an already-set value sanitizer.
679      * <p>
680      * Registering a non-null value sanitizer for a particular parameter
681      * makes that parameter a registered parameter.
682      * @param parameter an unencoded parameter name
683      * @param valueSanitizer the value sanitizer to use for a particular
684      * parameter. May be null in order to unregister that parameter.
685      * @see #getAllowUnregisteredParamaters()
686      */
registerParameter(String parameter, ValueSanitizer valueSanitizer)687     public void registerParameter(String parameter,
688             ValueSanitizer valueSanitizer) {
689         if (valueSanitizer == null) {
690             mSanitizers.remove(parameter);
691         }
692         mSanitizers.put(parameter, valueSanitizer);
693     }
694 
695     /**
696      * Register a value sanitizer for an array of parameters.
697      * @param parameters An array of unencoded parameter names.
698      * @param valueSanitizer
699      * @see #registerParameter
700      */
registerParameters(String[] parameters, ValueSanitizer valueSanitizer)701     public void registerParameters(String[] parameters,
702             ValueSanitizer valueSanitizer) {
703         int length = parameters.length;
704         for(int i = 0; i < length; i++) {
705             mSanitizers.put(parameters[i], valueSanitizer);
706         }
707     }
708 
709     /**
710      * Set whether or not unregistered parameters are allowed. If they
711      * are not allowed, then they will be dropped when a query is sanitized.
712      * <p>
713      * Defaults to false.
714      * @param allowUnregisteredParamaters true to allow unregistered parameters.
715      * @see #getAllowUnregisteredParamaters()
716      */
setAllowUnregisteredParamaters( boolean allowUnregisteredParamaters)717     public void setAllowUnregisteredParamaters(
718             boolean allowUnregisteredParamaters) {
719         mAllowUnregisteredParamaters = allowUnregisteredParamaters;
720     }
721 
722     /**
723      * Get whether or not unregistered parameters are allowed. If not
724      * allowed, they will be dropped when a query is parsed.
725      * @return true if unregistered parameters are allowed.
726      * @see #setAllowUnregisteredParamaters(boolean)
727      */
getAllowUnregisteredParamaters()728     public boolean getAllowUnregisteredParamaters() {
729         return mAllowUnregisteredParamaters;
730     }
731 
732     /**
733      * Set whether or not the first occurrence of a repeated parameter is
734      * preferred. True means the first repeated parameter is preferred.
735      * False means that the last repeated parameter is preferred.
736      * <p>
737      * The preferred parameter is the one that is returned when getParameter
738      * is called.
739      * <p>
740      * defaults to false.
741      * @param preferFirstRepeatedParameter True if the first repeated
742      * parameter is preferred.
743      * @see #getPreferFirstRepeatedParameter()
744      */
setPreferFirstRepeatedParameter( boolean preferFirstRepeatedParameter)745     public void setPreferFirstRepeatedParameter(
746             boolean preferFirstRepeatedParameter) {
747         mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
748     }
749 
750     /**
751      * Get whether or not the first occurrence of a repeated parameter is
752      * preferred.
753      * @return true if the first occurrence of a repeated parameter is
754      * preferred.
755      * @see #setPreferFirstRepeatedParameter(boolean)
756      */
getPreferFirstRepeatedParameter()757     public boolean getPreferFirstRepeatedParameter() {
758         return mPreferFirstRepeatedParameter;
759     }
760 
761     /**
762      * Parse an escaped parameter-value pair. The default implementation
763      * unescapes both the parameter and the value, then looks up the
764      * effective value sanitizer for the parameter and uses it to sanitize
765      * the value. If all goes well then addSanitizedValue is called with
766      * the unescaped parameter and the sanitized unescaped value.
767      * @param parameter an escaped parameter
768      * @param value an unsanitzied escaped value
769      */
parseEntry(String parameter, String value)770     protected void parseEntry(String parameter, String value) {
771         String unescapedParameter = unescape(parameter);
772          ValueSanitizer valueSanitizer =
773             getEffectiveValueSanitizer(unescapedParameter);
774 
775         if (valueSanitizer == null) {
776             return;
777         }
778         String unescapedValue = unescape(value);
779         String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
780         addSanitizedEntry(unescapedParameter, sanitizedValue);
781     }
782 
783     /**
784      * Record a sanitized parameter-value pair. Override if you want to
785      * do additional filtering or validation.
786      * @param parameter an unescaped parameter
787      * @param value a sanitized unescaped value
788      */
addSanitizedEntry(String parameter, String value)789     protected void addSanitizedEntry(String parameter, String value) {
790         mEntriesList.add(
791                 new ParameterValuePair(parameter, value));
792         if (mPreferFirstRepeatedParameter) {
793             if (mEntries.containsKey(parameter)) {
794                 return;
795             }
796         }
797         mEntries.put(parameter, value);
798     }
799 
800     /**
801      * Get the value sanitizer for a parameter. Returns null if there
802      * is no value sanitizer registered for the parameter.
803      * @param parameter the unescaped parameter
804      * @return the currently registered value sanitizer for this parameter.
805      * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
806      */
getValueSanitizer(String parameter)807     public ValueSanitizer getValueSanitizer(String parameter) {
808         return mSanitizers.get(parameter);
809     }
810 
811     /**
812      * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
813      * except if there is no value sanitizer registered for a parameter, and
814      * unregistered paramaters are allowed, then the default value sanitizer is
815      * returned.
816      * @param parameter an unescaped parameter
817      * @return the effective value sanitizer for a parameter.
818      */
getEffectiveValueSanitizer(String parameter)819     public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
820         ValueSanitizer sanitizer = getValueSanitizer(parameter);
821         if (sanitizer == null && mAllowUnregisteredParamaters) {
822             sanitizer = getUnregisteredParameterValueSanitizer();
823         }
824         return sanitizer;
825     }
826 
827     /**
828      * Unescape an escaped string.
829      * <ul>
830      * <li>'+' characters are replaced by
831      * ' ' characters.
832      * <li>Valid "%xx" escape sequences are replaced by the
833      * corresponding unescaped character.
834      * <li>Invalid escape sequences such as %1z", are passed through unchanged.
835      * <ol>
836      * @param string the escaped string
837      * @return the unescaped string.
838      */
unescape(String string)839     public String unescape(String string) {
840         // Early exit if no escaped characters.
841         int firstEscape = string.indexOf('%');
842         if ( firstEscape < 0) {
843             firstEscape = string.indexOf('+');
844             if (firstEscape < 0) {
845                 return string;
846             }
847         }
848 
849         int length = string.length();
850 
851         StringBuilder stringBuilder = new StringBuilder(length);
852         stringBuilder.append(string.substring(0, firstEscape));
853         for (int i = firstEscape; i < length; i++) {
854             char c = string.charAt(i);
855             if (c == '+') {
856                 c = ' ';
857             }
858             else if ( c == '%' && i + 2 < length) {
859                 char c1 = string.charAt(i + 1);
860                 char c2 = string.charAt(i + 2);
861                 if (isHexDigit(c1) && isHexDigit(c2)) {
862                     c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
863                     i += 2;
864                 }
865             }
866             stringBuilder.append(c);
867         }
868         return stringBuilder.toString();
869     }
870 
871     /**
872      * Test if a character is a hexidecimal digit. Both upper case and lower
873      * case hex digits are allowed.
874      * @param c the character to test
875      * @return true if c is a hex digit.
876      */
isHexDigit(char c)877     protected boolean isHexDigit(char c) {
878         return decodeHexDigit(c) >= 0;
879     }
880 
881     /**
882      * Convert a character that represents a hexidecimal digit into an integer.
883      * If the character is not a hexidecimal digit, then -1 is returned.
884      * Both upper case and lower case hex digits are allowed.
885      * @param c the hexidecimal digit.
886      * @return the integer value of the hexidecimal digit.
887      */
888 
decodeHexDigit(char c)889     protected int decodeHexDigit(char c) {
890         if (c >= '0' && c <= '9') {
891             return c - '0';
892         }
893         else if (c >= 'A' && c <= 'F') {
894             return c - 'A' + 10;
895         }
896         else if (c >= 'a' && c <= 'f') {
897             return c - 'a' + 10;
898         }
899         else {
900             return -1;
901         }
902     }
903 
904     /**
905      * Clear the existing entries. Called to get ready to parse a new
906      * query string.
907      */
clear()908     protected void clear() {
909         mEntries.clear();
910         mEntriesList.clear();
911     }
912 }
913 
914