• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.thirdparty.publicsuffix;
18 
19 import com.google.common.annotations.GwtCompatible;
20 import com.google.common.base.Joiner;
21 import com.google.common.collect.ImmutableMap;
22 import com.google.common.collect.Lists;
23 
24 import java.util.List;
25 
26 /**
27  * Parser for a map of reversed domain names stored as a serialized radix tree.
28  */
29 @GwtCompatible
30 class TrieParser {
31 
32   private static final Joiner PREFIX_JOINER = Joiner.on("");
33 
34   /**
35    * Parses a serialized trie representation of a map of reversed public
36    * suffixes into an immutable map of public suffixes.
37    */
parseTrie(CharSequence encoded)38   static ImmutableMap<String, PublicSuffixType> parseTrie(CharSequence encoded) {
39     ImmutableMap.Builder<String, PublicSuffixType> builder = ImmutableMap.builder();
40     int encodedLen = encoded.length();
41     int idx = 0;
42     while (idx < encodedLen) {
43       idx += doParseTrieToBuilder(
44           Lists.<CharSequence>newLinkedList(),
45           encoded.subSequence(idx, encodedLen),
46           builder);
47     }
48     return builder.build();
49   }
50 
51   /**
52    * Parses a trie node and returns the number of characters consumed.
53    *
54    * @param stack The prefixes that preceed the characters represented by this
55    *     node. Each entry of the stack is in reverse order.
56    * @param encoded The serialized trie.
57    * @param builder A map builder to which all entries will be added.
58    * @return The number of characters consumed from {@code encoded}.
59    */
doParseTrieToBuilder( List<CharSequence> stack, CharSequence encoded, ImmutableMap.Builder<String, PublicSuffixType> builder)60   private static int doParseTrieToBuilder(
61       List<CharSequence> stack,
62       CharSequence encoded,
63       ImmutableMap.Builder<String, PublicSuffixType> builder) {
64 
65     int encodedLen = encoded.length();
66     int idx = 0;
67     char c = '\0';
68 
69     // Read all of the characters for this node.
70     for ( ; idx < encodedLen; idx++) {
71       c = encoded.charAt(idx);
72       if (c == '&' || c == '?' || c == '!' || c == ':' || c == ',') {
73         break;
74       }
75     }
76 
77     stack.add(0, reverse(encoded.subSequence(0, idx)));
78 
79     if (c == '!' || c == '?' || c == ':' || c == ',') {
80       // '!' represents an interior node that represents an ICANN entry in the map.
81       // '?' represents a leaf node, which represents an ICANN entry in map.
82       // ':' represents an interior node that represents a private entry in the map
83       // ',' represents a leaf node, which represents a private entry in the map.
84       String domain = PREFIX_JOINER.join(stack);
85       if (domain.length() > 0) {
86         builder.put(domain, PublicSuffixType.fromCode(c));
87       }
88     }
89     idx++;
90 
91     if (c != '?' && c != ',') {
92       while (idx < encodedLen) {
93         // Read all the children
94         idx += doParseTrieToBuilder(stack, encoded.subSequence(idx, encodedLen), builder);
95         if (encoded.charAt(idx) == '?' || encoded.charAt(idx) == ',') {
96           // An extra '?' or ',' after a child node indicates the end of all children of this node.
97           idx++;
98           break;
99         }
100       }
101     }
102     stack.remove(0);
103     return idx;
104   }
105 
106   /**
107    * Reverses a character sequence. This is borrowed from
108    * https://code.google.com/p/google-web-toolkit/source/detail?r=11591#
109    * and can be replaced with a simple {@code StringBuffer#reverse} once GWT 2.6 is available.
110    */
reverse(CharSequence s)111   private static CharSequence reverse(CharSequence s) {
112     int length = s.length();
113     if (length <= 1) {
114       return s;
115     }
116 
117     char[] buffer = new char[length];
118     buffer[0] = s.charAt(length - 1);
119 
120     for (int i = 1; i < length; i++) {
121       buffer[i] = s.charAt(length - 1 - i);
122       if (Character.isSurrogatePair(buffer[i], buffer[i - 1])) {
123         swap(buffer, i - 1, i);
124       }
125     }
126 
127     return new String(buffer);
128   }
129 
swap(char[] buffer, int f, int s)130   private static void swap(char[] buffer, int f, int s) {
131     char tmp = buffer[f];
132     buffer[f] = buffer[s];
133     buffer[s] = tmp;
134   }
135 }
136