1 /* 2 * Copyright (C) 2008 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 package com.google.thirdparty.publicsuffix; 16 17 import com.google.common.annotations.GwtCompatible; 18 import com.google.common.base.Joiner; 19 import com.google.common.collect.ImmutableMap; 20 import com.google.common.collect.Queues; 21 import java.util.Deque; 22 23 /** Parser for a map of reversed domain names stored as a serialized radix tree. */ 24 @GwtCompatible 25 final class TrieParser { 26 private static final Joiner PREFIX_JOINER = Joiner.on(""); 27 28 /** 29 * Parses a serialized trie representation of a map of reversed public suffixes into an immutable 30 * map of public suffixes. 31 */ parseTrie(CharSequence encoded)32 static ImmutableMap<String, PublicSuffixType> parseTrie(CharSequence encoded) { 33 ImmutableMap.Builder<String, PublicSuffixType> builder = ImmutableMap.builder(); 34 int encodedLen = encoded.length(); 35 int idx = 0; 36 while (idx < encodedLen) { 37 idx += doParseTrieToBuilder(Queues.<CharSequence>newArrayDeque(), encoded, idx, builder); 38 } 39 return builder.buildOrThrow(); 40 } 41 42 /** 43 * Parses a trie node and returns the number of characters consumed. 44 * 45 * @param stack The prefixes that precede the characters represented by this node. Each entry of 46 * the stack is in reverse order. 47 * @param encoded The serialized trie. 48 * @param start An index in the encoded serialized trie to begin reading characters from. 49 * @param builder A map builder to which all entries will be added. 50 * @return The number of characters consumed from {@code encoded}. 51 */ doParseTrieToBuilder( Deque<CharSequence> stack, CharSequence encoded, int start, ImmutableMap.Builder<String, PublicSuffixType> builder)52 private static int doParseTrieToBuilder( 53 Deque<CharSequence> stack, 54 CharSequence encoded, 55 int start, 56 ImmutableMap.Builder<String, PublicSuffixType> builder) { 57 58 int encodedLen = encoded.length(); 59 int idx = start; 60 char c = '\0'; 61 62 // Read all of the characters for this node. 63 for (; idx < encodedLen; idx++) { 64 c = encoded.charAt(idx); 65 if (c == '&' || c == '?' || c == '!' || c == ':' || c == ',') { 66 break; 67 } 68 } 69 70 stack.push(reverse(encoded.subSequence(start, idx))); 71 72 if (c == '!' || c == '?' || c == ':' || c == ',') { 73 // '!' represents an interior node that represents a REGISTRY entry in the map. 74 // '?' represents a leaf node, which represents a REGISTRY entry in map. 75 // ':' represents an interior node that represents a private entry in the map 76 // ',' represents a leaf node, which represents a private entry in the map. 77 String domain = PREFIX_JOINER.join(stack); 78 if (domain.length() > 0) { 79 builder.put(domain, PublicSuffixType.fromCode(c)); 80 } 81 } 82 idx++; 83 84 if (c != '?' && c != ',') { 85 while (idx < encodedLen) { 86 // Read all the children 87 idx += doParseTrieToBuilder(stack, encoded, idx, builder); 88 if (encoded.charAt(idx) == '?' || encoded.charAt(idx) == ',') { 89 // An extra '?' or ',' after a child node indicates the end of all children of this node. 90 idx++; 91 break; 92 } 93 } 94 } 95 stack.pop(); 96 return idx - start; 97 } 98 reverse(CharSequence s)99 private static CharSequence reverse(CharSequence s) { 100 return new StringBuilder(s).reverse(); 101 } 102 } 103