1 /* 2 * Copyright (C) 2008 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.thirdparty.publicsuffix; 18 19 import com.google.common.annotations.GwtCompatible; 20 import com.google.common.base.Joiner; 21 import com.google.common.collect.ImmutableMap; 22 import com.google.common.collect.Lists; 23 24 import java.util.List; 25 26 /** 27 * Parser for a map of reversed domain names stored as a serialized radix tree. 28 */ 29 @GwtCompatible 30 class TrieParser { 31 32 private static final Joiner PREFIX_JOINER = Joiner.on(""); 33 34 /** 35 * Parses a serialized trie representation of a map of reversed public 36 * suffixes into an immutable map of public suffixes. 37 */ parseTrie(CharSequence encoded)38 static ImmutableMap<String, PublicSuffixType> parseTrie(CharSequence encoded) { 39 ImmutableMap.Builder<String, PublicSuffixType> builder = ImmutableMap.builder(); 40 int encodedLen = encoded.length(); 41 int idx = 0; 42 while (idx < encodedLen) { 43 idx += doParseTrieToBuilder( 44 Lists.<CharSequence>newLinkedList(), 45 encoded.subSequence(idx, encodedLen), 46 builder); 47 } 48 return builder.build(); 49 } 50 51 /** 52 * Parses a trie node and returns the number of characters consumed. 53 * 54 * @param stack The prefixes that preceed the characters represented by this 55 * node. Each entry of the stack is in reverse order. 56 * @param encoded The serialized trie. 57 * @param builder A map builder to which all entries will be added. 58 * @return The number of characters consumed from {@code encoded}. 59 */ doParseTrieToBuilder( List<CharSequence> stack, CharSequence encoded, ImmutableMap.Builder<String, PublicSuffixType> builder)60 private static int doParseTrieToBuilder( 61 List<CharSequence> stack, 62 CharSequence encoded, 63 ImmutableMap.Builder<String, PublicSuffixType> builder) { 64 65 int encodedLen = encoded.length(); 66 int idx = 0; 67 char c = '\0'; 68 69 // Read all of the characters for this node. 70 for ( ; idx < encodedLen; idx++) { 71 c = encoded.charAt(idx); 72 if (c == '&' || c == '?' || c == '!' || c == ':' || c == ',') { 73 break; 74 } 75 } 76 77 stack.add(0, reverse(encoded.subSequence(0, idx))); 78 79 if (c == '!' || c == '?' || c == ':' || c == ',') { 80 // '!' represents an interior node that represents an ICANN entry in the map. 81 // '?' represents a leaf node, which represents an ICANN entry in map. 82 // ':' represents an interior node that represents a private entry in the map 83 // ',' represents a leaf node, which represents a private entry in the map. 84 String domain = PREFIX_JOINER.join(stack); 85 if (domain.length() > 0) { 86 builder.put(domain, PublicSuffixType.fromCode(c)); 87 } 88 } 89 idx++; 90 91 if (c != '?' && c != ',') { 92 while (idx < encodedLen) { 93 // Read all the children 94 idx += doParseTrieToBuilder(stack, encoded.subSequence(idx, encodedLen), builder); 95 if (encoded.charAt(idx) == '?' || encoded.charAt(idx) == ',') { 96 // An extra '?' or ',' after a child node indicates the end of all children of this node. 97 idx++; 98 break; 99 } 100 } 101 } 102 stack.remove(0); 103 return idx; 104 } 105 106 /** 107 * Reverses a character sequence. This is borrowed from 108 * https://code.google.com/p/google-web-toolkit/source/detail?r=11591# 109 * and can be replaced with a simple {@code StringBuffer#reverse} once GWT 2.6 is available. 110 */ reverse(CharSequence s)111 private static CharSequence reverse(CharSequence s) { 112 int length = s.length(); 113 if (length <= 1) { 114 return s; 115 } 116 117 char[] buffer = new char[length]; 118 buffer[0] = s.charAt(length - 1); 119 120 for (int i = 1; i < length; i++) { 121 buffer[i] = s.charAt(length - 1 - i); 122 if (Character.isSurrogatePair(buffer[i], buffer[i - 1])) { 123 swap(buffer, i - 1, i); 124 } 125 } 126 127 return new String(buffer); 128 } 129 swap(char[] buffer, int f, int s)130 private static void swap(char[] buffer, int f, int s) { 131 char tmp = buffer[f]; 132 buffer[f] = buffer[s]; 133 buffer[s] = tmp; 134 } 135 } 136