• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/lookup_string_in_fixed_set.h"
6 
7 #include <string.h>
8 
9 #include <algorithm>
10 #include <cstdint>
11 #include <limits>
12 #include <ostream>
13 #include <utility>
14 #include <vector>
15 
16 #include "base/base_paths.h"
17 #include "base/containers/span.h"
18 #include "base/files/file_path.h"
19 #include "base/files/file_util.h"
20 #include "base/path_service.h"
21 #include "base/strings/string_util.h"
22 #include "base/strings/stringprintf.h"
23 #include "testing/gtest/include/gtest/gtest.h"
24 
25 namespace net {
26 namespace {
27 namespace test1 {
28 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc"
29 }
30 namespace test3 {
31 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc.cc"
32 }
33 namespace test4 {
34 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc.cc"
35 }
36 namespace test5 {
37 #include "net/base/registry_controlled_domains/effective_tld_names_unittest5-inc.cc"
38 }
39 namespace test6 {
40 #include "net/base/registry_controlled_domains/effective_tld_names_unittest6-inc.cc"
41 }
42 
43 struct Expectation {
44   const char* const key;
45   int value;
46 };
47 
PrintTo(const Expectation & expectation,std::ostream * os)48 void PrintTo(const Expectation& expectation, std::ostream* os) {
49   *os << "{\"" << expectation.key << "\", " << expectation.value << "}";
50 }
51 
52 class LookupStringInFixedSetTest : public testing::TestWithParam<Expectation> {
53  protected:
LookupInGraph(base::span<const uint8_t> graph,const char * key)54   int LookupInGraph(base::span<const uint8_t> graph, const char* key) {
55     return LookupStringInFixedSet(graph, key, strlen(key));
56   }
57 };
58 
59 class Dafsa1Test : public LookupStringInFixedSetTest {};
60 
TEST_P(Dafsa1Test,BasicTest)61 TEST_P(Dafsa1Test, BasicTest) {
62   const Expectation& param = GetParam();
63   EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key));
64 }
65 
66 const Expectation kBasicTestCases[] = {
67     {"", -1},      {"j", -1},          {"jp", 0}, {"jjp", -1}, {"jpp", -1},
68     {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2},  {"b.c", 1},  {"priv.no", 4},
69 };
70 
71 // Helper function for EnumerateDafsaLanaguage.
RecursivelyEnumerateDafsaLanguage(const FixedSetIncrementalLookup & lookup,std::vector<char> * sequence,std::vector<std::string> * language)72 void RecursivelyEnumerateDafsaLanguage(const FixedSetIncrementalLookup& lookup,
73                                        std::vector<char>* sequence,
74                                        std::vector<std::string>* language) {
75   int result = lookup.GetResultForCurrentSequence();
76   if (result != kDafsaNotFound) {
77     std::string line(sequence->begin(), sequence->end());
78     line += base::StringPrintf(", %d", result);
79     language->emplace_back(std::move(line));
80   }
81   // Try appending each char value.
82   for (char c = std::numeric_limits<char>::min();; ++c) {
83     FixedSetIncrementalLookup continued_lookup = lookup;
84     if (continued_lookup.Advance(c)) {
85       sequence->push_back(c);
86       size_t saved_language_size = language->size();
87       RecursivelyEnumerateDafsaLanguage(continued_lookup, sequence, language);
88       CHECK_LT(saved_language_size, language->size())
89           << "DAFSA includes a branch to nowhere at node: "
90           << std::string(sequence->begin(), sequence->end());
91       sequence->pop_back();
92     }
93     if (c == std::numeric_limits<char>::max())
94       break;
95   }
96 }
97 
98 // Uses FixedSetIncrementalLookup to build a vector of every string in the
99 // language of the DAFSA.
EnumerateDafsaLanguage(base::span<const uint8_t> graph)100 std::vector<std::string> EnumerateDafsaLanguage(
101     base::span<const uint8_t> graph) {
102   FixedSetIncrementalLookup query(graph);
103   std::vector<char> sequence;
104   std::vector<std::string> language;
105   RecursivelyEnumerateDafsaLanguage(query, &sequence, &language);
106   return language;
107 }
108 
109 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
110                          Dafsa1Test,
111                          ::testing::ValuesIn(kBasicTestCases));
112 
113 class Dafsa3Test : public LookupStringInFixedSetTest {};
114 
115 // This DAFSA is constructed so that labels begin and end with unique
116 // characters, which makes it impossible to merge labels. Each inner node
117 // is about 100 bytes and a one byte offset can at most add 64 bytes to
118 // previous offset. Thus the paths must go over two byte offsets.
TEST_P(Dafsa3Test,TestDafsaTwoByteOffsets)119 TEST_P(Dafsa3Test, TestDafsaTwoByteOffsets) {
120   const Expectation& param = GetParam();
121   EXPECT_EQ(param.value, LookupInGraph(test3::kDafsa, param.key));
122 }
123 
124 const Expectation kTwoByteOffsetTestCases[] = {
125     {"0________________________________________________________________________"
126      "____________________________0",
127      0},
128     {"7________________________________________________________________________"
129      "____________________________7",
130      4},
131     {"a________________________________________________________________________"
132      "____________________________8",
133      -1},
134 };
135 
136 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
137                          Dafsa3Test,
138                          ::testing::ValuesIn(kTwoByteOffsetTestCases));
139 
140 class Dafsa4Test : public LookupStringInFixedSetTest {};
141 
142 // This DAFSA is constructed so that labels begin and end with unique
143 // characters, which makes it impossible to merge labels. The byte array
144 // has a size of ~54k. A two byte offset can add at most add 8k to the
145 // previous offset. Since we can skip only forward in memory, the nodes
146 // representing the return values must be located near the end of the byte
147 // array. The probability that we can reach from an arbitrary inner node to
148 // a return value without using a three byte offset is small (but not zero).
149 // The test is repeated with some different keys and with a reasonable
150 // probability at least one of the tested paths has go over a three byte
151 // offset.
TEST_P(Dafsa4Test,TestDafsaThreeByteOffsets)152 TEST_P(Dafsa4Test, TestDafsaThreeByteOffsets) {
153   const Expectation& param = GetParam();
154   EXPECT_EQ(param.value, LookupInGraph(test4::kDafsa, param.key));
155 }
156 
157 const Expectation kThreeByteOffsetTestCases[] = {
158     {"Z6_______________________________________________________________________"
159      "_____________________________Z6",
160      0},
161     {"Z7_______________________________________________________________________"
162      "_____________________________Z7",
163      4},
164     {"Za_______________________________________________________________________"
165      "_____________________________Z8",
166      -1},
167 };
168 
169 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
170                          Dafsa4Test,
171                          ::testing::ValuesIn(kThreeByteOffsetTestCases));
172 
173 class Dafsa5Test : public LookupStringInFixedSetTest {};
174 
175 // This DAFSA is constructed from words with similar prefixes but distinct
176 // suffixes. The DAFSA will then form a trie with the implicit source node
177 // as root.
TEST_P(Dafsa5Test,TestDafsaJoinedPrefixes)178 TEST_P(Dafsa5Test, TestDafsaJoinedPrefixes) {
179   const Expectation& param = GetParam();
180   EXPECT_EQ(param.value, LookupInGraph(test5::kDafsa, param.key));
181 }
182 
183 const Expectation kJoinedPrefixesTestCases[] = {
184     {"ai", 0},   {"bj", 4},   {"aak", 0},   {"bbl", 4},
185     {"aaa", -1}, {"bbb", -1}, {"aaaam", 0}, {"bbbbn", 0},
186 };
187 
188 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
189                          Dafsa5Test,
190                          ::testing::ValuesIn(kJoinedPrefixesTestCases));
191 
192 class Dafsa6Test : public LookupStringInFixedSetTest {};
193 
194 // This DAFSA is constructed from words with similar suffixes but distinct
195 // prefixes. The DAFSA will then form a trie with the implicit sink node as
196 // root.
TEST_P(Dafsa6Test,TestDafsaJoinedSuffixes)197 TEST_P(Dafsa6Test, TestDafsaJoinedSuffixes) {
198   const Expectation& param = GetParam();
199   EXPECT_EQ(param.value, LookupInGraph(test6::kDafsa, param.key));
200 }
201 
202 const Expectation kJoinedSuffixesTestCases[] = {
203     {"ia", 0},   {"jb", 4},   {"kaa", 0},   {"lbb", 4},
204     {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0},
205 };
206 
207 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
208                          Dafsa6Test,
209                          ::testing::ValuesIn(kJoinedSuffixesTestCases));
210 
211 // Validates that the generated DAFSA contains exactly the same information as
212 // effective_tld_names_unittest1.gperf.
TEST(LookupStringInFixedSetTest,Dafsa1EnumerateLanguage)213 TEST(LookupStringInFixedSetTest, Dafsa1EnumerateLanguage) {
214   auto language = EnumerateDafsaLanguage(test1::kDafsa);
215 
216   // These are the lines of effective_tld_names_unittest1.gperf, in sorted
217   // order.
218   std::vector<std::string> expected_language = {
219       "ac.jp, 0",       "b.c, 1",     "bar.baz.com, 0", "bar.jp, 2",
220       "baz.bar.jp, 2",  "c, 2",       "jp, 0",          "no, 0",
221       "pref.bar.jp, 1", "priv.no, 4", "private, 4",     "xn--fiqs8s, 0",
222   };
223 
224   EXPECT_EQ(expected_language, language);
225 }
226 
227 // Validates that the generated DAFSA contains exactly the same information as
228 // effective_tld_names_unittest5.gperf.
TEST(LookupStringInFixedSetTest,Dafsa5EnumerateLanguage)229 TEST(LookupStringInFixedSetTest, Dafsa5EnumerateLanguage) {
230   auto language = EnumerateDafsaLanguage(test5::kDafsa);
231 
232   std::vector<std::string> expected_language = {
233       "aaaam, 0", "aak, 0", "ai, 0", "bbbbn, 0", "bbl, 4", "bj, 4",
234   };
235 
236   EXPECT_EQ(expected_language, language);
237 }
238 
239 // Validates that the generated DAFSA contains exactly the same information as
240 // effective_tld_names_unittest6.gperf.
TEST(LookupStringInFixedSetTest,Dafsa6EnumerateLanguage)241 TEST(LookupStringInFixedSetTest, Dafsa6EnumerateLanguage) {
242   auto language = EnumerateDafsaLanguage(test6::kDafsa);
243 
244   std::vector<std::string> expected_language = {
245       "ia, 0", "jb, 4", "kaa, 0", "lbb, 4", "maaaa, 0", "nbbbb, 0",
246   };
247 
248   EXPECT_EQ(expected_language, language);
249 }
250 
251 }  // namespace
252 }  // namespace net
253