1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/lookup_string_in_fixed_set.h"
6
7 #include <string.h>
8
9 #include <algorithm>
10 #include <cstdint>
11 #include <limits>
12 #include <ostream>
13 #include <utility>
14 #include <vector>
15
16 #include "base/base_paths.h"
17 #include "base/containers/span.h"
18 #include "base/files/file_path.h"
19 #include "base/files/file_util.h"
20 #include "base/path_service.h"
21 #include "base/strings/string_util.h"
22 #include "base/strings/stringprintf.h"
23 #include "testing/gtest/include/gtest/gtest.h"
24
25 namespace net {
26 namespace {
27 namespace test1 {
28 #include "net/base/registry_controlled_domains/effective_tld_names_unittest1-inc.cc"
29 }
30 namespace test3 {
31 #include "net/base/registry_controlled_domains/effective_tld_names_unittest3-inc.cc"
32 }
33 namespace test4 {
34 #include "net/base/registry_controlled_domains/effective_tld_names_unittest4-inc.cc"
35 }
36 namespace test5 {
37 #include "net/base/registry_controlled_domains/effective_tld_names_unittest5-inc.cc"
38 }
39 namespace test6 {
40 #include "net/base/registry_controlled_domains/effective_tld_names_unittest6-inc.cc"
41 }
42
43 struct Expectation {
44 const char* const key;
45 int value;
46 };
47
PrintTo(const Expectation & expectation,std::ostream * os)48 void PrintTo(const Expectation& expectation, std::ostream* os) {
49 *os << "{\"" << expectation.key << "\", " << expectation.value << "}";
50 }
51
52 class LookupStringInFixedSetTest : public testing::TestWithParam<Expectation> {
53 protected:
LookupInGraph(base::span<const uint8_t> graph,const char * key)54 int LookupInGraph(base::span<const uint8_t> graph, const char* key) {
55 return LookupStringInFixedSet(graph, key, strlen(key));
56 }
57 };
58
59 class Dafsa1Test : public LookupStringInFixedSetTest {};
60
TEST_P(Dafsa1Test,BasicTest)61 TEST_P(Dafsa1Test, BasicTest) {
62 const Expectation& param = GetParam();
63 EXPECT_EQ(param.value, LookupInGraph(test1::kDafsa, param.key));
64 }
65
66 const Expectation kBasicTestCases[] = {
67 {"", -1}, {"j", -1}, {"jp", 0}, {"jjp", -1}, {"jpp", -1},
68 {"bar.jp", 2}, {"pref.bar.jp", 1}, {"c", 2}, {"b.c", 1}, {"priv.no", 4},
69 };
70
71 // Helper function for EnumerateDafsaLanaguage.
RecursivelyEnumerateDafsaLanguage(const FixedSetIncrementalLookup & lookup,std::vector<char> * sequence,std::vector<std::string> * language)72 void RecursivelyEnumerateDafsaLanguage(const FixedSetIncrementalLookup& lookup,
73 std::vector<char>* sequence,
74 std::vector<std::string>* language) {
75 int result = lookup.GetResultForCurrentSequence();
76 if (result != kDafsaNotFound) {
77 std::string line(sequence->begin(), sequence->end());
78 line += base::StringPrintf(", %d", result);
79 language->emplace_back(std::move(line));
80 }
81 // Try appending each char value.
82 for (char c = std::numeric_limits<char>::min();; ++c) {
83 FixedSetIncrementalLookup continued_lookup = lookup;
84 if (continued_lookup.Advance(c)) {
85 sequence->push_back(c);
86 size_t saved_language_size = language->size();
87 RecursivelyEnumerateDafsaLanguage(continued_lookup, sequence, language);
88 CHECK_LT(saved_language_size, language->size())
89 << "DAFSA includes a branch to nowhere at node: "
90 << std::string(sequence->begin(), sequence->end());
91 sequence->pop_back();
92 }
93 if (c == std::numeric_limits<char>::max())
94 break;
95 }
96 }
97
98 // Uses FixedSetIncrementalLookup to build a vector of every string in the
99 // language of the DAFSA.
EnumerateDafsaLanguage(base::span<const uint8_t> graph)100 std::vector<std::string> EnumerateDafsaLanguage(
101 base::span<const uint8_t> graph) {
102 FixedSetIncrementalLookup query(graph);
103 std::vector<char> sequence;
104 std::vector<std::string> language;
105 RecursivelyEnumerateDafsaLanguage(query, &sequence, &language);
106 return language;
107 }
108
109 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
110 Dafsa1Test,
111 ::testing::ValuesIn(kBasicTestCases));
112
113 class Dafsa3Test : public LookupStringInFixedSetTest {};
114
115 // This DAFSA is constructed so that labels begin and end with unique
116 // characters, which makes it impossible to merge labels. Each inner node
117 // is about 100 bytes and a one byte offset can at most add 64 bytes to
118 // previous offset. Thus the paths must go over two byte offsets.
TEST_P(Dafsa3Test,TestDafsaTwoByteOffsets)119 TEST_P(Dafsa3Test, TestDafsaTwoByteOffsets) {
120 const Expectation& param = GetParam();
121 EXPECT_EQ(param.value, LookupInGraph(test3::kDafsa, param.key));
122 }
123
124 const Expectation kTwoByteOffsetTestCases[] = {
125 {"0________________________________________________________________________"
126 "____________________________0",
127 0},
128 {"7________________________________________________________________________"
129 "____________________________7",
130 4},
131 {"a________________________________________________________________________"
132 "____________________________8",
133 -1},
134 };
135
136 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
137 Dafsa3Test,
138 ::testing::ValuesIn(kTwoByteOffsetTestCases));
139
140 class Dafsa4Test : public LookupStringInFixedSetTest {};
141
142 // This DAFSA is constructed so that labels begin and end with unique
143 // characters, which makes it impossible to merge labels. The byte array
144 // has a size of ~54k. A two byte offset can add at most add 8k to the
145 // previous offset. Since we can skip only forward in memory, the nodes
146 // representing the return values must be located near the end of the byte
147 // array. The probability that we can reach from an arbitrary inner node to
148 // a return value without using a three byte offset is small (but not zero).
149 // The test is repeated with some different keys and with a reasonable
150 // probability at least one of the tested paths has go over a three byte
151 // offset.
TEST_P(Dafsa4Test,TestDafsaThreeByteOffsets)152 TEST_P(Dafsa4Test, TestDafsaThreeByteOffsets) {
153 const Expectation& param = GetParam();
154 EXPECT_EQ(param.value, LookupInGraph(test4::kDafsa, param.key));
155 }
156
157 const Expectation kThreeByteOffsetTestCases[] = {
158 {"Z6_______________________________________________________________________"
159 "_____________________________Z6",
160 0},
161 {"Z7_______________________________________________________________________"
162 "_____________________________Z7",
163 4},
164 {"Za_______________________________________________________________________"
165 "_____________________________Z8",
166 -1},
167 };
168
169 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
170 Dafsa4Test,
171 ::testing::ValuesIn(kThreeByteOffsetTestCases));
172
173 class Dafsa5Test : public LookupStringInFixedSetTest {};
174
175 // This DAFSA is constructed from words with similar prefixes but distinct
176 // suffixes. The DAFSA will then form a trie with the implicit source node
177 // as root.
TEST_P(Dafsa5Test,TestDafsaJoinedPrefixes)178 TEST_P(Dafsa5Test, TestDafsaJoinedPrefixes) {
179 const Expectation& param = GetParam();
180 EXPECT_EQ(param.value, LookupInGraph(test5::kDafsa, param.key));
181 }
182
183 const Expectation kJoinedPrefixesTestCases[] = {
184 {"ai", 0}, {"bj", 4}, {"aak", 0}, {"bbl", 4},
185 {"aaa", -1}, {"bbb", -1}, {"aaaam", 0}, {"bbbbn", 0},
186 };
187
188 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
189 Dafsa5Test,
190 ::testing::ValuesIn(kJoinedPrefixesTestCases));
191
192 class Dafsa6Test : public LookupStringInFixedSetTest {};
193
194 // This DAFSA is constructed from words with similar suffixes but distinct
195 // prefixes. The DAFSA will then form a trie with the implicit sink node as
196 // root.
TEST_P(Dafsa6Test,TestDafsaJoinedSuffixes)197 TEST_P(Dafsa6Test, TestDafsaJoinedSuffixes) {
198 const Expectation& param = GetParam();
199 EXPECT_EQ(param.value, LookupInGraph(test6::kDafsa, param.key));
200 }
201
202 const Expectation kJoinedSuffixesTestCases[] = {
203 {"ia", 0}, {"jb", 4}, {"kaa", 0}, {"lbb", 4},
204 {"aaa", -1}, {"bbb", -1}, {"maaaa", 0}, {"nbbbb", 0},
205 };
206
207 INSTANTIATE_TEST_SUITE_P(LookupStringInFixedSetTest,
208 Dafsa6Test,
209 ::testing::ValuesIn(kJoinedSuffixesTestCases));
210
211 // Validates that the generated DAFSA contains exactly the same information as
212 // effective_tld_names_unittest1.gperf.
TEST(LookupStringInFixedSetTest,Dafsa1EnumerateLanguage)213 TEST(LookupStringInFixedSetTest, Dafsa1EnumerateLanguage) {
214 auto language = EnumerateDafsaLanguage(test1::kDafsa);
215
216 // These are the lines of effective_tld_names_unittest1.gperf, in sorted
217 // order.
218 std::vector<std::string> expected_language = {
219 "ac.jp, 0", "b.c, 1", "bar.baz.com, 0", "bar.jp, 2",
220 "baz.bar.jp, 2", "c, 2", "jp, 0", "no, 0",
221 "pref.bar.jp, 1", "priv.no, 4", "private, 4", "xn--fiqs8s, 0",
222 };
223
224 EXPECT_EQ(expected_language, language);
225 }
226
227 // Validates that the generated DAFSA contains exactly the same information as
228 // effective_tld_names_unittest5.gperf.
TEST(LookupStringInFixedSetTest,Dafsa5EnumerateLanguage)229 TEST(LookupStringInFixedSetTest, Dafsa5EnumerateLanguage) {
230 auto language = EnumerateDafsaLanguage(test5::kDafsa);
231
232 std::vector<std::string> expected_language = {
233 "aaaam, 0", "aak, 0", "ai, 0", "bbbbn, 0", "bbl, 4", "bj, 4",
234 };
235
236 EXPECT_EQ(expected_language, language);
237 }
238
239 // Validates that the generated DAFSA contains exactly the same information as
240 // effective_tld_names_unittest6.gperf.
TEST(LookupStringInFixedSetTest,Dafsa6EnumerateLanguage)241 TEST(LookupStringInFixedSetTest, Dafsa6EnumerateLanguage) {
242 auto language = EnumerateDafsaLanguage(test6::kDafsa);
243
244 std::vector<std::string> expected_language = {
245 "ia, 0", "jb, 4", "kaa, 0", "lbb, 4", "maaaa, 0", "nbbbb, 0",
246 };
247
248 EXPECT_EQ(expected_language, language);
249 }
250
251 } // namespace
252 } // namespace net
253