1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // This utility program exists to process the False Start blacklist file into
6 // a static hash table so that it can be efficiently queried by Chrome.
7
8 #include <stdio.h>
9 #include <stdlib.h>
10
11 #include <set>
12 #include <string>
13 #include <vector>
14
15 #include "base/basictypes.h"
16 #include "net/base/ssl_false_start_blacklist.h"
17
18 using net::SSLFalseStartBlacklist;
19
20 static const unsigned kBuckets = SSLFalseStartBlacklist::kBuckets;
21
22 static bool verbose = false;
23
24 static int
usage(const char * argv0)25 usage(const char* argv0) {
26 fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv0);
27 return 1;
28 }
29
30 // StripWWWPrefix removes "www." from the beginning of any elements of the
31 // vector.
StripWWWPrefix(std::vector<std::string> * hosts)32 static void StripWWWPrefix(std::vector<std::string>* hosts) {
33 static const char kPrefix[] = "www.";
34 static const unsigned kPrefixLen = sizeof(kPrefix) - 1;
35
36 for (size_t i = 0; i < hosts->size(); i++) {
37 const std::string& h = (*hosts)[i];
38 if (h.size() >= kPrefixLen &&
39 memcmp(h.data(), kPrefix, kPrefixLen) == 0) {
40 (*hosts)[i] = h.substr(kPrefixLen, h.size() - kPrefixLen);
41 }
42 }
43 }
44
45 // RemoveDuplicateEntries removes all duplicates from |hosts|.
RemoveDuplicateEntries(std::vector<std::string> * hosts)46 static void RemoveDuplicateEntries(std::vector<std::string>* hosts) {
47 std::set<std::string> hosts_set;
48 std::vector<std::string> ret;
49
50 for (std::vector<std::string>::const_iterator
51 i = hosts->begin(); i != hosts->end(); i++) {
52 if (hosts_set.count(*i)) {
53 if (verbose)
54 fprintf(stderr, "Removing duplicate entry for %s\n", i->c_str());
55 continue;
56 }
57 hosts_set.insert(*i);
58 ret.push_back(*i);
59 }
60
61 hosts->swap(ret);
62 }
63
64 // ParentDomain returns the parent domain for a given domain name or the empty
65 // string if the name is a top-level domain.
ParentDomain(const std::string & in)66 static std::string ParentDomain(const std::string& in) {
67 for (size_t i = 0; i < in.size(); i++) {
68 if (in[i] == '.') {
69 return in.substr(i + 1, in.size() - i - 1);
70 }
71 }
72
73 return std::string();
74 }
75
76 // RemoveRedundantEntries removes any entries which are subdomains of other
77 // entries. (i.e. foo.example.com would be removed if example.com were also
78 // included.)
RemoveRedundantEntries(std::vector<std::string> * hosts)79 static void RemoveRedundantEntries(std::vector<std::string>* hosts) {
80 std::set<std::string> hosts_set;
81 std::vector<std::string> ret;
82
83 for (std::vector<std::string>::const_iterator
84 i = hosts->begin(); i != hosts->end(); i++) {
85 hosts_set.insert(*i);
86 }
87
88 for (std::vector<std::string>::const_iterator
89 i = hosts->begin(); i != hosts->end(); i++) {
90 std::string parent = ParentDomain(*i);
91 while (!parent.empty()) {
92 if (hosts_set.count(parent))
93 break;
94 parent = ParentDomain(parent);
95 }
96 if (parent.empty()) {
97 ret.push_back(*i);
98 } else {
99 if (verbose)
100 fprintf(stderr, "Removing %s as redundant\n", i->c_str());
101 }
102 }
103
104 hosts->swap(ret);
105 }
106
107 // CheckLengths returns true iff every host is less than 256 bytes long (not
108 // including the terminating NUL) and contains two or more labels.
CheckLengths(const std::vector<std::string> & hosts)109 static bool CheckLengths(const std::vector<std::string>& hosts) {
110 for (std::vector<std::string>::const_iterator
111 i = hosts.begin(); i != hosts.end(); i++) {
112 if (i->size() >= 256) {
113 fprintf(stderr, "Entry %s is too large\n", i->c_str());
114 return false;
115 }
116 if (SSLFalseStartBlacklist::LastTwoLabels(i->c_str()) == NULL) {
117 fprintf(stderr, "Entry %s contains too few labels\n", i->c_str());
118 return false;
119 }
120 }
121
122 return true;
123 }
124
main(int argc,char ** argv)125 int main(int argc, char** argv) {
126 if (argc != 3)
127 return usage(argv[0]);
128
129 const char* input_file = argv[1];
130 const char* output_file = argv[2];
131 FILE* input = fopen(input_file, "rb");
132 if (!input) {
133 perror("open");
134 return usage(argv[0]);
135 }
136
137 if (fseek(input, 0, SEEK_END)) {
138 perror("fseek");
139 return 1;
140 }
141
142 const long input_size = ftell(input);
143
144 if (fseek(input, 0, SEEK_SET)) {
145 perror("fseek");
146 return 1;
147 }
148
149 char* buffer = static_cast<char*>(malloc(input_size));
150 long done = 0;
151 while (done < input_size) {
152 size_t n = fread(buffer + done, 1, input_size - done, input);
153 if (n == 0) {
154 perror("fread");
155 free(buffer);
156 fclose(input);
157 return 1;
158 }
159 done += n;
160 }
161 fclose(input);
162
163 std::vector<std::string> hosts;
164
165 off_t line_start = 0;
166 bool is_comment = false;
167 bool non_whitespace_seen = false;
168 for (long i = 0; i <= input_size; i++) {
169 if (i == input_size || buffer[i] == '\n') {
170 if (!is_comment && non_whitespace_seen) {
171 long len = i - line_start;
172 if (i > 0 && buffer[i-1] == '\r')
173 len--;
174 hosts.push_back(std::string(&buffer[line_start], len));
175 }
176 is_comment = false;
177 non_whitespace_seen = false;
178 line_start = i + 1;
179 continue;
180 }
181
182 if (i == line_start && buffer[i] == '#')
183 is_comment = true;
184 if (buffer[i] != ' ' && buffer[i] != '\t' && buffer[i] != '\r')
185 non_whitespace_seen = true;
186 }
187 free(buffer);
188
189 fprintf(stderr, "Have %d hosts after parse\n", (int) hosts.size());
190 StripWWWPrefix(&hosts);
191 RemoveDuplicateEntries(&hosts);
192 fprintf(stderr, "Have %d hosts after removing duplicates\n", (int) hosts.size());
193 RemoveRedundantEntries(&hosts);
194 fprintf(stderr, "Have %d hosts after removing redundants\n", (int) hosts.size());
195 if (!CheckLengths(hosts)) {
196 fprintf(stderr, "One or more entries is too large or too small\n");
197 return 2;
198 }
199
200 fprintf(stderr, "Using %d entry hash table\n", kBuckets);
201 uint32 table[kBuckets];
202 std::vector<std::string> buckets[kBuckets];
203
204 for (std::vector<std::string>::const_iterator
205 i = hosts.begin(); i != hosts.end(); i++) {
206 const char* last_two_labels =
207 SSLFalseStartBlacklist::LastTwoLabels(i->c_str());
208 const unsigned h = SSLFalseStartBlacklist::Hash(last_two_labels);
209 buckets[h & (kBuckets - 1)].push_back(*i);
210 }
211
212 std::string table_data;
213 unsigned max_bucket_size = 0;
214 for (unsigned i = 0; i < kBuckets; i++) {
215 if (buckets[i].size() > max_bucket_size)
216 max_bucket_size = buckets[i].size();
217
218 table[i] = table_data.size();
219 for (std::vector<std::string>::const_iterator
220 j = buckets[i].begin(); j != buckets[i].end(); j++) {
221 table_data.push_back((char) j->size());
222 table_data.append(*j);
223 }
224 }
225
226 fprintf(stderr, "Largest bucket has %d entries\n", max_bucket_size);
227
228 FILE* out = fopen(output_file, "w+");
229 if (!out) {
230 perror("opening output file");
231 return 4;
232 }
233
234 fprintf(out, "// Copyright (c) 2010 The Chromium Authors. All rights "
235 "reserved.\n// Use of this source code is governed by a BSD-style "
236 "license that can be\n// found in the LICENSE file.\n\n");
237 fprintf(out, "// WARNING: this code is generated by\n"
238 "// ssl_false_start_blacklist_process.cc. Do not edit.\n\n");
239 fprintf(out, "#include \"base/basictypes.h\"\n\n");
240 fprintf(out, "#include \"net/base/ssl_false_start_blacklist.h\"\n\n");
241 fprintf(out, "namespace net {\n\n");
242 fprintf(out, "const uint32 SSLFalseStartBlacklist::kHashTable[%d + 1] = {\n",
243 kBuckets);
244 for (unsigned i = 0; i < kBuckets; i++) {
245 fprintf(out, " %u,\n", (unsigned) table[i]);
246 }
247 fprintf(out, " %u,\n", (unsigned) table_data.size());
248 fprintf(out, "};\n\n");
249
250 fprintf(out, "const char SSLFalseStartBlacklist::kHashData[] = {\n");
251 for (unsigned i = 0, line_length = 0; i < table_data.size(); i++) {
252 if (line_length == 0)
253 fprintf(out, " ");
254 uint8 c = static_cast<uint8>(table_data[i]);
255 line_length += fprintf(out, "%d, ", c);
256 if (i == table_data.size() - 1) {
257 fprintf(out, "\n};\n");
258 } else if (line_length >= 70) {
259 fprintf(out, "\n");
260 line_length = 0;
261 }
262 }
263 fprintf(out, "\n} // namespace net\n");
264 fclose(out);
265
266 return 0;
267 }
268