• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // This utility program exists to process the False Start blacklist file into
6 // a static hash table so that it can be efficiently queried by Chrome.
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 
11 #include <set>
12 #include <string>
13 #include <vector>
14 
15 #include "base/basictypes.h"
16 #include "net/base/ssl_false_start_blacklist.h"
17 
18 using net::SSLFalseStartBlacklist;
19 
20 static const unsigned kBuckets = SSLFalseStartBlacklist::kBuckets;
21 
22 static bool verbose = false;
23 
24 static int
usage(const char * argv0)25 usage(const char* argv0) {
26   fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv0);
27   return 1;
28 }
29 
30 // StripWWWPrefix removes "www." from the beginning of any elements of the
31 // vector.
StripWWWPrefix(std::vector<std::string> * hosts)32 static void StripWWWPrefix(std::vector<std::string>* hosts) {
33   static const char kPrefix[] = "www.";
34   static const unsigned kPrefixLen = sizeof(kPrefix) - 1;
35 
36   for (size_t i = 0; i < hosts->size(); i++) {
37     const std::string& h = (*hosts)[i];
38     if (h.size() >= kPrefixLen &&
39         memcmp(h.data(), kPrefix, kPrefixLen) == 0) {
40       (*hosts)[i] = h.substr(kPrefixLen, h.size() - kPrefixLen);
41     }
42   }
43 }
44 
45 // RemoveDuplicateEntries removes all duplicates from |hosts|.
RemoveDuplicateEntries(std::vector<std::string> * hosts)46 static void RemoveDuplicateEntries(std::vector<std::string>* hosts) {
47   std::set<std::string> hosts_set;
48   std::vector<std::string> ret;
49 
50   for (std::vector<std::string>::const_iterator
51        i = hosts->begin(); i != hosts->end(); i++) {
52     if (hosts_set.count(*i)) {
53       if (verbose)
54         fprintf(stderr, "Removing duplicate entry for %s\n", i->c_str());
55       continue;
56     }
57     hosts_set.insert(*i);
58     ret.push_back(*i);
59   }
60 
61   hosts->swap(ret);
62 }
63 
64 // ParentDomain returns the parent domain for a given domain name or the empty
65 // string if the name is a top-level domain.
ParentDomain(const std::string & in)66 static std::string ParentDomain(const std::string& in) {
67   for (size_t i = 0; i < in.size(); i++) {
68     if (in[i] == '.') {
69       return in.substr(i + 1, in.size() - i - 1);
70     }
71   }
72 
73   return std::string();
74 }
75 
76 // RemoveRedundantEntries removes any entries which are subdomains of other
77 // entries. (i.e. foo.example.com would be removed if example.com were also
78 // included.)
RemoveRedundantEntries(std::vector<std::string> * hosts)79 static void RemoveRedundantEntries(std::vector<std::string>* hosts) {
80   std::set<std::string> hosts_set;
81   std::vector<std::string> ret;
82 
83   for (std::vector<std::string>::const_iterator
84        i = hosts->begin(); i != hosts->end(); i++) {
85     hosts_set.insert(*i);
86   }
87 
88   for (std::vector<std::string>::const_iterator
89        i = hosts->begin(); i != hosts->end(); i++) {
90     std::string parent = ParentDomain(*i);
91     while (!parent.empty()) {
92       if (hosts_set.count(parent))
93         break;
94       parent = ParentDomain(parent);
95     }
96     if (parent.empty()) {
97       ret.push_back(*i);
98     } else {
99       if (verbose)
100         fprintf(stderr, "Removing %s as redundant\n", i->c_str());
101     }
102   }
103 
104   hosts->swap(ret);
105 }
106 
107 // CheckLengths returns true iff every host is less than 256 bytes long (not
108 // including the terminating NUL) and contains two or more labels.
CheckLengths(const std::vector<std::string> & hosts)109 static bool CheckLengths(const std::vector<std::string>& hosts) {
110   for (std::vector<std::string>::const_iterator
111        i = hosts.begin(); i != hosts.end(); i++) {
112      if (i->size() >= 256) {
113        fprintf(stderr, "Entry %s is too large\n", i->c_str());
114        return false;
115      }
116      if (SSLFalseStartBlacklist::LastTwoLabels(i->c_str()) == NULL) {
117        fprintf(stderr, "Entry %s contains too few labels\n", i->c_str());
118        return false;
119      }
120   }
121 
122   return true;
123 }
124 
main(int argc,char ** argv)125 int main(int argc, char** argv) {
126   if (argc != 3)
127     return usage(argv[0]);
128 
129   const char* input_file = argv[1];
130   const char* output_file = argv[2];
131   FILE* input = fopen(input_file, "rb");
132   if (!input) {
133     perror("open");
134     return usage(argv[0]);
135   }
136 
137   if (fseek(input, 0, SEEK_END)) {
138     perror("fseek");
139     return 1;
140   }
141 
142   const long input_size = ftell(input);
143 
144   if (fseek(input, 0, SEEK_SET)) {
145     perror("fseek");
146     return 1;
147   }
148 
149   char* buffer = static_cast<char*>(malloc(input_size));
150   long done = 0;
151   while (done < input_size) {
152     size_t n = fread(buffer + done, 1, input_size - done, input);
153     if (n == 0) {
154       perror("fread");
155       free(buffer);
156       fclose(input);
157       return 1;
158     }
159     done += n;
160   }
161   fclose(input);
162 
163   std::vector<std::string> hosts;
164 
165   off_t line_start = 0;
166   bool is_comment = false;
167   bool non_whitespace_seen = false;
168   for (long i = 0; i <= input_size; i++) {
169     if (i == input_size || buffer[i] == '\n') {
170       if (!is_comment && non_whitespace_seen) {
171         long len = i - line_start;
172         if (i > 0 && buffer[i-1] == '\r')
173           len--;
174         hosts.push_back(std::string(&buffer[line_start], len));
175       }
176       is_comment = false;
177       non_whitespace_seen = false;
178       line_start = i + 1;
179       continue;
180     }
181 
182     if (i == line_start && buffer[i] == '#')
183       is_comment = true;
184     if (buffer[i] != ' ' && buffer[i] != '\t' && buffer[i] != '\r')
185       non_whitespace_seen = true;
186   }
187   free(buffer);
188 
189   fprintf(stderr, "Have %d hosts after parse\n", (int) hosts.size());
190   StripWWWPrefix(&hosts);
191   RemoveDuplicateEntries(&hosts);
192   fprintf(stderr, "Have %d hosts after removing duplicates\n", (int) hosts.size());
193   RemoveRedundantEntries(&hosts);
194   fprintf(stderr, "Have %d hosts after removing redundants\n", (int) hosts.size());
195   if (!CheckLengths(hosts)) {
196     fprintf(stderr, "One or more entries is too large or too small\n");
197     return 2;
198   }
199 
200   fprintf(stderr, "Using %d entry hash table\n", kBuckets);
201   uint32 table[kBuckets];
202   std::vector<std::string> buckets[kBuckets];
203 
204   for (std::vector<std::string>::const_iterator
205        i = hosts.begin(); i != hosts.end(); i++) {
206     const char* last_two_labels =
207         SSLFalseStartBlacklist::LastTwoLabels(i->c_str());
208     const unsigned h = SSLFalseStartBlacklist::Hash(last_two_labels);
209     buckets[h & (kBuckets - 1)].push_back(*i);
210   }
211 
212   std::string table_data;
213   unsigned max_bucket_size = 0;
214   for (unsigned i = 0; i < kBuckets; i++) {
215     if (buckets[i].size() > max_bucket_size)
216       max_bucket_size = buckets[i].size();
217 
218     table[i] = table_data.size();
219     for (std::vector<std::string>::const_iterator
220          j = buckets[i].begin(); j != buckets[i].end(); j++) {
221       table_data.push_back((char) j->size());
222       table_data.append(*j);
223     }
224   }
225 
226   fprintf(stderr, "Largest bucket has %d entries\n", max_bucket_size);
227 
228   FILE* out = fopen(output_file, "w+");
229   if (!out) {
230     perror("opening output file");
231     return 4;
232   }
233 
234   fprintf(out, "// Copyright (c) 2010 The Chromium Authors. All rights "
235           "reserved.\n// Use of this source code is governed by a BSD-style "
236           "license that can be\n// found in the LICENSE file.\n\n");
237   fprintf(out, "// WARNING: this code is generated by\n"
238                "// ssl_false_start_blacklist_process.cc. Do not edit.\n\n");
239   fprintf(out, "#include \"base/basictypes.h\"\n\n");
240   fprintf(out, "#include \"net/base/ssl_false_start_blacklist.h\"\n\n");
241   fprintf(out, "namespace net {\n\n");
242   fprintf(out, "const uint32 SSLFalseStartBlacklist::kHashTable[%d + 1] = {\n",
243           kBuckets);
244   for (unsigned i = 0; i < kBuckets; i++) {
245     fprintf(out, "  %u,\n", (unsigned) table[i]);
246   }
247   fprintf(out, "  %u,\n", (unsigned) table_data.size());
248   fprintf(out, "};\n\n");
249 
250   fprintf(out, "const char SSLFalseStartBlacklist::kHashData[] = {\n");
251   for (unsigned i = 0, line_length = 0; i < table_data.size(); i++) {
252     if (line_length == 0)
253       fprintf(out, "  ");
254     uint8 c = static_cast<uint8>(table_data[i]);
255     line_length += fprintf(out, "%d, ", c);
256     if (i == table_data.size() - 1) {
257       fprintf(out, "\n};\n");
258     } else if (line_length >= 70) {
259       fprintf(out, "\n");
260       line_length = 0;
261     }
262   }
263   fprintf(out, "\n}  // namespace net\n");
264   fclose(out);
265 
266   return 0;
267 }
268