• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2  // Use of this source code is governed by a BSD-style license that can be
3  // found in the LICENSE file.
4  
5  // This utility program exists to process the False Start blacklist file into
6  // a static hash table so that it can be efficiently queried by Chrome.
7  
8  #include <stdio.h>
9  #include <stdlib.h>
10  
11  #include <set>
12  #include <string>
13  #include <vector>
14  
15  #include "base/basictypes.h"
16  #include "net/base/ssl_false_start_blacklist.h"
17  
18  using net::SSLFalseStartBlacklist;
19  
20  static const unsigned kBuckets = SSLFalseStartBlacklist::kBuckets;
21  
22  static bool verbose = false;
23  
24  static int
usage(const char * argv0)25  usage(const char* argv0) {
26    fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv0);
27    return 1;
28  }
29  
30  // StripWWWPrefix removes "www." from the beginning of any elements of the
31  // vector.
StripWWWPrefix(std::vector<std::string> * hosts)32  static void StripWWWPrefix(std::vector<std::string>* hosts) {
33    static const char kPrefix[] = "www.";
34    static const unsigned kPrefixLen = sizeof(kPrefix) - 1;
35  
36    for (size_t i = 0; i < hosts->size(); i++) {
37      const std::string& h = (*hosts)[i];
38      if (h.size() >= kPrefixLen &&
39          memcmp(h.data(), kPrefix, kPrefixLen) == 0) {
40        (*hosts)[i] = h.substr(kPrefixLen, h.size() - kPrefixLen);
41      }
42    }
43  }
44  
45  // RemoveDuplicateEntries removes all duplicates from |hosts|.
RemoveDuplicateEntries(std::vector<std::string> * hosts)46  static void RemoveDuplicateEntries(std::vector<std::string>* hosts) {
47    std::set<std::string> hosts_set;
48    std::vector<std::string> ret;
49  
50    for (std::vector<std::string>::const_iterator
51         i = hosts->begin(); i != hosts->end(); i++) {
52      if (hosts_set.count(*i)) {
53        if (verbose)
54          fprintf(stderr, "Removing duplicate entry for %s\n", i->c_str());
55        continue;
56      }
57      hosts_set.insert(*i);
58      ret.push_back(*i);
59    }
60  
61    hosts->swap(ret);
62  }
63  
64  // ParentDomain returns the parent domain for a given domain name or the empty
65  // string if the name is a top-level domain.
ParentDomain(const std::string & in)66  static std::string ParentDomain(const std::string& in) {
67    for (size_t i = 0; i < in.size(); i++) {
68      if (in[i] == '.') {
69        return in.substr(i + 1, in.size() - i - 1);
70      }
71    }
72  
73    return std::string();
74  }
75  
76  // RemoveRedundantEntries removes any entries which are subdomains of other
77  // entries. (i.e. foo.example.com would be removed if example.com were also
78  // included.)
RemoveRedundantEntries(std::vector<std::string> * hosts)79  static void RemoveRedundantEntries(std::vector<std::string>* hosts) {
80    std::set<std::string> hosts_set;
81    std::vector<std::string> ret;
82  
83    for (std::vector<std::string>::const_iterator
84         i = hosts->begin(); i != hosts->end(); i++) {
85      hosts_set.insert(*i);
86    }
87  
88    for (std::vector<std::string>::const_iterator
89         i = hosts->begin(); i != hosts->end(); i++) {
90      std::string parent = ParentDomain(*i);
91      while (!parent.empty()) {
92        if (hosts_set.count(parent))
93          break;
94        parent = ParentDomain(parent);
95      }
96      if (parent.empty()) {
97        ret.push_back(*i);
98      } else {
99        if (verbose)
100          fprintf(stderr, "Removing %s as redundant\n", i->c_str());
101      }
102    }
103  
104    hosts->swap(ret);
105  }
106  
107  // CheckLengths returns true iff every host is less than 256 bytes long (not
108  // including the terminating NUL) and contains two or more labels.
CheckLengths(const std::vector<std::string> & hosts)109  static bool CheckLengths(const std::vector<std::string>& hosts) {
110    for (std::vector<std::string>::const_iterator
111         i = hosts.begin(); i != hosts.end(); i++) {
112       if (i->size() >= 256) {
113         fprintf(stderr, "Entry %s is too large\n", i->c_str());
114         return false;
115       }
116       if (SSLFalseStartBlacklist::LastTwoLabels(i->c_str()) == NULL) {
117         fprintf(stderr, "Entry %s contains too few labels\n", i->c_str());
118         return false;
119       }
120    }
121  
122    return true;
123  }
124  
main(int argc,char ** argv)125  int main(int argc, char** argv) {
126    if (argc != 3)
127      return usage(argv[0]);
128  
129    const char* input_file = argv[1];
130    const char* output_file = argv[2];
131    FILE* input = fopen(input_file, "rb");
132    if (!input) {
133      perror("open");
134      return usage(argv[0]);
135    }
136  
137    if (fseek(input, 0, SEEK_END)) {
138      perror("fseek");
139      return 1;
140    }
141  
142    const long input_size = ftell(input);
143  
144    if (fseek(input, 0, SEEK_SET)) {
145      perror("fseek");
146      return 1;
147    }
148  
149    char* buffer = static_cast<char*>(malloc(input_size));
150    long done = 0;
151    while (done < input_size) {
152      size_t n = fread(buffer + done, 1, input_size - done, input);
153      if (n == 0) {
154        perror("fread");
155        free(buffer);
156        fclose(input);
157        return 1;
158      }
159      done += n;
160    }
161    fclose(input);
162  
163    std::vector<std::string> hosts;
164  
165    off_t line_start = 0;
166    bool is_comment = false;
167    bool non_whitespace_seen = false;
168    for (long i = 0; i <= input_size; i++) {
169      if (i == input_size || buffer[i] == '\n') {
170        if (!is_comment && non_whitespace_seen) {
171          long len = i - line_start;
172          if (i > 0 && buffer[i-1] == '\r')
173            len--;
174          hosts.push_back(std::string(&buffer[line_start], len));
175        }
176        is_comment = false;
177        non_whitespace_seen = false;
178        line_start = i + 1;
179        continue;
180      }
181  
182      if (i == line_start && buffer[i] == '#')
183        is_comment = true;
184      if (buffer[i] != ' ' && buffer[i] != '\t' && buffer[i] != '\r')
185        non_whitespace_seen = true;
186    }
187    free(buffer);
188  
189    fprintf(stderr, "Have %d hosts after parse\n", (int) hosts.size());
190    StripWWWPrefix(&hosts);
191    RemoveDuplicateEntries(&hosts);
192    fprintf(stderr, "Have %d hosts after removing duplicates\n", (int) hosts.size());
193    RemoveRedundantEntries(&hosts);
194    fprintf(stderr, "Have %d hosts after removing redundants\n", (int) hosts.size());
195    if (!CheckLengths(hosts)) {
196      fprintf(stderr, "One or more entries is too large or too small\n");
197      return 2;
198    }
199  
200    fprintf(stderr, "Using %d entry hash table\n", kBuckets);
201    uint32 table[kBuckets];
202    std::vector<std::string> buckets[kBuckets];
203  
204    for (std::vector<std::string>::const_iterator
205         i = hosts.begin(); i != hosts.end(); i++) {
206      const char* last_two_labels =
207          SSLFalseStartBlacklist::LastTwoLabels(i->c_str());
208      const unsigned h = SSLFalseStartBlacklist::Hash(last_two_labels);
209      buckets[h & (kBuckets - 1)].push_back(*i);
210    }
211  
212    std::string table_data;
213    unsigned max_bucket_size = 0;
214    for (unsigned i = 0; i < kBuckets; i++) {
215      if (buckets[i].size() > max_bucket_size)
216        max_bucket_size = buckets[i].size();
217  
218      table[i] = table_data.size();
219      for (std::vector<std::string>::const_iterator
220           j = buckets[i].begin(); j != buckets[i].end(); j++) {
221        table_data.push_back((char) j->size());
222        table_data.append(*j);
223      }
224    }
225  
226    fprintf(stderr, "Largest bucket has %d entries\n", max_bucket_size);
227  
228    FILE* out = fopen(output_file, "w+");
229    if (!out) {
230      perror("opening output file");
231      return 4;
232    }
233  
234    fprintf(out, "// Copyright (c) 2010 The Chromium Authors. All rights "
235            "reserved.\n// Use of this source code is governed by a BSD-style "
236            "license that can be\n// found in the LICENSE file.\n\n");
237    fprintf(out, "// WARNING: this code is generated by\n"
238                 "// ssl_false_start_blacklist_process.cc. Do not edit.\n\n");
239    fprintf(out, "#include \"base/basictypes.h\"\n\n");
240    fprintf(out, "#include \"net/base/ssl_false_start_blacklist.h\"\n\n");
241    fprintf(out, "namespace net {\n\n");
242    fprintf(out, "const uint32 SSLFalseStartBlacklist::kHashTable[%d + 1] = {\n",
243            kBuckets);
244    for (unsigned i = 0; i < kBuckets; i++) {
245      fprintf(out, "  %u,\n", (unsigned) table[i]);
246    }
247    fprintf(out, "  %u,\n", (unsigned) table_data.size());
248    fprintf(out, "};\n\n");
249  
250    fprintf(out, "const char SSLFalseStartBlacklist::kHashData[] = {\n");
251    for (unsigned i = 0, line_length = 0; i < table_data.size(); i++) {
252      if (line_length == 0)
253        fprintf(out, "  ");
254      uint8 c = static_cast<uint8>(table_data[i]);
255      line_length += fprintf(out, "%d, ", c);
256      if (i == table_data.size() - 1) {
257        fprintf(out, "\n};\n");
258      } else if (line_length >= 70) {
259        fprintf(out, "\n");
260        line_length = 0;
261      }
262    }
263    fprintf(out, "\n}  // namespace net\n");
264    fclose(out);
265  
266    return 0;
267  }
268