• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // This command-line program converts an effective-TLD data file in UTF-8 from
6 // the format provided by Mozilla to the format expected by Chrome.  This
7 // program generates an intermediate file which is then used by gperf to
8 // generate a perfect hash map.  The benefit of this approach is that no time is
9 // spent on program initialization to generate the map of this data.
10 //
11 // Running this program finds "effective_tld_names.dat" in the expected location
12 // in the source checkout and generates "effective_tld_names.gperf" next to it.
13 //
14 // Any errors or warnings from this program are recorded in tld_cleanup.log.
15 //
16 // In particular, it
17 //  * Strips blank lines and comments, as well as notes for individual rules.
18 //  * Strips a single leading and/or trailing dot from each rule, if present.
19 //  * Logs a warning if a rule contains '!' or '*.' other than at the beginning
20 //    of the rule.  (This also catches multiple ! or *. at the start of a rule.)
21 //  * Logs a warning if GURL reports a rule as invalid, but keeps the rule.
22 //  * Canonicalizes each rule's domain by converting it to a GURL and back.
23 //  * Adds explicit rules for true TLDs found in any rule.
24 //  * Marks entries in the file between "// ===BEGIN PRIVATE DOMAINS==="
25 //    and "// ===END PRIVATE DOMAINS===" as private.
26 
27 #include "base/at_exit.h"
28 #include "base/command_line.h"
29 #include "base/files/file_path.h"
30 #include "base/files/file_util.h"
31 #include "base/i18n/icu_util.h"
32 #include "base/logging.h"
33 #include "base/path_service.h"
34 #include "base/process/memory.h"
35 #include "net/tools/tld_cleanup/tld_cleanup_util.h"
36 
main(int argc,const char * argv[])37 int main(int argc, const char* argv[]) {
38   base::EnableTerminationOnHeapCorruption();
39   if (argc != 1) {
40     fprintf(stderr, "Normalizes and verifies UTF-8 TLD data files\n");
41     fprintf(stderr, "Usage: %s\n", argv[0]);
42     return 1;
43   }
44 
45   // Manages the destruction of singletons.
46   base::AtExitManager exit_manager;
47 
48   // Only use OutputDebugString in debug mode.
49 #ifdef NDEBUG
50   logging::LoggingDestination destination = logging::LOG_TO_FILE;
51 #else
52   logging::LoggingDestination destination =
53       logging::LOG_TO_ALL;
54 #endif
55 
56   base::CommandLine::Init(argc, argv);
57 
58   base::FilePath log_filename;
59   base::PathService::Get(base::DIR_EXE, &log_filename);
60   log_filename = log_filename.AppendASCII("tld_cleanup.log");
61   logging::LoggingSettings settings;
62   settings.logging_dest = destination;
63   settings.log_file_path = log_filename.value().c_str();
64   settings.delete_old = logging::DELETE_OLD_LOG_FILE;
65   logging::InitLogging(settings);
66 
67   base::i18n::InitializeICU();
68 
69   base::FilePath input_file;
70   base::PathService::Get(base::DIR_SOURCE_ROOT, &input_file);
71   input_file = input_file.Append(FILE_PATH_LITERAL("net"))
72                          .Append(FILE_PATH_LITERAL("base"))
73                          .Append(FILE_PATH_LITERAL(
74                              "registry_controlled_domains"))
75                          .Append(FILE_PATH_LITERAL("effective_tld_names.dat"));
76   base::FilePath output_file;
77   base::PathService::Get(base::DIR_SOURCE_ROOT, &output_file);
78   output_file = output_file.Append(FILE_PATH_LITERAL("net"))
79                            .Append(FILE_PATH_LITERAL("base"))
80                            .Append(FILE_PATH_LITERAL(
81                                "registry_controlled_domains"))
82                            .Append(FILE_PATH_LITERAL(
83                                "effective_tld_names.gperf"));
84   net::tld_cleanup::NormalizeResult result =
85       net::tld_cleanup::NormalizeFile(input_file, output_file);
86   if (result != net::tld_cleanup::NormalizeResult::kSuccess) {
87     fprintf(stderr,
88             "Errors or warnings processing file.  See log in tld_cleanup.log.");
89   }
90 
91   if (result == net::tld_cleanup::NormalizeResult::kError)
92     return 1;
93   return 0;
94 }
95