• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/dns/dns_hosts.h"
6 
7 #include "base/files/file_util.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_util.h"
11 
12 using base::StringPiece;
13 
14 namespace net {
15 
16 namespace {
17 
18 // Parses the contents of a hosts file.  Returns one token (IP or hostname) at
19 // a time.  Doesn't copy anything; accepts the file as a StringPiece and
20 // returns tokens as StringPieces.
21 class HostsParser {
22  public:
HostsParser(const StringPiece & text,ParseHostsCommaMode comma_mode)23   explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode)
24       : text_(text),
25         data_(text.data()),
26         end_(text.size()),
27         pos_(0),
28         token_is_ip_(false),
29         comma_mode_(comma_mode) {}
30 
31   // Advances to the next token (IP or hostname).  Returns whether another
32   // token was available.  |token_is_ip| and |token| can be used to find out
33   // the type and text of the token.
Advance()34   bool Advance() {
35     bool next_is_ip = (pos_ == 0);
36     while (pos_ < end_ && pos_ != std::string::npos) {
37       switch (text_[pos_]) {
38         case ' ':
39         case '\t':
40           SkipWhitespace();
41           break;
42 
43         case '\r':
44         case '\n':
45           next_is_ip = true;
46           pos_++;
47           break;
48 
49         case '#':
50           SkipRestOfLine();
51           break;
52 
53         case ',':
54           if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) {
55             SkipWhitespace();
56             break;
57           }
58 
59           // If comma_mode_ is COMMA_IS_TOKEN, fall through:
60 
61         default: {
62           size_t token_start = pos_;
63           SkipToken();
64           size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;
65 
66           token_ = StringPiece(data_ + token_start, token_end - token_start);
67           token_is_ip_ = next_is_ip;
68 
69           return true;
70         }
71       }
72     }
73 
74     return false;
75   }
76 
77   // Fast-forwards the parser to the next line.  Should be called if an IP
78   // address doesn't parse, to avoid wasting time tokenizing hostnames that
79   // will be ignored.
SkipRestOfLine()80   void SkipRestOfLine() {
81     pos_ = text_.find("\n", pos_);
82   }
83 
84   // Returns whether the last-parsed token is an IP address (true) or a
85   // hostname (false).
token_is_ip()86   bool token_is_ip() { return token_is_ip_; }
87 
88   // Returns the text of the last-parsed token as a StringPiece referencing
89   // the same underlying memory as the StringPiece passed to the constructor.
90   // Returns an empty StringPiece if no token has been parsed or the end of
91   // the input string has been reached.
token()92   const StringPiece& token() { return token_; }
93 
94  private:
SkipToken()95   void SkipToken() {
96     switch (comma_mode_) {
97       case PARSE_HOSTS_COMMA_IS_TOKEN:
98         pos_ = text_.find_first_of(" \t\n\r#", pos_);
99         break;
100       case PARSE_HOSTS_COMMA_IS_WHITESPACE:
101         pos_ = text_.find_first_of(" ,\t\n\r#", pos_);
102         break;
103     }
104   }
105 
SkipWhitespace()106   void SkipWhitespace() {
107     switch (comma_mode_) {
108       case PARSE_HOSTS_COMMA_IS_TOKEN:
109         pos_ = text_.find_first_not_of(" \t", pos_);
110         break;
111       case PARSE_HOSTS_COMMA_IS_WHITESPACE:
112         pos_ = text_.find_first_not_of(" ,\t", pos_);
113         break;
114     }
115   }
116 
117   const StringPiece text_;
118   const char* data_;
119   const size_t end_;
120 
121   size_t pos_;
122   StringPiece token_;
123   bool token_is_ip_;
124 
125   const ParseHostsCommaMode comma_mode_;
126 
127   DISALLOW_COPY_AND_ASSIGN(HostsParser);
128 };
129 
ParseHostsWithCommaMode(const std::string & contents,DnsHosts * dns_hosts,ParseHostsCommaMode comma_mode)130 void ParseHostsWithCommaMode(const std::string& contents,
131                              DnsHosts* dns_hosts,
132                              ParseHostsCommaMode comma_mode) {
133   CHECK(dns_hosts);
134   DnsHosts& hosts = *dns_hosts;
135 
136   StringPiece ip_text;
137   IPAddressNumber ip;
138   AddressFamily family = ADDRESS_FAMILY_IPV4;
139   HostsParser parser(contents, comma_mode);
140   while (parser.Advance()) {
141     if (parser.token_is_ip()) {
142       StringPiece new_ip_text = parser.token();
143       // Some ad-blocking hosts files contain thousands of entries pointing to
144       // the same IP address (usually 127.0.0.1).  Don't bother parsing the IP
145       // again if it's the same as the one above it.
146       if (new_ip_text != ip_text) {
147         IPAddressNumber new_ip;
148         if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) {
149           ip_text = new_ip_text;
150           ip.swap(new_ip);
151           family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
152         } else {
153           parser.SkipRestOfLine();
154         }
155       }
156     } else {
157       DnsHostsKey key(parser.token().as_string(), family);
158       base::StringToLowerASCII(&key.first);
159       IPAddressNumber& mapped_ip = hosts[key];
160       if (mapped_ip.empty())
161         mapped_ip = ip;
162       // else ignore this entry (first hit counts)
163     }
164   }
165 }
166 
167 }  // namespace
168 
ParseHostsWithCommaModeForTesting(const std::string & contents,DnsHosts * dns_hosts,ParseHostsCommaMode comma_mode)169 void ParseHostsWithCommaModeForTesting(const std::string& contents,
170                                        DnsHosts* dns_hosts,
171                                        ParseHostsCommaMode comma_mode) {
172   ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
173 }
174 
ParseHosts(const std::string & contents,DnsHosts * dns_hosts)175 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
176   ParseHostsCommaMode comma_mode;
177 #if defined(OS_MACOSX)
178   // Mac OS X allows commas to separate hostnames.
179   comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE;
180 #else
181   // Linux allows commas in hostnames.
182   comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN;
183 #endif
184 
185   ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
186 }
187 
ParseHostsFile(const base::FilePath & path,DnsHosts * dns_hosts)188 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) {
189   dns_hosts->clear();
190   // Missing file indicates empty HOSTS.
191   if (!base::PathExists(path))
192     return true;
193 
194   int64 size;
195   if (!base::GetFileSize(path, &size))
196     return false;
197 
198   UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size);
199 
200   // Reject HOSTS files larger than |kMaxHostsSize| bytes.
201   const int64 kMaxHostsSize = 1 << 25;  // 32MB
202   if (size > kMaxHostsSize)
203     return false;
204 
205   std::string contents;
206   if (!base::ReadFileToString(path, &contents))
207     return false;
208 
209   ParseHosts(contents, dns_hosts);
210   return true;
211 }
212 
213 }  // namespace net
214 
215