1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/dns/dns_hosts.h"
6
7 #include "base/files/file_util.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_util.h"
11
12 using base::StringPiece;
13
14 namespace net {
15
16 namespace {
17
18 // Parses the contents of a hosts file. Returns one token (IP or hostname) at
19 // a time. Doesn't copy anything; accepts the file as a StringPiece and
20 // returns tokens as StringPieces.
21 class HostsParser {
22 public:
HostsParser(const StringPiece & text,ParseHostsCommaMode comma_mode)23 explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode)
24 : text_(text),
25 data_(text.data()),
26 end_(text.size()),
27 pos_(0),
28 token_is_ip_(false),
29 comma_mode_(comma_mode) {}
30
31 // Advances to the next token (IP or hostname). Returns whether another
32 // token was available. |token_is_ip| and |token| can be used to find out
33 // the type and text of the token.
Advance()34 bool Advance() {
35 bool next_is_ip = (pos_ == 0);
36 while (pos_ < end_ && pos_ != std::string::npos) {
37 switch (text_[pos_]) {
38 case ' ':
39 case '\t':
40 SkipWhitespace();
41 break;
42
43 case '\r':
44 case '\n':
45 next_is_ip = true;
46 pos_++;
47 break;
48
49 case '#':
50 SkipRestOfLine();
51 break;
52
53 case ',':
54 if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) {
55 SkipWhitespace();
56 break;
57 }
58
59 // If comma_mode_ is COMMA_IS_TOKEN, fall through:
60
61 default: {
62 size_t token_start = pos_;
63 SkipToken();
64 size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;
65
66 token_ = StringPiece(data_ + token_start, token_end - token_start);
67 token_is_ip_ = next_is_ip;
68
69 return true;
70 }
71 }
72 }
73
74 return false;
75 }
76
77 // Fast-forwards the parser to the next line. Should be called if an IP
78 // address doesn't parse, to avoid wasting time tokenizing hostnames that
79 // will be ignored.
SkipRestOfLine()80 void SkipRestOfLine() {
81 pos_ = text_.find("\n", pos_);
82 }
83
84 // Returns whether the last-parsed token is an IP address (true) or a
85 // hostname (false).
token_is_ip()86 bool token_is_ip() { return token_is_ip_; }
87
88 // Returns the text of the last-parsed token as a StringPiece referencing
89 // the same underlying memory as the StringPiece passed to the constructor.
90 // Returns an empty StringPiece if no token has been parsed or the end of
91 // the input string has been reached.
token()92 const StringPiece& token() { return token_; }
93
94 private:
SkipToken()95 void SkipToken() {
96 switch (comma_mode_) {
97 case PARSE_HOSTS_COMMA_IS_TOKEN:
98 pos_ = text_.find_first_of(" \t\n\r#", pos_);
99 break;
100 case PARSE_HOSTS_COMMA_IS_WHITESPACE:
101 pos_ = text_.find_first_of(" ,\t\n\r#", pos_);
102 break;
103 }
104 }
105
SkipWhitespace()106 void SkipWhitespace() {
107 switch (comma_mode_) {
108 case PARSE_HOSTS_COMMA_IS_TOKEN:
109 pos_ = text_.find_first_not_of(" \t", pos_);
110 break;
111 case PARSE_HOSTS_COMMA_IS_WHITESPACE:
112 pos_ = text_.find_first_not_of(" ,\t", pos_);
113 break;
114 }
115 }
116
117 const StringPiece text_;
118 const char* data_;
119 const size_t end_;
120
121 size_t pos_;
122 StringPiece token_;
123 bool token_is_ip_;
124
125 const ParseHostsCommaMode comma_mode_;
126
127 DISALLOW_COPY_AND_ASSIGN(HostsParser);
128 };
129
ParseHostsWithCommaMode(const std::string & contents,DnsHosts * dns_hosts,ParseHostsCommaMode comma_mode)130 void ParseHostsWithCommaMode(const std::string& contents,
131 DnsHosts* dns_hosts,
132 ParseHostsCommaMode comma_mode) {
133 CHECK(dns_hosts);
134 DnsHosts& hosts = *dns_hosts;
135
136 StringPiece ip_text;
137 IPAddressNumber ip;
138 AddressFamily family = ADDRESS_FAMILY_IPV4;
139 HostsParser parser(contents, comma_mode);
140 while (parser.Advance()) {
141 if (parser.token_is_ip()) {
142 StringPiece new_ip_text = parser.token();
143 // Some ad-blocking hosts files contain thousands of entries pointing to
144 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP
145 // again if it's the same as the one above it.
146 if (new_ip_text != ip_text) {
147 IPAddressNumber new_ip;
148 if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) {
149 ip_text = new_ip_text;
150 ip.swap(new_ip);
151 family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
152 } else {
153 parser.SkipRestOfLine();
154 }
155 }
156 } else {
157 DnsHostsKey key(parser.token().as_string(), family);
158 base::StringToLowerASCII(&key.first);
159 IPAddressNumber& mapped_ip = hosts[key];
160 if (mapped_ip.empty())
161 mapped_ip = ip;
162 // else ignore this entry (first hit counts)
163 }
164 }
165 }
166
167 } // namespace
168
ParseHostsWithCommaModeForTesting(const std::string & contents,DnsHosts * dns_hosts,ParseHostsCommaMode comma_mode)169 void ParseHostsWithCommaModeForTesting(const std::string& contents,
170 DnsHosts* dns_hosts,
171 ParseHostsCommaMode comma_mode) {
172 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
173 }
174
ParseHosts(const std::string & contents,DnsHosts * dns_hosts)175 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
176 ParseHostsCommaMode comma_mode;
177 #if defined(OS_MACOSX)
178 // Mac OS X allows commas to separate hostnames.
179 comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE;
180 #else
181 // Linux allows commas in hostnames.
182 comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN;
183 #endif
184
185 ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
186 }
187
ParseHostsFile(const base::FilePath & path,DnsHosts * dns_hosts)188 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) {
189 dns_hosts->clear();
190 // Missing file indicates empty HOSTS.
191 if (!base::PathExists(path))
192 return true;
193
194 int64 size;
195 if (!base::GetFileSize(path, &size))
196 return false;
197
198 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size);
199
200 // Reject HOSTS files larger than |kMaxHostsSize| bytes.
201 const int64 kMaxHostsSize = 1 << 25; // 32MB
202 if (size > kMaxHostsSize)
203 return false;
204
205 std::string contents;
206 if (!base::ReadFileToString(path, &contents))
207 return false;
208
209 ParseHosts(contents, dns_hosts);
210 return true;
211 }
212
213 } // namespace net
214
215