• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/dns/dns_hosts.h"
6 
7 #include "base/file_util.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/string_tokenizer.h"
12 
13 using base::StringPiece;
14 
15 namespace net {
16 
17 // Parses the contents of a hosts file.  Returns one token (IP or hostname) at
18 // a time.  Doesn't copy anything; accepts the file as a StringPiece and
19 // returns tokens as StringPieces.
20 class HostsParser {
21  public:
HostsParser(const StringPiece & text)22   explicit HostsParser(const StringPiece& text)
23       : text_(text),
24         data_(text.data()),
25         end_(text.size()),
26         pos_(0),
27         token_(),
28         token_is_ip_(false) {}
29 
30   // Advances to the next token (IP or hostname).  Returns whether another
31   // token was available.  |token_is_ip| and |token| can be used to find out
32   // the type and text of the token.
Advance()33   bool Advance() {
34     bool next_is_ip = (pos_ == 0);
35     while (pos_ < end_ && pos_ != std::string::npos) {
36       switch (text_[pos_]) {
37         case ' ':
38         case '\t':
39           SkipWhitespace();
40           break;
41 
42         case '\r':
43         case '\n':
44           next_is_ip = true;
45           pos_++;
46           break;
47 
48         case '#':
49           SkipRestOfLine();
50           break;
51 
52         default: {
53           size_t token_start = pos_;
54           SkipToken();
55           size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;
56 
57           token_ = StringPiece(data_ + token_start, token_end - token_start);
58           token_is_ip_ = next_is_ip;
59 
60           return true;
61         }
62       }
63     }
64 
65     text_ = StringPiece();
66     return false;
67   }
68 
69   // Fast-forwards the parser to the next line.  Should be called if an IP
70   // address doesn't parse, to avoid wasting time tokenizing hostnames that
71   // will be ignored.
SkipRestOfLine()72   void SkipRestOfLine() {
73     pos_ = text_.find("\n", pos_);
74   }
75 
76   // Returns whether the last-parsed token is an IP address (true) or a
77   // hostname (false).
token_is_ip()78   bool token_is_ip() { return token_is_ip_; }
79 
80   // Returns the text of the last-parsed token as a StringPiece referencing
81   // the same underlying memory as the StringPiece passed to the constructor.
82   // Returns an empty StringPiece if no token has been parsed or the end of
83   // the input string has been reached.
token()84   const StringPiece& token() { return token_; }
85 
86  private:
SkipToken()87   void SkipToken() {
88     pos_ = text_.find_first_of(" \t\n\r#", pos_);
89   }
90 
SkipWhitespace()91   void SkipWhitespace() {
92     pos_ = text_.find_first_not_of(" \t", pos_);
93   }
94 
95   StringPiece text_;
96   const char* data_;
97   const size_t end_;
98 
99   size_t pos_;
100   StringPiece token_;
101   bool token_is_ip_;
102 
103   DISALLOW_COPY_AND_ASSIGN(HostsParser);
104 };
105 
106 
107 
ParseHosts(const std::string & contents,DnsHosts * dns_hosts)108 void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
109   CHECK(dns_hosts);
110   DnsHosts& hosts = *dns_hosts;
111 
112   StringPiece ip_text;
113   IPAddressNumber ip;
114   AddressFamily family = ADDRESS_FAMILY_IPV4;
115   HostsParser parser(contents);
116   while (parser.Advance()) {
117     if (parser.token_is_ip()) {
118       StringPiece new_ip_text = parser.token();
119       // Some ad-blocking hosts files contain thousands of entries pointing to
120       // the same IP address (usually 127.0.0.1).  Don't bother parsing the IP
121       // again if it's the same as the one above it.
122       if (new_ip_text != ip_text) {
123         IPAddressNumber new_ip;
124         if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) {
125           ip_text = new_ip_text;
126           ip.swap(new_ip);
127           family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
128         } else {
129           parser.SkipRestOfLine();
130         }
131       }
132     } else {
133       DnsHostsKey key(parser.token().as_string(), family);
134       StringToLowerASCII(&key.first);
135       IPAddressNumber& mapped_ip = hosts[key];
136       if (mapped_ip.empty())
137         mapped_ip = ip;
138       // else ignore this entry (first hit counts)
139     }
140   }
141 }
142 
ParseHostsFile(const base::FilePath & path,DnsHosts * dns_hosts)143 bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) {
144   dns_hosts->clear();
145   // Missing file indicates empty HOSTS.
146   if (!base::PathExists(path))
147     return true;
148 
149   int64 size;
150   if (!base::GetFileSize(path, &size))
151     return false;
152 
153   UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size);
154 
155   // Reject HOSTS files larger than |kMaxHostsSize| bytes.
156   const int64 kMaxHostsSize = 1 << 25;  // 32MB
157   if (size > kMaxHostsSize)
158     return false;
159 
160   std::string contents;
161   if (!base::ReadFileToString(path, &contents))
162     return false;
163 
164   ParseHosts(contents, dns_hosts);
165   return true;
166 }
167 
168 }  // namespace net
169 
170