1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/prerender/prerender_util.h"
6
7 #include "base/logging.h"
8 #include "base/metrics/histogram.h"
9 #include "base/metrics/sparse_histogram.h"
10 #include "base/strings/string_util.h"
11 #include "content/public/browser/resource_request_info.h"
12 #include "net/http/http_response_headers.h"
13 #include "net/url_request/url_request.h"
14 #include "url/url_canon.h"
15 #include "url/url_parse.h"
16 #include "url/url_util.h"
17 #include "webkit/common/resource_type.h"
18
19 namespace prerender {
20
21 namespace {
22
// Names of the response headers inspected by GatherPagespeedData().
const char kModPagespeedHeader[] = "X-Mod-Pagespeed";
const char kPageSpeedHeader[] = "X-Page-Speed";

// Names of the UMA histograms recorded by GatherPagespeedData(): one counting
// responses per server type, one counting responses per version bucket.
const char kPagespeedServerHistogram[] =
    "Prerender.PagespeedHeader.ServerCounts";
const char kPagespeedVersionHistogram[] =
    "Prerender.PagespeedHeader.VersionCounts";
29
// Buckets of the kPagespeedServerHistogram enumeration histogram. The values
// are spelled out because they are the bucket numbers that get recorded.
enum PagespeedHeaderServerType {
  // Every main-frame HTTP(S) response examined.
  PAGESPEED_TOTAL_RESPONSES = 0,
  // Response carried an X-Mod-Pagespeed header.
  PAGESPEED_MOD_PAGESPEED_SERVER = 1,
  // X-Page-Speed header whose value is in the X-Mod-Pagespeed version format.
  PAGESPEED_NGX_PAGESPEED_SERVER = 2,
  // X-Page-Speed header whose value is in the PageSpeed Service format.
  PAGESPEED_PAGESPEED_SERVICE_SERVER = 3,
  // X-Page-Speed header with any other non-empty value.
  PAGESPEED_UNKNOWN_SERVER = 4,
  // Exclusive upper bound handed to UMA_HISTOGRAM_ENUMERATION.
  PAGESPEED_SERVER_MAXIMUM = 5
};
38
// Parses a PageSpeed version string of the form a.b.c.d-e and encodes it as a
// version-histogram bucket in the range [2, 99]:
//   bucket = 2 + 2 * (max(c, 10) - 10) + (d > 1 ? 1 : 0)
// Returns 0 if |version| does not begin with five integers in that format, or
// if the resulting bucket would fall outside [2, 99].
int GetXModPagespeedBucketFromVersion(const std::string& version) {
  const int kMinBucket = 2;
  const int kMaxBucket = 99;
  const int kBaseComponent = 10;
  // Largest |c| whose bucket still fits: 2 + 2 * (58 - 10) + 1 == 99.
  const int kMaxComponent = kBaseComponent + (kMaxBucket - kMinBucket) / 2;

  int a = 0;
  int b = 0;
  int c = 0;
  int d = 0;
  int e = 0;
  const int num_parsed =
      sscanf(version.c_str(), "%d.%d.%d.%d-%d", &a, &b, &c, &d, &e);
  if (num_parsed != 5)
    return 0;

  // Reject oversized values *before* doing arithmetic on them: computing
  // 2 * (c - 10) for a huge |c| overflows a signed int, which is undefined
  // behaviour. Anything above kMaxComponent maps outside [2, 99] anyway.
  if (c > kMaxComponent)
    return 0;

  int bucket = kMinBucket;
  if (c > kBaseComponent)
    bucket += 2 * (c - kBaseComponent);
  if (d > 1)
    ++bucket;
  // With c <= kMaxComponent the result is always within [2, 99].
  return bucket;
}
59
// Returns true when |version| (an X-Page-Speed header value) is in the
// PageSpeed Service format, namely m_n_dc where m_n is a version number and
// dc is an encoded 2-character value.
bool IsPageSpeedServiceVersionNumber(const std::string& version) {
  int first_number = 0;
  int second_number = 0;
  char code_first = '\0';
  char code_second = '\0';
  // Sentinel for end-of-string detection: the match only counts when this
  // fifth conversion does NOT happen, i.e. nothing follows the 2-char code.
  char trailing = '\0';

  const int conversions = sscanf(version.c_str(), "%d_%d_%c%c%c",
                                 &first_number, &second_number,
                                 &code_first, &code_second, &trailing);
  return conversions == 4;
}
69
// Buckets of the "Prerender.SchemeCancelReason" enumeration histogram. The
// values are written out explicitly; they match the implicit numbering
// (0, 1, 2, ...) of the declaration order.
enum PrerenderSchemeCancelReason {
  PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL = 0,
  PRERENDER_SCHEME_CANCEL_REASON_DATA = 1,
  PRERENDER_SCHEME_CANCEL_REASON_BLOB = 2,
  PRERENDER_SCHEME_CANCEL_REASON_FILE = 3,
  PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM = 4,
  PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET = 5,
  PRERENDER_SCHEME_CANCEL_REASON_FTP = 6,
  PRERENDER_SCHEME_CANCEL_REASON_CHROME = 7,
  PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION = 8,
  PRERENDER_SCHEME_CANCEL_REASON_ABOUT = 9,
  PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN = 10,
  // Exclusive upper bound handed to UMA_HISTOGRAM_ENUMERATION.
  PRERENDER_SCHEME_CANCEL_REASON_MAX = 11,
};
84
ReportPrerenderSchemeCancelReason(PrerenderSchemeCancelReason reason)85 void ReportPrerenderSchemeCancelReason(PrerenderSchemeCancelReason reason) {
86 UMA_HISTOGRAM_ENUMERATION(
87 "Prerender.SchemeCancelReason", reason,
88 PRERENDER_SCHEME_CANCEL_REASON_MAX);
89 }
90
91 } // namespace
92
// Key string "chrome_navigate".
// NOTE(review): not referenced anywhere in this file; presumably declared in
// prerender_util.h for use by other translation units -- confirm.
const char kChromeNavigateExtraDataKey[] = "chrome_navigate";
94
MaybeGetQueryStringBasedAliasURL(const GURL & url,GURL * alias_url)95 bool MaybeGetQueryStringBasedAliasURL(
96 const GURL& url, GURL* alias_url) {
97 DCHECK(alias_url);
98 url::Parsed parsed;
99 url::ParseStandardURL(url.spec().c_str(), url.spec().length(), &parsed);
100 url::Component query = parsed.query;
101 url::Component key, value;
102 while (url::ExtractQueryKeyValue(url.spec().c_str(), &query, &key, &value)) {
103 if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "url", key.len))
104 continue;
105 // We found a url= query string component.
106 if (value.len < 1)
107 continue;
108 url::RawCanonOutputW<1024> decoded_url;
109 url::DecodeURLEscapeSequences(url.spec().c_str() + value.begin, value.len,
110 &decoded_url);
111 GURL new_url(base::string16(decoded_url.data(), decoded_url.length()));
112 if (!new_url.is_empty() && new_url.is_valid()) {
113 *alias_url = new_url;
114 return true;
115 }
116 return false;
117 }
118 return false;
119 }
120
GetQueryStringBasedExperiment(const GURL & url)121 uint8 GetQueryStringBasedExperiment(const GURL& url) {
122 url::Parsed parsed;
123 url::ParseStandardURL(url.spec().c_str(), url.spec().length(), &parsed);
124 url::Component query = parsed.query;
125 url::Component key, value;
126 while (url::ExtractQueryKeyValue(url.spec().c_str(), &query, &key, &value)) {
127 if (key.len != 3 || strncmp(url.spec().c_str() + key.begin, "lpe", key.len))
128 continue;
129
130 // We found a lpe= query string component.
131 if (value.len != 1)
132 continue;
133 uint8 exp = *(url.spec().c_str() + value.begin) - '0';
134 if (exp < 1 || exp > 9)
135 continue;
136 return exp;
137 }
138 return kNoExperiment;
139 }
140
IsGoogleDomain(const GURL & url)141 bool IsGoogleDomain(const GURL& url) {
142 return StartsWithASCII(url.host(), std::string("www.google."), true);
143 }
144
IsGoogleSearchResultURL(const GURL & url)145 bool IsGoogleSearchResultURL(const GURL& url) {
146 if (!IsGoogleDomain(url))
147 return false;
148 return (url.path().empty() ||
149 StartsWithASCII(url.path(), std::string("/search"), true) ||
150 (url.path() == "/") ||
151 StartsWithASCII(url.path(), std::string("/webhp"), true));
152 }
153
IsNoSwapInExperiment(uint8 experiment_id)154 bool IsNoSwapInExperiment(uint8 experiment_id) {
155 // Currently, experiments 5 and 6 fall in this category.
156 return experiment_id == 5 || experiment_id == 6;
157 }
158
IsControlGroupExperiment(uint8 experiment_id)159 bool IsControlGroupExperiment(uint8 experiment_id) {
160 // Currently, experiments 7 and 8 fall in this category.
161 return experiment_id == 7 || experiment_id == 8;
162 }
163
GatherPagespeedData(const ResourceType::Type resource_type,const GURL & request_url,const net::HttpResponseHeaders * response_headers)164 void GatherPagespeedData(const ResourceType::Type resource_type,
165 const GURL& request_url,
166 const net::HttpResponseHeaders* response_headers) {
167 if (resource_type != ResourceType::MAIN_FRAME ||
168 !request_url.SchemeIsHTTPOrHTTPS())
169 return;
170
171 // bucket 0 counts every response seen.
172 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
173 PAGESPEED_TOTAL_RESPONSES,
174 PAGESPEED_SERVER_MAXIMUM);
175 if (!response_headers)
176 return;
177
178 void* iter = NULL;
179 std::string name;
180 std::string value;
181 while (response_headers->EnumerateHeaderLines(&iter, &name, &value)) {
182 if (name == kModPagespeedHeader) {
183 // Bucket 1 counts occurences of the X-Mod-Pagespeed header.
184 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
185 PAGESPEED_MOD_PAGESPEED_SERVER,
186 PAGESPEED_SERVER_MAXIMUM);
187 if (!value.empty()) {
188 // If the header value is in the X-Mod-Pagespeed version number format
189 // then increment the appropriate bucket, otherwise increment bucket 1,
190 // which is the catch-all "unknown version number" bucket.
191 int bucket = GetXModPagespeedBucketFromVersion(value);
192 if (bucket > 0) {
193 UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket);
194 } else {
195 UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, 1);
196 }
197 }
198 break;
199 } else if (name == kPageSpeedHeader) {
200 // X-Page-Speed header versions are either in the X-Mod-Pagespeed format,
201 // indicating an nginx installation, or they're in the PageSpeed Service
202 // format, indicating a PSS installation, or in some other format,
203 // indicating an unknown installation [possibly IISpeed].
204 if (!value.empty()) {
205 int bucket = GetXModPagespeedBucketFromVersion(value);
206 if (bucket > 0) {
207 // Bucket 2 counts occurences of the X-Page-Speed header with a
208 // value in the X-Mod-Pagespeed version number format. We also
209 // count these responses in the version histogram.
210 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
211 PAGESPEED_NGX_PAGESPEED_SERVER,
212 PAGESPEED_SERVER_MAXIMUM);
213 UMA_HISTOGRAM_SPARSE_SLOWLY(kPagespeedVersionHistogram, bucket);
214 } else if (IsPageSpeedServiceVersionNumber(value)) {
215 // Bucket 3 counts occurences of the X-Page-Speed header with a
216 // value in the PageSpeed Service version number format.
217 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
218 PAGESPEED_PAGESPEED_SERVICE_SERVER,
219 PAGESPEED_SERVER_MAXIMUM);
220 } else {
221 // Bucket 4 counts occurences of all other values.
222 UMA_HISTOGRAM_ENUMERATION(kPagespeedServerHistogram,
223 PAGESPEED_UNKNOWN_SERVER,
224 PAGESPEED_SERVER_MAXIMUM);
225 }
226 }
227 break;
228 }
229 }
230 }
231
URLRequestResponseStarted(net::URLRequest * request)232 void URLRequestResponseStarted(net::URLRequest* request) {
233 const content::ResourceRequestInfo* info =
234 content::ResourceRequestInfo::ForRequest(request);
235 GatherPagespeedData(info->GetResourceType(),
236 request->url(),
237 request->response_headers());
238 }
239
ReportPrerenderExternalURL()240 void ReportPrerenderExternalURL() {
241 ReportPrerenderSchemeCancelReason(
242 PRERENDER_SCHEME_CANCEL_REASON_EXTERNAL_PROTOCOL);
243 }
244
ReportUnsupportedPrerenderScheme(const GURL & url)245 void ReportUnsupportedPrerenderScheme(const GURL& url) {
246 if (url.SchemeIs("data")) {
247 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_DATA);
248 } else if (url.SchemeIs("blob")) {
249 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_BLOB);
250 } else if (url.SchemeIsFile()) {
251 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FILE);
252 } else if (url.SchemeIsFileSystem()) {
253 ReportPrerenderSchemeCancelReason(
254 PRERENDER_SCHEME_CANCEL_REASON_FILESYSTEM);
255 } else if (url.SchemeIs("ws") || url.SchemeIs("wss")) {
256 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_WEBSOCKET);
257 } else if (url.SchemeIs("ftp")) {
258 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_FTP);
259 } else if (url.SchemeIs("chrome")) {
260 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_CHROME);
261 } else if (url.SchemeIs("chrome-extension")) {
262 ReportPrerenderSchemeCancelReason(
263 PRERENDER_SCHEME_CANCEL_REASON_CHROME_EXTENSION);
264 } else if (url.SchemeIs("about")) {
265 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_ABOUT);
266 } else {
267 ReportPrerenderSchemeCancelReason(PRERENDER_SCHEME_CANCEL_REASON_UNKNOWN);
268 }
269 }
270
271 } // namespace prerender
272