1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "webkit/glue/site_isolation_metrics.h"
6
7 #include <set>
8
9 #include "base/hash_tables.h"
10 #include "base/metrics/histogram.h"
11 #include "net/base/mime_sniffer.h"
12 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"
13 #include "third_party/WebKit/Source/WebKit/chromium/public/WebSecurityOrigin.h"
14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h"
15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"
16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLRequest.h"
17 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLResponse.h"
18
19 using WebKit::WebFrame;
20 using WebKit::WebSecurityOrigin;
21 using WebKit::WebString;
22 using WebKit::WebURL;
23 using WebKit::WebURLRequest;
24 using WebKit::WebURLResponse;
25
26 namespace webkit_glue {
27
// Maps a request identifier to the target type stored for it by
// SiteIsolationMetrics::AddRequest().
typedef base::hash_map<unsigned, WebURLRequest::TargetType> TargetTypeMap;
// Maps a MIME type string to its index in kCrossOriginMimeTypesToLog.
typedef base::hash_map<std::string, int> MimeTypeMap;
// Holds the URL specs of cross-origin text/html responses that are waiting
// for their payload to be sniffed.
typedef std::set<std::string> CrossOriginTextHtmlResponseSet;
31
GetTargetTypeMap()32 static TargetTypeMap* GetTargetTypeMap() {
33 static TargetTypeMap target_type_map_;
34 return &target_type_map_;
35 }
36
// Copied from net/base/mime_util.cc, supported_non_image_types[].
//
// GetMimeTypeMap() maps each entry to its position in this array, and that
// position is the value reported to the MIME type histograms, so the order of
// the entries determines the recorded bucket for each type.
static const char* const kCrossOriginMimeTypesToLog[] = {
  "text/cache-manifest",
  "text/html",
  "text/xml",
  "text/xsl",
  "text/plain",
  "text/vnd.chromium.ftp-dir",
  "text/",
  "text/css",
  "image/svg+xml",
  "application/xml",
  "application/xhtml+xml",
  "application/rss+xml",
  "application/atom+xml",
  "application/json",
  "application/x-x509-user-cert",
  "multipart/x-mixed-replace",
  "(NONE)"  // Keep track of missing MIME types as well
};
57
GetMimeTypeMap()58 static MimeTypeMap* GetMimeTypeMap() {
59 static MimeTypeMap mime_type_map_;
60 if (!mime_type_map_.size()) {
61 for (size_t i = 0; i < arraysize(kCrossOriginMimeTypesToLog); ++i)
62 mime_type_map_[kCrossOriginMimeTypesToLog[i]] = i;
63 }
64 return &mime_type_map_;
65 }
66
67 // This is set is used to keep track of the response urls that we want to
68 // sniff, since we will have to wait for the payload to arrive.
GetCrossOriginTextHtmlResponseSet()69 static CrossOriginTextHtmlResponseSet* GetCrossOriginTextHtmlResponseSet() {
70 static CrossOriginTextHtmlResponseSet cross_origin_text_html_response_set_;
71 return &cross_origin_text_html_response_set_;
72 }
73
LogVerifiedTextHtmlResponse()74 static void LogVerifiedTextHtmlResponse() {
75 UMA_HISTOGRAM_COUNTS(
76 "SiteIsolation.CrossSiteNonFrameResponse_verified_texthtml_BLOCK", 1);
77 }
78
LogMislabeledTextHtmlResponse()79 static void LogMislabeledTextHtmlResponse() {
80 UMA_HISTOGRAM_COUNTS(
81 "SiteIsolation.CrossSiteNonFrameResponse_mislabeled_texthtml", 1);
82 }
83
AddRequest(unsigned identifier,WebURLRequest::TargetType target_type)84 void SiteIsolationMetrics::AddRequest(unsigned identifier,
85 WebURLRequest::TargetType target_type) {
86 TargetTypeMap& target_type_map = *GetTargetTypeMap();
87 target_type_map[identifier] = target_type;
88 }
89
90 // Check whether the given response is allowed due to access control headers.
91 // This is basically a copy of the logic of passesAccessControlCheck() in
92 // WebCore/loader/CrossOriginAccessControl.cpp.
AllowedByAccessControlHeader(WebFrame * frame,const WebURLResponse & response)93 bool SiteIsolationMetrics::AllowedByAccessControlHeader(
94 WebFrame* frame, const WebURLResponse& response) {
95 WebString access_control_origin = response.httpHeaderField(
96 WebString::fromUTF8("Access-Control-Allow-Origin"));
97 WebSecurityOrigin security_origin =
98 WebSecurityOrigin::createFromString(access_control_origin);
99 return access_control_origin == WebString::fromUTF8("*") ||
100 frame->securityOrigin().canAccess(security_origin);
101 }
102
103 // We want to log any cross-site request that we don't think a renderer should
104 // be allowed to make. We can safely ignore frame requests (since we'd like
105 // those to be in a separate renderer) and plugin requests, even if they are
106 // cross-origin.
107 //
108 // For comparison, we keep counts of:
109 // - All requests made by a renderer
110 // - All cross-site requests
111 //
112 // Then, for cross-site non-frame/plugin requests, we keep track of:
113 // - Counts for MIME types of interest
114 // - Counts of those MIME types that carry CORS headers
115 // - Counts of mislabeled text/html responses (without CORS)
116 // As well as those we would block:
117 // - Counts of verified text/html responses (without CORS)
118 // - Counts of XML/JSON responses (without CORS)
119 //
120 // This will let us say what percentage of requests we would end up blocking.
LogMimeTypeForCrossOriginRequest(WebFrame * frame,unsigned identifier,const WebURLResponse & response)121 void SiteIsolationMetrics::LogMimeTypeForCrossOriginRequest(
122 WebFrame* frame, unsigned identifier, const WebURLResponse& response) {
123 UMA_HISTOGRAM_COUNTS("SiteIsolation.Requests", 1);
124
125 TargetTypeMap& target_type_map = *GetTargetTypeMap();
126 TargetTypeMap::iterator iter = target_type_map.find(identifier);
127 if (iter != target_type_map.end()) {
128 WebURLRequest::TargetType target_type = iter->second;
129 target_type_map.erase(iter);
130
131 // Focus on cross-site requests.
132 if (!frame->securityOrigin().canAccess(
133 WebSecurityOrigin::create(response.url()))) {
134 UMA_HISTOGRAM_COUNTS("SiteIsolation.CrossSiteRequests", 1);
135
136 // Now focus on non-frame, non-plugin requests.
137 if (target_type != WebURLRequest::TargetIsMainFrame &&
138 target_type != WebURLRequest::TargetIsSubframe &&
139 target_type != WebURLRequest::TargetIsObject) {
140 // If it is part of a MIME type we might block, log the MIME type.
141 std::string mime_type = response.mimeType().utf8();
142 MimeTypeMap mime_type_map = *GetMimeTypeMap();
143 // Also track it if it lacks a MIME type.
144 // TODO(creis): 304 responses have no MIME type, so we don't handle
145 // them correctly. Can we look up their MIME type from the cache?
146 if (mime_type == "")
147 mime_type = "(NONE)";
148 MimeTypeMap::iterator mime_type_iter = mime_type_map.find(mime_type);
149 if (mime_type_iter != mime_type_map.end()) {
150 UMA_HISTOGRAM_ENUMERATION(
151 "SiteIsolation.CrossSiteNonFrameResponse_MIME_Type",
152 mime_type_iter->second,
153 arraysize(kCrossOriginMimeTypesToLog));
154
155 // We also check access control headers, in case this
156 // cross-origin request has been explicitly permitted.
157 if (AllowedByAccessControlHeader(frame, response)) {
158 UMA_HISTOGRAM_ENUMERATION(
159 "SiteIsolation.CrossSiteNonFrameResponse_With_CORS_MIME_Type",
160 mime_type_iter->second,
161 arraysize(kCrossOriginMimeTypesToLog));
162 } else {
163 // Without access control headers, we might block this request.
164 // Sometimes resources are mislabled as text/html, though, and we
165 // should only block them if we can verify that. To do so, we sniff
166 // the content once we have some of the payload.
167 if (mime_type == "text/html") {
168 // Remember the response until we can sniff its contents.
169 GetCrossOriginTextHtmlResponseSet()->insert(
170 response.url().spec());
171 } else if (mime_type == "text/xml" ||
172 mime_type == "text/xsl" ||
173 mime_type == "application/xml" ||
174 mime_type == "application/xhtml+xml" ||
175 mime_type == "application/rss+xml" ||
176 mime_type == "application/atom+xml" ||
177 mime_type == "application/json") {
178 // We will also block XML and JSON MIME types for cross-site
179 // non-frame requests without CORS headers.
180 UMA_HISTOGRAM_COUNTS(
181 "SiteIsolation.CrossSiteNonFrameResponse_xml_or_json_BLOCK",
182 1);
183 }
184 }
185 }
186 }
187 }
188 }
189 }
190
SniffCrossOriginHTML(const WebURL & response_url,const char * data,int len)191 void SiteIsolationMetrics::SniffCrossOriginHTML(const WebURL& response_url,
192 const char* data,
193 int len) {
194 if (!response_url.isValid())
195 return;
196
197 // Look up the URL to see if it is a text/html request we are tracking.
198 CrossOriginTextHtmlResponseSet& cross_origin_text_html_response_set =
199 *GetCrossOriginTextHtmlResponseSet();
200 CrossOriginTextHtmlResponseSet::iterator request_iter =
201 cross_origin_text_html_response_set.find(response_url.spec());
202 if (request_iter != cross_origin_text_html_response_set.end()) {
203 // Log whether it actually looks like HTML.
204 std::string sniffed_mime_type;
205 bool successful = net::SniffMimeType(data, len, response_url,
206 "", &sniffed_mime_type);
207 if (successful && sniffed_mime_type == "text/html")
208 LogVerifiedTextHtmlResponse();
209 else
210 LogMislabeledTextHtmlResponse();
211 cross_origin_text_html_response_set.erase(request_iter);
212 }
213 }
214
RemoveCompletedResponse(const WebURL & response_url)215 void SiteIsolationMetrics::RemoveCompletedResponse(
216 const WebURL& response_url) {
217 if (!response_url.isValid())
218 return;
219
220 // Ensure we don't leave responses in the set after they've completed.
221 CrossOriginTextHtmlResponseSet& cross_origin_text_html_response_set =
222 *GetCrossOriginTextHtmlResponseSet();
223 CrossOriginTextHtmlResponseSet::iterator request_iter =
224 cross_origin_text_html_response_set.find(response_url.spec());
225 if (request_iter != cross_origin_text_html_response_set.end()) {
226 LogMislabeledTextHtmlResponse();
227 cross_origin_text_html_response_set.erase(request_iter);
228 }
229 }
230
231 } // namespace webkit_glue
232