1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/extensions/activity_log/uma_policy.h"
6
7 #include "base/metrics/histogram.h"
8 #include "base/strings/stringprintf.h"
9 #include "chrome/browser/browser_process.h"
10 #include "chrome/browser/extensions/active_script_controller.h"
11 #include "chrome/browser/extensions/activity_log/activity_action_constants.h"
12 #include "chrome/browser/extensions/activity_log/ad_network_database.h"
13 #include "chrome/browser/sessions/session_id.h"
14 #include "chrome/browser/ui/browser.h"
15 #include "chrome/browser/ui/browser_list.h"
16 #include "chrome/browser/ui/tabs/tab_strip_model.h"
17 #include "chrome/common/url_constants.h"
18 #include "content/public/browser/web_contents.h"
19 #include "content/public/common/url_constants.h"
20 #include "extensions/browser/extension_registry.h"
21 #include "extensions/common/dom_action_types.h"
22 #include "extensions/common/extension.h"
23 #include "extensions/common/manifest.h"
24
25 namespace extensions {
26
27 namespace {
28
29 // For convenience.
30 const int kNoStatus = UmaPolicy::NONE;
31 const int kContentScript = 1 << UmaPolicy::CONTENT_SCRIPT;
32 const int kReadDom = 1 << UmaPolicy::READ_DOM;
33 const int kModifiedDom = 1 << UmaPolicy::MODIFIED_DOM;
34 const int kDomMethod = 1 << UmaPolicy::DOM_METHOD;
35 const int kDocumentWrite = 1 << UmaPolicy::DOCUMENT_WRITE;
36 const int kInnerHtml = 1 << UmaPolicy::INNER_HTML;
37 const int kCreatedScript = 1 << UmaPolicy::CREATED_SCRIPT;
38 const int kCreatedIframe = 1 << UmaPolicy::CREATED_IFRAME;
39 const int kCreatedDiv = 1 << UmaPolicy::CREATED_DIV;
40 const int kCreatedLink = 1 << UmaPolicy::CREATED_LINK;
41 const int kCreatedInput = 1 << UmaPolicy::CREATED_INPUT;
42 const int kCreatedEmbed = 1 << UmaPolicy::CREATED_EMBED;
43 const int kCreatedObject = 1 << UmaPolicy::CREATED_OBJECT;
44 const int kAdInjected = 1 << UmaPolicy::AD_INJECTED;
45 const int kAdRemoved = 1 << UmaPolicy::AD_REMOVED;
46 const int kAdReplaced = 1 << UmaPolicy::AD_REPLACED;
47 const int kAdLikelyInjected = 1 << UmaPolicy::AD_LIKELY_INJECTED;
48 const int kAdLikelyReplaced = 1 << UmaPolicy::AD_LIKELY_REPLACED;
49
50 // A mask of all the ad injection flags.
51 const int kAnyAdActivity = kAdInjected |
52 kAdRemoved |
53 kAdReplaced |
54 kAdLikelyInjected |
55 kAdLikelyReplaced;
56
57 } // namespace
58
59 // Class constants, also used in testing. --------------------------------------
60
61 const char UmaPolicy::kNumberOfTabs[] = "num_tabs";
62 const size_t UmaPolicy::kMaxTabsTracked = 50;
63
64 // Setup and shutdown. ---------------------------------------------------------
65
UmaPolicy(Profile * profile)66 UmaPolicy::UmaPolicy(Profile* profile)
67 : ActivityLogPolicy(profile), profile_(profile) {
68 DCHECK(!profile->IsOffTheRecord());
69 BrowserList::AddObserver(this);
70 }
71
~UmaPolicy()72 UmaPolicy::~UmaPolicy() {
73 BrowserList::RemoveObserver(this);
74 }
75
76 // Unlike the other policies, UmaPolicy can commit suicide directly because it
77 // doesn't have a dependency on a database.
Close()78 void UmaPolicy::Close() {
79 delete this;
80 }
81
82 // Process actions. ------------------------------------------------------------
83
ProcessAction(scoped_refptr<Action> action)84 void UmaPolicy::ProcessAction(scoped_refptr<Action> action) {
85 if (!action->page_url().is_valid() && !action->arg_url().is_valid())
86 return;
87 if (action->page_incognito() || action->arg_incognito())
88 return;
89 std::string url;
90 int status = MatchActionToStatus(action);
91 if (action->page_url().is_valid()) {
92 url = CleanURL(action->page_url());
93 } else if (status & kContentScript) {
94 // This is for the tabs.executeScript case.
95 url = CleanURL(action->arg_url());
96 }
97 if (url.empty())
98 return;
99
100 SiteMap::iterator site_lookup = url_status_.find(url);
101 if (site_lookup != url_status_.end())
102 site_lookup->second[action->extension_id()] |= status;
103 }
104
MatchActionToStatus(scoped_refptr<Action> action)105 int UmaPolicy::MatchActionToStatus(scoped_refptr<Action> action) {
106 if (action->action_type() == Action::ACTION_CONTENT_SCRIPT) {
107 return kContentScript;
108 } else if (action->action_type() == Action::ACTION_API_CALL &&
109 action->api_name() == "tabs.executeScript") {
110 return kContentScript;
111 } else if (action->action_type() != Action::ACTION_DOM_ACCESS) {
112 return kNoStatus;
113 }
114
115 int dom_verb;
116 if (!action->other() ||
117 !action->other()->GetIntegerWithoutPathExpansion(
118 activity_log_constants::kActionDomVerb, &dom_verb)) {
119 return kNoStatus;
120 }
121
122 int ret_bit = kNoStatus;
123 DomActionType::Type dom_type = static_cast<DomActionType::Type>(dom_verb);
124 if (dom_type == DomActionType::GETTER)
125 return kReadDom;
126 if (dom_type == DomActionType::SETTER) {
127 ret_bit |= kModifiedDom;
128 } else if (dom_type == DomActionType::METHOD) {
129 ret_bit |= kDomMethod;
130 } else {
131 return kNoStatus;
132 }
133
134 if (action->api_name() == "HTMLDocument.write" ||
135 action->api_name() == "HTMLDocument.writeln") {
136 ret_bit |= kDocumentWrite;
137 } else if (action->api_name() == "Element.innerHTML") {
138 ret_bit |= kInnerHtml;
139 } else if (action->api_name() == "Document.createElement") {
140 std::string arg;
141 action->args()->GetString(0, &arg);
142 if (arg == "script") {
143 ret_bit |= kCreatedScript;
144 } else if (arg == "iframe") {
145 ret_bit |= kCreatedIframe;
146 } else if (arg == "div") {
147 ret_bit |= kCreatedDiv;
148 } else if (arg == "a") {
149 ret_bit |= kCreatedLink;
150 } else if (arg == "input") {
151 ret_bit |= kCreatedInput;
152 } else if (arg == "embed") {
153 ret_bit |= kCreatedEmbed;
154 } else if (arg == "object") {
155 ret_bit |= kCreatedObject;
156 }
157 }
158
159 const Action::InjectionType ad_injection =
160 action->DidInjectAd(g_browser_process->rappor_service());
161 switch (ad_injection) {
162 case Action::INJECTION_NEW_AD:
163 ret_bit |= kAdInjected;
164 break;
165 case Action::INJECTION_REMOVED_AD:
166 ret_bit |= kAdRemoved;
167 break;
168 case Action::INJECTION_REPLACED_AD:
169 ret_bit |= kAdReplaced;
170 break;
171 case Action::INJECTION_LIKELY_NEW_AD:
172 ret_bit |= kAdLikelyInjected;
173 break;
174 case Action::INJECTION_LIKELY_REPLACED_AD:
175 ret_bit |= kAdLikelyReplaced;
176 break;
177 case Action::NO_AD_INJECTION:
178 break;
179 case Action::NUM_INJECTION_TYPES:
180 NOTREACHED();
181 }
182
183 return ret_bit;
184 }
185
HistogramOnClose(const std::string & cleaned_url,content::WebContents * web_contents)186 void UmaPolicy::HistogramOnClose(const std::string& cleaned_url,
187 content::WebContents* web_contents) {
188 // Let's try to avoid histogramming useless URLs.
189 if (cleaned_url.empty() || cleaned_url == url::kAboutBlankURL ||
190 cleaned_url == chrome::kChromeUINewTabURL)
191 return;
192
193 int statuses[MAX_STATUS - 1];
194 std::memset(statuses, 0, sizeof(statuses));
195
196 ActiveScriptController* active_script_controller =
197 ActiveScriptController::GetForWebContents(web_contents);
198 SiteMap::iterator site_lookup = url_status_.find(cleaned_url);
199 const ExtensionMap& exts = site_lookup->second;
200 std::set<std::string> ad_injectors;
201 for (ExtensionMap::const_iterator ext_iter = exts.begin();
202 ext_iter != exts.end();
203 ++ext_iter) {
204 if (ext_iter->first == kNumberOfTabs)
205 continue;
206 for (int i = NONE + 1; i < MAX_STATUS; ++i) {
207 if (ext_iter->second & (1 << i))
208 statuses[i-1]++;
209 }
210
211 if (ext_iter->second & kAnyAdActivity)
212 ad_injectors.insert(ext_iter->first);
213 }
214 if (active_script_controller)
215 active_script_controller->OnAdInjectionDetected(ad_injectors);
216
217 ExtensionRegistry* registry = ExtensionRegistry::Get(profile_);
218 for (std::set<std::string>::const_iterator iter = ad_injectors.begin();
219 iter != ad_injectors.end();
220 ++iter) {
221 const Extension* extension =
222 registry->GetExtensionById(*iter, ExtensionRegistry::EVERYTHING);
223 if (extension) {
224 UMA_HISTOGRAM_ENUMERATION("Extensions.AdInjection.InstallLocation",
225 extension->location(),
226 Manifest::NUM_LOCATIONS);
227 }
228 }
229
230 std::string prefix = "ExtensionActivity.";
231 if (GURL(cleaned_url).host() != "www.google.com") {
232 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT),
233 statuses[CONTENT_SCRIPT - 1]);
234 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM),
235 statuses[READ_DOM - 1]);
236 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM),
237 statuses[MODIFIED_DOM - 1]);
238 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD),
239 statuses[DOM_METHOD - 1]);
240 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE),
241 statuses[DOCUMENT_WRITE - 1]);
242 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML),
243 statuses[INNER_HTML - 1]);
244 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT),
245 statuses[CREATED_SCRIPT - 1]);
246 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME),
247 statuses[CREATED_IFRAME - 1]);
248 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV),
249 statuses[CREATED_DIV - 1]);
250 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK),
251 statuses[CREATED_LINK - 1]);
252 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT),
253 statuses[CREATED_INPUT - 1]);
254 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED),
255 statuses[CREATED_EMBED - 1]);
256 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT),
257 statuses[CREATED_OBJECT - 1]);
258 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED),
259 statuses[AD_INJECTED - 1]);
260 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED),
261 statuses[AD_REMOVED - 1]);
262 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED),
263 statuses[AD_REPLACED - 1]);
264 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED),
265 statuses[AD_LIKELY_INJECTED - 1]);
266 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED),
267 statuses[AD_LIKELY_REPLACED - 1]);
268 } else {
269 prefix += "Google.";
270 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT),
271 statuses[CONTENT_SCRIPT - 1]);
272 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM),
273 statuses[READ_DOM - 1]);
274 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM),
275 statuses[MODIFIED_DOM - 1]);
276 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD),
277 statuses[DOM_METHOD - 1]);
278 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE),
279 statuses[DOCUMENT_WRITE - 1]);
280 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML),
281 statuses[INNER_HTML - 1]);
282 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT),
283 statuses[CREATED_SCRIPT - 1]);
284 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME),
285 statuses[CREATED_IFRAME - 1]);
286 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV),
287 statuses[CREATED_DIV - 1]);
288 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK),
289 statuses[CREATED_LINK - 1]);
290 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT),
291 statuses[CREATED_INPUT - 1]);
292 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED),
293 statuses[CREATED_EMBED - 1]);
294 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT),
295 statuses[CREATED_OBJECT - 1]);
296 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED),
297 statuses[AD_INJECTED - 1]);
298 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED),
299 statuses[AD_REMOVED - 1]);
300 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED),
301 statuses[AD_REPLACED - 1]);
302 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED),
303 statuses[AD_LIKELY_INJECTED - 1]);
304 UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED),
305 statuses[AD_LIKELY_REPLACED - 1]);
306 }
307 }
308
309 // Handle tab tracking. --------------------------------------------------------
310
OnBrowserAdded(Browser * browser)311 void UmaPolicy::OnBrowserAdded(Browser* browser) {
312 if (!profile_->IsSameProfile(browser->profile()))
313 return;
314 browser->tab_strip_model()->AddObserver(this);
315 }
316
OnBrowserRemoved(Browser * browser)317 void UmaPolicy::OnBrowserRemoved(Browser* browser) {
318 if (!profile_->IsSameProfile(browser->profile()))
319 return;
320 browser->tab_strip_model()->RemoveObserver(this);
321 }
322
323 // Use the value from SessionID::IdForTab, *not* |index|. |index| will be
324 // duplicated across tabs in a session, whereas IdForTab uniquely identifies
325 // each tab.
TabChangedAt(content::WebContents * contents,int index,TabChangeType change_type)326 void UmaPolicy::TabChangedAt(content::WebContents* contents,
327 int index,
328 TabChangeType change_type) {
329 if (change_type != TabStripModelObserver::LOADING_ONLY)
330 return;
331 if (!contents)
332 return;
333
334 std::string url = CleanURL(contents->GetLastCommittedURL());
335 int32 tab_id = SessionID::IdForTab(contents);
336
337 std::map<int32, std::string>::iterator tab_it = tab_list_.find(tab_id);
338
339 // Ignore tabs that haven't changed status.
340 if (tab_it != tab_list_.end() && tab_it->second == url)
341 return;
342
343 // Is this an existing tab whose URL has changed.
344 if (tab_it != tab_list_.end()) {
345 CleanupClosedPage(tab_it->second, contents);
346 tab_list_.erase(tab_id);
347 }
348
349 // Check that tab_list_ isn't over the kMaxTabsTracked budget.
350 if (tab_list_.size() >= kMaxTabsTracked)
351 return;
352
353 // Set up the new entries.
354 tab_list_[tab_id] = url;
355 SetupOpenedPage(url);
356 }
357
358 // Use the value from SessionID::IdForTab, *not* |index|. |index| will be
359 // duplicated across tabs in a session, whereas IdForTab uniquely identifies
360 // each tab.
TabClosingAt(TabStripModel * tab_strip_model,content::WebContents * contents,int index)361 void UmaPolicy::TabClosingAt(TabStripModel* tab_strip_model,
362 content::WebContents* contents,
363 int index) {
364 if (!contents)
365 return;
366 std::string url = CleanURL(contents->GetLastCommittedURL());
367 int32 tab_id = SessionID::IdForTab(contents);
368 std::map<int, std::string>::iterator tab_it = tab_list_.find(tab_id);
369 if (tab_it != tab_list_.end())
370 tab_list_.erase(tab_id);
371
372 CleanupClosedPage(url, contents);
373 }
374
SetupOpenedPage(const std::string & url)375 void UmaPolicy::SetupOpenedPage(const std::string& url) {
376 url_status_[url][kNumberOfTabs]++;
377 }
378
CleanupClosedPage(const std::string & cleaned_url,content::WebContents * web_contents)379 void UmaPolicy::CleanupClosedPage(const std::string& cleaned_url,
380 content::WebContents* web_contents) {
381 SiteMap::iterator old_site_lookup = url_status_.find(cleaned_url);
382 if (old_site_lookup == url_status_.end())
383 return;
384 old_site_lookup->second[kNumberOfTabs]--;
385 if (old_site_lookup->second[kNumberOfTabs] == 0) {
386 HistogramOnClose(cleaned_url, web_contents);
387 url_status_.erase(cleaned_url);
388 }
389 }
390
391 // Helpers. --------------------------------------------------------------------
392
393 // We don't want to treat # ref navigations as if they were new pageloads.
394 // So we get rid of the ref if it has it.
395 // We convert to a string in the hopes that this is faster than Replacements.
CleanURL(const GURL & gurl)396 std::string UmaPolicy::CleanURL(const GURL& gurl) {
397 if (gurl.spec().empty())
398 return GURL(url::kAboutBlankURL).spec();
399 if (!gurl.is_valid())
400 return gurl.spec();
401 if (!gurl.has_ref())
402 return gurl.spec();
403 std::string port = "";
404 if (gurl.has_port())
405 port = ":" + gurl.port();
406 std::string query = "";
407 if (gurl.has_query())
408 query = "?" + gurl.query();
409 return base::StringPrintf("%s://%s%s%s%s",
410 gurl.scheme().c_str(),
411 gurl.host().c_str(),
412 port.c_str(),
413 gurl.path().c_str(),
414 query.c_str());
415 }
416
GetHistogramName(PageStatus status)417 const char* UmaPolicy::GetHistogramName(PageStatus status) {
418 switch (status) {
419 case CONTENT_SCRIPT:
420 return "ContentScript";
421 case READ_DOM:
422 return "ReadDom";
423 case MODIFIED_DOM:
424 return "ModifiedDom";
425 case DOM_METHOD:
426 return "InvokedDomMethod";
427 case DOCUMENT_WRITE:
428 return "DocumentWrite";
429 case INNER_HTML:
430 return "InnerHtml";
431 case CREATED_SCRIPT:
432 return "CreatedScript";
433 case CREATED_IFRAME:
434 return "CreatedIframe";
435 case CREATED_DIV:
436 return "CreatedDiv";
437 case CREATED_LINK:
438 return "CreatedLink";
439 case CREATED_INPUT:
440 return "CreatedInput";
441 case CREATED_EMBED:
442 return "CreatedEmbed";
443 case CREATED_OBJECT:
444 return "CreatedObject";
445 case AD_INJECTED:
446 return "AdInjected";
447 case AD_REMOVED:
448 return "AdRemoved";
449 case AD_REPLACED:
450 return "AdReplaced";
451 case AD_LIKELY_INJECTED:
452 return "AdLikelyInjected";
453 case AD_LIKELY_REPLACED:
454 return "AdLikelyReplaced";
455 case NONE:
456 case MAX_STATUS:
457 default:
458 NOTREACHED();
459 return "";
460 }
461 }
462
463 } // namespace extensions
464