1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/extensions/activity_log/activity_actions.h"
6
7 #include <algorithm> // for std::find.
8 #include <string>
9
10 #include "base/command_line.h"
11 #include "base/format_macros.h"
12 #include "base/json/json_string_value_serializer.h"
13 #include "base/logging.h"
14 #include "base/macros.h"
15 #include "base/memory/singleton.h"
16 #include "base/metrics/histogram.h"
17 #include "base/strings/string_number_conversions.h"
18 #include "base/strings/string_util.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/values.h"
21 #include "chrome/browser/extensions/activity_log/activity_action_constants.h"
22 #include "chrome/browser/extensions/activity_log/ad_network_database.h"
23 #include "chrome/browser/extensions/activity_log/fullstream_ui_policy.h"
24 #include "chrome/browser/ui/browser.h"
25 #include "chrome/common/chrome_switches.h"
26 #include "components/rappor/rappor_service.h"
27 #include "content/public/browser/web_contents.h"
28 #include "extensions/common/ad_injection_constants.h"
29 #include "extensions/common/constants.h"
30 #include "extensions/common/dom_action_types.h"
31 #include "sql/statement.h"
32 #include "url/gurl.h"
33
34 namespace constants = activity_log_constants;
35
36 namespace extensions {
37
38 namespace {
39
40 namespace keys = ad_injection_constants::keys;
41
42 // The list of APIs for which we upload the URL to RAPPOR.
43 const char* kApisForRapporMetric[] = {
44 ad_injection_constants::kHtmlIframeSrcApiName,
45 ad_injection_constants::kHtmlEmbedSrcApiName,
46 ad_injection_constants::kHtmlAnchorHrefApiName
47 };
48
// The "Extensions.PossibleAdInjection2" metric uses different Rappor
// parameters than the original metric. Declared as a constant array (rather
// than a reassignable pointer) so the name can never be redirected at runtime.
const char kExtensionAdInjectionRapporMetricName[] =
    "Extensions.PossibleAdInjection2";
53
// The names of different types of HTML elements we check for ad injection.
// These are constant arrays rather than mutable pointers, so they cannot be
// reassigned and carry their length at compile time.
const char kIframeElementType[] = "HTMLIFrameElement";
const char kEmbedElementType[] = "HTMLEmbedElement";
const char kAnchorElementType[] = "HTMLAnchorElement";
58
Serialize(const base::Value * value)59 std::string Serialize(const base::Value* value) {
60 std::string value_as_text;
61 if (!value) {
62 value_as_text = "null";
63 } else {
64 JSONStringValueSerializer serializer(&value_as_text);
65 serializer.SerializeAndOmitBinaryValues(*value);
66 }
67 return value_as_text;
68 }
69
70 } // namespace
71
72 using api::activity_log_private::ExtensionActivity;
73
Action(const std::string & extension_id,const base::Time & time,const ActionType action_type,const std::string & api_name,int64 action_id)74 Action::Action(const std::string& extension_id,
75 const base::Time& time,
76 const ActionType action_type,
77 const std::string& api_name,
78 int64 action_id)
79 : extension_id_(extension_id),
80 time_(time),
81 action_type_(action_type),
82 api_name_(api_name),
83 page_incognito_(false),
84 arg_incognito_(false),
85 count_(0),
86 action_id_(action_id) {}
87
~Action()88 Action::~Action() {}
89
90 // TODO(mvrable): As an optimization, we might return this directly if the
91 // refcount is one. However, there are likely to be other stray references in
92 // many cases that will prevent this optimization.
Clone() const93 scoped_refptr<Action> Action::Clone() const {
94 scoped_refptr<Action> clone(
95 new Action(
96 extension_id(), time(), action_type(), api_name(), action_id()));
97 if (args())
98 clone->set_args(make_scoped_ptr(args()->DeepCopy()));
99 clone->set_page_url(page_url());
100 clone->set_page_title(page_title());
101 clone->set_page_incognito(page_incognito());
102 clone->set_arg_url(arg_url());
103 clone->set_arg_incognito(arg_incognito());
104 if (other())
105 clone->set_other(make_scoped_ptr(other()->DeepCopy()));
106 return clone;
107 }
108
DidInjectAd(rappor::RapporService * rappor_service) const109 Action::InjectionType Action::DidInjectAd(
110 rappor::RapporService* rappor_service) const {
111 MaybeUploadUrl(rappor_service);
112
113 // We should always have an AdNetworkDatabase, but, on the offchance we don't,
114 // don't crash in a release build.
115 if (!AdNetworkDatabase::Get()) {
116 NOTREACHED();
117 return NO_AD_INJECTION;
118 }
119
120 AdType ad_type = AD_TYPE_NONE;
121 InjectionType injection_type = NO_AD_INJECTION;
122
123 if (EndsWith(api_name_,
124 ad_injection_constants::kAppendChildApiSuffix,
125 true /* case senstive */)) {
126 injection_type = CheckAppendChild(&ad_type);
127 } else {
128 // Check if the action modified an element's src/href.
129 if (api_name_ == ad_injection_constants::kHtmlIframeSrcApiName)
130 ad_type = AD_TYPE_IFRAME;
131 else if (api_name_ == ad_injection_constants::kHtmlEmbedSrcApiName)
132 ad_type = AD_TYPE_EMBED;
133 else if (api_name_ == ad_injection_constants::kHtmlAnchorHrefApiName)
134 ad_type = AD_TYPE_ANCHOR;
135
136 if (ad_type != AD_TYPE_NONE)
137 injection_type = CheckSrcModification();
138 }
139
140 if (injection_type != NO_AD_INJECTION) {
141 UMA_HISTOGRAM_ENUMERATION(
142 "Extensions.AdInjection.AdType", ad_type, Action::NUM_AD_TYPES);
143 }
144
145 return injection_type;
146 }
147
set_args(scoped_ptr<base::ListValue> args)148 void Action::set_args(scoped_ptr<base::ListValue> args) {
149 args_.reset(args.release());
150 }
151
mutable_args()152 base::ListValue* Action::mutable_args() {
153 if (!args_.get()) {
154 args_.reset(new base::ListValue());
155 }
156 return args_.get();
157 }
158
set_page_url(const GURL & page_url)159 void Action::set_page_url(const GURL& page_url) {
160 page_url_ = page_url;
161 }
162
set_arg_url(const GURL & arg_url)163 void Action::set_arg_url(const GURL& arg_url) {
164 arg_url_ = arg_url;
165 }
166
set_other(scoped_ptr<base::DictionaryValue> other)167 void Action::set_other(scoped_ptr<base::DictionaryValue> other) {
168 other_.reset(other.release());
169 }
170
mutable_other()171 base::DictionaryValue* Action::mutable_other() {
172 if (!other_.get()) {
173 other_.reset(new base::DictionaryValue());
174 }
175 return other_.get();
176 }
177
SerializePageUrl() const178 std::string Action::SerializePageUrl() const {
179 return (page_incognito() ? constants::kIncognitoUrl : "") + page_url().spec();
180 }
181
ParsePageUrl(const std::string & url)182 void Action::ParsePageUrl(const std::string& url) {
183 set_page_incognito(StartsWithASCII(url, constants::kIncognitoUrl, true));
184 if (page_incognito())
185 set_page_url(GURL(url.substr(strlen(constants::kIncognitoUrl))));
186 else
187 set_page_url(GURL(url));
188 }
189
SerializeArgUrl() const190 std::string Action::SerializeArgUrl() const {
191 return (arg_incognito() ? constants::kIncognitoUrl : "") + arg_url().spec();
192 }
193
ParseArgUrl(const std::string & url)194 void Action::ParseArgUrl(const std::string& url) {
195 set_arg_incognito(StartsWithASCII(url, constants::kIncognitoUrl, true));
196 if (arg_incognito())
197 set_arg_url(GURL(url.substr(strlen(constants::kIncognitoUrl))));
198 else
199 set_arg_url(GURL(url));
200 }
201
ConvertToExtensionActivity()202 scoped_ptr<ExtensionActivity> Action::ConvertToExtensionActivity() {
203 scoped_ptr<ExtensionActivity> result(new ExtensionActivity);
204
205 // We do this translation instead of using the same enum because the database
206 // values need to be stable; this allows us to change the extension API
207 // without affecting the database.
208 switch (action_type()) {
209 case ACTION_API_CALL:
210 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_CALL;
211 break;
212 case ACTION_API_EVENT:
213 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_EVENT;
214 break;
215 case ACTION_CONTENT_SCRIPT:
216 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_CONTENT_SCRIPT;
217 break;
218 case ACTION_DOM_ACCESS:
219 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_DOM_ACCESS;
220 break;
221 case ACTION_DOM_EVENT:
222 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_DOM_EVENT;
223 break;
224 case ACTION_WEB_REQUEST:
225 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_WEB_REQUEST;
226 break;
227 case UNUSED_ACTION_API_BLOCKED:
228 case ACTION_ANY:
229 default:
230 // This shouldn't be reached, but some people might have old or otherwise
231 // weird db entries. Treat it like an API call if that happens.
232 result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_CALL;
233 break;
234 }
235
236 result->extension_id.reset(new std::string(extension_id()));
237 result->time.reset(new double(time().ToJsTime()));
238 result->count.reset(new double(count()));
239 result->api_call.reset(new std::string(api_name()));
240 result->args.reset(new std::string(Serialize(args())));
241 if (action_id() != -1)
242 result->activity_id.reset(
243 new std::string(base::StringPrintf("%" PRId64, action_id())));
244 if (page_url().is_valid()) {
245 if (!page_title().empty())
246 result->page_title.reset(new std::string(page_title()));
247 result->page_url.reset(new std::string(SerializePageUrl()));
248 }
249 if (arg_url().is_valid())
250 result->arg_url.reset(new std::string(SerializeArgUrl()));
251
252 if (other()) {
253 scoped_ptr<ExtensionActivity::Other> other_field(
254 new ExtensionActivity::Other);
255 bool prerender;
256 if (other()->GetBooleanWithoutPathExpansion(constants::kActionPrerender,
257 &prerender)) {
258 other_field->prerender.reset(new bool(prerender));
259 }
260 const base::DictionaryValue* web_request;
261 if (other()->GetDictionaryWithoutPathExpansion(constants::kActionWebRequest,
262 &web_request)) {
263 other_field->web_request.reset(new std::string(
264 ActivityLogPolicy::Util::Serialize(web_request)));
265 }
266 std::string extra;
267 if (other()->GetStringWithoutPathExpansion(constants::kActionExtra, &extra))
268 other_field->extra.reset(new std::string(extra));
269 int dom_verb;
270 if (other()->GetIntegerWithoutPathExpansion(constants::kActionDomVerb,
271 &dom_verb)) {
272 switch (static_cast<DomActionType::Type>(dom_verb)) {
273 case DomActionType::GETTER:
274 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_GETTER;
275 break;
276 case DomActionType::SETTER:
277 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_SETTER;
278 break;
279 case DomActionType::METHOD:
280 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_METHOD;
281 break;
282 case DomActionType::INSERTED:
283 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_INSERTED;
284 break;
285 case DomActionType::XHR:
286 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_XHR;
287 break;
288 case DomActionType::WEBREQUEST:
289 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_WEBREQUEST;
290 break;
291 case DomActionType::MODIFIED:
292 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_MODIFIED;
293 break;
294 default:
295 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_NONE;
296 }
297 } else {
298 other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_NONE;
299 }
300 result->other.reset(other_field.release());
301 }
302
303 return result.Pass();
304 }
305
PrintForDebug() const306 std::string Action::PrintForDebug() const {
307 std::string result = base::StringPrintf("ACTION ID=%" PRId64, action_id());
308 result += " EXTENSION ID=" + extension_id() + " CATEGORY=";
309 switch (action_type_) {
310 case ACTION_API_CALL:
311 result += "api_call";
312 break;
313 case ACTION_API_EVENT:
314 result += "api_event_callback";
315 break;
316 case ACTION_WEB_REQUEST:
317 result += "webrequest";
318 break;
319 case ACTION_CONTENT_SCRIPT:
320 result += "content_script";
321 break;
322 case UNUSED_ACTION_API_BLOCKED:
323 // This is deprecated.
324 result += "api_blocked";
325 break;
326 case ACTION_DOM_EVENT:
327 result += "dom_event";
328 break;
329 case ACTION_DOM_ACCESS:
330 result += "dom_access";
331 break;
332 default:
333 result += base::StringPrintf("type%d", static_cast<int>(action_type_));
334 }
335
336 result += " API=" + api_name_;
337 if (args_.get()) {
338 result += " ARGS=" + Serialize(args_.get());
339 }
340 if (page_url_.is_valid()) {
341 if (page_incognito_)
342 result += " PAGE_URL=(incognito)" + page_url_.spec();
343 else
344 result += " PAGE_URL=" + page_url_.spec();
345 }
346 if (!page_title_.empty()) {
347 base::StringValue title(page_title_);
348 result += " PAGE_TITLE=" + Serialize(&title);
349 }
350 if (arg_url_.is_valid()) {
351 if (arg_incognito_)
352 result += " ARG_URL=(incognito)" + arg_url_.spec();
353 else
354 result += " ARG_URL=" + arg_url_.spec();
355 }
356 if (other_.get()) {
357 result += " OTHER=" + Serialize(other_.get());
358 }
359
360 result += base::StringPrintf(" COUNT=%d", count_);
361 return result;
362 }
363
UrlCouldBeAd(const GURL & url) const364 bool Action::UrlCouldBeAd(const GURL& url) const {
365 // Ads can only be valid urls that don't match the page's host (linking to the
366 // current page should be considered valid use), and aren't local to the
367 // extension.
368 return url.is_valid() &&
369 !url.is_empty() &&
370 url.host() != page_url_.host() &&
371 !url.SchemeIs(kExtensionScheme);
372 }
373
MaybeUploadUrl(rappor::RapporService * rappor_service) const374 void Action::MaybeUploadUrl(rappor::RapporService* rappor_service) const {
375 // Don't bother recording if the url is innocuous (or no |rappor_service|).
376 if (!rappor_service || !UrlCouldBeAd(arg_url_))
377 return;
378
379 bool can_inject_ads = false;
380 for (size_t i = 0; i < arraysize(kApisForRapporMetric); ++i) {
381 if (api_name_ == kApisForRapporMetric[i]) {
382 can_inject_ads = true;
383 break;
384 }
385 }
386
387 if (!can_inject_ads)
388 return;
389
390 // Record the URL - an ad *may* have been injected.
391 rappor_service->RecordSample(kExtensionAdInjectionRapporMetricName,
392 rappor::ETLD_PLUS_ONE_RAPPOR_TYPE,
393 arg_url_.host());
394 }
395
CheckSrcModification() const396 Action::InjectionType Action::CheckSrcModification() const {
397 const AdNetworkDatabase* database = AdNetworkDatabase::Get();
398
399 bool arg_url_could_be_ad = UrlCouldBeAd(arg_url_);
400
401 GURL prev_url;
402 std::string prev_url_string;
403 if (args_.get() && args_->GetString(1u, &prev_url_string))
404 prev_url = GURL(prev_url_string);
405
406 bool prev_url_valid = prev_url.is_valid() && !prev_url.is_empty();
407
408 bool injected_ad = arg_url_could_be_ad && database->IsAdNetwork(arg_url_);
409 bool replaced_ad = prev_url_valid && database->IsAdNetwork(prev_url);
410
411 if (injected_ad && replaced_ad)
412 return INJECTION_REPLACED_AD;
413 if (injected_ad)
414 return INJECTION_NEW_AD;
415 if (replaced_ad)
416 return INJECTION_REMOVED_AD;
417
418 // If the extension modified the URL with an external, valid URL then there's
419 // a good chance it's ad injection. Log it as a likely one, which also helps
420 // us determine the effectiveness of our IsAdNetwork() recognition.
421 if (arg_url_could_be_ad) {
422 if (prev_url_valid)
423 return INJECTION_LIKELY_REPLACED_AD;
424 return INJECTION_LIKELY_NEW_AD;
425 }
426
427 return NO_AD_INJECTION;
428 }
429
CheckAppendChild(AdType * ad_type_out) const430 Action::InjectionType Action::CheckAppendChild(AdType* ad_type_out) const {
431 const base::DictionaryValue* child = NULL;
432 if (!args_->GetDictionary(0u, &child))
433 return NO_AD_INJECTION;
434
435 return CheckDomObject(child, ad_type_out);
436 }
437
CheckDomObject(const base::DictionaryValue * object,AdType * ad_type_out) const438 Action::InjectionType Action::CheckDomObject(
439 const base::DictionaryValue* object,
440 AdType* ad_type_out) const {
441 DCHECK(ad_type_out);
442 std::string type;
443 object->GetString(keys::kType, &type);
444
445 AdType ad_type = AD_TYPE_NONE;
446 std::string url_key;
447 if (type == kIframeElementType) {
448 ad_type = AD_TYPE_IFRAME;
449 url_key = keys::kSrc;
450 } else if (type == kEmbedElementType) {
451 ad_type = AD_TYPE_EMBED;
452 url_key = keys::kSrc;
453 } else if (type == kAnchorElementType) {
454 ad_type = AD_TYPE_ANCHOR;
455 url_key = keys::kHref;
456 }
457
458 if (!url_key.empty()) {
459 std::string url;
460 if (object->GetString(url_key, &url)) {
461 GURL gurl(url);
462 if (UrlCouldBeAd(gurl)) {
463 *ad_type_out = ad_type;
464 if (AdNetworkDatabase::Get()->IsAdNetwork(gurl))
465 return INJECTION_NEW_AD;
466 // If the extension injected an URL which is not local to itself or the
467 // page, there is a good chance it could be a new ad, and our database
468 // missed it.
469 return INJECTION_LIKELY_NEW_AD;
470 }
471 }
472 }
473
474 const base::ListValue* children = NULL;
475 if (object->GetList(keys::kChildren, &children)) {
476 const base::DictionaryValue* child = NULL;
477 for (size_t i = 0;
478 i < children->GetSize() &&
479 i < ad_injection_constants::kMaximumChildrenToCheck;
480 ++i) {
481 if (children->GetDictionary(i, &child)) {
482 InjectionType type = CheckDomObject(child, ad_type_out);
483 if (type != NO_AD_INJECTION)
484 return type;
485 }
486 }
487 }
488
489 return NO_AD_INJECTION;
490 }
491
operator ()(const scoped_refptr<Action> & lhs,const scoped_refptr<Action> & rhs) const492 bool ActionComparator::operator()(
493 const scoped_refptr<Action>& lhs,
494 const scoped_refptr<Action>& rhs) const {
495 if (lhs->time() != rhs->time())
496 return lhs->time() < rhs->time();
497 else if (lhs->action_id() != rhs->action_id())
498 return lhs->action_id() < rhs->action_id();
499 else
500 return ActionComparatorExcludingTimeAndActionId()(lhs, rhs);
501 }
502
operator ()(const scoped_refptr<Action> & lhs,const scoped_refptr<Action> & rhs) const503 bool ActionComparatorExcludingTimeAndActionId::operator()(
504 const scoped_refptr<Action>& lhs,
505 const scoped_refptr<Action>& rhs) const {
506 if (lhs->extension_id() != rhs->extension_id())
507 return lhs->extension_id() < rhs->extension_id();
508 if (lhs->action_type() != rhs->action_type())
509 return lhs->action_type() < rhs->action_type();
510 if (lhs->api_name() != rhs->api_name())
511 return lhs->api_name() < rhs->api_name();
512
513 // args might be null; treat a null value as less than all non-null values,
514 // including the empty string.
515 if (!lhs->args() && rhs->args())
516 return true;
517 if (lhs->args() && !rhs->args())
518 return false;
519 if (lhs->args() && rhs->args()) {
520 std::string lhs_args = ActivityLogPolicy::Util::Serialize(lhs->args());
521 std::string rhs_args = ActivityLogPolicy::Util::Serialize(rhs->args());
522 if (lhs_args != rhs_args)
523 return lhs_args < rhs_args;
524 }
525
526 // Compare URLs as strings, and treat the incognito flag as a separate field.
527 if (lhs->page_url().spec() != rhs->page_url().spec())
528 return lhs->page_url().spec() < rhs->page_url().spec();
529 if (lhs->page_incognito() != rhs->page_incognito())
530 return lhs->page_incognito() < rhs->page_incognito();
531
532 if (lhs->page_title() != rhs->page_title())
533 return lhs->page_title() < rhs->page_title();
534
535 if (lhs->arg_url().spec() != rhs->arg_url().spec())
536 return lhs->arg_url().spec() < rhs->arg_url().spec();
537 if (lhs->arg_incognito() != rhs->arg_incognito())
538 return lhs->arg_incognito() < rhs->arg_incognito();
539
540 // other is treated much like the args field.
541 if (!lhs->other() && rhs->other())
542 return true;
543 if (lhs->other() && !rhs->other())
544 return false;
545 if (lhs->other() && rhs->other()) {
546 std::string lhs_other = ActivityLogPolicy::Util::Serialize(lhs->other());
547 std::string rhs_other = ActivityLogPolicy::Util::Serialize(rhs->other());
548 if (lhs_other != rhs_other)
549 return lhs_other < rhs_other;
550 }
551
552 // All fields compare as equal if this point is reached.
553 return false;
554 }
555
556 } // namespace extensions
557