1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/renderer/safe_browsing/features.h"
6
7 #include "base/logging.h"
8 #include "base/metrics/histogram.h"
9
10 namespace safe_browsing {
11
12 const size_t FeatureMap::kMaxFeatureMapSize = 10000;
13
FeatureMap()14 FeatureMap::FeatureMap() {}
~FeatureMap()15 FeatureMap::~FeatureMap() {}
16
AddBooleanFeature(const std::string & name)17 bool FeatureMap::AddBooleanFeature(const std::string& name) {
18 return AddRealFeature(name, 1.0);
19 }
20
AddRealFeature(const std::string & name,double value)21 bool FeatureMap::AddRealFeature(const std::string& name, double value) {
22 if (features_.size() >= kMaxFeatureMapSize) {
23 // If we hit this case, it indicates that either kMaxFeatureMapSize is
24 // too small, or there is a bug causing too many features to be added.
25 // In this case, we'll log to a histogram so we can see that this is
26 // happening, and make phishing classification fail silently.
27 LOG(ERROR) << "Not adding feature: " << name << " because the "
28 << "feature map is too large.";
29 UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1);
30 return false;
31 }
32 // We only expect features in the range [0.0, 1.0], so fail if the feature is
33 // outside this range.
34 if (value < 0.0 || value > 1.0) {
35 LOG(ERROR) << "Not adding feature: " << name << " because the value "
36 << value << " is not in the range [0.0, 1.0].";
37 UMA_HISTOGRAM_COUNTS("SBClientPhishing.IllegalFeatureValue", 1);
38 return false;
39 }
40
41 features_[name] = value;
42 return true;
43 }
44
Clear()45 void FeatureMap::Clear() {
46 features_.clear();
47 }
48
// String constants naming the individual features, grouped by the part of the
// URL or page they describe.
// NOTE(review): the names ending in '=' look like prefixes that the caller
// completes with a token (a TLD, a domain, a term, ...); confirm against the
// feature extractors that use them.
namespace features {

// ---- URL host ----
const char kUrlHostIsIpAddress[] = "UrlHostIsIpAddress";
const char kUrlTldToken[] = "UrlTld=";
const char kUrlDomainToken[] = "UrlDomain=";
const char kUrlOtherHostToken[] = "UrlOtherHostToken=";

// ---- URL host, aggregated over the "other" host tokens ----
const char kUrlNumOtherHostTokensGTOne[] = "UrlNumOtherHostTokens>1";
const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3";

// ---- URL path ----
const char kUrlPathToken[] = "UrlPathToken=";

// ---- DOM: HTML forms and their inputs ----
const char kPageHasForms[] = "PageHasForms";
const char kPageActionOtherDomainFreq[] = "PageActionOtherDomainFreq";
const char kPageHasTextInputs[] = "PageHasTextInputs";
const char kPageHasPswdInputs[] = "PageHasPswdInputs";
const char kPageHasRadioInputs[] = "PageHasRadioInputs";
const char kPageHasCheckInputs[] = "PageHasCheckInputs";

// ---- DOM: HTML links ----
const char kPageExternalLinksFreq[] = "PageExternalLinksFreq";
const char kPageLinkDomain[] = "PageLinkDomain=";
const char kPageSecureLinksFreq[] = "PageSecureLinksFreq";

// ---- DOM: HTML script tags ----
const char kPageNumScriptTagsGTOne[] = "PageNumScriptTags>1";
const char kPageNumScriptTagsGTSix[] = "PageNumScriptTags>6";

// ---- DOM: other HTML features ----
const char kPageImgOtherDomainFreq[] = "PageImgOtherDomainFreq";

// ---- Page terms ----
const char kPageTerm[] = "PageTerm=";

}  // namespace features
87 } // namespace safe_browsing
88