• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/renderer/safe_browsing/features.h"
6 
7 #include "base/logging.h"
8 #include "base/metrics/histogram.h"
9 
10 namespace safe_browsing {
11 
12 const size_t FeatureMap::kMaxFeatureMapSize = 10000;
13 
FeatureMap()14 FeatureMap::FeatureMap() {}
~FeatureMap()15 FeatureMap::~FeatureMap() {}
16 
AddBooleanFeature(const std::string & name)17 bool FeatureMap::AddBooleanFeature(const std::string& name) {
18   return AddRealFeature(name, 1.0);
19 }
20 
AddRealFeature(const std::string & name,double value)21 bool FeatureMap::AddRealFeature(const std::string& name, double value) {
22   if (features_.size() >= kMaxFeatureMapSize) {
23     // If we hit this case, it indicates that either kMaxFeatureMapSize is
24     // too small, or there is a bug causing too many features to be added.
25     // In this case, we'll log to a histogram so we can see that this is
26     // happening, and make phishing classification fail silently.
27     LOG(ERROR) << "Not adding feature: " << name << " because the "
28                << "feature map is too large.";
29     UMA_HISTOGRAM_COUNTS("SBClientPhishing.TooManyFeatures", 1);
30     return false;
31   }
32   // We only expect features in the range [0.0, 1.0], so fail if the feature is
33   // outside this range.
34   if (value < 0.0 || value > 1.0) {
35     LOG(ERROR) << "Not adding feature: " << name << " because the value "
36                << value << " is not in the range [0.0, 1.0].";
37     UMA_HISTOGRAM_COUNTS("SBClientPhishing.IllegalFeatureValue", 1);
38     return false;
39   }
40 
41   features_[name] = value;
42   return true;
43 }
44 
Clear()45 void FeatureMap::Clear() {
46   features_.clear();
47 }
48 
// String names of the individual features, grouped by where the feature
// comes from.
// NOTE(review): the names ending in '=' look like prefixes to which a
// concrete token is appended by the caller -- confirm against features.h.
namespace features {

// ---- URL: host ----
const char kUrlHostIsIpAddress[] = "UrlHostIsIpAddress";
const char kUrlTldToken[] = "UrlTld=";
const char kUrlDomainToken[] = "UrlDomain=";
const char kUrlOtherHostToken[] = "UrlOtherHostToken=";

// ---- URL: host, aggregate counts ----
const char kUrlNumOtherHostTokensGTOne[] = "UrlNumOtherHostTokens>1";
const char kUrlNumOtherHostTokensGTThree[] = "UrlNumOtherHostTokens>3";

// ---- URL: path ----
const char kUrlPathToken[] = "UrlPathToken=";

// ---- DOM: HTML forms ----
const char kPageHasForms[] = "PageHasForms";
const char kPageActionOtherDomainFreq[] = "PageActionOtherDomainFreq";
const char kPageHasTextInputs[] = "PageHasTextInputs";
const char kPageHasPswdInputs[] = "PageHasPswdInputs";
const char kPageHasRadioInputs[] = "PageHasRadioInputs";
const char kPageHasCheckInputs[] = "PageHasCheckInputs";

// ---- DOM: HTML links ----
const char kPageExternalLinksFreq[] = "PageExternalLinksFreq";
const char kPageLinkDomain[] = "PageLinkDomain=";
const char kPageSecureLinksFreq[] = "PageSecureLinksFreq";

// ---- DOM: HTML scripts ----
const char kPageNumScriptTagsGTOne[] = "PageNumScriptTags>1";
const char kPageNumScriptTagsGTSix[] = "PageNumScriptTags>6";

// ---- DOM: other HTML ----
const char kPageImgOtherDomainFreq[] = "PageImgOtherDomainFreq";

// ---- Page terms ----
const char kPageTerm[] = "PageTerm=";

}  // namespace features
87 }  // namespace safe_browsing
88