• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Implementation of the MalwareDetails class.
6 
7 #include "chrome/browser/safe_browsing/malware_details.h"
8 
9 #include "base/callback.h"
10 #include "base/lazy_instance.h"
11 #include "chrome/browser/net/chrome_url_request_context.h"
12 #include "chrome/browser/profiles/profile.h"
13 #include "chrome/browser/safe_browsing/malware_details_cache.h"
14 #include "chrome/browser/safe_browsing/report.pb.h"
15 #include "chrome/browser/safe_browsing/safe_browsing_service.h"
16 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
17 #include "content/browser/browser_thread.h"
18 #include "content/browser/renderer_host/render_view_host.h"
19 #include "content/browser/tab_contents/navigation_entry.h"
20 #include "content/browser/tab_contents/tab_contents.h"
21 #include "net/base/io_buffer.h"
22 #include "net/disk_cache/disk_cache.h"
23 #include "net/url_request/url_request_context_getter.h"
24 
25 using safe_browsing::ClientMalwareReportRequest;
26 
27 // Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
28 static const uint32 kMaxDomNodes = 500;
29 
30 // static
31 MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
32 
33 // The default MalwareDetailsFactory.  Global, made a singleton so we
34 // don't leak it.
35 class MalwareDetailsFactoryImpl
36     : public MalwareDetailsFactory {
37  public:
CreateMalwareDetails(SafeBrowsingService * sb_service,TabContents * tab_contents,const SafeBrowsingService::UnsafeResource & unsafe_resource)38   MalwareDetails* CreateMalwareDetails(
39       SafeBrowsingService* sb_service,
40       TabContents* tab_contents,
41       const SafeBrowsingService::UnsafeResource& unsafe_resource) {
42     return new MalwareDetails(sb_service, tab_contents, unsafe_resource);
43   }
44 
45  private:
46   friend struct base::DefaultLazyInstanceTraits<
47       MalwareDetailsFactoryImpl>;
48 
MalwareDetailsFactoryImpl()49   MalwareDetailsFactoryImpl() { }
50 
51   DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
52 };
53 
54 static base::LazyInstance<MalwareDetailsFactoryImpl>
55     g_malware_details_factory_impl(base::LINKER_INITIALIZED);
56 
57 // Create a MalwareDetails for the given tab.
58 /* static */
NewMalwareDetails(SafeBrowsingService * sb_service,TabContents * tab_contents,const SafeBrowsingService::UnsafeResource & resource)59 MalwareDetails* MalwareDetails::NewMalwareDetails(
60     SafeBrowsingService* sb_service,
61     TabContents* tab_contents,
62     const SafeBrowsingService::UnsafeResource& resource) {
63   // Set up the factory if this has not been done already (tests do that
64   // before this method is called).
65   if (!factory_)
66     factory_ = g_malware_details_factory_impl.Pointer();
67   return factory_->CreateMalwareDetails(sb_service, tab_contents, resource);
68 }
69 
70 // Create a MalwareDetails for the given tab. Runs in the UI thread.
MalwareDetails(SafeBrowsingService * sb_service,TabContents * tab_contents,const SafeBrowsingService::UnsafeResource & resource)71 MalwareDetails::MalwareDetails(
72     SafeBrowsingService* sb_service,
73     TabContents* tab_contents,
74     const SafeBrowsingService::UnsafeResource& resource)
75     : TabContentsObserver(tab_contents),
76       request_context_getter_(tab_contents->profile()->GetRequestContext()),
77       sb_service_(sb_service),
78       resource_(resource),
79       cache_collector_(new MalwareDetailsCacheCollector) {
80   StartCollection();
81 }
82 
~MalwareDetails()83 MalwareDetails::~MalwareDetails() {
84 }
85 
OnMessageReceived(const IPC::Message & message)86 bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
87   bool handled = true;
88   IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
89     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
90                         OnReceivedMalwareDOMDetails)
91     IPC_MESSAGE_UNHANDLED(handled = false)
92   IPC_END_MESSAGE_MAP()
93   return handled;
94 }
95 
IsPublicUrl(const GURL & url) const96 bool MalwareDetails::IsPublicUrl(const GURL& url) const {
97   return url.SchemeIs("http");  // TODO(panayiotis): also skip internal urls.
98 }
99 
100 // Looks for a Resource for the given url in resources_.  If found, it
101 // updates |resource|. Otherwise, it creates a new message, adds it to
102 // resources_ and updates |resource| to point to it.
FindOrCreateResource(const GURL & url)103 ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
104     const GURL& url) {
105   safe_browsing::ResourceMap::iterator it = resources_.find(url.spec());
106   if (it != resources_.end()) {
107     return it->second.get();
108   }
109 
110   // Create the resource for |url|.
111   int id = resources_.size();
112   linked_ptr<ClientMalwareReportRequest::Resource> new_resource(
113       new ClientMalwareReportRequest::Resource());
114   new_resource->set_url(url.spec());
115   new_resource->set_id(id);
116   resources_[url.spec()] = new_resource;
117   return new_resource.get();
118 }
119 
AddUrl(const GURL & url,const GURL & parent,const std::string & tagname,const std::vector<GURL> * children)120 void MalwareDetails::AddUrl(const GURL& url,
121                             const GURL& parent,
122                             const std::string& tagname,
123                             const std::vector<GURL>* children) {
124   if (!IsPublicUrl(url))
125     return;
126 
127   // Find (or create) the resource for the url.
128   ClientMalwareReportRequest::Resource* url_resource =
129       FindOrCreateResource(url);
130   if (!tagname.empty()) {
131     url_resource->set_tag_name(tagname);
132   }
133   if (!parent.is_empty() && IsPublicUrl(parent)) {
134     // Add the resource for the parent.
135     ClientMalwareReportRequest::Resource* parent_resource =
136         FindOrCreateResource(parent);
137     // Update the parent-child relation
138     url_resource->set_parent_id(parent_resource->id());
139   }
140   if (children) {
141     for (std::vector<GURL>::const_iterator it = children->begin();
142          it != children->end(); it++) {
143       ClientMalwareReportRequest::Resource* child_resource =
144           FindOrCreateResource(*it);
145       url_resource->add_child_ids(child_resource->id());
146     }
147   }
148 }
149 
StartCollection()150 void MalwareDetails::StartCollection() {
151   DVLOG(1) << "Starting to compute malware details.";
152   report_.reset(new ClientMalwareReportRequest());
153 
154   if (IsPublicUrl(resource_.url)) {
155     report_->set_malware_url(resource_.url.spec());
156   }
157 
158   GURL page_url = tab_contents()->GetURL();
159   if (IsPublicUrl(page_url)) {
160     report_->set_page_url(page_url.spec());
161   }
162 
163   GURL referrer_url;
164   NavigationEntry* nav_entry = tab_contents()->controller().GetActiveEntry();
165   if (nav_entry) {
166     referrer_url = nav_entry->referrer();
167     if (IsPublicUrl(referrer_url)) {
168       report_->set_referrer_url(referrer_url.spec());
169     }
170   }
171 
172   // Add the nodes, starting from the page url.
173   AddUrl(page_url, GURL(), "", NULL);
174 
175   // Add the resource_url and its original url, if non-empty and different.
176   if (!resource_.original_url.is_empty() &&
177       resource_.url != resource_.original_url) {
178     // Add original_url, as the parent of resource_url.
179     AddUrl(resource_.original_url, GURL(), "", NULL);
180     AddUrl(resource_.url, resource_.original_url, "", NULL);
181   } else {
182     AddUrl(resource_.url, GURL(), "", NULL);
183   }
184 
185   // Add the redirect urls, if non-empty. The redirect urls do not include the
186   // original url, but include the unsafe url which is the last one of the
187   // redirect urls chain
188   GURL parent_url;
189   // Set the original url as the parent of the first redirect url if it's not
190   // empty.
191   if (!resource_.original_url.is_empty()) {
192     parent_url = resource_.original_url;
193   }
194   // Set the previous redirect url as the parent of the next one
195   for (unsigned int i = 0; i < resource_.redirect_urls.size(); ++i) {
196     AddUrl(resource_.redirect_urls[i], parent_url, "", NULL);
197     parent_url = resource_.redirect_urls[i];
198   }
199 
200   // Add the referrer url.
201   if (nav_entry && !referrer_url.is_empty()) {
202     AddUrl(referrer_url, GURL(), "", NULL);
203   }
204 
205   // Get URLs of frames, scripts etc from the DOM.
206   // OnReceivedMalwareDOMDetails will be called when the renderer replies.
207   tab_contents()->render_view_host()->GetMalwareDOMDetails();
208 }
209 
210 // When the renderer is done, this is called.
OnReceivedMalwareDOMDetails(const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node> & params)211 void MalwareDetails::OnReceivedMalwareDOMDetails(
212     const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
213   // Schedule this in IO thread, so it doesn't conflict with future users
214   // of our data structures (eg GetSerializedReport).
215   BrowserThread::PostTask(
216       BrowserThread::IO, FROM_HERE,
217       NewRunnableMethod(
218           this, &MalwareDetails::AddDOMDetails, params));
219 }
220 
AddDOMDetails(const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node> & params)221 void MalwareDetails::AddDOMDetails(
222     const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
223   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
224   DVLOG(1) << "Nodes from the DOM: " << params.size();
225 
226   // If we have already started collecting data from the HTTP cache, don't
227   // modify our state.
228   if (cache_collector_->HasStarted())
229     return;
230 
231   // Add the urls from the DOM to |resources_|.  The renderer could be
232   // sending bogus messages, so limit the number of nodes we accept.
233   for (uint32 i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
234     SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
235     DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
236     AddUrl(node.url, node.parent, node.tag_name, &(node.children));
237   }
238 }
239 
240 // Called from the SB Service on the IO thread, after the user has
241 // closed the tab, or clicked proceed or goback.  Since the user needs
242 // to take an action, we expect this to be called after
243 // OnReceivedMalwareDOMDetails in most cases. If not, we don't include
244 // the DOM data in our report.
FinishCollection()245 void MalwareDetails::FinishCollection() {
246   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
247 
248   cache_collector_->StartCacheCollection(
249       request_context_getter_,
250       &resources_,
251       &cache_result_,
252       NewRunnableMethod(this, &MalwareDetails::OnCacheCollectionReady));
253 }
254 
OnCacheCollectionReady()255 void MalwareDetails::OnCacheCollectionReady() {
256   DVLOG(1) << "OnCacheCollectionReady.";
257   // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
258   for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
259        it != resources_.end(); it++) {
260     ClientMalwareReportRequest::Resource* pb_resource =
261         report_->add_resources();
262     pb_resource->CopyFrom(*(it->second));
263   }
264 
265   report_->set_complete(cache_result_);
266 
267   // Send the report, using the SafeBrowsingService.
268   std::string serialized;
269   if (!report_->SerializeToString(&serialized)) {
270     DLOG(ERROR) << "Unable to serialize the malware report.";
271     return;
272   }
273 
274   sb_service_->SendSerializedMalwareDetails(serialized);
275 }
276