• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Implementation of the MalwareDetails class.
6 
7 #include "chrome/browser/safe_browsing/malware_details.h"
8 
9 #include "base/bind.h"
10 #include "base/lazy_instance.h"
11 #include "chrome/browser/net/chrome_url_request_context.h"
12 #include "chrome/browser/profiles/profile.h"
13 #include "chrome/browser/safe_browsing/malware_details_cache.h"
14 #include "chrome/browser/safe_browsing/malware_details_history.h"
15 #include "chrome/browser/safe_browsing/report.pb.h"
16 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
17 #include "content/public/browser/browser_thread.h"
18 #include "content/public/browser/navigation_controller.h"
19 #include "content/public/browser/navigation_entry.h"
20 #include "content/public/browser/render_view_host.h"
21 #include "content/public/browser/web_contents.h"
22 #include "net/base/io_buffer.h"
23 #include "net/disk_cache/disk_cache.h"
24 #include "net/url_request/url_request_context_getter.h"
25 
26 using content::BrowserThread;
27 using content::NavigationEntry;
28 using content::WebContents;
29 using safe_browsing::ClientMalwareReportRequest;
30 
31 // Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
32 static const uint32 kMaxDomNodes = 500;
33 
34 // static
35 MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
36 
37 // The default MalwareDetailsFactory.  Global, made a singleton so we
38 // don't leak it.
39 class MalwareDetailsFactoryImpl
40     : public MalwareDetailsFactory {
41  public:
CreateMalwareDetails(SafeBrowsingUIManager * ui_manager,WebContents * web_contents,const SafeBrowsingUIManager::UnsafeResource & unsafe_resource)42   virtual MalwareDetails* CreateMalwareDetails(
43       SafeBrowsingUIManager* ui_manager,
44       WebContents* web_contents,
45       const SafeBrowsingUIManager::UnsafeResource& unsafe_resource) OVERRIDE {
46     return new MalwareDetails(ui_manager, web_contents, unsafe_resource);
47   }
48 
49  private:
50   friend struct base::DefaultLazyInstanceTraits<
51       MalwareDetailsFactoryImpl>;
52 
MalwareDetailsFactoryImpl()53   MalwareDetailsFactoryImpl() { }
54 
55   DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
56 };
57 
58 static base::LazyInstance<MalwareDetailsFactoryImpl>
59     g_malware_details_factory_impl = LAZY_INSTANCE_INITIALIZER;
60 
61 // Create a MalwareDetails for the given tab.
62 /* static */
NewMalwareDetails(SafeBrowsingUIManager * ui_manager,WebContents * web_contents,const UnsafeResource & resource)63 MalwareDetails* MalwareDetails::NewMalwareDetails(
64     SafeBrowsingUIManager* ui_manager,
65     WebContents* web_contents,
66     const UnsafeResource& resource) {
67   // Set up the factory if this has not been done already (tests do that
68   // before this method is called).
69   if (!factory_)
70     factory_ = g_malware_details_factory_impl.Pointer();
71   return factory_->CreateMalwareDetails(ui_manager, web_contents, resource);
72 }
73 
74 // Create a MalwareDetails for the given tab. Runs in the UI thread.
MalwareDetails(SafeBrowsingUIManager * ui_manager,content::WebContents * web_contents,const UnsafeResource & resource)75 MalwareDetails::MalwareDetails(
76     SafeBrowsingUIManager* ui_manager,
77     content::WebContents* web_contents,
78     const UnsafeResource& resource)
79     : content::WebContentsObserver(web_contents),
80       profile_(Profile::FromBrowserContext(web_contents->GetBrowserContext())),
81       request_context_getter_(profile_->GetRequestContext()),
82       ui_manager_(ui_manager),
83       resource_(resource),
84       cache_result_(false),
85       cache_collector_(new MalwareDetailsCacheCollector),
86       redirects_collector_(
87           new MalwareDetailsRedirectsCollector(profile_)) {
88   StartCollection();
89 }
90 
~MalwareDetails()91 MalwareDetails::~MalwareDetails() {
92 }
93 
OnMessageReceived(const IPC::Message & message)94 bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
95   bool handled = true;
96   IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
97     IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
98                         OnReceivedMalwareDOMDetails)
99     IPC_MESSAGE_UNHANDLED(handled = false)
100   IPC_END_MESSAGE_MAP()
101   return handled;
102 }
103 
IsPublicUrl(const GURL & url) const104 bool MalwareDetails::IsPublicUrl(const GURL& url) const {
105   return url.SchemeIs("http");  // TODO(panayiotis): also skip internal urls.
106 }
107 
108 // Looks for a Resource for the given url in resources_.  If found, it
109 // updates |resource|. Otherwise, it creates a new message, adds it to
110 // resources_ and updates |resource| to point to it.
FindOrCreateResource(const GURL & url)111 ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
112     const GURL& url) {
113   safe_browsing::ResourceMap::iterator it = resources_.find(url.spec());
114   if (it != resources_.end()) {
115     return it->second.get();
116   }
117 
118   // Create the resource for |url|.
119   int id = resources_.size();
120   linked_ptr<ClientMalwareReportRequest::Resource> new_resource(
121       new ClientMalwareReportRequest::Resource());
122   new_resource->set_url(url.spec());
123   new_resource->set_id(id);
124   resources_[url.spec()] = new_resource;
125   return new_resource.get();
126 }
127 
AddUrl(const GURL & url,const GURL & parent,const std::string & tagname,const std::vector<GURL> * children)128 void MalwareDetails::AddUrl(const GURL& url,
129                             const GURL& parent,
130                             const std::string& tagname,
131                             const std::vector<GURL>* children) {
132   if (!url.is_valid() || !IsPublicUrl(url))
133     return;
134 
135   // Find (or create) the resource for the url.
136   ClientMalwareReportRequest::Resource* url_resource =
137       FindOrCreateResource(url);
138   if (!tagname.empty()) {
139     url_resource->set_tag_name(tagname);
140   }
141   if (!parent.is_empty() && IsPublicUrl(parent)) {
142     // Add the resource for the parent.
143     ClientMalwareReportRequest::Resource* parent_resource =
144         FindOrCreateResource(parent);
145     // Update the parent-child relation
146     url_resource->set_parent_id(parent_resource->id());
147   }
148   if (children) {
149     for (std::vector<GURL>::const_iterator it = children->begin();
150          it != children->end(); it++) {
151       ClientMalwareReportRequest::Resource* child_resource =
152           FindOrCreateResource(*it);
153       url_resource->add_child_ids(child_resource->id());
154     }
155   }
156 }
157 
StartCollection()158 void MalwareDetails::StartCollection() {
159   DVLOG(1) << "Starting to compute malware details.";
160   report_.reset(new ClientMalwareReportRequest());
161 
162   if (IsPublicUrl(resource_.url)) {
163     report_->set_malware_url(resource_.url.spec());
164   }
165 
166   GURL page_url = web_contents()->GetURL();
167   if (IsPublicUrl(page_url)) {
168     report_->set_page_url(page_url.spec());
169   }
170 
171   GURL referrer_url;
172   NavigationEntry* nav_entry = web_contents()->GetController().GetActiveEntry();
173   if (nav_entry) {
174     referrer_url = nav_entry->GetReferrer().url;
175     if (IsPublicUrl(referrer_url)) {
176       report_->set_referrer_url(referrer_url.spec());
177     }
178   }
179 
180   // Add the nodes, starting from the page url.
181   AddUrl(page_url, GURL(), std::string(), NULL);
182 
183   // Add the resource_url and its original url, if non-empty and different.
184   if (!resource_.original_url.is_empty() &&
185       resource_.url != resource_.original_url) {
186     // Add original_url, as the parent of resource_url.
187     AddUrl(resource_.original_url, GURL(), std::string(), NULL);
188     AddUrl(resource_.url, resource_.original_url, std::string(), NULL);
189   } else {
190     AddUrl(resource_.url, GURL(), std::string(), NULL);
191   }
192 
193   // Add the redirect urls, if non-empty. The redirect urls do not include the
194   // original url, but include the unsafe url which is the last one of the
195   // redirect urls chain
196   GURL parent_url;
197   // Set the original url as the parent of the first redirect url if it's not
198   // empty.
199   if (!resource_.original_url.is_empty()) {
200     parent_url = resource_.original_url;
201   }
202   // Set the previous redirect url as the parent of the next one
203   for (unsigned int i = 0; i < resource_.redirect_urls.size(); ++i) {
204     AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL);
205     parent_url = resource_.redirect_urls[i];
206   }
207 
208   // Add the referrer url.
209   if (nav_entry && !referrer_url.is_empty()) {
210     AddUrl(referrer_url, GURL(), std::string(), NULL);
211   }
212 
213   // Get URLs of frames, scripts etc from the DOM.
214   // OnReceivedMalwareDOMDetails will be called when the renderer replies.
215   content::RenderViewHost* view = web_contents()->GetRenderViewHost();
216   view->Send(new SafeBrowsingMsg_GetMalwareDOMDetails(view->GetRoutingID()));
217 }
218 
219 // When the renderer is done, this is called.
OnReceivedMalwareDOMDetails(const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node> & params)220 void MalwareDetails::OnReceivedMalwareDOMDetails(
221     const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
222   // Schedule this in IO thread, so it doesn't conflict with future users
223   // of our data structures (eg GetSerializedReport).
224   BrowserThread::PostTask(
225       BrowserThread::IO, FROM_HERE,
226       base::Bind(&MalwareDetails::AddDOMDetails, this, params));
227 }
228 
AddDOMDetails(const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node> & params)229 void MalwareDetails::AddDOMDetails(
230     const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
231   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
232   DVLOG(1) << "Nodes from the DOM: " << params.size();
233 
234   // If we have already started getting redirects from history service,
235   // don't modify state, otherwise will invalidate the iterators.
236   if (redirects_collector_->HasStarted())
237     return;
238 
239   // If we have already started collecting data from the HTTP cache, don't
240   // modify our state.
241   if (cache_collector_->HasStarted())
242     return;
243 
244   // Add the urls from the DOM to |resources_|.  The renderer could be
245   // sending bogus messages, so limit the number of nodes we accept.
246   for (uint32 i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
247     SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
248     DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
249     AddUrl(node.url, node.parent, node.tag_name, &(node.children));
250   }
251 }
252 
253 // Called from the SB Service on the IO thread, after the user has
254 // closed the tab, or clicked proceed or goback.  Since the user needs
255 // to take an action, we expect this to be called after
256 // OnReceivedMalwareDOMDetails in most cases. If not, we don't include
257 // the DOM data in our report.
FinishCollection()258 void MalwareDetails::FinishCollection() {
259   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
260 
261   std::vector<GURL> urls;
262   for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
263        it != resources_.end(); it++) {
264     urls.push_back(GURL(it->first));
265   }
266   redirects_collector_->StartHistoryCollection(
267       urls,
268       base::Bind(&MalwareDetails::OnRedirectionCollectionReady, this));
269 }
270 
OnRedirectionCollectionReady()271 void MalwareDetails::OnRedirectionCollectionReady() {
272   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
273   const std::vector<safe_browsing::RedirectChain>& redirects =
274       redirects_collector_->GetCollectedUrls();
275 
276   for (size_t i = 0; i < redirects.size(); ++i)
277     AddRedirectUrlList(redirects[i]);
278 
279   // Call the cache collector
280   cache_collector_->StartCacheCollection(
281       request_context_getter_.get(),
282       &resources_,
283       &cache_result_,
284       base::Bind(&MalwareDetails::OnCacheCollectionReady, this));
285 }
286 
AddRedirectUrlList(const std::vector<GURL> & urls)287 void MalwareDetails::AddRedirectUrlList(const std::vector<GURL>& urls) {
288   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
289   for (size_t i = 0; i < urls.size()-1; ++i) {
290     AddUrl(urls[i], urls[i + 1], std::string(), NULL);
291   }
292 }
293 
OnCacheCollectionReady()294 void MalwareDetails::OnCacheCollectionReady() {
295   DVLOG(1) << "OnCacheCollectionReady.";
296   // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
297   for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
298        it != resources_.end(); it++) {
299     ClientMalwareReportRequest::Resource* pb_resource =
300         report_->add_resources();
301     pb_resource->CopyFrom(*(it->second));
302   }
303 
304   report_->set_complete(cache_result_);
305 
306   // Send the report, using the SafeBrowsingService.
307   std::string serialized;
308   if (!report_->SerializeToString(&serialized)) {
309     DLOG(ERROR) << "Unable to serialize the malware report.";
310     return;
311   }
312 
313   ui_manager_->SendSerializedMalwareDetails(serialized);
314 }
315