1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Implementation of the MalwareDetails class.
6
7 #include "chrome/browser/safe_browsing/malware_details.h"
8
9 #include "base/bind.h"
10 #include "base/lazy_instance.h"
11 #include "chrome/browser/profiles/profile.h"
12 #include "chrome/browser/safe_browsing/malware_details_cache.h"
13 #include "chrome/browser/safe_browsing/malware_details_history.h"
14 #include "chrome/browser/safe_browsing/report.pb.h"
15 #include "chrome/common/safe_browsing/safebrowsing_messages.h"
16 #include "content/public/browser/browser_thread.h"
17 #include "content/public/browser/navigation_controller.h"
18 #include "content/public/browser/navigation_entry.h"
19 #include "content/public/browser/render_view_host.h"
20 #include "content/public/browser/web_contents.h"
21 #include "net/url_request/url_request_context_getter.h"
22
23 using content::BrowserThread;
24 using content::NavigationEntry;
25 using content::WebContents;
26 using safe_browsing::ClientMalwareReportRequest;
27
// Maximum number of DOM nodes from a single renderer message that
// AddDOMDetails() will process; nodes beyond this count are ignored.
// Keep in sync with kMaxNodes in renderer/safe_browsing/malware_dom_details.
static const uint32 kMaxDomNodes = 500;
30
// static
// Factory used by NewMalwareDetails() to create instances. Starts out NULL;
// NewMalwareDetails() points it at the default factory if it is still NULL
// at the time of the call.
MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
33
34 // The default MalwareDetailsFactory. Global, made a singleton so we
35 // don't leak it.
36 class MalwareDetailsFactoryImpl : public MalwareDetailsFactory {
37 public:
CreateMalwareDetails(SafeBrowsingUIManager * ui_manager,WebContents * web_contents,const SafeBrowsingUIManager::UnsafeResource & unsafe_resource)38 virtual MalwareDetails* CreateMalwareDetails(
39 SafeBrowsingUIManager* ui_manager,
40 WebContents* web_contents,
41 const SafeBrowsingUIManager::UnsafeResource& unsafe_resource) OVERRIDE {
42 return new MalwareDetails(ui_manager, web_contents, unsafe_resource);
43 }
44
45 private:
46 friend struct base::DefaultLazyInstanceTraits<MalwareDetailsFactoryImpl>;
47
MalwareDetailsFactoryImpl()48 MalwareDetailsFactoryImpl() {}
49
50 DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
51 };
52
// Lazily constructed instance of the default factory. NewMalwareDetails()
// falls back to it when no other factory has been installed.
static base::LazyInstance<MalwareDetailsFactoryImpl>
    g_malware_details_factory_impl = LAZY_INSTANCE_INITIALIZER;
55
56 // Create a MalwareDetails for the given tab.
57 /* static */
NewMalwareDetails(SafeBrowsingUIManager * ui_manager,WebContents * web_contents,const UnsafeResource & resource)58 MalwareDetails* MalwareDetails::NewMalwareDetails(
59 SafeBrowsingUIManager* ui_manager,
60 WebContents* web_contents,
61 const UnsafeResource& resource) {
62 // Set up the factory if this has not been done already (tests do that
63 // before this method is called).
64 if (!factory_)
65 factory_ = g_malware_details_factory_impl.Pointer();
66 return factory_->CreateMalwareDetails(ui_manager, web_contents, resource);
67 }
68
69 // Create a MalwareDetails for the given tab. Runs in the UI thread.
MalwareDetails(SafeBrowsingUIManager * ui_manager,content::WebContents * web_contents,const UnsafeResource & resource)70 MalwareDetails::MalwareDetails(
71 SafeBrowsingUIManager* ui_manager,
72 content::WebContents* web_contents,
73 const UnsafeResource& resource)
74 : content::WebContentsObserver(web_contents),
75 profile_(Profile::FromBrowserContext(web_contents->GetBrowserContext())),
76 request_context_getter_(profile_->GetRequestContext()),
77 ui_manager_(ui_manager),
78 resource_(resource),
79 cache_result_(false),
80 cache_collector_(new MalwareDetailsCacheCollector),
81 redirects_collector_(
82 new MalwareDetailsRedirectsCollector(profile_)) {
83 StartCollection();
84 }
85
~MalwareDetails()86 MalwareDetails::~MalwareDetails() {
87 }
88
// Dispatches IPC messages delivered to this observer.
// SafeBrowsingHostMsg_MalwareDOMDetails is routed to
// OnReceivedMalwareDOMDetails(). Returns true if |message| was handled here
// and false otherwise.
bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
    IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
                        OnReceivedMalwareDOMDetails)
    // Every other message type is reported as not handled.
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}
98
IsPublicUrl(const GURL & url) const99 bool MalwareDetails::IsPublicUrl(const GURL& url) const {
100 return url.SchemeIs("http"); // TODO(panayiotis): also skip internal urls.
101 }
102
103 // Looks for a Resource for the given url in resources_. If found, it
104 // updates |resource|. Otherwise, it creates a new message, adds it to
105 // resources_ and updates |resource| to point to it.
FindOrCreateResource(const GURL & url)106 ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
107 const GURL& url) {
108 safe_browsing::ResourceMap::iterator it = resources_.find(url.spec());
109 if (it != resources_.end())
110 return it->second.get();
111
112 // Create the resource for |url|.
113 int id = resources_.size();
114 linked_ptr<ClientMalwareReportRequest::Resource> new_resource(
115 new ClientMalwareReportRequest::Resource());
116 new_resource->set_url(url.spec());
117 new_resource->set_id(id);
118 resources_[url.spec()] = new_resource;
119 return new_resource.get();
120 }
121
AddUrl(const GURL & url,const GURL & parent,const std::string & tagname,const std::vector<GURL> * children)122 void MalwareDetails::AddUrl(const GURL& url,
123 const GURL& parent,
124 const std::string& tagname,
125 const std::vector<GURL>* children) {
126 if (!url.is_valid() || !IsPublicUrl(url))
127 return;
128
129 // Find (or create) the resource for the url.
130 ClientMalwareReportRequest::Resource* url_resource =
131 FindOrCreateResource(url);
132 if (!tagname.empty())
133 url_resource->set_tag_name(tagname);
134 if (!parent.is_empty() && IsPublicUrl(parent)) {
135 // Add the resource for the parent.
136 ClientMalwareReportRequest::Resource* parent_resource =
137 FindOrCreateResource(parent);
138 // Update the parent-child relation
139 url_resource->set_parent_id(parent_resource->id());
140 }
141 if (children) {
142 for (std::vector<GURL>::const_iterator it = children->begin();
143 it != children->end(); ++it) {
144 ClientMalwareReportRequest::Resource* child_resource =
145 FindOrCreateResource(*it);
146 url_resource->add_child_ids(child_resource->id());
147 }
148 }
149 }
150
StartCollection()151 void MalwareDetails::StartCollection() {
152 DVLOG(1) << "Starting to compute malware details.";
153 report_.reset(new ClientMalwareReportRequest());
154
155 if (IsPublicUrl(resource_.url))
156 report_->set_malware_url(resource_.url.spec());
157
158 GURL page_url = web_contents()->GetURL();
159 if (IsPublicUrl(page_url))
160 report_->set_page_url(page_url.spec());
161
162 GURL referrer_url;
163 NavigationEntry* nav_entry = web_contents()->GetController().GetActiveEntry();
164 if (nav_entry) {
165 referrer_url = nav_entry->GetReferrer().url;
166 if (IsPublicUrl(referrer_url)) {
167 report_->set_referrer_url(referrer_url.spec());
168 }
169 }
170
171 // Add the nodes, starting from the page url.
172 AddUrl(page_url, GURL(), std::string(), NULL);
173
174 // Add the resource_url and its original url, if non-empty and different.
175 if (!resource_.original_url.is_empty() &&
176 resource_.url != resource_.original_url) {
177 // Add original_url, as the parent of resource_url.
178 AddUrl(resource_.original_url, GURL(), std::string(), NULL);
179 AddUrl(resource_.url, resource_.original_url, std::string(), NULL);
180 } else {
181 AddUrl(resource_.url, GURL(), std::string(), NULL);
182 }
183
184 // Add the redirect urls, if non-empty. The redirect urls do not include the
185 // original url, but include the unsafe url which is the last one of the
186 // redirect urls chain
187 GURL parent_url;
188 // Set the original url as the parent of the first redirect url if it's not
189 // empty.
190 if (!resource_.original_url.is_empty())
191 parent_url = resource_.original_url;
192
193 // Set the previous redirect url as the parent of the next one
194 for (size_t i = 0; i < resource_.redirect_urls.size(); ++i) {
195 AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL);
196 parent_url = resource_.redirect_urls[i];
197 }
198
199 // Add the referrer url.
200 if (nav_entry && !referrer_url.is_empty())
201 AddUrl(referrer_url, GURL(), std::string(), NULL);
202
203 // Get URLs of frames, scripts etc from the DOM.
204 // OnReceivedMalwareDOMDetails will be called when the renderer replies.
205 content::RenderViewHost* view = web_contents()->GetRenderViewHost();
206 view->Send(new SafeBrowsingMsg_GetMalwareDOMDetails(view->GetRoutingID()));
207 }
208
209 // When the renderer is done, this is called.
OnReceivedMalwareDOMDetails(const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node> & params)210 void MalwareDetails::OnReceivedMalwareDOMDetails(
211 const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
212 // Schedule this in IO thread, so it doesn't conflict with future users
213 // of our data structures (eg GetSerializedReport).
214 BrowserThread::PostTask(
215 BrowserThread::IO, FROM_HERE,
216 base::Bind(&MalwareDetails::AddDOMDetails, this, params));
217 }
218
AddDOMDetails(const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node> & params)219 void MalwareDetails::AddDOMDetails(
220 const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
221 DCHECK_CURRENTLY_ON(BrowserThread::IO);
222 DVLOG(1) << "Nodes from the DOM: " << params.size();
223
224 // If we have already started getting redirects from history service,
225 // don't modify state, otherwise will invalidate the iterators.
226 if (redirects_collector_->HasStarted())
227 return;
228
229 // If we have already started collecting data from the HTTP cache, don't
230 // modify our state.
231 if (cache_collector_->HasStarted())
232 return;
233
234 // Add the urls from the DOM to |resources_|. The renderer could be
235 // sending bogus messages, so limit the number of nodes we accept.
236 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
237 SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
238 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
239 AddUrl(node.url, node.parent, node.tag_name, &(node.children));
240 }
241 }
242
243 // Called from the SB Service on the IO thread, after the user has
244 // closed the tab, or clicked proceed or goback. Since the user needs
245 // to take an action, we expect this to be called after
246 // OnReceivedMalwareDOMDetails in most cases. If not, we don't include
247 // the DOM data in our report.
FinishCollection()248 void MalwareDetails::FinishCollection() {
249 DCHECK_CURRENTLY_ON(BrowserThread::IO);
250
251 std::vector<GURL> urls;
252 for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
253 it != resources_.end(); ++it) {
254 urls.push_back(GURL(it->first));
255 }
256 redirects_collector_->StartHistoryCollection(
257 urls,
258 base::Bind(&MalwareDetails::OnRedirectionCollectionReady, this));
259 }
260
OnRedirectionCollectionReady()261 void MalwareDetails::OnRedirectionCollectionReady() {
262 DCHECK_CURRENTLY_ON(BrowserThread::IO);
263 const std::vector<safe_browsing::RedirectChain>& redirects =
264 redirects_collector_->GetCollectedUrls();
265
266 for (size_t i = 0; i < redirects.size(); ++i)
267 AddRedirectUrlList(redirects[i]);
268
269 // Call the cache collector
270 cache_collector_->StartCacheCollection(
271 request_context_getter_.get(),
272 &resources_,
273 &cache_result_,
274 base::Bind(&MalwareDetails::OnCacheCollectionReady, this));
275 }
276
AddRedirectUrlList(const std::vector<GURL> & urls)277 void MalwareDetails::AddRedirectUrlList(const std::vector<GURL>& urls) {
278 DCHECK_CURRENTLY_ON(BrowserThread::IO);
279 for (size_t i = 0; i < urls.size() - 1; ++i) {
280 AddUrl(urls[i], urls[i + 1], std::string(), NULL);
281 }
282 }
283
OnCacheCollectionReady()284 void MalwareDetails::OnCacheCollectionReady() {
285 DVLOG(1) << "OnCacheCollectionReady.";
286 // Add all the urls in our |resources_| maps to the |report_| protocol buffer.
287 for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
288 it != resources_.end(); ++it) {
289 ClientMalwareReportRequest::Resource* pb_resource =
290 report_->add_resources();
291 pb_resource->CopyFrom(*(it->second));
292 }
293
294 report_->set_complete(cache_result_);
295
296 // Send the report, using the SafeBrowsingService.
297 std::string serialized;
298 if (!report_->SerializeToString(&serialized)) {
299 DLOG(ERROR) << "Unable to serialize the malware report.";
300 return;
301 }
302
303 ui_manager_->SendSerializedMalwareDetails(serialized);
304 }
305