• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008, 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1.  Redistributions of source code must retain the above copyright
9  *     notice, this list of conditions and the following disclaimer.
10  * 2.  Redistributions in binary form must reproduce the above copyright
11  *     notice, this list of conditions and the following disclaimer in the
12  *     documentation and/or other materials provided with the distribution.
13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14  *     its contributors may be used to endorse or promote products derived
15  *     from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "config.h"
30 #include "LegacyWebArchive.h"
31 
32 #include "MemoryCache.h"
33 #include "Document.h"
34 #include "DocumentLoader.h"
35 #include "Frame.h"
36 #include "FrameLoader.h"
37 #include "FrameTree.h"
38 #include "HTMLFrameOwnerElement.h"
39 #include "HTMLNames.h"
40 #include "IconDatabase.h"
41 #include "Image.h"
42 #include "KURLHash.h"
43 #include "Logging.h"
44 #include "markup.h"
45 #include "Node.h"
46 #include "Range.h"
47 #include "SelectionController.h"
48 #include "SharedBuffer.h"
49 #include <wtf/text/CString.h>
50 #include <wtf/text/StringConcatenate.h>
51 #include <wtf/ListHashSet.h>
52 #include <wtf/RetainPtr.h>
53 
54 namespace WebCore {
55 
56 static const CFStringRef LegacyWebArchiveMainResourceKey = CFSTR("WebMainResource");
57 static const CFStringRef LegacyWebArchiveSubresourcesKey = CFSTR("WebSubresources");
58 static const CFStringRef LegacyWebArchiveSubframeArchivesKey = CFSTR("WebSubframeArchives");
59 static const CFStringRef LegacyWebArchiveResourceDataKey = CFSTR("WebResourceData");
60 static const CFStringRef LegacyWebArchiveResourceFrameNameKey = CFSTR("WebResourceFrameName");
61 static const CFStringRef LegacyWebArchiveResourceMIMETypeKey = CFSTR("WebResourceMIMEType");
62 static const CFStringRef LegacyWebArchiveResourceURLKey = CFSTR("WebResourceURL");
63 static const CFStringRef LegacyWebArchiveResourceTextEncodingNameKey = CFSTR("WebResourceTextEncodingName");
64 static const CFStringRef LegacyWebArchiveResourceResponseKey = CFSTR("WebResourceResponse");
65 static const CFStringRef LegacyWebArchiveResourceResponseVersionKey = CFSTR("WebResourceResponseVersion");
66 
createPropertyListRepresentation(ArchiveResource * resource,MainResourceStatus isMainResource)67 RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(ArchiveResource* resource, MainResourceStatus isMainResource)
68 {
69     if (!resource) {
70         // The property list representation of a null/empty WebResource has the following 3 objects stored as nil.
71         // FIXME: 0 is not serializable. Presumably we need to use kCFNull here instead for compatibility.
72         // FIXME: But why do we need to support a resource of 0? Who relies on that?
73         RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 3, 0, 0));
74         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, 0);
75         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, 0);
76         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, 0);
77         return propertyList;
78     }
79 
80     RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 6, 0, &kCFTypeDictionaryValueCallBacks));
81 
82     // Resource data can be empty, but must be represented by an empty CFDataRef
83     SharedBuffer* data = resource->data();
84     RetainPtr<CFDataRef> cfData;
85     if (data)
86         cfData.adoptCF(data->createCFData());
87     else
88         cfData.adoptCF(CFDataCreate(0, 0, 0));
89     CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, cfData.get());
90 
91     // Resource URL cannot be null
92     RetainPtr<CFStringRef> cfURL(AdoptCF, resource->url().string().createCFString());
93     if (cfURL)
94         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, cfURL.get());
95     else {
96         LOG(Archives, "LegacyWebArchive - NULL resource URL is invalid - returning null property list");
97         return 0;
98     }
99 
100     // FrameName should be left out if empty for subresources, but always included for main resources
101     const String& frameName(resource->frameName());
102     if (!frameName.isEmpty() || isMainResource) {
103         RetainPtr<CFStringRef> cfFrameName(AdoptCF, frameName.createCFString());
104         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceFrameNameKey, cfFrameName.get());
105     }
106 
107     // Set MIMEType, TextEncodingName, and ResourceResponse only if they actually exist
108     const String& mimeType(resource->mimeType());
109     if (!mimeType.isEmpty()) {
110         RetainPtr<CFStringRef> cfMIMEType(AdoptCF, mimeType.createCFString());
111         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, cfMIMEType.get());
112     }
113 
114     const String& textEncoding(resource->textEncoding());
115     if (!textEncoding.isEmpty()) {
116         RetainPtr<CFStringRef> cfTextEncoding(AdoptCF, textEncoding.createCFString());
117         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceTextEncodingNameKey, cfTextEncoding.get());
118     }
119 
120     // Don't include the resource response for the main resource
121     if (!isMainResource) {
122         RetainPtr<CFDataRef> resourceResponseData = createPropertyListRepresentation(resource->response());
123         if (resourceResponseData)
124             CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceResponseKey, resourceResponseData.get());
125     }
126 
127     return propertyList;
128 }
129 
createPropertyListRepresentation(Archive * archive)130 RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(Archive* archive)
131 {
132     RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 3, 0, &kCFTypeDictionaryValueCallBacks));
133 
134     RetainPtr<CFDictionaryRef> mainResourceDict = createPropertyListRepresentation(archive->mainResource(), MainResource);
135     ASSERT(mainResourceDict);
136     if (!mainResourceDict)
137         return 0;
138     CFDictionarySetValue(propertyList.get(), LegacyWebArchiveMainResourceKey, mainResourceDict.get());
139 
140     RetainPtr<CFMutableArrayRef> subresourcesArray(AdoptCF, CFArrayCreateMutable(0, archive->subresources().size(), &kCFTypeArrayCallBacks));
141     const Vector<RefPtr<ArchiveResource> >& subresources(archive->subresources());
142     for (unsigned i = 0; i < subresources.size(); ++i) {
143         RetainPtr<CFDictionaryRef> subresource = createPropertyListRepresentation(subresources[i].get(), Subresource);
144         if (subresource)
145             CFArrayAppendValue(subresourcesArray.get(), subresource.get());
146         else
147             LOG(Archives, "LegacyWebArchive - Failed to create property list for subresource");
148     }
149     if (CFArrayGetCount(subresourcesArray.get()))
150         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubresourcesKey, subresourcesArray.get());
151 
152     RetainPtr<CFMutableArrayRef> subframesArray(AdoptCF, CFArrayCreateMutable(0, archive->subframeArchives().size(), &kCFTypeArrayCallBacks));
153     const Vector<RefPtr<Archive> >& subframeArchives(archive->subframeArchives());
154     for (unsigned i = 0; i < subframeArchives.size(); ++i) {
155         RetainPtr<CFDictionaryRef> subframeArchive = createPropertyListRepresentation(subframeArchives[i].get());
156         if (subframeArchive)
157             CFArrayAppendValue(subframesArray.get(), subframeArchive.get());
158         else
159             LOG(Archives, "LegacyWebArchive - Failed to create property list for subframe archive");
160     }
161     if (CFArrayGetCount(subframesArray.get()))
162         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubframeArchivesKey, subframesArray.get());
163 
164     return propertyList;
165 }
166 
createResourceResponseFromPropertyListData(CFDataRef data,CFStringRef responseDataType)167 ResourceResponse LegacyWebArchive::createResourceResponseFromPropertyListData(CFDataRef data, CFStringRef responseDataType)
168 {
169     ASSERT(data);
170     if (!data)
171         return ResourceResponse();
172 
173     // If the ResourceResponseVersion (passed in as responseDataType) exists at all, this is a "new" web archive that we
174     // can parse well in a cross platform manner If it doesn't exist, we will assume this is an "old" web archive with,
175     // NSURLResponse objects in it and parse the ResourceResponse as such.
176     if (!responseDataType)
177         return createResourceResponseFromMacArchivedData(data);
178 
179     // FIXME: Parse the "new" format that the above comment references here. This format doesn't exist yet.
180     return ResourceResponse();
181 }
182 
createResource(CFDictionaryRef dictionary)183 PassRefPtr<ArchiveResource> LegacyWebArchive::createResource(CFDictionaryRef dictionary)
184 {
185     ASSERT(dictionary);
186     if (!dictionary)
187         return 0;
188 
189     CFDataRef resourceData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceDataKey));
190     if (resourceData && CFGetTypeID(resourceData) != CFDataGetTypeID()) {
191         LOG(Archives, "LegacyWebArchive - Resource data is not of type CFData, cannot create invalid resource");
192         return 0;
193     }
194 
195     CFStringRef frameName = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceFrameNameKey));
196     if (frameName && CFGetTypeID(frameName) != CFStringGetTypeID()) {
197         LOG(Archives, "LegacyWebArchive - Frame name is not of type CFString, cannot create invalid resource");
198         return 0;
199     }
200 
201     CFStringRef mimeType = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceMIMETypeKey));
202     if (!mimeType || CFGetTypeID(mimeType) != CFStringGetTypeID()) {
203         LOG(Archives, "LegacyWebArchive - MIME type is not of type CFString, cannot create invalid resource");
204         return 0;
205     }
206 
207     CFStringRef url = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceURLKey));
208     if (url && CFGetTypeID(url) != CFStringGetTypeID()) {
209         LOG(Archives, "LegacyWebArchive - URL is not of type CFString, cannot create invalid resource");
210         return 0;
211     }
212 
213     CFStringRef textEncoding = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceTextEncodingNameKey));
214     if (textEncoding && CFGetTypeID(textEncoding) != CFStringGetTypeID()) {
215         LOG(Archives, "LegacyWebArchive - Text encoding is not of type CFString, cannot create invalid resource");
216         return 0;
217     }
218 
219     ResourceResponse response;
220 
221     CFDataRef resourceResponseData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseKey));
222     if (resourceResponseData) {
223         if (CFGetTypeID(resourceResponseData) != CFDataGetTypeID()) {
224             LOG(Archives, "LegacyWebArchive - Resource response data is not of type CFData, cannot create invalid resource");
225             return 0;
226         }
227 
228         CFStringRef resourceResponseVersion = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseVersionKey));
229         if (resourceResponseVersion && CFGetTypeID(resourceResponseVersion) != CFStringGetTypeID()) {
230             LOG(Archives, "LegacyWebArchive - Resource response version is not of type CFString, cannot create invalid resource");
231             return 0;
232         }
233 
234         response = createResourceResponseFromPropertyListData(resourceResponseData, resourceResponseVersion);
235     }
236 
237     return ArchiveResource::create(SharedBuffer::wrapCFData(resourceData), KURL(KURL(), url), mimeType, textEncoding, frameName, response);
238 }
239 
create()240 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create()
241 {
242     return adoptRef(new LegacyWebArchive);
243 }
244 
create(PassRefPtr<ArchiveResource> mainResource,Vector<PassRefPtr<ArchiveResource>> & subresources,Vector<PassRefPtr<LegacyWebArchive>> & subframeArchives)245 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(PassRefPtr<ArchiveResource> mainResource, Vector<PassRefPtr<ArchiveResource> >& subresources, Vector<PassRefPtr<LegacyWebArchive> >& subframeArchives)
246 {
247     ASSERT(mainResource);
248     if (!mainResource)
249         return 0;
250 
251     RefPtr<LegacyWebArchive> archive = create();
252     archive->setMainResource(mainResource);
253 
254     for (unsigned i = 0; i < subresources.size(); ++i)
255         archive->addSubresource(subresources[i]);
256 
257     for (unsigned i = 0; i < subframeArchives.size(); ++i)
258         archive->addSubframeArchive(subframeArchives[i]);
259 
260     return archive.release();
261 }
262 
create(SharedBuffer * data)263 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(SharedBuffer* data)
264 {
265     LOG(Archives, "LegacyWebArchive - Creating from raw data");
266 
267     RefPtr<LegacyWebArchive> archive = create();
268 
269     ASSERT(data);
270     if (!data)
271         return 0;
272 
273     RetainPtr<CFDataRef> cfData(AdoptCF, data->createCFData());
274     if (!cfData)
275         return 0;
276 
277     CFStringRef errorString = 0;
278 
279     RetainPtr<CFDictionaryRef> plist(AdoptCF, static_cast<CFDictionaryRef>(CFPropertyListCreateFromXMLData(0, cfData.get(), kCFPropertyListImmutable, &errorString)));
280     if (!plist) {
281 #ifndef NDEBUG
282         const char* cError = errorString ? CFStringGetCStringPtr(errorString, kCFStringEncodingUTF8) : "unknown error";
283         LOG(Archives, "LegacyWebArchive - Error parsing PropertyList from archive data - %s", cError);
284 #endif
285         if (errorString)
286             CFRelease(errorString);
287         return 0;
288     }
289 
290     if (CFGetTypeID(plist.get()) != CFDictionaryGetTypeID()) {
291         LOG(Archives, "LegacyWebArchive - Archive property list is not the expected CFDictionary, aborting invalid WebArchive");
292         return 0;
293     }
294 
295     if (!archive->extract(plist.get()))
296         return 0;
297 
298     return archive.release();
299 }
300 
extract(CFDictionaryRef dictionary)301 bool LegacyWebArchive::extract(CFDictionaryRef dictionary)
302 {
303     ASSERT(dictionary);
304     if (!dictionary) {
305         LOG(Archives, "LegacyWebArchive - Null root CFDictionary, aborting invalid WebArchive");
306         return false;
307     }
308 
309     CFDictionaryRef mainResourceDict = static_cast<CFDictionaryRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveMainResourceKey));
310     if (!mainResourceDict) {
311         LOG(Archives, "LegacyWebArchive - No main resource in archive, aborting invalid WebArchive");
312         return false;
313     }
314     if (CFGetTypeID(mainResourceDict) != CFDictionaryGetTypeID()) {
315         LOG(Archives, "LegacyWebArchive - Main resource is not the expected CFDictionary, aborting invalid WebArchive");
316         return false;
317     }
318 
319     setMainResource(createResource(mainResourceDict));
320     if (!mainResource()) {
321         LOG(Archives, "LegacyWebArchive - Failed to parse main resource from CFDictionary or main resource does not exist, aborting invalid WebArchive");
322         return false;
323     }
324 
325     CFArrayRef subresourceArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubresourcesKey));
326     if (subresourceArray && CFGetTypeID(subresourceArray) != CFArrayGetTypeID()) {
327         LOG(Archives, "LegacyWebArchive - Subresources is not the expected Array, aborting invalid WebArchive");
328         return false;
329     }
330 
331     if (subresourceArray) {
332         CFIndex count = CFArrayGetCount(subresourceArray);
333         for (CFIndex i = 0; i < count; ++i) {
334             CFDictionaryRef subresourceDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subresourceArray, i));
335             if (CFGetTypeID(subresourceDict) != CFDictionaryGetTypeID()) {
336                 LOG(Archives, "LegacyWebArchive - Subresource is not expected CFDictionary, aborting invalid WebArchive");
337                 return false;
338             }
339             addSubresource(createResource(subresourceDict));
340         }
341     }
342 
343     CFArrayRef subframeArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubframeArchivesKey));
344     if (subframeArray && CFGetTypeID(subframeArray) != CFArrayGetTypeID()) {
345         LOG(Archives, "LegacyWebArchive - Subframe archives is not the expected Array, aborting invalid WebArchive");
346         return false;
347     }
348 
349     if (subframeArray) {
350         CFIndex count = CFArrayGetCount(subframeArray);
351         for (CFIndex i = 0; i < count; ++i) {
352             CFDictionaryRef subframeDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subframeArray, i));
353             if (CFGetTypeID(subframeDict) != CFDictionaryGetTypeID()) {
354                 LOG(Archives, "LegacyWebArchive - Subframe array is not expected CFDictionary, aborting invalid WebArchive");
355                 return false;
356             }
357 
358             RefPtr<LegacyWebArchive> subframeArchive = create();
359             if (subframeArchive->extract(subframeDict))
360                 addSubframeArchive(subframeArchive.release());
361             else
362                 LOG(Archives, "LegacyWebArchive - Invalid subframe archive skipped");
363         }
364     }
365 
366     return true;
367 }
368 
rawDataRepresentation()369 RetainPtr<CFDataRef> LegacyWebArchive::rawDataRepresentation()
370 {
371     RetainPtr<CFDictionaryRef> propertyList = createPropertyListRepresentation(this);
372     ASSERT(propertyList);
373     if (!propertyList) {
374         LOG(Archives, "LegacyWebArchive - Failed to create property list for archive, returning no data");
375         return 0;
376     }
377 
378     RetainPtr<CFWriteStreamRef> stream(AdoptCF, CFWriteStreamCreateWithAllocatedBuffers(0, 0));
379 
380     CFWriteStreamOpen(stream.get());
381     CFPropertyListWriteToStream(propertyList.get(), stream.get(), kCFPropertyListBinaryFormat_v1_0, 0);
382 
383     RetainPtr<CFDataRef> plistData(AdoptCF, static_cast<CFDataRef>(CFWriteStreamCopyProperty(stream.get(), kCFStreamPropertyDataWritten)));
384     ASSERT(plistData);
385 
386     CFWriteStreamClose(stream.get());
387 
388     if (!plistData) {
389         LOG(Archives, "LegacyWebArchive - Failed to convert property list into raw data, returning no data");
390         return 0;
391     }
392 
393     return plistData;
394 }
395 
396 #if !PLATFORM(MAC)
397 
createResourceResponseFromMacArchivedData(CFDataRef responseData)398 ResourceResponse LegacyWebArchive::createResourceResponseFromMacArchivedData(CFDataRef responseData)
399 {
400     // FIXME: If is is possible to parse in a serialized NSURLResponse manually, without using
401     // NSKeyedUnarchiver, manipulating plists directly, then we want to do that here.
402     // Until then, this can be done on Mac only.
403     return ResourceResponse();
404 }
405 
createPropertyListRepresentation(const ResourceResponse & response)406 RetainPtr<CFDataRef> LegacyWebArchive::createPropertyListRepresentation(const ResourceResponse& response)
407 {
408     // FIXME: Write out the "new" format described in createResourceResponseFromPropertyListData once we invent it.
409     return 0;
410 }
411 
412 #endif
413 
create(Node * node)414 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Node* node)
415 {
416     ASSERT(node);
417     if (!node)
418         return create();
419 
420     Document* document = node->document();
421     Frame* frame = document ? document->frame() : 0;
422     if (!frame)
423         return create();
424 
425     Vector<Node*> nodeList;
426     String markupString = createMarkup(node, IncludeNode, &nodeList);
427     Node::NodeType nodeType = node->nodeType();
428     if (nodeType != Node::DOCUMENT_NODE && nodeType != Node::DOCUMENT_TYPE_NODE)
429         markupString = frame->documentTypeString() + markupString;
430 
431     return create(markupString, frame, nodeList);
432 }
433 
create(Frame * frame)434 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Frame* frame)
435 {
436     ASSERT(frame);
437 
438     DocumentLoader* documentLoader = frame->loader()->documentLoader();
439 
440     if (!documentLoader)
441         return 0;
442 
443     Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
444 
445     unsigned children = frame->tree()->childCount();
446     for (unsigned i = 0; i < children; ++i) {
447         RefPtr<LegacyWebArchive> childFrameArchive = create(frame->tree()->child(i));
448         if (childFrameArchive)
449             subframeArchives.append(childFrameArchive.release());
450     }
451 
452     Vector<PassRefPtr<ArchiveResource> > subresources;
453     documentLoader->getSubresources(subresources);
454 
455     return create(documentLoader->mainResource(), subresources, subframeArchives);
456 }
457 
create(Range * range)458 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Range* range)
459 {
460     if (!range)
461         return 0;
462 
463     Node* startContainer = range->startContainer();
464     if (!startContainer)
465         return 0;
466 
467     Document* document = startContainer->document();
468     if (!document)
469         return 0;
470 
471     Frame* frame = document->frame();
472     if (!frame)
473         return 0;
474 
475     Vector<Node*> nodeList;
476 
477     // FIXME: This is always "for interchange". Is that right? See the previous method.
478     String markupString = frame->documentTypeString() + createMarkup(range, &nodeList, AnnotateForInterchange);
479 
480     return create(markupString, frame, nodeList);
481 }
482 
create(const String & markupString,Frame * frame,const Vector<Node * > & nodes)483 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, Frame* frame, const Vector<Node*>& nodes)
484 {
485     ASSERT(frame);
486 
487     const ResourceResponse& response = frame->loader()->documentLoader()->response();
488     KURL responseURL = response.url();
489 
490     // it's possible to have a response without a URL here
491     // <rdar://problem/5454935>
492     if (responseURL.isNull())
493         responseURL = KURL(ParsedURLString, "");
494 
495     PassRefPtr<ArchiveResource> mainResource = ArchiveResource::create(utf8Buffer(markupString), responseURL, response.mimeType(), "UTF-8", frame->tree()->uniqueName());
496 
497     Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
498     Vector<PassRefPtr<ArchiveResource> > subresources;
499     HashSet<KURL> uniqueSubresources;
500 
501     size_t nodesSize = nodes.size();
502     for (size_t i = 0; i < nodesSize; ++i) {
503         Node* node = nodes[i];
504         Frame* childFrame;
505         if ((node->hasTagName(HTMLNames::frameTag) || node->hasTagName(HTMLNames::iframeTag) || node->hasTagName(HTMLNames::objectTag)) &&
506              (childFrame = static_cast<HTMLFrameOwnerElement*>(node)->contentFrame())) {
507             RefPtr<LegacyWebArchive> subframeArchive = create(childFrame->document());
508 
509             if (subframeArchive)
510                 subframeArchives.append(subframeArchive);
511             else
512                 LOG_ERROR("Unabled to archive subframe %s", childFrame->tree()->uniqueName().string().utf8().data());
513         } else {
514             ListHashSet<KURL> subresourceURLs;
515             node->getSubresourceURLs(subresourceURLs);
516 
517             DocumentLoader* documentLoader = frame->loader()->documentLoader();
518             ListHashSet<KURL>::iterator iterEnd = subresourceURLs.end();
519             for (ListHashSet<KURL>::iterator iter = subresourceURLs.begin(); iter != iterEnd; ++iter) {
520                 const KURL& subresourceURL = *iter;
521                 if (uniqueSubresources.contains(subresourceURL))
522                     continue;
523 
524                 uniqueSubresources.add(subresourceURL);
525 
526                 RefPtr<ArchiveResource> resource = documentLoader->subresource(subresourceURL);
527                 if (resource) {
528                     subresources.append(resource.release());
529                     continue;
530                 }
531 
532                 CachedResource* cachedResource = memoryCache()->resourceForURL(subresourceURL);
533                 if (cachedResource) {
534                     resource = ArchiveResource::create(cachedResource->data(), subresourceURL, cachedResource->response());
535                     if (resource) {
536                         subresources.append(resource.release());
537                         continue;
538                     }
539                 }
540 
541                 // FIXME: should do something better than spew to console here
542                 LOG_ERROR("Failed to archive subresource for %s", subresourceURL.string().utf8().data());
543             }
544         }
545     }
546 
547     // Add favicon if one exists for this page, if we are archiving the entire page.
548     if (nodesSize && nodes[0]->isDocumentNode() && iconDatabase().isEnabled()) {
549         const String& iconURL = iconDatabase().synchronousIconURLForPageURL(responseURL);
550         if (!iconURL.isEmpty() && iconDatabase().synchronousIconDataKnownForIconURL(iconURL)) {
551             if (Image* iconImage = iconDatabase().synchronousIconForPageURL(responseURL, IntSize(16, 16))) {
552                 if (RefPtr<ArchiveResource> resource = ArchiveResource::create(iconImage->data(), KURL(ParsedURLString, iconURL), "image/x-icon", "", ""))
553                     subresources.append(resource.release());
554             }
555         }
556     }
557 
558     return create(mainResource, subresources, subframeArchives);
559 }
560 
createFromSelection(Frame * frame)561 PassRefPtr<LegacyWebArchive> LegacyWebArchive::createFromSelection(Frame* frame)
562 {
563     if (!frame)
564         return 0;
565 
566     RefPtr<Range> selectionRange = frame->selection()->toNormalizedRange();
567     Vector<Node*> nodeList;
568     String markupString = frame->documentTypeString() + createMarkup(selectionRange.get(), &nodeList, AnnotateForInterchange);
569 
570     RefPtr<LegacyWebArchive> archive = create(markupString, frame, nodeList);
571 
572     if (!frame->document() || !frame->document()->isFrameSet())
573         return archive.release();
574 
575     // Wrap the frameset document in an iframe so it can be pasted into
576     // another document (which will have a body or frameset of its own).
577     String iframeMarkup = makeString("<iframe frameborder=\"no\" marginwidth=\"0\" marginheight=\"0\" width=\"98%%\" height=\"98%%\" src=\"",
578                                      frame->loader()->documentLoader()->response().url().string(), "\"></iframe>");
579     RefPtr<ArchiveResource> iframeResource = ArchiveResource::create(utf8Buffer(iframeMarkup), blankURL(), "text/html", "UTF-8", String());
580 
581     Vector<PassRefPtr<ArchiveResource> > subresources;
582 
583     Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
584     subframeArchives.append(archive);
585 
586     archive = create(iframeResource.release(), subresources, subframeArchives);
587 
588     return archive.release();
589 }
590 
591 }
592