• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008, 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1.  Redistributions of source code must retain the above copyright
9  *     notice, this list of conditions and the following disclaimer.
10  * 2.  Redistributions in binary form must reproduce the above copyright
11  *     notice, this list of conditions and the following disclaimer in the
12  *     documentation and/or other materials provided with the distribution.
13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14  *     its contributors may be used to endorse or promote products derived
15  *     from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "config.h"
30 #include "LegacyWebArchive.h"
31 
32 #include "CString.h"
33 #include "Cache.h"
34 #include "Document.h"
35 #include "DocumentLoader.h"
36 #include "Frame.h"
37 #include "FrameLoader.h"
38 #include "FrameTree.h"
39 #include "HTMLFrameOwnerElement.h"
40 #include "HTMLNames.h"
41 #include "IconDatabase.h"
42 #include "Image.h"
43 #include "KURLHash.h"
44 #include "Logging.h"
45 #include "markup.h"
46 #include "Node.h"
47 #include "Range.h"
48 #include "SelectionController.h"
49 #include "SharedBuffer.h"
50 #include <wtf/ListHashSet.h>
51 #include <wtf/RetainPtr.h>
52 
53 namespace WebCore {
54 
55 static const CFStringRef LegacyWebArchiveMainResourceKey = CFSTR("WebMainResource");
56 static const CFStringRef LegacyWebArchiveSubresourcesKey = CFSTR("WebSubresources");
57 static const CFStringRef LegacyWebArchiveSubframeArchivesKey = CFSTR("WebSubframeArchives");
58 static const CFStringRef LegacyWebArchiveResourceDataKey = CFSTR("WebResourceData");
59 static const CFStringRef LegacyWebArchiveResourceFrameNameKey = CFSTR("WebResourceFrameName");
60 static const CFStringRef LegacyWebArchiveResourceMIMETypeKey = CFSTR("WebResourceMIMEType");
61 static const CFStringRef LegacyWebArchiveResourceURLKey = CFSTR("WebResourceURL");
62 static const CFStringRef LegacyWebArchiveResourceTextEncodingNameKey = CFSTR("WebResourceTextEncodingName");
63 static const CFStringRef LegacyWebArchiveResourceResponseKey = CFSTR("WebResourceResponse");
64 static const CFStringRef LegacyWebArchiveResourceResponseVersionKey = CFSTR("WebResourceResponseVersion");
65 
createPropertyListRepresentation(ArchiveResource * resource,MainResourceStatus isMainResource)66 RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(ArchiveResource* resource, MainResourceStatus isMainResource)
67 {
68     if (!resource) {
69         // The property list representation of a null/empty WebResource has the following 3 objects stored as nil.
70         // FIXME: 0 is not serializable. Presumably we need to use kCFNull here instead for compatibility.
71         // FIXME: But why do we need to support a resource of 0? Who relies on that?
72         RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 3, 0, 0));
73         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, 0);
74         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, 0);
75         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, 0);
76         return propertyList;
77     }
78 
79     RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 6, 0, &kCFTypeDictionaryValueCallBacks));
80 
81     // Resource data can be empty, but must be represented by an empty CFDataRef
82     SharedBuffer* data = resource->data();
83     RetainPtr<CFDataRef> cfData;
84     if (data)
85         cfData.adoptCF(data->createCFData());
86     else
87         cfData.adoptCF(CFDataCreate(0, 0, 0));
88     CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, cfData.get());
89 
90     // Resource URL cannot be null
91     RetainPtr<CFStringRef> cfURL(AdoptCF, resource->url().string().createCFString());
92     if (cfURL)
93         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, cfURL.get());
94     else {
95         LOG(Archives, "LegacyWebArchive - NULL resource URL is invalid - returning null property list");
96         return 0;
97     }
98 
99     // FrameName should be left out if empty for subresources, but always included for main resources
100     const String& frameName(resource->frameName());
101     if (!frameName.isEmpty() || isMainResource) {
102         RetainPtr<CFStringRef> cfFrameName(AdoptCF, frameName.createCFString());
103         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceFrameNameKey, cfFrameName.get());
104     }
105 
106     // Set MIMEType, TextEncodingName, and ResourceResponse only if they actually exist
107     const String& mimeType(resource->mimeType());
108     if (!mimeType.isEmpty()) {
109         RetainPtr<CFStringRef> cfMIMEType(AdoptCF, mimeType.createCFString());
110         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, cfMIMEType.get());
111     }
112 
113     const String& textEncoding(resource->textEncoding());
114     if (!textEncoding.isEmpty()) {
115         RetainPtr<CFStringRef> cfTextEncoding(AdoptCF, textEncoding.createCFString());
116         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceTextEncodingNameKey, cfTextEncoding.get());
117     }
118 
119     // Don't include the resource response for the main resource
120     if (!isMainResource) {
121         RetainPtr<CFDataRef> resourceResponseData = createPropertyListRepresentation(resource->response());
122         if (resourceResponseData)
123             CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceResponseKey, resourceResponseData.get());
124     }
125 
126     return propertyList;
127 }
128 
createPropertyListRepresentation(Archive * archive)129 RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(Archive* archive)
130 {
131     RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 3, 0, &kCFTypeDictionaryValueCallBacks));
132 
133     RetainPtr<CFDictionaryRef> mainResourceDict = createPropertyListRepresentation(archive->mainResource(), MainResource);
134     ASSERT(mainResourceDict);
135     if (!mainResourceDict)
136         return 0;
137     CFDictionarySetValue(propertyList.get(), LegacyWebArchiveMainResourceKey, mainResourceDict.get());
138 
139     RetainPtr<CFMutableArrayRef> subresourcesArray(AdoptCF, CFArrayCreateMutable(0, archive->subresources().size(), &kCFTypeArrayCallBacks));
140     const Vector<RefPtr<ArchiveResource> >& subresources(archive->subresources());
141     for (unsigned i = 0; i < subresources.size(); ++i) {
142         RetainPtr<CFDictionaryRef> subresource = createPropertyListRepresentation(subresources[i].get(), Subresource);
143         if (subresource)
144             CFArrayAppendValue(subresourcesArray.get(), subresource.get());
145         else
146             LOG(Archives, "LegacyWebArchive - Failed to create property list for subresource");
147     }
148     if (CFArrayGetCount(subresourcesArray.get()))
149         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubresourcesKey, subresourcesArray.get());
150 
151     RetainPtr<CFMutableArrayRef> subframesArray(AdoptCF, CFArrayCreateMutable(0, archive->subframeArchives().size(), &kCFTypeArrayCallBacks));
152     const Vector<RefPtr<Archive> >& subframeArchives(archive->subframeArchives());
153     for (unsigned i = 0; i < subframeArchives.size(); ++i) {
154         RetainPtr<CFDictionaryRef> subframeArchive = createPropertyListRepresentation(subframeArchives[i].get());
155         if (subframeArchive)
156             CFArrayAppendValue(subframesArray.get(), subframeArchive.get());
157         else
158             LOG(Archives, "LegacyWebArchive - Failed to create property list for subframe archive");
159     }
160     if (CFArrayGetCount(subframesArray.get()))
161         CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubframeArchivesKey, subframesArray.get());
162 
163     return propertyList;
164 }
165 
createResourceResponseFromPropertyListData(CFDataRef data,CFStringRef responseDataType)166 ResourceResponse LegacyWebArchive::createResourceResponseFromPropertyListData(CFDataRef data, CFStringRef responseDataType)
167 {
168     ASSERT(data);
169     if (!data)
170         return ResourceResponse();
171 
172     // If the ResourceResponseVersion (passed in as responseDataType) exists at all, this is a "new" web archive that we
173     // can parse well in a cross platform manner If it doesn't exist, we will assume this is an "old" web archive with,
174     // NSURLResponse objects in it and parse the ResourceResponse as such.
175     if (!responseDataType)
176         return createResourceResponseFromMacArchivedData(data);
177 
178     // FIXME: Parse the "new" format that the above comment references here. This format doesn't exist yet.
179     return ResourceResponse();
180 }
181 
createResource(CFDictionaryRef dictionary)182 PassRefPtr<ArchiveResource> LegacyWebArchive::createResource(CFDictionaryRef dictionary)
183 {
184     ASSERT(dictionary);
185     if (!dictionary)
186         return 0;
187 
188     CFDataRef resourceData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceDataKey));
189     if (resourceData && CFGetTypeID(resourceData) != CFDataGetTypeID()) {
190         LOG(Archives, "LegacyWebArchive - Resource data is not of type CFData, cannot create invalid resource");
191         return 0;
192     }
193 
194     CFStringRef frameName = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceFrameNameKey));
195     if (frameName && CFGetTypeID(frameName) != CFStringGetTypeID()) {
196         LOG(Archives, "LegacyWebArchive - Frame name is not of type CFString, cannot create invalid resource");
197         return 0;
198     }
199 
200     CFStringRef mimeType = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceMIMETypeKey));
201     if (mimeType && CFGetTypeID(mimeType) != CFStringGetTypeID()) {
202         LOG(Archives, "LegacyWebArchive - MIME type is not of type CFString, cannot create invalid resource");
203         return 0;
204     }
205 
206     CFStringRef url = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceURLKey));
207     if (url && CFGetTypeID(url) != CFStringGetTypeID()) {
208         LOG(Archives, "LegacyWebArchive - URL is not of type CFString, cannot create invalid resource");
209         return 0;
210     }
211 
212     CFStringRef textEncoding = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceTextEncodingNameKey));
213     if (textEncoding && CFGetTypeID(textEncoding) != CFStringGetTypeID()) {
214         LOG(Archives, "LegacyWebArchive - Text encoding is not of type CFString, cannot create invalid resource");
215         return 0;
216     }
217 
218     ResourceResponse response;
219 
220     CFDataRef resourceResponseData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseKey));
221     if (resourceResponseData) {
222         if (CFGetTypeID(resourceResponseData) != CFDataGetTypeID()) {
223             LOG(Archives, "LegacyWebArchive - Resource response data is not of type CFData, cannot create invalid resource");
224             return 0;
225         }
226 
227         CFStringRef resourceResponseVersion = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseVersionKey));
228         if (resourceResponseVersion && CFGetTypeID(resourceResponseVersion) != CFStringGetTypeID()) {
229             LOG(Archives, "LegacyWebArchive - Resource response version is not of type CFString, cannot create invalid resource");
230             return 0;
231         }
232 
233         response = createResourceResponseFromPropertyListData(resourceResponseData, resourceResponseVersion);
234     }
235 
236     return ArchiveResource::create(SharedBuffer::create(CFDataGetBytePtr(resourceData), CFDataGetLength(resourceData)), KURL(url), mimeType, textEncoding, frameName, response);
237 }
238 
create()239 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create()
240 {
241     return adoptRef(new LegacyWebArchive);
242 }
243 
create(PassRefPtr<ArchiveResource> mainResource,Vector<PassRefPtr<ArchiveResource>> & subresources,Vector<PassRefPtr<LegacyWebArchive>> & subframeArchives)244 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(PassRefPtr<ArchiveResource> mainResource, Vector<PassRefPtr<ArchiveResource> >& subresources, Vector<PassRefPtr<LegacyWebArchive> >& subframeArchives)
245 {
246     ASSERT(mainResource);
247     if (!mainResource)
248         return 0;
249 
250     RefPtr<LegacyWebArchive> archive = create();
251     archive->setMainResource(mainResource);
252 
253     for (unsigned i = 0; i < subresources.size(); ++i)
254         archive->addSubresource(subresources[i]);
255 
256     for (unsigned i = 0; i < subframeArchives.size(); ++i)
257         archive->addSubframeArchive(subframeArchives[i]);
258 
259     return archive.release();
260 }
261 
create(SharedBuffer * data)262 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(SharedBuffer* data)
263 {
264     LOG(Archives, "LegacyWebArchive - Creating from raw data");
265 
266     RefPtr<LegacyWebArchive> archive = create();
267 
268     ASSERT(data);
269     if (!data)
270         return 0;
271 
272     RetainPtr<CFDataRef> cfData(AdoptCF, data->createCFData());
273     if (!cfData)
274         return 0;
275 
276     CFStringRef errorString = 0;
277 
278     RetainPtr<CFDictionaryRef> plist(AdoptCF, static_cast<CFDictionaryRef>(CFPropertyListCreateFromXMLData(0, cfData.get(), kCFPropertyListImmutable, &errorString)));
279     if (!plist) {
280 #ifndef NDEBUG
281         const char* cError = errorString ? CFStringGetCStringPtr(errorString, kCFStringEncodingUTF8) : "unknown error";
282         LOG(Archives, "LegacyWebArchive - Error parsing PropertyList from archive data - %s", cError);
283 #endif
284         if (errorString)
285             CFRelease(errorString);
286         return 0;
287     }
288 
289     if (CFGetTypeID(plist.get()) != CFDictionaryGetTypeID()) {
290         LOG(Archives, "LegacyWebArchive - Archive property list is not the expected CFDictionary, aborting invalid WebArchive");
291         return 0;
292     }
293 
294     if (!archive->extract(plist.get()))
295         return 0;
296 
297     return archive.release();
298 }
299 
extract(CFDictionaryRef dictionary)300 bool LegacyWebArchive::extract(CFDictionaryRef dictionary)
301 {
302     ASSERT(dictionary);
303     if (!dictionary) {
304         LOG(Archives, "LegacyWebArchive - Null root CFDictionary, aborting invalid WebArchive");
305         return false;
306     }
307 
308     CFDictionaryRef mainResourceDict = static_cast<CFDictionaryRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveMainResourceKey));
309     if (!mainResourceDict) {
310         LOG(Archives, "LegacyWebArchive - No main resource in archive, aborting invalid WebArchive");
311         return false;
312     }
313     if (CFGetTypeID(mainResourceDict) != CFDictionaryGetTypeID()) {
314         LOG(Archives, "LegacyWebArchive - Main resource is not the expected CFDictionary, aborting invalid WebArchive");
315         return false;
316     }
317 
318     setMainResource(createResource(mainResourceDict));
319     if (!mainResource()) {
320         LOG(Archives, "LegacyWebArchive - Failed to parse main resource from CFDictionary or main resource does not exist, aborting invalid WebArchive");
321         return false;
322     }
323 
324     CFArrayRef subresourceArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubresourcesKey));
325     if (subresourceArray && CFGetTypeID(subresourceArray) != CFArrayGetTypeID()) {
326         LOG(Archives, "LegacyWebArchive - Subresources is not the expected Array, aborting invalid WebArchive");
327         return false;
328     }
329 
330     if (subresourceArray) {
331         CFIndex count = CFArrayGetCount(subresourceArray);
332         for (CFIndex i = 0; i < count; ++i) {
333             CFDictionaryRef subresourceDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subresourceArray, i));
334             if (CFGetTypeID(subresourceDict) != CFDictionaryGetTypeID()) {
335                 LOG(Archives, "LegacyWebArchive - Subresource is not expected CFDictionary, aborting invalid WebArchive");
336                 return false;
337             }
338             addSubresource(createResource(subresourceDict));
339         }
340     }
341 
342     CFArrayRef subframeArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubframeArchivesKey));
343     if (subframeArray && CFGetTypeID(subframeArray) != CFArrayGetTypeID()) {
344         LOG(Archives, "LegacyWebArchive - Subframe archives is not the expected Array, aborting invalid WebArchive");
345         return false;
346     }
347 
348     if (subframeArray) {
349         CFIndex count = CFArrayGetCount(subframeArray);
350         for (CFIndex i = 0; i < count; ++i) {
351             CFDictionaryRef subframeDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subframeArray, i));
352             if (CFGetTypeID(subframeDict) != CFDictionaryGetTypeID()) {
353                 LOG(Archives, "LegacyWebArchive - Subframe array is not expected CFDictionary, aborting invalid WebArchive");
354                 return false;
355             }
356 
357             RefPtr<LegacyWebArchive> subframeArchive = create();
358             if (subframeArchive->extract(subframeDict))
359                 addSubframeArchive(subframeArchive.release());
360             else
361                 LOG(Archives, "LegacyWebArchive - Invalid subframe archive skipped");
362         }
363     }
364 
365     return true;
366 }
367 
rawDataRepresentation()368 RetainPtr<CFDataRef> LegacyWebArchive::rawDataRepresentation()
369 {
370     RetainPtr<CFDictionaryRef> propertyList = createPropertyListRepresentation(this);
371     ASSERT(propertyList);
372     if (!propertyList) {
373         LOG(Archives, "LegacyWebArchive - Failed to create property list for archive, returning no data");
374         return 0;
375     }
376 
377     RetainPtr<CFWriteStreamRef> stream(AdoptCF, CFWriteStreamCreateWithAllocatedBuffers(0, 0));
378 
379     CFWriteStreamOpen(stream.get());
380     CFPropertyListWriteToStream(propertyList.get(), stream.get(), kCFPropertyListBinaryFormat_v1_0, 0);
381 
382     RetainPtr<CFDataRef> plistData(AdoptCF, static_cast<CFDataRef>(CFWriteStreamCopyProperty(stream.get(), kCFStreamPropertyDataWritten)));
383     ASSERT(plistData);
384 
385     CFWriteStreamClose(stream.get());
386 
387     if (!plistData) {
388         LOG(Archives, "LegacyWebArchive - Failed to convert property list into raw data, returning no data");
389         return 0;
390     }
391 
392     return plistData;
393 }
394 
395 #if !PLATFORM(MAC)
396 
createResourceResponseFromMacArchivedData(CFDataRef responseData)397 ResourceResponse LegacyWebArchive::createResourceResponseFromMacArchivedData(CFDataRef responseData)
398 {
399     // FIXME: If is is possible to parse in a serialized NSURLResponse manually, without using
400     // NSKeyedUnarchiver, manipulating plists directly, then we want to do that here.
401     // Until then, this can be done on Mac only.
402     return ResourceResponse();
403 }
404 
createPropertyListRepresentation(const ResourceResponse & response)405 RetainPtr<CFDataRef> LegacyWebArchive::createPropertyListRepresentation(const ResourceResponse& response)
406 {
407     // FIXME: Write out the "new" format described in createResourceResponseFromPropertyListData once we invent it.
408     return 0;
409 }
410 
411 #endif
412 
create(Node * node)413 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Node* node)
414 {
415     ASSERT(node);
416     if (!node)
417         return create();
418 
419     Document* document = node->document();
420     Frame* frame = document ? document->frame() : 0;
421     if (!frame)
422         return create();
423 
424     Vector<Node*> nodeList;
425     String markupString = createMarkup(node, IncludeNode, &nodeList);
426     Node::NodeType nodeType = node->nodeType();
427     if (nodeType != Node::DOCUMENT_NODE && nodeType != Node::DOCUMENT_TYPE_NODE)
428         markupString = frame->documentTypeString() + markupString;
429 
430     return create(markupString, frame, nodeList);
431 }
432 
create(Frame * frame)433 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Frame* frame)
434 {
435     ASSERT(frame);
436 
437     DocumentLoader* documentLoader = frame->loader()->documentLoader();
438 
439     if (!documentLoader)
440         return 0;
441 
442     Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
443 
444     unsigned children = frame->tree()->childCount();
445     for (unsigned i = 0; i < children; ++i) {
446         RefPtr<LegacyWebArchive> childFrameArchive = create(frame->tree()->child(i));
447         if (childFrameArchive)
448             subframeArchives.append(childFrameArchive.release());
449     }
450 
451     Vector<PassRefPtr<ArchiveResource> > subresources;
452     documentLoader->getSubresources(subresources);
453 
454     return create(documentLoader->mainResource(), subresources, subframeArchives);
455 }
456 
create(Range * range)457 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Range* range)
458 {
459     if (!range)
460         return 0;
461 
462     Node* startContainer = range->startContainer();
463     if (!startContainer)
464         return 0;
465 
466     Document* document = startContainer->document();
467     if (!document)
468         return 0;
469 
470     Frame* frame = document->frame();
471     if (!frame)
472         return 0;
473 
474     Vector<Node*> nodeList;
475 
476     // FIXME: This is always "for interchange". Is that right? See the previous method.
477     String markupString = frame->documentTypeString() + createMarkup(range, &nodeList, AnnotateForInterchange);
478 
479     return create(markupString, frame, nodeList);
480 }
481 
create(const String & markupString,Frame * frame,const Vector<Node * > & nodes)482 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, Frame* frame, const Vector<Node*>& nodes)
483 {
484     ASSERT(frame);
485 
486     const ResourceResponse& response = frame->loader()->documentLoader()->response();
487     KURL responseURL = response.url();
488 
489     // it's possible to have a response without a URL here
490     // <rdar://problem/5454935>
491     if (responseURL.isNull())
492         responseURL = KURL("");
493 
494     PassRefPtr<ArchiveResource> mainResource = ArchiveResource::create(utf8Buffer(markupString), responseURL, response.mimeType(), "UTF-8", frame->tree()->name());
495 
496     Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
497     Vector<PassRefPtr<ArchiveResource> > subresources;
498     HashSet<KURL> uniqueSubresources;
499 
500     size_t nodesSize = nodes.size();
501     for (size_t i = 0; i < nodesSize; ++i) {
502         Node* node = nodes[i];
503         Frame* childFrame;
504         if ((node->hasTagName(HTMLNames::frameTag) || node->hasTagName(HTMLNames::iframeTag) || node->hasTagName(HTMLNames::objectTag)) &&
505              (childFrame = static_cast<HTMLFrameOwnerElement*>(node)->contentFrame())) {
506             RefPtr<LegacyWebArchive> subframeArchive = create(childFrame->document());
507 
508             if (subframeArchive)
509                 subframeArchives.append(subframeArchive);
510             else
511                 LOG_ERROR("Unabled to archive subframe %s", childFrame->tree()->name().string().utf8().data());
512         } else {
513             ListHashSet<KURL> subresourceURLs;
514             node->getSubresourceURLs(subresourceURLs);
515 
516             DocumentLoader* documentLoader = frame->loader()->documentLoader();
517             ListHashSet<KURL>::iterator iterEnd = subresourceURLs.end();
518             for (ListHashSet<KURL>::iterator iter = subresourceURLs.begin(); iter != iterEnd; ++iter) {
519                 const KURL& subresourceURL = *iter;
520                 if (uniqueSubresources.contains(subresourceURL))
521                     continue;
522 
523                 uniqueSubresources.add(subresourceURL);
524 
525                 RefPtr<ArchiveResource> resource = documentLoader->subresource(subresourceURL);
526                 if (resource) {
527                     subresources.append(resource.release());
528                     continue;
529                 }
530 
531                 CachedResource *cachedResource = cache()->resourceForURL(subresourceURL);
532                 if (cachedResource) {
533                     resource = ArchiveResource::create(cachedResource->data(), subresourceURL, cachedResource->response());
534                     if (resource) {
535                         subresources.append(resource.release());
536                         continue;
537                     }
538                 }
539 
540                 // FIXME: should do something better than spew to console here
541                 LOG_ERROR("Failed to archive subresource for %s", subresourceURL.string().utf8().data());
542             }
543         }
544     }
545 
546     // Add favicon if one exists for this page, if we are archiving the entire page.
547     if (nodesSize && nodes[0]->isDocumentNode() && iconDatabase() && iconDatabase()->isEnabled()) {
548         const String& iconURL = iconDatabase()->iconURLForPageURL(responseURL);
549         if (!iconURL.isEmpty() && iconDatabase()->iconDataKnownForIconURL(iconURL)) {
550             if (Image* iconImage = iconDatabase()->iconForPageURL(responseURL, IntSize(16, 16))) {
551                 if (RefPtr<ArchiveResource> resource = ArchiveResource::create(iconImage->data(), KURL(iconURL), "image/x-icon", "", ""))
552                     subresources.append(resource.release());
553             }
554         }
555     }
556 
557     return create(mainResource, subresources, subframeArchives);
558 }
559 
createFromSelection(Frame * frame)560 PassRefPtr<LegacyWebArchive> LegacyWebArchive::createFromSelection(Frame* frame)
561 {
562     if (!frame)
563         return 0;
564 
565     RefPtr<Range> selectionRange = frame->selection()->toNormalizedRange();
566     Vector<Node*> nodeList;
567     String markupString = frame->documentTypeString() + createMarkup(selectionRange.get(), &nodeList, AnnotateForInterchange);
568 
569     RefPtr<LegacyWebArchive> archive = create(markupString, frame, nodeList);
570 
571     if (!frame->document() || !frame->document()->isFrameSet())
572         return archive.release();
573 
574     // Wrap the frameset document in an iframe so it can be pasted into
575     // another document (which will have a body or frameset of its own).
576     String iframeMarkup = String::format("<iframe frameborder=\"no\" marginwidth=\"0\" marginheight=\"0\" width=\"98%%\" height=\"98%%\" src=\"%s\"></iframe>",
577                                          frame->loader()->documentLoader()->response().url().string().utf8().data());
578     RefPtr<ArchiveResource> iframeResource = ArchiveResource::create(utf8Buffer(iframeMarkup), blankURL(), "text/html", "UTF-8", String());
579 
580     Vector<PassRefPtr<ArchiveResource> > subresources;
581 
582     Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
583     subframeArchives.append(archive);
584 
585     archive = create(iframeResource.release(), subresources, subframeArchives);
586 
587     return archive.release();
588 }
589 
590 }
591