1 /*
2 * Copyright (C) 2008, 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14 * its contributors may be used to endorse or promote products derived
15 * from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "config.h"
30 #include "LegacyWebArchive.h"
31
32 #include "CString.h"
33 #include "Cache.h"
34 #include "Document.h"
35 #include "DocumentLoader.h"
36 #include "Frame.h"
37 #include "FrameLoader.h"
38 #include "FrameTree.h"
39 #include "HTMLFrameOwnerElement.h"
40 #include "HTMLNames.h"
41 #include "IconDatabase.h"
42 #include "Image.h"
43 #include "KURLHash.h"
44 #include "Logging.h"
45 #include "markup.h"
46 #include "Node.h"
47 #include "Range.h"
48 #include "SelectionController.h"
49 #include "SharedBuffer.h"
50 #include <wtf/ListHashSet.h>
51 #include <wtf/RetainPtr.h>
52
53 namespace WebCore {
54
// Keys used in the property list (CFDictionary) form of a legacy web archive.

// Keys of the top-level archive dictionary, written by createPropertyListRepresentation(Archive*)
// and read back by extract().
static const CFStringRef LegacyWebArchiveMainResourceKey = CFSTR("WebMainResource");
static const CFStringRef LegacyWebArchiveSubresourcesKey = CFSTR("WebSubresources");
static const CFStringRef LegacyWebArchiveSubframeArchivesKey = CFSTR("WebSubframeArchives");

// Keys of a single resource dictionary, written by
// createPropertyListRepresentation(ArchiveResource*, MainResourceStatus) and read back by createResource().
static const CFStringRef LegacyWebArchiveResourceDataKey = CFSTR("WebResourceData");
static const CFStringRef LegacyWebArchiveResourceFrameNameKey = CFSTR("WebResourceFrameName");
static const CFStringRef LegacyWebArchiveResourceMIMETypeKey = CFSTR("WebResourceMIMEType");
static const CFStringRef LegacyWebArchiveResourceURLKey = CFSTR("WebResourceURL");
static const CFStringRef LegacyWebArchiveResourceTextEncodingNameKey = CFSTR("WebResourceTextEncodingName");
static const CFStringRef LegacyWebArchiveResourceResponseKey = CFSTR("WebResourceResponse");
// Only read (in createResource()); nothing in this file writes this key yet.
static const CFStringRef LegacyWebArchiveResourceResponseVersionKey = CFSTR("WebResourceResponseVersion");
65
createPropertyListRepresentation(ArchiveResource * resource,MainResourceStatus isMainResource)66 RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(ArchiveResource* resource, MainResourceStatus isMainResource)
67 {
68 if (!resource) {
69 // The property list representation of a null/empty WebResource has the following 3 objects stored as nil.
70 // FIXME: 0 is not serializable. Presumably we need to use kCFNull here instead for compatibility.
71 // FIXME: But why do we need to support a resource of 0? Who relies on that?
72 RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 3, 0, 0));
73 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, 0);
74 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, 0);
75 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, 0);
76 return propertyList;
77 }
78
79 RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 6, 0, &kCFTypeDictionaryValueCallBacks));
80
81 // Resource data can be empty, but must be represented by an empty CFDataRef
82 SharedBuffer* data = resource->data();
83 RetainPtr<CFDataRef> cfData;
84 if (data)
85 cfData.adoptCF(data->createCFData());
86 else
87 cfData.adoptCF(CFDataCreate(0, 0, 0));
88 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, cfData.get());
89
90 // Resource URL cannot be null
91 RetainPtr<CFStringRef> cfURL(AdoptCF, resource->url().string().createCFString());
92 if (cfURL)
93 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, cfURL.get());
94 else {
95 LOG(Archives, "LegacyWebArchive - NULL resource URL is invalid - returning null property list");
96 return 0;
97 }
98
99 // FrameName should be left out if empty for subresources, but always included for main resources
100 const String& frameName(resource->frameName());
101 if (!frameName.isEmpty() || isMainResource) {
102 RetainPtr<CFStringRef> cfFrameName(AdoptCF, frameName.createCFString());
103 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceFrameNameKey, cfFrameName.get());
104 }
105
106 // Set MIMEType, TextEncodingName, and ResourceResponse only if they actually exist
107 const String& mimeType(resource->mimeType());
108 if (!mimeType.isEmpty()) {
109 RetainPtr<CFStringRef> cfMIMEType(AdoptCF, mimeType.createCFString());
110 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, cfMIMEType.get());
111 }
112
113 const String& textEncoding(resource->textEncoding());
114 if (!textEncoding.isEmpty()) {
115 RetainPtr<CFStringRef> cfTextEncoding(AdoptCF, textEncoding.createCFString());
116 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceTextEncodingNameKey, cfTextEncoding.get());
117 }
118
119 // Don't include the resource response for the main resource
120 if (!isMainResource) {
121 RetainPtr<CFDataRef> resourceResponseData = createPropertyListRepresentation(resource->response());
122 if (resourceResponseData)
123 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceResponseKey, resourceResponseData.get());
124 }
125
126 return propertyList;
127 }
128
createPropertyListRepresentation(Archive * archive)129 RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(Archive* archive)
130 {
131 RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 3, 0, &kCFTypeDictionaryValueCallBacks));
132
133 RetainPtr<CFDictionaryRef> mainResourceDict = createPropertyListRepresentation(archive->mainResource(), MainResource);
134 ASSERT(mainResourceDict);
135 if (!mainResourceDict)
136 return 0;
137 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveMainResourceKey, mainResourceDict.get());
138
139 RetainPtr<CFMutableArrayRef> subresourcesArray(AdoptCF, CFArrayCreateMutable(0, archive->subresources().size(), &kCFTypeArrayCallBacks));
140 const Vector<RefPtr<ArchiveResource> >& subresources(archive->subresources());
141 for (unsigned i = 0; i < subresources.size(); ++i) {
142 RetainPtr<CFDictionaryRef> subresource = createPropertyListRepresentation(subresources[i].get(), Subresource);
143 if (subresource)
144 CFArrayAppendValue(subresourcesArray.get(), subresource.get());
145 else
146 LOG(Archives, "LegacyWebArchive - Failed to create property list for subresource");
147 }
148 if (CFArrayGetCount(subresourcesArray.get()))
149 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubresourcesKey, subresourcesArray.get());
150
151 RetainPtr<CFMutableArrayRef> subframesArray(AdoptCF, CFArrayCreateMutable(0, archive->subframeArchives().size(), &kCFTypeArrayCallBacks));
152 const Vector<RefPtr<Archive> >& subframeArchives(archive->subframeArchives());
153 for (unsigned i = 0; i < subframeArchives.size(); ++i) {
154 RetainPtr<CFDictionaryRef> subframeArchive = createPropertyListRepresentation(subframeArchives[i].get());
155 if (subframeArchive)
156 CFArrayAppendValue(subframesArray.get(), subframeArchive.get());
157 else
158 LOG(Archives, "LegacyWebArchive - Failed to create property list for subframe archive");
159 }
160 if (CFArrayGetCount(subframesArray.get()))
161 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubframeArchivesKey, subframesArray.get());
162
163 return propertyList;
164 }
165
createResourceResponseFromPropertyListData(CFDataRef data,CFStringRef responseDataType)166 ResourceResponse LegacyWebArchive::createResourceResponseFromPropertyListData(CFDataRef data, CFStringRef responseDataType)
167 {
168 ASSERT(data);
169 if (!data)
170 return ResourceResponse();
171
172 // If the ResourceResponseVersion (passed in as responseDataType) exists at all, this is a "new" web archive that we
173 // can parse well in a cross platform manner If it doesn't exist, we will assume this is an "old" web archive with,
174 // NSURLResponse objects in it and parse the ResourceResponse as such.
175 if (!responseDataType)
176 return createResourceResponseFromMacArchivedData(data);
177
178 // FIXME: Parse the "new" format that the above comment references here. This format doesn't exist yet.
179 return ResourceResponse();
180 }
181
createResource(CFDictionaryRef dictionary)182 PassRefPtr<ArchiveResource> LegacyWebArchive::createResource(CFDictionaryRef dictionary)
183 {
184 ASSERT(dictionary);
185 if (!dictionary)
186 return 0;
187
188 CFDataRef resourceData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceDataKey));
189 if (resourceData && CFGetTypeID(resourceData) != CFDataGetTypeID()) {
190 LOG(Archives, "LegacyWebArchive - Resource data is not of type CFData, cannot create invalid resource");
191 return 0;
192 }
193
194 CFStringRef frameName = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceFrameNameKey));
195 if (frameName && CFGetTypeID(frameName) != CFStringGetTypeID()) {
196 LOG(Archives, "LegacyWebArchive - Frame name is not of type CFString, cannot create invalid resource");
197 return 0;
198 }
199
200 CFStringRef mimeType = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceMIMETypeKey));
201 if (mimeType && CFGetTypeID(mimeType) != CFStringGetTypeID()) {
202 LOG(Archives, "LegacyWebArchive - MIME type is not of type CFString, cannot create invalid resource");
203 return 0;
204 }
205
206 CFStringRef url = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceURLKey));
207 if (url && CFGetTypeID(url) != CFStringGetTypeID()) {
208 LOG(Archives, "LegacyWebArchive - URL is not of type CFString, cannot create invalid resource");
209 return 0;
210 }
211
212 CFStringRef textEncoding = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceTextEncodingNameKey));
213 if (textEncoding && CFGetTypeID(textEncoding) != CFStringGetTypeID()) {
214 LOG(Archives, "LegacyWebArchive - Text encoding is not of type CFString, cannot create invalid resource");
215 return 0;
216 }
217
218 ResourceResponse response;
219
220 CFDataRef resourceResponseData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseKey));
221 if (resourceResponseData) {
222 if (CFGetTypeID(resourceResponseData) != CFDataGetTypeID()) {
223 LOG(Archives, "LegacyWebArchive - Resource response data is not of type CFData, cannot create invalid resource");
224 return 0;
225 }
226
227 CFStringRef resourceResponseVersion = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseVersionKey));
228 if (resourceResponseVersion && CFGetTypeID(resourceResponseVersion) != CFStringGetTypeID()) {
229 LOG(Archives, "LegacyWebArchive - Resource response version is not of type CFString, cannot create invalid resource");
230 return 0;
231 }
232
233 response = createResourceResponseFromPropertyListData(resourceResponseData, resourceResponseVersion);
234 }
235
236 return ArchiveResource::create(SharedBuffer::create(CFDataGetBytePtr(resourceData), CFDataGetLength(resourceData)), KURL(url), mimeType, textEncoding, frameName, response);
237 }
238
create()239 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create()
240 {
241 return adoptRef(new LegacyWebArchive);
242 }
243
create(PassRefPtr<ArchiveResource> mainResource,Vector<PassRefPtr<ArchiveResource>> & subresources,Vector<PassRefPtr<LegacyWebArchive>> & subframeArchives)244 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(PassRefPtr<ArchiveResource> mainResource, Vector<PassRefPtr<ArchiveResource> >& subresources, Vector<PassRefPtr<LegacyWebArchive> >& subframeArchives)
245 {
246 ASSERT(mainResource);
247 if (!mainResource)
248 return 0;
249
250 RefPtr<LegacyWebArchive> archive = create();
251 archive->setMainResource(mainResource);
252
253 for (unsigned i = 0; i < subresources.size(); ++i)
254 archive->addSubresource(subresources[i]);
255
256 for (unsigned i = 0; i < subframeArchives.size(); ++i)
257 archive->addSubframeArchive(subframeArchives[i]);
258
259 return archive.release();
260 }
261
create(SharedBuffer * data)262 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(SharedBuffer* data)
263 {
264 LOG(Archives, "LegacyWebArchive - Creating from raw data");
265
266 RefPtr<LegacyWebArchive> archive = create();
267
268 ASSERT(data);
269 if (!data)
270 return 0;
271
272 RetainPtr<CFDataRef> cfData(AdoptCF, data->createCFData());
273 if (!cfData)
274 return 0;
275
276 CFStringRef errorString = 0;
277
278 RetainPtr<CFDictionaryRef> plist(AdoptCF, static_cast<CFDictionaryRef>(CFPropertyListCreateFromXMLData(0, cfData.get(), kCFPropertyListImmutable, &errorString)));
279 if (!plist) {
280 #ifndef NDEBUG
281 const char* cError = errorString ? CFStringGetCStringPtr(errorString, kCFStringEncodingUTF8) : "unknown error";
282 LOG(Archives, "LegacyWebArchive - Error parsing PropertyList from archive data - %s", cError);
283 #endif
284 if (errorString)
285 CFRelease(errorString);
286 return 0;
287 }
288
289 if (CFGetTypeID(plist.get()) != CFDictionaryGetTypeID()) {
290 LOG(Archives, "LegacyWebArchive - Archive property list is not the expected CFDictionary, aborting invalid WebArchive");
291 return 0;
292 }
293
294 if (!archive->extract(plist.get()))
295 return 0;
296
297 return archive.release();
298 }
299
extract(CFDictionaryRef dictionary)300 bool LegacyWebArchive::extract(CFDictionaryRef dictionary)
301 {
302 ASSERT(dictionary);
303 if (!dictionary) {
304 LOG(Archives, "LegacyWebArchive - Null root CFDictionary, aborting invalid WebArchive");
305 return false;
306 }
307
308 CFDictionaryRef mainResourceDict = static_cast<CFDictionaryRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveMainResourceKey));
309 if (!mainResourceDict) {
310 LOG(Archives, "LegacyWebArchive - No main resource in archive, aborting invalid WebArchive");
311 return false;
312 }
313 if (CFGetTypeID(mainResourceDict) != CFDictionaryGetTypeID()) {
314 LOG(Archives, "LegacyWebArchive - Main resource is not the expected CFDictionary, aborting invalid WebArchive");
315 return false;
316 }
317
318 setMainResource(createResource(mainResourceDict));
319 if (!mainResource()) {
320 LOG(Archives, "LegacyWebArchive - Failed to parse main resource from CFDictionary or main resource does not exist, aborting invalid WebArchive");
321 return false;
322 }
323
324 CFArrayRef subresourceArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubresourcesKey));
325 if (subresourceArray && CFGetTypeID(subresourceArray) != CFArrayGetTypeID()) {
326 LOG(Archives, "LegacyWebArchive - Subresources is not the expected Array, aborting invalid WebArchive");
327 return false;
328 }
329
330 if (subresourceArray) {
331 CFIndex count = CFArrayGetCount(subresourceArray);
332 for (CFIndex i = 0; i < count; ++i) {
333 CFDictionaryRef subresourceDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subresourceArray, i));
334 if (CFGetTypeID(subresourceDict) != CFDictionaryGetTypeID()) {
335 LOG(Archives, "LegacyWebArchive - Subresource is not expected CFDictionary, aborting invalid WebArchive");
336 return false;
337 }
338 addSubresource(createResource(subresourceDict));
339 }
340 }
341
342 CFArrayRef subframeArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubframeArchivesKey));
343 if (subframeArray && CFGetTypeID(subframeArray) != CFArrayGetTypeID()) {
344 LOG(Archives, "LegacyWebArchive - Subframe archives is not the expected Array, aborting invalid WebArchive");
345 return false;
346 }
347
348 if (subframeArray) {
349 CFIndex count = CFArrayGetCount(subframeArray);
350 for (CFIndex i = 0; i < count; ++i) {
351 CFDictionaryRef subframeDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subframeArray, i));
352 if (CFGetTypeID(subframeDict) != CFDictionaryGetTypeID()) {
353 LOG(Archives, "LegacyWebArchive - Subframe array is not expected CFDictionary, aborting invalid WebArchive");
354 return false;
355 }
356
357 RefPtr<LegacyWebArchive> subframeArchive = create();
358 if (subframeArchive->extract(subframeDict))
359 addSubframeArchive(subframeArchive.release());
360 else
361 LOG(Archives, "LegacyWebArchive - Invalid subframe archive skipped");
362 }
363 }
364
365 return true;
366 }
367
rawDataRepresentation()368 RetainPtr<CFDataRef> LegacyWebArchive::rawDataRepresentation()
369 {
370 RetainPtr<CFDictionaryRef> propertyList = createPropertyListRepresentation(this);
371 ASSERT(propertyList);
372 if (!propertyList) {
373 LOG(Archives, "LegacyWebArchive - Failed to create property list for archive, returning no data");
374 return 0;
375 }
376
377 RetainPtr<CFWriteStreamRef> stream(AdoptCF, CFWriteStreamCreateWithAllocatedBuffers(0, 0));
378
379 CFWriteStreamOpen(stream.get());
380 CFPropertyListWriteToStream(propertyList.get(), stream.get(), kCFPropertyListBinaryFormat_v1_0, 0);
381
382 RetainPtr<CFDataRef> plistData(AdoptCF, static_cast<CFDataRef>(CFWriteStreamCopyProperty(stream.get(), kCFStreamPropertyDataWritten)));
383 ASSERT(plistData);
384
385 CFWriteStreamClose(stream.get());
386
387 if (!plistData) {
388 LOG(Archives, "LegacyWebArchive - Failed to convert property list into raw data, returning no data");
389 return 0;
390 }
391
392 return plistData;
393 }
394
395 #if !PLATFORM(MAC)
396
createResourceResponseFromMacArchivedData(CFDataRef responseData)397 ResourceResponse LegacyWebArchive::createResourceResponseFromMacArchivedData(CFDataRef responseData)
398 {
399 // FIXME: If is is possible to parse in a serialized NSURLResponse manually, without using
400 // NSKeyedUnarchiver, manipulating plists directly, then we want to do that here.
401 // Until then, this can be done on Mac only.
402 return ResourceResponse();
403 }
404
createPropertyListRepresentation(const ResourceResponse & response)405 RetainPtr<CFDataRef> LegacyWebArchive::createPropertyListRepresentation(const ResourceResponse& response)
406 {
407 // FIXME: Write out the "new" format described in createResourceResponseFromPropertyListData once we invent it.
408 return 0;
409 }
410
411 #endif
412
create(Node * node)413 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Node* node)
414 {
415 ASSERT(node);
416 if (!node)
417 return create();
418
419 Document* document = node->document();
420 Frame* frame = document ? document->frame() : 0;
421 if (!frame)
422 return create();
423
424 Vector<Node*> nodeList;
425 String markupString = createMarkup(node, IncludeNode, &nodeList);
426 Node::NodeType nodeType = node->nodeType();
427 if (nodeType != Node::DOCUMENT_NODE && nodeType != Node::DOCUMENT_TYPE_NODE)
428 markupString = frame->documentTypeString() + markupString;
429
430 return create(markupString, frame, nodeList);
431 }
432
create(Frame * frame)433 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Frame* frame)
434 {
435 ASSERT(frame);
436
437 DocumentLoader* documentLoader = frame->loader()->documentLoader();
438
439 if (!documentLoader)
440 return 0;
441
442 Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
443
444 unsigned children = frame->tree()->childCount();
445 for (unsigned i = 0; i < children; ++i) {
446 RefPtr<LegacyWebArchive> childFrameArchive = create(frame->tree()->child(i));
447 if (childFrameArchive)
448 subframeArchives.append(childFrameArchive.release());
449 }
450
451 Vector<PassRefPtr<ArchiveResource> > subresources;
452 documentLoader->getSubresources(subresources);
453
454 return create(documentLoader->mainResource(), subresources, subframeArchives);
455 }
456
create(Range * range)457 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Range* range)
458 {
459 if (!range)
460 return 0;
461
462 Node* startContainer = range->startContainer();
463 if (!startContainer)
464 return 0;
465
466 Document* document = startContainer->document();
467 if (!document)
468 return 0;
469
470 Frame* frame = document->frame();
471 if (!frame)
472 return 0;
473
474 Vector<Node*> nodeList;
475
476 // FIXME: This is always "for interchange". Is that right? See the previous method.
477 String markupString = frame->documentTypeString() + createMarkup(range, &nodeList, AnnotateForInterchange);
478
479 return create(markupString, frame, nodeList);
480 }
481
create(const String & markupString,Frame * frame,const Vector<Node * > & nodes)482 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, Frame* frame, const Vector<Node*>& nodes)
483 {
484 ASSERT(frame);
485
486 const ResourceResponse& response = frame->loader()->documentLoader()->response();
487 KURL responseURL = response.url();
488
489 // it's possible to have a response without a URL here
490 // <rdar://problem/5454935>
491 if (responseURL.isNull())
492 responseURL = KURL("");
493
494 PassRefPtr<ArchiveResource> mainResource = ArchiveResource::create(utf8Buffer(markupString), responseURL, response.mimeType(), "UTF-8", frame->tree()->name());
495
496 Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
497 Vector<PassRefPtr<ArchiveResource> > subresources;
498 HashSet<KURL> uniqueSubresources;
499
500 size_t nodesSize = nodes.size();
501 for (size_t i = 0; i < nodesSize; ++i) {
502 Node* node = nodes[i];
503 Frame* childFrame;
504 if ((node->hasTagName(HTMLNames::frameTag) || node->hasTagName(HTMLNames::iframeTag) || node->hasTagName(HTMLNames::objectTag)) &&
505 (childFrame = static_cast<HTMLFrameOwnerElement*>(node)->contentFrame())) {
506 RefPtr<LegacyWebArchive> subframeArchive = create(childFrame->document());
507
508 if (subframeArchive)
509 subframeArchives.append(subframeArchive);
510 else
511 LOG_ERROR("Unabled to archive subframe %s", childFrame->tree()->name().string().utf8().data());
512 } else {
513 ListHashSet<KURL> subresourceURLs;
514 node->getSubresourceURLs(subresourceURLs);
515
516 DocumentLoader* documentLoader = frame->loader()->documentLoader();
517 ListHashSet<KURL>::iterator iterEnd = subresourceURLs.end();
518 for (ListHashSet<KURL>::iterator iter = subresourceURLs.begin(); iter != iterEnd; ++iter) {
519 const KURL& subresourceURL = *iter;
520 if (uniqueSubresources.contains(subresourceURL))
521 continue;
522
523 uniqueSubresources.add(subresourceURL);
524
525 RefPtr<ArchiveResource> resource = documentLoader->subresource(subresourceURL);
526 if (resource) {
527 subresources.append(resource.release());
528 continue;
529 }
530
531 CachedResource *cachedResource = cache()->resourceForURL(subresourceURL);
532 if (cachedResource) {
533 resource = ArchiveResource::create(cachedResource->data(), subresourceURL, cachedResource->response());
534 if (resource) {
535 subresources.append(resource.release());
536 continue;
537 }
538 }
539
540 // FIXME: should do something better than spew to console here
541 LOG_ERROR("Failed to archive subresource for %s", subresourceURL.string().utf8().data());
542 }
543 }
544 }
545
546 // Add favicon if one exists for this page, if we are archiving the entire page.
547 if (nodesSize && nodes[0]->isDocumentNode() && iconDatabase() && iconDatabase()->isEnabled()) {
548 const String& iconURL = iconDatabase()->iconURLForPageURL(responseURL);
549 if (!iconURL.isEmpty() && iconDatabase()->iconDataKnownForIconURL(iconURL)) {
550 if (Image* iconImage = iconDatabase()->iconForPageURL(responseURL, IntSize(16, 16))) {
551 if (RefPtr<ArchiveResource> resource = ArchiveResource::create(iconImage->data(), KURL(iconURL), "image/x-icon", "", ""))
552 subresources.append(resource.release());
553 }
554 }
555 }
556
557 return create(mainResource, subresources, subframeArchives);
558 }
559
createFromSelection(Frame * frame)560 PassRefPtr<LegacyWebArchive> LegacyWebArchive::createFromSelection(Frame* frame)
561 {
562 if (!frame)
563 return 0;
564
565 RefPtr<Range> selectionRange = frame->selection()->toNormalizedRange();
566 Vector<Node*> nodeList;
567 String markupString = frame->documentTypeString() + createMarkup(selectionRange.get(), &nodeList, AnnotateForInterchange);
568
569 RefPtr<LegacyWebArchive> archive = create(markupString, frame, nodeList);
570
571 if (!frame->document() || !frame->document()->isFrameSet())
572 return archive.release();
573
574 // Wrap the frameset document in an iframe so it can be pasted into
575 // another document (which will have a body or frameset of its own).
576 String iframeMarkup = String::format("<iframe frameborder=\"no\" marginwidth=\"0\" marginheight=\"0\" width=\"98%%\" height=\"98%%\" src=\"%s\"></iframe>",
577 frame->loader()->documentLoader()->response().url().string().utf8().data());
578 RefPtr<ArchiveResource> iframeResource = ArchiveResource::create(utf8Buffer(iframeMarkup), blankURL(), "text/html", "UTF-8", String());
579
580 Vector<PassRefPtr<ArchiveResource> > subresources;
581
582 Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
583 subframeArchives.append(archive);
584
585 archive = create(iframeResource.release(), subresources, subframeArchives);
586
587 return archive.release();
588 }
589
590 }
591