1 /*
2 * Copyright (C) 2008, 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14 * its contributors may be used to endorse or promote products derived
15 * from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "config.h"
30 #include "LegacyWebArchive.h"
31
32 #include "MemoryCache.h"
33 #include "Document.h"
34 #include "DocumentLoader.h"
35 #include "Frame.h"
36 #include "FrameLoader.h"
37 #include "FrameTree.h"
38 #include "HTMLFrameOwnerElement.h"
39 #include "HTMLNames.h"
40 #include "IconDatabase.h"
41 #include "Image.h"
42 #include "KURLHash.h"
43 #include "Logging.h"
44 #include "markup.h"
45 #include "Node.h"
46 #include "Range.h"
47 #include "SelectionController.h"
48 #include "SharedBuffer.h"
49 #include <wtf/text/CString.h>
50 #include <wtf/text/StringConcatenate.h>
51 #include <wtf/ListHashSet.h>
52 #include <wtf/RetainPtr.h>
53
54 namespace WebCore {
55
56 static const CFStringRef LegacyWebArchiveMainResourceKey = CFSTR("WebMainResource");
57 static const CFStringRef LegacyWebArchiveSubresourcesKey = CFSTR("WebSubresources");
58 static const CFStringRef LegacyWebArchiveSubframeArchivesKey = CFSTR("WebSubframeArchives");
59 static const CFStringRef LegacyWebArchiveResourceDataKey = CFSTR("WebResourceData");
60 static const CFStringRef LegacyWebArchiveResourceFrameNameKey = CFSTR("WebResourceFrameName");
61 static const CFStringRef LegacyWebArchiveResourceMIMETypeKey = CFSTR("WebResourceMIMEType");
62 static const CFStringRef LegacyWebArchiveResourceURLKey = CFSTR("WebResourceURL");
63 static const CFStringRef LegacyWebArchiveResourceTextEncodingNameKey = CFSTR("WebResourceTextEncodingName");
64 static const CFStringRef LegacyWebArchiveResourceResponseKey = CFSTR("WebResourceResponse");
65 static const CFStringRef LegacyWebArchiveResourceResponseVersionKey = CFSTR("WebResourceResponseVersion");
66
createPropertyListRepresentation(ArchiveResource * resource,MainResourceStatus isMainResource)67 RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(ArchiveResource* resource, MainResourceStatus isMainResource)
68 {
69 if (!resource) {
70 // The property list representation of a null/empty WebResource has the following 3 objects stored as nil.
71 // FIXME: 0 is not serializable. Presumably we need to use kCFNull here instead for compatibility.
72 // FIXME: But why do we need to support a resource of 0? Who relies on that?
73 RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 3, 0, 0));
74 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, 0);
75 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, 0);
76 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, 0);
77 return propertyList;
78 }
79
80 RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 6, 0, &kCFTypeDictionaryValueCallBacks));
81
82 // Resource data can be empty, but must be represented by an empty CFDataRef
83 SharedBuffer* data = resource->data();
84 RetainPtr<CFDataRef> cfData;
85 if (data)
86 cfData.adoptCF(data->createCFData());
87 else
88 cfData.adoptCF(CFDataCreate(0, 0, 0));
89 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, cfData.get());
90
91 // Resource URL cannot be null
92 RetainPtr<CFStringRef> cfURL(AdoptCF, resource->url().string().createCFString());
93 if (cfURL)
94 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, cfURL.get());
95 else {
96 LOG(Archives, "LegacyWebArchive - NULL resource URL is invalid - returning null property list");
97 return 0;
98 }
99
100 // FrameName should be left out if empty for subresources, but always included for main resources
101 const String& frameName(resource->frameName());
102 if (!frameName.isEmpty() || isMainResource) {
103 RetainPtr<CFStringRef> cfFrameName(AdoptCF, frameName.createCFString());
104 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceFrameNameKey, cfFrameName.get());
105 }
106
107 // Set MIMEType, TextEncodingName, and ResourceResponse only if they actually exist
108 const String& mimeType(resource->mimeType());
109 if (!mimeType.isEmpty()) {
110 RetainPtr<CFStringRef> cfMIMEType(AdoptCF, mimeType.createCFString());
111 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, cfMIMEType.get());
112 }
113
114 const String& textEncoding(resource->textEncoding());
115 if (!textEncoding.isEmpty()) {
116 RetainPtr<CFStringRef> cfTextEncoding(AdoptCF, textEncoding.createCFString());
117 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceTextEncodingNameKey, cfTextEncoding.get());
118 }
119
120 // Don't include the resource response for the main resource
121 if (!isMainResource) {
122 RetainPtr<CFDataRef> resourceResponseData = createPropertyListRepresentation(resource->response());
123 if (resourceResponseData)
124 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceResponseKey, resourceResponseData.get());
125 }
126
127 return propertyList;
128 }
129
createPropertyListRepresentation(Archive * archive)130 RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(Archive* archive)
131 {
132 RetainPtr<CFMutableDictionaryRef> propertyList(AdoptCF, CFDictionaryCreateMutable(0, 3, 0, &kCFTypeDictionaryValueCallBacks));
133
134 RetainPtr<CFDictionaryRef> mainResourceDict = createPropertyListRepresentation(archive->mainResource(), MainResource);
135 ASSERT(mainResourceDict);
136 if (!mainResourceDict)
137 return 0;
138 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveMainResourceKey, mainResourceDict.get());
139
140 RetainPtr<CFMutableArrayRef> subresourcesArray(AdoptCF, CFArrayCreateMutable(0, archive->subresources().size(), &kCFTypeArrayCallBacks));
141 const Vector<RefPtr<ArchiveResource> >& subresources(archive->subresources());
142 for (unsigned i = 0; i < subresources.size(); ++i) {
143 RetainPtr<CFDictionaryRef> subresource = createPropertyListRepresentation(subresources[i].get(), Subresource);
144 if (subresource)
145 CFArrayAppendValue(subresourcesArray.get(), subresource.get());
146 else
147 LOG(Archives, "LegacyWebArchive - Failed to create property list for subresource");
148 }
149 if (CFArrayGetCount(subresourcesArray.get()))
150 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubresourcesKey, subresourcesArray.get());
151
152 RetainPtr<CFMutableArrayRef> subframesArray(AdoptCF, CFArrayCreateMutable(0, archive->subframeArchives().size(), &kCFTypeArrayCallBacks));
153 const Vector<RefPtr<Archive> >& subframeArchives(archive->subframeArchives());
154 for (unsigned i = 0; i < subframeArchives.size(); ++i) {
155 RetainPtr<CFDictionaryRef> subframeArchive = createPropertyListRepresentation(subframeArchives[i].get());
156 if (subframeArchive)
157 CFArrayAppendValue(subframesArray.get(), subframeArchive.get());
158 else
159 LOG(Archives, "LegacyWebArchive - Failed to create property list for subframe archive");
160 }
161 if (CFArrayGetCount(subframesArray.get()))
162 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubframeArchivesKey, subframesArray.get());
163
164 return propertyList;
165 }
166
createResourceResponseFromPropertyListData(CFDataRef data,CFStringRef responseDataType)167 ResourceResponse LegacyWebArchive::createResourceResponseFromPropertyListData(CFDataRef data, CFStringRef responseDataType)
168 {
169 ASSERT(data);
170 if (!data)
171 return ResourceResponse();
172
173 // If the ResourceResponseVersion (passed in as responseDataType) exists at all, this is a "new" web archive that we
174 // can parse well in a cross platform manner If it doesn't exist, we will assume this is an "old" web archive with,
175 // NSURLResponse objects in it and parse the ResourceResponse as such.
176 if (!responseDataType)
177 return createResourceResponseFromMacArchivedData(data);
178
179 // FIXME: Parse the "new" format that the above comment references here. This format doesn't exist yet.
180 return ResourceResponse();
181 }
182
createResource(CFDictionaryRef dictionary)183 PassRefPtr<ArchiveResource> LegacyWebArchive::createResource(CFDictionaryRef dictionary)
184 {
185 ASSERT(dictionary);
186 if (!dictionary)
187 return 0;
188
189 CFDataRef resourceData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceDataKey));
190 if (resourceData && CFGetTypeID(resourceData) != CFDataGetTypeID()) {
191 LOG(Archives, "LegacyWebArchive - Resource data is not of type CFData, cannot create invalid resource");
192 return 0;
193 }
194
195 CFStringRef frameName = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceFrameNameKey));
196 if (frameName && CFGetTypeID(frameName) != CFStringGetTypeID()) {
197 LOG(Archives, "LegacyWebArchive - Frame name is not of type CFString, cannot create invalid resource");
198 return 0;
199 }
200
201 CFStringRef mimeType = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceMIMETypeKey));
202 if (!mimeType || CFGetTypeID(mimeType) != CFStringGetTypeID()) {
203 LOG(Archives, "LegacyWebArchive - MIME type is not of type CFString, cannot create invalid resource");
204 return 0;
205 }
206
207 CFStringRef url = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceURLKey));
208 if (url && CFGetTypeID(url) != CFStringGetTypeID()) {
209 LOG(Archives, "LegacyWebArchive - URL is not of type CFString, cannot create invalid resource");
210 return 0;
211 }
212
213 CFStringRef textEncoding = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceTextEncodingNameKey));
214 if (textEncoding && CFGetTypeID(textEncoding) != CFStringGetTypeID()) {
215 LOG(Archives, "LegacyWebArchive - Text encoding is not of type CFString, cannot create invalid resource");
216 return 0;
217 }
218
219 ResourceResponse response;
220
221 CFDataRef resourceResponseData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseKey));
222 if (resourceResponseData) {
223 if (CFGetTypeID(resourceResponseData) != CFDataGetTypeID()) {
224 LOG(Archives, "LegacyWebArchive - Resource response data is not of type CFData, cannot create invalid resource");
225 return 0;
226 }
227
228 CFStringRef resourceResponseVersion = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseVersionKey));
229 if (resourceResponseVersion && CFGetTypeID(resourceResponseVersion) != CFStringGetTypeID()) {
230 LOG(Archives, "LegacyWebArchive - Resource response version is not of type CFString, cannot create invalid resource");
231 return 0;
232 }
233
234 response = createResourceResponseFromPropertyListData(resourceResponseData, resourceResponseVersion);
235 }
236
237 return ArchiveResource::create(SharedBuffer::wrapCFData(resourceData), KURL(KURL(), url), mimeType, textEncoding, frameName, response);
238 }
239
create()240 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create()
241 {
242 return adoptRef(new LegacyWebArchive);
243 }
244
create(PassRefPtr<ArchiveResource> mainResource,Vector<PassRefPtr<ArchiveResource>> & subresources,Vector<PassRefPtr<LegacyWebArchive>> & subframeArchives)245 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(PassRefPtr<ArchiveResource> mainResource, Vector<PassRefPtr<ArchiveResource> >& subresources, Vector<PassRefPtr<LegacyWebArchive> >& subframeArchives)
246 {
247 ASSERT(mainResource);
248 if (!mainResource)
249 return 0;
250
251 RefPtr<LegacyWebArchive> archive = create();
252 archive->setMainResource(mainResource);
253
254 for (unsigned i = 0; i < subresources.size(); ++i)
255 archive->addSubresource(subresources[i]);
256
257 for (unsigned i = 0; i < subframeArchives.size(); ++i)
258 archive->addSubframeArchive(subframeArchives[i]);
259
260 return archive.release();
261 }
262
create(SharedBuffer * data)263 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(SharedBuffer* data)
264 {
265 LOG(Archives, "LegacyWebArchive - Creating from raw data");
266
267 RefPtr<LegacyWebArchive> archive = create();
268
269 ASSERT(data);
270 if (!data)
271 return 0;
272
273 RetainPtr<CFDataRef> cfData(AdoptCF, data->createCFData());
274 if (!cfData)
275 return 0;
276
277 CFStringRef errorString = 0;
278
279 RetainPtr<CFDictionaryRef> plist(AdoptCF, static_cast<CFDictionaryRef>(CFPropertyListCreateFromXMLData(0, cfData.get(), kCFPropertyListImmutable, &errorString)));
280 if (!plist) {
281 #ifndef NDEBUG
282 const char* cError = errorString ? CFStringGetCStringPtr(errorString, kCFStringEncodingUTF8) : "unknown error";
283 LOG(Archives, "LegacyWebArchive - Error parsing PropertyList from archive data - %s", cError);
284 #endif
285 if (errorString)
286 CFRelease(errorString);
287 return 0;
288 }
289
290 if (CFGetTypeID(plist.get()) != CFDictionaryGetTypeID()) {
291 LOG(Archives, "LegacyWebArchive - Archive property list is not the expected CFDictionary, aborting invalid WebArchive");
292 return 0;
293 }
294
295 if (!archive->extract(plist.get()))
296 return 0;
297
298 return archive.release();
299 }
300
extract(CFDictionaryRef dictionary)301 bool LegacyWebArchive::extract(CFDictionaryRef dictionary)
302 {
303 ASSERT(dictionary);
304 if (!dictionary) {
305 LOG(Archives, "LegacyWebArchive - Null root CFDictionary, aborting invalid WebArchive");
306 return false;
307 }
308
309 CFDictionaryRef mainResourceDict = static_cast<CFDictionaryRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveMainResourceKey));
310 if (!mainResourceDict) {
311 LOG(Archives, "LegacyWebArchive - No main resource in archive, aborting invalid WebArchive");
312 return false;
313 }
314 if (CFGetTypeID(mainResourceDict) != CFDictionaryGetTypeID()) {
315 LOG(Archives, "LegacyWebArchive - Main resource is not the expected CFDictionary, aborting invalid WebArchive");
316 return false;
317 }
318
319 setMainResource(createResource(mainResourceDict));
320 if (!mainResource()) {
321 LOG(Archives, "LegacyWebArchive - Failed to parse main resource from CFDictionary or main resource does not exist, aborting invalid WebArchive");
322 return false;
323 }
324
325 CFArrayRef subresourceArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubresourcesKey));
326 if (subresourceArray && CFGetTypeID(subresourceArray) != CFArrayGetTypeID()) {
327 LOG(Archives, "LegacyWebArchive - Subresources is not the expected Array, aborting invalid WebArchive");
328 return false;
329 }
330
331 if (subresourceArray) {
332 CFIndex count = CFArrayGetCount(subresourceArray);
333 for (CFIndex i = 0; i < count; ++i) {
334 CFDictionaryRef subresourceDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subresourceArray, i));
335 if (CFGetTypeID(subresourceDict) != CFDictionaryGetTypeID()) {
336 LOG(Archives, "LegacyWebArchive - Subresource is not expected CFDictionary, aborting invalid WebArchive");
337 return false;
338 }
339 addSubresource(createResource(subresourceDict));
340 }
341 }
342
343 CFArrayRef subframeArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubframeArchivesKey));
344 if (subframeArray && CFGetTypeID(subframeArray) != CFArrayGetTypeID()) {
345 LOG(Archives, "LegacyWebArchive - Subframe archives is not the expected Array, aborting invalid WebArchive");
346 return false;
347 }
348
349 if (subframeArray) {
350 CFIndex count = CFArrayGetCount(subframeArray);
351 for (CFIndex i = 0; i < count; ++i) {
352 CFDictionaryRef subframeDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subframeArray, i));
353 if (CFGetTypeID(subframeDict) != CFDictionaryGetTypeID()) {
354 LOG(Archives, "LegacyWebArchive - Subframe array is not expected CFDictionary, aborting invalid WebArchive");
355 return false;
356 }
357
358 RefPtr<LegacyWebArchive> subframeArchive = create();
359 if (subframeArchive->extract(subframeDict))
360 addSubframeArchive(subframeArchive.release());
361 else
362 LOG(Archives, "LegacyWebArchive - Invalid subframe archive skipped");
363 }
364 }
365
366 return true;
367 }
368
rawDataRepresentation()369 RetainPtr<CFDataRef> LegacyWebArchive::rawDataRepresentation()
370 {
371 RetainPtr<CFDictionaryRef> propertyList = createPropertyListRepresentation(this);
372 ASSERT(propertyList);
373 if (!propertyList) {
374 LOG(Archives, "LegacyWebArchive - Failed to create property list for archive, returning no data");
375 return 0;
376 }
377
378 RetainPtr<CFWriteStreamRef> stream(AdoptCF, CFWriteStreamCreateWithAllocatedBuffers(0, 0));
379
380 CFWriteStreamOpen(stream.get());
381 CFPropertyListWriteToStream(propertyList.get(), stream.get(), kCFPropertyListBinaryFormat_v1_0, 0);
382
383 RetainPtr<CFDataRef> plistData(AdoptCF, static_cast<CFDataRef>(CFWriteStreamCopyProperty(stream.get(), kCFStreamPropertyDataWritten)));
384 ASSERT(plistData);
385
386 CFWriteStreamClose(stream.get());
387
388 if (!plistData) {
389 LOG(Archives, "LegacyWebArchive - Failed to convert property list into raw data, returning no data");
390 return 0;
391 }
392
393 return plistData;
394 }
395
396 #if !PLATFORM(MAC)
397
createResourceResponseFromMacArchivedData(CFDataRef responseData)398 ResourceResponse LegacyWebArchive::createResourceResponseFromMacArchivedData(CFDataRef responseData)
399 {
400 // FIXME: If is is possible to parse in a serialized NSURLResponse manually, without using
401 // NSKeyedUnarchiver, manipulating plists directly, then we want to do that here.
402 // Until then, this can be done on Mac only.
403 return ResourceResponse();
404 }
405
createPropertyListRepresentation(const ResourceResponse & response)406 RetainPtr<CFDataRef> LegacyWebArchive::createPropertyListRepresentation(const ResourceResponse& response)
407 {
408 // FIXME: Write out the "new" format described in createResourceResponseFromPropertyListData once we invent it.
409 return 0;
410 }
411
412 #endif
413
create(Node * node)414 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Node* node)
415 {
416 ASSERT(node);
417 if (!node)
418 return create();
419
420 Document* document = node->document();
421 Frame* frame = document ? document->frame() : 0;
422 if (!frame)
423 return create();
424
425 Vector<Node*> nodeList;
426 String markupString = createMarkup(node, IncludeNode, &nodeList);
427 Node::NodeType nodeType = node->nodeType();
428 if (nodeType != Node::DOCUMENT_NODE && nodeType != Node::DOCUMENT_TYPE_NODE)
429 markupString = frame->documentTypeString() + markupString;
430
431 return create(markupString, frame, nodeList);
432 }
433
create(Frame * frame)434 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Frame* frame)
435 {
436 ASSERT(frame);
437
438 DocumentLoader* documentLoader = frame->loader()->documentLoader();
439
440 if (!documentLoader)
441 return 0;
442
443 Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
444
445 unsigned children = frame->tree()->childCount();
446 for (unsigned i = 0; i < children; ++i) {
447 RefPtr<LegacyWebArchive> childFrameArchive = create(frame->tree()->child(i));
448 if (childFrameArchive)
449 subframeArchives.append(childFrameArchive.release());
450 }
451
452 Vector<PassRefPtr<ArchiveResource> > subresources;
453 documentLoader->getSubresources(subresources);
454
455 return create(documentLoader->mainResource(), subresources, subframeArchives);
456 }
457
create(Range * range)458 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Range* range)
459 {
460 if (!range)
461 return 0;
462
463 Node* startContainer = range->startContainer();
464 if (!startContainer)
465 return 0;
466
467 Document* document = startContainer->document();
468 if (!document)
469 return 0;
470
471 Frame* frame = document->frame();
472 if (!frame)
473 return 0;
474
475 Vector<Node*> nodeList;
476
477 // FIXME: This is always "for interchange". Is that right? See the previous method.
478 String markupString = frame->documentTypeString() + createMarkup(range, &nodeList, AnnotateForInterchange);
479
480 return create(markupString, frame, nodeList);
481 }
482
create(const String & markupString,Frame * frame,const Vector<Node * > & nodes)483 PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, Frame* frame, const Vector<Node*>& nodes)
484 {
485 ASSERT(frame);
486
487 const ResourceResponse& response = frame->loader()->documentLoader()->response();
488 KURL responseURL = response.url();
489
490 // it's possible to have a response without a URL here
491 // <rdar://problem/5454935>
492 if (responseURL.isNull())
493 responseURL = KURL(ParsedURLString, "");
494
495 PassRefPtr<ArchiveResource> mainResource = ArchiveResource::create(utf8Buffer(markupString), responseURL, response.mimeType(), "UTF-8", frame->tree()->uniqueName());
496
497 Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
498 Vector<PassRefPtr<ArchiveResource> > subresources;
499 HashSet<KURL> uniqueSubresources;
500
501 size_t nodesSize = nodes.size();
502 for (size_t i = 0; i < nodesSize; ++i) {
503 Node* node = nodes[i];
504 Frame* childFrame;
505 if ((node->hasTagName(HTMLNames::frameTag) || node->hasTagName(HTMLNames::iframeTag) || node->hasTagName(HTMLNames::objectTag)) &&
506 (childFrame = static_cast<HTMLFrameOwnerElement*>(node)->contentFrame())) {
507 RefPtr<LegacyWebArchive> subframeArchive = create(childFrame->document());
508
509 if (subframeArchive)
510 subframeArchives.append(subframeArchive);
511 else
512 LOG_ERROR("Unabled to archive subframe %s", childFrame->tree()->uniqueName().string().utf8().data());
513 } else {
514 ListHashSet<KURL> subresourceURLs;
515 node->getSubresourceURLs(subresourceURLs);
516
517 DocumentLoader* documentLoader = frame->loader()->documentLoader();
518 ListHashSet<KURL>::iterator iterEnd = subresourceURLs.end();
519 for (ListHashSet<KURL>::iterator iter = subresourceURLs.begin(); iter != iterEnd; ++iter) {
520 const KURL& subresourceURL = *iter;
521 if (uniqueSubresources.contains(subresourceURL))
522 continue;
523
524 uniqueSubresources.add(subresourceURL);
525
526 RefPtr<ArchiveResource> resource = documentLoader->subresource(subresourceURL);
527 if (resource) {
528 subresources.append(resource.release());
529 continue;
530 }
531
532 CachedResource* cachedResource = memoryCache()->resourceForURL(subresourceURL);
533 if (cachedResource) {
534 resource = ArchiveResource::create(cachedResource->data(), subresourceURL, cachedResource->response());
535 if (resource) {
536 subresources.append(resource.release());
537 continue;
538 }
539 }
540
541 // FIXME: should do something better than spew to console here
542 LOG_ERROR("Failed to archive subresource for %s", subresourceURL.string().utf8().data());
543 }
544 }
545 }
546
547 // Add favicon if one exists for this page, if we are archiving the entire page.
548 if (nodesSize && nodes[0]->isDocumentNode() && iconDatabase().isEnabled()) {
549 const String& iconURL = iconDatabase().synchronousIconURLForPageURL(responseURL);
550 if (!iconURL.isEmpty() && iconDatabase().synchronousIconDataKnownForIconURL(iconURL)) {
551 if (Image* iconImage = iconDatabase().synchronousIconForPageURL(responseURL, IntSize(16, 16))) {
552 if (RefPtr<ArchiveResource> resource = ArchiveResource::create(iconImage->data(), KURL(ParsedURLString, iconURL), "image/x-icon", "", ""))
553 subresources.append(resource.release());
554 }
555 }
556 }
557
558 return create(mainResource, subresources, subframeArchives);
559 }
560
createFromSelection(Frame * frame)561 PassRefPtr<LegacyWebArchive> LegacyWebArchive::createFromSelection(Frame* frame)
562 {
563 if (!frame)
564 return 0;
565
566 RefPtr<Range> selectionRange = frame->selection()->toNormalizedRange();
567 Vector<Node*> nodeList;
568 String markupString = frame->documentTypeString() + createMarkup(selectionRange.get(), &nodeList, AnnotateForInterchange);
569
570 RefPtr<LegacyWebArchive> archive = create(markupString, frame, nodeList);
571
572 if (!frame->document() || !frame->document()->isFrameSet())
573 return archive.release();
574
575 // Wrap the frameset document in an iframe so it can be pasted into
576 // another document (which will have a body or frameset of its own).
577 String iframeMarkup = makeString("<iframe frameborder=\"no\" marginwidth=\"0\" marginheight=\"0\" width=\"98%%\" height=\"98%%\" src=\"",
578 frame->loader()->documentLoader()->response().url().string(), "\"></iframe>");
579 RefPtr<ArchiveResource> iframeResource = ArchiveResource::create(utf8Buffer(iframeMarkup), blankURL(), "text/html", "UTF-8", String());
580
581 Vector<PassRefPtr<ArchiveResource> > subresources;
582
583 Vector<PassRefPtr<LegacyWebArchive> > subframeArchives;
584 subframeArchives.append(archive);
585
586 archive = create(iframeResource.release(), subresources, subframeArchives);
587
588 return archive.release();
589 }
590
591 }
592