• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following disclaimer
12  * in the documentation and/or other materials provided with the
13  * distribution.
14  *     * Neither the name of Google Inc. nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include "config.h"
32 #include "platform/mhtml/MHTMLArchive.h"
33 
34 #include "platform/DateComponents.h"
35 #include "platform/MIMETypeRegistry.h"
36 #include "platform/SerializedResource.h"
37 #include "platform/SharedBuffer.h"
38 #include "platform/mhtml/ArchiveResource.h"
39 #include "platform/mhtml/MHTMLParser.h"
40 #include "platform/text/QuotedPrintable.h"
41 #include "platform/weborigin/SchemeRegistry.h"
42 #include "wtf/CryptographicallyRandomNumber.h"
43 #include "wtf/DateMath.h"
44 #include "wtf/text/Base64.h"
45 #include "wtf/text/StringBuilder.h"
46 
47 namespace blink {
48 
// Content-Transfer-Encoding tokens written into each MHTML part header.
// The generator below selects one of these per resource and later compares
// against them to decide how the resource body is emitted.
const char* const quotedPrintable = "quoted-printable";
const char* const base64 = "base64";
const char* const binary = "binary";
52 
generateRandomBoundary()53 static String generateRandomBoundary()
54 {
55     // Trying to generate random boundaries similar to IE/UnMHT (ex: ----=_NextPart_000_001B_01CC157B.96F808A0).
56     const size_t randomValuesLength = 10;
57     char randomValues[randomValuesLength];
58     cryptographicallyRandomValues(&randomValues, randomValuesLength);
59     StringBuilder stringBuilder;
60     stringBuilder.appendLiteral("----=_NextPart_000_");
61     for (size_t i = 0; i < randomValuesLength; ++i) {
62         if (i == 2)
63             stringBuilder.append('_');
64         else if (i == 6)
65             stringBuilder.append('.');
66         stringBuilder.append(lowerNibbleToASCIIHexDigit(randomValues[i]));
67         stringBuilder.append(upperNibbleToASCIIHexDigit(randomValues[i]));
68     }
69     return stringBuilder.toString();
70 }
71 
// Returns a copy of |text| in which every character that is not printable
// ASCII has been replaced by '?'. The result has the same length as |text|.
static String replaceNonPrintableCharacters(const String& text)
{
    StringBuilder sanitized;
    for (size_t index = 0; index < text.length(); ++index) {
        if (!isASCIIPrintable(text[index])) {
            sanitized.append('?');
            continue;
        }
        sanitized.append(text[index]);
    }
    return sanitized.toString();
}
83 
MHTMLArchive()84 MHTMLArchive::MHTMLArchive()
85 {
86 }
87 
~MHTMLArchive()88 MHTMLArchive::~MHTMLArchive()
89 {
90 #if !ENABLE(OILPAN)
91     // Because all frames know about each other we need to perform a deep clearing of the archives graph.
92     clearAllSubframeArchives();
93 #endif
94 }
95 
create()96 PassRefPtrWillBeRawPtr<MHTMLArchive> MHTMLArchive::create()
97 {
98     return adoptRefWillBeNoop(new MHTMLArchive);
99 }
100 
create(const KURL & url,SharedBuffer * data)101 PassRefPtrWillBeRawPtr<MHTMLArchive> MHTMLArchive::create(const KURL& url, SharedBuffer* data)
102 {
103     // For security reasons we only load MHTML pages from local URLs.
104     if (!SchemeRegistry::shouldTreatURLSchemeAsLocal(url.protocol()))
105         return nullptr;
106 
107     MHTMLParser parser(data);
108     RefPtrWillBeRawPtr<MHTMLArchive> mainArchive = parser.parseArchive();
109     if (!mainArchive)
110         return nullptr; // Invalid MHTML file.
111 
112     // Since MHTML is a flat format, we need to make all frames aware of all resources.
113     for (size_t i = 0; i < parser.frameCount(); ++i) {
114         RefPtrWillBeRawPtr<MHTMLArchive> archive = parser.frameAt(i);
115         for (size_t j = 1; j < parser.frameCount(); ++j) {
116             if (i != j)
117                 archive->addSubframeArchive(parser.frameAt(j));
118         }
119         for (size_t j = 0; j < parser.subResourceCount(); ++j)
120             archive->addSubresource(parser.subResourceAt(j));
121     }
122     return mainArchive.release();
123 }
124 
generateMHTMLData(const Vector<SerializedResource> & resources,EncodingPolicy encodingPolicy,const String & title,const String & mimeType)125 PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(const Vector<SerializedResource>& resources, EncodingPolicy encodingPolicy, const String& title, const String& mimeType)
126 {
127     String boundary = generateRandomBoundary();
128     String endOfResourceBoundary = "--" + boundary + "\r\n";
129 
130     DateComponents now;
131     now.setMillisecondsSinceEpochForDateTime(currentTimeMS());
132     String dateString = makeRFC2822DateString(now.weekDay(), now.monthDay(), now.month(), now.fullYear(), now.hour(), now.minute(), now.second(), 0);
133 
134     StringBuilder stringBuilder;
135     stringBuilder.appendLiteral("From: <Saved by WebKit>\r\n");
136     stringBuilder.appendLiteral("Subject: ");
137     // We replace non ASCII characters with '?' characters to match IE's behavior.
138     stringBuilder.append(replaceNonPrintableCharacters(title));
139     stringBuilder.appendLiteral("\r\nDate: ");
140     stringBuilder.append(dateString);
141     stringBuilder.appendLiteral("\r\nMIME-Version: 1.0\r\n");
142     stringBuilder.appendLiteral("Content-Type: multipart/related;\r\n");
143     stringBuilder.appendLiteral("\ttype=\"");
144     stringBuilder.append(mimeType);
145     stringBuilder.appendLiteral("\";\r\n");
146     stringBuilder.appendLiteral("\tboundary=\"");
147     stringBuilder.append(boundary);
148     stringBuilder.appendLiteral("\"\r\n\r\n");
149 
150     // We use utf8() below instead of ascii() as ascii() replaces CRLFs with ?? (we still only have put ASCII characters in it).
151     ASSERT(stringBuilder.toString().containsOnlyASCII());
152     CString asciiString = stringBuilder.toString().utf8();
153     RefPtr<SharedBuffer> mhtmlData = SharedBuffer::create();
154     mhtmlData->append(asciiString.data(), asciiString.length());
155 
156     for (size_t i = 0; i < resources.size(); ++i) {
157         const SerializedResource& resource = resources[i];
158 
159         stringBuilder.clear();
160         stringBuilder.append(endOfResourceBoundary);
161         stringBuilder.appendLiteral("Content-Type: ");
162         stringBuilder.append(resource.mimeType);
163 
164         const char* contentEncoding = 0;
165         if (encodingPolicy == UseBinaryEncoding)
166             contentEncoding = binary;
167         else if (MIMETypeRegistry::isSupportedJavaScriptMIMEType(resource.mimeType) || MIMETypeRegistry::isSupportedNonImageMIMEType(resource.mimeType))
168             contentEncoding = quotedPrintable;
169         else
170             contentEncoding = base64;
171 
172         stringBuilder.appendLiteral("\r\nContent-Transfer-Encoding: ");
173         stringBuilder.append(contentEncoding);
174         stringBuilder.appendLiteral("\r\nContent-Location: ");
175         stringBuilder.append(resource.url);
176         stringBuilder.appendLiteral("\r\n\r\n");
177 
178         asciiString = stringBuilder.toString().utf8();
179         mhtmlData->append(asciiString.data(), asciiString.length());
180 
181         if (!strcmp(contentEncoding, binary)) {
182             const char* data;
183             size_t position = 0;
184             while (size_t length = resource.data->getSomeData(data, position)) {
185                 mhtmlData->append(data, length);
186                 position += length;
187             }
188         } else {
189             // FIXME: ideally we would encode the content as a stream without having to fetch it all.
190             const char* data = resource.data->data();
191             size_t dataLength = resource.data->size();
192             Vector<char> encodedData;
193             if (!strcmp(contentEncoding, quotedPrintable)) {
194                 quotedPrintableEncode(data, dataLength, encodedData);
195                 mhtmlData->append(encodedData.data(), encodedData.size());
196                 mhtmlData->append("\r\n", 2);
197             } else {
198                 ASSERT(!strcmp(contentEncoding, base64));
199                 // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs.
200                 base64Encode(data, dataLength, encodedData);
201                 const size_t maximumLineLength = 76;
202                 size_t index = 0;
203                 size_t encodedDataLength = encodedData.size();
204                 do {
205                     size_t lineLength = std::min(encodedDataLength - index, maximumLineLength);
206                     mhtmlData->append(encodedData.data() + index, lineLength);
207                     mhtmlData->append("\r\n", 2);
208                     index += maximumLineLength;
209                 } while (index < encodedDataLength);
210             }
211         }
212     }
213 
214     asciiString = String("--" + boundary + "--\r\n").utf8();
215     mhtmlData->append(asciiString.data(), asciiString.length());
216 
217     return mhtmlData.release();
218 }
219 
220 #if !ENABLE(OILPAN)
clearAllSubframeArchives()221 void MHTMLArchive::clearAllSubframeArchives()
222 {
223     SubFrameArchives clearedArchives;
224     clearAllSubframeArchivesImpl(&clearedArchives);
225 }
226 
clearAllSubframeArchivesImpl(SubFrameArchives * clearedArchives)227 void MHTMLArchive::clearAllSubframeArchivesImpl(SubFrameArchives* clearedArchives)
228 {
229     for (SubFrameArchives::iterator it = m_subframeArchives.begin(); it != m_subframeArchives.end(); ++it) {
230         if (!clearedArchives->contains(*it)) {
231             clearedArchives->append(*it);
232             (*it)->clearAllSubframeArchivesImpl(clearedArchives);
233         }
234     }
235     m_subframeArchives.clear();
236 }
237 #endif
238 
setMainResource(PassRefPtrWillBeRawPtr<ArchiveResource> mainResource)239 void MHTMLArchive::setMainResource(PassRefPtrWillBeRawPtr<ArchiveResource> mainResource)
240 {
241     m_mainResource = mainResource;
242 }
243 
addSubresource(PassRefPtrWillBeRawPtr<ArchiveResource> subResource)244 void MHTMLArchive::addSubresource(PassRefPtrWillBeRawPtr<ArchiveResource> subResource)
245 {
246     m_subresources.append(subResource);
247 }
248 
addSubframeArchive(PassRefPtrWillBeRawPtr<MHTMLArchive> subframeArchive)249 void MHTMLArchive::addSubframeArchive(PassRefPtrWillBeRawPtr<MHTMLArchive> subframeArchive)
250 {
251     m_subframeArchives.append(subframeArchive);
252 }
253 
trace(Visitor * visitor)254 void MHTMLArchive::trace(Visitor* visitor)
255 {
256     visitor->trace(m_mainResource);
257     visitor->trace(m_subresources);
258     visitor->trace(m_subframeArchives);
259 }
260 
261 }
262