• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following disclaimer
12  * in the documentation and/or other materials provided with the
13  * distribution.
14  *     * Neither the name of Google Inc. nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include "config.h"
32 #include "platform/mhtml/MHTMLArchive.h"
33 
34 #include "platform/DateComponents.h"
35 #include "platform/MIMETypeRegistry.h"
36 #include "platform/SerializedResource.h"
37 #include "platform/SharedBuffer.h"
38 #include "platform/mhtml/MHTMLParser.h"
39 #include "platform/text/QuotedPrintable.h"
40 #include "platform/weborigin/SchemeRegistry.h"
41 #include "wtf/CryptographicallyRandomNumber.h"
42 #include "wtf/DateMath.h"
43 #include "wtf/text/Base64.h"
44 #include "wtf/text/StringBuilder.h"
45 
46 namespace WebCore {
47 
// Content-Transfer-Encoding header values written by generateMHTMLData().

// Chosen for JavaScript and other supported non-image MIME types.
const char* const quotedPrintable = "quoted-printable";
// Chosen for every other resource when binary encoding was not requested.
const char* const base64 = "base64";
// Chosen for all resources when the caller asks for UseBinaryEncoding.
const char* const binary = "binary";
51 
generateRandomBoundary()52 static String generateRandomBoundary()
53 {
54     // Trying to generate random boundaries similar to IE/UnMHT (ex: ----=_NextPart_000_001B_01CC157B.96F808A0).
55     const size_t randomValuesLength = 10;
56     char randomValues[randomValuesLength];
57     cryptographicallyRandomValues(&randomValues, randomValuesLength);
58     StringBuilder stringBuilder;
59     stringBuilder.append("----=_NextPart_000_");
60     for (size_t i = 0; i < randomValuesLength; ++i) {
61         if (i == 2)
62             stringBuilder.append('_');
63         else if (i == 6)
64             stringBuilder.append('.');
65         stringBuilder.append(lowerNibbleToASCIIHexDigit(randomValues[i]));
66         stringBuilder.append(upperNibbleToASCIIHexDigit(randomValues[i]));
67     }
68     return stringBuilder.toString();
69 }
70 
// Returns a copy of |text| in which every character that is not printable ASCII
// has been replaced by '?'. Printable characters are copied through unchanged.
static String replaceNonPrintableCharacters(const String& text)
{
    StringBuilder sanitized;
    const size_t characterCount = text.length();
    for (size_t index = 0; index < characterCount; ++index) {
        if (!isASCIIPrintable(text[index])) {
            sanitized.append('?');
            continue;
        }
        sanitized.append(text[index]);
    }
    return sanitized.toString();
}
82 
MHTMLArchive()83 MHTMLArchive::MHTMLArchive()
84 {
85 }
86 
~MHTMLArchive()87 MHTMLArchive::~MHTMLArchive()
88 {
89     // Because all frames know about each other we need to perform a deep clearing of the archives graph.
90     clearAllSubframeArchives();
91 }
92 
create()93 PassRefPtr<MHTMLArchive> MHTMLArchive::create()
94 {
95     return adoptRef(new MHTMLArchive);
96 }
97 
create(const KURL & url,SharedBuffer * data)98 PassRefPtr<MHTMLArchive> MHTMLArchive::create(const KURL& url, SharedBuffer* data)
99 {
100     // For security reasons we only load MHTML pages from local URLs.
101     if (!SchemeRegistry::shouldTreatURLSchemeAsLocal(url.protocol()))
102         return nullptr;
103 
104     MHTMLParser parser(data);
105     RefPtr<MHTMLArchive> mainArchive = parser.parseArchive();
106     if (!mainArchive)
107         return nullptr; // Invalid MHTML file.
108 
109     // Since MHTML is a flat format, we need to make all frames aware of all resources.
110     for (size_t i = 0; i < parser.frameCount(); ++i) {
111         RefPtr<MHTMLArchive> archive = parser.frameAt(i);
112         for (size_t j = 1; j < parser.frameCount(); ++j) {
113             if (i != j)
114                 archive->addSubframeArchive(parser.frameAt(j));
115         }
116         for (size_t j = 0; j < parser.subResourceCount(); ++j)
117             archive->addSubresource(parser.subResourceAt(j));
118     }
119     return mainArchive.release();
120 }
121 
generateMHTMLData(const Vector<SerializedResource> & resources,EncodingPolicy encodingPolicy,const String & title,const String & mimeType)122 PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(const Vector<SerializedResource>& resources, EncodingPolicy encodingPolicy, const String& title, const String& mimeType)
123 {
124     String boundary = generateRandomBoundary();
125     String endOfResourceBoundary = "--" + boundary + "\r\n";
126 
127     DateComponents now;
128     now.setMillisecondsSinceEpochForDateTime(currentTimeMS());
129     String dateString = makeRFC2822DateString(now.weekDay(), now.monthDay(), now.month(), now.fullYear(), now.hour(), now.minute(), now.second(), 0);
130 
131     StringBuilder stringBuilder;
132     stringBuilder.append("From: <Saved by WebKit>\r\n");
133     stringBuilder.append("Subject: ");
134     // We replace non ASCII characters with '?' characters to match IE's behavior.
135     stringBuilder.append(replaceNonPrintableCharacters(title));
136     stringBuilder.append("\r\nDate: ");
137     stringBuilder.append(dateString);
138     stringBuilder.append("\r\nMIME-Version: 1.0\r\n");
139     stringBuilder.append("Content-Type: multipart/related;\r\n");
140     stringBuilder.append("\ttype=\"");
141     stringBuilder.append(mimeType);
142     stringBuilder.append("\";\r\n");
143     stringBuilder.append("\tboundary=\"");
144     stringBuilder.append(boundary);
145     stringBuilder.append("\"\r\n\r\n");
146 
147     // We use utf8() below instead of ascii() as ascii() replaces CRLFs with ?? (we still only have put ASCII characters in it).
148     ASSERT(stringBuilder.toString().containsOnlyASCII());
149     CString asciiString = stringBuilder.toString().utf8();
150     RefPtr<SharedBuffer> mhtmlData = SharedBuffer::create();
151     mhtmlData->append(asciiString.data(), asciiString.length());
152 
153     for (size_t i = 0; i < resources.size(); ++i) {
154         const SerializedResource& resource = resources[i];
155 
156         stringBuilder.clear();
157         stringBuilder.append(endOfResourceBoundary);
158         stringBuilder.append("Content-Type: ");
159         stringBuilder.append(resource.mimeType);
160 
161         const char* contentEncoding = 0;
162         if (encodingPolicy == UseBinaryEncoding)
163             contentEncoding = binary;
164         else if (MIMETypeRegistry::isSupportedJavaScriptMIMEType(resource.mimeType) || MIMETypeRegistry::isSupportedNonImageMIMEType(resource.mimeType))
165             contentEncoding = quotedPrintable;
166         else
167             contentEncoding = base64;
168 
169         stringBuilder.append("\r\nContent-Transfer-Encoding: ");
170         stringBuilder.append(contentEncoding);
171         stringBuilder.append("\r\nContent-Location: ");
172         stringBuilder.append(resource.url);
173         stringBuilder.append("\r\n\r\n");
174 
175         asciiString = stringBuilder.toString().utf8();
176         mhtmlData->append(asciiString.data(), asciiString.length());
177 
178         if (!strcmp(contentEncoding, binary)) {
179             const char* data;
180             size_t position = 0;
181             while (size_t length = resource.data->getSomeData(data, position)) {
182                 mhtmlData->append(data, length);
183                 position += length;
184             }
185         } else {
186             // FIXME: ideally we would encode the content as a stream without having to fetch it all.
187             const char* data = resource.data->data();
188             size_t dataLength = resource.data->size();
189             Vector<char> encodedData;
190             if (!strcmp(contentEncoding, quotedPrintable)) {
191                 quotedPrintableEncode(data, dataLength, encodedData);
192                 mhtmlData->append(encodedData.data(), encodedData.size());
193                 mhtmlData->append("\r\n", 2);
194             } else {
195                 ASSERT(!strcmp(contentEncoding, base64));
196                 // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs.
197                 base64Encode(data, dataLength, encodedData);
198                 const size_t maximumLineLength = 76;
199                 size_t index = 0;
200                 size_t encodedDataLength = encodedData.size();
201                 do {
202                     size_t lineLength = std::min(encodedDataLength - index, maximumLineLength);
203                     mhtmlData->append(encodedData.data() + index, lineLength);
204                     mhtmlData->append("\r\n", 2);
205                     index += maximumLineLength;
206                 } while (index < encodedDataLength);
207             }
208         }
209     }
210 
211     asciiString = String("--" + boundary + "--\r\n").utf8();
212     mhtmlData->append(asciiString.data(), asciiString.length());
213 
214     return mhtmlData.release();
215 }
216 
clearAllSubframeArchives()217 void MHTMLArchive::clearAllSubframeArchives()
218 {
219     Vector<RefPtr<MHTMLArchive> > clearedArchives;
220     clearAllSubframeArchivesImpl(&clearedArchives);
221 }
222 
clearAllSubframeArchivesImpl(Vector<RefPtr<MHTMLArchive>> * clearedArchives)223 void MHTMLArchive::clearAllSubframeArchivesImpl(Vector<RefPtr<MHTMLArchive> >* clearedArchives)
224 {
225     for (Vector<RefPtr<MHTMLArchive> >::iterator it = m_subframeArchives.begin(); it != m_subframeArchives.end(); ++it) {
226         if (!clearedArchives->contains(*it)) {
227             clearedArchives->append(*it);
228             (*it)->clearAllSubframeArchivesImpl(clearedArchives);
229         }
230     }
231     m_subframeArchives.clear();
232 }
233 
234 }
235