1 /*
2 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
16 * its contributors may be used to endorse or promote products derived
17 * from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "config.h"
32 #include "HTTPParsers.h"
33
34 #include "CString.h"
35 #include "PlatformString.h"
36 #include <wtf/DateMath.h>
37
38 using namespace WTF;
39
40 namespace WebCore {
41
42 // true if there is more to parse
skipWhiteSpace(const String & str,int & pos,bool fromHttpEquivMeta)43 static inline bool skipWhiteSpace(const String& str, int& pos, bool fromHttpEquivMeta)
44 {
45 int len = str.length();
46
47 if (fromHttpEquivMeta) {
48 while (pos != len && str[pos] <= ' ')
49 ++pos;
50 } else {
51 while (pos != len && (str[pos] == '\t' || str[pos] == ' '))
52 ++pos;
53 }
54
55 return pos != len;
56 }
57
parseHTTPRefresh(const String & refresh,bool fromHttpEquivMeta,double & delay,String & url)58 bool parseHTTPRefresh(const String& refresh, bool fromHttpEquivMeta, double& delay, String& url)
59 {
60 int len = refresh.length();
61 int pos = 0;
62
63 if (!skipWhiteSpace(refresh, pos, fromHttpEquivMeta))
64 return false;
65
66 while (pos != len && refresh[pos] != ',' && refresh[pos] != ';')
67 ++pos;
68
69 if (pos == len) { // no URL
70 url = String();
71 bool ok;
72 delay = refresh.stripWhiteSpace().toDouble(&ok);
73 return ok;
74 } else {
75 bool ok;
76 delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok);
77 if (!ok)
78 return false;
79
80 ++pos;
81 skipWhiteSpace(refresh, pos, fromHttpEquivMeta);
82 int urlStartPos = pos;
83 if (refresh.find("url", urlStartPos, false) == urlStartPos) {
84 urlStartPos += 3;
85 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
86 if (refresh[urlStartPos] == '=') {
87 ++urlStartPos;
88 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
89 } else
90 urlStartPos = pos; // e.g. "Refresh: 0; url.html"
91 }
92
93 int urlEndPos = len;
94
95 if (refresh[urlStartPos] == '"' || refresh[urlStartPos] == '\'') {
96 UChar quotationMark = refresh[urlStartPos];
97 urlStartPos++;
98 while (urlEndPos > urlStartPos) {
99 urlEndPos--;
100 if (refresh[urlEndPos] == quotationMark)
101 break;
102 }
103
104 // https://bugs.webkit.org/show_bug.cgi?id=27868
105 // Sometimes there is no closing quote for the end of the URL even though there was an opening quote.
106 // If we looped over the entire alleged URL string back to the opening quote, just go ahead and use everything
107 // after the opening quote instead.
108 if (urlEndPos == urlStartPos)
109 urlEndPos = len;
110 }
111
112 url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace();
113 return true;
114 }
115 }
116
parseDate(const String & value)117 double parseDate(const String& value)
118 {
119 return parseDateFromNullTerminatedCharacters(value.utf8().data());
120 }
121
// Extracts the "filename" parameter from a Content-Disposition header value,
// e.g. yields report.pdf for: attachment; filename="report.pdf"
//
// The value is split on ';' and each piece is treated as a key=value pair. The first
// pair whose white-space-stripped key equals "filename" wins; its value is returned
// with surrounding white space and enclosing double quotes removed.
//
// Returns a null String when no "filename" parameter is present.
String filenameFromHTTPContentDisposition(const String& value)
{
    Vector<String> keyValuePairs;
    value.split(';', keyValuePairs);

    unsigned length = keyValuePairs.size();
    for (unsigned i = 0; i < length; i++) {
        const String& pair = keyValuePairs[i];

        int valueStartPos = pair.find('=');
        if (valueStartPos < 0)
            continue;

        String key = pair.left(valueStartPos).stripWhiteSpace();

        if (key.isEmpty() || key != "filename")
            continue;

        String filename = pair.substring(valueStartPos + 1).stripWhiteSpace();

        // Remove quotes if there are any. The closing quote is dropped only when it is
        // actually present, so a value with an unterminated quote (e.g. "foo) keeps its
        // last character, and a value consisting of a single quote character cannot
        // underflow the unsigned length computation.
        unsigned filenameLength = filename.length();
        if (filenameLength && filename[0] == '"') {
            unsigned unquotedLength = filenameLength - 1; // without the opening quote
            if (unquotedLength && filename[filenameLength - 1] == '"')
                --unquotedLength; // without the closing quote
            filename = filename.substring(1, unquotedLength);
        }

        return filename;
    }

    return String();
}
149
// Returns the MIME type portion of a media type string: everything before the first
// ';' or ',' with all isSpaceOrNewline() characters removed.
// If nothing had to be removed, mediaType itself is returned.
String extractMIMETypeFromMediaType(const String& mediaType)
{
    const unsigned inputLength = mediaType.length();

    Vector<UChar, 64> stripped;
    stripped.reserveCapacity(inputLength);

    for (unsigned index = 0; index < inputLength; index++) {
        const UChar ch = mediaType[index];

        // ';' ends the MIME type.
        //
        // ',' also ends it: while RFC 2616 does not allow it, other browsers allow
        // multiple values in the HTTP media type header field, Content-Type. In such
        // cases, the media type string passed here may contain the multiple values
        // separated by commas. For now, this code ignores text after the first comma,
        // which prevents it from simply failing to parse such types altogether. Later,
        // for better compatibility, we could consider using the first or last valid
        // MIME type instead.
        // See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion.
        if (ch == ';' || ch == ',')
            break;

        // FIXME: The following is not correct. RFC 2616 allows linear white space before
        // and after the MIME type, but not within the MIME type itself. And linear white
        // space includes only a few specific ASCII characters; a small subset of
        // isSpaceOrNewline.
        // See https://bugs.webkit.org/show_bug.cgi?id=8644 for a bug tracking part of this.
        if (!isSpaceOrNewline(ch))
            stripped.append(ch);
    }

    // Something was dropped (a parameter, an extra value or white space): build a new string.
    if (stripped.size() != inputLength)
        return String(stripped.data(), stripped.size());

    // Every character was kept, so the input already is the bare MIME type.
    return mediaType;
}
184
// Extracts the value of the "charset" parameter from a media type string such as
//     text/html; charset=utf-8
// Leading white space and quote characters before the value are skipped, and the value
// ends at the first white space, quote, ';' or end of string.
//
// Returns a null String when no charset parameter is found.
String extractCharsetFromMediaType(const String& mediaType)
{
    static const int charsetKeywordLength = 7; // number of characters in "charset"

    int pos = 0;
    int length = static_cast<int>(mediaType.length());

    while (pos < length) {
        pos = mediaType.find("charset", pos, false);
        // A negative result means there is no further occurrence. A match at index 0 is
        // rejected as well: there is no preceding character for the word-boundary test
        // below to examine.
        if (pos <= 0)
            return String();

        // is what we found a beginning of a word?
        if (mediaType[pos - 1] > ' ' && mediaType[pos - 1] != ';') {
            pos += charsetKeywordLength;
            continue;
        }

        pos += charsetKeywordLength;

        // skip whitespace
        while (pos != length && mediaType[pos] <= ' ')
            ++pos;

        // "charset" was the last thing in the string, so there is no value to extract.
        if (pos == length)
            return String();

        if (mediaType[pos++] != '=') // this "charset" substring wasn't a parameter name, but there may be others
            continue;

        // skip whitespace and opening quotes in front of the value
        while (pos != length && (mediaType[pos] <= ' ' || mediaType[pos] == '"' || mediaType[pos] == '\''))
            ++pos;

        // we don't handle spaces within quoted parameter values, because charset names cannot have any
        // The scan is bounded by endpos (not pos) so it can never index past the end of the string.
        int endpos = pos;
        while (endpos != length && mediaType[endpos] > ' ' && mediaType[endpos] != '"' && mediaType[endpos] != '\'' && mediaType[endpos] != ';')
            ++endpos;

        return mediaType.substring(pos, endpos - pos);
    }

    return String();
}
223 }
224