1 /*
2 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/
5 * Copyright (C) 2009 Google Inc. All rights reserved.
6 * Copyright (C) 2011 Apple Inc. All Rights Reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
18 * its contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
22 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
25 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include "config.h"
34 #include "platform/network/HTTPParsers.h"
35
36 #include "wtf/DateMath.h"
37 #include "wtf/MathExtras.h"
38 #include "wtf/text/CString.h"
39 #include "wtf/text/StringBuilder.h"
40 #include "wtf/text/WTFString.h"
41 #include "wtf/unicode/CharacterNames.h"
42
43 using namespace WTF;
44
45 namespace WebCore {
46
47 // true if there is more to parse, after incrementing pos past whitespace.
48 // Note: Might return pos == str.length()
skipWhiteSpace(const String & str,unsigned & pos,bool fromHttpEquivMeta)49 static inline bool skipWhiteSpace(const String& str, unsigned& pos, bool fromHttpEquivMeta)
50 {
51 unsigned len = str.length();
52
53 if (fromHttpEquivMeta) {
54 while (pos < len && str[pos] <= ' ')
55 ++pos;
56 } else {
57 while (pos < len && (str[pos] == '\t' || str[pos] == ' '))
58 ++pos;
59 }
60
61 return pos < len;
62 }
63
64 // Returns true if the function can match the whole token (case insensitive)
65 // incrementing pos on match, otherwise leaving pos unchanged.
66 // Note: Might return pos == str.length()
skipToken(const String & str,unsigned & pos,const char * token)67 static inline bool skipToken(const String& str, unsigned& pos, const char* token)
68 {
69 unsigned len = str.length();
70 unsigned current = pos;
71
72 while (current < len && *token) {
73 if (toASCIILower(str[current]) != *token++)
74 return false;
75 ++current;
76 }
77
78 if (*token)
79 return false;
80
81 pos = current;
82 return true;
83 }
84
85 // True if the expected equals sign is seen and there is more to follow.
skipEquals(const String & str,unsigned & pos)86 static inline bool skipEquals(const String& str, unsigned &pos)
87 {
88 return skipWhiteSpace(str, pos, false) && str[pos++] == '=' && skipWhiteSpace(str, pos, false);
89 }
90
91 // True if a value present, incrementing pos to next space or semicolon, if any.
92 // Note: might return pos == str.length().
skipValue(const String & str,unsigned & pos)93 static inline bool skipValue(const String& str, unsigned& pos)
94 {
95 unsigned start = pos;
96 unsigned len = str.length();
97 while (pos < len) {
98 if (str[pos] == ' ' || str[pos] == '\t' || str[pos] == ';')
99 break;
100 ++pos;
101 }
102 return pos != start;
103 }
104
isValidHTTPHeaderValue(const String & name)105 bool isValidHTTPHeaderValue(const String& name)
106 {
107 // FIXME: This should really match name against
108 // field-value in section 4.2 of RFC 2616.
109
110 return name.containsOnlyLatin1() && !name.contains('\r') && !name.contains('\n');
111 }
112
113 // See RFC 2616, Section 2.2.
isValidHTTPToken(const String & characters)114 bool isValidHTTPToken(const String& characters)
115 {
116 if (characters.isEmpty())
117 return false;
118 for (unsigned i = 0; i < characters.length(); ++i) {
119 UChar c = characters[i];
120 if (c <= 0x20 || c >= 0x7F
121 || c == '(' || c == ')' || c == '<' || c == '>' || c == '@'
122 || c == ',' || c == ';' || c == ':' || c == '\\' || c == '"'
123 || c == '/' || c == '[' || c == ']' || c == '?' || c == '='
124 || c == '{' || c == '}')
125 return false;
126 }
127 return true;
128 }
129
130 static const size_t maxInputSampleSize = 128;
trimInputSample(const char * p,size_t length)131 static String trimInputSample(const char* p, size_t length)
132 {
133 if (length > maxInputSampleSize)
134 return String(p, maxInputSampleSize) + horizontalEllipsis;
135 return String(p, length);
136 }
137
contentDispositionType(const String & contentDisposition)138 ContentDispositionType contentDispositionType(const String& contentDisposition)
139 {
140 if (contentDisposition.isEmpty())
141 return ContentDispositionNone;
142
143 Vector<String> parameters;
144 contentDisposition.split(';', parameters);
145
146 if (parameters.isEmpty())
147 return ContentDispositionNone;
148
149 String dispositionType = parameters[0];
150 dispositionType.stripWhiteSpace();
151
152 if (equalIgnoringCase(dispositionType, "inline"))
153 return ContentDispositionInline;
154
155 // Some broken sites just send bogus headers like
156 //
157 // Content-Disposition: ; filename="file"
158 // Content-Disposition: filename="file"
159 // Content-Disposition: name="file"
160 //
161 // without a disposition token... screen those out.
162 if (!isValidHTTPToken(dispositionType))
163 return ContentDispositionNone;
164
165 // We have a content-disposition of "attachment" or unknown.
166 // RFC 2183, section 2.8 says that an unknown disposition
167 // value should be treated as "attachment"
168 return ContentDispositionAttachment;
169 }
170
parseHTTPRefresh(const String & refresh,bool fromHttpEquivMeta,double & delay,String & url)171 bool parseHTTPRefresh(const String& refresh, bool fromHttpEquivMeta, double& delay, String& url)
172 {
173 unsigned len = refresh.length();
174 unsigned pos = 0;
175
176 if (!skipWhiteSpace(refresh, pos, fromHttpEquivMeta))
177 return false;
178
179 while (pos != len && refresh[pos] != ',' && refresh[pos] != ';')
180 ++pos;
181
182 if (pos == len) { // no URL
183 url = String();
184 bool ok;
185 delay = refresh.stripWhiteSpace().toDouble(&ok);
186 return ok;
187 } else {
188 bool ok;
189 delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok);
190 if (!ok)
191 return false;
192
193 ++pos;
194 skipWhiteSpace(refresh, pos, fromHttpEquivMeta);
195 unsigned urlStartPos = pos;
196 if (refresh.find("url", urlStartPos, false) == urlStartPos) {
197 urlStartPos += 3;
198 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
199 if (refresh[urlStartPos] == '=') {
200 ++urlStartPos;
201 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
202 } else {
203 urlStartPos = pos; // e.g. "Refresh: 0; url.html"
204 }
205 }
206
207 unsigned urlEndPos = len;
208
209 if (refresh[urlStartPos] == '"' || refresh[urlStartPos] == '\'') {
210 UChar quotationMark = refresh[urlStartPos];
211 urlStartPos++;
212 while (urlEndPos > urlStartPos) {
213 urlEndPos--;
214 if (refresh[urlEndPos] == quotationMark)
215 break;
216 }
217
218 // https://bugs.webkit.org/show_bug.cgi?id=27868
219 // Sometimes there is no closing quote for the end of the URL even though there was an opening quote.
220 // If we looped over the entire alleged URL string back to the opening quote, just go ahead and use everything
221 // after the opening quote instead.
222 if (urlEndPos == urlStartPos)
223 urlEndPos = len;
224 }
225
226 url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace();
227 return true;
228 }
229 }
230
parseDate(const String & value)231 double parseDate(const String& value)
232 {
233 return parseDateFromNullTerminatedCharacters(value.utf8().data());
234 }
235
236 // FIXME: This function doesn't comply with RFC 6266.
237 // For example, this function doesn't handle the interaction between " and ;
238 // that arises from quoted-string, nor does this function properly unquote
239 // attribute values. Further this function appears to process parameter names
240 // in a case-sensitive manner. (There are likely other bugs as well.)
filenameFromHTTPContentDisposition(const String & value)241 String filenameFromHTTPContentDisposition(const String& value)
242 {
243 Vector<String> keyValuePairs;
244 value.split(';', keyValuePairs);
245
246 unsigned length = keyValuePairs.size();
247 for (unsigned i = 0; i < length; i++) {
248 size_t valueStartPos = keyValuePairs[i].find('=');
249 if (valueStartPos == kNotFound)
250 continue;
251
252 String key = keyValuePairs[i].left(valueStartPos).stripWhiteSpace();
253
254 if (key.isEmpty() || key != "filename")
255 continue;
256
257 String value = keyValuePairs[i].substring(valueStartPos + 1).stripWhiteSpace();
258
259 // Remove quotes if there are any
260 if (value[0] == '\"')
261 value = value.substring(1, value.length() - 2);
262
263 return value;
264 }
265
266 return String();
267 }
268
// Extracts the MIME type from a media type string: everything before the
// first ';' or ',', with all whitespace characters removed. When nothing was
// removed, mediaType itself is returned.
AtomicString extractMIMETypeFromMediaType(const AtomicString& mediaType)
{
    const unsigned length = mediaType.length();
    StringBuilder mimeType;
    mimeType.reserveCapacity(length);

    for (unsigned i = 0; i < length; ++i) {
        const UChar c = mediaType[i];

        // ';' ends the type.
        //
        // While RFC 2616 does not allow it, other browsers allow multiple values in the HTTP media
        // type header field, Content-Type. In such cases, the media type string passed here may contain
        // the multiple values separated by commas. For now, this code ignores text after the first comma,
        // which prevents it from simply failing to parse such types altogether. Later for better
        // compatibility we could consider using the first or last valid MIME type instead.
        // See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion.
        if (c == ';' || c == ',')
            break;

        // FIXME: The following is not correct. RFC 2616 allows linear white space before and
        // after the MIME type, but not within the MIME type itself. And linear white space
        // includes only a few specific ASCII characters; a small subset of isSpaceOrNewline.
        // See https://bugs.webkit.org/show_bug.cgi?id=8644 for a bug tracking part of this.
        if (!isSpaceOrNewline(c))
            mimeType.append(c);
    }

    // Every character was kept, so the input already is the MIME type.
    if (mimeType.length() == length)
        return mediaType;
    return mimeType.toAtomicString();
}
303
// Returns the charset parameter value located by findCharsetInMediaType(), or
// an empty substring when no charset parameter is found.
// NOTE(review): the three-argument call relies on a default for the `start`
// parameter that is declared outside this file - presumably 0; confirm in the header.
String extractCharsetFromMediaType(const String& mediaType)
{
    unsigned charsetPos;
    unsigned charsetLen;
    findCharsetInMediaType(mediaType, charsetPos, charsetLen);
    return mediaType.substring(charsetPos, charsetLen);
}
310
findCharsetInMediaType(const String & mediaType,unsigned & charsetPos,unsigned & charsetLen,unsigned start)311 void findCharsetInMediaType(const String& mediaType, unsigned& charsetPos, unsigned& charsetLen, unsigned start)
312 {
313 charsetPos = start;
314 charsetLen = 0;
315
316 size_t pos = start;
317 unsigned length = mediaType.length();
318
319 while (pos < length) {
320 pos = mediaType.find("charset", pos, false);
321 if (pos == kNotFound || !pos) {
322 charsetLen = 0;
323 return;
324 }
325
326 // is what we found a beginning of a word?
327 if (mediaType[pos-1] > ' ' && mediaType[pos-1] != ';') {
328 pos += 7;
329 continue;
330 }
331
332 pos += 7;
333
334 // skip whitespace
335 while (pos != length && mediaType[pos] <= ' ')
336 ++pos;
337
338 if (mediaType[pos++] != '=') // this "charset" substring wasn't a parameter name, but there may be others
339 continue;
340
341 while (pos != length && (mediaType[pos] <= ' ' || mediaType[pos] == '"' || mediaType[pos] == '\''))
342 ++pos;
343
344 // we don't handle spaces within quoted parameter values, because charset names cannot have any
345 unsigned endpos = pos;
346 while (pos != length && mediaType[endpos] > ' ' && mediaType[endpos] != '"' && mediaType[endpos] != '\'' && mediaType[endpos] != ';')
347 ++endpos;
348
349 charsetPos = pos;
350 charsetLen = endpos - pos;
351 return;
352 }
353 }
354
parseXSSProtectionHeader(const String & header,String & failureReason,unsigned & failurePosition,String & reportURL)355 ReflectedXSSDisposition parseXSSProtectionHeader(const String& header, String& failureReason, unsigned& failurePosition, String& reportURL)
356 {
357 DEFINE_STATIC_LOCAL(String, failureReasonInvalidToggle, ("expected 0 or 1"));
358 DEFINE_STATIC_LOCAL(String, failureReasonInvalidSeparator, ("expected semicolon"));
359 DEFINE_STATIC_LOCAL(String, failureReasonInvalidEquals, ("expected equals sign"));
360 DEFINE_STATIC_LOCAL(String, failureReasonInvalidMode, ("invalid mode directive"));
361 DEFINE_STATIC_LOCAL(String, failureReasonInvalidReport, ("invalid report directive"));
362 DEFINE_STATIC_LOCAL(String, failureReasonDuplicateMode, ("duplicate mode directive"));
363 DEFINE_STATIC_LOCAL(String, failureReasonDuplicateReport, ("duplicate report directive"));
364 DEFINE_STATIC_LOCAL(String, failureReasonInvalidDirective, ("unrecognized directive"));
365
366 unsigned pos = 0;
367
368 if (!skipWhiteSpace(header, pos, false))
369 return ReflectedXSSUnset;
370
371 if (header[pos] == '0')
372 return AllowReflectedXSS;
373
374 if (header[pos++] != '1') {
375 failureReason = failureReasonInvalidToggle;
376 return ReflectedXSSInvalid;
377 }
378
379 ReflectedXSSDisposition result = FilterReflectedXSS;
380 bool modeDirectiveSeen = false;
381 bool reportDirectiveSeen = false;
382
383 while (1) {
384 // At end of previous directive: consume whitespace, semicolon, and whitespace.
385 if (!skipWhiteSpace(header, pos, false))
386 return result;
387
388 if (header[pos++] != ';') {
389 failureReason = failureReasonInvalidSeparator;
390 failurePosition = pos;
391 return ReflectedXSSInvalid;
392 }
393
394 if (!skipWhiteSpace(header, pos, false))
395 return result;
396
397 // At start of next directive.
398 if (skipToken(header, pos, "mode")) {
399 if (modeDirectiveSeen) {
400 failureReason = failureReasonDuplicateMode;
401 failurePosition = pos;
402 return ReflectedXSSInvalid;
403 }
404 modeDirectiveSeen = true;
405 if (!skipEquals(header, pos)) {
406 failureReason = failureReasonInvalidEquals;
407 failurePosition = pos;
408 return ReflectedXSSInvalid;
409 }
410 if (!skipToken(header, pos, "block")) {
411 failureReason = failureReasonInvalidMode;
412 failurePosition = pos;
413 return ReflectedXSSInvalid;
414 }
415 result = BlockReflectedXSS;
416 } else if (skipToken(header, pos, "report")) {
417 if (reportDirectiveSeen) {
418 failureReason = failureReasonDuplicateReport;
419 failurePosition = pos;
420 return ReflectedXSSInvalid;
421 }
422 reportDirectiveSeen = true;
423 if (!skipEquals(header, pos)) {
424 failureReason = failureReasonInvalidEquals;
425 failurePosition = pos;
426 return ReflectedXSSInvalid;
427 }
428 size_t startPos = pos;
429 if (!skipValue(header, pos)) {
430 failureReason = failureReasonInvalidReport;
431 failurePosition = pos;
432 return ReflectedXSSInvalid;
433 }
434 reportURL = header.substring(startPos, pos - startPos);
435 failurePosition = startPos; // If later semantic check deems unacceptable.
436 } else {
437 failureReason = failureReasonInvalidDirective;
438 failurePosition = pos;
439 return ReflectedXSSInvalid;
440 }
441 }
442 }
443
parseContentTypeOptionsHeader(const String & header)444 ContentTypeOptionsDisposition parseContentTypeOptionsHeader(const String& header)
445 {
446 if (header.stripWhiteSpace().lower() == "nosniff")
447 return ContentTypeOptionsNosniff;
448 return ContentTypeOptionsNone;
449 }
450
// Returns the part of statusLine that follows its second space, i.e. the text
// after the status code in a "<version> <code> <reason>" status line.
String extractReasonPhraseFromHTTPStatusLine(const String& statusLine)
{
    // The space that ends the first field.
    const size_t firstSpace = statusLine.find(' ');
    // Remove status code from the status line.
    const size_t secondSpace = statusLine.find(' ', firstSpace + 1);
    return statusLine.substring(secondSpace + 1);
}
458
parseXFrameOptionsHeader(const String & header)459 XFrameOptionsDisposition parseXFrameOptionsHeader(const String& header)
460 {
461 XFrameOptionsDisposition result = XFrameOptionsNone;
462
463 if (header.isEmpty())
464 return result;
465
466 Vector<String> headers;
467 header.split(',', headers);
468
469 for (size_t i = 0; i < headers.size(); i++) {
470 String currentHeader = headers[i].stripWhiteSpace();
471 XFrameOptionsDisposition currentValue = XFrameOptionsNone;
472 if (equalIgnoringCase(currentHeader, "deny"))
473 currentValue = XFrameOptionsDeny;
474 else if (equalIgnoringCase(currentHeader, "sameorigin"))
475 currentValue = XFrameOptionsSameOrigin;
476 else if (equalIgnoringCase(currentHeader, "allowall"))
477 currentValue = XFrameOptionsAllowAll;
478 else
479 currentValue = XFrameOptionsInvalid;
480
481 if (result == XFrameOptionsNone)
482 result = currentValue;
483 else if (result != currentValue)
484 return XFrameOptionsConflict;
485 }
486 return result;
487 }
488
parseRange(const String & range,long long & rangeOffset,long long & rangeEnd,long long & rangeSuffixLength)489 bool parseRange(const String& range, long long& rangeOffset, long long& rangeEnd, long long& rangeSuffixLength)
490 {
491 // The format of "Range" header is defined in RFC 2616 Section 14.35.1.
492 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1
493 // We don't support multiple range requests.
494
495 rangeOffset = rangeEnd = rangeSuffixLength = -1;
496
497 // The "bytes" unit identifier should be present.
498 static const char bytesStart[] = "bytes=";
499 if (!range.startsWith(bytesStart, false))
500 return false;
501 String byteRange = range.substring(sizeof(bytesStart) - 1);
502
503 // The '-' character needs to be present.
504 int index = byteRange.find('-');
505 if (index == -1)
506 return false;
507
508 // If the '-' character is at the beginning, the suffix length, which specifies the last N bytes, is provided.
509 // Example:
510 // -500
511 if (!index) {
512 String suffixLengthString = byteRange.substring(index + 1).stripWhiteSpace();
513 bool ok;
514 long long value = suffixLengthString.toInt64Strict(&ok);
515 if (ok)
516 rangeSuffixLength = value;
517 return true;
518 }
519
520 // Otherwise, the first-byte-position and the last-byte-position are provied.
521 // Examples:
522 // 0-499
523 // 500-
524 String firstBytePosStr = byteRange.left(index).stripWhiteSpace();
525 bool ok;
526 long long firstBytePos = firstBytePosStr.toInt64Strict(&ok);
527 if (!ok)
528 return false;
529
530 String lastBytePosStr = byteRange.substring(index + 1).stripWhiteSpace();
531 long long lastBytePos = -1;
532 if (!lastBytePosStr.isEmpty()) {
533 lastBytePos = lastBytePosStr.toInt64Strict(&ok);
534 if (!ok)
535 return false;
536 }
537
538 if (firstBytePos < 0 || !(lastBytePos == -1 || lastBytePos >= firstBytePos))
539 return false;
540
541 rangeOffset = firstBytePos;
542 rangeEnd = lastBytePos;
543 return true;
544 }
545
546 // HTTP/1.1 - RFC 2616
547 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1
548 // Request-Line = Method SP Request-URI SP HTTP-Version CRLF
parseHTTPRequestLine(const char * data,size_t length,String & failureReason,String & method,String & url,HTTPVersion & httpVersion)549 size_t parseHTTPRequestLine(const char* data, size_t length, String& failureReason, String& method, String& url, HTTPVersion& httpVersion)
550 {
551 method = String();
552 url = String();
553 httpVersion = Unknown;
554
555 const char* space1 = 0;
556 const char* space2 = 0;
557 const char* p;
558 size_t consumedLength;
559
560 for (p = data, consumedLength = 0; consumedLength < length; p++, consumedLength++) {
561 if (*p == ' ') {
562 if (!space1)
563 space1 = p;
564 else if (!space2)
565 space2 = p;
566 } else if (*p == '\n') {
567 break;
568 }
569 }
570
571 // Haven't finished header line.
572 if (consumedLength == length) {
573 failureReason = "Incomplete Request Line";
574 return 0;
575 }
576
577 // RequestLine does not contain 3 parts.
578 if (!space1 || !space2) {
579 failureReason = "Request Line does not appear to contain: <Method> <Url> <HTTPVersion>.";
580 return 0;
581 }
582
583 // The line must end with "\r\n".
584 const char* end = p + 1;
585 if (*(end - 2) != '\r') {
586 failureReason = "Request line does not end with CRLF";
587 return 0;
588 }
589
590 // Request Method.
591 method = String(data, space1 - data); // For length subtract 1 for space, but add 1 for data being the first character.
592
593 // Request URI.
594 url = String(space1 + 1, space2 - space1 - 1); // For length subtract 1 for space.
595
596 // HTTP Version.
597 String httpVersionString(space2 + 1, end - space2 - 3); // For length subtract 1 for space, and 2 for "\r\n".
598 if (httpVersionString.length() != 8 || !httpVersionString.startsWith("HTTP/1."))
599 httpVersion = Unknown;
600 else if (httpVersionString[7] == '0')
601 httpVersion = HTTP_1_0;
602 else if (httpVersionString[7] == '1')
603 httpVersion = HTTP_1_1;
604 else
605 httpVersion = Unknown;
606
607 return end - data;
608 }
609
parseHTTPHeaderName(const char * s,size_t start,size_t size,String & failureReason,size_t * position,AtomicString * name)610 static bool parseHTTPHeaderName(const char* s, size_t start, size_t size, String& failureReason, size_t* position, AtomicString* name)
611 {
612 size_t nameBegin = start;
613 for (size_t i = start; i < size; ++i) {
614 switch (s[i]) {
615 case '\r':
616 failureReason = "Unexpected CR in name at " + trimInputSample(&s[nameBegin], i - nameBegin);
617 return false;
618 case '\n':
619 failureReason = "Unexpected LF in name at " + trimInputSample(&s[nameBegin], i - nameBegin);
620 return false;
621 case ':':
622 if (i == nameBegin) {
623 failureReason = "Header name is missing";
624 return false;
625 }
626 *name = AtomicString::fromUTF8(&s[nameBegin], i - nameBegin);
627 if (name->isNull()) {
628 failureReason = "Invalid UTF-8 sequence in header name";
629 return false;
630 }
631 *position = i;
632 return true;
633 default:
634 break;
635 }
636 }
637 failureReason = "Unterminated header name";
638 return false;
639 }
640
parseHTTPHeaderValue(const char * s,size_t start,size_t size,String & failureReason,size_t * position,AtomicString * value)641 static bool parseHTTPHeaderValue(const char* s, size_t start, size_t size, String& failureReason, size_t* position, AtomicString* value)
642 {
643 size_t i = start;
644 for (; i < size && s[i] == ' '; ++i) {
645 }
646 size_t valueBegin = i;
647
648 for (; i < size && s[i] != '\r'; ++i) {
649 if (s[i] == '\n') {
650 failureReason = "Unexpected LF in value at " + trimInputSample(&s[valueBegin], i - valueBegin);
651 return false;
652 }
653 }
654 if (i == size) {
655 failureReason = "Unterminated header value";
656 return false;
657 }
658
659 ASSERT(i < size && s[i] == '\r');
660 if (i + 1 >= size || s[i + 1] != '\n') {
661 failureReason = "LF doesn't follow CR after value at " + trimInputSample(&s[i + 1], size - i - 1);
662 return false;
663 }
664
665 *value = AtomicString::fromUTF8(&s[valueBegin], i - valueBegin);
666 if (i != valueBegin && value->isNull()) {
667 failureReason = "Invalid UTF-8 sequence in header value";
668 return false;
669 }
670
671 // 2 for strlen("\r\n")
672 *position = i + 2;
673 return true;
674 }
675
676 // Note that the header is already parsed and re-formatted in chromium side.
677 // We assume that the input is more restricted than RFC2616.
parseHTTPHeader(const char * s,size_t size,String & failureReason,AtomicString & name,AtomicString & value)678 size_t parseHTTPHeader(const char* s, size_t size, String& failureReason, AtomicString& name, AtomicString& value)
679 {
680 name = nullAtom;
681 value = nullAtom;
682 if (size >= 1 && s[0] == '\r') {
683 if (size >= 2 && s[1] == '\n') {
684 // Skip an empty line.
685 return 2;
686 }
687 failureReason = "LF doesn't follow CR at " + trimInputSample(0, size);
688 return 0;
689 }
690 size_t current = 0;
691 if (!parseHTTPHeaderName(s, current, size, failureReason, ¤t, &name)) {
692 return 0;
693 }
694 ASSERT(s[current] == ':');
695 ++current;
696
697 if (!parseHTTPHeaderValue(s, current, size, failureReason, ¤t, &value)) {
698 return 0;
699 }
700
701 return current;
702 }
703
parseHTTPRequestBody(const char * data,size_t length,Vector<unsigned char> & body)704 size_t parseHTTPRequestBody(const char* data, size_t length, Vector<unsigned char>& body)
705 {
706 body.clear();
707 body.append(data, length);
708
709 return length;
710 }
711
isCacheHeaderSeparator(UChar c)712 static bool isCacheHeaderSeparator(UChar c)
713 {
714 // See RFC 2616, Section 2.2
715 switch (c) {
716 case '(':
717 case ')':
718 case '<':
719 case '>':
720 case '@':
721 case ',':
722 case ';':
723 case ':':
724 case '\\':
725 case '"':
726 case '/':
727 case '[':
728 case ']':
729 case '?':
730 case '=':
731 case '{':
732 case '}':
733 case ' ':
734 case '\t':
735 return true;
736 default:
737 return false;
738 }
739 }
740
isControlCharacter(UChar c)741 static bool isControlCharacter(UChar c)
742 {
743 return c < ' ' || c == 127;
744 }
745
// Returns the prefix of str that precedes its first separator character, as
// defined by isCacheHeaderSeparator().
static inline String trimToNextSeparator(const String& str)
{
    const size_t separatorPos = str.find(isCacheHeaderSeparator);
    return str.substring(0, separatorPos);
}
750
parseCacheHeader(const String & header,Vector<pair<String,String>> & result)751 static void parseCacheHeader(const String& header, Vector<pair<String, String> >& result)
752 {
753 const String safeHeader = header.removeCharacters(isControlCharacter);
754 unsigned max = safeHeader.length();
755 for (unsigned pos = 0; pos < max; /* pos incremented in loop */) {
756 size_t nextCommaPosition = safeHeader.find(',', pos);
757 size_t nextEqualSignPosition = safeHeader.find('=', pos);
758 if (nextEqualSignPosition != kNotFound && (nextEqualSignPosition < nextCommaPosition || nextCommaPosition == kNotFound)) {
759 // Get directive name, parse right hand side of equal sign, then add to map
760 String directive = trimToNextSeparator(safeHeader.substring(pos, nextEqualSignPosition - pos).stripWhiteSpace());
761 pos += nextEqualSignPosition - pos + 1;
762
763 String value = safeHeader.substring(pos, max - pos).stripWhiteSpace();
764 if (value[0] == '"') {
765 // The value is a quoted string
766 size_t nextDoubleQuotePosition = value.find('"', 1);
767 if (nextDoubleQuotePosition != kNotFound) {
768 // Store the value as a quoted string without quotes
769 result.append(pair<String, String>(directive, value.substring(1, nextDoubleQuotePosition - 1).stripWhiteSpace()));
770 pos += (safeHeader.find('"', pos) - pos) + nextDoubleQuotePosition + 1;
771 // Move past next comma, if there is one
772 size_t nextCommaPosition2 = safeHeader.find(',', pos);
773 if (nextCommaPosition2 != kNotFound)
774 pos += nextCommaPosition2 - pos + 1;
775 else
776 return; // Parse error if there is anything left with no comma
777 } else {
778 // Parse error; just use the rest as the value
779 result.append(pair<String, String>(directive, trimToNextSeparator(value.substring(1, value.length() - 1).stripWhiteSpace())));
780 return;
781 }
782 } else {
783 // The value is a token until the next comma
784 size_t nextCommaPosition2 = value.find(',');
785 if (nextCommaPosition2 != kNotFound) {
786 // The value is delimited by the next comma
787 result.append(pair<String, String>(directive, trimToNextSeparator(value.substring(0, nextCommaPosition2).stripWhiteSpace())));
788 pos += (safeHeader.find(',', pos) - pos) + 1;
789 } else {
790 // The rest is the value; no change to value needed
791 result.append(pair<String, String>(directive, trimToNextSeparator(value)));
792 return;
793 }
794 }
795 } else if (nextCommaPosition != kNotFound && (nextCommaPosition < nextEqualSignPosition || nextEqualSignPosition == kNotFound)) {
796 // Add directive to map with empty string as value
797 result.append(pair<String, String>(trimToNextSeparator(safeHeader.substring(pos, nextCommaPosition - pos).stripWhiteSpace()), ""));
798 pos += nextCommaPosition - pos + 1;
799 } else {
800 // Add last directive to map with empty string as value
801 result.append(pair<String, String>(trimToNextSeparator(safeHeader.substring(pos, max - pos).stripWhiteSpace()), ""));
802 return;
803 }
804 }
805 }
806
parseCacheControlDirectives(const AtomicString & cacheControlValue,const AtomicString & pragmaValue)807 CacheControlHeader parseCacheControlDirectives(const AtomicString& cacheControlValue, const AtomicString& pragmaValue)
808 {
809 CacheControlHeader cacheControlHeader;
810 cacheControlHeader.parsed = true;
811 cacheControlHeader.maxAge = std::numeric_limits<double>::quiet_NaN();
812
813 DEFINE_STATIC_LOCAL(const AtomicString, noCacheDirective, ("no-cache", AtomicString::ConstructFromLiteral));
814 DEFINE_STATIC_LOCAL(const AtomicString, noStoreDirective, ("no-store", AtomicString::ConstructFromLiteral));
815 DEFINE_STATIC_LOCAL(const AtomicString, mustRevalidateDirective, ("must-revalidate", AtomicString::ConstructFromLiteral));
816 DEFINE_STATIC_LOCAL(const AtomicString, maxAgeDirective, ("max-age", AtomicString::ConstructFromLiteral));
817
818 if (!cacheControlValue.isEmpty()) {
819 Vector<pair<String, String> > directives;
820 parseCacheHeader(cacheControlValue, directives);
821
822 size_t directivesSize = directives.size();
823 for (size_t i = 0; i < directivesSize; ++i) {
824 // RFC2616 14.9.1: A no-cache directive with a value is only meaningful for proxy caches.
825 // It should be ignored by a browser level cache.
826 if (equalIgnoringCase(directives[i].first, noCacheDirective) && directives[i].second.isEmpty()) {
827 cacheControlHeader.containsNoCache = true;
828 } else if (equalIgnoringCase(directives[i].first, noStoreDirective)) {
829 cacheControlHeader.containsNoStore = true;
830 } else if (equalIgnoringCase(directives[i].first, mustRevalidateDirective)) {
831 cacheControlHeader.containsMustRevalidate = true;
832 } else if (equalIgnoringCase(directives[i].first, maxAgeDirective)) {
833 if (!std::isnan(cacheControlHeader.maxAge)) {
834 // First max-age directive wins if there are multiple ones.
835 continue;
836 }
837 bool ok;
838 double maxAge = directives[i].second.toDouble(&ok);
839 if (ok)
840 cacheControlHeader.maxAge = maxAge;
841 }
842 }
843 }
844
845 if (!cacheControlHeader.containsNoCache) {
846 // Handle Pragma: no-cache
847 // This is deprecated and equivalent to Cache-control: no-cache
848 // Don't bother tokenizing the value, it is not important
849 cacheControlHeader.containsNoCache = pragmaValue.lower().contains(noCacheDirective);
850 }
851 return cacheControlHeader;
852 }
853
854 }
855