• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
3  * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4  * Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/
5  * Copyright (C) 2009 Google Inc. All rights reserved.
6  * Copyright (C) 2011 Apple Inc. All Rights Reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1.  Redistributions of source code must retain the above copyright
13  *     notice, this list of conditions and the following disclaimer.
14  * 2.  Redistributions in binary form must reproduce the above copyright
15  *     notice, this list of conditions and the following disclaimer in the
16  *     documentation and/or other materials provided with the distribution.
17  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
18  *     its contributors may be used to endorse or promote products derived
19  *     from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
22  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
25  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include "config.h"
34 #include "platform/network/HTTPParsers.h"
35 
36 #include "wtf/DateMath.h"
37 #include "wtf/MathExtras.h"
38 #include "wtf/text/CString.h"
39 #include "wtf/text/StringBuilder.h"
40 #include "wtf/text/WTFString.h"
41 #include "wtf/unicode/CharacterNames.h"
42 
43 using namespace WTF;
44 
45 namespace WebCore {
46 
47 // true if there is more to parse, after incrementing pos past whitespace.
48 // Note: Might return pos == str.length()
skipWhiteSpace(const String & str,unsigned & pos,bool fromHttpEquivMeta)49 static inline bool skipWhiteSpace(const String& str, unsigned& pos, bool fromHttpEquivMeta)
50 {
51     unsigned len = str.length();
52 
53     if (fromHttpEquivMeta) {
54         while (pos < len && str[pos] <= ' ')
55             ++pos;
56     } else {
57         while (pos < len && (str[pos] == '\t' || str[pos] == ' '))
58             ++pos;
59     }
60 
61     return pos < len;
62 }
63 
64 // Returns true if the function can match the whole token (case insensitive)
65 // incrementing pos on match, otherwise leaving pos unchanged.
66 // Note: Might return pos == str.length()
skipToken(const String & str,unsigned & pos,const char * token)67 static inline bool skipToken(const String& str, unsigned& pos, const char* token)
68 {
69     unsigned len = str.length();
70     unsigned current = pos;
71 
72     while (current < len && *token) {
73         if (toASCIILower(str[current]) != *token++)
74             return false;
75         ++current;
76     }
77 
78     if (*token)
79         return false;
80 
81     pos = current;
82     return true;
83 }
84 
85 // True if the expected equals sign is seen and there is more to follow.
skipEquals(const String & str,unsigned & pos)86 static inline bool skipEquals(const String& str, unsigned &pos)
87 {
88     return skipWhiteSpace(str, pos, false) && str[pos++] == '=' && skipWhiteSpace(str, pos, false);
89 }
90 
91 // True if a value present, incrementing pos to next space or semicolon, if any.
92 // Note: might return pos == str.length().
skipValue(const String & str,unsigned & pos)93 static inline bool skipValue(const String& str, unsigned& pos)
94 {
95     unsigned start = pos;
96     unsigned len = str.length();
97     while (pos < len) {
98         if (str[pos] == ' ' || str[pos] == '\t' || str[pos] == ';')
99             break;
100         ++pos;
101     }
102     return pos != start;
103 }
104 
isValidHTTPHeaderValue(const String & name)105 bool isValidHTTPHeaderValue(const String& name)
106 {
107     // FIXME: This should really match name against
108     // field-value in section 4.2 of RFC 2616.
109 
110     return name.containsOnlyLatin1() && !name.contains('\r') && !name.contains('\n');
111 }
112 
113 // See RFC 2616, Section 2.2.
isValidHTTPToken(const String & characters)114 bool isValidHTTPToken(const String& characters)
115 {
116     if (characters.isEmpty())
117         return false;
118     for (unsigned i = 0; i < characters.length(); ++i) {
119         UChar c = characters[i];
120         if (c <= 0x20 || c >= 0x7F
121             || c == '(' || c == ')' || c == '<' || c == '>' || c == '@'
122             || c == ',' || c == ';' || c == ':' || c == '\\' || c == '"'
123             || c == '/' || c == '[' || c == ']' || c == '?' || c == '='
124             || c == '{' || c == '}')
125         return false;
126     }
127     return true;
128 }
129 
130 static const size_t maxInputSampleSize = 128;
trimInputSample(const char * p,size_t length)131 static String trimInputSample(const char* p, size_t length)
132 {
133     if (length > maxInputSampleSize)
134         return String(p, maxInputSampleSize) + horizontalEllipsis;
135     return String(p, length);
136 }
137 
contentDispositionType(const String & contentDisposition)138 ContentDispositionType contentDispositionType(const String& contentDisposition)
139 {
140     if (contentDisposition.isEmpty())
141         return ContentDispositionNone;
142 
143     Vector<String> parameters;
144     contentDisposition.split(';', parameters);
145 
146     if (parameters.isEmpty())
147         return ContentDispositionNone;
148 
149     String dispositionType = parameters[0];
150     dispositionType.stripWhiteSpace();
151 
152     if (equalIgnoringCase(dispositionType, "inline"))
153         return ContentDispositionInline;
154 
155     // Some broken sites just send bogus headers like
156     //
157     //   Content-Disposition: ; filename="file"
158     //   Content-Disposition: filename="file"
159     //   Content-Disposition: name="file"
160     //
161     // without a disposition token... screen those out.
162     if (!isValidHTTPToken(dispositionType))
163         return ContentDispositionNone;
164 
165     // We have a content-disposition of "attachment" or unknown.
166     // RFC 2183, section 2.8 says that an unknown disposition
167     // value should be treated as "attachment"
168     return ContentDispositionAttachment;
169 }
170 
parseHTTPRefresh(const String & refresh,bool fromHttpEquivMeta,double & delay,String & url)171 bool parseHTTPRefresh(const String& refresh, bool fromHttpEquivMeta, double& delay, String& url)
172 {
173     unsigned len = refresh.length();
174     unsigned pos = 0;
175 
176     if (!skipWhiteSpace(refresh, pos, fromHttpEquivMeta))
177         return false;
178 
179     while (pos != len && refresh[pos] != ',' && refresh[pos] != ';')
180         ++pos;
181 
182     if (pos == len) { // no URL
183         url = String();
184         bool ok;
185         delay = refresh.stripWhiteSpace().toDouble(&ok);
186         return ok;
187     } else {
188         bool ok;
189         delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok);
190         if (!ok)
191             return false;
192 
193         ++pos;
194         skipWhiteSpace(refresh, pos, fromHttpEquivMeta);
195         unsigned urlStartPos = pos;
196         if (refresh.find("url", urlStartPos, false) == urlStartPos) {
197             urlStartPos += 3;
198             skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
199             if (refresh[urlStartPos] == '=') {
200                 ++urlStartPos;
201                 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
202             } else {
203                 urlStartPos = pos; // e.g. "Refresh: 0; url.html"
204             }
205         }
206 
207         unsigned urlEndPos = len;
208 
209         if (refresh[urlStartPos] == '"' || refresh[urlStartPos] == '\'') {
210             UChar quotationMark = refresh[urlStartPos];
211             urlStartPos++;
212             while (urlEndPos > urlStartPos) {
213                 urlEndPos--;
214                 if (refresh[urlEndPos] == quotationMark)
215                     break;
216             }
217 
218             // https://bugs.webkit.org/show_bug.cgi?id=27868
219             // Sometimes there is no closing quote for the end of the URL even though there was an opening quote.
220             // If we looped over the entire alleged URL string back to the opening quote, just go ahead and use everything
221             // after the opening quote instead.
222             if (urlEndPos == urlStartPos)
223                 urlEndPos = len;
224         }
225 
226         url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace();
227         return true;
228     }
229 }
230 
parseDate(const String & value)231 double parseDate(const String& value)
232 {
233     return parseDateFromNullTerminatedCharacters(value.utf8().data());
234 }
235 
236 // FIXME: This function doesn't comply with RFC 6266.
237 // For example, this function doesn't handle the interaction between " and ;
238 // that arises from quoted-string, nor does this function properly unquote
239 // attribute values. Further this function appears to process parameter names
240 // in a case-sensitive manner. (There are likely other bugs as well.)
filenameFromHTTPContentDisposition(const String & value)241 String filenameFromHTTPContentDisposition(const String& value)
242 {
243     Vector<String> keyValuePairs;
244     value.split(';', keyValuePairs);
245 
246     unsigned length = keyValuePairs.size();
247     for (unsigned i = 0; i < length; i++) {
248         size_t valueStartPos = keyValuePairs[i].find('=');
249         if (valueStartPos == kNotFound)
250             continue;
251 
252         String key = keyValuePairs[i].left(valueStartPos).stripWhiteSpace();
253 
254         if (key.isEmpty() || key != "filename")
255             continue;
256 
257         String value = keyValuePairs[i].substring(valueStartPos + 1).stripWhiteSpace();
258 
259         // Remove quotes if there are any
260         if (value[0] == '\"')
261             value = value.substring(1, value.length() - 2);
262 
263         return value;
264     }
265 
266     return String();
267 }
268 
// Returns the MIME type portion of |mediaType|: every character up to the
// first ';' or ',' with white space (per isSpaceOrNewline) removed. If nothing
// had to be dropped, |mediaType| itself is returned.
AtomicString extractMIMETypeFromMediaType(const AtomicString& mediaType)
{
    const unsigned length = mediaType.length();

    StringBuilder mimeType;
    mimeType.reserveCapacity(length);

    for (unsigned i = 0; i < length; ++i) {
        const UChar c = mediaType[i];

        // ';' starts the parameter list.
        //
        // While RFC 2616 does not allow it, other browsers allow multiple values in the HTTP media
        // type header field, Content-Type. In such cases, the media type string passed here may contain
        // the multiple values separated by commas. For now, this code ignores text after the first comma,
        // which prevents it from simply failing to parse such types altogether. Later for better
        // compatibility we could consider using the first or last valid MIME type instead.
        // See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion.
        if (c == ';' || c == ',')
            break;

        // FIXME: The following is not correct. RFC 2616 allows linear white space before and
        // after the MIME type, but not within the MIME type itself. And linear white space
        // includes only a few specific ASCII characters; a small subset of isSpaceOrNewline.
        // See https://bugs.webkit.org/show_bug.cgi?id=8644 for a bug tracking part of this.
        if (!isSpaceOrNewline(c))
            mimeType.append(c);
    }

    // Every character was kept, so the input can be returned unchanged.
    if (mimeType.length() == length)
        return mediaType;

    return mimeType.toAtomicString();
}
303 
// Returns the charset parameter value located by findCharsetInMediaType() as a
// substring of |mediaType|.
String extractCharsetFromMediaType(const String& mediaType)
{
    unsigned charsetPos;
    unsigned charsetLen;
    findCharsetInMediaType(mediaType, charsetPos, charsetLen);

    return mediaType.substring(charsetPos, charsetLen);
}
310 
findCharsetInMediaType(const String & mediaType,unsigned & charsetPos,unsigned & charsetLen,unsigned start)311 void findCharsetInMediaType(const String& mediaType, unsigned& charsetPos, unsigned& charsetLen, unsigned start)
312 {
313     charsetPos = start;
314     charsetLen = 0;
315 
316     size_t pos = start;
317     unsigned length = mediaType.length();
318 
319     while (pos < length) {
320         pos = mediaType.find("charset", pos, false);
321         if (pos == kNotFound || !pos) {
322             charsetLen = 0;
323             return;
324         }
325 
326         // is what we found a beginning of a word?
327         if (mediaType[pos-1] > ' ' && mediaType[pos-1] != ';') {
328             pos += 7;
329             continue;
330         }
331 
332         pos += 7;
333 
334         // skip whitespace
335         while (pos != length && mediaType[pos] <= ' ')
336             ++pos;
337 
338         if (mediaType[pos++] != '=') // this "charset" substring wasn't a parameter name, but there may be others
339             continue;
340 
341         while (pos != length && (mediaType[pos] <= ' ' || mediaType[pos] == '"' || mediaType[pos] == '\''))
342             ++pos;
343 
344         // we don't handle spaces within quoted parameter values, because charset names cannot have any
345         unsigned endpos = pos;
346         while (pos != length && mediaType[endpos] > ' ' && mediaType[endpos] != '"' && mediaType[endpos] != '\'' && mediaType[endpos] != ';')
347             ++endpos;
348 
349         charsetPos = pos;
350         charsetLen = endpos - pos;
351         return;
352     }
353 }
354 
parseXSSProtectionHeader(const String & header,String & failureReason,unsigned & failurePosition,String & reportURL)355 ReflectedXSSDisposition parseXSSProtectionHeader(const String& header, String& failureReason, unsigned& failurePosition, String& reportURL)
356 {
357     DEFINE_STATIC_LOCAL(String, failureReasonInvalidToggle, ("expected 0 or 1"));
358     DEFINE_STATIC_LOCAL(String, failureReasonInvalidSeparator, ("expected semicolon"));
359     DEFINE_STATIC_LOCAL(String, failureReasonInvalidEquals, ("expected equals sign"));
360     DEFINE_STATIC_LOCAL(String, failureReasonInvalidMode, ("invalid mode directive"));
361     DEFINE_STATIC_LOCAL(String, failureReasonInvalidReport, ("invalid report directive"));
362     DEFINE_STATIC_LOCAL(String, failureReasonDuplicateMode, ("duplicate mode directive"));
363     DEFINE_STATIC_LOCAL(String, failureReasonDuplicateReport, ("duplicate report directive"));
364     DEFINE_STATIC_LOCAL(String, failureReasonInvalidDirective, ("unrecognized directive"));
365 
366     unsigned pos = 0;
367 
368     if (!skipWhiteSpace(header, pos, false))
369         return ReflectedXSSUnset;
370 
371     if (header[pos] == '0')
372         return AllowReflectedXSS;
373 
374     if (header[pos++] != '1') {
375         failureReason = failureReasonInvalidToggle;
376         return ReflectedXSSInvalid;
377     }
378 
379     ReflectedXSSDisposition result = FilterReflectedXSS;
380     bool modeDirectiveSeen = false;
381     bool reportDirectiveSeen = false;
382 
383     while (1) {
384         // At end of previous directive: consume whitespace, semicolon, and whitespace.
385         if (!skipWhiteSpace(header, pos, false))
386             return result;
387 
388         if (header[pos++] != ';') {
389             failureReason = failureReasonInvalidSeparator;
390             failurePosition = pos;
391             return ReflectedXSSInvalid;
392         }
393 
394         if (!skipWhiteSpace(header, pos, false))
395             return result;
396 
397         // At start of next directive.
398         if (skipToken(header, pos, "mode")) {
399             if (modeDirectiveSeen) {
400                 failureReason = failureReasonDuplicateMode;
401                 failurePosition = pos;
402                 return ReflectedXSSInvalid;
403             }
404             modeDirectiveSeen = true;
405             if (!skipEquals(header, pos)) {
406                 failureReason = failureReasonInvalidEquals;
407                 failurePosition = pos;
408                 return ReflectedXSSInvalid;
409             }
410             if (!skipToken(header, pos, "block")) {
411                 failureReason = failureReasonInvalidMode;
412                 failurePosition = pos;
413                 return ReflectedXSSInvalid;
414             }
415             result = BlockReflectedXSS;
416         } else if (skipToken(header, pos, "report")) {
417             if (reportDirectiveSeen) {
418                 failureReason = failureReasonDuplicateReport;
419                 failurePosition = pos;
420                 return ReflectedXSSInvalid;
421             }
422             reportDirectiveSeen = true;
423             if (!skipEquals(header, pos)) {
424                 failureReason = failureReasonInvalidEquals;
425                 failurePosition = pos;
426                 return ReflectedXSSInvalid;
427             }
428             size_t startPos = pos;
429             if (!skipValue(header, pos)) {
430                 failureReason = failureReasonInvalidReport;
431                 failurePosition = pos;
432                 return ReflectedXSSInvalid;
433             }
434             reportURL = header.substring(startPos, pos - startPos);
435             failurePosition = startPos; // If later semantic check deems unacceptable.
436         } else {
437             failureReason = failureReasonInvalidDirective;
438             failurePosition = pos;
439             return ReflectedXSSInvalid;
440         }
441     }
442 }
443 
parseContentTypeOptionsHeader(const String & header)444 ContentTypeOptionsDisposition parseContentTypeOptionsHeader(const String& header)
445 {
446     if (header.stripWhiteSpace().lower() == "nosniff")
447         return ContentTypeOptionsNosniff;
448     return ContentTypeOptionsNone;
449 }
450 
// Returns the part of |statusLine| that follows its second space, i.e. the
// text after the HTTP version and the status code.
// NOTE(review): when a space is missing, find() reports "not found" and the
// "+ 1" arithmetic below is applied to that sentinel; confirm the resulting
// behaviour is what callers expect.
String extractReasonPhraseFromHTTPStatusLine(const String& statusLine)
{
    // The first space ends the HTTP version.
    const size_t firstSpace = statusLine.find(' ');

    // The second space ends the status code.
    const size_t secondSpace = statusLine.find(' ', firstSpace + 1);

    return statusLine.substring(secondSpace + 1);
}
458 
parseXFrameOptionsHeader(const String & header)459 XFrameOptionsDisposition parseXFrameOptionsHeader(const String& header)
460 {
461     XFrameOptionsDisposition result = XFrameOptionsNone;
462 
463     if (header.isEmpty())
464         return result;
465 
466     Vector<String> headers;
467     header.split(',', headers);
468 
469     for (size_t i = 0; i < headers.size(); i++) {
470         String currentHeader = headers[i].stripWhiteSpace();
471         XFrameOptionsDisposition currentValue = XFrameOptionsNone;
472         if (equalIgnoringCase(currentHeader, "deny"))
473             currentValue = XFrameOptionsDeny;
474         else if (equalIgnoringCase(currentHeader, "sameorigin"))
475             currentValue = XFrameOptionsSameOrigin;
476         else if (equalIgnoringCase(currentHeader, "allowall"))
477             currentValue = XFrameOptionsAllowAll;
478         else
479             currentValue = XFrameOptionsInvalid;
480 
481         if (result == XFrameOptionsNone)
482             result = currentValue;
483         else if (result != currentValue)
484             return XFrameOptionsConflict;
485     }
486     return result;
487 }
488 
parseRange(const String & range,long long & rangeOffset,long long & rangeEnd,long long & rangeSuffixLength)489 bool parseRange(const String& range, long long& rangeOffset, long long& rangeEnd, long long& rangeSuffixLength)
490 {
491     // The format of "Range" header is defined in RFC 2616 Section 14.35.1.
492     // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1
493     // We don't support multiple range requests.
494 
495     rangeOffset = rangeEnd = rangeSuffixLength = -1;
496 
497     // The "bytes" unit identifier should be present.
498     static const char bytesStart[] = "bytes=";
499     if (!range.startsWith(bytesStart, false))
500         return false;
501     String byteRange = range.substring(sizeof(bytesStart) - 1);
502 
503     // The '-' character needs to be present.
504     int index = byteRange.find('-');
505     if (index == -1)
506         return false;
507 
508     // If the '-' character is at the beginning, the suffix length, which specifies the last N bytes, is provided.
509     // Example:
510     //     -500
511     if (!index) {
512         String suffixLengthString = byteRange.substring(index + 1).stripWhiteSpace();
513         bool ok;
514         long long value = suffixLengthString.toInt64Strict(&ok);
515         if (ok)
516             rangeSuffixLength = value;
517         return true;
518     }
519 
520     // Otherwise, the first-byte-position and the last-byte-position are provied.
521     // Examples:
522     //     0-499
523     //     500-
524     String firstBytePosStr = byteRange.left(index).stripWhiteSpace();
525     bool ok;
526     long long firstBytePos = firstBytePosStr.toInt64Strict(&ok);
527     if (!ok)
528         return false;
529 
530     String lastBytePosStr = byteRange.substring(index + 1).stripWhiteSpace();
531     long long lastBytePos = -1;
532     if (!lastBytePosStr.isEmpty()) {
533         lastBytePos = lastBytePosStr.toInt64Strict(&ok);
534         if (!ok)
535             return false;
536     }
537 
538     if (firstBytePos < 0 || !(lastBytePos == -1 || lastBytePos >= firstBytePos))
539         return false;
540 
541     rangeOffset = firstBytePos;
542     rangeEnd = lastBytePos;
543     return true;
544 }
545 
546 // HTTP/1.1 - RFC 2616
547 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1
548 // Request-Line = Method SP Request-URI SP HTTP-Version CRLF
parseHTTPRequestLine(const char * data,size_t length,String & failureReason,String & method,String & url,HTTPVersion & httpVersion)549 size_t parseHTTPRequestLine(const char* data, size_t length, String& failureReason, String& method, String& url, HTTPVersion& httpVersion)
550 {
551     method = String();
552     url = String();
553     httpVersion = Unknown;
554 
555     const char* space1 = 0;
556     const char* space2 = 0;
557     const char* p;
558     size_t consumedLength;
559 
560     for (p = data, consumedLength = 0; consumedLength < length; p++, consumedLength++) {
561         if (*p == ' ') {
562             if (!space1)
563                 space1 = p;
564             else if (!space2)
565                 space2 = p;
566         } else if (*p == '\n') {
567             break;
568         }
569     }
570 
571     // Haven't finished header line.
572     if (consumedLength == length) {
573         failureReason = "Incomplete Request Line";
574         return 0;
575     }
576 
577     // RequestLine does not contain 3 parts.
578     if (!space1 || !space2) {
579         failureReason = "Request Line does not appear to contain: <Method> <Url> <HTTPVersion>.";
580         return 0;
581     }
582 
583     // The line must end with "\r\n".
584     const char* end = p + 1;
585     if (*(end - 2) != '\r') {
586         failureReason = "Request line does not end with CRLF";
587         return 0;
588     }
589 
590     // Request Method.
591     method = String(data, space1 - data); // For length subtract 1 for space, but add 1 for data being the first character.
592 
593     // Request URI.
594     url = String(space1 + 1, space2 - space1 - 1); // For length subtract 1 for space.
595 
596     // HTTP Version.
597     String httpVersionString(space2 + 1, end - space2 - 3); // For length subtract 1 for space, and 2 for "\r\n".
598     if (httpVersionString.length() != 8 || !httpVersionString.startsWith("HTTP/1."))
599         httpVersion = Unknown;
600     else if (httpVersionString[7] == '0')
601         httpVersion = HTTP_1_0;
602     else if (httpVersionString[7] == '1')
603         httpVersion = HTTP_1_1;
604     else
605         httpVersion = Unknown;
606 
607     return end - data;
608 }
609 
parseHTTPHeaderName(const char * s,size_t start,size_t size,String & failureReason,size_t * position,AtomicString * name)610 static bool parseHTTPHeaderName(const char* s, size_t start, size_t size, String& failureReason, size_t* position, AtomicString* name)
611 {
612     size_t nameBegin = start;
613     for (size_t i = start; i < size; ++i) {
614         switch (s[i]) {
615         case '\r':
616             failureReason = "Unexpected CR in name at " + trimInputSample(&s[nameBegin], i - nameBegin);
617             return false;
618         case '\n':
619             failureReason = "Unexpected LF in name at " + trimInputSample(&s[nameBegin], i - nameBegin);
620             return false;
621         case ':':
622             if (i == nameBegin) {
623                 failureReason = "Header name is missing";
624                 return false;
625             }
626             *name = AtomicString::fromUTF8(&s[nameBegin], i - nameBegin);
627             if (name->isNull()) {
628                 failureReason = "Invalid UTF-8 sequence in header name";
629                 return false;
630             }
631             *position = i;
632             return true;
633         default:
634             break;
635         }
636     }
637     failureReason = "Unterminated header name";
638     return false;
639 }
640 
parseHTTPHeaderValue(const char * s,size_t start,size_t size,String & failureReason,size_t * position,AtomicString * value)641 static bool parseHTTPHeaderValue(const char* s, size_t start, size_t size, String& failureReason, size_t* position, AtomicString* value)
642 {
643     size_t i = start;
644     for (; i < size && s[i] == ' '; ++i) {
645     }
646     size_t valueBegin = i;
647 
648     for (; i < size && s[i] != '\r'; ++i) {
649         if (s[i] == '\n') {
650             failureReason = "Unexpected LF in value at " + trimInputSample(&s[valueBegin], i - valueBegin);
651             return false;
652         }
653     }
654     if (i == size) {
655         failureReason = "Unterminated header value";
656         return false;
657     }
658 
659     ASSERT(i < size && s[i] == '\r');
660     if (i + 1 >= size || s[i + 1] != '\n') {
661         failureReason = "LF doesn't follow CR after value at " + trimInputSample(&s[i + 1], size - i - 1);
662         return false;
663     }
664 
665     *value = AtomicString::fromUTF8(&s[valueBegin], i - valueBegin);
666     if (i != valueBegin && value->isNull()) {
667         failureReason = "Invalid UTF-8 sequence in header value";
668         return false;
669     }
670 
671     // 2 for strlen("\r\n")
672     *position = i + 2;
673     return true;
674 }
675 
676 // Note that the header is already parsed and re-formatted in chromium side.
677 // We assume that the input is more restricted than RFC2616.
parseHTTPHeader(const char * s,size_t size,String & failureReason,AtomicString & name,AtomicString & value)678 size_t parseHTTPHeader(const char* s, size_t size, String& failureReason, AtomicString& name, AtomicString& value)
679 {
680     name = nullAtom;
681     value = nullAtom;
682     if (size >= 1 && s[0] == '\r') {
683         if (size >= 2 && s[1] == '\n') {
684             // Skip an empty line.
685             return 2;
686         }
687         failureReason = "LF doesn't follow CR at " + trimInputSample(0, size);
688         return 0;
689     }
690     size_t current = 0;
691     if (!parseHTTPHeaderName(s, current, size, failureReason, &current, &name)) {
692         return 0;
693     }
694     ASSERT(s[current] == ':');
695     ++current;
696 
697     if (!parseHTTPHeaderValue(s, current, size, failureReason, &current, &value)) {
698         return 0;
699     }
700 
701     return current;
702 }
703 
parseHTTPRequestBody(const char * data,size_t length,Vector<unsigned char> & body)704 size_t parseHTTPRequestBody(const char* data, size_t length, Vector<unsigned char>& body)
705 {
706     body.clear();
707     body.append(data, length);
708 
709     return length;
710 }
711 
isCacheHeaderSeparator(UChar c)712 static bool isCacheHeaderSeparator(UChar c)
713 {
714     // See RFC 2616, Section 2.2
715     switch (c) {
716     case '(':
717     case ')':
718     case '<':
719     case '>':
720     case '@':
721     case ',':
722     case ';':
723     case ':':
724     case '\\':
725     case '"':
726     case '/':
727     case '[':
728     case ']':
729     case '?':
730     case '=':
731     case '{':
732     case '}':
733     case ' ':
734     case '\t':
735         return true;
736     default:
737         return false;
738     }
739 }
740 
isControlCharacter(UChar c)741 static bool isControlCharacter(UChar c)
742 {
743     return c < ' ' || c == 127;
744 }
745 
// Returns the prefix of |str| that precedes the first character for which
// isCacheHeaderSeparator() is true.
// NOTE(review): when there is no separator the "not found" result of find() is
// passed as the length; presumably substring() clamps it to the whole string.
static inline String trimToNextSeparator(const String& str)
{
    const size_t separatorPos = str.find(isCacheHeaderSeparator);
    return str.substring(0, separatorPos);
}
750 
parseCacheHeader(const String & header,Vector<pair<String,String>> & result)751 static void parseCacheHeader(const String& header, Vector<pair<String, String> >& result)
752 {
753     const String safeHeader = header.removeCharacters(isControlCharacter);
754     unsigned max = safeHeader.length();
755     for (unsigned pos = 0; pos < max; /* pos incremented in loop */) {
756         size_t nextCommaPosition = safeHeader.find(',', pos);
757         size_t nextEqualSignPosition = safeHeader.find('=', pos);
758         if (nextEqualSignPosition != kNotFound && (nextEqualSignPosition < nextCommaPosition || nextCommaPosition == kNotFound)) {
759             // Get directive name, parse right hand side of equal sign, then add to map
760             String directive = trimToNextSeparator(safeHeader.substring(pos, nextEqualSignPosition - pos).stripWhiteSpace());
761             pos += nextEqualSignPosition - pos + 1;
762 
763             String value = safeHeader.substring(pos, max - pos).stripWhiteSpace();
764             if (value[0] == '"') {
765                 // The value is a quoted string
766                 size_t nextDoubleQuotePosition = value.find('"', 1);
767                 if (nextDoubleQuotePosition != kNotFound) {
768                     // Store the value as a quoted string without quotes
769                     result.append(pair<String, String>(directive, value.substring(1, nextDoubleQuotePosition - 1).stripWhiteSpace()));
770                     pos += (safeHeader.find('"', pos) - pos) + nextDoubleQuotePosition + 1;
771                     // Move past next comma, if there is one
772                     size_t nextCommaPosition2 = safeHeader.find(',', pos);
773                     if (nextCommaPosition2 != kNotFound)
774                         pos += nextCommaPosition2 - pos + 1;
775                     else
776                         return; // Parse error if there is anything left with no comma
777                 } else {
778                     // Parse error; just use the rest as the value
779                     result.append(pair<String, String>(directive, trimToNextSeparator(value.substring(1, value.length() - 1).stripWhiteSpace())));
780                     return;
781                 }
782             } else {
783                 // The value is a token until the next comma
784                 size_t nextCommaPosition2 = value.find(',');
785                 if (nextCommaPosition2 != kNotFound) {
786                     // The value is delimited by the next comma
787                     result.append(pair<String, String>(directive, trimToNextSeparator(value.substring(0, nextCommaPosition2).stripWhiteSpace())));
788                     pos += (safeHeader.find(',', pos) - pos) + 1;
789                 } else {
790                     // The rest is the value; no change to value needed
791                     result.append(pair<String, String>(directive, trimToNextSeparator(value)));
792                     return;
793                 }
794             }
795         } else if (nextCommaPosition != kNotFound && (nextCommaPosition < nextEqualSignPosition || nextEqualSignPosition == kNotFound)) {
796             // Add directive to map with empty string as value
797             result.append(pair<String, String>(trimToNextSeparator(safeHeader.substring(pos, nextCommaPosition - pos).stripWhiteSpace()), ""));
798             pos += nextCommaPosition - pos + 1;
799         } else {
800             // Add last directive to map with empty string as value
801             result.append(pair<String, String>(trimToNextSeparator(safeHeader.substring(pos, max - pos).stripWhiteSpace()), ""));
802             return;
803         }
804     }
805 }
806 
parseCacheControlDirectives(const AtomicString & cacheControlValue,const AtomicString & pragmaValue)807 CacheControlHeader parseCacheControlDirectives(const AtomicString& cacheControlValue, const AtomicString& pragmaValue)
808 {
809     CacheControlHeader cacheControlHeader;
810     cacheControlHeader.parsed = true;
811     cacheControlHeader.maxAge = std::numeric_limits<double>::quiet_NaN();
812 
813     DEFINE_STATIC_LOCAL(const AtomicString, noCacheDirective, ("no-cache", AtomicString::ConstructFromLiteral));
814     DEFINE_STATIC_LOCAL(const AtomicString, noStoreDirective, ("no-store", AtomicString::ConstructFromLiteral));
815     DEFINE_STATIC_LOCAL(const AtomicString, mustRevalidateDirective, ("must-revalidate", AtomicString::ConstructFromLiteral));
816     DEFINE_STATIC_LOCAL(const AtomicString, maxAgeDirective, ("max-age", AtomicString::ConstructFromLiteral));
817 
818     if (!cacheControlValue.isEmpty()) {
819         Vector<pair<String, String> > directives;
820         parseCacheHeader(cacheControlValue, directives);
821 
822         size_t directivesSize = directives.size();
823         for (size_t i = 0; i < directivesSize; ++i) {
824             // RFC2616 14.9.1: A no-cache directive with a value is only meaningful for proxy caches.
825             // It should be ignored by a browser level cache.
826             if (equalIgnoringCase(directives[i].first, noCacheDirective) && directives[i].second.isEmpty()) {
827                 cacheControlHeader.containsNoCache = true;
828             } else if (equalIgnoringCase(directives[i].first, noStoreDirective)) {
829                 cacheControlHeader.containsNoStore = true;
830             } else if (equalIgnoringCase(directives[i].first, mustRevalidateDirective)) {
831                 cacheControlHeader.containsMustRevalidate = true;
832             } else if (equalIgnoringCase(directives[i].first, maxAgeDirective)) {
833                 if (!std::isnan(cacheControlHeader.maxAge)) {
834                     // First max-age directive wins if there are multiple ones.
835                     continue;
836                 }
837                 bool ok;
838                 double maxAge = directives[i].second.toDouble(&ok);
839                 if (ok)
840                     cacheControlHeader.maxAge = maxAge;
841             }
842         }
843     }
844 
845     if (!cacheControlHeader.containsNoCache) {
846         // Handle Pragma: no-cache
847         // This is deprecated and equivalent to Cache-control: no-cache
848         // Don't bother tokenizing the value, it is not important
849         cacheControlHeader.containsNoCache = pragmaValue.lower().contains(noCacheDirective);
850     }
851     return cacheControlHeader;
852 }
853 
854 }
855