/*
 * Copyright (C) 2005 Apple Computer, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1.  Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 * 2.  Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
 *     its contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#import <WebKit/WebNSDataExtras.h>
#import <WebKit/WebNSDataExtrasPrivate.h>

#import <wtf/Assertions.h>

#import <string.h>
#import <strings.h>

@interface NSString (WebNSDataExtrasInternal)
- (NSString *)_web_capitalizeRFC822HeaderFieldName;
@end

@implementation NSString (WebNSDataExtrasInternal)

// Returns an autoreleased string in which the first character of the receiver
// and every character that directly follows a '-' is upper-cased when it is an
// ASCII letter, and every other ASCII letter is lower-cased (for example
// "content-TYPE" becomes "Content-Type"). Non-ASCII characters are left alone.
// When nothing needs to change the receiver itself is returned.
- (NSString *)_web_capitalizeRFC822HeaderFieldName
{
    CFStringRef name = (CFStringRef)self;
    CFIndex len = CFStringGetLength(name);

    // Exactly one of these buffers is allocated, lazily, the first time a
    // character actually has to be rewritten.
    char *charPtr = NULL;
    UniChar *uniCharPtr = NULL;
    Boolean useUniCharPtr = FALSE;
    Boolean shouldCapitalize = TRUE;
    Boolean somethingChanged = FALSE;

    CFIndex i;
    for (i = 0; i < len; i++) {
        UniChar ch = CFStringGetCharacterAtIndex(name, i);
        Boolean replace = FALSE;

        if (shouldCapitalize && ch >= 'a' && ch <= 'z') {
            ch = ch + 'A' - 'a';
            replace = TRUE;
        } else if (!shouldCapitalize && ch >= 'A' && ch <= 'Z') {
            ch = ch + 'a' - 'A';
            replace = TRUE;
        }

        if (replace) {
            if (!somethingChanged) {
                somethingChanged = TRUE;
                if (CFStringGetBytes(name, CFRangeMake(0, len), kCFStringEncodingISOLatin1, 0, FALSE, NULL, 0, NULL) == len) {
                    // Every character can be encoded in ISO Latin-1, so work
                    // on a one-byte-per-character C string copy.
                    useUniCharPtr = FALSE;
                    charPtr = CFAllocatorAllocate(NULL, len + 1, 0);
                    if (!charPtr)
                        return [[self retain] autorelease];
                    CFStringGetCString(name, charPtr, len + 1, kCFStringEncodingISOLatin1);
                } else {
                    useUniCharPtr = TRUE;
                    uniCharPtr = CFAllocatorAllocate(NULL, len * sizeof(UniChar), 0);
                    if (!uniCharPtr)
                        return [[self retain] autorelease];
                    CFStringGetCharacters(name, CFRangeMake(0, len), uniCharPtr);
                }
            }

            if (useUniCharPtr)
                uniCharPtr[i] = ch;
            else
                charPtr[i] = ch;
        }

        // Only the character right after a hyphen starts a new "word".
        shouldCapitalize = (ch == '-');
    }

    if (!somethingChanged)
        return [[self retain] autorelease];

    // The NoCopy constructors are handed the buffer together with a NULL
    // contentsDeallocator, exactly as before, so the buffer is not freed here.
    NSString *result;
    if (useUniCharPtr)
        result = (NSString *)CFMakeCollectable(CFStringCreateWithCharactersNoCopy(NULL, uniCharPtr, len, NULL));
    else
        result = (NSString *)CFMakeCollectable(CFStringCreateWithCStringNoCopy(NULL, charPtr, kCFStringEncodingISOLatin1, NULL));

    return [result autorelease];
}

@end

// Returns YES when the `available` bytes starting at `bytes` begin with the
// NUL-terminated string `prefix`, ignoring case. Nothing beyond `available`
// bytes is ever read, so this is safe on buffers that are not NUL-terminated.
static BOOL hasCaseInsensitivePrefix(const char *bytes, size_t available, const char *prefix)
{
    size_t prefixLength = strlen(prefix);
    return available >= prefixLength && strncasecmp(bytes, prefix, prefixLength) == 0;
}

@implementation NSData (WebKitExtras)

// Sniffs the first WEB_GUESS_MIME_TYPE_PEEK_LENGTH bytes of the receiver for an
// RSS or Atom feed. Returns @"application/rss+xml", @"application/atom+xml",
// or nil when the data does not look like either.
- (NSString *)_webkit_guessedMIMETypeForXML
{
    NSUInteger length = [self length];
    const char *bytes = (const char *)[self bytes];
    NSUInteger peekLength = MIN(length, (NSUInteger)WEB_GUESS_MIME_TYPE_PEEK_LENGTH);

    BOOL foundRDF = NO;
    NSUInteger offset = 0;

    while (offset < peekLength) {
        // Look for a "<".
        const char *hit = memchr(bytes + offset, '<', peekLength - offset);
        if (!hit)
            break;

        NSUInteger hitOffset = (NSUInteger)(hit - bytes);
        size_t available = peekLength - hitOffset;

        // We are trying to identify RSS or Atom. RSS has a top-level
        // element of either <rss> or <rdf>. However, there are
        // non-RSS RDF files, so in the case of <rdf> we further look
        // for a <channel> element. In the case of an Atom file, a
        // top-level <feed> element is all we need to see. Only tags
        // starting with <? or <! can precede the root element. We
        // bail if we don't find an <rss>, <feed> or <rdf> element
        // right after those.
        //
        // Every comparison is bounded by the bytes left in the peek window;
        // "<channel" is 8 characters, so an unbounded compare could read past
        // the end of the data.
        if (foundRDF) {
            if (hasCaseInsensitivePrefix(hit, available, "<channel"))
                return @"application/rss+xml";
        } else if (hasCaseInsensitivePrefix(hit, available, "<rdf")) {
            foundRDF = YES;
        } else if (hasCaseInsensitivePrefix(hit, available, "<rss")) {
            return @"application/rss+xml";
        } else if (hasCaseInsensitivePrefix(hit, available, "<feed")) {
            return @"application/atom+xml";
        } else if (!hasCaseInsensitivePrefix(hit, available, "<?") && !hasCaseInsensitivePrefix(hit, available, "<!")) {
            return nil;
        }

        // Skip the "<" and continue.
        offset = hitOffset + 1;
    }

    return nil;
}

// Guesses a MIME type from the content of the receiver. The checks run in this
// order: RSS/Atom (see -_webkit_guessedMIMETypeForXML), HTML markup, a literal
// "text/html" inside the content, vCard, vCalendar, plain text, JPEG. Returns
// nil when none of them match. Empty data is reported as @"text/plain".
- (NSString *)_webkit_guessedMIMEType
{
    static const char vCardHeader[] = "BEGIN:VCARD";
    static const char vCalendarHeader[] = "BEGIN:VCALENDAR";
    static const char jpegMagicNumber[] = "\xFF\xD8\xFF\xE0";

    NSString *MIMEType = [self _webkit_guessedMIMETypeForXML];
    if ([MIMEType length])
        return MIMEType;

    NSUInteger length = [self length];
    const char *bytes = (const char *)[self bytes];
    NSUInteger peekLength = MIN(length, (NSUInteger)WEB_GUESS_MIME_TYPE_PEEK_LENGTH);

    NSUInteger offset = 0;
    while (offset < peekLength) {
        // Look for a "<".
        const char *hit = memchr(bytes + offset, '<', peekLength - offset);
        if (!hit)
            break;

        NSUInteger hitOffset = (NSUInteger)(hit - bytes);
        size_t available = peekLength - hitOffset;

        // If we found a "<", look for "<html>", "<a ", "<script" or "<title>".
        if (hasCaseInsensitivePrefix(hit, available, "<html>")
            || hasCaseInsensitivePrefix(hit, available, "<a ")
            || hasCaseInsensitivePrefix(hit, available, "<script")
            || hasCaseInsensitivePrefix(hit, available, "<title>")) {
            return @"text/html";
        }

        // Skip the "<" and continue.
        offset = hitOffset + 1;
    }

    // Test for a broken server which has sent the content type as part of the content.
    // This code could be improved to look for other mime types.
    for (offset = 0; offset < peekLength; offset++) {
        if (hasCaseInsensitivePrefix(bytes + offset, peekLength - offset, "text/html"))
            return @"text/html";
    }

    if (length >= sizeof(vCardHeader) - 1 && memcmp(bytes, vCardHeader, sizeof(vCardHeader) - 1) == 0)
        return @"text/vcard";
    if (length >= sizeof(vCalendarHeader) - 1 && memcmp(bytes, vCalendarHeader, sizeof(vCalendarHeader) - 1) == 0)
        return @"text/calendar";

    // Test for plain text: only printable ASCII plus tab, CR and LF.
    NSUInteger i;
    for (i = 0; i < length; i++) {
        unsigned char c = (unsigned char)bytes[i];
        BOOL isPrintableASCII = (c >= 0x20 && c <= 0x7E);
        if (!isPrintableASCII && c != '\t' && c != '\r' && c != '\n')
            break;
    }
    if (i == length) {
        // Didn't encounter any bad characters, looks like plain text.
        return @"text/plain";
    }

    // Looks like this is a binary file.

    // Sniff for the JPEG magic number.
    if (length >= sizeof(jpegMagicNumber) - 1 && memcmp(bytes, jpegMagicNumber, sizeof(jpegMagicNumber) - 1) == 0)
        return @"image/jpeg";

    return nil;
}

@end

@implementation NSData (WebNSDataExtras)

// Returns YES when the receiver's bytes are exactly `string` (which must not be
// NULL), ignoring case. The lengths have to match: data that is merely a
// prefix of `string`, including empty data, is not considered equal.
- (BOOL)_web_isCaseInsensitiveEqualToCString:(const char *)string
{
    ASSERT(string);

    NSUInteger length = [self length];
    if (strlen(string) != length)
        return NO;
    if (!length)
        return YES;

    return strncasecmp((const char *)[self bytes], string, length) == 0;
}

static const UInt8 *_findEOL(const UInt8 *bytes, CFIndex len)
{
    // According to the HTTP specification EOL is defined as
    // a CRLF pair. Unfortunately, some servers will use LF
    // instead. Worse yet, some servers will use a combination
    // of both (e.g. <header>CRLFLF<body>), so findEOL needs
    // to be more forgiving. It will now accept CRLF, LF, or
    // CR.
    //
    // It returns NULL if EOL is not found or it will return
    // a pointer to the first terminating character.
    CFIndex i;
    for (i = 0; i < len; i++) {
        UInt8 c = bytes[i];
        if (c == '\n')
            return bytes + i;
        if (c == '\r') {
            // Check to see if spanning buffer bounds
            // (CRLF is across reads). If so, wait for
            // next read.
            if (i + 1 == len)
                break;

            return bytes + i;
        }
    }

    return NULL;
}

// Parses the receiver as a block of RFC 822 style header lines and returns a
// mutable dictionary mapping capitalized field names to their values. Parsing
// stops at the first blank line or when no further line terminator is found.
// Lines that start with a space or tab are appended to the value of the most
// recently parsed field; lines without a ':' are skipped; a field name that
// occurs more than once has its values joined with ", ".
- (NSMutableDictionary *)_webkit_parseRFC822HeaderFields
{
    NSMutableDictionary *headerFields = [NSMutableDictionary dictionary];

    const UInt8 *bytes = [self bytes];
    NSUInteger length = [self length];
    NSString *lastKey = nil;
    const UInt8 *eol;

    // Loop over lines until we're past the header, or we can't find any more end-of-lines
    while ((eol = _findEOL(bytes, length))) {
        const UInt8 *line = bytes;
        CFIndex lineLength = eol - bytes;

        // Move bytes to the character after the terminator as returned by _findEOL.
        bytes = eol + 1;
        if (*eol == '\r' && *bytes == '\n')
            bytes++; // Safe since _findEOL won't return a spanning CRLF.

        length -= (bytes - line);

        if (lineLength == 0) {
            // Blank line; we're at the end of the header
            break;
        }

        if (*line == ' ' || *line == '\t') {
            // Continuation of the previous header
            if (!lastKey) {
                // malformed header; ignore it and continue
                continue;
            }

            // Merge the continuation of the previous header
            NSString *currentValue = [headerFields objectForKey:lastKey];
            NSString *continuation = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, line, lineLength, kCFStringEncodingISOLatin1, FALSE));
            ASSERT(currentValue);
            ASSERT(continuation);
            NSString *mergedValue = [[NSString alloc] initWithFormat:@"%@%@", currentValue, continuation];
            [headerFields setObject:mergedValue forKey:lastKey];
            [continuation release];
            [mergedValue release];
            continue;
        }

        // Brand new header
        const UInt8 *colon = memchr(line, ':', lineLength);
        if (!colon) {
            // malformed header; ignore it and continue
            continue;
        }

        NSString *rawKey = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, line, colon - line, kCFStringEncodingISOLatin1, FALSE));
        [rawKey autorelease];
        lastKey = [rawKey _web_capitalizeRFC822HeaderFieldName];

        // Skip the colon and any spaces or tabs in front of the value.
        const UInt8 *valueStart = colon + 1;
        while (valueStart != eol && (*valueStart == ' ' || *valueStart == '\t'))
            valueStart++;

        NSString *value;
        if (valueStart == eol) {
            value = @"";
        } else {
            value = (NSString *)CFMakeCollectable(CFStringCreateWithBytes(NULL, valueStart, eol - valueStart, kCFStringEncodingISOLatin1, FALSE));
            [value autorelease];
        }

        // A repeated field is folded into a single comma-separated value.
        NSString *oldValue = [headerFields objectForKey:lastKey];
        if (oldValue)
            value = [[[NSString alloc] initWithFormat:@"%@, %@", oldValue, value] autorelease];

        [headerFields setObject:value forKey:lastKey];
    }

    return headerFields;
}

// Returns YES when the receiver is non-empty and its first byte is '\n'.
- (BOOL)_web_startsWithBlankLine
{
    return [self length] > 0 && ((const char *)[self bytes])[0] == '\n';
}

// Returns the offset of the first byte after the first blank line in the
// receiver, or NSNotFound when there is none. Only line terminators that begin
// within the first (length - 4) bytes are considered, so data shorter than
// five bytes always yields NSNotFound.
- (NSInteger)_web_locationAfterFirstBlankLine
{
    const char *bytes = (const char *)[self bytes];
    NSUInteger length = [self length];

    // Without this guard "length - 4" wraps around for short data and the
    // loop below would read far beyond the end of the buffer.
    if (length < 4)
        return NSNotFound;

    NSUInteger limit = length - 4;
    NSUInteger i;
    for (i = 0; i < limit; i++) {
        // Support for Acrobat. It sends "\n\n".
        if (bytes[i] == '\n' && bytes[i + 1] == '\n')
            return i + 2;

        // Returns the position after 2 CRLF's or 1 CRLF if it is the first line.
        if (bytes[i] == '\r' && bytes[i + 1] == '\n') {
            i += 2;
            if (i == 2)
                return i;
            if (bytes[i] == '\n') {
                // Support for Director. It sends "\r\n\n" (3880387).
                return i + 1;
            }
            if (bytes[i] == '\r' && bytes[i + 1] == '\n') {
                // Support for Flash. It sends "\r\n\r\n" (3758113).
                return i + 2;
            }
        }
    }

    return NSNotFound;
}

@end