• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "PreloadScanner.h"
29 
30 #include "AtomicString.h"
31 #include "CachedCSSStyleSheet.h"
32 #include "CachedImage.h"
33 #include "CachedResource.h"
34 #include "CachedResourceClient.h"
35 #include "CachedScript.h"
36 #include "CSSHelper.h"
37 #include "CString.h"
38 #include "DocLoader.h"
39 #include "Document.h"
40 #include "Frame.h"
41 #include "FrameLoader.h"
42 #include "HTMLLinkElement.h"
43 #include "HTMLNames.h"
44 #include <wtf/CurrentTime.h>
45 #include <wtf/unicode/Unicode.h>
46 
// Named character reference lookup used by PreloadScanner::consumeEntity().
// Either the gperf-generated table is compiled directly into this file, or
// only the declarations needed to call it are provided here.
47 // Use __GNUC__ instead of PLATFORM(GCC) to stay consistent with the gperf generated c file
48 #ifdef __GNUC__
49 // The main tokenizer includes this too so we are getting two copies of the data. However, this way the code gets inlined.
50 #include "HTMLEntityNames.c"
51 #else
52 // Not inlined for non-GCC compilers
// One lookup result. consumeEntity() returns |code| as the decoded character
// value; |name| is not read in this file (presumably the entity name the
// entry was generated from -- confirm against the generated table).
53 struct Entity {
54     const char* name;
55     int code;
56 };
// Called with a character buffer and its length (the buffer passed by
// consumeEntity() is not null-terminated). The caller treats a null result
// as "no such entity".
57 const struct Entity* findEntity(register const char* str, register unsigned int len);
58 #endif
59 
60 #define PRELOAD_DEBUG 0
61 
62 using namespace WTF;
63 
64 namespace WebCore {
65 
66 using namespace HTMLNames;
67 
PreloadScanner(Document * doc)68 PreloadScanner::PreloadScanner(Document* doc)
69     : m_inProgress(false)
70     , m_timeUsed(0)
71     , m_bodySeen(false)
72     , m_document(doc)
73 {
74 #if PRELOAD_DEBUG
75     printf("CREATING PRELOAD SCANNER FOR %s\n", m_document->url().string().latin1().data());
76 #endif
77 }
78 
~PreloadScanner()79 PreloadScanner::~PreloadScanner()
80 {
81 #if PRELOAD_DEBUG
82     printf("DELETING PRELOAD SCANNER FOR %s\n", m_document->url().string().latin1().data());
83     printf("TOTAL TIME USED %.4fs\n", m_timeUsed);
84 #endif
85 }
86 
begin()87 void PreloadScanner::begin()
88 {
89     ASSERT(!m_inProgress);
90     reset();
91     m_inProgress = true;
92 }
93 
end()94 void PreloadScanner::end()
95 {
96     ASSERT(m_inProgress);
97     m_inProgress = false;
98 }
99 
reset()100 void PreloadScanner::reset()
101 {
102     m_source.clear();
103 
104     m_state = Data;
105     m_escape = false;
106     m_contentModel = PCDATA;
107     m_commentPos = 0;
108 
109     m_closeTag = false;
110     m_tagName.clear();
111     m_attributeName.clear();
112     m_attributeValue.clear();
113     m_lastStartTag = AtomicString();
114 
115     m_urlToLoad = String();
116     m_charset = String();
117     m_linkIsStyleSheet = false;
118     m_lastCharacterIndex = 0;
119     clearLastCharacters();
120 
121     m_cssState = CSSInitial;
122     m_cssRule.clear();
123     m_cssRuleValue.clear();
124 }
125 
scanningBody() const126 bool PreloadScanner::scanningBody() const
127 {
128     return m_document->body() || m_bodySeen;
129 }
130 
write(const SegmentedString & source)131 void PreloadScanner::write(const SegmentedString& source)
132 {
133 #if PRELOAD_DEBUG
134     double startTime = currentTime();
135 #endif
136     tokenize(source);
137 #if PRELOAD_DEBUG
138     m_timeUsed += currentTime() - startTime;
139 #endif
140 }
141 
isWhitespace(UChar c)142 static inline bool isWhitespace(UChar c)
143 {
144     return c == ' ' || c == '\n' || c == '\r' || c == '\t';
145 }
146 
clearLastCharacters()147 inline void PreloadScanner::clearLastCharacters()
148 {
149     memset(m_lastCharacters, 0, lastCharactersBufferSize * sizeof(UChar));
150 }
151 
rememberCharacter(UChar c)152 inline void PreloadScanner::rememberCharacter(UChar c)
153 {
154     m_lastCharacterIndex = (m_lastCharacterIndex + 1) % lastCharactersBufferSize;
155     m_lastCharacters[m_lastCharacterIndex] = c;
156 }
157 
lastCharactersMatch(const char * chars,unsigned count) const158 inline bool PreloadScanner::lastCharactersMatch(const char* chars, unsigned count) const
159 {
160     unsigned pos = m_lastCharacterIndex;
161     while (count) {
162         if (chars[count - 1] != m_lastCharacters[pos])
163             return false;
164         --count;
165         if (!pos)
166             pos = lastCharactersBufferSize;
167         --pos;
168     }
169     return true;
170 }
171 
// Maps the value of a numeric character reference to the code point that
// should actually be produced.
//
// - 0, values above 0x10FFFF and surrogates (0xD800-0xDFFF) become U+FFFD.
// - 0x80-0x9F are replaced using the table from the HTML5 tokenization rules
//   (pages routinely use these Windows-1252 byte values as references);
//   the few values without a replacement map to themselves.
// - Everything else is returned unchanged.
static inline unsigned legalEntityFor(unsigned value)
{
    static const unsigned short c1Replacements[32] = {
        0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 0x80-0x87
        0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 0x88-0x8F
        0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 0x90-0x97
        0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 0x98-0x9F
    };

    if (value == 0 || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))
        return 0xFFFD;
    if (value >= 0x80 && value <= 0x9F)
        return c1Replacements[value - 0x80];
    return value;
}
179 
consumeEntity(SegmentedString & source,bool & notEnoughCharacters)180 unsigned PreloadScanner::consumeEntity(SegmentedString& source, bool& notEnoughCharacters)
181 {
182     enum EntityState {
183         Initial,
184         NumberType,
185         MaybeHex,
186         Hex,
187         Decimal,
188         Named
189     };
190     EntityState entityState = Initial;
191     unsigned result = 0;
192     Vector<UChar, 10> seenChars;
193     Vector<char, 10> entityName;
194 
195     while (!source.isEmpty()) {
196         UChar cc = *source;
197         seenChars.append(cc);
198         switch (entityState) {
199         case Initial:
200             if (isWhitespace(cc) || cc == '<' || cc == '&')
201                 return 0;
202             else if (cc == '#')
203                 entityState = NumberType;
204             else if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
205                 entityName.append(cc);
206                 entityState = Named;
207             } else
208                 return 0;
209             break;
210         case NumberType:
211             if (cc == 'x' || cc == 'X')
212                 entityState = MaybeHex;
213             else if (cc >= '0' && cc <= '9') {
214                 entityState = Decimal;
215                 result = cc - '0';
216             } else {
217                 source.push('#');
218                 return 0;
219             }
220             break;
221         case MaybeHex:
222             if (cc >= '0' && cc <= '9')
223                 result = cc - '0';
224             else if (cc >= 'a' && cc <= 'f')
225                 result = 10 + cc - 'a';
226             else if (cc >= 'A' && cc <= 'F')
227                 result = 10 + cc - 'A';
228             else {
229                 source.push('#');
230                 source.push(seenChars[1]);
231                 return 0;
232             }
233             entityState = Hex;
234             break;
235         case Hex:
236             if (cc >= '0' && cc <= '9')
237                 result = result * 16 + cc - '0';
238             else if (cc >= 'a' && cc <= 'f')
239                 result = result * 16 + 10 + cc - 'a';
240             else if (cc >= 'A' && cc <= 'F')
241                 result = result * 16 + 10 + cc - 'A';
242             else if (cc == ';') {
243                 source.advance();
244                 return legalEntityFor(result);
245             } else
246                 return legalEntityFor(result);
247             break;
248         case Decimal:
249             if (cc >= '0' && cc <= '9')
250                 result = result * 10 + cc - '0';
251             else if (cc == ';') {
252                 source.advance();
253                 return legalEntityFor(result);
254             } else
255                 return legalEntityFor(result);
256             break;
257         case Named:
258             // This is the attribute only version, generic version matches somewhat differently
259             while (entityName.size() <= 8) {
260                 if (cc == ';') {
261                     const Entity* entity = findEntity(entityName.data(), entityName.size());
262                     if (entity) {
263                         source.advance();
264                         return entity->code;
265                     }
266                     break;
267                 }
268                 if (!(cc >= 'a' && cc <= 'z') && !(cc >= 'A' && cc <= 'Z') && !(cc >= '0' && cc <= '9')) {
269                     const Entity* entity = findEntity(entityName.data(), entityName.size());
270                     if (entity)
271                         return entity->code;
272                     break;
273                 }
274                 entityName.append(cc);
275                 source.advance();
276                 if (source.isEmpty())
277                     goto outOfCharacters;
278                 cc = *source;
279                 seenChars.append(cc);
280             }
281             if (seenChars.size() == 2)
282                 source.push(seenChars[0]);
283             else if (seenChars.size() == 3) {
284                 source.push(seenChars[0]);
285                 source.push(seenChars[1]);
286             } else
287                 source.prepend(SegmentedString(String(seenChars.data(), seenChars.size() - 1)));
288             return 0;
289         }
290         source.advance();
291     }
292 outOfCharacters:
293     notEnoughCharacters = true;
294     source.prepend(SegmentedString(String(seenChars.data(), seenChars.size())));
295     return 0;
296 }
297 
tokenize(const SegmentedString & source)298 void PreloadScanner::tokenize(const SegmentedString& source)
299 {
300     ASSERT(m_inProgress);
301 
302     m_source.append(source);
303 
304     // This is a simplified HTML5 Tokenizer
305     // http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
306     while (!m_source.isEmpty()) {
307         UChar cc = *m_source;
308         switch (m_state) {
309         case Data:
310             while (1) {
311                 rememberCharacter(cc);
312                 if (cc == '&') {
313                     if (m_contentModel == PCDATA || m_contentModel == RCDATA) {
314                         m_state = EntityData;
315                         break;
316                     }
317                 } else if (cc == '-') {
318                     if ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape) {
319                         if (lastCharactersMatch("<!--", 4))
320                             m_escape = true;
321                     }
322                 } else if (cc == '<') {
323                     if (m_contentModel == PCDATA || ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape)) {
324                         m_state = TagOpen;
325                         break;
326                     }
327                 } else if (cc == '>') {
328                      if ((m_contentModel == RCDATA || m_contentModel == CDATA) && m_escape) {
329                          if (lastCharactersMatch("-->", 3))
330                              m_escape = false;
331                      }
332                 }
333                 emitCharacter(cc);
334                 m_source.advance();
335                 if (m_source.isEmpty())
336                      return;
337                 cc = *m_source;
338             }
339             break;
340         case EntityData:
341             // should try to consume the entity but we only care about entities in attributes
342             m_state = Data;
343             break;
344         case TagOpen:
345             if (m_contentModel == RCDATA || m_contentModel == CDATA) {
346                 if (cc == '/')
347                     m_state = CloseTagOpen;
348                 else {
349                     m_state = Data;
350                     continue;
351                 }
352             } else if (m_contentModel == PCDATA) {
353                 if (cc == '!')
354                     m_state = MarkupDeclarationOpen;
355                 else if (cc == '/')
356                     m_state = CloseTagOpen;
357                 else if (cc >= 'A' && cc <= 'Z') {
358                     m_tagName.clear();
359                     m_charset = String();
360                     m_tagName.append(cc + 0x20);
361                     m_closeTag = false;
362                     m_state = TagName;
363                 } else if (cc >= 'a' && cc <= 'z') {
364                     m_tagName.clear();
365                     m_charset = String();
366                     m_tagName.append(cc);
367                     m_closeTag = false;
368                     m_state = TagName;
369                 } else if (cc == '>') {
370                     m_state = Data;
371                 } else if (cc == '?') {
372                     m_state = BogusComment;
373                 } else {
374                     m_state = Data;
375                     continue;
376                 }
377             }
378             break;
379         case CloseTagOpen:
380             if (m_contentModel == RCDATA || m_contentModel == CDATA) {
381                 if (!m_lastStartTag.length()) {
382                     m_state = Data;
383                     continue;
384                 }
385                 if (m_source.length() < m_lastStartTag.length() + 1)
386                     return;
387                 Vector<UChar> tmpString;
388                 UChar tmpChar = 0;
389                 bool match = true;
390                 for (unsigned n = 0; n < m_lastStartTag.length() + 1; n++) {
391                     tmpChar = Unicode::toLower(*m_source);
392                     if (n < m_lastStartTag.length() && tmpChar != m_lastStartTag[n])
393                         match = false;
394                     tmpString.append(tmpChar);
395                     m_source.advance();
396                 }
397                 m_source.prepend(SegmentedString(String(tmpString.data(), tmpString.size())));
398                 if (!match || (!isWhitespace(tmpChar) && tmpChar != '>' && tmpChar != '/')) {
399                     m_state = Data;
400                     continue;
401                 }
402             }
403             if (cc >= 'A' && cc <= 'Z') {
404                 m_tagName.clear();
405                 m_charset = String();
406                 m_tagName.append(cc + 0x20);
407                 m_closeTag = true;
408                 m_state = TagName;
409             } else if (cc >= 'a' && cc <= 'z') {
410                 m_tagName.clear();
411                 m_charset = String();
412                 m_tagName.append(cc);
413                 m_closeTag = true;
414                 m_state = TagName;
415             } else if (cc == '>') {
416                 m_state = Data;
417             } else
418                 m_state = BogusComment;
419             break;
420         case TagName:
421             while (1) {
422                 if (isWhitespace(cc)) {
423                     m_state = BeforeAttributeName;
424                     break;
425                 }
426                 if (cc == '>') {
427                     emitTag();
428                     m_state = Data;
429                     break;
430                 }
431                 if (cc == '/') {
432                     m_state = BeforeAttributeName;
433                     break;
434                 }
435                 if (cc >= 'A' && cc <= 'Z')
436                     m_tagName.append(cc + 0x20);
437                 else
438                     m_tagName.append(cc);
439                 m_source.advance();
440                 if (m_source.isEmpty())
441                     return;
442                 cc = *m_source;
443             }
444             break;
445         case BeforeAttributeName:
446             if (isWhitespace(cc))
447                 ;
448             else if (cc == '>') {
449                 emitTag();
450                 m_state = Data;
451             } else if (cc >= 'A' && cc <= 'Z') {
452                 m_attributeName.clear();
453                 m_attributeValue.clear();
454                 m_attributeName.append(cc + 0x20);
455                 m_state = AttributeName;
456             } else if (cc == '/')
457                 ;
458             else {
459                 m_attributeName.clear();
460                 m_attributeValue.clear();
461                 m_attributeName.append(cc);
462                 m_state = AttributeName;
463             }
464             break;
465         case AttributeName:
466             while (1) {
467                 if (isWhitespace(cc)) {
468                     m_state = AfterAttributeName;
469                     break;
470                 }
471                 if (cc == '=') {
472                     m_state = BeforeAttributeValue;
473                     break;
474                 }
475                 if (cc == '>') {
476                     emitTag();
477                     m_state = Data;
478                     break;
479                 }
480                 if (cc == '/') {
481                     m_state = BeforeAttributeName;
482                     break;
483                 }
484                 if (cc >= 'A' && cc <= 'Z')
485                     m_attributeName.append(cc + 0x20);
486                 else
487                     m_attributeName.append(cc);
488                 m_source.advance();
489                 if (m_source.isEmpty())
490                     return;
491                 cc = *m_source;
492             }
493             break;
494         case AfterAttributeName:
495             if (isWhitespace(cc))
496                 ;
497             else if (cc == '=')
498                 m_state = BeforeAttributeValue;
499             else if (cc == '>') {
500                 emitTag();
501                 m_state = Data;
502             } else if (cc >= 'A' && cc <= 'Z') {
503                 m_attributeName.clear();
504                 m_attributeValue.clear();
505                 m_attributeName.append(cc + 0x20);
506                 m_state = AttributeName;
507             } else if (cc == '/')
508                 m_state = BeforeAttributeName;
509             else {
510                 m_attributeName.clear();
511                 m_attributeValue.clear();
512                 m_attributeName.append(cc);
513                 m_state = AttributeName;
514             }
515             break;
516         case BeforeAttributeValue:
517             if (isWhitespace(cc))
518                 ;
519             else if (cc == '"')
520                 m_state = AttributeValueDoubleQuoted;
521             else if (cc == '&') {
522                 m_state = AttributeValueUnquoted;
523                 continue;
524             } else if (cc == '\'')
525                 m_state = AttributeValueSingleQuoted;
526             else if (cc == '>') {
527                 emitTag();
528                 m_state = Data;
529             } else {
530                 m_attributeValue.append(cc);
531                 m_state = AttributeValueUnquoted;
532             }
533             break;
534         case AttributeValueDoubleQuoted:
535             while (1) {
536                 if (cc == '"') {
537                     processAttribute();
538                     m_state = BeforeAttributeName;
539                     break;
540                 }
541                 if (cc == '&') {
542                     m_stateBeforeEntityInAttributeValue = m_state;
543                     m_state = EntityInAttributeValue;
544                     break;
545                 }
546                 m_attributeValue.append(cc);
547                 m_source.advance();
548                 if (m_source.isEmpty())
549                     return;
550                 cc = *m_source;
551             }
552             break;
553         case AttributeValueSingleQuoted:
554             while (1) {
555                 if (cc == '\'') {
556                     processAttribute();
557                     m_state = BeforeAttributeName;
558                     break;
559                 }
560                 if (cc == '&') {
561                     m_stateBeforeEntityInAttributeValue = m_state;
562                     m_state = EntityInAttributeValue;
563                     break;
564                 }
565                 m_attributeValue.append(cc);
566                 m_source.advance();
567                 if (m_source.isEmpty())
568                     return;
569                 cc = *m_source;
570             }
571             break;
572         case AttributeValueUnquoted:
573             while (1) {
574                 if (isWhitespace(cc)) {
575                     processAttribute();
576                     m_state = BeforeAttributeName;
577                     break;
578                 }
579                 if (cc == '&') {
580                     m_stateBeforeEntityInAttributeValue = m_state;
581                     m_state = EntityInAttributeValue;
582                     break;
583                 }
584                 if (cc == '>') {
585                     processAttribute();
586                     emitTag();
587                     m_state = Data;
588                     break;
589                 }
590                 m_attributeValue.append(cc);
591                 m_source.advance();
592                 if (m_source.isEmpty())
593                     return;
594                 cc = *m_source;
595             }
596             break;
597         case EntityInAttributeValue:
598             {
599                 bool notEnoughCharacters = false;
600                 unsigned entity = consumeEntity(m_source, notEnoughCharacters);
601                 if (notEnoughCharacters)
602                     return;
603                 if (entity > 0xFFFF) {
604                     m_attributeValue.append(U16_LEAD(entity));
605                     m_attributeValue.append(U16_TRAIL(entity));
606                 } else if (entity)
607                     m_attributeValue.append(entity);
608                 else
609                     m_attributeValue.append('&');
610             }
611             m_state = m_stateBeforeEntityInAttributeValue;
612             continue;
613         case BogusComment:
614             while (1) {
615                 if (cc == '>') {
616                     m_state = Data;
617                     break;
618                 }
619                 m_source.advance();
620                 if (m_source.isEmpty())
621                     return;
622                 cc = *m_source;
623             }
624             break;
625         case MarkupDeclarationOpen: {
626             if (cc == '-') {
627                 if (m_source.length() < 2)
628                     return;
629                 m_source.advance();
630                 cc = *m_source;
631                 if (cc == '-')
632                     m_state = CommentStart;
633                 else {
634                     m_state = BogusComment;
635                     continue;
636                 }
637             // If we cared about the DOCTYPE we would test to enter those states here
638             } else {
639                 m_state = BogusComment;
640                 continue;
641             }
642             break;
643         }
644         case CommentStart:
645             if (cc == '-')
646                 m_state = CommentStartDash;
647             else if (cc == '>')
648                 m_state = Data;
649             else
650                 m_state = Comment;
651             break;
652         case CommentStartDash:
653             if (cc == '-')
654                 m_state = CommentEnd;
655             else if (cc == '>')
656                 m_state = Data;
657             else
658                 m_state = Comment;
659             break;
660         case Comment:
661             while (1) {
662                 if (cc == '-') {
663                     m_state = CommentEndDash;
664                     break;
665                 }
666                 m_source.advance();
667                 if (m_source.isEmpty())
668                     return;
669                 cc = *m_source;
670             }
671             break;
672         case CommentEndDash:
673             if (cc == '-')
674                 m_state = CommentEnd;
675             else
676                 m_state = Comment;
677             break;
678         case CommentEnd:
679             if (cc == '>')
680                 m_state = Data;
681             else if (cc == '-')
682                 ;
683             else
684                 m_state = Comment;
685             break;
686         }
687         m_source.advance();
688     }
689 }
690 
processAttribute()691 void PreloadScanner::processAttribute()
692 {
693     AtomicString tag = AtomicString(m_tagName.data(), m_tagName.size());
694     AtomicString attribute = AtomicString(m_attributeName.data(), m_attributeName.size());
695 
696     String value(m_attributeValue.data(), m_attributeValue.size());
697     if (tag == scriptTag || tag == imgTag) {
698         if (attribute == srcAttr && m_urlToLoad.isEmpty())
699             m_urlToLoad = deprecatedParseURL(value);
700         else if (attribute == charsetAttr)
701             m_charset = value;
702     } else if (tag == linkTag) {
703         if (attribute == hrefAttr && m_urlToLoad.isEmpty())
704             m_urlToLoad = deprecatedParseURL(value);
705         else if (attribute == relAttr) {
706             bool styleSheet = false;
707             bool alternate = false;
708             bool icon = false;
709             bool dnsPrefetch = false;
710 #ifdef ANDROID_APPLE_TOUCH_ICON
711             bool touchIcon = false;
712             bool precomposedTouchIcon = false;
713             HTMLLinkElement::tokenizeRelAttribute(value, styleSheet, alternate, icon, touchIcon, precomposedTouchIcon, dnsPrefetch);
714             m_linkIsStyleSheet = styleSheet && !alternate && !icon && !touchIcon && !precomposedTouchIcon && !dnsPrefetch;
715 #else
716             HTMLLinkElement::tokenizeRelAttribute(value, styleSheet, alternate, icon, dnsPrefetch);
717             m_linkIsStyleSheet = styleSheet && !alternate && !icon && !dnsPrefetch;
718 #endif
719         } else if (attribute == charsetAttr)
720             m_charset = value;
721     }
722 }
723 
emitCharacter(UChar c)724 inline void PreloadScanner::emitCharacter(UChar c)
725 {
726     if (m_contentModel == CDATA && m_lastStartTag == styleTag)
727         tokenizeCSS(c);
728 }
729 
tokenizeCSS(UChar c)730 inline void PreloadScanner::tokenizeCSS(UChar c)
731 {
732     // We are just interested in @import rules, no need for real tokenization here
733     // Searching for other types of resources is probably low payoff
734     switch (m_cssState) {
735     case CSSInitial:
736         if (c == '@')
737             m_cssState = CSSRuleStart;
738         else if (c == '/')
739             m_cssState = CSSMaybeComment;
740         break;
741     case CSSMaybeComment:
742         if (c == '*')
743             m_cssState = CSSComment;
744         else
745             m_cssState = CSSInitial;
746         break;
747     case CSSComment:
748         if (c == '*')
749             m_cssState = CSSMaybeCommentEnd;
750         break;
751     case CSSMaybeCommentEnd:
752         if (c == '/')
753             m_cssState = CSSInitial;
754         else if (c == '*')
755             ;
756         else
757             m_cssState = CSSComment;
758         break;
759     case CSSRuleStart:
760         if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
761             m_cssRule.clear();
762             m_cssRuleValue.clear();
763             m_cssRule.append(c);
764             m_cssState = CSSRule;
765         } else
766             m_cssState = CSSInitial;
767         break;
768     case CSSRule:
769         if (isWhitespace(c))
770             m_cssState = CSSAfterRule;
771         else if (c == ';')
772             m_cssState = CSSInitial;
773         else
774             m_cssRule.append(c);
775         break;
776     case CSSAfterRule:
777         if (isWhitespace(c))
778             ;
779         else if (c == ';')
780             m_cssState = CSSInitial;
781         else {
782             m_cssState = CSSRuleValue;
783             m_cssRuleValue.append(c);
784         }
785         break;
786     case CSSRuleValue:
787         if (isWhitespace(c))
788             m_cssState = CSSAfterRuleValue;
789         else if (c == ';') {
790             emitCSSRule();
791             m_cssState = CSSInitial;
792         } else
793             m_cssRuleValue.append(c);
794         break;
795     case CSSAfterRuleValue:
796         if (isWhitespace(c))
797             ;
798         else if (c == ';') {
799             emitCSSRule();
800             m_cssState = CSSInitial;
801         } else {
802             // FIXME media rules
803              m_cssState = CSSInitial;
804         }
805         break;
806     }
807 }
808 
emitTag()809 void PreloadScanner::emitTag()
810 {
811     if (m_closeTag) {
812         m_contentModel = PCDATA;
813         m_cssState = CSSInitial;
814         clearLastCharacters();
815         return;
816     }
817 
818     AtomicString tag(m_tagName.data(), m_tagName.size());
819     m_lastStartTag = tag;
820 
821     if (tag == textareaTag || tag == titleTag)
822         m_contentModel = RCDATA;
823     else if (tag == styleTag || tag == xmpTag || tag == scriptTag || tag == iframeTag || tag == noembedTag || tag == noframesTag)
824         m_contentModel = CDATA;
825     else if (tag == noscriptTag)
826         // we wouldn't be here if scripts were disabled
827         m_contentModel = CDATA;
828     else if (tag == plaintextTag)
829         m_contentModel = PLAINTEXT;
830     else
831         m_contentModel = PCDATA;
832 
833     if (tag == bodyTag)
834         m_bodySeen = true;
835 
836     if (m_urlToLoad.isEmpty()) {
837         m_linkIsStyleSheet = false;
838         return;
839     }
840 
841     if (tag == scriptTag)
842         m_document->docLoader()->preload(CachedResource::Script, m_urlToLoad, m_charset, scanningBody());
843     else if (tag == imgTag)
844         m_document->docLoader()->preload(CachedResource::ImageResource, m_urlToLoad, String(), scanningBody());
845     else if (tag == linkTag && m_linkIsStyleSheet)
846         m_document->docLoader()->preload(CachedResource::CSSStyleSheet, m_urlToLoad, m_charset, scanningBody());
847 
848     m_urlToLoad = String();
849     m_charset = String();
850     m_linkIsStyleSheet = false;
851 }
852 
emitCSSRule()853 void PreloadScanner::emitCSSRule()
854 {
855     String rule(m_cssRule.data(), m_cssRule.size());
856     if (equalIgnoringCase(rule, "import") && !m_cssRuleValue.isEmpty()) {
857         String value(m_cssRuleValue.data(), m_cssRuleValue.size());
858         String url = deprecatedParseURL(value);
859         if (!url.isEmpty())
860             m_document->docLoader()->preload(CachedResource::CSSStyleSheet, url, String(), scanningBody());
861     }
862     m_cssRule.clear();
863     m_cssRuleValue.clear();
864 }
865 
866 }
867