1 /* 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef HTMLDocumentParser_h 27 #define HTMLDocumentParser_h 28 29 #include "core/dom/ParserContentPolicy.h" 30 #include "core/dom/ScriptableDocumentParser.h" 31 #include "core/fetch/ResourceClient.h" 32 #include "core/frame/UseCounter.h" 33 #include "core/html/parser/BackgroundHTMLInputStream.h" 34 #include "core/html/parser/CompactHTMLToken.h" 35 #include "core/html/parser/HTMLInputStream.h" 36 #include "core/html/parser/HTMLParserOptions.h" 37 #include "core/html/parser/HTMLPreloadScanner.h" 38 #include "core/html/parser/HTMLScriptRunnerHost.h" 39 #include "core/html/parser/HTMLSourceTracker.h" 40 #include "core/html/parser/HTMLToken.h" 41 #include "core/html/parser/HTMLTokenizer.h" 42 #include "core/html/parser/HTMLTreeBuilderSimulator.h" 43 #include "core/html/parser/TextResourceDecoder.h" 44 #include "core/html/parser/XSSAuditor.h" 45 #include "core/html/parser/XSSAuditorDelegate.h" 46 #include "platform/text/SegmentedString.h" 47 #include "wtf/Deque.h" 48 #include "wtf/OwnPtr.h" 49 #include "wtf/WeakPtr.h" 50 #include "wtf/text/TextPosition.h" 51 52 namespace blink { 53 54 class BackgroundHTMLParser; 55 class CompactHTMLToken; 56 class Document; 57 class DocumentFragment; 58 class HTMLDocument; 59 class HTMLParserScheduler; 60 class HTMLScriptRunner; 61 class HTMLTreeBuilder; 62 class HTMLResourcePreloader; 63 class ScriptController; 64 class ScriptSourceCode; 65 66 class PumpSession; 67 68 class HTMLDocumentParser : public ScriptableDocumentParser, private HTMLScriptRunnerHost { 69 WTF_MAKE_FAST_ALLOCATED_WILL_BE_REMOVED; 70 WILL_BE_USING_GARBAGE_COLLECTED_MIXIN(HTMLDocumentParser); 71 public: create(HTMLDocument & document,bool reportErrors)72 static PassRefPtrWillBeRawPtr<HTMLDocumentParser> create(HTMLDocument& document, bool reportErrors) 73 { 74 return adoptRefWillBeNoop(new HTMLDocumentParser(document, reportErrors)); 75 } 76 virtual ~HTMLDocumentParser(); 77 virtual void trace(Visitor*) OVERRIDE; 78 79 // Exposed for HTMLParserScheduler 80 void resumeParsingAfterYield(); 81 82 static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, ParserContentPolicy = AllowScriptingContent); 83 tokenizer()84 HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); } 85 86 virtual TextPosition textPosition() const OVERRIDE FINAL; 87 virtual OrdinalNumber lineNumber() const OVERRIDE FINAL; 88 89 virtual void suspendScheduledTasks() OVERRIDE FINAL; 90 virtual void resumeScheduledTasks() OVERRIDE FINAL; 91 92 struct ParsedChunk { 93 OwnPtr<CompactHTMLTokenStream> tokens; 94 PreloadRequestStream preloads; 95 XSSInfoStream xssInfos; 96 HTMLTokenizer::State tokenizerState; 97 HTMLTreeBuilderSimulator::State treeBuilderState; 98 HTMLInputCheckpoint inputCheckpoint; 99 TokenPreloadScannerCheckpoint preloadScannerCheckpoint; 100 }; 101 void didReceiveParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>); 102 void didReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData&); 103 104 virtual void appendBytes(const char* bytes, size_t length) OVERRIDE; 105 virtual void flush() OVERRIDE FINAL; 106 virtual void setDecoder(PassOwnPtr<TextResourceDecoder>) OVERRIDE FINAL; 107 useCounter()108 UseCounter* useCounter() { return UseCounter::getFrom(contextForParsingSession()); } 109 110 protected: 111 virtual void insert(const SegmentedString&) OVERRIDE FINAL; 112 virtual void append(PassRefPtr<StringImpl>) OVERRIDE; 113 virtual void finish() OVERRIDE FINAL; 114 115 HTMLDocumentParser(HTMLDocument&, bool reportErrors); 116 HTMLDocumentParser(DocumentFragment*, Element* contextElement, ParserContentPolicy); 117 treeBuilder()118 HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); } 119 120 void forcePlaintextForTextDocument(); 121 122 private: create(DocumentFragment * fragment,Element * contextElement,ParserContentPolicy parserContentPolicy)123 static PassRefPtrWillBeRawPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) 124 { 125 return adoptRefWillBeNoop(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy)); 126 } 127 128 // DocumentParser 129 virtual void pinToMainThread() OVERRIDE FINAL; 130 virtual void detach() OVERRIDE FINAL; 131 virtual bool hasInsertionPoint() OVERRIDE FINAL; 132 virtual bool processingData() const OVERRIDE FINAL; 133 virtual void prepareToStopParsing() OVERRIDE FINAL; 134 virtual void stopParsing() OVERRIDE FINAL; 135 virtual bool isWaitingForScripts() const OVERRIDE FINAL; 136 virtual bool isExecutingScript() const OVERRIDE FINAL; 137 virtual void executeScriptsWaitingForResources() OVERRIDE FINAL; 138 139 // HTMLScriptRunnerHost 140 virtual void notifyScriptLoaded(Resource*) OVERRIDE FINAL; inputStream()141 virtual HTMLInputStream& inputStream() OVERRIDE FINAL { return m_input; } hasPreloadScanner()142 virtual bool hasPreloadScanner() const OVERRIDE FINAL { return m_preloadScanner.get() && !shouldUseThreading(); } 143 virtual void appendCurrentInputStreamToPreloadScannerAndScan() OVERRIDE FINAL; 144 145 void startBackgroundParser(); 146 void stopBackgroundParser(); 147 void validateSpeculations(PassOwnPtr<ParsedChunk> lastChunk); 148 void discardSpeculationsAndResumeFrom(PassOwnPtr<ParsedChunk> lastChunk, PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>); 149 void processParsedChunkFromBackgroundParser(PassOwnPtr<ParsedChunk>); 150 void pumpPendingSpeculations(); 151 152 Document* contextForParsingSession(); 153 154 enum SynchronousMode { 155 AllowYield, 156 ForceSynchronous, 157 }; 158 bool canTakeNextToken(SynchronousMode, PumpSession&); 159 void pumpTokenizer(SynchronousMode); 160 void pumpTokenizerIfPossible(SynchronousMode); 161 void constructTreeFromHTMLToken(HTMLToken&); 162 void constructTreeFromCompactHTMLToken(const CompactHTMLToken&); 163 164 void runScriptsForPausedTreeBuilder(); 165 void resumeParsingAfterScriptExecution(); 166 167 void attemptToEnd(); 168 void endIfDelayed(); 169 void attemptToRunDeferredScriptsAndEnd(); 170 void end(); 171 shouldUseThreading()172 bool shouldUseThreading() const { return m_options.useThreading && !m_isPinnedToMainThread; } 173 174 bool isParsingFragment() const; 175 bool isScheduledForResume() const; inPumpSession()176 bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; } shouldDelayEnd()177 bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); } 178 token()179 HTMLToken& token() { return *m_token; } 180 181 HTMLParserOptions m_options; 182 HTMLInputStream m_input; 183 184 OwnPtr<HTMLToken> m_token; 185 OwnPtr<HTMLTokenizer> m_tokenizer; 186 OwnPtrWillBeMember<HTMLScriptRunner> m_scriptRunner; 187 OwnPtrWillBeMember<HTMLTreeBuilder> m_treeBuilder; 188 OwnPtr<HTMLPreloadScanner> m_preloadScanner; 189 OwnPtr<HTMLPreloadScanner> m_insertionPreloadScanner; 190 OwnPtr<HTMLParserScheduler> m_parserScheduler; 191 HTMLSourceTracker m_sourceTracker; 192 TextPosition m_textPosition; 193 XSSAuditor m_xssAuditor; 194 XSSAuditorDelegate m_xssAuditorDelegate; 195 196 // FIXME: m_lastChunkBeforeScript, m_tokenizer, m_token, and m_input should be combined into a single state object 197 // so they can be set and cleared together and passed between threads together. 198 OwnPtr<ParsedChunk> m_lastChunkBeforeScript; 199 Deque<OwnPtr<ParsedChunk> > m_speculations; 200 WeakPtrFactory<HTMLDocumentParser> m_weakFactory; 201 WeakPtr<BackgroundHTMLParser> m_backgroundParser; 202 OwnPtrWillBeMember<HTMLResourcePreloader> m_preloader; 203 204 bool m_isPinnedToMainThread; 205 bool m_endWasDelayed; 206 bool m_haveBackgroundParser; 207 unsigned m_pumpSessionNestingLevel; 208 }; 209 210 } 211 212 #endif 213