• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
    Copyright (C) 1997 Martin Jones (mjones@kde.org)
              (C) 1997 Torben Weis (weis@kde.org)
              (C) 1998 Waldo Bastian (bastian@kde.org)
              (C) 1999 Lars Knoll (knoll@kde.org)
    Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/
23 
24 #ifndef HTMLParser_h
25 #define HTMLParser_h
26 
27 #include "QualifiedName.h"
28 #include <wtf/Forward.h>
29 #include <wtf/RefPtr.h>
30 #include "HTMLParserErrorCodes.h"
31 
32 namespace WebCore {
33 
34 class DoctypeToken;
35 class Document;
36 class DocumentFragment;
37 class HTMLDocument;
38 class HTMLFormElement;
39 class HTMLHeadElement;
40 class HTMLMapElement;
41 class Node;
42 
43 struct HTMLStackElem;
44 struct Token;
45 
46 /**
47  * The parser for HTML. It receives a stream of tokens from the HTMLTokenizer, and
48  * builds up the Document structure from it.
49  */
50 class HTMLParser : Noncopyable {
51 public:
52     HTMLParser(HTMLDocument*, bool reportErrors);
53     HTMLParser(DocumentFragment*);
54     virtual ~HTMLParser();
55 
56     /**
57      * parses one token delivered by the tokenizer
58      */
59     PassRefPtr<Node> parseToken(Token*);
60 
61     // Parses a doctype token.
62     void parseDoctypeToken(DoctypeToken*);
63 
64     /**
65      * tokenizer says it's not going to be sending us any more tokens
66      */
67     void finished();
68 
69     /**
70      * resets the parser
71      */
72     void reset();
73 
skipMode()74     bool skipMode() const { return !m_skipModeTag.isNull(); }
isHandlingResidualStyleAcrossBlocks()75     bool isHandlingResidualStyleAcrossBlocks() const { return m_handlingResidualStyleAcrossBlocks; }
76 
77 private:
78     void setCurrent(Node*);
79     void derefCurrent();
setSkipMode(const QualifiedName & qName)80     void setSkipMode(const QualifiedName& qName) { m_skipModeTag = qName.localName(); }
81 
82     PassRefPtr<Node> getNode(Token*);
83     bool bodyCreateErrorCheck(Token*, RefPtr<Node>&);
84     bool canvasCreateErrorCheck(Token*, RefPtr<Node>&);
85     bool commentCreateErrorCheck(Token*, RefPtr<Node>&);
86     bool ddCreateErrorCheck(Token*, RefPtr<Node>&);
87     bool dtCreateErrorCheck(Token*, RefPtr<Node>&);
88     bool formCreateErrorCheck(Token*, RefPtr<Node>&);
89     bool framesetCreateErrorCheck(Token*, RefPtr<Node>&);
90     bool headCreateErrorCheck(Token*, RefPtr<Node>&);
91     bool iframeCreateErrorCheck(Token*, RefPtr<Node>&);
92     bool isindexCreateErrorCheck(Token*, RefPtr<Node>&);
93     bool mapCreateErrorCheck(Token*, RefPtr<Node>&);
94     bool nestedCreateErrorCheck(Token*, RefPtr<Node>&);
95     bool nestedPCloserCreateErrorCheck(Token*, RefPtr<Node>&);
96     bool nestedStyleCreateErrorCheck(Token*, RefPtr<Node>&);
97     bool noembedCreateErrorCheck(Token*, RefPtr<Node>&);
98     bool noframesCreateErrorCheck(Token*, RefPtr<Node>&);
99     bool nolayerCreateErrorCheck(Token*, RefPtr<Node>&);
100     bool noscriptCreateErrorCheck(Token*, RefPtr<Node>&);
101     bool pCloserCreateErrorCheck(Token*, RefPtr<Node>&);
102     bool pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&);
103     bool selectCreateErrorCheck(Token*, RefPtr<Node>&);
104     bool tableCellCreateErrorCheck(Token*, RefPtr<Node>&);
105     bool tableSectionCreateErrorCheck(Token*, RefPtr<Node>&);
106     bool textCreateErrorCheck(Token*, RefPtr<Node>&);
107 
108     void processCloseTag(Token*);
109 
110     bool insertNode(Node*, bool flat = false);
111     bool handleError(Node*, bool flat, const AtomicString& localName, int tagPriority);
112 
113     void pushBlock(const AtomicString& tagName, int level);
114     void popBlock(const AtomicString& tagName, bool reportErrors = false);
115     void popBlock(const QualifiedName& qName, bool reportErrors = false) { return popBlock(qName.localName(), reportErrors); } // Convenience function for readability.
116     void popOneBlock();
117     void moveOneBlockToStack(HTMLStackElem*& head);
118     inline HTMLStackElem* popOneBlockCommon();
119     void popInlineBlocks();
120 
121     void freeBlock();
122 
123     void createHead();
124 
125     static bool isResidualStyleTag(const AtomicString& tagName);
126     static bool isAffectedByResidualStyle(const AtomicString& tagName);
127     void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem*);
128     void reopenResidualStyleTags(HTMLStackElem*, Node* malformedTableParent);
129 
130     bool allowNestedRedundantTag(const AtomicString& tagName);
131 
132     static bool isHeaderTag(const AtomicString& tagName);
133     void popNestedHeaderTag();
134 
135     bool isInline(Node*) const;
136 
137     void startBody(); // inserts the isindex element
138     PassRefPtr<Node> handleIsindex(Token*);
139 
140     void checkIfHasPElementInScope();
hasPElementInScope()141     bool hasPElementInScope()
142     {
143         if (m_hasPElementInScope == Unknown)
144             checkIfHasPElementInScope();
145         return m_hasPElementInScope == InScope;
146     }
147 
148     void reportError(HTMLParserErrorCode errorCode, const AtomicString* tagName1 = 0, const AtomicString* tagName2 = 0, bool closeTags = false)
149     { if (!m_reportErrors) return; reportErrorToConsole(errorCode, tagName1, tagName2, closeTags); }
150 
151     void reportErrorToConsole(HTMLParserErrorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags);
152 
153     Document* document;
154 
155     // The currently active element (the one new elements will be added to). Can be a document fragment, a document or an element.
156     Node* current;
157     // We can't ref a document, but we don't want to constantly check if a node is a document just to decide whether to deref.
158     bool didRefCurrent;
159 
160     HTMLStackElem* blockStack;
161 
162     // The number of tags with priority minBlockLevelTagPriority or higher
163     // currently in m_blockStack. The parser enforces a cap on this value by
164     // adding such new elements as siblings instead of children once it is reached.
165     size_t m_blocksInStack;
166 
167     enum ElementInScopeState { NotInScope, InScope, Unknown };
168     ElementInScopeState m_hasPElementInScope;
169 
170     RefPtr<HTMLFormElement> m_currentFormElement; // currently active form
171     RefPtr<HTMLMapElement> m_currentMapElement; // current map
172     HTMLHeadElement* head; // head element; needed for HTML which defines <base> after </head>
173     RefPtr<Node> m_isindexElement; // a possible <isindex> element in the head
174 
175     bool inBody;
176     bool haveContent;
177     bool haveFrameSet;
178 
179     AtomicString m_skipModeTag; // tells the parser to discard all tags until it reaches the one specified
180 
181     bool m_isParsingFragment;
182     bool m_reportErrors;
183     bool m_handlingResidualStyleAcrossBlocks;
184     int inStrayTableContent;
185 };
186 
187 }
188 
189 #endif // HTMLParser_h
190