1 /* 2 Copyright (C) 1997 Martin Jones (mjones@kde.org) 3 (C) 1997 Torben Weis (weis@kde.org) 4 (C) 1998 Waldo Bastian (bastian@kde.org) 5 (C) 1999 Lars Knoll (knoll@kde.org) 6 Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved. 7 8 This library is free software; you can redistribute it and/or 9 modify it under the terms of the GNU Library General Public 10 License as published by the Free Software Foundation; either 11 version 2 of the License, or (at your option) any later version. 12 13 This library is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 Library General Public License for more details. 17 18 You should have received a copy of the GNU Library General Public License 19 along with this library; see the file COPYING.LIB. If not, write to 20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 Boston, MA 02110-1301, USA. 22 */ 23 24 #ifndef HTMLParser_h 25 #define HTMLParser_h 26 27 #include "QualifiedName.h" 28 #include <wtf/Forward.h> 29 #include <wtf/RefPtr.h> 30 #include "HTMLParserErrorCodes.h" 31 32 namespace WebCore { 33 34 class DoctypeToken; 35 class Document; 36 class DocumentFragment; 37 class HTMLDocument; 38 class HTMLFormElement; 39 class HTMLHeadElement; 40 class HTMLMapElement; 41 class Node; 42 43 struct HTMLStackElem; 44 struct Token; 45 46 /** 47 * The parser for HTML. It receives a stream of tokens from the HTMLTokenizer, and 48 * builds up the Document structure from it. 49 */ 50 class HTMLParser : Noncopyable { 51 public: 52 HTMLParser(HTMLDocument*, bool reportErrors); 53 HTMLParser(DocumentFragment*); 54 virtual ~HTMLParser(); 55 56 /** 57 * parses one token delivered by the tokenizer 58 */ 59 PassRefPtr<Node> parseToken(Token*); 60 61 // Parses a doctype token. 62 void parseDoctypeToken(DoctypeToken*); 63 64 /** 65 * tokenizer says it's not going to be sending us any more tokens 66 */ 67 void finished(); 68 69 /** 70 * resets the parser 71 */ 72 void reset(); 73 skipMode()74 bool skipMode() const { return !m_skipModeTag.isNull(); } isHandlingResidualStyleAcrossBlocks()75 bool isHandlingResidualStyleAcrossBlocks() const { return m_handlingResidualStyleAcrossBlocks; } 76 77 private: 78 void setCurrent(Node*); 79 void derefCurrent(); setSkipMode(const QualifiedName & qName)80 void setSkipMode(const QualifiedName& qName) { m_skipModeTag = qName.localName(); } 81 82 PassRefPtr<Node> getNode(Token*); 83 bool bodyCreateErrorCheck(Token*, RefPtr<Node>&); 84 bool canvasCreateErrorCheck(Token*, RefPtr<Node>&); 85 bool commentCreateErrorCheck(Token*, RefPtr<Node>&); 86 bool ddCreateErrorCheck(Token*, RefPtr<Node>&); 87 bool dtCreateErrorCheck(Token*, RefPtr<Node>&); 88 bool formCreateErrorCheck(Token*, RefPtr<Node>&); 89 bool framesetCreateErrorCheck(Token*, RefPtr<Node>&); 90 bool headCreateErrorCheck(Token*, RefPtr<Node>&); 91 bool iframeCreateErrorCheck(Token*, RefPtr<Node>&); 92 bool isindexCreateErrorCheck(Token*, RefPtr<Node>&); 93 bool mapCreateErrorCheck(Token*, RefPtr<Node>&); 94 bool nestedCreateErrorCheck(Token*, RefPtr<Node>&); 95 bool nestedPCloserCreateErrorCheck(Token*, RefPtr<Node>&); 96 bool nestedStyleCreateErrorCheck(Token*, RefPtr<Node>&); 97 bool noembedCreateErrorCheck(Token*, RefPtr<Node>&); 98 bool noframesCreateErrorCheck(Token*, RefPtr<Node>&); 99 bool nolayerCreateErrorCheck(Token*, RefPtr<Node>&); 100 bool noscriptCreateErrorCheck(Token*, RefPtr<Node>&); 101 bool pCloserCreateErrorCheck(Token*, RefPtr<Node>&); 102 bool pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&); 103 bool selectCreateErrorCheck(Token*, RefPtr<Node>&); 104 bool tableCellCreateErrorCheck(Token*, RefPtr<Node>&); 105 bool tableSectionCreateErrorCheck(Token*, RefPtr<Node>&); 106 bool textCreateErrorCheck(Token*, RefPtr<Node>&); 107 108 void processCloseTag(Token*); 109 110 bool insertNode(Node*, bool flat = false); 111 bool handleError(Node*, bool flat, const AtomicString& localName, int tagPriority); 112 113 void pushBlock(const AtomicString& tagName, int level); 114 void popBlock(const AtomicString& tagName, bool reportErrors = false); 115 void popBlock(const QualifiedName& qName, bool reportErrors = false) { return popBlock(qName.localName(), reportErrors); } // Convenience function for readability. 116 void popOneBlock(); 117 void moveOneBlockToStack(HTMLStackElem*& head); 118 inline HTMLStackElem* popOneBlockCommon(); 119 void popInlineBlocks(); 120 121 void freeBlock(); 122 123 void createHead(); 124 125 static bool isResidualStyleTag(const AtomicString& tagName); 126 static bool isAffectedByResidualStyle(const AtomicString& tagName); 127 void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem*); 128 void reopenResidualStyleTags(HTMLStackElem*, Node* malformedTableParent); 129 130 bool allowNestedRedundantTag(const AtomicString& tagName); 131 132 static bool isHeaderTag(const AtomicString& tagName); 133 void popNestedHeaderTag(); 134 135 bool isInline(Node*) const; 136 137 void startBody(); // inserts the isindex element 138 PassRefPtr<Node> handleIsindex(Token*); 139 140 void checkIfHasPElementInScope(); hasPElementInScope()141 bool hasPElementInScope() 142 { 143 if (m_hasPElementInScope == Unknown) 144 checkIfHasPElementInScope(); 145 return m_hasPElementInScope == InScope; 146 } 147 148 void reportError(HTMLParserErrorCode errorCode, const AtomicString* tagName1 = 0, const AtomicString* tagName2 = 0, bool closeTags = false) 149 { if (!m_reportErrors) return; reportErrorToConsole(errorCode, tagName1, tagName2, closeTags); } 150 151 void reportErrorToConsole(HTMLParserErrorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags); 152 153 Document* document; 154 155 // The currently active element (the one new elements will be added to). Can be a document fragment, a document or an element. 156 Node* current; 157 // We can't ref a document, but we don't want to constantly check if a node is a document just to decide whether to deref. 158 bool didRefCurrent; 159 160 HTMLStackElem* blockStack; 161 162 // The number of tags with priority minBlockLevelTagPriority or higher 163 // currently in m_blockStack. The parser enforces a cap on this value by 164 // adding such new elements as siblings instead of children once it is reached. 165 size_t m_blocksInStack; 166 167 enum ElementInScopeState { NotInScope, InScope, Unknown }; 168 ElementInScopeState m_hasPElementInScope; 169 170 RefPtr<HTMLFormElement> m_currentFormElement; // currently active form 171 RefPtr<HTMLMapElement> m_currentMapElement; // current map 172 HTMLHeadElement* head; // head element; needed for HTML which defines <base> after </head> 173 RefPtr<Node> m_isindexElement; // a possible <isindex> element in the head 174 175 bool inBody; 176 bool haveContent; 177 bool haveFrameSet; 178 179 AtomicString m_skipModeTag; // tells the parser to discard all tags until it reaches the one specified 180 181 bool m_isParsingFragment; 182 bool m_reportErrors; 183 bool m_handlingResidualStyleAcrossBlocks; 184 int inStrayTableContent; 185 }; 186 187 } 188 189 #endif // HTMLParser_h 190