1 /* 2 * Copyright (C) 2000 Peter Kelly (pmk@post.com) 3 * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved. 4 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org) 5 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 6 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Library General Public 10 * License as published by the Free Software Foundation; either 11 * version 2 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Library General Public License for more details. 17 * 18 * You should have received a copy of the GNU Library General Public License 19 * along with this library; see the file COPYING.LIB. If not, write to 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 * Boston, MA 02110-1301, USA. 22 * 23 */ 24 25 #ifndef XMLTokenizer_h 26 #define XMLTokenizer_h 27 28 #if USE(EXPAT) 29 30 #include "CachedResourceClient.h" 31 #include "SegmentedString.h" 32 #include "StringHash.h" 33 #include "Tokenizer.h" 34 #include <libexpat/expat.h> 35 #include <wtf/HashMap.h> 36 #include <wtf/OwnPtr.h> 37 38 namespace WebCore { 39 40 class Node; 41 class CachedScript; 42 class DocumentFragment; 43 class Document; 44 class Element; 45 class FrameView; 46 class PendingCallbacks; 47 48 class XMLTokenizer : public Tokenizer, public CachedResourceClient { 49 public: 50 XMLTokenizer(Document *, FrameView * = 0); 51 XMLTokenizer(DocumentFragment *, Element *); 52 ~XMLTokenizer(); 53 54 enum ErrorType { warning, nonFatal, fatal }; 55 56 // from Tokenizer 57 virtual bool write(const SegmentedString &str, bool); 58 virtual void finish(); 59 virtual bool isWaitingForScripts() const; 60 virtual void stopParsing(); wellFormed()61 virtual bool wellFormed() const { return !m_sawError; } 62 virtual int lineNumber() const; 63 virtual int columnNumber() const; 64 65 // from CachedObjectClient 66 virtual void notifyFinished(CachedResource *finishedObj); 67 68 // callbacks from parser expat 69 void startElementNs(const XML_Char *name, const XML_Char **atts); 70 void endElementNs(); 71 void characters(const XML_Char *s, int len); 72 void processingInstruction(const XML_Char *target, const XML_Char *data); 73 void comment(const XML_Char *s); 74 void startCdata(); 75 void endCdata(); 76 77 void error(ErrorType type, const char* m, int lineNumber, int columnNumber); 78 79 // utilities getXMLParser()80 XML_Parser getXMLParser() const { return m_parser; } setXMLParser(XML_Parser parser)81 void setXMLParser(XML_Parser parser) { m_parser = parser; } 82 83 private: 84 void setCurrentNode(Node*); 85 86 void end(); 87 88 void pauseParsing(); 89 void resumeParsing(); 90 91 void reportError(); 92 void insertErrorMessageBlock(); 93 94 bool enterText(); 95 void exitText(); 96 97 Document *m_doc; 98 FrameView *m_view; 99 100 XML_Parser m_parser; 101 102 Node *m_currentNode; 103 bool m_currentNodeIsReferenced; 104 105 bool m_sawError; 106 bool m_sawXSLTransform; 107 bool m_sawFirstElement; 108 109 bool m_parserPaused; 110 bool m_requestingScript; 111 bool m_finishCalled; 112 113 int m_errorCount; 114 String m_errorMessages; 115 116 CachedScript *m_pendingScript; 117 RefPtr<Element> m_scriptElement; 118 int m_scriptStartLine; 119 120 bool m_parsingFragment; 121 String m_defaultNamespaceURI; 122 123 typedef HashMap<String, String> PrefixForNamespaceMap; 124 PrefixForNamespaceMap m_prefixToNamespaceMap; 125 126 OwnPtr<PendingCallbacks> m_pendingCallbacks; 127 SegmentedString m_pendingSrc; 128 }; 129 130 HashMap<String, String> parseAttributes(const String&, bool& attrsOK); 131 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0); 132 133 } // namespace WebCore 134 135 #else // USE(EXPAT) 136 137 #include "CachedResourceClient.h" 138 #include "CachedResourceHandle.h" 139 #include "SegmentedString.h" 140 #include "StringHash.h" 141 #include "Tokenizer.h" 142 #include <wtf/HashMap.h> 143 #include <wtf/OwnPtr.h> 144 145 #if USE(QXMLSTREAM) 146 #include <QtXml/qxmlstream.h> 147 #else 148 #include <libxml/tree.h> 149 #include <libxml/xmlstring.h> 150 #endif 151 152 namespace WebCore { 153 154 class Node; 155 class CachedScript; 156 class DocLoader; 157 class DocumentFragment; 158 class Document; 159 class Element; 160 class FrameView; 161 class PendingCallbacks; 162 class ScriptElement; 163 164 class XMLTokenizer : public Tokenizer, public CachedResourceClient { 165 public: 166 XMLTokenizer(Document*, FrameView* = 0); 167 XMLTokenizer(DocumentFragment*, Element*); 168 ~XMLTokenizer(); 169 170 enum ErrorType { warning, nonFatal, fatal }; 171 172 // from Tokenizer 173 virtual void write(const SegmentedString&, bool appendData); 174 virtual void finish(); 175 virtual bool isWaitingForScripts() const; 176 virtual void stopParsing(); 177 178 void end(); 179 180 void pauseParsing(); 181 void resumeParsing(); 182 setIsXHTMLDocument(bool isXHTML)183 void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; } isXHTMLDocument()184 bool isXHTMLDocument() const { return m_isXHTMLDocument; } 185 #if ENABLE(XHTMLMP) setIsXHTMLMPDocument(bool isXHTML)186 void setIsXHTMLMPDocument(bool isXHTML) { m_isXHTMLMPDocument = isXHTML; } isXHTMLMPDocument()187 bool isXHTMLMPDocument() const { return m_isXHTMLMPDocument; } 188 #endif 189 #if ENABLE(WML) 190 bool isWMLDocument() const; 191 #endif 192 193 // from CachedResourceClient 194 virtual void notifyFinished(CachedResource* finishedObj); 195 196 197 void handleError(ErrorType type, const char* m, int lineNumber, int columnNumber); 198 wellFormed()199 virtual bool wellFormed() const { return !m_sawError; } 200 201 int lineNumber() const; 202 int columnNumber() const; 203 204 #if USE(QXMLSTREAM) 205 private: 206 void parse(); 207 void startDocument(); 208 void parseStartElement(); 209 void parseEndElement(); 210 void parseCharacters(); 211 void parseProcessingInstruction(); 212 void parseCdata(); 213 void parseComment(); 214 void endDocument(); 215 void parseDtd(); 216 bool hasError() const; 217 #else 218 public: 219 // callbacks from parser SAX 220 void error(ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0); 221 void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces, 222 const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes); 223 void endElementNs(); 224 void characters(const xmlChar* s, int len); 225 void processingInstruction(const xmlChar* target, const xmlChar* data); 226 void cdataBlock(const xmlChar* s, int len); 227 void comment(const xmlChar* s); 228 void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone); 229 void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID); 230 void endDocument(); 231 #endif 232 private: 233 friend bool parseXMLDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* parent); 234 235 void initializeParserContext(const char* chunk = 0); 236 void setCurrentNode(Node*); 237 238 void insertErrorMessageBlock(); 239 240 bool enterText(); 241 void exitText(); 242 243 void doWrite(const String&); 244 void doEnd(); 245 246 Document* m_doc; 247 FrameView* m_view; 248 249 String m_originalSourceForTransform; 250 251 #if USE(QXMLSTREAM) 252 QXmlStreamReader m_stream; 253 bool m_wroteText; 254 #else 255 xmlParserCtxtPtr m_context; 256 OwnPtr<PendingCallbacks> m_pendingCallbacks; 257 Vector<xmlChar> m_bufferedText; 258 #endif 259 Node* m_currentNode; 260 bool m_currentNodeIsReferenced; 261 262 bool m_sawError; 263 bool m_sawXSLTransform; 264 bool m_sawFirstElement; 265 bool m_isXHTMLDocument; 266 #if ENABLE(XHTMLMP) 267 bool m_isXHTMLMPDocument; 268 bool m_hasDocTypeDeclaration; 269 #endif 270 271 bool m_parserPaused; 272 bool m_requestingScript; 273 bool m_finishCalled; 274 275 int m_errorCount; 276 int m_lastErrorLine; 277 int m_lastErrorColumn; 278 String m_errorMessages; 279 280 CachedResourceHandle<CachedScript> m_pendingScript; 281 RefPtr<Element> m_scriptElement; 282 int m_scriptStartLine; 283 284 bool m_parsingFragment; 285 String m_defaultNamespaceURI; 286 287 typedef HashMap<String, String> PrefixForNamespaceMap; 288 PrefixForNamespaceMap m_prefixToNamespaceMap; 289 SegmentedString m_pendingSrc; 290 }; 291 292 #if ENABLE(XSLT) 293 void* xmlDocPtrForString(DocLoader*, const String& source, const String& url); 294 #endif 295 296 HashMap<String, String> parseAttributes(const String&, bool& attrsOK); 297 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0); 298 299 } // namespace WebCore 300 301 #endif // USE(EXPAT) 302 303 #endif // XMLTokenizer_h 304