1 /* 2 * Copyright (C) 2000 Peter Kelly (pmk@post.com) 3 * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved. 4 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org) 5 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 6 * 7 * This library is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Library General Public 9 * License as published by the Free Software Foundation; either 10 * version 2 of the License, or (at your option) any later version. 11 * 12 * This library is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Library General Public License for more details. 16 * 17 * You should have received a copy of the GNU Library General Public License 18 * along with this library; see the file COPYING.LIB. If not, write to 19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 20 * Boston, MA 02110-1301, USA. 21 * 22 */ 23 24 #ifndef XMLTokenizer_h 25 #define XMLTokenizer_h 26 27 #if USE(EXPAT) 28 29 #include "CachedResourceClient.h" 30 #include "SegmentedString.h" 31 #include "StringHash.h" 32 #include "Tokenizer.h" 33 #include <libexpat/expat.h> 34 #include <wtf/HashMap.h> 35 #include <wtf/OwnPtr.h> 36 37 namespace WebCore { 38 39 class Node; 40 class CachedScript; 41 class DocumentFragment; 42 class Document; 43 class Element; 44 class FrameView; 45 class PendingCallbacks; 46 47 class XMLTokenizer : public Tokenizer, public CachedResourceClient { 48 public: 49 XMLTokenizer(Document *, FrameView * = 0); 50 XMLTokenizer(DocumentFragment *, Element *); 51 ~XMLTokenizer(); 52 53 enum ErrorType { warning, nonFatal, fatal }; 54 55 // from Tokenizer 56 virtual bool write(const SegmentedString &str, bool); 57 virtual void finish(); 58 virtual bool isWaitingForScripts() const; 59 virtual void stopParsing(); wellFormed()60 virtual bool wellFormed() const { return !m_sawError; } 61 virtual int lineNumber() const; 62 virtual int columnNumber() const; 63 64 // from CachedObjectClient 65 virtual void notifyFinished(CachedResource *finishedObj); 66 67 // callbacks from parser expat 68 void startElementNs(const XML_Char *name, const XML_Char **atts); 69 void endElementNs(); 70 void characters(const XML_Char *s, int len); 71 void processingInstruction(const XML_Char *target, const XML_Char *data); 72 void comment(const XML_Char *s); 73 void startCdata(); 74 void endCdata(); 75 76 void error(ErrorType type, const char* m, int lineNumber, int columnNumber); 77 78 // utilities getXMLParser()79 XML_Parser getXMLParser() const { return m_parser; } setXMLParser(XML_Parser parser)80 void setXMLParser(XML_Parser parser) { m_parser = parser; } 81 82 private: 83 void setCurrentNode(Node*); 84 85 void end(); 86 87 void pauseParsing(); 88 void resumeParsing(); 89 90 void reportError(); 91 void insertErrorMessageBlock(); 92 93 bool enterText(); 94 void exitText(); 95 96 Document *m_doc; 97 FrameView *m_view; 98 99 XML_Parser m_parser; 100 101 Node *m_currentNode; 102 bool m_currentNodeIsReferenced; 103 104 bool m_sawError; 105 bool m_sawXSLTransform; 106 bool m_sawFirstElement; 107 108 bool m_parserPaused; 109 bool m_requestingScript; 110 bool m_finishCalled; 111 112 int m_errorCount; 113 String m_errorMessages; 114 115 CachedScript *m_pendingScript; 116 RefPtr<Element> m_scriptElement; 117 int m_scriptStartLine; 118 119 bool m_parsingFragment; 120 String m_defaultNamespaceURI; 121 122 typedef HashMap<String, String> PrefixForNamespaceMap; 123 PrefixForNamespaceMap m_prefixToNamespaceMap; 124 125 OwnPtr<PendingCallbacks> m_pendingCallbacks; 126 SegmentedString m_pendingSrc; 127 }; 128 129 HashMap<String, String> parseAttributes(const String&, bool& attrsOK); 130 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0); 131 132 } // namespace WebCore 133 134 #else // USE(EXPAT) 135 136 #include "CachedResourceClient.h" 137 #include "CachedResourceHandle.h" 138 #include "SegmentedString.h" 139 #include "StringHash.h" 140 #include "Tokenizer.h" 141 #include <wtf/HashMap.h> 142 #include <wtf/OwnPtr.h> 143 144 #if USE(QXMLSTREAM) 145 #include <QtXml/qxmlstream.h> 146 #else 147 #include <libxml/tree.h> 148 #include <libxml/xmlstring.h> 149 #endif 150 151 namespace WebCore { 152 153 class Node; 154 class CachedScript; 155 class DocLoader; 156 class DocumentFragment; 157 class Document; 158 class Element; 159 class FrameView; 160 class PendingCallbacks; 161 class ScriptElement; 162 163 class XMLTokenizer : public Tokenizer, public CachedResourceClient { 164 public: 165 XMLTokenizer(Document*, FrameView* = 0); 166 XMLTokenizer(DocumentFragment*, Element*); 167 ~XMLTokenizer(); 168 169 enum ErrorType { warning, nonFatal, fatal }; 170 171 // from Tokenizer 172 virtual bool write(const SegmentedString&, bool appendData); 173 virtual void finish(); 174 virtual bool isWaitingForScripts() const; 175 virtual void stopParsing(); 176 177 void end(); 178 179 void pauseParsing(); 180 void resumeParsing(); 181 setIsXHTMLDocument(bool isXHTML)182 void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; } isXHTMLDocument()183 bool isXHTMLDocument() const { return m_isXHTMLDocument; } 184 #if ENABLE(WML) 185 bool isWMLDocument() const; 186 #endif 187 188 // from CachedResourceClient 189 virtual void notifyFinished(CachedResource* finishedObj); 190 191 192 void handleError(ErrorType type, const char* m, int lineNumber, int columnNumber); 193 wellFormed()194 virtual bool wellFormed() const { return !m_sawError; } 195 196 int lineNumber() const; 197 int columnNumber() const; 198 199 #if USE(QXMLSTREAM) 200 private: 201 void parse(); 202 void startDocument(); 203 void parseStartElement(); 204 void parseEndElement(); 205 void parseCharacters(); 206 void parseProcessingInstruction(); 207 void parseCdata(); 208 void parseComment(); 209 void endDocument(); 210 void parseDtd(); 211 bool hasError() const; 212 #else 213 public: 214 // callbacks from parser SAX 215 void error(ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0); 216 void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces, 217 const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes); 218 void endElementNs(); 219 void characters(const xmlChar* s, int len); 220 void processingInstruction(const xmlChar* target, const xmlChar* data); 221 void cdataBlock(const xmlChar* s, int len); 222 void comment(const xmlChar* s); 223 void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone); 224 void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID); 225 void endDocument(); 226 #endif 227 private: 228 friend bool parseXMLDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* parent); 229 230 void initializeParserContext(const char* chunk = 0); 231 void setCurrentNode(Node*); 232 233 void insertErrorMessageBlock(); 234 235 bool enterText(); 236 void exitText(); 237 238 void doWrite(const String&); 239 void doEnd(); 240 241 Document* m_doc; 242 FrameView* m_view; 243 244 String m_originalSourceForTransform; 245 246 #if USE(QXMLSTREAM) 247 QXmlStreamReader m_stream; 248 bool m_wroteText; 249 #else 250 xmlParserCtxtPtr m_context; 251 OwnPtr<PendingCallbacks> m_pendingCallbacks; 252 Vector<xmlChar> m_bufferedText; 253 #endif 254 Node* m_currentNode; 255 bool m_currentNodeIsReferenced; 256 257 bool m_sawError; 258 bool m_sawXSLTransform; 259 bool m_sawFirstElement; 260 bool m_isXHTMLDocument; 261 262 bool m_parserPaused; 263 bool m_requestingScript; 264 bool m_finishCalled; 265 266 int m_errorCount; 267 int m_lastErrorLine; 268 int m_lastErrorColumn; 269 String m_errorMessages; 270 271 CachedResourceHandle<CachedScript> m_pendingScript; 272 RefPtr<Element> m_scriptElement; 273 int m_scriptStartLine; 274 275 bool m_parsingFragment; 276 String m_defaultNamespaceURI; 277 278 typedef HashMap<String, String> PrefixForNamespaceMap; 279 PrefixForNamespaceMap m_prefixToNamespaceMap; 280 SegmentedString m_pendingSrc; 281 }; 282 283 #if ENABLE(XSLT) 284 void* xmlDocPtrForString(DocLoader*, const String& source, const String& url); 285 void setLoaderForLibXMLCallbacks(DocLoader*); 286 #endif 287 288 HashMap<String, String> parseAttributes(const String&, bool& attrsOK); 289 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0); 290 291 } // namespace WebCore 292 293 #endif // USE(EXPAT) 294 295 #endif // XMLTokenizer_h 296