• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
 * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved.
 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */
24 
25 #ifndef XMLTokenizer_h
26 #define XMLTokenizer_h
27 
28 #if USE(EXPAT)
29 
30 #include "CachedResourceClient.h"
31 #include "SegmentedString.h"
32 #include "StringHash.h"
33 #include "Tokenizer.h"
34 #include <libexpat/expat.h>
35 #include <wtf/HashMap.h>
36 #include <wtf/OwnPtr.h>
37 
38 namespace WebCore {
39 
40     class Node;
41     class CachedScript;
42     class DocumentFragment;
43     class Document;
44     class Element;
45     class FrameView;
46     class PendingCallbacks;
47 
48     class XMLTokenizer : public Tokenizer, public CachedResourceClient {
49     public:
50         XMLTokenizer(Document *, FrameView * = 0);
51         XMLTokenizer(DocumentFragment *, Element *);
52         ~XMLTokenizer();
53 
54         enum ErrorType { warning, nonFatal, fatal };
55 
56         // from Tokenizer
57         virtual bool write(const SegmentedString &str, bool);
58         virtual void finish();
59         virtual bool isWaitingForScripts() const;
60         virtual void stopParsing();
wellFormed()61         virtual bool wellFormed() const { return !m_sawError; }
62         virtual int lineNumber() const;
63         virtual int columnNumber() const;
64 
65         // from CachedObjectClient
66         virtual void notifyFinished(CachedResource *finishedObj);
67 
68         // callbacks from parser expat
69         void startElementNs(const XML_Char *name, const XML_Char **atts);
70         void endElementNs();
71         void characters(const XML_Char *s, int len);
72         void processingInstruction(const XML_Char *target, const XML_Char *data);
73         void comment(const XML_Char *s);
74         void startCdata();
75         void endCdata();
76 
77         void error(ErrorType type, const char* m, int lineNumber, int columnNumber);
78 
79         // utilities
getXMLParser()80         XML_Parser getXMLParser() const { return m_parser; }
setXMLParser(XML_Parser parser)81         void setXMLParser(XML_Parser parser) { m_parser = parser; }
82 
83     private:
84         void setCurrentNode(Node*);
85 
86         void end();
87 
88         void pauseParsing();
89         void resumeParsing();
90 
91         void reportError();
92         void insertErrorMessageBlock();
93 
94         bool enterText();
95         void exitText();
96 
97         Document *m_doc;
98         FrameView *m_view;
99 
100         XML_Parser m_parser;
101 
102         Node *m_currentNode;
103         bool m_currentNodeIsReferenced;
104 
105         bool m_sawError;
106         bool m_sawXSLTransform;
107         bool m_sawFirstElement;
108 
109         bool m_parserPaused;
110         bool m_requestingScript;
111         bool m_finishCalled;
112 
113         int m_errorCount;
114         String m_errorMessages;
115 
116         CachedScript *m_pendingScript;
117         RefPtr<Element> m_scriptElement;
118         int m_scriptStartLine;
119 
120         bool m_parsingFragment;
121         String m_defaultNamespaceURI;
122 
123         typedef HashMap<String, String> PrefixForNamespaceMap;
124         PrefixForNamespaceMap m_prefixToNamespaceMap;
125 
126         OwnPtr<PendingCallbacks> m_pendingCallbacks;
127         SegmentedString m_pendingSrc;
128     };
129 
130 HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
131 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0);
132 
133 } // namespace WebCore
134 
135 #else   // USE(EXPAT)
136 
137 #include "CachedResourceClient.h"
138 #include "CachedResourceHandle.h"
139 #include "SegmentedString.h"
140 #include "StringHash.h"
141 #include "Tokenizer.h"
142 #include <wtf/HashMap.h>
143 #include <wtf/OwnPtr.h>
144 
145 #if USE(QXMLSTREAM)
146 #include <QtXml/qxmlstream.h>
147 #else
148 #include <libxml/tree.h>
149 #include <libxml/xmlstring.h>
150 #endif
151 
152 namespace WebCore {
153 
154     class Node;
155     class CachedScript;
156     class DocLoader;
157     class DocumentFragment;
158     class Document;
159     class Element;
160     class FrameView;
161     class PendingCallbacks;
162     class ScriptElement;
163 
164     class XMLTokenizer : public Tokenizer, public CachedResourceClient {
165     public:
166         XMLTokenizer(Document*, FrameView* = 0);
167         XMLTokenizer(DocumentFragment*, Element*);
168         ~XMLTokenizer();
169 
170         enum ErrorType { warning, nonFatal, fatal };
171 
172         // from Tokenizer
173         virtual void write(const SegmentedString&, bool appendData);
174         virtual void finish();
175         virtual bool isWaitingForScripts() const;
176         virtual void stopParsing();
177 
178         void end();
179 
180         void pauseParsing();
181         void resumeParsing();
182 
setIsXHTMLDocument(bool isXHTML)183         void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; }
isXHTMLDocument()184         bool isXHTMLDocument() const { return m_isXHTMLDocument; }
185 #if ENABLE(XHTMLMP)
setIsXHTMLMPDocument(bool isXHTML)186         void setIsXHTMLMPDocument(bool isXHTML) { m_isXHTMLMPDocument = isXHTML; }
isXHTMLMPDocument()187         bool isXHTMLMPDocument() const { return m_isXHTMLMPDocument; }
188 #endif
189 #if ENABLE(WML)
190         bool isWMLDocument() const;
191 #endif
192 
193         // from CachedResourceClient
194         virtual void notifyFinished(CachedResource* finishedObj);
195 
196 
197         void handleError(ErrorType type, const char* m, int lineNumber, int columnNumber);
198 
wellFormed()199         virtual bool wellFormed() const { return !m_sawError; }
200 
201         int lineNumber() const;
202         int columnNumber() const;
203 
204 #if USE(QXMLSTREAM)
205 private:
206         void parse();
207         void startDocument();
208         void parseStartElement();
209         void parseEndElement();
210         void parseCharacters();
211         void parseProcessingInstruction();
212         void parseCdata();
213         void parseComment();
214         void endDocument();
215         void parseDtd();
216         bool hasError() const;
217 #else
218 public:
219         // callbacks from parser SAX
220         void error(ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0);
221         void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
222                             const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes);
223         void endElementNs();
224         void characters(const xmlChar* s, int len);
225         void processingInstruction(const xmlChar* target, const xmlChar* data);
226         void cdataBlock(const xmlChar* s, int len);
227         void comment(const xmlChar* s);
228         void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone);
229         void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID);
230         void endDocument();
231 #endif
232     private:
233         friend bool parseXMLDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* parent);
234 
235         void initializeParserContext(const char* chunk = 0);
236         void setCurrentNode(Node*);
237 
238         void insertErrorMessageBlock();
239 
240         bool enterText();
241         void exitText();
242 
243         void doWrite(const String&);
244         void doEnd();
245 
246         Document* m_doc;
247         FrameView* m_view;
248 
249         String m_originalSourceForTransform;
250 
251 #if USE(QXMLSTREAM)
252         QXmlStreamReader m_stream;
253         bool m_wroteText;
254 #else
255         xmlParserCtxtPtr m_context;
256         OwnPtr<PendingCallbacks> m_pendingCallbacks;
257         Vector<xmlChar> m_bufferedText;
258 #endif
259         Node* m_currentNode;
260         bool m_currentNodeIsReferenced;
261 
262         bool m_sawError;
263         bool m_sawXSLTransform;
264         bool m_sawFirstElement;
265         bool m_isXHTMLDocument;
266 #if ENABLE(XHTMLMP)
267         bool m_isXHTMLMPDocument;
268         bool m_hasDocTypeDeclaration;
269 #endif
270 
271         bool m_parserPaused;
272         bool m_requestingScript;
273         bool m_finishCalled;
274 
275         int m_errorCount;
276         int m_lastErrorLine;
277         int m_lastErrorColumn;
278         String m_errorMessages;
279 
280         CachedResourceHandle<CachedScript> m_pendingScript;
281         RefPtr<Element> m_scriptElement;
282         int m_scriptStartLine;
283 
284         bool m_parsingFragment;
285         String m_defaultNamespaceURI;
286 
287         typedef HashMap<String, String> PrefixForNamespaceMap;
288         PrefixForNamespaceMap m_prefixToNamespaceMap;
289         SegmentedString m_pendingSrc;
290     };
291 
292 #if ENABLE(XSLT)
293 void* xmlDocPtrForString(DocLoader*, const String& source, const String& url);
294 #endif
295 
296 HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
297 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0);
298 
299 } // namespace WebCore
300 
301 #endif // USE(EXPAT)
302 
303 #endif // XMLTokenizer_h
304