/*
 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
 * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved.
 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

24 #ifndef XMLTokenizer_h
25 #define XMLTokenizer_h
26 
27 #if USE(EXPAT)
28 
29 #include "CachedResourceClient.h"
30 #include "SegmentedString.h"
31 #include "StringHash.h"
32 #include "Tokenizer.h"
33 #include <libexpat/expat.h>
34 #include <wtf/HashMap.h>
35 #include <wtf/OwnPtr.h>
36 
37 namespace WebCore {
38 
39     class Node;
40     class CachedScript;
41     class DocumentFragment;
42     class Document;
43     class Element;
44     class FrameView;
45     class PendingCallbacks;
46 
47     class XMLTokenizer : public Tokenizer, public CachedResourceClient {
48     public:
49         XMLTokenizer(Document *, FrameView * = 0);
50         XMLTokenizer(DocumentFragment *, Element *);
51         ~XMLTokenizer();
52 
53         enum ErrorType { warning, nonFatal, fatal };
54 
55         // from Tokenizer
56         virtual bool write(const SegmentedString &str, bool);
57         virtual void finish();
58         virtual bool isWaitingForScripts() const;
59         virtual void stopParsing();
wellFormed()60         virtual bool wellFormed() const { return !m_sawError; }
61         virtual int lineNumber() const;
62         virtual int columnNumber() const;
63 
64         // from CachedObjectClient
65         virtual void notifyFinished(CachedResource *finishedObj);
66 
67         // callbacks from parser expat
68         void startElementNs(const XML_Char *name, const XML_Char **atts);
69         void endElementNs();
70         void characters(const XML_Char *s, int len);
71         void processingInstruction(const XML_Char *target, const XML_Char *data);
72         void comment(const XML_Char *s);
73         void startCdata();
74         void endCdata();
75 
76         void error(ErrorType type, const char* m, int lineNumber, int columnNumber);
77 
78         // utilities
getXMLParser()79         XML_Parser getXMLParser() const { return m_parser; }
setXMLParser(XML_Parser parser)80         void setXMLParser(XML_Parser parser) { m_parser = parser; }
81 
82     private:
83         void setCurrentNode(Node*);
84 
85         void end();
86 
87         void pauseParsing();
88         void resumeParsing();
89 
90         void reportError();
91         void insertErrorMessageBlock();
92 
93         bool enterText();
94         void exitText();
95 
96         Document *m_doc;
97         FrameView *m_view;
98 
99         XML_Parser m_parser;
100 
101         Node *m_currentNode;
102         bool m_currentNodeIsReferenced;
103 
104         bool m_sawError;
105         bool m_sawXSLTransform;
106         bool m_sawFirstElement;
107 
108         bool m_parserPaused;
109         bool m_requestingScript;
110         bool m_finishCalled;
111 
112         int m_errorCount;
113         String m_errorMessages;
114 
115         CachedScript *m_pendingScript;
116         RefPtr<Element> m_scriptElement;
117         int m_scriptStartLine;
118 
119         bool m_parsingFragment;
120         String m_defaultNamespaceURI;
121 
122         typedef HashMap<String, String> PrefixForNamespaceMap;
123         PrefixForNamespaceMap m_prefixToNamespaceMap;
124 
125         OwnPtr<PendingCallbacks> m_pendingCallbacks;
126         SegmentedString m_pendingSrc;
127     };
128 
129 HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
130 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0);
131 
132 } // namespace WebCore
133 
134 #else   // USE(EXPAT)
135 
136 #include "CachedResourceClient.h"
137 #include "CachedResourceHandle.h"
138 #include "SegmentedString.h"
139 #include "StringHash.h"
140 #include "Tokenizer.h"
141 #include <wtf/HashMap.h>
142 #include <wtf/OwnPtr.h>
143 
144 #if USE(QXMLSTREAM)
145 #include <QtXml/qxmlstream.h>
146 #else
147 #include <libxml/tree.h>
148 #include <libxml/xmlstring.h>
149 #endif
150 
151 namespace WebCore {
152 
153     class Node;
154     class CachedScript;
155     class DocLoader;
156     class DocumentFragment;
157     class Document;
158     class Element;
159     class FrameView;
160     class PendingCallbacks;
161     class ScriptElement;
162 
163     class XMLTokenizer : public Tokenizer, public CachedResourceClient {
164     public:
165         XMLTokenizer(Document*, FrameView* = 0);
166         XMLTokenizer(DocumentFragment*, Element*);
167         ~XMLTokenizer();
168 
169         enum ErrorType { warning, nonFatal, fatal };
170 
171         // from Tokenizer
172         virtual bool write(const SegmentedString&, bool appendData);
173         virtual void finish();
174         virtual bool isWaitingForScripts() const;
175         virtual void stopParsing();
176 
177         void end();
178 
179         void pauseParsing();
180         void resumeParsing();
181 
setIsXHTMLDocument(bool isXHTML)182         void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; }
isXHTMLDocument()183         bool isXHTMLDocument() const { return m_isXHTMLDocument; }
184 #if ENABLE(WML)
185         bool isWMLDocument() const;
186 #endif
187 
188         // from CachedResourceClient
189         virtual void notifyFinished(CachedResource* finishedObj);
190 
191 
192         void handleError(ErrorType type, const char* m, int lineNumber, int columnNumber);
193 
wellFormed()194         virtual bool wellFormed() const { return !m_sawError; }
195 
196         int lineNumber() const;
197         int columnNumber() const;
198 
199 #if USE(QXMLSTREAM)
200 private:
201         void parse();
202         void startDocument();
203         void parseStartElement();
204         void parseEndElement();
205         void parseCharacters();
206         void parseProcessingInstruction();
207         void parseCdata();
208         void parseComment();
209         void endDocument();
210         void parseDtd();
211         bool hasError() const;
212 #else
213 public:
214         // callbacks from parser SAX
215         void error(ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0);
216         void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
217                             const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes);
218         void endElementNs();
219         void characters(const xmlChar* s, int len);
220         void processingInstruction(const xmlChar* target, const xmlChar* data);
221         void cdataBlock(const xmlChar* s, int len);
222         void comment(const xmlChar* s);
223         void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone);
224         void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID);
225         void endDocument();
226 #endif
227     private:
228         friend bool parseXMLDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* parent);
229 
230         void initializeParserContext(const char* chunk = 0);
231         void setCurrentNode(Node*);
232 
233         void insertErrorMessageBlock();
234 
235         bool enterText();
236         void exitText();
237 
238         void doWrite(const String&);
239         void doEnd();
240 
241         Document* m_doc;
242         FrameView* m_view;
243 
244         String m_originalSourceForTransform;
245 
246 #if USE(QXMLSTREAM)
247         QXmlStreamReader m_stream;
248         bool m_wroteText;
249 #else
250         xmlParserCtxtPtr m_context;
251         OwnPtr<PendingCallbacks> m_pendingCallbacks;
252         Vector<xmlChar> m_bufferedText;
253 #endif
254         Node* m_currentNode;
255         bool m_currentNodeIsReferenced;
256 
257         bool m_sawError;
258         bool m_sawXSLTransform;
259         bool m_sawFirstElement;
260         bool m_isXHTMLDocument;
261 
262         bool m_parserPaused;
263         bool m_requestingScript;
264         bool m_finishCalled;
265 
266         int m_errorCount;
267         int m_lastErrorLine;
268         int m_lastErrorColumn;
269         String m_errorMessages;
270 
271         CachedResourceHandle<CachedScript> m_pendingScript;
272         RefPtr<Element> m_scriptElement;
273         int m_scriptStartLine;
274 
275         bool m_parsingFragment;
276         String m_defaultNamespaceURI;
277 
278         typedef HashMap<String, String> PrefixForNamespaceMap;
279         PrefixForNamespaceMap m_prefixToNamespaceMap;
280         SegmentedString m_pendingSrc;
281     };
282 
283 #if ENABLE(XSLT)
284 void* xmlDocPtrForString(DocLoader*, const String& source, const String& url);
285 void setLoaderForLibXMLCallbacks(DocLoader*);
286 #endif
287 
288 HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
289 bool parseXMLDocumentFragment(const String&, DocumentFragment*, Element* parent = 0);
290 
291 } // namespace WebCore
292 
293 #endif // USE(EXPAT)
294 
295 #endif // XMLTokenizer_h
296