1 /*
2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB. If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26 #include "config.h"
27 #include "XMLTokenizer.h"
28
29 #include "CDATASection.h"
30 #include "CString.h"
31 #include "CachedScript.h"
32 #include "Comment.h"
33 #include "DocLoader.h"
34 #include "Document.h"
35 #include "DocumentFragment.h"
36 #include "DocumentType.h"
37 #include "Frame.h"
38 #include "FrameLoader.h"
39 #include "FrameView.h"
40 #include "HTMLLinkElement.h"
41 #include "HTMLNames.h"
42 #include "HTMLStyleElement.h"
43 #include "ProcessingInstruction.h"
44 #include "ResourceError.h"
45 #include "ResourceHandle.h"
46 #include "ResourceRequest.h"
47 #include "ResourceResponse.h"
48 #include "ScriptController.h"
49 #include "ScriptElement.h"
50 #include "ScriptSourceCode.h"
51 #include "ScriptValue.h"
52 #include "TextResourceDecoder.h"
53 #include <wtf/Platform.h>
54 #include <wtf/StringExtras.h>
55 #include <wtf/Threading.h>
56 #include <wtf/Vector.h>
57
58 #if ENABLE(SVG)
59 #include "SVGNames.h"
60 #include "SVGStyleElement.h"
61 #endif
62
63 using namespace std;
64
65 namespace WebCore {
66
67 using namespace HTMLNames;
68
// Cap used by XMLTokenizer::handleError(): once m_errorCount reaches this value,
// further warnings and non-fatal errors are no longer added to the error message
// text. Fatal errors are always recorded regardless of this limit.
const int maxErrors = 25;
70
71 #if ENABLE(WML)
isWMLDocument() const72 bool XMLTokenizer::isWMLDocument() const
73 {
74 if (m_doc)
75 return m_doc->isWMLDocument();
76
77 return false;
78 }
79 #endif
80
setCurrentNode(Node * n)81 void XMLTokenizer::setCurrentNode(Node* n)
82 {
83 bool nodeNeedsReference = n && n != m_doc;
84 if (nodeNeedsReference)
85 n->ref();
86 if (m_currentNodeIsReferenced)
87 m_currentNode->deref();
88 m_currentNode = n;
89 m_currentNodeIsReferenced = nodeNeedsReference;
90 }
91
write(const SegmentedString & s,bool)92 void XMLTokenizer::write(const SegmentedString& s, bool /*appendData*/)
93 {
94 String parseString = s.toString();
95
96 if (m_sawXSLTransform || !m_sawFirstElement)
97 m_originalSourceForTransform += parseString;
98
99 if (m_parserStopped || m_sawXSLTransform)
100 return;
101
102 if (m_parserPaused) {
103 m_pendingSrc.append(s);
104 return;
105 }
106
107 doWrite(s.toString());
108 }
109
handleError(ErrorType type,const char * m,int lineNumber,int columnNumber)110 void XMLTokenizer::handleError(ErrorType type, const char* m, int lineNumber, int columnNumber)
111 {
112 if (type == fatal || (m_errorCount < maxErrors && m_lastErrorLine != lineNumber && m_lastErrorColumn != columnNumber)) {
113 switch (type) {
114 case warning:
115 m_errorMessages += String::format("warning on line %d at column %d: %s", lineNumber, columnNumber, m);
116 break;
117 case fatal:
118 case nonFatal:
119 m_errorMessages += String::format("error on line %d at column %d: %s", lineNumber, columnNumber, m);
120 }
121
122 m_lastErrorLine = lineNumber;
123 m_lastErrorColumn = columnNumber;
124 ++m_errorCount;
125 }
126
127 if (type != warning)
128 m_sawError = true;
129
130 if (type == fatal)
131 stopParsing();
132 }
133
enterText()134 bool XMLTokenizer::enterText()
135 {
136 #if !USE(QXMLSTREAM)
137 ASSERT(m_bufferedText.size() == 0);
138 #endif
139 RefPtr<Node> newNode = new Text(m_doc, "");
140 if (!m_currentNode->addChild(newNode.get()))
141 return false;
142 setCurrentNode(newNode.get());
143 return true;
144 }
145
146 #if !USE(QXMLSTREAM)
toString(const xmlChar * str,unsigned len)147 static inline String toString(const xmlChar* str, unsigned len)
148 {
149 return UTF8Encoding().decode(reinterpret_cast<const char*>(str), len);
150 }
151 #endif
152
153
exitText()154 void XMLTokenizer::exitText()
155 {
156 if (m_parserStopped)
157 return;
158
159 if (!m_currentNode || !m_currentNode->isTextNode())
160 return;
161
162 #if !USE(QXMLSTREAM)
163 ExceptionCode ec = 0;
164 static_cast<Text*>(m_currentNode)->appendData(toString(m_bufferedText.data(), m_bufferedText.size()), ec);
165 Vector<xmlChar> empty;
166 m_bufferedText.swap(empty);
167 #endif
168
169 if (m_view && m_currentNode && !m_currentNode->attached())
170 m_currentNode->attach();
171
172 // FIXME: What's the right thing to do if the parent is really 0?
173 // Just leaving the current node set to the text node doesn't make much sense.
174 if (Node* par = m_currentNode->parentNode())
175 setCurrentNode(par);
176 }
177
end()178 void XMLTokenizer::end()
179 {
180 doEnd();
181
182 if (m_sawError)
183 insertErrorMessageBlock();
184 else {
185 exitText();
186 m_doc->updateStyleSelector();
187 }
188
189 setCurrentNode(0);
190 if (!m_parsingFragment)
191 m_doc->finishedParsing();
192 }
193
finish()194 void XMLTokenizer::finish()
195 {
196 if (m_parserPaused)
197 m_finishCalled = true;
198 else
199 end();
200 }
201
createXHTMLParserErrorHeader(Document * doc,const String & errorMessages)202 static inline RefPtr<Element> createXHTMLParserErrorHeader(Document* doc, const String& errorMessages)
203 {
204 RefPtr<Element> reportElement = doc->createElement(QualifiedName(nullAtom, "parsererror", xhtmlNamespaceURI), false);
205 reportElement->setAttribute(styleAttr, "display: block; white-space: pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black");
206
207 ExceptionCode ec = 0;
208 RefPtr<Element> h3 = doc->createElement(h3Tag, false);
209 reportElement->appendChild(h3.get(), ec);
210 h3->appendChild(doc->createTextNode("This page contains the following errors:"), ec);
211
212 RefPtr<Element> fixed = doc->createElement(divTag, false);
213 reportElement->appendChild(fixed.get(), ec);
214 fixed->setAttribute(styleAttr, "font-family:monospace;font-size:12px");
215 fixed->appendChild(doc->createTextNode(errorMessages), ec);
216
217 h3 = doc->createElement(h3Tag, false);
218 reportElement->appendChild(h3.get(), ec);
219 h3->appendChild(doc->createTextNode("Below is a rendering of the page up to the first error."), ec);
220
221 return reportElement;
222 }
223
// Inserts the parser error report (built by createXHTMLParserErrorHeader from
// m_errorMessages) into the document, as the first child of a suitable container:
//   - no document element: an <html>/<body> pair is created and the <body> is used;
//   - SVG root (ENABLE(SVG)) or WML document (ENABLE(WML)): the existing root is
//     re-parented under a new <html>/<body> pair and the <body> is used;
//   - otherwise the existing document element is used.
// With ENABLE(XSLT), an explanatory paragraph is appended to the report when the
// document has a transform source document.
void XMLTokenizer::insertErrorMessageBlock()
{
#if USE(QXMLSTREAM)
    // No error block is inserted while parsing a fragment.
    if (m_parsingFragment)
        return;
#endif
    // One or more errors occurred during parsing of the code. Display an error block to the user above
    // the normal content (the DOM tree is created manually and includes line/col info regarding
    // where the errors are located)

    // Create elements for display
    ExceptionCode ec = 0;
    Document* doc = m_doc;
    // Note: from here on |documentElement| names the node that will receive the
    // report; in the branches below it is re-pointed at a newly created <body>.
    Node* documentElement = doc->documentElement();
    if (!documentElement) {
        // Empty document: create <html><body/></html> and report into the body.
        RefPtr<Node> rootElement = doc->createElement(htmlTag, false);
        doc->appendChild(rootElement, ec);
        RefPtr<Node> body = doc->createElement(bodyTag, false);
        rootElement->appendChild(body, ec);
        documentElement = body.get();
    }
#if ENABLE(SVG)
    else if (documentElement->namespaceURI() == SVGNames::svgNamespaceURI) {
        // SVG root: move it inside a new <html><body>, then append the new root to the document.
        RefPtr<Node> rootElement = doc->createElement(htmlTag, false);
        RefPtr<Node> body = doc->createElement(bodyTag, false);
        rootElement->appendChild(body, ec);
        body->appendChild(documentElement, ec);
        doc->appendChild(rootElement.get(), ec);
        documentElement = body.get();
    }
#endif
#if ENABLE(WML)
    else if (isWMLDocument()) {
        // WML document: same wrapping as for an SVG root.
        RefPtr<Node> rootElement = doc->createElement(htmlTag, false);
        RefPtr<Node> body = doc->createElement(bodyTag, false);
        rootElement->appendChild(body, ec);
        body->appendChild(documentElement, ec);
        doc->appendChild(rootElement.get(), ec);
        documentElement = body.get();
    }
#endif

    // Put the report ahead of any existing children of the chosen container.
    RefPtr<Element> reportElement = createXHTMLParserErrorHeader(doc, m_errorMessages);
    documentElement->insertBefore(reportElement, documentElement->firstChild(), ec);
#if ENABLE(XSLT)
    if (doc->transformSourceDocument()) {
        RefPtr<Element> par = doc->createElement(pTag, false);
        reportElement->appendChild(par, ec);
        par->setAttribute(styleAttr, "white-space: normal");
        par->appendChild(doc->createTextNode("This document was created as the result of an XSL transformation. The line and column numbers given are from the transformed result."), ec);
    }
#endif
    doc->updateStyleIfNeeded();
}
278
// Callback for the pending external script (m_pendingScript).
// NOTE(review): presumably invoked by the cached-resource machinery once the
// script has finished loading — confirm against the client interface.
//
// Detaches from the cached script, then either dispatches an error event on the
// script element or executes the script and dispatches its load event. Parsing is
// resumed afterwards unless m_requestingScript is set.
//
// unusedResource  only checked (in an assertion) to be m_pendingScript.
void XMLTokenizer::notifyFinished(CachedResource* unusedResource)
{
    ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
    ASSERT(m_pendingScript->accessCount() > 0);

    // Capture the source and the error state before letting go of the cached script.
    ScriptSourceCode sourceCode(m_pendingScript.get());
    bool errorOccurred = m_pendingScript->errorOccurred();

    m_pendingScript->removeClient(this);
    m_pendingScript = 0;

    // Keep the script element alive locally while clearing the member.
    RefPtr<Element> e = m_scriptElement;
    m_scriptElement = 0;

    ScriptElement* scriptElement = toScriptElement(e.get());
    ASSERT(scriptElement);

    if (errorOccurred)
        scriptElement->dispatchErrorEvent();
    else {
        m_view->frame()->loader()->executeScript(sourceCode);
        scriptElement->dispatchLoadEvent();
    }

    // NOTE(review): m_scriptElement was already cleared above; presumably this
    // second reset covers it being set again during script execution or event
    // dispatch — confirm before removing.
    m_scriptElement = 0;

    if (!m_requestingScript)
        resumeParsing();
}
308
isWaitingForScripts() const309 bool XMLTokenizer::isWaitingForScripts() const
310 {
311 return m_pendingScript;
312 }
313
pauseParsing()314 void XMLTokenizer::pauseParsing()
315 {
316 if (m_parsingFragment)
317 return;
318
319 m_parserPaused = true;
320 }
321
322 }
323