1 /*
2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB. If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26 #include "config.h"
27 #include "XMLDocumentParser.h"
28
29 #include "CDATASection.h"
30 #include "CachedScript.h"
31 #include "Comment.h"
32 #include "CachedResourceLoader.h"
33 #include "Document.h"
34 #include "DocumentFragment.h"
35 #include "DocumentType.h"
36 #include "Frame.h"
37 #include "FrameLoader.h"
38 #include "FrameView.h"
39 #include "HTMLLinkElement.h"
40 #include "HTMLNames.h"
41 #include "HTMLStyleElement.h"
42 #include "ImageLoader.h"
43 #include "ProcessingInstruction.h"
44 #include "ResourceError.h"
45 #include "ResourceHandle.h"
46 #include "ResourceRequest.h"
47 #include "ResourceResponse.h"
48 #include "ScriptController.h"
49 #include "ScriptElement.h"
50 #include "ScriptSourceCode.h"
51 #include "ScriptValue.h"
52 #include "TextResourceDecoder.h"
53 #include "TreeDepthLimit.h"
54 #include <wtf/text/StringConcatenate.h>
55 #include <wtf/StringExtras.h>
56 #include <wtf/Threading.h>
57 #include <wtf/Vector.h>
58
59 #if ENABLE(SVG)
60 #include "SVGNames.h"
61 #include "SVGStyleElement.h"
62 #endif
63
64 using namespace std;
65
66 namespace WebCore {
67
68 using namespace HTMLNames;
69
// Cap on how many diagnostics handleError() appends to m_errorMessages.
// Fatal errors bypass the cap and are always recorded.
const int maxErrors = 25;
71
72 #if ENABLE(WML)
// Forwards to isWMLDocument() on the Document returned by document().
// Only compiled when ENABLE(WML) is set.
bool XMLDocumentParser::isWMLDocument() const
{
    return document()->isWMLDocument();
}
77 #endif
78
pushCurrentNode(Node * n)79 void XMLDocumentParser::pushCurrentNode(Node* n)
80 {
81 ASSERT(n);
82 ASSERT(m_currentNode);
83 if (n != document())
84 n->ref();
85 m_currentNodeStack.append(m_currentNode);
86 m_currentNode = n;
87 if (m_currentNodeStack.size() > maxDOMTreeDepth)
88 handleError(fatal, "Excessive node nesting.", lineNumber(), columnNumber());
89 }
90
popCurrentNode()91 void XMLDocumentParser::popCurrentNode()
92 {
93 if (!m_currentNode)
94 return;
95 ASSERT(m_currentNodeStack.size());
96
97 if (m_currentNode != document())
98 m_currentNode->deref();
99
100 m_currentNode = m_currentNodeStack.last();
101 m_currentNodeStack.removeLast();
102 }
103
clearCurrentNodeStack()104 void XMLDocumentParser::clearCurrentNodeStack()
105 {
106 if (m_currentNode && m_currentNode != document())
107 m_currentNode->deref();
108 m_currentNode = 0;
109
110 if (m_currentNodeStack.size()) { // Aborted parsing.
111 for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
112 m_currentNodeStack[i]->deref();
113 if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
114 m_currentNodeStack[0]->deref();
115 m_currentNodeStack.clear();
116 }
117 }
118
// insert() is not expected to be called on the XML parser: the argument is
// ignored and reaching this function trips ASSERT_NOT_REACHED().
void XMLDocumentParser::insert(const SegmentedString&)
{
    ASSERT_NOT_REACHED();
}
123
append(const SegmentedString & s)124 void XMLDocumentParser::append(const SegmentedString& s)
125 {
126 String parseString = s.toString();
127
128 if (m_sawXSLTransform || !m_sawFirstElement)
129 m_originalSourceForTransform += parseString;
130
131 if (isStopped() || m_sawXSLTransform)
132 return;
133
134 if (m_parserPaused) {
135 m_pendingSrc.append(s);
136 return;
137 }
138
139 doWrite(s.toString());
140
141 // After parsing, go ahead and dispatch image beforeload events.
142 ImageLoader::dispatchPendingBeforeLoadEvents();
143 }
144
handleError(ErrorType type,const char * m,int lineNumber,int columnNumber)145 void XMLDocumentParser::handleError(ErrorType type, const char* m, int lineNumber, int columnNumber)
146 {
147 handleError(type, m, TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(lineNumber), WTF::OneBasedNumber::fromOneBasedInt(columnNumber)));
148 }
149
handleError(ErrorType type,const char * m,TextPosition1 position)150 void XMLDocumentParser::handleError(ErrorType type, const char* m, TextPosition1 position)
151 {
152 if (type == fatal || (m_errorCount < maxErrors && m_lastErrorPosition.m_line != position.m_line && m_lastErrorPosition.m_column != position.m_column)) {
153 switch (type) {
154 case warning:
155 m_errorMessages += makeString("warning on line ", String::number(position.m_line.oneBasedInt()), " at column ", String::number(position.m_column.oneBasedInt()), ": ", m);
156 break;
157 case fatal:
158 case nonFatal:
159 m_errorMessages += makeString("error on line ", String::number(position.m_line.oneBasedInt()), " at column ", String::number(position.m_column.oneBasedInt()), ": ", m);
160 }
161
162 m_lastErrorPosition = position;
163 ++m_errorCount;
164 }
165
166 if (type != warning)
167 m_sawError = true;
168
169 if (type == fatal)
170 stopParsing();
171 }
172
enterText()173 void XMLDocumentParser::enterText()
174 {
175 #if !USE(QXMLSTREAM)
176 ASSERT(m_bufferedText.size() == 0);
177 #endif
178 RefPtr<Node> newNode = Text::create(document(), "");
179 m_currentNode->deprecatedParserAddChild(newNode.get());
180 pushCurrentNode(newNode.get());
181 }
182
183 #if !USE(QXMLSTREAM)
toString(const xmlChar * string,size_t size)184 static inline String toString(const xmlChar* string, size_t size)
185 {
186 return String::fromUTF8(reinterpret_cast<const char*>(string), size);
187 }
188 #endif
189
190
exitText()191 void XMLDocumentParser::exitText()
192 {
193 if (isStopped())
194 return;
195
196 if (!m_currentNode || !m_currentNode->isTextNode())
197 return;
198
199 #if !USE(QXMLSTREAM)
200 ExceptionCode ec = 0;
201 static_cast<Text*>(m_currentNode)->appendData(toString(m_bufferedText.data(), m_bufferedText.size()), ec);
202 Vector<xmlChar> empty;
203 m_bufferedText.swap(empty);
204 #endif
205
206 if (m_view && m_currentNode && !m_currentNode->attached())
207 m_currentNode->attach();
208
209 popCurrentNode();
210 }
211
// Releases the node references held through the current-node stack, then
// lets the base class (ScriptableDocumentParser) perform its own detach.
void XMLDocumentParser::detach()
{
    clearCurrentNodeStack();
    ScriptableDocumentParser::detach();
}
217
end()218 void XMLDocumentParser::end()
219 {
220 // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
221 // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
222 ASSERT(!m_parsingFragment);
223
224 doEnd();
225
226 // doEnd() could process a script tag, thus pausing parsing.
227 if (m_parserPaused)
228 return;
229
230 if (m_sawError)
231 insertErrorMessageBlock();
232 else {
233 exitText();
234 document()->styleSelectorChanged(RecalcStyleImmediately);
235 }
236
237 if (isParsing())
238 prepareToStopParsing();
239 document()->setReadyState(Document::Interactive);
240 clearCurrentNodeStack();
241 document()->finishedParsing();
242 }
243
finish()244 void XMLDocumentParser::finish()
245 {
246 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
247 // makes sense to call any methods on DocumentParser once it's been stopped.
248 // However, FrameLoader::stop calls Document::finishParsing unconditionally
249 // which in turn calls m_parser->finish().
250
251 if (m_parserPaused)
252 m_finishCalled = true;
253 else
254 end();
255 }
256
// Returns m_finishCalled, which finish() sets when it is invoked while the
// parser is paused.
bool XMLDocumentParser::finishWasCalled()
{
    return m_finishCalled;
}
261
createXHTMLParserErrorHeader(Document * doc,const String & errorMessages)262 static inline RefPtr<Element> createXHTMLParserErrorHeader(Document* doc, const String& errorMessages)
263 {
264 RefPtr<Element> reportElement = doc->createElement(QualifiedName(nullAtom, "parsererror", xhtmlNamespaceURI), false);
265 reportElement->setAttribute(styleAttr, "display: block; white-space: pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black");
266
267 ExceptionCode ec = 0;
268 RefPtr<Element> h3 = doc->createElement(h3Tag, false);
269 reportElement->appendChild(h3.get(), ec);
270 h3->appendChild(doc->createTextNode("This page contains the following errors:"), ec);
271
272 RefPtr<Element> fixed = doc->createElement(divTag, false);
273 reportElement->appendChild(fixed.get(), ec);
274 fixed->setAttribute(styleAttr, "font-family:monospace;font-size:12px");
275 fixed->appendChild(doc->createTextNode(errorMessages), ec);
276
277 h3 = doc->createElement(h3Tag, false);
278 reportElement->appendChild(h3.get(), ec);
279 h3->appendChild(doc->createTextNode("Below is a rendering of the page up to the first error."), ec);
280
281 return reportElement;
282 }
283
insertErrorMessageBlock()284 void XMLDocumentParser::insertErrorMessageBlock()
285 {
286 #if USE(QXMLSTREAM)
287 if (m_parsingFragment)
288 return;
289 #endif
290 // One or more errors occurred during parsing of the code. Display an error block to the user above
291 // the normal content (the DOM tree is created manually and includes line/col info regarding
292 // where the errors are located)
293
294 // Create elements for display
295 ExceptionCode ec = 0;
296 Document* document = this->document();
297 RefPtr<Element> documentElement = document->documentElement();
298 if (!documentElement) {
299 RefPtr<Element> rootElement = document->createElement(htmlTag, false);
300 document->appendChild(rootElement, ec);
301 RefPtr<Element> body = document->createElement(bodyTag, false);
302 rootElement->appendChild(body, ec);
303 documentElement = body.get();
304 }
305 #if ENABLE(SVG)
306 else if (documentElement->namespaceURI() == SVGNames::svgNamespaceURI) {
307 RefPtr<Element> rootElement = document->createElement(htmlTag, false);
308 RefPtr<Element> body = document->createElement(bodyTag, false);
309 rootElement->appendChild(body, ec);
310 body->appendChild(documentElement, ec);
311 document->appendChild(rootElement.get(), ec);
312 documentElement = body.get();
313 }
314 #endif
315 #if ENABLE(WML)
316 else if (isWMLDocument()) {
317 RefPtr<Element> rootElement = document->createElement(htmlTag, false);
318 RefPtr<Element> body = document->createElement(bodyTag, false);
319 rootElement->appendChild(body, ec);
320 body->appendChild(documentElement, ec);
321 document->appendChild(rootElement.get(), ec);
322 documentElement = body.get();
323 }
324 #endif
325
326 RefPtr<Element> reportElement = createXHTMLParserErrorHeader(document, m_errorMessages);
327 documentElement->insertBefore(reportElement, documentElement->firstChild(), ec);
328 #if ENABLE(XSLT)
329 if (document->transformSourceDocument()) {
330 RefPtr<Element> paragraph = document->createElement(pTag, false);
331 paragraph->setAttribute(styleAttr, "white-space: normal");
332 paragraph->appendChild(document->createTextNode("This document was created as the result of an XSL transformation. The line and column numbers given are from the transformed result."), ec);
333 reportElement->appendChild(paragraph.release(), ec);
334 }
335 #endif
336 document->updateStyleIfNeeded();
337 }
338
notifyFinished(CachedResource * unusedResource)339 void XMLDocumentParser::notifyFinished(CachedResource* unusedResource)
340 {
341 ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
342 ASSERT(m_pendingScript->accessCount() > 0);
343
344 ScriptSourceCode sourceCode(m_pendingScript.get());
345 bool errorOccurred = m_pendingScript->errorOccurred();
346
347 m_pendingScript->removeClient(this);
348 m_pendingScript = 0;
349
350 RefPtr<Element> e = m_scriptElement;
351 m_scriptElement = 0;
352
353 ScriptElement* scriptElement = toScriptElement(e.get());
354 ASSERT(scriptElement);
355
356 // JavaScript can detach this parser, make sure it's kept alive even if detached.
357 RefPtr<XMLDocumentParser> protect(this);
358
359 if (errorOccurred)
360 scriptElement->dispatchErrorEvent();
361 else {
362 scriptElement->executeScript(sourceCode);
363 scriptElement->dispatchLoadEvent();
364 }
365
366 m_scriptElement = 0;
367
368 if (!isDetached() && !m_requestingScript)
369 resumeParsing();
370 }
371
// Returns whether m_pendingScript is currently set. notifyFinished() clears
// it once the pending script resource has been handled.
bool XMLDocumentParser::isWaitingForScripts() const
{
    return m_pendingScript;
}
376
pauseParsing()377 void XMLDocumentParser::pauseParsing()
378 {
379 if (m_parsingFragment)
380 return;
381
382 m_parserPaused = true;
383 }
384
parseDocumentFragment(const String & chunk,DocumentFragment * fragment,Element * contextElement,FragmentScriptingPermission scriptingPermission)385 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
386 {
387 if (!chunk.length())
388 return true;
389
390 // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
391 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
392 // For now we have a hack for script/style innerHTML support:
393 if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) {
394 fragment->parserAddChild(fragment->document()->createTextNode(chunk));
395 return true;
396 }
397
398 RefPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, scriptingPermission);
399 bool wellFormed = parser->appendFragmentSource(chunk);
400 // Do not call finish(). Current finish() and doEnd() implementations touch the main Document/loader
401 // and can cause crashes in the fragment case.
402 parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
403 return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
404 }
405
406 } // namespace WebCore
407