1 /*
2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB. If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26 #include "config.h"
27 #include "core/xml/parser/XMLDocumentParser.h"
28
29 #include "bindings/v8/ExceptionState.h"
30 #include "bindings/v8/ExceptionStatePlaceholder.h"
31 #include "bindings/v8/ScriptController.h"
32 #include "bindings/v8/ScriptSourceCode.h"
33 #include "core/FetchInitiatorTypeNames.h"
34 #include "core/HTMLNames.h"
35 #include "core/XMLNSNames.h"
36 #include "core/dom/CDATASection.h"
37 #include "core/dom/Comment.h"
38 #include "core/dom/Document.h"
39 #include "core/dom/DocumentFragment.h"
40 #include "core/dom/DocumentType.h"
41 #include "core/dom/ProcessingInstruction.h"
42 #include "core/dom/ScriptLoader.h"
43 #include "core/dom/TransformSource.h"
44 #include "core/fetch/ResourceFetcher.h"
45 #include "core/fetch/ScriptResource.h"
46 #include "core/frame/LocalFrame.h"
47 #include "core/frame/UseCounter.h"
48 #include "core/html/HTMLHtmlElement.h"
49 #include "core/html/HTMLTemplateElement.h"
50 #include "core/html/parser/HTMLEntityParser.h"
51 #include "core/html/parser/TextResourceDecoder.h"
52 #include "core/loader/FrameLoader.h"
53 #include "core/loader/ImageLoader.h"
54 #include "core/svg/graphics/SVGImage.h"
55 #include "core/xml/XMLTreeViewer.h"
56 #include "core/xml/parser/SharedBufferReader.h"
57 #include "core/xml/parser/XMLDocumentParserScope.h"
58 #include "core/xml/parser/XMLParserInput.h"
59 #include "platform/RuntimeEnabledFeatures.h"
60 #include "platform/SharedBuffer.h"
61 #include "platform/network/ResourceError.h"
62 #include "platform/network/ResourceRequest.h"
63 #include "platform/network/ResourceResponse.h"
64 #include "platform/weborigin/SecurityOrigin.h"
65 #include "wtf/StringExtras.h"
66 #include "wtf/TemporaryChange.h"
67 #include "wtf/Threading.h"
68 #include "wtf/Vector.h"
69 #include "wtf/unicode/UTF8.h"
70 #include <libxml/catalog.h>
71 #include <libxml/parser.h>
72 #include <libxml/parserInternals.h>
73 #include <libxslt/xslt.h>
74
75 namespace WebCore {
76
77 using namespace HTMLNames;
78
// Maximum number of entries allowed on the current-node stack; once
// pushCurrentNode() sees the stack grow past this it reports a fatal
// "Excessive node nesting." error.
// FIXME: HTMLConstructionSite has a limit of 512, should these match?
static const unsigned maxXMLTreeDepth = 5000u;
81
toString(const xmlChar * string,size_t length)82 static inline String toString(const xmlChar* string, size_t length)
83 {
84 return String::fromUTF8(reinterpret_cast<const char*>(string), length);
85 }
86
toString(const xmlChar * string)87 static inline String toString(const xmlChar* string)
88 {
89 return String::fromUTF8(reinterpret_cast<const char*>(string));
90 }
91
toAtomicString(const xmlChar * string,size_t length)92 static inline AtomicString toAtomicString(const xmlChar* string, size_t length)
93 {
94 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length);
95 }
96
toAtomicString(const xmlChar * string)97 static inline AtomicString toAtomicString(const xmlChar* string)
98 {
99 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
100 }
101
hasNoStyleInformation(Document * document)102 static inline bool hasNoStyleInformation(Document* document)
103 {
104 if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument())
105 return false;
106
107 if (!document->frame() || !document->frame()->page())
108 return false;
109
110 if (document->frame()->tree().parent())
111 return false; // This document is not in a top frame
112
113 if (SVGImage::isInSVGImage(document))
114 return false;
115
116 return true;
117 }
118
119 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
120 public:
PendingStartElementNSCallback(const AtomicString & localName,const AtomicString & prefix,const AtomicString & uri,int namespaceCount,const xmlChar ** namespaces,int attributeCount,int defaultedCount,const xmlChar ** attributes)121 PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri,
122 int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes)
123 : m_localName(localName)
124 , m_prefix(prefix)
125 , m_uri(uri)
126 , m_namespaceCount(namespaceCount)
127 , m_attributeCount(attributeCount)
128 , m_defaultedCount(defaultedCount)
129 {
130 m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2));
131 for (int i = 0; i < namespaceCount * 2 ; ++i)
132 m_namespaces[i] = xmlStrdup(namespaces[i]);
133 m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5));
134 for (int i = 0; i < attributeCount; ++i) {
135 // Each attribute has 5 elements in the array:
136 // name, prefix, uri, value and an end pointer.
137 for (int j = 0; j < 3; ++j)
138 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
139 int length = attributes[i * 5 + 4] - attributes[i * 5 + 3];
140 m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length);
141 m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length;
142 }
143 }
144
~PendingStartElementNSCallback()145 virtual ~PendingStartElementNSCallback()
146 {
147 for (int i = 0; i < m_namespaceCount * 2; ++i)
148 xmlFree(m_namespaces[i]);
149 xmlFree(m_namespaces);
150 for (int i = 0; i < m_attributeCount; ++i)
151 for (int j = 0; j < 4; ++j)
152 xmlFree(m_attributes[i * 5 + j]);
153 xmlFree(m_attributes);
154 }
155
call(XMLDocumentParser * parser)156 virtual void call(XMLDocumentParser* parser) OVERRIDE
157 {
158 parser->startElementNs(m_localName, m_prefix, m_uri,
159 m_namespaceCount, const_cast<const xmlChar**>(m_namespaces),
160 m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes));
161 }
162
163 private:
164 AtomicString m_localName;
165 AtomicString m_prefix;
166 AtomicString m_uri;
167 int m_namespaceCount;
168 xmlChar** m_namespaces;
169 int m_attributeCount;
170 int m_defaultedCount;
171 xmlChar** m_attributes;
172 };
173
174 class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
175 public:
call(XMLDocumentParser * parser)176 virtual void call(XMLDocumentParser* parser) OVERRIDE
177 {
178 parser->endElementNs();
179 }
180 };
181
182 class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback {
183 public:
PendingCharactersCallback(const xmlChar * chars,int length)184 PendingCharactersCallback(const xmlChar* chars, int length)
185 : m_chars(xmlStrndup(chars, length))
186 , m_length(length)
187 {
188 }
189
~PendingCharactersCallback()190 virtual ~PendingCharactersCallback()
191 {
192 xmlFree(m_chars);
193 }
194
call(XMLDocumentParser * parser)195 virtual void call(XMLDocumentParser* parser) OVERRIDE
196 {
197 parser->characters(m_chars, m_length);
198 }
199
200 private:
201 xmlChar* m_chars;
202 int m_length;
203 };
204
205 class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback {
206 public:
PendingProcessingInstructionCallback(const String & target,const String & data)207 PendingProcessingInstructionCallback(const String& target, const String& data)
208 : m_target(target)
209 , m_data(data)
210 {
211 }
212
call(XMLDocumentParser * parser)213 virtual void call(XMLDocumentParser* parser) OVERRIDE
214 {
215 parser->processingInstruction(m_target, m_data);
216 }
217
218 private:
219 String m_target;
220 String m_data;
221 };
222
223 class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback {
224 public:
PendingCDATABlockCallback(const String & text)225 explicit PendingCDATABlockCallback(const String& text) : m_text(text) { }
226
call(XMLDocumentParser * parser)227 virtual void call(XMLDocumentParser* parser) OVERRIDE
228 {
229 parser->cdataBlock(m_text);
230 }
231
232 private:
233 String m_text;
234 };
235
236 class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback {
237 public:
PendingCommentCallback(const String & text)238 explicit PendingCommentCallback(const String& text) : m_text(text) { }
239
call(XMLDocumentParser * parser)240 virtual void call(XMLDocumentParser* parser) OVERRIDE
241 {
242 parser->comment(m_text);
243 }
244
245 private:
246 String m_text;
247 };
248
249 class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback {
250 public:
PendingInternalSubsetCallback(const String & name,const String & externalID,const String & systemID)251 PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID)
252 : m_name(name)
253 , m_externalID(externalID)
254 , m_systemID(systemID)
255 {
256 }
257
call(XMLDocumentParser * parser)258 virtual void call(XMLDocumentParser* parser) OVERRIDE
259 {
260 parser->internalSubset(m_name, m_externalID, m_systemID);
261 }
262
263 private:
264 String m_name;
265 String m_externalID;
266 String m_systemID;
267 };
268
269 class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback {
270 public:
PendingErrorCallback(XMLErrors::ErrorType type,const xmlChar * message,OrdinalNumber lineNumber,OrdinalNumber columnNumber)271 PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
272 : m_type(type)
273 , m_message(xmlStrdup(message))
274 , m_lineNumber(lineNumber)
275 , m_columnNumber(columnNumber)
276 {
277 }
278
~PendingErrorCallback()279 virtual ~PendingErrorCallback()
280 {
281 xmlFree(m_message);
282 }
283
call(XMLDocumentParser * parser)284 virtual void call(XMLDocumentParser* parser) OVERRIDE
285 {
286 parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
287 }
288
289 private:
290 XMLErrors::ErrorType m_type;
291 xmlChar* m_message;
292 OrdinalNumber m_lineNumber;
293 OrdinalNumber m_columnNumber;
294 };
295
pushCurrentNode(ContainerNode * n)296 void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
297 {
298 ASSERT(n);
299 ASSERT(m_currentNode);
300 #if !ENABLE(OILPAN)
301 if (n != document())
302 n->ref();
303 #endif
304 m_currentNodeStack.append(m_currentNode);
305 m_currentNode = n;
306 if (m_currentNodeStack.size() > maxXMLTreeDepth)
307 handleError(XMLErrors::ErrorTypeFatal, "Excessive node nesting.", textPosition());
308 }
309
popCurrentNode()310 void XMLDocumentParser::popCurrentNode()
311 {
312 if (!m_currentNode)
313 return;
314 ASSERT(m_currentNodeStack.size());
315 #if !ENABLE(OILPAN)
316 if (m_currentNode != document())
317 m_currentNode->deref();
318 #endif
319 m_currentNode = m_currentNodeStack.last();
320 m_currentNodeStack.removeLast();
321 }
322
clearCurrentNodeStack()323 void XMLDocumentParser::clearCurrentNodeStack()
324 {
325 #if !ENABLE(OILPAN)
326 if (m_currentNode && m_currentNode != document())
327 m_currentNode->deref();
328 #endif
329 m_currentNode = nullptr;
330 m_leafTextNode = nullptr;
331
332 if (m_currentNodeStack.size()) { // Aborted parsing.
333 #if !ENABLE(OILPAN)
334 for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
335 m_currentNodeStack[i]->deref();
336 if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
337 m_currentNodeStack[0]->deref();
338 #endif
339 m_currentNodeStack.clear();
340 }
341 }
342
insert(const SegmentedString &)343 void XMLDocumentParser::insert(const SegmentedString&)
344 {
345 ASSERT_NOT_REACHED();
346 }
347
append(PassRefPtr<StringImpl> inputSource)348 void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
349 {
350 SegmentedString source(inputSource);
351 if (m_sawXSLTransform || !m_sawFirstElement)
352 m_originalSourceForTransform.append(source);
353
354 if (isStopped() || m_sawXSLTransform)
355 return;
356
357 if (m_parserPaused) {
358 m_pendingSrc.append(source);
359 return;
360 }
361
362 // JavaScript can detach the parser. Make sure this is not released
363 // before the end of this method.
364 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
365
366 doWrite(source.toString());
367 }
368
handleError(XMLErrors::ErrorType type,const char * formattedMessage,TextPosition position)369 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position)
370 {
371 m_xmlErrors.handleError(type, formattedMessage, position);
372 if (type != XMLErrors::ErrorTypeWarning)
373 m_sawError = true;
374 if (type == XMLErrors::ErrorTypeFatal)
375 stopParsing();
376 }
377
enterText()378 void XMLDocumentParser::enterText()
379 {
380 ASSERT(m_bufferedText.size() == 0);
381 ASSERT(!m_leafTextNode);
382 m_leafTextNode = Text::create(m_currentNode->document(), "");
383 m_currentNode->parserAppendChild(m_leafTextNode.get());
384 }
385
exitText()386 void XMLDocumentParser::exitText()
387 {
388 if (isStopped())
389 return;
390
391 if (!m_leafTextNode)
392 return;
393
394 m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()));
395 m_bufferedText.clear();
396 m_leafTextNode = nullptr;
397 }
398
detach()399 void XMLDocumentParser::detach()
400 {
401 clearCurrentNodeStack();
402 ScriptableDocumentParser::detach();
403 }
404
end()405 void XMLDocumentParser::end()
406 {
407 // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
408 // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
409 ASSERT(!m_parsingFragment);
410
411 doEnd();
412
413 // doEnd() call above can detach the parser and null out its document.
414 // In that case, we just bail out.
415 if (isDetached())
416 return;
417
418 // doEnd() could process a script tag, thus pausing parsing.
419 if (m_parserPaused)
420 return;
421
422 if (m_sawError) {
423 insertErrorMessageBlock();
424 } else {
425 exitText();
426 document()->styleResolverChanged();
427 }
428
429 if (isParsing())
430 prepareToStopParsing();
431 document()->setReadyState(Document::Interactive);
432 clearCurrentNodeStack();
433 document()->finishedParsing();
434 }
435
finish()436 void XMLDocumentParser::finish()
437 {
438 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
439 // makes sense to call any methods on DocumentParser once it's been stopped.
440 // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
441
442 if (m_parserPaused)
443 m_finishCalled = true;
444 else
445 end();
446 }
447
insertErrorMessageBlock()448 void XMLDocumentParser::insertErrorMessageBlock()
449 {
450 m_xmlErrors.insertErrorMessageBlock();
451 }
452
notifyFinished(Resource * unusedResource)453 void XMLDocumentParser::notifyFinished(Resource* unusedResource)
454 {
455 ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
456
457 ScriptSourceCode sourceCode(m_pendingScript.get());
458 bool errorOccurred = m_pendingScript->errorOccurred();
459 bool wasCanceled = m_pendingScript->wasCanceled();
460
461 m_pendingScript->removeClient(this);
462 m_pendingScript = 0;
463
464 RefPtrWillBeRawPtr<Element> e = m_scriptElement;
465 m_scriptElement = nullptr;
466
467 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
468 ASSERT(scriptLoader);
469
470 // JavaScript can detach this parser, make sure it's kept alive even if
471 // detached.
472 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
473
474 if (errorOccurred) {
475 scriptLoader->dispatchErrorEvent();
476 } else if (!wasCanceled) {
477 scriptLoader->executeScript(sourceCode);
478 scriptLoader->dispatchLoadEvent();
479 }
480
481 m_scriptElement = nullptr;
482
483 if (!isDetached() && !m_requestingScript)
484 resumeParsing();
485 }
486
isWaitingForScripts() const487 bool XMLDocumentParser::isWaitingForScripts() const
488 {
489 return m_pendingScript;
490 }
491
pauseParsing()492 void XMLDocumentParser::pauseParsing()
493 {
494 if (!m_parsingFragment)
495 m_parserPaused = true;
496 }
497
parseDocumentFragment(const String & chunk,DocumentFragment * fragment,Element * contextElement,ParserContentPolicy parserContentPolicy)498 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
499 {
500 if (!chunk.length())
501 return true;
502
503 // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
504 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
505 // For now we have a hack for script/style innerHTML support:
506 if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) {
507 fragment->parserAppendChild(fragment->document().createTextNode(chunk));
508 return true;
509 }
510
511 RefPtrWillBeRawPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
512 bool wellFormed = parser->appendFragmentSource(chunk);
513
514 // Do not call finish(). Current finish() and doEnd() implementations touch
515 // the main Document/loader and can cause crashes in the fragment case.
516
517 // Allows ~DocumentParser to assert it was detached before destruction.
518 parser->detach();
519 // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
520 return wellFormed;
521 }
522
523 static int globalDescriptor = 0;
524 static ThreadIdentifier libxmlLoaderThread = 0;
525
matchFunc(const char *)526 static int matchFunc(const char*)
527 {
528 // Only match loads initiated due to uses of libxml2 from within
529 // XMLDocumentParser to avoid interfering with client applications that also
530 // use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
531 return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread;
532 }
533
setAttributes(Element * element,Vector<Attribute> & attributeVector,ParserContentPolicy parserContentPolicy)534 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
535 {
536 if (!scriptingContentIsAllowed(parserContentPolicy))
537 element->stripScriptingAttributes(attributeVector);
538 element->parserSetAttributes(attributeVector);
539 }
540
switchEncoding(xmlParserCtxtPtr ctxt,bool is8Bit)541 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit)
542 {
543 // Hack around libxml2's lack of encoding overide support by manually
544 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
545 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks and
546 // switch encodings, causing the parse to fail.
547 if (is8Bit) {
548 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
549 return;
550 }
551
552 const UChar BOM = 0xFEFF;
553 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
554 xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
555 }
556
parseChunk(xmlParserCtxtPtr ctxt,const String & chunk)557 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk)
558 {
559 bool is8Bit = chunk.is8Bit();
560 switchEncoding(ctxt, is8Bit);
561 if (is8Bit)
562 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0);
563 else
564 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0);
565 }
566
finishParsing(xmlParserCtxtPtr ctxt)567 static void finishParsing(xmlParserCtxtPtr ctxt)
568 {
569 xmlParseChunk(ctxt, 0, 0, 1);
570 }
571
// From here on, direct uses of xmlParseChunk expand to tokens that do not
// compile; callers must go through parseChunk() / finishParsing() above.
#define xmlParseChunk #error "Use parseChunk instead to select the correct encoding."
573
isLibxmlDefaultCatalogFile(const String & urlString)574 static bool isLibxmlDefaultCatalogFile(const String& urlString)
575 {
576 // On non-Windows platforms libxml asks for this URL, the
577 // "XML_XML_DEFAULT_CATALOG", on initialization.
578 if (urlString == "file:///etc/xml/catalog")
579 return true;
580
581 // On Windows, libxml computes a URL relative to where its DLL resides.
582 if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
583 return true;
584 return false;
585 }
586
shouldAllowExternalLoad(const KURL & url)587 static bool shouldAllowExternalLoad(const KURL& url)
588 {
589 String urlString = url.string();
590
591 // This isn't really necessary now that initializeLibXMLIfNecessary
592 // disables catalog support in libxml, but keeping it for defense in depth.
593 if (isLibxmlDefaultCatalogFile(url))
594 return false;
595
596 // The most common DTD. There isn't much point in hammering www.w3c.org by
597 // requesting this URL for every XHTML document.
598 if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
599 return false;
600
601 // Similarly, there isn't much point in requesting the SVG DTD.
602 if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
603 return false;
604
605 // The libxml doesn't give us a lot of context for deciding whether to allow
606 // this request. In the worst case, this load could be for an external
607 // entity and the resulting document could simply read the retrieved
608 // content. If we had more context, we could potentially allow the parser to
609 // load a DTD. As things stand, we take the conservative route and allow
610 // same-origin requests only.
611 if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) {
612 XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url);
613 return false;
614 }
615
616 return true;
617 }
618
openFunc(const char * uri)619 static void* openFunc(const char* uri)
620 {
621 ASSERT(XMLDocumentParserScope::currentFetcher);
622 ASSERT(currentThread() == libxmlLoaderThread);
623
624 KURL url(KURL(), uri);
625
626 if (!shouldAllowExternalLoad(url))
627 return &globalDescriptor;
628
629 KURL finalURL;
630 RefPtr<SharedBuffer> data;
631
632 {
633 ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher;
634 XMLDocumentParserScope scope(0);
635 // FIXME: We should restore the original global error handler as well.
636
637 if (fetcher->frame()) {
638 FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions());
639 ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request);
640 if (resource && !resource->errorOccurred()) {
641 data = resource->resourceBuffer();
642 finalURL = resource->response().url();
643 }
644 }
645 }
646
647 // We have to check the URL again after the load to catch redirects.
648 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
649 if (!shouldAllowExternalLoad(finalURL))
650 return &globalDescriptor;
651
652 return new SharedBufferReader(data);
653 }
654
readFunc(void * context,char * buffer,int len)655 static int readFunc(void* context, char* buffer, int len)
656 {
657 // Do 0-byte reads in case of a null descriptor
658 if (context == &globalDescriptor)
659 return 0;
660
661 SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
662 return data->readData(buffer, len);
663 }
664
// libxml I/O "write" callback: ignores its arguments and reports that zero
// bytes were written.
static int writeFunc(void*, const char*, int)
{
    // Always just do 0-byte writes
    const int bytesWritten = 0;
    return bytesWritten;
}
670
closeFunc(void * context)671 static int closeFunc(void* context)
672 {
673 if (context != &globalDescriptor) {
674 SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
675 delete data;
676 }
677 return 0;
678 }
679
// printf-style error callback that deliberately discards the message.
static void errorFunc(void*, const char*, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
}
684
initializeLibXMLIfNecessary()685 static void initializeLibXMLIfNecessary()
686 {
687 static bool didInit = false;
688 if (didInit)
689 return;
690
691 // We don't want libxml to try and load catalogs.
692 // FIXME: It's not nice to set global settings in libxml, embedders of Blink
693 // could be trying to use libxml themselves.
694 xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
695 xmlInitParser();
696 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
697 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
698 libxmlLoaderThread = currentThread();
699 didInit = true;
700 }
701
702
createStringParser(xmlSAXHandlerPtr handlers,void * userData)703 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
704 {
705 initializeLibXMLIfNecessary();
706 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
707 parser->_private = userData;
708 parser->replaceEntities = true;
709 return adoptRef(new XMLParserContext(parser));
710 }
711
712 // Chunk should be encoded in UTF-8
createMemoryParser(xmlSAXHandlerPtr handlers,void * userData,const CString & chunk)713 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
714 {
715 initializeLibXMLIfNecessary();
716
717 // appendFragmentSource() checks that the length doesn't overflow an int.
718 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
719
720 if (!parser)
721 return nullptr;
722
723 // Copy the sax handler
724 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
725
726 // Set parser options.
727 // XML_PARSE_NODICT: default dictionary option.
728 // XML_PARSE_NOENT: force entities substitutions.
729 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
730
731 // Internal initialization
732 parser->sax2 = 1;
733 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
734 parser->depth = 0;
735 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
736 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
737 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
738 parser->_private = userData;
739
740 return adoptRef(new XMLParserContext(parser));
741 }
742
743 // --------------------------------
744
supportsXMLVersion(const String & version)745 bool XMLDocumentParser::supportsXMLVersion(const String& version)
746 {
747 return version == "1.0";
748 }
749
XMLDocumentParser(Document & document,FrameView * frameView)750 XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
751 : ScriptableDocumentParser(document)
752 , m_hasView(frameView)
753 , m_context(nullptr)
754 , m_currentNode(&document)
755 , m_isCurrentlyParsing8BitChunk(false)
756 , m_sawError(false)
757 , m_sawCSS(false)
758 , m_sawXSLTransform(false)
759 , m_sawFirstElement(false)
760 , m_isXHTMLDocument(false)
761 , m_parserPaused(false)
762 , m_requestingScript(false)
763 , m_finishCalled(false)
764 , m_xmlErrors(&document)
765 , m_pendingScript(0)
766 , m_scriptStartPosition(TextPosition::belowRangePosition())
767 , m_parsingFragment(false)
768 {
769 // This is XML being used as a document resource.
770 if (frameView && document.isXMLDocument())
771 UseCounter::count(document, UseCounter::XMLDocument);
772 }
773
XMLDocumentParser(DocumentFragment * fragment,Element * parentElement,ParserContentPolicy parserContentPolicy)774 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
775 : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
776 , m_hasView(false)
777 , m_context(nullptr)
778 , m_currentNode(fragment)
779 , m_isCurrentlyParsing8BitChunk(false)
780 , m_sawError(false)
781 , m_sawCSS(false)
782 , m_sawXSLTransform(false)
783 , m_sawFirstElement(false)
784 , m_isXHTMLDocument(false)
785 , m_parserPaused(false)
786 , m_requestingScript(false)
787 , m_finishCalled(false)
788 , m_xmlErrors(&fragment->document())
789 , m_pendingScript(0)
790 , m_scriptStartPosition(TextPosition::belowRangePosition())
791 , m_parsingFragment(true)
792 {
793 #if !ENABLE(OILPAN)
794 fragment->ref();
795 #endif
796
797 // Add namespaces based on the parent node
798 WillBeHeapVector<RawPtrWillBeMember<Element> > elemStack;
799 while (parentElement) {
800 elemStack.append(parentElement);
801
802 ContainerNode* n = parentElement->parentNode();
803 if (!n || !n->isElementNode())
804 break;
805 parentElement = toElement(n);
806 }
807
808 if (elemStack.isEmpty())
809 return;
810
811 for (; !elemStack.isEmpty(); elemStack.removeLast()) {
812 Element* element = elemStack.last();
813 if (element->hasAttributes()) {
814 AttributeCollection attributes = element->attributes();
815 AttributeCollection::const_iterator end = attributes.end();
816 for (AttributeCollection::const_iterator it = attributes.begin(); it != end; ++it) {
817 if (it->localName() == xmlnsAtom)
818 m_defaultNamespaceURI = it->value();
819 else if (it->prefix() == xmlnsAtom)
820 m_prefixToNamespaceMap.set(it->localName(), it->value());
821 }
822 }
823 }
824
825 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
826 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
827 m_defaultNamespaceURI = parentElement->namespaceURI();
828 }
829
~XMLParserContext()830 XMLParserContext::~XMLParserContext()
831 {
832 if (m_context->myDoc)
833 xmlFreeDoc(m_context->myDoc);
834 xmlFreeParserCtxt(m_context);
835 }
836
~XMLDocumentParser()837 XMLDocumentParser::~XMLDocumentParser()
838 {
839 #if !ENABLE(OILPAN)
840 // The XMLDocumentParser will always be detached before being destroyed.
841 ASSERT(m_currentNodeStack.isEmpty());
842 ASSERT(!m_currentNode);
843 #endif
844
845 // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
846 if (m_pendingScript)
847 m_pendingScript->removeClient(this);
848 }
849
trace(Visitor * visitor)850 void XMLDocumentParser::trace(Visitor* visitor)
851 {
852 visitor->trace(m_currentNode);
853 #if ENABLE(OILPAN)
854 visitor->trace(m_currentNodeStack);
855 #endif
856 visitor->trace(m_leafTextNode);
857 visitor->trace(m_xmlErrors);
858 visitor->trace(m_scriptElement);
859 ScriptableDocumentParser::trace(visitor);
860 }
861
doWrite(const String & parseString)862 void XMLDocumentParser::doWrite(const String& parseString)
863 {
864 ASSERT(!isDetached());
865 if (!m_context)
866 initializeParserContext();
867
868 // Protect the libxml context from deletion during a callback
869 RefPtr<XMLParserContext> context = m_context;
870
871 // libXML throws an error if you try to switch the encoding for an empty
872 // string.
873 if (parseString.length()) {
874 // JavaScript may cause the parser to detach during parseChunk
875 // keep this alive until this function is done.
876 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
877
878 XMLDocumentParserScope scope(document()->fetcher());
879 TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
880 parseChunk(context->context(), parseString);
881
882 // JavaScript (which may be run under the parseChunk callstack) may
883 // cause the parser to be stopped or detached.
884 if (isStopped())
885 return;
886 }
887
888 // FIXME: Why is this here? And why is it after we process the passed
889 // source?
890 if (document()->sawDecodingError()) {
891 // If the decoder saw an error, report it as fatal (stops parsing)
892 TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
893 handleError(XMLErrors::ErrorTypeFatal, "Encoding error", position);
894 }
895 }
896
// View over one entry of the namespace array libxml passes to the
// startElementNs SAX callback. handleNamespaceAttributes() reinterpret_casts
// the raw const xmlChar** array to this type, so the member order must stay as
// it is.
struct xmlSAX2Namespace {
    // Namespace prefix; null when the declaration has no prefix.
    const xmlChar* prefix;
    // Namespace URI bound by the declaration.
    const xmlChar* uri;
};
901
handleNamespaceAttributes(Vector<Attribute> & prefixedAttributes,const xmlChar ** libxmlNamespaces,int nbNamespaces,ExceptionState & exceptionState)902 static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState)
903 {
904 xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
905 for (int i = 0; i < nbNamespaces; ++i) {
906 AtomicString namespaceQName = xmlnsAtom;
907 AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
908 if (namespaces[i].prefix)
909 namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
910
911 QualifiedName parsedName = anyName;
912 if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState))
913 return;
914
915 prefixedAttributes.append(Attribute(parsedName, namespaceURI));
916 }
917 }
918
// View over one entry of the attribute array libxml passes to the
// startElementNs SAX callback. The raw const xmlChar** array is
// reinterpret_cast to this type by its users, so the member order must stay as
// it is.
struct xmlSAX2Attributes {
    const xmlChar* localname;
    // Null or empty when the attribute has no prefix.
    const xmlChar* prefix;
    const xmlChar* uri;
    // The attribute value occupies [value, end); callers compute its length
    // as end - value.
    const xmlChar* value;
    const xmlChar* end;
};
926
handleElementAttributes(Vector<Attribute> & prefixedAttributes,const xmlChar ** libxmlAttributes,int nbAttributes,ExceptionState & exceptionState)927 static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState)
928 {
929 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
930 for (int i = 0; i < nbAttributes; ++i) {
931 int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
932 AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
933 String attrPrefix = toString(attributes[i].prefix);
934 AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
935 AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
936
937 QualifiedName parsedName = anyName;
938 if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState))
939 return;
940
941 prefixedAttributes.append(Attribute(parsedName, attrValue));
942 }
943 }
944
startElementNs(const AtomicString & localName,const AtomicString & prefix,const AtomicString & uri,int nbNamespaces,const xmlChar ** libxmlNamespaces,int nbAttributes,int nbDefaulted,const xmlChar ** libxmlAttributes)945 void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
946 const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
947 {
948 if (isStopped())
949 return;
950
951 if (m_parserPaused) {
952 m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
953 nbAttributes, nbDefaulted, libxmlAttributes)));
954 return;
955 }
956
957 exitText();
958
959 AtomicString adjustedURI = uri;
960 if (m_parsingFragment && adjustedURI.isNull()) {
961 if (!prefix.isNull())
962 adjustedURI = m_prefixToNamespaceMap.get(prefix);
963 else
964 adjustedURI = m_defaultNamespaceURI;
965 }
966
967 bool isFirstElement = !m_sawFirstElement;
968 m_sawFirstElement = true;
969
970 QualifiedName qName(prefix, localName, adjustedURI);
971 RefPtrWillBeRawPtr<Element> newElement = m_currentNode->document().createElement(qName, true);
972 if (!newElement) {
973 stopParsing();
974 return;
975 }
976
977 Vector<Attribute> prefixedAttributes;
978 TrackExceptionState exceptionState;
979 handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState);
980 if (exceptionState.hadException()) {
981 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
982 stopParsing();
983 return;
984 }
985
986 handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState);
987 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
988 if (exceptionState.hadException()) {
989 stopParsing();
990 return;
991 }
992
993 newElement->beginParsingChildren();
994
995 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
996 if (scriptLoader)
997 m_scriptStartPosition = textPosition();
998
999 m_currentNode->parserAppendChild(newElement.get());
1000
1001 if (isHTMLTemplateElement(*newElement))
1002 pushCurrentNode(toHTMLTemplateElement(*newElement).content());
1003 else
1004 pushCurrentNode(newElement.get());
1005
1006 if (isHTMLHtmlElement(*newElement))
1007 toHTMLHtmlElement(*newElement).insertedByParser();
1008
1009 if (!m_parsingFragment && isFirstElement && document()->frame())
1010 document()->frame()->loader().dispatchDocumentElementAvailable();
1011 }
1012
// SAX end-element callback. Finishes the current node and pops it off the node
// stack. If the node is a script element that is in the document, a view
// exists and scripting content is allowed, the script is prepared and then
// either executed immediately or registered as the pending script, which
// pauses parsing. The call is queued instead when the parser is paused and
// ignored when it is stopped.
void XMLDocumentParser::endElementNs()
{
    if (isStopped())
        return;

    if (m_parserPaused) {
        m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
        return;
    }

    // JavaScript can detach the parser. Make sure this is not released before
    // the end of this method.
    RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);

    exitText();

    // Keep a reference to the node being closed; it is used after it has been
    // popped off the stack.
    RefPtrWillBeRawPtr<ContainerNode> n = m_currentNode;
    if (m_currentNode->isElementNode())
        toElement(n.get())->finishParsingChildren();

    // Script elements are removed from the tree when the content policy does
    // not allow scripting content.
    if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) {
        popCurrentNode();
        n->remove(IGNORE_EXCEPTION);
        return;
    }

    // Nothing more to do for non-elements or when there is no view.
    if (!n->isElementNode() || !m_hasView) {
        popCurrentNode();
        return;
    }

    Element* element = toElement(n);

    // The element's parent may have already been removed from document.
    // Parsing continues in this case, but scripts aren't executed.
    if (!element->inDocument()) {
        popCurrentNode();
        return;
    }

    // Only script elements need the handling below.
    ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element);
    if (!scriptLoader) {
        popCurrentNode();
        return;
    }

    // Don't load external scripts for standalone documents (for now).
    ASSERT(!m_pendingScript);
    // Set for the duration of script preparation; cleared below unless the
    // parser gets detached in the meantime.
    m_requestingScript = true;

    if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) {
        // FIXME: Script execution should be shared between
        // the libxml2 and Qt XMLDocumentParser implementations.

        if (scriptLoader->readyToBeParserExecuted()) {
            scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition));
        } else if (scriptLoader->willBeParserExecuted()) {
            m_pendingScript = scriptLoader->resource();
            m_scriptElement = element;
            m_pendingScript->addClient(this);

            // m_pendingScript will be 0 if script was already loaded and
            // addClient() executed it.
            if (m_pendingScript)
                pauseParsing();
        } else {
            m_scriptElement = nullptr;
        }

        // JavaScript may have detached the parser
        if (isDetached())
            return;
    }
    m_requestingScript = false;
    popCurrentNode();
}
1089
characters(const xmlChar * chars,int length)1090 void XMLDocumentParser::characters(const xmlChar* chars, int length)
1091 {
1092 if (isStopped())
1093 return;
1094
1095 if (m_parserPaused) {
1096 m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length)));
1097 return;
1098 }
1099
1100 if (!m_leafTextNode)
1101 enterText();
1102 m_bufferedText.append(chars, length);
1103 }
1104
error(XMLErrors::ErrorType type,const char * message,va_list args)1105 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
1106 {
1107 if (isStopped())
1108 return;
1109
1110 char formattedMessage[1024];
1111 vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args);
1112
1113 if (m_parserPaused) {
1114 m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber())));
1115 return;
1116 }
1117
1118 handleError(type, formattedMessage, textPosition());
1119 }
1120
processingInstruction(const String & target,const String & data)1121 void XMLDocumentParser::processingInstruction(const String& target, const String& data)
1122 {
1123 if (isStopped())
1124 return;
1125
1126 if (m_parserPaused) {
1127 m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target, data)));
1128 return;
1129 }
1130
1131 exitText();
1132
1133 // ### handle exceptions
1134 TrackExceptionState exceptionState;
1135 RefPtrWillBeRawPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState);
1136 if (exceptionState.hadException())
1137 return;
1138
1139 pi->setCreatedByParser(true);
1140
1141 m_currentNode->parserAppendChild(pi.get());
1142
1143 pi->setCreatedByParser(false);
1144
1145 if (pi->isCSS())
1146 m_sawCSS = true;
1147
1148 if (!RuntimeEnabledFeatures::xsltEnabled())
1149 return;
1150
1151 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
1152 if (m_sawXSLTransform && !document()->transformSourceDocument()) {
1153 // This behavior is very tricky. We call stopParsing() here because we
1154 // want to stop processing the document until we're ready to apply the
1155 // transform, but we actually still want to be fed decoded string pieces
1156 // to accumulate in m_originalSourceForTransform. So, we call
1157 // stopParsing() here and check isStopped() in element callbacks.
1158 // FIXME: This contradicts the contract of DocumentParser.
1159 stopParsing();
1160 }
1161 }
1162
cdataBlock(const String & text)1163 void XMLDocumentParser::cdataBlock(const String& text)
1164 {
1165 if (isStopped())
1166 return;
1167
1168 if (m_parserPaused) {
1169 m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text)));
1170 return;
1171 }
1172
1173 exitText();
1174
1175 m_currentNode->parserAppendChild(CDATASection::create(m_currentNode->document(), text));
1176 }
1177
comment(const String & text)1178 void XMLDocumentParser::comment(const String& text)
1179 {
1180 if (isStopped())
1181 return;
1182
1183 if (m_parserPaused) {
1184 m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text)));
1185 return;
1186 }
1187
1188 exitText();
1189
1190 m_currentNode->parserAppendChild(Comment::create(m_currentNode->document(), text));
1191 }
1192
// Interpretation of the integer "standalone" value handed to
// XMLDocumentParser::startDocument(), which casts it to this enum. The
// numeric values are significant and are therefore spelled out.
enum StandaloneInfo {
    StandaloneUnspecified = -2,
    NoXMlDeclaration = -1,
    StandaloneNo = 0,
    StandaloneYes = 1
};
1199
startDocument(const String & version,const String & encoding,int standalone)1200 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone)
1201 {
1202 StandaloneInfo standaloneInfo = static_cast<StandaloneInfo>(standalone);
1203 if (standaloneInfo == NoXMlDeclaration) {
1204 document()->setHasXMLDeclaration(false);
1205 return;
1206 }
1207
1208 if (!version.isNull())
1209 document()->setXMLVersion(version, ASSERT_NO_EXCEPTION);
1210 if (standalone != StandaloneUnspecified)
1211 document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
1212 if (!encoding.isNull())
1213 document()->setXMLEncoding(encoding);
1214 document()->setHasXMLDeclaration(true);
1215 }
1216
endDocument()1217 void XMLDocumentParser::endDocument()
1218 {
1219 exitText();
1220 }
1221
internalSubset(const String & name,const String & externalID,const String & systemID)1222 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID)
1223 {
1224 if (isStopped())
1225 return;
1226
1227 if (m_parserPaused) {
1228 m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID)));
1229 return;
1230 }
1231
1232 if (document())
1233 document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID));
1234 }
1235
getParser(void * closure)1236 static inline XMLDocumentParser* getParser(void* closure)
1237 {
1238 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1239 return static_cast<XMLDocumentParser*>(ctxt->_private);
1240 }
1241
startElementNsHandler(void * closure,const xmlChar * localName,const xmlChar * prefix,const xmlChar * uri,int nbNamespaces,const xmlChar ** namespaces,int nbAttributes,int nbDefaulted,const xmlChar ** libxmlAttributes)1242 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
1243 {
1244 getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes);
1245 }
1246
endElementNsHandler(void * closure,const xmlChar *,const xmlChar *,const xmlChar *)1247 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1248 {
1249 getParser(closure)->endElementNs();
1250 }
1251
charactersHandler(void * closure,const xmlChar * chars,int length)1252 static void charactersHandler(void* closure, const xmlChar* chars, int length)
1253 {
1254 getParser(closure)->characters(chars, length);
1255 }
1256
processingInstructionHandler(void * closure,const xmlChar * target,const xmlChar * data)1257 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1258 {
1259 getParser(closure)->processingInstruction(toString(target), toString(data));
1260 }
1261
cdataBlockHandler(void * closure,const xmlChar * text,int length)1262 static void cdataBlockHandler(void* closure, const xmlChar* text, int length)
1263 {
1264 getParser(closure)->cdataBlock(toString(text, length));
1265 }
1266
commentHandler(void * closure,const xmlChar * text)1267 static void commentHandler(void* closure, const xmlChar* text)
1268 {
1269 getParser(closure)->comment(toString(text));
1270 }
1271
1272 WTF_ATTRIBUTE_PRINTF(2, 3)
warningHandler(void * closure,const char * message,...)1273 static void warningHandler(void* closure, const char* message, ...)
1274 {
1275 va_list args;
1276 va_start(args, message);
1277 getParser(closure)->error(XMLErrors::ErrorTypeWarning, message, args);
1278 va_end(args);
1279 }
1280
1281 WTF_ATTRIBUTE_PRINTF(2, 3)
fatalErrorHandler(void * closure,const char * message,...)1282 static void fatalErrorHandler(void* closure, const char* message, ...)
1283 {
1284 va_list args;
1285 va_start(args, message);
1286 getParser(closure)->error(XMLErrors::ErrorTypeFatal, message, args);
1287 va_end(args);
1288 }
1289
1290 WTF_ATTRIBUTE_PRINTF(2, 3)
normalErrorHandler(void * closure,const char * message,...)1291 static void normalErrorHandler(void* closure, const char* message, ...)
1292 {
1293 va_list args;
1294 va_start(args, message);
1295 getParser(closure)->error(XMLErrors::ErrorTypeNonFatal, message, args);
1296 va_end(args);
1297 }
1298
1299 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is a hack
1300 // to avoid malloc/free. Using a global variable like this could cause trouble
1301 // if libxml implementation details were to change
1302 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
1303
sharedXHTMLEntity()1304 static xmlEntityPtr sharedXHTMLEntity()
1305 {
1306 static xmlEntity entity;
1307 if (!entity.type) {
1308 entity.type = XML_ENTITY_DECL;
1309 entity.orig = sharedXHTMLEntityResult;
1310 entity.content = sharedXHTMLEntityResult;
1311 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
1312 }
1313 return &entity;
1314 }
1315
convertUTF16EntityToUTF8(const UChar * utf16Entity,size_t numberOfCodeUnits,char * target,size_t targetSize)1316 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
1317 {
1318 const char* originalTarget = target;
1319 WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
1320 utf16Entity + numberOfCodeUnits, &target, target + targetSize);
1321 if (conversionResult != WTF::Unicode::conversionOK)
1322 return 0;
1323
1324 // Even though we must pass the length, libxml expects the entity string to be null terminated.
1325 ASSERT(target > originalTarget + 1);
1326 *target = '\0';
1327 return target - originalTarget;
1328 }
1329
getXHTMLEntity(const xmlChar * name)1330 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1331 {
1332 UChar utf16DecodedEntity[4];
1333 size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
1334 if (!numberOfCodeUnits)
1335 return 0;
1336
1337 ASSERT(numberOfCodeUnits <= 4);
1338 size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
1339 reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
1340 if (!entityLengthInUTF8)
1341 return 0;
1342
1343 xmlEntityPtr entity = sharedXHTMLEntity();
1344 entity->length = entityLengthInUTF8;
1345 entity->name = name;
1346 return entity;
1347 }
1348
getEntityHandler(void * closure,const xmlChar * name)1349 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1350 {
1351 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1352 xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1353 if (ent) {
1354 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1355 return ent;
1356 }
1357
1358 ent = xmlGetDocEntity(ctxt->myDoc, name);
1359 if (!ent && getParser(closure)->isXHTMLDocument()) {
1360 ent = getXHTMLEntity(name);
1361 if (ent)
1362 ent->etype = XML_INTERNAL_GENERAL_ENTITY;
1363 }
1364
1365 return ent;
1366 }
1367
startDocumentHandler(void * closure)1368 static void startDocumentHandler(void* closure)
1369 {
1370 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1371 XMLDocumentParser* parser = getParser(closure);
1372 switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk());
1373 parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone);
1374 xmlSAX2StartDocument(closure);
1375 }
1376
endDocumentHandler(void * closure)1377 static void endDocumentHandler(void* closure)
1378 {
1379 getParser(closure)->endDocument();
1380 xmlSAX2EndDocument(closure);
1381 }
1382
internalSubsetHandler(void * closure,const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)1383 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1384 {
1385 getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID));
1386 xmlSAX2InternalSubset(closure, name, externalID, systemID);
1387 }
1388
externalSubsetHandler(void * closure,const xmlChar *,const xmlChar * externalId,const xmlChar *)1389 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1390 {
1391 String extId = toString(externalId);
1392 if (extId == "-//W3C//DTD XHTML 1.0 Transitional//EN"
1393 || extId == "-//W3C//DTD XHTML 1.1//EN"
1394 || extId == "-//W3C//DTD XHTML 1.0 Strict//EN"
1395 || extId == "-//W3C//DTD XHTML 1.0 Frameset//EN"
1396 || extId == "-//W3C//DTD XHTML Basic 1.0//EN"
1397 || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"
1398 || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
1399 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
1400 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN"
1401 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN") {
1402 // Controls if we replace entities or not.
1403 getParser(closure)->setIsXHTMLDocument(true);
1404 }
1405 }
1406
// libxml SAX ignorableWhitespace callback. Intentionally empty.
static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
{
    // There is nothing to do here; the handler only exists to work around a
    // crasher. See:
    // http://bugzilla.gnome.org/show_bug.cgi?id=172255
    // http://bugs.webkit.org/show_bug.cgi?id=5792
}
1413
initializeParserContext(const CString & chunk)1414 void XMLDocumentParser::initializeParserContext(const CString& chunk)
1415 {
1416 xmlSAXHandler sax;
1417 memset(&sax, 0, sizeof(sax));
1418
1419 sax.error = normalErrorHandler;
1420 sax.fatalError = fatalErrorHandler;
1421 sax.characters = charactersHandler;
1422 sax.processingInstruction = processingInstructionHandler;
1423 sax.cdataBlock = cdataBlockHandler;
1424 sax.comment = commentHandler;
1425 sax.warning = warningHandler;
1426 sax.startElementNs = startElementNsHandler;
1427 sax.endElementNs = endElementNsHandler;
1428 sax.getEntity = getEntityHandler;
1429 sax.startDocument = startDocumentHandler;
1430 sax.endDocument = endDocumentHandler;
1431 sax.internalSubset = internalSubsetHandler;
1432 sax.externalSubset = externalSubsetHandler;
1433 sax.ignorableWhitespace = ignorableWhitespaceHandler;
1434 sax.entityDecl = xmlSAX2EntityDecl;
1435 sax.initialized = XML_SAX2_MAGIC;
1436 DocumentParser::startParsing();
1437 m_sawError = false;
1438 m_sawCSS = false;
1439 m_sawXSLTransform = false;
1440 m_sawFirstElement = false;
1441
1442 XMLDocumentParserScope scope(document()->fetcher());
1443 if (m_parsingFragment) {
1444 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1445 } else {
1446 ASSERT(!chunk.data());
1447 m_context = XMLParserContext::createStringParser(&sax, this);
1448 }
1449 }
1450
doEnd()1451 void XMLDocumentParser::doEnd()
1452 {
1453 if (!isStopped()) {
1454 if (m_context) {
1455 // Tell libxml we're done.
1456 {
1457 XMLDocumentParserScope scope(document()->fetcher());
1458 finishParsing(context());
1459 }
1460
1461 m_context = nullptr;
1462 }
1463 }
1464
1465 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
1466 if (xmlViewerMode) {
1467 XMLTreeViewer xmlTreeViewer(document());
1468 xmlTreeViewer.transformDocumentToTreeView();
1469 } else if (m_sawXSLTransform) {
1470 xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string());
1471 document()->setTransformSource(adoptPtr(new TransformSource(doc)));
1472 // Make the document think it's done, so it will apply XSL stylesheets.
1473 document()->setParsing(false);
1474 document()->styleResolverChanged();
1475
1476 // styleResolverChanged() call can detach the parser and null out its
1477 // document. In that case, we just bail out.
1478 if (isDetached())
1479 return;
1480
1481 document()->setParsing(true);
1482 DocumentParser::stopParsing();
1483 }
1484 }
1485
xmlDocPtrForString(ResourceFetcher * fetcher,const String & source,const String & url)1486 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url)
1487 {
1488 if (source.isEmpty())
1489 return 0;
1490 // Parse in a single chunk into an xmlDocPtr
1491 // FIXME: Hook up error handlers so that a failure to parse the main
1492 // document results in good error messages.
1493 XMLDocumentParserScope scope(fetcher, errorFunc, 0);
1494 XMLParserInput input(source);
1495 return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS);
1496 }
1497
lineNumber() const1498 OrdinalNumber XMLDocumentParser::lineNumber() const
1499 {
1500 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
1501 }
1502
columnNumber() const1503 OrdinalNumber XMLDocumentParser::columnNumber() const
1504 {
1505 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
1506 }
1507
textPosition() const1508 TextPosition XMLDocumentParser::textPosition() const
1509 {
1510 xmlParserCtxtPtr context = this->context();
1511 if (!context)
1512 return TextPosition::minimumPosition();
1513 return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line), OrdinalNumber::fromOneBasedInt(context->input->col));
1514 }
1515
stopParsing()1516 void XMLDocumentParser::stopParsing()
1517 {
1518 DocumentParser::stopParsing();
1519 if (context())
1520 xmlStopParser(context());
1521 }
1522
resumeParsing()1523 void XMLDocumentParser::resumeParsing()
1524 {
1525 ASSERT(!isDetached());
1526 ASSERT(m_parserPaused);
1527
1528 m_parserPaused = false;
1529
1530 // First, execute any pending callbacks
1531 while (!m_pendingCallbacks.isEmpty()) {
1532 OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst();
1533 callback->call(this);
1534
1535 // A callback paused the parser
1536 if (m_parserPaused)
1537 return;
1538 }
1539
1540 // Then, write any pending data
1541 SegmentedString rest = m_pendingSrc;
1542 m_pendingSrc.clear();
1543 // There is normally only one string left, so toString() shouldn't copy.
1544 // In any case, the XML parser runs on the main thread and it's OK if
1545 // the passed string has more than one reference.
1546 append(rest.toString().impl());
1547
1548 // Finally, if finish() has been called and write() didn't result
1549 // in any further callbacks being queued, call end()
1550 if (m_finishCalled && m_pendingCallbacks.isEmpty())
1551 end();
1552 }
1553
appendFragmentSource(const String & chunk)1554 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1555 {
1556 ASSERT(!m_context);
1557 ASSERT(m_parsingFragment);
1558
1559 CString chunkAsUtf8 = chunk.utf8();
1560
1561 // libxml2 takes an int for a length, and therefore can't handle XML chunks
1562 // larger than 2 GiB.
1563 if (chunkAsUtf8.length() > INT_MAX)
1564 return false;
1565
1566 initializeParserContext(chunkAsUtf8);
1567 xmlParseContent(context());
1568 endDocument(); // Close any open text nodes.
1569
1570 // FIXME: If this code is actually needed, it should probably move to
1571 // finish()
1572 // XMLDocumentParserQt has a similar check (m_stream.error() ==
1573 // QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). Check if all
1574 // the chunk has been processed.
1575 long bytesProcessed = xmlByteConsumed(context());
1576 if (bytesProcessed == -1 || static_cast<unsigned long>(bytesProcessed) != chunkAsUtf8.length()) {
1577 // FIXME: I don't believe we can hit this case without also having seen
1578 // an error or a null byte. If we hit this ASSERT, we've found a test
1579 // case which demonstrates the need for this code.
1580 ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
1581 return false;
1582 }
1583
1584 // No error if the chunk is well formed or it is not but we have no error.
1585 return context()->wellFormed || !xmlCtxtGetLastError(context());
1586 }
1587
1588 // --------------------------------
1589
// State shared between parseAttributes() and its SAX start-element handler,
// which reaches it through the parser context's _private field.
struct AttributeParseState {
    // Attribute values keyed by qualified name.
    HashMap<String, String> attributes;
    // Set once the handler has seen the "attrs" element.
    bool gotAttributes;
};
1594
attributesStartElementNsHandler(void * closure,const xmlChar * xmlLocalName,const xmlChar *,const xmlChar *,int,const xmlChar **,int nbAttributes,int,const xmlChar ** libxmlAttributes)1595 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1596 const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/,
1597 int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes)
1598 {
1599 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1600 return;
1601
1602 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1603 AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1604
1605 state->gotAttributes = true;
1606
1607 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1608 for (int i = 0; i < nbAttributes; ++i) {
1609 String attrLocalName = toString(attributes[i].localname);
1610 int valueLength = (int) (attributes[i].end - attributes[i].value);
1611 String attrValue = toString(attributes[i].value, valueLength);
1612 String attrPrefix = toString(attributes[i].prefix);
1613 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1614
1615 state->attributes.set(attrQName, attrValue);
1616 }
1617 }
1618
parseAttributes(const String & string,bool & attrsOK)1619 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1620 {
1621 AttributeParseState state;
1622 state.gotAttributes = false;
1623
1624 xmlSAXHandler sax;
1625 memset(&sax, 0, sizeof(sax));
1626 sax.startElementNs = attributesStartElementNsHandler;
1627 sax.initialized = XML_SAX2_MAGIC;
1628 RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1629 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1630 parseChunk(parser->context(), parseString);
1631 finishParsing(parser->context());
1632 attrsOK = state.gotAttributes;
1633 return state.attributes;
1634 }
1635
1636 } // namespace WebCore
1637