• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3  * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4  * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5  * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7  * Copyright (C) 2008 Holger Hans Peter Freyther
8  * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Library General Public
12  * License as published by the Free Software Foundation; either
13  * version 2 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Library General Public License for more details.
19  *
20  * You should have received a copy of the GNU Library General Public License
21  * along with this library; see the file COPYING.LIB.  If not, write to
22  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23  * Boston, MA 02110-1301, USA.
24  */
25 
26 #include "config.h"
27 #include "core/xml/parser/XMLDocumentParser.h"
28 
29 #include "bindings/v8/ExceptionState.h"
30 #include "bindings/v8/ExceptionStatePlaceholder.h"
31 #include "bindings/v8/ScriptController.h"
32 #include "bindings/v8/ScriptSourceCode.h"
33 #include "core/FetchInitiatorTypeNames.h"
34 #include "core/HTMLNames.h"
35 #include "core/XMLNSNames.h"
36 #include "core/dom/CDATASection.h"
37 #include "core/dom/Comment.h"
38 #include "core/dom/Document.h"
39 #include "core/dom/DocumentFragment.h"
40 #include "core/dom/DocumentType.h"
41 #include "core/dom/ProcessingInstruction.h"
42 #include "core/dom/ScriptLoader.h"
43 #include "core/dom/TransformSource.h"
44 #include "core/fetch/ResourceFetcher.h"
45 #include "core/fetch/ScriptResource.h"
46 #include "core/frame/LocalFrame.h"
47 #include "core/frame/UseCounter.h"
48 #include "core/html/HTMLHtmlElement.h"
49 #include "core/html/HTMLTemplateElement.h"
50 #include "core/html/parser/HTMLEntityParser.h"
51 #include "core/html/parser/TextResourceDecoder.h"
52 #include "core/loader/FrameLoader.h"
53 #include "core/loader/ImageLoader.h"
54 #include "core/svg/graphics/SVGImage.h"
55 #include "core/xml/XMLTreeViewer.h"
56 #include "core/xml/parser/SharedBufferReader.h"
57 #include "core/xml/parser/XMLDocumentParserScope.h"
58 #include "core/xml/parser/XMLParserInput.h"
59 #include "platform/RuntimeEnabledFeatures.h"
60 #include "platform/SharedBuffer.h"
61 #include "platform/network/ResourceError.h"
62 #include "platform/network/ResourceRequest.h"
63 #include "platform/network/ResourceResponse.h"
64 #include "platform/weborigin/SecurityOrigin.h"
65 #include "wtf/StringExtras.h"
66 #include "wtf/TemporaryChange.h"
67 #include "wtf/Threading.h"
68 #include "wtf/Vector.h"
69 #include "wtf/unicode/UTF8.h"
70 #include <libxml/catalog.h>
71 #include <libxml/parser.h>
72 #include <libxml/parserInternals.h>
73 #include <libxslt/xslt.h>
74 
75 namespace WebCore {
76 
77 using namespace HTMLNames;
78 
// Upper bound on the depth of the open-node stack; pushCurrentNode() reports a
// fatal error once the stack grows past it.
// FIXME: HTMLConstructionSite has a limit of 512, should these match?
static const unsigned maxXMLTreeDepth = 5000;
81 
toString(const xmlChar * string,size_t length)82 static inline String toString(const xmlChar* string, size_t length)
83 {
84     return String::fromUTF8(reinterpret_cast<const char*>(string), length);
85 }
86 
toString(const xmlChar * string)87 static inline String toString(const xmlChar* string)
88 {
89     return String::fromUTF8(reinterpret_cast<const char*>(string));
90 }
91 
toAtomicString(const xmlChar * string,size_t length)92 static inline AtomicString toAtomicString(const xmlChar* string, size_t length)
93 {
94     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length);
95 }
96 
toAtomicString(const xmlChar * string)97 static inline AtomicString toAtomicString(const xmlChar* string)
98 {
99     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
100 }
101 
hasNoStyleInformation(Document * document)102 static inline bool hasNoStyleInformation(Document* document)
103 {
104     if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument())
105         return false;
106 
107     if (!document->frame() || !document->frame()->page())
108         return false;
109 
110     if (document->frame()->tree().parent())
111         return false; // This document is not in a top frame
112 
113     if (SVGImage::isInSVGImage(document))
114         return false;
115 
116     return true;
117 }
118 
119 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
120 public:
PendingStartElementNSCallback(const AtomicString & localName,const AtomicString & prefix,const AtomicString & uri,int namespaceCount,const xmlChar ** namespaces,int attributeCount,int defaultedCount,const xmlChar ** attributes)121     PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri,
122         int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes)
123         : m_localName(localName)
124         , m_prefix(prefix)
125         , m_uri(uri)
126         , m_namespaceCount(namespaceCount)
127         , m_attributeCount(attributeCount)
128         , m_defaultedCount(defaultedCount)
129     {
130         m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2));
131         for (int i = 0; i < namespaceCount * 2 ; ++i)
132             m_namespaces[i] = xmlStrdup(namespaces[i]);
133         m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5));
134         for (int i = 0; i < attributeCount; ++i) {
135             // Each attribute has 5 elements in the array:
136             // name, prefix, uri, value and an end pointer.
137             for (int j = 0; j < 3; ++j)
138                 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
139             int length = attributes[i * 5 + 4] - attributes[i * 5 + 3];
140             m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length);
141             m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length;
142         }
143     }
144 
~PendingStartElementNSCallback()145     virtual ~PendingStartElementNSCallback()
146     {
147         for (int i = 0; i < m_namespaceCount * 2; ++i)
148             xmlFree(m_namespaces[i]);
149         xmlFree(m_namespaces);
150         for (int i = 0; i < m_attributeCount; ++i)
151             for (int j = 0; j < 4; ++j)
152                 xmlFree(m_attributes[i * 5 + j]);
153         xmlFree(m_attributes);
154     }
155 
call(XMLDocumentParser * parser)156     virtual void call(XMLDocumentParser* parser) OVERRIDE
157     {
158         parser->startElementNs(m_localName, m_prefix, m_uri,
159             m_namespaceCount, const_cast<const xmlChar**>(m_namespaces),
160             m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes));
161     }
162 
163 private:
164     AtomicString m_localName;
165     AtomicString m_prefix;
166     AtomicString m_uri;
167     int m_namespaceCount;
168     xmlChar** m_namespaces;
169     int m_attributeCount;
170     int m_defaultedCount;
171     xmlChar** m_attributes;
172 };
173 
174 class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
175 public:
call(XMLDocumentParser * parser)176     virtual void call(XMLDocumentParser* parser) OVERRIDE
177     {
178         parser->endElementNs();
179     }
180 };
181 
182 class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback {
183 public:
PendingCharactersCallback(const xmlChar * chars,int length)184     PendingCharactersCallback(const xmlChar* chars, int length)
185         : m_chars(xmlStrndup(chars, length))
186         , m_length(length)
187     {
188     }
189 
~PendingCharactersCallback()190     virtual ~PendingCharactersCallback()
191     {
192         xmlFree(m_chars);
193     }
194 
call(XMLDocumentParser * parser)195     virtual void call(XMLDocumentParser* parser) OVERRIDE
196     {
197         parser->characters(m_chars, m_length);
198     }
199 
200 private:
201     xmlChar* m_chars;
202     int m_length;
203 };
204 
205 class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback {
206 public:
PendingProcessingInstructionCallback(const String & target,const String & data)207     PendingProcessingInstructionCallback(const String& target, const String& data)
208         : m_target(target)
209         , m_data(data)
210     {
211     }
212 
call(XMLDocumentParser * parser)213     virtual void call(XMLDocumentParser* parser) OVERRIDE
214     {
215         parser->processingInstruction(m_target, m_data);
216     }
217 
218 private:
219     String m_target;
220     String m_data;
221 };
222 
223 class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback {
224 public:
PendingCDATABlockCallback(const String & text)225     explicit PendingCDATABlockCallback(const String& text) : m_text(text) { }
226 
call(XMLDocumentParser * parser)227     virtual void call(XMLDocumentParser* parser) OVERRIDE
228     {
229         parser->cdataBlock(m_text);
230     }
231 
232 private:
233     String m_text;
234 };
235 
236 class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback {
237 public:
PendingCommentCallback(const String & text)238     explicit PendingCommentCallback(const String& text) : m_text(text) { }
239 
call(XMLDocumentParser * parser)240     virtual void call(XMLDocumentParser* parser) OVERRIDE
241     {
242         parser->comment(m_text);
243     }
244 
245 private:
246     String m_text;
247 };
248 
249 class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback {
250 public:
PendingInternalSubsetCallback(const String & name,const String & externalID,const String & systemID)251     PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID)
252         : m_name(name)
253         , m_externalID(externalID)
254         , m_systemID(systemID)
255     {
256     }
257 
call(XMLDocumentParser * parser)258     virtual void call(XMLDocumentParser* parser) OVERRIDE
259     {
260         parser->internalSubset(m_name, m_externalID, m_systemID);
261     }
262 
263 private:
264     String m_name;
265     String m_externalID;
266     String m_systemID;
267 };
268 
269 class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback {
270 public:
PendingErrorCallback(XMLErrors::ErrorType type,const xmlChar * message,OrdinalNumber lineNumber,OrdinalNumber columnNumber)271     PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
272         : m_type(type)
273         , m_message(xmlStrdup(message))
274         , m_lineNumber(lineNumber)
275         , m_columnNumber(columnNumber)
276     {
277     }
278 
~PendingErrorCallback()279     virtual ~PendingErrorCallback()
280     {
281         xmlFree(m_message);
282     }
283 
call(XMLDocumentParser * parser)284     virtual void call(XMLDocumentParser* parser) OVERRIDE
285     {
286         parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
287     }
288 
289 private:
290     XMLErrors::ErrorType m_type;
291     xmlChar* m_message;
292     OrdinalNumber m_lineNumber;
293     OrdinalNumber m_columnNumber;
294 };
295 
pushCurrentNode(ContainerNode * n)296 void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
297 {
298     ASSERT(n);
299     ASSERT(m_currentNode);
300 #if !ENABLE(OILPAN)
301     if (n != document())
302         n->ref();
303 #endif
304     m_currentNodeStack.append(m_currentNode);
305     m_currentNode = n;
306     if (m_currentNodeStack.size() > maxXMLTreeDepth)
307         handleError(XMLErrors::ErrorTypeFatal, "Excessive node nesting.", textPosition());
308 }
309 
popCurrentNode()310 void XMLDocumentParser::popCurrentNode()
311 {
312     if (!m_currentNode)
313         return;
314     ASSERT(m_currentNodeStack.size());
315 #if !ENABLE(OILPAN)
316     if (m_currentNode != document())
317         m_currentNode->deref();
318 #endif
319     m_currentNode = m_currentNodeStack.last();
320     m_currentNodeStack.removeLast();
321 }
322 
clearCurrentNodeStack()323 void XMLDocumentParser::clearCurrentNodeStack()
324 {
325 #if !ENABLE(OILPAN)
326     if (m_currentNode && m_currentNode != document())
327         m_currentNode->deref();
328 #endif
329     m_currentNode = nullptr;
330     m_leafTextNode = nullptr;
331 
332     if (m_currentNodeStack.size()) { // Aborted parsing.
333 #if !ENABLE(OILPAN)
334         for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
335             m_currentNodeStack[i]->deref();
336         if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
337             m_currentNodeStack[0]->deref();
338 #endif
339         m_currentNodeStack.clear();
340     }
341 }
342 
insert(const SegmentedString &)343 void XMLDocumentParser::insert(const SegmentedString&)
344 {
345     ASSERT_NOT_REACHED();
346 }
347 
append(PassRefPtr<StringImpl> inputSource)348 void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
349 {
350     SegmentedString source(inputSource);
351     if (m_sawXSLTransform || !m_sawFirstElement)
352         m_originalSourceForTransform.append(source);
353 
354     if (isStopped() || m_sawXSLTransform)
355         return;
356 
357     if (m_parserPaused) {
358         m_pendingSrc.append(source);
359         return;
360     }
361 
362     // JavaScript can detach the parser. Make sure this is not released
363     // before the end of this method.
364     RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
365 
366     doWrite(source.toString());
367 }
368 
handleError(XMLErrors::ErrorType type,const char * formattedMessage,TextPosition position)369 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position)
370 {
371     m_xmlErrors.handleError(type, formattedMessage, position);
372     if (type != XMLErrors::ErrorTypeWarning)
373         m_sawError = true;
374     if (type == XMLErrors::ErrorTypeFatal)
375         stopParsing();
376 }
377 
enterText()378 void XMLDocumentParser::enterText()
379 {
380     ASSERT(m_bufferedText.size() == 0);
381     ASSERT(!m_leafTextNode);
382     m_leafTextNode = Text::create(m_currentNode->document(), "");
383     m_currentNode->parserAppendChild(m_leafTextNode.get());
384 }
385 
exitText()386 void XMLDocumentParser::exitText()
387 {
388     if (isStopped())
389         return;
390 
391     if (!m_leafTextNode)
392         return;
393 
394     m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()));
395     m_bufferedText.clear();
396     m_leafTextNode = nullptr;
397 }
398 
detach()399 void XMLDocumentParser::detach()
400 {
401     clearCurrentNodeStack();
402     ScriptableDocumentParser::detach();
403 }
404 
end()405 void XMLDocumentParser::end()
406 {
407     // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
408     // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
409     ASSERT(!m_parsingFragment);
410 
411     doEnd();
412 
413     // doEnd() call above can detach the parser and null out its document.
414     // In that case, we just bail out.
415     if (isDetached())
416         return;
417 
418     // doEnd() could process a script tag, thus pausing parsing.
419     if (m_parserPaused)
420         return;
421 
422     if (m_sawError) {
423         insertErrorMessageBlock();
424     } else {
425         exitText();
426         document()->styleResolverChanged();
427     }
428 
429     if (isParsing())
430         prepareToStopParsing();
431     document()->setReadyState(Document::Interactive);
432     clearCurrentNodeStack();
433     document()->finishedParsing();
434 }
435 
finish()436 void XMLDocumentParser::finish()
437 {
438     // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
439     // makes sense to call any methods on DocumentParser once it's been stopped.
440     // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
441 
442     if (m_parserPaused)
443         m_finishCalled = true;
444     else
445         end();
446 }
447 
insertErrorMessageBlock()448 void XMLDocumentParser::insertErrorMessageBlock()
449 {
450     m_xmlErrors.insertErrorMessageBlock();
451 }
452 
notifyFinished(Resource * unusedResource)453 void XMLDocumentParser::notifyFinished(Resource* unusedResource)
454 {
455     ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
456 
457     ScriptSourceCode sourceCode(m_pendingScript.get());
458     bool errorOccurred = m_pendingScript->errorOccurred();
459     bool wasCanceled = m_pendingScript->wasCanceled();
460 
461     m_pendingScript->removeClient(this);
462     m_pendingScript = 0;
463 
464     RefPtrWillBeRawPtr<Element> e = m_scriptElement;
465     m_scriptElement = nullptr;
466 
467     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
468     ASSERT(scriptLoader);
469 
470     // JavaScript can detach this parser, make sure it's kept alive even if
471     // detached.
472     RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
473 
474     if (errorOccurred) {
475         scriptLoader->dispatchErrorEvent();
476     } else if (!wasCanceled) {
477         scriptLoader->executeScript(sourceCode);
478         scriptLoader->dispatchLoadEvent();
479     }
480 
481     m_scriptElement = nullptr;
482 
483     if (!isDetached() && !m_requestingScript)
484         resumeParsing();
485 }
486 
isWaitingForScripts() const487 bool XMLDocumentParser::isWaitingForScripts() const
488 {
489     return m_pendingScript;
490 }
491 
pauseParsing()492 void XMLDocumentParser::pauseParsing()
493 {
494     if (!m_parsingFragment)
495         m_parserPaused = true;
496 }
497 
parseDocumentFragment(const String & chunk,DocumentFragment * fragment,Element * contextElement,ParserContentPolicy parserContentPolicy)498 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
499 {
500     if (!chunk.length())
501         return true;
502 
503     // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
504     // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
505     // For now we have a hack for script/style innerHTML support:
506     if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) {
507         fragment->parserAppendChild(fragment->document().createTextNode(chunk));
508         return true;
509     }
510 
511     RefPtrWillBeRawPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
512     bool wellFormed = parser->appendFragmentSource(chunk);
513 
514     // Do not call finish(). Current finish() and doEnd() implementations touch
515     // the main Document/loader and can cause crashes in the fragment case.
516 
517     // Allows ~DocumentParser to assert it was detached before destruction.
518     parser->detach();
519     // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
520     return wellFormed;
521 }
522 
523 static int globalDescriptor = 0;
524 static ThreadIdentifier libxmlLoaderThread = 0;
525 
matchFunc(const char *)526 static int matchFunc(const char*)
527 {
528     // Only match loads initiated due to uses of libxml2 from within
529     // XMLDocumentParser to avoid interfering with client applications that also
530     // use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
531     return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread;
532 }
533 
setAttributes(Element * element,Vector<Attribute> & attributeVector,ParserContentPolicy parserContentPolicy)534 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
535 {
536     if (!scriptingContentIsAllowed(parserContentPolicy))
537         element->stripScriptingAttributes(attributeVector);
538     element->parserSetAttributes(attributeVector);
539 }
540 
switchEncoding(xmlParserCtxtPtr ctxt,bool is8Bit)541 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit)
542 {
543     // Hack around libxml2's lack of encoding overide support by manually
544     // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
545     // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks and
546     // switch encodings, causing the parse to fail.
547     if (is8Bit) {
548         xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
549         return;
550     }
551 
552     const UChar BOM = 0xFEFF;
553     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
554     xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
555 }
556 
parseChunk(xmlParserCtxtPtr ctxt,const String & chunk)557 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk)
558 {
559     bool is8Bit = chunk.is8Bit();
560     switchEncoding(ctxt, is8Bit);
561     if (is8Bit)
562         xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0);
563     else
564         xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0);
565 }
566 
finishParsing(xmlParserCtxtPtr ctxt)567 static void finishParsing(xmlParserCtxtPtr ctxt)
568 {
569     xmlParseChunk(ctxt, 0, 0, 1);
570 }
571 
572 #define xmlParseChunk #error "Use parseChunk instead to select the correct encoding."
573 
isLibxmlDefaultCatalogFile(const String & urlString)574 static bool isLibxmlDefaultCatalogFile(const String& urlString)
575 {
576     // On non-Windows platforms libxml asks for this URL, the
577     // "XML_XML_DEFAULT_CATALOG", on initialization.
578     if (urlString == "file:///etc/xml/catalog")
579         return true;
580 
581     // On Windows, libxml computes a URL relative to where its DLL resides.
582     if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
583         return true;
584     return false;
585 }
586 
shouldAllowExternalLoad(const KURL & url)587 static bool shouldAllowExternalLoad(const KURL& url)
588 {
589     String urlString = url.string();
590 
591     // This isn't really necessary now that initializeLibXMLIfNecessary
592     // disables catalog support in libxml, but keeping it for defense in depth.
593     if (isLibxmlDefaultCatalogFile(url))
594         return false;
595 
596     // The most common DTD. There isn't much point in hammering www.w3c.org by
597     // requesting this URL for every XHTML document.
598     if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
599         return false;
600 
601     // Similarly, there isn't much point in requesting the SVG DTD.
602     if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
603         return false;
604 
605     // The libxml doesn't give us a lot of context for deciding whether to allow
606     // this request. In the worst case, this load could be for an external
607     // entity and the resulting document could simply read the retrieved
608     // content. If we had more context, we could potentially allow the parser to
609     // load a DTD. As things stand, we take the conservative route and allow
610     // same-origin requests only.
611     if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) {
612         XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url);
613         return false;
614     }
615 
616     return true;
617 }
618 
openFunc(const char * uri)619 static void* openFunc(const char* uri)
620 {
621     ASSERT(XMLDocumentParserScope::currentFetcher);
622     ASSERT(currentThread() == libxmlLoaderThread);
623 
624     KURL url(KURL(), uri);
625 
626     if (!shouldAllowExternalLoad(url))
627         return &globalDescriptor;
628 
629     KURL finalURL;
630     RefPtr<SharedBuffer> data;
631 
632     {
633         ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher;
634         XMLDocumentParserScope scope(0);
635         // FIXME: We should restore the original global error handler as well.
636 
637         if (fetcher->frame()) {
638             FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions());
639             ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request);
640             if (resource && !resource->errorOccurred()) {
641                 data = resource->resourceBuffer();
642                 finalURL = resource->response().url();
643             }
644         }
645     }
646 
647     // We have to check the URL again after the load to catch redirects.
648     // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
649     if (!shouldAllowExternalLoad(finalURL))
650         return &globalDescriptor;
651 
652     return new SharedBufferReader(data);
653 }
654 
readFunc(void * context,char * buffer,int len)655 static int readFunc(void* context, char* buffer, int len)
656 {
657     // Do 0-byte reads in case of a null descriptor
658     if (context == &globalDescriptor)
659         return 0;
660 
661     SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
662     return data->readData(buffer, len);
663 }
664 
// Write callback registered with libxml2; it ignores its arguments and
// reports that zero bytes were written.
static int writeFunc(void* /* context */, const char* /* buffer */, int /* len */)
{
    // Always just do 0-byte writes
    const int bytesWritten = 0;
    return bytesWritten;
}
670 
closeFunc(void * context)671 static int closeFunc(void* context)
672 {
673     if (context != &globalDescriptor) {
674         SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
675         delete data;
676     }
677     return 0;
678 }
679 
// printf-style error callback that intentionally does nothing.
static void errorFunc(void* /* context */, const char* /* format */, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
}
684 
initializeLibXMLIfNecessary()685 static void initializeLibXMLIfNecessary()
686 {
687     static bool didInit = false;
688     if (didInit)
689         return;
690 
691     // We don't want libxml to try and load catalogs.
692     // FIXME: It's not nice to set global settings in libxml, embedders of Blink
693     // could be trying to use libxml themselves.
694     xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
695     xmlInitParser();
696     xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
697     xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
698     libxmlLoaderThread = currentThread();
699     didInit = true;
700 }
701 
702 
createStringParser(xmlSAXHandlerPtr handlers,void * userData)703 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
704 {
705     initializeLibXMLIfNecessary();
706     xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
707     parser->_private = userData;
708     parser->replaceEntities = true;
709     return adoptRef(new XMLParserContext(parser));
710 }
711 
712 // Chunk should be encoded in UTF-8
createMemoryParser(xmlSAXHandlerPtr handlers,void * userData,const CString & chunk)713 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
714 {
715     initializeLibXMLIfNecessary();
716 
717     // appendFragmentSource() checks that the length doesn't overflow an int.
718     xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
719 
720     if (!parser)
721         return nullptr;
722 
723     // Copy the sax handler
724     memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
725 
726     // Set parser options.
727     // XML_PARSE_NODICT: default dictionary option.
728     // XML_PARSE_NOENT: force entities substitutions.
729     xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
730 
731     // Internal initialization
732     parser->sax2 = 1;
733     parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
734     parser->depth = 0;
735     parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
736     parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
737     parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
738     parser->_private = userData;
739 
740     return adoptRef(new XMLParserContext(parser));
741 }
742 
743 // --------------------------------
744 
supportsXMLVersion(const String & version)745 bool XMLDocumentParser::supportsXMLVersion(const String& version)
746 {
747     return version == "1.0";
748 }
749 
XMLDocumentParser(Document & document,FrameView * frameView)750 XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
751     : ScriptableDocumentParser(document)
752     , m_hasView(frameView)
753     , m_context(nullptr)
754     , m_currentNode(&document)
755     , m_isCurrentlyParsing8BitChunk(false)
756     , m_sawError(false)
757     , m_sawCSS(false)
758     , m_sawXSLTransform(false)
759     , m_sawFirstElement(false)
760     , m_isXHTMLDocument(false)
761     , m_parserPaused(false)
762     , m_requestingScript(false)
763     , m_finishCalled(false)
764     , m_xmlErrors(&document)
765     , m_pendingScript(0)
766     , m_scriptStartPosition(TextPosition::belowRangePosition())
767     , m_parsingFragment(false)
768 {
769     // This is XML being used as a document resource.
770     if (frameView && document.isXMLDocument())
771         UseCounter::count(document, UseCounter::XMLDocument);
772 }
773 
XMLDocumentParser(DocumentFragment * fragment,Element * parentElement,ParserContentPolicy parserContentPolicy)774 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
775     : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
776     , m_hasView(false)
777     , m_context(nullptr)
778     , m_currentNode(fragment)
779     , m_isCurrentlyParsing8BitChunk(false)
780     , m_sawError(false)
781     , m_sawCSS(false)
782     , m_sawXSLTransform(false)
783     , m_sawFirstElement(false)
784     , m_isXHTMLDocument(false)
785     , m_parserPaused(false)
786     , m_requestingScript(false)
787     , m_finishCalled(false)
788     , m_xmlErrors(&fragment->document())
789     , m_pendingScript(0)
790     , m_scriptStartPosition(TextPosition::belowRangePosition())
791     , m_parsingFragment(true)
792 {
793 #if !ENABLE(OILPAN)
794     fragment->ref();
795 #endif
796 
797     // Add namespaces based on the parent node
798     WillBeHeapVector<RawPtrWillBeMember<Element> > elemStack;
799     while (parentElement) {
800         elemStack.append(parentElement);
801 
802         ContainerNode* n = parentElement->parentNode();
803         if (!n || !n->isElementNode())
804             break;
805         parentElement = toElement(n);
806     }
807 
808     if (elemStack.isEmpty())
809         return;
810 
811     for (; !elemStack.isEmpty(); elemStack.removeLast()) {
812         Element* element = elemStack.last();
813         if (element->hasAttributes()) {
814             AttributeCollection attributes = element->attributes();
815             AttributeCollection::const_iterator end = attributes.end();
816             for (AttributeCollection::const_iterator it = attributes.begin(); it != end; ++it) {
817                 if (it->localName() == xmlnsAtom)
818                     m_defaultNamespaceURI = it->value();
819                 else if (it->prefix() == xmlnsAtom)
820                     m_prefixToNamespaceMap.set(it->localName(), it->value());
821             }
822         }
823     }
824 
825     // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
826     if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
827         m_defaultNamespaceURI = parentElement->namespaceURI();
828 }
829 
~XMLParserContext()830 XMLParserContext::~XMLParserContext()
831 {
832     if (m_context->myDoc)
833         xmlFreeDoc(m_context->myDoc);
834     xmlFreeParserCtxt(m_context);
835 }
836 
~XMLDocumentParser()837 XMLDocumentParser::~XMLDocumentParser()
838 {
839 #if !ENABLE(OILPAN)
840     // The XMLDocumentParser will always be detached before being destroyed.
841     ASSERT(m_currentNodeStack.isEmpty());
842     ASSERT(!m_currentNode);
843 #endif
844 
845     // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
846     if (m_pendingScript)
847         m_pendingScript->removeClient(this);
848 }
849 
// Reports the members that hold traced references to |visitor|, then
// forwards to ScriptableDocumentParser::trace(). m_currentNodeStack is only
// traced in OILPAN builds.
trace(Visitor * visitor)850 void XMLDocumentParser::trace(Visitor* visitor)
851 {
852     visitor->trace(m_currentNode);
853 #if ENABLE(OILPAN)
854     visitor->trace(m_currentNodeStack);
855 #endif
856     visitor->trace(m_leafTextNode);
857     visitor->trace(m_xmlErrors);
858     visitor->trace(m_scriptElement);
859     ScriptableDocumentParser::trace(visitor);
860 }
861 
doWrite(const String & parseString)862 void XMLDocumentParser::doWrite(const String& parseString)
863 {
864     ASSERT(!isDetached());
865     if (!m_context)
866         initializeParserContext();
867 
868     // Protect the libxml context from deletion during a callback
869     RefPtr<XMLParserContext> context = m_context;
870 
871     // libXML throws an error if you try to switch the encoding for an empty
872     // string.
873     if (parseString.length()) {
874         // JavaScript may cause the parser to detach during parseChunk
875         // keep this alive until this function is done.
876         RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
877 
878         XMLDocumentParserScope scope(document()->fetcher());
879         TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
880         parseChunk(context->context(), parseString);
881 
882         // JavaScript (which may be run under the parseChunk callstack) may
883         // cause the parser to be stopped or detached.
884         if (isStopped())
885             return;
886     }
887 
888     // FIXME: Why is this here? And why is it after we process the passed
889     // source?
890     if (document()->sawDecodingError()) {
891         // If the decoder saw an error, report it as fatal (stops parsing)
892         TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
893         handleError(XMLErrors::ErrorTypeFatal, "Encoding error", position);
894     }
895 }
896 
// View of one namespace declaration in the array libxml passes to the
// startElementNs callback: handleNamespaceAttributes() reinterprets that
// const xmlChar** array as an array of these (prefix, uri) pairs.
// |prefix| is null for a default namespace declaration.
897 struct xmlSAX2Namespace {
898     const xmlChar* prefix;
899     const xmlChar* uri;
900 };
901 
handleNamespaceAttributes(Vector<Attribute> & prefixedAttributes,const xmlChar ** libxmlNamespaces,int nbNamespaces,ExceptionState & exceptionState)902 static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState)
903 {
904     xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
905     for (int i = 0; i < nbNamespaces; ++i) {
906         AtomicString namespaceQName = xmlnsAtom;
907         AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
908         if (namespaces[i].prefix)
909             namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
910 
911         QualifiedName parsedName = anyName;
912         if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState))
913             return;
914 
915         prefixedAttributes.append(Attribute(parsedName, namespaceURI));
916     }
917 }
918 
// View of one attribute in the array libxml passes to the startElementNs
// callback: the const xmlChar** array is reinterpreted as an array of these
// five-pointer records. The attribute value is the byte range
// [value, end); its length is computed as end - value.
919 struct xmlSAX2Attributes {
920     const xmlChar* localname;
921     const xmlChar* prefix;
922     const xmlChar* uri;
923     const xmlChar* value;
924     const xmlChar* end;
925 };
926 
handleElementAttributes(Vector<Attribute> & prefixedAttributes,const xmlChar ** libxmlAttributes,int nbAttributes,ExceptionState & exceptionState)927 static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState)
928 {
929     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
930     for (int i = 0; i < nbAttributes; ++i) {
931         int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
932         AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
933         String attrPrefix = toString(attributes[i].prefix);
934         AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
935         AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
936 
937         QualifiedName parsedName = anyName;
938         if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState))
939             return;
940 
941         prefixedAttributes.append(Attribute(parsedName, attrValue));
942     }
943 }
944 
startElementNs(const AtomicString & localName,const AtomicString & prefix,const AtomicString & uri,int nbNamespaces,const xmlChar ** libxmlNamespaces,int nbAttributes,int nbDefaulted,const xmlChar ** libxmlAttributes)945 void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
946     const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
947 {
948     if (isStopped())
949         return;
950 
951     if (m_parserPaused) {
952         m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
953             nbAttributes, nbDefaulted, libxmlAttributes)));
954         return;
955     }
956 
957     exitText();
958 
959     AtomicString adjustedURI = uri;
960     if (m_parsingFragment && adjustedURI.isNull()) {
961         if (!prefix.isNull())
962             adjustedURI = m_prefixToNamespaceMap.get(prefix);
963         else
964             adjustedURI = m_defaultNamespaceURI;
965     }
966 
967     bool isFirstElement = !m_sawFirstElement;
968     m_sawFirstElement = true;
969 
970     QualifiedName qName(prefix, localName, adjustedURI);
971     RefPtrWillBeRawPtr<Element> newElement = m_currentNode->document().createElement(qName, true);
972     if (!newElement) {
973         stopParsing();
974         return;
975     }
976 
977     Vector<Attribute> prefixedAttributes;
978     TrackExceptionState exceptionState;
979     handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState);
980     if (exceptionState.hadException()) {
981         setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
982         stopParsing();
983         return;
984     }
985 
986     handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState);
987     setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
988     if (exceptionState.hadException()) {
989         stopParsing();
990         return;
991     }
992 
993     newElement->beginParsingChildren();
994 
995     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
996     if (scriptLoader)
997         m_scriptStartPosition = textPosition();
998 
999     m_currentNode->parserAppendChild(newElement.get());
1000 
1001     if (isHTMLTemplateElement(*newElement))
1002         pushCurrentNode(toHTMLTemplateElement(*newElement).content());
1003     else
1004         pushCurrentNode(newElement.get());
1005 
1006     if (isHTMLHtmlElement(*newElement))
1007         toHTMLHtmlElement(*newElement).insertedByParser();
1008 
1009     if (!m_parsingFragment && isFirstElement && document()->frame())
1010         document()->frame()->loader().dispatchDocumentElementAvailable();
1011 }
1012 
// SAX end-element callback: finishes the current node and pops it off the
// node stack. If the closed element is a script element, the script is
// prepared and either executed immediately or registered as a pending
// resource, in which case parsing is paused. While the parser is paused the
// call is queued instead; once the parser is stopped it is ignored.
endElementNs()1013 void XMLDocumentParser::endElementNs()
1014 {
1015     if (isStopped())
1016         return;
1017 
1018     if (m_parserPaused) {
1019         m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
1020         return;
1021     }
1022 
1023     // JavaScript can detach the parser. Make sure this is not released before
1024     // the end of this method.
1025     RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
1026 
1027     exitText();
1028 
    // Keep a reference to the node being closed; m_currentNode changes when
    // popCurrentNode() is called below.
1029     RefPtrWillBeRawPtr<ContainerNode> n = m_currentNode;
1030     if (m_currentNode->isElementNode())
1031         toElement(n.get())->finishParsingChildren();
1032 
    // When the content policy disallows scripting, script elements are
    // removed from the tree again instead of being run.
1033     if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) {
1034         popCurrentNode();
1035         n->remove(IGNORE_EXCEPTION);
1036         return;
1037     }
1038 
    // Non-element nodes, and any node when there is no view, need no script
    // handling.
1039     if (!n->isElementNode() || !m_hasView) {
1040         popCurrentNode();
1041         return;
1042     }
1043 
1044     Element* element = toElement(n);
1045 
1046     // The element's parent may have already been removed from document.
1047     // Parsing continues in this case, but scripts aren't executed.
1048     if (!element->inDocument()) {
1049         popCurrentNode();
1050         return;
1051     }
1052 
1053     ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element);
1054     if (!scriptLoader) {
1055         popCurrentNode();
1056         return;
1057     }
1058 
1059     // Don't load external scripts for standalone documents (for now).
1060     ASSERT(!m_pendingScript);
    // m_requestingScript stays set while the script is prepared and run.
1061     m_requestingScript = true;
1062 
1063     if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) {
1064         // FIXME: Script execution should be shared between
1065         // the libxml2 and Qt XMLDocumentParser implementations.
1066 
1067         if (scriptLoader->readyToBeParserExecuted()) {
1068             scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition));
1069         } else if (scriptLoader->willBeParserExecuted()) {
1070             m_pendingScript = scriptLoader->resource();
1071             m_scriptElement = element;
1072             m_pendingScript->addClient(this);
1073 
1074             // m_pendingScript will be 0 if script was already loaded and
1075             // addClient() executed it.
1076             if (m_pendingScript)
1077                 pauseParsing();
1078         } else {
1079             m_scriptElement = nullptr;
1080         }
1081 
1082         // JavaScript may have detached the parser
        // Note: this early return skips both the reset of m_requestingScript
        // and popCurrentNode() below.
1083         if (isDetached())
1084             return;
1085     }
1086     m_requestingScript = false;
1087     popCurrentNode();
1088 }
1089 
characters(const xmlChar * chars,int length)1090 void XMLDocumentParser::characters(const xmlChar* chars, int length)
1091 {
1092     if (isStopped())
1093         return;
1094 
1095     if (m_parserPaused) {
1096         m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length)));
1097         return;
1098     }
1099 
1100     if (!m_leafTextNode)
1101         enterText();
1102     m_bufferedText.append(chars, length);
1103 }
1104 
error(XMLErrors::ErrorType type,const char * message,va_list args)1105 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
1106 {
1107     if (isStopped())
1108         return;
1109 
1110     char formattedMessage[1024];
1111     vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args);
1112 
1113     if (m_parserPaused) {
1114         m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber())));
1115         return;
1116     }
1117 
1118     handleError(type, formattedMessage, textPosition());
1119 }
1120 
processingInstruction(const String & target,const String & data)1121 void XMLDocumentParser::processingInstruction(const String& target, const String& data)
1122 {
1123     if (isStopped())
1124         return;
1125 
1126     if (m_parserPaused) {
1127         m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target, data)));
1128         return;
1129     }
1130 
1131     exitText();
1132 
1133     // ### handle exceptions
1134     TrackExceptionState exceptionState;
1135     RefPtrWillBeRawPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState);
1136     if (exceptionState.hadException())
1137         return;
1138 
1139     pi->setCreatedByParser(true);
1140 
1141     m_currentNode->parserAppendChild(pi.get());
1142 
1143     pi->setCreatedByParser(false);
1144 
1145     if (pi->isCSS())
1146         m_sawCSS = true;
1147 
1148     if (!RuntimeEnabledFeatures::xsltEnabled())
1149         return;
1150 
1151     m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
1152     if (m_sawXSLTransform && !document()->transformSourceDocument()) {
1153         // This behavior is very tricky. We call stopParsing() here because we
1154         // want to stop processing the document until we're ready to apply the
1155         // transform, but we actually still want to be fed decoded string pieces
1156         // to accumulate in m_originalSourceForTransform. So, we call
1157         // stopParsing() here and check isStopped() in element callbacks.
1158         // FIXME: This contradicts the contract of DocumentParser.
1159         stopParsing();
1160     }
1161 }
1162 
cdataBlock(const String & text)1163 void XMLDocumentParser::cdataBlock(const String& text)
1164 {
1165     if (isStopped())
1166         return;
1167 
1168     if (m_parserPaused) {
1169         m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text)));
1170         return;
1171     }
1172 
1173     exitText();
1174 
1175     m_currentNode->parserAppendChild(CDATASection::create(m_currentNode->document(), text));
1176 }
1177 
comment(const String & text)1178 void XMLDocumentParser::comment(const String& text)
1179 {
1180     if (isStopped())
1181         return;
1182 
1183     if (m_parserPaused) {
1184         m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text)));
1185         return;
1186     }
1187 
1188     exitText();
1189 
1190     m_currentNode->parserAppendChild(Comment::create(m_currentNode->document(), text));
1191 }
1192 
// Interpretation of the integer |standalone| value handed to
// startDocument() (which receives the parser context's |standalone| field).
// The enumerators take the consecutive values -2, -1, 0 and 1.
1193 enum StandaloneInfo {
1194     StandaloneUnspecified = -2,
1195     NoXMlDeclaration,
1196     StandaloneNo,
1197     StandaloneYes
1198 };
1199 
startDocument(const String & version,const String & encoding,int standalone)1200 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone)
1201 {
1202     StandaloneInfo standaloneInfo = static_cast<StandaloneInfo>(standalone);
1203     if (standaloneInfo == NoXMlDeclaration) {
1204         document()->setHasXMLDeclaration(false);
1205         return;
1206     }
1207 
1208     if (!version.isNull())
1209         document()->setXMLVersion(version, ASSERT_NO_EXCEPTION);
1210     if (standalone != StandaloneUnspecified)
1211         document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
1212     if (!encoding.isNull())
1213         document()->setXMLEncoding(encoding);
1214     document()->setHasXMLDeclaration(true);
1215 }
1216 
// End-of-document callback; also called directly by appendFragmentSource().
// Leaves text mode via exitText(), which closes any open text node.
endDocument()1217 void XMLDocumentParser::endDocument()
1218 {
1219     exitText();
1220 }
1221 
internalSubset(const String & name,const String & externalID,const String & systemID)1222 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID)
1223 {
1224     if (isStopped())
1225         return;
1226 
1227     if (m_parserPaused) {
1228         m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID)));
1229         return;
1230     }
1231 
1232     if (document())
1233         document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID));
1234 }
1235 
getParser(void * closure)1236 static inline XMLDocumentParser* getParser(void* closure)
1237 {
1238     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1239     return static_cast<XMLDocumentParser*>(ctxt->_private);
1240 }
1241 
startElementNsHandler(void * closure,const xmlChar * localName,const xmlChar * prefix,const xmlChar * uri,int nbNamespaces,const xmlChar ** namespaces,int nbAttributes,int nbDefaulted,const xmlChar ** libxmlAttributes)1242 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
1243 {
1244     getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes);
1245 }
1246 
endElementNsHandler(void * closure,const xmlChar *,const xmlChar *,const xmlChar *)1247 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1248 {
1249     getParser(closure)->endElementNs();
1250 }
1251 
charactersHandler(void * closure,const xmlChar * chars,int length)1252 static void charactersHandler(void* closure, const xmlChar* chars, int length)
1253 {
1254     getParser(closure)->characters(chars, length);
1255 }
1256 
processingInstructionHandler(void * closure,const xmlChar * target,const xmlChar * data)1257 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1258 {
1259     getParser(closure)->processingInstruction(toString(target), toString(data));
1260 }
1261 
cdataBlockHandler(void * closure,const xmlChar * text,int length)1262 static void cdataBlockHandler(void* closure, const xmlChar* text, int length)
1263 {
1264     getParser(closure)->cdataBlock(toString(text, length));
1265 }
1266 
commentHandler(void * closure,const xmlChar * text)1267 static void commentHandler(void* closure, const xmlChar* text)
1268 {
1269     getParser(closure)->comment(toString(text));
1270 }
1271 
1272 WTF_ATTRIBUTE_PRINTF(2, 3)
warningHandler(void * closure,const char * message,...)1273 static void warningHandler(void* closure, const char* message, ...)
1274 {
1275     va_list args;
1276     va_start(args, message);
1277     getParser(closure)->error(XMLErrors::ErrorTypeWarning, message, args);
1278     va_end(args);
1279 }
1280 
1281 WTF_ATTRIBUTE_PRINTF(2, 3)
fatalErrorHandler(void * closure,const char * message,...)1282 static void fatalErrorHandler(void* closure, const char* message, ...)
1283 {
1284     va_list args;
1285     va_start(args, message);
1286     getParser(closure)->error(XMLErrors::ErrorTypeFatal, message, args);
1287     va_end(args);
1288 }
1289 
1290 WTF_ATTRIBUTE_PRINTF(2, 3)
normalErrorHandler(void * closure,const char * message,...)1291 static void normalErrorHandler(void* closure, const char* message, ...)
1292 {
1293     va_list args;
1294     va_start(args, message);
1295     getParser(closure)->error(XMLErrors::ErrorTypeNonFatal, message, args);
1296     va_end(args);
1297 }
1298 
1299 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is a hack
1300 // to avoid malloc/free. Using a global variable like this could cause trouble
1301 // if libxml implementation details were to change
1302 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
1303 
sharedXHTMLEntity()1304 static xmlEntityPtr sharedXHTMLEntity()
1305 {
1306     static xmlEntity entity;
1307     if (!entity.type) {
1308         entity.type = XML_ENTITY_DECL;
1309         entity.orig = sharedXHTMLEntityResult;
1310         entity.content = sharedXHTMLEntityResult;
1311         entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
1312     }
1313     return &entity;
1314 }
1315 
convertUTF16EntityToUTF8(const UChar * utf16Entity,size_t numberOfCodeUnits,char * target,size_t targetSize)1316 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
1317 {
1318     const char* originalTarget = target;
1319     WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
1320         utf16Entity + numberOfCodeUnits, &target, target + targetSize);
1321     if (conversionResult != WTF::Unicode::conversionOK)
1322         return 0;
1323 
1324     // Even though we must pass the length, libxml expects the entity string to be null terminated.
1325     ASSERT(target > originalTarget + 1);
1326     *target = '\0';
1327     return target - originalTarget;
1328 }
1329 
getXHTMLEntity(const xmlChar * name)1330 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1331 {
1332     UChar utf16DecodedEntity[4];
1333     size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
1334     if (!numberOfCodeUnits)
1335         return 0;
1336 
1337     ASSERT(numberOfCodeUnits <= 4);
1338     size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
1339         reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
1340     if (!entityLengthInUTF8)
1341         return 0;
1342 
1343     xmlEntityPtr entity = sharedXHTMLEntity();
1344     entity->length = entityLengthInUTF8;
1345     entity->name = name;
1346     return entity;
1347 }
1348 
getEntityHandler(void * closure,const xmlChar * name)1349 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1350 {
1351     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1352     xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1353     if (ent) {
1354         ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1355         return ent;
1356     }
1357 
1358     ent = xmlGetDocEntity(ctxt->myDoc, name);
1359     if (!ent && getParser(closure)->isXHTMLDocument()) {
1360         ent = getXHTMLEntity(name);
1361         if (ent)
1362             ent->etype = XML_INTERNAL_GENERAL_ENTITY;
1363     }
1364 
1365     return ent;
1366 }
1367 
startDocumentHandler(void * closure)1368 static void startDocumentHandler(void* closure)
1369 {
1370     xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1371     XMLDocumentParser* parser = getParser(closure);
1372     switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk());
1373     parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone);
1374     xmlSAX2StartDocument(closure);
1375 }
1376 
endDocumentHandler(void * closure)1377 static void endDocumentHandler(void* closure)
1378 {
1379     getParser(closure)->endDocument();
1380     xmlSAX2EndDocument(closure);
1381 }
1382 
internalSubsetHandler(void * closure,const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)1383 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1384 {
1385     getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID));
1386     xmlSAX2InternalSubset(closure, name, externalID, systemID);
1387 }
1388 
externalSubsetHandler(void * closure,const xmlChar *,const xmlChar * externalId,const xmlChar *)1389 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1390 {
1391     String extId = toString(externalId);
1392     if (extId == "-//W3C//DTD XHTML 1.0 Transitional//EN"
1393         || extId == "-//W3C//DTD XHTML 1.1//EN"
1394         || extId == "-//W3C//DTD XHTML 1.0 Strict//EN"
1395         || extId == "-//W3C//DTD XHTML 1.0 Frameset//EN"
1396         || extId == "-//W3C//DTD XHTML Basic 1.0//EN"
1397         || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"
1398         || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
1399         || extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
1400         || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN"
1401         || extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN") {
1402         // Controls if we replace entities or not.
1403         getParser(closure)->setIsXHTMLDocument(true);
1404     }
1405 }
1406 
// Deliberately empty ignorable-whitespace callback; it ignores all of its
// arguments and exists only so that the SAX handler slot is non-null.
ignorableWhitespaceHandler(void *,const xmlChar *,int)1407 static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
1408 {
1409     // Nothing to do, but we need this to work around a crasher.
1410     // http://bugzilla.gnome.org/show_bug.cgi?id=172255
1411     // http://bugs.webkit.org/show_bug.cgi?id=5792
1412 }
1413 
initializeParserContext(const CString & chunk)1414 void XMLDocumentParser::initializeParserContext(const CString& chunk)
1415 {
1416     xmlSAXHandler sax;
1417     memset(&sax, 0, sizeof(sax));
1418 
1419     sax.error = normalErrorHandler;
1420     sax.fatalError = fatalErrorHandler;
1421     sax.characters = charactersHandler;
1422     sax.processingInstruction = processingInstructionHandler;
1423     sax.cdataBlock = cdataBlockHandler;
1424     sax.comment = commentHandler;
1425     sax.warning = warningHandler;
1426     sax.startElementNs = startElementNsHandler;
1427     sax.endElementNs = endElementNsHandler;
1428     sax.getEntity = getEntityHandler;
1429     sax.startDocument = startDocumentHandler;
1430     sax.endDocument = endDocumentHandler;
1431     sax.internalSubset = internalSubsetHandler;
1432     sax.externalSubset = externalSubsetHandler;
1433     sax.ignorableWhitespace = ignorableWhitespaceHandler;
1434     sax.entityDecl = xmlSAX2EntityDecl;
1435     sax.initialized = XML_SAX2_MAGIC;
1436     DocumentParser::startParsing();
1437     m_sawError = false;
1438     m_sawCSS = false;
1439     m_sawXSLTransform = false;
1440     m_sawFirstElement = false;
1441 
1442     XMLDocumentParserScope scope(document()->fetcher());
1443     if (m_parsingFragment) {
1444         m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1445     } else {
1446         ASSERT(!chunk.data());
1447         m_context = XMLParserContext::createStringParser(&sax, this);
1448     }
1449 }
1450 
doEnd()1451 void XMLDocumentParser::doEnd()
1452 {
1453     if (!isStopped()) {
1454         if (m_context) {
1455             // Tell libxml we're done.
1456             {
1457                 XMLDocumentParserScope scope(document()->fetcher());
1458                 finishParsing(context());
1459             }
1460 
1461             m_context = nullptr;
1462         }
1463     }
1464 
1465     bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
1466     if (xmlViewerMode) {
1467         XMLTreeViewer xmlTreeViewer(document());
1468         xmlTreeViewer.transformDocumentToTreeView();
1469     } else if (m_sawXSLTransform) {
1470         xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string());
1471         document()->setTransformSource(adoptPtr(new TransformSource(doc)));
1472         // Make the document think it's done, so it will apply XSL stylesheets.
1473         document()->setParsing(false);
1474         document()->styleResolverChanged();
1475 
1476         // styleResolverChanged() call can detach the parser and null out its
1477         // document. In that case, we just bail out.
1478         if (isDetached())
1479             return;
1480 
1481         document()->setParsing(true);
1482         DocumentParser::stopParsing();
1483     }
1484 }
1485 
xmlDocPtrForString(ResourceFetcher * fetcher,const String & source,const String & url)1486 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url)
1487 {
1488     if (source.isEmpty())
1489         return 0;
1490     // Parse in a single chunk into an xmlDocPtr
1491     // FIXME: Hook up error handlers so that a failure to parse the main
1492     // document results in good error messages.
1493     XMLDocumentParserScope scope(fetcher, errorFunc, 0);
1494     XMLParserInput input(source);
1495     return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS);
1496 }
1497 
lineNumber() const1498 OrdinalNumber XMLDocumentParser::lineNumber() const
1499 {
1500     return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
1501 }
1502 
columnNumber() const1503 OrdinalNumber XMLDocumentParser::columnNumber() const
1504 {
1505     return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
1506 }
1507 
textPosition() const1508 TextPosition XMLDocumentParser::textPosition() const
1509 {
1510     xmlParserCtxtPtr context = this->context();
1511     if (!context)
1512         return TextPosition::minimumPosition();
1513     return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line), OrdinalNumber::fromOneBasedInt(context->input->col));
1514 }
1515 
stopParsing()1516 void XMLDocumentParser::stopParsing()
1517 {
1518     DocumentParser::stopParsing();
1519     if (context())
1520         xmlStopParser(context());
1521 }
1522 
resumeParsing()1523 void XMLDocumentParser::resumeParsing()
1524 {
1525     ASSERT(!isDetached());
1526     ASSERT(m_parserPaused);
1527 
1528     m_parserPaused = false;
1529 
1530     // First, execute any pending callbacks
1531     while (!m_pendingCallbacks.isEmpty()) {
1532         OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst();
1533         callback->call(this);
1534 
1535         // A callback paused the parser
1536         if (m_parserPaused)
1537             return;
1538     }
1539 
1540     // Then, write any pending data
1541     SegmentedString rest = m_pendingSrc;
1542     m_pendingSrc.clear();
1543     // There is normally only one string left, so toString() shouldn't copy.
1544     // In any case, the XML parser runs on the main thread and it's OK if
1545     // the passed string has more than one reference.
1546     append(rest.toString().impl());
1547 
1548     // Finally, if finish() has been called and write() didn't result
1549     // in any further callbacks being queued, call end()
1550     if (m_finishCalled && m_pendingCallbacks.isEmpty())
1551         end();
1552 }
1553 
appendFragmentSource(const String & chunk)1554 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1555 {
1556     ASSERT(!m_context);
1557     ASSERT(m_parsingFragment);
1558 
1559     CString chunkAsUtf8 = chunk.utf8();
1560 
1561     // libxml2 takes an int for a length, and therefore can't handle XML chunks
1562     // larger than 2 GiB.
1563     if (chunkAsUtf8.length() > INT_MAX)
1564         return false;
1565 
1566     initializeParserContext(chunkAsUtf8);
1567     xmlParseContent(context());
1568     endDocument(); // Close any open text nodes.
1569 
1570     // FIXME: If this code is actually needed, it should probably move to
1571     // finish()
1572     // XMLDocumentParserQt has a similar check (m_stream.error() ==
1573     // QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). Check if all
1574     // the chunk has been processed.
1575     long bytesProcessed = xmlByteConsumed(context());
1576     if (bytesProcessed == -1 || static_cast<unsigned long>(bytesProcessed) != chunkAsUtf8.length()) {
1577         // FIXME: I don't believe we can hit this case without also having seen
1578         // an error or a null byte. If we hit this ASSERT, we've found a test
1579         // case which demonstrates the need for this code.
1580         ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
1581         return false;
1582     }
1583 
1584     // No error if the chunk is well formed or it is not but we have no error.
1585     return context()->wellFormed || !xmlCtxtGetLastError(context());
1586 }
1587 
1588 // --------------------------------
1589 
1590 struct AttributeParseState {
1591     HashMap<String, String> attributes;
1592     bool gotAttributes;
1593 };
1594 
attributesStartElementNsHandler(void * closure,const xmlChar * xmlLocalName,const xmlChar *,const xmlChar *,int,const xmlChar **,int nbAttributes,int,const xmlChar ** libxmlAttributes)1595 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1596     const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/,
1597     int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes)
1598 {
1599     if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1600         return;
1601 
1602     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1603     AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1604 
1605     state->gotAttributes = true;
1606 
1607     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1608     for (int i = 0; i < nbAttributes; ++i) {
1609         String attrLocalName = toString(attributes[i].localname);
1610         int valueLength = (int) (attributes[i].end - attributes[i].value);
1611         String attrValue = toString(attributes[i].value, valueLength);
1612         String attrPrefix = toString(attributes[i].prefix);
1613         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1614 
1615         state->attributes.set(attrQName, attrValue);
1616     }
1617 }
1618 
parseAttributes(const String & string,bool & attrsOK)1619 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1620 {
1621     AttributeParseState state;
1622     state.gotAttributes = false;
1623 
1624     xmlSAXHandler sax;
1625     memset(&sax, 0, sizeof(sax));
1626     sax.startElementNs = attributesStartElementNsHandler;
1627     sax.initialized = XML_SAX2_MAGIC;
1628     RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1629     String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1630     parseChunk(parser->context(), parseString);
1631     finishParsing(parser->context());
1632     attrsOK = state.gotAttributes;
1633     return state.attributes;
1634 }
1635 
1636 } // namespace WebCore
1637