• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2000 Peter Kelly <pmk@post.com>
3  * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4  * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
5  * Copyright (C) 2007 Samuel Weinig <sam@webkit.org>
6  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7  * Copyright (C) 2008 Holger Hans Peter Freyther
8  * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9  * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Library General Public
13  * License as published by the Free Software Foundation; either
14  * version 2 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Library General Public License for more details.
20  *
21  * You should have received a copy of the GNU Library General Public License
22  * along with this library; see the file COPYING.LIB.  If not, write to
23  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24  * Boston, MA 02110-1301, USA.
25  */
26 
27 #include "config.h"
28 #include "XMLDocumentParser.h"
29 
30 #include "CDATASection.h"
31 #include "CachedScript.h"
32 #include "Comment.h"
33 #include "CachedResourceLoader.h"
34 #include "Document.h"
35 #include "DocumentFragment.h"
36 #include "DocumentType.h"
37 #include "Frame.h"
38 #include "FrameLoader.h"
39 #include "FrameView.h"
40 #include "HTMLEntityParser.h"
41 #include "HTMLHtmlElement.h"
42 #include "HTMLLinkElement.h"
43 #include "HTMLNames.h"
44 #include "HTMLStyleElement.h"
45 #include "ProcessingInstruction.h"
46 #include "ResourceError.h"
47 #include "ResourceHandle.h"
48 #include "ResourceRequest.h"
49 #include "ResourceResponse.h"
50 #include "ScriptElement.h"
51 #include "ScriptSourceCode.h"
52 #include "ScriptValue.h"
53 #include "TextResourceDecoder.h"
54 #include "TransformSource.h"
55 #include "XMLNSNames.h"
56 #include "XMLDocumentParserScope.h"
57 #include <libxml/parser.h>
58 #include <libxml/parserInternals.h>
59 #include <wtf/text/CString.h>
60 #include <wtf/StringExtras.h>
61 #include <wtf/Threading.h>
62 #include <wtf/UnusedParam.h>
63 #include <wtf/Vector.h>
64 
65 #if ENABLE(XSLT)
66 #include "XMLTreeViewer.h"
67 #include <libxslt/xslt.h>
68 #endif
69 
70 #if ENABLE(XHTMLMP)
71 #include "HTMLScriptElement.h"
72 #endif
73 
74 
75 using namespace std;
76 
77 namespace WebCore {
78 
79 class PendingCallbacks {
80     WTF_MAKE_NONCOPYABLE(PendingCallbacks);
81 public:
PendingCallbacks()82     PendingCallbacks() { }
~PendingCallbacks()83     ~PendingCallbacks()
84     {
85         deleteAllValues(m_callbacks);
86     }
87 
appendStartElementNSCallback(const xmlChar * xmlLocalName,const xmlChar * xmlPrefix,const xmlChar * xmlURI,int nb_namespaces,const xmlChar ** namespaces,int nb_attributes,int nb_defaulted,const xmlChar ** attributes)88     void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
89                                       const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes)
90     {
91         PendingStartElementNSCallback* callback = new PendingStartElementNSCallback;
92 
93         callback->xmlLocalName = xmlStrdup(xmlLocalName);
94         callback->xmlPrefix = xmlStrdup(xmlPrefix);
95         callback->xmlURI = xmlStrdup(xmlURI);
96         callback->nb_namespaces = nb_namespaces;
97         callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_namespaces * 2));
98         for (int i = 0; i < nb_namespaces * 2 ; i++)
99             callback->namespaces[i] = xmlStrdup(namespaces[i]);
100         callback->nb_attributes = nb_attributes;
101         callback->nb_defaulted = nb_defaulted;
102         callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_attributes * 5));
103         for (int i = 0; i < nb_attributes; i++) {
104             // Each attribute has 5 elements in the array:
105             // name, prefix, uri, value and an end pointer.
106 
107             for (int j = 0; j < 3; j++)
108                 callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
109 
110             int len = attributes[i * 5 + 4] - attributes[i * 5 + 3];
111 
112             callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len);
113             callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len;
114         }
115 
116         m_callbacks.append(callback);
117     }
118 
appendEndElementNSCallback()119     void appendEndElementNSCallback()
120     {
121         PendingEndElementNSCallback* callback = new PendingEndElementNSCallback;
122 
123         m_callbacks.append(callback);
124     }
125 
appendCharactersCallback(const xmlChar * s,int len)126     void appendCharactersCallback(const xmlChar* s, int len)
127     {
128         PendingCharactersCallback* callback = new PendingCharactersCallback;
129 
130         callback->s = xmlStrndup(s, len);
131         callback->len = len;
132 
133         m_callbacks.append(callback);
134     }
135 
appendProcessingInstructionCallback(const xmlChar * target,const xmlChar * data)136     void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data)
137     {
138         PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback;
139 
140         callback->target = xmlStrdup(target);
141         callback->data = xmlStrdup(data);
142 
143         m_callbacks.append(callback);
144     }
145 
appendCDATABlockCallback(const xmlChar * s,int len)146     void appendCDATABlockCallback(const xmlChar* s, int len)
147     {
148         PendingCDATABlockCallback* callback = new PendingCDATABlockCallback;
149 
150         callback->s = xmlStrndup(s, len);
151         callback->len = len;
152 
153         m_callbacks.append(callback);
154     }
155 
appendCommentCallback(const xmlChar * s)156     void appendCommentCallback(const xmlChar* s)
157     {
158         PendingCommentCallback* callback = new PendingCommentCallback;
159 
160         callback->s = xmlStrdup(s);
161 
162         m_callbacks.append(callback);
163     }
164 
appendInternalSubsetCallback(const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)165     void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
166     {
167         PendingInternalSubsetCallback* callback = new PendingInternalSubsetCallback;
168 
169         callback->name = xmlStrdup(name);
170         callback->externalID = xmlStrdup(externalID);
171         callback->systemID = xmlStrdup(systemID);
172 
173         m_callbacks.append(callback);
174     }
175 
appendErrorCallback(XMLDocumentParser::ErrorType type,const xmlChar * message,int lineNumber,int columnNumber)176     void appendErrorCallback(XMLDocumentParser::ErrorType type, const xmlChar* message, int lineNumber, int columnNumber)
177     {
178         PendingErrorCallback* callback = new PendingErrorCallback;
179 
180         callback->message = xmlStrdup(message);
181         callback->type = type;
182         callback->lineNumber = lineNumber;
183         callback->columnNumber = columnNumber;
184 
185         m_callbacks.append(callback);
186     }
187 
callAndRemoveFirstCallback(XMLDocumentParser * parser)188     void callAndRemoveFirstCallback(XMLDocumentParser* parser)
189     {
190         OwnPtr<PendingCallback> callback(m_callbacks.takeFirst());
191         callback->call(parser);
192     }
193 
isEmpty() const194     bool isEmpty() const { return m_callbacks.isEmpty(); }
195 
196 private:
197     struct PendingCallback {
~PendingCallbackWebCore::PendingCallbacks::PendingCallback198         virtual ~PendingCallback() { }
199         virtual void call(XMLDocumentParser* parser) = 0;
200     };
201 
202     struct PendingStartElementNSCallback : public PendingCallback {
~PendingStartElementNSCallbackWebCore::PendingCallbacks::PendingStartElementNSCallback203         virtual ~PendingStartElementNSCallback()
204         {
205             xmlFree(xmlLocalName);
206             xmlFree(xmlPrefix);
207             xmlFree(xmlURI);
208             for (int i = 0; i < nb_namespaces * 2; i++)
209                 xmlFree(namespaces[i]);
210             xmlFree(namespaces);
211             for (int i = 0; i < nb_attributes; i++)
212                 for (int j = 0; j < 4; j++)
213                     xmlFree(attributes[i * 5 + j]);
214             xmlFree(attributes);
215         }
216 
callWebCore::PendingCallbacks::PendingStartElementNSCallback217         virtual void call(XMLDocumentParser* parser)
218         {
219             parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI,
220                                       nb_namespaces, const_cast<const xmlChar**>(namespaces),
221                                       nb_attributes, nb_defaulted, const_cast<const xmlChar**>(attributes));
222         }
223 
224         xmlChar* xmlLocalName;
225         xmlChar* xmlPrefix;
226         xmlChar* xmlURI;
227         int nb_namespaces;
228         xmlChar** namespaces;
229         int nb_attributes;
230         int nb_defaulted;
231         xmlChar** attributes;
232     };
233 
234     struct PendingEndElementNSCallback : public PendingCallback {
callWebCore::PendingCallbacks::PendingEndElementNSCallback235         virtual void call(XMLDocumentParser* parser)
236         {
237             parser->endElementNs();
238         }
239     };
240 
241     struct PendingCharactersCallback : public PendingCallback {
~PendingCharactersCallbackWebCore::PendingCallbacks::PendingCharactersCallback242         virtual ~PendingCharactersCallback()
243         {
244             xmlFree(s);
245         }
246 
callWebCore::PendingCallbacks::PendingCharactersCallback247         virtual void call(XMLDocumentParser* parser)
248         {
249             parser->characters(s, len);
250         }
251 
252         xmlChar* s;
253         int len;
254     };
255 
256     struct PendingProcessingInstructionCallback : public PendingCallback {
~PendingProcessingInstructionCallbackWebCore::PendingCallbacks::PendingProcessingInstructionCallback257         virtual ~PendingProcessingInstructionCallback()
258         {
259             xmlFree(target);
260             xmlFree(data);
261         }
262 
callWebCore::PendingCallbacks::PendingProcessingInstructionCallback263         virtual void call(XMLDocumentParser* parser)
264         {
265             parser->processingInstruction(target, data);
266         }
267 
268         xmlChar* target;
269         xmlChar* data;
270     };
271 
272     struct PendingCDATABlockCallback : public PendingCallback {
~PendingCDATABlockCallbackWebCore::PendingCallbacks::PendingCDATABlockCallback273         virtual ~PendingCDATABlockCallback()
274         {
275             xmlFree(s);
276         }
277 
callWebCore::PendingCallbacks::PendingCDATABlockCallback278         virtual void call(XMLDocumentParser* parser)
279         {
280             parser->cdataBlock(s, len);
281         }
282 
283         xmlChar* s;
284         int len;
285     };
286 
287     struct PendingCommentCallback : public PendingCallback {
~PendingCommentCallbackWebCore::PendingCallbacks::PendingCommentCallback288         virtual ~PendingCommentCallback()
289         {
290             xmlFree(s);
291         }
292 
callWebCore::PendingCallbacks::PendingCommentCallback293         virtual void call(XMLDocumentParser* parser)
294         {
295             parser->comment(s);
296         }
297 
298         xmlChar* s;
299     };
300 
301     struct PendingInternalSubsetCallback : public PendingCallback {
~PendingInternalSubsetCallbackWebCore::PendingCallbacks::PendingInternalSubsetCallback302         virtual ~PendingInternalSubsetCallback()
303         {
304             xmlFree(name);
305             xmlFree(externalID);
306             xmlFree(systemID);
307         }
308 
callWebCore::PendingCallbacks::PendingInternalSubsetCallback309         virtual void call(XMLDocumentParser* parser)
310         {
311             parser->internalSubset(name, externalID, systemID);
312         }
313 
314         xmlChar* name;
315         xmlChar* externalID;
316         xmlChar* systemID;
317     };
318 
319     struct PendingErrorCallback: public PendingCallback {
~PendingErrorCallbackWebCore::PendingCallbacks::PendingErrorCallback320         virtual ~PendingErrorCallback()
321         {
322             xmlFree(message);
323         }
324 
callWebCore::PendingCallbacks::PendingErrorCallback325         virtual void call(XMLDocumentParser* parser)
326         {
327             parser->handleError(type, reinterpret_cast<char*>(message), lineNumber, columnNumber);
328         }
329 
330         XMLDocumentParser::ErrorType type;
331         xmlChar* message;
332         int lineNumber;
333         int columnNumber;
334     };
335 
336     Deque<PendingCallback*> m_callbacks;
337 };
338 // --------------------------------
339 
340 static int globalDescriptor = 0;
341 static ThreadIdentifier libxmlLoaderThread = 0;
342 
matchFunc(const char *)343 static int matchFunc(const char*)
344 {
345     // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
346     // interfering with client applications that also use libxml2.  http://bugs.webkit.org/show_bug.cgi?id=17353
347     return XMLDocumentParserScope::currentCachedResourceLoader && currentThread() == libxmlLoaderThread;
348 }
349 
350 class OffsetBuffer {
351 public:
OffsetBuffer(const Vector<char> & b)352     OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
353 
readOutBytes(char * outputBuffer,unsigned askedToRead)354     int readOutBytes(char* outputBuffer, unsigned askedToRead)
355     {
356         unsigned bytesLeft = m_buffer.size() - m_currentOffset;
357         unsigned lenToCopy = min(askedToRead, bytesLeft);
358         if (lenToCopy) {
359             memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
360             m_currentOffset += lenToCopy;
361         }
362         return lenToCopy;
363     }
364 
365 private:
366     Vector<char> m_buffer;
367     unsigned m_currentOffset;
368 };
369 
switchToUTF16(xmlParserCtxtPtr ctxt)370 static void switchToUTF16(xmlParserCtxtPtr ctxt)
371 {
372     // Hack around libxml2's lack of encoding overide support by manually
373     // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
374     // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
375     // and switch encodings, causing the parse to fail.
376     const UChar BOM = 0xFEFF;
377     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
378     xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
379 }
380 
shouldAllowExternalLoad(const KURL & url)381 static bool shouldAllowExternalLoad(const KURL& url)
382 {
383     String urlString = url.string();
384 
385     // On non-Windows platforms libxml asks for this URL, the
386     // "XML_XML_DEFAULT_CATALOG", on initialization.
387     if (urlString == "file:///etc/xml/catalog")
388         return false;
389 
390     // On Windows, libxml computes a URL relative to where its DLL resides.
391     if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
392         return false;
393 
394     // The most common DTD.  There isn't much point in hammering www.w3c.org
395     // by requesting this URL for every XHTML document.
396     if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
397         return false;
398 
399     // Similarly, there isn't much point in requesting the SVG DTD.
400     if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
401         return false;
402 
403     // The libxml doesn't give us a lot of context for deciding whether to
404     // allow this request.  In the worst case, this load could be for an
405     // external entity and the resulting document could simply read the
406     // retrieved content.  If we had more context, we could potentially allow
407     // the parser to load a DTD.  As things stand, we take the conservative
408     // route and allow same-origin requests only.
409     if (!XMLDocumentParserScope::currentCachedResourceLoader->document()->securityOrigin()->canRequest(url)) {
410         XMLDocumentParserScope::currentCachedResourceLoader->printAccessDeniedMessage(url);
411         return false;
412     }
413 
414     return true;
415 }
416 
openFunc(const char * uri)417 static void* openFunc(const char* uri)
418 {
419     ASSERT(XMLDocumentParserScope::currentCachedResourceLoader);
420     ASSERT(currentThread() == libxmlLoaderThread);
421 
422     KURL url(KURL(), uri);
423 
424     if (!shouldAllowExternalLoad(url))
425         return &globalDescriptor;
426 
427     ResourceError error;
428     ResourceResponse response;
429     Vector<char> data;
430 
431 
432     {
433         CachedResourceLoader* cachedResourceLoader = XMLDocumentParserScope::currentCachedResourceLoader;
434         XMLDocumentParserScope scope(0);
435         // FIXME: We should restore the original global error handler as well.
436 
437         if (cachedResourceLoader->frame())
438             cachedResourceLoader->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, error, response, data);
439     }
440 
441     // We have to check the URL again after the load to catch redirects.
442     // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
443     if (!shouldAllowExternalLoad(response.url()))
444         return &globalDescriptor;
445 
446     return new OffsetBuffer(data);
447 }
448 
readFunc(void * context,char * buffer,int len)449 static int readFunc(void* context, char* buffer, int len)
450 {
451     // Do 0-byte reads in case of a null descriptor
452     if (context == &globalDescriptor)
453         return 0;
454 
455     OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
456     return data->readOutBytes(buffer, len);
457 }
458 
// libxml2 "write" callback: discards its arguments and reports nothing written.
static int writeFunc(void*, const char*, int)
{
    // Always just do 0-byte writes
    const int bytesWritten = 0;
    return bytesWritten;
}
464 
closeFunc(void * context)465 static int closeFunc(void* context)
466 {
467     if (context != &globalDescriptor) {
468         OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
469         delete data;
470     }
471     return 0;
472 }
473 
474 #if ENABLE(XSLT)
// printf-style error callback that deliberately ignores everything it is given.
// NOTE(review): where this is registered is not visible in this part of the file.
static void errorFunc(void*, const char*, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
}
479 #endif
480 
481 static bool didInit = false;
482 
createStringParser(xmlSAXHandlerPtr handlers,void * userData)483 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
484 {
485     if (!didInit) {
486         xmlInitParser();
487         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
488         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
489         libxmlLoaderThread = currentThread();
490         didInit = true;
491     }
492 
493     xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
494     parser->_private = userData;
495     parser->replaceEntities = true;
496     switchToUTF16(parser);
497 
498     return adoptRef(new XMLParserContext(parser));
499 }
500 
501 
502 // Chunk should be encoded in UTF-8
createMemoryParser(xmlSAXHandlerPtr handlers,void * userData,const char * chunk)503 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk)
504 {
505     if (!didInit) {
506         xmlInitParser();
507         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
508         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
509         libxmlLoaderThread = currentThread();
510         didInit = true;
511     }
512 
513     xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk, xmlStrlen((const xmlChar*)chunk));
514 
515     if (!parser)
516         return 0;
517 
518     // Copy the sax handler
519     memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
520 
521     // Set parser options.
522     // XML_PARSE_NODICT: default dictionary option.
523     // XML_PARSE_NOENT: force entities substitutions.
524     xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
525 
526     // Internal initialization
527     parser->sax2 = 1;
528     parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
529     parser->depth = 0;
530     parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
531     parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
532     parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
533     parser->_private = userData;
534 
535     return adoptRef(new XMLParserContext(parser));
536 }
537 
538 // --------------------------------
539 
supportsXMLVersion(const String & version)540 bool XMLDocumentParser::supportsXMLVersion(const String& version)
541 {
542     return version == "1.0";
543 }
544 
XMLDocumentParser(Document * document,FrameView * frameView)545 XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView)
546     : ScriptableDocumentParser(document)
547     , m_view(frameView)
548     , m_context(0)
549     , m_pendingCallbacks(new PendingCallbacks)
550     , m_currentNode(document)
551     , m_sawError(false)
552     , m_sawCSS(false)
553     , m_sawXSLTransform(false)
554     , m_sawFirstElement(false)
555     , m_isXHTMLDocument(false)
556 #if ENABLE(XHTMLMP)
557     , m_isXHTMLMPDocument(false)
558     , m_hasDocTypeDeclaration(false)
559 #endif
560     , m_parserPaused(false)
561     , m_requestingScript(false)
562     , m_finishCalled(false)
563     , m_errorCount(0)
564     , m_lastErrorPosition(TextPosition1::belowRangePosition())
565     , m_pendingScript(0)
566     , m_scriptStartPosition(TextPosition1::belowRangePosition())
567     , m_parsingFragment(false)
568     , m_scriptingPermission(FragmentScriptingAllowed)
569 {
570 }
571 
XMLDocumentParser(DocumentFragment * fragment,Element * parentElement,FragmentScriptingPermission scriptingPermission)572 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, FragmentScriptingPermission scriptingPermission)
573     : ScriptableDocumentParser(fragment->document())
574     , m_view(0)
575     , m_context(0)
576     , m_pendingCallbacks(new PendingCallbacks)
577     , m_currentNode(fragment)
578     , m_sawError(false)
579     , m_sawCSS(false)
580     , m_sawXSLTransform(false)
581     , m_sawFirstElement(false)
582     , m_isXHTMLDocument(false)
583 #if ENABLE(XHTMLMP)
584     , m_isXHTMLMPDocument(false)
585     , m_hasDocTypeDeclaration(false)
586 #endif
587     , m_parserPaused(false)
588     , m_requestingScript(false)
589     , m_finishCalled(false)
590     , m_errorCount(0)
591     , m_lastErrorPosition(TextPosition1::belowRangePosition())
592     , m_pendingScript(0)
593     , m_scriptStartPosition(TextPosition1::belowRangePosition())
594     , m_parsingFragment(true)
595     , m_scriptingPermission(scriptingPermission)
596 {
597     fragment->ref();
598 
599     // Add namespaces based on the parent node
600     Vector<Element*> elemStack;
601     while (parentElement) {
602         elemStack.append(parentElement);
603 
604         ContainerNode* n = parentElement->parentNode();
605         if (!n || !n->isElementNode())
606             break;
607         parentElement = static_cast<Element*>(n);
608     }
609 
610     if (elemStack.isEmpty())
611         return;
612 
613     for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
614         if (NamedNodeMap* attrs = element->attributes()) {
615             for (unsigned i = 0; i < attrs->length(); i++) {
616                 Attribute* attr = attrs->attributeItem(i);
617                 if (attr->localName() == xmlnsAtom)
618                     m_defaultNamespaceURI = attr->value();
619                 else if (attr->prefix() == xmlnsAtom)
620                     m_prefixToNamespaceMap.set(attr->localName(), attr->value());
621             }
622         }
623     }
624 
625     // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
626     if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
627         m_defaultNamespaceURI = parentElement->namespaceURI();
628 }
629 
~XMLParserContext()630 XMLParserContext::~XMLParserContext()
631 {
632     if (m_context->myDoc)
633         xmlFreeDoc(m_context->myDoc);
634     xmlFreeParserCtxt(m_context);
635 }
636 
~XMLDocumentParser()637 XMLDocumentParser::~XMLDocumentParser()
638 {
639     // The XMLDocumentParser will always be detached before being destroyed.
640     ASSERT(m_currentNodeStack.isEmpty());
641     ASSERT(!m_currentNode);
642 
643     // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
644     if (m_pendingScript)
645         m_pendingScript->removeClient(this);
646 }
647 
doWrite(const String & parseString)648 void XMLDocumentParser::doWrite(const String& parseString)
649 {
650     ASSERT(!isDetached());
651     if (!m_context)
652         initializeParserContext();
653 
654     // Protect the libxml context from deletion during a callback
655     RefPtr<XMLParserContext> context = m_context;
656 
657     // libXML throws an error if you try to switch the encoding for an empty string.
658     if (parseString.length()) {
659         // JavaScript may cause the parser to detach during xmlParseChunk
660         // keep this alive until this function is done.
661         RefPtr<XMLDocumentParser> protect(this);
662 
663         switchToUTF16(context->context());
664         XMLDocumentParserScope scope(document()->cachedResourceLoader());
665         xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
666 
667         // JavaScript (which may be run under the xmlParseChunk callstack) may
668         // cause the parser to be stopped or detached.
669         if (isStopped())
670             return;
671     }
672 
673     // FIXME: Why is this here?  And why is it after we process the passed source?
674     if (document()->decoder() && document()->decoder()->sawError()) {
675         // If the decoder saw an error, report it as fatal (stops parsing)
676         handleError(fatal, "Encoding error", context->context()->input->line, context->context()->input->col);
677     }
678 }
679 
toString(const xmlChar * string,size_t size)680 static inline String toString(const xmlChar* string, size_t size)
681 {
682     return String::fromUTF8(reinterpret_cast<const char*>(string), size);
683 }
684 
toString(const xmlChar * string)685 static inline String toString(const xmlChar* string)
686 {
687     return String::fromUTF8(reinterpret_cast<const char*>(string));
688 }
689 
toAtomicString(const xmlChar * string,size_t size)690 static inline AtomicString toAtomicString(const xmlChar* string, size_t size)
691 {
692     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size);
693 }
694 
toAtomicString(const xmlChar * string)695 static inline AtomicString toAtomicString(const xmlChar* string)
696 {
697     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
698 }
699 
700 struct _xmlSAX2Namespace {
701     const xmlChar* prefix;
702     const xmlChar* uri;
703 };
704 typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
705 
handleElementNamespaces(Element * newElement,const xmlChar ** libxmlNamespaces,int nb_namespaces,ExceptionCode & ec,FragmentScriptingPermission scriptingPermission)706 static inline void handleElementNamespaces(Element* newElement, const xmlChar** libxmlNamespaces, int nb_namespaces, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
707 {
708     xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
709     for (int i = 0; i < nb_namespaces; i++) {
710         AtomicString namespaceQName = xmlnsAtom;
711         AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
712         if (namespaces[i].prefix)
713             namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
714         newElement->setAttributeNS(XMLNSNames::xmlnsNamespaceURI, namespaceQName, namespaceURI, ec, scriptingPermission);
715         if (ec) // exception setting attributes
716             return;
717     }
718 }
719 
720 struct _xmlSAX2Attributes {
721     const xmlChar* localname;
722     const xmlChar* prefix;
723     const xmlChar* uri;
724     const xmlChar* value;
725     const xmlChar* end;
726 };
727 typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
728 
handleElementAttributes(Element * newElement,const xmlChar ** libxmlAttributes,int nb_attributes,ExceptionCode & ec,FragmentScriptingPermission scriptingPermission)729 static inline void handleElementAttributes(Element* newElement, const xmlChar** libxmlAttributes, int nb_attributes, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
730 {
731     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
732     for (int i = 0; i < nb_attributes; i++) {
733         int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
734         AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
735         String attrPrefix = toString(attributes[i].prefix);
736         AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
737         AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : AtomicString(attrPrefix + ":" + toString(attributes[i].localname));
738 
739         newElement->setAttributeNS(attrURI, attrQName, attrValue, ec, scriptingPermission);
740         if (ec) // exception setting attributes
741             return;
742     }
743 }
744 
startElementNs(const xmlChar * xmlLocalName,const xmlChar * xmlPrefix,const xmlChar * xmlURI,int nb_namespaces,const xmlChar ** libxmlNamespaces,int nb_attributes,int nb_defaulted,const xmlChar ** libxmlAttributes)745 void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
746                                   const xmlChar** libxmlNamespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
747 {
748     if (isStopped())
749         return;
750 
751     if (m_parserPaused) {
752         m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, nb_namespaces, libxmlNamespaces,
753                                                          nb_attributes, nb_defaulted, libxmlAttributes);
754         return;
755     }
756 
757 #if ENABLE(XHTMLMP)
758     // check if the DOCTYPE Declaration of XHTMLMP document exists
759     if (!m_hasDocTypeDeclaration && document()->isXHTMLMPDocument()) {
760         handleError(fatal, "DOCTYPE declaration lost.", lineNumber(), columnNumber());
761         return;
762     }
763 #endif
764 
765     exitText();
766 
767     AtomicString localName = toAtomicString(xmlLocalName);
768     AtomicString uri = toAtomicString(xmlURI);
769     AtomicString prefix = toAtomicString(xmlPrefix);
770 
771     if (m_parsingFragment && uri.isNull()) {
772         if (!prefix.isNull())
773             uri = m_prefixToNamespaceMap.get(prefix);
774         else
775             uri = m_defaultNamespaceURI;
776     }
777 
778 #if ENABLE(XHTMLMP)
779     if (!m_sawFirstElement && isXHTMLMPDocument()) {
780         // As per the section 7.1 of OMA-WAP-XHTMLMP-V1_1-20061020-A.pdf,
781         // we should make sure that the root element MUST be 'html' and
782         // ensure the name of the default namespace on the root elment 'html'
783         // MUST be 'http://www.w3.org/1999/xhtml'
784         if (localName != HTMLNames::htmlTag.localName()) {
785             handleError(fatal, "XHTMLMP document expects 'html' as root element.", lineNumber(), columnNumber());
786             return;
787         }
788 
789         if (uri.isNull()) {
790             m_defaultNamespaceURI = HTMLNames::xhtmlNamespaceURI;
791             uri = m_defaultNamespaceURI;
792         }
793     }
794 #endif
795 
796     bool isFirstElement = !m_sawFirstElement;
797     m_sawFirstElement = true;
798 
799     QualifiedName qName(prefix, localName, uri);
800     RefPtr<Element> newElement = document()->createElement(qName, true);
801     if (!newElement) {
802         stopParsing();
803         return;
804     }
805 
806     ExceptionCode ec = 0;
807     handleElementNamespaces(newElement.get(), libxmlNamespaces, nb_namespaces, ec, m_scriptingPermission);
808     if (ec) {
809         stopParsing();
810         return;
811     }
812 
813     handleElementAttributes(newElement.get(), libxmlAttributes, nb_attributes, ec, m_scriptingPermission);
814     if (ec) {
815         stopParsing();
816         return;
817     }
818 
819     newElement->beginParsingChildren();
820 
821     ScriptElement* scriptElement = toScriptElement(newElement.get());
822     if (scriptElement)
823         m_scriptStartPosition = textPositionOneBased();
824 
825     m_currentNode->deprecatedParserAddChild(newElement.get());
826 
827     pushCurrentNode(newElement.get());
828     if (m_view && !newElement->attached())
829         newElement->attach();
830 
831 #if ENABLE(OFFLINE_WEB_APPLICATIONS)
832     if (newElement->hasTagName(HTMLNames::htmlTag))
833         static_cast<HTMLHtmlElement*>(newElement.get())->insertedByParser();
834 #endif
835 
836     if (!m_parsingFragment && isFirstElement && document()->frame())
837         document()->frame()->loader()->dispatchDocumentElementAvailable();
838 }
839 
endElementNs()840 void XMLDocumentParser::endElementNs()
841 {
842     if (isStopped())
843         return;
844 
845     if (m_parserPaused) {
846         m_pendingCallbacks->appendEndElementNSCallback();
847         return;
848     }
849 
850     // JavaScript can detach the parser.  Make sure this is not released
851     // before the end of this method.
852     RefPtr<XMLDocumentParser> protect(this);
853 
854     exitText();
855 
856     RefPtr<Node> n = m_currentNode;
857     n->finishParsingChildren();
858 
859     if (m_scriptingPermission == FragmentScriptingNotAllowed && n->isElementNode() && toScriptElement(static_cast<Element*>(n.get()))) {
860         popCurrentNode();
861         ExceptionCode ec;
862         n->remove(ec);
863         return;
864     }
865 
866     if (!n->isElementNode() || !m_view) {
867         popCurrentNode();
868         return;
869     }
870 
871     Element* element = static_cast<Element*>(n.get());
872 
873     // The element's parent may have already been removed from document.
874     // Parsing continues in this case, but scripts aren't executed.
875     if (!element->inDocument()) {
876         popCurrentNode();
877         return;
878     }
879 
880     ScriptElement* scriptElement = toScriptElement(element);
881     if (!scriptElement) {
882         popCurrentNode();
883         return;
884     }
885 
886     // Don't load external scripts for standalone documents (for now).
887     ASSERT(!m_pendingScript);
888     m_requestingScript = true;
889 
890     bool successfullyPrepared = scriptElement->prepareScript(m_scriptStartPosition, ScriptElement::AllowLegacyTypeInTypeAttribute);
891     if (!successfullyPrepared) {
892 #if ENABLE(XHTMLMP)
893         if (!scriptElement->isScriptTypeSupported(ScriptElement::AllowLegacyTypeInTypeAttribute))
894             document()->setShouldProcessNoscriptElement(true);
895 #endif
896     } else {
897         // FIXME: Script execution should be shared between
898         // the libxml2 and Qt XMLDocumentParser implementations.
899 
900         if (scriptElement->readyToBeParserExecuted())
901             scriptElement->executeScript(ScriptSourceCode(scriptElement->scriptContent(), document()->url(), m_scriptStartPosition));
902         else if (scriptElement->willBeParserExecuted()) {
903             m_pendingScript = scriptElement->cachedScript();
904             m_scriptElement = element;
905             m_pendingScript->addClient(this);
906 
907             // m_pendingScript will be 0 if script was already loaded and addClient() executed it.
908             if (m_pendingScript)
909                 pauseParsing();
910         } else
911             m_scriptElement = 0;
912 
913         // JavaScript may have detached the parser
914         if (isDetached())
915             return;
916     }
917     m_requestingScript = false;
918     popCurrentNode();
919 }
920 
characters(const xmlChar * s,int len)921 void XMLDocumentParser::characters(const xmlChar* s, int len)
922 {
923     if (isStopped())
924         return;
925 
926     if (m_parserPaused) {
927         m_pendingCallbacks->appendCharactersCallback(s, len);
928         return;
929     }
930 
931     if (!m_currentNode->isTextNode())
932         enterText();
933     m_bufferedText.append(s, len);
934 }
935 
error(ErrorType type,const char * message,va_list args)936 void XMLDocumentParser::error(ErrorType type, const char* message, va_list args)
937 {
938     if (isStopped())
939         return;
940 
941 #if COMPILER(MSVC) || COMPILER(RVCT)
942     char m[1024];
943     vsnprintf(m, sizeof(m) - 1, message, args);
944 #else
945     char* m;
946     if (vasprintf(&m, message, args) == -1)
947         return;
948 #endif
949 
950     if (m_parserPaused)
951         m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(m), lineNumber(), columnNumber());
952     else
953         handleError(type, m, lineNumber(), columnNumber());
954 
955 #if !COMPILER(MSVC) && !COMPILER(RVCT)
956     free(m);
957 #endif
958 }
959 
processingInstruction(const xmlChar * target,const xmlChar * data)960 void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data)
961 {
962     if (isStopped())
963         return;
964 
965     if (m_parserPaused) {
966         m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
967         return;
968     }
969 
970     exitText();
971 
972     // ### handle exceptions
973     ExceptionCode ec = 0;
974     RefPtr<ProcessingInstruction> pi = document()->createProcessingInstruction(
975         toString(target), toString(data), ec);
976     if (ec)
977         return;
978 
979     pi->setCreatedByParser(true);
980 
981     m_currentNode->deprecatedParserAddChild(pi.get());
982     if (m_view && !pi->attached())
983         pi->attach();
984 
985     pi->finishParsingChildren();
986 
987     if (pi->isCSS())
988         m_sawCSS = true;
989 #if ENABLE(XSLT)
990     m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
991     if (m_sawXSLTransform && !document()->transformSourceDocument())
992         stopParsing();
993 #endif
994 }
995 
cdataBlock(const xmlChar * s,int len)996 void XMLDocumentParser::cdataBlock(const xmlChar* s, int len)
997 {
998     if (isStopped())
999         return;
1000 
1001     if (m_parserPaused) {
1002         m_pendingCallbacks->appendCDATABlockCallback(s, len);
1003         return;
1004     }
1005 
1006     exitText();
1007 
1008     RefPtr<Node> newNode = CDATASection::create(document(), toString(s, len));
1009     m_currentNode->deprecatedParserAddChild(newNode.get());
1010     if (m_view && !newNode->attached())
1011         newNode->attach();
1012 }
1013 
comment(const xmlChar * s)1014 void XMLDocumentParser::comment(const xmlChar* s)
1015 {
1016     if (isStopped())
1017         return;
1018 
1019     if (m_parserPaused) {
1020         m_pendingCallbacks->appendCommentCallback(s);
1021         return;
1022     }
1023 
1024     exitText();
1025 
1026     RefPtr<Node> newNode = Comment::create(document(), toString(s));
1027     m_currentNode->deprecatedParserAddChild(newNode.get());
1028     if (m_view && !newNode->attached())
1029         newNode->attach();
1030 }
1031 
startDocument(const xmlChar * version,const xmlChar * encoding,int standalone)1032 void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone)
1033 {
1034     ExceptionCode ec = 0;
1035 
1036     if (version)
1037         document()->setXMLVersion(toString(version), ec);
1038     document()->setXMLStandalone(standalone == 1, ec); // possible values are 0, 1, and -1
1039     if (encoding)
1040         document()->setXMLEncoding(toString(encoding));
1041 }
1042 
// End-of-document callback: leaves text mode via exitText() and, for XHTMLMP
// builds, clears the "DOCTYPE declaration seen" flag.
void XMLDocumentParser::endDocument()
{
    exitText();
#if ENABLE(XHTMLMP)
    m_hasDocTypeDeclaration = false;
#endif
}
1050 
internalSubset(const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)1051 void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1052 {
1053     if (isStopped())
1054         return;
1055 
1056     if (m_parserPaused) {
1057         m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID);
1058         return;
1059     }
1060 
1061     if (document()) {
1062 #if ENABLE(WML) || ENABLE(XHTMLMP)
1063         String extId = toString(externalID);
1064 #endif
1065 #if ENABLE(WML)
1066         if (isWMLDocument()
1067             && extId != "-//WAPFORUM//DTD WML 1.3//EN"
1068             && extId != "-//WAPFORUM//DTD WML 1.2//EN"
1069             && extId != "-//WAPFORUM//DTD WML 1.1//EN"
1070             && extId != "-//WAPFORUM//DTD WML 1.0//EN")
1071             handleError(fatal, "Invalid DTD Public ID", lineNumber(), columnNumber());
1072 #endif
1073 #if ENABLE(XHTMLMP)
1074         String dtdName = toString(name);
1075         if (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
1076             || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN") {
1077             if (dtdName != HTMLNames::htmlTag.localName()) {
1078                 handleError(fatal, "Invalid DOCTYPE declaration, expected 'html' as root element.", lineNumber(), columnNumber());
1079                 return;
1080             }
1081 
1082             if (document()->isXHTMLMPDocument())
1083                 setIsXHTMLMPDocument(true);
1084             else
1085                 setIsXHTMLDocument(true);
1086 
1087             m_hasDocTypeDeclaration = true;
1088         }
1089 #endif
1090 
1091         document()->parserAddChild(DocumentType::create(document(), toString(name), toString(externalID), toString(systemID)));
1092     }
1093 }
1094 
getParser(void * closure)1095 static inline XMLDocumentParser* getParser(void* closure)
1096 {
1097     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1098     return static_cast<XMLDocumentParser*>(ctxt->_private);
1099 }
1100 
1101 // This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
1102 // Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
hackAroundLibXMLEntityBug(void * closure)1103 static inline bool hackAroundLibXMLEntityBug(void* closure)
1104 {
1105 #if LIBXML_VERSION >= 20627
1106     UNUSED_PARAM(closure);
1107 
1108     // This bug has been fixed in libxml 2.6.27.
1109     return false;
1110 #else
1111     return static_cast<xmlParserCtxtPtr>(closure)->node;
1112 #endif
1113 }
1114 
startElementNsHandler(void * closure,const xmlChar * localname,const xmlChar * prefix,const xmlChar * uri,int nb_namespaces,const xmlChar ** namespaces,int nb_attributes,int nb_defaulted,const xmlChar ** libxmlAttributes)1115 static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
1116 {
1117     if (hackAroundLibXMLEntityBug(closure))
1118         return;
1119 
1120     getParser(closure)->startElementNs(localname, prefix, uri, nb_namespaces, namespaces, nb_attributes, nb_defaulted, libxmlAttributes);
1121 }
1122 
endElementNsHandler(void * closure,const xmlChar *,const xmlChar *,const xmlChar *)1123 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1124 {
1125     if (hackAroundLibXMLEntityBug(closure))
1126         return;
1127 
1128     getParser(closure)->endElementNs();
1129 }
1130 
charactersHandler(void * closure,const xmlChar * s,int len)1131 static void charactersHandler(void* closure, const xmlChar* s, int len)
1132 {
1133     if (hackAroundLibXMLEntityBug(closure))
1134         return;
1135 
1136     getParser(closure)->characters(s, len);
1137 }
1138 
processingInstructionHandler(void * closure,const xmlChar * target,const xmlChar * data)1139 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1140 {
1141     if (hackAroundLibXMLEntityBug(closure))
1142         return;
1143 
1144     getParser(closure)->processingInstruction(target, data);
1145 }
1146 
cdataBlockHandler(void * closure,const xmlChar * s,int len)1147 static void cdataBlockHandler(void* closure, const xmlChar* s, int len)
1148 {
1149     if (hackAroundLibXMLEntityBug(closure))
1150         return;
1151 
1152     getParser(closure)->cdataBlock(s, len);
1153 }
1154 
commentHandler(void * closure,const xmlChar * comment)1155 static void commentHandler(void* closure, const xmlChar* comment)
1156 {
1157     if (hackAroundLibXMLEntityBug(closure))
1158         return;
1159 
1160     getParser(closure)->comment(comment);
1161 }
1162 
1163 WTF_ATTRIBUTE_PRINTF(2, 3)
warningHandler(void * closure,const char * message,...)1164 static void warningHandler(void* closure, const char* message, ...)
1165 {
1166     va_list args;
1167     va_start(args, message);
1168     getParser(closure)->error(XMLDocumentParser::warning, message, args);
1169     va_end(args);
1170 }
1171 
1172 WTF_ATTRIBUTE_PRINTF(2, 3)
fatalErrorHandler(void * closure,const char * message,...)1173 static void fatalErrorHandler(void* closure, const char* message, ...)
1174 {
1175     va_list args;
1176     va_start(args, message);
1177     getParser(closure)->error(XMLDocumentParser::fatal, message, args);
1178     va_end(args);
1179 }
1180 
1181 WTF_ATTRIBUTE_PRINTF(2, 3)
normalErrorHandler(void * closure,const char * message,...)1182 static void normalErrorHandler(void* closure, const char* message, ...)
1183 {
1184     va_list args;
1185     va_start(args, message);
1186     getParser(closure)->error(XMLDocumentParser::nonFatal, message, args);
1187     va_end(args);
1188 }
1189 
1190 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
1191 // a hack to avoid malloc/free. Using a global variable like this could cause trouble
1192 // if libxml implementation details were to change
1193 static xmlChar sharedXHTMLEntityResult[5] = {0, 0, 0, 0, 0};
1194 
sharedXHTMLEntity()1195 static xmlEntityPtr sharedXHTMLEntity()
1196 {
1197     static xmlEntity entity;
1198     if (!entity.type) {
1199         entity.type = XML_ENTITY_DECL;
1200         entity.orig = sharedXHTMLEntityResult;
1201         entity.content = sharedXHTMLEntityResult;
1202         entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
1203     }
1204     return &entity;
1205 }
1206 
getXHTMLEntity(const xmlChar * name)1207 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1208 {
1209     UChar c = decodeNamedEntity(reinterpret_cast<const char*>(name));
1210     if (!c)
1211         return 0;
1212 
1213     CString value = String(&c, 1).utf8();
1214     ASSERT(value.length() < 5);
1215     xmlEntityPtr entity = sharedXHTMLEntity();
1216     entity->length = value.length();
1217     entity->name = name;
1218     memcpy(sharedXHTMLEntityResult, value.data(), entity->length + 1);
1219 
1220     return entity;
1221 }
1222 
getEntityHandler(void * closure,const xmlChar * name)1223 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1224 {
1225     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1226     xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1227     if (ent) {
1228         ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1229         return ent;
1230     }
1231 
1232     ent = xmlGetDocEntity(ctxt->myDoc, name);
1233     if (!ent && (getParser(closure)->isXHTMLDocument()
1234 #if ENABLE(XHTMLMP)
1235                  || getParser(closure)->isXHTMLMPDocument()
1236 #endif
1237 #if ENABLE(WML)
1238                  || getParser(closure)->isWMLDocument()
1239 #endif
1240        )) {
1241         ent = getXHTMLEntity(name);
1242         if (ent)
1243             ent->etype = XML_INTERNAL_GENERAL_ENTITY;
1244     }
1245 
1246     return ent;
1247 }
1248 
startDocumentHandler(void * closure)1249 static void startDocumentHandler(void* closure)
1250 {
1251     xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1252     switchToUTF16(ctxt);
1253     getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
1254     xmlSAX2StartDocument(closure);
1255 }
1256 
endDocumentHandler(void * closure)1257 static void endDocumentHandler(void* closure)
1258 {
1259     getParser(closure)->endDocument();
1260     xmlSAX2EndDocument(closure);
1261 }
1262 
internalSubsetHandler(void * closure,const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)1263 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1264 {
1265     getParser(closure)->internalSubset(name, externalID, systemID);
1266     xmlSAX2InternalSubset(closure, name, externalID, systemID);
1267 }
1268 
externalSubsetHandler(void * closure,const xmlChar *,const xmlChar * externalId,const xmlChar *)1269 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1270 {
1271     String extId = toString(externalId);
1272     if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
1273         || (extId == "-//W3C//DTD XHTML 1.1//EN")
1274         || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
1275         || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
1276         || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
1277         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
1278         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
1279         || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
1280        )
1281         getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
1282 }
1283 
// Intentionally empty libxml ignorableWhitespace callback; all three
// arguments are ignored.
static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
{
    // nothing to do, but we need this to work around a crasher
    // http://bugzilla.gnome.org/show_bug.cgi?id=172255
    // http://bugs.webkit.org/show_bug.cgi?id=5792
}
1290 
initializeParserContext(const char * chunk)1291 void XMLDocumentParser::initializeParserContext(const char* chunk)
1292 {
1293     xmlSAXHandler sax;
1294     memset(&sax, 0, sizeof(sax));
1295 
1296     sax.error = normalErrorHandler;
1297     sax.fatalError = fatalErrorHandler;
1298     sax.characters = charactersHandler;
1299     sax.processingInstruction = processingInstructionHandler;
1300     sax.cdataBlock = cdataBlockHandler;
1301     sax.comment = commentHandler;
1302     sax.warning = warningHandler;
1303     sax.startElementNs = startElementNsHandler;
1304     sax.endElementNs = endElementNsHandler;
1305     sax.getEntity = getEntityHandler;
1306     sax.startDocument = startDocumentHandler;
1307     sax.endDocument = endDocumentHandler;
1308     sax.internalSubset = internalSubsetHandler;
1309     sax.externalSubset = externalSubsetHandler;
1310     sax.ignorableWhitespace = ignorableWhitespaceHandler;
1311     sax.entityDecl = xmlSAX2EntityDecl;
1312     sax.initialized = XML_SAX2_MAGIC;
1313     DocumentParser::startParsing();
1314     m_sawError = false;
1315     m_sawCSS = false;
1316     m_sawXSLTransform = false;
1317     m_sawFirstElement = false;
1318 
1319     XMLDocumentParserScope scope(document()->cachedResourceLoader());
1320     if (m_parsingFragment)
1321         m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1322     else {
1323         ASSERT(!chunk);
1324         m_context = XMLParserContext::createStringParser(&sax, this);
1325     }
1326 }
1327 
doEnd()1328 void XMLDocumentParser::doEnd()
1329 {
1330     if (!isStopped()) {
1331         if (m_context) {
1332             // Tell libxml we're done.
1333             {
1334                 XMLDocumentParserScope scope(document()->cachedResourceLoader());
1335                 xmlParseChunk(context(), 0, 0, 1);
1336             }
1337 
1338             m_context = 0;
1339         }
1340     }
1341 
1342 #if ENABLE(XSLT)
1343     XMLTreeViewer xmlTreeViewer(document());
1344     bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && xmlTreeViewer.hasNoStyleInformation();
1345     if (xmlViewerMode)
1346         xmlTreeViewer.transformDocumentToTreeView();
1347 
1348     if (m_sawXSLTransform) {
1349         void* doc = xmlDocPtrForString(document()->cachedResourceLoader(), m_originalSourceForTransform, document()->url().string());
1350         document()->setTransformSource(new TransformSource(doc));
1351 
1352         document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
1353         document()->styleSelectorChanged(RecalcStyleImmediately);
1354         document()->setParsing(true);
1355 
1356         DocumentParser::stopParsing();
1357     }
1358 #endif
1359 }
1360 
1361 #if ENABLE(XSLT)
xmlDocPtrForString(CachedResourceLoader * cachedResourceLoader,const String & source,const String & url)1362 void* xmlDocPtrForString(CachedResourceLoader* cachedResourceLoader, const String& source, const String& url)
1363 {
1364     if (source.isEmpty())
1365         return 0;
1366 
1367     // Parse in a single chunk into an xmlDocPtr
1368     // FIXME: Hook up error handlers so that a failure to parse the main document results in
1369     // good error messages.
1370     const UChar BOM = 0xFEFF;
1371     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
1372 
1373     XMLDocumentParserScope scope(cachedResourceLoader, errorFunc, 0);
1374     xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()),
1375                                         source.length() * sizeof(UChar),
1376                                         url.latin1().data(),
1377                                         BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE",
1378                                         XSLT_PARSE_OPTIONS);
1379     return sourceDoc;
1380 }
1381 #endif
1382 
lineNumber() const1383 int XMLDocumentParser::lineNumber() const
1384 {
1385     // FIXME: The implementation probably returns 1-based int, but method should return 0-based.
1386     return context() ? context()->input->line : 1;
1387 }
1388 
columnNumber() const1389 int XMLDocumentParser::columnNumber() const
1390 {
1391     // FIXME: The implementation probably returns 1-based int, but method should return 0-based.
1392     return context() ? context()->input->col : 1;
1393 }
1394 
textPosition() const1395 TextPosition0 XMLDocumentParser::textPosition() const
1396 {
1397     xmlParserCtxtPtr context = this->context();
1398     if (!context)
1399         return TextPosition0::minimumPosition();
1400     // FIXME: The context probably contains 1-based numbers, but we treat them as 0-based,
1401     //        to be consistent with fixme's in lineNumber() and columnNumber
1402     //        methods.
1403     return TextPosition0(WTF::ZeroBasedNumber::fromZeroBasedInt(context->input->line),
1404         WTF::ZeroBasedNumber::fromZeroBasedInt(context->input->col));
1405 }
1406 
1407 // This method has a correct implementation, in contrast to textPosition() method.
1408 // It should replace textPosition().
textPositionOneBased() const1409 TextPosition1 XMLDocumentParser::textPositionOneBased() const
1410 {
1411     xmlParserCtxtPtr context = this->context();
1412     if (!context)
1413         return TextPosition1::minimumPosition();
1414     return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(context->input->line),
1415         WTF::OneBasedNumber::fromOneBasedInt(context->input->col));
1416 }
1417 
stopParsing()1418 void XMLDocumentParser::stopParsing()
1419 {
1420     DocumentParser::stopParsing();
1421     if (context())
1422         xmlStopParser(context());
1423 }
1424 
resumeParsing()1425 void XMLDocumentParser::resumeParsing()
1426 {
1427     ASSERT(!isDetached());
1428     ASSERT(m_parserPaused);
1429 
1430     m_parserPaused = false;
1431 
1432     // First, execute any pending callbacks
1433     while (!m_pendingCallbacks->isEmpty()) {
1434         m_pendingCallbacks->callAndRemoveFirstCallback(this);
1435 
1436         // A callback paused the parser
1437         if (m_parserPaused)
1438             return;
1439     }
1440 
1441     // Then, write any pending data
1442     SegmentedString rest = m_pendingSrc;
1443     m_pendingSrc.clear();
1444     append(rest);
1445 
1446     // Finally, if finish() has been called and write() didn't result
1447     // in any further callbacks being queued, call end()
1448     if (m_finishCalled && m_pendingCallbacks->isEmpty())
1449         end();
1450 }
1451 
appendFragmentSource(const String & chunk)1452 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1453 {
1454     ASSERT(!m_context);
1455     ASSERT(m_parsingFragment);
1456 
1457     CString chunkAsUtf8 = chunk.utf8();
1458     initializeParserContext(chunkAsUtf8.data());
1459     xmlParseContent(context());
1460     endDocument(); // Close any open text nodes.
1461 
1462     // FIXME: If this code is actually needed, it should probably move to finish()
1463     // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
1464     // Check if all the chunk has been processed.
1465     long bytesProcessed = xmlByteConsumed(context());
1466     if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
1467         // FIXME: I don't believe we can hit this case without also having seen an error.
1468         // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
1469         ASSERT(m_sawError);
1470         return false;
1471     }
1472 
1473     // No error if the chunk is well formed or it is not but we have no error.
1474     return context()->wellFormed || !xmlCtxtGetLastError(context());
1475 }
1476 
1477 // --------------------------------
1478 
1479 struct AttributeParseState {
1480     HashMap<String, String> attributes;
1481     bool gotAttributes;
1482 };
1483 
attributesStartElementNsHandler(void * closure,const xmlChar * xmlLocalName,const xmlChar *,const xmlChar *,int,const xmlChar **,int nb_attributes,int,const xmlChar ** libxmlAttributes)1484 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1485                                             const xmlChar* /*xmlURI*/, int /*nb_namespaces*/, const xmlChar** /*namespaces*/,
1486                                             int nb_attributes, int /*nb_defaulted*/, const xmlChar** libxmlAttributes)
1487 {
1488     if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1489         return;
1490 
1491     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1492     AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1493 
1494     state->gotAttributes = true;
1495 
1496     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1497     for (int i = 0; i < nb_attributes; i++) {
1498         String attrLocalName = toString(attributes[i].localname);
1499         int valueLength = (int) (attributes[i].end - attributes[i].value);
1500         String attrValue = toString(attributes[i].value, valueLength);
1501         String attrPrefix = toString(attributes[i].prefix);
1502         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1503 
1504         state->attributes.set(attrQName, attrValue);
1505     }
1506 }
1507 
parseAttributes(const String & string,bool & attrsOK)1508 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1509 {
1510     AttributeParseState state;
1511     state.gotAttributes = false;
1512 
1513     xmlSAXHandler sax;
1514     memset(&sax, 0, sizeof(sax));
1515     sax.startElementNs = attributesStartElementNsHandler;
1516     sax.initialized = XML_SAX2_MAGIC;
1517     RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1518     String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1519     xmlParseChunk(parser->context(), reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1);
1520     attrsOK = state.gotAttributes;
1521     return state.attributes;
1522 }
1523 
1524 }
1525