1 /*
2 * Copyright (C) 2000 Peter Kelly <pmk@post.com>
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
5 * Copyright (C) 2007 Samuel Weinig <sam@webkit.org>
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Library General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Library General Public License for more details.
20 *
21 * You should have received a copy of the GNU Library General Public License
22 * along with this library; see the file COPYING.LIB. If not, write to
23 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24 * Boston, MA 02110-1301, USA.
25 */
26
27 #include "config.h"
28 #include "XMLDocumentParser.h"
29
30 #include "CDATASection.h"
31 #include "CachedScript.h"
32 #include "Comment.h"
33 #include "CachedResourceLoader.h"
34 #include "Document.h"
35 #include "DocumentFragment.h"
36 #include "DocumentType.h"
37 #include "Frame.h"
38 #include "FrameLoader.h"
39 #include "FrameView.h"
40 #include "HTMLEntityParser.h"
41 #include "HTMLHtmlElement.h"
42 #include "HTMLLinkElement.h"
43 #include "HTMLNames.h"
44 #include "HTMLStyleElement.h"
45 #include "ProcessingInstruction.h"
46 #include "ResourceError.h"
47 #include "ResourceHandle.h"
48 #include "ResourceRequest.h"
49 #include "ResourceResponse.h"
50 #include "ScriptElement.h"
51 #include "ScriptSourceCode.h"
52 #include "ScriptValue.h"
53 #include "TextResourceDecoder.h"
54 #include "TransformSource.h"
55 #include "XMLNSNames.h"
56 #include "XMLDocumentParserScope.h"
57 #include <libxml/parser.h>
58 #include <libxml/parserInternals.h>
59 #include <wtf/text/CString.h>
60 #include <wtf/StringExtras.h>
61 #include <wtf/Threading.h>
62 #include <wtf/UnusedParam.h>
63 #include <wtf/Vector.h>
64
65 #if ENABLE(XSLT)
66 #include "XMLTreeViewer.h"
67 #include <libxslt/xslt.h>
68 #endif
69
70 #if ENABLE(XHTMLMP)
71 #include "HTMLScriptElement.h"
72 #endif
73
74
75 using namespace std;
76
77 namespace WebCore {
78
79 class PendingCallbacks {
80 WTF_MAKE_NONCOPYABLE(PendingCallbacks);
81 public:
PendingCallbacks()82 PendingCallbacks() { }
~PendingCallbacks()83 ~PendingCallbacks()
84 {
85 deleteAllValues(m_callbacks);
86 }
87
appendStartElementNSCallback(const xmlChar * xmlLocalName,const xmlChar * xmlPrefix,const xmlChar * xmlURI,int nb_namespaces,const xmlChar ** namespaces,int nb_attributes,int nb_defaulted,const xmlChar ** attributes)88 void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
89 const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes)
90 {
91 PendingStartElementNSCallback* callback = new PendingStartElementNSCallback;
92
93 callback->xmlLocalName = xmlStrdup(xmlLocalName);
94 callback->xmlPrefix = xmlStrdup(xmlPrefix);
95 callback->xmlURI = xmlStrdup(xmlURI);
96 callback->nb_namespaces = nb_namespaces;
97 callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_namespaces * 2));
98 for (int i = 0; i < nb_namespaces * 2 ; i++)
99 callback->namespaces[i] = xmlStrdup(namespaces[i]);
100 callback->nb_attributes = nb_attributes;
101 callback->nb_defaulted = nb_defaulted;
102 callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_attributes * 5));
103 for (int i = 0; i < nb_attributes; i++) {
104 // Each attribute has 5 elements in the array:
105 // name, prefix, uri, value and an end pointer.
106
107 for (int j = 0; j < 3; j++)
108 callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
109
110 int len = attributes[i * 5 + 4] - attributes[i * 5 + 3];
111
112 callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len);
113 callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len;
114 }
115
116 m_callbacks.append(callback);
117 }
118
appendEndElementNSCallback()119 void appendEndElementNSCallback()
120 {
121 PendingEndElementNSCallback* callback = new PendingEndElementNSCallback;
122
123 m_callbacks.append(callback);
124 }
125
appendCharactersCallback(const xmlChar * s,int len)126 void appendCharactersCallback(const xmlChar* s, int len)
127 {
128 PendingCharactersCallback* callback = new PendingCharactersCallback;
129
130 callback->s = xmlStrndup(s, len);
131 callback->len = len;
132
133 m_callbacks.append(callback);
134 }
135
appendProcessingInstructionCallback(const xmlChar * target,const xmlChar * data)136 void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data)
137 {
138 PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback;
139
140 callback->target = xmlStrdup(target);
141 callback->data = xmlStrdup(data);
142
143 m_callbacks.append(callback);
144 }
145
appendCDATABlockCallback(const xmlChar * s,int len)146 void appendCDATABlockCallback(const xmlChar* s, int len)
147 {
148 PendingCDATABlockCallback* callback = new PendingCDATABlockCallback;
149
150 callback->s = xmlStrndup(s, len);
151 callback->len = len;
152
153 m_callbacks.append(callback);
154 }
155
appendCommentCallback(const xmlChar * s)156 void appendCommentCallback(const xmlChar* s)
157 {
158 PendingCommentCallback* callback = new PendingCommentCallback;
159
160 callback->s = xmlStrdup(s);
161
162 m_callbacks.append(callback);
163 }
164
appendInternalSubsetCallback(const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)165 void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
166 {
167 PendingInternalSubsetCallback* callback = new PendingInternalSubsetCallback;
168
169 callback->name = xmlStrdup(name);
170 callback->externalID = xmlStrdup(externalID);
171 callback->systemID = xmlStrdup(systemID);
172
173 m_callbacks.append(callback);
174 }
175
appendErrorCallback(XMLDocumentParser::ErrorType type,const xmlChar * message,int lineNumber,int columnNumber)176 void appendErrorCallback(XMLDocumentParser::ErrorType type, const xmlChar* message, int lineNumber, int columnNumber)
177 {
178 PendingErrorCallback* callback = new PendingErrorCallback;
179
180 callback->message = xmlStrdup(message);
181 callback->type = type;
182 callback->lineNumber = lineNumber;
183 callback->columnNumber = columnNumber;
184
185 m_callbacks.append(callback);
186 }
187
callAndRemoveFirstCallback(XMLDocumentParser * parser)188 void callAndRemoveFirstCallback(XMLDocumentParser* parser)
189 {
190 OwnPtr<PendingCallback> callback(m_callbacks.takeFirst());
191 callback->call(parser);
192 }
193
isEmpty() const194 bool isEmpty() const { return m_callbacks.isEmpty(); }
195
196 private:
197 struct PendingCallback {
~PendingCallbackWebCore::PendingCallbacks::PendingCallback198 virtual ~PendingCallback() { }
199 virtual void call(XMLDocumentParser* parser) = 0;
200 };
201
202 struct PendingStartElementNSCallback : public PendingCallback {
~PendingStartElementNSCallbackWebCore::PendingCallbacks::PendingStartElementNSCallback203 virtual ~PendingStartElementNSCallback()
204 {
205 xmlFree(xmlLocalName);
206 xmlFree(xmlPrefix);
207 xmlFree(xmlURI);
208 for (int i = 0; i < nb_namespaces * 2; i++)
209 xmlFree(namespaces[i]);
210 xmlFree(namespaces);
211 for (int i = 0; i < nb_attributes; i++)
212 for (int j = 0; j < 4; j++)
213 xmlFree(attributes[i * 5 + j]);
214 xmlFree(attributes);
215 }
216
callWebCore::PendingCallbacks::PendingStartElementNSCallback217 virtual void call(XMLDocumentParser* parser)
218 {
219 parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI,
220 nb_namespaces, const_cast<const xmlChar**>(namespaces),
221 nb_attributes, nb_defaulted, const_cast<const xmlChar**>(attributes));
222 }
223
224 xmlChar* xmlLocalName;
225 xmlChar* xmlPrefix;
226 xmlChar* xmlURI;
227 int nb_namespaces;
228 xmlChar** namespaces;
229 int nb_attributes;
230 int nb_defaulted;
231 xmlChar** attributes;
232 };
233
234 struct PendingEndElementNSCallback : public PendingCallback {
callWebCore::PendingCallbacks::PendingEndElementNSCallback235 virtual void call(XMLDocumentParser* parser)
236 {
237 parser->endElementNs();
238 }
239 };
240
241 struct PendingCharactersCallback : public PendingCallback {
~PendingCharactersCallbackWebCore::PendingCallbacks::PendingCharactersCallback242 virtual ~PendingCharactersCallback()
243 {
244 xmlFree(s);
245 }
246
callWebCore::PendingCallbacks::PendingCharactersCallback247 virtual void call(XMLDocumentParser* parser)
248 {
249 parser->characters(s, len);
250 }
251
252 xmlChar* s;
253 int len;
254 };
255
256 struct PendingProcessingInstructionCallback : public PendingCallback {
~PendingProcessingInstructionCallbackWebCore::PendingCallbacks::PendingProcessingInstructionCallback257 virtual ~PendingProcessingInstructionCallback()
258 {
259 xmlFree(target);
260 xmlFree(data);
261 }
262
callWebCore::PendingCallbacks::PendingProcessingInstructionCallback263 virtual void call(XMLDocumentParser* parser)
264 {
265 parser->processingInstruction(target, data);
266 }
267
268 xmlChar* target;
269 xmlChar* data;
270 };
271
272 struct PendingCDATABlockCallback : public PendingCallback {
~PendingCDATABlockCallbackWebCore::PendingCallbacks::PendingCDATABlockCallback273 virtual ~PendingCDATABlockCallback()
274 {
275 xmlFree(s);
276 }
277
callWebCore::PendingCallbacks::PendingCDATABlockCallback278 virtual void call(XMLDocumentParser* parser)
279 {
280 parser->cdataBlock(s, len);
281 }
282
283 xmlChar* s;
284 int len;
285 };
286
287 struct PendingCommentCallback : public PendingCallback {
~PendingCommentCallbackWebCore::PendingCallbacks::PendingCommentCallback288 virtual ~PendingCommentCallback()
289 {
290 xmlFree(s);
291 }
292
callWebCore::PendingCallbacks::PendingCommentCallback293 virtual void call(XMLDocumentParser* parser)
294 {
295 parser->comment(s);
296 }
297
298 xmlChar* s;
299 };
300
301 struct PendingInternalSubsetCallback : public PendingCallback {
~PendingInternalSubsetCallbackWebCore::PendingCallbacks::PendingInternalSubsetCallback302 virtual ~PendingInternalSubsetCallback()
303 {
304 xmlFree(name);
305 xmlFree(externalID);
306 xmlFree(systemID);
307 }
308
callWebCore::PendingCallbacks::PendingInternalSubsetCallback309 virtual void call(XMLDocumentParser* parser)
310 {
311 parser->internalSubset(name, externalID, systemID);
312 }
313
314 xmlChar* name;
315 xmlChar* externalID;
316 xmlChar* systemID;
317 };
318
319 struct PendingErrorCallback: public PendingCallback {
~PendingErrorCallbackWebCore::PendingCallbacks::PendingErrorCallback320 virtual ~PendingErrorCallback()
321 {
322 xmlFree(message);
323 }
324
callWebCore::PendingCallbacks::PendingErrorCallback325 virtual void call(XMLDocumentParser* parser)
326 {
327 parser->handleError(type, reinterpret_cast<char*>(message), lineNumber, columnNumber);
328 }
329
330 XMLDocumentParser::ErrorType type;
331 xmlChar* message;
332 int lineNumber;
333 int columnNumber;
334 };
335
336 Deque<PendingCallback*> m_callbacks;
337 };
338 // --------------------------------
339
// Sentinel: openFunc() returns the address of this variable instead of a real
// OffsetBuffer when a load is refused; readFunc() and closeFunc() compare
// their context against it. Its value is never used.
static int globalDescriptor = 0;
// Thread on which the libxml2 I/O callbacks were registered (set in the
// XMLParserContext factories); matchFunc() only accepts loads from it.
static ThreadIdentifier libxmlLoaderThread = 0;
342
matchFunc(const char *)343 static int matchFunc(const char*)
344 {
345 // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
346 // interfering with client applications that also use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
347 return XMLDocumentParserScope::currentCachedResourceLoader && currentThread() == libxmlLoaderThread;
348 }
349
350 class OffsetBuffer {
351 public:
OffsetBuffer(const Vector<char> & b)352 OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
353
readOutBytes(char * outputBuffer,unsigned askedToRead)354 int readOutBytes(char* outputBuffer, unsigned askedToRead)
355 {
356 unsigned bytesLeft = m_buffer.size() - m_currentOffset;
357 unsigned lenToCopy = min(askedToRead, bytesLeft);
358 if (lenToCopy) {
359 memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
360 m_currentOffset += lenToCopy;
361 }
362 return lenToCopy;
363 }
364
365 private:
366 Vector<char> m_buffer;
367 unsigned m_currentOffset;
368 };
369
switchToUTF16(xmlParserCtxtPtr ctxt)370 static void switchToUTF16(xmlParserCtxtPtr ctxt)
371 {
372 // Hack around libxml2's lack of encoding overide support by manually
373 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
374 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
375 // and switch encodings, causing the parse to fail.
376 const UChar BOM = 0xFEFF;
377 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
378 xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
379 }
380
shouldAllowExternalLoad(const KURL & url)381 static bool shouldAllowExternalLoad(const KURL& url)
382 {
383 String urlString = url.string();
384
385 // On non-Windows platforms libxml asks for this URL, the
386 // "XML_XML_DEFAULT_CATALOG", on initialization.
387 if (urlString == "file:///etc/xml/catalog")
388 return false;
389
390 // On Windows, libxml computes a URL relative to where its DLL resides.
391 if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
392 return false;
393
394 // The most common DTD. There isn't much point in hammering www.w3c.org
395 // by requesting this URL for every XHTML document.
396 if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
397 return false;
398
399 // Similarly, there isn't much point in requesting the SVG DTD.
400 if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
401 return false;
402
403 // The libxml doesn't give us a lot of context for deciding whether to
404 // allow this request. In the worst case, this load could be for an
405 // external entity and the resulting document could simply read the
406 // retrieved content. If we had more context, we could potentially allow
407 // the parser to load a DTD. As things stand, we take the conservative
408 // route and allow same-origin requests only.
409 if (!XMLDocumentParserScope::currentCachedResourceLoader->document()->securityOrigin()->canRequest(url)) {
410 XMLDocumentParserScope::currentCachedResourceLoader->printAccessDeniedMessage(url);
411 return false;
412 }
413
414 return true;
415 }
416
openFunc(const char * uri)417 static void* openFunc(const char* uri)
418 {
419 ASSERT(XMLDocumentParserScope::currentCachedResourceLoader);
420 ASSERT(currentThread() == libxmlLoaderThread);
421
422 KURL url(KURL(), uri);
423
424 if (!shouldAllowExternalLoad(url))
425 return &globalDescriptor;
426
427 ResourceError error;
428 ResourceResponse response;
429 Vector<char> data;
430
431
432 {
433 CachedResourceLoader* cachedResourceLoader = XMLDocumentParserScope::currentCachedResourceLoader;
434 XMLDocumentParserScope scope(0);
435 // FIXME: We should restore the original global error handler as well.
436
437 if (cachedResourceLoader->frame())
438 cachedResourceLoader->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, error, response, data);
439 }
440
441 // We have to check the URL again after the load to catch redirects.
442 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
443 if (!shouldAllowExternalLoad(response.url()))
444 return &globalDescriptor;
445
446 return new OffsetBuffer(data);
447 }
448
readFunc(void * context,char * buffer,int len)449 static int readFunc(void* context, char* buffer, int len)
450 {
451 // Do 0-byte reads in case of a null descriptor
452 if (context == &globalDescriptor)
453 return 0;
454
455 OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
456 return data->readOutBytes(buffer, len);
457 }
458
// libxml2 I/O "write" callback: discards its input and reports that zero
// bytes were written.
static int writeFunc(void* /* context */, const char* /* buffer */, int /* len */)
{
    const int bytesWritten = 0;
    return bytesWritten;
}
464
closeFunc(void * context)465 static int closeFunc(void* context)
466 {
467 if (context != &globalDescriptor) {
468 OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
469 delete data;
470 }
471 return 0;
472 }
473
474 #if ENABLE(XSLT)
// printf-style error sink that deliberately ignores everything it is given.
static void errorFunc(void* /* context */, const char* /* format */, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
}
479 #endif
480
481 static bool didInit = false;
482
createStringParser(xmlSAXHandlerPtr handlers,void * userData)483 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
484 {
485 if (!didInit) {
486 xmlInitParser();
487 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
488 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
489 libxmlLoaderThread = currentThread();
490 didInit = true;
491 }
492
493 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
494 parser->_private = userData;
495 parser->replaceEntities = true;
496 switchToUTF16(parser);
497
498 return adoptRef(new XMLParserContext(parser));
499 }
500
501
502 // Chunk should be encoded in UTF-8
createMemoryParser(xmlSAXHandlerPtr handlers,void * userData,const char * chunk)503 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk)
504 {
505 if (!didInit) {
506 xmlInitParser();
507 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
508 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
509 libxmlLoaderThread = currentThread();
510 didInit = true;
511 }
512
513 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk, xmlStrlen((const xmlChar*)chunk));
514
515 if (!parser)
516 return 0;
517
518 // Copy the sax handler
519 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
520
521 // Set parser options.
522 // XML_PARSE_NODICT: default dictionary option.
523 // XML_PARSE_NOENT: force entities substitutions.
524 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
525
526 // Internal initialization
527 parser->sax2 = 1;
528 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
529 parser->depth = 0;
530 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
531 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
532 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
533 parser->_private = userData;
534
535 return adoptRef(new XMLParserContext(parser));
536 }
537
538 // --------------------------------
539
supportsXMLVersion(const String & version)540 bool XMLDocumentParser::supportsXMLVersion(const String& version)
541 {
542 return version == "1.0";
543 }
544
// Constructs a parser for a whole document. Parsed nodes are inserted
// starting at the document itself (m_currentNode), fragment mode is off and
// scripting is allowed. The libxml2 context is not created here; m_context
// starts out null.
XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView)
    : ScriptableDocumentParser(document)
    , m_view(frameView)
    , m_context(0)
    , m_pendingCallbacks(new PendingCallbacks)
    , m_currentNode(document)
    , m_sawError(false)
    , m_sawCSS(false)
    , m_sawXSLTransform(false)
    , m_sawFirstElement(false)
    , m_isXHTMLDocument(false)
#if ENABLE(XHTMLMP)
    , m_isXHTMLMPDocument(false)
    , m_hasDocTypeDeclaration(false)
#endif
    , m_parserPaused(false)
    , m_requestingScript(false)
    , m_finishCalled(false)
    , m_errorCount(0)
    , m_lastErrorPosition(TextPosition1::belowRangePosition())
    , m_pendingScript(0)
    , m_scriptStartPosition(TextPosition1::belowRangePosition())
    , m_parsingFragment(false)
    , m_scriptingPermission(FragmentScriptingAllowed)
{
}
571
XMLDocumentParser(DocumentFragment * fragment,Element * parentElement,FragmentScriptingPermission scriptingPermission)572 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, FragmentScriptingPermission scriptingPermission)
573 : ScriptableDocumentParser(fragment->document())
574 , m_view(0)
575 , m_context(0)
576 , m_pendingCallbacks(new PendingCallbacks)
577 , m_currentNode(fragment)
578 , m_sawError(false)
579 , m_sawCSS(false)
580 , m_sawXSLTransform(false)
581 , m_sawFirstElement(false)
582 , m_isXHTMLDocument(false)
583 #if ENABLE(XHTMLMP)
584 , m_isXHTMLMPDocument(false)
585 , m_hasDocTypeDeclaration(false)
586 #endif
587 , m_parserPaused(false)
588 , m_requestingScript(false)
589 , m_finishCalled(false)
590 , m_errorCount(0)
591 , m_lastErrorPosition(TextPosition1::belowRangePosition())
592 , m_pendingScript(0)
593 , m_scriptStartPosition(TextPosition1::belowRangePosition())
594 , m_parsingFragment(true)
595 , m_scriptingPermission(scriptingPermission)
596 {
597 fragment->ref();
598
599 // Add namespaces based on the parent node
600 Vector<Element*> elemStack;
601 while (parentElement) {
602 elemStack.append(parentElement);
603
604 ContainerNode* n = parentElement->parentNode();
605 if (!n || !n->isElementNode())
606 break;
607 parentElement = static_cast<Element*>(n);
608 }
609
610 if (elemStack.isEmpty())
611 return;
612
613 for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
614 if (NamedNodeMap* attrs = element->attributes()) {
615 for (unsigned i = 0; i < attrs->length(); i++) {
616 Attribute* attr = attrs->attributeItem(i);
617 if (attr->localName() == xmlnsAtom)
618 m_defaultNamespaceURI = attr->value();
619 else if (attr->prefix() == xmlnsAtom)
620 m_prefixToNamespaceMap.set(attr->localName(), attr->value());
621 }
622 }
623 }
624
625 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
626 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
627 m_defaultNamespaceURI = parentElement->namespaceURI();
628 }
629
// Releases the wrapped libxml2 parser context, first freeing the document
// attached to it (myDoc) if there is one.
XMLParserContext::~XMLParserContext()
{
    if (m_context->myDoc)
        xmlFreeDoc(m_context->myDoc);
    xmlFreeParserCtxt(m_context);
}
636
// Destroys the parser. If a script load is still pending, this parser is
// unregistered as its client.
XMLDocumentParser::~XMLDocumentParser()
{
    // The XMLDocumentParser will always be detached before being destroyed.
    ASSERT(m_currentNodeStack.isEmpty());
    ASSERT(!m_currentNode);

    // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
    if (m_pendingScript)
        m_pendingScript->removeClient(this);
}
647
doWrite(const String & parseString)648 void XMLDocumentParser::doWrite(const String& parseString)
649 {
650 ASSERT(!isDetached());
651 if (!m_context)
652 initializeParserContext();
653
654 // Protect the libxml context from deletion during a callback
655 RefPtr<XMLParserContext> context = m_context;
656
657 // libXML throws an error if you try to switch the encoding for an empty string.
658 if (parseString.length()) {
659 // JavaScript may cause the parser to detach during xmlParseChunk
660 // keep this alive until this function is done.
661 RefPtr<XMLDocumentParser> protect(this);
662
663 switchToUTF16(context->context());
664 XMLDocumentParserScope scope(document()->cachedResourceLoader());
665 xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
666
667 // JavaScript (which may be run under the xmlParseChunk callstack) may
668 // cause the parser to be stopped or detached.
669 if (isStopped())
670 return;
671 }
672
673 // FIXME: Why is this here? And why is it after we process the passed source?
674 if (document()->decoder() && document()->decoder()->sawError()) {
675 // If the decoder saw an error, report it as fatal (stops parsing)
676 handleError(fatal, "Encoding error", context->context()->input->line, context->context()->input->col);
677 }
678 }
679
toString(const xmlChar * string,size_t size)680 static inline String toString(const xmlChar* string, size_t size)
681 {
682 return String::fromUTF8(reinterpret_cast<const char*>(string), size);
683 }
684
toString(const xmlChar * string)685 static inline String toString(const xmlChar* string)
686 {
687 return String::fromUTF8(reinterpret_cast<const char*>(string));
688 }
689
toAtomicString(const xmlChar * string,size_t size)690 static inline AtomicString toAtomicString(const xmlChar* string, size_t size)
691 {
692 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size);
693 }
694
toAtomicString(const xmlChar * string)695 static inline AtomicString toAtomicString(const xmlChar* string)
696 {
697 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
698 }
699
// View of one entry of the flat namespace array handed to startElementNs():
// handleElementNamespaces() reinterpret_casts that array to this type, so the
// two pointer members must stay in this order with nothing in between.
struct _xmlSAX2Namespace {
    const xmlChar* prefix; // Null when no prefix is given; the attribute is then named by xmlnsAtom.
    const xmlChar* uri;
};
typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
705
handleElementNamespaces(Element * newElement,const xmlChar ** libxmlNamespaces,int nb_namespaces,ExceptionCode & ec,FragmentScriptingPermission scriptingPermission)706 static inline void handleElementNamespaces(Element* newElement, const xmlChar** libxmlNamespaces, int nb_namespaces, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
707 {
708 xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
709 for (int i = 0; i < nb_namespaces; i++) {
710 AtomicString namespaceQName = xmlnsAtom;
711 AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
712 if (namespaces[i].prefix)
713 namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
714 newElement->setAttributeNS(XMLNSNames::xmlnsNamespaceURI, namespaceQName, namespaceURI, ec, scriptingPermission);
715 if (ec) // exception setting attributes
716 return;
717 }
718 }
719
// View of one entry of the flat attribute array handed to startElementNs():
// handleElementAttributes() reinterpret_casts that array to this type, so the
// five pointer members must stay in this order with nothing in between.
struct _xmlSAX2Attributes {
    const xmlChar* localname;
    const xmlChar* prefix;
    const xmlChar* uri;
    const xmlChar* value; // Start of the value; its length is (end - value).
    const xmlChar* end; // One past the last byte of the value.
};
typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
728
handleElementAttributes(Element * newElement,const xmlChar ** libxmlAttributes,int nb_attributes,ExceptionCode & ec,FragmentScriptingPermission scriptingPermission)729 static inline void handleElementAttributes(Element* newElement, const xmlChar** libxmlAttributes, int nb_attributes, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
730 {
731 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
732 for (int i = 0; i < nb_attributes; i++) {
733 int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
734 AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
735 String attrPrefix = toString(attributes[i].prefix);
736 AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
737 AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : AtomicString(attrPrefix + ":" + toString(attributes[i].localname));
738
739 newElement->setAttributeNS(attrURI, attrQName, attrValue, ec, scriptingPermission);
740 if (ec) // exception setting attributes
741 return;
742 }
743 }
744
// SAX start-element handler. Creates an element for the given qualified name,
// applies its namespace declarations and attributes, appends it to the
// current node and makes it the new current node.
//
// Does nothing when the parser is stopped. While the parser is paused the
// event is queued in m_pendingCallbacks instead of being processed. Parsing is
// stopped if the element cannot be created or if setting a namespace
// declaration or attribute raises an exception.
void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
    const xmlChar** libxmlNamespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
{
    if (isStopped())
        return;

    if (m_parserPaused) {
        m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, nb_namespaces, libxmlNamespaces,
            nb_attributes, nb_defaulted, libxmlAttributes);
        return;
    }

#if ENABLE(XHTMLMP)
    // check if the DOCTYPE Declaration of XHTMLMP document exists
    if (!m_hasDocTypeDeclaration && document()->isXHTMLMPDocument()) {
        handleError(fatal, "DOCTYPE declaration lost.", lineNumber(), columnNumber());
        return;
    }
#endif

    exitText();

    AtomicString localName = toAtomicString(xmlLocalName);
    AtomicString uri = toAtomicString(xmlURI);
    AtomicString prefix = toAtomicString(xmlPrefix);

    // When parsing a fragment, an element without a namespace URI takes it
    // from the namespace context collected by the fragment constructor.
    if (m_parsingFragment && uri.isNull()) {
        if (!prefix.isNull())
            uri = m_prefixToNamespaceMap.get(prefix);
        else
            uri = m_defaultNamespaceURI;
    }

#if ENABLE(XHTMLMP)
    if (!m_sawFirstElement && isXHTMLMPDocument()) {
        // As per the section 7.1 of OMA-WAP-XHTMLMP-V1_1-20061020-A.pdf,
        // we should make sure that the root element MUST be 'html' and
        // ensure the name of the default namespace on the root element 'html'
        // MUST be 'http://www.w3.org/1999/xhtml'
        if (localName != HTMLNames::htmlTag.localName()) {
            handleError(fatal, "XHTMLMP document expects 'html' as root element.", lineNumber(), columnNumber());
            return;
        }

        if (uri.isNull()) {
            m_defaultNamespaceURI = HTMLNames::xhtmlNamespaceURI;
            uri = m_defaultNamespaceURI;
        }
    }
#endif

    // Remember whether this is the first element seen; used at the end to
    // decide whether to notify the frame loader.
    bool isFirstElement = !m_sawFirstElement;
    m_sawFirstElement = true;

    QualifiedName qName(prefix, localName, uri);
    RefPtr<Element> newElement = document()->createElement(qName, true);
    if (!newElement) {
        stopParsing();
        return;
    }

    ExceptionCode ec = 0;
    handleElementNamespaces(newElement.get(), libxmlNamespaces, nb_namespaces, ec, m_scriptingPermission);
    if (ec) {
        stopParsing();
        return;
    }

    handleElementAttributes(newElement.get(), libxmlAttributes, nb_attributes, ec, m_scriptingPermission);
    if (ec) {
        stopParsing();
        return;
    }

    newElement->beginParsingChildren();

    // Record where a script element starts; endElementNs() passes this
    // position on when preparing and executing the script.
    ScriptElement* scriptElement = toScriptElement(newElement.get());
    if (scriptElement)
        m_scriptStartPosition = textPositionOneBased();

    m_currentNode->deprecatedParserAddChild(newElement.get());

    pushCurrentNode(newElement.get());
    if (m_view && !newElement->attached())
        newElement->attach();

#if ENABLE(OFFLINE_WEB_APPLICATIONS)
    if (newElement->hasTagName(HTMLNames::htmlTag))
        static_cast<HTMLHtmlElement*>(newElement.get())->insertedByParser();
#endif

    if (!m_parsingFragment && isFirstElement && document()->frame())
        document()->frame()->loader()->dispatchDocumentElementAvailable();
}
839
// SAX end-element handler. Finishes the current node and pops it off the node
// stack. If the node is a script element in a document that has a view, the
// script is prepared here and either executed immediately or, when it still
// has to load, registered as the pending script and parsing is paused.
//
// Does nothing when the parser is stopped. While the parser is paused the
// event is queued in m_pendingCallbacks instead of being processed.
void XMLDocumentParser::endElementNs()
{
    if (isStopped())
        return;

    if (m_parserPaused) {
        m_pendingCallbacks->appendEndElementNSCallback();
        return;
    }

    // JavaScript can detach the parser. Make sure this is not released
    // before the end of this method.
    RefPtr<XMLDocumentParser> protect(this);

    exitText();

    RefPtr<Node> n = m_currentNode;
    n->finishParsingChildren();

    // When fragment scripting is not allowed, script elements are removed
    // from the tree again instead of being kept or executed.
    if (m_scriptingPermission == FragmentScriptingNotAllowed && n->isElementNode() && toScriptElement(static_cast<Element*>(n.get()))) {
        popCurrentNode();
        ExceptionCode ec;
        n->remove(ec);
        return;
    }

    // Script handling below only applies to elements parsed with a view.
    if (!n->isElementNode() || !m_view) {
        popCurrentNode();
        return;
    }

    Element* element = static_cast<Element*>(n.get());

    // The element's parent may have already been removed from document.
    // Parsing continues in this case, but scripts aren't executed.
    if (!element->inDocument()) {
        popCurrentNode();
        return;
    }

    ScriptElement* scriptElement = toScriptElement(element);
    if (!scriptElement) {
        popCurrentNode();
        return;
    }

    // Don't load external scripts for standalone documents (for now).
    ASSERT(!m_pendingScript);
    m_requestingScript = true;

    bool successfullyPrepared = scriptElement->prepareScript(m_scriptStartPosition, ScriptElement::AllowLegacyTypeInTypeAttribute);
    if (!successfullyPrepared) {
#if ENABLE(XHTMLMP)
        if (!scriptElement->isScriptTypeSupported(ScriptElement::AllowLegacyTypeInTypeAttribute))
            document()->setShouldProcessNoscriptElement(true);
#endif
    } else {
        // FIXME: Script execution should be shared between
        // the libxml2 and Qt XMLDocumentParser implementations.

        if (scriptElement->readyToBeParserExecuted())
            scriptElement->executeScript(ScriptSourceCode(scriptElement->scriptContent(), document()->url(), m_scriptStartPosition));
        else if (scriptElement->willBeParserExecuted()) {
            m_pendingScript = scriptElement->cachedScript();
            m_scriptElement = element;
            m_pendingScript->addClient(this);

            // m_pendingScript will be 0 if script was already loaded and addClient() executed it.
            if (m_pendingScript)
                pauseParsing();
        } else
            m_scriptElement = 0;

        // JavaScript may have detached the parser
        // Note that this return skips both the reset of m_requestingScript
        // and popCurrentNode() below.
        if (isDetached())
            return;
    }
    m_requestingScript = false;
    popCurrentNode();
}
920
characters(const xmlChar * s,int len)921 void XMLDocumentParser::characters(const xmlChar* s, int len)
922 {
923 if (isStopped())
924 return;
925
926 if (m_parserPaused) {
927 m_pendingCallbacks->appendCharactersCallback(s, len);
928 return;
929 }
930
931 if (!m_currentNode->isTextNode())
932 enterText();
933 m_bufferedText.append(s, len);
934 }
935
error(ErrorType type,const char * message,va_list args)936 void XMLDocumentParser::error(ErrorType type, const char* message, va_list args)
937 {
938 if (isStopped())
939 return;
940
941 #if COMPILER(MSVC) || COMPILER(RVCT)
942 char m[1024];
943 vsnprintf(m, sizeof(m) - 1, message, args);
944 #else
945 char* m;
946 if (vasprintf(&m, message, args) == -1)
947 return;
948 #endif
949
950 if (m_parserPaused)
951 m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(m), lineNumber(), columnNumber());
952 else
953 handleError(type, m, lineNumber(), columnNumber());
954
955 #if !COMPILER(MSVC) && !COMPILER(RVCT)
956 free(m);
957 #endif
958 }
959
processingInstruction(const xmlChar * target,const xmlChar * data)960 void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data)
961 {
962 if (isStopped())
963 return;
964
965 if (m_parserPaused) {
966 m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
967 return;
968 }
969
970 exitText();
971
972 // ### handle exceptions
973 ExceptionCode ec = 0;
974 RefPtr<ProcessingInstruction> pi = document()->createProcessingInstruction(
975 toString(target), toString(data), ec);
976 if (ec)
977 return;
978
979 pi->setCreatedByParser(true);
980
981 m_currentNode->deprecatedParserAddChild(pi.get());
982 if (m_view && !pi->attached())
983 pi->attach();
984
985 pi->finishParsingChildren();
986
987 if (pi->isCSS())
988 m_sawCSS = true;
989 #if ENABLE(XSLT)
990 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
991 if (m_sawXSLTransform && !document()->transformSourceDocument())
992 stopParsing();
993 #endif
994 }
995
cdataBlock(const xmlChar * s,int len)996 void XMLDocumentParser::cdataBlock(const xmlChar* s, int len)
997 {
998 if (isStopped())
999 return;
1000
1001 if (m_parserPaused) {
1002 m_pendingCallbacks->appendCDATABlockCallback(s, len);
1003 return;
1004 }
1005
1006 exitText();
1007
1008 RefPtr<Node> newNode = CDATASection::create(document(), toString(s, len));
1009 m_currentNode->deprecatedParserAddChild(newNode.get());
1010 if (m_view && !newNode->attached())
1011 newNode->attach();
1012 }
1013
comment(const xmlChar * s)1014 void XMLDocumentParser::comment(const xmlChar* s)
1015 {
1016 if (isStopped())
1017 return;
1018
1019 if (m_parserPaused) {
1020 m_pendingCallbacks->appendCommentCallback(s);
1021 return;
1022 }
1023
1024 exitText();
1025
1026 RefPtr<Node> newNode = Comment::create(document(), toString(s));
1027 m_currentNode->deprecatedParserAddChild(newNode.get());
1028 if (m_view && !newNode->attached())
1029 newNode->attach();
1030 }
1031
startDocument(const xmlChar * version,const xmlChar * encoding,int standalone)1032 void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone)
1033 {
1034 ExceptionCode ec = 0;
1035
1036 if (version)
1037 document()->setXMLVersion(toString(version), ec);
1038 document()->setXMLStandalone(standalone == 1, ec); // possible values are 0, 1, and -1
1039 if (encoding)
1040 document()->setXMLEncoding(toString(encoding));
1041 }
1042
// End-of-document handling: flushes any text still being buffered and, in
// XHTMLMP builds, resets the "saw a DOCTYPE declaration" flag.
void XMLDocumentParser::endDocument()
{
    exitText();
#if ENABLE(XHTMLMP)
    m_hasDocTypeDeclaration = false;
#endif
}
1050
internalSubset(const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)1051 void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1052 {
1053 if (isStopped())
1054 return;
1055
1056 if (m_parserPaused) {
1057 m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID);
1058 return;
1059 }
1060
1061 if (document()) {
1062 #if ENABLE(WML) || ENABLE(XHTMLMP)
1063 String extId = toString(externalID);
1064 #endif
1065 #if ENABLE(WML)
1066 if (isWMLDocument()
1067 && extId != "-//WAPFORUM//DTD WML 1.3//EN"
1068 && extId != "-//WAPFORUM//DTD WML 1.2//EN"
1069 && extId != "-//WAPFORUM//DTD WML 1.1//EN"
1070 && extId != "-//WAPFORUM//DTD WML 1.0//EN")
1071 handleError(fatal, "Invalid DTD Public ID", lineNumber(), columnNumber());
1072 #endif
1073 #if ENABLE(XHTMLMP)
1074 String dtdName = toString(name);
1075 if (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
1076 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN") {
1077 if (dtdName != HTMLNames::htmlTag.localName()) {
1078 handleError(fatal, "Invalid DOCTYPE declaration, expected 'html' as root element.", lineNumber(), columnNumber());
1079 return;
1080 }
1081
1082 if (document()->isXHTMLMPDocument())
1083 setIsXHTMLMPDocument(true);
1084 else
1085 setIsXHTMLDocument(true);
1086
1087 m_hasDocTypeDeclaration = true;
1088 }
1089 #endif
1090
1091 document()->parserAddChild(DocumentType::create(document(), toString(name), toString(externalID), toString(systemID)));
1092 }
1093 }
1094
getParser(void * closure)1095 static inline XMLDocumentParser* getParser(void* closure)
1096 {
1097 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1098 return static_cast<XMLDocumentParser*>(ctxt->_private);
1099 }
1100
1101 // This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
1102 // Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
hackAroundLibXMLEntityBug(void * closure)1103 static inline bool hackAroundLibXMLEntityBug(void* closure)
1104 {
1105 #if LIBXML_VERSION >= 20627
1106 UNUSED_PARAM(closure);
1107
1108 // This bug has been fixed in libxml 2.6.27.
1109 return false;
1110 #else
1111 return static_cast<xmlParserCtxtPtr>(closure)->node;
1112 #endif
1113 }
1114
startElementNsHandler(void * closure,const xmlChar * localname,const xmlChar * prefix,const xmlChar * uri,int nb_namespaces,const xmlChar ** namespaces,int nb_attributes,int nb_defaulted,const xmlChar ** libxmlAttributes)1115 static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
1116 {
1117 if (hackAroundLibXMLEntityBug(closure))
1118 return;
1119
1120 getParser(closure)->startElementNs(localname, prefix, uri, nb_namespaces, namespaces, nb_attributes, nb_defaulted, libxmlAttributes);
1121 }
1122
endElementNsHandler(void * closure,const xmlChar *,const xmlChar *,const xmlChar *)1123 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1124 {
1125 if (hackAroundLibXMLEntityBug(closure))
1126 return;
1127
1128 getParser(closure)->endElementNs();
1129 }
1130
charactersHandler(void * closure,const xmlChar * s,int len)1131 static void charactersHandler(void* closure, const xmlChar* s, int len)
1132 {
1133 if (hackAroundLibXMLEntityBug(closure))
1134 return;
1135
1136 getParser(closure)->characters(s, len);
1137 }
1138
processingInstructionHandler(void * closure,const xmlChar * target,const xmlChar * data)1139 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1140 {
1141 if (hackAroundLibXMLEntityBug(closure))
1142 return;
1143
1144 getParser(closure)->processingInstruction(target, data);
1145 }
1146
cdataBlockHandler(void * closure,const xmlChar * s,int len)1147 static void cdataBlockHandler(void* closure, const xmlChar* s, int len)
1148 {
1149 if (hackAroundLibXMLEntityBug(closure))
1150 return;
1151
1152 getParser(closure)->cdataBlock(s, len);
1153 }
1154
commentHandler(void * closure,const xmlChar * comment)1155 static void commentHandler(void* closure, const xmlChar* comment)
1156 {
1157 if (hackAroundLibXMLEntityBug(closure))
1158 return;
1159
1160 getParser(closure)->comment(comment);
1161 }
1162
1163 WTF_ATTRIBUTE_PRINTF(2, 3)
warningHandler(void * closure,const char * message,...)1164 static void warningHandler(void* closure, const char* message, ...)
1165 {
1166 va_list args;
1167 va_start(args, message);
1168 getParser(closure)->error(XMLDocumentParser::warning, message, args);
1169 va_end(args);
1170 }
1171
1172 WTF_ATTRIBUTE_PRINTF(2, 3)
fatalErrorHandler(void * closure,const char * message,...)1173 static void fatalErrorHandler(void* closure, const char* message, ...)
1174 {
1175 va_list args;
1176 va_start(args, message);
1177 getParser(closure)->error(XMLDocumentParser::fatal, message, args);
1178 va_end(args);
1179 }
1180
1181 WTF_ATTRIBUTE_PRINTF(2, 3)
normalErrorHandler(void * closure,const char * message,...)1182 static void normalErrorHandler(void* closure, const char* message, ...)
1183 {
1184 va_list args;
1185 va_start(args, message);
1186 getParser(closure)->error(XMLDocumentParser::nonFatal, message, args);
1187 va_end(args);
1188 }
1189
// Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
// a hack to avoid malloc/free. Using a global variable like this could cause
// trouble if libxml implementation details were to change.
// This buffer backs the shared entity's content: the UTF-8 bytes of the most
// recently decoded named entity followed by a terminating NUL (at most 5 bytes).
static xmlChar sharedXHTMLEntityResult[5] = {0, 0, 0, 0, 0};
1194
sharedXHTMLEntity()1195 static xmlEntityPtr sharedXHTMLEntity()
1196 {
1197 static xmlEntity entity;
1198 if (!entity.type) {
1199 entity.type = XML_ENTITY_DECL;
1200 entity.orig = sharedXHTMLEntityResult;
1201 entity.content = sharedXHTMLEntityResult;
1202 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
1203 }
1204 return &entity;
1205 }
1206
getXHTMLEntity(const xmlChar * name)1207 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1208 {
1209 UChar c = decodeNamedEntity(reinterpret_cast<const char*>(name));
1210 if (!c)
1211 return 0;
1212
1213 CString value = String(&c, 1).utf8();
1214 ASSERT(value.length() < 5);
1215 xmlEntityPtr entity = sharedXHTMLEntity();
1216 entity->length = value.length();
1217 entity->name = name;
1218 memcpy(sharedXHTMLEntityResult, value.data(), entity->length + 1);
1219
1220 return entity;
1221 }
1222
getEntityHandler(void * closure,const xmlChar * name)1223 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1224 {
1225 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1226 xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1227 if (ent) {
1228 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1229 return ent;
1230 }
1231
1232 ent = xmlGetDocEntity(ctxt->myDoc, name);
1233 if (!ent && (getParser(closure)->isXHTMLDocument()
1234 #if ENABLE(XHTMLMP)
1235 || getParser(closure)->isXHTMLMPDocument()
1236 #endif
1237 #if ENABLE(WML)
1238 || getParser(closure)->isWMLDocument()
1239 #endif
1240 )) {
1241 ent = getXHTMLEntity(name);
1242 if (ent)
1243 ent->etype = XML_INTERNAL_GENERAL_ENTITY;
1244 }
1245
1246 return ent;
1247 }
1248
startDocumentHandler(void * closure)1249 static void startDocumentHandler(void* closure)
1250 {
1251 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1252 switchToUTF16(ctxt);
1253 getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
1254 xmlSAX2StartDocument(closure);
1255 }
1256
// libxml2 endDocument callback: notifies the owning parser first, then runs
// libxml2's own SAX2 end-of-document handler on the same context.
static void endDocumentHandler(void* closure)
{
    getParser(closure)->endDocument();
    xmlSAX2EndDocument(closure);
}
1262
// libxml2 internalSubset callback: lets the owning parser handle the DOCTYPE
// first, then runs libxml2's own SAX2 handler with the same arguments.
static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
{
    getParser(closure)->internalSubset(name, externalID, systemID);
    xmlSAX2InternalSubset(closure, name, externalID, systemID);
}
1268
externalSubsetHandler(void * closure,const xmlChar *,const xmlChar * externalId,const xmlChar *)1269 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1270 {
1271 String extId = toString(externalId);
1272 if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
1273 || (extId == "-//W3C//DTD XHTML 1.1//EN")
1274 || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
1275 || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
1276 || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
1277 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
1278 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
1279 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
1280 )
1281 getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
1282 }
1283
// Intentionally empty libxml2 ignorableWhitespace callback; all arguments are
// ignored. It is installed only so that the SAX handler slot is non-null.
static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
{
    // nothing to do, but we need this to work around a crasher
    // http://bugzilla.gnome.org/show_bug.cgi?id=172255
    // http://bugs.webkit.org/show_bug.cgi?id=5792
}
1290
initializeParserContext(const char * chunk)1291 void XMLDocumentParser::initializeParserContext(const char* chunk)
1292 {
1293 xmlSAXHandler sax;
1294 memset(&sax, 0, sizeof(sax));
1295
1296 sax.error = normalErrorHandler;
1297 sax.fatalError = fatalErrorHandler;
1298 sax.characters = charactersHandler;
1299 sax.processingInstruction = processingInstructionHandler;
1300 sax.cdataBlock = cdataBlockHandler;
1301 sax.comment = commentHandler;
1302 sax.warning = warningHandler;
1303 sax.startElementNs = startElementNsHandler;
1304 sax.endElementNs = endElementNsHandler;
1305 sax.getEntity = getEntityHandler;
1306 sax.startDocument = startDocumentHandler;
1307 sax.endDocument = endDocumentHandler;
1308 sax.internalSubset = internalSubsetHandler;
1309 sax.externalSubset = externalSubsetHandler;
1310 sax.ignorableWhitespace = ignorableWhitespaceHandler;
1311 sax.entityDecl = xmlSAX2EntityDecl;
1312 sax.initialized = XML_SAX2_MAGIC;
1313 DocumentParser::startParsing();
1314 m_sawError = false;
1315 m_sawCSS = false;
1316 m_sawXSLTransform = false;
1317 m_sawFirstElement = false;
1318
1319 XMLDocumentParserScope scope(document()->cachedResourceLoader());
1320 if (m_parsingFragment)
1321 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1322 else {
1323 ASSERT(!chunk);
1324 m_context = XMLParserContext::createStringParser(&sax, this);
1325 }
1326 }
1327
doEnd()1328 void XMLDocumentParser::doEnd()
1329 {
1330 if (!isStopped()) {
1331 if (m_context) {
1332 // Tell libxml we're done.
1333 {
1334 XMLDocumentParserScope scope(document()->cachedResourceLoader());
1335 xmlParseChunk(context(), 0, 0, 1);
1336 }
1337
1338 m_context = 0;
1339 }
1340 }
1341
1342 #if ENABLE(XSLT)
1343 XMLTreeViewer xmlTreeViewer(document());
1344 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && xmlTreeViewer.hasNoStyleInformation();
1345 if (xmlViewerMode)
1346 xmlTreeViewer.transformDocumentToTreeView();
1347
1348 if (m_sawXSLTransform) {
1349 void* doc = xmlDocPtrForString(document()->cachedResourceLoader(), m_originalSourceForTransform, document()->url().string());
1350 document()->setTransformSource(new TransformSource(doc));
1351
1352 document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
1353 document()->styleSelectorChanged(RecalcStyleImmediately);
1354 document()->setParsing(true);
1355
1356 DocumentParser::stopParsing();
1357 }
1358 #endif
1359 }
1360
1361 #if ENABLE(XSLT)
xmlDocPtrForString(CachedResourceLoader * cachedResourceLoader,const String & source,const String & url)1362 void* xmlDocPtrForString(CachedResourceLoader* cachedResourceLoader, const String& source, const String& url)
1363 {
1364 if (source.isEmpty())
1365 return 0;
1366
1367 // Parse in a single chunk into an xmlDocPtr
1368 // FIXME: Hook up error handlers so that a failure to parse the main document results in
1369 // good error messages.
1370 const UChar BOM = 0xFEFF;
1371 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
1372
1373 XMLDocumentParserScope scope(cachedResourceLoader, errorFunc, 0);
1374 xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()),
1375 source.length() * sizeof(UChar),
1376 url.latin1().data(),
1377 BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE",
1378 XSLT_PARSE_OPTIONS);
1379 return sourceDoc;
1380 }
1381 #endif
1382
lineNumber() const1383 int XMLDocumentParser::lineNumber() const
1384 {
1385 // FIXME: The implementation probably returns 1-based int, but method should return 0-based.
1386 return context() ? context()->input->line : 1;
1387 }
1388
columnNumber() const1389 int XMLDocumentParser::columnNumber() const
1390 {
1391 // FIXME: The implementation probably returns 1-based int, but method should return 0-based.
1392 return context() ? context()->input->col : 1;
1393 }
1394
textPosition() const1395 TextPosition0 XMLDocumentParser::textPosition() const
1396 {
1397 xmlParserCtxtPtr context = this->context();
1398 if (!context)
1399 return TextPosition0::minimumPosition();
1400 // FIXME: The context probably contains 1-based numbers, but we treat them as 0-based,
1401 // to be consistent with fixme's in lineNumber() and columnNumber
1402 // methods.
1403 return TextPosition0(WTF::ZeroBasedNumber::fromZeroBasedInt(context->input->line),
1404 WTF::ZeroBasedNumber::fromZeroBasedInt(context->input->col));
1405 }
1406
1407 // This method has a correct implementation, in contrast to textPosition() method.
1408 // It should replace textPosition().
textPositionOneBased() const1409 TextPosition1 XMLDocumentParser::textPositionOneBased() const
1410 {
1411 xmlParserCtxtPtr context = this->context();
1412 if (!context)
1413 return TextPosition1::minimumPosition();
1414 return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(context->input->line),
1415 WTF::OneBasedNumber::fromOneBasedInt(context->input->col));
1416 }
1417
stopParsing()1418 void XMLDocumentParser::stopParsing()
1419 {
1420 DocumentParser::stopParsing();
1421 if (context())
1422 xmlStopParser(context());
1423 }
1424
resumeParsing()1425 void XMLDocumentParser::resumeParsing()
1426 {
1427 ASSERT(!isDetached());
1428 ASSERT(m_parserPaused);
1429
1430 m_parserPaused = false;
1431
1432 // First, execute any pending callbacks
1433 while (!m_pendingCallbacks->isEmpty()) {
1434 m_pendingCallbacks->callAndRemoveFirstCallback(this);
1435
1436 // A callback paused the parser
1437 if (m_parserPaused)
1438 return;
1439 }
1440
1441 // Then, write any pending data
1442 SegmentedString rest = m_pendingSrc;
1443 m_pendingSrc.clear();
1444 append(rest);
1445
1446 // Finally, if finish() has been called and write() didn't result
1447 // in any further callbacks being queued, call end()
1448 if (m_finishCalled && m_pendingCallbacks->isEmpty())
1449 end();
1450 }
1451
appendFragmentSource(const String & chunk)1452 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1453 {
1454 ASSERT(!m_context);
1455 ASSERT(m_parsingFragment);
1456
1457 CString chunkAsUtf8 = chunk.utf8();
1458 initializeParserContext(chunkAsUtf8.data());
1459 xmlParseContent(context());
1460 endDocument(); // Close any open text nodes.
1461
1462 // FIXME: If this code is actually needed, it should probably move to finish()
1463 // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
1464 // Check if all the chunk has been processed.
1465 long bytesProcessed = xmlByteConsumed(context());
1466 if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
1467 // FIXME: I don't believe we can hit this case without also having seen an error.
1468 // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
1469 ASSERT(m_sawError);
1470 return false;
1471 }
1472
1473 // No error if the chunk is well formed or it is not but we have no error.
1474 return context()->wellFormed || !xmlCtxtGetLastError(context());
1475 }
1476
// --------------------------------

// State shared between parseAttributes() and its SAX start-element handler.
struct AttributeParseState {
    // Qualified attribute name ("prefix:local" or just "local") -> attribute value.
    HashMap<String, String> attributes;
    // Set to true once the synthetic <attrs> element has been seen.
    bool gotAttributes;
};
1483
attributesStartElementNsHandler(void * closure,const xmlChar * xmlLocalName,const xmlChar *,const xmlChar *,int,const xmlChar **,int nb_attributes,int,const xmlChar ** libxmlAttributes)1484 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1485 const xmlChar* /*xmlURI*/, int /*nb_namespaces*/, const xmlChar** /*namespaces*/,
1486 int nb_attributes, int /*nb_defaulted*/, const xmlChar** libxmlAttributes)
1487 {
1488 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1489 return;
1490
1491 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1492 AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1493
1494 state->gotAttributes = true;
1495
1496 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1497 for (int i = 0; i < nb_attributes; i++) {
1498 String attrLocalName = toString(attributes[i].localname);
1499 int valueLength = (int) (attributes[i].end - attributes[i].value);
1500 String attrValue = toString(attributes[i].value, valueLength);
1501 String attrPrefix = toString(attributes[i].prefix);
1502 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1503
1504 state->attributes.set(attrQName, attrValue);
1505 }
1506 }
1507
parseAttributes(const String & string,bool & attrsOK)1508 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1509 {
1510 AttributeParseState state;
1511 state.gotAttributes = false;
1512
1513 xmlSAXHandler sax;
1514 memset(&sax, 0, sizeof(sax));
1515 sax.startElementNs = attributesStartElementNsHandler;
1516 sax.initialized = XML_SAX2_MAGIC;
1517 RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1518 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1519 xmlParseChunk(parser->context(), reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1);
1520 attrsOK = state.gotAttributes;
1521 return state.attributes;
1522 }
1523
1524 }
1525