1 /*
2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB. If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26 #include "config.h"
27 #include "XMLTokenizer.h"
28
29 #include "CDATASection.h"
30 #include "CString.h"
31 #include "CachedScript.h"
32 #include "Comment.h"
33 #include "DocLoader.h"
34 #include "Document.h"
35 #include "DocumentFragment.h"
36 #include "DocumentType.h"
37 #include "Frame.h"
38 #include "FrameLoader.h"
39 #include "FrameView.h"
40 #include "HTMLLinkElement.h"
41 #include "HTMLStyleElement.h"
42 #include "HTMLTokenizer.h" // for decodeNamedEntity
43 #include "ProcessingInstruction.h"
44 #include "ResourceError.h"
45 #include "ResourceHandle.h"
46 #include "ResourceRequest.h"
47 #include "ResourceResponse.h"
48 #include "ScriptController.h"
49 #include "ScriptElement.h"
50 #include "ScriptSourceCode.h"
51 #include "ScriptValue.h"
52 #include "TextResourceDecoder.h"
53 #include "XMLTokenizerScope.h"
54 #include <libxml/parser.h>
55 #include <libxml/parserInternals.h>
56 #include <wtf/Platform.h>
57 #include <wtf/StringExtras.h>
58 #include <wtf/Threading.h>
59 #include <wtf/UnusedParam.h>
60 #include <wtf/Vector.h>
61
62 #if ENABLE(XSLT)
63 #include <libxslt/xslt.h>
64 #endif
65
66 #if ENABLE(XHTMLMP)
67 #include "HTMLNames.h"
68 #include "HTMLScriptElement.h"
69 #endif
70
71 using namespace std;
72
73 namespace WebCore {
74
75 class PendingCallbacks : public Noncopyable {
76 public:
~PendingCallbacks()77 ~PendingCallbacks()
78 {
79 deleteAllValues(m_callbacks);
80 }
81
appendStartElementNSCallback(const xmlChar * xmlLocalName,const xmlChar * xmlPrefix,const xmlChar * xmlURI,int nb_namespaces,const xmlChar ** namespaces,int nb_attributes,int nb_defaulted,const xmlChar ** attributes)82 void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
83 const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes)
84 {
85 PendingStartElementNSCallback* callback = new PendingStartElementNSCallback;
86
87 callback->xmlLocalName = xmlStrdup(xmlLocalName);
88 callback->xmlPrefix = xmlStrdup(xmlPrefix);
89 callback->xmlURI = xmlStrdup(xmlURI);
90 callback->nb_namespaces = nb_namespaces;
91 callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_namespaces * 2));
92 for (int i = 0; i < nb_namespaces * 2 ; i++)
93 callback->namespaces[i] = xmlStrdup(namespaces[i]);
94 callback->nb_attributes = nb_attributes;
95 callback->nb_defaulted = nb_defaulted;
96 callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_attributes * 5));
97 for (int i = 0; i < nb_attributes; i++) {
98 // Each attribute has 5 elements in the array:
99 // name, prefix, uri, value and an end pointer.
100
101 for (int j = 0; j < 3; j++)
102 callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
103
104 int len = attributes[i * 5 + 4] - attributes[i * 5 + 3];
105
106 callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len);
107 callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len;
108 }
109
110 m_callbacks.append(callback);
111 }
112
appendEndElementNSCallback()113 void appendEndElementNSCallback()
114 {
115 PendingEndElementNSCallback* callback = new PendingEndElementNSCallback;
116
117 m_callbacks.append(callback);
118 }
119
appendCharactersCallback(const xmlChar * s,int len)120 void appendCharactersCallback(const xmlChar* s, int len)
121 {
122 PendingCharactersCallback* callback = new PendingCharactersCallback;
123
124 callback->s = xmlStrndup(s, len);
125 callback->len = len;
126
127 m_callbacks.append(callback);
128 }
129
appendProcessingInstructionCallback(const xmlChar * target,const xmlChar * data)130 void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data)
131 {
132 PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback;
133
134 callback->target = xmlStrdup(target);
135 callback->data = xmlStrdup(data);
136
137 m_callbacks.append(callback);
138 }
139
appendCDATABlockCallback(const xmlChar * s,int len)140 void appendCDATABlockCallback(const xmlChar* s, int len)
141 {
142 PendingCDATABlockCallback* callback = new PendingCDATABlockCallback;
143
144 callback->s = xmlStrndup(s, len);
145 callback->len = len;
146
147 m_callbacks.append(callback);
148 }
149
appendCommentCallback(const xmlChar * s)150 void appendCommentCallback(const xmlChar* s)
151 {
152 PendingCommentCallback* callback = new PendingCommentCallback;
153
154 callback->s = xmlStrdup(s);
155
156 m_callbacks.append(callback);
157 }
158
appendInternalSubsetCallback(const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)159 void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
160 {
161 PendingInternalSubsetCallback* callback = new PendingInternalSubsetCallback;
162
163 callback->name = xmlStrdup(name);
164 callback->externalID = xmlStrdup(externalID);
165 callback->systemID = xmlStrdup(systemID);
166
167 m_callbacks.append(callback);
168 }
169
appendErrorCallback(XMLTokenizer::ErrorType type,const char * message,int lineNumber,int columnNumber)170 void appendErrorCallback(XMLTokenizer::ErrorType type, const char* message, int lineNumber, int columnNumber)
171 {
172 PendingErrorCallback* callback = new PendingErrorCallback;
173
174 callback->message = strdup(message);
175 callback->type = type;
176 callback->lineNumber = lineNumber;
177 callback->columnNumber = columnNumber;
178
179 m_callbacks.append(callback);
180 }
181
callAndRemoveFirstCallback(XMLTokenizer * tokenizer)182 void callAndRemoveFirstCallback(XMLTokenizer* tokenizer)
183 {
184 OwnPtr<PendingCallback> callback(m_callbacks.first());
185 m_callbacks.removeFirst();
186 callback->call(tokenizer);
187 }
188
isEmpty() const189 bool isEmpty() const { return m_callbacks.isEmpty(); }
190
191 private:
192 struct PendingCallback {
~PendingCallbackWebCore::PendingCallbacks::PendingCallback193 virtual ~PendingCallback() { }
194 virtual void call(XMLTokenizer* tokenizer) = 0;
195 };
196
197 struct PendingStartElementNSCallback : public PendingCallback {
~PendingStartElementNSCallbackWebCore::PendingCallbacks::PendingStartElementNSCallback198 virtual ~PendingStartElementNSCallback()
199 {
200 xmlFree(xmlLocalName);
201 xmlFree(xmlPrefix);
202 xmlFree(xmlURI);
203 for (int i = 0; i < nb_namespaces * 2; i++)
204 xmlFree(namespaces[i]);
205 xmlFree(namespaces);
206 for (int i = 0; i < nb_attributes; i++)
207 for (int j = 0; j < 4; j++)
208 xmlFree(attributes[i * 5 + j]);
209 xmlFree(attributes);
210 }
211
callWebCore::PendingCallbacks::PendingStartElementNSCallback212 virtual void call(XMLTokenizer* tokenizer)
213 {
214 tokenizer->startElementNs(xmlLocalName, xmlPrefix, xmlURI,
215 nb_namespaces, const_cast<const xmlChar**>(namespaces),
216 nb_attributes, nb_defaulted, const_cast<const xmlChar**>(attributes));
217 }
218
219 xmlChar* xmlLocalName;
220 xmlChar* xmlPrefix;
221 xmlChar* xmlURI;
222 int nb_namespaces;
223 xmlChar** namespaces;
224 int nb_attributes;
225 int nb_defaulted;
226 xmlChar** attributes;
227 };
228
229 struct PendingEndElementNSCallback : public PendingCallback {
callWebCore::PendingCallbacks::PendingEndElementNSCallback230 virtual void call(XMLTokenizer* tokenizer)
231 {
232 tokenizer->endElementNs();
233 }
234 };
235
236 struct PendingCharactersCallback : public PendingCallback {
~PendingCharactersCallbackWebCore::PendingCallbacks::PendingCharactersCallback237 virtual ~PendingCharactersCallback()
238 {
239 xmlFree(s);
240 }
241
callWebCore::PendingCallbacks::PendingCharactersCallback242 virtual void call(XMLTokenizer* tokenizer)
243 {
244 tokenizer->characters(s, len);
245 }
246
247 xmlChar* s;
248 int len;
249 };
250
251 struct PendingProcessingInstructionCallback : public PendingCallback {
~PendingProcessingInstructionCallbackWebCore::PendingCallbacks::PendingProcessingInstructionCallback252 virtual ~PendingProcessingInstructionCallback()
253 {
254 xmlFree(target);
255 xmlFree(data);
256 }
257
callWebCore::PendingCallbacks::PendingProcessingInstructionCallback258 virtual void call(XMLTokenizer* tokenizer)
259 {
260 tokenizer->processingInstruction(target, data);
261 }
262
263 xmlChar* target;
264 xmlChar* data;
265 };
266
267 struct PendingCDATABlockCallback : public PendingCallback {
~PendingCDATABlockCallbackWebCore::PendingCallbacks::PendingCDATABlockCallback268 virtual ~PendingCDATABlockCallback()
269 {
270 xmlFree(s);
271 }
272
callWebCore::PendingCallbacks::PendingCDATABlockCallback273 virtual void call(XMLTokenizer* tokenizer)
274 {
275 tokenizer->cdataBlock(s, len);
276 }
277
278 xmlChar* s;
279 int len;
280 };
281
282 struct PendingCommentCallback : public PendingCallback {
~PendingCommentCallbackWebCore::PendingCallbacks::PendingCommentCallback283 virtual ~PendingCommentCallback()
284 {
285 xmlFree(s);
286 }
287
callWebCore::PendingCallbacks::PendingCommentCallback288 virtual void call(XMLTokenizer* tokenizer)
289 {
290 tokenizer->comment(s);
291 }
292
293 xmlChar* s;
294 };
295
296 struct PendingInternalSubsetCallback : public PendingCallback {
~PendingInternalSubsetCallbackWebCore::PendingCallbacks::PendingInternalSubsetCallback297 virtual ~PendingInternalSubsetCallback()
298 {
299 xmlFree(name);
300 xmlFree(externalID);
301 xmlFree(systemID);
302 }
303
callWebCore::PendingCallbacks::PendingInternalSubsetCallback304 virtual void call(XMLTokenizer* tokenizer)
305 {
306 tokenizer->internalSubset(name, externalID, systemID);
307 }
308
309 xmlChar* name;
310 xmlChar* externalID;
311 xmlChar* systemID;
312 };
313
314 struct PendingErrorCallback: public PendingCallback {
~PendingErrorCallbackWebCore::PendingCallbacks::PendingErrorCallback315 virtual ~PendingErrorCallback()
316 {
317 free(message);
318 }
319
callWebCore::PendingCallbacks::PendingErrorCallback320 virtual void call(XMLTokenizer* tokenizer)
321 {
322 tokenizer->handleError(type, message, lineNumber, columnNumber);
323 }
324
325 XMLTokenizer::ErrorType type;
326 char* message;
327 int lineNumber;
328 int columnNumber;
329 };
330
331 Deque<PendingCallback*> m_callbacks;
332 };
333 // --------------------------------
334
335 static int globalDescriptor = 0;
336 static ThreadIdentifier libxmlLoaderThread = 0;
337
matchFunc(const char *)338 static int matchFunc(const char*)
339 {
340 // Only match loads initiated due to uses of libxml2 from within XMLTokenizer to avoid
341 // interfering with client applications that also use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
342 return XMLTokenizerScope::currentDocLoader && currentThread() == libxmlLoaderThread;
343 }
344
345 class OffsetBuffer {
346 public:
OffsetBuffer(const Vector<char> & b)347 OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
348
readOutBytes(char * outputBuffer,unsigned askedToRead)349 int readOutBytes(char* outputBuffer, unsigned askedToRead)
350 {
351 unsigned bytesLeft = m_buffer.size() - m_currentOffset;
352 unsigned lenToCopy = min(askedToRead, bytesLeft);
353 if (lenToCopy) {
354 memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
355 m_currentOffset += lenToCopy;
356 }
357 return lenToCopy;
358 }
359
360 private:
361 Vector<char> m_buffer;
362 unsigned m_currentOffset;
363 };
364
shouldAllowExternalLoad(const KURL & url)365 static bool shouldAllowExternalLoad(const KURL& url)
366 {
367 String urlString = url.string();
368
369 // On non-Windows platforms libxml asks for this URL, the
370 // "XML_XML_DEFAULT_CATALOG", on initialization.
371 if (urlString == "file:///etc/xml/catalog")
372 return false;
373
374 // On Windows, libxml computes a URL relative to where its DLL resides.
375 if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
376 return false;
377
378 // The most common DTD. There isn't much point in hammering www.w3c.org
379 // by requesting this URL for every XHTML document.
380 if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
381 return false;
382
383 // Similarly, there isn't much point in requesting the SVG DTD.
384 if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
385 return false;
386
387 // The libxml doesn't give us a lot of context for deciding whether to
388 // allow this request. In the worst case, this load could be for an
389 // external entity and the resulting document could simply read the
390 // retrieved content. If we had more context, we could potentially allow
391 // the parser to load a DTD. As things stand, we take the conservative
392 // route and allow same-origin requests only.
393 if (!XMLTokenizerScope::currentDocLoader->doc()->securityOrigin()->canRequest(url)) {
394 XMLTokenizerScope::currentDocLoader->printAccessDeniedMessage(url);
395 return false;
396 }
397
398 return true;
399 }
400
openFunc(const char * uri)401 static void* openFunc(const char* uri)
402 {
403 ASSERT(XMLTokenizerScope::currentDocLoader);
404 ASSERT(currentThread() == libxmlLoaderThread);
405
406 KURL url(KURL(), uri);
407
408 if (!shouldAllowExternalLoad(url))
409 return &globalDescriptor;
410
411 ResourceError error;
412 ResourceResponse response;
413 Vector<char> data;
414
415
416 {
417 DocLoader* docLoader = XMLTokenizerScope::currentDocLoader;
418 XMLTokenizerScope scope(0);
419 // FIXME: We should restore the original global error handler as well.
420
421 if (docLoader->frame())
422 docLoader->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, error, response, data);
423 }
424
425 // We have to check the URL again after the load to catch redirects.
426 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
427 if (!shouldAllowExternalLoad(response.url()))
428 return &globalDescriptor;
429
430 return new OffsetBuffer(data);
431 }
432
readFunc(void * context,char * buffer,int len)433 static int readFunc(void* context, char* buffer, int len)
434 {
435 // Do 0-byte reads in case of a null descriptor
436 if (context == &globalDescriptor)
437 return 0;
438
439 OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
440 return data->readOutBytes(buffer, len);
441 }
442
// libxml2 "write" callback: ignores all of its arguments and reports that
// zero bytes were written.
static int writeFunc(void*, const char*, int)
{
    // Always just do 0-byte writes
    const int bytesWritten = 0;
    return bytesWritten;
}
448
closeFunc(void * context)449 static int closeFunc(void* context)
450 {
451 if (context != &globalDescriptor) {
452 OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
453 delete data;
454 }
455 return 0;
456 }
457
458 #if ENABLE(XSLT)
errorFunc(void *,const char *,...)459 static void errorFunc(void*, const char*, ...)
460 {
461 // FIXME: It would be nice to display error messages somewhere.
462 }
463 #endif
464
465 static bool didInit = false;
466
createStringParser(xmlSAXHandlerPtr handlers,void * userData)467 static xmlParserCtxtPtr createStringParser(xmlSAXHandlerPtr handlers, void* userData)
468 {
469 if (!didInit) {
470 xmlInitParser();
471 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
472 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
473 libxmlLoaderThread = currentThread();
474 didInit = true;
475 }
476
477 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
478 parser->_private = userData;
479 parser->replaceEntities = true;
480 const UChar BOM = 0xFEFF;
481 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
482 xmlSwitchEncoding(parser, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
483
484 return parser;
485 }
486
487
488 // Chunk should be encoded in UTF-8
createMemoryParser(xmlSAXHandlerPtr handlers,void * userData,const char * chunk)489 static xmlParserCtxtPtr createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const char* chunk)
490 {
491 if (!didInit) {
492 xmlInitParser();
493 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
494 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
495 libxmlLoaderThread = currentThread();
496 didInit = true;
497 }
498
499 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk, xmlStrlen((const xmlChar*)chunk));
500
501 if (!parser)
502 return 0;
503
504 // Copy the sax handler
505 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
506
507 // Set parser options.
508 // XML_PARSE_NODICT: default dictionary option.
509 // XML_PARSE_NOENT: force entities substitutions.
510 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
511
512 // Internal initialization
513 parser->sax2 = 1;
514 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
515 parser->depth = 0;
516 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
517 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
518 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
519 parser->_private = userData;
520
521 return parser;
522 }
523
524 // --------------------------------
525
XMLTokenizer(Document * _doc,FrameView * _view)526 XMLTokenizer::XMLTokenizer(Document* _doc, FrameView* _view)
527 : m_doc(_doc)
528 , m_view(_view)
529 , m_context(0)
530 , m_pendingCallbacks(new PendingCallbacks)
531 , m_currentNode(_doc)
532 , m_currentNodeIsReferenced(false)
533 , m_sawError(false)
534 , m_sawXSLTransform(false)
535 , m_sawFirstElement(false)
536 , m_isXHTMLDocument(false)
537 #if ENABLE(XHTMLMP)
538 , m_isXHTMLMPDocument(false)
539 , m_hasDocTypeDeclaration(false)
540 #endif
541 , m_parserPaused(false)
542 , m_requestingScript(false)
543 , m_finishCalled(false)
544 , m_errorCount(0)
545 , m_lastErrorLine(0)
546 , m_lastErrorColumn(0)
547 , m_pendingScript(0)
548 , m_scriptStartLine(0)
549 , m_parsingFragment(false)
550 {
551 }
552
XMLTokenizer(DocumentFragment * fragment,Element * parentElement)553 XMLTokenizer::XMLTokenizer(DocumentFragment* fragment, Element* parentElement)
554 : m_doc(fragment->document())
555 , m_view(0)
556 , m_context(0)
557 , m_pendingCallbacks(new PendingCallbacks)
558 , m_currentNode(fragment)
559 , m_currentNodeIsReferenced(fragment)
560 , m_sawError(false)
561 , m_sawXSLTransform(false)
562 , m_sawFirstElement(false)
563 , m_isXHTMLDocument(false)
564 #if ENABLE(XHTMLMP)
565 , m_isXHTMLMPDocument(false)
566 , m_hasDocTypeDeclaration(false)
567 #endif
568 , m_parserPaused(false)
569 , m_requestingScript(false)
570 , m_finishCalled(false)
571 , m_errorCount(0)
572 , m_lastErrorLine(0)
573 , m_lastErrorColumn(0)
574 , m_pendingScript(0)
575 , m_scriptStartLine(0)
576 , m_parsingFragment(true)
577 {
578 if (fragment)
579 fragment->ref();
580 if (m_doc)
581 m_doc->ref();
582
583 // Add namespaces based on the parent node
584 Vector<Element*> elemStack;
585 while (parentElement) {
586 elemStack.append(parentElement);
587
588 Node* n = parentElement->parentNode();
589 if (!n || !n->isElementNode())
590 break;
591 parentElement = static_cast<Element*>(n);
592 }
593
594 if (elemStack.isEmpty())
595 return;
596
597 for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
598 if (NamedNodeMap* attrs = element->attributes()) {
599 for (unsigned i = 0; i < attrs->length(); i++) {
600 Attribute* attr = attrs->attributeItem(i);
601 if (attr->localName() == "xmlns")
602 m_defaultNamespaceURI = attr->value();
603 else if (attr->prefix() == "xmlns")
604 m_prefixToNamespaceMap.set(attr->localName(), attr->value());
605 }
606 }
607 }
608
609 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
610 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
611 m_defaultNamespaceURI = parentElement->namespaceURI();
612 }
613
~XMLTokenizer()614 XMLTokenizer::~XMLTokenizer()
615 {
616 setCurrentNode(0);
617 if (m_parsingFragment && m_doc)
618 m_doc->deref();
619 if (m_pendingScript)
620 m_pendingScript->removeClient(this);
621 if (m_context)
622 xmlFreeParserCtxt(m_context);
623 }
624
doWrite(const String & parseString)625 void XMLTokenizer::doWrite(const String& parseString)
626 {
627 if (!m_context)
628 initializeParserContext();
629
630 // libXML throws an error if you try to switch the encoding for an empty string.
631 if (parseString.length()) {
632 // Hack around libxml2's lack of encoding overide support by manually
633 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
634 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
635 // and switch encodings, causing the parse to fail.
636 const UChar BOM = 0xFEFF;
637 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
638 xmlSwitchEncoding(m_context, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
639
640 XMLTokenizerScope scope(m_doc->docLoader());
641 xmlParseChunk(m_context, reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
642 }
643
644 if (m_doc->decoder() && m_doc->decoder()->sawError()) {
645 // If the decoder saw an error, report it as fatal (stops parsing)
646 handleError(fatal, "Encoding error", lineNumber(), columnNumber());
647 }
648
649 return;
650 }
651
toString(const xmlChar * str,unsigned len)652 static inline String toString(const xmlChar* str, unsigned len)
653 {
654 return UTF8Encoding().decode(reinterpret_cast<const char*>(str), len);
655 }
656
toString(const xmlChar * str)657 static inline String toString(const xmlChar* str)
658 {
659 if (!str)
660 return String();
661
662 return UTF8Encoding().decode(reinterpret_cast<const char*>(str), strlen(reinterpret_cast<const char*>(str)));
663 }
664
665 struct _xmlSAX2Namespace {
666 const xmlChar* prefix;
667 const xmlChar* uri;
668 };
669 typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
670
handleElementNamespaces(Element * newElement,const xmlChar ** libxmlNamespaces,int nb_namespaces,ExceptionCode & ec)671 static inline void handleElementNamespaces(Element* newElement, const xmlChar** libxmlNamespaces, int nb_namespaces, ExceptionCode& ec)
672 {
673 xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
674 for (int i = 0; i < nb_namespaces; i++) {
675 String namespaceQName = "xmlns";
676 String namespaceURI = toString(namespaces[i].uri);
677 if (namespaces[i].prefix)
678 namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
679 newElement->setAttributeNS("http://www.w3.org/2000/xmlns/", namespaceQName, namespaceURI, ec);
680 if (ec) // exception setting attributes
681 return;
682 }
683 }
684
685 struct _xmlSAX2Attributes {
686 const xmlChar* localname;
687 const xmlChar* prefix;
688 const xmlChar* uri;
689 const xmlChar* value;
690 const xmlChar* end;
691 };
692 typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
693
handleElementAttributes(Element * newElement,const xmlChar ** libxmlAttributes,int nb_attributes,ExceptionCode & ec)694 static inline void handleElementAttributes(Element* newElement, const xmlChar** libxmlAttributes, int nb_attributes, ExceptionCode& ec)
695 {
696 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
697 for (int i = 0; i < nb_attributes; i++) {
698 String attrLocalName = toString(attributes[i].localname);
699 int valueLength = (int) (attributes[i].end - attributes[i].value);
700 String attrValue = toString(attributes[i].value, valueLength);
701 String attrPrefix = toString(attributes[i].prefix);
702 String attrURI = attrPrefix.isEmpty() ? String() : toString(attributes[i].uri);
703 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
704
705 newElement->setAttributeNS(attrURI, attrQName, attrValue, ec);
706 if (ec) // exception setting attributes
707 return;
708 }
709 }
710
startElementNs(const xmlChar * xmlLocalName,const xmlChar * xmlPrefix,const xmlChar * xmlURI,int nb_namespaces,const xmlChar ** libxmlNamespaces,int nb_attributes,int nb_defaulted,const xmlChar ** libxmlAttributes)711 void XMLTokenizer::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
712 const xmlChar** libxmlNamespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
713 {
714 if (m_parserStopped)
715 return;
716
717 if (m_parserPaused) {
718 m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, nb_namespaces, libxmlNamespaces,
719 nb_attributes, nb_defaulted, libxmlAttributes);
720 return;
721 }
722
723 #if ENABLE(XHTMLMP)
724 // check if the DOCTYPE Declaration of XHTMLMP document exists
725 if (!m_hasDocTypeDeclaration && m_doc->isXHTMLMPDocument()) {
726 handleError(fatal, "DOCTYPE declaration lost.", lineNumber(), columnNumber());
727 return;
728 }
729 #endif
730
731 exitText();
732
733 String localName = toString(xmlLocalName);
734 String uri = toString(xmlURI);
735 String prefix = toString(xmlPrefix);
736
737 if (m_parsingFragment && uri.isNull()) {
738 if (!prefix.isNull())
739 uri = m_prefixToNamespaceMap.get(prefix);
740 else
741 uri = m_defaultNamespaceURI;
742 }
743
744 #if ENABLE(XHTMLMP)
745 if (!m_sawFirstElement && isXHTMLMPDocument()) {
746 // As per the section 7.1 of OMA-WAP-XHTMLMP-V1_1-20061020-A.pdf,
747 // we should make sure that the root element MUST be 'html' and
748 // ensure the name of the default namespace on the root elment 'html'
749 // MUST be 'http://www.w3.org/1999/xhtml'
750 if (localName != HTMLNames::htmlTag.localName()) {
751 handleError(fatal, "XHTMLMP document expects 'html' as root element.", lineNumber(), columnNumber());
752 return;
753 }
754
755 if (uri.isNull()) {
756 m_defaultNamespaceURI = HTMLNames::xhtmlNamespaceURI;
757 uri = m_defaultNamespaceURI;
758 }
759 }
760 #endif
761
762 bool isFirstElement = !m_sawFirstElement;
763 m_sawFirstElement = true;
764
765 QualifiedName qName(prefix, localName, uri);
766 RefPtr<Element> newElement = m_doc->createElement(qName, true);
767 if (!newElement) {
768 stopParsing();
769 return;
770 }
771
772 ExceptionCode ec = 0;
773 handleElementNamespaces(newElement.get(), libxmlNamespaces, nb_namespaces, ec);
774 if (ec) {
775 stopParsing();
776 return;
777 }
778
779 ScriptController* jsProxy = m_doc->frame() ? m_doc->frame()->script() : 0;
780 if (jsProxy && m_doc->frame()->script()->isEnabled())
781 jsProxy->setEventHandlerLineNumber(lineNumber());
782
783 handleElementAttributes(newElement.get(), libxmlAttributes, nb_attributes, ec);
784 if (ec) {
785 stopParsing();
786 return;
787 }
788
789 if (jsProxy)
790 jsProxy->setEventHandlerLineNumber(0);
791
792 newElement->beginParsingChildren();
793
794 ScriptElement* scriptElement = toScriptElement(newElement.get());
795 if (scriptElement)
796 m_scriptStartLine = lineNumber();
797
798 if (!m_currentNode->addChild(newElement.get())) {
799 stopParsing();
800 return;
801 }
802
803 setCurrentNode(newElement.get());
804 if (m_view && !newElement->attached())
805 newElement->attach();
806
807 if (isFirstElement && m_doc->frame())
808 m_doc->frame()->loader()->dispatchDocumentElementAvailable();
809 }
810
endElementNs()811 void XMLTokenizer::endElementNs()
812 {
813 if (m_parserStopped)
814 return;
815
816 if (m_parserPaused) {
817 m_pendingCallbacks->appendEndElementNSCallback();
818 return;
819 }
820
821 exitText();
822
823 Node* n = m_currentNode;
824 RefPtr<Node> parent = n->parentNode();
825 n->finishParsingChildren();
826
827 if (!n->isElementNode() || !m_view) {
828 setCurrentNode(parent.get());
829 return;
830 }
831
832 Element* element = static_cast<Element*>(n);
833 ScriptElement* scriptElement = toScriptElement(element);
834 if (!scriptElement) {
835 setCurrentNode(parent.get());
836 return;
837 }
838
839 // don't load external scripts for standalone documents (for now)
840 ASSERT(!m_pendingScript);
841 m_requestingScript = true;
842
843 #if ENABLE(XHTMLMP)
844 if (!scriptElement->shouldExecuteAsJavaScript())
845 m_doc->setShouldProcessNoscriptElement(true);
846 else
847 #endif
848 {
849 String scriptHref = scriptElement->sourceAttributeValue();
850 if (!scriptHref.isEmpty()) {
851 // we have a src attribute
852 String scriptCharset = scriptElement->scriptCharset();
853 if ((m_pendingScript = m_doc->docLoader()->requestScript(scriptHref, scriptCharset))) {
854 m_scriptElement = element;
855 m_pendingScript->addClient(this);
856
857 // m_pendingScript will be 0 if script was already loaded and ref() executed it
858 if (m_pendingScript)
859 pauseParsing();
860 } else
861 m_scriptElement = 0;
862 } else
863 m_view->frame()->loader()->executeScript(ScriptSourceCode(scriptElement->scriptContent(), m_doc->url(), m_scriptStartLine));
864 }
865 m_requestingScript = false;
866 setCurrentNode(parent.get());
867 }
868
characters(const xmlChar * s,int len)869 void XMLTokenizer::characters(const xmlChar* s, int len)
870 {
871 if (m_parserStopped)
872 return;
873
874 if (m_parserPaused) {
875 m_pendingCallbacks->appendCharactersCallback(s, len);
876 return;
877 }
878
879 if (m_currentNode->isTextNode() || enterText())
880 m_bufferedText.append(s, len);
881 }
882
error(ErrorType type,const char * message,va_list args)883 void XMLTokenizer::error(ErrorType type, const char* message, va_list args)
884 {
885 if (m_parserStopped)
886 return;
887
888 #if PLATFORM(WIN_OS)
889 char m[1024];
890 vsnprintf(m, sizeof(m) - 1, message, args);
891 #else
892 char* m;
893 if (vasprintf(&m, message, args) == -1)
894 return;
895 #endif
896
897 if (m_parserPaused)
898 m_pendingCallbacks->appendErrorCallback(type, m, lineNumber(), columnNumber());
899 else
900 handleError(type, m, lineNumber(), columnNumber());
901
902 #if !PLATFORM(WIN_OS)
903 free(m);
904 #endif
905 }
906
processingInstruction(const xmlChar * target,const xmlChar * data)907 void XMLTokenizer::processingInstruction(const xmlChar* target, const xmlChar* data)
908 {
909 if (m_parserStopped)
910 return;
911
912 if (m_parserPaused) {
913 m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
914 return;
915 }
916
917 exitText();
918
919 // ### handle exceptions
920 int exception = 0;
921 RefPtr<ProcessingInstruction> pi = m_doc->createProcessingInstruction(
922 toString(target), toString(data), exception);
923 if (exception)
924 return;
925
926 pi->setCreatedByParser(true);
927
928 if (!m_currentNode->addChild(pi.get()))
929 return;
930 if (m_view && !pi->attached())
931 pi->attach();
932
933 pi->finishParsingChildren();
934
935 #if ENABLE(XSLT)
936 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
937 if (m_sawXSLTransform && !m_doc->transformSourceDocument())
938 stopParsing();
939 #endif
940 }
941
cdataBlock(const xmlChar * s,int len)942 void XMLTokenizer::cdataBlock(const xmlChar* s, int len)
943 {
944 if (m_parserStopped)
945 return;
946
947 if (m_parserPaused) {
948 m_pendingCallbacks->appendCDATABlockCallback(s, len);
949 return;
950 }
951
952 exitText();
953
954 RefPtr<Node> newNode = new CDATASection(m_doc, toString(s, len));
955 if (!m_currentNode->addChild(newNode.get()))
956 return;
957 if (m_view && !newNode->attached())
958 newNode->attach();
959 }
960
comment(const xmlChar * s)961 void XMLTokenizer::comment(const xmlChar* s)
962 {
963 if (m_parserStopped)
964 return;
965
966 if (m_parserPaused) {
967 m_pendingCallbacks->appendCommentCallback(s);
968 return;
969 }
970
971 exitText();
972
973 RefPtr<Node> newNode = new Comment(m_doc, toString(s));
974 m_currentNode->addChild(newNode.get());
975 if (m_view && !newNode->attached())
976 newNode->attach();
977 }
978
startDocument(const xmlChar * version,const xmlChar * encoding,int standalone)979 void XMLTokenizer::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone)
980 {
981 ExceptionCode ec = 0;
982
983 if (version)
984 m_doc->setXMLVersion(toString(version), ec);
985 m_doc->setXMLStandalone(standalone == 1, ec); // possible values are 0, 1, and -1
986 if (encoding)
987 m_doc->setXMLEncoding(toString(encoding));
988 }
989
endDocument()990 void XMLTokenizer::endDocument()
991 {
992 exitText();
993 #if ENABLE(XHTMLMP)
994 m_hasDocTypeDeclaration = false;
995 #endif
996 }
997
internalSubset(const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)998 void XMLTokenizer::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
999 {
1000 if (m_parserStopped)
1001 return;
1002
1003 if (m_parserPaused) {
1004 m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID);
1005 return;
1006 }
1007
1008 if (m_doc) {
1009 #if ENABLE(WML) || ENABLE(XHTMLMP)
1010 String extId = toString(externalID);
1011 #endif
1012 #if ENABLE(WML)
1013 if (isWMLDocument()
1014 && extId != "-//WAPFORUM//DTD WML 1.3//EN"
1015 && extId != "-//WAPFORUM//DTD WML 1.2//EN"
1016 && extId != "-//WAPFORUM//DTD WML 1.1//EN"
1017 && extId != "-//WAPFORUM//DTD WML 1.0//EN")
1018 handleError(fatal, "Invalid DTD Public ID", lineNumber(), columnNumber());
1019 #endif
1020 #if ENABLE(XHTMLMP)
1021 String dtdName = toString(name);
1022 if (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
1023 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN") {
1024 if (dtdName != HTMLNames::htmlTag.localName()) {
1025 handleError(fatal, "Invalid DOCTYPE declaration, expected 'html' as root element.", lineNumber(), columnNumber());
1026 return;
1027 }
1028
1029 if (m_doc->isXHTMLMPDocument())
1030 setIsXHTMLMPDocument(true);
1031 else
1032 setIsXHTMLDocument(true);
1033
1034 m_hasDocTypeDeclaration = true;
1035 }
1036 #endif
1037
1038 #if ENABLE(XHTMLMP)
1039 m_doc->addChild(DocumentType::create(m_doc, dtdName, extId, toString(systemID)));
1040 #elif ENABLE(WML)
1041 m_doc->addChild(DocumentType::create(m_doc, toString(name), extId, toString(systemID)));
1042 #else
1043 m_doc->addChild(DocumentType::create(m_doc, toString(name), toString(externalID), toString(systemID)));
1044 #endif
1045 }
1046 }
1047
getTokenizer(void * closure)1048 static inline XMLTokenizer* getTokenizer(void* closure)
1049 {
1050 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1051 return static_cast<XMLTokenizer*>(ctxt->_private);
1052 }
1053
1054 // This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
1055 // Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
hackAroundLibXMLEntityBug(void * closure)1056 static inline bool hackAroundLibXMLEntityBug(void* closure)
1057 {
1058 #if LIBXML_VERSION >= 20627
1059 UNUSED_PARAM(closure);
1060
1061 // This bug has been fixed in libxml 2.6.27.
1062 return false;
1063 #else
1064 return static_cast<xmlParserCtxtPtr>(closure)->node;
1065 #endif
1066 }
1067
startElementNsHandler(void * closure,const xmlChar * localname,const xmlChar * prefix,const xmlChar * uri,int nb_namespaces,const xmlChar ** namespaces,int nb_attributes,int nb_defaulted,const xmlChar ** libxmlAttributes)1068 static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
1069 {
1070 if (hackAroundLibXMLEntityBug(closure))
1071 return;
1072
1073 getTokenizer(closure)->startElementNs(localname, prefix, uri, nb_namespaces, namespaces, nb_attributes, nb_defaulted, libxmlAttributes);
1074 }
1075
endElementNsHandler(void * closure,const xmlChar *,const xmlChar *,const xmlChar *)1076 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1077 {
1078 if (hackAroundLibXMLEntityBug(closure))
1079 return;
1080
1081 getTokenizer(closure)->endElementNs();
1082 }
1083
charactersHandler(void * closure,const xmlChar * s,int len)1084 static void charactersHandler(void* closure, const xmlChar* s, int len)
1085 {
1086 if (hackAroundLibXMLEntityBug(closure))
1087 return;
1088
1089 getTokenizer(closure)->characters(s, len);
1090 }
1091
processingInstructionHandler(void * closure,const xmlChar * target,const xmlChar * data)1092 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1093 {
1094 if (hackAroundLibXMLEntityBug(closure))
1095 return;
1096
1097 getTokenizer(closure)->processingInstruction(target, data);
1098 }
1099
cdataBlockHandler(void * closure,const xmlChar * s,int len)1100 static void cdataBlockHandler(void* closure, const xmlChar* s, int len)
1101 {
1102 if (hackAroundLibXMLEntityBug(closure))
1103 return;
1104
1105 getTokenizer(closure)->cdataBlock(s, len);
1106 }
1107
commentHandler(void * closure,const xmlChar * comment)1108 static void commentHandler(void* closure, const xmlChar* comment)
1109 {
1110 if (hackAroundLibXMLEntityBug(closure))
1111 return;
1112
1113 getTokenizer(closure)->comment(comment);
1114 }
1115
1116 WTF_ATTRIBUTE_PRINTF(2, 3)
warningHandler(void * closure,const char * message,...)1117 static void warningHandler(void* closure, const char* message, ...)
1118 {
1119 va_list args;
1120 va_start(args, message);
1121 getTokenizer(closure)->error(XMLTokenizer::warning, message, args);
1122 va_end(args);
1123 }
1124
1125 WTF_ATTRIBUTE_PRINTF(2, 3)
fatalErrorHandler(void * closure,const char * message,...)1126 static void fatalErrorHandler(void* closure, const char* message, ...)
1127 {
1128 va_list args;
1129 va_start(args, message);
1130 getTokenizer(closure)->error(XMLTokenizer::fatal, message, args);
1131 va_end(args);
1132 }
1133
1134 WTF_ATTRIBUTE_PRINTF(2, 3)
normalErrorHandler(void * closure,const char * message,...)1135 static void normalErrorHandler(void* closure, const char* message, ...)
1136 {
1137 va_list args;
1138 va_start(args, message);
1139 getTokenizer(closure)->error(XMLTokenizer::nonFatal, message, args);
1140 va_end(args);
1141 }
1142
1143 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
1144 // a hack to avoid malloc/free. Using a global variable like this could cause trouble
1145 // if libxml implementation details were to change
1146 static xmlChar sharedXHTMLEntityResult[5] = {0, 0, 0, 0, 0};
1147
sharedXHTMLEntity()1148 static xmlEntityPtr sharedXHTMLEntity()
1149 {
1150 static xmlEntity entity;
1151 if (!entity.type) {
1152 entity.type = XML_ENTITY_DECL;
1153 entity.orig = sharedXHTMLEntityResult;
1154 entity.content = sharedXHTMLEntityResult;
1155 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
1156 }
1157 return &entity;
1158 }
1159
getXHTMLEntity(const xmlChar * name)1160 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1161 {
1162 UChar c = decodeNamedEntity(reinterpret_cast<const char*>(name));
1163 if (!c)
1164 return 0;
1165
1166 CString value = String(&c, 1).utf8();
1167 ASSERT(value.length() < 5);
1168 xmlEntityPtr entity = sharedXHTMLEntity();
1169 entity->length = value.length();
1170 entity->name = name;
1171 memcpy(sharedXHTMLEntityResult, value.data(), entity->length + 1);
1172
1173 return entity;
1174 }
1175
getEntityHandler(void * closure,const xmlChar * name)1176 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1177 {
1178 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1179 xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1180 if (ent) {
1181 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1182 return ent;
1183 }
1184
1185 ent = xmlGetDocEntity(ctxt->myDoc, name);
1186 if (!ent && (getTokenizer(closure)->isXHTMLDocument()
1187 #if ENABLE(XHTMLMP)
1188 || getTokenizer(closure)->isXHTMLMPDocument()
1189 #endif
1190 #if ENABLE(WML)
1191 || getTokenizer(closure)->isWMLDocument()
1192 #endif
1193 )) {
1194 ent = getXHTMLEntity(name);
1195 if (ent)
1196 ent->etype = XML_INTERNAL_GENERAL_ENTITY;
1197 }
1198
1199 return ent;
1200 }
1201
startDocumentHandler(void * closure)1202 static void startDocumentHandler(void* closure)
1203 {
1204 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1205 getTokenizer(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
1206 xmlSAX2StartDocument(closure);
1207 }
1208
endDocumentHandler(void * closure)1209 static void endDocumentHandler(void* closure)
1210 {
1211 getTokenizer(closure)->endDocument();
1212 xmlSAX2EndDocument(closure);
1213 }
1214
internalSubsetHandler(void * closure,const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)1215 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1216 {
1217 getTokenizer(closure)->internalSubset(name, externalID, systemID);
1218 xmlSAX2InternalSubset(closure, name, externalID, systemID);
1219 }
1220
externalSubsetHandler(void * closure,const xmlChar *,const xmlChar * externalId,const xmlChar *)1221 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1222 {
1223 String extId = toString(externalId);
1224 if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
1225 || (extId == "-//W3C//DTD XHTML 1.1//EN")
1226 || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
1227 || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
1228 || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
1229 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
1230 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
1231 #if !ENABLE(XHTMLMP)
1232 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
1233 #endif
1234 )
1235 getTokenizer(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
1236 }
1237
// Intentionally empty SAX ignorable-whitespace callback. There is nothing to
// do here, but the handler must be installed to work around a crasher:
// http://bugzilla.gnome.org/show_bug.cgi?id=172255
// http://bugs.webkit.org/show_bug.cgi?id=5792
static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
{
}
1244
initializeParserContext(const char * chunk)1245 void XMLTokenizer::initializeParserContext(const char* chunk)
1246 {
1247 xmlSAXHandler sax;
1248 memset(&sax, 0, sizeof(sax));
1249
1250 sax.error = normalErrorHandler;
1251 sax.fatalError = fatalErrorHandler;
1252 sax.characters = charactersHandler;
1253 sax.processingInstruction = processingInstructionHandler;
1254 sax.cdataBlock = cdataBlockHandler;
1255 sax.comment = commentHandler;
1256 sax.warning = warningHandler;
1257 sax.startElementNs = startElementNsHandler;
1258 sax.endElementNs = endElementNsHandler;
1259 sax.getEntity = getEntityHandler;
1260 sax.startDocument = startDocumentHandler;
1261 sax.endDocument = endDocumentHandler;
1262 sax.internalSubset = internalSubsetHandler;
1263 sax.externalSubset = externalSubsetHandler;
1264 sax.ignorableWhitespace = ignorableWhitespaceHandler;
1265 sax.entityDecl = xmlSAX2EntityDecl;
1266 sax.initialized = XML_SAX2_MAGIC;
1267 m_parserStopped = false;
1268 m_sawError = false;
1269 m_sawXSLTransform = false;
1270 m_sawFirstElement = false;
1271
1272 XMLTokenizerScope scope(m_doc->docLoader());
1273 if (m_parsingFragment)
1274 m_context = createMemoryParser(&sax, this, chunk);
1275 else
1276 m_context = createStringParser(&sax, this);
1277 }
1278
doEnd()1279 void XMLTokenizer::doEnd()
1280 {
1281 #if ENABLE(XSLT)
1282 if (m_sawXSLTransform) {
1283 m_doc->setTransformSource(xmlDocPtrForString(m_doc->docLoader(), m_originalSourceForTransform, m_doc->url().string()));
1284
1285 m_doc->setParsing(false); // Make the doc think it's done, so it will apply xsl sheets.
1286 m_doc->updateStyleSelector();
1287 m_doc->setParsing(true);
1288 m_parserStopped = true;
1289 }
1290 #endif
1291
1292 if (m_context) {
1293 // Tell libxml we're done.
1294 {
1295 XMLTokenizerScope scope(m_doc->docLoader());
1296 xmlParseChunk(m_context, 0, 0, 1);
1297 }
1298
1299 if (m_context->myDoc)
1300 xmlFreeDoc(m_context->myDoc);
1301 xmlFreeParserCtxt(m_context);
1302 m_context = 0;
1303 }
1304 }
1305
1306 #if ENABLE(XSLT)
xmlDocPtrForString(DocLoader * docLoader,const String & source,const String & url)1307 void* xmlDocPtrForString(DocLoader* docLoader, const String& source, const String& url)
1308 {
1309 if (source.isEmpty())
1310 return 0;
1311
1312 // Parse in a single chunk into an xmlDocPtr
1313 // FIXME: Hook up error handlers so that a failure to parse the main document results in
1314 // good error messages.
1315 const UChar BOM = 0xFEFF;
1316 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
1317
1318 XMLTokenizerScope scope(docLoader, errorFunc, 0);
1319 xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()),
1320 source.length() * sizeof(UChar),
1321 url.latin1().data(),
1322 BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE",
1323 XSLT_PARSE_OPTIONS);
1324 return sourceDoc;
1325 }
1326 #endif
1327
lineNumber() const1328 int XMLTokenizer::lineNumber() const
1329 {
1330 return m_context ? m_context->input->line : 1;
1331 }
1332
columnNumber() const1333 int XMLTokenizer::columnNumber() const
1334 {
1335 return m_context ? m_context->input->col : 1;
1336 }
1337
stopParsing()1338 void XMLTokenizer::stopParsing()
1339 {
1340 Tokenizer::stopParsing();
1341 xmlStopParser(m_context);
1342 }
1343
resumeParsing()1344 void XMLTokenizer::resumeParsing()
1345 {
1346 ASSERT(m_parserPaused);
1347
1348 m_parserPaused = false;
1349
1350 // First, execute any pending callbacks
1351 while (!m_pendingCallbacks->isEmpty()) {
1352 m_pendingCallbacks->callAndRemoveFirstCallback(this);
1353
1354 // A callback paused the parser
1355 if (m_parserPaused)
1356 return;
1357 }
1358
1359 // Then, write any pending data
1360 SegmentedString rest = m_pendingSrc;
1361 m_pendingSrc.clear();
1362 write(rest, false);
1363
1364 // Finally, if finish() has been called and write() didn't result
1365 // in any further callbacks being queued, call end()
1366 if (m_finishCalled && m_pendingCallbacks->isEmpty())
1367 end();
1368 }
1369
parseXMLDocumentFragment(const String & chunk,DocumentFragment * fragment,Element * parent)1370 bool parseXMLDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* parent)
1371 {
1372 if (!chunk.length())
1373 return true;
1374
1375 XMLTokenizer tokenizer(fragment, parent);
1376
1377 CString chunkAsUtf8 = chunk.utf8();
1378 tokenizer.initializeParserContext(chunkAsUtf8.data());
1379
1380 xmlParseContent(tokenizer.m_context);
1381
1382 tokenizer.endDocument();
1383
1384 // Check if all the chunk has been processed.
1385 long bytesProcessed = xmlByteConsumed(tokenizer.m_context);
1386 if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length())
1387 return false;
1388
1389 // No error if the chunk is well formed or it is not but we have no error.
1390 return tokenizer.m_context->wellFormed || xmlCtxtGetLastError(tokenizer.m_context) == 0;
1391 }
1392
1393 // --------------------------------
1394
1395 struct AttributeParseState {
1396 HashMap<String, String> attributes;
1397 bool gotAttributes;
1398 };
1399
attributesStartElementNsHandler(void * closure,const xmlChar * xmlLocalName,const xmlChar *,const xmlChar *,int,const xmlChar **,int nb_attributes,int,const xmlChar ** libxmlAttributes)1400 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1401 const xmlChar* /*xmlURI*/, int /*nb_namespaces*/, const xmlChar** /*namespaces*/,
1402 int nb_attributes, int /*nb_defaulted*/, const xmlChar** libxmlAttributes)
1403 {
1404 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1405 return;
1406
1407 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1408 AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1409
1410 state->gotAttributes = true;
1411
1412 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1413 for (int i = 0; i < nb_attributes; i++) {
1414 String attrLocalName = toString(attributes[i].localname);
1415 int valueLength = (int) (attributes[i].end - attributes[i].value);
1416 String attrValue = toString(attributes[i].value, valueLength);
1417 String attrPrefix = toString(attributes[i].prefix);
1418 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1419
1420 state->attributes.set(attrQName, attrValue);
1421 }
1422 }
1423
parseAttributes(const String & string,bool & attrsOK)1424 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1425 {
1426 AttributeParseState state;
1427 state.gotAttributes = false;
1428
1429 xmlSAXHandler sax;
1430 memset(&sax, 0, sizeof(sax));
1431 sax.startElementNs = attributesStartElementNsHandler;
1432 sax.initialized = XML_SAX2_MAGIC;
1433 xmlParserCtxtPtr parser = createStringParser(&sax, &state);
1434 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1435 xmlParseChunk(parser, reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1);
1436 if (parser->myDoc)
1437 xmlFreeDoc(parser->myDoc);
1438 xmlFreeParserCtxt(parser);
1439 attrsOK = state.gotAttributes;
1440 return state.attributes;
1441 }
1442
1443 }
1444