• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * This file is part of the DOM implementation for KDE.
3  *
4  * Copyright (C) 2000 Peter Kelly (pmk@post.com)
5  * Copyright (C) 2005, 2006 Apple Computer, Inc.
6  * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
7  * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
8  * Copyright (C) 2007 The Android Open Source Project
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Library General Public
12  * License as published by the Free Software Foundation; either
13  * version 2 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Library General Public License for more details.
19  *
20  * You should have received a copy of the GNU Library General Public License
21  * along with this library; see the file COPYING.LIB.  If not, write to
22  * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23  * Boston, MA 02111-1307, USA.
24  */
25 
26 #include "config.h"
27 #include "XMLTokenizer.h"
28 
29 #include "CDATASection.h"
30 #include "CachedScript.h"
31 #include "Comment.h"
32 #include "CString.h"
33 #include "DocLoader.h"
34 #include "Document.h"
35 #include "DocumentFragment.h"
36 #include "Frame.h"
37 #include "FrameLoader.h"
38 #include "FrameView.h"
39 #include "HTMLNames.h"
40 #include "HTMLScriptElement.h"
41 #include "HTMLTableSectionElement.h"
42 #include "HTMLTokenizer.h"
43 #include "ProcessingInstruction.h"
44 #include "EventNames.h"
45 
// strndup() is missing on some platforms, so this is a portable stand-in <reed>.
// Returns a malloc'ed, NUL-terminated copy of at most `len` characters of `src`
// (copying stops early at the first NUL in `src`), or NULL when the allocation
// fails.  The caller releases the result with free().
static char* portable_strndup(const char src[], size_t len)
{
    char* const copy = static_cast<char*>(malloc(len + 1));
    if (!copy)
        return NULL;

    size_t i = 0;
    for (; i < len; ++i) {
        copy[i] = src[i];
        // The terminator was just copied; nothing more to do.
        if (!src[i])
            return copy;
    }
    copy[i] = 0;
    return copy;
}
61 
62 namespace WebCore {
63 
64 using namespace EventNames;
65 using namespace HTMLNames;
66 
// Cap on how many messages error() records; fatal errors are recorded regardless.
const int maxErrors = 25;
68 
69 class PendingCallbacks {
70 public:
PendingCallbacks()71     PendingCallbacks() {
72         m_callbacks.setAutoDelete(true);
73     }
74 
appendStartElementNSCallback(const XML_Char * name,const XML_Char ** atts)75     void appendStartElementNSCallback(const XML_Char* name, const XML_Char** atts) {
76         PendingStartElementNSCallback* callback = new PendingStartElementNSCallback;
77 
78         callback->name = strdup(name);
79         callback->count = 0;
80         while (atts[callback->count])
81             callback->count++;
82         callback->atts = (XML_Char**)malloc(sizeof(XML_Char*) * (callback->count+1));
83         for (int i=0; i<callback->count; i++)
84             callback->atts[i] = strdup(atts[i]);
85         callback->atts[callback->count] = NULL;
86 
87         m_callbacks.append(callback);
88     }
89 
appendEndElementNSCallback()90     void appendEndElementNSCallback() {
91         PendingEndElementNSCallback* callback = new PendingEndElementNSCallback;
92 
93         m_callbacks.append(callback);
94     }
95 
appendCharactersCallback(const XML_Char * s,int len)96     void appendCharactersCallback(const XML_Char* s, int len) {
97         PendingCharactersCallback* callback = new PendingCharactersCallback;
98 
99         callback->s = portable_strndup(s, len);
100         callback->len = len;
101 
102         m_callbacks.append(callback);
103     }
104 
appendProcessingInstructionCallback(const XML_Char * target,const XML_Char * data)105     void appendProcessingInstructionCallback(const XML_Char* target, const XML_Char* data) {
106         PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback;
107 
108         callback->target = strdup(target);
109         callback->data = strdup(data);
110 
111         m_callbacks.append(callback);
112     }
113 
appendStartCDATABlockCallback()114     void appendStartCDATABlockCallback() {
115         PendingStartCDATABlockCallback* callback = new PendingStartCDATABlockCallback;
116 
117         m_callbacks.append(callback);
118     }
119 
appendEndCDATABlockCallback()120     void appendEndCDATABlockCallback() {
121         PendingEndCDATABlockCallback* callback = new PendingEndCDATABlockCallback;
122 
123         m_callbacks.append(callback);
124     }
125 
appendCommentCallback(const XML_Char * s)126     void appendCommentCallback(const XML_Char* s) {
127         PendingCommentCallback* callback = new PendingCommentCallback;
128 
129         callback->s = strdup(s);
130 
131         m_callbacks.append(callback);
132     }
133 
appendErrorCallback(XMLTokenizer::ErrorType type,const char * message,int lineNumber,int columnNumber)134     void appendErrorCallback(XMLTokenizer::ErrorType type, const char* message, int lineNumber, int columnNumber) {
135         PendingErrorCallback* callback = new PendingErrorCallback;
136 
137         callback->message = strdup(message);
138         callback->type = type;
139         callback->lineNumber = lineNumber;
140         callback->columnNumber = columnNumber;
141 
142         m_callbacks.append(callback);
143     }
144 
callAndRemoveFirstCallback(XMLTokenizer * tokenizer)145     void callAndRemoveFirstCallback(XMLTokenizer* tokenizer) {
146         PendingCallback* cb = m_callbacks.getFirst();
147 
148         cb->call(tokenizer);
149         m_callbacks.removeFirst();
150     }
151 
isEmpty() const152     bool isEmpty() const { return m_callbacks.isEmpty(); }
153 
154 private:
155     struct PendingCallback {
156 
~PendingCallbackWebCore::PendingCallbacks::PendingCallback157         virtual ~PendingCallback() { }
158 
159         virtual void call(XMLTokenizer* tokenizer) = 0;
160     };
161 
162     struct PendingStartElementNSCallback : public PendingCallback {
~PendingStartElementNSCallbackWebCore::PendingCallbacks::PendingStartElementNSCallback163         virtual ~PendingStartElementNSCallback() {
164             free(name);
165             for (int i=0; i<count; i++)
166                 free(atts[i]);
167             free(atts);
168         }
169 
callWebCore::PendingCallbacks::PendingStartElementNSCallback170         virtual void call(XMLTokenizer* tokenizer) {
171             tokenizer->startElementNs(name, (const XML_Char**)(atts));
172         }
173 
174         XML_Char* name;
175         int count;
176         XML_Char** atts;
177     };
178 
179     struct PendingEndElementNSCallback : public PendingCallback {
callWebCore::PendingCallbacks::PendingEndElementNSCallback180         virtual void call(XMLTokenizer* tokenizer) {
181             tokenizer->endElementNs();
182         }
183     };
184 
185     struct PendingCharactersCallback : public PendingCallback {
~PendingCharactersCallbackWebCore::PendingCallbacks::PendingCharactersCallback186         virtual ~PendingCharactersCallback() {
187             free(s);
188         }
189 
callWebCore::PendingCallbacks::PendingCharactersCallback190         virtual void call(XMLTokenizer* tokenizer) {
191             tokenizer->characters(s, len);
192         }
193 
194         XML_Char* s;
195         int len;
196     };
197 
198     struct PendingProcessingInstructionCallback : public PendingCallback {
~PendingProcessingInstructionCallbackWebCore::PendingCallbacks::PendingProcessingInstructionCallback199         virtual ~PendingProcessingInstructionCallback() {
200             free(target);
201             free(data);
202         }
203 
callWebCore::PendingCallbacks::PendingProcessingInstructionCallback204         virtual void call(XMLTokenizer* tokenizer) {
205             tokenizer->processingInstruction(target, data);
206         }
207 
208         XML_Char* target;
209         XML_Char* data;
210     };
211 
212     struct PendingStartCDATABlockCallback : public PendingCallback {
callWebCore::PendingCallbacks::PendingStartCDATABlockCallback213         virtual void call(XMLTokenizer* tokenizer) {
214             tokenizer->startCdata();
215         }
216     };
217 
218     struct PendingEndCDATABlockCallback : public PendingCallback {
callWebCore::PendingCallbacks::PendingEndCDATABlockCallback219         virtual void call(XMLTokenizer* tokenizer) {
220             tokenizer->endCdata();
221         }
222     };
223 
224     struct PendingCommentCallback : public PendingCallback {
~PendingCommentCallbackWebCore::PendingCallbacks::PendingCommentCallback225         virtual ~PendingCommentCallback() {
226             free(s);
227         }
228 
callWebCore::PendingCallbacks::PendingCommentCallback229         virtual void call(XMLTokenizer* tokenizer) {
230             tokenizer->comment(s);
231         }
232 
233         XML_Char* s;
234     };
235 
236     struct PendingErrorCallback: public PendingCallback {
~PendingErrorCallbackWebCore::PendingCallbacks::PendingErrorCallback237         virtual ~PendingErrorCallback() {
238             free (message);
239         }
240 
callWebCore::PendingCallbacks::PendingErrorCallback241         virtual void call(XMLTokenizer* tokenizer) {
242             tokenizer->error(type, message, lineNumber, columnNumber);
243         }
244 
245         XMLTokenizer::ErrorType type;
246         char* message;
247         int lineNumber;
248         int columnNumber;
249     };
250 
251 public:
252     DeprecatedPtrList<PendingCallback> m_callbacks;
253 };
254 
255 // --------------------------------
256 
XMLTokenizer(Document * _doc,FrameView * _view)257 XMLTokenizer::XMLTokenizer(Document *_doc, FrameView *_view)
258     : m_doc(_doc)
259     , m_view(_view)
260     , m_parser(0)
261     , m_currentNode(_doc)
262     , m_currentNodeIsReferenced(false)
263     , m_sawError(false)
264     , m_sawXSLTransform(false)
265     , m_sawFirstElement(false)
266     , m_parserPaused(false)
267     , m_requestingScript(false)
268     , m_finishCalled(false)
269     , m_errorCount(0)
270     , m_pendingScript(0)
271     , m_scriptStartLine(0)
272     , m_parsingFragment(false)
273     , m_pendingCallbacks(new PendingCallbacks)
274 {
275 }
276 
XMLTokenizer(DocumentFragment * fragment,Element * parentElement)277 XMLTokenizer::XMLTokenizer(DocumentFragment *fragment, Element *parentElement)
278     : m_doc(fragment->document())
279     , m_view(0)
280     , m_parser(0)
281     , m_currentNode(fragment)
282     , m_currentNodeIsReferenced(fragment)
283     , m_sawError(false)
284     , m_sawXSLTransform(false)
285     , m_sawFirstElement(false)
286     , m_parserPaused(false)
287     , m_requestingScript(false)
288     , m_finishCalled(false)
289     , m_errorCount(0)
290     , m_pendingScript(0)
291     , m_scriptStartLine(0)
292     , m_parsingFragment(true)
293     , m_pendingCallbacks(new PendingCallbacks)
294 {
295     if (fragment)
296         fragment->ref();
297     if (m_doc)
298         m_doc->ref();
299 
300     // Add namespaces based on the parent node
301     Vector<Element*> elemStack;
302     while (parentElement) {
303         elemStack.append(parentElement);
304 
305         Node* n = parentElement->parentNode();
306         if (!n || !n->isElementNode())
307             break;
308         parentElement = static_cast<Element*>(n);
309     }
310 
311     if (elemStack.isEmpty())
312         return;
313 
314     for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
315         if (NamedAttrMap* attrs = element->attributes()) {
316             for (unsigned i = 0; i < attrs->length(); i++) {
317                 Attribute* attr = attrs->attributeItem(i);
318                 if (attr->localName() == "xmlns")
319                     m_defaultNamespaceURI = attr->value();
320                 else if (attr->prefix() == "xmlns")
321                     m_prefixToNamespaceMap.set(attr->localName(), attr->value());
322             }
323         }
324     }
325 }
326 
~XMLTokenizer()327 XMLTokenizer::~XMLTokenizer()
328 {
329     setCurrentNode(0);
330     if (m_parsingFragment && m_doc)
331         m_doc->deref();
332     if (m_pendingScript)
333         m_pendingScript->deref(this);
334 }
335 
setCurrentNode(Node * n)336 void XMLTokenizer::setCurrentNode(Node* n)
337 {
338     bool nodeNeedsReference = n && n != m_doc;
339     if (nodeNeedsReference)
340         n->ref();
341     if (m_currentNodeIsReferenced)
342         m_currentNode->deref();
343     m_currentNode = n;
344     m_currentNodeIsReferenced = nodeNeedsReference;
345 }
346 
347 // use space instead of ':' as separator because ':' can be inside an uri
348 const XML_Char tripletSep=' ';
349 
toQString(const XML_Char * str,unsigned int len)350 inline DeprecatedString toQString(const XML_Char* str, unsigned int len)
351 {
352     return DeprecatedString::fromUtf8(reinterpret_cast<const char *>(str), len);
353 }
354 
toQString(const XML_Char * str)355 inline DeprecatedString toQString(const XML_Char* str)
356 {
357     return DeprecatedString::fromUtf8(str ? reinterpret_cast<const char *>(str) : "");
358 }
359 
360 // triplet is formatted as URI + sep + local_name + sep + prefix.
splitTriplet(const XML_Char * name,String & uri,String & localname,String & prefix)361 static inline void splitTriplet(const XML_Char *name, String &uri, String &localname, String &prefix)
362 {
363     String string[3];
364     int found = 0;
365     const char *start = reinterpret_cast<const char *>(name);
366 
367     while(start && (found < 3)) {
368         char *next = strchr(start, tripletSep);
369         if (next) {
370             string[found++] = toQString(start, (next-start));
371             start = next+1;
372         } else {
373             string[found++] = toQString(start);
374             break;
375         }
376     }
377 
378     switch(found) {
379     case 1:
380         localname = string[0];
381         break;
382     case 2:
383         uri = string[0];
384         localname = string[1];
385         break;
386     case 3:
387         uri = string[0];
388         localname = string[1];
389         prefix = string[2];
390         break;
391     }
392 }
393 
handleElementNamespaces(Element * newElement,const String & uri,const String & prefix,ExceptionCode & exceptioncode)394 static inline void handleElementNamespaces(Element *newElement, const String &uri, const String &prefix, ExceptionCode &exceptioncode)
395 {
396     if (uri.isEmpty())
397         return;
398 
399     String namespaceQName("xmlns");
400     if(!prefix.isEmpty())
401         namespaceQName += String(":")+ prefix;
402     newElement->setAttributeNS(String("http://www.w3.org/2000/xmlns/"), namespaceQName, uri, exceptioncode);
403 }
404 
handleElementAttributes(Element * newElement,const XML_Char ** atts,ExceptionCode & exceptioncode)405 static inline void handleElementAttributes(Element *newElement, const XML_Char **atts, ExceptionCode &exceptioncode)
406 {
407     for (int i = 0; atts[i]; i += 2) {
408         String attrURI, attrLocalName, attrPrefix;
409         splitTriplet(atts[i], attrURI, attrLocalName, attrPrefix);
410         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + String(":") + attrLocalName;
411         String attrValue = toQString(atts[i+1]);
412         newElement->setAttributeNS(attrURI, attrQName, attrValue, exceptioncode);
413         if (exceptioncode) // exception while setting attributes
414             return;
415     }
416 }
417 
startElementNs(const XML_Char * name,const XML_Char ** atts)418 void XMLTokenizer::startElementNs(const XML_Char *name, const XML_Char **atts)
419 {
420     if (m_parserStopped)
421         return;
422 
423     if (m_parserPaused) {
424         m_pendingCallbacks->appendStartElementNSCallback(name, atts);
425         return;
426     }
427 
428     m_sawFirstElement = true;
429 
430     exitText();
431 
432     String uri, localName, prefix;
433     splitTriplet(name, uri, localName, prefix);
434     String qName = prefix.isEmpty() ? localName : prefix + ":" + localName;
435 
436     if (m_parsingFragment && uri.isEmpty()) {
437         if (!prefix.isEmpty())
438             uri = String(m_prefixToNamespaceMap.get(prefix.impl()));
439         else
440             uri = m_defaultNamespaceURI;
441     }
442 
443     ExceptionCode ec = 0;
444     RefPtr<Element> newElement = m_doc->createElementNS(uri, qName, ec);
445     if (!newElement) {
446         stopParsing();
447         return;
448     }
449 
450     handleElementNamespaces(newElement.get(), uri, prefix, ec);
451     if (ec) {
452         stopParsing();
453         return;
454     }
455 
456     handleElementAttributes(newElement.get(), atts, ec);
457     if (ec) {
458         stopParsing();
459         return;
460     }
461 
462     if (newElement->hasTagName(scriptTag))
463         static_cast<HTMLScriptElement*>(newElement.get())->setCreatedByParser(true);
464 
465     if (newElement->hasTagName(HTMLNames::scriptTag))
466         m_scriptStartLine = lineNumber();
467 
468     if (!m_currentNode->addChild(newElement.get())) {
469         stopParsing();
470         return;
471     }
472 
473     setCurrentNode(newElement.get());
474     if (m_view && !newElement->attached())
475         newElement->attach();
476 }
477 
endElementNs()478 void XMLTokenizer::endElementNs()
479 {
480     if (m_parserStopped)
481         return;
482 
483     if (m_parserPaused) {
484         m_pendingCallbacks->appendEndElementNSCallback();
485         return;
486     }
487 
488     exitText();
489 
490     Node* n = m_currentNode;
491     RefPtr<Node> parent = n->parentNode();
492     n->finishedParsing();
493 
494     // don't load external scripts for standalone documents (for now)
495     if (n->isElementNode() && m_view && static_cast<Element*>(n)->hasTagName(scriptTag)) {
496         ASSERT(!m_pendingScript);
497 
498         m_requestingScript = true;
499 
500         Element* scriptElement = static_cast<Element*>(n);
501         String scriptHref;
502 
503         if (static_cast<Element*>(n)->hasTagName(scriptTag))
504             scriptHref = scriptElement->getAttribute(srcAttr);
505 
506         if (!scriptHref.isEmpty()) {
507             // we have a src attribute
508             const AtomicString& charset = scriptElement->getAttribute(charsetAttr);
509             if ((m_pendingScript = m_doc->docLoader()->requestScript(scriptHref, charset))) {
510                 m_scriptElement = scriptElement;
511                 m_pendingScript->ref(this);
512 
513                 // m_pendingScript will be 0 if script was already loaded and ref() executed it
514                 if (m_pendingScript)
515                     pauseParsing();
516             } else
517                 m_scriptElement = 0;
518 
519         } else {
520             String scriptCode = "";
521             for (Node* child = scriptElement->firstChild(); child; child = child->nextSibling()) {
522                 if (child->isTextNode() || child->nodeType() == Node::CDATA_SECTION_NODE)
523                     scriptCode += static_cast<CharacterData*>(child)->data();
524             }
525             m_view->frame()->loader()->executeScript(m_doc->URL(), m_scriptStartLine - 1, scriptCode);
526         }
527 
528         m_requestingScript = false;
529     }
530 
531     setCurrentNode(parent.get());
532 }
533 
characters(const XML_Char * s,int len)534 void XMLTokenizer::characters(const XML_Char *s, int len)
535 {
536     if (m_parserStopped)
537         return;
538 
539     if (m_parserPaused) {
540         m_pendingCallbacks->appendCharactersCallback(s, len);
541         return;
542     }
543 
544     if (m_currentNode->isTextNode() || enterText()) {
545         ExceptionCode ec = 0;
546         static_cast<Text*>(m_currentNode)->appendData(toQString(s, len), ec);
547     }
548 }
549 
enterText()550 bool XMLTokenizer::enterText()
551 {
552     RefPtr<Node> newNode = new Text(m_doc, "");
553     if (!m_currentNode->addChild(newNode.get()))
554         return false;
555     setCurrentNode(newNode.get());
556     return true;
557 }
558 
exitText()559 void XMLTokenizer::exitText()
560 {
561     if (m_parserStopped)
562         return;
563 
564     if (!m_currentNode || !m_currentNode->isTextNode())
565         return;
566 
567     if (m_view && m_currentNode && !m_currentNode->attached())
568         m_currentNode->attach();
569 
570     // FIXME: What's the right thing to do if the parent is really 0?
571     // Just leaving the current node set to the text node doesn't make much sense.
572     if (Node* par = m_currentNode->parentNode())
573         setCurrentNode(par);
574 }
575 
processingInstruction(const XML_Char * target,const XML_Char * data)576 void XMLTokenizer::processingInstruction(const XML_Char *target, const XML_Char *data)
577 {
578     if (m_parserStopped)
579         return;
580 
581     if (m_parserPaused) {
582         m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
583         return;
584     }
585 
586     exitText();
587 
588     // ### handle exceptions
589     int exception = 0;
590     RefPtr<ProcessingInstruction> pi = m_doc->createProcessingInstruction(
591         toQString(target), toQString(data), exception);
592     if (exception)
593         return;
594 
595     if (!m_currentNode->addChild(pi.get()))
596         return;
597     if (m_view && !pi->attached())
598         pi->attach();
599 
600     // don't load stylesheets for standalone documents
601     if (m_doc->frame()) {
602         m_sawXSLTransform = !m_sawFirstElement && !pi->checkStyleSheet();
603         if (m_sawXSLTransform)
604             stopParsing();
605     }
606 }
607 
comment(const XML_Char * s)608 void XMLTokenizer::comment(const XML_Char *s)
609 {
610     if (m_parserStopped)
611         return;
612 
613     if (m_parserPaused) {
614         m_pendingCallbacks->appendCommentCallback(s);
615         return;
616     }
617 
618     exitText();
619 
620     RefPtr<Node> newNode = m_doc->createComment(toQString(s));
621     m_currentNode->addChild(newNode.get());
622     if (m_view && !newNode->attached())
623         newNode->attach();
624 }
625 
startCdata()626 void XMLTokenizer::startCdata()
627 {
628     if (m_parserStopped)
629         return;
630 
631     if (m_parserPaused) {
632         m_pendingCallbacks->appendStartCDATABlockCallback();
633         return;
634     }
635 
636     exitText();
637 
638     RefPtr<Node> newNode = new CDATASection(m_doc, "");
639     if (!m_currentNode->addChild(newNode.get()))
640         return;
641     if (m_view && !newNode->attached())
642         newNode->attach();
643     setCurrentNode(newNode.get());
644 }
645 
endCdata()646 void XMLTokenizer::endCdata()
647 {
648     if (m_parserStopped)
649         return;
650 
651     if (m_parserPaused) {
652         m_pendingCallbacks->appendEndCDATABlockCallback();
653         return;
654     }
655 
656     if (m_currentNode->parentNode() != 0)
657         setCurrentNode(m_currentNode->parentNode());
658 }
659 
startElementHandler(void * userdata,const XML_Char * name,const XML_Char ** atts)660 static void XMLCALL startElementHandler(void *userdata, const XML_Char *name, const XML_Char **atts)
661 {
662     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
663     tokenizer->startElementNs(name, atts);
664 }
665 
endElementHandler(void * userdata,const XML_Char * name)666 static void XMLCALL endElementHandler(void *userdata, const XML_Char *name)
667 {
668     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
669     tokenizer->endElementNs();
670 }
671 
charactersHandler(void * userdata,const XML_Char * s,int len)672 static void charactersHandler(void *userdata, const XML_Char *s, int len)
673 {
674     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
675     tokenizer->characters(s, len);
676 }
677 
processingInstructionHandler(void * userdata,const XML_Char * target,const XML_Char * data)678 static void processingInstructionHandler(void *userdata, const XML_Char *target, const XML_Char *data)
679 {
680     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
681     tokenizer->processingInstruction(target, data);
682 }
683 
commentHandler(void * userdata,const XML_Char * comment)684 static void commentHandler(void *userdata, const XML_Char *comment)
685 {
686     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
687     tokenizer->comment(comment);
688 }
689 
startCdataHandler(void * userdata)690 static void startCdataHandler(void *userdata)
691 {
692     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
693     tokenizer->startCdata();
694 }
695 
endCdataHandler(void * userdata)696 static void endCdataHandler(void *userdata)
697 {
698     XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
699     tokenizer->endCdata();
700 }
701 
unknownEncodingHandler(void * userdata,const XML_Char * name,XML_Encoding * info)702 static int unknownEncodingHandler(void *userdata, const XML_Char *name, XML_Encoding *info)
703 {
704     // Expat doesn't like latin1 so we have to build this map
705     // to do conversion correctly.
706     // FIXME: Create a wrapper for expat that looks like libxml.
707     if (strcasecmp(name, "latin1") == 0)
708     {
709         for (int i=0; i<256; i++) {
710             info->map[i] = i;
711         }
712         return XML_STATUS_OK;
713     }
714     return XML_STATUS_ERROR;
715 }
716 
write(const SegmentedString & s,bool)717 bool XMLTokenizer::write(const SegmentedString&s, bool /*appendData*/ )
718 {
719     String parseString = s.toString();
720 
721     if (m_parserStopped || m_sawXSLTransform)
722         return false;
723 
724     if (m_parserPaused) {
725         m_pendingSrc.append(s);
726         return false;
727     }
728 
729     if (!m_parser) {
730         static const UChar BOM = 0xFEFF;
731         static const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
732         m_parser = XML_ParserCreateNS(BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE", tripletSep);
733         XML_SetUserData(m_parser, (void *)this);
734         XML_SetReturnNSTriplet(m_parser, true);
735 
736         XML_SetStartElementHandler(m_parser, startElementHandler);
737         XML_SetEndElementHandler(m_parser, endElementHandler);
738         XML_SetCharacterDataHandler(m_parser, charactersHandler);
739         XML_SetProcessingInstructionHandler(m_parser, processingInstructionHandler);
740         XML_SetCommentHandler(m_parser, commentHandler);
741         XML_SetStartCdataSectionHandler(m_parser, startCdataHandler);
742         XML_SetEndCdataSectionHandler(m_parser, endCdataHandler);
743         XML_SetUnknownEncodingHandler(m_parser, unknownEncodingHandler, NULL);
744     }
745 
746     enum XML_Status result = XML_Parse(m_parser, (const char*)parseString.characters(), sizeof(UChar) * parseString.length(), false);
747     if (result == XML_STATUS_ERROR) {
748         reportError();
749         return false;
750     }
751 
752     return true;
753 }
754 
end()755 void XMLTokenizer::end()
756 {
757     if (m_parser) {
758         XML_Parse(m_parser, 0, 0, true);
759         XML_ParserFree(m_parser);
760         m_parser = 0;
761     }
762 
763     if (m_sawError)
764         insertErrorMessageBlock();
765     else {
766         exitText();
767         m_doc->updateStyleSelector();
768     }
769 
770     setCurrentNode(0);
771     m_doc->finishedParsing();
772 }
773 
finish()774 void XMLTokenizer::finish()
775 {
776     if (m_parserPaused)
777         m_finishCalled = true;
778     else
779         end();
780 }
781 
reportError()782 void XMLTokenizer::reportError()
783 {
784     ErrorType type = nonFatal;
785     enum XML_Error code = XML_GetErrorCode(m_parser);
786     switch (code) {
787         case XML_ERROR_NO_MEMORY:
788             type = fatal;
789             break;
790         case XML_ERROR_FINISHED:
791             type = warning;
792             break;
793         default:
794             type = nonFatal;
795     }
796     error(type, XML_ErrorString(code), lineNumber(), columnNumber());
797 }
798 
error(ErrorType type,const char * m,int lineNumber,int columnNumber)799 void XMLTokenizer::error(ErrorType type, const char* m, int lineNumber, int columnNumber)
800 {
801     if (type == fatal || m_errorCount < maxErrors) {
802         switch (type) {
803             case warning:
804                 m_errorMessages += String::format("warning on line %d at column %d: %s", lineNumber, columnNumber, m);
805                 break;
806             case fatal:
807             case nonFatal:
808                 m_errorMessages += String::format("error on line %d at column %d: %s", lineNumber, columnNumber, m);
809         }
810         ++m_errorCount;
811     }
812 
813     if (type != warning)
814         m_sawError = true;
815 
816     if (type == fatal)
817         stopParsing();
818 }
819 
createXHTMLParserErrorHeader(Document * doc,const String & errorMessages)820 static inline RefPtr<Element> createXHTMLParserErrorHeader(Document* doc, const String& errorMessages)
821 {
822     ExceptionCode ec = 0;
823     RefPtr<Element> reportElement = doc->createElementNS(xhtmlNamespaceURI, "parsererror", ec);
824     reportElement->setAttribute(styleAttr, "display:block; pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black");
825 
826     RefPtr<Element> h3 = doc->createElementNS(xhtmlNamespaceURI, "h3", ec);
827     reportElement->appendChild(h3.get(), ec);
828     h3->appendChild(doc->createTextNode("This page contains the following errors:"), ec);
829 
830     RefPtr<Element> fixed = doc->createElementNS(xhtmlNamespaceURI, "div", ec);
831     reportElement->appendChild(fixed.get(), ec);
832     fixed->setAttribute(styleAttr, "font-family:monospace;font-size:12px");
833     fixed->appendChild(doc->createTextNode(errorMessages), ec);
834 
835     h3 = doc->createElementNS(xhtmlNamespaceURI, "h3", ec);
836     reportElement->appendChild(h3.get(), ec);
837     h3->appendChild(doc->createTextNode("Below is a rendering of the page up to the first error."), ec);
838 
839     return reportElement;
840 }
841 
insertErrorMessageBlock()842 void XMLTokenizer::insertErrorMessageBlock()
843 {
844     // One or more errors occurred during parsing of the code. Display an error block to the user above
845     // the normal content (the DOM tree is created manually and includes line/col info regarding
846     // where the errors are located)
847 
848     // Create elements for display
849     ExceptionCode ec = 0;
850     Document* doc = m_doc;
851     Node* documentElement = doc->documentElement();
852     if (!documentElement) {
853         RefPtr<Node> rootElement = doc->createElementNS(xhtmlNamespaceURI, "html", ec);
854         doc->appendChild(rootElement, ec);
855         RefPtr<Node> body = doc->createElementNS(xhtmlNamespaceURI, "body", ec);
856         rootElement->appendChild(body, ec);
857         documentElement = body.get();
858     }
859 
860     RefPtr<Element> reportElement = createXHTMLParserErrorHeader(doc, m_errorMessages);
861     documentElement->insertBefore(reportElement, documentElement->firstChild(), ec);
862     doc->updateRendering();
863 }
864 
notifyFinished(CachedResource * finishedObj)865 void XMLTokenizer::notifyFinished(CachedResource *finishedObj)
866 {
867     ASSERT(m_pendingScript == finishedObj);
868 
869     String cachedScriptUrl = m_pendingScript->url();
870     String scriptSource = m_pendingScript->script();
871     bool errorOccurred = m_pendingScript->errorOccurred();
872     m_pendingScript->deref(this);
873     m_pendingScript = 0;
874 
875     RefPtr<Element> e = m_scriptElement;
876     m_scriptElement = 0;
877 
878     if (errorOccurred)
879         EventTargetNodeCast(e.get())->dispatchHTMLEvent(errorEvent, true, false);
880     else {
881         m_view->frame()->loader()->executeScript(cachedScriptUrl, 0, scriptSource);
882         EventTargetNodeCast(e.get())->dispatchHTMLEvent(loadEvent, false, false);
883     }
884 
885     m_scriptElement = 0;
886 
887     if (!m_requestingScript)
888         resumeParsing();
889 }
890 
isWaitingForScripts() const891 bool XMLTokenizer::isWaitingForScripts() const
892 {
893     return m_pendingScript != 0;
894 }
895 
newXMLTokenizer(Document * d,FrameView * v)896 Tokenizer *newXMLTokenizer(Document *d, FrameView *v)
897 {
898     return new XMLTokenizer(d, v);
899 }
900 
lineNumber() const901 int XMLTokenizer::lineNumber() const
902 {
903     return XML_GetCurrentLineNumber(m_parser);
904 }
905 
columnNumber() const906 int XMLTokenizer::columnNumber() const
907 {
908     return XML_GetCurrentColumnNumber(m_parser);
909 }
910 
stopParsing()911 void XMLTokenizer::stopParsing()
912 {
913     Tokenizer::stopParsing();
914     if (m_parser)
915         XML_StopParser(m_parser, 0);
916 }
917 
pauseParsing()918 void XMLTokenizer::pauseParsing()
919 {
920     if (m_parsingFragment)
921         return;
922 
923     m_parserPaused = true;
924 }
925 
resumeParsing()926 void XMLTokenizer::resumeParsing()
927 {
928     ASSERT(m_parserPaused);
929 
930     m_parserPaused = false;
931 
932     // First, execute any pending callbacks
933     while (!m_pendingCallbacks->isEmpty()) {
934         m_pendingCallbacks->callAndRemoveFirstCallback(this);
935 
936         // A callback paused the parser
937         if (m_parserPaused)
938             return;
939     }
940 
941     // Then, write any pending data
942     SegmentedString rest = m_pendingSrc;
943     m_pendingSrc.clear();
944     write(rest, false);
945 
946     // Finally, if finish() has been called and write() didn't result
947     // in any further callbacks being queued, call end()
948     if (m_finishCalled && m_pendingCallbacks->isEmpty())
949         end();
950 }
951 
952 // --------------------------------
953 
parseXMLDocumentFragment(const String & string,DocumentFragment * fragment,Element * parent)954 bool parseXMLDocumentFragment(const String &string, DocumentFragment *fragment, Element *parent)
955 {
956     XMLTokenizer tokenizer(fragment, parent);
957 
958     XML_Parser parser = XML_ParserCreateNS(NULL, tripletSep);
959     tokenizer.setXMLParser(parser);
960 
961     XML_SetUserData(parser, (void *)&tokenizer);
962     XML_SetReturnNSTriplet(parser, true);
963 
964     XML_SetStartElementHandler(parser, startElementHandler);
965     XML_SetEndElementHandler(parser, endElementHandler);
966     XML_SetCharacterDataHandler(parser, charactersHandler);
967     XML_SetProcessingInstructionHandler(parser, processingInstructionHandler);
968     XML_SetCommentHandler(parser, commentHandler);
969     XML_SetStartCdataSectionHandler(parser, startCdataHandler);
970     XML_SetEndCdataSectionHandler(parser, endCdataHandler);
971 
972     CString cString = string.utf8();
973     int result = XML_Parse(parser, cString.data(), cString.length(), true);
974 
975     XML_ParserFree(parser);
976     tokenizer.setXMLParser(0);
977 
978     return result != XML_STATUS_ERROR;
979 }
980 
981 // --------------------------------
982 
983 struct AttributeParseState {
984     HashMap<String, String> attributes;
985     bool gotAttributes;
986 };
987 
attributesStartElementHandler(void * userData,const XML_Char * name,const XML_Char ** atts)988 static void attributesStartElementHandler(void *userData, const XML_Char *name, const XML_Char **atts)
989 {
990     if (strcmp(name, "attrs") != 0)
991         return;
992 
993     if (atts[0] == 0 )
994         return;
995 
996     AttributeParseState *state = static_cast<AttributeParseState *>(userData);
997     state->gotAttributes = true;
998 
999     for (int i = 0; atts[i]; i += 2) {
1000         DeprecatedString attrName = toQString(atts[i]);
1001         DeprecatedString attrValue = toQString(atts[i+1]);
1002         state->attributes.set(attrName, attrValue);
1003     }
1004 }
1005 
parseAttributes(const String & string,bool & attrsOK)1006 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1007 {
1008     AttributeParseState state;
1009     state.gotAttributes = false;
1010 
1011     XML_Parser parser = XML_ParserCreateNS(NULL, tripletSep);
1012     XML_SetUserData(parser, (void *)&state);
1013     XML_SetReturnNSTriplet(parser, true);
1014 
1015     XML_SetStartElementHandler(parser, attributesStartElementHandler);
1016     String input = "<?xml version=\"1.0\"?><attrs " + string.deprecatedString() + " />";
1017     CString cString = input.deprecatedString().utf8();
1018     if ( XML_Parse(parser, cString.data(), cString.length(), true) != XML_STATUS_ERROR )
1019         attrsOK = state.gotAttributes;
1020     XML_ParserFree(parser);
1021 
1022     return state.attributes;
1023 }
1024 
1025 }
1026