1 /**
2 * This file is part of the DOM implementation for KDE.
3 *
4 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
5 * Copyright (C) 2005, 2006 Apple Computer, Inc.
6 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
7 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
8 * Copyright (C) 2007 The Android Open Source Project
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB. If not, write to
22 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
24 */
25
26 #include "config.h"
27 #include "XMLTokenizer.h"
28
29 #include "CDATASection.h"
30 #include "CachedScript.h"
31 #include "Comment.h"
32 #include "CString.h"
33 #include "DocLoader.h"
34 #include "Document.h"
35 #include "DocumentFragment.h"
36 #include "Frame.h"
37 #include "FrameLoader.h"
38 #include "FrameView.h"
39 #include "HTMLNames.h"
40 #include "HTMLScriptElement.h"
41 #include "HTMLTableSectionElement.h"
42 #include "HTMLTokenizer.h"
43 #include "ProcessingInstruction.h"
44 #include "EventNames.h"
45
// Portable stand-in for strndup(), which is not available everywhere <reed>.
// Returns a malloc()ed, NUL-terminated copy of at most len characters of src
// (copying stops early at the first NUL in src), or NULL when the allocation
// fails. The result is released with free().
static char* portable_strndup(const char src[], size_t len)
{
    char* copy = (char*)malloc(len + 1);
    if (!copy)
        return NULL;

    size_t i = 0;
    for (; i < len && src[i] != 0; ++i)
        copy[i] = src[i];
    copy[i] = 0;
    return copy;
}
61
62 namespace WebCore {
63
64 using namespace EventNames;
65 using namespace HTMLNames;
66
// Cap on the number of warnings and non-fatal errors that
// XMLTokenizer::error() records; fatal errors are recorded regardless.
const int maxErrors = 25;
68
69 class PendingCallbacks {
70 public:
PendingCallbacks()71 PendingCallbacks() {
72 m_callbacks.setAutoDelete(true);
73 }
74
appendStartElementNSCallback(const XML_Char * name,const XML_Char ** atts)75 void appendStartElementNSCallback(const XML_Char* name, const XML_Char** atts) {
76 PendingStartElementNSCallback* callback = new PendingStartElementNSCallback;
77
78 callback->name = strdup(name);
79 callback->count = 0;
80 while (atts[callback->count])
81 callback->count++;
82 callback->atts = (XML_Char**)malloc(sizeof(XML_Char*) * (callback->count+1));
83 for (int i=0; i<callback->count; i++)
84 callback->atts[i] = strdup(atts[i]);
85 callback->atts[callback->count] = NULL;
86
87 m_callbacks.append(callback);
88 }
89
appendEndElementNSCallback()90 void appendEndElementNSCallback() {
91 PendingEndElementNSCallback* callback = new PendingEndElementNSCallback;
92
93 m_callbacks.append(callback);
94 }
95
appendCharactersCallback(const XML_Char * s,int len)96 void appendCharactersCallback(const XML_Char* s, int len) {
97 PendingCharactersCallback* callback = new PendingCharactersCallback;
98
99 callback->s = portable_strndup(s, len);
100 callback->len = len;
101
102 m_callbacks.append(callback);
103 }
104
appendProcessingInstructionCallback(const XML_Char * target,const XML_Char * data)105 void appendProcessingInstructionCallback(const XML_Char* target, const XML_Char* data) {
106 PendingProcessingInstructionCallback* callback = new PendingProcessingInstructionCallback;
107
108 callback->target = strdup(target);
109 callback->data = strdup(data);
110
111 m_callbacks.append(callback);
112 }
113
appendStartCDATABlockCallback()114 void appendStartCDATABlockCallback() {
115 PendingStartCDATABlockCallback* callback = new PendingStartCDATABlockCallback;
116
117 m_callbacks.append(callback);
118 }
119
appendEndCDATABlockCallback()120 void appendEndCDATABlockCallback() {
121 PendingEndCDATABlockCallback* callback = new PendingEndCDATABlockCallback;
122
123 m_callbacks.append(callback);
124 }
125
appendCommentCallback(const XML_Char * s)126 void appendCommentCallback(const XML_Char* s) {
127 PendingCommentCallback* callback = new PendingCommentCallback;
128
129 callback->s = strdup(s);
130
131 m_callbacks.append(callback);
132 }
133
appendErrorCallback(XMLTokenizer::ErrorType type,const char * message,int lineNumber,int columnNumber)134 void appendErrorCallback(XMLTokenizer::ErrorType type, const char* message, int lineNumber, int columnNumber) {
135 PendingErrorCallback* callback = new PendingErrorCallback;
136
137 callback->message = strdup(message);
138 callback->type = type;
139 callback->lineNumber = lineNumber;
140 callback->columnNumber = columnNumber;
141
142 m_callbacks.append(callback);
143 }
144
callAndRemoveFirstCallback(XMLTokenizer * tokenizer)145 void callAndRemoveFirstCallback(XMLTokenizer* tokenizer) {
146 PendingCallback* cb = m_callbacks.getFirst();
147
148 cb->call(tokenizer);
149 m_callbacks.removeFirst();
150 }
151
isEmpty() const152 bool isEmpty() const { return m_callbacks.isEmpty(); }
153
154 private:
155 struct PendingCallback {
156
~PendingCallbackWebCore::PendingCallbacks::PendingCallback157 virtual ~PendingCallback() { }
158
159 virtual void call(XMLTokenizer* tokenizer) = 0;
160 };
161
162 struct PendingStartElementNSCallback : public PendingCallback {
~PendingStartElementNSCallbackWebCore::PendingCallbacks::PendingStartElementNSCallback163 virtual ~PendingStartElementNSCallback() {
164 free(name);
165 for (int i=0; i<count; i++)
166 free(atts[i]);
167 free(atts);
168 }
169
callWebCore::PendingCallbacks::PendingStartElementNSCallback170 virtual void call(XMLTokenizer* tokenizer) {
171 tokenizer->startElementNs(name, (const XML_Char**)(atts));
172 }
173
174 XML_Char* name;
175 int count;
176 XML_Char** atts;
177 };
178
179 struct PendingEndElementNSCallback : public PendingCallback {
callWebCore::PendingCallbacks::PendingEndElementNSCallback180 virtual void call(XMLTokenizer* tokenizer) {
181 tokenizer->endElementNs();
182 }
183 };
184
185 struct PendingCharactersCallback : public PendingCallback {
~PendingCharactersCallbackWebCore::PendingCallbacks::PendingCharactersCallback186 virtual ~PendingCharactersCallback() {
187 free(s);
188 }
189
callWebCore::PendingCallbacks::PendingCharactersCallback190 virtual void call(XMLTokenizer* tokenizer) {
191 tokenizer->characters(s, len);
192 }
193
194 XML_Char* s;
195 int len;
196 };
197
198 struct PendingProcessingInstructionCallback : public PendingCallback {
~PendingProcessingInstructionCallbackWebCore::PendingCallbacks::PendingProcessingInstructionCallback199 virtual ~PendingProcessingInstructionCallback() {
200 free(target);
201 free(data);
202 }
203
callWebCore::PendingCallbacks::PendingProcessingInstructionCallback204 virtual void call(XMLTokenizer* tokenizer) {
205 tokenizer->processingInstruction(target, data);
206 }
207
208 XML_Char* target;
209 XML_Char* data;
210 };
211
212 struct PendingStartCDATABlockCallback : public PendingCallback {
callWebCore::PendingCallbacks::PendingStartCDATABlockCallback213 virtual void call(XMLTokenizer* tokenizer) {
214 tokenizer->startCdata();
215 }
216 };
217
218 struct PendingEndCDATABlockCallback : public PendingCallback {
callWebCore::PendingCallbacks::PendingEndCDATABlockCallback219 virtual void call(XMLTokenizer* tokenizer) {
220 tokenizer->endCdata();
221 }
222 };
223
224 struct PendingCommentCallback : public PendingCallback {
~PendingCommentCallbackWebCore::PendingCallbacks::PendingCommentCallback225 virtual ~PendingCommentCallback() {
226 free(s);
227 }
228
callWebCore::PendingCallbacks::PendingCommentCallback229 virtual void call(XMLTokenizer* tokenizer) {
230 tokenizer->comment(s);
231 }
232
233 XML_Char* s;
234 };
235
236 struct PendingErrorCallback: public PendingCallback {
~PendingErrorCallbackWebCore::PendingCallbacks::PendingErrorCallback237 virtual ~PendingErrorCallback() {
238 free (message);
239 }
240
callWebCore::PendingCallbacks::PendingErrorCallback241 virtual void call(XMLTokenizer* tokenizer) {
242 tokenizer->error(type, message, lineNumber, columnNumber);
243 }
244
245 XMLTokenizer::ErrorType type;
246 char* message;
247 int lineNumber;
248 int columnNumber;
249 };
250
251 public:
252 DeprecatedPtrList<PendingCallback> m_callbacks;
253 };
254
255 // --------------------------------
256
XMLTokenizer(Document * _doc,FrameView * _view)257 XMLTokenizer::XMLTokenizer(Document *_doc, FrameView *_view)
258 : m_doc(_doc)
259 , m_view(_view)
260 , m_parser(0)
261 , m_currentNode(_doc)
262 , m_currentNodeIsReferenced(false)
263 , m_sawError(false)
264 , m_sawXSLTransform(false)
265 , m_sawFirstElement(false)
266 , m_parserPaused(false)
267 , m_requestingScript(false)
268 , m_finishCalled(false)
269 , m_errorCount(0)
270 , m_pendingScript(0)
271 , m_scriptStartLine(0)
272 , m_parsingFragment(false)
273 , m_pendingCallbacks(new PendingCallbacks)
274 {
275 }
276
XMLTokenizer(DocumentFragment * fragment,Element * parentElement)277 XMLTokenizer::XMLTokenizer(DocumentFragment *fragment, Element *parentElement)
278 : m_doc(fragment->document())
279 , m_view(0)
280 , m_parser(0)
281 , m_currentNode(fragment)
282 , m_currentNodeIsReferenced(fragment)
283 , m_sawError(false)
284 , m_sawXSLTransform(false)
285 , m_sawFirstElement(false)
286 , m_parserPaused(false)
287 , m_requestingScript(false)
288 , m_finishCalled(false)
289 , m_errorCount(0)
290 , m_pendingScript(0)
291 , m_scriptStartLine(0)
292 , m_parsingFragment(true)
293 , m_pendingCallbacks(new PendingCallbacks)
294 {
295 if (fragment)
296 fragment->ref();
297 if (m_doc)
298 m_doc->ref();
299
300 // Add namespaces based on the parent node
301 Vector<Element*> elemStack;
302 while (parentElement) {
303 elemStack.append(parentElement);
304
305 Node* n = parentElement->parentNode();
306 if (!n || !n->isElementNode())
307 break;
308 parentElement = static_cast<Element*>(n);
309 }
310
311 if (elemStack.isEmpty())
312 return;
313
314 for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
315 if (NamedAttrMap* attrs = element->attributes()) {
316 for (unsigned i = 0; i < attrs->length(); i++) {
317 Attribute* attr = attrs->attributeItem(i);
318 if (attr->localName() == "xmlns")
319 m_defaultNamespaceURI = attr->value();
320 else if (attr->prefix() == "xmlns")
321 m_prefixToNamespaceMap.set(attr->localName(), attr->value());
322 }
323 }
324 }
325 }
326
~XMLTokenizer()327 XMLTokenizer::~XMLTokenizer()
328 {
329 setCurrentNode(0);
330 if (m_parsingFragment && m_doc)
331 m_doc->deref();
332 if (m_pendingScript)
333 m_pendingScript->deref(this);
334 }
335
setCurrentNode(Node * n)336 void XMLTokenizer::setCurrentNode(Node* n)
337 {
338 bool nodeNeedsReference = n && n != m_doc;
339 if (nodeNeedsReference)
340 n->ref();
341 if (m_currentNodeIsReferenced)
342 m_currentNode->deref();
343 m_currentNode = n;
344 m_currentNodeIsReferenced = nodeNeedsReference;
345 }
346
// Separator given to XML_ParserCreateNS() and used by splitTriplet() to take
// expanded names apart. A space is used instead of ':' because ':' can occur
// inside a URI.
const XML_Char tripletSep=' ';
349
toQString(const XML_Char * str,unsigned int len)350 inline DeprecatedString toQString(const XML_Char* str, unsigned int len)
351 {
352 return DeprecatedString::fromUtf8(reinterpret_cast<const char *>(str), len);
353 }
354
toQString(const XML_Char * str)355 inline DeprecatedString toQString(const XML_Char* str)
356 {
357 return DeprecatedString::fromUtf8(str ? reinterpret_cast<const char *>(str) : "");
358 }
359
360 // triplet is formatted as URI + sep + local_name + sep + prefix.
splitTriplet(const XML_Char * name,String & uri,String & localname,String & prefix)361 static inline void splitTriplet(const XML_Char *name, String &uri, String &localname, String &prefix)
362 {
363 String string[3];
364 int found = 0;
365 const char *start = reinterpret_cast<const char *>(name);
366
367 while(start && (found < 3)) {
368 char *next = strchr(start, tripletSep);
369 if (next) {
370 string[found++] = toQString(start, (next-start));
371 start = next+1;
372 } else {
373 string[found++] = toQString(start);
374 break;
375 }
376 }
377
378 switch(found) {
379 case 1:
380 localname = string[0];
381 break;
382 case 2:
383 uri = string[0];
384 localname = string[1];
385 break;
386 case 3:
387 uri = string[0];
388 localname = string[1];
389 prefix = string[2];
390 break;
391 }
392 }
393
handleElementNamespaces(Element * newElement,const String & uri,const String & prefix,ExceptionCode & exceptioncode)394 static inline void handleElementNamespaces(Element *newElement, const String &uri, const String &prefix, ExceptionCode &exceptioncode)
395 {
396 if (uri.isEmpty())
397 return;
398
399 String namespaceQName("xmlns");
400 if(!prefix.isEmpty())
401 namespaceQName += String(":")+ prefix;
402 newElement->setAttributeNS(String("http://www.w3.org/2000/xmlns/"), namespaceQName, uri, exceptioncode);
403 }
404
handleElementAttributes(Element * newElement,const XML_Char ** atts,ExceptionCode & exceptioncode)405 static inline void handleElementAttributes(Element *newElement, const XML_Char **atts, ExceptionCode &exceptioncode)
406 {
407 for (int i = 0; atts[i]; i += 2) {
408 String attrURI, attrLocalName, attrPrefix;
409 splitTriplet(atts[i], attrURI, attrLocalName, attrPrefix);
410 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + String(":") + attrLocalName;
411 String attrValue = toQString(atts[i+1]);
412 newElement->setAttributeNS(attrURI, attrQName, attrValue, exceptioncode);
413 if (exceptioncode) // exception while setting attributes
414 return;
415 }
416 }
417
startElementNs(const XML_Char * name,const XML_Char ** atts)418 void XMLTokenizer::startElementNs(const XML_Char *name, const XML_Char **atts)
419 {
420 if (m_parserStopped)
421 return;
422
423 if (m_parserPaused) {
424 m_pendingCallbacks->appendStartElementNSCallback(name, atts);
425 return;
426 }
427
428 m_sawFirstElement = true;
429
430 exitText();
431
432 String uri, localName, prefix;
433 splitTriplet(name, uri, localName, prefix);
434 String qName = prefix.isEmpty() ? localName : prefix + ":" + localName;
435
436 if (m_parsingFragment && uri.isEmpty()) {
437 if (!prefix.isEmpty())
438 uri = String(m_prefixToNamespaceMap.get(prefix.impl()));
439 else
440 uri = m_defaultNamespaceURI;
441 }
442
443 ExceptionCode ec = 0;
444 RefPtr<Element> newElement = m_doc->createElementNS(uri, qName, ec);
445 if (!newElement) {
446 stopParsing();
447 return;
448 }
449
450 handleElementNamespaces(newElement.get(), uri, prefix, ec);
451 if (ec) {
452 stopParsing();
453 return;
454 }
455
456 handleElementAttributes(newElement.get(), atts, ec);
457 if (ec) {
458 stopParsing();
459 return;
460 }
461
462 if (newElement->hasTagName(scriptTag))
463 static_cast<HTMLScriptElement*>(newElement.get())->setCreatedByParser(true);
464
465 if (newElement->hasTagName(HTMLNames::scriptTag))
466 m_scriptStartLine = lineNumber();
467
468 if (!m_currentNode->addChild(newElement.get())) {
469 stopParsing();
470 return;
471 }
472
473 setCurrentNode(newElement.get());
474 if (m_view && !newElement->attached())
475 newElement->attach();
476 }
477
endElementNs()478 void XMLTokenizer::endElementNs()
479 {
480 if (m_parserStopped)
481 return;
482
483 if (m_parserPaused) {
484 m_pendingCallbacks->appendEndElementNSCallback();
485 return;
486 }
487
488 exitText();
489
490 Node* n = m_currentNode;
491 RefPtr<Node> parent = n->parentNode();
492 n->finishedParsing();
493
494 // don't load external scripts for standalone documents (for now)
495 if (n->isElementNode() && m_view && static_cast<Element*>(n)->hasTagName(scriptTag)) {
496 ASSERT(!m_pendingScript);
497
498 m_requestingScript = true;
499
500 Element* scriptElement = static_cast<Element*>(n);
501 String scriptHref;
502
503 if (static_cast<Element*>(n)->hasTagName(scriptTag))
504 scriptHref = scriptElement->getAttribute(srcAttr);
505
506 if (!scriptHref.isEmpty()) {
507 // we have a src attribute
508 const AtomicString& charset = scriptElement->getAttribute(charsetAttr);
509 if ((m_pendingScript = m_doc->docLoader()->requestScript(scriptHref, charset))) {
510 m_scriptElement = scriptElement;
511 m_pendingScript->ref(this);
512
513 // m_pendingScript will be 0 if script was already loaded and ref() executed it
514 if (m_pendingScript)
515 pauseParsing();
516 } else
517 m_scriptElement = 0;
518
519 } else {
520 String scriptCode = "";
521 for (Node* child = scriptElement->firstChild(); child; child = child->nextSibling()) {
522 if (child->isTextNode() || child->nodeType() == Node::CDATA_SECTION_NODE)
523 scriptCode += static_cast<CharacterData*>(child)->data();
524 }
525 m_view->frame()->loader()->executeScript(m_doc->URL(), m_scriptStartLine - 1, scriptCode);
526 }
527
528 m_requestingScript = false;
529 }
530
531 setCurrentNode(parent.get());
532 }
533
characters(const XML_Char * s,int len)534 void XMLTokenizer::characters(const XML_Char *s, int len)
535 {
536 if (m_parserStopped)
537 return;
538
539 if (m_parserPaused) {
540 m_pendingCallbacks->appendCharactersCallback(s, len);
541 return;
542 }
543
544 if (m_currentNode->isTextNode() || enterText()) {
545 ExceptionCode ec = 0;
546 static_cast<Text*>(m_currentNode)->appendData(toQString(s, len), ec);
547 }
548 }
549
enterText()550 bool XMLTokenizer::enterText()
551 {
552 RefPtr<Node> newNode = new Text(m_doc, "");
553 if (!m_currentNode->addChild(newNode.get()))
554 return false;
555 setCurrentNode(newNode.get());
556 return true;
557 }
558
exitText()559 void XMLTokenizer::exitText()
560 {
561 if (m_parserStopped)
562 return;
563
564 if (!m_currentNode || !m_currentNode->isTextNode())
565 return;
566
567 if (m_view && m_currentNode && !m_currentNode->attached())
568 m_currentNode->attach();
569
570 // FIXME: What's the right thing to do if the parent is really 0?
571 // Just leaving the current node set to the text node doesn't make much sense.
572 if (Node* par = m_currentNode->parentNode())
573 setCurrentNode(par);
574 }
575
processingInstruction(const XML_Char * target,const XML_Char * data)576 void XMLTokenizer::processingInstruction(const XML_Char *target, const XML_Char *data)
577 {
578 if (m_parserStopped)
579 return;
580
581 if (m_parserPaused) {
582 m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
583 return;
584 }
585
586 exitText();
587
588 // ### handle exceptions
589 int exception = 0;
590 RefPtr<ProcessingInstruction> pi = m_doc->createProcessingInstruction(
591 toQString(target), toQString(data), exception);
592 if (exception)
593 return;
594
595 if (!m_currentNode->addChild(pi.get()))
596 return;
597 if (m_view && !pi->attached())
598 pi->attach();
599
600 // don't load stylesheets for standalone documents
601 if (m_doc->frame()) {
602 m_sawXSLTransform = !m_sawFirstElement && !pi->checkStyleSheet();
603 if (m_sawXSLTransform)
604 stopParsing();
605 }
606 }
607
comment(const XML_Char * s)608 void XMLTokenizer::comment(const XML_Char *s)
609 {
610 if (m_parserStopped)
611 return;
612
613 if (m_parserPaused) {
614 m_pendingCallbacks->appendCommentCallback(s);
615 return;
616 }
617
618 exitText();
619
620 RefPtr<Node> newNode = m_doc->createComment(toQString(s));
621 m_currentNode->addChild(newNode.get());
622 if (m_view && !newNode->attached())
623 newNode->attach();
624 }
625
startCdata()626 void XMLTokenizer::startCdata()
627 {
628 if (m_parserStopped)
629 return;
630
631 if (m_parserPaused) {
632 m_pendingCallbacks->appendStartCDATABlockCallback();
633 return;
634 }
635
636 exitText();
637
638 RefPtr<Node> newNode = new CDATASection(m_doc, "");
639 if (!m_currentNode->addChild(newNode.get()))
640 return;
641 if (m_view && !newNode->attached())
642 newNode->attach();
643 setCurrentNode(newNode.get());
644 }
645
endCdata()646 void XMLTokenizer::endCdata()
647 {
648 if (m_parserStopped)
649 return;
650
651 if (m_parserPaused) {
652 m_pendingCallbacks->appendEndCDATABlockCallback();
653 return;
654 }
655
656 if (m_currentNode->parentNode() != 0)
657 setCurrentNode(m_currentNode->parentNode());
658 }
659
startElementHandler(void * userdata,const XML_Char * name,const XML_Char ** atts)660 static void XMLCALL startElementHandler(void *userdata, const XML_Char *name, const XML_Char **atts)
661 {
662 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
663 tokenizer->startElementNs(name, atts);
664 }
665
endElementHandler(void * userdata,const XML_Char * name)666 static void XMLCALL endElementHandler(void *userdata, const XML_Char *name)
667 {
668 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
669 tokenizer->endElementNs();
670 }
671
charactersHandler(void * userdata,const XML_Char * s,int len)672 static void charactersHandler(void *userdata, const XML_Char *s, int len)
673 {
674 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
675 tokenizer->characters(s, len);
676 }
677
processingInstructionHandler(void * userdata,const XML_Char * target,const XML_Char * data)678 static void processingInstructionHandler(void *userdata, const XML_Char *target, const XML_Char *data)
679 {
680 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
681 tokenizer->processingInstruction(target, data);
682 }
683
commentHandler(void * userdata,const XML_Char * comment)684 static void commentHandler(void *userdata, const XML_Char *comment)
685 {
686 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
687 tokenizer->comment(comment);
688 }
689
startCdataHandler(void * userdata)690 static void startCdataHandler(void *userdata)
691 {
692 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
693 tokenizer->startCdata();
694 }
695
endCdataHandler(void * userdata)696 static void endCdataHandler(void *userdata)
697 {
698 XMLTokenizer *tokenizer = static_cast<XMLTokenizer *>(userdata);
699 tokenizer->endCdata();
700 }
701
unknownEncodingHandler(void * userdata,const XML_Char * name,XML_Encoding * info)702 static int unknownEncodingHandler(void *userdata, const XML_Char *name, XML_Encoding *info)
703 {
704 // Expat doesn't like latin1 so we have to build this map
705 // to do conversion correctly.
706 // FIXME: Create a wrapper for expat that looks like libxml.
707 if (strcasecmp(name, "latin1") == 0)
708 {
709 for (int i=0; i<256; i++) {
710 info->map[i] = i;
711 }
712 return XML_STATUS_OK;
713 }
714 return XML_STATUS_ERROR;
715 }
716
write(const SegmentedString & s,bool)717 bool XMLTokenizer::write(const SegmentedString&s, bool /*appendData*/ )
718 {
719 String parseString = s.toString();
720
721 if (m_parserStopped || m_sawXSLTransform)
722 return false;
723
724 if (m_parserPaused) {
725 m_pendingSrc.append(s);
726 return false;
727 }
728
729 if (!m_parser) {
730 static const UChar BOM = 0xFEFF;
731 static const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
732 m_parser = XML_ParserCreateNS(BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE", tripletSep);
733 XML_SetUserData(m_parser, (void *)this);
734 XML_SetReturnNSTriplet(m_parser, true);
735
736 XML_SetStartElementHandler(m_parser, startElementHandler);
737 XML_SetEndElementHandler(m_parser, endElementHandler);
738 XML_SetCharacterDataHandler(m_parser, charactersHandler);
739 XML_SetProcessingInstructionHandler(m_parser, processingInstructionHandler);
740 XML_SetCommentHandler(m_parser, commentHandler);
741 XML_SetStartCdataSectionHandler(m_parser, startCdataHandler);
742 XML_SetEndCdataSectionHandler(m_parser, endCdataHandler);
743 XML_SetUnknownEncodingHandler(m_parser, unknownEncodingHandler, NULL);
744 }
745
746 enum XML_Status result = XML_Parse(m_parser, (const char*)parseString.characters(), sizeof(UChar) * parseString.length(), false);
747 if (result == XML_STATUS_ERROR) {
748 reportError();
749 return false;
750 }
751
752 return true;
753 }
754
end()755 void XMLTokenizer::end()
756 {
757 if (m_parser) {
758 XML_Parse(m_parser, 0, 0, true);
759 XML_ParserFree(m_parser);
760 m_parser = 0;
761 }
762
763 if (m_sawError)
764 insertErrorMessageBlock();
765 else {
766 exitText();
767 m_doc->updateStyleSelector();
768 }
769
770 setCurrentNode(0);
771 m_doc->finishedParsing();
772 }
773
finish()774 void XMLTokenizer::finish()
775 {
776 if (m_parserPaused)
777 m_finishCalled = true;
778 else
779 end();
780 }
781
reportError()782 void XMLTokenizer::reportError()
783 {
784 ErrorType type = nonFatal;
785 enum XML_Error code = XML_GetErrorCode(m_parser);
786 switch (code) {
787 case XML_ERROR_NO_MEMORY:
788 type = fatal;
789 break;
790 case XML_ERROR_FINISHED:
791 type = warning;
792 break;
793 default:
794 type = nonFatal;
795 }
796 error(type, XML_ErrorString(code), lineNumber(), columnNumber());
797 }
798
error(ErrorType type,const char * m,int lineNumber,int columnNumber)799 void XMLTokenizer::error(ErrorType type, const char* m, int lineNumber, int columnNumber)
800 {
801 if (type == fatal || m_errorCount < maxErrors) {
802 switch (type) {
803 case warning:
804 m_errorMessages += String::format("warning on line %d at column %d: %s", lineNumber, columnNumber, m);
805 break;
806 case fatal:
807 case nonFatal:
808 m_errorMessages += String::format("error on line %d at column %d: %s", lineNumber, columnNumber, m);
809 }
810 ++m_errorCount;
811 }
812
813 if (type != warning)
814 m_sawError = true;
815
816 if (type == fatal)
817 stopParsing();
818 }
819
createXHTMLParserErrorHeader(Document * doc,const String & errorMessages)820 static inline RefPtr<Element> createXHTMLParserErrorHeader(Document* doc, const String& errorMessages)
821 {
822 ExceptionCode ec = 0;
823 RefPtr<Element> reportElement = doc->createElementNS(xhtmlNamespaceURI, "parsererror", ec);
824 reportElement->setAttribute(styleAttr, "display:block; pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black");
825
826 RefPtr<Element> h3 = doc->createElementNS(xhtmlNamespaceURI, "h3", ec);
827 reportElement->appendChild(h3.get(), ec);
828 h3->appendChild(doc->createTextNode("This page contains the following errors:"), ec);
829
830 RefPtr<Element> fixed = doc->createElementNS(xhtmlNamespaceURI, "div", ec);
831 reportElement->appendChild(fixed.get(), ec);
832 fixed->setAttribute(styleAttr, "font-family:monospace;font-size:12px");
833 fixed->appendChild(doc->createTextNode(errorMessages), ec);
834
835 h3 = doc->createElementNS(xhtmlNamespaceURI, "h3", ec);
836 reportElement->appendChild(h3.get(), ec);
837 h3->appendChild(doc->createTextNode("Below is a rendering of the page up to the first error."), ec);
838
839 return reportElement;
840 }
841
insertErrorMessageBlock()842 void XMLTokenizer::insertErrorMessageBlock()
843 {
844 // One or more errors occurred during parsing of the code. Display an error block to the user above
845 // the normal content (the DOM tree is created manually and includes line/col info regarding
846 // where the errors are located)
847
848 // Create elements for display
849 ExceptionCode ec = 0;
850 Document* doc = m_doc;
851 Node* documentElement = doc->documentElement();
852 if (!documentElement) {
853 RefPtr<Node> rootElement = doc->createElementNS(xhtmlNamespaceURI, "html", ec);
854 doc->appendChild(rootElement, ec);
855 RefPtr<Node> body = doc->createElementNS(xhtmlNamespaceURI, "body", ec);
856 rootElement->appendChild(body, ec);
857 documentElement = body.get();
858 }
859
860 RefPtr<Element> reportElement = createXHTMLParserErrorHeader(doc, m_errorMessages);
861 documentElement->insertBefore(reportElement, documentElement->firstChild(), ec);
862 doc->updateRendering();
863 }
864
notifyFinished(CachedResource * finishedObj)865 void XMLTokenizer::notifyFinished(CachedResource *finishedObj)
866 {
867 ASSERT(m_pendingScript == finishedObj);
868
869 String cachedScriptUrl = m_pendingScript->url();
870 String scriptSource = m_pendingScript->script();
871 bool errorOccurred = m_pendingScript->errorOccurred();
872 m_pendingScript->deref(this);
873 m_pendingScript = 0;
874
875 RefPtr<Element> e = m_scriptElement;
876 m_scriptElement = 0;
877
878 if (errorOccurred)
879 EventTargetNodeCast(e.get())->dispatchHTMLEvent(errorEvent, true, false);
880 else {
881 m_view->frame()->loader()->executeScript(cachedScriptUrl, 0, scriptSource);
882 EventTargetNodeCast(e.get())->dispatchHTMLEvent(loadEvent, false, false);
883 }
884
885 m_scriptElement = 0;
886
887 if (!m_requestingScript)
888 resumeParsing();
889 }
890
isWaitingForScripts() const891 bool XMLTokenizer::isWaitingForScripts() const
892 {
893 return m_pendingScript != 0;
894 }
895
newXMLTokenizer(Document * d,FrameView * v)896 Tokenizer *newXMLTokenizer(Document *d, FrameView *v)
897 {
898 return new XMLTokenizer(d, v);
899 }
900
lineNumber() const901 int XMLTokenizer::lineNumber() const
902 {
903 return XML_GetCurrentLineNumber(m_parser);
904 }
905
columnNumber() const906 int XMLTokenizer::columnNumber() const
907 {
908 return XML_GetCurrentColumnNumber(m_parser);
909 }
910
stopParsing()911 void XMLTokenizer::stopParsing()
912 {
913 Tokenizer::stopParsing();
914 if (m_parser)
915 XML_StopParser(m_parser, 0);
916 }
917
pauseParsing()918 void XMLTokenizer::pauseParsing()
919 {
920 if (m_parsingFragment)
921 return;
922
923 m_parserPaused = true;
924 }
925
resumeParsing()926 void XMLTokenizer::resumeParsing()
927 {
928 ASSERT(m_parserPaused);
929
930 m_parserPaused = false;
931
932 // First, execute any pending callbacks
933 while (!m_pendingCallbacks->isEmpty()) {
934 m_pendingCallbacks->callAndRemoveFirstCallback(this);
935
936 // A callback paused the parser
937 if (m_parserPaused)
938 return;
939 }
940
941 // Then, write any pending data
942 SegmentedString rest = m_pendingSrc;
943 m_pendingSrc.clear();
944 write(rest, false);
945
946 // Finally, if finish() has been called and write() didn't result
947 // in any further callbacks being queued, call end()
948 if (m_finishCalled && m_pendingCallbacks->isEmpty())
949 end();
950 }
951
952 // --------------------------------
953
parseXMLDocumentFragment(const String & string,DocumentFragment * fragment,Element * parent)954 bool parseXMLDocumentFragment(const String &string, DocumentFragment *fragment, Element *parent)
955 {
956 XMLTokenizer tokenizer(fragment, parent);
957
958 XML_Parser parser = XML_ParserCreateNS(NULL, tripletSep);
959 tokenizer.setXMLParser(parser);
960
961 XML_SetUserData(parser, (void *)&tokenizer);
962 XML_SetReturnNSTriplet(parser, true);
963
964 XML_SetStartElementHandler(parser, startElementHandler);
965 XML_SetEndElementHandler(parser, endElementHandler);
966 XML_SetCharacterDataHandler(parser, charactersHandler);
967 XML_SetProcessingInstructionHandler(parser, processingInstructionHandler);
968 XML_SetCommentHandler(parser, commentHandler);
969 XML_SetStartCdataSectionHandler(parser, startCdataHandler);
970 XML_SetEndCdataSectionHandler(parser, endCdataHandler);
971
972 CString cString = string.utf8();
973 int result = XML_Parse(parser, cString.data(), cString.length(), true);
974
975 XML_ParserFree(parser);
976 tokenizer.setXMLParser(0);
977
978 return result != XML_STATUS_ERROR;
979 }
980
981 // --------------------------------
982
983 struct AttributeParseState {
984 HashMap<String, String> attributes;
985 bool gotAttributes;
986 };
987
attributesStartElementHandler(void * userData,const XML_Char * name,const XML_Char ** atts)988 static void attributesStartElementHandler(void *userData, const XML_Char *name, const XML_Char **atts)
989 {
990 if (strcmp(name, "attrs") != 0)
991 return;
992
993 if (atts[0] == 0 )
994 return;
995
996 AttributeParseState *state = static_cast<AttributeParseState *>(userData);
997 state->gotAttributes = true;
998
999 for (int i = 0; atts[i]; i += 2) {
1000 DeprecatedString attrName = toQString(atts[i]);
1001 DeprecatedString attrValue = toQString(atts[i+1]);
1002 state->attributes.set(attrName, attrValue);
1003 }
1004 }
1005
parseAttributes(const String & string,bool & attrsOK)1006 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1007 {
1008 AttributeParseState state;
1009 state.gotAttributes = false;
1010
1011 XML_Parser parser = XML_ParserCreateNS(NULL, tripletSep);
1012 XML_SetUserData(parser, (void *)&state);
1013 XML_SetReturnNSTriplet(parser, true);
1014
1015 XML_SetStartElementHandler(parser, attributesStartElementHandler);
1016 String input = "<?xml version=\"1.0\"?><attrs " + string.deprecatedString() + " />";
1017 CString cString = input.deprecatedString().utf8();
1018 if ( XML_Parse(parser, cString.data(), cString.length(), true) != XML_STATUS_ERROR )
1019 attrsOK = state.gotAttributes;
1020 XML_ParserFree(parser);
1021
1022 return state.attributes;
1023 }
1024
1025 }
1026