1 #include "XMLHandler.h"
2
3 #include <algorithm>
4 #include <expat.h>
5 #include <stdio.h>
6 #include <string.h>
7 #include <fcntl.h>
8 #include <unistd.h>
9 #include <errno.h>
10
// Separator character handed to XML_ParserCreateNS(); parse_namespace()
// splits the names reported by the parser at this character.
#define NS_SEPARATOR 1
// Text appended to the indent for each nesting level when serializing.
#define MORE_INDENT " "
13
// Escapes the characters that are significant in XML character data.
// '<', '>' and '&' are replaced by the predefined entities "&lt;", "&gt;"
// and "&amp;"; every other character is copied through unchanged.
static string
xml_text_escape(const string& s)
{
    string result;
    const size_t N = s.length();
    result.reserve(N);
    for (size_t i=0; i<N; i++) {
        const char c = s[i];
        switch (c) {
            case '<':
                result += "&lt;";
                break;
            case '>':
                result += "&gt;";
                break;
            case '&':
                result += "&amp;";
                break;
            default:
                result += c;
                break;
        }
    }
    return result;
}
38
// Escapes a value so it can be written between double quotes in an
// attribute: every '"' becomes "&quot;". All other characters are copied
// through unchanged.
// NOTE(review): '&' and '<' are not escaped here; confirm that callers
// never pass attribute values containing them.
static string
xml_attr_escape(const string& s)
{
    string result;
    const size_t N = s.length();
    result.reserve(N);
    for (size_t i=0; i<N; i++) {
        const char c = s[i];
        switch (c) {
            case '\"':
                result += "&quot;";
                break;
            default:
                result += c;
                break;
        }
    }
    return result;
}
57
XMLNamespaceMap()58 XMLNamespaceMap::XMLNamespaceMap()
59 {
60 }
61
XMLNamespaceMap(char const * const * nspaces)62 XMLNamespaceMap::XMLNamespaceMap(char const*const* nspaces)
63
64 {
65 while (*nspaces) {
66 m_map[nspaces[1]] = nspaces[0];
67 nspaces += 2;
68 }
69 }
70
71 string
Get(const string & ns) const72 XMLNamespaceMap::Get(const string& ns) const
73 {
74 if (ns == "xml") {
75 return ns;
76 }
77 map<string,string>::const_iterator it = m_map.find(ns);
78 if (it == m_map.end()) {
79 return "";
80 } else {
81 return it->second;
82 }
83 }
84
85 string
GetPrefix(const string & ns) const86 XMLNamespaceMap::GetPrefix(const string& ns) const
87 {
88 if (ns == "") {
89 return "";
90 }
91 map<string,string>::const_iterator it = m_map.find(ns);
92 if (it != m_map.end()) {
93 if (it->second == "") {
94 return "";
95 } else {
96 return it->second + ":";
97 }
98 } else {
99 return ":"; // invalid
100 }
101 }
102
103 void
AddToAttributes(vector<XMLAttribute> * attrs) const104 XMLNamespaceMap::AddToAttributes(vector<XMLAttribute>* attrs) const
105 {
106 map<string,string>::const_iterator it;
107 for (it=m_map.begin(); it!=m_map.end(); it++) {
108 if (it->second == "xml") {
109 continue;
110 }
111 XMLAttribute attr;
112 if (it->second == "") {
113 attr.name = "xmlns";
114 } else {
115 attr.name = "xmlns:";
116 attr.name += it->second;
117 }
118 attr.value = it->first;
119 attrs->push_back(attr);
120 }
121 }
122
XMLAttribute()123 XMLAttribute::XMLAttribute()
124 {
125 }
126
// Copies ns, name and value from that.
XMLAttribute::XMLAttribute(const XMLAttribute& that)
    : ns(that.ns), name(that.name), value(that.value)
{
}
133
XMLAttribute(string n,string na,string v)134 XMLAttribute::XMLAttribute(string n, string na, string v)
135 :ns(n),
136 name(na),
137 value(v)
138 {
139 }
140
~XMLAttribute()141 XMLAttribute::~XMLAttribute()
142 {
143 }
144
145 int
Compare(const XMLAttribute & that) const146 XMLAttribute::Compare(const XMLAttribute& that) const
147 {
148 if (ns != that.ns) {
149 return ns < that.ns ? -1 : 1;
150 }
151 if (name != that.name) {
152 return name < that.name ? -1 : 1;
153 }
154 return 0;
155 }
156
157 string
Find(const vector<XMLAttribute> & list,const string & ns,const string & name,const string & def)158 XMLAttribute::Find(const vector<XMLAttribute>& list, const string& ns, const string& name,
159 const string& def)
160 {
161 const size_t N = list.size();
162 for (size_t i=0; i<N; i++) {
163 const XMLAttribute& attr = list[i];
164 if (attr.ns == ns && attr.name == name) {
165 return attr.value;
166 }
167 }
168 return def;
169 }
170
171 struct xml_handler_data {
172 vector<XMLHandler*> stack;
173 XML_Parser parser;
174 vector<vector<XMLAttribute>*> attributes;
175 string filename;
176 };
177
XMLNode()178 XMLNode::XMLNode()
179 {
180 }
181
~XMLNode()182 XMLNode::~XMLNode()
183 {
184 // for_each(m_children.begin(), m_children.end(), delete_object<XMLNode>);
185 }
186
187 XMLNode*
Clone() const188 XMLNode::Clone() const
189 {
190 switch (m_type) {
191 case ELEMENT: {
192 XMLNode* e = XMLNode::NewElement(m_pos, m_ns, m_name, m_attrs, m_pretty);
193 const size_t N = m_children.size();
194 for (size_t i=0; i<N; i++) {
195 e->m_children.push_back(m_children[i]->Clone());
196 }
197 return e;
198 }
199 case TEXT: {
200 return XMLNode::NewText(m_pos, m_text, m_pretty);
201 }
202 default:
203 return NULL;
204 }
205 }
206
207 XMLNode*
NewElement(const SourcePos & pos,const string & ns,const string & name,const vector<XMLAttribute> & attrs,int pretty)208 XMLNode::NewElement(const SourcePos& pos, const string& ns, const string& name,
209 const vector<XMLAttribute>& attrs, int pretty)
210 {
211 XMLNode* node = new XMLNode();
212 node->m_type = ELEMENT;
213 node->m_pretty = pretty;
214 node->m_pos = pos;
215 node->m_ns = ns;
216 node->m_name = name;
217 node->m_attrs = attrs;
218 return node;
219 }
220
221 XMLNode*
NewText(const SourcePos & pos,const string & text,int pretty)222 XMLNode::NewText(const SourcePos& pos, const string& text, int pretty)
223 {
224 XMLNode* node = new XMLNode();
225 node->m_type = TEXT;
226 node->m_pretty = pretty;
227 node->m_pos = pos;
228 node->m_text = text;
229 return node;
230 }
231
232 void
SetPrettyRecursive(int value)233 XMLNode::SetPrettyRecursive(int value)
234 {
235 m_pretty = value;
236 const size_t N = m_children.size();
237 for (size_t i=0; i<N; i++) {
238 m_children[i]->SetPrettyRecursive(value);
239 }
240 }
241
242 string
ContentsToString(const XMLNamespaceMap & nspaces) const243 XMLNode::ContentsToString(const XMLNamespaceMap& nspaces) const
244 {
245 return contents_to_string(nspaces, "");
246 }
247
248 string
ToString(const XMLNamespaceMap & nspaces) const249 XMLNode::ToString(const XMLNamespaceMap& nspaces) const
250 {
251 return to_string(nspaces, "");
252 }
253
254 string
OpenTagToString(const XMLNamespaceMap & nspaces,int pretty) const255 XMLNode::OpenTagToString(const XMLNamespaceMap& nspaces, int pretty) const
256 {
257 return open_tag_to_string(nspaces, "", pretty);
258 }
259
260 string
contents_to_string(const XMLNamespaceMap & nspaces,const string & indent) const261 XMLNode::contents_to_string(const XMLNamespaceMap& nspaces, const string& indent) const
262 {
263 string result;
264 const size_t N = m_children.size();
265 for (size_t i=0; i<N; i++) {
266 const XMLNode* child = m_children[i];
267 switch (child->Type()) {
268 case ELEMENT:
269 if (m_pretty == PRETTY) {
270 result += '\n';
271 result += indent;
272 }
273 case TEXT:
274 result += child->to_string(nspaces, indent);
275 break;
276 }
277 }
278 return result;
279 }
280
// Returns str with leading and trailing whitespace (as classified by
// isspace) removed. An empty or all-whitespace input yields "".
string
trim_string(const string& str)
{
    size_t begin = 0;
    size_t end = str.length();
    // isspace() is only defined for values representable as unsigned char,
    // so cast before the call to stay safe for bytes >= 0x80.
    while (begin < end && isspace(static_cast<unsigned char>(str[begin]))) {
        begin++;
    }
    while (end > begin && isspace(static_cast<unsigned char>(str[end-1]))) {
        end--;
    }
    return str.substr(begin, end - begin);
}
295
296 string
open_tag_to_string(const XMLNamespaceMap & nspaces,const string & indent,int pretty) const297 XMLNode::open_tag_to_string(const XMLNamespaceMap& nspaces, const string& indent, int pretty) const
298 {
299 if (m_type != ELEMENT) {
300 return "";
301 }
302 string result = "<";
303 result += nspaces.GetPrefix(m_ns);
304 result += m_name;
305
306 vector<XMLAttribute> attrs = m_attrs;
307
308 sort(attrs.begin(), attrs.end());
309
310 const size_t N = attrs.size();
311 for (size_t i=0; i<N; i++) {
312 const XMLAttribute& attr = attrs[i];
313 if (i == 0 || m_pretty == EXACT || pretty == EXACT) {
314 result += ' ';
315 }
316 else {
317 result += "\n";
318 result += indent;
319 result += MORE_INDENT;
320 result += MORE_INDENT;
321 }
322 result += nspaces.GetPrefix(attr.ns);
323 result += attr.name;
324 result += "=\"";
325 result += xml_attr_escape(attr.value);
326 result += '\"';
327 }
328
329 if (m_children.size() > 0) {
330 result += '>';
331 } else {
332 result += " />";
333 }
334 return result;
335 }
336
337 string
to_string(const XMLNamespaceMap & nspaces,const string & indent) const338 XMLNode::to_string(const XMLNamespaceMap& nspaces, const string& indent) const
339 {
340 switch (m_type)
341 {
342 case TEXT: {
343 if (m_pretty == EXACT) {
344 return xml_text_escape(m_text);
345 } else {
346 return xml_text_escape(trim_string(m_text));
347 }
348 }
349 case ELEMENT: {
350 string result = open_tag_to_string(nspaces, indent, PRETTY);
351
352 if (m_children.size() > 0) {
353 result += contents_to_string(nspaces, indent + MORE_INDENT);
354
355 if (m_pretty == PRETTY && m_children.size() > 0) {
356 result += '\n';
357 result += indent;
358 }
359
360 result += "</";
361 result += nspaces.GetPrefix(m_ns);
362 result += m_name;
363 result += '>';
364 }
365 return result;
366 }
367 default:
368 return "";
369 }
370 }
371
372 string
CollapseTextContents() const373 XMLNode::CollapseTextContents() const
374 {
375 if (m_type == TEXT) {
376 return m_text;
377 }
378 else if (m_type == ELEMENT) {
379 string result;
380
381 const size_t N=m_children.size();
382 for (size_t i=0; i<N; i++) {
383 result += m_children[i]->CollapseTextContents();
384 }
385
386 return result;
387 }
388 else {
389 return "";
390 }
391 }
392
393 vector<XMLNode*>
GetElementsByName(const string & ns,const string & name) const394 XMLNode::GetElementsByName(const string& ns, const string& name) const
395 {
396 vector<XMLNode*> result;
397 const size_t N=m_children.size();
398 for (size_t i=0; i<N; i++) {
399 XMLNode* child = m_children[i];
400 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
401 result.push_back(child);
402 }
403 }
404 return result;
405 }
406
407 XMLNode*
GetElementByNameAt(const string & ns,const string & name,size_t index) const408 XMLNode::GetElementByNameAt(const string& ns, const string& name, size_t index) const
409 {
410 vector<XMLNode*> result;
411 const size_t N=m_children.size();
412 for (size_t i=0; i<N; i++) {
413 XMLNode* child = m_children[i];
414 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
415 if (index == 0) {
416 return child;
417 } else {
418 index--;
419 }
420 }
421 }
422 return NULL;
423 }
424
425 size_t
CountElementsByName(const string & ns,const string & name) const426 XMLNode::CountElementsByName(const string& ns, const string& name) const
427 {
428 size_t result = 0;
429 const size_t N=m_children.size();
430 for (size_t i=0; i<N; i++) {
431 XMLNode* child = m_children[i];
432 if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
433 result++;
434 }
435 }
436 return result;
437 }
438
439 string
GetAttribute(const string & ns,const string & name,const string & def) const440 XMLNode::GetAttribute(const string& ns, const string& name, const string& def) const
441 {
442 return XMLAttribute::Find(m_attrs, ns, name, def);
443 }
444
445 static void
parse_namespace(const char * data,string * ns,string * name)446 parse_namespace(const char* data, string* ns, string* name)
447 {
448 const char* p = strchr(data, NS_SEPARATOR);
449 if (p != NULL) {
450 ns->assign(data, p-data);
451 name->assign(p+1);
452 } else {
453 ns->assign("");
454 name->assign(data);
455 }
456 }
457
458 static void
convert_attrs(const char ** in,vector<XMLAttribute> * out)459 convert_attrs(const char** in, vector<XMLAttribute>* out)
460 {
461 while (*in) {
462 XMLAttribute attr;
463 parse_namespace(in[0], &attr.ns, &attr.name);
464 attr.value = in[1];
465 out->push_back(attr);
466 in += 2;
467 }
468 }
469
470 static bool
list_contains(const vector<XMLHandler * > & stack,XMLHandler * handler)471 list_contains(const vector<XMLHandler*>& stack, XMLHandler* handler)
472 {
473 const size_t N = stack.size();
474 for (size_t i=0; i<N; i++) {
475 if (stack[i] == handler) {
476 return true;
477 }
478 }
479 return false;
480 }
481
482 static void XMLCALL
start_element_handler(void * userData,const char * name,const char ** attrs)483 start_element_handler(void *userData, const char *name, const char **attrs)
484 {
485 xml_handler_data* data = (xml_handler_data*)userData;
486
487 XMLHandler* handler = data->stack[data->stack.size()-1];
488
489 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
490 string nsString;
491 string nameString;
492 XMLHandler* next = handler;
493 vector<XMLAttribute> attributes;
494
495 parse_namespace(name, &nsString, &nameString);
496 convert_attrs(attrs, &attributes);
497
498 handler->OnStartElement(pos, nsString, nameString, attributes, &next);
499
500 if (next == NULL) {
501 next = handler;
502 }
503
504 if (next != handler) {
505 next->elementPos = pos;
506 next->elementNamespace = nsString;
507 next->elementName = nameString;
508 next->elementAttributes = attributes;
509 }
510
511 data->stack.push_back(next);
512 }
513
514 static void XMLCALL
end_element_handler(void * userData,const char * name)515 end_element_handler(void *userData, const char *name)
516 {
517 xml_handler_data* data = (xml_handler_data*)userData;
518
519 XMLHandler* handler = data->stack[data->stack.size()-1];
520 data->stack.pop_back();
521
522 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
523
524 if (!list_contains(data->stack, handler)) {
525 handler->OnDone(pos);
526 if (data->stack.size() > 1) {
527 // not top one
528 delete handler;
529 }
530 }
531
532 handler = data->stack[data->stack.size()-1];
533
534 string nsString;
535 string nameString;
536
537 parse_namespace(name, &nsString, &nameString);
538
539 handler->OnEndElement(pos, nsString, nameString);
540 }
541
542 static void XMLCALL
text_handler(void * userData,const XML_Char * s,int len)543 text_handler(void *userData, const XML_Char *s, int len)
544 {
545 xml_handler_data* data = (xml_handler_data*)userData;
546 XMLHandler* handler = data->stack[data->stack.size()-1];
547 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
548 handler->OnText(pos, string(s, len));
549 }
550
551 static void XMLCALL
comment_handler(void * userData,const char * comment)552 comment_handler(void *userData, const char *comment)
553 {
554 xml_handler_data* data = (xml_handler_data*)userData;
555 XMLHandler* handler = data->stack[data->stack.size()-1];
556 SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
557 handler->OnComment(pos, string(comment));
558 }
559
560 bool
ParseFile(const string & filename,XMLHandler * handler)561 XMLHandler::ParseFile(const string& filename, XMLHandler* handler)
562 {
563 char buf[16384];
564 int fd = open(filename.c_str(), O_RDONLY);
565 if (fd < 0) {
566 SourcePos(filename, -1).Error("Unable to open file for read: %s", strerror(errno));
567 return false;
568 }
569
570 XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
571 xml_handler_data state;
572 state.stack.push_back(handler);
573 state.parser = parser;
574 state.filename = filename;
575
576 XML_SetUserData(parser, &state);
577 XML_SetElementHandler(parser, start_element_handler, end_element_handler);
578 XML_SetCharacterDataHandler(parser, text_handler);
579 XML_SetCommentHandler(parser, comment_handler);
580
581 ssize_t len;
582 bool done;
583 do {
584 len = read(fd, buf, sizeof(buf));
585 done = len < (ssize_t)sizeof(buf);
586 if (len < 0) {
587 SourcePos(filename, -1).Error("Error reading file: %s\n", strerror(errno));
588 close(fd);
589 return false;
590 }
591 if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) {
592 SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
593 "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
594 close(fd);
595 return false;
596 }
597 } while (!done);
598
599 XML_ParserFree(parser);
600
601 close(fd);
602
603 return true;
604 }
605
606 bool
ParseString(const string & filename,const string & text,XMLHandler * handler)607 XMLHandler::ParseString(const string& filename, const string& text, XMLHandler* handler)
608 {
609 XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
610 xml_handler_data state;
611 state.stack.push_back(handler);
612 state.parser = parser;
613 state.filename = filename;
614
615 XML_SetUserData(parser, &state);
616 XML_SetElementHandler(parser, start_element_handler, end_element_handler);
617 XML_SetCharacterDataHandler(parser, text_handler);
618 XML_SetCommentHandler(parser, comment_handler);
619
620 if (XML_Parse(parser, text.c_str(), text.size(), true) == XML_STATUS_ERROR) {
621 SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
622 "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
623 return false;
624 }
625
626 XML_ParserFree(parser);
627
628 return true;
629 }
630
XMLHandler()631 XMLHandler::XMLHandler()
632 {
633 }
634
~XMLHandler()635 XMLHandler::~XMLHandler()
636 {
637 }
638
639 int
OnStartElement(const SourcePos & pos,const string & ns,const string & name,const vector<XMLAttribute> & attrs,XMLHandler ** next)640 XMLHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
641 const vector<XMLAttribute>& attrs, XMLHandler** next)
642 {
643 return 0;
644 }
645
646 int
OnEndElement(const SourcePos & pos,const string & ns,const string & name)647 XMLHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
648 {
649 return 0;
650 }
651
652 int
OnText(const SourcePos & pos,const string & text)653 XMLHandler::OnText(const SourcePos& pos, const string& text)
654 {
655 return 0;
656 }
657
658 int
OnComment(const SourcePos & pos,const string & text)659 XMLHandler::OnComment(const SourcePos& pos, const string& text)
660 {
661 return 0;
662 }
663
664 int
OnDone(const SourcePos & pos)665 XMLHandler::OnDone(const SourcePos& pos)
666 {
667 return 0;
668 }
669
TopElementHandler(const string & ns,const string & name,XMLHandler * next)670 TopElementHandler::TopElementHandler(const string& ns, const string& name, XMLHandler* next)
671 :m_ns(ns),
672 m_name(name),
673 m_next(next)
674 {
675 }
676
677 int
OnStartElement(const SourcePos & pos,const string & ns,const string & name,const vector<XMLAttribute> & attrs,XMLHandler ** next)678 TopElementHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
679 const vector<XMLAttribute>& attrs, XMLHandler** next)
680 {
681 *next = m_next;
682 return 0;
683 }
684
685 int
OnEndElement(const SourcePos & pos,const string & ns,const string & name)686 TopElementHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
687 {
688 return 0;
689 }
690
691 int
OnText(const SourcePos & pos,const string & text)692 TopElementHandler::OnText(const SourcePos& pos, const string& text)
693 {
694 return 0;
695 }
696
697 int
OnDone(const SourcePos & pos)698 TopElementHandler::OnDone(const SourcePos& pos)
699 {
700 return 0;
701 }
702
703
NodeHandler(XMLNode * root,int pretty)704 NodeHandler::NodeHandler(XMLNode* root, int pretty)
705 :m_root(root),
706 m_pretty(pretty)
707 {
708 if (root != NULL) {
709 m_nodes.push_back(root);
710 }
711 }
712
~NodeHandler()713 NodeHandler::~NodeHandler()
714 {
715 }
716
717 int
OnStartElement(const SourcePos & pos,const string & ns,const string & name,const vector<XMLAttribute> & attrs,XMLHandler ** next)718 NodeHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
719 const vector<XMLAttribute>& attrs, XMLHandler** next)
720 {
721 int pretty;
722 if (XMLAttribute::Find(attrs, XMLNS_XMLNS, "space", "") == "preserve") {
723 pretty = XMLNode::EXACT;
724 } else {
725 if (m_root == NULL) {
726 pretty = m_pretty;
727 } else {
728 pretty = m_nodes[m_nodes.size()-1]->Pretty();
729 }
730 }
731 XMLNode* n = XMLNode::NewElement(pos, ns, name, attrs, pretty);
732 if (m_root == NULL) {
733 m_root = n;
734 } else {
735 m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
736 }
737 m_nodes.push_back(n);
738 return 0;
739 }
740
741 int
OnEndElement(const SourcePos & pos,const string & ns,const string & name)742 NodeHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
743 {
744 m_nodes.pop_back();
745 return 0;
746 }
747
748 int
OnText(const SourcePos & pos,const string & text)749 NodeHandler::OnText(const SourcePos& pos, const string& text)
750 {
751 if (m_root == NULL) {
752 return 1;
753 }
754 XMLNode* n = XMLNode::NewText(pos, text, m_nodes[m_nodes.size()-1]->Pretty());
755 m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
756 return 0;
757 }
758
759 int
OnComment(const SourcePos & pos,const string & text)760 NodeHandler::OnComment(const SourcePos& pos, const string& text)
761 {
762 return 0;
763 }
764
765 int
OnDone(const SourcePos & pos)766 NodeHandler::OnDone(const SourcePos& pos)
767 {
768 return 0;
769 }
770
771 XMLNode*
ParseFile(const string & filename,int pretty)772 NodeHandler::ParseFile(const string& filename, int pretty)
773 {
774 NodeHandler handler(NULL, pretty);
775 if (!XMLHandler::ParseFile(filename, &handler)) {
776 fprintf(stderr, "error parsing file: %s\n", filename.c_str());
777 return NULL;
778 }
779 return handler.Root();
780 }
781
782 XMLNode*
ParseString(const string & filename,const string & text,int pretty)783 NodeHandler::ParseString(const string& filename, const string& text, int pretty)
784 {
785 NodeHandler handler(NULL, pretty);
786 if (!XMLHandler::ParseString(filename, text, &handler)) {
787 fprintf(stderr, "error parsing file: %s\n", filename.c_str());
788 return NULL;
789 }
790 return handler.Root();
791 }
792
793
794