1 /*
2 www.sourceforge.net/projects/tinyxml
3 Original code by Lee Thomason (www.grinninglizard.com)
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must
14 not claim that you wrote the original software. If you use this
15 software in a product, an acknowledgment in the product documentation
16 would be appreciated but is not required.
17
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
20
21 3. This notice may not be removed or altered from any source
22 distribution.
23 */
24
25 #include <ctype.h>
26 #include <stddef.h>
27
28 #include "tinyxml.h"
29
// Define DEBUG_PARSER to enable the TIXML_LOG trace calls in this file.
//#define DEBUG_PARSER
#ifdef DEBUG_PARSER
	// MSVC debug builds send the trace to the debugger; every other
	// configuration falls back to printf.
#	if defined( _MSC_VER ) && defined( DEBUG )
#		include <windows.h>
#		define TIXML_LOG OutputDebugString
#	else
#		define TIXML_LOG printf
#	endif
#endif
39
// Note that "PutString" hardcodes the same list. This
// is less flexible than it appears. Changing the entries
// or their order will break PutString.
43 TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] =
44 {
45 { "&", 5, '&' },
46 { "<", 4, '<' },
47 { ">", 4, '>' },
48 { """, 6, '\"' },
49 { "'", 6, '\'' }
50 };
51
// Bunch of unicode info at:
//		http://www.unicode.org/faq/utf_bom.html
// Including the basis of this table, which determines the number of bytes
// in the sequence from the lead byte. 1 is placed for invalid sequences --
// although the result will be junk, pass it through as much as possible.
// Beware of the non-characters in UTF-8:
//				ef bb bf (Microsoft "lead bytes")
//				ef bf be
//				ef bf bf
61
// The three bytes ef bb bf, in order. This file checks for them as a lead-in
// to UTF-8 text (the Microsoft "lead bytes").
const unsigned char TIXML_UTF_LEAD_0 = 0xEFu;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBu;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFu;
65
66 const int TiXmlBase::utf8ByteTable[256] =
67 {
68 // 0 1 2 3 4 5 6 7 8 9 a b c d e f
69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
81 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
83 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
84 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
85 };
86
87
ConvertUTF32ToUTF8(unsigned long input,char * output,int * length)88 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
89 {
90 const unsigned long BYTE_MASK = 0xBF;
91 const unsigned long BYTE_MARK = 0x80;
92 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
93
94 if (input < 0x80)
95 *length = 1;
96 else if ( input < 0x800 )
97 *length = 2;
98 else if ( input < 0x10000 )
99 *length = 3;
100 else if ( input < 0x200000 )
101 *length = 4;
102 else
103 { *length = 0; return; } // This code won't covert this correctly anyway.
104
105 output += *length;
106
107 // Scary scary fall throughs.
108 switch (*length)
109 {
110 case 4:
111 --output;
112 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
113 input >>= 6;
114 case 3:
115 --output;
116 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
117 input >>= 6;
118 case 2:
119 --output;
120 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
121 input >>= 6;
122 case 1:
123 --output;
124 *output = (char)(input | FIRST_BYTE_MARK[*length]);
125 }
126 }
127
128
IsAlpha(unsigned char anyByte,TiXmlEncoding)129 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
130 {
131 // This will only work for low-ascii, everything else is assumed to be a valid
132 // letter. I'm not sure this is the best approach, but it is quite tricky trying
133 // to figure out alhabetical vs. not across encoding. So take a very
134 // conservative approach.
135
136 // if ( encoding == TIXML_ENCODING_UTF8 )
137 // {
138 if ( anyByte < 127 )
139 return isalpha( anyByte );
140 else
141 return 1; // What else to do? The unicode set is huge...get the english ones right.
142 // }
143 // else
144 // {
145 // return isalpha( anyByte );
146 // }
147 }
148
149
IsAlphaNum(unsigned char anyByte,TiXmlEncoding)150 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
151 {
152 // This will only work for low-ascii, everything else is assumed to be a valid
153 // letter. I'm not sure this is the best approach, but it is quite tricky trying
154 // to figure out alhabetical vs. not across encoding. So take a very
155 // conservative approach.
156
157 // if ( encoding == TIXML_ENCODING_UTF8 )
158 // {
159 if ( anyByte < 127 )
160 return isalnum( anyByte );
161 else
162 return 1; // What else to do? The unicode set is huge...get the english ones right.
163 // }
164 // else
165 // {
166 // return isalnum( anyByte );
167 // }
168 }
169
170
171 class TiXmlParsingData
172 {
173 friend class TiXmlDocument;
174 public:
175 void Stamp( const char* now, TiXmlEncoding encoding );
176
Cursor() const177 const TiXmlCursor& Cursor() const { return cursor; }
178
179 private:
180 // Only used by the document!
TiXmlParsingData(const char * start,int _tabsize,int row,int col)181 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
182 {
183 assert( start );
184 stamp = start;
185 tabsize = _tabsize;
186 cursor.row = row;
187 cursor.col = col;
188 }
189
190 TiXmlCursor cursor;
191 const char* stamp;
192 int tabsize;
193 };
194
195
Stamp(const char * now,TiXmlEncoding encoding)196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
197 {
198 assert( now );
199
200 // Do nothing if the tabsize is 0.
201 if ( tabsize < 1 )
202 {
203 return;
204 }
205
206 // Get the current row, column.
207 int row = cursor.row;
208 int col = cursor.col;
209 const char* p = stamp;
210 assert( p );
211
212 while ( p < now )
213 {
214 // Treat p as unsigned, so we have a happy compiler.
215 const unsigned char* pU = (const unsigned char*)p;
216
217 // Code contributed by Fletcher Dunn: (modified by lee)
218 switch (*pU) {
219 case 0:
220 // We *should* never get here, but in case we do, don't
221 // advance past the terminating null character, ever
222 return;
223
224 case '\r':
225 // bump down to the next line
226 ++row;
227 col = 0;
228 // Eat the character
229 ++p;
230
231 // Check for \r\n sequence, and treat this as a single character
232 if (*p == '\n') {
233 ++p;
234 }
235 break;
236
237 case '\n':
238 // bump down to the next line
239 ++row;
240 col = 0;
241
242 // Eat the character
243 ++p;
244
245 // Check for \n\r sequence, and treat this as a single
246 // character. (Yes, this bizarre thing does occur still
247 // on some arcane platforms...)
248 if (*p == '\r') {
249 ++p;
250 }
251 break;
252
253 case '\t':
254 // Eat the character
255 ++p;
256
257 // Skip to next tab stop
258 col = (col / tabsize + 1) * tabsize;
259 break;
260
261 case TIXML_UTF_LEAD_0:
262 if ( encoding == TIXML_ENCODING_UTF8 )
263 {
264 if ( *(p+1) && *(p+2) )
265 {
266 // In these cases, don't advance the column. These are
267 // 0-width spaces.
268 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
269 p += 3;
270 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
271 p += 3;
272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
273 p += 3;
274 else
275 { p +=3; ++col; } // A normal character.
276 }
277 }
278 else
279 {
280 ++p;
281 ++col;
282 }
283 break;
284
285 default:
286 if ( encoding == TIXML_ENCODING_UTF8 )
287 {
288 // Eat the 1 to 4 byte utf8 character.
289 int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
290 if ( step == 0 )
291 step = 1; // Error case from bad encoding, but handle gracefully.
292 p += step;
293
294 // Just advance one column, of course.
295 ++col;
296 }
297 else
298 {
299 ++p;
300 ++col;
301 }
302 break;
303 }
304 }
305 cursor.row = row;
306 cursor.col = col;
307 assert( cursor.row >= -1 );
308 assert( cursor.col >= -1 );
309 stamp = p;
310 assert( stamp );
311 }
312
313
SkipWhiteSpace(const char * p,TiXmlEncoding encoding)314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
315 {
316 if ( !p || !*p )
317 {
318 return 0;
319 }
320 if ( encoding == TIXML_ENCODING_UTF8 )
321 {
322 while ( *p )
323 {
324 const unsigned char* pU = (const unsigned char*)p;
325
326 // Skip the stupid Microsoft UTF-8 Byte order marks
327 if ( *(pU+0)==TIXML_UTF_LEAD_0
328 && *(pU+1)==TIXML_UTF_LEAD_1
329 && *(pU+2)==TIXML_UTF_LEAD_2 )
330 {
331 p += 3;
332 continue;
333 }
334 else if(*(pU+0)==TIXML_UTF_LEAD_0
335 && *(pU+1)==0xbfU
336 && *(pU+2)==0xbeU )
337 {
338 p += 3;
339 continue;
340 }
341 else if(*(pU+0)==TIXML_UTF_LEAD_0
342 && *(pU+1)==0xbfU
343 && *(pU+2)==0xbfU )
344 {
345 p += 3;
346 continue;
347 }
348
349 if ( IsWhiteSpace( *p ) ) // Still using old rules for white space.
350 ++p;
351 else
352 break;
353 }
354 }
355 else
356 {
357 while ( *p && IsWhiteSpace( *p ) )
358 ++p;
359 }
360
361 return p;
362 }
363
364 #ifdef TIXML_USE_STL
StreamWhiteSpace(std::istream * in,TIXML_STRING * tag)365 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
366 {
367 for( ;; )
368 {
369 if ( !in->good() ) return false;
370
371 int c = in->peek();
372 // At this scope, we can't get to a document. So fail silently.
373 if ( !IsWhiteSpace( c ) || c <= 0 )
374 return true;
375
376 *tag += (char) in->get();
377 }
378 }
379
StreamTo(std::istream * in,int character,TIXML_STRING * tag)380 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
381 {
382 //assert( character > 0 && character < 128 ); // else it won't work in utf-8
383 while ( in->good() )
384 {
385 int c = in->peek();
386 if ( c == character )
387 return true;
388 if ( c <= 0 ) // Silent failure: can't get document at this scope
389 return false;
390
391 in->get();
392 *tag += (char) c;
393 }
394 return false;
395 }
396 #endif
397
398 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
399 // "assign" optimization removes over 10% of the execution time.
400 //
ReadName(const char * p,TIXML_STRING * name,TiXmlEncoding encoding)401 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
402 {
403 // Oddly, not supported on some comilers,
404 //name->clear();
405 // So use this:
406 *name = "";
407 assert( p );
408
409 // Names start with letters or underscores.
410 // Of course, in unicode, tinyxml has no idea what a letter *is*. The
411 // algorithm is generous.
412 //
413 // After that, they can be letters, underscores, numbers,
414 // hyphens, or colons. (Colons are valid ony for namespaces,
415 // but tinyxml can't tell namespaces from names.)
416 if ( p && *p
417 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
418 {
419 const char* start = p;
420 while( p && *p
421 && ( IsAlphaNum( (unsigned char ) *p, encoding )
422 || *p == '_'
423 || *p == '-'
424 || *p == '.'
425 || *p == ':' ) )
426 {
427 //(*name) += *p; // expensive
428 ++p;
429 }
430 if ( p-start > 0 ) {
431 name->assign( start, p-start );
432 }
433 return p;
434 }
435 return 0;
436 }
437
GetEntity(const char * p,char * value,int * length,TiXmlEncoding encoding)438 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
439 {
440 // Presume an entity, and pull it out.
441 TIXML_STRING ent;
442 int i;
443 *length = 0;
444
445 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
446 {
447 unsigned long ucs = 0;
448 ptrdiff_t delta = 0;
449 unsigned mult = 1;
450
451 if ( *(p+2) == 'x' )
452 {
453 // Hexadecimal.
454 if ( !*(p+3) ) return 0;
455
456 const char* q = p+3;
457 q = strchr( q, ';' );
458
459 if ( !q || !*q ) return 0;
460
461 delta = q-p;
462 --q;
463
464 while ( *q != 'x' )
465 {
466 if ( *q >= '0' && *q <= '9' )
467 ucs += mult * (*q - '0');
468 else if ( *q >= 'a' && *q <= 'f' )
469 ucs += mult * (*q - 'a' + 10);
470 else if ( *q >= 'A' && *q <= 'F' )
471 ucs += mult * (*q - 'A' + 10 );
472 else
473 return 0;
474 mult *= 16;
475 --q;
476 }
477 }
478 else
479 {
480 // Decimal.
481 if ( !*(p+2) ) return 0;
482
483 const char* q = p+2;
484 q = strchr( q, ';' );
485
486 if ( !q || !*q ) return 0;
487
488 delta = q-p;
489 --q;
490
491 while ( *q != '#' )
492 {
493 if ( *q >= '0' && *q <= '9' )
494 ucs += mult * (*q - '0');
495 else
496 return 0;
497 mult *= 10;
498 --q;
499 }
500 }
501 if ( encoding == TIXML_ENCODING_UTF8 )
502 {
503 // convert the UCS to UTF-8
504 ConvertUTF32ToUTF8( ucs, value, length );
505 }
506 else
507 {
508 *value = (char)ucs;
509 *length = 1;
510 }
511 return p + delta + 1;
512 }
513
514 // Now try to match it.
515 for( i=0; i<NUM_ENTITY; ++i )
516 {
517 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
518 {
519 assert( strlen( entity[i].str ) == entity[i].strLength );
520 *value = entity[i].chr;
521 *length = 1;
522 return ( p + entity[i].strLength );
523 }
524 }
525
526 // So it wasn't an entity, its unrecognized, or something like that.
527 *value = *p; // Don't put back the last one, since we return it!
528 //*length = 1; // Leave unrecognized entities - this doesn't really work.
529 // Just writes strange XML.
530 return p+1;
531 }
532
533
StringEqual(const char * p,const char * tag,bool ignoreCase,TiXmlEncoding encoding)534 bool TiXmlBase::StringEqual( const char* p,
535 const char* tag,
536 bool ignoreCase,
537 TiXmlEncoding encoding )
538 {
539 assert( p );
540 assert( tag );
541 if ( !p || !*p )
542 {
543 assert( 0 );
544 return false;
545 }
546
547 const char* q = p;
548
549 if ( ignoreCase )
550 {
551 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
552 {
553 ++q;
554 ++tag;
555 }
556
557 if ( *tag == 0 )
558 return true;
559 }
560 else
561 {
562 while ( *q && *tag && *q == *tag )
563 {
564 ++q;
565 ++tag;
566 }
567
568 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
569 return true;
570 }
571 return false;
572 }
573
ReadText(const char * p,TIXML_STRING * text,bool trimWhiteSpace,const char * endTag,bool caseInsensitive,TiXmlEncoding encoding)574 const char* TiXmlBase::ReadText( const char* p,
575 TIXML_STRING * text,
576 bool trimWhiteSpace,
577 const char* endTag,
578 bool caseInsensitive,
579 TiXmlEncoding encoding )
580 {
581 *text = "";
582 if ( !trimWhiteSpace // certain tags always keep whitespace
583 || !condenseWhiteSpace ) // if true, whitespace is always kept
584 {
585 // Keep all the white space.
586 while ( p && *p
587 && !StringEqual( p, endTag, caseInsensitive, encoding )
588 )
589 {
590 int len;
591 char cArr[4] = { 0, 0, 0, 0 };
592 p = GetChar( p, cArr, &len, encoding );
593 text->append( cArr, len );
594 }
595 }
596 else
597 {
598 bool whitespace = false;
599
600 // Remove leading white space:
601 p = SkipWhiteSpace( p, encoding );
602 while ( p && *p
603 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
604 {
605 if ( *p == '\r' || *p == '\n' )
606 {
607 whitespace = true;
608 ++p;
609 }
610 else if ( IsWhiteSpace( *p ) )
611 {
612 whitespace = true;
613 ++p;
614 }
615 else
616 {
617 // If we've found whitespace, add it before the
618 // new character. Any whitespace just becomes a space.
619 if ( whitespace )
620 {
621 (*text) += ' ';
622 whitespace = false;
623 }
624 int len;
625 char cArr[4] = { 0, 0, 0, 0 };
626 p = GetChar( p, cArr, &len, encoding );
627 if ( len == 1 )
628 (*text) += cArr[0]; // more efficient
629 else
630 text->append( cArr, len );
631 }
632 }
633 }
634 if ( p && *p )
635 p += strlen( endTag );
636 return ( p && *p ) ? p : 0;
637 }
638
639 #ifdef TIXML_USE_STL
640
StreamIn(std::istream * in,TIXML_STRING * tag)641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
642 {
643 // The basic issue with a document is that we don't know what we're
644 // streaming. Read something presumed to be a tag (and hope), then
645 // identify it, and call the appropriate stream method on the tag.
646 //
647 // This "pre-streaming" will never read the closing ">" so the
648 // sub-tag can orient itself.
649
650 if ( !StreamTo( in, '<', tag ) )
651 {
652 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
653 return;
654 }
655
656 while ( in->good() )
657 {
658 int tagIndex = (int) tag->length();
659 while ( in->good() && in->peek() != '>' )
660 {
661 int c = in->get();
662 if ( c <= 0 )
663 {
664 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
665 break;
666 }
667 (*tag) += (char) c;
668 }
669
670 if ( in->good() )
671 {
672 // We now have something we presume to be a node of
673 // some sort. Identify it, and call the node to
674 // continue streaming.
675 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
676
677 if ( node )
678 {
679 node->StreamIn( in, tag );
680 bool isElement = node->ToElement() != 0;
681 delete node;
682 node = 0;
683
684 // If this is the root element, we're done. Parsing will be
685 // done by the >> operator.
686 if ( isElement )
687 {
688 return;
689 }
690 }
691 else
692 {
693 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
694 return;
695 }
696 }
697 }
698 // We should have returned sooner.
699 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
700 }
701
702 #endif
703
Parse(const char * p,TiXmlParsingData * prevData,TiXmlEncoding encoding)704 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
705 {
706 ClearError();
707
708 // Parse away, at the document level. Since a document
709 // contains nothing but other tags, most of what happens
710 // here is skipping white space.
711 if ( !p || !*p )
712 {
713 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
714 return 0;
715 }
716
717 // Note that, for a document, this needs to come
718 // before the while space skip, so that parsing
719 // starts from the pointer we are given.
720 location.Clear();
721 if ( prevData )
722 {
723 location.row = prevData->cursor.row;
724 location.col = prevData->cursor.col;
725 }
726 else
727 {
728 location.row = 0;
729 location.col = 0;
730 }
731 TiXmlParsingData data( p, TabSize(), location.row, location.col );
732 location = data.Cursor();
733
734 if ( encoding == TIXML_ENCODING_UNKNOWN )
735 {
736 // Check for the Microsoft UTF-8 lead bytes.
737 const unsigned char* pU = (const unsigned char*)p;
738 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
739 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
740 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
741 {
742 encoding = TIXML_ENCODING_UTF8;
743 useMicrosoftBOM = true;
744 }
745 }
746
747 p = SkipWhiteSpace( p, encoding );
748 if ( !p )
749 {
750 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
751 return 0;
752 }
753
754 while ( p && *p )
755 {
756 TiXmlNode* node = Identify( p, encoding );
757 if ( node )
758 {
759 p = node->Parse( p, &data, encoding );
760 LinkEndChild( node );
761 }
762 else
763 {
764 break;
765 }
766
767 // Did we get encoding info?
768 if ( encoding == TIXML_ENCODING_UNKNOWN
769 && node->ToDeclaration() )
770 {
771 TiXmlDeclaration* dec = node->ToDeclaration();
772 const char* enc = dec->Encoding();
773 assert( enc );
774
775 if ( *enc == 0 )
776 encoding = TIXML_ENCODING_UTF8;
777 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
778 encoding = TIXML_ENCODING_UTF8;
779 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
780 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
781 else
782 encoding = TIXML_ENCODING_LEGACY;
783 }
784
785 p = SkipWhiteSpace( p, encoding );
786 }
787
788 // Was this empty?
789 if ( !firstChild ) {
790 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
791 return 0;
792 }
793
794 // All is well.
795 return p;
796 }
797
SetError(int err,const char * pError,TiXmlParsingData * data,TiXmlEncoding encoding)798 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
799 {
800 // The first error in a chain is more accurate - don't set again!
801 if ( error )
802 return;
803
804 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
805 error = true;
806 errorId = err;
807 errorDesc = errorString[ errorId ];
808
809 errorLocation.Clear();
810 if ( pError && data )
811 {
812 data->Stamp( pError, encoding );
813 errorLocation = data->Cursor();
814 }
815 }
816
817
Identify(const char * p,TiXmlEncoding encoding)818 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
819 {
820 TiXmlNode* returnNode = 0;
821
822 p = SkipWhiteSpace( p, encoding );
823 if( !p || !*p || *p != '<' )
824 {
825 return 0;
826 }
827
828 p = SkipWhiteSpace( p, encoding );
829
830 if ( !p || !*p )
831 {
832 return 0;
833 }
834
835 // What is this thing?
836 // - Elements start with a letter or underscore, but xml is reserved.
837 // - Comments: <!--
838 // - Decleration: <?xml
839 // - Everthing else is unknown to tinyxml.
840 //
841
842 const char* xmlHeader = { "<?xml" };
843 const char* commentHeader = { "<!--" };
844 const char* dtdHeader = { "<!" };
845 const char* cdataHeader = { "<![CDATA[" };
846
847 if ( StringEqual( p, xmlHeader, true, encoding ) )
848 {
849 #ifdef DEBUG_PARSER
850 TIXML_LOG( "XML parsing Declaration\n" );
851 #endif
852 returnNode = new TiXmlDeclaration();
853 }
854 else if ( StringEqual( p, commentHeader, false, encoding ) )
855 {
856 #ifdef DEBUG_PARSER
857 TIXML_LOG( "XML parsing Comment\n" );
858 #endif
859 returnNode = new TiXmlComment();
860 }
861 else if ( StringEqual( p, cdataHeader, false, encoding ) )
862 {
863 #ifdef DEBUG_PARSER
864 TIXML_LOG( "XML parsing CDATA\n" );
865 #endif
866 TiXmlText* text = new TiXmlText( "" );
867 text->SetCDATA( true );
868 returnNode = text;
869 }
870 else if ( StringEqual( p, dtdHeader, false, encoding ) )
871 {
872 #ifdef DEBUG_PARSER
873 TIXML_LOG( "XML parsing Unknown(1)\n" );
874 #endif
875 returnNode = new TiXmlUnknown();
876 }
877 else if ( IsAlpha( *(p+1), encoding )
878 || *(p+1) == '_' )
879 {
880 #ifdef DEBUG_PARSER
881 TIXML_LOG( "XML parsing Element\n" );
882 #endif
883 returnNode = new TiXmlElement( "" );
884 }
885 else
886 {
887 #ifdef DEBUG_PARSER
888 TIXML_LOG( "XML parsing Unknown(2)\n" );
889 #endif
890 returnNode = new TiXmlUnknown();
891 }
892
893 if ( returnNode )
894 {
895 // Set the parent, so it can report errors
896 returnNode->parent = this;
897 }
898 return returnNode;
899 }
900
901 #ifdef TIXML_USE_STL
902
StreamIn(std::istream * in,TIXML_STRING * tag)903 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
904 {
905 // We're called with some amount of pre-parsing. That is, some of "this"
906 // element is in "tag". Go ahead and stream to the closing ">"
907 while( in->good() )
908 {
909 int c = in->get();
910 if ( c <= 0 )
911 {
912 TiXmlDocument* document = GetDocument();
913 if ( document )
914 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
915 return;
916 }
917 (*tag) += (char) c ;
918
919 if ( c == '>' )
920 break;
921 }
922
923 if ( tag->length() < 3 ) return;
924
925 // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
926 // If not, identify and stream.
927
928 if ( tag->at( tag->length() - 1 ) == '>'
929 && tag->at( tag->length() - 2 ) == '/' )
930 {
931 // All good!
932 return;
933 }
934 else if ( tag->at( tag->length() - 1 ) == '>' )
935 {
936 // There is more. Could be:
937 // text
938 // cdata text (which looks like another node)
939 // closing tag
940 // another node.
941 for ( ;; )
942 {
943 StreamWhiteSpace( in, tag );
944
945 // Do we have text?
946 if ( in->good() && in->peek() != '<' )
947 {
948 // Yep, text.
949 TiXmlText text( "" );
950 text.StreamIn( in, tag );
951
952 // What follows text is a closing tag or another node.
953 // Go around again and figure it out.
954 continue;
955 }
956
957 // We now have either a closing tag...or another node.
958 // We should be at a "<", regardless.
959 if ( !in->good() ) return;
960 assert( in->peek() == '<' );
961 int tagIndex = (int) tag->length();
962
963 bool closingTag = false;
964 bool firstCharFound = false;
965
966 for( ;; )
967 {
968 if ( !in->good() )
969 return;
970
971 int c = in->peek();
972 if ( c <= 0 )
973 {
974 TiXmlDocument* document = GetDocument();
975 if ( document )
976 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
977 return;
978 }
979
980 if ( c == '>' )
981 break;
982
983 *tag += (char) c;
984 in->get();
985
986 // Early out if we find the CDATA id.
987 if ( c == '[' && tag->size() >= 9 )
988 {
989 size_t len = tag->size();
990 const char* start = tag->c_str() + len - 9;
991 if ( strcmp( start, "<![CDATA[" ) == 0 ) {
992 assert( !closingTag );
993 break;
994 }
995 }
996
997 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
998 {
999 firstCharFound = true;
1000 if ( c == '/' )
1001 closingTag = true;
1002 }
1003 }
1004 // If it was a closing tag, then read in the closing '>' to clean up the input stream.
1005 // If it was not, the streaming will be done by the tag.
1006 if ( closingTag )
1007 {
1008 if ( !in->good() )
1009 return;
1010
1011 int c = in->get();
1012 if ( c <= 0 )
1013 {
1014 TiXmlDocument* document = GetDocument();
1015 if ( document )
1016 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1017 return;
1018 }
1019 assert( c == '>' );
1020 *tag += (char) c;
1021
1022 // We are done, once we've found our closing tag.
1023 return;
1024 }
1025 else
1026 {
1027 // If not a closing tag, id it, and stream.
1028 const char* tagloc = tag->c_str() + tagIndex;
1029 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1030 if ( !node )
1031 return;
1032 node->StreamIn( in, tag );
1033 delete node;
1034 node = 0;
1035
1036 // No return: go around from the beginning: text, closing tag, or node.
1037 }
1038 }
1039 }
1040 }
1041 #endif
1042
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1043 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1044 {
1045 p = SkipWhiteSpace( p, encoding );
1046 TiXmlDocument* document = GetDocument();
1047
1048 if ( !p || !*p )
1049 {
1050 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1051 return 0;
1052 }
1053
1054 if ( data )
1055 {
1056 data->Stamp( p, encoding );
1057 location = data->Cursor();
1058 }
1059
1060 if ( *p != '<' )
1061 {
1062 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1063 return 0;
1064 }
1065
1066 p = SkipWhiteSpace( p+1, encoding );
1067
1068 // Read the name.
1069 const char* pErr = p;
1070
1071 p = ReadName( p, &value, encoding );
1072 if ( !p || !*p )
1073 {
1074 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1075 return 0;
1076 }
1077
1078 TIXML_STRING endTag ("</");
1079 endTag += value;
1080
1081 // Check for and read attributes. Also look for an empty
1082 // tag or an end tag.
1083 while ( p && *p )
1084 {
1085 pErr = p;
1086 p = SkipWhiteSpace( p, encoding );
1087 if ( !p || !*p )
1088 {
1089 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1090 return 0;
1091 }
1092 if ( *p == '/' )
1093 {
1094 ++p;
1095 // Empty tag.
1096 if ( *p != '>' )
1097 {
1098 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1099 return 0;
1100 }
1101 return (p+1);
1102 }
1103 else if ( *p == '>' )
1104 {
1105 // Done with attributes (if there were any.)
1106 // Read the value -- which can include other
1107 // elements -- read the end tag, and return.
1108 ++p;
1109 p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
1110 if ( !p || !*p ) {
1111 // We were looking for the end tag, but found nothing.
1112 // Fix for [ 1663758 ] Failure to report error on bad XML
1113 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1114 return 0;
1115 }
1116
1117 // We should find the end tag now
1118 // note that:
1119 // </foo > and
1120 // </foo>
1121 // are both valid end tags.
1122 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1123 {
1124 p += endTag.length();
1125 p = SkipWhiteSpace( p, encoding );
1126 if ( p && *p && *p == '>' ) {
1127 ++p;
1128 return p;
1129 }
1130 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1131 return 0;
1132 }
1133 else
1134 {
1135 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1136 return 0;
1137 }
1138 }
1139 else
1140 {
1141 // Try to read an attribute:
1142 TiXmlAttribute* attrib = new TiXmlAttribute();
1143 if ( !attrib )
1144 {
1145 return 0;
1146 }
1147
1148 attrib->SetDocument( document );
1149 pErr = p;
1150 p = attrib->Parse( p, data, encoding );
1151
1152 if ( !p || !*p )
1153 {
1154 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1155 delete attrib;
1156 return 0;
1157 }
1158
1159 // Handle the strange case of double attributes:
1160 #ifdef TIXML_USE_STL
1161 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1162 #else
1163 TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1164 #endif
1165 if ( node )
1166 {
1167 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1168 delete attrib;
1169 return 0;
1170 }
1171
1172 attributeSet.Add( attrib );
1173 }
1174 }
1175 return p;
1176 }
1177
1178
ReadValue(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1179 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1180 {
1181 TiXmlDocument* document = GetDocument();
1182
1183 // Read in text and elements in any order.
1184 const char* pWithWhiteSpace = p;
1185 p = SkipWhiteSpace( p, encoding );
1186
1187 while ( p && *p )
1188 {
1189 if ( *p != '<' )
1190 {
1191 // Take what we have, make a text element.
1192 TiXmlText* textNode = new TiXmlText( "" );
1193
1194 if ( !textNode )
1195 {
1196 return 0;
1197 }
1198
1199 if ( TiXmlBase::IsWhiteSpaceCondensed() )
1200 {
1201 p = textNode->Parse( p, data, encoding );
1202 }
1203 else
1204 {
1205 // Special case: we want to keep the white space
1206 // so that leading spaces aren't removed.
1207 p = textNode->Parse( pWithWhiteSpace, data, encoding );
1208 }
1209
1210 if ( !textNode->Blank() )
1211 LinkEndChild( textNode );
1212 else
1213 delete textNode;
1214 }
1215 else
1216 {
1217 // We hit a '<'
1218 // Have we hit a new element or an end tag? This could also be
1219 // a TiXmlText in the "CDATA" style.
1220 if ( StringEqual( p, "</", false, encoding ) )
1221 {
1222 return p;
1223 }
1224 else
1225 {
1226 TiXmlNode* node = Identify( p, encoding );
1227 if ( node )
1228 {
1229 p = node->Parse( p, data, encoding );
1230 LinkEndChild( node );
1231 }
1232 else
1233 {
1234 return 0;
1235 }
1236 }
1237 }
1238 pWithWhiteSpace = p;
1239 p = SkipWhiteSpace( p, encoding );
1240 }
1241
1242 if ( !p )
1243 {
1244 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1245 }
1246 return p;
1247 }
1248
1249
1250 #ifdef TIXML_USE_STL
StreamIn(std::istream * in,TIXML_STRING * tag)1251 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1252 {
1253 while ( in->good() )
1254 {
1255 int c = in->get();
1256 if ( c <= 0 )
1257 {
1258 TiXmlDocument* document = GetDocument();
1259 if ( document )
1260 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1261 return;
1262 }
1263 (*tag) += (char) c;
1264
1265 if ( c == '>' )
1266 {
1267 // All is well.
1268 return;
1269 }
1270 }
1271 }
1272 #endif
1273
1274
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1275 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1276 {
1277 TiXmlDocument* document = GetDocument();
1278 p = SkipWhiteSpace( p, encoding );
1279
1280 if ( data )
1281 {
1282 data->Stamp( p, encoding );
1283 location = data->Cursor();
1284 }
1285 if ( !p || !*p || *p != '<' )
1286 {
1287 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1288 return 0;
1289 }
1290 ++p;
1291 value = "";
1292
1293 while ( p && *p && *p != '>' )
1294 {
1295 value += *p;
1296 ++p;
1297 }
1298
1299 if ( !p )
1300 {
1301 if ( document )
1302 document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1303 }
1304 if ( p && *p == '>' )
1305 return p+1;
1306 return p;
1307 }
1308
1309 #ifdef TIXML_USE_STL
StreamIn(std::istream * in,TIXML_STRING * tag)1310 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1311 {
1312 while ( in->good() )
1313 {
1314 int c = in->get();
1315 if ( c <= 0 )
1316 {
1317 TiXmlDocument* document = GetDocument();
1318 if ( document )
1319 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1320 return;
1321 }
1322
1323 (*tag) += (char) c;
1324
1325 if ( c == '>'
1326 && tag->at( tag->length() - 2 ) == '-'
1327 && tag->at( tag->length() - 3 ) == '-' )
1328 {
1329 // All is well.
1330 return;
1331 }
1332 }
1333 }
1334 #endif
1335
1336
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1337 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1338 {
1339 TiXmlDocument* document = GetDocument();
1340 value = "";
1341
1342 p = SkipWhiteSpace( p, encoding );
1343
1344 if ( data )
1345 {
1346 data->Stamp( p, encoding );
1347 location = data->Cursor();
1348 }
1349 const char* startTag = "<!--";
1350 const char* endTag = "-->";
1351
1352 if ( !StringEqual( p, startTag, false, encoding ) )
1353 {
1354 if ( document )
1355 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1356 return 0;
1357 }
1358 p += strlen( startTag );
1359
1360 // [ 1475201 ] TinyXML parses entities in comments
1361 // Oops - ReadText doesn't work, because we don't want to parse the entities.
1362 // p = ReadText( p, &value, false, endTag, false, encoding );
1363 //
1364 // from the XML spec:
1365 /*
1366 [Definition: Comments may appear anywhere in a document outside other markup; in addition,
1367 they may appear within the document type declaration at places allowed by the grammar.
1368 They are not part of the document's character data; an XML processor MAY, but need not,
1369 make it possible for an application to retrieve the text of comments. For compatibility,
1370 the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
1371 references MUST NOT be recognized within comments.
1372
1373 An example of a comment:
1374
1375 <!-- declarations for <head> & <body> -->
1376 */
1377
1378 value = "";
1379 // Keep all the white space.
1380 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
1381 {
1382 value.append( p, 1 );
1383 ++p;
1384 }
1385 if ( p && *p )
1386 p += strlen( endTag );
1387
1388 return p;
1389 }
1390
1391
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1392 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1393 {
1394 p = SkipWhiteSpace( p, encoding );
1395 if ( !p || !*p ) return 0;
1396
1397 if ( data )
1398 {
1399 data->Stamp( p, encoding );
1400 location = data->Cursor();
1401 }
1402 // Read the name, the '=' and the value.
1403 const char* pErr = p;
1404 p = ReadName( p, &name, encoding );
1405 if ( !p || !*p )
1406 {
1407 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1408 return 0;
1409 }
1410 p = SkipWhiteSpace( p, encoding );
1411 if ( !p || !*p || *p != '=' )
1412 {
1413 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1414 return 0;
1415 }
1416
1417 ++p; // skip '='
1418 p = SkipWhiteSpace( p, encoding );
1419 if ( !p || !*p )
1420 {
1421 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1422 return 0;
1423 }
1424
1425 const char* end;
1426 const char SINGLE_QUOTE = '\'';
1427 const char DOUBLE_QUOTE = '\"';
1428
1429 if ( *p == SINGLE_QUOTE )
1430 {
1431 ++p;
1432 end = "\'"; // single quote in string
1433 p = ReadText( p, &value, false, end, false, encoding );
1434 }
1435 else if ( *p == DOUBLE_QUOTE )
1436 {
1437 ++p;
1438 end = "\""; // double quote in string
1439 p = ReadText( p, &value, false, end, false, encoding );
1440 }
1441 else
1442 {
1443 // All attribute values should be in single or double quotes.
1444 // But this is such a common error that the parser will try
1445 // its best, even without them.
1446 value = "";
1447 while ( p && *p // existence
1448 && !IsWhiteSpace( *p ) // whitespace
1449 && *p != '/' && *p != '>' ) // tag end
1450 {
1451 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1452 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
1453 // We did not have an opening quote but seem to have a
1454 // closing one. Give up and throw an error.
1455 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1456 return 0;
1457 }
1458 value += *p;
1459 ++p;
1460 }
1461 }
1462 return p;
1463 }
1464
1465 #ifdef TIXML_USE_STL
StreamIn(std::istream * in,TIXML_STRING * tag)1466 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1467 {
1468 while ( in->good() )
1469 {
1470 int c = in->peek();
1471 if ( !cdata && (c == '<' ) )
1472 {
1473 return;
1474 }
1475 if ( c <= 0 )
1476 {
1477 TiXmlDocument* document = GetDocument();
1478 if ( document )
1479 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1480 return;
1481 }
1482
1483 (*tag) += (char) c;
1484 in->get(); // "commits" the peek made above
1485
1486 if ( cdata && c == '>' && tag->size() >= 3 ) {
1487 size_t len = tag->size();
1488 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
1489 // terminator of cdata.
1490 return;
1491 }
1492 }
1493 }
1494 }
1495 #endif
1496
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1497 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1498 {
1499 value = "";
1500 TiXmlDocument* document = GetDocument();
1501
1502 if ( data )
1503 {
1504 data->Stamp( p, encoding );
1505 location = data->Cursor();
1506 }
1507
1508 const char* const startTag = "<![CDATA[";
1509 const char* const endTag = "]]>";
1510
1511 if ( cdata || StringEqual( p, startTag, false, encoding ) )
1512 {
1513 cdata = true;
1514
1515 if ( !StringEqual( p, startTag, false, encoding ) )
1516 {
1517 if ( document )
1518 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1519 return 0;
1520 }
1521 p += strlen( startTag );
1522
1523 // Keep all the white space, ignore the encoding, etc.
1524 while ( p && *p
1525 && !StringEqual( p, endTag, false, encoding )
1526 )
1527 {
1528 value += *p;
1529 ++p;
1530 }
1531
1532 TIXML_STRING dummy;
1533 p = ReadText( p, &dummy, false, endTag, false, encoding );
1534 return p;
1535 }
1536 else
1537 {
1538 bool ignoreWhite = true;
1539
1540 const char* end = "<";
1541 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1542 if ( p && *p )
1543 return p-1; // don't truncate the '<'
1544 return 0;
1545 }
1546 }
1547
1548 #ifdef TIXML_USE_STL
StreamIn(std::istream * in,TIXML_STRING * tag)1549 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1550 {
1551 while ( in->good() )
1552 {
1553 int c = in->get();
1554 if ( c <= 0 )
1555 {
1556 TiXmlDocument* document = GetDocument();
1557 if ( document )
1558 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1559 return;
1560 }
1561 (*tag) += (char) c;
1562
1563 if ( c == '>' )
1564 {
1565 // All is well.
1566 return;
1567 }
1568 }
1569 }
1570 #endif
1571
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding _encoding)1572 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1573 {
1574 p = SkipWhiteSpace( p, _encoding );
1575 // Find the beginning, find the end, and look for
1576 // the stuff in-between.
1577 TiXmlDocument* document = GetDocument();
1578 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1579 {
1580 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1581 return 0;
1582 }
1583 if ( data )
1584 {
1585 data->Stamp( p, _encoding );
1586 location = data->Cursor();
1587 }
1588 p += 5;
1589
1590 version = "";
1591 encoding = "";
1592 standalone = "";
1593
1594 while ( p && *p )
1595 {
1596 if ( *p == '>' )
1597 {
1598 ++p;
1599 return p;
1600 }
1601
1602 p = SkipWhiteSpace( p, _encoding );
1603 if ( StringEqual( p, "version", true, _encoding ) )
1604 {
1605 TiXmlAttribute attrib;
1606 p = attrib.Parse( p, data, _encoding );
1607 version = attrib.Value();
1608 }
1609 else if ( StringEqual( p, "encoding", true, _encoding ) )
1610 {
1611 TiXmlAttribute attrib;
1612 p = attrib.Parse( p, data, _encoding );
1613 encoding = attrib.Value();
1614 }
1615 else if ( StringEqual( p, "standalone", true, _encoding ) )
1616 {
1617 TiXmlAttribute attrib;
1618 p = attrib.Parse( p, data, _encoding );
1619 standalone = attrib.Value();
1620 }
1621 else
1622 {
1623 // Read over whatever it is.
1624 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1625 ++p;
1626 }
1627 }
1628 return 0;
1629 }
1630
Blank() const1631 bool TiXmlText::Blank() const
1632 {
1633 for ( unsigned i=0; i<value.length(); i++ )
1634 if ( !IsWhiteSpace( value[i] ) )
1635 return false;
1636 return true;
1637 }
1638
1639