comparison 3rdparty/tinyxml/tinyxmlparser.cpp @ 0:a4671277546c tip

created the repository for the thymian project
author ferencd
date Tue, 17 Aug 2021 11:19:54 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a4671277546c
1 /*
2 www.sourceforge.net/projects/tinyxml
3 Original code by Lee Thomason (www.grinninglizard.com)
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must
14 not claim that you wrote the original software. If you use this
15 software in a product, an acknowledgment in the product documentation
16 would be appreciated but is not required.
17
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
20
21 3. This notice may not be removed or altered from any source
22 distribution.
23 */
24
25 #include <ctype.h>
26 #include <stddef.h>
27
28 #include "tinyxml.h"
29
30 //#define DEBUG_PARSER
31 #if defined( DEBUG_PARSER )
32 # if defined( DEBUG ) && defined( _MSC_VER )
33 # include <windows.h>
34 # define TIXML_LOG OutputDebugString
35 # else
36 # define TIXML_LOG printf
37 # endif
38 #endif
39
40 // Note tha "PutString" hardcodes the same list. This
41 // is less flexible than it appears. Changing the entries
42 // or order will break putstring.
43 TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] =
44 {
45 { "&amp;", 5, '&' },
46 { "&lt;", 4, '<' },
47 { "&gt;", 4, '>' },
48 { "&quot;", 6, '\"' },
49 { "&apos;", 6, '\'' }
50 };
51
52 // Bunch of unicode info at:
53 // http://www.unicode.org/faq/utf_bom.html
54 // Including the basis of the table below, which determines the number of
55 // bytes in a sequence from its lead byte. 1 is placed for invalid sequences --
56 // although the result will be junk, pass it through as much as possible.
57 // Beware of the non-characters in UTF-8:
58 // ef bb bf (Microsoft "lead bytes")
59 // ef bf be
60 // ef bf bf
61
// The three bytes (EF BB BF) that make up the Microsoft UTF-8 "lead bytes"
// / byte order mark, in the order they appear in the stream.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;	// first byte
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;	// second byte
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;	// third byte
65
66 const int TiXmlBase::utf8ByteTable[256] =
67 {
68 // 0 1 2 3 4 5 6 7 8 9 a b c d e f
69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
81 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
83 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
84 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
85 };
86
87
88 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
89 {
90 const unsigned long BYTE_MASK = 0xBF;
91 const unsigned long BYTE_MARK = 0x80;
92 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
93
94 if (input < 0x80)
95 *length = 1;
96 else if ( input < 0x800 )
97 *length = 2;
98 else if ( input < 0x10000 )
99 *length = 3;
100 else if ( input < 0x200000 )
101 *length = 4;
102 else
103 { *length = 0; return; } // This code won't covert this correctly anyway.
104
105 output += *length;
106
107 // Scary scary fall throughs.
108 switch (*length)
109 {
110 case 4:
111 --output;
112 *output = static_cast<char>((input | BYTE_MARK) & BYTE_MASK);
113 input >>= 6;
114 case 3:
115 --output;
116 *output = static_cast<char>((input | BYTE_MARK) & BYTE_MASK);
117 input >>= 6;
118 case 2:
119 --output;
120 *output = static_cast<char>((input | BYTE_MARK) & BYTE_MASK);
121 input >>= 6;
122 case 1:
123 --output;
124 *output = static_cast<char>(input | FIRST_BYTE_MARK[*length]);
125 default:
126 break;
127 }
128 }
129
130
131 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
132 {
133 // This will only work for low-ascii, everything else is assumed to be a valid
134 // letter. I'm not sure this is the best approach, but it is quite tricky trying
135 // to figure out alhabetical vs. not across encoding. So take a very
136 // conservative approach.
137
138 // if ( encoding == TIXML_ENCODING_UTF8 )
139 // {
140 if ( anyByte < 127 )
141 return isalpha( anyByte );
142 else
143 return 1; // What else to do? The unicode set is huge...get the english ones right.
144 // }
145 // else
146 // {
147 // return isalpha( anyByte );
148 // }
149 }
150
151
152 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
153 {
154 // This will only work for low-ascii, everything else is assumed to be a valid
155 // letter. I'm not sure this is the best approach, but it is quite tricky trying
156 // to figure out alhabetical vs. not across encoding. So take a very
157 // conservative approach.
158
159 // if ( encoding == TIXML_ENCODING_UTF8 )
160 // {
161 if ( anyByte < 127 )
162 return isalnum( anyByte );
163 else
164 return 1; // What else to do? The unicode set is huge...get the english ones right.
165 // }
166 // else
167 // {
168 // return isalnum( anyByte );
169 // }
170 }
171
172
173 class TiXmlParsingData
174 {
175 friend class TiXmlDocument;
176 public:
177 void Stamp( const char* now, TiXmlEncoding encoding );
178
179 const TiXmlCursor& Cursor() const { return cursor; }
180
181 private:
182 // Only used by the document!
183 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
184 {
185 assert( start );
186 stamp = start;
187 tabsize = _tabsize;
188 cursor.row = row;
189 cursor.col = col;
190 }
191
192 TiXmlCursor cursor;
193 const char* stamp;
194 int tabsize;
195 };
196
197
198 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
199 {
200 assert( now );
201
202 // Do nothing if the tabsize is 0.
203 if ( tabsize < 1 )
204 {
205 return;
206 }
207
208 // Get the current row, column.
209 int row = cursor.row;
210 int col = cursor.col;
211 const char* p = stamp;
212 assert( p );
213
214 while ( p < now )
215 {
216 // Treat p as unsigned, so we have a happy compiler.
217 const unsigned char* pU = reinterpret_cast<const unsigned char*>(p);
218
219 // Code contributed by Fletcher Dunn: (modified by lee)
220 switch (*pU) {
221 case 0:
222 // We *should* never get here, but in case we do, don't
223 // advance past the terminating null character, ever
224 return;
225
226 case '\r':
227 // bump down to the next line
228 ++row;
229 col = 0;
230 // Eat the character
231 ++p;
232
233 // Check for \r\n sequence, and treat this as a single character
234 if (*p == '\n') {
235 ++p;
236 }
237 break;
238
239 case '\n':
240 // bump down to the next line
241 ++row;
242 col = 0;
243
244 // Eat the character
245 ++p;
246
247 // Check for \n\r sequence, and treat this as a single
248 // character. (Yes, this bizarre thing does occur still
249 // on some arcane platforms...)
250 if (*p == '\r') {
251 ++p;
252 }
253 break;
254
255 case '\t':
256 // Eat the character
257 ++p;
258
259 // Skip to next tab stop
260 col = (col / tabsize + 1) * tabsize;
261 break;
262
263 case TIXML_UTF_LEAD_0:
264 if ( encoding == TIXML_ENCODING_UTF8 )
265 {
266 if ( *(p+1) && *(p+2) )
267 {
268 // In these cases, don't advance the column. These are
269 // 0-width spaces.
270 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
271 p += 3;
272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
273 p += 3;
274 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
275 p += 3;
276 else
277 { p +=3; ++col; } // A normal character.
278 }
279 }
280 else
281 {
282 ++p;
283 ++col;
284 }
285 break;
286
287 default:
288 if ( encoding == TIXML_ENCODING_UTF8 )
289 {
290 // Eat the 1 to 4 byte utf8 character.
291 int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
292 if ( step == 0 )
293 step = 1; // Error case from bad encoding, but handle gracefully.
294 p += step;
295
296 // Just advance one column, of course.
297 ++col;
298 }
299 else
300 {
301 ++p;
302 ++col;
303 }
304 break;
305 }
306 }
307 cursor.row = row;
308 cursor.col = col;
309 assert( cursor.row >= -1 );
310 assert( cursor.col >= -1 );
311 stamp = p;
312 assert( stamp );
313 }
314
315
316 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
317 {
318 if ( !p || !*p )
319 {
320 return 0;
321 }
322 if ( encoding == TIXML_ENCODING_UTF8 )
323 {
324 while ( *p )
325 {
326 const unsigned char* pU = (const unsigned char*)p;
327
328 // Skip the stupid Microsoft UTF-8 Byte order marks
329 if ( *(pU+0)==TIXML_UTF_LEAD_0
330 && *(pU+1)==TIXML_UTF_LEAD_1
331 && *(pU+2)==TIXML_UTF_LEAD_2 )
332 {
333 p += 3;
334 continue;
335 }
336 else if(*(pU+0)==TIXML_UTF_LEAD_0
337 && *(pU+1)==0xbfU
338 && *(pU+2)==0xbeU )
339 {
340 p += 3;
341 continue;
342 }
343 else if(*(pU+0)==TIXML_UTF_LEAD_0
344 && *(pU+1)==0xbfU
345 && *(pU+2)==0xbfU )
346 {
347 p += 3;
348 continue;
349 }
350
351 if ( IsWhiteSpace( *p ) ) // Still using old rules for white space.
352 ++p;
353 else
354 break;
355 }
356 }
357 else
358 {
359 while ( *p && IsWhiteSpace( *p ) )
360 ++p;
361 }
362
363 return p;
364 }
365
366 #ifdef TIXML_USE_STL
367 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
368 {
369 for( ;; )
370 {
371 if ( !in->good() ) return false;
372
373 int c = in->peek();
374 // At this scope, we can't get to a document. So fail silently.
375 if ( !IsWhiteSpace( c ) || c <= 0 )
376 return true;
377
378 *tag += (char) in->get();
379 }
380 }
381
382 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
383 {
384 //assert( character > 0 && character < 128 ); // else it won't work in utf-8
385 while ( in->good() )
386 {
387 int c = in->peek();
388 if ( c == character )
389 return true;
390 if ( c <= 0 ) // Silent failure: can't get document at this scope
391 return false;
392
393 in->get();
394 *tag += (char) c;
395 }
396 return false;
397 }
398 #endif
399
400 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
401 // "assign" optimization removes over 10% of the execution time.
402 //
403 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
404 {
405 // Oddly, not supported on some comilers,
406 //name->clear();
407 // So use this:
408 *name = "";
409 assert( p );
410
411 // Names start with letters or underscores.
412 // Of course, in unicode, tinyxml has no idea what a letter *is*. The
413 // algorithm is generous.
414 //
415 // After that, they can be letters, underscores, numbers,
416 // hyphens, or colons. (Colons are valid ony for namespaces,
417 // but tinyxml can't tell namespaces from names.)
418 if ( p && *p
419 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
420 {
421 const char* start = p;
422 while( p && *p
423 && ( IsAlphaNum( (unsigned char ) *p, encoding )
424 || *p == '_'
425 || *p == '-'
426 || *p == '.'
427 || *p == ':' ) )
428 {
429 //(*name) += *p; // expensive
430 ++p;
431 }
432 if ( p-start > 0 ) {
433 name->assign( start, p-start );
434 }
435 return p;
436 }
437 return 0;
438 }
439
440 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
441 {
442 // Presume an entity, and pull it out.
443 TIXML_STRING ent;
444 int i;
445 *length = 0;
446
447 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
448 {
449 unsigned long ucs = 0;
450 ptrdiff_t delta = 0;
451 unsigned mult = 1;
452
453 if ( *(p+2) == 'x' )
454 {
455 // Hexadecimal.
456 if ( !*(p+3) ) return 0;
457
458 const char* q = p+3;
459 q = strchr( q, ';' );
460
461 if ( !q || !*q ) return 0;
462
463 delta = q-p;
464 --q;
465
466 while ( *q != 'x' )
467 {
468 if ( *q >= '0' && *q <= '9' )
469 ucs += mult * (*q - '0');
470 else if ( *q >= 'a' && *q <= 'f' )
471 ucs += mult * (*q - 'a' + 10);
472 else if ( *q >= 'A' && *q <= 'F' )
473 ucs += mult * (*q - 'A' + 10 );
474 else
475 return 0;
476 mult *= 16;
477 --q;
478 }
479 }
480 else
481 {
482 // Decimal.
483 if ( !*(p+2) ) return 0;
484
485 const char* q = p+2;
486 q = strchr( q, ';' );
487
488 if ( !q || !*q ) return 0;
489
490 delta = q-p;
491 --q;
492
493 while ( *q != '#' )
494 {
495 if ( *q >= '0' && *q <= '9' )
496 ucs += mult * (*q - '0');
497 else
498 return 0;
499 mult *= 10;
500 --q;
501 }
502 }
503 if ( encoding == TIXML_ENCODING_UTF8 )
504 {
505 // convert the UCS to UTF-8
506 ConvertUTF32ToUTF8( ucs, value, length );
507 }
508 else
509 {
510 *value = (char)ucs;
511 *length = 1;
512 }
513 return p + delta + 1;
514 }
515
516 // Now try to match it.
517 for( i=0; i<NUM_ENTITY; ++i )
518 {
519 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
520 {
521 assert( strlen( entity[i].str ) == entity[i].strLength );
522 *value = entity[i].chr;
523 *length = 1;
524 return ( p + entity[i].strLength );
525 }
526 }
527
528 // So it wasn't an entity, its unrecognized, or something like that.
529 *value = *p; // Don't put back the last one, since we return it!
530 //*length = 1; // Leave unrecognized entities - this doesn't really work.
531 // Just writes strange XML.
532 return p+1;
533 }
534
535
536 bool TiXmlBase::StringEqual( const char* p,
537 const char* tag,
538 bool ignoreCase,
539 TiXmlEncoding encoding )
540 {
541 assert( p );
542 assert( tag );
543 if ( !p || !*p )
544 {
545 assert( 0 );
546 return false;
547 }
548
549 const char* q = p;
550
551 if ( ignoreCase )
552 {
553 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
554 {
555 ++q;
556 ++tag;
557 }
558
559 if ( *tag == 0 )
560 return true;
561 }
562 else
563 {
564 while ( *q && *tag && *q == *tag )
565 {
566 ++q;
567 ++tag;
568 }
569
570 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
571 return true;
572 }
573 return false;
574 }
575
576 const char* TiXmlBase::ReadText( const char* p,
577 TIXML_STRING * text,
578 bool trimWhiteSpace,
579 const char* endTag,
580 bool caseInsensitive,
581 TiXmlEncoding encoding )
582 {
583 *text = "";
584 if ( !trimWhiteSpace // certain tags always keep whitespace
585 || !condenseWhiteSpace ) // if true, whitespace is always kept
586 {
587 // Keep all the white space.
588 while ( p && *p
589 && !StringEqual( p, endTag, caseInsensitive, encoding )
590 )
591 {
592 int len;
593 char cArr[4] = { 0, 0, 0, 0 };
594 p = GetChar( p, cArr, &len, encoding );
595 text->append( cArr, len );
596 }
597 }
598 else
599 {
600 bool whitespace = false;
601
602 // Remove leading white space:
603 p = SkipWhiteSpace( p, encoding );
604 while ( p && *p
605 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
606 {
607 if ( *p == '\r' || *p == '\n' )
608 {
609 whitespace = true;
610 ++p;
611 }
612 else if ( IsWhiteSpace( *p ) )
613 {
614 whitespace = true;
615 ++p;
616 }
617 else
618 {
619 // If we've found whitespace, add it before the
620 // new character. Any whitespace just becomes a space.
621 if ( whitespace )
622 {
623 (*text) += ' ';
624 whitespace = false;
625 }
626 int len;
627 char cArr[4] = { 0, 0, 0, 0 };
628 p = GetChar( p, cArr, &len, encoding );
629 if ( len == 1 )
630 (*text) += cArr[0]; // more efficient
631 else
632 text->append( cArr, len );
633 }
634 }
635 }
636 if ( p && *p )
637 p += strlen( endTag );
638 return ( p && *p ) ? p : 0;
639 }
640
641 #ifdef TIXML_USE_STL
642
643 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
644 {
645 // The basic issue with a document is that we don't know what we're
646 // streaming. Read something presumed to be a tag (and hope), then
647 // identify it, and call the appropriate stream method on the tag.
648 //
649 // This "pre-streaming" will never read the closing ">" so the
650 // sub-tag can orient itself.
651
652 if ( !StreamTo( in, '<', tag ) )
653 {
654 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
655 return;
656 }
657
658 while ( in->good() )
659 {
660 int tagIndex = (int) tag->length();
661 while ( in->good() && in->peek() != '>' )
662 {
663 int c = in->get();
664 if ( c <= 0 )
665 {
666 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
667 break;
668 }
669 (*tag) += (char) c;
670 }
671
672 if ( in->good() )
673 {
674 // We now have something we presume to be a node of
675 // some sort. Identify it, and call the node to
676 // continue streaming.
677 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
678
679 if ( node )
680 {
681 node->StreamIn( in, tag );
682 bool isElement = node->ToElement() != 0;
683 delete node;
684 node = 0;
685
686 // If this is the root element, we're done. Parsing will be
687 // done by the >> operator.
688 if ( isElement )
689 {
690 return;
691 }
692 }
693 else
694 {
695 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
696 return;
697 }
698 }
699 }
700 // We should have returned sooner.
701 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
702 }
703
704 #endif
705
706 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
707 {
708 ClearError();
709
710 // Parse away, at the document level. Since a document
711 // contains nothing but other tags, most of what happens
712 // here is skipping white space.
713 if ( !p || !*p )
714 {
715 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
716 return 0;
717 }
718
719 // Note that, for a document, this needs to come
720 // before the while space skip, so that parsing
721 // starts from the pointer we are given.
722 location.Clear();
723 if ( prevData )
724 {
725 location.row = prevData->cursor.row;
726 location.col = prevData->cursor.col;
727 }
728 else
729 {
730 location.row = 0;
731 location.col = 0;
732 }
733 TiXmlParsingData data( p, TabSize(), location.row, location.col );
734 location = data.Cursor();
735
736 if ( encoding == TIXML_ENCODING_UNKNOWN )
737 {
738 // Check for the Microsoft UTF-8 lead bytes.
739 const unsigned char* pU = (const unsigned char*)p;
740 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
741 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
742 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
743 {
744 encoding = TIXML_ENCODING_UTF8;
745 useMicrosoftBOM = true;
746 }
747 }
748
749 p = SkipWhiteSpace( p, encoding );
750 if ( !p )
751 {
752 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
753 return 0;
754 }
755
756 while ( p && *p )
757 {
758 TiXmlNode* node = Identify( p, encoding );
759 if ( node )
760 {
761 p = node->Parse( p, &data, encoding );
762 LinkEndChild( node );
763 }
764 else
765 {
766 break;
767 }
768
769 // Did we get encoding info?
770 if ( encoding == TIXML_ENCODING_UNKNOWN
771 && node->ToDeclaration() )
772 {
773 TiXmlDeclaration* dec = node->ToDeclaration();
774 const char* enc = dec->Encoding();
775 assert( enc );
776
777 if ( *enc == 0 )
778 encoding = TIXML_ENCODING_UTF8;
779 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
780 encoding = TIXML_ENCODING_UTF8;
781 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
782 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
783 else
784 encoding = TIXML_ENCODING_LEGACY;
785 }
786
787 p = SkipWhiteSpace( p, encoding );
788 }
789
790 // Was this empty?
791 if ( !firstChild ) {
792 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
793 return 0;
794 }
795
796 // All is well.
797 return p;
798 }
799
800 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
801 {
802 // The first error in a chain is more accurate - don't set again!
803 if ( error )
804 return;
805
806 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
807 error = true;
808 errorId = err;
809 errorDesc = errorString[ errorId ];
810
811 errorLocation.Clear();
812 if ( pError && data )
813 {
814 data->Stamp( pError, encoding );
815 errorLocation = data->Cursor();
816 }
817 }
818
819
820 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
821 {
822 TiXmlNode* returnNode = 0;
823
824 p = SkipWhiteSpace( p, encoding );
825 if( !p || !*p || *p != '<' )
826 {
827 return 0;
828 }
829
830 p = SkipWhiteSpace( p, encoding );
831
832 if ( !p || !*p )
833 {
834 return 0;
835 }
836
837 // What is this thing?
838 // - Elements start with a letter or underscore, but xml is reserved.
839 // - Comments: <!--
840 // - Decleration: <?xml
841 // - Everthing else is unknown to tinyxml.
842 //
843
844 const char* xmlHeader = { "<?xml" };
845 const char* commentHeader = { "<!--" };
846 const char* dtdHeader = { "<!" };
847 const char* cdataHeader = { "<![CDATA[" };
848
849 if ( StringEqual( p, xmlHeader, true, encoding ) )
850 {
851 #ifdef DEBUG_PARSER
852 TIXML_LOG( "XML parsing Declaration\n" );
853 #endif
854 returnNode = new TiXmlDeclaration();
855 }
856 else if ( StringEqual( p, commentHeader, false, encoding ) )
857 {
858 #ifdef DEBUG_PARSER
859 TIXML_LOG( "XML parsing Comment\n" );
860 #endif
861 returnNode = new TiXmlComment();
862 }
863 else if ( StringEqual( p, cdataHeader, false, encoding ) )
864 {
865 #ifdef DEBUG_PARSER
866 TIXML_LOG( "XML parsing CDATA\n" );
867 #endif
868 TiXmlText* text = new TiXmlText( "" );
869 text->SetCDATA( true );
870 returnNode = text;
871 }
872 else if ( StringEqual( p, dtdHeader, false, encoding ) )
873 {
874 #ifdef DEBUG_PARSER
875 TIXML_LOG( "XML parsing Unknown(1)\n" );
876 #endif
877 returnNode = new TiXmlUnknown();
878 }
879 else if ( IsAlpha( *(p+1), encoding )
880 || *(p+1) == '_' )
881 {
882 #ifdef DEBUG_PARSER
883 TIXML_LOG( "XML parsing Element\n" );
884 #endif
885 returnNode = new TiXmlElement( "" );
886 }
887 else
888 {
889 #ifdef DEBUG_PARSER
890 TIXML_LOG( "XML parsing Unknown(2)\n" );
891 #endif
892 returnNode = new TiXmlUnknown();
893 }
894
895 if ( returnNode )
896 {
897 // Set the parent, so it can report errors
898 returnNode->parent = this;
899 }
900 return returnNode;
901 }
902
903 #ifdef TIXML_USE_STL
904
905 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
906 {
907 // We're called with some amount of pre-parsing. That is, some of "this"
908 // element is in "tag". Go ahead and stream to the closing ">"
909 while( in->good() )
910 {
911 int c = in->get();
912 if ( c <= 0 )
913 {
914 TiXmlDocument* document = GetDocument();
915 if ( document )
916 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
917 return;
918 }
919 (*tag) += (char) c ;
920
921 if ( c == '>' )
922 break;
923 }
924
925 if ( tag->length() < 3 ) return;
926
927 // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
928 // If not, identify and stream.
929
930 if ( tag->at( tag->length() - 1 ) == '>'
931 && tag->at( tag->length() - 2 ) == '/' )
932 {
933 // All good!
934 return;
935 }
936 else if ( tag->at( tag->length() - 1 ) == '>' )
937 {
938 // There is more. Could be:
939 // text
940 // cdata text (which looks like another node)
941 // closing tag
942 // another node.
943 for ( ;; )
944 {
945 StreamWhiteSpace( in, tag );
946
947 // Do we have text?
948 if ( in->good() && in->peek() != '<' )
949 {
950 // Yep, text.
951 TiXmlText text( "" );
952 text.StreamIn( in, tag );
953
954 // What follows text is a closing tag or another node.
955 // Go around again and figure it out.
956 continue;
957 }
958
959 // We now have either a closing tag...or another node.
960 // We should be at a "<", regardless.
961 if ( !in->good() ) return;
962 assert( in->peek() == '<' );
963 int tagIndex = (int) tag->length();
964
965 bool closingTag = false;
966 bool firstCharFound = false;
967
968 for( ;; )
969 {
970 if ( !in->good() )
971 return;
972
973 int c = in->peek();
974 if ( c <= 0 )
975 {
976 TiXmlDocument* document = GetDocument();
977 if ( document )
978 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
979 return;
980 }
981
982 if ( c == '>' )
983 break;
984
985 *tag += (char) c;
986 in->get();
987
988 // Early out if we find the CDATA id.
989 if ( c == '[' && tag->size() >= 9 )
990 {
991 size_t len = tag->size();
992 const char* start = tag->c_str() + len - 9;
993 if ( strcmp( start, "<![CDATA[" ) == 0 ) {
994 assert( !closingTag );
995 break;
996 }
997 }
998
999 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
1000 {
1001 firstCharFound = true;
1002 if ( c == '/' )
1003 closingTag = true;
1004 }
1005 }
1006 // If it was a closing tag, then read in the closing '>' to clean up the input stream.
1007 // If it was not, the streaming will be done by the tag.
1008 if ( closingTag )
1009 {
1010 if ( !in->good() )
1011 return;
1012
1013 int c = in->get();
1014 if ( c <= 0 )
1015 {
1016 TiXmlDocument* document = GetDocument();
1017 if ( document )
1018 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1019 return;
1020 }
1021 assert( c == '>' );
1022 *tag += (char) c;
1023
1024 // We are done, once we've found our closing tag.
1025 return;
1026 }
1027 else
1028 {
1029 // If not a closing tag, id it, and stream.
1030 const char* tagloc = tag->c_str() + tagIndex;
1031 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1032 if ( !node )
1033 return;
1034 node->StreamIn( in, tag );
1035 delete node;
1036 node = 0;
1037
1038 // No return: go around from the beginning: text, closing tag, or node.
1039 }
1040 }
1041 }
1042 }
1043 #endif
1044
1045 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1046 {
1047 p = SkipWhiteSpace( p, encoding );
1048 TiXmlDocument* document = GetDocument();
1049
1050 if ( !p || !*p )
1051 {
1052 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1053 return 0;
1054 }
1055
1056 if ( data )
1057 {
1058 data->Stamp( p, encoding );
1059 location = data->Cursor();
1060 }
1061
1062 if ( *p != '<' )
1063 {
1064 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1065 return 0;
1066 }
1067
1068 p = SkipWhiteSpace( p+1, encoding );
1069
1070 // Read the name.
1071 const char* pErr = p;
1072
1073 p = ReadName( p, &value, encoding );
1074 if ( !p || !*p )
1075 {
1076 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1077 return 0;
1078 }
1079
1080 TIXML_STRING endTag ("</");
1081 endTag += value;
1082
1083 // Check for and read attributes. Also look for an empty
1084 // tag or an end tag.
1085 while ( p && *p )
1086 {
1087 pErr = p;
1088 p = SkipWhiteSpace( p, encoding );
1089 if ( !p || !*p )
1090 {
1091 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1092 return 0;
1093 }
1094 if ( *p == '/' )
1095 {
1096 ++p;
1097 // Empty tag.
1098 if ( *p != '>' )
1099 {
1100 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1101 return 0;
1102 }
1103 return (p+1);
1104 }
1105 else if ( *p == '>' )
1106 {
1107 // Done with attributes (if there were any.)
1108 // Read the value -- which can include other
1109 // elements -- read the end tag, and return.
1110 ++p;
1111 p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
1112 if ( !p || !*p ) {
1113 // We were looking for the end tag, but found nothing.
1114 // Fix for [ 1663758 ] Failure to report error on bad XML
1115 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1116 return 0;
1117 }
1118
1119 // We should find the end tag now
1120 // note that:
1121 // </foo > and
1122 // </foo>
1123 // are both valid end tags.
1124 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1125 {
1126 p += endTag.length();
1127 p = SkipWhiteSpace( p, encoding );
1128 if ( p && *p && *p == '>' ) {
1129 ++p;
1130 return p;
1131 }
1132 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1133 return 0;
1134 }
1135 else
1136 {
1137 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1138 return 0;
1139 }
1140 }
1141 else
1142 {
1143 // Try to read an attribute:
1144 TiXmlAttribute* attrib = new TiXmlAttribute();
1145 if ( !attrib )
1146 {
1147 return 0;
1148 }
1149
1150 attrib->SetDocument( document );
1151 pErr = p;
1152 p = attrib->Parse( p, data, encoding );
1153
1154 if ( !p || !*p )
1155 {
1156 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1157 delete attrib;
1158 return 0;
1159 }
1160
1161 // Handle the strange case of double attributes:
1162 #ifdef TIXML_USE_STL
1163 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1164 #else
1165 TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1166 #endif
1167 if ( node )
1168 {
1169 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1170 delete attrib;
1171 return 0;
1172 }
1173
1174 attributeSet.Add( attrib );
1175 }
1176 }
1177 return p;
1178 }
1179
1180
1181 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1182 {
1183 TiXmlDocument* document = GetDocument();
1184
1185 // Read in text and elements in any order.
1186 const char* pWithWhiteSpace = p;
1187 p = SkipWhiteSpace( p, encoding );
1188
1189 while ( p && *p )
1190 {
1191 if ( *p != '<' )
1192 {
1193 // Take what we have, make a text element.
1194 TiXmlText* textNode = new TiXmlText( "" );
1195
1196 if ( !textNode )
1197 {
1198 return 0;
1199 }
1200
1201 if ( TiXmlBase::IsWhiteSpaceCondensed() )
1202 {
1203 p = textNode->Parse( p, data, encoding );
1204 }
1205 else
1206 {
1207 // Special case: we want to keep the white space
1208 // so that leading spaces aren't removed.
1209 p = textNode->Parse( pWithWhiteSpace, data, encoding );
1210 }
1211
1212 if ( !textNode->Blank() )
1213 LinkEndChild( textNode );
1214 else
1215 delete textNode;
1216 }
1217 else
1218 {
1219 // We hit a '<'
1220 // Have we hit a new element or an end tag? This could also be
1221 // a TiXmlText in the "CDATA" style.
1222 if ( StringEqual( p, "</", false, encoding ) )
1223 {
1224 return p;
1225 }
1226 else
1227 {
1228 TiXmlNode* node = Identify( p, encoding );
1229 if ( node )
1230 {
1231 p = node->Parse( p, data, encoding );
1232 LinkEndChild( node );
1233 }
1234 else
1235 {
1236 return 0;
1237 }
1238 }
1239 }
1240 pWithWhiteSpace = p;
1241 p = SkipWhiteSpace( p, encoding );
1242 }
1243
1244 if ( !p )
1245 {
1246 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1247 }
1248 return p;
1249 }
1250
1251
1252 #ifdef TIXML_USE_STL
1253 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1254 {
1255 while ( in->good() )
1256 {
1257 int c = in->get();
1258 if ( c <= 0 )
1259 {
1260 TiXmlDocument* document = GetDocument();
1261 if ( document )
1262 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1263 return;
1264 }
1265 (*tag) += (char) c;
1266
1267 if ( c == '>' )
1268 {
1269 // All is well.
1270 return;
1271 }
1272 }
1273 }
1274 #endif
1275
1276
1277 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1278 {
1279 TiXmlDocument* document = GetDocument();
1280 p = SkipWhiteSpace( p, encoding );
1281
1282 if ( data )
1283 {
1284 data->Stamp( p, encoding );
1285 location = data->Cursor();
1286 }
1287 if ( !p || !*p || *p != '<' )
1288 {
1289 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1290 return 0;
1291 }
1292 ++p;
1293 value = "";
1294
1295 while ( p && *p && *p != '>' )
1296 {
1297 value += *p;
1298 ++p;
1299 }
1300
1301 if ( !p )
1302 {
1303 if ( document )
1304 document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1305 }
1306 if ( p && *p == '>' )
1307 return p+1;
1308 return p;
1309 }
1310
1311 #ifdef TIXML_USE_STL
1312 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1313 {
1314 while ( in->good() )
1315 {
1316 int c = in->get();
1317 if ( c <= 0 )
1318 {
1319 TiXmlDocument* document = GetDocument();
1320 if ( document )
1321 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1322 return;
1323 }
1324
1325 (*tag) += (char) c;
1326
1327 if ( c == '>'
1328 && tag->at( tag->length() - 2 ) == '-'
1329 && tag->at( tag->length() - 3 ) == '-' )
1330 {
1331 // All is well.
1332 return;
1333 }
1334 }
1335 }
1336 #endif
1337
1338
1339 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1340 {
1341 TiXmlDocument* document = GetDocument();
1342 value = "";
1343
1344 p = SkipWhiteSpace( p, encoding );
1345
1346 if ( data )
1347 {
1348 data->Stamp( p, encoding );
1349 location = data->Cursor();
1350 }
1351 const char* startTag = "<!--";
1352 const char* endTag = "-->";
1353
1354 if ( !StringEqual( p, startTag, false, encoding ) )
1355 {
1356 if ( document )
1357 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1358 return 0;
1359 }
1360 p += strlen( startTag );
1361
1362 // [ 1475201 ] TinyXML parses entities in comments
1363 // Oops - ReadText doesn't work, because we don't want to parse the entities.
1364 // p = ReadText( p, &value, false, endTag, false, encoding );
1365 //
1366 // from the XML spec:
1367 /*
1368 [Definition: Comments may appear anywhere in a document outside other markup; in addition,
1369 they may appear within the document type declaration at places allowed by the grammar.
1370 They are not part of the document's character data; an XML processor MAY, but need not,
1371 make it possible for an application to retrieve the text of comments. For compatibility,
1372 the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
1373 references MUST NOT be recognized within comments.
1374
1375 An example of a comment:
1376
1377 <!-- declarations for <head> & <body> -->
1378 */
1379
1380 value = "";
1381 // Keep all the white space.
1382 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
1383 {
1384 value.append( p, 1 );
1385 ++p;
1386 }
1387 if ( p && *p )
1388 p += strlen( endTag );
1389
1390 return p;
1391 }
1392
1393
1394 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1395 {
1396 p = SkipWhiteSpace( p, encoding );
1397 if ( !p || !*p ) return 0;
1398
1399 if ( data )
1400 {
1401 data->Stamp( p, encoding );
1402 location = data->Cursor();
1403 }
1404 // Read the name, the '=' and the value.
1405 const char* pErr = p;
1406 p = ReadName( p, &name, encoding );
1407 if ( !p || !*p )
1408 {
1409 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1410 return 0;
1411 }
1412 p = SkipWhiteSpace( p, encoding );
1413 if ( !p || !*p || *p != '=' )
1414 {
1415 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1416 return 0;
1417 }
1418
1419 ++p; // skip '='
1420 p = SkipWhiteSpace( p, encoding );
1421 if ( !p || !*p )
1422 {
1423 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1424 return 0;
1425 }
1426
1427 const char* end;
1428 const char SINGLE_QUOTE = '\'';
1429 const char DOUBLE_QUOTE = '\"';
1430
1431 if ( *p == SINGLE_QUOTE )
1432 {
1433 ++p;
1434 end = "\'"; // single quote in string
1435 p = ReadText( p, &value, false, end, false, encoding );
1436 }
1437 else if ( *p == DOUBLE_QUOTE )
1438 {
1439 ++p;
1440 end = "\""; // double quote in string
1441 p = ReadText( p, &value, false, end, false, encoding );
1442 }
1443 else
1444 {
1445 // All attribute values should be in single or double quotes.
1446 // But this is such a common error that the parser will try
1447 // its best, even without them.
1448 value = "";
1449 while ( p && *p // existence
1450 && !IsWhiteSpace( *p ) // whitespace
1451 && *p != '/' && *p != '>' ) // tag end
1452 {
1453 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1454 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
1455 // We did not have an opening quote but seem to have a
1456 // closing one. Give up and throw an error.
1457 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1458 return 0;
1459 }
1460 value += *p;
1461 ++p;
1462 }
1463 }
1464 return p;
1465 }
1466
1467 #ifdef TIXML_USE_STL
1468 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1469 {
1470 while ( in->good() )
1471 {
1472 int c = in->peek();
1473 if ( !cdata && (c == '<' ) )
1474 {
1475 return;
1476 }
1477 if ( c <= 0 )
1478 {
1479 TiXmlDocument* document = GetDocument();
1480 if ( document )
1481 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1482 return;
1483 }
1484
1485 (*tag) += (char) c;
1486 in->get(); // "commits" the peek made above
1487
1488 if ( cdata && c == '>' && tag->size() >= 3 ) {
1489 size_t len = tag->size();
1490 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
1491 // terminator of cdata.
1492 return;
1493 }
1494 }
1495 }
1496 }
1497 #endif
1498
1499 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1500 {
1501 value = "";
1502 TiXmlDocument* document = GetDocument();
1503
1504 if ( data )
1505 {
1506 data->Stamp( p, encoding );
1507 location = data->Cursor();
1508 }
1509
1510 const char* const startTag = "<![CDATA[";
1511 const char* const endTag = "]]>";
1512
1513 if ( cdata || StringEqual( p, startTag, false, encoding ) )
1514 {
1515 cdata = true;
1516
1517 if ( !StringEqual( p, startTag, false, encoding ) )
1518 {
1519 if ( document )
1520 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1521 return 0;
1522 }
1523 p += strlen( startTag );
1524
1525 // Keep all the white space, ignore the encoding, etc.
1526 while ( p && *p
1527 && !StringEqual( p, endTag, false, encoding )
1528 )
1529 {
1530 value += *p;
1531 ++p;
1532 }
1533
1534 TIXML_STRING dummy;
1535 p = ReadText( p, &dummy, false, endTag, false, encoding );
1536 return p;
1537 }
1538 else
1539 {
1540 bool ignoreWhite = true;
1541
1542 const char* end = "<";
1543 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1544 if ( p && *p )
1545 return p-1; // don't truncate the '<'
1546 return 0;
1547 }
1548 }
1549
1550 #ifdef TIXML_USE_STL
1551 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1552 {
1553 while ( in->good() )
1554 {
1555 int c = in->get();
1556 if ( c <= 0 )
1557 {
1558 TiXmlDocument* document = GetDocument();
1559 if ( document )
1560 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1561 return;
1562 }
1563 (*tag) += (char) c;
1564
1565 if ( c == '>' )
1566 {
1567 // All is well.
1568 return;
1569 }
1570 }
1571 }
1572 #endif
1573
1574 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1575 {
1576 p = SkipWhiteSpace( p, _encoding );
1577 // Find the beginning, find the end, and look for
1578 // the stuff in-between.
1579 TiXmlDocument* document = GetDocument();
1580 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1581 {
1582 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1583 return 0;
1584 }
1585 if ( data )
1586 {
1587 data->Stamp( p, _encoding );
1588 location = data->Cursor();
1589 }
1590 p += 5;
1591
1592 version = "";
1593 encoding = "";
1594 standalone = "";
1595
1596 while ( p && *p )
1597 {
1598 if ( *p == '>' )
1599 {
1600 ++p;
1601 return p;
1602 }
1603
1604 p = SkipWhiteSpace( p, _encoding );
1605 if ( StringEqual( p, "version", true, _encoding ) )
1606 {
1607 TiXmlAttribute attrib;
1608 p = attrib.Parse( p, data, _encoding );
1609 version = attrib.Value();
1610 }
1611 else if ( StringEqual( p, "encoding", true, _encoding ) )
1612 {
1613 TiXmlAttribute attrib;
1614 p = attrib.Parse( p, data, _encoding );
1615 encoding = attrib.Value();
1616 }
1617 else if ( StringEqual( p, "standalone", true, _encoding ) )
1618 {
1619 TiXmlAttribute attrib;
1620 p = attrib.Parse( p, data, _encoding );
1621 standalone = attrib.Value();
1622 }
1623 else
1624 {
1625 // Read over whatever it is.
1626 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1627 ++p;
1628 }
1629 }
1630 return 0;
1631 }
1632
1633 bool TiXmlText::Blank() const
1634 {
1635 for ( unsigned i=0; i<value.length(); i++ )
1636 if ( !IsWhiteSpace( value[i] ) )
1637 return false;
1638 return true;
1639 }
1640