thymian: 3rdparty/tinyxml/tinyxmlparser.cpp annotate

annotate 3rdparty/tinyxml/tinyxmlparser.cpp @ 0:a4671277546c tip

created the repository for the thymian project

author	ferencd
date	Tue, 17 Aug 2021 11:19:54 +0200
parents
children

rev	line source
ferencd@0	1 /*
ferencd@0	2 www.sourceforge.net/projects/tinyxml
ferencd@0	3 Original code by Lee Thomason (www.grinninglizard.com)
ferencd@0	4
ferencd@0	5 This software is provided 'as-is', without any express or implied
ferencd@0	6 warranty. In no event will the authors be held liable for any
ferencd@0	7 damages arising from the use of this software.
ferencd@0	8
ferencd@0	9 Permission is granted to anyone to use this software for any
ferencd@0	10 purpose, including commercial applications, and to alter it and
ferencd@0	11 redistribute it freely, subject to the following restrictions:
ferencd@0	12
ferencd@0	13 1. The origin of this software must not be misrepresented; you must
ferencd@0	14 not claim that you wrote the original software. If you use this
ferencd@0	15 software in a product, an acknowledgment in the product documentation
ferencd@0	16 would be appreciated but is not required.
ferencd@0	17
ferencd@0	18 2. Altered source versions must be plainly marked as such, and
ferencd@0	19 must not be misrepresented as being the original software.
ferencd@0	20
ferencd@0	21 3. This notice may not be removed or altered from any source
ferencd@0	22 distribution.
ferencd@0	23 */
ferencd@0	24
ferencd@0	25 #include <ctype.h>
ferencd@0	26 #include <stddef.h>
ferencd@0	27
ferencd@0	28 #include "tinyxml.h"
ferencd@0	29
// Uncomment the next line to trace which node type the parser picks.
//#define DEBUG_PARSER
#ifdef DEBUG_PARSER
    // Debug MSVC builds log to the debugger output window; every other
    // configuration falls back to printf.
#   if defined( DEBUG ) && defined( _MSC_VER )
#       include <windows.h>
#       define TIXML_LOG OutputDebugString
#   else
#       define TIXML_LOG printf
#   endif
#endif
ferencd@0	39
ferencd@0	40 // Note tha "PutString" hardcodes the same list. This
ferencd@0	41 // is less flexible than it appears. Changing the entries
ferencd@0	42 // or order will break putstring.
ferencd@0	43 TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] =
ferencd@0	44 {
ferencd@0	45 { "&", 5, '&' },
ferencd@0	46 { "<", 4, '<' },
ferencd@0	47 { ">", 4, '>' },
ferencd@0	48 { """, 6, '\"' },
ferencd@0	49 { "'", 6, '\'' }
ferencd@0	50 };
ferencd@0	51
// Background on UTF-8 and byte order marks:
//     http://www.unicode.org/faq/utf_bom.html
// That page is also the basis of utf8ByteTable below, which maps a lead
// byte to the number of bytes in its sequence. Invalid lead bytes map to 1:
// the result will be junk, but it is passed through as much as possible.
//
// Beware of the non-characters in UTF-8:
//     ef bb bf   (Microsoft "lead bytes")
//     ef bf be
//     ef bf bf

// The three bytes of the Microsoft UTF-8 lead sequence, in order.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;
ferencd@0	65
ferencd@0	66 const int TiXmlBase::utf8ByteTable[256] =
ferencd@0	67 {
ferencd@0	68 // 0 1 2 3 4 5 6 7 8 9 a b c d e f
ferencd@0	69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
ferencd@0	70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
ferencd@0	71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
ferencd@0	72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
ferencd@0	73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
ferencd@0	74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
ferencd@0	75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
ferencd@0	76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
ferencd@0	77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
ferencd@0	78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
ferencd@0	79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
ferencd@0	80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
ferencd@0	81 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
ferencd@0	82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
ferencd@0	83 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
ferencd@0	84 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
ferencd@0	85 };
ferencd@0	86
ferencd@0	87
ferencd@0	88 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
ferencd@0	89 {
ferencd@0	90 const unsigned long BYTE_MASK = 0xBF;
ferencd@0	91 const unsigned long BYTE_MARK = 0x80;
ferencd@0	92 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
ferencd@0	93
ferencd@0	94 if (input < 0x80)
ferencd@0	95 *length = 1;
ferencd@0	96 else if ( input < 0x800 )
ferencd@0	97 *length = 2;
ferencd@0	98 else if ( input < 0x10000 )
ferencd@0	99 *length = 3;
ferencd@0	100 else if ( input < 0x200000 )
ferencd@0	101 *length = 4;
ferencd@0	102 else
ferencd@0	103 { *length = 0; return; } // This code won't covert this correctly anyway.
ferencd@0	104
ferencd@0	105 output += *length;
ferencd@0	106
ferencd@0	107 // Scary scary fall throughs.
ferencd@0	108 switch (*length)
ferencd@0	109 {
ferencd@0	110 case 4:
ferencd@0	111 --output;
ferencd@0	112 *output = static_cast<char>((input \| BYTE_MARK) & BYTE_MASK);
ferencd@0	113 input >>= 6;
ferencd@0	114 case 3:
ferencd@0	115 --output;
ferencd@0	116 *output = static_cast<char>((input \| BYTE_MARK) & BYTE_MASK);
ferencd@0	117 input >>= 6;
ferencd@0	118 case 2:
ferencd@0	119 --output;
ferencd@0	120 *output = static_cast<char>((input \| BYTE_MARK) & BYTE_MASK);
ferencd@0	121 input >>= 6;
ferencd@0	122 case 1:
ferencd@0	123 --output;
ferencd@0	124 output = static_cast<char>(input \| FIRST_BYTE_MARK[length]);
ferencd@0	125 default:
ferencd@0	126 break;
ferencd@0	127 }
ferencd@0	128 }
ferencd@0	129
ferencd@0	130
ferencd@0	131 /static/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /encoding/ )
ferencd@0	132 {
ferencd@0	133 // This will only work for low-ascii, everything else is assumed to be a valid
ferencd@0	134 // letter. I'm not sure this is the best approach, but it is quite tricky trying
ferencd@0	135 // to figure out alhabetical vs. not across encoding. So take a very
ferencd@0	136 // conservative approach.
ferencd@0	137
ferencd@0	138 // if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0	139 // {
ferencd@0	140 if ( anyByte < 127 )
ferencd@0	141 return isalpha( anyByte );
ferencd@0	142 else
ferencd@0	143 return 1; // What else to do? The unicode set is huge...get the english ones right.
ferencd@0	144 // }
ferencd@0	145 // else
ferencd@0	146 // {
ferencd@0	147 // return isalpha( anyByte );
ferencd@0	148 // }
ferencd@0	149 }
ferencd@0	150
ferencd@0	151
ferencd@0	152 /static/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /encoding/ )
ferencd@0	153 {
ferencd@0	154 // This will only work for low-ascii, everything else is assumed to be a valid
ferencd@0	155 // letter. I'm not sure this is the best approach, but it is quite tricky trying
ferencd@0	156 // to figure out alhabetical vs. not across encoding. So take a very
ferencd@0	157 // conservative approach.
ferencd@0	158
ferencd@0	159 // if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0	160 // {
ferencd@0	161 if ( anyByte < 127 )
ferencd@0	162 return isalnum( anyByte );
ferencd@0	163 else
ferencd@0	164 return 1; // What else to do? The unicode set is huge...get the english ones right.
ferencd@0	165 // }
ferencd@0	166 // else
ferencd@0	167 // {
ferencd@0	168 // return isalnum( anyByte );
ferencd@0	169 // }
ferencd@0	170 }
ferencd@0	171
ferencd@0	172
ferencd@0	173 class TiXmlParsingData
ferencd@0	174 {
ferencd@0	175 friend class TiXmlDocument;
ferencd@0	176 public:
ferencd@0	177 void Stamp( const char* now, TiXmlEncoding encoding );
ferencd@0	178
ferencd@0	179 const TiXmlCursor& Cursor() const { return cursor; }
ferencd@0	180
ferencd@0	181 private:
ferencd@0	182 // Only used by the document!
ferencd@0	183 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
ferencd@0	184 {
ferencd@0	185 assert( start );
ferencd@0	186 stamp = start;
ferencd@0	187 tabsize = _tabsize;
ferencd@0	188 cursor.row = row;
ferencd@0	189 cursor.col = col;
ferencd@0	190 }
ferencd@0	191
ferencd@0	192 TiXmlCursor cursor;
ferencd@0	193 const char* stamp;
ferencd@0	194 int tabsize;
ferencd@0	195 };
ferencd@0	196
ferencd@0	197
ferencd@0	198 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
ferencd@0	199 {
ferencd@0	200 assert( now );
ferencd@0	201
ferencd@0	202 // Do nothing if the tabsize is 0.
ferencd@0	203 if ( tabsize < 1 )
ferencd@0	204 {
ferencd@0	205 return;
ferencd@0	206 }
ferencd@0	207
ferencd@0	208 // Get the current row, column.
ferencd@0	209 int row = cursor.row;
ferencd@0	210 int col = cursor.col;
ferencd@0	211 const char* p = stamp;
ferencd@0	212 assert( p );
ferencd@0	213
ferencd@0	214 while ( p < now )
ferencd@0	215 {
ferencd@0	216 // Treat p as unsigned, so we have a happy compiler.
ferencd@0	217 const unsigned char* pU = reinterpret_cast<const unsigned char*>(p);
ferencd@0	218
ferencd@0	219 // Code contributed by Fletcher Dunn: (modified by lee)
ferencd@0	220 switch (*pU) {
ferencd@0	221 case 0:
ferencd@0	222 // We should never get here, but in case we do, don't
ferencd@0	223 // advance past the terminating null character, ever
ferencd@0	224 return;
ferencd@0	225
ferencd@0	226 case '\r':
ferencd@0	227 // bump down to the next line
ferencd@0	228 ++row;
ferencd@0	229 col = 0;
ferencd@0	230 // Eat the character
ferencd@0	231 ++p;
ferencd@0	232
ferencd@0	233 // Check for \r\n sequence, and treat this as a single character
ferencd@0	234 if (*p == '\n') {
ferencd@0	235 ++p;
ferencd@0	236 }
ferencd@0	237 break;
ferencd@0	238
ferencd@0	239 case '\n':
ferencd@0	240 // bump down to the next line
ferencd@0	241 ++row;
ferencd@0	242 col = 0;
ferencd@0	243
ferencd@0	244 // Eat the character
ferencd@0	245 ++p;
ferencd@0	246
ferencd@0	247 // Check for \n\r sequence, and treat this as a single
ferencd@0	248 // character. (Yes, this bizarre thing does occur still
ferencd@0	249 // on some arcane platforms...)
ferencd@0	250 if (*p == '\r') {
ferencd@0	251 ++p;
ferencd@0	252 }
ferencd@0	253 break;
ferencd@0	254
ferencd@0	255 case '\t':
ferencd@0	256 // Eat the character
ferencd@0	257 ++p;
ferencd@0	258
ferencd@0	259 // Skip to next tab stop
ferencd@0	260 col = (col / tabsize + 1) * tabsize;
ferencd@0	261 break;
ferencd@0	262
ferencd@0	263 case TIXML_UTF_LEAD_0:
ferencd@0	264 if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0	265 {
ferencd@0	266 if ( (p+1) && (p+2) )
ferencd@0	267 {
ferencd@0	268 // In these cases, don't advance the column. These are
ferencd@0	269 // 0-width spaces.
ferencd@0	270 if ( (pU+1)==TIXML_UTF_LEAD_1 && (pU+2)==TIXML_UTF_LEAD_2 )
ferencd@0	271 p += 3;
ferencd@0	272 else if ( (pU+1)==0xbfU && (pU+2)==0xbeU )
ferencd@0	273 p += 3;
ferencd@0	274 else if ( (pU+1)==0xbfU && (pU+2)==0xbfU )
ferencd@0	275 p += 3;
ferencd@0	276 else
ferencd@0	277 { p +=3; ++col; } // A normal character.
ferencd@0	278 }
ferencd@0	279 }
ferencd@0	280 else
ferencd@0	281 {
ferencd@0	282 ++p;
ferencd@0	283 ++col;
ferencd@0	284 }
ferencd@0	285 break;
ferencd@0	286
ferencd@0	287 default:
ferencd@0	288 if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0	289 {
ferencd@0	290 // Eat the 1 to 4 byte utf8 character.
ferencd@0	291 int step = TiXmlBase::utf8ByteTable[((const unsigned char)p)];
ferencd@0	292 if ( step == 0 )
ferencd@0	293 step = 1; // Error case from bad encoding, but handle gracefully.
ferencd@0	294 p += step;
ferencd@0	295
ferencd@0	296 // Just advance one column, of course.
ferencd@0	297 ++col;
ferencd@0	298 }
ferencd@0	299 else
ferencd@0	300 {
ferencd@0	301 ++p;
ferencd@0	302 ++col;
ferencd@0	303 }
ferencd@0	304 break;
ferencd@0	305 }
ferencd@0	306 }
ferencd@0	307 cursor.row = row;
ferencd@0	308 cursor.col = col;
ferencd@0	309 assert( cursor.row >= -1 );
ferencd@0	310 assert( cursor.col >= -1 );
ferencd@0	311 stamp = p;
ferencd@0	312 assert( stamp );
ferencd@0	313 }
ferencd@0	314
ferencd@0	315
ferencd@0	316 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
ferencd@0	317 {
ferencd@0	318 if ( !p \|\| !*p )
ferencd@0	319 {
ferencd@0	320 return 0;
ferencd@0	321 }
ferencd@0	322 if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0	323 {
ferencd@0	324 while ( *p )
ferencd@0	325 {
ferencd@0	326 const unsigned char* pU = (const unsigned char*)p;
ferencd@0	327
ferencd@0	328 // Skip the stupid Microsoft UTF-8 Byte order marks
ferencd@0	329 if ( *(pU+0)==TIXML_UTF_LEAD_0
ferencd@0	330 && *(pU+1)==TIXML_UTF_LEAD_1
ferencd@0	331 && *(pU+2)==TIXML_UTF_LEAD_2 )
ferencd@0	332 {
ferencd@0	333 p += 3;
ferencd@0	334 continue;
ferencd@0	335 }
ferencd@0	336 else if(*(pU+0)==TIXML_UTF_LEAD_0
ferencd@0	337 && *(pU+1)==0xbfU
ferencd@0	338 && *(pU+2)==0xbeU )
ferencd@0	339 {
ferencd@0	340 p += 3;
ferencd@0	341 continue;
ferencd@0	342 }
ferencd@0	343 else if(*(pU+0)==TIXML_UTF_LEAD_0
ferencd@0	344 && *(pU+1)==0xbfU
ferencd@0	345 && *(pU+2)==0xbfU )
ferencd@0	346 {
ferencd@0	347 p += 3;
ferencd@0	348 continue;
ferencd@0	349 }
ferencd@0	350
ferencd@0	351 if ( IsWhiteSpace( *p ) ) // Still using old rules for white space.
ferencd@0	352 ++p;
ferencd@0	353 else
ferencd@0	354 break;
ferencd@0	355 }
ferencd@0	356 }
ferencd@0	357 else
ferencd@0	358 {
ferencd@0	359 while ( p && IsWhiteSpace( p ) )
ferencd@0	360 ++p;
ferencd@0	361 }
ferencd@0	362
ferencd@0	363 return p;
ferencd@0	364 }
ferencd@0	365
ferencd@0	366 #ifdef TIXML_USE_STL
ferencd@0	367 /static/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
ferencd@0	368 {
ferencd@0	369 for( ;; )
ferencd@0	370 {
ferencd@0	371 if ( !in->good() ) return false;
ferencd@0	372
ferencd@0	373 int c = in->peek();
ferencd@0	374 // At this scope, we can't get to a document. So fail silently.
ferencd@0	375 if ( !IsWhiteSpace( c ) \|\| c <= 0 )
ferencd@0	376 return true;
ferencd@0	377
ferencd@0	378 *tag += (char) in->get();
ferencd@0	379 }
ferencd@0	380 }
ferencd@0	381
ferencd@0	382 /static/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
ferencd@0	383 {
ferencd@0	384 //assert( character > 0 && character < 128 ); // else it won't work in utf-8
ferencd@0	385 while ( in->good() )
ferencd@0	386 {
ferencd@0	387 int c = in->peek();
ferencd@0	388 if ( c == character )
ferencd@0	389 return true;
ferencd@0	390 if ( c <= 0 ) // Silent failure: can't get document at this scope
ferencd@0	391 return false;
ferencd@0	392
ferencd@0	393 in->get();
ferencd@0	394 *tag += (char) c;
ferencd@0	395 }
ferencd@0	396 return false;
ferencd@0	397 }
ferencd@0	398 #endif
ferencd@0	399
ferencd@0	400 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
ferencd@0	401 // "assign" optimization removes over 10% of the execution time.
ferencd@0	402 //
ferencd@0	403 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
ferencd@0	404 {
ferencd@0	405 // Oddly, not supported on some comilers,
ferencd@0	406 //name->clear();
ferencd@0	407 // So use this:
ferencd@0	408 *name = "";
ferencd@0	409 assert( p );
ferencd@0	410
ferencd@0	411 // Names start with letters or underscores.
ferencd@0	412 // Of course, in unicode, tinyxml has no idea what a letter is. The
ferencd@0	413 // algorithm is generous.
ferencd@0	414 //
ferencd@0	415 // After that, they can be letters, underscores, numbers,
ferencd@0	416 // hyphens, or colons. (Colons are valid ony for namespaces,
ferencd@0	417 // but tinyxml can't tell namespaces from names.)
ferencd@0	418 if ( p && *p
ferencd@0	419 && ( IsAlpha( (unsigned char) p, encoding ) \|\| p == '_' ) )
ferencd@0	420 {
ferencd@0	421 const char* start = p;
ferencd@0	422 while( p && *p
ferencd@0	423 && ( IsAlphaNum( (unsigned char ) *p, encoding )
ferencd@0	424 \|\| *p == '_'
ferencd@0	425 \|\| *p == '-'
ferencd@0	426 \|\| *p == '.'
ferencd@0	427 \|\| *p == ':' ) )
ferencd@0	428 {
ferencd@0	429 //(name) += p; // expensive
ferencd@0	430 ++p;
ferencd@0	431 }
ferencd@0	432 if ( p-start > 0 ) {
ferencd@0	433 name->assign( start, p-start );
ferencd@0	434 }
ferencd@0	435 return p;
ferencd@0	436 }
ferencd@0	437 return 0;
ferencd@0	438 }
ferencd@0	439
ferencd@0	440 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
ferencd@0	441 {
ferencd@0	442 // Presume an entity, and pull it out.
ferencd@0	443 TIXML_STRING ent;
ferencd@0	444 int i;
ferencd@0	445 *length = 0;
ferencd@0	446
ferencd@0	447 if ( (p+1) && (p+1) == '#' && *(p+2) )
ferencd@0	448 {
ferencd@0	449 unsigned long ucs = 0;
ferencd@0	450 ptrdiff_t delta = 0;
ferencd@0	451 unsigned mult = 1;
ferencd@0	452
ferencd@0	453 if ( *(p+2) == 'x' )
ferencd@0	454 {
ferencd@0	455 // Hexadecimal.
ferencd@0	456 if ( !*(p+3) ) return 0;
ferencd@0	457
ferencd@0	458 const char* q = p+3;
ferencd@0	459 q = strchr( q, ';' );
ferencd@0	460
ferencd@0	461 if ( !q \|\| !*q ) return 0;
ferencd@0	462
ferencd@0	463 delta = q-p;
ferencd@0	464 --q;
ferencd@0	465
ferencd@0	466 while ( *q != 'x' )
ferencd@0	467 {
ferencd@0	468 if ( q >= '0' && q <= '9' )
ferencd@0	469 ucs += mult * (*q - '0');
ferencd@0	470 else if ( q >= 'a' && q <= 'f' )
ferencd@0	471 ucs += mult * (*q - 'a' + 10);
ferencd@0	472 else if ( q >= 'A' && q <= 'F' )
ferencd@0	473 ucs += mult * (*q - 'A' + 10 );
ferencd@0	474 else
ferencd@0	475 return 0;
ferencd@0	476 mult *= 16;
ferencd@0	477 --q;
ferencd@0	478 }
ferencd@0	479 }
ferencd@0	480 else
ferencd@0	481 {
ferencd@0	482 // Decimal.
ferencd@0	483 if ( !*(p+2) ) return 0;
ferencd@0	484
ferencd@0	485 const char* q = p+2;
ferencd@0	486 q = strchr( q, ';' );
ferencd@0	487
ferencd@0	488 if ( !q \|\| !*q ) return 0;
ferencd@0	489
ferencd@0	490 delta = q-p;
ferencd@0	491 --q;
ferencd@0	492
ferencd@0	493 while ( *q != '#' )
ferencd@0	494 {
ferencd@0	495 if ( q >= '0' && q <= '9' )
ferencd@0	496 ucs += mult * (*q - '0');
ferencd@0	497 else
ferencd@0	498 return 0;
ferencd@0	499 mult *= 10;
ferencd@0	500 --q;
ferencd@0	501 }
ferencd@0	502 }
ferencd@0	503 if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0	504 {
ferencd@0	505 // convert the UCS to UTF-8
ferencd@0	506 ConvertUTF32ToUTF8( ucs, value, length );
ferencd@0	507 }
ferencd@0	508 else
ferencd@0	509 {
ferencd@0	510 *value = (char)ucs;
ferencd@0	511 *length = 1;
ferencd@0	512 }
ferencd@0	513 return p + delta + 1;
ferencd@0	514 }
ferencd@0	515
ferencd@0	516 // Now try to match it.
ferencd@0	517 for( i=0; i<NUM_ENTITY; ++i )
ferencd@0	518 {
ferencd@0	519 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
ferencd@0	520 {
ferencd@0	521 assert( strlen( entity[i].str ) == entity[i].strLength );
ferencd@0	522 *value = entity[i].chr;
ferencd@0	523 *length = 1;
ferencd@0	524 return ( p + entity[i].strLength );
ferencd@0	525 }
ferencd@0	526 }
ferencd@0	527
ferencd@0	528 // So it wasn't an entity, its unrecognized, or something like that.
ferencd@0	529 value = p; // Don't put back the last one, since we return it!
ferencd@0	530 //*length = 1; // Leave unrecognized entities - this doesn't really work.
ferencd@0	531 // Just writes strange XML.
ferencd@0	532 return p+1;
ferencd@0	533 }
ferencd@0	534
ferencd@0	535
ferencd@0	536 bool TiXmlBase::StringEqual( const char* p,
ferencd@0	537 const char* tag,
ferencd@0	538 bool ignoreCase,
ferencd@0	539 TiXmlEncoding encoding )
ferencd@0	540 {
ferencd@0	541 assert( p );
ferencd@0	542 assert( tag );
ferencd@0	543 if ( !p \|\| !*p )
ferencd@0	544 {
ferencd@0	545 assert( 0 );
ferencd@0	546 return false;
ferencd@0	547 }
ferencd@0	548
ferencd@0	549 const char* q = p;
ferencd@0	550
ferencd@0	551 if ( ignoreCase )
ferencd@0	552 {
ferencd@0	553 while ( q && tag && ToLower( q, encoding ) == ToLower( tag, encoding ) )
ferencd@0	554 {
ferencd@0	555 ++q;
ferencd@0	556 ++tag;
ferencd@0	557 }
ferencd@0	558
ferencd@0	559 if ( *tag == 0 )
ferencd@0	560 return true;
ferencd@0	561 }
ferencd@0	562 else
ferencd@0	563 {
ferencd@0	564 while ( q && tag && q == tag )
ferencd@0	565 {
ferencd@0	566 ++q;
ferencd@0	567 ++tag;
ferencd@0	568 }
ferencd@0	569
ferencd@0	570 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
ferencd@0	571 return true;
ferencd@0	572 }
ferencd@0	573 return false;
ferencd@0	574 }
ferencd@0	575
ferencd@0	576 const char* TiXmlBase::ReadText( const char* p,
ferencd@0	577 TIXML_STRING * text,
ferencd@0	578 bool trimWhiteSpace,
ferencd@0	579 const char* endTag,
ferencd@0	580 bool caseInsensitive,
ferencd@0	581 TiXmlEncoding encoding )
ferencd@0	582 {
ferencd@0	583 *text = "";
ferencd@0	584 if ( !trimWhiteSpace // certain tags always keep whitespace
ferencd@0	585 \|\| !condenseWhiteSpace ) // if true, whitespace is always kept
ferencd@0	586 {
ferencd@0	587 // Keep all the white space.
ferencd@0	588 while ( p && *p
ferencd@0	589 && !StringEqual( p, endTag, caseInsensitive, encoding )
ferencd@0	590 )
ferencd@0	591 {
ferencd@0	592 int len;
ferencd@0	593 char cArr[4] = { 0, 0, 0, 0 };
ferencd@0	594 p = GetChar( p, cArr, &len, encoding );
ferencd@0	595 text->append( cArr, len );
ferencd@0	596 }
ferencd@0	597 }
ferencd@0	598 else
ferencd@0	599 {
ferencd@0	600 bool whitespace = false;
ferencd@0	601
ferencd@0	602 // Remove leading white space:
ferencd@0	603 p = SkipWhiteSpace( p, encoding );
ferencd@0	604 while ( p && *p
ferencd@0	605 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
ferencd@0	606 {
ferencd@0	607 if ( p == '\r' \|\| p == '\n' )
ferencd@0	608 {
ferencd@0	609 whitespace = true;
ferencd@0	610 ++p;
ferencd@0	611 }
ferencd@0	612 else if ( IsWhiteSpace( *p ) )
ferencd@0	613 {
ferencd@0	614 whitespace = true;
ferencd@0	615 ++p;
ferencd@0	616 }
ferencd@0	617 else
ferencd@0	618 {
ferencd@0	619 // If we've found whitespace, add it before the
ferencd@0	620 // new character. Any whitespace just becomes a space.
ferencd@0	621 if ( whitespace )
ferencd@0	622 {
ferencd@0	623 (*text) += ' ';
ferencd@0	624 whitespace = false;
ferencd@0	625 }
ferencd@0	626 int len;
ferencd@0	627 char cArr[4] = { 0, 0, 0, 0 };
ferencd@0	628 p = GetChar( p, cArr, &len, encoding );
ferencd@0	629 if ( len == 1 )
ferencd@0	630 (*text) += cArr[0]; // more efficient
ferencd@0	631 else
ferencd@0	632 text->append( cArr, len );
ferencd@0	633 }
ferencd@0	634 }
ferencd@0	635 }
ferencd@0	636 if ( p && *p )
ferencd@0	637 p += strlen( endTag );
ferencd@0	638 return ( p && *p ) ? p : 0;
ferencd@0	639 }
ferencd@0	640
ferencd@0	641 #ifdef TIXML_USE_STL
ferencd@0	642
ferencd@0	643 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
ferencd@0	644 {
ferencd@0	645 // The basic issue with a document is that we don't know what we're
ferencd@0	646 // streaming. Read something presumed to be a tag (and hope), then
ferencd@0	647 // identify it, and call the appropriate stream method on the tag.
ferencd@0	648 //
ferencd@0	649 // This "pre-streaming" will never read the closing ">" so the
ferencd@0	650 // sub-tag can orient itself.
ferencd@0	651
ferencd@0	652 if ( !StreamTo( in, '<', tag ) )
ferencd@0	653 {
ferencd@0	654 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	655 return;
ferencd@0	656 }
ferencd@0	657
ferencd@0	658 while ( in->good() )
ferencd@0	659 {
ferencd@0	660 int tagIndex = (int) tag->length();
ferencd@0	661 while ( in->good() && in->peek() != '>' )
ferencd@0	662 {
ferencd@0	663 int c = in->get();
ferencd@0	664 if ( c <= 0 )
ferencd@0	665 {
ferencd@0	666 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	667 break;
ferencd@0	668 }
ferencd@0	669 (*tag) += (char) c;
ferencd@0	670 }
ferencd@0	671
ferencd@0	672 if ( in->good() )
ferencd@0	673 {
ferencd@0	674 // We now have something we presume to be a node of
ferencd@0	675 // some sort. Identify it, and call the node to
ferencd@0	676 // continue streaming.
ferencd@0	677 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
ferencd@0	678
ferencd@0	679 if ( node )
ferencd@0	680 {
ferencd@0	681 node->StreamIn( in, tag );
ferencd@0	682 bool isElement = node->ToElement() != 0;
ferencd@0	683 delete node;
ferencd@0	684 node = 0;
ferencd@0	685
ferencd@0	686 // If this is the root element, we're done. Parsing will be
ferencd@0	687 // done by the >> operator.
ferencd@0	688 if ( isElement )
ferencd@0	689 {
ferencd@0	690 return;
ferencd@0	691 }
ferencd@0	692 }
ferencd@0	693 else
ferencd@0	694 {
ferencd@0	695 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	696 return;
ferencd@0	697 }
ferencd@0	698 }
ferencd@0	699 }
ferencd@0	700 // We should have returned sooner.
ferencd@0	701 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	702 }
ferencd@0	703
ferencd@0	704 #endif
ferencd@0	705
ferencd@0	706 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
ferencd@0	707 {
ferencd@0	708 ClearError();
ferencd@0	709
ferencd@0	710 // Parse away, at the document level. Since a document
ferencd@0	711 // contains nothing but other tags, most of what happens
ferencd@0	712 // here is skipping white space.
ferencd@0	713 if ( !p \|\| !*p )
ferencd@0	714 {
ferencd@0	715 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	716 return 0;
ferencd@0	717 }
ferencd@0	718
ferencd@0	719 // Note that, for a document, this needs to come
ferencd@0	720 // before the while space skip, so that parsing
ferencd@0	721 // starts from the pointer we are given.
ferencd@0	722 location.Clear();
ferencd@0	723 if ( prevData )
ferencd@0	724 {
ferencd@0	725 location.row = prevData->cursor.row;
ferencd@0	726 location.col = prevData->cursor.col;
ferencd@0	727 }
ferencd@0	728 else
ferencd@0	729 {
ferencd@0	730 location.row = 0;
ferencd@0	731 location.col = 0;
ferencd@0	732 }
ferencd@0	733 TiXmlParsingData data( p, TabSize(), location.row, location.col );
ferencd@0	734 location = data.Cursor();
ferencd@0	735
ferencd@0	736 if ( encoding == TIXML_ENCODING_UNKNOWN )
ferencd@0	737 {
ferencd@0	738 // Check for the Microsoft UTF-8 lead bytes.
ferencd@0	739 const unsigned char* pU = (const unsigned char*)p;
ferencd@0	740 if ( (pU+0) && (pU+0) == TIXML_UTF_LEAD_0
ferencd@0	741 && (pU+1) && (pU+1) == TIXML_UTF_LEAD_1
ferencd@0	742 && (pU+2) && (pU+2) == TIXML_UTF_LEAD_2 )
ferencd@0	743 {
ferencd@0	744 encoding = TIXML_ENCODING_UTF8;
ferencd@0	745 useMicrosoftBOM = true;
ferencd@0	746 }
ferencd@0	747 }
ferencd@0	748
ferencd@0	749 p = SkipWhiteSpace( p, encoding );
ferencd@0	750 if ( !p )
ferencd@0	751 {
ferencd@0	752 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	753 return 0;
ferencd@0	754 }
ferencd@0	755
ferencd@0	756 while ( p && *p )
ferencd@0	757 {
ferencd@0	758 TiXmlNode* node = Identify( p, encoding );
ferencd@0	759 if ( node )
ferencd@0	760 {
ferencd@0	761 p = node->Parse( p, &data, encoding );
ferencd@0	762 LinkEndChild( node );
ferencd@0	763 }
ferencd@0	764 else
ferencd@0	765 {
ferencd@0	766 break;
ferencd@0	767 }
ferencd@0	768
ferencd@0	769 // Did we get encoding info?
ferencd@0	770 if ( encoding == TIXML_ENCODING_UNKNOWN
ferencd@0	771 && node->ToDeclaration() )
ferencd@0	772 {
ferencd@0	773 TiXmlDeclaration* dec = node->ToDeclaration();
ferencd@0	774 const char* enc = dec->Encoding();
ferencd@0	775 assert( enc );
ferencd@0	776
ferencd@0	777 if ( *enc == 0 )
ferencd@0	778 encoding = TIXML_ENCODING_UTF8;
ferencd@0	779 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
ferencd@0	780 encoding = TIXML_ENCODING_UTF8;
ferencd@0	781 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
ferencd@0	782 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
ferencd@0	783 else
ferencd@0	784 encoding = TIXML_ENCODING_LEGACY;
ferencd@0	785 }
ferencd@0	786
ferencd@0	787 p = SkipWhiteSpace( p, encoding );
ferencd@0	788 }
ferencd@0	789
ferencd@0	790 // Was this empty?
ferencd@0	791 if ( !firstChild ) {
ferencd@0	792 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
ferencd@0	793 return 0;
ferencd@0	794 }
ferencd@0	795
ferencd@0	796 // All is well.
ferencd@0	797 return p;
ferencd@0	798 }
ferencd@0	799
ferencd@0	800 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0	801 {
ferencd@0	802 // The first error in a chain is more accurate - don't set again!
ferencd@0	803 if ( error )
ferencd@0	804 return;
ferencd@0	805
ferencd@0	806 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
ferencd@0	807 error = true;
ferencd@0	808 errorId = err;
ferencd@0	809 errorDesc = errorString[ errorId ];
ferencd@0	810
ferencd@0	811 errorLocation.Clear();
ferencd@0	812 if ( pError && data )
ferencd@0	813 {
ferencd@0	814 data->Stamp( pError, encoding );
ferencd@0	815 errorLocation = data->Cursor();
ferencd@0	816 }
ferencd@0	817 }
ferencd@0	818
ferencd@0	819
ferencd@0	820 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
ferencd@0	821 {
ferencd@0	822 TiXmlNode* returnNode = 0;
ferencd@0	823
ferencd@0	824 p = SkipWhiteSpace( p, encoding );
ferencd@0	825 if( !p \|\| !p \|\| p != '<' )
ferencd@0	826 {
ferencd@0	827 return 0;
ferencd@0	828 }
ferencd@0	829
ferencd@0	830 p = SkipWhiteSpace( p, encoding );
ferencd@0	831
ferencd@0	832 if ( !p \|\| !*p )
ferencd@0	833 {
ferencd@0	834 return 0;
ferencd@0	835 }
ferencd@0	836
ferencd@0	837 // What is this thing?
ferencd@0	838 // - Elements start with a letter or underscore, but xml is reserved.
ferencd@0	839 // - Comments: <!--
ferencd@0	840 // - Decleration: <?xml
ferencd@0	841 // - Everthing else is unknown to tinyxml.
ferencd@0	842 //
ferencd@0	843
ferencd@0	844 const char* xmlHeader = { "<?xml" };
ferencd@0	845 const char* commentHeader = { "<!--" };
ferencd@0	846 const char* dtdHeader = { "<!" };
ferencd@0	847 const char* cdataHeader = { "<![CDATA[" };
ferencd@0	848
ferencd@0	849 if ( StringEqual( p, xmlHeader, true, encoding ) )
ferencd@0	850 {
ferencd@0	851 #ifdef DEBUG_PARSER
ferencd@0	852 TIXML_LOG( "XML parsing Declaration\n" );
ferencd@0	853 #endif
ferencd@0	854 returnNode = new TiXmlDeclaration();
ferencd@0	855 }
ferencd@0	856 else if ( StringEqual( p, commentHeader, false, encoding ) )
ferencd@0	857 {
ferencd@0	858 #ifdef DEBUG_PARSER
ferencd@0	859 TIXML_LOG( "XML parsing Comment\n" );
ferencd@0	860 #endif
ferencd@0	861 returnNode = new TiXmlComment();
ferencd@0	862 }
ferencd@0	863 else if ( StringEqual( p, cdataHeader, false, encoding ) )
ferencd@0	864 {
ferencd@0	865 #ifdef DEBUG_PARSER
ferencd@0	866 TIXML_LOG( "XML parsing CDATA\n" );
ferencd@0	867 #endif
ferencd@0	868 TiXmlText* text = new TiXmlText( "" );
ferencd@0	869 text->SetCDATA( true );
ferencd@0	870 returnNode = text;
ferencd@0	871 }
ferencd@0	872 else if ( StringEqual( p, dtdHeader, false, encoding ) )
ferencd@0	873 {
ferencd@0	874 #ifdef DEBUG_PARSER
ferencd@0	875 TIXML_LOG( "XML parsing Unknown(1)\n" );
ferencd@0	876 #endif
ferencd@0	877 returnNode = new TiXmlUnknown();
ferencd@0	878 }
ferencd@0	879 else if ( IsAlpha( *(p+1), encoding )
ferencd@0	880 \|\| *(p+1) == '_' )
ferencd@0	881 {
ferencd@0	882 #ifdef DEBUG_PARSER
ferencd@0	883 TIXML_LOG( "XML parsing Element\n" );
ferencd@0	884 #endif
ferencd@0	885 returnNode = new TiXmlElement( "" );
ferencd@0	886 }
ferencd@0	887 else
ferencd@0	888 {
ferencd@0	889 #ifdef DEBUG_PARSER
ferencd@0	890 TIXML_LOG( "XML parsing Unknown(2)\n" );
ferencd@0	891 #endif
ferencd@0	892 returnNode = new TiXmlUnknown();
ferencd@0	893 }
ferencd@0	894
ferencd@0	895 if ( returnNode )
ferencd@0	896 {
ferencd@0	897 // Set the parent, so it can report errors
ferencd@0	898 returnNode->parent = this;
ferencd@0	899 }
ferencd@0	900 return returnNode;
ferencd@0	901 }
ferencd@0	902
ferencd@0	903 #ifdef TIXML_USE_STL
ferencd@0	904
ferencd@0	905 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
ferencd@0	906 {
ferencd@0	907 // We're called with some amount of pre-parsing. That is, some of "this"
ferencd@0	908 // element is in "tag". Go ahead and stream to the closing ">"
ferencd@0	909 while( in->good() )
ferencd@0	910 {
ferencd@0	911 int c = in->get();
ferencd@0	912 if ( c <= 0 )
ferencd@0	913 {
ferencd@0	914 TiXmlDocument* document = GetDocument();
ferencd@0	915 if ( document )
ferencd@0	916 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	917 return;
ferencd@0	918 }
ferencd@0	919 (*tag) += (char) c ;
ferencd@0	920
ferencd@0	921 if ( c == '>' )
ferencd@0	922 break;
ferencd@0	923 }
ferencd@0	924
ferencd@0	925 if ( tag->length() < 3 ) return;
ferencd@0	926
ferencd@0	927 // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
ferencd@0	928 // If not, identify and stream.
ferencd@0	929
ferencd@0	930 if ( tag->at( tag->length() - 1 ) == '>'
ferencd@0	931 && tag->at( tag->length() - 2 ) == '/' )
ferencd@0	932 {
ferencd@0	933 // All good!
ferencd@0	934 return;
ferencd@0	935 }
ferencd@0	936 else if ( tag->at( tag->length() - 1 ) == '>' )
ferencd@0	937 {
ferencd@0	938 // There is more. Could be:
ferencd@0	939 // text
ferencd@0	940 // cdata text (which looks like another node)
ferencd@0	941 // closing tag
ferencd@0	942 // another node.
ferencd@0	943 for ( ;; )
ferencd@0	944 {
ferencd@0	945 StreamWhiteSpace( in, tag );
ferencd@0	946
ferencd@0	947 // Do we have text?
ferencd@0	948 if ( in->good() && in->peek() != '<' )
ferencd@0	949 {
ferencd@0	950 // Yep, text.
ferencd@0	951 TiXmlText text( "" );
ferencd@0	952 text.StreamIn( in, tag );
ferencd@0	953
ferencd@0	954 // What follows text is a closing tag or another node.
ferencd@0	955 // Go around again and figure it out.
ferencd@0	956 continue;
ferencd@0	957 }
ferencd@0	958
ferencd@0	959 // We now have either a closing tag...or another node.
ferencd@0	960 // We should be at a "<", regardless.
ferencd@0	961 if ( !in->good() ) return;
ferencd@0	962 assert( in->peek() == '<' );
ferencd@0	963 int tagIndex = (int) tag->length();
ferencd@0	964
ferencd@0	965 bool closingTag = false;
ferencd@0	966 bool firstCharFound = false;
ferencd@0	967
ferencd@0	968 for( ;; )
ferencd@0	969 {
ferencd@0	970 if ( !in->good() )
ferencd@0	971 return;
ferencd@0	972
ferencd@0	973 int c = in->peek();
ferencd@0	974 if ( c <= 0 )
ferencd@0	975 {
ferencd@0	976 TiXmlDocument* document = GetDocument();
ferencd@0	977 if ( document )
ferencd@0	978 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	979 return;
ferencd@0	980 }
ferencd@0	981
ferencd@0	982 if ( c == '>' )
ferencd@0	983 break;
ferencd@0	984
ferencd@0	985 *tag += (char) c;
ferencd@0	986 in->get();
ferencd@0	987
ferencd@0	988 // Early out if we find the CDATA id.
ferencd@0	989 if ( c == '[' && tag->size() >= 9 )
ferencd@0	990 {
ferencd@0	991 size_t len = tag->size();
ferencd@0	992 const char* start = tag->c_str() + len - 9;
ferencd@0	993 if ( strcmp( start, "<![CDATA[" ) == 0 ) {
ferencd@0	994 assert( !closingTag );
ferencd@0	995 break;
ferencd@0	996 }
ferencd@0	997 }
ferencd@0	998
ferencd@0	999 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
ferencd@0	1000 {
ferencd@0	1001 firstCharFound = true;
ferencd@0	1002 if ( c == '/' )
ferencd@0	1003 closingTag = true;
ferencd@0	1004 }
ferencd@0	1005 }
ferencd@0	1006 // If it was a closing tag, then read in the closing '>' to clean up the input stream.
ferencd@0	1007 // If it was not, the streaming will be done by the tag.
ferencd@0	1008 if ( closingTag )
ferencd@0	1009 {
ferencd@0	1010 if ( !in->good() )
ferencd@0	1011 return;
ferencd@0	1012
ferencd@0	1013 int c = in->get();
ferencd@0	1014 if ( c <= 0 )
ferencd@0	1015 {
ferencd@0	1016 TiXmlDocument* document = GetDocument();
ferencd@0	1017 if ( document )
ferencd@0	1018 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	1019 return;
ferencd@0	1020 }
ferencd@0	1021 assert( c == '>' );
ferencd@0	1022 *tag += (char) c;
ferencd@0	1023
ferencd@0	1024 // We are done, once we've found our closing tag.
ferencd@0	1025 return;
ferencd@0	1026 }
ferencd@0	1027 else
ferencd@0	1028 {
ferencd@0	1029 // If not a closing tag, id it, and stream.
ferencd@0	1030 const char* tagloc = tag->c_str() + tagIndex;
ferencd@0	1031 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
ferencd@0	1032 if ( !node )
ferencd@0	1033 return;
ferencd@0	1034 node->StreamIn( in, tag );
ferencd@0	1035 delete node;
ferencd@0	1036 node = 0;
ferencd@0	1037
ferencd@0	1038 // No return: go around from the beginning: text, closing tag, or node.
ferencd@0	1039 }
ferencd@0	1040 }
ferencd@0	1041 }
ferencd@0	1042 }
ferencd@0	1043 #endif
ferencd@0	1044
ferencd@0	1045 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0	1046 {
ferencd@0	1047 p = SkipWhiteSpace( p, encoding );
ferencd@0	1048 TiXmlDocument* document = GetDocument();
ferencd@0	1049
ferencd@0	1050 if ( !p \|\| !*p )
ferencd@0	1051 {
ferencd@0	1052 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
ferencd@0	1053 return 0;
ferencd@0	1054 }
ferencd@0	1055
ferencd@0	1056 if ( data )
ferencd@0	1057 {
ferencd@0	1058 data->Stamp( p, encoding );
ferencd@0	1059 location = data->Cursor();
ferencd@0	1060 }
ferencd@0	1061
ferencd@0	1062 if ( *p != '<' )
ferencd@0	1063 {
ferencd@0	1064 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
ferencd@0	1065 return 0;
ferencd@0	1066 }
ferencd@0	1067
ferencd@0	1068 p = SkipWhiteSpace( p+1, encoding );
ferencd@0	1069
ferencd@0	1070 // Read the name.
ferencd@0	1071 const char* pErr = p;
ferencd@0	1072
ferencd@0	1073 p = ReadName( p, &value, encoding );
ferencd@0	1074 if ( !p \|\| !*p )
ferencd@0	1075 {
ferencd@0	1076 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
ferencd@0	1077 return 0;
ferencd@0	1078 }
ferencd@0	1079
ferencd@0	1080 TIXML_STRING endTag ("</");
ferencd@0	1081 endTag += value;
ferencd@0	1082
ferencd@0	1083 // Check for and read attributes. Also look for an empty
ferencd@0	1084 // tag or an end tag.
ferencd@0	1085 while ( p && *p )
ferencd@0	1086 {
ferencd@0	1087 pErr = p;
ferencd@0	1088 p = SkipWhiteSpace( p, encoding );
ferencd@0	1089 if ( !p \|\| !*p )
ferencd@0	1090 {
ferencd@0	1091 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
ferencd@0	1092 return 0;
ferencd@0	1093 }
ferencd@0	1094 if ( *p == '/' )
ferencd@0	1095 {
ferencd@0	1096 ++p;
ferencd@0	1097 // Empty tag.
ferencd@0	1098 if ( *p != '>' )
ferencd@0	1099 {
ferencd@0	1100 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
ferencd@0	1101 return 0;
ferencd@0	1102 }
ferencd@0	1103 return (p+1);
ferencd@0	1104 }
ferencd@0	1105 else if ( *p == '>' )
ferencd@0	1106 {
ferencd@0	1107 // Done with attributes (if there were any.)
ferencd@0	1108 // Read the value -- which can include other
ferencd@0	1109 // elements -- read the end tag, and return.
ferencd@0	1110 ++p;
ferencd@0	1111 p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
ferencd@0	1112 if ( !p \|\| !*p ) {
ferencd@0	1113 // We were looking for the end tag, but found nothing.
ferencd@0	1114 // Fix for [ 1663758 ] Failure to report error on bad XML
ferencd@0	1115 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
ferencd@0	1116 return 0;
ferencd@0	1117 }
ferencd@0	1118
ferencd@0	1119 // We should find the end tag now
ferencd@0	1120 // note that:
ferencd@0	1121 // </foo > and
ferencd@0	1122 // </foo>
ferencd@0	1123 // are both valid end tags.
ferencd@0	1124 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
ferencd@0	1125 {
ferencd@0	1126 p += endTag.length();
ferencd@0	1127 p = SkipWhiteSpace( p, encoding );
ferencd@0	1128 if ( p && p && p == '>' ) {
ferencd@0	1129 ++p;
ferencd@0	1130 return p;
ferencd@0	1131 }
ferencd@0	1132 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
ferencd@0	1133 return 0;
ferencd@0	1134 }
ferencd@0	1135 else
ferencd@0	1136 {
ferencd@0	1137 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
ferencd@0	1138 return 0;
ferencd@0	1139 }
ferencd@0	1140 }
ferencd@0	1141 else
ferencd@0	1142 {
ferencd@0	1143 // Try to read an attribute:
ferencd@0	1144 TiXmlAttribute* attrib = new TiXmlAttribute();
ferencd@0	1145 if ( !attrib )
ferencd@0	1146 {
ferencd@0	1147 return 0;
ferencd@0	1148 }
ferencd@0	1149
ferencd@0	1150 attrib->SetDocument( document );
ferencd@0	1151 pErr = p;
ferencd@0	1152 p = attrib->Parse( p, data, encoding );
ferencd@0	1153
ferencd@0	1154 if ( !p \|\| !*p )
ferencd@0	1155 {
ferencd@0	1156 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
ferencd@0	1157 delete attrib;
ferencd@0	1158 return 0;
ferencd@0	1159 }
ferencd@0	1160
ferencd@0	1161 // Handle the strange case of double attributes:
ferencd@0	1162 #ifdef TIXML_USE_STL
ferencd@0	1163 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
ferencd@0	1164 #else
ferencd@0	1165 TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
ferencd@0	1166 #endif
ferencd@0	1167 if ( node )
ferencd@0	1168 {
ferencd@0	1169 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
ferencd@0	1170 delete attrib;
ferencd@0	1171 return 0;
ferencd@0	1172 }
ferencd@0	1173
ferencd@0	1174 attributeSet.Add( attrib );
ferencd@0	1175 }
ferencd@0	1176 }
ferencd@0	1177 return p;
ferencd@0	1178 }
ferencd@0	1179
ferencd@0	1180
ferencd@0	1181 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0	1182 {
ferencd@0	1183 TiXmlDocument* document = GetDocument();
ferencd@0	1184
ferencd@0	1185 // Read in text and elements in any order.
ferencd@0	1186 const char* pWithWhiteSpace = p;
ferencd@0	1187 p = SkipWhiteSpace( p, encoding );
ferencd@0	1188
ferencd@0	1189 while ( p && *p )
ferencd@0	1190 {
ferencd@0	1191 if ( *p != '<' )
ferencd@0	1192 {
ferencd@0	1193 // Take what we have, make a text element.
ferencd@0	1194 TiXmlText* textNode = new TiXmlText( "" );
ferencd@0	1195
ferencd@0	1196 if ( !textNode )
ferencd@0	1197 {
ferencd@0	1198 return 0;
ferencd@0	1199 }
ferencd@0	1200
ferencd@0	1201 if ( TiXmlBase::IsWhiteSpaceCondensed() )
ferencd@0	1202 {
ferencd@0	1203 p = textNode->Parse( p, data, encoding );
ferencd@0	1204 }
ferencd@0	1205 else
ferencd@0	1206 {
ferencd@0	1207 // Special case: we want to keep the white space
ferencd@0	1208 // so that leading spaces aren't removed.
ferencd@0	1209 p = textNode->Parse( pWithWhiteSpace, data, encoding );
ferencd@0	1210 }
ferencd@0	1211
ferencd@0	1212 if ( !textNode->Blank() )
ferencd@0	1213 LinkEndChild( textNode );
ferencd@0	1214 else
ferencd@0	1215 delete textNode;
ferencd@0	1216 }
ferencd@0	1217 else
ferencd@0	1218 {
ferencd@0	1219 // We hit a '<'
ferencd@0	1220 // Have we hit a new element or an end tag? This could also be
ferencd@0	1221 // a TiXmlText in the "CDATA" style.
ferencd@0	1222 if ( StringEqual( p, "</", false, encoding ) )
ferencd@0	1223 {
ferencd@0	1224 return p;
ferencd@0	1225 }
ferencd@0	1226 else
ferencd@0	1227 {
ferencd@0	1228 TiXmlNode* node = Identify( p, encoding );
ferencd@0	1229 if ( node )
ferencd@0	1230 {
ferencd@0	1231 p = node->Parse( p, data, encoding );
ferencd@0	1232 LinkEndChild( node );
ferencd@0	1233 }
ferencd@0	1234 else
ferencd@0	1235 {
ferencd@0	1236 return 0;
ferencd@0	1237 }
ferencd@0	1238 }
ferencd@0	1239 }
ferencd@0	1240 pWithWhiteSpace = p;
ferencd@0	1241 p = SkipWhiteSpace( p, encoding );
ferencd@0	1242 }
ferencd@0	1243
ferencd@0	1244 if ( !p )
ferencd@0	1245 {
ferencd@0	1246 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
ferencd@0	1247 }
ferencd@0	1248 return p;
ferencd@0	1249 }
ferencd@0	1250
ferencd@0	1251
ferencd@0	1252 #ifdef TIXML_USE_STL
ferencd@0	1253 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
ferencd@0	1254 {
ferencd@0	1255 while ( in->good() )
ferencd@0	1256 {
ferencd@0	1257 int c = in->get();
ferencd@0	1258 if ( c <= 0 )
ferencd@0	1259 {
ferencd@0	1260 TiXmlDocument* document = GetDocument();
ferencd@0	1261 if ( document )
ferencd@0	1262 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	1263 return;
ferencd@0	1264 }
ferencd@0	1265 (*tag) += (char) c;
ferencd@0	1266
ferencd@0	1267 if ( c == '>' )
ferencd@0	1268 {
ferencd@0	1269 // All is well.
ferencd@0	1270 return;
ferencd@0	1271 }
ferencd@0	1272 }
ferencd@0	1273 }
ferencd@0	1274 #endif
ferencd@0	1275
ferencd@0	1276
ferencd@0	1277 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0	1278 {
ferencd@0	1279 TiXmlDocument* document = GetDocument();
ferencd@0	1280 p = SkipWhiteSpace( p, encoding );
ferencd@0	1281
ferencd@0	1282 if ( data )
ferencd@0	1283 {
ferencd@0	1284 data->Stamp( p, encoding );
ferencd@0	1285 location = data->Cursor();
ferencd@0	1286 }
ferencd@0	1287 if ( !p \|\| !p \|\| p != '<' )
ferencd@0	1288 {
ferencd@0	1289 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
ferencd@0	1290 return 0;
ferencd@0	1291 }
ferencd@0	1292 ++p;
ferencd@0	1293 value = "";
ferencd@0	1294
ferencd@0	1295 while ( p && p && p != '>' )
ferencd@0	1296 {
ferencd@0	1297 value += *p;
ferencd@0	1298 ++p;
ferencd@0	1299 }
ferencd@0	1300
ferencd@0	1301 if ( !p )
ferencd@0	1302 {
ferencd@0	1303 if ( document )
ferencd@0	1304 document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
ferencd@0	1305 }
ferencd@0	1306 if ( p && *p == '>' )
ferencd@0	1307 return p+1;
ferencd@0	1308 return p;
ferencd@0	1309 }
ferencd@0	1310
ferencd@0	1311 #ifdef TIXML_USE_STL
ferencd@0	1312 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
ferencd@0	1313 {
ferencd@0	1314 while ( in->good() )
ferencd@0	1315 {
ferencd@0	1316 int c = in->get();
ferencd@0	1317 if ( c <= 0 )
ferencd@0	1318 {
ferencd@0	1319 TiXmlDocument* document = GetDocument();
ferencd@0	1320 if ( document )
ferencd@0	1321 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	1322 return;
ferencd@0	1323 }
ferencd@0	1324
ferencd@0	1325 (*tag) += (char) c;
ferencd@0	1326
ferencd@0	1327 if ( c == '>'
ferencd@0	1328 && tag->at( tag->length() - 2 ) == '-'
ferencd@0	1329 && tag->at( tag->length() - 3 ) == '-' )
ferencd@0	1330 {
ferencd@0	1331 // All is well.
ferencd@0	1332 return;
ferencd@0	1333 }
ferencd@0	1334 }
ferencd@0	1335 }
ferencd@0	1336 #endif
ferencd@0	1337
ferencd@0	1338
ferencd@0	1339 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0	1340 {
ferencd@0	1341 TiXmlDocument* document = GetDocument();
ferencd@0	1342 value = "";
ferencd@0	1343
ferencd@0	1344 p = SkipWhiteSpace( p, encoding );
ferencd@0	1345
ferencd@0	1346 if ( data )
ferencd@0	1347 {
ferencd@0	1348 data->Stamp( p, encoding );
ferencd@0	1349 location = data->Cursor();
ferencd@0	1350 }
ferencd@0	1351 const char* startTag = "<!--";
ferencd@0	1352 const char* endTag = "-->";
ferencd@0	1353
ferencd@0	1354 if ( !StringEqual( p, startTag, false, encoding ) )
ferencd@0	1355 {
ferencd@0	1356 if ( document )
ferencd@0	1357 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
ferencd@0	1358 return 0;
ferencd@0	1359 }
ferencd@0	1360 p += strlen( startTag );
ferencd@0	1361
ferencd@0	1362 // [ 1475201 ] TinyXML parses entities in comments
ferencd@0	1363 // Oops - ReadText doesn't work, because we don't want to parse the entities.
ferencd@0	1364 // p = ReadText( p, &value, false, endTag, false, encoding );
ferencd@0	1365 //
ferencd@0	1366 // from the XML spec:
ferencd@0	1367 /*
ferencd@0	1368 [Definition: Comments may appear anywhere in a document outside other markup; in addition,
ferencd@0	1369 they may appear within the document type declaration at places allowed by the grammar.
ferencd@0	1370 They are not part of the document's character data; an XML processor MAY, but need not,
ferencd@0	1371 make it possible for an application to retrieve the text of comments. For compatibility,
ferencd@0	1372 the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
ferencd@0	1373 references MUST NOT be recognized within comments.
ferencd@0	1374
ferencd@0	1375 An example of a comment:
ferencd@0	1376
ferencd@0	1377 <!-- declarations for <head> & <body> -->
ferencd@0	1378 */
ferencd@0	1379
ferencd@0	1380 value = "";
ferencd@0	1381 // Keep all the white space.
ferencd@0	1382 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
ferencd@0	1383 {
ferencd@0	1384 value.append( p, 1 );
ferencd@0	1385 ++p;
ferencd@0	1386 }
ferencd@0	1387 if ( p && *p )
ferencd@0	1388 p += strlen( endTag );
ferencd@0	1389
ferencd@0	1390 return p;
ferencd@0	1391 }
ferencd@0	1392
ferencd@0	1393
ferencd@0	1394 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0	1395 {
ferencd@0	1396 p = SkipWhiteSpace( p, encoding );
ferencd@0	1397 if ( !p \|\| !*p ) return 0;
ferencd@0	1398
ferencd@0	1399 if ( data )
ferencd@0	1400 {
ferencd@0	1401 data->Stamp( p, encoding );
ferencd@0	1402 location = data->Cursor();
ferencd@0	1403 }
ferencd@0	1404 // Read the name, the '=' and the value.
ferencd@0	1405 const char* pErr = p;
ferencd@0	1406 p = ReadName( p, &name, encoding );
ferencd@0	1407 if ( !p \|\| !*p )
ferencd@0	1408 {
ferencd@0	1409 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
ferencd@0	1410 return 0;
ferencd@0	1411 }
ferencd@0	1412 p = SkipWhiteSpace( p, encoding );
ferencd@0	1413 if ( !p \|\| !p \|\| p != '=' )
ferencd@0	1414 {
ferencd@0	1415 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
ferencd@0	1416 return 0;
ferencd@0	1417 }
ferencd@0	1418
ferencd@0	1419 ++p; // skip '='
ferencd@0	1420 p = SkipWhiteSpace( p, encoding );
ferencd@0	1421 if ( !p \|\| !*p )
ferencd@0	1422 {
ferencd@0	1423 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
ferencd@0	1424 return 0;
ferencd@0	1425 }
ferencd@0	1426
ferencd@0	1427 const char* end;
ferencd@0	1428 const char SINGLE_QUOTE = '\'';
ferencd@0	1429 const char DOUBLE_QUOTE = '\"';
ferencd@0	1430
ferencd@0	1431 if ( *p == SINGLE_QUOTE )
ferencd@0	1432 {
ferencd@0	1433 ++p;
ferencd@0	1434 end = "\'"; // single quote in string
ferencd@0	1435 p = ReadText( p, &value, false, end, false, encoding );
ferencd@0	1436 }
ferencd@0	1437 else if ( *p == DOUBLE_QUOTE )
ferencd@0	1438 {
ferencd@0	1439 ++p;
ferencd@0	1440 end = "\""; // double quote in string
ferencd@0	1441 p = ReadText( p, &value, false, end, false, encoding );
ferencd@0	1442 }
ferencd@0	1443 else
ferencd@0	1444 {
ferencd@0	1445 // All attribute values should be in single or double quotes.
ferencd@0	1446 // But this is such a common error that the parser will try
ferencd@0	1447 // its best, even without them.
ferencd@0	1448 value = "";
ferencd@0	1449 while ( p && *p // existence
ferencd@0	1450 && !IsWhiteSpace( *p ) // whitespace
ferencd@0	1451 && p != '/' && p != '>' ) // tag end
ferencd@0	1452 {
ferencd@0	1453 if ( p == SINGLE_QUOTE \|\| p == DOUBLE_QUOTE ) {
ferencd@0	1454 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
ferencd@0	1455 // We did not have an opening quote but seem to have a
ferencd@0	1456 // closing one. Give up and throw an error.
ferencd@0	1457 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
ferencd@0	1458 return 0;
ferencd@0	1459 }
ferencd@0	1460 value += *p;
ferencd@0	1461 ++p;
ferencd@0	1462 }
ferencd@0	1463 }
ferencd@0	1464 return p;
ferencd@0	1465 }
ferencd@0	1466
ferencd@0	1467 #ifdef TIXML_USE_STL
ferencd@0	1468 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
ferencd@0	1469 {
ferencd@0	1470 while ( in->good() )
ferencd@0	1471 {
ferencd@0	1472 int c = in->peek();
ferencd@0	1473 if ( !cdata && (c == '<' ) )
ferencd@0	1474 {
ferencd@0	1475 return;
ferencd@0	1476 }
ferencd@0	1477 if ( c <= 0 )
ferencd@0	1478 {
ferencd@0	1479 TiXmlDocument* document = GetDocument();
ferencd@0	1480 if ( document )
ferencd@0	1481 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	1482 return;
ferencd@0	1483 }
ferencd@0	1484
ferencd@0	1485 (*tag) += (char) c;
ferencd@0	1486 in->get(); // "commits" the peek made above
ferencd@0	1487
ferencd@0	1488 if ( cdata && c == '>' && tag->size() >= 3 ) {
ferencd@0	1489 size_t len = tag->size();
ferencd@0	1490 if ( (tag)[len-2] == ']' && (tag)[len-3] == ']' ) {
ferencd@0	1491 // terminator of cdata.
ferencd@0	1492 return;
ferencd@0	1493 }
ferencd@0	1494 }
ferencd@0	1495 }
ferencd@0	1496 }
ferencd@0	1497 #endif
ferencd@0	1498
ferencd@0	1499 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0	1500 {
ferencd@0	1501 value = "";
ferencd@0	1502 TiXmlDocument* document = GetDocument();
ferencd@0	1503
ferencd@0	1504 if ( data )
ferencd@0	1505 {
ferencd@0	1506 data->Stamp( p, encoding );
ferencd@0	1507 location = data->Cursor();
ferencd@0	1508 }
ferencd@0	1509
ferencd@0	1510 const char* const startTag = "<![CDATA[";
ferencd@0	1511 const char* const endTag = "]]>";
ferencd@0	1512
ferencd@0	1513 if ( cdata \|\| StringEqual( p, startTag, false, encoding ) )
ferencd@0	1514 {
ferencd@0	1515 cdata = true;
ferencd@0	1516
ferencd@0	1517 if ( !StringEqual( p, startTag, false, encoding ) )
ferencd@0	1518 {
ferencd@0	1519 if ( document )
ferencd@0	1520 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
ferencd@0	1521 return 0;
ferencd@0	1522 }
ferencd@0	1523 p += strlen( startTag );
ferencd@0	1524
ferencd@0	1525 // Keep all the white space, ignore the encoding, etc.
ferencd@0	1526 while ( p && *p
ferencd@0	1527 && !StringEqual( p, endTag, false, encoding )
ferencd@0	1528 )
ferencd@0	1529 {
ferencd@0	1530 value += *p;
ferencd@0	1531 ++p;
ferencd@0	1532 }
ferencd@0	1533
ferencd@0	1534 TIXML_STRING dummy;
ferencd@0	1535 p = ReadText( p, &dummy, false, endTag, false, encoding );
ferencd@0	1536 return p;
ferencd@0	1537 }
ferencd@0	1538 else
ferencd@0	1539 {
ferencd@0	1540 bool ignoreWhite = true;
ferencd@0	1541
ferencd@0	1542 const char* end = "<";
ferencd@0	1543 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
ferencd@0	1544 if ( p && *p )
ferencd@0	1545 return p-1; // don't truncate the '<'
ferencd@0	1546 return 0;
ferencd@0	1547 }
ferencd@0	1548 }
ferencd@0	1549
ferencd@0	1550 #ifdef TIXML_USE_STL
ferencd@0	1551 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
ferencd@0	1552 {
ferencd@0	1553 while ( in->good() )
ferencd@0	1554 {
ferencd@0	1555 int c = in->get();
ferencd@0	1556 if ( c <= 0 )
ferencd@0	1557 {
ferencd@0	1558 TiXmlDocument* document = GetDocument();
ferencd@0	1559 if ( document )
ferencd@0	1560 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0	1561 return;
ferencd@0	1562 }
ferencd@0	1563 (*tag) += (char) c;
ferencd@0	1564
ferencd@0	1565 if ( c == '>' )
ferencd@0	1566 {
ferencd@0	1567 // All is well.
ferencd@0	1568 return;
ferencd@0	1569 }
ferencd@0	1570 }
ferencd@0	1571 }
ferencd@0	1572 #endif
ferencd@0	1573
ferencd@0	1574 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
ferencd@0	1575 {
ferencd@0	1576 p = SkipWhiteSpace( p, _encoding );
ferencd@0	1577 // Find the beginning, find the end, and look for
ferencd@0	1578 // the stuff in-between.
ferencd@0	1579 TiXmlDocument* document = GetDocument();
ferencd@0	1580 if ( !p \|\| !*p \|\| !StringEqual( p, "<?xml", true, _encoding ) )
ferencd@0	1581 {
ferencd@0	1582 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
ferencd@0	1583 return 0;
ferencd@0	1584 }
ferencd@0	1585 if ( data )
ferencd@0	1586 {
ferencd@0	1587 data->Stamp( p, _encoding );
ferencd@0	1588 location = data->Cursor();
ferencd@0	1589 }
ferencd@0	1590 p += 5;
ferencd@0	1591
ferencd@0	1592 version = "";
ferencd@0	1593 encoding = "";
ferencd@0	1594 standalone = "";
ferencd@0	1595
ferencd@0	1596 while ( p && *p )
ferencd@0	1597 {
ferencd@0	1598 if ( *p == '>' )
ferencd@0	1599 {
ferencd@0	1600 ++p;
ferencd@0	1601 return p;
ferencd@0	1602 }
ferencd@0	1603
ferencd@0	1604 p = SkipWhiteSpace( p, _encoding );
ferencd@0	1605 if ( StringEqual( p, "version", true, _encoding ) )
ferencd@0	1606 {
ferencd@0	1607 TiXmlAttribute attrib;
ferencd@0	1608 p = attrib.Parse( p, data, _encoding );
ferencd@0	1609 version = attrib.Value();
ferencd@0	1610 }
ferencd@0	1611 else if ( StringEqual( p, "encoding", true, _encoding ) )
ferencd@0	1612 {
ferencd@0	1613 TiXmlAttribute attrib;
ferencd@0	1614 p = attrib.Parse( p, data, _encoding );
ferencd@0	1615 encoding = attrib.Value();
ferencd@0	1616 }
ferencd@0	1617 else if ( StringEqual( p, "standalone", true, _encoding ) )
ferencd@0	1618 {
ferencd@0	1619 TiXmlAttribute attrib;
ferencd@0	1620 p = attrib.Parse( p, data, _encoding );
ferencd@0	1621 standalone = attrib.Value();
ferencd@0	1622 }
ferencd@0	1623 else
ferencd@0	1624 {
ferencd@0	1625 // Read over whatever it is.
ferencd@0	1626 while( p && p && p != '>' && !IsWhiteSpace( *p ) )
ferencd@0	1627 ++p;
ferencd@0	1628 }
ferencd@0	1629 }
ferencd@0	1630 return 0;
ferencd@0	1631 }
ferencd@0	1632
ferencd@0	1633 bool TiXmlText::Blank() const
ferencd@0	1634 {
ferencd@0	1635 for ( unsigned i=0; i<value.length(); i++ )
ferencd@0	1636 if ( !IsWhiteSpace( value[i] ) )
ferencd@0	1637 return false;
ferencd@0	1638 return true;
ferencd@0	1639 }
ferencd@0	1640

Mercurial > thymian

annotate 3rdparty/tinyxml/tinyxmlparser.cpp @ 0:a4671277546c tip