annotate 3rdparty/tinyxml/tinyxmlparser.cpp @ 0:a4671277546c tip

created the repository for the thymian project
author ferencd
date Tue, 17 Aug 2021 11:19:54 +0200
parents
children
rev   line source
ferencd@0 1 /*
ferencd@0 2 www.sourceforge.net/projects/tinyxml
ferencd@0 3 Original code by Lee Thomason (www.grinninglizard.com)
ferencd@0 4
ferencd@0 5 This software is provided 'as-is', without any express or implied
ferencd@0 6 warranty. In no event will the authors be held liable for any
ferencd@0 7 damages arising from the use of this software.
ferencd@0 8
ferencd@0 9 Permission is granted to anyone to use this software for any
ferencd@0 10 purpose, including commercial applications, and to alter it and
ferencd@0 11 redistribute it freely, subject to the following restrictions:
ferencd@0 12
ferencd@0 13 1. The origin of this software must not be misrepresented; you must
ferencd@0 14 not claim that you wrote the original software. If you use this
ferencd@0 15 software in a product, an acknowledgment in the product documentation
ferencd@0 16 would be appreciated but is not required.
ferencd@0 17
ferencd@0 18 2. Altered source versions must be plainly marked as such, and
ferencd@0 19 must not be misrepresented as being the original software.
ferencd@0 20
ferencd@0 21 3. This notice may not be removed or altered from any source
ferencd@0 22 distribution.
ferencd@0 23 */
ferencd@0 24
ferencd@0 25 #include <ctype.h>
ferencd@0 26 #include <stddef.h>
ferencd@0 27
ferencd@0 28 #include "tinyxml.h"
ferencd@0 29
ferencd@0 30 //#define DEBUG_PARSER
ferencd@0 31 #if defined( DEBUG_PARSER )
ferencd@0 32 # if defined( DEBUG ) && defined( _MSC_VER )
ferencd@0 33 # include <windows.h>
ferencd@0 34 # define TIXML_LOG OutputDebugString
ferencd@0 35 # else
ferencd@0 36 # define TIXML_LOG printf
ferencd@0 37 # endif
ferencd@0 38 #endif
ferencd@0 39
ferencd@0 40 // Note that "PutString" hardcodes the same list. This
ferencd@0 41 // is less flexible than it appears. Changing the entries
ferencd@0 42 // or order will break putstring.
ferencd@0 43 TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] =
ferencd@0 44 {
ferencd@0 45 { "&amp;", 5, '&' },
ferencd@0 46 { "&lt;", 4, '<' },
ferencd@0 47 { "&gt;", 4, '>' },
ferencd@0 48 { "&quot;", 6, '\"' },
ferencd@0 49 { "&apos;", 6, '\'' }
ferencd@0 50 };
ferencd@0 51
ferencd@0 52 // Bunch of unicode info at:
ferencd@0 53 // http://www.unicode.org/faq/utf_bom.html
ferencd@0 54 // Including the basic of this table, which determines the #bytes in the
ferencd@0 55 // sequence from the lead byte. 1 placed for invalid sequences --
ferencd@0 56 // although the result will be junk, pass it through as much as possible.
ferencd@0 57 // Beware of the non-characters in UTF-8:
ferencd@0 58 // ef bb bf (Microsoft "lead bytes")
ferencd@0 59 // ef bf be
ferencd@0 60 // ef bf bf
ferencd@0 61
// The three bytes (ef bb bf) of the Microsoft UTF-8 "lead bytes" sequence.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFu;   // first byte
const unsigned char TIXML_UTF_LEAD_1 = 0xBBu;   // second byte
const unsigned char TIXML_UTF_LEAD_2 = 0xBFu;   // third byte
ferencd@0 65
ferencd@0 66 const int TiXmlBase::utf8ByteTable[256] =
ferencd@0 67 {
ferencd@0 68 // 0 1 2 3 4 5 6 7 8 9 a b c d e f
ferencd@0 69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
ferencd@0 70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
ferencd@0 71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
ferencd@0 72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
ferencd@0 73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
ferencd@0 74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
ferencd@0 75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
ferencd@0 76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
ferencd@0 77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
ferencd@0 78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
ferencd@0 79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
ferencd@0 80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
ferencd@0 81 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
ferencd@0 82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
ferencd@0 83 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
ferencd@0 84 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
ferencd@0 85 };
ferencd@0 86
ferencd@0 87
ferencd@0 88 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
ferencd@0 89 {
ferencd@0 90 const unsigned long BYTE_MASK = 0xBF;
ferencd@0 91 const unsigned long BYTE_MARK = 0x80;
ferencd@0 92 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
ferencd@0 93
ferencd@0 94 if (input < 0x80)
ferencd@0 95 *length = 1;
ferencd@0 96 else if ( input < 0x800 )
ferencd@0 97 *length = 2;
ferencd@0 98 else if ( input < 0x10000 )
ferencd@0 99 *length = 3;
ferencd@0 100 else if ( input < 0x200000 )
ferencd@0 101 *length = 4;
ferencd@0 102 else
ferencd@0 103 { *length = 0; return; } // This code won't covert this correctly anyway.
ferencd@0 104
ferencd@0 105 output += *length;
ferencd@0 106
ferencd@0 107 // Scary scary fall throughs.
ferencd@0 108 switch (*length)
ferencd@0 109 {
ferencd@0 110 case 4:
ferencd@0 111 --output;
ferencd@0 112 *output = static_cast<char>((input | BYTE_MARK) & BYTE_MASK);
ferencd@0 113 input >>= 6;
ferencd@0 114 case 3:
ferencd@0 115 --output;
ferencd@0 116 *output = static_cast<char>((input | BYTE_MARK) & BYTE_MASK);
ferencd@0 117 input >>= 6;
ferencd@0 118 case 2:
ferencd@0 119 --output;
ferencd@0 120 *output = static_cast<char>((input | BYTE_MARK) & BYTE_MASK);
ferencd@0 121 input >>= 6;
ferencd@0 122 case 1:
ferencd@0 123 --output;
ferencd@0 124 *output = static_cast<char>(input | FIRST_BYTE_MARK[*length]);
ferencd@0 125 default:
ferencd@0 126 break;
ferencd@0 127 }
ferencd@0 128 }
ferencd@0 129
ferencd@0 130
ferencd@0 131 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
ferencd@0 132 {
ferencd@0 133 // This will only work for low-ascii, everything else is assumed to be a valid
ferencd@0 134 // letter. I'm not sure this is the best approach, but it is quite tricky trying
ferencd@0 135 // to figure out alhabetical vs. not across encoding. So take a very
ferencd@0 136 // conservative approach.
ferencd@0 137
ferencd@0 138 // if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0 139 // {
ferencd@0 140 if ( anyByte < 127 )
ferencd@0 141 return isalpha( anyByte );
ferencd@0 142 else
ferencd@0 143 return 1; // What else to do? The unicode set is huge...get the english ones right.
ferencd@0 144 // }
ferencd@0 145 // else
ferencd@0 146 // {
ferencd@0 147 // return isalpha( anyByte );
ferencd@0 148 // }
ferencd@0 149 }
ferencd@0 150
ferencd@0 151
ferencd@0 152 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
ferencd@0 153 {
ferencd@0 154 // This will only work for low-ascii, everything else is assumed to be a valid
ferencd@0 155 // letter. I'm not sure this is the best approach, but it is quite tricky trying
ferencd@0 156 // to figure out alhabetical vs. not across encoding. So take a very
ferencd@0 157 // conservative approach.
ferencd@0 158
ferencd@0 159 // if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0 160 // {
ferencd@0 161 if ( anyByte < 127 )
ferencd@0 162 return isalnum( anyByte );
ferencd@0 163 else
ferencd@0 164 return 1; // What else to do? The unicode set is huge...get the english ones right.
ferencd@0 165 // }
ferencd@0 166 // else
ferencd@0 167 // {
ferencd@0 168 // return isalnum( anyByte );
ferencd@0 169 // }
ferencd@0 170 }
ferencd@0 171
ferencd@0 172
ferencd@0 173 class TiXmlParsingData
ferencd@0 174 {
ferencd@0 175 friend class TiXmlDocument;
ferencd@0 176 public:
ferencd@0 177 void Stamp( const char* now, TiXmlEncoding encoding );
ferencd@0 178
ferencd@0 179 const TiXmlCursor& Cursor() const { return cursor; }
ferencd@0 180
ferencd@0 181 private:
ferencd@0 182 // Only used by the document!
ferencd@0 183 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
ferencd@0 184 {
ferencd@0 185 assert( start );
ferencd@0 186 stamp = start;
ferencd@0 187 tabsize = _tabsize;
ferencd@0 188 cursor.row = row;
ferencd@0 189 cursor.col = col;
ferencd@0 190 }
ferencd@0 191
ferencd@0 192 TiXmlCursor cursor;
ferencd@0 193 const char* stamp;
ferencd@0 194 int tabsize;
ferencd@0 195 };
ferencd@0 196
ferencd@0 197
ferencd@0 198 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
ferencd@0 199 {
ferencd@0 200 assert( now );
ferencd@0 201
ferencd@0 202 // Do nothing if the tabsize is 0.
ferencd@0 203 if ( tabsize < 1 )
ferencd@0 204 {
ferencd@0 205 return;
ferencd@0 206 }
ferencd@0 207
ferencd@0 208 // Get the current row, column.
ferencd@0 209 int row = cursor.row;
ferencd@0 210 int col = cursor.col;
ferencd@0 211 const char* p = stamp;
ferencd@0 212 assert( p );
ferencd@0 213
ferencd@0 214 while ( p < now )
ferencd@0 215 {
ferencd@0 216 // Treat p as unsigned, so we have a happy compiler.
ferencd@0 217 const unsigned char* pU = reinterpret_cast<const unsigned char*>(p);
ferencd@0 218
ferencd@0 219 // Code contributed by Fletcher Dunn: (modified by lee)
ferencd@0 220 switch (*pU) {
ferencd@0 221 case 0:
ferencd@0 222 // We *should* never get here, but in case we do, don't
ferencd@0 223 // advance past the terminating null character, ever
ferencd@0 224 return;
ferencd@0 225
ferencd@0 226 case '\r':
ferencd@0 227 // bump down to the next line
ferencd@0 228 ++row;
ferencd@0 229 col = 0;
ferencd@0 230 // Eat the character
ferencd@0 231 ++p;
ferencd@0 232
ferencd@0 233 // Check for \r\n sequence, and treat this as a single character
ferencd@0 234 if (*p == '\n') {
ferencd@0 235 ++p;
ferencd@0 236 }
ferencd@0 237 break;
ferencd@0 238
ferencd@0 239 case '\n':
ferencd@0 240 // bump down to the next line
ferencd@0 241 ++row;
ferencd@0 242 col = 0;
ferencd@0 243
ferencd@0 244 // Eat the character
ferencd@0 245 ++p;
ferencd@0 246
ferencd@0 247 // Check for \n\r sequence, and treat this as a single
ferencd@0 248 // character. (Yes, this bizarre thing does occur still
ferencd@0 249 // on some arcane platforms...)
ferencd@0 250 if (*p == '\r') {
ferencd@0 251 ++p;
ferencd@0 252 }
ferencd@0 253 break;
ferencd@0 254
ferencd@0 255 case '\t':
ferencd@0 256 // Eat the character
ferencd@0 257 ++p;
ferencd@0 258
ferencd@0 259 // Skip to next tab stop
ferencd@0 260 col = (col / tabsize + 1) * tabsize;
ferencd@0 261 break;
ferencd@0 262
ferencd@0 263 case TIXML_UTF_LEAD_0:
ferencd@0 264 if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0 265 {
ferencd@0 266 if ( *(p+1) && *(p+2) )
ferencd@0 267 {
ferencd@0 268 // In these cases, don't advance the column. These are
ferencd@0 269 // 0-width spaces.
ferencd@0 270 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
ferencd@0 271 p += 3;
ferencd@0 272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
ferencd@0 273 p += 3;
ferencd@0 274 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
ferencd@0 275 p += 3;
ferencd@0 276 else
ferencd@0 277 { p +=3; ++col; } // A normal character.
ferencd@0 278 }
ferencd@0 279 }
ferencd@0 280 else
ferencd@0 281 {
ferencd@0 282 ++p;
ferencd@0 283 ++col;
ferencd@0 284 }
ferencd@0 285 break;
ferencd@0 286
ferencd@0 287 default:
ferencd@0 288 if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0 289 {
ferencd@0 290 // Eat the 1 to 4 byte utf8 character.
ferencd@0 291 int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
ferencd@0 292 if ( step == 0 )
ferencd@0 293 step = 1; // Error case from bad encoding, but handle gracefully.
ferencd@0 294 p += step;
ferencd@0 295
ferencd@0 296 // Just advance one column, of course.
ferencd@0 297 ++col;
ferencd@0 298 }
ferencd@0 299 else
ferencd@0 300 {
ferencd@0 301 ++p;
ferencd@0 302 ++col;
ferencd@0 303 }
ferencd@0 304 break;
ferencd@0 305 }
ferencd@0 306 }
ferencd@0 307 cursor.row = row;
ferencd@0 308 cursor.col = col;
ferencd@0 309 assert( cursor.row >= -1 );
ferencd@0 310 assert( cursor.col >= -1 );
ferencd@0 311 stamp = p;
ferencd@0 312 assert( stamp );
ferencd@0 313 }
ferencd@0 314
ferencd@0 315
ferencd@0 316 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
ferencd@0 317 {
ferencd@0 318 if ( !p || !*p )
ferencd@0 319 {
ferencd@0 320 return 0;
ferencd@0 321 }
ferencd@0 322 if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0 323 {
ferencd@0 324 while ( *p )
ferencd@0 325 {
ferencd@0 326 const unsigned char* pU = (const unsigned char*)p;
ferencd@0 327
ferencd@0 328 // Skip the stupid Microsoft UTF-8 Byte order marks
ferencd@0 329 if ( *(pU+0)==TIXML_UTF_LEAD_0
ferencd@0 330 && *(pU+1)==TIXML_UTF_LEAD_1
ferencd@0 331 && *(pU+2)==TIXML_UTF_LEAD_2 )
ferencd@0 332 {
ferencd@0 333 p += 3;
ferencd@0 334 continue;
ferencd@0 335 }
ferencd@0 336 else if(*(pU+0)==TIXML_UTF_LEAD_0
ferencd@0 337 && *(pU+1)==0xbfU
ferencd@0 338 && *(pU+2)==0xbeU )
ferencd@0 339 {
ferencd@0 340 p += 3;
ferencd@0 341 continue;
ferencd@0 342 }
ferencd@0 343 else if(*(pU+0)==TIXML_UTF_LEAD_0
ferencd@0 344 && *(pU+1)==0xbfU
ferencd@0 345 && *(pU+2)==0xbfU )
ferencd@0 346 {
ferencd@0 347 p += 3;
ferencd@0 348 continue;
ferencd@0 349 }
ferencd@0 350
ferencd@0 351 if ( IsWhiteSpace( *p ) ) // Still using old rules for white space.
ferencd@0 352 ++p;
ferencd@0 353 else
ferencd@0 354 break;
ferencd@0 355 }
ferencd@0 356 }
ferencd@0 357 else
ferencd@0 358 {
ferencd@0 359 while ( *p && IsWhiteSpace( *p ) )
ferencd@0 360 ++p;
ferencd@0 361 }
ferencd@0 362
ferencd@0 363 return p;
ferencd@0 364 }
ferencd@0 365
ferencd@0 366 #ifdef TIXML_USE_STL
ferencd@0 367 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
ferencd@0 368 {
ferencd@0 369 for( ;; )
ferencd@0 370 {
ferencd@0 371 if ( !in->good() ) return false;
ferencd@0 372
ferencd@0 373 int c = in->peek();
ferencd@0 374 // At this scope, we can't get to a document. So fail silently.
ferencd@0 375 if ( !IsWhiteSpace( c ) || c <= 0 )
ferencd@0 376 return true;
ferencd@0 377
ferencd@0 378 *tag += (char) in->get();
ferencd@0 379 }
ferencd@0 380 }
ferencd@0 381
ferencd@0 382 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
ferencd@0 383 {
ferencd@0 384 //assert( character > 0 && character < 128 ); // else it won't work in utf-8
ferencd@0 385 while ( in->good() )
ferencd@0 386 {
ferencd@0 387 int c = in->peek();
ferencd@0 388 if ( c == character )
ferencd@0 389 return true;
ferencd@0 390 if ( c <= 0 ) // Silent failure: can't get document at this scope
ferencd@0 391 return false;
ferencd@0 392
ferencd@0 393 in->get();
ferencd@0 394 *tag += (char) c;
ferencd@0 395 }
ferencd@0 396 return false;
ferencd@0 397 }
ferencd@0 398 #endif
ferencd@0 399
ferencd@0 400 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
ferencd@0 401 // "assign" optimization removes over 10% of the execution time.
ferencd@0 402 //
ferencd@0 403 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
ferencd@0 404 {
ferencd@0 405 // Oddly, not supported on some comilers,
ferencd@0 406 //name->clear();
ferencd@0 407 // So use this:
ferencd@0 408 *name = "";
ferencd@0 409 assert( p );
ferencd@0 410
ferencd@0 411 // Names start with letters or underscores.
ferencd@0 412 // Of course, in unicode, tinyxml has no idea what a letter *is*. The
ferencd@0 413 // algorithm is generous.
ferencd@0 414 //
ferencd@0 415 // After that, they can be letters, underscores, numbers,
ferencd@0 416 // hyphens, or colons. (Colons are valid ony for namespaces,
ferencd@0 417 // but tinyxml can't tell namespaces from names.)
ferencd@0 418 if ( p && *p
ferencd@0 419 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
ferencd@0 420 {
ferencd@0 421 const char* start = p;
ferencd@0 422 while( p && *p
ferencd@0 423 && ( IsAlphaNum( (unsigned char ) *p, encoding )
ferencd@0 424 || *p == '_'
ferencd@0 425 || *p == '-'
ferencd@0 426 || *p == '.'
ferencd@0 427 || *p == ':' ) )
ferencd@0 428 {
ferencd@0 429 //(*name) += *p; // expensive
ferencd@0 430 ++p;
ferencd@0 431 }
ferencd@0 432 if ( p-start > 0 ) {
ferencd@0 433 name->assign( start, p-start );
ferencd@0 434 }
ferencd@0 435 return p;
ferencd@0 436 }
ferencd@0 437 return 0;
ferencd@0 438 }
ferencd@0 439
ferencd@0 440 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
ferencd@0 441 {
ferencd@0 442 // Presume an entity, and pull it out.
ferencd@0 443 TIXML_STRING ent;
ferencd@0 444 int i;
ferencd@0 445 *length = 0;
ferencd@0 446
ferencd@0 447 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
ferencd@0 448 {
ferencd@0 449 unsigned long ucs = 0;
ferencd@0 450 ptrdiff_t delta = 0;
ferencd@0 451 unsigned mult = 1;
ferencd@0 452
ferencd@0 453 if ( *(p+2) == 'x' )
ferencd@0 454 {
ferencd@0 455 // Hexadecimal.
ferencd@0 456 if ( !*(p+3) ) return 0;
ferencd@0 457
ferencd@0 458 const char* q = p+3;
ferencd@0 459 q = strchr( q, ';' );
ferencd@0 460
ferencd@0 461 if ( !q || !*q ) return 0;
ferencd@0 462
ferencd@0 463 delta = q-p;
ferencd@0 464 --q;
ferencd@0 465
ferencd@0 466 while ( *q != 'x' )
ferencd@0 467 {
ferencd@0 468 if ( *q >= '0' && *q <= '9' )
ferencd@0 469 ucs += mult * (*q - '0');
ferencd@0 470 else if ( *q >= 'a' && *q <= 'f' )
ferencd@0 471 ucs += mult * (*q - 'a' + 10);
ferencd@0 472 else if ( *q >= 'A' && *q <= 'F' )
ferencd@0 473 ucs += mult * (*q - 'A' + 10 );
ferencd@0 474 else
ferencd@0 475 return 0;
ferencd@0 476 mult *= 16;
ferencd@0 477 --q;
ferencd@0 478 }
ferencd@0 479 }
ferencd@0 480 else
ferencd@0 481 {
ferencd@0 482 // Decimal.
ferencd@0 483 if ( !*(p+2) ) return 0;
ferencd@0 484
ferencd@0 485 const char* q = p+2;
ferencd@0 486 q = strchr( q, ';' );
ferencd@0 487
ferencd@0 488 if ( !q || !*q ) return 0;
ferencd@0 489
ferencd@0 490 delta = q-p;
ferencd@0 491 --q;
ferencd@0 492
ferencd@0 493 while ( *q != '#' )
ferencd@0 494 {
ferencd@0 495 if ( *q >= '0' && *q <= '9' )
ferencd@0 496 ucs += mult * (*q - '0');
ferencd@0 497 else
ferencd@0 498 return 0;
ferencd@0 499 mult *= 10;
ferencd@0 500 --q;
ferencd@0 501 }
ferencd@0 502 }
ferencd@0 503 if ( encoding == TIXML_ENCODING_UTF8 )
ferencd@0 504 {
ferencd@0 505 // convert the UCS to UTF-8
ferencd@0 506 ConvertUTF32ToUTF8( ucs, value, length );
ferencd@0 507 }
ferencd@0 508 else
ferencd@0 509 {
ferencd@0 510 *value = (char)ucs;
ferencd@0 511 *length = 1;
ferencd@0 512 }
ferencd@0 513 return p + delta + 1;
ferencd@0 514 }
ferencd@0 515
ferencd@0 516 // Now try to match it.
ferencd@0 517 for( i=0; i<NUM_ENTITY; ++i )
ferencd@0 518 {
ferencd@0 519 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
ferencd@0 520 {
ferencd@0 521 assert( strlen( entity[i].str ) == entity[i].strLength );
ferencd@0 522 *value = entity[i].chr;
ferencd@0 523 *length = 1;
ferencd@0 524 return ( p + entity[i].strLength );
ferencd@0 525 }
ferencd@0 526 }
ferencd@0 527
ferencd@0 528 // So it wasn't an entity, its unrecognized, or something like that.
ferencd@0 529 *value = *p; // Don't put back the last one, since we return it!
ferencd@0 530 //*length = 1; // Leave unrecognized entities - this doesn't really work.
ferencd@0 531 // Just writes strange XML.
ferencd@0 532 return p+1;
ferencd@0 533 }
ferencd@0 534
ferencd@0 535
ferencd@0 536 bool TiXmlBase::StringEqual( const char* p,
ferencd@0 537 const char* tag,
ferencd@0 538 bool ignoreCase,
ferencd@0 539 TiXmlEncoding encoding )
ferencd@0 540 {
ferencd@0 541 assert( p );
ferencd@0 542 assert( tag );
ferencd@0 543 if ( !p || !*p )
ferencd@0 544 {
ferencd@0 545 assert( 0 );
ferencd@0 546 return false;
ferencd@0 547 }
ferencd@0 548
ferencd@0 549 const char* q = p;
ferencd@0 550
ferencd@0 551 if ( ignoreCase )
ferencd@0 552 {
ferencd@0 553 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
ferencd@0 554 {
ferencd@0 555 ++q;
ferencd@0 556 ++tag;
ferencd@0 557 }
ferencd@0 558
ferencd@0 559 if ( *tag == 0 )
ferencd@0 560 return true;
ferencd@0 561 }
ferencd@0 562 else
ferencd@0 563 {
ferencd@0 564 while ( *q && *tag && *q == *tag )
ferencd@0 565 {
ferencd@0 566 ++q;
ferencd@0 567 ++tag;
ferencd@0 568 }
ferencd@0 569
ferencd@0 570 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
ferencd@0 571 return true;
ferencd@0 572 }
ferencd@0 573 return false;
ferencd@0 574 }
ferencd@0 575
ferencd@0 576 const char* TiXmlBase::ReadText( const char* p,
ferencd@0 577 TIXML_STRING * text,
ferencd@0 578 bool trimWhiteSpace,
ferencd@0 579 const char* endTag,
ferencd@0 580 bool caseInsensitive,
ferencd@0 581 TiXmlEncoding encoding )
ferencd@0 582 {
ferencd@0 583 *text = "";
ferencd@0 584 if ( !trimWhiteSpace // certain tags always keep whitespace
ferencd@0 585 || !condenseWhiteSpace ) // if true, whitespace is always kept
ferencd@0 586 {
ferencd@0 587 // Keep all the white space.
ferencd@0 588 while ( p && *p
ferencd@0 589 && !StringEqual( p, endTag, caseInsensitive, encoding )
ferencd@0 590 )
ferencd@0 591 {
ferencd@0 592 int len;
ferencd@0 593 char cArr[4] = { 0, 0, 0, 0 };
ferencd@0 594 p = GetChar( p, cArr, &len, encoding );
ferencd@0 595 text->append( cArr, len );
ferencd@0 596 }
ferencd@0 597 }
ferencd@0 598 else
ferencd@0 599 {
ferencd@0 600 bool whitespace = false;
ferencd@0 601
ferencd@0 602 // Remove leading white space:
ferencd@0 603 p = SkipWhiteSpace( p, encoding );
ferencd@0 604 while ( p && *p
ferencd@0 605 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
ferencd@0 606 {
ferencd@0 607 if ( *p == '\r' || *p == '\n' )
ferencd@0 608 {
ferencd@0 609 whitespace = true;
ferencd@0 610 ++p;
ferencd@0 611 }
ferencd@0 612 else if ( IsWhiteSpace( *p ) )
ferencd@0 613 {
ferencd@0 614 whitespace = true;
ferencd@0 615 ++p;
ferencd@0 616 }
ferencd@0 617 else
ferencd@0 618 {
ferencd@0 619 // If we've found whitespace, add it before the
ferencd@0 620 // new character. Any whitespace just becomes a space.
ferencd@0 621 if ( whitespace )
ferencd@0 622 {
ferencd@0 623 (*text) += ' ';
ferencd@0 624 whitespace = false;
ferencd@0 625 }
ferencd@0 626 int len;
ferencd@0 627 char cArr[4] = { 0, 0, 0, 0 };
ferencd@0 628 p = GetChar( p, cArr, &len, encoding );
ferencd@0 629 if ( len == 1 )
ferencd@0 630 (*text) += cArr[0]; // more efficient
ferencd@0 631 else
ferencd@0 632 text->append( cArr, len );
ferencd@0 633 }
ferencd@0 634 }
ferencd@0 635 }
ferencd@0 636 if ( p && *p )
ferencd@0 637 p += strlen( endTag );
ferencd@0 638 return ( p && *p ) ? p : 0;
ferencd@0 639 }
ferencd@0 640
ferencd@0 641 #ifdef TIXML_USE_STL
ferencd@0 642
ferencd@0 643 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
ferencd@0 644 {
ferencd@0 645 // The basic issue with a document is that we don't know what we're
ferencd@0 646 // streaming. Read something presumed to be a tag (and hope), then
ferencd@0 647 // identify it, and call the appropriate stream method on the tag.
ferencd@0 648 //
ferencd@0 649 // This "pre-streaming" will never read the closing ">" so the
ferencd@0 650 // sub-tag can orient itself.
ferencd@0 651
ferencd@0 652 if ( !StreamTo( in, '<', tag ) )
ferencd@0 653 {
ferencd@0 654 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 655 return;
ferencd@0 656 }
ferencd@0 657
ferencd@0 658 while ( in->good() )
ferencd@0 659 {
ferencd@0 660 int tagIndex = (int) tag->length();
ferencd@0 661 while ( in->good() && in->peek() != '>' )
ferencd@0 662 {
ferencd@0 663 int c = in->get();
ferencd@0 664 if ( c <= 0 )
ferencd@0 665 {
ferencd@0 666 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 667 break;
ferencd@0 668 }
ferencd@0 669 (*tag) += (char) c;
ferencd@0 670 }
ferencd@0 671
ferencd@0 672 if ( in->good() )
ferencd@0 673 {
ferencd@0 674 // We now have something we presume to be a node of
ferencd@0 675 // some sort. Identify it, and call the node to
ferencd@0 676 // continue streaming.
ferencd@0 677 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
ferencd@0 678
ferencd@0 679 if ( node )
ferencd@0 680 {
ferencd@0 681 node->StreamIn( in, tag );
ferencd@0 682 bool isElement = node->ToElement() != 0;
ferencd@0 683 delete node;
ferencd@0 684 node = 0;
ferencd@0 685
ferencd@0 686 // If this is the root element, we're done. Parsing will be
ferencd@0 687 // done by the >> operator.
ferencd@0 688 if ( isElement )
ferencd@0 689 {
ferencd@0 690 return;
ferencd@0 691 }
ferencd@0 692 }
ferencd@0 693 else
ferencd@0 694 {
ferencd@0 695 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 696 return;
ferencd@0 697 }
ferencd@0 698 }
ferencd@0 699 }
ferencd@0 700 // We should have returned sooner.
ferencd@0 701 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 702 }
ferencd@0 703
ferencd@0 704 #endif
ferencd@0 705
ferencd@0 706 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
ferencd@0 707 {
ferencd@0 708 ClearError();
ferencd@0 709
ferencd@0 710 // Parse away, at the document level. Since a document
ferencd@0 711 // contains nothing but other tags, most of what happens
ferencd@0 712 // here is skipping white space.
ferencd@0 713 if ( !p || !*p )
ferencd@0 714 {
ferencd@0 715 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 716 return 0;
ferencd@0 717 }
ferencd@0 718
ferencd@0 719 // Note that, for a document, this needs to come
ferencd@0 720 // before the while space skip, so that parsing
ferencd@0 721 // starts from the pointer we are given.
ferencd@0 722 location.Clear();
ferencd@0 723 if ( prevData )
ferencd@0 724 {
ferencd@0 725 location.row = prevData->cursor.row;
ferencd@0 726 location.col = prevData->cursor.col;
ferencd@0 727 }
ferencd@0 728 else
ferencd@0 729 {
ferencd@0 730 location.row = 0;
ferencd@0 731 location.col = 0;
ferencd@0 732 }
ferencd@0 733 TiXmlParsingData data( p, TabSize(), location.row, location.col );
ferencd@0 734 location = data.Cursor();
ferencd@0 735
ferencd@0 736 if ( encoding == TIXML_ENCODING_UNKNOWN )
ferencd@0 737 {
ferencd@0 738 // Check for the Microsoft UTF-8 lead bytes.
ferencd@0 739 const unsigned char* pU = (const unsigned char*)p;
ferencd@0 740 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
ferencd@0 741 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
ferencd@0 742 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
ferencd@0 743 {
ferencd@0 744 encoding = TIXML_ENCODING_UTF8;
ferencd@0 745 useMicrosoftBOM = true;
ferencd@0 746 }
ferencd@0 747 }
ferencd@0 748
ferencd@0 749 p = SkipWhiteSpace( p, encoding );
ferencd@0 750 if ( !p )
ferencd@0 751 {
ferencd@0 752 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 753 return 0;
ferencd@0 754 }
ferencd@0 755
ferencd@0 756 while ( p && *p )
ferencd@0 757 {
ferencd@0 758 TiXmlNode* node = Identify( p, encoding );
ferencd@0 759 if ( node )
ferencd@0 760 {
ferencd@0 761 p = node->Parse( p, &data, encoding );
ferencd@0 762 LinkEndChild( node );
ferencd@0 763 }
ferencd@0 764 else
ferencd@0 765 {
ferencd@0 766 break;
ferencd@0 767 }
ferencd@0 768
ferencd@0 769 // Did we get encoding info?
ferencd@0 770 if ( encoding == TIXML_ENCODING_UNKNOWN
ferencd@0 771 && node->ToDeclaration() )
ferencd@0 772 {
ferencd@0 773 TiXmlDeclaration* dec = node->ToDeclaration();
ferencd@0 774 const char* enc = dec->Encoding();
ferencd@0 775 assert( enc );
ferencd@0 776
ferencd@0 777 if ( *enc == 0 )
ferencd@0 778 encoding = TIXML_ENCODING_UTF8;
ferencd@0 779 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
ferencd@0 780 encoding = TIXML_ENCODING_UTF8;
ferencd@0 781 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
ferencd@0 782 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
ferencd@0 783 else
ferencd@0 784 encoding = TIXML_ENCODING_LEGACY;
ferencd@0 785 }
ferencd@0 786
ferencd@0 787 p = SkipWhiteSpace( p, encoding );
ferencd@0 788 }
ferencd@0 789
ferencd@0 790 // Was this empty?
ferencd@0 791 if ( !firstChild ) {
ferencd@0 792 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
ferencd@0 793 return 0;
ferencd@0 794 }
ferencd@0 795
ferencd@0 796 // All is well.
ferencd@0 797 return p;
ferencd@0 798 }
ferencd@0 799
ferencd@0 800 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0 801 {
ferencd@0 802 // The first error in a chain is more accurate - don't set again!
ferencd@0 803 if ( error )
ferencd@0 804 return;
ferencd@0 805
ferencd@0 806 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
ferencd@0 807 error = true;
ferencd@0 808 errorId = err;
ferencd@0 809 errorDesc = errorString[ errorId ];
ferencd@0 810
ferencd@0 811 errorLocation.Clear();
ferencd@0 812 if ( pError && data )
ferencd@0 813 {
ferencd@0 814 data->Stamp( pError, encoding );
ferencd@0 815 errorLocation = data->Cursor();
ferencd@0 816 }
ferencd@0 817 }
ferencd@0 818
ferencd@0 819
ferencd@0 820 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
ferencd@0 821 {
ferencd@0 822 TiXmlNode* returnNode = 0;
ferencd@0 823
ferencd@0 824 p = SkipWhiteSpace( p, encoding );
ferencd@0 825 if( !p || !*p || *p != '<' )
ferencd@0 826 {
ferencd@0 827 return 0;
ferencd@0 828 }
ferencd@0 829
ferencd@0 830 p = SkipWhiteSpace( p, encoding );
ferencd@0 831
ferencd@0 832 if ( !p || !*p )
ferencd@0 833 {
ferencd@0 834 return 0;
ferencd@0 835 }
ferencd@0 836
ferencd@0 837 // What is this thing?
ferencd@0 838 // - Elements start with a letter or underscore, but xml is reserved.
ferencd@0 839 // - Comments: <!--
ferencd@0 840 // - Decleration: <?xml
ferencd@0 841 // - Everthing else is unknown to tinyxml.
ferencd@0 842 //
ferencd@0 843
ferencd@0 844 const char* xmlHeader = { "<?xml" };
ferencd@0 845 const char* commentHeader = { "<!--" };
ferencd@0 846 const char* dtdHeader = { "<!" };
ferencd@0 847 const char* cdataHeader = { "<![CDATA[" };
ferencd@0 848
ferencd@0 849 if ( StringEqual( p, xmlHeader, true, encoding ) )
ferencd@0 850 {
ferencd@0 851 #ifdef DEBUG_PARSER
ferencd@0 852 TIXML_LOG( "XML parsing Declaration\n" );
ferencd@0 853 #endif
ferencd@0 854 returnNode = new TiXmlDeclaration();
ferencd@0 855 }
ferencd@0 856 else if ( StringEqual( p, commentHeader, false, encoding ) )
ferencd@0 857 {
ferencd@0 858 #ifdef DEBUG_PARSER
ferencd@0 859 TIXML_LOG( "XML parsing Comment\n" );
ferencd@0 860 #endif
ferencd@0 861 returnNode = new TiXmlComment();
ferencd@0 862 }
ferencd@0 863 else if ( StringEqual( p, cdataHeader, false, encoding ) )
ferencd@0 864 {
ferencd@0 865 #ifdef DEBUG_PARSER
ferencd@0 866 TIXML_LOG( "XML parsing CDATA\n" );
ferencd@0 867 #endif
ferencd@0 868 TiXmlText* text = new TiXmlText( "" );
ferencd@0 869 text->SetCDATA( true );
ferencd@0 870 returnNode = text;
ferencd@0 871 }
ferencd@0 872 else if ( StringEqual( p, dtdHeader, false, encoding ) )
ferencd@0 873 {
ferencd@0 874 #ifdef DEBUG_PARSER
ferencd@0 875 TIXML_LOG( "XML parsing Unknown(1)\n" );
ferencd@0 876 #endif
ferencd@0 877 returnNode = new TiXmlUnknown();
ferencd@0 878 }
ferencd@0 879 else if ( IsAlpha( *(p+1), encoding )
ferencd@0 880 || *(p+1) == '_' )
ferencd@0 881 {
ferencd@0 882 #ifdef DEBUG_PARSER
ferencd@0 883 TIXML_LOG( "XML parsing Element\n" );
ferencd@0 884 #endif
ferencd@0 885 returnNode = new TiXmlElement( "" );
ferencd@0 886 }
ferencd@0 887 else
ferencd@0 888 {
ferencd@0 889 #ifdef DEBUG_PARSER
ferencd@0 890 TIXML_LOG( "XML parsing Unknown(2)\n" );
ferencd@0 891 #endif
ferencd@0 892 returnNode = new TiXmlUnknown();
ferencd@0 893 }
ferencd@0 894
ferencd@0 895 if ( returnNode )
ferencd@0 896 {
ferencd@0 897 // Set the parent, so it can report errors
ferencd@0 898 returnNode->parent = this;
ferencd@0 899 }
ferencd@0 900 return returnNode;
ferencd@0 901 }
ferencd@0 902
ferencd@0 903 #ifdef TIXML_USE_STL
ferencd@0 904
ferencd@0 905 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
ferencd@0 906 {
ferencd@0 907 // We're called with some amount of pre-parsing. That is, some of "this"
ferencd@0 908 // element is in "tag". Go ahead and stream to the closing ">"
ferencd@0 909 while( in->good() )
ferencd@0 910 {
ferencd@0 911 int c = in->get();
ferencd@0 912 if ( c <= 0 )
ferencd@0 913 {
ferencd@0 914 TiXmlDocument* document = GetDocument();
ferencd@0 915 if ( document )
ferencd@0 916 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 917 return;
ferencd@0 918 }
ferencd@0 919 (*tag) += (char) c ;
ferencd@0 920
ferencd@0 921 if ( c == '>' )
ferencd@0 922 break;
ferencd@0 923 }
ferencd@0 924
ferencd@0 925 if ( tag->length() < 3 ) return;
ferencd@0 926
ferencd@0 927 // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
ferencd@0 928 // If not, identify and stream.
ferencd@0 929
ferencd@0 930 if ( tag->at( tag->length() - 1 ) == '>'
ferencd@0 931 && tag->at( tag->length() - 2 ) == '/' )
ferencd@0 932 {
ferencd@0 933 // All good!
ferencd@0 934 return;
ferencd@0 935 }
ferencd@0 936 else if ( tag->at( tag->length() - 1 ) == '>' )
ferencd@0 937 {
ferencd@0 938 // There is more. Could be:
ferencd@0 939 // text
ferencd@0 940 // cdata text (which looks like another node)
ferencd@0 941 // closing tag
ferencd@0 942 // another node.
ferencd@0 943 for ( ;; )
ferencd@0 944 {
ferencd@0 945 StreamWhiteSpace( in, tag );
ferencd@0 946
ferencd@0 947 // Do we have text?
ferencd@0 948 if ( in->good() && in->peek() != '<' )
ferencd@0 949 {
ferencd@0 950 // Yep, text.
ferencd@0 951 TiXmlText text( "" );
ferencd@0 952 text.StreamIn( in, tag );
ferencd@0 953
ferencd@0 954 // What follows text is a closing tag or another node.
ferencd@0 955 // Go around again and figure it out.
ferencd@0 956 continue;
ferencd@0 957 }
ferencd@0 958
ferencd@0 959 // We now have either a closing tag...or another node.
ferencd@0 960 // We should be at a "<", regardless.
ferencd@0 961 if ( !in->good() ) return;
ferencd@0 962 assert( in->peek() == '<' );
ferencd@0 963 int tagIndex = (int) tag->length();
ferencd@0 964
ferencd@0 965 bool closingTag = false;
ferencd@0 966 bool firstCharFound = false;
ferencd@0 967
ferencd@0 968 for( ;; )
ferencd@0 969 {
ferencd@0 970 if ( !in->good() )
ferencd@0 971 return;
ferencd@0 972
ferencd@0 973 int c = in->peek();
ferencd@0 974 if ( c <= 0 )
ferencd@0 975 {
ferencd@0 976 TiXmlDocument* document = GetDocument();
ferencd@0 977 if ( document )
ferencd@0 978 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 979 return;
ferencd@0 980 }
ferencd@0 981
ferencd@0 982 if ( c == '>' )
ferencd@0 983 break;
ferencd@0 984
ferencd@0 985 *tag += (char) c;
ferencd@0 986 in->get();
ferencd@0 987
ferencd@0 988 // Early out if we find the CDATA id.
ferencd@0 989 if ( c == '[' && tag->size() >= 9 )
ferencd@0 990 {
ferencd@0 991 size_t len = tag->size();
ferencd@0 992 const char* start = tag->c_str() + len - 9;
ferencd@0 993 if ( strcmp( start, "<![CDATA[" ) == 0 ) {
ferencd@0 994 assert( !closingTag );
ferencd@0 995 break;
ferencd@0 996 }
ferencd@0 997 }
ferencd@0 998
ferencd@0 999 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
ferencd@0 1000 {
ferencd@0 1001 firstCharFound = true;
ferencd@0 1002 if ( c == '/' )
ferencd@0 1003 closingTag = true;
ferencd@0 1004 }
ferencd@0 1005 }
ferencd@0 1006 // If it was a closing tag, then read in the closing '>' to clean up the input stream.
ferencd@0 1007 // If it was not, the streaming will be done by the tag.
ferencd@0 1008 if ( closingTag )
ferencd@0 1009 {
ferencd@0 1010 if ( !in->good() )
ferencd@0 1011 return;
ferencd@0 1012
ferencd@0 1013 int c = in->get();
ferencd@0 1014 if ( c <= 0 )
ferencd@0 1015 {
ferencd@0 1016 TiXmlDocument* document = GetDocument();
ferencd@0 1017 if ( document )
ferencd@0 1018 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 1019 return;
ferencd@0 1020 }
ferencd@0 1021 assert( c == '>' );
ferencd@0 1022 *tag += (char) c;
ferencd@0 1023
ferencd@0 1024 // We are done, once we've found our closing tag.
ferencd@0 1025 return;
ferencd@0 1026 }
ferencd@0 1027 else
ferencd@0 1028 {
ferencd@0 1029 // If not a closing tag, id it, and stream.
ferencd@0 1030 const char* tagloc = tag->c_str() + tagIndex;
ferencd@0 1031 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
ferencd@0 1032 if ( !node )
ferencd@0 1033 return;
ferencd@0 1034 node->StreamIn( in, tag );
ferencd@0 1035 delete node;
ferencd@0 1036 node = 0;
ferencd@0 1037
ferencd@0 1038 // No return: go around from the beginning: text, closing tag, or node.
ferencd@0 1039 }
ferencd@0 1040 }
ferencd@0 1041 }
ferencd@0 1042 }
ferencd@0 1043 #endif
ferencd@0 1044
ferencd@0 1045 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0 1046 {
ferencd@0 1047 p = SkipWhiteSpace( p, encoding );
ferencd@0 1048 TiXmlDocument* document = GetDocument();
ferencd@0 1049
ferencd@0 1050 if ( !p || !*p )
ferencd@0 1051 {
ferencd@0 1052 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
ferencd@0 1053 return 0;
ferencd@0 1054 }
ferencd@0 1055
ferencd@0 1056 if ( data )
ferencd@0 1057 {
ferencd@0 1058 data->Stamp( p, encoding );
ferencd@0 1059 location = data->Cursor();
ferencd@0 1060 }
ferencd@0 1061
ferencd@0 1062 if ( *p != '<' )
ferencd@0 1063 {
ferencd@0 1064 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
ferencd@0 1065 return 0;
ferencd@0 1066 }
ferencd@0 1067
ferencd@0 1068 p = SkipWhiteSpace( p+1, encoding );
ferencd@0 1069
ferencd@0 1070 // Read the name.
ferencd@0 1071 const char* pErr = p;
ferencd@0 1072
ferencd@0 1073 p = ReadName( p, &value, encoding );
ferencd@0 1074 if ( !p || !*p )
ferencd@0 1075 {
ferencd@0 1076 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
ferencd@0 1077 return 0;
ferencd@0 1078 }
ferencd@0 1079
ferencd@0 1080 TIXML_STRING endTag ("</");
ferencd@0 1081 endTag += value;
ferencd@0 1082
ferencd@0 1083 // Check for and read attributes. Also look for an empty
ferencd@0 1084 // tag or an end tag.
ferencd@0 1085 while ( p && *p )
ferencd@0 1086 {
ferencd@0 1087 pErr = p;
ferencd@0 1088 p = SkipWhiteSpace( p, encoding );
ferencd@0 1089 if ( !p || !*p )
ferencd@0 1090 {
ferencd@0 1091 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
ferencd@0 1092 return 0;
ferencd@0 1093 }
ferencd@0 1094 if ( *p == '/' )
ferencd@0 1095 {
ferencd@0 1096 ++p;
ferencd@0 1097 // Empty tag.
ferencd@0 1098 if ( *p != '>' )
ferencd@0 1099 {
ferencd@0 1100 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
ferencd@0 1101 return 0;
ferencd@0 1102 }
ferencd@0 1103 return (p+1);
ferencd@0 1104 }
ferencd@0 1105 else if ( *p == '>' )
ferencd@0 1106 {
ferencd@0 1107 // Done with attributes (if there were any.)
ferencd@0 1108 // Read the value -- which can include other
ferencd@0 1109 // elements -- read the end tag, and return.
ferencd@0 1110 ++p;
ferencd@0 1111 p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
ferencd@0 1112 if ( !p || !*p ) {
ferencd@0 1113 // We were looking for the end tag, but found nothing.
ferencd@0 1114 // Fix for [ 1663758 ] Failure to report error on bad XML
ferencd@0 1115 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
ferencd@0 1116 return 0;
ferencd@0 1117 }
ferencd@0 1118
ferencd@0 1119 // We should find the end tag now
ferencd@0 1120 // note that:
ferencd@0 1121 // </foo > and
ferencd@0 1122 // </foo>
ferencd@0 1123 // are both valid end tags.
ferencd@0 1124 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
ferencd@0 1125 {
ferencd@0 1126 p += endTag.length();
ferencd@0 1127 p = SkipWhiteSpace( p, encoding );
ferencd@0 1128 if ( p && *p && *p == '>' ) {
ferencd@0 1129 ++p;
ferencd@0 1130 return p;
ferencd@0 1131 }
ferencd@0 1132 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
ferencd@0 1133 return 0;
ferencd@0 1134 }
ferencd@0 1135 else
ferencd@0 1136 {
ferencd@0 1137 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
ferencd@0 1138 return 0;
ferencd@0 1139 }
ferencd@0 1140 }
ferencd@0 1141 else
ferencd@0 1142 {
ferencd@0 1143 // Try to read an attribute:
ferencd@0 1144 TiXmlAttribute* attrib = new TiXmlAttribute();
ferencd@0 1145 if ( !attrib )
ferencd@0 1146 {
ferencd@0 1147 return 0;
ferencd@0 1148 }
ferencd@0 1149
ferencd@0 1150 attrib->SetDocument( document );
ferencd@0 1151 pErr = p;
ferencd@0 1152 p = attrib->Parse( p, data, encoding );
ferencd@0 1153
ferencd@0 1154 if ( !p || !*p )
ferencd@0 1155 {
ferencd@0 1156 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
ferencd@0 1157 delete attrib;
ferencd@0 1158 return 0;
ferencd@0 1159 }
ferencd@0 1160
ferencd@0 1161 // Handle the strange case of double attributes:
ferencd@0 1162 #ifdef TIXML_USE_STL
ferencd@0 1163 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
ferencd@0 1164 #else
ferencd@0 1165 TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
ferencd@0 1166 #endif
ferencd@0 1167 if ( node )
ferencd@0 1168 {
ferencd@0 1169 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
ferencd@0 1170 delete attrib;
ferencd@0 1171 return 0;
ferencd@0 1172 }
ferencd@0 1173
ferencd@0 1174 attributeSet.Add( attrib );
ferencd@0 1175 }
ferencd@0 1176 }
ferencd@0 1177 return p;
ferencd@0 1178 }
ferencd@0 1179
ferencd@0 1180
ferencd@0 1181 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0 1182 {
ferencd@0 1183 TiXmlDocument* document = GetDocument();
ferencd@0 1184
ferencd@0 1185 // Read in text and elements in any order.
ferencd@0 1186 const char* pWithWhiteSpace = p;
ferencd@0 1187 p = SkipWhiteSpace( p, encoding );
ferencd@0 1188
ferencd@0 1189 while ( p && *p )
ferencd@0 1190 {
ferencd@0 1191 if ( *p != '<' )
ferencd@0 1192 {
ferencd@0 1193 // Take what we have, make a text element.
ferencd@0 1194 TiXmlText* textNode = new TiXmlText( "" );
ferencd@0 1195
ferencd@0 1196 if ( !textNode )
ferencd@0 1197 {
ferencd@0 1198 return 0;
ferencd@0 1199 }
ferencd@0 1200
ferencd@0 1201 if ( TiXmlBase::IsWhiteSpaceCondensed() )
ferencd@0 1202 {
ferencd@0 1203 p = textNode->Parse( p, data, encoding );
ferencd@0 1204 }
ferencd@0 1205 else
ferencd@0 1206 {
ferencd@0 1207 // Special case: we want to keep the white space
ferencd@0 1208 // so that leading spaces aren't removed.
ferencd@0 1209 p = textNode->Parse( pWithWhiteSpace, data, encoding );
ferencd@0 1210 }
ferencd@0 1211
ferencd@0 1212 if ( !textNode->Blank() )
ferencd@0 1213 LinkEndChild( textNode );
ferencd@0 1214 else
ferencd@0 1215 delete textNode;
ferencd@0 1216 }
ferencd@0 1217 else
ferencd@0 1218 {
ferencd@0 1219 // We hit a '<'
ferencd@0 1220 // Have we hit a new element or an end tag? This could also be
ferencd@0 1221 // a TiXmlText in the "CDATA" style.
ferencd@0 1222 if ( StringEqual( p, "</", false, encoding ) )
ferencd@0 1223 {
ferencd@0 1224 return p;
ferencd@0 1225 }
ferencd@0 1226 else
ferencd@0 1227 {
ferencd@0 1228 TiXmlNode* node = Identify( p, encoding );
ferencd@0 1229 if ( node )
ferencd@0 1230 {
ferencd@0 1231 p = node->Parse( p, data, encoding );
ferencd@0 1232 LinkEndChild( node );
ferencd@0 1233 }
ferencd@0 1234 else
ferencd@0 1235 {
ferencd@0 1236 return 0;
ferencd@0 1237 }
ferencd@0 1238 }
ferencd@0 1239 }
ferencd@0 1240 pWithWhiteSpace = p;
ferencd@0 1241 p = SkipWhiteSpace( p, encoding );
ferencd@0 1242 }
ferencd@0 1243
ferencd@0 1244 if ( !p )
ferencd@0 1245 {
ferencd@0 1246 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
ferencd@0 1247 }
ferencd@0 1248 return p;
ferencd@0 1249 }
ferencd@0 1250
ferencd@0 1251
ferencd@0 1252 #ifdef TIXML_USE_STL
ferencd@0 1253 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
ferencd@0 1254 {
ferencd@0 1255 while ( in->good() )
ferencd@0 1256 {
ferencd@0 1257 int c = in->get();
ferencd@0 1258 if ( c <= 0 )
ferencd@0 1259 {
ferencd@0 1260 TiXmlDocument* document = GetDocument();
ferencd@0 1261 if ( document )
ferencd@0 1262 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 1263 return;
ferencd@0 1264 }
ferencd@0 1265 (*tag) += (char) c;
ferencd@0 1266
ferencd@0 1267 if ( c == '>' )
ferencd@0 1268 {
ferencd@0 1269 // All is well.
ferencd@0 1270 return;
ferencd@0 1271 }
ferencd@0 1272 }
ferencd@0 1273 }
ferencd@0 1274 #endif
ferencd@0 1275
ferencd@0 1276
ferencd@0 1277 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0 1278 {
ferencd@0 1279 TiXmlDocument* document = GetDocument();
ferencd@0 1280 p = SkipWhiteSpace( p, encoding );
ferencd@0 1281
ferencd@0 1282 if ( data )
ferencd@0 1283 {
ferencd@0 1284 data->Stamp( p, encoding );
ferencd@0 1285 location = data->Cursor();
ferencd@0 1286 }
ferencd@0 1287 if ( !p || !*p || *p != '<' )
ferencd@0 1288 {
ferencd@0 1289 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
ferencd@0 1290 return 0;
ferencd@0 1291 }
ferencd@0 1292 ++p;
ferencd@0 1293 value = "";
ferencd@0 1294
ferencd@0 1295 while ( p && *p && *p != '>' )
ferencd@0 1296 {
ferencd@0 1297 value += *p;
ferencd@0 1298 ++p;
ferencd@0 1299 }
ferencd@0 1300
ferencd@0 1301 if ( !p )
ferencd@0 1302 {
ferencd@0 1303 if ( document )
ferencd@0 1304 document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
ferencd@0 1305 }
ferencd@0 1306 if ( p && *p == '>' )
ferencd@0 1307 return p+1;
ferencd@0 1308 return p;
ferencd@0 1309 }
ferencd@0 1310
ferencd@0 1311 #ifdef TIXML_USE_STL
ferencd@0 1312 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
ferencd@0 1313 {
ferencd@0 1314 while ( in->good() )
ferencd@0 1315 {
ferencd@0 1316 int c = in->get();
ferencd@0 1317 if ( c <= 0 )
ferencd@0 1318 {
ferencd@0 1319 TiXmlDocument* document = GetDocument();
ferencd@0 1320 if ( document )
ferencd@0 1321 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 1322 return;
ferencd@0 1323 }
ferencd@0 1324
ferencd@0 1325 (*tag) += (char) c;
ferencd@0 1326
ferencd@0 1327 if ( c == '>'
ferencd@0 1328 && tag->at( tag->length() - 2 ) == '-'
ferencd@0 1329 && tag->at( tag->length() - 3 ) == '-' )
ferencd@0 1330 {
ferencd@0 1331 // All is well.
ferencd@0 1332 return;
ferencd@0 1333 }
ferencd@0 1334 }
ferencd@0 1335 }
ferencd@0 1336 #endif
ferencd@0 1337
ferencd@0 1338
ferencd@0 1339 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0 1340 {
ferencd@0 1341 TiXmlDocument* document = GetDocument();
ferencd@0 1342 value = "";
ferencd@0 1343
ferencd@0 1344 p = SkipWhiteSpace( p, encoding );
ferencd@0 1345
ferencd@0 1346 if ( data )
ferencd@0 1347 {
ferencd@0 1348 data->Stamp( p, encoding );
ferencd@0 1349 location = data->Cursor();
ferencd@0 1350 }
ferencd@0 1351 const char* startTag = "<!--";
ferencd@0 1352 const char* endTag = "-->";
ferencd@0 1353
ferencd@0 1354 if ( !StringEqual( p, startTag, false, encoding ) )
ferencd@0 1355 {
ferencd@0 1356 if ( document )
ferencd@0 1357 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
ferencd@0 1358 return 0;
ferencd@0 1359 }
ferencd@0 1360 p += strlen( startTag );
ferencd@0 1361
ferencd@0 1362 // [ 1475201 ] TinyXML parses entities in comments
ferencd@0 1363 // Oops - ReadText doesn't work, because we don't want to parse the entities.
ferencd@0 1364 // p = ReadText( p, &value, false, endTag, false, encoding );
ferencd@0 1365 //
ferencd@0 1366 // from the XML spec:
ferencd@0 1367 /*
ferencd@0 1368 [Definition: Comments may appear anywhere in a document outside other markup; in addition,
ferencd@0 1369 they may appear within the document type declaration at places allowed by the grammar.
ferencd@0 1370 They are not part of the document's character data; an XML processor MAY, but need not,
ferencd@0 1371 make it possible for an application to retrieve the text of comments. For compatibility,
ferencd@0 1372 the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
ferencd@0 1373 references MUST NOT be recognized within comments.
ferencd@0 1374
ferencd@0 1375 An example of a comment:
ferencd@0 1376
ferencd@0 1377 <!-- declarations for <head> & <body> -->
ferencd@0 1378 */
ferencd@0 1379
ferencd@0 1380 value = "";
ferencd@0 1381 // Keep all the white space.
ferencd@0 1382 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
ferencd@0 1383 {
ferencd@0 1384 value.append( p, 1 );
ferencd@0 1385 ++p;
ferencd@0 1386 }
ferencd@0 1387 if ( p && *p )
ferencd@0 1388 p += strlen( endTag );
ferencd@0 1389
ferencd@0 1390 return p;
ferencd@0 1391 }
ferencd@0 1392
ferencd@0 1393
ferencd@0 1394 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0 1395 {
ferencd@0 1396 p = SkipWhiteSpace( p, encoding );
ferencd@0 1397 if ( !p || !*p ) return 0;
ferencd@0 1398
ferencd@0 1399 if ( data )
ferencd@0 1400 {
ferencd@0 1401 data->Stamp( p, encoding );
ferencd@0 1402 location = data->Cursor();
ferencd@0 1403 }
ferencd@0 1404 // Read the name, the '=' and the value.
ferencd@0 1405 const char* pErr = p;
ferencd@0 1406 p = ReadName( p, &name, encoding );
ferencd@0 1407 if ( !p || !*p )
ferencd@0 1408 {
ferencd@0 1409 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
ferencd@0 1410 return 0;
ferencd@0 1411 }
ferencd@0 1412 p = SkipWhiteSpace( p, encoding );
ferencd@0 1413 if ( !p || !*p || *p != '=' )
ferencd@0 1414 {
ferencd@0 1415 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
ferencd@0 1416 return 0;
ferencd@0 1417 }
ferencd@0 1418
ferencd@0 1419 ++p; // skip '='
ferencd@0 1420 p = SkipWhiteSpace( p, encoding );
ferencd@0 1421 if ( !p || !*p )
ferencd@0 1422 {
ferencd@0 1423 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
ferencd@0 1424 return 0;
ferencd@0 1425 }
ferencd@0 1426
ferencd@0 1427 const char* end;
ferencd@0 1428 const char SINGLE_QUOTE = '\'';
ferencd@0 1429 const char DOUBLE_QUOTE = '\"';
ferencd@0 1430
ferencd@0 1431 if ( *p == SINGLE_QUOTE )
ferencd@0 1432 {
ferencd@0 1433 ++p;
ferencd@0 1434 end = "\'"; // single quote in string
ferencd@0 1435 p = ReadText( p, &value, false, end, false, encoding );
ferencd@0 1436 }
ferencd@0 1437 else if ( *p == DOUBLE_QUOTE )
ferencd@0 1438 {
ferencd@0 1439 ++p;
ferencd@0 1440 end = "\""; // double quote in string
ferencd@0 1441 p = ReadText( p, &value, false, end, false, encoding );
ferencd@0 1442 }
ferencd@0 1443 else
ferencd@0 1444 {
ferencd@0 1445 // All attribute values should be in single or double quotes.
ferencd@0 1446 // But this is such a common error that the parser will try
ferencd@0 1447 // its best, even without them.
ferencd@0 1448 value = "";
ferencd@0 1449 while ( p && *p // existence
ferencd@0 1450 && !IsWhiteSpace( *p ) // whitespace
ferencd@0 1451 && *p != '/' && *p != '>' ) // tag end
ferencd@0 1452 {
ferencd@0 1453 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
ferencd@0 1454 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
ferencd@0 1455 // We did not have an opening quote but seem to have a
ferencd@0 1456 // closing one. Give up and throw an error.
ferencd@0 1457 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
ferencd@0 1458 return 0;
ferencd@0 1459 }
ferencd@0 1460 value += *p;
ferencd@0 1461 ++p;
ferencd@0 1462 }
ferencd@0 1463 }
ferencd@0 1464 return p;
ferencd@0 1465 }
ferencd@0 1466
ferencd@0 1467 #ifdef TIXML_USE_STL
ferencd@0 1468 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
ferencd@0 1469 {
ferencd@0 1470 while ( in->good() )
ferencd@0 1471 {
ferencd@0 1472 int c = in->peek();
ferencd@0 1473 if ( !cdata && (c == '<' ) )
ferencd@0 1474 {
ferencd@0 1475 return;
ferencd@0 1476 }
ferencd@0 1477 if ( c <= 0 )
ferencd@0 1478 {
ferencd@0 1479 TiXmlDocument* document = GetDocument();
ferencd@0 1480 if ( document )
ferencd@0 1481 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 1482 return;
ferencd@0 1483 }
ferencd@0 1484
ferencd@0 1485 (*tag) += (char) c;
ferencd@0 1486 in->get(); // "commits" the peek made above
ferencd@0 1487
ferencd@0 1488 if ( cdata && c == '>' && tag->size() >= 3 ) {
ferencd@0 1489 size_t len = tag->size();
ferencd@0 1490 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
ferencd@0 1491 // terminator of cdata.
ferencd@0 1492 return;
ferencd@0 1493 }
ferencd@0 1494 }
ferencd@0 1495 }
ferencd@0 1496 }
ferencd@0 1497 #endif
ferencd@0 1498
ferencd@0 1499 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
ferencd@0 1500 {
ferencd@0 1501 value = "";
ferencd@0 1502 TiXmlDocument* document = GetDocument();
ferencd@0 1503
ferencd@0 1504 if ( data )
ferencd@0 1505 {
ferencd@0 1506 data->Stamp( p, encoding );
ferencd@0 1507 location = data->Cursor();
ferencd@0 1508 }
ferencd@0 1509
ferencd@0 1510 const char* const startTag = "<![CDATA[";
ferencd@0 1511 const char* const endTag = "]]>";
ferencd@0 1512
ferencd@0 1513 if ( cdata || StringEqual( p, startTag, false, encoding ) )
ferencd@0 1514 {
ferencd@0 1515 cdata = true;
ferencd@0 1516
ferencd@0 1517 if ( !StringEqual( p, startTag, false, encoding ) )
ferencd@0 1518 {
ferencd@0 1519 if ( document )
ferencd@0 1520 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
ferencd@0 1521 return 0;
ferencd@0 1522 }
ferencd@0 1523 p += strlen( startTag );
ferencd@0 1524
ferencd@0 1525 // Keep all the white space, ignore the encoding, etc.
ferencd@0 1526 while ( p && *p
ferencd@0 1527 && !StringEqual( p, endTag, false, encoding )
ferencd@0 1528 )
ferencd@0 1529 {
ferencd@0 1530 value += *p;
ferencd@0 1531 ++p;
ferencd@0 1532 }
ferencd@0 1533
ferencd@0 1534 TIXML_STRING dummy;
ferencd@0 1535 p = ReadText( p, &dummy, false, endTag, false, encoding );
ferencd@0 1536 return p;
ferencd@0 1537 }
ferencd@0 1538 else
ferencd@0 1539 {
ferencd@0 1540 bool ignoreWhite = true;
ferencd@0 1541
ferencd@0 1542 const char* end = "<";
ferencd@0 1543 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
ferencd@0 1544 if ( p && *p )
ferencd@0 1545 return p-1; // don't truncate the '<'
ferencd@0 1546 return 0;
ferencd@0 1547 }
ferencd@0 1548 }
ferencd@0 1549
ferencd@0 1550 #ifdef TIXML_USE_STL
ferencd@0 1551 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
ferencd@0 1552 {
ferencd@0 1553 while ( in->good() )
ferencd@0 1554 {
ferencd@0 1555 int c = in->get();
ferencd@0 1556 if ( c <= 0 )
ferencd@0 1557 {
ferencd@0 1558 TiXmlDocument* document = GetDocument();
ferencd@0 1559 if ( document )
ferencd@0 1560 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
ferencd@0 1561 return;
ferencd@0 1562 }
ferencd@0 1563 (*tag) += (char) c;
ferencd@0 1564
ferencd@0 1565 if ( c == '>' )
ferencd@0 1566 {
ferencd@0 1567 // All is well.
ferencd@0 1568 return;
ferencd@0 1569 }
ferencd@0 1570 }
ferencd@0 1571 }
ferencd@0 1572 #endif
ferencd@0 1573
ferencd@0 1574 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
ferencd@0 1575 {
ferencd@0 1576 p = SkipWhiteSpace( p, _encoding );
ferencd@0 1577 // Find the beginning, find the end, and look for
ferencd@0 1578 // the stuff in-between.
ferencd@0 1579 TiXmlDocument* document = GetDocument();
ferencd@0 1580 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
ferencd@0 1581 {
ferencd@0 1582 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
ferencd@0 1583 return 0;
ferencd@0 1584 }
ferencd@0 1585 if ( data )
ferencd@0 1586 {
ferencd@0 1587 data->Stamp( p, _encoding );
ferencd@0 1588 location = data->Cursor();
ferencd@0 1589 }
ferencd@0 1590 p += 5;
ferencd@0 1591
ferencd@0 1592 version = "";
ferencd@0 1593 encoding = "";
ferencd@0 1594 standalone = "";
ferencd@0 1595
ferencd@0 1596 while ( p && *p )
ferencd@0 1597 {
ferencd@0 1598 if ( *p == '>' )
ferencd@0 1599 {
ferencd@0 1600 ++p;
ferencd@0 1601 return p;
ferencd@0 1602 }
ferencd@0 1603
ferencd@0 1604 p = SkipWhiteSpace( p, _encoding );
ferencd@0 1605 if ( StringEqual( p, "version", true, _encoding ) )
ferencd@0 1606 {
ferencd@0 1607 TiXmlAttribute attrib;
ferencd@0 1608 p = attrib.Parse( p, data, _encoding );
ferencd@0 1609 version = attrib.Value();
ferencd@0 1610 }
ferencd@0 1611 else if ( StringEqual( p, "encoding", true, _encoding ) )
ferencd@0 1612 {
ferencd@0 1613 TiXmlAttribute attrib;
ferencd@0 1614 p = attrib.Parse( p, data, _encoding );
ferencd@0 1615 encoding = attrib.Value();
ferencd@0 1616 }
ferencd@0 1617 else if ( StringEqual( p, "standalone", true, _encoding ) )
ferencd@0 1618 {
ferencd@0 1619 TiXmlAttribute attrib;
ferencd@0 1620 p = attrib.Parse( p, data, _encoding );
ferencd@0 1621 standalone = attrib.Value();
ferencd@0 1622 }
ferencd@0 1623 else
ferencd@0 1624 {
ferencd@0 1625 // Read over whatever it is.
ferencd@0 1626 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
ferencd@0 1627 ++p;
ferencd@0 1628 }
ferencd@0 1629 }
ferencd@0 1630 return 0;
ferencd@0 1631 }
ferencd@0 1632
ferencd@0 1633 bool TiXmlText::Blank() const
ferencd@0 1634 {
ferencd@0 1635 for ( unsigned i=0; i<value.length(); i++ )
ferencd@0 1636 if ( !IsWhiteSpace( value[i] ) )
ferencd@0 1637 return false;
ferencd@0 1638 return true;
ferencd@0 1639 }
ferencd@0 1640