00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "tinyxml.h"
00026 #include <ctype.h>
00027
00028
00029
00030
00031
00032
00033 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
00034 {
00035 { "&", 5, '&' },
00036 { "<", 4, '<' },
00037 { ">", 4, '>' },
00038 { """, 6, '\"' },
00039 { "'", 6, '\'' }
00040 };
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
// The three bytes EF BB BF (the UTF-8 byte-order mark), held as plain `char`
// so they can be compared directly with bytes read through a `const char*`
// and used as `case` labels.
const char TIXML_UTF_LEAD_0 = static_cast<char>( 0xefU );
const char TIXML_UTF_LEAD_1 = static_cast<char>( 0xbbU );
const char TIXML_UTF_LEAD_2 = static_cast<char>( 0xbfU );
00055
// Lookup table indexed by a byte value. The entry is the number of bytes the
// parser advances when it meets that byte as the first byte of a character
// in UTF-8 mode. Bytes that are not the start of a 2-, 3- or 4-byte sequence
// map to 1.
const int TiXmlBase::utf8ByteTable[256] =
{
    //  0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0x00
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0x10
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0x20
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0x30
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0x40
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0x50
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0x60
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0x70  end of the ASCII range
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0x80  0x80..0xc1: single step
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0x90
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0xa0
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,    // 0xb0
    1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,    // 0xc0  0xc2..0xdf: 2 bytes
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,    // 0xd0
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,    // 0xe0  0xe0..0xef: 3 bytes
    4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1     // 0xf0  0xf0..0xf4: 4 bytes, rest single step
};
00076
00077
00078 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00079 {
00080 const unsigned long BYTE_MASK = 0xBF;
00081 const unsigned long BYTE_MARK = 0x80;
00082 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00083
00084 if (input < 0x80)
00085 *length = 1;
00086 else if ( input < 0x800 )
00087 *length = 2;
00088 else if ( input < 0x10000 )
00089 *length = 3;
00090 else if ( input < 0x200000 )
00091 *length = 4;
00092 else
00093 { *length = 0; return; }
00094
00095 output += *length;
00096
00097
00098 switch (*length)
00099 {
00100 case 4:
00101 --output;
00102 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00103 input >>= 6;
00104 case 3:
00105 --output;
00106 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00107 input >>= 6;
00108 case 2:
00109 --output;
00110 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00111 input >>= 6;
00112 case 1:
00113 --output;
00114 *output = (char)(input | FIRST_BYTE_MARK[*length]);
00115 }
00116 }
00117
00118
00119 int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding )
00120 {
00121
00122
00123
00124
00125
00126
00127
00128 if ( anyByte < 127 )
00129 return isalpha( anyByte );
00130 else
00131 return 1;
00132
00133
00134
00135
00136
00137 }
00138
00139
00140 int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding )
00141 {
00142
00143
00144
00145
00146
00147
00148
00149 if ( anyByte < 127 )
00150 return isalnum( anyByte );
00151 else
00152 return 1;
00153
00154
00155
00156
00157
00158 }
00159
00160
00161 class TiXmlParsingData
00162 {
00163 friend class TiXmlDocument;
00164 public:
00165 void Stamp( const char* now, TiXmlEncoding encoding );
00166
00167 const TiXmlCursor& Cursor() { return cursor; }
00168
00169 private:
00170
00171 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00172 {
00173 assert( start );
00174 stamp = start;
00175 tabsize = _tabsize;
00176 cursor.row = row;
00177 cursor.col = col;
00178 }
00179
00180 TiXmlCursor cursor;
00181 const char* stamp;
00182 int tabsize;
00183 };
00184
00185
00186 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00187 {
00188 assert( now );
00189
00190
00191 if ( tabsize < 1 )
00192 {
00193 return;
00194 }
00195
00196
00197 int row = cursor.row;
00198 int col = cursor.col;
00199 const char* p = stamp;
00200 assert( p );
00201
00202 while ( p < now )
00203 {
00204
00205 switch (*p) {
00206 case 0:
00207
00208
00209 return;
00210
00211 case '\r':
00212
00213 ++row;
00214 col = 0;
00215
00216 ++p;
00217
00218
00219 if (*p == '\n') {
00220 ++p;
00221 }
00222 break;
00223
00224 case '\n':
00225
00226 ++row;
00227 col = 0;
00228
00229
00230 ++p;
00231
00232
00233
00234
00235 if (*p == '\r') {
00236 ++p;
00237 }
00238 break;
00239
00240 case '\t':
00241
00242 ++p;
00243
00244
00245 col = (col / tabsize + 1) * tabsize;
00246 break;
00247
00248 case TIXML_UTF_LEAD_0:
00249 if ( encoding == TIXML_ENCODING_UTF8 )
00250 {
00251 if ( *(p+1) && *(p+2) )
00252 {
00253
00254
00255 if ( *(p+1)==TIXML_UTF_LEAD_1 && *(p+2)==TIXML_UTF_LEAD_2 )
00256 p += 3;
00257 else if ( *(p+1)==(char)(0xbf) && *(p+2)==(char)(0xbe) )
00258 p += 3;
00259 else if ( *(p+1)==(char)(0xbf) && *(p+2)==(char)(0xbf) )
00260 p += 3;
00261 else
00262 { p +=3; ++col; }
00263 }
00264 }
00265 else
00266 {
00267 ++p;
00268 ++col;
00269 }
00270 break;
00271
00272 default:
00273 if ( encoding == TIXML_ENCODING_UTF8 )
00274 {
00275
00276 int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
00277 if ( step == 0 )
00278 step = 1;
00279 p += step;
00280
00281
00282 ++col;
00283 }
00284 else
00285 {
00286 ++p;
00287 ++col;
00288 }
00289 break;
00290 }
00291 }
00292 cursor.row = row;
00293 cursor.col = col;
00294 assert( cursor.row >= -1 );
00295 assert( cursor.col >= -1 );
00296 stamp = p;
00297 assert( stamp );
00298 }
00299
00300
00301 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00302 {
00303 if ( !p || !*p )
00304 {
00305 return 0;
00306 }
00307 if ( encoding == TIXML_ENCODING_UTF8 )
00308 {
00309 while ( *p )
00310 {
00311
00312 if ( *(p+0)==TIXML_UTF_LEAD_0
00313 && *(p+1)==TIXML_UTF_LEAD_1
00314 && *(p+2)==TIXML_UTF_LEAD_2 )
00315 {
00316 p += 3;
00317 continue;
00318 }
00319 else if(*(p+0)==TIXML_UTF_LEAD_0
00320 && *(p+1)==(const char) 0xbf
00321 && *(p+2)==(const char) 0xbe )
00322 {
00323 p += 3;
00324 continue;
00325 }
00326 else if(*(p+0)==TIXML_UTF_LEAD_0
00327 && *(p+1)==(const char) 0xbf
00328 && *(p+2)==(const char) 0xbf )
00329 {
00330 p += 3;
00331 continue;
00332 }
00333
00334 if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
00335 ++p;
00336 else
00337 break;
00338 }
00339 }
00340 else
00341 {
00342 while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
00343 ++p;
00344 }
00345
00346 return p;
00347 }
00348
00349 #ifdef TIXML_USE_STL
00350 bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
00351 {
00352 for( ;; )
00353 {
00354 if ( !in->good() ) return false;
00355
00356 int c = in->peek();
00357
00358 if ( !IsWhiteSpace( c ) || c <= 0 )
00359 return true;
00360
00361 *tag += (char) in->get();
00362 }
00363 }
00364
00365 bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
00366 {
00367
00368 while ( in->good() )
00369 {
00370 int c = in->peek();
00371 if ( c == character )
00372 return true;
00373 if ( c <= 0 )
00374 return false;
00375
00376 in->get();
00377 *tag += (char) c;
00378 }
00379 return false;
00380 }
00381 #endif
00382
00383 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00384 {
00385 *name = "";
00386 assert( p );
00387
00388
00389
00390
00391
00392
00393
00394
00395 if ( p && *p
00396 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00397 {
00398 while( p && *p
00399 && ( IsAlphaNum( (unsigned char ) *p, encoding )
00400 || *p == '_'
00401 || *p == '-'
00402 || *p == '.'
00403 || *p == ':' ) )
00404 {
00405 (*name) += *p;
00406 ++p;
00407 }
00408 return p;
00409 }
00410 return 0;
00411 }
00412
00413 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00414 {
00415
00416 TIXML_STRING ent;
00417 int i;
00418 *length = 0;
00419
00420 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00421 {
00422 unsigned long ucs = 0;
00423
00424
00425 size_t delta = 0;
00426 unsigned mult = 1;
00427
00428 if ( *(p+2) == 'x' )
00429 {
00430
00431 if ( !*(p+3) ) return 0;
00432
00433 const char* q = p+3;
00434 q = strchr( q, ';' );
00435
00436 if ( !q || !*q ) return 0;
00437
00438 delta = q-p;
00439 --q;
00440
00441 while ( *q != 'x' )
00442 {
00443 if ( *q >= '0' && *q <= '9' )
00444 ucs += mult * (*q - '0');
00445 else if ( *q >= 'a' && *q <= 'f' )
00446 ucs += mult * (*q - 'a' + 10);
00447 else if ( *q >= 'A' && *q <= 'F' )
00448 ucs += mult * (*q - 'A' + 10 );
00449 else
00450 return 0;
00451 mult *= 16;
00452 --q;
00453 }
00454 }
00455 else
00456 {
00457
00458 if ( !*(p+2) ) return 0;
00459
00460 const char* q = p+2;
00461 q = strchr( q, ';' );
00462
00463 if ( !q || !*q ) return 0;
00464
00465 delta = q-p;
00466 --q;
00467
00468 while ( *q != '#' )
00469 {
00470 if ( *q >= '0' && *q <= '9' )
00471 ucs += mult * (*q - '0');
00472 else
00473 return 0;
00474 mult *= 10;
00475 --q;
00476 }
00477 }
00478 if ( encoding == TIXML_ENCODING_UTF8 )
00479 {
00480
00481 ConvertUTF32ToUTF8( ucs, value, length );
00482 }
00483 else
00484 {
00485 *value = (char)ucs;
00486 *length = 1;
00487 }
00488 return p + delta + 1;
00489 }
00490
00491
00492 for( i=0; i<NUM_ENTITY; ++i )
00493 {
00494 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00495 {
00496 assert( strlen( entity[i].str ) == entity[i].strLength );
00497 *value = entity[i].chr;
00498 *length = 1;
00499 return ( p + entity[i].strLength );
00500 }
00501 }
00502
00503
00504 *value = *p;
00505 return p+1;
00506 }
00507
00508
00509 bool TiXmlBase::StringEqual( const char* p,
00510 const char* tag,
00511 bool ignoreCase,
00512 TiXmlEncoding encoding )
00513 {
00514 assert( p );
00515 assert( tag );
00516 if ( !p || !*p )
00517 {
00518 assert( 0 );
00519 return false;
00520 }
00521
00522 const char* q = p;
00523
00524 if ( ignoreCase )
00525 {
00526 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00527 {
00528 ++q;
00529 ++tag;
00530 }
00531
00532 if ( *tag == 0 )
00533 return true;
00534 }
00535 else
00536 {
00537 while ( *q && *tag && *q == *tag )
00538 {
00539 ++q;
00540 ++tag;
00541 }
00542
00543 if ( *tag == 0 )
00544 return true;
00545 }
00546 return false;
00547 }
00548
00549 const char* TiXmlBase::ReadText( const char* p,
00550 TIXML_STRING * text,
00551 bool trimWhiteSpace,
00552 const char* endTag,
00553 bool caseInsensitive,
00554 TiXmlEncoding encoding )
00555 {
00556 *text = "";
00557 if ( !trimWhiteSpace
00558 || !condenseWhiteSpace )
00559 {
00560
00561 while ( p && *p
00562 && !StringEqual( p, endTag, caseInsensitive, encoding )
00563 )
00564 {
00565 int len;
00566 char cArr[4] = { 0, 0, 0, 0 };
00567 p = GetChar( p, cArr, &len, encoding );
00568 text->append( cArr, len );
00569 }
00570 }
00571 else
00572 {
00573 bool whitespace = false;
00574
00575
00576 p = SkipWhiteSpace( p, encoding );
00577 while ( p && *p
00578 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00579 {
00580 if ( *p == '\r' || *p == '\n' )
00581 {
00582 whitespace = true;
00583 ++p;
00584 }
00585 else if ( IsWhiteSpace( *p ) )
00586 {
00587 whitespace = true;
00588 ++p;
00589 }
00590 else
00591 {
00592
00593
00594 if ( whitespace )
00595 {
00596 (*text) += ' ';
00597 whitespace = false;
00598 }
00599 int len;
00600 char cArr[4] = { 0, 0, 0, 0 };
00601 p = GetChar( p, cArr, &len, encoding );
00602 if ( len == 1 )
00603 (*text) += cArr[0];
00604 else
00605 text->append( cArr, len );
00606 }
00607 }
00608 }
00609 return p + strlen( endTag );
00610 }
00611
00612 #ifdef TIXML_USE_STL
00613
00614 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
00615 {
00616
00617
00618
00619
00620
00621
00622
00623 if ( !StreamTo( in, '<', tag ) )
00624 {
00625 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00626 return;
00627 }
00628
00629 while ( in->good() )
00630 {
00631 int tagIndex = (int) tag->length();
00632 while ( in->good() && in->peek() != '>' )
00633 {
00634 int c = in->get();
00635 if ( c <= 0 )
00636 {
00637 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00638 break;
00639 }
00640 (*tag) += (char) c;
00641 }
00642
00643 if ( in->good() )
00644 {
00645
00646
00647
00648 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00649
00650 if ( node )
00651 {
00652 node->StreamIn( in, tag );
00653 bool isElement = node->ToElement() != 0;
00654 delete node;
00655 node = 0;
00656
00657
00658
00659 if ( isElement )
00660 {
00661 return;
00662 }
00663 }
00664 else
00665 {
00666 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00667 return;
00668 }
00669 }
00670 }
00671
00672 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00673 }
00674
00675 #endif
00676
00677 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00678 {
00679 ClearError();
00680
00681
00682
00683
00684 if ( !p || !*p )
00685 {
00686 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00687 return 0;
00688 }
00689
00690
00691
00692
00693 location.Clear();
00694 if ( prevData )
00695 {
00696 location.row = prevData->cursor.row;
00697 location.col = prevData->cursor.col;
00698 }
00699 else
00700 {
00701 location.row = 0;
00702 location.col = 0;
00703 }
00704 TiXmlParsingData data( p, TabSize(), location.row, location.col );
00705 location = data.Cursor();
00706
00707 if ( encoding == TIXML_ENCODING_UNKNOWN )
00708 {
00709
00710 if ( *(p+0) && *(p+0) == TIXML_UTF_LEAD_0
00711 && *(p+1) && *(p+1) == TIXML_UTF_LEAD_1
00712 && *(p+2) && *(p+2) == TIXML_UTF_LEAD_2 )
00713 {
00714 encoding = TIXML_ENCODING_UTF8;
00715 }
00716 }
00717
00718 p = SkipWhiteSpace( p, encoding );
00719 if ( !p )
00720 {
00721 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00722 return 0;
00723 }
00724
00725 while ( p && *p )
00726 {
00727 TiXmlNode* node = Identify( p, encoding );
00728 if ( node )
00729 {
00730 p = node->Parse( p, &data, encoding );
00731 LinkEndChild( node );
00732 }
00733 else
00734 {
00735 break;
00736 }
00737
00738
00739 if ( encoding == TIXML_ENCODING_UNKNOWN
00740 && node->ToDeclaration() )
00741 {
00742 TiXmlDeclaration* dec = node->ToDeclaration();
00743 const char* enc = dec->Encoding();
00744 assert( enc );
00745
00746 if ( *enc == 0 )
00747 encoding = TIXML_ENCODING_UTF8;
00748 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00749 encoding = TIXML_ENCODING_UTF8;
00750 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00751 encoding = TIXML_ENCODING_UTF8;
00752 else
00753 encoding = TIXML_ENCODING_LEGACY;
00754 }
00755
00756 p = SkipWhiteSpace( p, encoding );
00757 }
00758
00759
00760 if ( !firstChild ) {
00761 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00762 return 0;
00763 }
00764
00765
00766 return p;
00767 }
00768
00769 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00770 {
00771
00772 if ( error )
00773 return;
00774
00775 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00776 error = true;
00777 errorId = err;
00778 errorDesc = errorString[ errorId ];
00779
00780 errorLocation.Clear();
00781 if ( pError && data )
00782 {
00783
00784 data->Stamp( pError, encoding );
00785 errorLocation = data->Cursor();
00786 }
00787 }
00788
00789
00790 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00791 {
00792 TiXmlNode* returnNode = 0;
00793
00794 p = SkipWhiteSpace( p, encoding );
00795 if( !p || !*p || *p != '<' )
00796 {
00797 return 0;
00798 }
00799
00800 TiXmlDocument* doc = GetDocument();
00801 p = SkipWhiteSpace( p, encoding );
00802
00803 if ( !p || !*p )
00804 {
00805 return 0;
00806 }
00807
00808
00809
00810
00811
00812
00813
00814
00815 const char* xmlHeader = { "<?xml" };
00816 const char* commentHeader = { "<!--" };
00817 const char* dtdHeader = { "<!" };
00818
00819 if ( StringEqual( p, xmlHeader, true, encoding ) )
00820 {
00821 #ifdef DEBUG_PARSER
00822 TIXML_LOG( "XML parsing Declaration\n" );
00823 #endif
00824 returnNode = new TiXmlDeclaration();
00825 }
00826 else if ( StringEqual( p, commentHeader, false, encoding ) )
00827 {
00828 #ifdef DEBUG_PARSER
00829 TIXML_LOG( "XML parsing Comment\n" );
00830 #endif
00831 returnNode = new TiXmlComment();
00832 }
00833 else if ( StringEqual( p, dtdHeader, false, encoding ) )
00834 {
00835 #ifdef DEBUG_PARSER
00836 TIXML_LOG( "XML parsing Unknown(1)\n" );
00837 #endif
00838 returnNode = new TiXmlUnknown();
00839 }
00840 else if ( IsAlpha( *(p+1), encoding )
00841 || *(p+1) == '_' )
00842 {
00843 #ifdef DEBUG_PARSER
00844 TIXML_LOG( "XML parsing Element\n" );
00845 #endif
00846 returnNode = new TiXmlElement( "" );
00847 }
00848 else
00849 {
00850 #ifdef DEBUG_PARSER
00851 TIXML_LOG( "XML parsing Unknown(2)\n" );
00852 #endif
00853 returnNode = new TiXmlUnknown();
00854 }
00855
00856 if ( returnNode )
00857 {
00858
00859 returnNode->parent = this;
00860 }
00861 else
00862 {
00863 if ( doc )
00864 doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
00865 }
00866 return returnNode;
00867 }
00868
00869 #ifdef TIXML_USE_STL
00870
00871 void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
00872 {
00873
00874
00875 while( in->good() )
00876 {
00877 int c = in->get();
00878 if ( c <= 0 )
00879 {
00880 TiXmlDocument* document = GetDocument();
00881 if ( document )
00882 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00883 return;
00884 }
00885 (*tag) += (char) c ;
00886
00887 if ( c == '>' )
00888 break;
00889 }
00890
00891 if ( tag->length() < 3 ) return;
00892
00893
00894
00895
00896 if ( tag->at( tag->length() - 1 ) == '>'
00897 && tag->at( tag->length() - 2 ) == '/' )
00898 {
00899
00900 return;
00901 }
00902 else if ( tag->at( tag->length() - 1 ) == '>' )
00903 {
00904
00905
00906
00907
00908 for ( ;; )
00909 {
00910 StreamWhiteSpace( in, tag );
00911
00912
00913 if ( in->good() && in->peek() != '<' )
00914 {
00915
00916 TiXmlText text( "" );
00917 text.StreamIn( in, tag );
00918
00919
00920
00921 continue;
00922 }
00923
00924
00925
00926 if ( !in->good() ) return;
00927 assert( in->peek() == '<' );
00928 int tagIndex = tag->length();
00929
00930 bool closingTag = false;
00931 bool firstCharFound = false;
00932
00933 for( ;; )
00934 {
00935 if ( !in->good() )
00936 return;
00937
00938 int c = in->peek();
00939 if ( c <= 0 )
00940 {
00941 TiXmlDocument* document = GetDocument();
00942 if ( document )
00943 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00944 return;
00945 }
00946
00947 if ( c == '>' )
00948 break;
00949
00950 *tag += (char) c;
00951 in->get();
00952
00953 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
00954 {
00955 firstCharFound = true;
00956 if ( c == '/' )
00957 closingTag = true;
00958 }
00959 }
00960
00961
00962 if ( closingTag )
00963 {
00964 if ( !in->good() )
00965 return;
00966
00967 int c = in->get();
00968 if ( c <= 0 )
00969 {
00970 TiXmlDocument* document = GetDocument();
00971 if ( document )
00972 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00973 return;
00974 }
00975 assert( c == '>' );
00976 *tag += (char) c;
00977
00978
00979 return;
00980 }
00981 else
00982 {
00983
00984 const char* tagloc = tag->c_str() + tagIndex;
00985 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
00986 if ( !node )
00987 return;
00988 node->StreamIn( in, tag );
00989 delete node;
00990 node = 0;
00991
00992
00993 }
00994 }
00995 }
00996 }
00997 #endif
00998
00999 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01000 {
01001 p = SkipWhiteSpace( p, encoding );
01002 TiXmlDocument* document = GetDocument();
01003
01004 if ( !p || !*p )
01005 {
01006 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01007 return 0;
01008 }
01009
01010
01011 if ( data )
01012 {
01013 data->Stamp( p, encoding );
01014 location = data->Cursor();
01015 }
01016
01017 if ( *p != '<' )
01018 {
01019 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01020 return 0;
01021 }
01022
01023 p = SkipWhiteSpace( p+1, encoding );
01024
01025
01026 const char* pErr = p;
01027
01028 p = ReadName( p, &value, encoding );
01029 if ( !p || !*p )
01030 {
01031 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01032 return 0;
01033 }
01034
01035 TIXML_STRING endTag ("</");
01036 endTag += value;
01037 endTag += ">";
01038
01039
01040
01041 while ( p && *p )
01042 {
01043 pErr = p;
01044 p = SkipWhiteSpace( p, encoding );
01045 if ( !p || !*p )
01046 {
01047 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01048 return 0;
01049 }
01050 if ( *p == '/' )
01051 {
01052 ++p;
01053
01054 if ( *p != '>' )
01055 {
01056 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
01057 return 0;
01058 }
01059 return (p+1);
01060 }
01061 else if ( *p == '>' )
01062 {
01063
01064
01065
01066 ++p;
01067 p = ReadValue( p, data, encoding );
01068 if ( !p || !*p )
01069 return 0;
01070
01071
01072 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01073 {
01074 p += endTag.length();
01075 return p;
01076 }
01077 else
01078 {
01079 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01080 return 0;
01081 }
01082 }
01083 else
01084 {
01085
01086 TiXmlAttribute* attrib = new TiXmlAttribute();
01087 if ( !attrib )
01088 {
01089 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
01090 return 0;
01091 }
01092
01093 attrib->SetDocument( document );
01094 const char* pErr = p;
01095 p = attrib->Parse( p, data, encoding );
01096
01097 if ( !p || !*p )
01098 {
01099 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01100 delete attrib;
01101 return 0;
01102 }
01103
01104
01105 TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
01106 if ( node )
01107 {
01108 node->SetValue( attrib->Value() );
01109 delete attrib;
01110 return 0;
01111 }
01112
01113 attributeSet.Add( attrib );
01114 }
01115 }
01116 return p;
01117 }
01118
01119
01120 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01121 {
01122 TiXmlDocument* document = GetDocument();
01123
01124 const char* pWithWhiteSpace = p;
01125
01126 p = SkipWhiteSpace( p, encoding );
01127 while ( p && *p )
01128 {
01129 if ( *p != '<' )
01130 {
01131
01132 TiXmlText* textNode = new TiXmlText( "" );
01133
01134 if ( !textNode )
01135 {
01136 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
01137 return 0;
01138 }
01139
01140 if ( TiXmlBase::IsWhiteSpaceCondensed() )
01141 {
01142 p = textNode->Parse( p, data, encoding );
01143 }
01144 else
01145 {
01146
01147
01148 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01149 }
01150
01151 if ( !textNode->Blank() )
01152 LinkEndChild( textNode );
01153 else
01154 delete textNode;
01155 }
01156 else
01157 {
01158
01159
01160 if ( StringEqual( p, "</", false, encoding ) )
01161 {
01162 return p;
01163 }
01164 else
01165 {
01166 TiXmlNode* node = Identify( p, encoding );
01167 if ( node )
01168 {
01169 p = node->Parse( p, data, encoding );
01170 LinkEndChild( node );
01171 }
01172 else
01173 {
01174 return 0;
01175 }
01176 }
01177 }
01178 p = SkipWhiteSpace( p, encoding );
01179 }
01180
01181 if ( !p )
01182 {
01183 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01184 }
01185 return p;
01186 }
01187
01188
01189 #ifdef TIXML_USE_STL
01190 void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01191 {
01192 while ( in->good() )
01193 {
01194 int c = in->get();
01195 if ( c <= 0 )
01196 {
01197 TiXmlDocument* document = GetDocument();
01198 if ( document )
01199 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01200 return;
01201 }
01202 (*tag) += (char) c;
01203
01204 if ( c == '>' )
01205 {
01206
01207 return;
01208 }
01209 }
01210 }
01211 #endif
01212
01213
01214 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01215 {
01216 TiXmlDocument* document = GetDocument();
01217 p = SkipWhiteSpace( p, encoding );
01218
01219
01220 if ( data )
01221 {
01222 data->Stamp( p, encoding );
01223 location = data->Cursor();
01224 }
01225 if ( !p || !*p || *p != '<' )
01226 {
01227 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01228 return 0;
01229 }
01230 ++p;
01231 value = "";
01232
01233 while ( p && *p && *p != '>' )
01234 {
01235 value += *p;
01236 ++p;
01237 }
01238
01239 if ( !p )
01240 {
01241 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01242 }
01243 if ( *p == '>' )
01244 return p+1;
01245 return p;
01246 }
01247
01248 #ifdef TIXML_USE_STL
01249 void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01250 {
01251 while ( in->good() )
01252 {
01253 int c = in->get();
01254 if ( c <= 0 )
01255 {
01256 TiXmlDocument* document = GetDocument();
01257 if ( document )
01258 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01259 return;
01260 }
01261
01262 (*tag) += (char) c;
01263
01264 if ( c == '>'
01265 && tag->at( tag->length() - 2 ) == '-'
01266 && tag->at( tag->length() - 3 ) == '-' )
01267 {
01268
01269 return;
01270 }
01271 }
01272 }
01273 #endif
01274
01275
01276 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01277 {
01278 TiXmlDocument* document = GetDocument();
01279 value = "";
01280
01281 p = SkipWhiteSpace( p, encoding );
01282
01283
01284 if ( data )
01285 {
01286 data->Stamp( p, encoding );
01287 location = data->Cursor();
01288 }
01289 const char* startTag = "<!--";
01290 const char* endTag = "-->";
01291
01292 if ( !StringEqual( p, startTag, false, encoding ) )
01293 {
01294 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01295 return 0;
01296 }
01297 p += strlen( startTag );
01298 p = ReadText( p, &value, false, endTag, false, encoding );
01299 return p;
01300 }
01301
01302
01303 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01304 {
01305 p = SkipWhiteSpace( p, encoding );
01306 if ( !p || !*p ) return 0;
01307
01308 int tabsize = 4;
01309 if ( document )
01310 tabsize = document->TabSize();
01311
01312
01313 if ( data )
01314 {
01315 data->Stamp( p, encoding );
01316 location = data->Cursor();
01317 }
01318
01319 const char* pErr = p;
01320 p = ReadName( p, &name, encoding );
01321 if ( !p || !*p )
01322 {
01323 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01324 return 0;
01325 }
01326 p = SkipWhiteSpace( p, encoding );
01327 if ( !p || !*p || *p != '=' )
01328 {
01329 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01330 return 0;
01331 }
01332
01333 ++p;
01334 p = SkipWhiteSpace( p, encoding );
01335 if ( !p || !*p )
01336 {
01337 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01338 return 0;
01339 }
01340
01341 const char* end;
01342
01343 if ( *p == '\'' )
01344 {
01345 ++p;
01346 end = "\'";
01347 p = ReadText( p, &value, false, end, false, encoding );
01348 }
01349 else if ( *p == '"' )
01350 {
01351 ++p;
01352 end = "\"";
01353 p = ReadText( p, &value, false, end, false, encoding );
01354 }
01355 else
01356 {
01357
01358
01359
01360 value = "";
01361 while ( p && *p
01362 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'
01363 && *p != '/' && *p != '>' )
01364 {
01365 value += *p;
01366 ++p;
01367 }
01368 }
01369 return p;
01370 }
01371
01372 #ifdef TIXML_USE_STL
01373 void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01374 {
01375 while ( in->good() )
01376 {
01377 int c = in->peek();
01378 if ( c == '<' )
01379 return;
01380 if ( c <= 0 )
01381 {
01382 TiXmlDocument* document = GetDocument();
01383 if ( document )
01384 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01385 return;
01386 }
01387
01388 (*tag) += (char) c;
01389 in->get();
01390 }
01391 }
01392 #endif
01393
01394 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01395 {
01396 value = "";
01397
01398 if ( data )
01399 {
01400 data->Stamp( p, encoding );
01401 location = data->Cursor();
01402 }
01403 bool ignoreWhite = true;
01404
01405 const char* end = "<";
01406 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01407 if ( p )
01408 return p-1;
01409 return 0;
01410 }
01411
01412 #ifdef TIXML_USE_STL
01413 void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01414 {
01415 while ( in->good() )
01416 {
01417 int c = in->get();
01418 if ( c <= 0 )
01419 {
01420 TiXmlDocument* document = GetDocument();
01421 if ( document )
01422 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01423 return;
01424 }
01425 (*tag) += (char) c;
01426
01427 if ( c == '>' )
01428 {
01429
01430 return;
01431 }
01432 }
01433 }
01434 #endif
01435
01436 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01437 {
01438 p = SkipWhiteSpace( p, _encoding );
01439
01440
01441 TiXmlDocument* document = GetDocument();
01442 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01443 {
01444 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01445 return 0;
01446 }
01447
01448 if ( data )
01449 {
01450 data->Stamp( p, _encoding );
01451 location = data->Cursor();
01452 }
01453 p += 5;
01454
01455 version = "";
01456 encoding = "";
01457 standalone = "";
01458
01459 while ( p && *p )
01460 {
01461 if ( *p == '>' )
01462 {
01463 ++p;
01464 return p;
01465 }
01466
01467 p = SkipWhiteSpace( p, _encoding );
01468 if ( StringEqual( p, "version", true, _encoding ) )
01469 {
01470 TiXmlAttribute attrib;
01471 p = attrib.Parse( p, data, _encoding );
01472 version = attrib.Value();
01473 }
01474 else if ( StringEqual( p, "encoding", true, _encoding ) )
01475 {
01476 TiXmlAttribute attrib;
01477 p = attrib.Parse( p, data, _encoding );
01478 encoding = attrib.Value();
01479 }
01480 else if ( StringEqual( p, "standalone", true, _encoding ) )
01481 {
01482 TiXmlAttribute attrib;
01483 p = attrib.Parse( p, data, _encoding );
01484 standalone = attrib.Value();
01485 }
01486 else
01487 {
01488
01489 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01490 ++p;
01491 }
01492 }
01493 return 0;
01494 }
01495
01496 bool TiXmlText::Blank() const
01497 {
01498 for ( unsigned i=0; i<value.length(); i++ )
01499 if ( !IsWhiteSpace( value[i] ) )
01500 return false;
01501 return true;
01502 }
01503