2 www.sourceforge.net/projects/tinyxml
3 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
13 1. The origin of this software must not be misrepresented; you must
14 not claim that you wrote the original software. If you use this
15 software in a product, an acknowledgment in the product documentation
16 would be appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
21 3. This notice may not be removed or altered from any source
30 //#define DEBUG_PARSER
31 #if defined( DEBUG_PARSER )
32 # if defined( DEBUG ) && defined( _MSC_VER )
34 # define TIXML_LOG OutputDebugString
36 # define TIXML_LOG printf
40 // Note that "PutString" hardcodes the same list. This
41 // is less flexible than it appears. Changing the entries
42 // or order will break putstring.
43 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
48 { """, 6, '\"' },
52 // Bunch of unicode info at:
53 // http://www.unicode.org/faq/utf_bom.html
54 // Including the basis of this table, which determines the #bytes in the
55 // sequence from the lead byte. 1 placed for invalid sequences --
56 // although the result will be junk, pass it through as much as possible.
57 // Beware of the non-characters in UTF-8:
58 // ef bb bf (Microsoft "lead bytes")
62 const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
63 const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
64 const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
66 const int TiXmlBase::utf8ByteTable[256] =
68 // 0 1 2 3 4 5 6 7 8 9 a b c d e f
69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
81 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
83 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
84 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
88 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
90 const unsigned long BYTE_MASK = 0xBF;
91 const unsigned long BYTE_MARK = 0x80;
92 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
96 else if ( input < 0x800 )
98 else if ( input < 0x10000 )
100 else if ( input < 0x200000 )
103 { *length = 0; return; } // This code won't convert this correctly anyway.
107 // Scary scary fall throughs.
112 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
116 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
120 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
124 *output = (char)(input | FIRST_BYTE_MARK[*length]);
129 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
131 // This will only work for low-ascii, everything else is assumed to be a valid
132 // letter. I'm not sure this is the best approach, but it is quite tricky trying
133 // to figure out alphabetical vs. not across encoding. So take a very
134 // conservative approach.
136 // if ( encoding == TIXML_ENCODING_UTF8 )
139 return isalpha( anyByte );
141 return 1; // What else to do? The unicode set is huge...get the english ones right.
145 // return isalpha( anyByte );
150 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
152 // This will only work for low-ascii, everything else is assumed to be a valid
153 // letter. I'm not sure this is the best approach, but it is quite tricky trying
154 // to figure out alphabetical vs. not across encoding. So take a very
155 // conservative approach.
157 // if ( encoding == TIXML_ENCODING_UTF8 )
160 return isalnum( anyByte );
162 return 1; // What else to do? The unicode set is huge...get the english ones right.
166 // return isalnum( anyByte );
171 class TiXmlParsingData
173 friend class TiXmlDocument;
175 void Stamp( const char* now, TiXmlEncoding encoding );
177 const TiXmlCursor& Cursor() { return cursor; }
180 // Only used by the document!
181 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
200 // Do nothing if the tabsize is 0.
206 // Get the current row, column.
207 int row = cursor.row;
208 int col = cursor.col;
209 const char* p = stamp;
214 // Treat p as unsigned, so we have a happy compiler.
215 const unsigned char* pU = (const unsigned char*)p;
217 // Code contributed by Fletcher Dunn: (modified by lee)
220 // We *should* never get here, but in case we do, don't
221 // advance past the terminating null character, ever
225 // bump down to the next line
231 // Check for \r\n sequence, and treat this as a single character
238 // bump down to the next line
245 // Check for \n\r sequence, and treat this as a single
246 // character. (Yes, this bizarre thing does occur still
247 // on some arcane platforms...)
257 // Skip to next tab stop
258 col = (col / tabsize + 1) * tabsize;
261 case TIXML_UTF_LEAD_0:
262 if ( encoding == TIXML_ENCODING_UTF8 )
264 if ( *(p+1) && *(p+2) )
266 // In these cases, don't advance the column. These are
268 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
270 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
275 { p +=3; ++col; } // A normal character.
286 if ( encoding == TIXML_ENCODING_UTF8 )
288 // Eat the 1 to 4 byte utf8 character.
289 int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
291 step = 1; // Error case from bad encoding, but handle gracefully.
294 // Just advance one column, of course.
307 assert( cursor.row >= -1 );
308 assert( cursor.col >= -1 );
314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
320 if ( encoding == TIXML_ENCODING_UTF8 )
324 const unsigned char* pU = (const unsigned char*)p;
326 // Skip the stupid Microsoft UTF-8 Byte order marks
327 if ( *(pU+0)==TIXML_UTF_LEAD_0
328 && *(pU+1)==TIXML_UTF_LEAD_1
329 && *(pU+2)==TIXML_UTF_LEAD_2 )
334 else if(*(pU+0)==TIXML_UTF_LEAD_0
341 else if(*(pU+0)==TIXML_UTF_LEAD_0
349 if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space.
357 while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
365 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
369 if ( !in->good() ) return false;
372 // At this scope, we can't get to a document. So fail silently.
373 if ( !IsWhiteSpace( c ) || c <= 0 )
376 *tag += (char) in->get();
380 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
382 //assert( character > 0 && character < 128 ); // else it won't work in utf-8
386 if ( c == character )
388 if ( c <= 0 ) // Silent failure: can't get document at this scope
398 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
399 // "assign" optimization removes over 10% of the execution time.
401 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
403 // Oddly, not supported on some compilers,
409 // Names start with letters or underscores.
410 // Of course, in unicode, tinyxml has no idea what a letter *is*. The
411 // algorithm is generous.
413 // After that, they can be letters, underscores, numbers,
414 // hyphens, or colons. (Colons are valid only for namespaces,
415 // but tinyxml can't tell namespaces from names.)
417 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
419 const char* start = p;
421 && ( IsAlphaNum( (unsigned char ) *p, encoding )
427 //(*name) += *p; // expensive
431 name->assign( start, p-start );
438 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
440 // Presume an entity, and pull it out.
445 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
447 unsigned long ucs = 0;
454 if ( !*(p+3) ) return 0;
457 q = strchr( q, ';' );
459 if ( !q || !*q ) return 0;
466 if ( *q >= '0' && *q <= '9' )
467 ucs += mult * (*q - '0');
468 else if ( *q >= 'a' && *q <= 'f' )
469 ucs += mult * (*q - 'a' + 10);
470 else if ( *q >= 'A' && *q <= 'F' )
471 ucs += mult * (*q - 'A' + 10 );
481 if ( !*(p+2) ) return 0;
484 q = strchr( q, ';' );
486 if ( !q || !*q ) return 0;
493 if ( *q >= '0' && *q <= '9' )
494 ucs += mult * (*q - '0');
501 if ( encoding == TIXML_ENCODING_UTF8 )
503 // convert the UCS to UTF-8
504 ConvertUTF32ToUTF8( ucs, value, length );
511 return p + delta + 1;
514 // Now try to match it.
515 for( i=0; i<NUM_ENTITY; ++i )
517 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
519 assert( strlen( entity[i].str ) == entity[i].strLength );
520 *value = entity[i].chr;
522 return ( p + entity[i].strLength );
526 // So it wasn't an entity, it's unrecognized, or something like that.
527 *value = *p; // Don't put back the last one, since we return it!
528 //*length = 1; // Leave unrecognized entities - this doesn't really work.
529 // Just writes strange XML.
534 bool TiXmlBase::StringEqual( const char* p,
537 TiXmlEncoding encoding )
551 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
562 while ( *q && *tag && *q == *tag )
568 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
574 const char* TiXmlBase::ReadText( const char* p,
578 bool caseInsensitive,
579 TiXmlEncoding encoding )
582 if ( !trimWhiteSpace // certain tags always keep whitespace
583 || !condenseWhiteSpace ) // if true, whitespace is always kept
585 // Keep all the white space.
587 && !StringEqual( p, endTag, caseInsensitive, encoding )
591 char cArr[4] = { 0, 0, 0, 0 };
592 p = GetChar( p, cArr, &len, encoding );
593 text->append( cArr, len );
598 bool whitespace = false;
600 // Remove leading white space:
601 p = SkipWhiteSpace( p, encoding );
603 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
605 if ( *p == '\r' || *p == '\n' )
610 else if ( IsWhiteSpace( *p ) )
617 // If we've found whitespace, add it before the
618 // new character. Any whitespace just becomes a space.
625 char cArr[4] = { 0, 0, 0, 0 };
626 p = GetChar( p, cArr, &len, encoding );
628 (*text) += cArr[0]; // more efficient
630 text->append( cArr, len );
635 p += strlen( endTag );
641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
643 // The basic issue with a document is that we don't know what we're
644 // streaming. Read something presumed to be a tag (and hope), then
645 // identify it, and call the appropriate stream method on the tag.
647 // This "pre-streaming" will never read the closing ">" so the
648 // sub-tag can orient itself.
650 if ( !StreamTo( in, '<', tag ) )
652 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
658 int tagIndex = (int) tag->length();
659 while ( in->good() && in->peek() != '>' )
664 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
672 // We now have something we presume to be a node of
673 // some sort. Identify it, and call the node to
674 // continue streaming.
675 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
679 node->StreamIn( in, tag );
680 bool isElement = node->ToElement() != 0;
684 // If this is the root element, we're done. Parsing will be
685 // done by the >> operator.
693 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
698 // We should have returned sooner.
699 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
704 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
708 // Parse away, at the document level. Since a document
709 // contains nothing but other tags, most of what happens
710 // here is skipping white space.
713 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
717 // Note that, for a document, this needs to come
718 // before the white space skip, so that parsing
719 // starts from the pointer we are given.
723 location.row = prevData->cursor.row;
724 location.col = prevData->cursor.col;
731 TiXmlParsingData data( p, TabSize(), location.row, location.col );
732 location = data.Cursor();
734 if ( encoding == TIXML_ENCODING_UNKNOWN )
736 // Check for the Microsoft UTF-8 lead bytes.
737 const unsigned char* pU = (const unsigned char*)p;
738 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
739 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
740 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
742 encoding = TIXML_ENCODING_UTF8;
743 useMicrosoftBOM = true;
747 p = SkipWhiteSpace( p, encoding );
750 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
756 TiXmlNode* node = Identify( p, encoding );
759 p = node->Parse( p, &data, encoding );
760 LinkEndChild( node );
767 // Did we get encoding info?
768 if ( encoding == TIXML_ENCODING_UNKNOWN
769 && node->ToDeclaration() )
771 TiXmlDeclaration* dec = node->ToDeclaration();
772 const char* enc = dec->Encoding();
776 encoding = TIXML_ENCODING_UTF8;
777 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
778 encoding = TIXML_ENCODING_UTF8;
779 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
780 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
782 encoding = TIXML_ENCODING_LEGACY;
785 p = SkipWhiteSpace( p, encoding );
790 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
798 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
800 // The first error in a chain is more accurate - don't set again!
804 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
807 errorDesc = errorString[ errorId ];
809 errorLocation.Clear();
810 if ( pError && data )
812 data->Stamp( pError, encoding );
813 errorLocation = data->Cursor();
818 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
820 TiXmlNode* returnNode = 0;
822 p = SkipWhiteSpace( p, encoding );
823 if( !p || !*p || *p != '<' )
828 TiXmlDocument* doc = GetDocument();
829 p = SkipWhiteSpace( p, encoding );
836 // What is this thing?
837 // - Elements start with a letter or underscore, but xml is reserved.
839 // - Declaration: <?xml
840 // - Everything else is unknown to tinyxml.
843 const char* xmlHeader = { "<?xml" };
844 const char* commentHeader = { "<!--" };
845 const char* dtdHeader = { "<!" };
846 const char* cdataHeader = { "<![CDATA[" };
848 if ( StringEqual( p, xmlHeader, true, encoding ) )
851 TIXML_LOG( "XML parsing Declaration\n" );
853 returnNode = new TiXmlDeclaration();
855 else if ( StringEqual( p, commentHeader, false, encoding ) )
858 TIXML_LOG( "XML parsing Comment\n" );
860 returnNode = new TiXmlComment();
862 else if ( StringEqual( p, cdataHeader, false, encoding ) )
865 TIXML_LOG( "XML parsing CDATA\n" );
867 TiXmlText* text = new TiXmlText( "" );
868 text->SetCDATA( true );
871 else if ( StringEqual( p, dtdHeader, false, encoding ) )
874 TIXML_LOG( "XML parsing Unknown(1)\n" );
876 returnNode = new TiXmlUnknown();
878 else if ( IsAlpha( *(p+1), encoding )
882 TIXML_LOG( "XML parsing Element\n" );
884 returnNode = new TiXmlElement( "" );
889 TIXML_LOG( "XML parsing Unknown(2)\n" );
891 returnNode = new TiXmlUnknown();
896 // Set the parent, so it can report errors
897 returnNode->parent = this;
902 doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
909 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
911 // We're called with some amount of pre-parsing. That is, some of "this"
912 // element is in "tag". Go ahead and stream to the closing ">"
918 TiXmlDocument* document = GetDocument();
920 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
929 if ( tag->length() < 3 ) return;
931 // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
932 // If not, identify and stream.
934 if ( tag->at( tag->length() - 1 ) == '>'
935 && tag->at( tag->length() - 2 ) == '/' )
940 else if ( tag->at( tag->length() - 1 ) == '>' )
942 // There is more. Could be:
944 // cdata text (which looks like another node)
949 StreamWhiteSpace( in, tag );
952 if ( in->good() && in->peek() != '<' )
955 TiXmlText text( "" );
956 text.StreamIn( in, tag );
958 // What follows text is a closing tag or another node.
959 // Go around again and figure it out.
963 // We now have either a closing tag...or another node.
964 // We should be at a "<", regardless.
965 if ( !in->good() ) return;
966 assert( in->peek() == '<' );
967 int tagIndex = (int) tag->length();
969 bool closingTag = false;
970 bool firstCharFound = false;
980 TiXmlDocument* document = GetDocument();
982 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
992 // Early out if we find the CDATA id.
993 if ( c == '[' && tag->size() >= 9 )
995 size_t len = tag->size();
996 const char* start = tag->c_str() + len - 9;
997 if ( strcmp( start, "<![CDATA[" ) == 0 ) {
998 assert( !closingTag );
1003 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
1005 firstCharFound = true;
1010 // If it was a closing tag, then read in the closing '>' to clean up the input stream.
1011 // If it was not, the streaming will be done by the tag.
1020 TiXmlDocument* document = GetDocument();
1022 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1028 // We are done, once we've found our closing tag.
1033 // If not a closing tag, id it, and stream.
1034 const char* tagloc = tag->c_str() + tagIndex;
1035 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1038 node->StreamIn( in, tag );
1042 // No return: go around from the beginning: text, closing tag, or node.
1049 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1051 p = SkipWhiteSpace( p, encoding );
1052 TiXmlDocument* document = GetDocument();
1056 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1062 data->Stamp( p, encoding );
1063 location = data->Cursor();
1068 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1072 p = SkipWhiteSpace( p+1, encoding );
1075 const char* pErr = p;
1077 p = ReadName( p, &value, encoding );
1080 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1084 TIXML_STRING endTag ("</");
1088 // Check for and read attributes. Also look for an empty
1089 // tag or an end tag.
1093 p = SkipWhiteSpace( p, encoding );
1096 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1105 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1110 else if ( *p == '>' )
1112 // Done with attributes (if there were any.)
1113 // Read the value -- which can include other
1114 // elements -- read the end tag, and return.
1116 p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
1118 // We were looking for the end tag, but found nothing.
1119 // Fix for [ 1663758 ] Failure to report error on bad XML
1120 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1124 // We should find the end tag now
1125 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1127 p += endTag.length();
1132 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1138 // Try to read an attribute:
1139 TiXmlAttribute* attrib = new TiXmlAttribute();
1142 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
1146 attrib->SetDocument( document );
1148 p = attrib->Parse( p, data, encoding );
1152 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1157 // Handle the strange case of double attributes:
1158 #ifdef TIXML_USE_STL
1159 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1161 TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1165 node->SetValue( attrib->Value() );
1170 attributeSet.Add( attrib );
1177 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1179 TiXmlDocument* document = GetDocument();
1181 // Read in text and elements in any order.
1182 const char* pWithWhiteSpace = p;
1183 p = SkipWhiteSpace( p, encoding );
1189 // Take what we have, make a text element.
1190 TiXmlText* textNode = new TiXmlText( "" );
1194 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
1198 if ( TiXmlBase::IsWhiteSpaceCondensed() )
1200 p = textNode->Parse( p, data, encoding );
1204 // Special case: we want to keep the white space
1205 // so that leading spaces aren't removed.
1206 p = textNode->Parse( pWithWhiteSpace, data, encoding );
1209 if ( !textNode->Blank() )
1210 LinkEndChild( textNode );
1217 // Have we hit a new element or an end tag? This could also be
1218 // a TiXmlText in the "CDATA" style.
1219 if ( StringEqual( p, "</", false, encoding ) )
1225 TiXmlNode* node = Identify( p, encoding );
1228 p = node->Parse( p, data, encoding );
1229 LinkEndChild( node );
1237 pWithWhiteSpace = p;
1238 p = SkipWhiteSpace( p, encoding );
1243 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1249 #ifdef TIXML_USE_STL
1250 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1252 while ( in->good() )
1257 TiXmlDocument* document = GetDocument();
1259 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1274 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1276 TiXmlDocument* document = GetDocument();
1277 p = SkipWhiteSpace( p, encoding );
1281 data->Stamp( p, encoding );
1282 location = data->Cursor();
1284 if ( !p || !*p || *p != '<' )
1286 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1292 while ( p && *p && *p != '>' )
1300 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1307 #ifdef TIXML_USE_STL
1308 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1310 while ( in->good() )
1315 TiXmlDocument* document = GetDocument();
1317 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1324 && tag->at( tag->length() - 2 ) == '-'
1325 && tag->at( tag->length() - 3 ) == '-' )
1335 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1337 TiXmlDocument* document = GetDocument();
1340 p = SkipWhiteSpace( p, encoding );
1344 data->Stamp( p, encoding );
1345 location = data->Cursor();
1347 const char* startTag = "<!--";
1348 const char* endTag = "-->";
1350 if ( !StringEqual( p, startTag, false, encoding ) )
1352 if ( document ) document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding ); // Guarded: document comes from GetDocument() and the other Parse methods here null-check it before SetError.
1355 p += strlen( startTag );
1357 // [ 1475201 ] TinyXML parses entities in comments
1358 // Oops - ReadText doesn't work, because we don't want to parse the entities.
1359 // p = ReadText( p, &value, false, endTag, false, encoding );
1361 // from the XML spec:
1363 [Definition: Comments may appear anywhere in a document outside other markup; in addition,
1364 they may appear within the document type declaration at places allowed by the grammar.
1365 They are not part of the document's character data; an XML processor MAY, but need not,
1366 make it possible for an application to retrieve the text of comments. For compatibility,
1367 the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
1368 references MUST NOT be recognized within comments.
1370 An example of a comment:
1372 <!-- declarations for <head> & <body> -->
1376 // Keep all the white space.
1377 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
1379 value.append( p, 1 );
1383 p += strlen( endTag );
1389 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1391 p = SkipWhiteSpace( p, encoding );
1392 if ( !p || !*p ) return 0;
1396 // tabsize = document->TabSize();
1400 data->Stamp( p, encoding );
1401 location = data->Cursor();
1403 // Read the name, the '=' and the value.
1404 const char* pErr = p;
1405 p = ReadName( p, &name, encoding );
1408 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1411 p = SkipWhiteSpace( p, encoding );
1412 if ( !p || !*p || *p != '=' )
1414 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1419 p = SkipWhiteSpace( p, encoding );
1422 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1427 const char SINGLE_QUOTE = '\'';
1428 const char DOUBLE_QUOTE = '\"';
1430 if ( *p == SINGLE_QUOTE )
1433 end = "\'"; // single quote in string
1434 p = ReadText( p, &value, false, end, false, encoding );
1436 else if ( *p == DOUBLE_QUOTE )
1439 end = "\""; // double quote in string
1440 p = ReadText( p, &value, false, end, false, encoding );
1444 // All attribute values should be in single or double quotes.
1445 // But this is such a common error that the parser will try
1446 // its best, even without them.
1448 while ( p && *p // existence
1449 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r' // whitespace
1450 && *p != '/' && *p != '>' ) // tag end
1452 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1453 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
1454 // We did not have an opening quote but seem to have a
1455 // closing one. Give up and throw an error.
1456 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1466 #ifdef TIXML_USE_STL
1467 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1469 while ( in->good() )
1472 if ( !cdata && (c == '<' ) )
1478 TiXmlDocument* document = GetDocument();
1480 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1485 in->get(); // "commits" the peek made above
1487 if ( cdata && c == '>' && tag->size() >= 3 ) {
1488 size_t len = tag->size();
1489 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
1490 // terminator of cdata.
1498 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1501 TiXmlDocument* document = GetDocument();
1505 data->Stamp( p, encoding );
1506 location = data->Cursor();
1509 const char* const startTag = "<![CDATA[";
1510 const char* const endTag = "]]>";
1512 if ( cdata || StringEqual( p, startTag, false, encoding ) )
1516 if ( !StringEqual( p, startTag, false, encoding ) )
1518 if ( document ) document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding ); // Guarded: document comes from GetDocument() and the other Parse methods here null-check it before SetError.
1521 p += strlen( startTag );
1523 // Keep all the white space, ignore the encoding, etc.
1525 && !StringEqual( p, endTag, false, encoding )
1533 p = ReadText( p, &dummy, false, endTag, false, encoding );
1538 bool ignoreWhite = true;
1540 const char* end = "<";
1541 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1543 return p-1; // don't truncate the '<'
1548 #ifdef TIXML_USE_STL
1549 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1551 while ( in->good() )
1556 TiXmlDocument* document = GetDocument();
1558 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1572 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1574 p = SkipWhiteSpace( p, _encoding );
1575 // Find the beginning, find the end, and look for
1576 // the stuff in-between.
1577 TiXmlDocument* document = GetDocument();
1578 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1580 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1585 data->Stamp( p, _encoding );
1586 location = data->Cursor();
1602 p = SkipWhiteSpace( p, _encoding );
1603 if ( StringEqual( p, "version", true, _encoding ) )
1605 TiXmlAttribute attrib;
1606 p = attrib.Parse( p, data, _encoding );
1607 version = attrib.Value();
1609 else if ( StringEqual( p, "encoding", true, _encoding ) )
1611 TiXmlAttribute attrib;
1612 p = attrib.Parse( p, data, _encoding );
1613 encoding = attrib.Value();
1615 else if ( StringEqual( p, "standalone", true, _encoding ) )
1617 TiXmlAttribute attrib;
1618 p = attrib.Parse( p, data, _encoding );
1619 standalone = attrib.Value();
1623 // Read over whatever it is.
1624 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1631 bool TiXmlText::Blank() const
1633 for ( unsigned i=0; i<value.length(); i++ )
1634 if ( !IsWhiteSpace( value[i] ) )