1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 //#include <stdlib.h> 25 //#include <sal/alloca.h> 26 27 #include <boost/scoped_ptr.hpp> 28 29 #include <osl/diagnose.h> 30 #include <rtl/ustrbuf.hxx> 31 32 #include <com/sun/star/lang/DisposedException.hpp> 33 #include <com/sun/star/xml/sax/XFastContextHandler.hpp> 34 #include <com/sun/star/xml/sax/SAXParseException.hpp> 35 #include <com/sun/star/xml/sax/FastToken.hpp> 36 37 #include "fastparser.hxx" 38 39 #include <string.h> 40 41 using ::rtl::OString; 42 using ::rtl::OUString; 43 using ::rtl::OUStringBuffer; 44 using namespace ::std; 45 using namespace ::osl; 46 using namespace ::cppu; 47 using namespace ::com::sun::star::uno; 48 using namespace ::com::sun::star::lang; 49 using namespace ::com::sun::star::xml::sax; 50 //using namespace ::com::sun::star::util; 51 using namespace ::com::sun::star::io; 52 53 namespace sax_fastparser { 54 55 // -------------------------------------------------------------------- 56 57 struct SaxContextImpl 58 { 59 Reference< XFastContextHandler > mxContext; 60 sal_uInt32 mnNamespaceCount; 61 sal_Int32 mnElementToken; 62 OUString maNamespace; 63 OUString maElementName; 64 65 SaxContextImpl() { mnNamespaceCount = 0; mnElementToken = 0; } 66 SaxContextImpl( const SaxContextImplPtr& p ) { mnNamespaceCount = p->mnNamespaceCount; mnElementToken = p->mnElementToken; maNamespace = p->maNamespace; } 67 }; 68 69 // -------------------------------------------------------------------- 70 71 struct NamespaceDefine 72 { 73 OString maPrefix; 74 sal_Int32 mnToken; 75 OUString maNamespaceURL; 76 77 NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {} 78 }; 79 80 // -------------------------------------------------------------------- 81 // FastLocatorImpl 82 // -------------------------------------------------------------------- 83 84 class FastSaxParser; 85 86 class FastLocatorImpl : public WeakImplHelper1< XLocator > 87 { 88 public: 89 FastLocatorImpl( FastSaxParser *p ) : mpParser(p) {} 90 91 void dispose() { mpParser = 0; } 92 void checkDispose() throw (RuntimeException) { if( !mpParser ) throw DisposedException(); } 93 94 //XLocator 95 virtual sal_Int32 SAL_CALL getColumnNumber(void) throw (RuntimeException); 96 virtual sal_Int32 SAL_CALL getLineNumber(void) throw (RuntimeException); 97 virtual OUString SAL_CALL getPublicId(void) throw (RuntimeException); 98 virtual OUString SAL_CALL getSystemId(void) throw (RuntimeException); 99 100 private: 101 FastSaxParser *mpParser; 102 }; 103 104 // -------------------------------------------------------------------- 105 // FastSaxParser 106 // -------------------------------------------------------------------- 107 108 //--------------------------------------------- 109 // the implementation part 110 //--------------------------------------------- 111 112 extern "C" { 113 114 static void call_callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts) 115 { 116 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData ); 117 pFastParser->callbackStartElement( name, atts ); 118 } 119 120 static void call_callbackEndElement(void *userData, const XML_Char *name) 121 { 122 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData ); 123 pFastParser->callbackEndElement( name ); 124 } 125 126 static void call_callbackCharacters( void *userData , const XML_Char *s , int nLen ) 127 { 128 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData ); 129 pFastParser->callbackCharacters( s, nLen ); 130 } 131 132 static int call_callbackExternalEntityRef( XML_Parser parser, 133 const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId ) 134 { 135 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( XML_GetUserData( parser ) ); 136 return pFastParser->callbackExternalEntityRef( parser, openEntityNames, base, systemId, publicId ); 137 } 138 139 } // extern "C" 140 141 // -------------------------------------------------------------------- 142 // FastLocatorImpl implementation 143 // -------------------------------------------------------------------- 144 145 sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber(void) throw (RuntimeException) 146 { 147 checkDispose(); 148 return XML_GetCurrentColumnNumber( mpParser->getEntity().mpParser ); 149 } 150 151 // -------------------------------------------------------------------- 152 153 sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber(void) throw (RuntimeException) 154 { 155 checkDispose(); 156 return XML_GetCurrentLineNumber( mpParser->getEntity().mpParser ); 157 } 158 159 // -------------------------------------------------------------------- 160 161 OUString SAL_CALL FastLocatorImpl::getPublicId(void) throw (RuntimeException) 162 { 163 checkDispose(); 164 return mpParser->getEntity().maStructSource.sPublicId; 165 } 166 // -------------------------------------------------------------------- 167 168 OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException) 169 { 170 checkDispose(); 171 return mpParser->getEntity().maStructSource.sSystemId; 172 } 173 174 // -------------------------------------------------------------------- 175 176 ParserData::ParserData() 177 { 178 } 179 180 ParserData::~ParserData() 181 { 182 } 183 184 // -------------------------------------------------------------------- 185 186 Entity::Entity( const ParserData& rData ) : 187 ParserData( rData ) 188 { 189 // performance-Improvment. Reference is needed when calling the startTag callback. 190 // Handing out the same object with every call is allowed (see sax-specification) 191 mxAttributes.set( new FastAttributeList( mxTokenHandler ) ); 192 } 193 194 Entity::~Entity() 195 { 196 } 197 198 // -------------------------------------------------------------------- 199 // FastSaxParser implementation 200 // -------------------------------------------------------------------- 201 202 FastSaxParser::FastSaxParser() 203 { 204 mxDocumentLocator.set( new FastLocatorImpl( this ) ); 205 } 206 207 // -------------------------------------------------------------------- 208 209 FastSaxParser::~FastSaxParser() 210 { 211 if( mxDocumentLocator.is() ) 212 mxDocumentLocator->dispose(); 213 } 214 215 // -------------------------------------------------------------------- 216 217 void FastSaxParser::pushContext() 218 { 219 Entity& rEntity = getEntity(); 220 if( rEntity.maContextStack.empty() ) 221 { 222 rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl ) ); 223 DefineNamespace( OString("xml"), "http://www.w3.org/XML/1998/namespace"); 224 } 225 else 226 { 227 rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl( rEntity.maContextStack.top() ) ) ); 228 } 229 } 230 231 // -------------------------------------------------------------------- 232 233 void FastSaxParser::popContext() 234 { 235 Entity& rEntity = getEntity(); 236 OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::popContext(), pop without push?" ); 237 if( !rEntity.maContextStack.empty() ) 238 rEntity.maContextStack.pop(); 239 } 240 241 // -------------------------------------------------------------------- 242 243 void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL ) 244 { 245 Entity& rEntity = getEntity(); 246 OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::DefineNamespace(), I need a context!" ); 247 if( !rEntity.maContextStack.empty() ) 248 { 249 sal_uInt32 nOffset = rEntity.maContextStack.top()->mnNamespaceCount++; 250 251 if( rEntity.maNamespaceDefines.size() <= nOffset ) 252 rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 ); 253 254 const OUString aNamespaceURL( pNamespaceURL, strlen( pNamespaceURL ), RTL_TEXTENCODING_UTF8 ); 255 rEntity.maNamespaceDefines[nOffset].reset( new NamespaceDefine( rPrefix, GetNamespaceToken( aNamespaceURL ), aNamespaceURL ) ); 256 } 257 } 258 259 // -------------------------------------------------------------------- 260 261 sal_Int32 FastSaxParser::GetToken( const OString& rToken ) 262 { 263 Sequence< sal_Int8 > aSeq( (sal_Int8*)rToken.getStr(), rToken.getLength() ); 264 265 return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq ); 266 } 267 268 sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ ) 269 { 270 if( !nLen ) 271 nLen = strlen( pToken ); 272 273 Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen ); 274 275 return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq ); 276 } 277 278 // -------------------------------------------------------------------- 279 280 sal_Int32 FastSaxParser::GetTokenWithPrefix( const OString& rPrefix, const OString& rName ) throw (SAXException) 281 { 282 sal_Int32 nNamespaceToken = FastToken::DONTKNOW; 283 284 Entity& rEntity = getEntity(); 285 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 286 while( nNamespace-- ) 287 { 288 if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix ) 289 { 290 nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken; 291 break; 292 } 293 294 if( !nNamespace ) 295 throw SAXException(); // prefix that has no defined namespace url 296 } 297 298 if( nNamespaceToken != FastToken::DONTKNOW ) 299 { 300 sal_Int32 nNameToken = GetToken( rName.getStr(), rName.getLength() ); 301 if( nNameToken != FastToken::DONTKNOW ) 302 return nNamespaceToken | nNameToken; 303 } 304 305 return FastToken::DONTKNOW; 306 } 307 308 sal_Int32 FastSaxParser::GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (SAXException) 309 { 310 sal_Int32 nNamespaceToken = FastToken::DONTKNOW; 311 312 Entity& rEntity = getEntity(); 313 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 314 while( nNamespace-- ) 315 { 316 const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix ); 317 if( (rPrefix.getLength() == nPrefixLen) && 318 (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) ) 319 { 320 nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken; 321 break; 322 } 323 324 if( !nNamespace ) 325 throw SAXException(); // prefix that has no defined namespace url 326 } 327 328 if( nNamespaceToken != FastToken::DONTKNOW ) 329 { 330 sal_Int32 nNameToken = GetToken( pName, nNameLen ); 331 if( nNameToken != FastToken::DONTKNOW ) 332 return nNamespaceToken | nNameToken; 333 } 334 335 return FastToken::DONTKNOW; 336 } 337 338 // -------------------------------------------------------------------- 339 340 sal_Int32 FastSaxParser::GetNamespaceToken( const OUString& rNamespaceURL ) 341 { 342 NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) ); 343 if( aIter != maNamespaceMap.end() ) 344 return (*aIter).second; 345 else 346 return FastToken::DONTKNOW; 347 } 348 349 // -------------------------------------------------------------------- 350 351 OUString FastSaxParser::GetNamespaceURL( const OString& rPrefix ) throw (SAXException) 352 { 353 Entity& rEntity = getEntity(); 354 if( !rEntity.maContextStack.empty() ) 355 { 356 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 357 while( nNamespace-- ) 358 if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix ) 359 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL; 360 } 361 362 throw SAXException(); // prefix that has no defined namespace url 363 } 364 365 OUString FastSaxParser::GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw(SAXException) 366 { 367 Entity& rEntity = getEntity(); 368 if( pPrefix && !rEntity.maContextStack.empty() ) 369 { 370 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 371 while( nNamespace-- ) 372 { 373 const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix ); 374 if( (rPrefix.getLength() == nPrefixLen) && 375 (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) ) 376 { 377 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL; 378 } 379 } 380 } 381 382 throw SAXException(); // prefix that has no defined namespace url 383 } 384 385 // -------------------------------------------------------------------- 386 387 sal_Int32 FastSaxParser::GetTokenWithNamespaceURL( const OUString& rNamespaceURL, const sal_Char* pName, int nNameLen ) 388 { 389 sal_Int32 nNamespaceToken = GetNamespaceToken( rNamespaceURL ); 390 391 if( nNamespaceToken != FastToken::DONTKNOW ) 392 { 393 sal_Int32 nNameToken = GetToken( pName, nNameLen ); 394 if( nNameToken != FastToken::DONTKNOW ) 395 return nNamespaceToken | nNameToken; 396 } 397 398 return FastToken::DONTKNOW; 399 } 400 401 // -------------------------------------------------------------------- 402 403 void FastSaxParser::splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen ) 404 { 405 XML_Char *p; 406 for( p = const_cast< XML_Char* >( pwName ), rNameLen = 0, rPrefixLen = 0; *p; p++ ) 407 { 408 if( *p == ':' ) 409 { 410 rPrefixLen = p - pwName; 411 rNameLen = 0; 412 } 413 else 414 { 415 rNameLen++; 416 } 417 } 418 if( rPrefixLen ) 419 { 420 rpPrefix = pwName; 421 rpName = &pwName[ rPrefixLen + 1 ]; 422 } 423 else 424 { 425 rpPrefix = 0; 426 rpName = pwName; 427 } 428 } 429 430 /*************** 431 * 432 * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does 433 * the file-specific initialization work. (During a parser run, external files may be opened) 434 * 435 ****************/ 436 void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXException, IOException, RuntimeException) 437 { 438 // Only one text at one time 439 MutexGuard guard( maMutex ); 440 441 Entity entity( maData ); 442 entity.maStructSource = maStructSource; 443 444 if( !entity.maStructSource.aInputStream.is() ) 445 throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "No input source" ) ), Reference< XInterface >(), Any() ); 446 447 entity.maConverter.setInputStream( entity.maStructSource.aInputStream ); 448 if( entity.maStructSource.sEncoding.getLength() ) 449 entity.maConverter.setEncoding( OUStringToOString( entity.maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US ) ); 450 451 // create parser with proper encoding 452 entity.mpParser = XML_ParserCreate( 0 ); 453 if( !entity.mpParser ) 454 throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "Couldn't create parser" ) ), Reference< XInterface >(), Any() ); 455 456 // set all necessary C-Callbacks 457 XML_SetUserData( entity.mpParser, this ); 458 XML_SetElementHandler( entity.mpParser, call_callbackStartElement, call_callbackEndElement ); 459 XML_SetCharacterDataHandler( entity.mpParser, call_callbackCharacters ); 460 XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef ); 461 462 pushEntity( entity ); 463 try 464 { 465 // start the document 466 if( entity.mxDocumentHandler.is() ) 467 { 468 Reference< XLocator > xLoc( mxDocumentLocator.get() ); 469 entity.mxDocumentHandler->setDocumentLocator( xLoc ); 470 entity.mxDocumentHandler->startDocument(); 471 } 472 473 parse(); 474 475 // finish document 476 if( entity.mxDocumentHandler.is() ) 477 { 478 entity.mxDocumentHandler->endDocument(); 479 } 480 } 481 catch( SAXException & ) 482 { 483 popEntity(); 484 XML_ParserFree( entity.mpParser ); 485 throw; 486 } 487 catch( IOException & ) 488 { 489 popEntity(); 490 XML_ParserFree( entity.mpParser ); 491 throw; 492 } 493 catch( RuntimeException & ) 494 { 495 popEntity(); 496 XML_ParserFree( entity.mpParser ); 497 throw; 498 } 499 500 popEntity(); 501 XML_ParserFree( entity.mpParser ); 502 } 503 504 void FastSaxParser::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler ) throw (RuntimeException) 505 { 506 maData.mxDocumentHandler = Handler; 507 } 508 509 void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& Handler ) throw (RuntimeException) 510 { 511 maData.mxTokenHandler = Handler; 512 } 513 514 void SAL_CALL FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (IllegalArgumentException, RuntimeException) 515 { 516 if( NamespaceToken >= FastToken::NAMESPACE ) 517 { 518 if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW ) 519 { 520 maNamespaceMap[ NamespaceURL ] = NamespaceToken; 521 return; 522 } 523 } 524 throw IllegalArgumentException(); 525 } 526 527 void FastSaxParser::setErrorHandler(const Reference< XErrorHandler > & Handler) throw (RuntimeException) 528 { 529 maData.mxErrorHandler = Handler; 530 } 531 532 void FastSaxParser::setEntityResolver(const Reference < XEntityResolver > & Resolver) throw (RuntimeException) 533 { 534 maData.mxEntityResolver = Resolver; 535 } 536 537 void FastSaxParser::setLocale( const Locale & Locale ) throw (RuntimeException) 538 { 539 maData.maLocale = Locale; 540 } 541 542 Sequence< OUString > FastSaxParser::getSupportedServiceNames_Static(void) 543 { 544 Sequence<OUString> aRet(1); 545 aRet.getArray()[0] = ::rtl::OUString( RTL_CONSTASCII_USTRINGPARAM(PARSER_SERVICE_NAME) ); 546 return aRet; 547 } 548 549 // XServiceInfo 550 OUString FastSaxParser::getImplementationName() throw (RuntimeException) 551 { 552 return OUString::createFromAscii( PARSER_IMPLEMENTATION_NAME ); 553 } 554 555 // XServiceInfo 556 sal_Bool FastSaxParser::supportsService(const OUString& ServiceName) throw (RuntimeException) 557 { 558 Sequence< OUString > aSNL = getSupportedServiceNames(); 559 const OUString * pArray = aSNL.getConstArray(); 560 561 for( sal_Int32 i = 0; i < aSNL.getLength(); i++ ) 562 if( pArray[i] == ServiceName ) 563 return sal_True; 564 565 return sal_False; 566 } 567 568 // XServiceInfo 569 Sequence< OUString > FastSaxParser::getSupportedServiceNames(void) throw (RuntimeException) 570 { 571 572 Sequence<OUString> seq(1); 573 seq.getArray()[0] = OUString::createFromAscii( PARSER_SERVICE_NAME ); 574 return seq; 575 } 576 577 578 /*--------------------------------------- 579 * 580 * Helper functions and classes 581 * 582 *-------------------------------------------*/ 583 584 namespace { 585 586 OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int32 nLine ) 587 { 588 const sal_Char* pMessage = ""; 589 switch( xmlE ) 590 { 591 case XML_ERROR_NONE: pMessage = "No"; break; 592 case XML_ERROR_NO_MEMORY: pMessage = "no memory"; break; 593 case XML_ERROR_SYNTAX: pMessage = "syntax"; break; 594 case XML_ERROR_NO_ELEMENTS: pMessage = "no elements"; break; 595 case XML_ERROR_INVALID_TOKEN: pMessage = "invalid token"; break; 596 case XML_ERROR_UNCLOSED_TOKEN: pMessage = "unclosed token"; break; 597 case XML_ERROR_PARTIAL_CHAR: pMessage = "partial char"; break; 598 case XML_ERROR_TAG_MISMATCH: pMessage = "tag mismatch"; break; 599 case XML_ERROR_DUPLICATE_ATTRIBUTE: pMessage = "duplicate attribute"; break; 600 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: pMessage = "junk after doc element"; break; 601 case XML_ERROR_PARAM_ENTITY_REF: pMessage = "parameter entity reference"; break; 602 case XML_ERROR_UNDEFINED_ENTITY: pMessage = "undefined entity"; break; 603 case XML_ERROR_RECURSIVE_ENTITY_REF: pMessage = "recursive entity reference"; break; 604 case XML_ERROR_ASYNC_ENTITY: pMessage = "async entity"; break; 605 case XML_ERROR_BAD_CHAR_REF: pMessage = "bad char reference"; break; 606 case XML_ERROR_BINARY_ENTITY_REF: pMessage = "binary entity reference"; break; 607 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: pMessage = "attribute external entity reference"; break; 608 case XML_ERROR_MISPLACED_XML_PI: pMessage = "misplaced xml processing instruction"; break; 609 case XML_ERROR_UNKNOWN_ENCODING: pMessage = "unknown encoding"; break; 610 case XML_ERROR_INCORRECT_ENCODING: pMessage = "incorrect encoding"; break; 611 case XML_ERROR_UNCLOSED_CDATA_SECTION: pMessage = "unclosed cdata section"; break; 612 case XML_ERROR_EXTERNAL_ENTITY_HANDLING: pMessage = "external entity reference"; break; 613 case XML_ERROR_NOT_STANDALONE: pMessage = "not standalone"; break; 614 default:; 615 } 616 617 OUStringBuffer aBuffer( sal_Unicode( '[' ) ); 618 aBuffer.append( sSystemId ); 619 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " line " ) ); 620 aBuffer.append( nLine ); 621 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( "]: " ) ); 622 aBuffer.appendAscii( pMessage ); 623 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " error" ) ); 624 return aBuffer.makeStringAndClear(); 625 } 626 627 } // namespace 628 629 // starts parsing with actual parser ! 630 void FastSaxParser::parse() 631 { 632 const int BUFFER_SIZE = 16 * 1024; 633 Sequence< sal_Int8 > seqOut( BUFFER_SIZE ); 634 635 Entity& rEntity = getEntity(); 636 int nRead = 0; 637 do 638 { 639 nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE ); 640 if( nRead <= 0 ) 641 { 642 XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), 0, 1 ); 643 break; 644 } 645 646 bool bContinue = XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), nRead, 0 ) != 0; 647 // callbacks used inside XML_Parse may have caught an exception 648 if( !bContinue || rEntity.maSavedException.hasValue() ) 649 { 650 // Error during parsing ! 651 XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser ); 652 OUString sSystemId = mxDocumentLocator->getSystemId(); 653 sal_Int32 nLine = mxDocumentLocator->getLineNumber(); 654 655 SAXParseException aExcept( 656 lclGetErrorMessage( xmlE, sSystemId, nLine ), 657 Reference< XInterface >(), 658 Any( &rEntity.maSavedException, getCppuType( &rEntity.maSavedException ) ), 659 mxDocumentLocator->getPublicId(), 660 mxDocumentLocator->getSystemId(), 661 mxDocumentLocator->getLineNumber(), 662 mxDocumentLocator->getColumnNumber() 663 ); 664 665 // error handler is set, it may throw the exception 666 if( rEntity.mxErrorHandler.is() ) 667 rEntity.mxErrorHandler->fatalError( Any( aExcept ) ); 668 669 // error handler has not thrown, but parsing cannot go on, the 670 // exception MUST be thrown 671 throw aExcept; 672 } 673 } 674 while( nRead > 0 ); 675 } 676 677 //------------------------------------------ 678 // 679 // The C-Callbacks 680 // 681 //----------------------------------------- 682 683 namespace { 684 685 struct AttributeData 686 { 687 OString maPrefix; 688 OString maName; 689 OString maValue; 690 }; 691 692 } // namespace 693 694 void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char** awAttributes ) 695 { 696 Reference< XFastContextHandler > xParentContext; 697 Entity& rEntity = getEntity(); 698 if( !rEntity.maContextStack.empty() ) 699 { 700 xParentContext = rEntity.maContextStack.top()->mxContext; 701 if( !xParentContext.is() ) 702 { 703 // we ignore current elements, so no processing needed 704 pushContext(); 705 return; 706 } 707 } 708 709 pushContext(); 710 711 rEntity.mxAttributes->clear(); 712 713 // create attribute map and process namespace instructions 714 int i = 0; 715 sal_Int32 nNameLen, nPrefixLen; 716 const XML_Char *pName; 717 const XML_Char *pPrefix; 718 719 try 720 { 721 /* #158414# Each element may define new namespaces, also for attribues. 722 First, process all namespace attributes and cache other attributes in a 723 vector. Second, process the attributes after namespaces have been 724 initialized. */ 725 ::std::vector< AttributeData > aAttribs; 726 727 // #158414# first: get namespaces 728 for( ; awAttributes[i]; i += 2 ) 729 { 730 OSL_ASSERT( awAttributes[i+1] ); 731 732 splitName( awAttributes[i], pPrefix, nPrefixLen, pName, nNameLen ); 733 if( nPrefixLen ) 734 { 735 if( (nPrefixLen == 5) && (strncmp( pPrefix, "xmlns", 5 ) == 0) ) 736 { 737 DefineNamespace( OString( pName, nNameLen ), awAttributes[i+1] ); 738 } 739 else 740 { 741 aAttribs.resize( aAttribs.size() + 1 ); 742 aAttribs.back().maPrefix = OString( pPrefix, nPrefixLen ); 743 aAttribs.back().maName = OString( pName, nNameLen ); 744 aAttribs.back().maValue = OString( awAttributes[i+1] ); 745 } 746 } 747 else 748 { 749 if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) ) 750 { 751 // namespace of the element found 752 rEntity.maContextStack.top()->maNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 ); 753 } 754 else 755 { 756 aAttribs.resize( aAttribs.size() + 1 ); 757 aAttribs.back().maName = OString( pName, nNameLen ); 758 aAttribs.back().maValue = OString( awAttributes[i+1] ); 759 } 760 } 761 } 762 763 // #158414# second: fill attribute list with other attributes 764 for( ::std::vector< AttributeData >::const_iterator aIt = aAttribs.begin(), aEnd = aAttribs.end(); aIt != aEnd; ++aIt ) 765 { 766 if( aIt->maPrefix.getLength() > 0 ) 767 { 768 sal_Int32 nAttributeToken = GetTokenWithPrefix( aIt->maPrefix, aIt->maName ); 769 if( nAttributeToken != FastToken::DONTKNOW ) 770 rEntity.mxAttributes->add( nAttributeToken, aIt->maValue ); 771 else 772 rEntity.mxAttributes->addUnknown( GetNamespaceURL( aIt->maPrefix ), aIt->maName, aIt->maValue ); 773 } 774 else 775 { 776 sal_Int32 nAttributeToken = GetToken( aIt->maName ); 777 if( nAttributeToken != FastToken::DONTKNOW ) 778 rEntity.mxAttributes->add( nAttributeToken, aIt->maValue ); 779 else 780 rEntity.mxAttributes->addUnknown( aIt->maName, aIt->maValue ); 781 } 782 } 783 784 sal_Int32 nElementToken; 785 splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen ); 786 if( nPrefixLen > 0 ) 787 nElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen ); 788 else if( rEntity.maContextStack.top()->maNamespace.getLength() > 0 ) 789 nElementToken = GetTokenWithNamespaceURL( rEntity.maContextStack.top()->maNamespace, pName, nNameLen ); 790 else 791 nElementToken = GetToken( pName ); 792 rEntity.maContextStack.top()->mnElementToken = nElementToken; 793 794 Reference< XFastAttributeList > xAttr( rEntity.mxAttributes.get() ); 795 Reference< XFastContextHandler > xContext; 796 if( nElementToken == FastToken::DONTKNOW ) 797 { 798 if( nPrefixLen > 0 ) 799 rEntity.maContextStack.top()->maNamespace = GetNamespaceURL( pPrefix, nPrefixLen ); 800 801 const OUString aNamespace( rEntity.maContextStack.top()->maNamespace ); 802 const OUString aElementName( pPrefix, nPrefixLen, RTL_TEXTENCODING_UTF8 ); 803 rEntity.maContextStack.top()->maElementName = aElementName; 804 805 if( xParentContext.is() ) 806 xContext = xParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr ); 807 else 808 xContext = rEntity.mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr ); 809 810 if( xContext.is() ) 811 { 812 rEntity.maContextStack.top()->mxContext = xContext; 813 xContext->startUnknownElement( aNamespace, aElementName, xAttr ); 814 } 815 } 816 else 817 { 818 if( xParentContext.is() ) 819 xContext = xParentContext->createFastChildContext( nElementToken, xAttr ); 820 else 821 xContext = rEntity.mxDocumentHandler->createFastChildContext( nElementToken, xAttr ); 822 823 824 if( xContext.is() ) 825 { 826 rEntity.maContextStack.top()->mxContext = xContext; 827 xContext->startFastElement( nElementToken, xAttr ); 828 } 829 } 830 } 831 catch( Exception& e ) 832 { 833 rEntity.maSavedException <<= e; 834 } 835 } 836 837 void FastSaxParser::callbackEndElement( const XML_Char* ) 838 { 839 Entity& rEntity = getEntity(); 840 OSL_ENSURE( !rEntity.maContextStack.empty(), "FastSaxParser::callbackEndElement - no context" ); 841 if( !rEntity.maContextStack.empty() ) 842 { 843 SaxContextImplPtr pContext = rEntity.maContextStack.top(); 844 const Reference< XFastContextHandler >& xContext( pContext->mxContext ); 845 if( xContext.is() ) try 846 { 847 sal_Int32 nElementToken = pContext->mnElementToken; 848 if( nElementToken != FastToken::DONTKNOW ) 849 xContext->endFastElement( nElementToken ); 850 else 851 xContext->endUnknownElement( pContext->maNamespace, pContext->maElementName ); 852 } 853 catch( Exception& e ) 854 { 855 rEntity.maSavedException <<= e; 856 } 857 858 popContext(); 859 } 860 } 861 862 863 void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen ) 864 { 865 Entity& rEntity = getEntity(); 866 const Reference< XFastContextHandler >& xContext( rEntity.maContextStack.top()->mxContext ); 867 if( xContext.is() ) try 868 { 869 xContext->characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) ); 870 } 871 catch( Exception& e ) 872 { 873 rEntity.maSavedException <<= e; 874 } 875 } 876 877 int FastSaxParser::callbackExternalEntityRef( XML_Parser parser, 878 const XML_Char *context, const XML_Char * /*base*/, const XML_Char *systemId, const XML_Char *publicId ) 879 { 880 bool bOK = true; 881 InputSource source; 882 883 Entity& rCurrEntity = getEntity(); 884 Entity aNewEntity( rCurrEntity ); 885 886 if( rCurrEntity.mxEntityResolver.is() ) try 887 { 888 aNewEntity.maStructSource = rCurrEntity.mxEntityResolver->resolveEntity( 889 OUString( publicId, strlen( publicId ), RTL_TEXTENCODING_UTF8 ) , 890 OUString( systemId, strlen( systemId ), RTL_TEXTENCODING_UTF8 ) ); 891 } 892 catch( SAXParseException & e ) 893 { 894 rCurrEntity.maSavedException <<= e; 895 bOK = false; 896 } 897 catch( SAXException & e ) 898 { 899 rCurrEntity.maSavedException <<= SAXParseException( 900 e.Message, e.Context, e.WrappedException, 901 mxDocumentLocator->getPublicId(), 902 mxDocumentLocator->getSystemId(), 903 mxDocumentLocator->getLineNumber(), 904 mxDocumentLocator->getColumnNumber() ); 905 bOK = false; 906 } 907 908 if( aNewEntity.maStructSource.aInputStream.is() ) 909 { 910 aNewEntity.mpParser = XML_ExternalEntityParserCreate( parser, context, 0 ); 911 if( !aNewEntity.mpParser ) 912 { 913 return false; 914 } 915 916 aNewEntity.maConverter.setInputStream( aNewEntity.maStructSource.aInputStream ); 917 pushEntity( aNewEntity ); 918 try 919 { 920 parse(); 921 } 922 catch( SAXParseException & e ) 923 { 924 rCurrEntity.maSavedException <<= e; 925 bOK = false; 926 } 927 catch( IOException &e ) 928 { 929 SAXException aEx; 930 aEx.WrappedException <<= e; 931 rCurrEntity.maSavedException <<= aEx; 932 bOK = false; 933 } 934 catch( RuntimeException &e ) 935 { 936 SAXException aEx; 937 aEx.WrappedException <<= e; 938 rCurrEntity.maSavedException <<= aEx; 939 bOK = false; 940 } 941 942 popEntity(); 943 XML_ParserFree( aNewEntity.mpParser ); 944 } 945 946 return bOK; 947 } 948 949 } // namespace sax_fastparser 950