xref: /AOO41X/main/sax/source/fastparser/fastparser.cxx (revision f9b72d1151c0405011e988af4c8d57514307e7a3)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 //#include <stdlib.h>
25 //#include <sal/alloca.h>
26 
27 #include <boost/scoped_ptr.hpp>
28 
29 #include <osl/diagnose.h>
30 #include <rtl/ustrbuf.hxx>
31 
32 #include <com/sun/star/lang/DisposedException.hpp>
33 #include <com/sun/star/xml/sax/XFastContextHandler.hpp>
34 #include <com/sun/star/xml/sax/SAXParseException.hpp>
35 #include <com/sun/star/xml/sax/FastToken.hpp>
36 
37 #include "fastparser.hxx"
38 
39 #include <string.h>
40 
41 using ::rtl::OString;
42 using ::rtl::OUString;
43 using ::rtl::OUStringBuffer;
44 using namespace ::std;
45 using namespace ::osl;
46 using namespace ::cppu;
47 using namespace ::com::sun::star::uno;
48 using namespace ::com::sun::star::lang;
49 using namespace ::com::sun::star::xml::sax;
50 //using namespace ::com::sun::star::util;
51 using namespace ::com::sun::star::io;
52 
53 namespace sax_fastparser {
54 
55 // --------------------------------------------------------------------
56 
57 struct SaxContextImpl
58 {
59     Reference< XFastContextHandler >    mxContext;
60     sal_uInt32      mnNamespaceCount;
61     sal_Int32       mnElementToken;
62     OUString        maNamespace;
63     OUString        maElementName;
64 
SaxContextImplsax_fastparser::SaxContextImpl65     SaxContextImpl() { mnNamespaceCount = 0; mnElementToken = 0; }
SaxContextImplsax_fastparser::SaxContextImpl66     SaxContextImpl( const SaxContextImplPtr& p ) { mnNamespaceCount = p->mnNamespaceCount; mnElementToken = p->mnElementToken; maNamespace = p->maNamespace; }
67 };
68 
69 // --------------------------------------------------------------------
70 
71 struct NamespaceDefine
72 {
73     OString     maPrefix;
74     sal_Int32   mnToken;
75     OUString    maNamespaceURL;
76 
NamespaceDefinesax_fastparser::NamespaceDefine77     NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {}
78 };
79 
80 // --------------------------------------------------------------------
81 // FastLocatorImpl
82 // --------------------------------------------------------------------
83 
84 class FastSaxParser;
85 
86 class FastLocatorImpl : public WeakImplHelper1< XLocator >
87 {
88 public:
FastLocatorImpl(FastSaxParser * p)89     FastLocatorImpl( FastSaxParser *p ) : mpParser(p) {}
90 
dispose()91     void dispose() { mpParser = 0; }
checkDispose()92     void checkDispose() throw (RuntimeException) { if( !mpParser ) throw DisposedException(); }
93 
94     //XLocator
95     virtual sal_Int32 SAL_CALL getColumnNumber(void) throw (RuntimeException);
96     virtual sal_Int32 SAL_CALL getLineNumber(void) throw (RuntimeException);
97     virtual OUString SAL_CALL getPublicId(void) throw (RuntimeException);
98     virtual OUString SAL_CALL getSystemId(void) throw (RuntimeException);
99 
100 private:
101     FastSaxParser *mpParser;
102 };
103 
104 // --------------------------------------------------------------------
105 // FastSaxParser
106 // --------------------------------------------------------------------
107 
108 //---------------------------------------------
109 // the implementation part
110 //---------------------------------------------
111 
112 extern "C" {
113 
call_callbackStartElement(void * userData,const XML_Char * name,const XML_Char ** atts)114 static void call_callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts)
115 {
116     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
117     pFastParser->callbackStartElement( name, atts );
118 }
119 
call_callbackEndElement(void * userData,const XML_Char * name)120 static void call_callbackEndElement(void *userData, const XML_Char *name)
121 {
122     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
123     pFastParser->callbackEndElement( name );
124 }
125 
call_callbackCharacters(void * userData,const XML_Char * s,int nLen)126 static void call_callbackCharacters( void *userData , const XML_Char *s , int nLen )
127 {
128     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
129     pFastParser->callbackCharacters( s, nLen );
130 }
131 
call_callbackExternalEntityRef(XML_Parser parser,const XML_Char * openEntityNames,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)132 static int call_callbackExternalEntityRef( XML_Parser parser,
133         const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId )
134 {
135     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( XML_GetUserData( parser ) );
136     return pFastParser->callbackExternalEntityRef( parser, openEntityNames, base, systemId, publicId );
137 }
138 
139 } // extern "C"
140 
141 // --------------------------------------------------------------------
142 // FastLocatorImpl implementation
143 // --------------------------------------------------------------------
144 
getColumnNumber(void)145 sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber(void) throw (RuntimeException)
146 {
147     checkDispose();
148     return XML_GetCurrentColumnNumber( mpParser->getEntity().mpParser );
149 }
150 
151 // --------------------------------------------------------------------
152 
getLineNumber(void)153 sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber(void) throw (RuntimeException)
154 {
155     checkDispose();
156     return XML_GetCurrentLineNumber( mpParser->getEntity().mpParser );
157 }
158 
159 // --------------------------------------------------------------------
160 
getPublicId(void)161 OUString SAL_CALL FastLocatorImpl::getPublicId(void) throw (RuntimeException)
162 {
163     checkDispose();
164     return mpParser->getEntity().maStructSource.sPublicId;
165 }
166 // --------------------------------------------------------------------
167 
getSystemId(void)168 OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException)
169 {
170     checkDispose();
171     return mpParser->getEntity().maStructSource.sSystemId;
172 }
173 
174 // --------------------------------------------------------------------
175 
ParserData()176 ParserData::ParserData()
177 {
178 }
179 
~ParserData()180 ParserData::~ParserData()
181 {
182 }
183 
184 // --------------------------------------------------------------------
185 
Entity(const ParserData & rData)186 Entity::Entity( const ParserData& rData ) :
187     ParserData( rData )
188 {
189     // performance-Improvment. Reference is needed when calling the startTag callback.
190     // Handing out the same object with every call is allowed (see sax-specification)
191     mxAttributes.set( new FastAttributeList( mxTokenHandler ) );
192 }
193 
~Entity()194 Entity::~Entity()
195 {
196 }
197 
198 // --------------------------------------------------------------------
199 // FastSaxParser implementation
200 // --------------------------------------------------------------------
201 
FastSaxParser()202 FastSaxParser::FastSaxParser()
203 {
204     mxDocumentLocator.set( new FastLocatorImpl( this ) );
205 }
206 
207 // --------------------------------------------------------------------
208 
~FastSaxParser()209 FastSaxParser::~FastSaxParser()
210 {
211     if( mxDocumentLocator.is() )
212         mxDocumentLocator->dispose();
213 }
214 
215 // --------------------------------------------------------------------
216 
pushContext()217 void FastSaxParser::pushContext()
218 {
219     Entity& rEntity = getEntity();
220     if( rEntity.maContextStack.empty() )
221     {
222         rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl ) );
223         DefineNamespace( OString("xml"), "http://www.w3.org/XML/1998/namespace");
224     }
225     else
226     {
227         rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl( rEntity.maContextStack.top() ) ) );
228     }
229 }
230 
231 // --------------------------------------------------------------------
232 
popContext()233 void FastSaxParser::popContext()
234 {
235     Entity& rEntity = getEntity();
236     OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::popContext(), pop without push?" );
237     if( !rEntity.maContextStack.empty() )
238         rEntity.maContextStack.pop();
239 }
240 
241 // --------------------------------------------------------------------
242 
DefineNamespace(const OString & rPrefix,const sal_Char * pNamespaceURL)243 void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL )
244 {
245     Entity& rEntity = getEntity();
246     OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::DefineNamespace(), I need a context!" );
247     if( !rEntity.maContextStack.empty() )
248     {
249         sal_uInt32 nOffset = rEntity.maContextStack.top()->mnNamespaceCount++;
250 
251         if( rEntity.maNamespaceDefines.size() <= nOffset )
252             rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 );
253 
254         const OUString aNamespaceURL( pNamespaceURL, strlen( pNamespaceURL ), RTL_TEXTENCODING_UTF8 );
255         rEntity.maNamespaceDefines[nOffset].reset( new NamespaceDefine( rPrefix, GetNamespaceToken( aNamespaceURL ), aNamespaceURL ) );
256     }
257 }
258 
259 // --------------------------------------------------------------------
260 
GetToken(const OString & rToken)261 sal_Int32 FastSaxParser::GetToken( const OString& rToken )
262 {
263     Sequence< sal_Int8 > aSeq( (sal_Int8*)rToken.getStr(), rToken.getLength() );
264 
265     return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq );
266 }
267 
GetToken(const sal_Char * pToken,sal_Int32 nLen)268 sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ )
269 {
270     if( !nLen )
271         nLen = strlen( pToken );
272 
273     Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen );
274 
275     return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq );
276 }
277 
278 // --------------------------------------------------------------------
279 
GetTokenWithPrefix(const OString & rPrefix,const OString & rName)280 sal_Int32 FastSaxParser::GetTokenWithPrefix( const OString& rPrefix, const OString& rName ) throw (SAXException)
281 {
282     sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
283 
284     Entity& rEntity = getEntity();
285     sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
286     while( nNamespace-- )
287     {
288         if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix )
289         {
290             nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken;
291             break;
292         }
293 
294         if( !nNamespace )
295             throw SAXException(); // prefix that has no defined namespace url
296     }
297 
298     if( nNamespaceToken != FastToken::DONTKNOW )
299     {
300         sal_Int32 nNameToken = GetToken( rName.getStr(), rName.getLength() );
301         if( nNameToken != FastToken::DONTKNOW )
302             return nNamespaceToken | nNameToken;
303     }
304 
305     return FastToken::DONTKNOW;
306 }
307 
GetTokenWithPrefix(const sal_Char * pPrefix,int nPrefixLen,const sal_Char * pName,int nNameLen)308 sal_Int32 FastSaxParser::GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (SAXException)
309 {
310     sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
311 
312     Entity& rEntity = getEntity();
313     sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
314     while( nNamespace-- )
315     {
316         const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix );
317         if( (rPrefix.getLength() == nPrefixLen) &&
318             (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) )
319         {
320             nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken;
321             break;
322         }
323 
324         if( !nNamespace )
325             throw SAXException(); // prefix that has no defined namespace url
326     }
327 
328     if( nNamespaceToken != FastToken::DONTKNOW )
329     {
330         sal_Int32 nNameToken = GetToken( pName, nNameLen );
331         if( nNameToken != FastToken::DONTKNOW )
332             return nNamespaceToken | nNameToken;
333     }
334 
335     return FastToken::DONTKNOW;
336 }
337 
338 // --------------------------------------------------------------------
339 
GetNamespaceToken(const OUString & rNamespaceURL)340 sal_Int32 FastSaxParser::GetNamespaceToken( const OUString& rNamespaceURL )
341 {
342     NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) );
343     if( aIter != maNamespaceMap.end() )
344         return (*aIter).second;
345     else
346         return FastToken::DONTKNOW;
347 }
348 
349 // --------------------------------------------------------------------
350 
GetNamespaceURL(const OString & rPrefix)351 OUString FastSaxParser::GetNamespaceURL( const OString& rPrefix ) throw (SAXException)
352 {
353     Entity& rEntity = getEntity();
354     if( !rEntity.maContextStack.empty() )
355     {
356         sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
357         while( nNamespace-- )
358             if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix )
359                 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL;
360     }
361 
362     throw SAXException(); // prefix that has no defined namespace url
363 }
364 
GetNamespaceURL(const sal_Char * pPrefix,int nPrefixLen)365 OUString FastSaxParser::GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw(SAXException)
366 {
367     Entity& rEntity = getEntity();
368     if( pPrefix && !rEntity.maContextStack.empty() )
369     {
370         sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
371         while( nNamespace-- )
372         {
373             const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix );
374             if( (rPrefix.getLength() == nPrefixLen) &&
375                 (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) )
376             {
377                 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL;
378             }
379         }
380     }
381 
382     throw SAXException(); // prefix that has no defined namespace url
383 }
384 
385 // --------------------------------------------------------------------
386 
GetTokenWithNamespaceURL(const OUString & rNamespaceURL,const sal_Char * pName,int nNameLen)387 sal_Int32 FastSaxParser::GetTokenWithNamespaceURL( const OUString& rNamespaceURL, const sal_Char* pName, int nNameLen )
388 {
389     sal_Int32 nNamespaceToken = GetNamespaceToken( rNamespaceURL );
390 
391     if( nNamespaceToken != FastToken::DONTKNOW )
392     {
393         sal_Int32 nNameToken = GetToken( pName, nNameLen );
394         if( nNameToken != FastToken::DONTKNOW )
395             return nNamespaceToken | nNameToken;
396     }
397 
398     return FastToken::DONTKNOW;
399 }
400 
401 // --------------------------------------------------------------------
402 
splitName(const XML_Char * pwName,const XML_Char * & rpPrefix,sal_Int32 & rPrefixLen,const XML_Char * & rpName,sal_Int32 & rNameLen)403 void FastSaxParser::splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen )
404 {
405     XML_Char *p;
406     for( p = const_cast< XML_Char* >( pwName ), rNameLen = 0, rPrefixLen = 0; *p; p++ )
407     {
408         if( *p == ':' )
409         {
410             rPrefixLen = p - pwName;
411             rNameLen = 0;
412         }
413         else
414         {
415             rNameLen++;
416         }
417     }
418     if( rPrefixLen )
419     {
420         rpPrefix = pwName;
421         rpName = &pwName[ rPrefixLen + 1 ];
422     }
423     else
424     {
425         rpPrefix = 0;
426         rpName = pwName;
427     }
428 }
429 
430 /***************
431 *
432 * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does
433 * the file-specific initialization work. (During a parser run, external files may be opened)
434 *
435 ****************/
parseStream(const InputSource & maStructSource)436 void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXException, IOException, RuntimeException)
437 {
438     // Only one text at one time
439     MutexGuard guard( maMutex );
440 
441     Entity entity( maData );
442     entity.maStructSource = maStructSource;
443 
444     if( !entity.maStructSource.aInputStream.is() )
445         throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "No input source" ) ), Reference< XInterface >(), Any() );
446 
447     entity.maConverter.setInputStream( entity.maStructSource.aInputStream );
448     if( entity.maStructSource.sEncoding.getLength() )
449         entity.maConverter.setEncoding( OUStringToOString( entity.maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US ) );
450 
451     // create parser with proper encoding
452     entity.mpParser = XML_ParserCreate( 0 );
453     if( !entity.mpParser )
454         throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "Couldn't create parser" ) ), Reference< XInterface >(), Any() );
455 
456     // set all necessary C-Callbacks
457     XML_SetUserData( entity.mpParser, this );
458     XML_SetElementHandler( entity.mpParser, call_callbackStartElement, call_callbackEndElement );
459     XML_SetCharacterDataHandler( entity.mpParser, call_callbackCharacters );
460     XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef );
461 
462     pushEntity( entity );
463     try
464     {
465         // start the document
466         if( entity.mxDocumentHandler.is() )
467         {
468             Reference< XLocator > xLoc( mxDocumentLocator.get() );
469             entity.mxDocumentHandler->setDocumentLocator( xLoc );
470             entity.mxDocumentHandler->startDocument();
471         }
472 
473         parse();
474 
475         // finish document
476         if( entity.mxDocumentHandler.is() )
477         {
478             entity.mxDocumentHandler->endDocument();
479         }
480     }
481     catch( SAXException & )
482     {
483         popEntity();
484         XML_ParserFree( entity.mpParser );
485         throw;
486     }
487     catch( IOException & )
488     {
489         popEntity();
490         XML_ParserFree( entity.mpParser );
491         throw;
492     }
493     catch( RuntimeException & )
494     {
495         popEntity();
496         XML_ParserFree( entity.mpParser );
497         throw;
498     }
499 
500     popEntity();
501     XML_ParserFree( entity.mpParser );
502 }
503 
setFastDocumentHandler(const Reference<XFastDocumentHandler> & Handler)504 void FastSaxParser::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler ) throw (RuntimeException)
505 {
506     maData.mxDocumentHandler = Handler;
507 }
508 
setTokenHandler(const Reference<XFastTokenHandler> & Handler)509 void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& Handler ) throw (RuntimeException)
510 {
511     maData.mxTokenHandler = Handler;
512 }
513 
registerNamespace(const OUString & NamespaceURL,sal_Int32 NamespaceToken)514 void SAL_CALL FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (IllegalArgumentException, RuntimeException)
515 {
516     if( NamespaceToken >= FastToken::NAMESPACE )
517     {
518         if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW )
519         {
520             maNamespaceMap[ NamespaceURL ] = NamespaceToken;
521             return;
522         }
523     }
524     throw IllegalArgumentException();
525 }
526 
setErrorHandler(const Reference<XErrorHandler> & Handler)527 void FastSaxParser::setErrorHandler(const Reference< XErrorHandler > & Handler) throw (RuntimeException)
528 {
529     maData.mxErrorHandler = Handler;
530 }
531 
setEntityResolver(const Reference<XEntityResolver> & Resolver)532 void FastSaxParser::setEntityResolver(const Reference < XEntityResolver > & Resolver) throw (RuntimeException)
533 {
534     maData.mxEntityResolver = Resolver;
535 }
536 
setLocale(const Locale & Locale)537 void FastSaxParser::setLocale( const Locale & Locale ) throw (RuntimeException)
538 {
539     maData.maLocale = Locale;
540 }
541 
getSupportedServiceNames_Static(void)542 Sequence< OUString > FastSaxParser::getSupportedServiceNames_Static(void)
543 {
544     Sequence<OUString> aRet(1);
545     aRet.getArray()[0] = ::rtl::OUString( RTL_CONSTASCII_USTRINGPARAM(PARSER_SERVICE_NAME) );
546     return aRet;
547 }
548 
549 // XServiceInfo
getImplementationName()550 OUString FastSaxParser::getImplementationName() throw (RuntimeException)
551 {
552     return OUString::createFromAscii( PARSER_IMPLEMENTATION_NAME );
553 }
554 
555 // XServiceInfo
supportsService(const OUString & ServiceName)556 sal_Bool FastSaxParser::supportsService(const OUString& ServiceName) throw (RuntimeException)
557 {
558     Sequence< OUString > aSNL = getSupportedServiceNames();
559     const OUString * pArray = aSNL.getConstArray();
560 
561     for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
562         if( pArray[i] == ServiceName )
563             return sal_True;
564 
565     return sal_False;
566 }
567 
568 // XServiceInfo
getSupportedServiceNames(void)569 Sequence< OUString > FastSaxParser::getSupportedServiceNames(void) throw (RuntimeException)
570 {
571 
572     Sequence<OUString> seq(1);
573     seq.getArray()[0] = OUString::createFromAscii( PARSER_SERVICE_NAME );
574     return seq;
575 }
576 
577 
578 /*---------------------------------------
579 *
580 * Helper functions and classes
581 *
582 *-------------------------------------------*/
583 
584 namespace {
585 
lclGetErrorMessage(XML_Error xmlE,const OUString & sSystemId,sal_Int32 nLine)586 OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int32 nLine )
587 {
588     const sal_Char* pMessage = "";
589     switch( xmlE )
590     {
591         case XML_ERROR_NONE:                            pMessage = "No";                                    break;
592         case XML_ERROR_NO_MEMORY:                       pMessage = "no memory";                             break;
593         case XML_ERROR_SYNTAX:                          pMessage = "syntax";                                break;
594         case XML_ERROR_NO_ELEMENTS:                     pMessage = "no elements";                           break;
595         case XML_ERROR_INVALID_TOKEN:                   pMessage = "invalid token";                         break;
596         case XML_ERROR_UNCLOSED_TOKEN:                  pMessage = "unclosed token";                        break;
597         case XML_ERROR_PARTIAL_CHAR:                    pMessage = "partial char";                          break;
598         case XML_ERROR_TAG_MISMATCH:                    pMessage = "tag mismatch";                          break;
599         case XML_ERROR_DUPLICATE_ATTRIBUTE:             pMessage = "duplicate attribute";                   break;
600         case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:          pMessage = "junk after doc element";                break;
601         case XML_ERROR_PARAM_ENTITY_REF:                pMessage = "parameter entity reference";            break;
602         case XML_ERROR_UNDEFINED_ENTITY:                pMessage = "undefined entity";                      break;
603         case XML_ERROR_RECURSIVE_ENTITY_REF:            pMessage = "recursive entity reference";            break;
604         case XML_ERROR_ASYNC_ENTITY:                    pMessage = "async entity";                          break;
605         case XML_ERROR_BAD_CHAR_REF:                    pMessage = "bad char reference";                    break;
606         case XML_ERROR_BINARY_ENTITY_REF:               pMessage = "binary entity reference";               break;
607         case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:   pMessage = "attribute external entity reference";   break;
608         case XML_ERROR_MISPLACED_XML_PI:                pMessage = "misplaced xml processing instruction";  break;
609         case XML_ERROR_UNKNOWN_ENCODING:                pMessage = "unknown encoding";                      break;
610         case XML_ERROR_INCORRECT_ENCODING:              pMessage = "incorrect encoding";                    break;
611         case XML_ERROR_UNCLOSED_CDATA_SECTION:          pMessage = "unclosed cdata section";                break;
612         case XML_ERROR_EXTERNAL_ENTITY_HANDLING:        pMessage = "external entity reference";             break;
613         case XML_ERROR_NOT_STANDALONE:                  pMessage = "not standalone";                        break;
614         default:;
615     }
616 
617     OUStringBuffer aBuffer( sal_Unicode( '[' ) );
618     aBuffer.append( sSystemId );
619     aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " line " ) );
620     aBuffer.append( nLine );
621     aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( "]: " ) );
622     aBuffer.appendAscii( pMessage );
623     aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " error" ) );
624     return aBuffer.makeStringAndClear();
625 }
626 
627 } // namespace
628 
629 // starts parsing with actual parser !
parse()630 void FastSaxParser::parse()
631 {
632     const int BUFFER_SIZE = 16 * 1024;
633     Sequence< sal_Int8 > seqOut( BUFFER_SIZE );
634 
635     Entity& rEntity = getEntity();
636     int nRead = 0;
637     do
638     {
639         nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE );
640         if( nRead <= 0 )
641         {
642             XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), 0, 1 );
643             break;
644         }
645 
646         bool bContinue = XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), nRead, 0 ) != 0;
647         // callbacks used inside XML_Parse may have caught an exception
648         if( !bContinue || rEntity.maSavedException.hasValue() )
649         {
650             // Error during parsing !
651             XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser );
652             OUString sSystemId = mxDocumentLocator->getSystemId();
653             sal_Int32 nLine = mxDocumentLocator->getLineNumber();
654 
655             SAXParseException aExcept(
656                 lclGetErrorMessage( xmlE, sSystemId, nLine ),
657                 Reference< XInterface >(),
658                 Any( &rEntity.maSavedException, getCppuType( &rEntity.maSavedException ) ),
659                 mxDocumentLocator->getPublicId(),
660                 mxDocumentLocator->getSystemId(),
661                 mxDocumentLocator->getLineNumber(),
662                 mxDocumentLocator->getColumnNumber()
663             );
664 
665             // error handler is set, it may throw the exception
666             if( rEntity.mxErrorHandler.is() )
667                 rEntity.mxErrorHandler->fatalError( Any( aExcept ) );
668 
669             // error handler has not thrown, but parsing cannot go on, the
670             // exception MUST be thrown
671             throw aExcept;
672         }
673     }
674     while( nRead > 0 );
675 }
676 
677 //------------------------------------------
678 //
679 // The C-Callbacks
680 //
681 //-----------------------------------------
682 
683 namespace {
684 
685 struct AttributeData
686 {
687     OString             maPrefix;
688     OString             maName;
689     OString             maValue;
690 };
691 
692 } // namespace
693 
callbackStartElement(const XML_Char * pwName,const XML_Char ** awAttributes)694 void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char** awAttributes )
695 {
696     Reference< XFastContextHandler > xParentContext;
697     Entity& rEntity = getEntity();
698     if( !rEntity.maContextStack.empty() )
699     {
700         xParentContext = rEntity.maContextStack.top()->mxContext;
701         if( !xParentContext.is() )
702         {
703             // we ignore current elements, so no processing needed
704             pushContext();
705             return;
706         }
707     }
708 
709     pushContext();
710 
711     rEntity.mxAttributes->clear();
712 
713     // create attribute map and process namespace instructions
714     int i = 0;
715     sal_Int32 nNameLen, nPrefixLen;
716     const XML_Char *pName;
717     const XML_Char *pPrefix;
718 
719     try
720     {
721         /*  #158414# Each element may define new namespaces, also for attribues.
722             First, process all namespace attributes and cache other attributes in a
723             vector. Second, process the attributes after namespaces have been
724             initialized. */
725         ::std::vector< AttributeData > aAttribs;
726 
727         // #158414# first: get namespaces
728         for( ; awAttributes[i]; i += 2 )
729         {
730             OSL_ASSERT( awAttributes[i+1] );
731 
732             splitName( awAttributes[i], pPrefix, nPrefixLen, pName, nNameLen );
733             if( nPrefixLen )
734             {
735                 if( (nPrefixLen == 5) && (strncmp( pPrefix, "xmlns", 5 ) == 0) )
736                 {
737                     DefineNamespace( OString( pName, nNameLen ), awAttributes[i+1] );
738                 }
739                 else
740                 {
741                     aAttribs.resize( aAttribs.size() + 1 );
742                     aAttribs.back().maPrefix = OString( pPrefix, nPrefixLen );
743                     aAttribs.back().maName = OString( pName, nNameLen );
744                     aAttribs.back().maValue = OString( awAttributes[i+1] );
745                 }
746             }
747             else
748             {
749                 if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) )
750                 {
751                     // namespace of the element found
752                     rEntity.maContextStack.top()->maNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
753                 }
754                 else
755                 {
756                     aAttribs.resize( aAttribs.size() + 1 );
757                     aAttribs.back().maName = OString( pName, nNameLen );
758                     aAttribs.back().maValue = OString( awAttributes[i+1] );
759                 }
760             }
761         }
762 
763         // #158414# second: fill attribute list with other attributes
764         for( ::std::vector< AttributeData >::const_iterator aIt = aAttribs.begin(), aEnd = aAttribs.end(); aIt != aEnd; ++aIt )
765         {
766             if( aIt->maPrefix.getLength() > 0 )
767             {
768                 sal_Int32 nAttributeToken = GetTokenWithPrefix( aIt->maPrefix, aIt->maName );
769                 if( nAttributeToken != FastToken::DONTKNOW )
770                     rEntity.mxAttributes->add( nAttributeToken, aIt->maValue );
771                 else
772                     rEntity.mxAttributes->addUnknown( GetNamespaceURL( aIt->maPrefix ), aIt->maName, aIt->maValue );
773             }
774             else
775             {
776                 sal_Int32 nAttributeToken = GetToken( aIt->maName );
777                 if( nAttributeToken != FastToken::DONTKNOW )
778                     rEntity.mxAttributes->add( nAttributeToken, aIt->maValue );
779                 else
780                     rEntity.mxAttributes->addUnknown( aIt->maName, aIt->maValue );
781             }
782         }
783 
784         sal_Int32 nElementToken;
785         splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen );
786         if( nPrefixLen > 0 )
787             nElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
788         else if( rEntity.maContextStack.top()->maNamespace.getLength() > 0 )
789             nElementToken = GetTokenWithNamespaceURL( rEntity.maContextStack.top()->maNamespace, pName, nNameLen );
790         else
791             nElementToken = GetToken( pName );
792         rEntity.maContextStack.top()->mnElementToken = nElementToken;
793 
794         Reference< XFastAttributeList > xAttr( rEntity.mxAttributes.get() );
795         Reference< XFastContextHandler > xContext;
796         if( nElementToken == FastToken::DONTKNOW )
797         {
798             if( nPrefixLen > 0 )
799                 rEntity.maContextStack.top()->maNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
800 
801             const OUString aNamespace( rEntity.maContextStack.top()->maNamespace );
802             const OUString aElementName( pPrefix, nPrefixLen, RTL_TEXTENCODING_UTF8 );
803             rEntity.maContextStack.top()->maElementName = aElementName;
804 
805             if( xParentContext.is() )
806                 xContext = xParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr );
807             else
808                 xContext = rEntity.mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr );
809 
810             if( xContext.is() )
811             {
812                 rEntity.maContextStack.top()->mxContext = xContext;
813                 xContext->startUnknownElement( aNamespace, aElementName, xAttr );
814             }
815         }
816         else
817         {
818             if( xParentContext.is() )
819                 xContext = xParentContext->createFastChildContext( nElementToken, xAttr );
820             else
821                 xContext = rEntity.mxDocumentHandler->createFastChildContext( nElementToken, xAttr );
822 
823 
824             if( xContext.is() )
825             {
826                 rEntity.maContextStack.top()->mxContext = xContext;
827                 xContext->startFastElement( nElementToken, xAttr );
828             }
829         }
830     }
831     catch( Exception& e )
832     {
833         rEntity.maSavedException <<= e;
834     }
835 }
836 
callbackEndElement(const XML_Char *)837 void FastSaxParser::callbackEndElement( const XML_Char* )
838 {
839     Entity& rEntity = getEntity();
840     OSL_ENSURE( !rEntity.maContextStack.empty(), "FastSaxParser::callbackEndElement - no context" );
841     if( !rEntity.maContextStack.empty() )
842     {
843         SaxContextImplPtr pContext = rEntity.maContextStack.top();
844         const Reference< XFastContextHandler >& xContext( pContext->mxContext );
845         if( xContext.is() ) try
846         {
847             sal_Int32 nElementToken = pContext->mnElementToken;
848             if( nElementToken != FastToken::DONTKNOW )
849                 xContext->endFastElement( nElementToken );
850             else
851                 xContext->endUnknownElement( pContext->maNamespace, pContext->maElementName );
852         }
853         catch( Exception& e )
854         {
855             rEntity.maSavedException <<= e;
856         }
857 
858         popContext();
859     }
860 }
861 
862 
callbackCharacters(const XML_Char * s,int nLen)863 void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen )
864 {
865     Entity& rEntity = getEntity();
866     const Reference< XFastContextHandler >& xContext( rEntity.maContextStack.top()->mxContext );
867     if( xContext.is() ) try
868     {
869         xContext->characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) );
870     }
871     catch( Exception& e )
872     {
873         rEntity.maSavedException <<= e;
874     }
875 }
876 
callbackExternalEntityRef(XML_Parser parser,const XML_Char * context,const XML_Char *,const XML_Char * systemId,const XML_Char * publicId)877 int FastSaxParser::callbackExternalEntityRef( XML_Parser parser,
878         const XML_Char *context, const XML_Char * /*base*/, const XML_Char *systemId, const XML_Char *publicId )
879 {
880     bool bOK = true;
881     InputSource source;
882 
883     Entity& rCurrEntity = getEntity();
884     Entity aNewEntity( rCurrEntity );
885 
886     if( rCurrEntity.mxEntityResolver.is() ) try
887     {
888         aNewEntity.maStructSource = rCurrEntity.mxEntityResolver->resolveEntity(
889             OUString( publicId, strlen( publicId ), RTL_TEXTENCODING_UTF8 ) ,
890             OUString( systemId, strlen( systemId ), RTL_TEXTENCODING_UTF8 ) );
891     }
892     catch( SAXParseException & e )
893     {
894         rCurrEntity.maSavedException <<= e;
895         bOK = false;
896     }
897     catch( SAXException & e )
898     {
899         rCurrEntity.maSavedException <<= SAXParseException(
900             e.Message, e.Context, e.WrappedException,
901             mxDocumentLocator->getPublicId(),
902             mxDocumentLocator->getSystemId(),
903             mxDocumentLocator->getLineNumber(),
904             mxDocumentLocator->getColumnNumber() );
905         bOK = false;
906     }
907 
908     if( aNewEntity.maStructSource.aInputStream.is() )
909     {
910         aNewEntity.mpParser = XML_ExternalEntityParserCreate( parser, context, 0 );
911         if( !aNewEntity.mpParser )
912         {
913             return false;
914         }
915 
916         aNewEntity.maConverter.setInputStream( aNewEntity.maStructSource.aInputStream );
917         pushEntity( aNewEntity );
918         try
919         {
920             parse();
921         }
922         catch( SAXParseException & e )
923         {
924             rCurrEntity.maSavedException <<= e;
925             bOK = false;
926         }
927         catch( IOException &e )
928         {
929             SAXException aEx;
930             aEx.WrappedException <<= e;
931             rCurrEntity.maSavedException <<= aEx;
932             bOK = false;
933         }
934         catch( RuntimeException &e )
935         {
936             SAXException aEx;
937             aEx.WrappedException <<= e;
938             rCurrEntity.maSavedException <<= aEx;
939             bOK = false;
940         }
941 
942         popEntity();
943         XML_ParserFree( aNewEntity.mpParser );
944     }
945 
946     return bOK;
947 }
948 
949 } // namespace sax_fastparser
950