1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir #ifndef _SAX_FASTPARSER_HXX_ 29*cdf0e10cSrcweir #define _SAX_FASTPARSER_HXX_ 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include <vector> 32*cdf0e10cSrcweir #include <stack> 33*cdf0e10cSrcweir #include <hash_map> 34*cdf0e10cSrcweir #include <boost/shared_ptr.hpp> 35*cdf0e10cSrcweir #include <rtl/ref.hxx> 36*cdf0e10cSrcweir #include <com/sun/star/xml/sax/XFastParser.hpp> 37*cdf0e10cSrcweir #include <com/sun/star/xml/sax/XFastTokenHandler.hpp> 38*cdf0e10cSrcweir #include <com/sun/star/xml/sax/XFastDocumentHandler.hpp> 39*cdf0e10cSrcweir #include <com/sun/star/lang/XServiceInfo.hpp> 40*cdf0e10cSrcweir #include <cppuhelper/implbase2.hxx> 41*cdf0e10cSrcweir 42*cdf0e10cSrcweir #include <expat.h> 43*cdf0e10cSrcweir #include "xml2utf.hxx" 44*cdf0e10cSrcweir 45*cdf0e10cSrcweir #include <sax/fastattribs.hxx> 46*cdf0e10cSrcweir 47*cdf0e10cSrcweir #define PARSER_IMPLEMENTATION_NAME "com.sun.star.comp.extensions.xml.sax.FastParser" 48*cdf0e10cSrcweir #define PARSER_SERVICE_NAME "com.sun.star.xml.sax.FastParser" 49*cdf0e10cSrcweir 50*cdf0e10cSrcweir namespace sax_fastparser { 51*cdf0e10cSrcweir 52*cdf0e10cSrcweir class FastLocatorImpl; 53*cdf0e10cSrcweir struct NamespaceDefine; 54*cdf0e10cSrcweir struct SaxContextImpl; 55*cdf0e10cSrcweir 56*cdf0e10cSrcweir typedef ::boost::shared_ptr< SaxContextImpl > SaxContextImplPtr; 57*cdf0e10cSrcweir typedef ::boost::shared_ptr< NamespaceDefine > NamespaceDefineRef; 58*cdf0e10cSrcweir 59*cdf0e10cSrcweir typedef ::std::hash_map< ::rtl::OUString, sal_Int32, 60*cdf0e10cSrcweir ::rtl::OUStringHash, ::std::equal_to< ::rtl::OUString > > NamespaceMap; 61*cdf0e10cSrcweir 62*cdf0e10cSrcweir // -------------------------------------------------------------------- 63*cdf0e10cSrcweir 64*cdf0e10cSrcweir struct ParserData 65*cdf0e10cSrcweir { 66*cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler > mxDocumentHandler; 67*cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler > mxTokenHandler; 68*cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XErrorHandler > mxErrorHandler; 69*cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XEntityResolver > mxEntityResolver; 70*cdf0e10cSrcweir ::com::sun::star::lang::Locale maLocale; 71*cdf0e10cSrcweir 72*cdf0e10cSrcweir ParserData(); 73*cdf0e10cSrcweir ~ParserData(); 74*cdf0e10cSrcweir }; 75*cdf0e10cSrcweir 76*cdf0e10cSrcweir // -------------------------------------------------------------------- 77*cdf0e10cSrcweir 78*cdf0e10cSrcweir // Entity binds all information needed for a single file 79*cdf0e10cSrcweir struct Entity : public ParserData 80*cdf0e10cSrcweir { 81*cdf0e10cSrcweir ::com::sun::star::xml::sax::InputSource maStructSource; 82*cdf0e10cSrcweir XML_Parser mpParser; 83*cdf0e10cSrcweir ::sax_expatwrap::XMLFile2UTFConverter maConverter; 84*cdf0e10cSrcweir ::rtl::Reference< FastAttributeList > mxAttributes; 85*cdf0e10cSrcweir 86*cdf0e10cSrcweir // Exceptions cannot be thrown through the C-XmlParser (possible resource leaks), 87*cdf0e10cSrcweir // therefore the exception must be saved somewhere. 88*cdf0e10cSrcweir ::com::sun::star::uno::Any maSavedException; 89*cdf0e10cSrcweir 90*cdf0e10cSrcweir ::std::stack< SaxContextImplPtr > maContextStack; 91*cdf0e10cSrcweir ::std::vector< NamespaceDefineRef > maNamespaceDefines; 92*cdf0e10cSrcweir 93*cdf0e10cSrcweir explicit Entity( const ParserData& rData ); 94*cdf0e10cSrcweir ~Entity(); 95*cdf0e10cSrcweir }; 96*cdf0e10cSrcweir 97*cdf0e10cSrcweir // -------------------------------------------------------------------- 98*cdf0e10cSrcweir 99*cdf0e10cSrcweir // This class implements the external Parser interface 100*cdf0e10cSrcweir class FastSaxParser : public ::cppu::WeakImplHelper2< ::com::sun::star::xml::sax::XFastParser, ::com::sun::star::lang::XServiceInfo > 101*cdf0e10cSrcweir { 102*cdf0e10cSrcweir public: 103*cdf0e10cSrcweir FastSaxParser(); 104*cdf0e10cSrcweir virtual ~FastSaxParser(); 105*cdf0e10cSrcweir 106*cdf0e10cSrcweir // The implementation details 107*cdf0e10cSrcweir static ::com::sun::star::uno::Sequence< ::rtl::OUString > getSupportedServiceNames_Static(void); 108*cdf0e10cSrcweir 109*cdf0e10cSrcweir // XFastParser 110*cdf0e10cSrcweir virtual void SAL_CALL parseStream( const ::com::sun::star::xml::sax::InputSource& aInputSource ) throw (::com::sun::star::xml::sax::SAXException, ::com::sun::star::io::IOException, ::com::sun::star::uno::RuntimeException); 111*cdf0e10cSrcweir virtual void SAL_CALL setFastDocumentHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException); 112*cdf0e10cSrcweir virtual void SAL_CALL setTokenHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException); 113*cdf0e10cSrcweir virtual void SAL_CALL registerNamespace( const ::rtl::OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException); 114*cdf0e10cSrcweir virtual void SAL_CALL setErrorHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XErrorHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException); 115*cdf0e10cSrcweir virtual void SAL_CALL setEntityResolver( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XEntityResolver >& Resolver ) throw (::com::sun::star::uno::RuntimeException); 116*cdf0e10cSrcweir virtual void SAL_CALL setLocale( const ::com::sun::star::lang::Locale& rLocale ) throw (::com::sun::star::uno::RuntimeException); 117*cdf0e10cSrcweir 118*cdf0e10cSrcweir // XServiceInfo 119*cdf0e10cSrcweir virtual ::rtl::OUString SAL_CALL getImplementationName( ) throw (::com::sun::star::uno::RuntimeException); 120*cdf0e10cSrcweir virtual sal_Bool SAL_CALL supportsService( const ::rtl::OUString& ServiceName ) throw (::com::sun::star::uno::RuntimeException); 121*cdf0e10cSrcweir virtual ::com::sun::star::uno::Sequence< ::rtl::OUString > SAL_CALL getSupportedServiceNames( ) throw (::com::sun::star::uno::RuntimeException); 122*cdf0e10cSrcweir 123*cdf0e10cSrcweir // called by the C callbacks of the expat parser 124*cdf0e10cSrcweir void callbackStartElement( const XML_Char* name, const XML_Char** atts ); 125*cdf0e10cSrcweir void callbackEndElement( const XML_Char* name ); 126*cdf0e10cSrcweir void callbackCharacters( const XML_Char* s, int nLen ); 127*cdf0e10cSrcweir int callbackExternalEntityRef( XML_Parser parser, const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId); 128*cdf0e10cSrcweir 129*cdf0e10cSrcweir inline void pushEntity( const Entity& rEntity ) { maEntities.push( rEntity ); } 130*cdf0e10cSrcweir inline void popEntity() { maEntities.pop(); } 131*cdf0e10cSrcweir Entity& getEntity() { return maEntities.top(); } 132*cdf0e10cSrcweir 133*cdf0e10cSrcweir private: 134*cdf0e10cSrcweir void parse(); 135*cdf0e10cSrcweir 136*cdf0e10cSrcweir sal_Int32 GetToken( const ::rtl::OString& rToken ); 137*cdf0e10cSrcweir sal_Int32 GetToken( const sal_Char* pToken, sal_Int32 nTokenLen = 0 ); 138*cdf0e10cSrcweir sal_Int32 GetTokenWithPrefix( const ::rtl::OString& rPrefix, const ::rtl::OString& rName ) throw (::com::sun::star::xml::sax::SAXException); 139*cdf0e10cSrcweir sal_Int32 GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (::com::sun::star::xml::sax::SAXException); 140*cdf0e10cSrcweir ::rtl::OUString GetNamespaceURL( const ::rtl::OString& rPrefix ) throw (::com::sun::star::xml::sax::SAXException); 141*cdf0e10cSrcweir ::rtl::OUString GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw (::com::sun::star::xml::sax::SAXException); 142*cdf0e10cSrcweir sal_Int32 GetNamespaceToken( const ::rtl::OUString& rNamespaceURL ); 143*cdf0e10cSrcweir sal_Int32 GetTokenWithNamespaceURL( const ::rtl::OUString& rNamespaceURL, const sal_Char* pName, int nNameLen ); 144*cdf0e10cSrcweir void DefineNamespace( const ::rtl::OString& rPrefix, const sal_Char* pNamespaceURL ); 145*cdf0e10cSrcweir sal_Int32 CreateCustomToken( const sal_Char* pToken, int len = 0 ); 146*cdf0e10cSrcweir 147*cdf0e10cSrcweir void pushContext(); 148*cdf0e10cSrcweir void popContext(); 149*cdf0e10cSrcweir 150*cdf0e10cSrcweir void splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen ); 151*cdf0e10cSrcweir 152*cdf0e10cSrcweir private: 153*cdf0e10cSrcweir ::osl::Mutex maMutex; 154*cdf0e10cSrcweir 155*cdf0e10cSrcweir ::rtl::Reference< FastLocatorImpl > mxDocumentLocator; 156*cdf0e10cSrcweir NamespaceMap maNamespaceMap; 157*cdf0e10cSrcweir 158*cdf0e10cSrcweir ParserData maData; /// Cached parser configuration for next call of parseStream(). 159*cdf0e10cSrcweir ::std::stack< Entity > maEntities; /// Entity stack for each call of parseStream(). 160*cdf0e10cSrcweir }; 161*cdf0e10cSrcweir 162*cdf0e10cSrcweir } 163*cdf0e10cSrcweir 164*cdf0e10cSrcweir #endif // _SAX_FASTPARSER_HXX_ 165