xref: /AOO41X/main/sax/source/fastparser/fastparser.hxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir #ifndef _SAX_FASTPARSER_HXX_
29*cdf0e10cSrcweir #define _SAX_FASTPARSER_HXX_
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <vector>
32*cdf0e10cSrcweir #include <stack>
33*cdf0e10cSrcweir #include <hash_map>
34*cdf0e10cSrcweir #include <boost/shared_ptr.hpp>
35*cdf0e10cSrcweir #include <rtl/ref.hxx>
36*cdf0e10cSrcweir #include <com/sun/star/xml/sax/XFastParser.hpp>
37*cdf0e10cSrcweir #include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
38*cdf0e10cSrcweir #include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
39*cdf0e10cSrcweir #include <com/sun/star/lang/XServiceInfo.hpp>
40*cdf0e10cSrcweir #include <cppuhelper/implbase2.hxx>
41*cdf0e10cSrcweir 
42*cdf0e10cSrcweir #include <expat.h>
43*cdf0e10cSrcweir #include "xml2utf.hxx"
44*cdf0e10cSrcweir 
45*cdf0e10cSrcweir #include <sax/fastattribs.hxx>
46*cdf0e10cSrcweir 
// Identification strings for the fast parser component.
// NOTE(review): presumably reported through the XServiceInfo methods of
// FastSaxParser -- confirm against the implementation file.
#define PARSER_IMPLEMENTATION_NAME \
    "com.sun.star.comp.extensions.xml.sax.FastParser"
#define PARSER_SERVICE_NAME \
    "com.sun.star.xml.sax.FastParser"
49*cdf0e10cSrcweir 
50*cdf0e10cSrcweir namespace sax_fastparser {
51*cdf0e10cSrcweir 
52*cdf0e10cSrcweir class FastLocatorImpl;
53*cdf0e10cSrcweir struct NamespaceDefine;
54*cdf0e10cSrcweir struct SaxContextImpl;
55*cdf0e10cSrcweir 
56*cdf0e10cSrcweir typedef ::boost::shared_ptr< SaxContextImpl > SaxContextImplPtr;
57*cdf0e10cSrcweir typedef ::boost::shared_ptr< NamespaceDefine > NamespaceDefineRef;
58*cdf0e10cSrcweir 
59*cdf0e10cSrcweir typedef ::std::hash_map< ::rtl::OUString, sal_Int32,
60*cdf0e10cSrcweir         ::rtl::OUStringHash, ::std::equal_to< ::rtl::OUString > > NamespaceMap;
61*cdf0e10cSrcweir 
62*cdf0e10cSrcweir // --------------------------------------------------------------------
63*cdf0e10cSrcweir 
64*cdf0e10cSrcweir struct ParserData
65*cdf0e10cSrcweir {
66*cdf0e10cSrcweir     ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler > mxDocumentHandler;
67*cdf0e10cSrcweir     ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >    mxTokenHandler;
68*cdf0e10cSrcweir     ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XErrorHandler >        mxErrorHandler;
69*cdf0e10cSrcweir     ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XEntityResolver >      mxEntityResolver;
70*cdf0e10cSrcweir     ::com::sun::star::lang::Locale          maLocale;
71*cdf0e10cSrcweir 
72*cdf0e10cSrcweir     ParserData();
73*cdf0e10cSrcweir     ~ParserData();
74*cdf0e10cSrcweir };
75*cdf0e10cSrcweir 
76*cdf0e10cSrcweir // --------------------------------------------------------------------
77*cdf0e10cSrcweir 
78*cdf0e10cSrcweir // Entity binds all information needed for a single file
79*cdf0e10cSrcweir struct Entity : public ParserData
80*cdf0e10cSrcweir {
81*cdf0e10cSrcweir     ::com::sun::star::xml::sax::InputSource maStructSource;
82*cdf0e10cSrcweir     XML_Parser                              mpParser;
83*cdf0e10cSrcweir     ::sax_expatwrap::XMLFile2UTFConverter   maConverter;
84*cdf0e10cSrcweir     ::rtl::Reference< FastAttributeList >   mxAttributes;
85*cdf0e10cSrcweir 
86*cdf0e10cSrcweir     // Exceptions cannot be thrown through the C-XmlParser (possible resource leaks),
87*cdf0e10cSrcweir     // therefore the exception must be saved somewhere.
88*cdf0e10cSrcweir     ::com::sun::star::uno::Any              maSavedException;
89*cdf0e10cSrcweir 
90*cdf0e10cSrcweir     ::std::stack< SaxContextImplPtr >       maContextStack;
91*cdf0e10cSrcweir     ::std::vector< NamespaceDefineRef >     maNamespaceDefines;
92*cdf0e10cSrcweir 
93*cdf0e10cSrcweir     explicit Entity( const ParserData& rData );
94*cdf0e10cSrcweir     ~Entity();
95*cdf0e10cSrcweir };
96*cdf0e10cSrcweir 
97*cdf0e10cSrcweir // --------------------------------------------------------------------
98*cdf0e10cSrcweir 
99*cdf0e10cSrcweir // This class implements the external Parser interface
100*cdf0e10cSrcweir class FastSaxParser : public ::cppu::WeakImplHelper2< ::com::sun::star::xml::sax::XFastParser, ::com::sun::star::lang::XServiceInfo >
101*cdf0e10cSrcweir {
102*cdf0e10cSrcweir public:
103*cdf0e10cSrcweir     FastSaxParser();
104*cdf0e10cSrcweir     virtual ~FastSaxParser();
105*cdf0e10cSrcweir 
106*cdf0e10cSrcweir     // The implementation details
107*cdf0e10cSrcweir     static ::com::sun::star::uno::Sequence< ::rtl::OUString > getSupportedServiceNames_Static(void);
108*cdf0e10cSrcweir 
109*cdf0e10cSrcweir     // XFastParser
110*cdf0e10cSrcweir     virtual void SAL_CALL parseStream( const ::com::sun::star::xml::sax::InputSource& aInputSource ) throw (::com::sun::star::xml::sax::SAXException, ::com::sun::star::io::IOException, ::com::sun::star::uno::RuntimeException);
111*cdf0e10cSrcweir     virtual void SAL_CALL setFastDocumentHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException);
112*cdf0e10cSrcweir     virtual void SAL_CALL setTokenHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException);
113*cdf0e10cSrcweir     virtual void SAL_CALL registerNamespace( const ::rtl::OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
114*cdf0e10cSrcweir     virtual void SAL_CALL setErrorHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XErrorHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException);
115*cdf0e10cSrcweir     virtual void SAL_CALL setEntityResolver( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XEntityResolver >& Resolver ) throw (::com::sun::star::uno::RuntimeException);
116*cdf0e10cSrcweir     virtual void SAL_CALL setLocale( const ::com::sun::star::lang::Locale& rLocale ) throw (::com::sun::star::uno::RuntimeException);
117*cdf0e10cSrcweir 
118*cdf0e10cSrcweir     // XServiceInfo
119*cdf0e10cSrcweir     virtual ::rtl::OUString SAL_CALL getImplementationName(  ) throw (::com::sun::star::uno::RuntimeException);
120*cdf0e10cSrcweir     virtual sal_Bool SAL_CALL supportsService( const ::rtl::OUString& ServiceName ) throw (::com::sun::star::uno::RuntimeException);
121*cdf0e10cSrcweir     virtual ::com::sun::star::uno::Sequence< ::rtl::OUString > SAL_CALL getSupportedServiceNames(  ) throw (::com::sun::star::uno::RuntimeException);
122*cdf0e10cSrcweir 
123*cdf0e10cSrcweir     // called by the C callbacks of the expat parser
124*cdf0e10cSrcweir     void callbackStartElement( const XML_Char* name, const XML_Char** atts );
125*cdf0e10cSrcweir     void callbackEndElement( const XML_Char* name );
126*cdf0e10cSrcweir     void callbackCharacters( const XML_Char* s, int nLen );
127*cdf0e10cSrcweir     int callbackExternalEntityRef( XML_Parser parser, const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId);
128*cdf0e10cSrcweir 
129*cdf0e10cSrcweir     inline void pushEntity( const Entity& rEntity ) { maEntities.push( rEntity ); }
130*cdf0e10cSrcweir     inline void popEntity()                         { maEntities.pop(); }
131*cdf0e10cSrcweir     Entity& getEntity()                             { return maEntities.top(); }
132*cdf0e10cSrcweir 
133*cdf0e10cSrcweir private:
134*cdf0e10cSrcweir     void parse();
135*cdf0e10cSrcweir 
136*cdf0e10cSrcweir     sal_Int32 GetToken( const ::rtl::OString& rToken );
137*cdf0e10cSrcweir     sal_Int32 GetToken( const sal_Char* pToken, sal_Int32 nTokenLen = 0 );
138*cdf0e10cSrcweir     sal_Int32 GetTokenWithPrefix( const ::rtl::OString& rPrefix, const ::rtl::OString& rName ) throw (::com::sun::star::xml::sax::SAXException);
139*cdf0e10cSrcweir     sal_Int32 GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (::com::sun::star::xml::sax::SAXException);
140*cdf0e10cSrcweir     ::rtl::OUString GetNamespaceURL( const ::rtl::OString& rPrefix ) throw (::com::sun::star::xml::sax::SAXException);
141*cdf0e10cSrcweir     ::rtl::OUString GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw (::com::sun::star::xml::sax::SAXException);
142*cdf0e10cSrcweir     sal_Int32 GetNamespaceToken( const ::rtl::OUString& rNamespaceURL );
143*cdf0e10cSrcweir     sal_Int32 GetTokenWithNamespaceURL( const ::rtl::OUString& rNamespaceURL, const sal_Char* pName, int nNameLen );
144*cdf0e10cSrcweir     void DefineNamespace( const ::rtl::OString& rPrefix, const sal_Char* pNamespaceURL );
145*cdf0e10cSrcweir     sal_Int32 CreateCustomToken( const sal_Char* pToken, int len = 0 );
146*cdf0e10cSrcweir 
147*cdf0e10cSrcweir     void pushContext();
148*cdf0e10cSrcweir     void popContext();
149*cdf0e10cSrcweir 
150*cdf0e10cSrcweir     void splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen );
151*cdf0e10cSrcweir 
152*cdf0e10cSrcweir private:
153*cdf0e10cSrcweir     ::osl::Mutex maMutex;
154*cdf0e10cSrcweir 
155*cdf0e10cSrcweir     ::rtl::Reference< FastLocatorImpl >     mxDocumentLocator;
156*cdf0e10cSrcweir     NamespaceMap                            maNamespaceMap;
157*cdf0e10cSrcweir 
158*cdf0e10cSrcweir     ParserData maData;                      /// Cached parser configuration for next call of parseStream().
159*cdf0e10cSrcweir     ::std::stack< Entity > maEntities;      /// Entity stack for each call of parseStream().
160*cdf0e10cSrcweir };
161*cdf0e10cSrcweir 
162*cdf0e10cSrcweir }
163*cdf0e10cSrcweir 
164*cdf0e10cSrcweir #endif // _SAX_FASTPARSER_HXX_
165