xref: /AOO41X/main/sax/inc/xml2utf.hxx (revision 8d1920419c874f70e89fee4870a4e630d2304699)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
// TODO: Where do these come from? (The origin of these helper macros is not documented.)
//
// Max( a, b ) yields the larger, Min( a, b ) the smaller of the two arguments.
// When both compare equal, the second argument is the result.
// These are function-like macros: the selected argument is evaluated twice
// (once in the comparison, once as the result), so never pass an expression
// with side effects such as i++ or a function call that mutates state.
#define Max( a, b )     (((a)>(b)) ? (a) : (b) )
#define Min( a, b )     (((a)<(b)) ? (a) : (b) )
27 
28 /*
29 *
30 * Text2UnicodeConverter
31 *
32 **/
33 namespace sax_expatwrap {
34 
35 class Text2UnicodeConverter
36 {
37 
38 public:
39     Text2UnicodeConverter( const ::rtl::OString & sEncoding );
40     ~Text2UnicodeConverter();
41 
42     ::com::sun::star::uno::Sequence < sal_Unicode > convert( const ::com::sun::star::uno::Sequence<sal_Int8> & );
canContinue()43     sal_Bool canContinue() {  return m_bCanContinue; }
44 
45 private:
46     void init( rtl_TextEncoding encoding );
47 
48     rtl_TextToUnicodeConverter  m_convText2Unicode;
49     rtl_TextToUnicodeContext    m_contextText2Unicode;
50     sal_Bool                    m_bCanContinue;
51     sal_Bool                    m_bInitialized;
52     rtl_TextEncoding            m_rtlEncoding;
53     ::com::sun::star::uno::Sequence<sal_Int8> m_seqSource;
54 };
55 
56 /*----------------------------------------
57 *
58 * Unicode2TextConverter
59 *
60 **-----------------------------------------*/
61 class Unicode2TextConverter
62 {
63 public:
64     Unicode2TextConverter( rtl_TextEncoding encoding );
65     ~Unicode2TextConverter();
66 
convert(const::rtl::OUString & s)67     inline ::com::sun::star::uno::Sequence<sal_Int8> convert( const ::rtl::OUString &s )
68         {
69             return convert( s.getStr() , s.getLength() );
70         }
71     ::com::sun::star::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength );
canContinue()72     sal_Bool canContinue() {  return m_bCanContinue; }
73 
74 private:
75     void init( rtl_TextEncoding encoding );
76 
77     rtl_UnicodeToTextConverter  m_convUnicode2Text;
78     rtl_UnicodeToTextContext    m_contextUnicode2Text;
79     sal_Bool                    m_bCanContinue;
80     sal_Bool                    m_bInitialized;
81     rtl_TextEncoding            m_rtlEncoding;
82     ::com::sun::star::uno::Sequence<sal_Unicode>        m_seqSource;
83 };
84 
85 
86 
87 /*----------------------------------------
88 *
89 * XMLFile2UTFConverter
90 *
91 **-----------------------------------------*/
92 class XMLFile2UTFConverter
93 {
94 public:
XMLFile2UTFConverter()95     XMLFile2UTFConverter( ):
96         m_bStarted( sal_False ),
97         m_pText2Unicode( 0 ),
98         m_pUnicode2Text( 0 )
99         {}
100 
101     ~XMLFile2UTFConverter();
102 
setInputStream(::com::sun::star::uno::Reference<::com::sun::star::io::XInputStream> & r)103     void setInputStream( ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream > &r ) { m_in = r; }
setEncoding(const::rtl::OString & s)104     void setEncoding( const ::rtl::OString &s ) { m_sEncoding = s; }
105 
106 
107 
108     // @param nMaxToRead The number of chars, that should be read. Note that this is no exact number. There
109     //                   may be returned less or more bytes than ordered.
110     sal_Int32 readAndConvert( ::com::sun::star::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead )
111         throw ( ::com::sun::star::io::IOException,
112                 ::com::sun::star::io::NotConnectedException ,
113                 ::com::sun::star::io::BufferSizeExceededException ,
114                 ::com::sun::star::uno::RuntimeException );
115 
116 private:
117 
118     // Called only on first Sequence of bytes. Tries to figure out file format and encoding information.
119     // @return TRUE, when encoding information could be retrieved
120     // @return FALSE, when no encoding information was found in file
121     sal_Bool scanForEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
122 
123     // Called only on first Sequence of bytes. Tries to figure out
124     // if enough data is available to scan encoding
125     // @return TRUE, when encoding is retrievable
126     // @return FALSE, when more data is needed
127     sal_Bool isEncodingRecognizable( const ::com::sun::star::uno::Sequence< sal_Int8 > & seq );
128 
129     // When encoding attribute is within the text (in the first line), it is removed.
130     void removeEncoding( ::com::sun::star::uno::Sequence<sal_Int8> &seq );
131 
132     // Initializes decoding depending on m_sEncoding setting
133     void initializeDecoding();
134 private:
135     ::com::sun::star::uno::Reference< ::com::sun::star::io::XInputStream >  m_in;
136 
137     sal_Bool m_bStarted;
138     ::rtl::OString m_sEncoding;
139 
140     Text2UnicodeConverter *m_pText2Unicode;
141     Unicode2TextConverter *m_pUnicode2Text;
142 };
143 }
144