xref: /AOO41X/main/oox/inc/oox/xls/biffinputstream.hxx (revision 1ecadb572e7010ff3b3382ad9bf179dbc6efadbb)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #ifndef OOX_XLS_BIFFINPUTSTREAM_HXX
29 #define OOX_XLS_BIFFINPUTSTREAM_HXX
30 
31 #include <vector>
32 #include "oox/helper/binaryinputstream.hxx"
33 #include "oox/xls/biffhelper.hxx"
34 #include "oox/xls/biffcodec.hxx"
35 
36 namespace rtl { class OUStringBuffer; }
37 
38 namespace oox {
39 namespace xls {
40 
41 // ============================================================================
42 
43 namespace prv {
44 
45 /** Buffers the contents of a raw record and encapsulates stream decoding.

    Two data buffers are kept: the original bytes read from the stream
    (maOriginalData) and the decoded bytes (maDecodedData). mpCurrentData
    points to the buffer that is currently in use. */
46 class BiffInputRecordBuffer
47 {
48 public:
49     explicit            BiffInputRecordBuffer( BinaryInputStream& rInStrm );
50 
51     /** Returns the wrapped binary base stream (read-only access). */
52     inline const BinaryInputStream& getBaseStream() const { return mrInStrm; }
53 
54     /** Sets a decoder object and decrypts buffered record data. */
55     void                setDecoder( const BiffDecoderRef& rxDecoder );
56     /** Returns the current decoder object (a copy of the stored reference). */
57     inline BiffDecoderRef getDecoder() const { return mxDecoder; }
58     /** Enables/disables usage of current decoder. */
59     void                enableDecoder( bool bEnable );
60 
61     /** Restarts the stream at the passed position. Buffer is invalid until the
62         next call of startRecord() or startNextRecord(). */
63     void                restartAt( sal_Int64 nPos );
64 
65     /** Reads the record header at the passed position. */
66     bool                startRecord( sal_Int64 nHeaderPos );
67     /** Reads the next record header from the stream. */
68     bool                startNextRecord();
69     /** Returns the record identifier of the following record. */
70     sal_uInt16          getNextRecId();
71 
72     /** Returns the start position of the record header in the core stream. */
73     inline sal_Int64    getRecHeaderPos() const { return mnHeaderPos; }
74     /** Returns the current record identifier. */
75     inline sal_uInt16   getRecId() const { return mnRecId; }
76     /** Returns the current record size. */
77     inline sal_uInt16   getRecSize() const { return mnRecSize; }
78     /** Returns the current read position in the current record body. */
79     inline sal_uInt16   getRecPos() const { return mnRecPos; }
80     /** Returns the number of remaining bytes in the current record body.
        Computed as mnRecSize - mnRecPos; assumes mnRecPos <= mnRecSize,
        otherwise the sal_uInt16 result wraps around. */
81     inline sal_uInt16   getRecLeft() const { return mnRecSize - mnRecPos; }
82 
83     /** Reads nBytes bytes to the existing buffer opData. Must NOT overread the source buffer. */
84     void                read( void* opData, sal_uInt16 nBytes );
85     /** Ignores nBytes bytes. Must NOT overread the buffer. */
86     void                skip( sal_uInt16 nBytes );
87 
88 private:
89     /** Updates data buffer from stream, if needed. */
90     void                updateBuffer();
91     /** Updates decoded data from original data. */
92     void                updateDecoded();
93 
94 private:
95     typedef ::std::vector< sal_uInt8 > DataBuffer;
96 
97     BinaryInputStream&  mrInStrm;               /// Core input stream.
98     DataBuffer          maOriginalData;         /// Original data read from stream.
99     DataBuffer          maDecodedData;          /// Decoded data.
100     DataBuffer*         mpCurrentData;          /// Points to data buffer currently in use.
101     BiffDecoderRef      mxDecoder;              /// Decoder object.
102     sal_Int64           mnHeaderPos;            /// Stream start position of current record header.
103     sal_Int64           mnBodyPos;              /// Stream start position of current record body.
104     sal_Int64           mnBufferBodyPos;        /// Stream start position of buffered data.
105     sal_Int64           mnNextHeaderPos;        /// Stream start position of next record header.
106     sal_uInt16          mnRecId;                /// Current record identifier.
107     sal_uInt16          mnRecSize;              /// Current record size.
108     sal_uInt16          mnRecPos;               /// Current position in record body.
109     bool                mbValidHeader;          /// True = valid record header.
110 };
111 
112 } // namespace prv
113 
114 // ============================================================================
115 
116 /** This class is used to read BIFF record streams.
117 
118     An instance is constructed with a BinaryInputStream object. The passed
119     stream is reset to its start while constructing this stream.
120 
121     To start reading a record call startNextRecord(). Now it is possible to
122     read all contents of the record using operator>>() or any of the read***()
123     functions. If some data exceeds the record size limit, the stream looks for
124     a following CONTINUE record and jumps automatically to it. It is NOT
125     allowed that an atomic data type is split into two records (e.g. 4 bytes of
126     a double in one record and the other 4 bytes in a following CONTINUE).
127 
128     Trying to read over the record limits results in a stream error. The
129     isValid() function indicates that by returning false. From now on the data
130     returned by the read functions is undefined. The error state will be reset,
131     if the record is reset (with the function resetRecord()), or if the next
132     record is started.
133 
134     To switch off the automatic lookup of CONTINUE records, use resetRecord()
135     with false parameter. This is useful e.g. on import of drawing layer data,
136     where sometimes solely CONTINUE records will occur. The automatic lookup
137     stays switched off until the method resetRecord() is called with parameter
138     true. All other settings done on the stream (e.g. alternative CONTINUE
139     record identifier, enabled decryption, NUL substitution character) will be
140     reset to default values, if a new record is started.
141 
142     The import stream supports decrypting the stream data. The contents of a
143     record (not the record header) will be encrypted by Excel if the file has
144     been stored with password protection. The functions setDecoder() and
145     enableDecoder() control the usage of the decryption algorithms.
146     setDecoder() sets a new decryption algorithm and initially enables it.
147     enableDecoder( false ) may be used to stop the usage of the decryption
148     temporarily (sometimes record contents are never encrypted, e.g. all BOF
149     records or the stream position in SHEET records). Decryption will be
150     reenabled automatically, if a new record is started with the function
151     startNextRecord().
152 */
153 class BiffInputStream : public BinaryInputStream
154 {
155 public:
156     /** Constructs the BIFF record stream using the passed binary stream.
157 
158         @param rInStream
159             The base input stream. Must be seekable. Will be seeked to its
160             start position.
161 
162         @param bContLookup  Automatic CONTINUE lookup on/off.
163      */
164     explicit            BiffInputStream(
165                             BinaryInputStream& rInStream,
166                             bool bContLookup = true );
167 
168     // record control ---------------------------------------------------------
169 
170     /** Sets stream pointer to the start of the next record content.
171 
172         Ignores all CONTINUE records of the current record, if automatic
173         CONTINUE usage is switched on.
174 
175         @return  False = no record found (end of stream).
176      */
177     bool                startNextRecord();
178 
179     /** Sets stream pointer to the start of the content of the specified record.
180 
181         The handle of the current record can be received and stored using the
182         function getRecHandle() for later usage with this function. The record
183         handle is equivalent to the position of the underlying binary stream,
184         thus the function can be used to perform a hard seek to a specific
185         position, if it is sure that a record starts exactly at this position.
186 
187         @return  False = no record found (invalid handle passed).
188      */
189     bool                startRecordByHandle( sal_Int64 nRecHandle );
190 
191     /** Sets stream pointer to the beginning of the record content.
192 
193         @param bContLookup
194             Automatic CONTINUE lookup on/off. In contrast to other stream
195             settings, this setting is persistent until next call of this
196             function (because it is wanted to receive the next CONTINUE records
197             separately).
198         @param nAltContId
199             Sets an alternative record identifier for content continuation.
200             This value is reset automatically when a new record is started with
201             startNextRecord().
202      */
203     void                resetRecord(
204                             bool bContLookup,
205                             sal_uInt16 nAltContId = BIFF_ID_UNKNOWN );
206 
207     /** Sets stream pointer before current record and invalidates stream.
208 
209         The next call to startNextRecord() will start again the current record.
210         This can be used in situations where a loop or a function leaves on a
211         specific record, but the parent context expects to start this record by
212         itself. The stream is invalid as long as the first record has not been
213         started (it is not allowed to call any other stream operation then).
214      */
215     void                rewindRecord();
216 
217     // decoder ----------------------------------------------------------------
218 
219     /** Sets a new decoder object.
220 
221         Enables decryption of record contents for the rest of the stream.
222      */
223     void                setDecoder( const BiffDecoderRef& rxDecoder );
224 
225     /** Enables/disables usage of current decoder.
226 
227         Decryption is reenabled automatically, if a new record is started using
228         the function startNextRecord().
229      */
230     void                enableDecoder( bool bEnable = true );
231 
232     // stream/record state and info -------------------------------------------
233 
234     /** Returns the identifier of the current record (not the CONTINUE ID). */
235     inline sal_uInt16   getRecId() const { return mnRecId; }
236     /** Returns the record identifier of the following record. */
237     sal_uInt16          getNextRecId();
238 
239     /** Returns a unique handle for the current record that can be used with
240         the function startRecordByHandle(). */
241     inline sal_Int64    getRecHandle() const { return mnRecHandle; }
242 
243     // BinaryStreamBase interface (seeking) -----------------------------------
244 
245     /** Returns the data size of the whole record without record headers. */
246     virtual sal_Int64   size() const;
247     /** Returns the position inside of the whole record content. */
248     virtual sal_Int64   tell() const;
249     /** Seeks in record content to the specified position. */
250     virtual void        seek( sal_Int64 nRecPos );
251     /** Closes the input stream but not the wrapped stream. */
252     virtual void        close();
253 
254     /** Returns the absolute position in the wrapped binary stream. */
255     sal_Int64           tellBase() const;
256     /** Returns the total size of the wrapped binary stream. */
257     sal_Int64           sizeBase() const;
258 
259     // BinaryInputStream interface (stream read access) -----------------------
260 
261     /** Reads nBytes bytes to the passed sequence.
262         @return  Number of bytes really read. */
263     virtual sal_Int32   readData( StreamDataSequence& orData, sal_Int32 nBytes, size_t nAtomSize = 1 );
264     /** Reads nBytes bytes and copies them to the passed buffer opMem.
265         @return  Number of bytes really read. */
266     virtual sal_Int32   readMemory( void* opMem, sal_Int32 nBytes, size_t nAtomSize = 1 );
267     /** Seeks forward inside the current record. */
268     virtual void        skip( sal_Int32 nBytes, size_t nAtomSize = 1 );
269 
270     /** Stream operator for integral and floating-point types.
        Forwards to readValue() and returns this stream to allow chaining. */
271     template< typename Type >
272     inline BiffInputStream& operator>>( Type& ornValue ) { readValue( ornValue ); return *this; }
273 
274     // byte strings -----------------------------------------------------------
275 
276     /** Reads 8/16 bit string length and character array, and returns the string.
277         @param b16BitLen
278             True = Read 16-bit string length field before the character array.
279             False = Read 8-bit string length field before the character array.
280         @param bAllowNulChars
281             True = NUL characters are inserted into the imported string.
282             False = NUL characters are replaced by question marks (default).
283      */
284     ::rtl::OString      readByteString( bool b16BitLen, bool bAllowNulChars = false );
285 
286     /** Reads 8/16 bit string length and character array, and returns a Unicode string.
287         @param b16BitLen
288             True = Read 16-bit string length field before the character array.
289             False = Read 8-bit string length field before the character array.
290         @param eTextEnc  The text encoding used to create the Unicode string.
291         @param bAllowNulChars
292             True = NUL characters are inserted into the imported string.
293             False = NUL characters are replaced by question marks (default).
294      */
295     ::rtl::OUString     readByteStringUC( bool b16BitLen, rtl_TextEncoding eTextEnc, bool bAllowNulChars = false );
296 
297     /** Ignores 8/16 bit string length and character array.
298         @param b16BitLen
299             True = Read 16-bit string length field before the character array.
300             False = Read 8-bit string length field before the character array.
301      */
302     void                skipByteString( bool b16BitLen );
303 
304     // Unicode strings --------------------------------------------------------
305 
306     /** Reads nChars characters of a BIFF8 string, and returns the string.
307         @param nChars  Number of characters to read from the stream.
308         @param b16BitChars
309             True = The character array contains 16-bit characters.
310             False = The character array contains truncated 8-bit characters.
311         @param bAllowNulChars
312             True = NUL characters are inserted into the imported string.
313             False = NUL characters are replaced by question marks (default).
314      */
315     ::rtl::OUString     readUniStringChars( sal_uInt16 nChars, bool b16BitChars, bool bAllowNulChars = false );
316 
317     /** Reads 8-bit flags, extended header, nChars characters, extended data of
318         a BIFF8 string, and returns the string.
319         @param nChars  Number of characters to read from the stream.
320         @param bAllowNulChars
321             True = NUL characters are inserted into the imported string.
322             False = NUL characters are replaced by question marks (default).
323      */
324     ::rtl::OUString     readUniStringBody( sal_uInt16 nChars, bool bAllowNulChars = false );
325 
326     /** Reads 16-bit character count, 8-bit flags, extended header, character
327         array, extended data of a BIFF8 string, and returns the string.
328         @param bAllowNulChars
329             True = NUL characters are inserted into the imported string.
330             False = NUL characters are replaced by question marks (default).
331      */
332     ::rtl::OUString     readUniString( bool bAllowNulChars = false );
333 
334     /** Ignores nChars characters of a BIFF8 string.
335         @param nChars  Number of characters to skip in the stream.
336         @param b16BitChars
337             True = The character array contains 16-bit characters.
338             False = The character array contains truncated 8-bit characters.
339      */
340     void                skipUniStringChars( sal_uInt16 nChars, bool b16BitChars );
341 
342     /** Ignores 8-bit flags, extended header, nChars characters, extended data
343         of a BIFF8 string.
344         @param nChars  Number of characters to skip in the stream.
345      */
346     void                skipUniStringBody( sal_uInt16 nChars );
347 
348     /** Ignores 16-bit character count, 8-bit flags, extended header, character
349         array, extended data of a BIFF8 string.
350      */
351     void                skipUniString();
352 
353     // ------------------------------------------------------------------------
354 private:
355     /** Initializes all members after base stream has been seeked to new record. */
356     void                setupRecord();
357     /** Restarts the current record from the beginning. */
358     void                restartRecord( bool bInvalidateRecSize );
359     /** Sets stream pointer before specified record and invalidates stream. */
360     void                rewindToRecord( sal_Int64 nRecHandle );
361     /** Returns true, if stream was able to start a valid record, i.e. the
        current record handle is not negative. */
362     inline bool         isInRecord() const { return mnRecHandle >= 0; }
363 
364     /** Returns true, if the passed ID is real or alternative continuation record ID. */
365     bool                isContinueId( sal_uInt16 nRecId ) const;
366     /** Goes to start of the next CONTINUE record.
367         @descr  Stream must be located at the end of a raw record, and handling
368         of CONTINUE records must be enabled.
369         @return  True if next CONTINUE record has been found and initialized. */
370     bool                jumpToNextContinue();
371     /** Goes to start of the next CONTINUE record while reading strings.
372         @descr  Stream must be located at the end of a raw record. If reading
373         has been started in a CONTINUE record, jumps to an existing following
374         CONTINUE record, even if handling of CONTINUE records is disabled (this
375         is a special handling for TXO string data). Reads additional Unicode
376         flag byte at start of the new raw record and sets or resets rb16BitChars.
377         @return  True if next CONTINUE record has been found and initialized. */
378     bool                jumpToNextStringContinue( bool& rb16BitChars );
379     /** Calculates the complete length of the current record including CONTINUE
380         records, stores the length in mnComplRecSize. */
381     void                calcRecordLength();
382 
383     /** Returns the maximum size of raw data possible to read in one block. */
384     sal_uInt16          getMaxRawReadSize( sal_Int32 nBytes, size_t nAtomSize ) const;
385 
386     /** Reads the BIFF8 Unicode string header fields. */
387     void                readUniStringHeader( bool& orb16BitChars, sal_Int32& ornAddSize );
388 
389 private:
390     prv::BiffInputRecordBuffer maRecBuffer; /// Raw record data buffer.
391 
392     sal_Int64           mnRecHandle;        /// Handle of current record.
393     sal_uInt16          mnRecId;            /// Identifier of current record (not the CONTINUE ID).
394     sal_uInt16          mnAltContId;        /// Alternative identifier for content continuation records.
395 
396     sal_Int64           mnCurrRecSize;      /// Helper for record size and position.
397     sal_Int64           mnComplRecSize;     /// Size of complete record data (with CONTINUEs).
398     bool                mbHasComplRec;      /// True = mnComplRecSize is valid.
399 
400     bool                mbCont;             /// True = automatic CONTINUE lookup enabled.
401 };
402 
403 // ============================================================================
404 
/** Remembers a position in a BiffInputStream so that it can be revisited later.

    Holds a reference to the stream plus a record handle and a position inside
    the record. NOTE(review): the constructor and restorePosition() are
    implemented elsewhere; presumably the constructor captures the current
    record handle and record position of the passed stream - confirm. */
405 class BiffInputStreamPos
406 {
407 public:
    /** Creates the position object for the passed stream. The stream is stored
        by reference (mrStrm), so it has to outlive this object. */
408     explicit            BiffInputStreamPos( BiffInputStream& rStrm );
409 
    /** Presumably moves the stream back to the stored record handle and record
        position (implementation not visible here - confirm).
        @return  NOTE(review): presumably true, if the position could be
            restored - confirm against the implementation. */
410     bool                restorePosition();
411 
    /** Returns the stream this position object refers to. */
412     inline BiffInputStream& getStream() { return mrStrm; }
413 
414 private:
415     BiffInputStream&    mrStrm;         /// Stream this position object refers to.
416     sal_Int64           mnRecHandle;    /// Stored record handle (presumably from getRecHandle() - confirm).
417     sal_Int64           mnRecPos;       /// Stored position in the record (presumably from tell() - confirm).
418 };
419 
420 // ============================================================================
421 
422 /** Stores the current position of the passed stream on construction and
423     restores it automatically on destruction.

    Inherits privately from BiffInputStreamPos, so the base class members
    restorePosition() and getStream() are not accessible through the guard.

    NOTE(review): no copy operations are declared, so the compiler-generated
    copy constructor is available; a copied guard would presumably restore the
    position a second time when it is destroyed - confirm whether copying
    should be prevented. */
424 class BiffInputStreamPosGuard : private BiffInputStreamPos
425 {
426 public:
    /** Creates the guard for the passed stream. */
427     explicit            BiffInputStreamPosGuard( BiffInputStream& rStrm );
    /** Non-virtual destructor; the guard is not meant to be deleted through a
        base class pointer (the base class is private). */
428                         ~BiffInputStreamPosGuard();
429 };
430 
431 // ============================================================================
432 
433 } // namespace xls
434 } // namespace oox
435 
436 #endif
437