1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #ifndef OOX_XLS_BIFFINPUTSTREAM_HXX 29 #define OOX_XLS_BIFFINPUTSTREAM_HXX 30 31 #include <vector> 32 #include "oox/helper/binaryinputstream.hxx" 33 #include "oox/xls/biffhelper.hxx" 34 #include "oox/xls/biffcodec.hxx" 35 36 namespace rtl { class OUStringBuffer; } 37 38 namespace oox { 39 namespace xls { 40 41 // ============================================================================ 42 43 namespace prv { 44 45 /** Buffers the contents of a raw record and encapsulates stream decoding. */ 46 class BiffInputRecordBuffer 47 { 48 public: 49 explicit BiffInputRecordBuffer( BinaryInputStream& rInStrm ); 50 51 /** Returns the wrapped binary base stream. */ 52 inline const BinaryInputStream& getBaseStream() const { return mrInStrm; } 53 54 /** Sets a decoder object and decrypts buffered record data. */ 55 void setDecoder( const BiffDecoderRef& rxDecoder ); 56 /** Returns the current decoder object. */ 57 inline BiffDecoderRef getDecoder() const { return mxDecoder; } 58 /** Enables/disables usage of current decoder. */ 59 void enableDecoder( bool bEnable ); 60 61 /** Restarts the stream at the passed position. Buffer is invalid until the 62 next call of startRecord() or startNextRecord(). */ 63 void restartAt( sal_Int64 nPos ); 64 65 /** Reads the record header at the passed position. */ 66 bool startRecord( sal_Int64 nHeaderPos ); 67 /** Reads the next record header from the stream. */ 68 bool startNextRecord(); 69 /** Returns the start position of the record header in the core stream. */ 70 sal_uInt16 getNextRecId(); 71 72 /** Returns the start position of the record header in the core stream. */ 73 inline sal_Int64 getRecHeaderPos() const { return mnHeaderPos; } 74 /** Returns the current record identifier. */ 75 inline sal_uInt16 getRecId() const { return mnRecId; } 76 /** Returns the current record size. */ 77 inline sal_uInt16 getRecSize() const { return mnRecSize; } 78 /** Returns the current read position in the current record body. */ 79 inline sal_uInt16 getRecPos() const { return mnRecPos; } 80 /** Returns the number of remaining bytes in the current record body. */ 81 inline sal_uInt16 getRecLeft() const { return mnRecSize - mnRecPos; } 82 83 /** Reads nBytes bytes to the existing buffer opData. Must NOT overread the source buffer. */ 84 void read( void* opData, sal_uInt16 nBytes ); 85 /** Ignores nBytes bytes. Must NOT overread the buffer. */ 86 void skip( sal_uInt16 nBytes ); 87 88 private: 89 /** Updates data buffer from stream, if needed. */ 90 void updateBuffer(); 91 /** Updates decoded data from original data. */ 92 void updateDecoded(); 93 94 private: 95 typedef ::std::vector< sal_uInt8 > DataBuffer; 96 97 BinaryInputStream& mrInStrm; /// Core input stream. 98 DataBuffer maOriginalData; /// Original data read from stream. 99 DataBuffer maDecodedData; /// Decoded data. 100 DataBuffer* mpCurrentData; /// Points to data buffer currently in use. 101 BiffDecoderRef mxDecoder; /// Decoder object. 102 sal_Int64 mnHeaderPos; /// Stream start position of current record header. 103 sal_Int64 mnBodyPos; /// Stream start position of current record body. 104 sal_Int64 mnBufferBodyPos; /// Stream start position of buffered data. 105 sal_Int64 mnNextHeaderPos; /// Stream start position of next record header. 106 sal_uInt16 mnRecId; /// Current record identifier. 107 sal_uInt16 mnRecSize; /// Current record size. 108 sal_uInt16 mnRecPos; /// Current position in record body. 109 bool mbValidHeader; /// True = valid record header. 110 }; 111 112 } // namespace prv 113 114 // ============================================================================ 115 116 /** This class is used to read BIFF record streams. 117 118 An instance is constructed with a BinaryInputStream object. The passed 119 stream is reset to its start while constructing this stream. 120 121 To start reading a record call startNextRecord(). Now it is possible to 122 read all contents of the record using operator>>() or any of the read***() 123 functions. If some data exceeds the record size limit, the stream looks for 124 a following CONTINUE record and jumps automatically to it. It is NOT 125 allowed that an atomic data type is split into two records (e.g. 4 bytes of 126 a double in one record and the other 4 bytes in a following CONTINUE). 127 128 Trying to read over the record limits results in a stream error. The 129 isValid() function indicates that by returning false. From now on the data 130 returned by the read functions is undefined. The error state will be reset, 131 if the record is reset (with the function resetRecord()), or if the next 132 record is started. 133 134 To switch off the automatic lookup of CONTINUE records, use resetRecord() 135 with false parameter. This is useful e.g. on import of drawing layer data, 136 where sometimes solely CONTINUE records will occur. The automatic lookup 137 keeps switched off until the method resetRecord() is called with parameter 138 true. All other settings done on the stream (e.g. alternative CONTINUE 139 record identifier, enabled decryption, NUL substitution character) will be 140 reset to default values, if a new record is started. 141 142 The import stream supports decrypting the stream data. The contents of a 143 record (not the record header) will be encrypted by Excel if the file has 144 been stored with password protection. The functions setDecoder() and 145 enableDecoder() control the usage of the decryption algorithms. 146 setDecoder() sets a new decryption algorithm and initially enables it. 147 enableDecoder( false ) may be used to stop the usage of the decryption 148 temporarily (sometimes record contents are never encrypted, e.g. all BOF 149 records or the stream position in SHEET records). Decryption will be 150 reenabled automatically, if a new record is started with the function 151 startNextRecord(). 152 */ 153 class BiffInputStream : public BinaryInputStream 154 { 155 public: 156 /** Constructs the BIFF record stream using the passed binary stream. 157 158 @param rInStream 159 The base input stream. Must be seekable. Will be seeked to its 160 start position. 161 162 @param bContLookup Automatic CONTINUE lookup on/off. 163 */ 164 explicit BiffInputStream( 165 BinaryInputStream& rInStream, 166 bool bContLookup = true ); 167 168 // record control --------------------------------------------------------- 169 170 /** Sets stream pointer to the start of the next record content. 171 172 Ignores all CONTINUE records of the current record, if automatic 173 CONTINUE usage is switched on. 174 175 @return False = no record found (end of stream). 176 */ 177 bool startNextRecord(); 178 179 /** Sets stream pointer to the start of the content of the specified record. 180 181 The handle of the current record can be received and stored using the 182 function getRecHandle() for later usage with this function. The record 183 handle is equivalent to the position of the underlying binary stream, 184 thus the function can be used to perform a hard seek to a specific 185 position, if it is sure that a record starts exactly at this position. 186 187 @return False = no record found (invalid handle passed). 188 */ 189 bool startRecordByHandle( sal_Int64 nRecHandle ); 190 191 /** Sets stream pointer to begin of record content. 192 193 @param bContLookup 194 Automatic CONTINUE lookup on/off. In difference to other stream 195 settings, this setting is persistent until next call of this 196 function (because it is wanted to receive the next CONTINUE records 197 separately). 198 @param nAltContId 199 Sets an alternative record identifier for content continuation. 200 This value is reset automatically when a new record is started with 201 startNextRecord(). 202 */ 203 void resetRecord( 204 bool bContLookup, 205 sal_uInt16 nAltContId = BIFF_ID_UNKNOWN ); 206 207 /** Sets stream pointer before current record and invalidates stream. 208 209 The next call to startNextRecord() will start again the current record. 210 This can be used in situations where a loop or a function leaves on a 211 specific record, but the parent context expects to start this record by 212 itself. The stream is invalid as long as the first record has not been 213 started (it is not allowed to call any other stream operation then). 214 */ 215 void rewindRecord(); 216 217 // decoder ---------------------------------------------------------------- 218 219 /** Sets a new decoder object. 220 221 Enables decryption of record contents for the rest of the stream. 222 */ 223 void setDecoder( const BiffDecoderRef& rxDecoder ); 224 225 /** Enables/disables usage of current decoder. 226 227 Decryption is reenabled automatically, if a new record is started using 228 the function startNextRecord(). 229 */ 230 void enableDecoder( bool bEnable = true ); 231 232 // stream/record state and info ------------------------------------------- 233 234 /** Returns the current record identifier. */ 235 inline sal_uInt16 getRecId() const { return mnRecId; } 236 /** Returns the record identifier of the following record. */ 237 sal_uInt16 getNextRecId(); 238 239 /** Returns a unique handle for the current record that can be used with 240 the function startRecordByHandle(). */ 241 inline sal_Int64 getRecHandle() const { return mnRecHandle; } 242 243 // BinaryStreamBase interface (seeking) ----------------------------------- 244 245 /** Returns the data size of the whole record without record headers. */ 246 virtual sal_Int64 size() const; 247 /** Returns the position inside of the whole record content. */ 248 virtual sal_Int64 tell() const; 249 /** Seeks in record content to the specified position. */ 250 virtual void seek( sal_Int64 nRecPos ); 251 /** Closes the input stream but not the wrapped stream. */ 252 virtual void close(); 253 254 /** Returns the absolute position in the wrapped binary stream. */ 255 sal_Int64 tellBase() const; 256 /** Returns the total size of the wrapped binary stream. */ 257 sal_Int64 sizeBase() const; 258 259 // BinaryInputStream interface (stream read access) ----------------------- 260 261 /** Reads nBytes bytes to the passed sequence. 262 @return Number of bytes really read. */ 263 virtual sal_Int32 readData( StreamDataSequence& orData, sal_Int32 nBytes, size_t nAtomSize = 1 ); 264 /** Reads nBytes bytes and copies them to the passed buffer opMem. 265 @return Number of bytes really read. */ 266 virtual sal_Int32 readMemory( void* opMem, sal_Int32 nBytes, size_t nAtomSize = 1 ); 267 /** Seeks forward inside the current record. */ 268 virtual void skip( sal_Int32 nBytes, size_t nAtomSize = 1 ); 269 270 /** Stream operator for integral and floating-point types. */ 271 template< typename Type > 272 inline BiffInputStream& operator>>( Type& ornValue ) { readValue( ornValue ); return *this; } 273 274 // byte strings ----------------------------------------------------------- 275 276 /** Reads 8/16 bit string length and character array, and returns the string. 277 @param b16BitLen 278 True = Read 16-bit string length field before the character array. 279 False = Read 8-bit string length field before the character array. 280 @param bAllowNulChars 281 True = NUL characters are inserted into the imported string. 282 False = NUL characters are replaced by question marks (default). 283 */ 284 ::rtl::OString readByteString( bool b16BitLen, bool bAllowNulChars = false ); 285 286 /** Reads 8/16 bit string length and character array, and returns a Unicode string. 287 @param b16BitLen 288 True = Read 16-bit string length field before the character array. 289 False = Read 8-bit string length field before the character array. 290 @param eTextEnc The text encoding used to create the Unicode string. 291 @param bAllowNulChars 292 True = NUL characters are inserted into the imported string. 293 False = NUL characters are replaced by question marks (default). 294 */ 295 ::rtl::OUString readByteStringUC( bool b16BitLen, rtl_TextEncoding eTextEnc, bool bAllowNulChars = false ); 296 297 /** Ignores 8/16 bit string length and character array. 298 @param b16BitLen 299 True = Read 16-bit string length field before the character array. 300 False = Read 8-bit string length field before the character array. 301 */ 302 void skipByteString( bool b16BitLen ); 303 304 // Unicode strings -------------------------------------------------------- 305 306 /** Reads nChars characters of a BIFF8 string, and returns the string. 307 @param nChars Number of characters to read from the stream. 308 @param b16BitChars 309 True = The character array contains 16-bit characters. 310 False = The character array contains truncated 8-bit characters. 311 @param bAllowNulChars 312 True = NUL characters are inserted into the imported string. 313 False = NUL characters are replaced by question marks (default). 314 */ 315 ::rtl::OUString readUniStringChars( sal_uInt16 nChars, bool b16BitChars, bool bAllowNulChars = false ); 316 317 /** Reads 8-bit flags, extended header, nChar characters, extended data of 318 a BIFF8 string, and returns the string. 319 @param nChars Number of characters to read from the stream. 320 @param bAllowNulChars 321 True = NUL characters are inserted into the imported string. 322 False = NUL characters are replaced by question marks (default). 323 */ 324 ::rtl::OUString readUniStringBody( sal_uInt16 nChars, bool bAllowNulChars = false ); 325 326 /** Reads 16-bit character count, 8-bit flags, extended header, character 327 array, extended data of a BIFF8 string, and returns the string. 328 @param bAllowNulChars 329 True = NUL characters are inserted into the imported string. 330 False = NUL characters are replaced by question marks (default). 331 */ 332 ::rtl::OUString readUniString( bool bAllowNulChars = false ); 333 334 /** Ignores nChars characters of a BIFF8 string. 335 @param nChars Number of characters to skip in the stream. 336 @param b16BitChars 337 True = The character array contains 16-bit characters. 338 False = The character array contains truncated 8-bit characters. 339 */ 340 void skipUniStringChars( sal_uInt16 nChars, bool b16BitChars ); 341 342 /** Ignores 8-bit flags, extended header, nChar characters, extended data 343 of a BIFF8 string. 344 @param nChars Number of characters to skip in the stream. 345 */ 346 void skipUniStringBody( sal_uInt16 nChars ); 347 348 /** Ignores 16-bit character count, 8-bit flags, extended header, character 349 array, extended data of a BIFF8 string. 350 */ 351 void skipUniString(); 352 353 // ------------------------------------------------------------------------ 354 private: 355 /** Initializes all members after base stream has been seeked to new record. */ 356 void setupRecord(); 357 /** Restarts the current record from the beginning. */ 358 void restartRecord( bool bInvalidateRecSize ); 359 /** Sets stream pointer before specified record and invalidates stream. */ 360 void rewindToRecord( sal_Int64 nRecHandle ); 361 /** Returns true, if stream was able to start a valid record. */ 362 inline bool isInRecord() const { return mnRecHandle >= 0; } 363 364 /** Returns true, if the passed ID is real or alternative continuation record ID. */ 365 bool isContinueId( sal_uInt16 nRecId ) const; 366 /** Goes to start of the next CONTINUE record. 367 @descr Stream must be located at the end of a raw record, and handling 368 of CONTINUE records must be enabled. 369 @return True if next CONTINUE record has been found and initialized. */ 370 bool jumpToNextContinue(); 371 /** Goes to start of the next CONTINUE record while reading strings. 372 @descr Stream must be located at the end of a raw record. If reading 373 has been started in a CONTINUE record, jumps to an existing following 374 CONTINUE record, even if handling of CONTINUE records is disabled (this 375 is a special handling for TXO string data). Reads additional Unicode 376 flag byte at start of the new raw record and sets or resets rb16BitChars. 377 @return True if next CONTINUE record has been found and initialized. */ 378 bool jumpToNextStringContinue( bool& rb16BitChars ); 379 /** Calculates the complete length of the current record including CONTINUE 380 records, stores the length in mnComplRecSize. */ 381 void calcRecordLength(); 382 383 /** Returns the maximum size of raw data possible to read in one block. */ 384 sal_uInt16 getMaxRawReadSize( sal_Int32 nBytes, size_t nAtomSize ) const; 385 386 /** Reads the BIFF8 Unicode string header fields. */ 387 void readUniStringHeader( bool& orb16BitChars, sal_Int32& ornAddSize ); 388 389 private: 390 prv::BiffInputRecordBuffer maRecBuffer; /// Raw record data buffer. 391 392 sal_Int64 mnRecHandle; /// Handle of current record. 393 sal_uInt16 mnRecId; /// Identifier of current record (not the CONTINUE ID). 394 sal_uInt16 mnAltContId; /// Alternative identifier for content continuation records. 395 396 sal_Int64 mnCurrRecSize; /// Helper for record size and position. 397 sal_Int64 mnComplRecSize; /// Size of complete record data (with CONTINUEs). 398 bool mbHasComplRec; /// True = mnComplRecSize is valid. 399 400 bool mbCont; /// True = automatic CONTINUE lookup enabled. 401 }; 402 403 // ============================================================================ 404 405 class BiffInputStreamPos 406 { 407 public: 408 explicit BiffInputStreamPos( BiffInputStream& rStrm ); 409 410 bool restorePosition(); 411 412 inline BiffInputStream& getStream() { return mrStrm; } 413 414 private: 415 BiffInputStream& mrStrm; 416 sal_Int64 mnRecHandle; 417 sal_Int64 mnRecPos; 418 }; 419 420 // ============================================================================ 421 422 /** Stores the current position of the passed stream on construction and 423 restores it automatically on destruction. */ 424 class BiffInputStreamPosGuard : private BiffInputStreamPos 425 { 426 public: 427 explicit BiffInputStreamPosGuard( BiffInputStream& rStrm ); 428 ~BiffInputStreamPosGuard(); 429 }; 430 431 // ============================================================================ 432 433 } // namespace xls 434 } // namespace oox 435 436 #endif 437