1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_sdext.hxx" 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #if defined __SUNPRO_CC 32*cdf0e10cSrcweir #pragma disable_warn 33*cdf0e10cSrcweir #elif defined _MSC_VER 34*cdf0e10cSrcweir #pragma warning(push, 1) 35*cdf0e10cSrcweir #endif 36*cdf0e10cSrcweir 37*cdf0e10cSrcweir #include "pdfparse.hxx" 38*cdf0e10cSrcweir 39*cdf0e10cSrcweir // workaround windows compiler: do not include multi_pass.hpp 40*cdf0e10cSrcweir //#include <boost/spirit.hpp> 41*cdf0e10cSrcweir #include <boost/spirit/include/classic_core.hpp> 42*cdf0e10cSrcweir #include <boost/spirit/include/classic_utility.hpp> 43*cdf0e10cSrcweir #include <boost/spirit/include/classic_error_handling.hpp> 44*cdf0e10cSrcweir #include <boost/spirit/include/classic_file_iterator.hpp> 45*cdf0e10cSrcweir #include <boost/bind.hpp> 46*cdf0e10cSrcweir #include <string> 47*cdf0e10cSrcweir 48*cdf0e10cSrcweir #include <rtl/strbuf.hxx> 49*cdf0e10cSrcweir #include <rtl/memory.h> 50*cdf0e10cSrcweir #include <rtl/alloc.h> 51*cdf0e10cSrcweir 52*cdf0e10cSrcweir // disable warnings again because someone along the line has enabled them 53*cdf0e10cSrcweir #if defined __SUNPRO_CC 54*cdf0e10cSrcweir #pragma disable_warn 55*cdf0e10cSrcweir #elif defined _MSC_VER 56*cdf0e10cSrcweir #pragma warning(push, 1) 57*cdf0e10cSrcweir #endif 58*cdf0e10cSrcweir 59*cdf0e10cSrcweir using namespace boost::spirit; 60*cdf0e10cSrcweir using namespace rtl; 61*cdf0e10cSrcweir using namespace pdfparse; 62*cdf0e10cSrcweir 63*cdf0e10cSrcweir class StringEmitContext : public EmitContext 64*cdf0e10cSrcweir { 65*cdf0e10cSrcweir OStringBuffer m_aBuf; 66*cdf0e10cSrcweir public: 67*cdf0e10cSrcweir StringEmitContext() : EmitContext(), m_aBuf(256) {} 68*cdf0e10cSrcweir virtual ~StringEmitContext() {} 69*cdf0e10cSrcweir virtual bool write( const void* pBuf, unsigned int nLen ) throw() 70*cdf0e10cSrcweir { 71*cdf0e10cSrcweir m_aBuf.append( (const sal_Char*)pBuf, nLen ); 72*cdf0e10cSrcweir return true; 73*cdf0e10cSrcweir } 74*cdf0e10cSrcweir virtual unsigned int getCurPos() throw() { return m_aBuf.getLength(); } 75*cdf0e10cSrcweir virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw() 76*cdf0e10cSrcweir { return (nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) ? 77*cdf0e10cSrcweir write( m_aBuf.getStr() + nOrigOffset, nLen ) : false; } 78*cdf0e10cSrcweir virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw() 79*cdf0e10cSrcweir { 80*cdf0e10cSrcweir if( nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) 81*cdf0e10cSrcweir { 82*cdf0e10cSrcweir rtl_copyMemory( pBuf, m_aBuf.getStr()+nOrigOffset, nLen ); 83*cdf0e10cSrcweir return nLen; 84*cdf0e10cSrcweir } 85*cdf0e10cSrcweir return 0; 86*cdf0e10cSrcweir } 87*cdf0e10cSrcweir 88*cdf0e10cSrcweir OString getString() { return m_aBuf.makeStringAndClear(); } 89*cdf0e10cSrcweir }; 90*cdf0e10cSrcweir 91*cdf0e10cSrcweir template< class iteratorT > 92*cdf0e10cSrcweir class PDFGrammar : public grammar< PDFGrammar<iteratorT> > 93*cdf0e10cSrcweir { 94*cdf0e10cSrcweir public: 95*cdf0e10cSrcweir 96*cdf0e10cSrcweir PDFGrammar( const iteratorT& first ) 97*cdf0e10cSrcweir : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {} 98*cdf0e10cSrcweir ~PDFGrammar() 99*cdf0e10cSrcweir { 100*cdf0e10cSrcweir if( !m_aObjectStack.empty() ) 101*cdf0e10cSrcweir delete m_aObjectStack.front(); 102*cdf0e10cSrcweir } 103*cdf0e10cSrcweir 104*cdf0e10cSrcweir double m_fDouble; 105*cdf0e10cSrcweir std::vector< unsigned int > m_aUIntStack; 106*cdf0e10cSrcweir std::vector< PDFEntry* > m_aObjectStack; 107*cdf0e10cSrcweir rtl::OString m_aErrorString; 108*cdf0e10cSrcweir iteratorT m_aGlobalBegin; 109*cdf0e10cSrcweir 110*cdf0e10cSrcweir public: 111*cdf0e10cSrcweir struct pdf_string_parser 112*cdf0e10cSrcweir { 113*cdf0e10cSrcweir typedef nil_t result_t; 114*cdf0e10cSrcweir template <typename ScannerT> 115*cdf0e10cSrcweir std::ptrdiff_t 116*cdf0e10cSrcweir operator()(ScannerT const& scan, result_t& result) const 117*cdf0e10cSrcweir { 118*cdf0e10cSrcweir std::ptrdiff_t len = 0; 119*cdf0e10cSrcweir 120*cdf0e10cSrcweir int nBraceLevel = 0; 121*cdf0e10cSrcweir while( ! scan.at_end() ) 122*cdf0e10cSrcweir { 123*cdf0e10cSrcweir char c = *scan; 124*cdf0e10cSrcweir if( c == ')' ) 125*cdf0e10cSrcweir { 126*cdf0e10cSrcweir nBraceLevel--; 127*cdf0e10cSrcweir if( nBraceLevel < 0 ) 128*cdf0e10cSrcweir break; 129*cdf0e10cSrcweir } 130*cdf0e10cSrcweir else if( c == '(' ) 131*cdf0e10cSrcweir nBraceLevel++; 132*cdf0e10cSrcweir else if( c == '\\' ) // ignore escaped braces 133*cdf0e10cSrcweir { 134*cdf0e10cSrcweir ++len; 135*cdf0e10cSrcweir ++scan; 136*cdf0e10cSrcweir if( scan.at_end() ) 137*cdf0e10cSrcweir break; 138*cdf0e10cSrcweir } 139*cdf0e10cSrcweir ++len; 140*cdf0e10cSrcweir ++scan; 141*cdf0e10cSrcweir } 142*cdf0e10cSrcweir return scan.at_end() ? -1 : len; 143*cdf0e10cSrcweir } 144*cdf0e10cSrcweir }; 145*cdf0e10cSrcweir 146*cdf0e10cSrcweir template< typename ScannerT > 147*cdf0e10cSrcweir struct definition 148*cdf0e10cSrcweir { 149*cdf0e10cSrcweir definition( const PDFGrammar<iteratorT>& rSelf ) 150*cdf0e10cSrcweir { 151*cdf0e10cSrcweir PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf ); 152*cdf0e10cSrcweir 153*cdf0e10cSrcweir // workaround workshop compiler: comment_p doesn't work 154*cdf0e10cSrcweir // comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )]; 155*cdf0e10cSrcweir comment = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ]; 156*cdf0e10cSrcweir 157*cdf0e10cSrcweir boolean = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)]; 158*cdf0e10cSrcweir 159*cdf0e10cSrcweir // workaround workshop compiler: confix_p doesn't work 160*cdf0e10cSrcweir //stream = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )]; 161*cdf0e10cSrcweir stream = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )]; 162*cdf0e10cSrcweir 163*cdf0e10cSrcweir name = lexeme_d[ 164*cdf0e10cSrcweir ch_p('/') 165*cdf0e10cSrcweir >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0'))) 166*cdf0e10cSrcweir [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ]; 167*cdf0e10cSrcweir 168*cdf0e10cSrcweir // workaround workshop compiler: confix_p doesn't work 169*cdf0e10cSrcweir //stringtype = ( confix_p("(",*anychar_p, ")") | 170*cdf0e10cSrcweir // confix_p("<",*xdigit_p, ">") ) 171*cdf0e10cSrcweir // [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)]; 172*cdf0e10cSrcweir 173*cdf0e10cSrcweir stringtype = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) | 174*cdf0e10cSrcweir ( ch_p('<') >> *xdigit_p >> ch_p('>') ) ) 175*cdf0e10cSrcweir [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)]; 176*cdf0e10cSrcweir 177*cdf0e10cSrcweir null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)]; 178*cdf0e10cSrcweir 179*cdf0e10cSrcweir #ifdef USE_ASSIGN_ACTOR 180*cdf0e10cSrcweir objectref = ( uint_p[push_back_a(pSelf->m_aUIntStack)] 181*cdf0e10cSrcweir >> uint_p[push_back_a(pSelf->m_aUIntStack)] 182*cdf0e10cSrcweir >> ch_p('R') 183*cdf0e10cSrcweir >> eps_p 184*cdf0e10cSrcweir )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)]; 185*cdf0e10cSrcweir #else 186*cdf0e10cSrcweir objectref = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] 187*cdf0e10cSrcweir >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] 188*cdf0e10cSrcweir >> ch_p('R') 189*cdf0e10cSrcweir >> eps_p 190*cdf0e10cSrcweir )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)]; 191*cdf0e10cSrcweir #endif 192*cdf0e10cSrcweir 193*cdf0e10cSrcweir #ifdef USE_ASSIGN_ACTOR 194*cdf0e10cSrcweir simple_type = objectref | name | 195*cdf0e10cSrcweir ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p ) 196*cdf0e10cSrcweir [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)] 197*cdf0e10cSrcweir | stringtype | boolean | null_object; 198*cdf0e10cSrcweir #else 199*cdf0e10cSrcweir simple_type = objectref | name | 200*cdf0e10cSrcweir ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p ) 201*cdf0e10cSrcweir [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)] 202*cdf0e10cSrcweir | stringtype | boolean | null_object; 203*cdf0e10cSrcweir #endif 204*cdf0e10cSrcweir 205*cdf0e10cSrcweir dict_begin = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)]; 206*cdf0e10cSrcweir dict_end = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)]; 207*cdf0e10cSrcweir 208*cdf0e10cSrcweir array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)]; 209*cdf0e10cSrcweir array_end = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)]; 210*cdf0e10cSrcweir 211*cdf0e10cSrcweir #ifdef USE_ASSIGN_ACTOR 212*cdf0e10cSrcweir object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)] 213*cdf0e10cSrcweir >> uint_p[push_back_a(pSelf->m_aUIntStack)] 214*cdf0e10cSrcweir >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)]; 215*cdf0e10cSrcweir #else 216*cdf0e10cSrcweir object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] 217*cdf0e10cSrcweir >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] 218*cdf0e10cSrcweir >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)]; 219*cdf0e10cSrcweir #endif 220*cdf0e10cSrcweir object_end = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)]; 221*cdf0e10cSrcweir 222*cdf0e10cSrcweir xref = str_p( "xref" ) >> uint_p >> uint_p 223*cdf0e10cSrcweir >> lexeme_d[ 224*cdf0e10cSrcweir +( repeat_p(10)[digit_p] 225*cdf0e10cSrcweir >> blank_p 226*cdf0e10cSrcweir >> repeat_p(5)[digit_p] 227*cdf0e10cSrcweir >> blank_p 228*cdf0e10cSrcweir >> ( ch_p('n') | ch_p('f') ) 229*cdf0e10cSrcweir >> repeat_p(2)[space_p] 230*cdf0e10cSrcweir ) ]; 231*cdf0e10cSrcweir 232*cdf0e10cSrcweir dict_element= dict_begin | comment | simple_type 233*cdf0e10cSrcweir | array_begin | array_end | dict_end; 234*cdf0e10cSrcweir 235*cdf0e10cSrcweir object = object_begin 236*cdf0e10cSrcweir >> *dict_element 237*cdf0e10cSrcweir >> !stream 238*cdf0e10cSrcweir >> object_end; 239*cdf0e10cSrcweir 240*cdf0e10cSrcweir trailer = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)] 241*cdf0e10cSrcweir >> *dict_element 242*cdf0e10cSrcweir >> str_p("startxref") 243*cdf0e10cSrcweir >> uint_p 244*cdf0e10cSrcweir >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)]; 245*cdf0e10cSrcweir 246*cdf0e10cSrcweir #ifdef USE_ASSIGN_ACTOR 247*cdf0e10cSrcweir pdfrule = ! (lexeme_d[ 248*cdf0e10cSrcweir str_p( "%PDF-" ) 249*cdf0e10cSrcweir >> uint_p[push_back_a(pSelf->m_aUIntStack)] 250*cdf0e10cSrcweir >> ch_p('.') 251*cdf0e10cSrcweir >> uint_p[push_back_a(pSelf->m_aUIntStack)] 252*cdf0e10cSrcweir >> *((~ch_p('\r') & ~ch_p('\n'))) 253*cdf0e10cSrcweir >> eol_p 254*cdf0e10cSrcweir ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)] 255*cdf0e10cSrcweir >> *( comment | object | ( xref >> trailer ) ); 256*cdf0e10cSrcweir #else 257*cdf0e10cSrcweir pdfrule = ! (lexeme_d[ 258*cdf0e10cSrcweir str_p( "%PDF-" ) 259*cdf0e10cSrcweir >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] 260*cdf0e10cSrcweir >> ch_p('.') 261*cdf0e10cSrcweir >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)] 262*cdf0e10cSrcweir >> *((~ch_p('\r') & ~ch_p('\n'))) 263*cdf0e10cSrcweir >> eol_p 264*cdf0e10cSrcweir ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)] 265*cdf0e10cSrcweir >> *( comment | object | ( xref >> trailer ) ); 266*cdf0e10cSrcweir #endif 267*cdf0e10cSrcweir } 268*cdf0e10cSrcweir rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type, 269*cdf0e10cSrcweir objectref, array, value, dict_element, dict_begin, dict_end, 270*cdf0e10cSrcweir array_begin, array_end, object, object_begin, object_end, 271*cdf0e10cSrcweir xref, trailer, pdfrule; 272*cdf0e10cSrcweir 273*cdf0e10cSrcweir const rule< ScannerT >& start() const { return pdfrule; } 274*cdf0e10cSrcweir }; 275*cdf0e10cSrcweir 276*cdf0e10cSrcweir #ifndef USE_ASSIGN_ACTOR 277*cdf0e10cSrcweir void push_back_action_uint( unsigned int i ) 278*cdf0e10cSrcweir { 279*cdf0e10cSrcweir m_aUIntStack.push_back( i ); 280*cdf0e10cSrcweir } 281*cdf0e10cSrcweir void assign_action_double( double d ) 282*cdf0e10cSrcweir { 283*cdf0e10cSrcweir m_fDouble = d; 284*cdf0e10cSrcweir } 285*cdf0e10cSrcweir #endif 286*cdf0e10cSrcweir 287*cdf0e10cSrcweir void parseError( const char* pMessage, iteratorT pLocation ) 288*cdf0e10cSrcweir { 289*cdf0e10cSrcweir throw_( pLocation, pMessage ); 290*cdf0e10cSrcweir } 291*cdf0e10cSrcweir 292*cdf0e10cSrcweir rtl::OString iteratorToString( iteratorT first, iteratorT last ) const 293*cdf0e10cSrcweir { 294*cdf0e10cSrcweir rtl::OStringBuffer aStr( 32 ); 295*cdf0e10cSrcweir while( first != last ) 296*cdf0e10cSrcweir { 297*cdf0e10cSrcweir aStr.append( *first ); 298*cdf0e10cSrcweir ++first; 299*cdf0e10cSrcweir } 300*cdf0e10cSrcweir return aStr.makeStringAndClear(); 301*cdf0e10cSrcweir } 302*cdf0e10cSrcweir 303*cdf0e10cSrcweir void haveFile( iteratorT pBegin, iteratorT /*pEnd*/ ) 304*cdf0e10cSrcweir { 305*cdf0e10cSrcweir if( m_aObjectStack.empty() ) 306*cdf0e10cSrcweir { 307*cdf0e10cSrcweir PDFFile* pFile = new PDFFile(); 308*cdf0e10cSrcweir pFile->m_nMinor = m_aUIntStack.back(); 309*cdf0e10cSrcweir m_aUIntStack.pop_back(); 310*cdf0e10cSrcweir pFile->m_nMajor = m_aUIntStack.back(); 311*cdf0e10cSrcweir m_aUIntStack.pop_back(); 312*cdf0e10cSrcweir m_aObjectStack.push_back( pFile ); 313*cdf0e10cSrcweir } 314*cdf0e10cSrcweir else 315*cdf0e10cSrcweir parseError( "found file header in unusual place", pBegin ); 316*cdf0e10cSrcweir } 317*cdf0e10cSrcweir 318*cdf0e10cSrcweir void pushComment( iteratorT first, iteratorT last ) 319*cdf0e10cSrcweir { 320*cdf0e10cSrcweir // add a comment to the current stack element 321*cdf0e10cSrcweir PDFComment* pComment = 322*cdf0e10cSrcweir new PDFComment(iteratorToString(first,last)); 323*cdf0e10cSrcweir if( m_aObjectStack.empty() ) 324*cdf0e10cSrcweir m_aObjectStack.push_back( new PDFPart() ); 325*cdf0e10cSrcweir PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back()); 326*cdf0e10cSrcweir if( pContainer == NULL ) 327*cdf0e10cSrcweir parseError( "comment without container", first ); 328*cdf0e10cSrcweir pContainer->m_aSubElements.push_back( pComment ); 329*cdf0e10cSrcweir } 330*cdf0e10cSrcweir 331*cdf0e10cSrcweir void insertNewValue( PDFEntry* pNewValue, iteratorT pPos ) 332*cdf0e10cSrcweir { 333*cdf0e10cSrcweir PDFContainer* pContainer = NULL; 334*cdf0e10cSrcweir const char* pMsg = NULL; 335*cdf0e10cSrcweir if( ! m_aObjectStack.empty() && 336*cdf0e10cSrcweir (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL ) 337*cdf0e10cSrcweir { 338*cdf0e10cSrcweir if( dynamic_cast<PDFDict*>(pContainer) == NULL && 339*cdf0e10cSrcweir dynamic_cast<PDFArray*>(pContainer) == NULL ) 340*cdf0e10cSrcweir { 341*cdf0e10cSrcweir PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer); 342*cdf0e10cSrcweir if( pObj ) 343*cdf0e10cSrcweir { 344*cdf0e10cSrcweir if( pObj->m_pObject == NULL ) 345*cdf0e10cSrcweir pObj->m_pObject = pNewValue; 346*cdf0e10cSrcweir else 347*cdf0e10cSrcweir { 348*cdf0e10cSrcweir pMsg = "second value for object"; 349*cdf0e10cSrcweir pContainer = NULL; 350*cdf0e10cSrcweir } 351*cdf0e10cSrcweir } 352*cdf0e10cSrcweir else if( dynamic_cast<PDFDict*>(pNewValue) ) 353*cdf0e10cSrcweir { 354*cdf0e10cSrcweir PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer); 355*cdf0e10cSrcweir if( pTrailer ) 356*cdf0e10cSrcweir { 357*cdf0e10cSrcweir if( pTrailer->m_pDict == NULL ) 358*cdf0e10cSrcweir pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue); 359*cdf0e10cSrcweir else 360*cdf0e10cSrcweir pContainer = NULL; 361*cdf0e10cSrcweir } 362*cdf0e10cSrcweir else 363*cdf0e10cSrcweir pContainer = NULL; 364*cdf0e10cSrcweir } 365*cdf0e10cSrcweir else 366*cdf0e10cSrcweir pContainer = NULL; 367*cdf0e10cSrcweir } 368*cdf0e10cSrcweir } 369*cdf0e10cSrcweir if( pContainer ) 370*cdf0e10cSrcweir pContainer->m_aSubElements.push_back( pNewValue ); 371*cdf0e10cSrcweir else 372*cdf0e10cSrcweir { 373*cdf0e10cSrcweir if( ! pMsg ) 374*cdf0e10cSrcweir { 375*cdf0e10cSrcweir if( dynamic_cast<PDFContainer*>(pNewValue) ) 376*cdf0e10cSrcweir pMsg = "array without container"; 377*cdf0e10cSrcweir else 378*cdf0e10cSrcweir pMsg = "value without container"; 379*cdf0e10cSrcweir } 380*cdf0e10cSrcweir delete pNewValue; 381*cdf0e10cSrcweir parseError( pMsg, pPos ); 382*cdf0e10cSrcweir } 383*cdf0e10cSrcweir } 384*cdf0e10cSrcweir 385*cdf0e10cSrcweir void pushName( iteratorT first, iteratorT last ) 386*cdf0e10cSrcweir { 387*cdf0e10cSrcweir insertNewValue( new PDFName(iteratorToString(first,last)), first ); 388*cdf0e10cSrcweir } 389*cdf0e10cSrcweir 390*cdf0e10cSrcweir void pushDouble( iteratorT first, iteratorT /*last*/ ) 391*cdf0e10cSrcweir { 392*cdf0e10cSrcweir insertNewValue( new PDFNumber(m_fDouble), first ); 393*cdf0e10cSrcweir } 394*cdf0e10cSrcweir 395*cdf0e10cSrcweir void pushString( iteratorT first, iteratorT last ) 396*cdf0e10cSrcweir { 397*cdf0e10cSrcweir insertNewValue( new PDFString(iteratorToString(first,last)), first ); 398*cdf0e10cSrcweir } 399*cdf0e10cSrcweir 400*cdf0e10cSrcweir void pushBool( iteratorT first, iteratorT last ) 401*cdf0e10cSrcweir { 402*cdf0e10cSrcweir insertNewValue( new PDFBool( (last-first == 4) ), first ); 403*cdf0e10cSrcweir } 404*cdf0e10cSrcweir 405*cdf0e10cSrcweir void pushNull( iteratorT first, iteratorT ) 406*cdf0e10cSrcweir { 407*cdf0e10cSrcweir insertNewValue( new PDFNull(), first ); 408*cdf0e10cSrcweir } 409*cdf0e10cSrcweir 410*cdf0e10cSrcweir 411*cdf0e10cSrcweir void beginObject( iteratorT first, iteratorT /*last*/ ) 412*cdf0e10cSrcweir { 413*cdf0e10cSrcweir if( m_aObjectStack.empty() ) 414*cdf0e10cSrcweir m_aObjectStack.push_back( new PDFPart() ); 415*cdf0e10cSrcweir 416*cdf0e10cSrcweir unsigned int nGeneration = m_aUIntStack.back(); 417*cdf0e10cSrcweir m_aUIntStack.pop_back(); 418*cdf0e10cSrcweir unsigned int nObject = m_aUIntStack.back(); 419*cdf0e10cSrcweir m_aUIntStack.pop_back(); 420*cdf0e10cSrcweir 421*cdf0e10cSrcweir PDFObject* pObj = new PDFObject( nObject, nGeneration ); 422*cdf0e10cSrcweir pObj->m_nOffset = first - m_aGlobalBegin; 423*cdf0e10cSrcweir 424*cdf0e10cSrcweir PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back()); 425*cdf0e10cSrcweir if( pContainer && 426*cdf0e10cSrcweir ( dynamic_cast<PDFFile*>(pContainer) || 427*cdf0e10cSrcweir dynamic_cast<PDFPart*>(pContainer) ) ) 428*cdf0e10cSrcweir { 429*cdf0e10cSrcweir pContainer->m_aSubElements.push_back( pObj ); 430*cdf0e10cSrcweir m_aObjectStack.push_back( pObj ); 431*cdf0e10cSrcweir } 432*cdf0e10cSrcweir else 433*cdf0e10cSrcweir parseError( "object in wrong place", first ); 434*cdf0e10cSrcweir } 435*cdf0e10cSrcweir 436*cdf0e10cSrcweir void endObject( iteratorT first, iteratorT ) 437*cdf0e10cSrcweir { 438*cdf0e10cSrcweir if( m_aObjectStack.empty() ) 439*cdf0e10cSrcweir parseError( "endobj without obj", first ); 440*cdf0e10cSrcweir else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL ) 441*cdf0e10cSrcweir parseError( "spurious endobj", first ); 442*cdf0e10cSrcweir else 443*cdf0e10cSrcweir m_aObjectStack.pop_back(); 444*cdf0e10cSrcweir } 445*cdf0e10cSrcweir 446*cdf0e10cSrcweir void pushObjectRef( iteratorT first, iteratorT ) 447*cdf0e10cSrcweir { 448*cdf0e10cSrcweir unsigned int nGeneration = m_aUIntStack.back(); 449*cdf0e10cSrcweir m_aUIntStack.pop_back(); 450*cdf0e10cSrcweir unsigned int nObject = m_aUIntStack.back(); 451*cdf0e10cSrcweir m_aUIntStack.pop_back(); 452*cdf0e10cSrcweir insertNewValue( new PDFObjectRef(nObject,nGeneration), first ); 453*cdf0e10cSrcweir } 454*cdf0e10cSrcweir 455*cdf0e10cSrcweir void beginDict( iteratorT first, iteratorT ) 456*cdf0e10cSrcweir { 457*cdf0e10cSrcweir PDFDict* pDict = new PDFDict(); 458*cdf0e10cSrcweir pDict->m_nOffset = first - m_aGlobalBegin; 459*cdf0e10cSrcweir 460*cdf0e10cSrcweir insertNewValue( pDict, first ); 461*cdf0e10cSrcweir // will not come here if insertion fails (exception) 462*cdf0e10cSrcweir m_aObjectStack.push_back( pDict ); 463*cdf0e10cSrcweir } 464*cdf0e10cSrcweir void endDict( iteratorT first, iteratorT ) 465*cdf0e10cSrcweir { 466*cdf0e10cSrcweir PDFDict* pDict = NULL; 467*cdf0e10cSrcweir if( m_aObjectStack.empty() ) 468*cdf0e10cSrcweir parseError( "dictionary end without begin", first ); 469*cdf0e10cSrcweir else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL ) 470*cdf0e10cSrcweir parseError( "spurious dictionary end", first ); 471*cdf0e10cSrcweir else 472*cdf0e10cSrcweir m_aObjectStack.pop_back(); 473*cdf0e10cSrcweir 474*cdf0e10cSrcweir PDFEntry* pOffender = pDict->buildMap(); 475*cdf0e10cSrcweir if( pOffender ) 476*cdf0e10cSrcweir { 477*cdf0e10cSrcweir StringEmitContext aCtx; 478*cdf0e10cSrcweir aCtx.write( "offending dictionary element: ", 30 ); 479*cdf0e10cSrcweir pOffender->emit( aCtx ); 480*cdf0e10cSrcweir m_aErrorString = aCtx.getString(); 481*cdf0e10cSrcweir parseError( m_aErrorString.getStr(), first ); 482*cdf0e10cSrcweir } 483*cdf0e10cSrcweir } 484*cdf0e10cSrcweir 485*cdf0e10cSrcweir void beginArray( iteratorT first, iteratorT ) 486*cdf0e10cSrcweir { 487*cdf0e10cSrcweir PDFArray* pArray = new PDFArray(); 488*cdf0e10cSrcweir pArray->m_nOffset = first - m_aGlobalBegin; 489*cdf0e10cSrcweir 490*cdf0e10cSrcweir insertNewValue( pArray, first ); 491*cdf0e10cSrcweir // will not come here if insertion fails (exception) 492*cdf0e10cSrcweir m_aObjectStack.push_back( pArray ); 493*cdf0e10cSrcweir } 494*cdf0e10cSrcweir 495*cdf0e10cSrcweir void endArray( iteratorT first, iteratorT ) 496*cdf0e10cSrcweir { 497*cdf0e10cSrcweir if( m_aObjectStack.empty() ) 498*cdf0e10cSrcweir parseError( "array end without begin", first ); 499*cdf0e10cSrcweir else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL ) 500*cdf0e10cSrcweir parseError( "spurious array end", first ); 501*cdf0e10cSrcweir else 502*cdf0e10cSrcweir m_aObjectStack.pop_back(); 503*cdf0e10cSrcweir } 504*cdf0e10cSrcweir 505*cdf0e10cSrcweir void emitStream( iteratorT first, iteratorT last ) 506*cdf0e10cSrcweir { 507*cdf0e10cSrcweir if( m_aObjectStack.empty() ) 508*cdf0e10cSrcweir parseError( "stream without object", first ); 509*cdf0e10cSrcweir PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back()); 510*cdf0e10cSrcweir if( pObj && pObj->m_pObject ) 511*cdf0e10cSrcweir { 512*cdf0e10cSrcweir if( pObj->m_pStream ) 513*cdf0e10cSrcweir parseError( "multiple streams in object", first ); 514*cdf0e10cSrcweir 515*cdf0e10cSrcweir PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject); 516*cdf0e10cSrcweir if( pDict ) 517*cdf0e10cSrcweir { 518*cdf0e10cSrcweir PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict ); 519*cdf0e10cSrcweir 520*cdf0e10cSrcweir pObj->m_pStream = pStream; 521*cdf0e10cSrcweir pObj->m_aSubElements.push_back( pStream ); 522*cdf0e10cSrcweir } 523*cdf0e10cSrcweir } 524*cdf0e10cSrcweir else 525*cdf0e10cSrcweir parseError( "stream without object", first ); 526*cdf0e10cSrcweir } 527*cdf0e10cSrcweir 528*cdf0e10cSrcweir void beginTrailer( iteratorT first, iteratorT ) 529*cdf0e10cSrcweir { 530*cdf0e10cSrcweir if( m_aObjectStack.empty() ) 531*cdf0e10cSrcweir m_aObjectStack.push_back( new PDFPart() ); 532*cdf0e10cSrcweir 533*cdf0e10cSrcweir PDFTrailer* pTrailer = new PDFTrailer(); 534*cdf0e10cSrcweir pTrailer->m_nOffset = first - m_aGlobalBegin; 535*cdf0e10cSrcweir 536*cdf0e10cSrcweir PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back()); 537*cdf0e10cSrcweir if( pContainer && 538*cdf0e10cSrcweir ( dynamic_cast<PDFFile*>(pContainer) || 539*cdf0e10cSrcweir dynamic_cast<PDFPart*>(pContainer) ) ) 540*cdf0e10cSrcweir { 541*cdf0e10cSrcweir pContainer->m_aSubElements.push_back( pTrailer ); 542*cdf0e10cSrcweir m_aObjectStack.push_back( pTrailer ); 543*cdf0e10cSrcweir } 544*cdf0e10cSrcweir else 545*cdf0e10cSrcweir parseError( "trailer in wrong place", first ); 546*cdf0e10cSrcweir } 547*cdf0e10cSrcweir 548*cdf0e10cSrcweir void endTrailer( iteratorT first, iteratorT ) 549*cdf0e10cSrcweir { 550*cdf0e10cSrcweir if( m_aObjectStack.empty() ) 551*cdf0e10cSrcweir parseError( "%%EOF without trailer", first ); 552*cdf0e10cSrcweir else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL ) 553*cdf0e10cSrcweir parseError( "spurious %%EOF", first ); 554*cdf0e10cSrcweir else 555*cdf0e10cSrcweir m_aObjectStack.pop_back(); 556*cdf0e10cSrcweir } 557*cdf0e10cSrcweir }; 558*cdf0e10cSrcweir 559*cdf0e10cSrcweir PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen ) 560*cdf0e10cSrcweir { 561*cdf0e10cSrcweir PDFGrammar<const char*> aGrammar( pBuffer ); 562*cdf0e10cSrcweir 563*cdf0e10cSrcweir try 564*cdf0e10cSrcweir { 565*cdf0e10cSrcweir boost::spirit::parse_info<const char*> aInfo = 566*cdf0e10cSrcweir boost::spirit::parse( pBuffer, 567*cdf0e10cSrcweir pBuffer+nLen, 568*cdf0e10cSrcweir aGrammar, 569*cdf0e10cSrcweir boost::spirit::space_p ); 570*cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1 571*cdf0e10cSrcweir fprintf( stderr, "parseinfo: stop = %p (buff=%p, offset = %d), hit = %s, full = %s, length = %d\n", 572*cdf0e10cSrcweir aInfo.stop, pBuffer, aInfo.stop - pBuffer, 573*cdf0e10cSrcweir aInfo.hit ? "true" : "false", 574*cdf0e10cSrcweir aInfo.full ? "true" : "false", 575*cdf0e10cSrcweir (int)aInfo.length ); 576*cdf0e10cSrcweir #endif 577*cdf0e10cSrcweir } 578*cdf0e10cSrcweir catch( parser_error<const char*, const char*>& rError ) 579*cdf0e10cSrcweir { 580*cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1 581*cdf0e10cSrcweir fprintf( stderr, "parse error: %s at buffer pos %u\nobject stack:\n", 582*cdf0e10cSrcweir rError.descriptor, rError.where - pBuffer ); 583*cdf0e10cSrcweir unsigned int nElem = aGrammar.m_aObjectStack.size(); 584*cdf0e10cSrcweir for( unsigned int i = 0; i < nElem; i++ ) 585*cdf0e10cSrcweir { 586*cdf0e10cSrcweir fprintf( stderr, " %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() ); 587*cdf0e10cSrcweir } 588*cdf0e10cSrcweir #endif 589*cdf0e10cSrcweir } 590*cdf0e10cSrcweir 591*cdf0e10cSrcweir PDFEntry* pRet = NULL; 592*cdf0e10cSrcweir unsigned int nEntries = aGrammar.m_aObjectStack.size(); 593*cdf0e10cSrcweir if( nEntries == 1 ) 594*cdf0e10cSrcweir { 595*cdf0e10cSrcweir pRet = aGrammar.m_aObjectStack.back(); 596*cdf0e10cSrcweir aGrammar.m_aObjectStack.pop_back(); 597*cdf0e10cSrcweir } 598*cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1 599*cdf0e10cSrcweir else if( nEntries > 1 ) 600*cdf0e10cSrcweir fprintf( stderr, "error got %u stack objects in parse\n", nEntries ); 601*cdf0e10cSrcweir #endif 602*cdf0e10cSrcweir 603*cdf0e10cSrcweir return pRet; 604*cdf0e10cSrcweir } 605*cdf0e10cSrcweir 606*cdf0e10cSrcweir PDFEntry* PDFReader::read( const char* pFileName ) 607*cdf0e10cSrcweir { 608*cdf0e10cSrcweir #ifdef WIN32 609*cdf0e10cSrcweir /* #i106583# 610*cdf0e10cSrcweir since converting to boost 1.39 file_iterator does not work anymore on all Windows systems 611*cdf0e10cSrcweir C++ stdlib istream_iterator does not allow "-" apparently 612*cdf0e10cSrcweir using spirit 2.0 doesn't work in our environment with the MSC 613*cdf0e10cSrcweir 614*cdf0e10cSrcweir So for the time being bite the bullet and read the whole file. 615*cdf0e10cSrcweir FIXME: give Spirit 2.x another try when we upgrade boost again. 616*cdf0e10cSrcweir */ 617*cdf0e10cSrcweir PDFEntry* pRet = NULL; 618*cdf0e10cSrcweir FILE* fp = fopen( pFileName, "rb" ); 619*cdf0e10cSrcweir if( fp ) 620*cdf0e10cSrcweir { 621*cdf0e10cSrcweir fseek( fp, 0, SEEK_END ); 622*cdf0e10cSrcweir unsigned int nLen = (unsigned int)ftell( fp ); 623*cdf0e10cSrcweir fseek( fp, 0, SEEK_SET ); 624*cdf0e10cSrcweir char* pBuf = (char*)rtl_allocateMemory( nLen ); 625*cdf0e10cSrcweir if( pBuf ) 626*cdf0e10cSrcweir { 627*cdf0e10cSrcweir fread( pBuf, 1, nLen, fp ); 628*cdf0e10cSrcweir pRet = read( pBuf, nLen ); 629*cdf0e10cSrcweir rtl_freeMemory( pBuf ); 630*cdf0e10cSrcweir } 631*cdf0e10cSrcweir fclose( fp ); 632*cdf0e10cSrcweir } 633*cdf0e10cSrcweir return pRet; 634*cdf0e10cSrcweir #else 635*cdf0e10cSrcweir file_iterator<> file_start( pFileName ); 636*cdf0e10cSrcweir if( ! file_start ) 637*cdf0e10cSrcweir return NULL; 638*cdf0e10cSrcweir file_iterator<> file_end = file_start.make_end(); 639*cdf0e10cSrcweir PDFGrammar< file_iterator<> > aGrammar( file_start ); 640*cdf0e10cSrcweir 641*cdf0e10cSrcweir try 642*cdf0e10cSrcweir { 643*cdf0e10cSrcweir boost::spirit::parse_info< file_iterator<> > aInfo = 644*cdf0e10cSrcweir boost::spirit::parse( file_start, 645*cdf0e10cSrcweir file_end, 646*cdf0e10cSrcweir aGrammar, 647*cdf0e10cSrcweir boost::spirit::space_p ); 648*cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1 649*cdf0e10cSrcweir fprintf( stderr, "parseinfo: stop at offset = %d, hit = %s, full = %s, length = %d\n", 650*cdf0e10cSrcweir aInfo.stop - file_start, 651*cdf0e10cSrcweir aInfo.hit ? "true" : "false", 652*cdf0e10cSrcweir aInfo.full ? "true" : "false", 653*cdf0e10cSrcweir (int)aInfo.length ); 654*cdf0e10cSrcweir #endif 655*cdf0e10cSrcweir } 656*cdf0e10cSrcweir catch( parser_error< const char*, file_iterator<> >& rError ) 657*cdf0e10cSrcweir { 658*cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1 659*cdf0e10cSrcweir fprintf( stderr, "parse error: %s at buffer pos %u\nobject stack:\n", 660*cdf0e10cSrcweir rError.descriptor, rError.where - file_start ); 661*cdf0e10cSrcweir unsigned int nElem = aGrammar.m_aObjectStack.size(); 662*cdf0e10cSrcweir for( unsigned int i = 0; i < nElem; i++ ) 663*cdf0e10cSrcweir { 664*cdf0e10cSrcweir fprintf( stderr, " %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() ); 665*cdf0e10cSrcweir } 666*cdf0e10cSrcweir #endif 667*cdf0e10cSrcweir } 668*cdf0e10cSrcweir 669*cdf0e10cSrcweir PDFEntry* pRet = NULL; 670*cdf0e10cSrcweir unsigned int nEntries = aGrammar.m_aObjectStack.size(); 671*cdf0e10cSrcweir if( nEntries == 1 ) 672*cdf0e10cSrcweir { 673*cdf0e10cSrcweir pRet = aGrammar.m_aObjectStack.back(); 674*cdf0e10cSrcweir aGrammar.m_aObjectStack.pop_back(); 675*cdf0e10cSrcweir } 676*cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1 677*cdf0e10cSrcweir else if( nEntries > 1 ) 678*cdf0e10cSrcweir { 679*cdf0e10cSrcweir fprintf( stderr, "error got %u stack objects in parse\n", nEntries ); 680*cdf0e10cSrcweir for( unsigned int i = 0; i < nEntries; i++ ) 681*cdf0e10cSrcweir { 682*cdf0e10cSrcweir fprintf( stderr, "%s\n", typeid(*aGrammar.m_aObjectStack[i]).name() ); 683*cdf0e10cSrcweir PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]); 684*cdf0e10cSrcweir if( pObj ) 685*cdf0e10cSrcweir fprintf( stderr, " -> object %d generation %d\n", pObj->m_nNumber, pObj->m_nGeneration ); 686*cdf0e10cSrcweir else 687*cdf0e10cSrcweir fprintf( stderr, "(type %s)\n", typeid(*aGrammar.m_aObjectStack[i]).name() ); 688*cdf0e10cSrcweir } 689*cdf0e10cSrcweir } 690*cdf0e10cSrcweir #endif 691*cdf0e10cSrcweir return pRet; 692*cdf0e10cSrcweir #endif // WIN32 693*cdf0e10cSrcweir } 694*cdf0e10cSrcweir 695*cdf0e10cSrcweir #if defined __SUNPRO_CC 696*cdf0e10cSrcweir #pragma enable_warn 697*cdf0e10cSrcweir #elif defined _MSC_VER 698*cdf0e10cSrcweir #pragma warning(pop) 699*cdf0e10cSrcweir #endif 700*cdf0e10cSrcweir 701*cdf0e10cSrcweir 702