1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_sdext.hxx" 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include <stdio.h> 32*cdf0e10cSrcweir #include <sal/main.h> 33*cdf0e10cSrcweir #include <osl/file.h> 34*cdf0e10cSrcweir #include <osl/thread.h> 35*cdf0e10cSrcweir #include <rtl/alloc.h> 36*cdf0e10cSrcweir #include <rtl/ustring.hxx> 37*cdf0e10cSrcweir #include <rtl/strbuf.hxx> 38*cdf0e10cSrcweir 39*cdf0e10cSrcweir #include "pdfparse.hxx" 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir using namespace rtl; 42*cdf0e10cSrcweir using namespace pdfparse; 43*cdf0e10cSrcweir 44*cdf0e10cSrcweir void printHelp( const char* pExe ) 45*cdf0e10cSrcweir { 46*cdf0e10cSrcweir fprintf( stdout, 47*cdf0e10cSrcweir "USAGE: %s [-h,--help]\n" 48*cdf0e10cSrcweir " %s [-pw, --password <password>] <inputfile> [<outputfile>]\n" 49*cdf0e10cSrcweir " %s <-a, --extract-add-streams> [-pw, --password <password>] <inputfile> [<outputfile>]\n" 50*cdf0e10cSrcweir " %s <-f, --extract-fonts> [-pw, --password <password>] <inputfile> [<outputfile>]\n" 51*cdf0e10cSrcweir " %s <-o, --extract-objects> <o0>[:<g0>][,<o1>[:g1][,...]] [-pw, --password <password>] <inputfile> [<outputfile>]\n" 52*cdf0e10cSrcweir " -h, --help: show help\n" 53*cdf0e10cSrcweir " -a, --extract-add-streams: extracts additional streams to outputfile_object\n" 54*cdf0e10cSrcweir " and prints the mimetype found to stdout\n" 55*cdf0e10cSrcweir " -f, --extract-fonts: extracts fonts (currently only type1 and truetype are supported\n" 56*cdf0e10cSrcweir " -o, --extract-objects: extracts object streams, the syntax of the argument is comma separated\n" 57*cdf0e10cSrcweir " object numbers, where object number and generation number are separated by \':\'\n" 58*cdf0e10cSrcweir " an omitted generation number defaults to 0\n" 59*cdf0e10cSrcweir " -pw, --password: use password for decryption\n" 60*cdf0e10cSrcweir "\n" 61*cdf0e10cSrcweir "note: -f, -a, -o and normal unzip operation are mutually exclusive\n" 62*cdf0e10cSrcweir , pExe, pExe, pExe, pExe, pExe ); 63*cdf0e10cSrcweir } 64*cdf0e10cSrcweir 65*cdf0e10cSrcweir class FileEmitContext : public EmitContext 66*cdf0e10cSrcweir { 67*cdf0e10cSrcweir oslFileHandle m_aHandle; 68*cdf0e10cSrcweir oslFileHandle m_aReadHandle; 69*cdf0e10cSrcweir unsigned int m_nReadLen; 70*cdf0e10cSrcweir 71*cdf0e10cSrcweir void openReadFile( const char* pOrigName ); 72*cdf0e10cSrcweir 73*cdf0e10cSrcweir public: 74*cdf0e10cSrcweir FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop ); 75*cdf0e10cSrcweir virtual ~FileEmitContext(); 76*cdf0e10cSrcweir 77*cdf0e10cSrcweir virtual bool write( const void* pBuf, unsigned int nLen ) throw(); 78*cdf0e10cSrcweir virtual unsigned int getCurPos() throw(); 79*cdf0e10cSrcweir virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw(); 80*cdf0e10cSrcweir virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw(); 81*cdf0e10cSrcweir }; 82*cdf0e10cSrcweir 83*cdf0e10cSrcweir FileEmitContext::FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop ) 84*cdf0e10cSrcweir : EmitContext( pTop ), 85*cdf0e10cSrcweir m_aHandle( NULL ), 86*cdf0e10cSrcweir m_aReadHandle( NULL ), 87*cdf0e10cSrcweir m_nReadLen( 0 ) 88*cdf0e10cSrcweir { 89*cdf0e10cSrcweir OUString aSysFile( OStringToOUString( OString( pFileName ), osl_getThreadTextEncoding() ) ); 90*cdf0e10cSrcweir OUString aURL; 91*cdf0e10cSrcweir if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None ) 92*cdf0e10cSrcweir { 93*cdf0e10cSrcweir fprintf( stderr, "filename conversion \"%s\" failed\n", pFileName ); 94*cdf0e10cSrcweir return; 95*cdf0e10cSrcweir } 96*cdf0e10cSrcweir 97*cdf0e10cSrcweir if( osl_openFile( aURL.pData, &m_aHandle, osl_File_OpenFlag_Write ) == osl_File_E_None ) 98*cdf0e10cSrcweir { 99*cdf0e10cSrcweir if( osl_setFileSize( m_aHandle, 0 ) != osl_File_E_None ) 100*cdf0e10cSrcweir { 101*cdf0e10cSrcweir fprintf( stderr, "could not truncate %s\n", pFileName ); 102*cdf0e10cSrcweir osl_closeFile( m_aHandle ); 103*cdf0e10cSrcweir m_aHandle = NULL; 104*cdf0e10cSrcweir } 105*cdf0e10cSrcweir } 106*cdf0e10cSrcweir else if( osl_openFile( aURL.pData, &m_aHandle, 107*cdf0e10cSrcweir osl_File_OpenFlag_Write |osl_File_OpenFlag_Create ) != osl_File_E_None ) 108*cdf0e10cSrcweir { 109*cdf0e10cSrcweir fprintf( stderr, "could not open %s\n", pFileName ); 110*cdf0e10cSrcweir return; 111*cdf0e10cSrcweir } 112*cdf0e10cSrcweir m_bDeflate = true; 113*cdf0e10cSrcweir 114*cdf0e10cSrcweir openReadFile( pOrigName ); 115*cdf0e10cSrcweir } 116*cdf0e10cSrcweir 117*cdf0e10cSrcweir FileEmitContext::~FileEmitContext() 118*cdf0e10cSrcweir { 119*cdf0e10cSrcweir if( m_aHandle ) 120*cdf0e10cSrcweir osl_closeFile( m_aHandle ); 121*cdf0e10cSrcweir if( m_aReadHandle ) 122*cdf0e10cSrcweir osl_closeFile( m_aReadHandle ); 123*cdf0e10cSrcweir } 124*cdf0e10cSrcweir 125*cdf0e10cSrcweir void FileEmitContext::openReadFile( const char* pInFile ) 126*cdf0e10cSrcweir { 127*cdf0e10cSrcweir OUString aSysFile( OStringToOUString( OString( pInFile ), osl_getThreadTextEncoding() ) ); 128*cdf0e10cSrcweir OUString aURL; 129*cdf0e10cSrcweir if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None ) 130*cdf0e10cSrcweir { 131*cdf0e10cSrcweir fprintf( stderr, "filename conversion \"%s\" failed\n", pInFile ); 132*cdf0e10cSrcweir return; 133*cdf0e10cSrcweir } 134*cdf0e10cSrcweir 135*cdf0e10cSrcweir if( osl_openFile( aURL.pData, &m_aReadHandle, osl_File_OpenFlag_Read ) != osl_File_E_None ) 136*cdf0e10cSrcweir { 137*cdf0e10cSrcweir fprintf( stderr, "could not open %s\n", pInFile ); 138*cdf0e10cSrcweir return; 139*cdf0e10cSrcweir } 140*cdf0e10cSrcweir 141*cdf0e10cSrcweir if( osl_setFilePos( m_aReadHandle, osl_Pos_End, 0 ) != osl_File_E_None ) 142*cdf0e10cSrcweir { 143*cdf0e10cSrcweir fprintf( stderr, "could not seek to end of %s\n", pInFile ); 144*cdf0e10cSrcweir osl_closeFile( m_aReadHandle ); 145*cdf0e10cSrcweir return; 146*cdf0e10cSrcweir } 147*cdf0e10cSrcweir 148*cdf0e10cSrcweir sal_uInt64 nFileSize = 0; 149*cdf0e10cSrcweir if( osl_getFilePos( m_aReadHandle, &nFileSize ) != osl_File_E_None ) 150*cdf0e10cSrcweir { 151*cdf0e10cSrcweir fprintf( stderr, "could not get end pos of %s\n", pInFile ); 152*cdf0e10cSrcweir osl_closeFile( m_aReadHandle ); 153*cdf0e10cSrcweir return; 154*cdf0e10cSrcweir } 155*cdf0e10cSrcweir 156*cdf0e10cSrcweir m_nReadLen = static_cast<unsigned int>(nFileSize); 157*cdf0e10cSrcweir } 158*cdf0e10cSrcweir 159*cdf0e10cSrcweir bool FileEmitContext::write( const void* pBuf, unsigned int nLen ) throw() 160*cdf0e10cSrcweir { 161*cdf0e10cSrcweir if( ! m_aHandle ) 162*cdf0e10cSrcweir return false; 163*cdf0e10cSrcweir 164*cdf0e10cSrcweir sal_uInt64 nWrite = static_cast<sal_uInt64>(nLen); 165*cdf0e10cSrcweir sal_uInt64 nWritten = 0; 166*cdf0e10cSrcweir return (osl_writeFile( m_aHandle, pBuf, nWrite, &nWritten ) == osl_File_E_None) 167*cdf0e10cSrcweir && nWrite == nWritten; 168*cdf0e10cSrcweir } 169*cdf0e10cSrcweir 170*cdf0e10cSrcweir unsigned int FileEmitContext::getCurPos() throw() 171*cdf0e10cSrcweir { 172*cdf0e10cSrcweir sal_uInt64 nFileSize = 0; 173*cdf0e10cSrcweir if( m_aHandle ) 174*cdf0e10cSrcweir { 175*cdf0e10cSrcweir if( osl_getFilePos( m_aHandle, &nFileSize ) != osl_File_E_None ) 176*cdf0e10cSrcweir nFileSize = 0; 177*cdf0e10cSrcweir } 178*cdf0e10cSrcweir return static_cast<unsigned int>(nFileSize); 179*cdf0e10cSrcweir } 180*cdf0e10cSrcweir 181*cdf0e10cSrcweir bool FileEmitContext::copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw() 182*cdf0e10cSrcweir { 183*cdf0e10cSrcweir if( nOrigOffset + nLen > m_nReadLen ) 184*cdf0e10cSrcweir return false; 185*cdf0e10cSrcweir 186*cdf0e10cSrcweir if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None ) 187*cdf0e10cSrcweir { 188*cdf0e10cSrcweir fprintf( stderr, "could not seek to offset %u\n", nOrigOffset ); 189*cdf0e10cSrcweir return false; 190*cdf0e10cSrcweir } 191*cdf0e10cSrcweir void* pBuf = rtl_allocateMemory( nLen ); 192*cdf0e10cSrcweir if( ! pBuf ) 193*cdf0e10cSrcweir return false; 194*cdf0e10cSrcweir sal_uInt64 nBytesRead = 0; 195*cdf0e10cSrcweir if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None 196*cdf0e10cSrcweir || nBytesRead != static_cast<sal_uInt64>(nLen) ) 197*cdf0e10cSrcweir { 198*cdf0e10cSrcweir fprintf( stderr, "could not read %u bytes\n", nLen ); 199*cdf0e10cSrcweir rtl_freeMemory( pBuf ); 200*cdf0e10cSrcweir return false; 201*cdf0e10cSrcweir } 202*cdf0e10cSrcweir bool bRet = write( pBuf, nLen ); 203*cdf0e10cSrcweir rtl_freeMemory( pBuf ); 204*cdf0e10cSrcweir return bRet; 205*cdf0e10cSrcweir } 206*cdf0e10cSrcweir 207*cdf0e10cSrcweir unsigned int FileEmitContext::readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw() 208*cdf0e10cSrcweir { 209*cdf0e10cSrcweir if( nOrigOffset + nLen > m_nReadLen ) 210*cdf0e10cSrcweir return 0; 211*cdf0e10cSrcweir 212*cdf0e10cSrcweir if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None ) 213*cdf0e10cSrcweir { 214*cdf0e10cSrcweir fprintf( stderr, "could not seek to offset %u\n", nOrigOffset ); 215*cdf0e10cSrcweir return 0; 216*cdf0e10cSrcweir } 217*cdf0e10cSrcweir sal_uInt64 nBytesRead = 0; 218*cdf0e10cSrcweir if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None ) 219*cdf0e10cSrcweir return 0; 220*cdf0e10cSrcweir return static_cast<unsigned int>(nBytesRead); 221*cdf0e10cSrcweir } 222*cdf0e10cSrcweir 223*cdf0e10cSrcweir typedef int(*PDFFileHdl)(const char*, const char*, PDFFile*); 224*cdf0e10cSrcweir 225*cdf0e10cSrcweir int handleFile( const char* pInFile, const char* pOutFile, const char* pPassword, PDFFileHdl pHdl ) 226*cdf0e10cSrcweir { 227*cdf0e10cSrcweir 228*cdf0e10cSrcweir PDFReader aParser; 229*cdf0e10cSrcweir int nRet = 0; 230*cdf0e10cSrcweir PDFEntry* pEntry = aParser.read( pInFile ); 231*cdf0e10cSrcweir if( pEntry ) 232*cdf0e10cSrcweir { 233*cdf0e10cSrcweir PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry); 234*cdf0e10cSrcweir if( pPDFFile ) 235*cdf0e10cSrcweir { 236*cdf0e10cSrcweir fprintf( stdout, "have a %s PDF file\n", pPDFFile->isEncrypted() ? "encrypted" : "unencrypted" ); 237*cdf0e10cSrcweir if( pPassword ) 238*cdf0e10cSrcweir fprintf( stdout, "password %s\n", 239*cdf0e10cSrcweir pPDFFile->setupDecryptionData( pPassword ) ? "matches" : "does not match" ); 240*cdf0e10cSrcweir nRet = pHdl( pInFile, pOutFile, pPDFFile ); 241*cdf0e10cSrcweir } 242*cdf0e10cSrcweir else 243*cdf0e10cSrcweir nRet = 20; 244*cdf0e10cSrcweir delete pEntry; 245*cdf0e10cSrcweir } 246*cdf0e10cSrcweir return nRet; 247*cdf0e10cSrcweir } 248*cdf0e10cSrcweir 249*cdf0e10cSrcweir int write_unzipFile( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile ) 250*cdf0e10cSrcweir { 251*cdf0e10cSrcweir FileEmitContext aContext( pOutFile, pInFile, pPDFFile ); 252*cdf0e10cSrcweir aContext.m_bDecrypt = pPDFFile->isEncrypted(); 253*cdf0e10cSrcweir pPDFFile->emit(aContext); 254*cdf0e10cSrcweir return 0; 255*cdf0e10cSrcweir } 256*cdf0e10cSrcweir 257*cdf0e10cSrcweir int write_addStreamArray( const char* pOutFile, PDFArray* pStreams, PDFFile* pPDFFile, const char* pInFile ) 258*cdf0e10cSrcweir { 259*cdf0e10cSrcweir int nRet = 0; 260*cdf0e10cSrcweir unsigned int nArrayElements = pStreams->m_aSubElements.size(); 261*cdf0e10cSrcweir for( unsigned int i = 0; i < nArrayElements-1 && nRet == 0; i++ ) 262*cdf0e10cSrcweir { 263*cdf0e10cSrcweir PDFName* pMimeType = dynamic_cast<PDFName*>(pStreams->m_aSubElements[i]); 264*cdf0e10cSrcweir PDFObjectRef* pStreamRef = dynamic_cast<PDFObjectRef*>(pStreams->m_aSubElements[i+1]); 265*cdf0e10cSrcweir if( ! pMimeType ) 266*cdf0e10cSrcweir fprintf( stderr, "error: no mimetype element\n" ); 267*cdf0e10cSrcweir if( ! pStreamRef ) 268*cdf0e10cSrcweir fprintf( stderr, "error: no stream ref element\n" ); 269*cdf0e10cSrcweir if( pMimeType && pStreamRef ) 270*cdf0e10cSrcweir { 271*cdf0e10cSrcweir fprintf( stdout, "found stream %d %d with mimetype %s\n", 272*cdf0e10cSrcweir pStreamRef->m_nNumber, pStreamRef->m_nGeneration, 273*cdf0e10cSrcweir pMimeType->m_aName.getStr() ); 274*cdf0e10cSrcweir PDFObject* pObject = pPDFFile->findObject( pStreamRef->m_nNumber, pStreamRef->m_nGeneration ); 275*cdf0e10cSrcweir if( pObject ) 276*cdf0e10cSrcweir { 277*cdf0e10cSrcweir rtl::OStringBuffer aOutStream( pOutFile ); 278*cdf0e10cSrcweir aOutStream.append( "_stream_" ); 279*cdf0e10cSrcweir aOutStream.append( sal_Int32(pStreamRef->m_nNumber) ); 280*cdf0e10cSrcweir aOutStream.append( "_" ); 281*cdf0e10cSrcweir aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) ); 282*cdf0e10cSrcweir FileEmitContext aContext( aOutStream.getStr(), pInFile, pPDFFile ); 283*cdf0e10cSrcweir aContext.m_bDecrypt = pPDFFile->isEncrypted(); 284*cdf0e10cSrcweir pObject->writeStream( aContext, pPDFFile ); 285*cdf0e10cSrcweir } 286*cdf0e10cSrcweir else 287*cdf0e10cSrcweir { 288*cdf0e10cSrcweir fprintf( stderr, "object not found\n" ); 289*cdf0e10cSrcweir nRet = 121; 290*cdf0e10cSrcweir } 291*cdf0e10cSrcweir } 292*cdf0e10cSrcweir else 293*cdf0e10cSrcweir nRet = 120; 294*cdf0e10cSrcweir } 295*cdf0e10cSrcweir return nRet; 296*cdf0e10cSrcweir } 297*cdf0e10cSrcweir 298*cdf0e10cSrcweir int write_addStreams( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile ) 299*cdf0e10cSrcweir { 300*cdf0e10cSrcweir // find all trailers 301*cdf0e10cSrcweir int nRet = 0; 302*cdf0e10cSrcweir unsigned int nElements = pPDFFile->m_aSubElements.size(); 303*cdf0e10cSrcweir for( unsigned i = 0; i < nElements && nRet == 0; i++ ) 304*cdf0e10cSrcweir { 305*cdf0e10cSrcweir PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pPDFFile->m_aSubElements[i]); 306*cdf0e10cSrcweir if( pTrailer && pTrailer->m_pDict ) 307*cdf0e10cSrcweir { 308*cdf0e10cSrcweir // search for AdditionalStreams entry 309*cdf0e10cSrcweir std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator add_stream; 310*cdf0e10cSrcweir add_stream = pTrailer->m_pDict->m_aMap.find( "AdditionalStreams" ); 311*cdf0e10cSrcweir if( add_stream != pTrailer->m_pDict->m_aMap.end() ) 312*cdf0e10cSrcweir { 313*cdf0e10cSrcweir PDFArray* pStreams = dynamic_cast<PDFArray*>(add_stream->second); 314*cdf0e10cSrcweir if( pStreams ) 315*cdf0e10cSrcweir nRet = write_addStreamArray( pOutFile, pStreams, pPDFFile, pInFile ); 316*cdf0e10cSrcweir } 317*cdf0e10cSrcweir } 318*cdf0e10cSrcweir } 319*cdf0e10cSrcweir return nRet; 320*cdf0e10cSrcweir } 321*cdf0e10cSrcweir 322*cdf0e10cSrcweir int write_fonts( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile ) 323*cdf0e10cSrcweir { 324*cdf0e10cSrcweir int nRet = 0; 325*cdf0e10cSrcweir unsigned int nElements = i_pPDFFile->m_aSubElements.size(); 326*cdf0e10cSrcweir for( unsigned i = 0; i < nElements && nRet == 0; i++ ) 327*cdf0e10cSrcweir { 328*cdf0e10cSrcweir // search FontDescriptors 329*cdf0e10cSrcweir PDFObject* pObj = dynamic_cast<PDFObject*>(i_pPDFFile->m_aSubElements[i]); 330*cdf0e10cSrcweir if( ! pObj ) 331*cdf0e10cSrcweir continue; 332*cdf0e10cSrcweir PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject); 333*cdf0e10cSrcweir if( ! pDict ) 334*cdf0e10cSrcweir continue; 335*cdf0e10cSrcweir 336*cdf0e10cSrcweir std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator map_it = 337*cdf0e10cSrcweir pDict->m_aMap.find( "Type" ); 338*cdf0e10cSrcweir if( map_it == pDict->m_aMap.end() ) 339*cdf0e10cSrcweir continue; 340*cdf0e10cSrcweir 341*cdf0e10cSrcweir PDFName* pName = dynamic_cast<PDFName*>(map_it->second); 342*cdf0e10cSrcweir if( ! pName ) 343*cdf0e10cSrcweir continue; 344*cdf0e10cSrcweir if( ! pName->m_aName.equals( "FontDescriptor" ) ) 345*cdf0e10cSrcweir continue; 346*cdf0e10cSrcweir 347*cdf0e10cSrcweir // the font name will be helpful, also there must be one in 348*cdf0e10cSrcweir // a font descriptor 349*cdf0e10cSrcweir map_it = pDict->m_aMap.find( "FontName" ); 350*cdf0e10cSrcweir if( map_it == pDict->m_aMap.end() ) 351*cdf0e10cSrcweir continue; 352*cdf0e10cSrcweir pName = dynamic_cast<PDFName*>(map_it->second); 353*cdf0e10cSrcweir if( ! pName ) 354*cdf0e10cSrcweir continue; 355*cdf0e10cSrcweir rtl::OString aFontName( pName->m_aName ); 356*cdf0e10cSrcweir 357*cdf0e10cSrcweir PDFObjectRef* pStreamRef = 0; 358*cdf0e10cSrcweir const char* pFileType = NULL; 359*cdf0e10cSrcweir // we have a font descriptor, try for a type 1 font 360*cdf0e10cSrcweir map_it = pDict->m_aMap.find( "FontFile" ); 361*cdf0e10cSrcweir if( map_it != pDict->m_aMap.end() ) 362*cdf0e10cSrcweir { 363*cdf0e10cSrcweir pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second); 364*cdf0e10cSrcweir if( pStreamRef ) 365*cdf0e10cSrcweir pFileType = "pfa"; 366*cdf0e10cSrcweir } 367*cdf0e10cSrcweir 368*cdf0e10cSrcweir // perhaps it's a truetype file ? 369*cdf0e10cSrcweir if( ! pStreamRef ) 370*cdf0e10cSrcweir { 371*cdf0e10cSrcweir map_it = pDict->m_aMap.find( "FontFile2" ); 372*cdf0e10cSrcweir if( map_it != pDict->m_aMap.end() ) 373*cdf0e10cSrcweir { 374*cdf0e10cSrcweir pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second); 375*cdf0e10cSrcweir if( pStreamRef ) 376*cdf0e10cSrcweir pFileType = "ttf"; 377*cdf0e10cSrcweir } 378*cdf0e10cSrcweir } 379*cdf0e10cSrcweir 380*cdf0e10cSrcweir if( ! pStreamRef ) 381*cdf0e10cSrcweir continue; 382*cdf0e10cSrcweir 383*cdf0e10cSrcweir PDFObject* pStream = i_pPDFFile->findObject( pStreamRef ); 384*cdf0e10cSrcweir if( ! pStream ) 385*cdf0e10cSrcweir continue; 386*cdf0e10cSrcweir 387*cdf0e10cSrcweir rtl::OStringBuffer aOutStream( i_pOutFile ); 388*cdf0e10cSrcweir aOutStream.append( "_font_" ); 389*cdf0e10cSrcweir aOutStream.append( sal_Int32(pStreamRef->m_nNumber) ); 390*cdf0e10cSrcweir aOutStream.append( "_" ); 391*cdf0e10cSrcweir aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) ); 392*cdf0e10cSrcweir aOutStream.append( "_" ); 393*cdf0e10cSrcweir aOutStream.append( aFontName ); 394*cdf0e10cSrcweir if( pFileType ) 395*cdf0e10cSrcweir { 396*cdf0e10cSrcweir aOutStream.append( "." ); 397*cdf0e10cSrcweir aOutStream.append( pFileType ); 398*cdf0e10cSrcweir } 399*cdf0e10cSrcweir FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile ); 400*cdf0e10cSrcweir aContext.m_bDecrypt = i_pPDFFile->isEncrypted(); 401*cdf0e10cSrcweir pStream->writeStream( aContext, i_pPDFFile ); 402*cdf0e10cSrcweir } 403*cdf0e10cSrcweir return nRet; 404*cdf0e10cSrcweir } 405*cdf0e10cSrcweir 406*cdf0e10cSrcweir std::vector< std::pair< sal_Int32, sal_Int32 > > s_aEmitObjects; 407*cdf0e10cSrcweir 408*cdf0e10cSrcweir int write_objects( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile ) 409*cdf0e10cSrcweir { 410*cdf0e10cSrcweir int nRet = 0; 411*cdf0e10cSrcweir unsigned int nElements = s_aEmitObjects.size(); 412*cdf0e10cSrcweir for( unsigned i = 0; i < nElements && nRet == 0; i++ ) 413*cdf0e10cSrcweir { 414*cdf0e10cSrcweir sal_Int32 nObject = s_aEmitObjects[i].first; 415*cdf0e10cSrcweir sal_Int32 nGeneration = s_aEmitObjects[i].second; 416*cdf0e10cSrcweir PDFObject* pStream = i_pPDFFile->findObject( nObject, nGeneration ); 417*cdf0e10cSrcweir if( ! pStream ) 418*cdf0e10cSrcweir { 419*cdf0e10cSrcweir fprintf( stderr, "object %d %d not found !\n", (int)nObject, (int)nGeneration ); 420*cdf0e10cSrcweir continue; 421*cdf0e10cSrcweir } 422*cdf0e10cSrcweir 423*cdf0e10cSrcweir rtl::OStringBuffer aOutStream( i_pOutFile ); 424*cdf0e10cSrcweir aOutStream.append( "_stream_" ); 425*cdf0e10cSrcweir aOutStream.append( nObject ); 426*cdf0e10cSrcweir aOutStream.append( "_" ); 427*cdf0e10cSrcweir aOutStream.append( nGeneration ); 428*cdf0e10cSrcweir FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile ); 429*cdf0e10cSrcweir aContext.m_bDecrypt = i_pPDFFile->isEncrypted(); 430*cdf0e10cSrcweir pStream->writeStream( aContext, i_pPDFFile ); 431*cdf0e10cSrcweir } 432*cdf0e10cSrcweir return nRet; 433*cdf0e10cSrcweir } 434*cdf0e10cSrcweir 435*cdf0e10cSrcweir SAL_IMPLEMENT_MAIN_WITH_ARGS( argc, argv ) 436*cdf0e10cSrcweir { 437*cdf0e10cSrcweir const char* pInFile = NULL; 438*cdf0e10cSrcweir const char* pOutFile = NULL; 439*cdf0e10cSrcweir const char* pPassword = NULL; 440*cdf0e10cSrcweir OStringBuffer aOutFile( 256 ); 441*cdf0e10cSrcweir PDFFileHdl aHdl = write_unzipFile; 442*cdf0e10cSrcweir 443*cdf0e10cSrcweir for( int nArg = 1; nArg < argc; nArg++ ) 444*cdf0e10cSrcweir { 445*cdf0e10cSrcweir if( argv[nArg][0] == '-' ) 446*cdf0e10cSrcweir { 447*cdf0e10cSrcweir if( ! rtl_str_compare( "-pw", argv[nArg] ) || 448*cdf0e10cSrcweir ! rtl_str_compare( "--password" , argv[nArg] ) ) 449*cdf0e10cSrcweir { 450*cdf0e10cSrcweir if( nArg == argc-1 ) 451*cdf0e10cSrcweir { 452*cdf0e10cSrcweir fprintf( stderr, "no password given\n" ); 453*cdf0e10cSrcweir return 1; 454*cdf0e10cSrcweir } 455*cdf0e10cSrcweir nArg++; 456*cdf0e10cSrcweir pPassword = argv[nArg]; 457*cdf0e10cSrcweir } 458*cdf0e10cSrcweir else if( ! rtl_str_compare( "-h", argv[nArg] ) || 459*cdf0e10cSrcweir ! rtl_str_compare( "--help", argv[nArg] ) ) 460*cdf0e10cSrcweir { 461*cdf0e10cSrcweir printHelp( argv[0] ); 462*cdf0e10cSrcweir return 0; 463*cdf0e10cSrcweir } 464*cdf0e10cSrcweir else if( ! rtl_str_compare( "-a", argv[nArg] ) || 465*cdf0e10cSrcweir ! rtl_str_compare( "--extract-add-streams", argv[nArg] ) ) 466*cdf0e10cSrcweir { 467*cdf0e10cSrcweir aHdl = write_addStreams; 468*cdf0e10cSrcweir } 469*cdf0e10cSrcweir else if( ! rtl_str_compare( "-f", argv[nArg] ) || 470*cdf0e10cSrcweir ! rtl_str_compare( "--extract-fonts", argv[nArg] ) ) 471*cdf0e10cSrcweir { 472*cdf0e10cSrcweir aHdl = write_fonts; 473*cdf0e10cSrcweir } 474*cdf0e10cSrcweir else if( ! rtl_str_compare( "-o", argv[nArg] ) || 475*cdf0e10cSrcweir ! rtl_str_compare( "--extract-objects", argv[nArg] ) ) 476*cdf0e10cSrcweir { 477*cdf0e10cSrcweir aHdl = write_objects; 478*cdf0e10cSrcweir nArg++; 479*cdf0e10cSrcweir if( nArg < argc ) 480*cdf0e10cSrcweir { 481*cdf0e10cSrcweir rtl::OString aObjs( argv[nArg] ); 482*cdf0e10cSrcweir sal_Int32 nIndex = 0; 483*cdf0e10cSrcweir while( nIndex != -1 ) 484*cdf0e10cSrcweir { 485*cdf0e10cSrcweir rtl::OString aToken( aObjs.getToken( 0, ',', nIndex ) ); 486*cdf0e10cSrcweir sal_Int32 nObject = 0; 487*cdf0e10cSrcweir sal_Int32 nGeneration = 0; 488*cdf0e10cSrcweir sal_Int32 nGenIndex = 0; 489*cdf0e10cSrcweir nObject = aToken.getToken( 0, ':', nGenIndex ).toInt32(); 490*cdf0e10cSrcweir if( nGenIndex != -1 ) 491*cdf0e10cSrcweir nGeneration = aToken.getToken( 0, ':', nGenIndex ).toInt32(); 492*cdf0e10cSrcweir s_aEmitObjects.push_back( std::pair<sal_Int32,sal_Int32>(nObject,nGeneration) ); 493*cdf0e10cSrcweir } 494*cdf0e10cSrcweir } 495*cdf0e10cSrcweir } 496*cdf0e10cSrcweir else 497*cdf0e10cSrcweir { 498*cdf0e10cSrcweir fprintf( stderr, "unrecognized option \"%s\"\n", 499*cdf0e10cSrcweir argv[nArg] ); 500*cdf0e10cSrcweir printHelp( argv[0] ); 501*cdf0e10cSrcweir return 1; 502*cdf0e10cSrcweir } 503*cdf0e10cSrcweir } 504*cdf0e10cSrcweir else if( pInFile == NULL ) 505*cdf0e10cSrcweir pInFile = argv[nArg]; 506*cdf0e10cSrcweir else if( pOutFile == NULL ) 507*cdf0e10cSrcweir pOutFile = argv[nArg]; 508*cdf0e10cSrcweir } 509*cdf0e10cSrcweir if( ! pInFile ) 510*cdf0e10cSrcweir { 511*cdf0e10cSrcweir fprintf( stderr, "no input file given\n" ); 512*cdf0e10cSrcweir return 10; 513*cdf0e10cSrcweir } 514*cdf0e10cSrcweir if( ! pOutFile ) 515*cdf0e10cSrcweir { 516*cdf0e10cSrcweir OString aFile( pInFile ); 517*cdf0e10cSrcweir if( aFile.getLength() > 0 ) 518*cdf0e10cSrcweir { 519*cdf0e10cSrcweir if( aFile.getLength() > 4 ) 520*cdf0e10cSrcweir { 521*cdf0e10cSrcweir if( aFile.matchIgnoreAsciiCase( OString( ".pdf" ), aFile.getLength()-4 ) ) 522*cdf0e10cSrcweir aOutFile.append( pInFile, aFile.getLength() - 4 ); 523*cdf0e10cSrcweir else 524*cdf0e10cSrcweir aOutFile.append( aFile ); 525*cdf0e10cSrcweir } 526*cdf0e10cSrcweir aOutFile.append( "_unzip.pdf" ); 527*cdf0e10cSrcweir pOutFile = aOutFile.getStr(); 528*cdf0e10cSrcweir } 529*cdf0e10cSrcweir else 530*cdf0e10cSrcweir { 531*cdf0e10cSrcweir fprintf( stderr, "no output file given\n" ); 532*cdf0e10cSrcweir return 11; 533*cdf0e10cSrcweir } 534*cdf0e10cSrcweir } 535*cdf0e10cSrcweir 536*cdf0e10cSrcweir return handleFile( pInFile, pOutFile, pPassword, aHdl ); 537*cdf0e10cSrcweir } 538*cdf0e10cSrcweir 539