xref: /AOO41X/main/sdext/source/pdfimport/test/pdfunzip.cxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_sdext.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <stdio.h>
32*cdf0e10cSrcweir #include <sal/main.h>
33*cdf0e10cSrcweir #include <osl/file.h>
34*cdf0e10cSrcweir #include <osl/thread.h>
35*cdf0e10cSrcweir #include <rtl/alloc.h>
36*cdf0e10cSrcweir #include <rtl/ustring.hxx>
37*cdf0e10cSrcweir #include <rtl/strbuf.hxx>
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir #include "pdfparse.hxx"
40*cdf0e10cSrcweir 
41*cdf0e10cSrcweir using namespace rtl;
42*cdf0e10cSrcweir using namespace pdfparse;
43*cdf0e10cSrcweir 
/** Prints the command line usage summary to stdout.

    @param pExe
    name of the executable; it is inserted into each of the five usage lines
 */
void printHelp( const char* pExe )
{
    fprintf( stdout,
    "USAGE: %s [-h,--help]\n"
    "       %s [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "       %s <-a, --extract-add-streams> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "       %s <-f, --extract-fonts> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "       %s <-o, --extract-objects> <o0>[:<g0>][,<o1>[:g1][,...]] [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "  -h, --help: show help\n"
    "  -a, --extract-add-streams: extracts additional streams to outputfile_object\n"
    "      and prints the mimetype found to stdout\n"
    // the closing parenthesis was missing in the original help text
    "  -f, --extract-fonts: extracts fonts (currently only type1 and truetype are supported)\n"
    "  -o, --extract-objects: extracts object streams, the syntax of the argument is comma separated\n"
    "      object numbers, where object number and generation number are separated by \':\'\n"
    "      an omitted generation number defaults to 0\n"
    "  -pw, --password: use password for decryption\n"
    "\n"
    "note: -f, -a, -o and normal unzip operation are mutually exclusive\n"
    , pExe, pExe, pExe, pExe, pExe );
}
64*cdf0e10cSrcweir 
65*cdf0e10cSrcweir class FileEmitContext : public EmitContext
66*cdf0e10cSrcweir {
67*cdf0e10cSrcweir     oslFileHandle m_aHandle;
68*cdf0e10cSrcweir     oslFileHandle m_aReadHandle;
69*cdf0e10cSrcweir     unsigned int  m_nReadLen;
70*cdf0e10cSrcweir 
71*cdf0e10cSrcweir     void openReadFile( const char* pOrigName );
72*cdf0e10cSrcweir 
73*cdf0e10cSrcweir     public:
74*cdf0e10cSrcweir     FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop );
75*cdf0e10cSrcweir     virtual ~FileEmitContext();
76*cdf0e10cSrcweir 
77*cdf0e10cSrcweir     virtual bool write( const void* pBuf, unsigned int nLen ) throw();
78*cdf0e10cSrcweir     virtual unsigned int getCurPos() throw();
79*cdf0e10cSrcweir     virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw();
80*cdf0e10cSrcweir     virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw();
81*cdf0e10cSrcweir };
82*cdf0e10cSrcweir 
83*cdf0e10cSrcweir FileEmitContext::FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop )
84*cdf0e10cSrcweir     : EmitContext( pTop ),
85*cdf0e10cSrcweir       m_aHandle( NULL ),
86*cdf0e10cSrcweir       m_aReadHandle( NULL ),
87*cdf0e10cSrcweir       m_nReadLen( 0 )
88*cdf0e10cSrcweir {
89*cdf0e10cSrcweir     OUString aSysFile( OStringToOUString( OString( pFileName ), osl_getThreadTextEncoding() ) );
90*cdf0e10cSrcweir     OUString aURL;
91*cdf0e10cSrcweir     if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
92*cdf0e10cSrcweir     {
93*cdf0e10cSrcweir         fprintf( stderr, "filename conversion \"%s\" failed\n", pFileName );
94*cdf0e10cSrcweir         return;
95*cdf0e10cSrcweir     }
96*cdf0e10cSrcweir 
97*cdf0e10cSrcweir     if( osl_openFile( aURL.pData, &m_aHandle, osl_File_OpenFlag_Write ) == osl_File_E_None )
98*cdf0e10cSrcweir     {
99*cdf0e10cSrcweir         if( osl_setFileSize( m_aHandle, 0 ) != osl_File_E_None )
100*cdf0e10cSrcweir         {
101*cdf0e10cSrcweir             fprintf( stderr, "could not truncate %s\n", pFileName );
102*cdf0e10cSrcweir             osl_closeFile( m_aHandle );
103*cdf0e10cSrcweir             m_aHandle = NULL;
104*cdf0e10cSrcweir         }
105*cdf0e10cSrcweir     }
106*cdf0e10cSrcweir     else if( osl_openFile( aURL.pData, &m_aHandle,
107*cdf0e10cSrcweir             osl_File_OpenFlag_Write |osl_File_OpenFlag_Create ) != osl_File_E_None )
108*cdf0e10cSrcweir     {
109*cdf0e10cSrcweir         fprintf( stderr, "could not open %s\n", pFileName );
110*cdf0e10cSrcweir         return;
111*cdf0e10cSrcweir     }
112*cdf0e10cSrcweir     m_bDeflate = true;
113*cdf0e10cSrcweir 
114*cdf0e10cSrcweir     openReadFile( pOrigName );
115*cdf0e10cSrcweir }
116*cdf0e10cSrcweir 
117*cdf0e10cSrcweir FileEmitContext::~FileEmitContext()
118*cdf0e10cSrcweir {
119*cdf0e10cSrcweir     if( m_aHandle )
120*cdf0e10cSrcweir         osl_closeFile( m_aHandle );
121*cdf0e10cSrcweir     if( m_aReadHandle )
122*cdf0e10cSrcweir         osl_closeFile( m_aReadHandle );
123*cdf0e10cSrcweir }
124*cdf0e10cSrcweir 
125*cdf0e10cSrcweir void FileEmitContext::openReadFile( const char* pInFile )
126*cdf0e10cSrcweir {
127*cdf0e10cSrcweir     OUString aSysFile( OStringToOUString( OString( pInFile ), osl_getThreadTextEncoding() ) );
128*cdf0e10cSrcweir     OUString aURL;
129*cdf0e10cSrcweir     if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
130*cdf0e10cSrcweir     {
131*cdf0e10cSrcweir         fprintf( stderr, "filename conversion \"%s\" failed\n", pInFile );
132*cdf0e10cSrcweir         return;
133*cdf0e10cSrcweir     }
134*cdf0e10cSrcweir 
135*cdf0e10cSrcweir     if( osl_openFile( aURL.pData, &m_aReadHandle, osl_File_OpenFlag_Read ) != osl_File_E_None )
136*cdf0e10cSrcweir     {
137*cdf0e10cSrcweir         fprintf( stderr, "could not open %s\n", pInFile );
138*cdf0e10cSrcweir         return;
139*cdf0e10cSrcweir     }
140*cdf0e10cSrcweir 
141*cdf0e10cSrcweir     if( osl_setFilePos( m_aReadHandle, osl_Pos_End, 0 ) != osl_File_E_None )
142*cdf0e10cSrcweir     {
143*cdf0e10cSrcweir         fprintf( stderr, "could not seek to end of %s\n", pInFile );
144*cdf0e10cSrcweir         osl_closeFile( m_aReadHandle );
145*cdf0e10cSrcweir         return;
146*cdf0e10cSrcweir     }
147*cdf0e10cSrcweir 
148*cdf0e10cSrcweir     sal_uInt64 nFileSize = 0;
149*cdf0e10cSrcweir     if( osl_getFilePos( m_aReadHandle, &nFileSize ) != osl_File_E_None )
150*cdf0e10cSrcweir     {
151*cdf0e10cSrcweir         fprintf( stderr, "could not get end pos of %s\n", pInFile );
152*cdf0e10cSrcweir         osl_closeFile( m_aReadHandle );
153*cdf0e10cSrcweir         return;
154*cdf0e10cSrcweir     }
155*cdf0e10cSrcweir 
156*cdf0e10cSrcweir     m_nReadLen = static_cast<unsigned int>(nFileSize);
157*cdf0e10cSrcweir }
158*cdf0e10cSrcweir 
159*cdf0e10cSrcweir bool FileEmitContext::write( const void* pBuf, unsigned int nLen ) throw()
160*cdf0e10cSrcweir {
161*cdf0e10cSrcweir     if( ! m_aHandle )
162*cdf0e10cSrcweir         return false;
163*cdf0e10cSrcweir 
164*cdf0e10cSrcweir     sal_uInt64 nWrite = static_cast<sal_uInt64>(nLen);
165*cdf0e10cSrcweir     sal_uInt64 nWritten = 0;
166*cdf0e10cSrcweir     return (osl_writeFile( m_aHandle, pBuf, nWrite, &nWritten ) == osl_File_E_None)
167*cdf0e10cSrcweir            && nWrite == nWritten;
168*cdf0e10cSrcweir }
169*cdf0e10cSrcweir 
170*cdf0e10cSrcweir unsigned int FileEmitContext::getCurPos() throw()
171*cdf0e10cSrcweir {
172*cdf0e10cSrcweir     sal_uInt64 nFileSize = 0;
173*cdf0e10cSrcweir     if( m_aHandle )
174*cdf0e10cSrcweir     {
175*cdf0e10cSrcweir         if( osl_getFilePos( m_aHandle, &nFileSize ) != osl_File_E_None )
176*cdf0e10cSrcweir             nFileSize = 0;
177*cdf0e10cSrcweir     }
178*cdf0e10cSrcweir     return static_cast<unsigned int>(nFileSize);
179*cdf0e10cSrcweir }
180*cdf0e10cSrcweir 
181*cdf0e10cSrcweir bool FileEmitContext::copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
182*cdf0e10cSrcweir {
183*cdf0e10cSrcweir     if( nOrigOffset + nLen > m_nReadLen )
184*cdf0e10cSrcweir         return false;
185*cdf0e10cSrcweir 
186*cdf0e10cSrcweir     if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
187*cdf0e10cSrcweir     {
188*cdf0e10cSrcweir         fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
189*cdf0e10cSrcweir         return false;
190*cdf0e10cSrcweir     }
191*cdf0e10cSrcweir     void* pBuf = rtl_allocateMemory( nLen );
192*cdf0e10cSrcweir     if( ! pBuf )
193*cdf0e10cSrcweir         return false;
194*cdf0e10cSrcweir     sal_uInt64 nBytesRead = 0;
195*cdf0e10cSrcweir     if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None
196*cdf0e10cSrcweir         || nBytesRead != static_cast<sal_uInt64>(nLen) )
197*cdf0e10cSrcweir     {
198*cdf0e10cSrcweir         fprintf( stderr, "could not read %u bytes\n", nLen );
199*cdf0e10cSrcweir         rtl_freeMemory( pBuf );
200*cdf0e10cSrcweir         return false;
201*cdf0e10cSrcweir     }
202*cdf0e10cSrcweir     bool bRet = write( pBuf, nLen );
203*cdf0e10cSrcweir     rtl_freeMemory( pBuf );
204*cdf0e10cSrcweir     return bRet;
205*cdf0e10cSrcweir }
206*cdf0e10cSrcweir 
207*cdf0e10cSrcweir unsigned int FileEmitContext::readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
208*cdf0e10cSrcweir {
209*cdf0e10cSrcweir     if( nOrigOffset + nLen > m_nReadLen )
210*cdf0e10cSrcweir         return 0;
211*cdf0e10cSrcweir 
212*cdf0e10cSrcweir     if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
213*cdf0e10cSrcweir     {
214*cdf0e10cSrcweir         fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
215*cdf0e10cSrcweir         return 0;
216*cdf0e10cSrcweir     }
217*cdf0e10cSrcweir     sal_uInt64 nBytesRead = 0;
218*cdf0e10cSrcweir     if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None )
219*cdf0e10cSrcweir         return 0;
220*cdf0e10cSrcweir     return static_cast<unsigned int>(nBytesRead);
221*cdf0e10cSrcweir }
222*cdf0e10cSrcweir 
223*cdf0e10cSrcweir typedef int(*PDFFileHdl)(const char*, const char*, PDFFile*);
224*cdf0e10cSrcweir 
225*cdf0e10cSrcweir int handleFile( const char* pInFile, const char* pOutFile, const char* pPassword, PDFFileHdl pHdl )
226*cdf0e10cSrcweir {
227*cdf0e10cSrcweir 
228*cdf0e10cSrcweir     PDFReader aParser;
229*cdf0e10cSrcweir     int nRet = 0;
230*cdf0e10cSrcweir     PDFEntry* pEntry = aParser.read( pInFile );
231*cdf0e10cSrcweir     if( pEntry )
232*cdf0e10cSrcweir     {
233*cdf0e10cSrcweir         PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry);
234*cdf0e10cSrcweir         if( pPDFFile )
235*cdf0e10cSrcweir         {
236*cdf0e10cSrcweir             fprintf( stdout, "have a %s PDF file\n", pPDFFile->isEncrypted() ? "encrypted" : "unencrypted" );
237*cdf0e10cSrcweir             if( pPassword )
238*cdf0e10cSrcweir                 fprintf( stdout, "password %s\n",
239*cdf0e10cSrcweir                          pPDFFile->setupDecryptionData( pPassword ) ? "matches" : "does not match" );
240*cdf0e10cSrcweir             nRet = pHdl( pInFile, pOutFile, pPDFFile );
241*cdf0e10cSrcweir         }
242*cdf0e10cSrcweir         else
243*cdf0e10cSrcweir             nRet = 20;
244*cdf0e10cSrcweir         delete pEntry;
245*cdf0e10cSrcweir     }
246*cdf0e10cSrcweir     return nRet;
247*cdf0e10cSrcweir }
248*cdf0e10cSrcweir 
249*cdf0e10cSrcweir int write_unzipFile( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
250*cdf0e10cSrcweir {
251*cdf0e10cSrcweir     FileEmitContext aContext( pOutFile, pInFile, pPDFFile );
252*cdf0e10cSrcweir     aContext.m_bDecrypt = pPDFFile->isEncrypted();
253*cdf0e10cSrcweir     pPDFFile->emit(aContext);
254*cdf0e10cSrcweir     return 0;
255*cdf0e10cSrcweir }
256*cdf0e10cSrcweir 
257*cdf0e10cSrcweir int write_addStreamArray( const char* pOutFile, PDFArray* pStreams, PDFFile* pPDFFile, const char* pInFile )
258*cdf0e10cSrcweir {
259*cdf0e10cSrcweir     int nRet = 0;
260*cdf0e10cSrcweir     unsigned int nArrayElements = pStreams->m_aSubElements.size();
261*cdf0e10cSrcweir     for( unsigned int i = 0; i < nArrayElements-1 && nRet == 0; i++ )
262*cdf0e10cSrcweir     {
263*cdf0e10cSrcweir         PDFName* pMimeType = dynamic_cast<PDFName*>(pStreams->m_aSubElements[i]);
264*cdf0e10cSrcweir         PDFObjectRef* pStreamRef = dynamic_cast<PDFObjectRef*>(pStreams->m_aSubElements[i+1]);
265*cdf0e10cSrcweir         if( ! pMimeType )
266*cdf0e10cSrcweir             fprintf( stderr, "error: no mimetype element\n" );
267*cdf0e10cSrcweir         if( ! pStreamRef )
268*cdf0e10cSrcweir             fprintf( stderr, "error: no stream ref element\n" );
269*cdf0e10cSrcweir         if( pMimeType && pStreamRef )
270*cdf0e10cSrcweir         {
271*cdf0e10cSrcweir             fprintf( stdout, "found stream %d %d with mimetype %s\n",
272*cdf0e10cSrcweir                      pStreamRef->m_nNumber, pStreamRef->m_nGeneration,
273*cdf0e10cSrcweir                      pMimeType->m_aName.getStr() );
274*cdf0e10cSrcweir             PDFObject* pObject = pPDFFile->findObject( pStreamRef->m_nNumber, pStreamRef->m_nGeneration );
275*cdf0e10cSrcweir             if( pObject )
276*cdf0e10cSrcweir             {
277*cdf0e10cSrcweir                 rtl::OStringBuffer aOutStream( pOutFile );
278*cdf0e10cSrcweir                 aOutStream.append( "_stream_" );
279*cdf0e10cSrcweir                 aOutStream.append( sal_Int32(pStreamRef->m_nNumber) );
280*cdf0e10cSrcweir                 aOutStream.append( "_" );
281*cdf0e10cSrcweir                 aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) );
282*cdf0e10cSrcweir                 FileEmitContext aContext( aOutStream.getStr(), pInFile, pPDFFile );
283*cdf0e10cSrcweir                 aContext.m_bDecrypt = pPDFFile->isEncrypted();
284*cdf0e10cSrcweir                 pObject->writeStream( aContext, pPDFFile );
285*cdf0e10cSrcweir             }
286*cdf0e10cSrcweir             else
287*cdf0e10cSrcweir             {
288*cdf0e10cSrcweir                 fprintf( stderr, "object not found\n" );
289*cdf0e10cSrcweir                 nRet = 121;
290*cdf0e10cSrcweir             }
291*cdf0e10cSrcweir         }
292*cdf0e10cSrcweir         else
293*cdf0e10cSrcweir             nRet = 120;
294*cdf0e10cSrcweir     }
295*cdf0e10cSrcweir     return nRet;
296*cdf0e10cSrcweir }
297*cdf0e10cSrcweir 
298*cdf0e10cSrcweir int write_addStreams( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
299*cdf0e10cSrcweir {
300*cdf0e10cSrcweir     // find all trailers
301*cdf0e10cSrcweir     int nRet = 0;
302*cdf0e10cSrcweir     unsigned int nElements = pPDFFile->m_aSubElements.size();
303*cdf0e10cSrcweir     for( unsigned i = 0; i < nElements && nRet == 0; i++ )
304*cdf0e10cSrcweir     {
305*cdf0e10cSrcweir         PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pPDFFile->m_aSubElements[i]);
306*cdf0e10cSrcweir         if( pTrailer && pTrailer->m_pDict )
307*cdf0e10cSrcweir         {
308*cdf0e10cSrcweir             // search for AdditionalStreams entry
309*cdf0e10cSrcweir             std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator add_stream;
310*cdf0e10cSrcweir             add_stream = pTrailer->m_pDict->m_aMap.find( "AdditionalStreams" );
311*cdf0e10cSrcweir             if( add_stream != pTrailer->m_pDict->m_aMap.end() )
312*cdf0e10cSrcweir             {
313*cdf0e10cSrcweir                 PDFArray* pStreams = dynamic_cast<PDFArray*>(add_stream->second);
314*cdf0e10cSrcweir                 if( pStreams )
315*cdf0e10cSrcweir                     nRet = write_addStreamArray( pOutFile, pStreams, pPDFFile, pInFile );
316*cdf0e10cSrcweir             }
317*cdf0e10cSrcweir         }
318*cdf0e10cSrcweir     }
319*cdf0e10cSrcweir     return nRet;
320*cdf0e10cSrcweir }
321*cdf0e10cSrcweir 
322*cdf0e10cSrcweir int write_fonts( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
323*cdf0e10cSrcweir {
324*cdf0e10cSrcweir     int nRet = 0;
325*cdf0e10cSrcweir     unsigned int nElements = i_pPDFFile->m_aSubElements.size();
326*cdf0e10cSrcweir     for( unsigned i = 0; i < nElements && nRet == 0; i++ )
327*cdf0e10cSrcweir     {
328*cdf0e10cSrcweir         // search FontDescriptors
329*cdf0e10cSrcweir         PDFObject* pObj = dynamic_cast<PDFObject*>(i_pPDFFile->m_aSubElements[i]);
330*cdf0e10cSrcweir         if( ! pObj )
331*cdf0e10cSrcweir             continue;
332*cdf0e10cSrcweir         PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
333*cdf0e10cSrcweir         if( ! pDict )
334*cdf0e10cSrcweir             continue;
335*cdf0e10cSrcweir 
336*cdf0e10cSrcweir         std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator map_it =
337*cdf0e10cSrcweir                 pDict->m_aMap.find( "Type" );
338*cdf0e10cSrcweir         if( map_it == pDict->m_aMap.end() )
339*cdf0e10cSrcweir             continue;
340*cdf0e10cSrcweir 
341*cdf0e10cSrcweir         PDFName* pName = dynamic_cast<PDFName*>(map_it->second);
342*cdf0e10cSrcweir         if( ! pName )
343*cdf0e10cSrcweir             continue;
344*cdf0e10cSrcweir         if( ! pName->m_aName.equals( "FontDescriptor" ) )
345*cdf0e10cSrcweir             continue;
346*cdf0e10cSrcweir 
347*cdf0e10cSrcweir         // the font name will be helpful, also there must be one in
348*cdf0e10cSrcweir         // a font descriptor
349*cdf0e10cSrcweir         map_it = pDict->m_aMap.find( "FontName" );
350*cdf0e10cSrcweir         if( map_it == pDict->m_aMap.end() )
351*cdf0e10cSrcweir             continue;
352*cdf0e10cSrcweir         pName = dynamic_cast<PDFName*>(map_it->second);
353*cdf0e10cSrcweir         if( ! pName )
354*cdf0e10cSrcweir             continue;
355*cdf0e10cSrcweir         rtl::OString aFontName( pName->m_aName );
356*cdf0e10cSrcweir 
357*cdf0e10cSrcweir         PDFObjectRef* pStreamRef = 0;
358*cdf0e10cSrcweir         const char* pFileType = NULL;
359*cdf0e10cSrcweir         // we have a font descriptor, try for a type 1 font
360*cdf0e10cSrcweir         map_it = pDict->m_aMap.find( "FontFile" );
361*cdf0e10cSrcweir         if( map_it != pDict->m_aMap.end() )
362*cdf0e10cSrcweir         {
363*cdf0e10cSrcweir             pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
364*cdf0e10cSrcweir             if( pStreamRef )
365*cdf0e10cSrcweir                 pFileType = "pfa";
366*cdf0e10cSrcweir         }
367*cdf0e10cSrcweir 
368*cdf0e10cSrcweir         // perhaps it's a truetype file ?
369*cdf0e10cSrcweir         if( ! pStreamRef )
370*cdf0e10cSrcweir         {
371*cdf0e10cSrcweir             map_it  = pDict->m_aMap.find( "FontFile2" );
372*cdf0e10cSrcweir             if( map_it != pDict->m_aMap.end() )
373*cdf0e10cSrcweir             {
374*cdf0e10cSrcweir                 pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
375*cdf0e10cSrcweir                 if( pStreamRef )
376*cdf0e10cSrcweir                     pFileType = "ttf";
377*cdf0e10cSrcweir             }
378*cdf0e10cSrcweir         }
379*cdf0e10cSrcweir 
380*cdf0e10cSrcweir         if( ! pStreamRef )
381*cdf0e10cSrcweir             continue;
382*cdf0e10cSrcweir 
383*cdf0e10cSrcweir         PDFObject* pStream = i_pPDFFile->findObject( pStreamRef );
384*cdf0e10cSrcweir         if( ! pStream )
385*cdf0e10cSrcweir             continue;
386*cdf0e10cSrcweir 
387*cdf0e10cSrcweir         rtl::OStringBuffer aOutStream( i_pOutFile );
388*cdf0e10cSrcweir         aOutStream.append( "_font_" );
389*cdf0e10cSrcweir         aOutStream.append( sal_Int32(pStreamRef->m_nNumber) );
390*cdf0e10cSrcweir         aOutStream.append( "_" );
391*cdf0e10cSrcweir         aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) );
392*cdf0e10cSrcweir         aOutStream.append( "_" );
393*cdf0e10cSrcweir         aOutStream.append( aFontName );
394*cdf0e10cSrcweir         if( pFileType )
395*cdf0e10cSrcweir         {
396*cdf0e10cSrcweir             aOutStream.append( "." );
397*cdf0e10cSrcweir             aOutStream.append( pFileType );
398*cdf0e10cSrcweir         }
399*cdf0e10cSrcweir         FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
400*cdf0e10cSrcweir         aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
401*cdf0e10cSrcweir         pStream->writeStream( aContext, i_pPDFFile );
402*cdf0e10cSrcweir     }
403*cdf0e10cSrcweir     return nRet;
404*cdf0e10cSrcweir }
405*cdf0e10cSrcweir 
406*cdf0e10cSrcweir std::vector< std::pair< sal_Int32, sal_Int32 > > s_aEmitObjects;
407*cdf0e10cSrcweir 
408*cdf0e10cSrcweir int write_objects( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
409*cdf0e10cSrcweir {
410*cdf0e10cSrcweir     int nRet = 0;
411*cdf0e10cSrcweir     unsigned int nElements = s_aEmitObjects.size();
412*cdf0e10cSrcweir     for( unsigned i = 0; i < nElements && nRet == 0; i++ )
413*cdf0e10cSrcweir     {
414*cdf0e10cSrcweir         sal_Int32 nObject     = s_aEmitObjects[i].first;
415*cdf0e10cSrcweir         sal_Int32 nGeneration = s_aEmitObjects[i].second;
416*cdf0e10cSrcweir         PDFObject* pStream = i_pPDFFile->findObject( nObject, nGeneration );
417*cdf0e10cSrcweir         if( ! pStream )
418*cdf0e10cSrcweir         {
419*cdf0e10cSrcweir             fprintf( stderr, "object %d %d not found !\n", (int)nObject, (int)nGeneration );
420*cdf0e10cSrcweir             continue;
421*cdf0e10cSrcweir         }
422*cdf0e10cSrcweir 
423*cdf0e10cSrcweir         rtl::OStringBuffer aOutStream( i_pOutFile );
424*cdf0e10cSrcweir         aOutStream.append( "_stream_" );
425*cdf0e10cSrcweir         aOutStream.append( nObject );
426*cdf0e10cSrcweir         aOutStream.append( "_" );
427*cdf0e10cSrcweir         aOutStream.append( nGeneration );
428*cdf0e10cSrcweir         FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
429*cdf0e10cSrcweir         aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
430*cdf0e10cSrcweir         pStream->writeStream( aContext, i_pPDFFile );
431*cdf0e10cSrcweir     }
432*cdf0e10cSrcweir     return nRet;
433*cdf0e10cSrcweir }
434*cdf0e10cSrcweir 
435*cdf0e10cSrcweir SAL_IMPLEMENT_MAIN_WITH_ARGS( argc, argv )
436*cdf0e10cSrcweir {
437*cdf0e10cSrcweir     const char* pInFile = NULL;
438*cdf0e10cSrcweir     const char* pOutFile = NULL;
439*cdf0e10cSrcweir     const char* pPassword = NULL;
440*cdf0e10cSrcweir     OStringBuffer aOutFile( 256 );
441*cdf0e10cSrcweir     PDFFileHdl aHdl = write_unzipFile;
442*cdf0e10cSrcweir 
443*cdf0e10cSrcweir     for( int nArg = 1; nArg < argc; nArg++ )
444*cdf0e10cSrcweir     {
445*cdf0e10cSrcweir         if( argv[nArg][0] == '-' )
446*cdf0e10cSrcweir         {
447*cdf0e10cSrcweir             if( ! rtl_str_compare( "-pw", argv[nArg] ) ||
448*cdf0e10cSrcweir                 ! rtl_str_compare( "--password" , argv[nArg] ) )
449*cdf0e10cSrcweir             {
450*cdf0e10cSrcweir                 if( nArg == argc-1 )
451*cdf0e10cSrcweir                 {
452*cdf0e10cSrcweir                     fprintf( stderr, "no password given\n" );
453*cdf0e10cSrcweir                     return 1;
454*cdf0e10cSrcweir                 }
455*cdf0e10cSrcweir                 nArg++;
456*cdf0e10cSrcweir                 pPassword = argv[nArg];
457*cdf0e10cSrcweir             }
458*cdf0e10cSrcweir             else if( ! rtl_str_compare( "-h", argv[nArg] ) ||
459*cdf0e10cSrcweir                 ! rtl_str_compare( "--help", argv[nArg] ) )
460*cdf0e10cSrcweir             {
461*cdf0e10cSrcweir                 printHelp( argv[0] );
462*cdf0e10cSrcweir                 return 0;
463*cdf0e10cSrcweir             }
464*cdf0e10cSrcweir             else if( ! rtl_str_compare( "-a", argv[nArg] ) ||
465*cdf0e10cSrcweir                 ! rtl_str_compare( "--extract-add-streams", argv[nArg] ) )
466*cdf0e10cSrcweir             {
467*cdf0e10cSrcweir                 aHdl = write_addStreams;
468*cdf0e10cSrcweir             }
469*cdf0e10cSrcweir             else if( ! rtl_str_compare( "-f", argv[nArg] ) ||
470*cdf0e10cSrcweir                 ! rtl_str_compare( "--extract-fonts", argv[nArg] ) )
471*cdf0e10cSrcweir             {
472*cdf0e10cSrcweir                 aHdl = write_fonts;
473*cdf0e10cSrcweir             }
474*cdf0e10cSrcweir             else if( ! rtl_str_compare( "-o", argv[nArg] ) ||
475*cdf0e10cSrcweir                 ! rtl_str_compare( "--extract-objects", argv[nArg] ) )
476*cdf0e10cSrcweir             {
477*cdf0e10cSrcweir                 aHdl = write_objects;
478*cdf0e10cSrcweir                 nArg++;
479*cdf0e10cSrcweir                 if( nArg < argc )
480*cdf0e10cSrcweir                 {
481*cdf0e10cSrcweir                     rtl::OString aObjs( argv[nArg] );
482*cdf0e10cSrcweir                     sal_Int32 nIndex = 0;
483*cdf0e10cSrcweir                     while( nIndex != -1 )
484*cdf0e10cSrcweir                     {
485*cdf0e10cSrcweir                         rtl::OString aToken( aObjs.getToken( 0, ',', nIndex ) );
486*cdf0e10cSrcweir                         sal_Int32 nObject = 0;
487*cdf0e10cSrcweir                         sal_Int32 nGeneration = 0;
488*cdf0e10cSrcweir                         sal_Int32 nGenIndex = 0;
489*cdf0e10cSrcweir                         nObject = aToken.getToken( 0, ':', nGenIndex ).toInt32();
490*cdf0e10cSrcweir                         if( nGenIndex != -1 )
491*cdf0e10cSrcweir                             nGeneration = aToken.getToken( 0, ':', nGenIndex ).toInt32();
492*cdf0e10cSrcweir                         s_aEmitObjects.push_back( std::pair<sal_Int32,sal_Int32>(nObject,nGeneration) );
493*cdf0e10cSrcweir                     }
494*cdf0e10cSrcweir                 }
495*cdf0e10cSrcweir             }
496*cdf0e10cSrcweir             else
497*cdf0e10cSrcweir             {
498*cdf0e10cSrcweir                 fprintf( stderr, "unrecognized option \"%s\"\n",
499*cdf0e10cSrcweir                          argv[nArg] );
500*cdf0e10cSrcweir                 printHelp( argv[0] );
501*cdf0e10cSrcweir                 return 1;
502*cdf0e10cSrcweir             }
503*cdf0e10cSrcweir         }
504*cdf0e10cSrcweir         else if( pInFile == NULL )
505*cdf0e10cSrcweir             pInFile = argv[nArg];
506*cdf0e10cSrcweir         else if( pOutFile == NULL )
507*cdf0e10cSrcweir             pOutFile = argv[nArg];
508*cdf0e10cSrcweir     }
509*cdf0e10cSrcweir     if( ! pInFile )
510*cdf0e10cSrcweir     {
511*cdf0e10cSrcweir         fprintf( stderr, "no input file given\n" );
512*cdf0e10cSrcweir         return 10;
513*cdf0e10cSrcweir     }
514*cdf0e10cSrcweir     if( ! pOutFile )
515*cdf0e10cSrcweir     {
516*cdf0e10cSrcweir         OString aFile( pInFile );
517*cdf0e10cSrcweir         if( aFile.getLength() > 0 )
518*cdf0e10cSrcweir         {
519*cdf0e10cSrcweir             if( aFile.getLength() > 4 )
520*cdf0e10cSrcweir             {
521*cdf0e10cSrcweir                 if( aFile.matchIgnoreAsciiCase( OString( ".pdf" ), aFile.getLength()-4 ) )
522*cdf0e10cSrcweir                     aOutFile.append( pInFile, aFile.getLength() - 4 );
523*cdf0e10cSrcweir                 else
524*cdf0e10cSrcweir                     aOutFile.append( aFile );
525*cdf0e10cSrcweir             }
526*cdf0e10cSrcweir             aOutFile.append( "_unzip.pdf" );
527*cdf0e10cSrcweir             pOutFile = aOutFile.getStr();
528*cdf0e10cSrcweir         }
529*cdf0e10cSrcweir         else
530*cdf0e10cSrcweir         {
531*cdf0e10cSrcweir             fprintf( stderr, "no output file given\n" );
532*cdf0e10cSrcweir             return 11;
533*cdf0e10cSrcweir         }
534*cdf0e10cSrcweir     }
535*cdf0e10cSrcweir 
536*cdf0e10cSrcweir     return handleFile( pInFile, pOutFile, pPassword, aHdl );
537*cdf0e10cSrcweir }
538*cdf0e10cSrcweir 
539