/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/


23cdf0e10cSrcweir
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_sdext.hxx"
26cdf0e10cSrcweir
27cdf0e10cSrcweir #include "pdfiadaptor.hxx"
28cdf0e10cSrcweir #include "filterdet.hxx"
29cdf0e10cSrcweir #include "saxemitter.hxx"
30cdf0e10cSrcweir #include "odfemitter.hxx"
31cdf0e10cSrcweir #include "inc/wrapper.hxx"
32cdf0e10cSrcweir #include "inc/contentsink.hxx"
33cdf0e10cSrcweir #include "tree/pdfiprocessor.hxx"
34cdf0e10cSrcweir
35cdf0e10cSrcweir #include <osl/file.h>
36cdf0e10cSrcweir #include <osl/thread.h>
37cdf0e10cSrcweir #include <osl/diagnose.h>
38cdf0e10cSrcweir #include <cppuhelper/factory.hxx>
39cdf0e10cSrcweir #include <cppuhelper/implementationentry.hxx>
40cdf0e10cSrcweir #include <com/sun/star/lang/XMultiComponentFactory.hpp>
41cdf0e10cSrcweir #include <com/sun/star/uno/RuntimeException.hpp>
42cdf0e10cSrcweir #include <com/sun/star/io/XInputStream.hpp>
43cdf0e10cSrcweir #include <com/sun/star/frame/XLoadable.hpp>
44cdf0e10cSrcweir #include <com/sun/star/xml/sax/XDocumentHandler.hpp>
45cdf0e10cSrcweir #include <com/sun/star/io/XSeekable.hpp>
46cdf0e10cSrcweir
47cdf0e10cSrcweir
48cdf0e10cSrcweir #include <boost/shared_ptr.hpp>
49cdf0e10cSrcweir
50cdf0e10cSrcweir using namespace com::sun::star;
51cdf0e10cSrcweir
52cdf0e10cSrcweir
53cdf0e10cSrcweir namespace pdfi
54cdf0e10cSrcweir {
55cdf0e10cSrcweir
PDFIHybridAdaptor(const uno::Reference<uno::XComponentContext> & xContext)56cdf0e10cSrcweir PDFIHybridAdaptor::PDFIHybridAdaptor( const uno::Reference< uno::XComponentContext >& xContext ) :
57cdf0e10cSrcweir PDFIHybridAdaptorBase( m_aMutex ),
58cdf0e10cSrcweir m_xContext( xContext ),
59cdf0e10cSrcweir m_xModel()
60cdf0e10cSrcweir {
61cdf0e10cSrcweir }
62cdf0e10cSrcweir
63cdf0e10cSrcweir // XFilter
filter(const uno::Sequence<beans::PropertyValue> & rFilterData)64cdf0e10cSrcweir sal_Bool SAL_CALL PDFIHybridAdaptor::filter( const uno::Sequence< beans::PropertyValue >& rFilterData ) throw( uno::RuntimeException )
65cdf0e10cSrcweir {
66cdf0e10cSrcweir sal_Bool bRet = sal_False;
67cdf0e10cSrcweir if( m_xModel.is() )
68cdf0e10cSrcweir {
69cdf0e10cSrcweir uno::Reference< io::XStream > xSubStream;
70cdf0e10cSrcweir rtl::OUString aPwd;
71cdf0e10cSrcweir const beans::PropertyValue* pAttribs = rFilterData.getConstArray();
72cdf0e10cSrcweir sal_Int32 nAttribs = rFilterData.getLength();
73cdf0e10cSrcweir sal_Int32 nPwPos = -1;
74cdf0e10cSrcweir for( sal_Int32 i = 0; i < nAttribs; i++ )
75cdf0e10cSrcweir {
76cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1
77cdf0e10cSrcweir rtl::OUString aVal( RTL_CONSTASCII_USTRINGPARAM( "<no string>" ) );
78cdf0e10cSrcweir pAttribs[i].Value >>= aVal;
79cdf0e10cSrcweir OSL_TRACE( "filter: Attrib: %s = %s\n",
80cdf0e10cSrcweir rtl::OUStringToOString( pAttribs[i].Name, RTL_TEXTENCODING_UTF8 ).getStr(),
81cdf0e10cSrcweir rtl::OUStringToOString( aVal, RTL_TEXTENCODING_UTF8 ).getStr() );
82cdf0e10cSrcweir #endif
83cdf0e10cSrcweir if( pAttribs[i].Name.equalsAscii( "EmbeddedSubstream" ) )
84cdf0e10cSrcweir pAttribs[i].Value >>= xSubStream;
85cdf0e10cSrcweir else if( pAttribs[i].Name.equalsAscii( "Password" ) )
86cdf0e10cSrcweir {
87cdf0e10cSrcweir nPwPos = i;
88cdf0e10cSrcweir pAttribs[i].Value >>= aPwd;
89cdf0e10cSrcweir }
90cdf0e10cSrcweir }
91cdf0e10cSrcweir bool bAddPwdProp = false;
92cdf0e10cSrcweir if( ! xSubStream.is() )
93cdf0e10cSrcweir {
94cdf0e10cSrcweir uno::Reference< io::XInputStream > xInput;
95cdf0e10cSrcweir for( sal_Int32 i = 0; i < nAttribs; i++ )
96cdf0e10cSrcweir {
97cdf0e10cSrcweir if( pAttribs[i].Name.equalsAscii( "InputStream" ) )
98cdf0e10cSrcweir {
99cdf0e10cSrcweir pAttribs[i].Value >>= xInput;
100cdf0e10cSrcweir break;
101cdf0e10cSrcweir }
102cdf0e10cSrcweir }
103cdf0e10cSrcweir if( xInput.is() )
104cdf0e10cSrcweir {
105cdf0e10cSrcweir // TODO(P2): extracting hybrid substream twice - once during detection, second time here
106cdf0e10cSrcweir uno::Reference< io::XSeekable > xSeek( xInput, uno::UNO_QUERY );
107cdf0e10cSrcweir if( xSeek.is() )
108cdf0e10cSrcweir xSeek->seek( 0 );
109cdf0e10cSrcweir oslFileHandle aFile = NULL;
110cdf0e10cSrcweir sal_uInt64 nWritten = 0;
111cdf0e10cSrcweir rtl::OUString aURL;
112cdf0e10cSrcweir if( osl_createTempFile( NULL, &aFile, &aURL.pData ) == osl_File_E_None )
113cdf0e10cSrcweir {
114cdf0e10cSrcweir OSL_TRACE( "created temp file %s\n", rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8 ).getStr() );
115cdf0e10cSrcweir const sal_Int32 nBufSize = 4096;
116cdf0e10cSrcweir uno::Sequence<sal_Int8> aBuf(nBufSize);
117cdf0e10cSrcweir // copy the bytes
118cdf0e10cSrcweir sal_Int32 nBytes;
119cdf0e10cSrcweir do
120cdf0e10cSrcweir {
121cdf0e10cSrcweir nBytes = xInput->readBytes( aBuf, nBufSize );
122cdf0e10cSrcweir if( nBytes > 0 )
123cdf0e10cSrcweir {
124cdf0e10cSrcweir osl_writeFile( aFile, aBuf.getConstArray(), nBytes, &nWritten );
125cdf0e10cSrcweir if( static_cast<sal_Int32>(nWritten) != nBytes )
126cdf0e10cSrcweir {
127cdf0e10cSrcweir xInput.clear();
128cdf0e10cSrcweir break;
129cdf0e10cSrcweir }
130cdf0e10cSrcweir }
131cdf0e10cSrcweir } while( nBytes == nBufSize );
132cdf0e10cSrcweir osl_closeFile( aFile );
133cdf0e10cSrcweir if( xInput.is() )
134cdf0e10cSrcweir {
135cdf0e10cSrcweir rtl::OUString aEmbedMimetype;
136cdf0e10cSrcweir rtl::OUString aOrgPwd( aPwd );
137cdf0e10cSrcweir xSubStream = getAdditionalStream( aURL, aEmbedMimetype, aPwd, m_xContext, rFilterData, true );
138cdf0e10cSrcweir if( aOrgPwd != aPwd )
139cdf0e10cSrcweir bAddPwdProp = true;
140cdf0e10cSrcweir }
141cdf0e10cSrcweir osl_removeFile( aURL.pData );
142cdf0e10cSrcweir }
143cdf0e10cSrcweir else
144cdf0e10cSrcweir xSubStream.clear();
145cdf0e10cSrcweir }
146cdf0e10cSrcweir }
147cdf0e10cSrcweir if( xSubStream.is() )
148cdf0e10cSrcweir {
149cdf0e10cSrcweir uno::Sequence< uno::Any > aArgs( 2 );
150cdf0e10cSrcweir aArgs[0] <<= m_xModel;
151cdf0e10cSrcweir aArgs[1] <<= xSubStream;
152cdf0e10cSrcweir
153cdf0e10cSrcweir OSL_TRACE( "try to instantiate subfilter\n" );
154cdf0e10cSrcweir uno::Reference< document::XFilter > xSubFilter;
155cdf0e10cSrcweir try {
156cdf0e10cSrcweir xSubFilter = uno::Reference<document::XFilter>(
157cdf0e10cSrcweir m_xContext->getServiceManager()->createInstanceWithArgumentsAndContext(
158cdf0e10cSrcweir rtl::OUString( RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.document.OwnSubFilter" ) ),
159cdf0e10cSrcweir aArgs,
160cdf0e10cSrcweir m_xContext ),
161cdf0e10cSrcweir uno::UNO_QUERY );
162cdf0e10cSrcweir }
163cdf0e10cSrcweir catch(uno::Exception& e)
164cdf0e10cSrcweir {
165cdf0e10cSrcweir (void)e;
166cdf0e10cSrcweir OSL_TRACE( "subfilter exception: %s\n",
167cdf0e10cSrcweir OUStringToOString( e.Message, RTL_TEXTENCODING_UTF8 ).getStr() );
168cdf0e10cSrcweir }
169cdf0e10cSrcweir
170cdf0e10cSrcweir OSL_TRACE( "subfilter: %p\n", xSubFilter.get() );
171cdf0e10cSrcweir if( xSubFilter.is() )
172cdf0e10cSrcweir {
173cdf0e10cSrcweir if( bAddPwdProp )
174cdf0e10cSrcweir {
175cdf0e10cSrcweir uno::Sequence<beans::PropertyValue> aFilterData( rFilterData );
176cdf0e10cSrcweir if( nPwPos == -1 )
177cdf0e10cSrcweir {
178cdf0e10cSrcweir nPwPos = aFilterData.getLength();
179cdf0e10cSrcweir aFilterData.realloc( nPwPos+1 );
180cdf0e10cSrcweir aFilterData[nPwPos].Name = rtl::OUString(
181cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM( "Password" ) );
182cdf0e10cSrcweir }
183cdf0e10cSrcweir aFilterData[nPwPos].Value <<= aPwd;
184cdf0e10cSrcweir bRet = xSubFilter->filter( aFilterData );
185cdf0e10cSrcweir }
186cdf0e10cSrcweir else
187cdf0e10cSrcweir bRet = xSubFilter->filter( rFilterData );
188cdf0e10cSrcweir }
189cdf0e10cSrcweir }
190cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1
191cdf0e10cSrcweir else
192cdf0e10cSrcweir OSL_TRACE( "PDFIAdaptor::filter: no embedded substream set\n" );
193cdf0e10cSrcweir #endif
194cdf0e10cSrcweir }
195cdf0e10cSrcweir #if OSL_DEBUG_LEVEL > 1
196cdf0e10cSrcweir else
197cdf0e10cSrcweir OSL_TRACE( "PDFIAdaptor::filter: no model set\n" );
198cdf0e10cSrcweir #endif
199cdf0e10cSrcweir
200cdf0e10cSrcweir return bRet;
201cdf0e10cSrcweir }
202cdf0e10cSrcweir
cancel()203cdf0e10cSrcweir void SAL_CALL PDFIHybridAdaptor::cancel() throw()
204cdf0e10cSrcweir {
205cdf0e10cSrcweir }
206cdf0e10cSrcweir
207cdf0e10cSrcweir //XImporter
setTargetDocument(const uno::Reference<lang::XComponent> & xDocument)208cdf0e10cSrcweir void SAL_CALL PDFIHybridAdaptor::setTargetDocument( const uno::Reference< lang::XComponent >& xDocument ) throw( lang::IllegalArgumentException )
209cdf0e10cSrcweir {
210cdf0e10cSrcweir OSL_TRACE( "PDFIAdaptor::setTargetDocument\n" );
211cdf0e10cSrcweir m_xModel = uno::Reference< frame::XModel >( xDocument, uno::UNO_QUERY );
212cdf0e10cSrcweir if( xDocument.is() && ! m_xModel.is() )
213cdf0e10cSrcweir throw lang::IllegalArgumentException();
214cdf0e10cSrcweir }
215cdf0e10cSrcweir
216cdf0e10cSrcweir //---------------------------------------------------------------------------------------
217cdf0e10cSrcweir
PDFIRawAdaptor(const uno::Reference<uno::XComponentContext> & xContext)218cdf0e10cSrcweir PDFIRawAdaptor::PDFIRawAdaptor( const uno::Reference< uno::XComponentContext >& xContext ) :
219cdf0e10cSrcweir PDFIAdaptorBase( m_aMutex ),
220cdf0e10cSrcweir m_xContext( xContext ),
221cdf0e10cSrcweir m_xModel(),
222cdf0e10cSrcweir m_pVisitorFactory(),
223cdf0e10cSrcweir m_bEnableToplevelText(false)
224cdf0e10cSrcweir {
225cdf0e10cSrcweir }
226cdf0e10cSrcweir
setTreeVisitorFactory(const TreeVisitorFactorySharedPtr & rVisitorFactory)227cdf0e10cSrcweir void PDFIRawAdaptor::setTreeVisitorFactory(const TreeVisitorFactorySharedPtr& rVisitorFactory)
228cdf0e10cSrcweir {
229cdf0e10cSrcweir m_pVisitorFactory = rVisitorFactory;
230cdf0e10cSrcweir }
231cdf0e10cSrcweir
parse(const uno::Reference<io::XInputStream> & xInput,const uno::Reference<task::XInteractionHandler> & xIHdl,const rtl::OUString & rPwd,const uno::Reference<task::XStatusIndicator> & xStatus,const XmlEmitterSharedPtr & rEmitter,const rtl::OUString & rURL)232cdf0e10cSrcweir bool PDFIRawAdaptor::parse( const uno::Reference<io::XInputStream>& xInput,
233cdf0e10cSrcweir const uno::Reference<task::XInteractionHandler>& xIHdl,
234cdf0e10cSrcweir const rtl::OUString& rPwd,
235cdf0e10cSrcweir const uno::Reference<task::XStatusIndicator>& xStatus,
236cdf0e10cSrcweir const XmlEmitterSharedPtr& rEmitter,
237cdf0e10cSrcweir const rtl::OUString& rURL )
238cdf0e10cSrcweir {
239cdf0e10cSrcweir // container for metaformat
240cdf0e10cSrcweir boost::shared_ptr<PDFIProcessor> pSink(
241cdf0e10cSrcweir new PDFIProcessor(xStatus, m_xContext));
242cdf0e10cSrcweir
243cdf0e10cSrcweir // TEMP! TEMP!
244cdf0e10cSrcweir if( m_bEnableToplevelText )
245cdf0e10cSrcweir pSink->enableToplevelText();
246cdf0e10cSrcweir
247cdf0e10cSrcweir bool bSuccess=false;
248cdf0e10cSrcweir
249cdf0e10cSrcweir if( xInput.is() && (!rURL.getLength() || rURL.compareToAscii( "file:", 5 ) != 0) )
250cdf0e10cSrcweir bSuccess = xpdf_ImportFromStream( xInput, pSink, xIHdl, rPwd, m_xContext );
251cdf0e10cSrcweir else
252cdf0e10cSrcweir bSuccess = xpdf_ImportFromFile( rURL, pSink, xIHdl, rPwd, m_xContext );
253cdf0e10cSrcweir
254cdf0e10cSrcweir if( bSuccess )
255cdf0e10cSrcweir pSink->emit(*rEmitter,*m_pVisitorFactory);
256cdf0e10cSrcweir
257cdf0e10cSrcweir return bSuccess;
258cdf0e10cSrcweir }
259cdf0e10cSrcweir
odfConvert(const rtl::OUString & rURL,const uno::Reference<io::XOutputStream> & xOutput,const uno::Reference<task::XStatusIndicator> & xStatus)260cdf0e10cSrcweir bool PDFIRawAdaptor::odfConvert( const rtl::OUString& rURL,
261cdf0e10cSrcweir const uno::Reference<io::XOutputStream>& xOutput,
262cdf0e10cSrcweir const uno::Reference<task::XStatusIndicator>& xStatus )
263cdf0e10cSrcweir {
264cdf0e10cSrcweir XmlEmitterSharedPtr pEmitter = createOdfEmitter(xOutput);
265cdf0e10cSrcweir const bool bSuccess = parse(uno::Reference<io::XInputStream>(),
266cdf0e10cSrcweir uno::Reference<task::XInteractionHandler>(),
267cdf0e10cSrcweir rtl::OUString(),
268cdf0e10cSrcweir xStatus,pEmitter,rURL);
269cdf0e10cSrcweir
270cdf0e10cSrcweir // tell input stream that it is no longer needed
271cdf0e10cSrcweir xOutput->closeOutput();
272cdf0e10cSrcweir
273cdf0e10cSrcweir return bSuccess;
274cdf0e10cSrcweir }
275cdf0e10cSrcweir
276cdf0e10cSrcweir // XImportFilter
importer(const uno::Sequence<beans::PropertyValue> & rSourceData,const uno::Reference<xml::sax::XDocumentHandler> & rHdl,const uno::Sequence<rtl::OUString> &)277cdf0e10cSrcweir sal_Bool SAL_CALL PDFIRawAdaptor::importer( const uno::Sequence< beans::PropertyValue >& rSourceData,
278cdf0e10cSrcweir const uno::Reference< xml::sax::XDocumentHandler >& rHdl,
279cdf0e10cSrcweir const uno::Sequence< rtl::OUString >& /*rUserData*/ ) throw( uno::RuntimeException )
280cdf0e10cSrcweir {
281cdf0e10cSrcweir // get the InputStream carrying the PDF content
282cdf0e10cSrcweir uno::Reference< io::XInputStream > xInput;
283cdf0e10cSrcweir uno::Reference< task::XStatusIndicator > xStatus;
284cdf0e10cSrcweir uno::Reference< task::XInteractionHandler > xInteractionHandler;
285cdf0e10cSrcweir rtl::OUString aURL;
286cdf0e10cSrcweir rtl::OUString aPwd;
287cdf0e10cSrcweir const beans::PropertyValue* pAttribs = rSourceData.getConstArray();
288cdf0e10cSrcweir sal_Int32 nAttribs = rSourceData.getLength();
289cdf0e10cSrcweir for( sal_Int32 i = 0; i < nAttribs; i++, pAttribs++ )
290cdf0e10cSrcweir {
291cdf0e10cSrcweir OSL_TRACE("importer Attrib: %s\n", OUStringToOString( pAttribs->Name, RTL_TEXTENCODING_UTF8 ).getStr() );
292cdf0e10cSrcweir if( pAttribs->Name.equalsAscii( "InputStream" ) )
293cdf0e10cSrcweir pAttribs->Value >>= xInput;
294cdf0e10cSrcweir else if( pAttribs->Name.equalsAscii( "URL" ) )
295cdf0e10cSrcweir pAttribs->Value >>= aURL;
296cdf0e10cSrcweir else if( pAttribs->Name.equalsAscii( "StatusIndicator" ) )
297cdf0e10cSrcweir pAttribs->Value >>= xStatus;
298cdf0e10cSrcweir else if( pAttribs->Name.equalsAscii( "InteractionHandler" ) )
299cdf0e10cSrcweir pAttribs->Value >>= xInteractionHandler;
300cdf0e10cSrcweir else if( pAttribs->Name.equalsAscii( "Password" ) )
301cdf0e10cSrcweir pAttribs->Value >>= aPwd;
302cdf0e10cSrcweir }
303cdf0e10cSrcweir if( !xInput.is() )
304cdf0e10cSrcweir return sal_False;
305cdf0e10cSrcweir
306cdf0e10cSrcweir XmlEmitterSharedPtr pEmitter = createSaxEmitter(rHdl);
307cdf0e10cSrcweir const bool bSuccess = parse(xInput,xInteractionHandler, aPwd, xStatus,pEmitter,aURL);
308cdf0e10cSrcweir
309cdf0e10cSrcweir // tell input stream that it is no longer needed
310cdf0e10cSrcweir xInput->closeInput();
311cdf0e10cSrcweir xInput.clear();
312cdf0e10cSrcweir
313cdf0e10cSrcweir return bSuccess;
314cdf0e10cSrcweir }
315cdf0e10cSrcweir
316cdf0e10cSrcweir //XImporter
setTargetDocument(const uno::Reference<lang::XComponent> & xDocument)317cdf0e10cSrcweir void SAL_CALL PDFIRawAdaptor::setTargetDocument( const uno::Reference< lang::XComponent >& xDocument ) throw( lang::IllegalArgumentException )
318cdf0e10cSrcweir {
319cdf0e10cSrcweir OSL_TRACE( "PDFIAdaptor::setTargetDocument\n" );
320cdf0e10cSrcweir m_xModel = uno::Reference< frame::XModel >( xDocument, uno::UNO_QUERY );
321cdf0e10cSrcweir if( xDocument.is() && ! m_xModel.is() )
322cdf0e10cSrcweir throw lang::IllegalArgumentException();
323cdf0e10cSrcweir }
324cdf0e10cSrcweir
325cdf0e10cSrcweir }
326