xref: /AOO41X/main/xmlhelp/source/cxxhelp/provider/resultsetforquery.cxx (revision 89dcb3da00a29b2b7b028d5bd430e2099844a09e)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_xmlhelp.hxx"
26 #include <com/sun/star/ucb/Command.hpp>
27 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
28 #include <com/sun/star/i18n/XExtendedTransliteration.hpp>
29 #include <com/sun/star/ucb/XCommandProcessor.hpp>
30 #include <com/sun/star/lang/Locale.hpp>
31 #include <com/sun/star/script/XInvocation.hpp>
32 
33 #ifndef INCLUDED_STL_ALGORITHM
34 #include <algorithm>
35 #define INCLUDED_STL_ALGORITHM
36 #endif
37 #ifndef INCLUDED_STL_SET
38 #include <set>
39 #define INCLUDED_STL_SET
40 #endif
41 
42 #include <qe/Query.hxx>
43 #include <qe/DocGenerator.hxx>
44 #include "resultsetforquery.hxx"
45 #include "databases.hxx"
46 
47 // For testing
48 // #define LOGGING
49 
50 using namespace std;
51 using namespace chelp;
52 using namespace xmlsearch::excep;
53 using namespace xmlsearch::qe;
54 using namespace com::sun::star;
55 using namespace com::sun::star::ucb;
56 using namespace com::sun::star::i18n;
57 using namespace com::sun::star::uno;
58 using namespace com::sun::star::lang;
59 
60 struct HitItem
61 {
62     rtl::OUString   m_aURL;
63     float           m_fScore;
64 
HitItemHitItem65     HitItem( void ) {}
HitItemHitItem66     HitItem( const rtl::OUString& aURL, float fScore )
67         : m_aURL( aURL )
68         , m_fScore( fScore )
69     {}
operator <HitItem70     bool operator < ( const HitItem& rHitItem ) const
71     {
72         return rHitItem.m_fScore < m_fScore;
73     }
74 };
75 
ResultSetForQuery(const uno::Reference<lang::XMultiServiceFactory> & xMSF,const uno::Reference<XContentProvider> & xProvider,sal_Int32 nOpenMode,const uno::Sequence<beans::Property> & seq,const uno::Sequence<NumberedSortingInfo> & seqSort,URLParameter & aURLParameter,Databases * pDatabases)76 ResultSetForQuery::ResultSetForQuery( const uno::Reference< lang::XMultiServiceFactory >&  xMSF,
77                                       const uno::Reference< XContentProvider >&  xProvider,
78                                       sal_Int32 nOpenMode,
79                                       const uno::Sequence< beans::Property >& seq,
80                                       const uno::Sequence< NumberedSortingInfo >& seqSort,
81                                       URLParameter& aURLParameter,
82                                       Databases* pDatabases )
83     : ResultSetBase( xMSF,xProvider,nOpenMode,seq,seqSort ),
84       m_pDatabases( pDatabases ),
85       m_aURLParameter( aURLParameter )
86 {
87     Reference< XTransliteration > xTrans(
88         xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.i18n.Transliteration" ) ),
89         UNO_QUERY );
90     Locale aLocale( aURLParameter.get_language(),
91                     rtl::OUString(),
92                     rtl::OUString() );
93     if(xTrans.is())
94         xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE,
95                            aLocale );
96 
97     // Access Lucene via XInvocation
98     Reference< script::XInvocation > xInvocation(
99         xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.help.HelpSearch" ) ),
100         UNO_QUERY );
101 
102     vector< vector< rtl::OUString > > queryList;
103     {
104         sal_Int32 idx;
105         rtl::OUString query = m_aURLParameter.get_query();
106         while( query.getLength() )
107         {
108             idx = query.indexOf( sal_Unicode( ' ' ) );
109             if( idx == -1 )
110                 idx = query.getLength();
111 
112             vector< rtl::OUString > currentQuery;
113             rtl::OUString tmp(query.copy( 0,idx ));
114             rtl:: OUString toliterate = tmp;
115             if(xTrans.is()) {
116                 Sequence<sal_Int32> aSeq;
117                 toliterate = xTrans->transliterate(
118                     tmp,0,tmp.getLength(),aSeq);
119             }
120 
121             currentQuery.push_back( toliterate );
122             queryList.push_back( currentQuery );
123 
124             int nCpy = 1 + idx;
125             if( nCpy >= query.getLength() )
126                 query = rtl::OUString();
127             else
128                 query = query.copy( 1 + idx );
129         }
130     }
131 
132     vector< rtl::OUString > aCompleteResultVector;
133     if( xInvocation.is() )
134     {
135         rtl::OUString scope = m_aURLParameter.get_scope();
136         bool bCaptionsOnly = ( scope.compareToAscii( "Heading" ) == 0 );
137         sal_Int32 hitCount = m_aURLParameter.get_hitCount();
138 
139 #ifdef LOGGING
140         FILE* pFile = fopen( "d:\\resultset_out.txt", "w" );
141 #endif
142 
143         IndexFolderIterator aIndexFolderIt( *pDatabases, m_aURLParameter.get_module(), m_aURLParameter.get_language() );
144         rtl::OUString idxDir;
145         bool bExtension = false;
146         int iDir = 0;
147         vector< vector<HitItem>* > aIndexFolderResultVectorVector;
148 
149         bool bTemporary;
150         while( (idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary )).getLength() > 0 )
151         {
152             vector<HitItem> aIndexFolderResultVector;
153 
154             try
155             {
156                 vector< vector<HitItem>* > aQueryListResultVectorVector;
157                 set< rtl::OUString > aSet,aCurrent,aResultSet;
158 
159                 int nQueryListSize = queryList.size();
160                 if( nQueryListSize > 1 )
161                     hitCount = 2000;
162 
163                 for( int i = 0; i < nQueryListSize; ++i )
164                 {
165                     vector<HitItem>* pQueryResultVector;
166                     if( nQueryListSize > 1 )
167                     {
168                         pQueryResultVector = new vector<HitItem>();
169                         aQueryListResultVectorVector.push_back( pQueryResultVector );
170                     }
171                     else
172                     {
173                         pQueryResultVector = &aIndexFolderResultVector;
174                     }
175                     pQueryResultVector->reserve( hitCount );
176 
177                     int nParamCount = bCaptionsOnly ? 7 : 6;
178                     Sequence<uno::Any> aParamsSeq( nParamCount );
179 
180                     aParamsSeq[0] = uno::makeAny( rtl::OUString::createFromAscii( "-lang" ) );
181                     aParamsSeq[1] = uno::makeAny( m_aURLParameter.get_language() );
182 
183                     aParamsSeq[2] = uno::makeAny( rtl::OUString::createFromAscii( "-index" ) );
184                     rtl::OUString aSystemPath;
185                     osl::FileBase::getSystemPathFromFileURL( idxDir, aSystemPath );
186                     aParamsSeq[3] = uno::makeAny( aSystemPath );
187 
188                     aParamsSeq[4] = uno::makeAny( rtl::OUString::createFromAscii( "-query" ) );
189 
190                     const std::vector< rtl::OUString >& aListItem = queryList[i];
191                     ::rtl::OUString aNewQueryStr = aListItem[0];
192                     aParamsSeq[5] = uno::makeAny( aNewQueryStr );
193 
194                     if( bCaptionsOnly )
195                         aParamsSeq[6] = uno::makeAny( rtl::OUString::createFromAscii( "-caption" ) );
196 
197                     Sequence< sal_Int16 > aOutParamIndex;
198                     Sequence< uno::Any > aOutParam;
199 
200                     uno::Any aRet = xInvocation->invoke( rtl::OUString::createFromAscii( "search" ),
201                         aParamsSeq, aOutParamIndex, aOutParam );
202 
203                     Sequence< float > aScoreSeq;
204                     int nScoreCount = 0;
205                     int nOutParamCount = aOutParam.getLength();
206                     if( nOutParamCount == 1 )
207                     {
208                         const uno::Any* pScoreAnySeq = aOutParam.getConstArray();
209                         if( pScoreAnySeq[0] >>= aScoreSeq )
210                             nScoreCount = aScoreSeq.getLength();
211                     }
212 
213                     Sequence<rtl::OUString> aRetSeq;
214                     if( aRet >>= aRetSeq )
215                     {
216                         if( nQueryListSize > 1 )
217                             aSet.clear();
218 
219                         const rtl::OUString* pRetSeq = aRetSeq.getConstArray();
220                         int nCount = aRetSeq.getLength();
221                         if( nCount > hitCount )
222                             nCount = hitCount;
223                         for( int j = 0 ; j < nCount ; ++j )
224                         {
225                             float fScore = 0.0;
226                             if( j < nScoreCount )
227                                 fScore = aScoreSeq[j];
228 
229                             rtl::OUString aURL = pRetSeq[j];
230                             pQueryResultVector->push_back( HitItem( aURL, fScore ) );
231                             if( nQueryListSize > 1 )
232                                 aSet.insert( aURL );
233 
234 #ifdef LOGGING
235                             if( pFile )
236                             {
237                                 rtl::OString tmp(rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8));
238                                 fprintf( pFile, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir, i, fScore, tmp.getStr() );
239                             }
240 #endif
241                         }
242                     }
243 
244                     // intersect
245                     if( nQueryListSize > 1 )
246                     {
247                         if( i == 0 )
248                         {
249                             aResultSet = aSet;
250                         }
251                         else
252                         {
253                             aCurrent = aResultSet;
254                             aResultSet.clear();
255                             set_intersection( aSet.begin(),aSet.end(),
256                                               aCurrent.begin(),aCurrent.end(),
257                                               inserter(aResultSet,aResultSet.begin()));
258                         }
259                     }
260                 }
261 
262                 // Combine results in aIndexFolderResultVector
263                 if( nQueryListSize > 1 )
264                 {
265                     for( int n = 0 ; n < nQueryListSize ; ++n )
266                     {
267                         vector<HitItem>* pQueryResultVector = aQueryListResultVectorVector[n];
268                         vector<HitItem>& rQueryResultVector = *pQueryResultVector;
269 
270                         int nItemCount = rQueryResultVector.size();
271                         for( int i = 0 ; i < nItemCount ; ++i )
272                         {
273                             const HitItem& rItem = rQueryResultVector[ i ];
274                             set< rtl::OUString >::iterator it;
275                             if( (it = aResultSet.find( rItem.m_aURL )) != aResultSet.end() )
276                             {
277                                 HitItem aItemCopy( rItem );
278                                 aItemCopy.m_fScore /= nQueryListSize;   // To get average score
279                                 if( n == 0 )
280                                 {
281                                     // Use first pass to create entry
282                                     aIndexFolderResultVector.push_back( aItemCopy );
283 
284 #ifdef LOGGING
285                                     if( pFile )
286                                     {
287                                         rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
288                                         fprintf( pFile, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n, i, aItemCopy.m_fScore, rItem.m_fScore, tmp.getStr() );
289                                     }
290 #endif
291                                 }
292                                 else
293                                 {
294                                     // Find entry in vector
295                                     int nCount = aIndexFolderResultVector.size();
296                                     for( int j = 0 ; j < nCount ; ++j )
297                                     {
298                                         HitItem& rFindItem = aIndexFolderResultVector[ j ];
299                                         if( rFindItem.m_aURL.equals( aItemCopy.m_aURL ) )
300                                         {
301 #ifdef LOGGING
302                                             if( pFile )
303                                             {
304                                                 rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
305                                                 fprintf( pFile, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n, i,
306                                                     rFindItem.m_fScore, aItemCopy.m_fScore, rFindItem.m_fScore + aItemCopy.m_fScore, tmp.getStr() );
307                                             }
308 #endif
309 
310                                             rFindItem.m_fScore += aItemCopy.m_fScore;
311                                             break;
312                                         }
313                                     }
314                                 }
315                             }
316                         }
317 
318                         delete pQueryResultVector;
319                     }
320 
321                     sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() );
322                 }
323 
324                 vector<HitItem>* pIndexFolderHitItemVector = new vector<HitItem>( aIndexFolderResultVector );
325                 aIndexFolderResultVectorVector.push_back( pIndexFolderHitItemVector );
326                 aIndexFolderResultVector.clear();
327             }
328             catch( const Exception& )
329             {
330             }
331 
332             ++iDir;
333 
334             if( bTemporary )
335                 aIndexFolderIt.deleteTempIndexFolder( idxDir );
336 
337         }   // Iterator
338 
339 
340         int nVectorCount = aIndexFolderResultVectorVector.size();
341         vector<HitItem>::size_type* pCurrentVectorIndex = new vector<HitItem>::size_type[nVectorCount];
342         for( int j = 0 ; j < nVectorCount ; ++j )
343             pCurrentVectorIndex[j] = 0;
344 
345 #ifdef LOGGING
346         if( pFile )
347         {
348             for( int k = 0 ; k < nVectorCount ; ++k )
349             {
350                 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
351                 int nItemCount = rIndexFolderVector.size();
352 
353                 fprintf( pFile, "Vector %d, %d elements\n", k, nItemCount );
354 
355                 for( int i = 0 ; i < nItemCount ; ++i )
356                 {
357                     const HitItem& rItem = rIndexFolderVector[ i ];
358                     rtl::OString tmp(rtl::OUStringToOString(rItem.m_aURL, RTL_TEXTENCODING_UTF8));
359                     fprintf( pFile, "    Item_vector%d, %d/%d: score=%f, URL=%s\n", k, i, nItemCount, rItem.m_fScore, tmp.getStr() );
360                 }
361             }
362         }
363 #endif
364 
365         sal_Int32 nTotalHitCount = m_aURLParameter.get_hitCount();
366         sal_Int32 nHitCount = 0;
367         while( nHitCount < nTotalHitCount )
368         {
369             int iVectorWithBestScore = -1;
370             float fBestScore = 0.0;
371             for( int k = 0 ; k < nVectorCount ; ++k )
372             {
373                 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
374                 if( pCurrentVectorIndex[k] < rIndexFolderVector.size() )
375                 {
376                     const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[k] ];
377 
378                     if( fBestScore < rItem.m_fScore )
379                     {
380                         fBestScore = rItem.m_fScore;
381                         iVectorWithBestScore = k;
382                     }
383                 }
384             }
385 
386             if( iVectorWithBestScore == -1 )    // No item left at all
387                 break;
388 
389             vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[iVectorWithBestScore];
390             const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[iVectorWithBestScore] ];
391 
392             pCurrentVectorIndex[iVectorWithBestScore]++;
393 
394             aCompleteResultVector.push_back( rItem.m_aURL );
395             ++nHitCount;
396         }
397 
398         delete[] pCurrentVectorIndex;
399         for( int n = 0 ; n < nVectorCount ; ++n )
400         {
401             vector<HitItem>* pIndexFolderVector = aIndexFolderResultVectorVector[n];
402             delete pIndexFolderVector;
403         }
404 
405 #ifdef LOGGING
406         fclose( pFile );
407 #endif
408     }
409 
410     sal_Int32 replIdx = rtl::OUString::createFromAscii( "#HLP#" ).getLength();
411     rtl::OUString replWith = rtl::OUString::createFromAscii( "vnd.sun.star.help://" );
412 
413     int nResultCount = aCompleteResultVector.size();
414     for( int r = 0 ; r < nResultCount ; ++r )
415     {
416         rtl::OUString aURL = aCompleteResultVector[r];
417         rtl::OUString aResultStr = replWith + aURL.copy(replIdx);
418         m_aPath.push_back( aResultStr );
419     }
420 
421     m_aItems.resize( m_aPath.size() );
422     m_aIdents.resize( m_aPath.size() );
423 
424     Command aCommand;
425     aCommand.Name = rtl::OUString::createFromAscii( "getPropertyValues" );
426     aCommand.Argument <<= m_sProperty;
427 
428     for( m_nRow = 0; sal::static_int_cast<sal_uInt32>( m_nRow ) < m_aPath.size(); ++m_nRow )
429     {
430         m_aPath[m_nRow] =
431             m_aPath[m_nRow]                                          +
432             rtl::OUString::createFromAscii( "?Language=" )           +
433             m_aURLParameter.get_language()                           +
434             rtl::OUString::createFromAscii( "&System=" )             +
435             m_aURLParameter.get_system();
436 
437         uno::Reference< XContent > content = queryContent();
438         if( content.is() )
439         {
440             uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY );
441             cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( 0 ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>=
442         }
443     }
444     m_nRow = 0xffffffff;
445 }
446