1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_xmlhelp.hxx" 26 #include <com/sun/star/ucb/Command.hpp> 27 #include <com/sun/star/ucb/XCommandEnvironment.hpp> 28 #include <com/sun/star/i18n/XExtendedTransliteration.hpp> 29 #include <com/sun/star/ucb/XCommandProcessor.hpp> 30 #include <com/sun/star/lang/Locale.hpp> 31 #include <com/sun/star/script/XInvocation.hpp> 32 33 #ifndef INCLUDED_STL_ALGORITHM 34 #include <algorithm> 35 #define INCLUDED_STL_ALGORITHM 36 #endif 37 #ifndef INCLUDED_STL_SET 38 #include <set> 39 #define INCLUDED_STL_SET 40 #endif 41 42 #include <qe/Query.hxx> 43 #include <qe/DocGenerator.hxx> 44 #include "resultsetforquery.hxx" 45 #include "databases.hxx" 46 47 // For testing 48 // #define LOGGING 49 50 using namespace std; 51 using namespace chelp; 52 using namespace xmlsearch::excep; 53 using namespace xmlsearch::qe; 54 using namespace com::sun::star; 55 using namespace com::sun::star::ucb; 56 using namespace com::sun::star::i18n; 57 using namespace com::sun::star::uno; 58 using namespace com::sun::star::lang; 59 60 struct HitItem 61 { 62 rtl::OUString m_aURL; 63 float m_fScore; 64 65 HitItem( void ) {} 66 HitItem( const rtl::OUString& aURL, float fScore ) 67 : m_aURL( aURL ) 68 , m_fScore( fScore ) 69 {} 70 bool operator < ( const HitItem& rHitItem ) const 71 { 72 return rHitItem.m_fScore < m_fScore; 73 } 74 }; 75 76 ResultSetForQuery::ResultSetForQuery( const uno::Reference< lang::XMultiServiceFactory >& xMSF, 77 const uno::Reference< XContentProvider >& xProvider, 78 sal_Int32 nOpenMode, 79 const uno::Sequence< beans::Property >& seq, 80 const uno::Sequence< NumberedSortingInfo >& seqSort, 81 URLParameter& aURLParameter, 82 Databases* pDatabases ) 83 : ResultSetBase( xMSF,xProvider,nOpenMode,seq,seqSort ), 84 m_pDatabases( pDatabases ), 85 m_aURLParameter( aURLParameter ) 86 { 87 Reference< XTransliteration > xTrans( 88 xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.i18n.Transliteration" ) ), 89 UNO_QUERY ); 90 Locale aLocale( aURLParameter.get_language(), 91 rtl::OUString(), 92 rtl::OUString() ); 93 if(xTrans.is()) 94 xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE, 95 aLocale ); 96 97 // Access Lucene via XInvocation 98 Reference< script::XInvocation > xInvocation( 99 xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.help.HelpSearch" ) ), 100 UNO_QUERY ); 101 102 vector< vector< rtl::OUString > > queryList; 103 { 104 sal_Int32 idx; 105 rtl::OUString query = m_aURLParameter.get_query(); 106 while( query.getLength() ) 107 { 108 idx = query.indexOf( sal_Unicode( ' ' ) ); 109 if( idx == -1 ) 110 idx = query.getLength(); 111 112 vector< rtl::OUString > currentQuery; 113 rtl::OUString tmp(query.copy( 0,idx )); 114 rtl:: OUString toliterate = tmp; 115 if(xTrans.is()) { 116 Sequence<sal_Int32> aSeq; 117 toliterate = xTrans->transliterate( 118 tmp,0,tmp.getLength(),aSeq); 119 } 120 121 currentQuery.push_back( toliterate ); 122 queryList.push_back( currentQuery ); 123 124 int nCpy = 1 + idx; 125 if( nCpy >= query.getLength() ) 126 query = rtl::OUString(); 127 else 128 query = query.copy( 1 + idx ); 129 } 130 } 131 132 vector< rtl::OUString > aCompleteResultVector; 133 if( xInvocation.is() ) 134 { 135 rtl::OUString scope = m_aURLParameter.get_scope(); 136 bool bCaptionsOnly = ( scope.compareToAscii( "Heading" ) == 0 ); 137 sal_Int32 hitCount = m_aURLParameter.get_hitCount(); 138 139 #ifdef LOGGING 140 FILE* pFile = fopen( "d:\\resultset_out.txt", "w" ); 141 #endif 142 143 IndexFolderIterator aIndexFolderIt( *pDatabases, m_aURLParameter.get_module(), m_aURLParameter.get_language() ); 144 rtl::OUString idxDir; 145 bool bExtension = false; 146 int iDir = 0; 147 vector< vector<HitItem>* > aIndexFolderResultVectorVector; 148 149 bool bTemporary; 150 while( (idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary )).getLength() > 0 ) 151 { 152 vector<HitItem> aIndexFolderResultVector; 153 154 try 155 { 156 vector< vector<HitItem>* > aQueryListResultVectorVector; 157 set< rtl::OUString > aSet,aCurrent,aResultSet; 158 159 int nQueryListSize = queryList.size(); 160 if( nQueryListSize > 1 ) 161 hitCount = 2000; 162 163 for( int i = 0; i < nQueryListSize; ++i ) 164 { 165 vector<HitItem>* pQueryResultVector; 166 if( nQueryListSize > 1 ) 167 { 168 pQueryResultVector = new vector<HitItem>(); 169 aQueryListResultVectorVector.push_back( pQueryResultVector ); 170 } 171 else 172 { 173 pQueryResultVector = &aIndexFolderResultVector; 174 } 175 pQueryResultVector->reserve( hitCount ); 176 177 int nParamCount = bCaptionsOnly ? 7 : 6; 178 Sequence<uno::Any> aParamsSeq( nParamCount ); 179 180 aParamsSeq[0] = uno::makeAny( rtl::OUString::createFromAscii( "-lang" ) ); 181 aParamsSeq[1] = uno::makeAny( m_aURLParameter.get_language() ); 182 183 aParamsSeq[2] = uno::makeAny( rtl::OUString::createFromAscii( "-index" ) ); 184 rtl::OUString aSystemPath; 185 osl::FileBase::getSystemPathFromFileURL( idxDir, aSystemPath ); 186 aParamsSeq[3] = uno::makeAny( aSystemPath ); 187 188 aParamsSeq[4] = uno::makeAny( rtl::OUString::createFromAscii( "-query" ) ); 189 190 const std::vector< rtl::OUString >& aListItem = queryList[i]; 191 ::rtl::OUString aNewQueryStr = aListItem[0]; 192 aParamsSeq[5] = uno::makeAny( aNewQueryStr ); 193 194 if( bCaptionsOnly ) 195 aParamsSeq[6] = uno::makeAny( rtl::OUString::createFromAscii( "-caption" ) ); 196 197 Sequence< sal_Int16 > aOutParamIndex; 198 Sequence< uno::Any > aOutParam; 199 200 uno::Any aRet = xInvocation->invoke( rtl::OUString::createFromAscii( "search" ), 201 aParamsSeq, aOutParamIndex, aOutParam ); 202 203 Sequence< float > aScoreSeq; 204 int nScoreCount = 0; 205 int nOutParamCount = aOutParam.getLength(); 206 if( nOutParamCount == 1 ) 207 { 208 const uno::Any* pScoreAnySeq = aOutParam.getConstArray(); 209 if( pScoreAnySeq[0] >>= aScoreSeq ) 210 nScoreCount = aScoreSeq.getLength(); 211 } 212 213 Sequence<rtl::OUString> aRetSeq; 214 if( aRet >>= aRetSeq ) 215 { 216 if( nQueryListSize > 1 ) 217 aSet.clear(); 218 219 const rtl::OUString* pRetSeq = aRetSeq.getConstArray(); 220 int nCount = aRetSeq.getLength(); 221 if( nCount > hitCount ) 222 nCount = hitCount; 223 for( int j = 0 ; j < nCount ; ++j ) 224 { 225 float fScore = 0.0; 226 if( j < nScoreCount ) 227 fScore = aScoreSeq[j]; 228 229 rtl::OUString aURL = pRetSeq[j]; 230 pQueryResultVector->push_back( HitItem( aURL, fScore ) ); 231 if( nQueryListSize > 1 ) 232 aSet.insert( aURL ); 233 234 #ifdef LOGGING 235 if( pFile ) 236 { 237 rtl::OString tmp(rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8)); 238 fprintf( pFile, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir, i, fScore, tmp.getStr() ); 239 } 240 #endif 241 } 242 } 243 244 // intersect 245 if( nQueryListSize > 1 ) 246 { 247 if( i == 0 ) 248 { 249 aResultSet = aSet; 250 } 251 else 252 { 253 aCurrent = aResultSet; 254 aResultSet.clear(); 255 set_intersection( aSet.begin(),aSet.end(), 256 aCurrent.begin(),aCurrent.end(), 257 inserter(aResultSet,aResultSet.begin())); 258 } 259 } 260 } 261 262 // Combine results in aIndexFolderResultVector 263 if( nQueryListSize > 1 ) 264 { 265 for( int n = 0 ; n < nQueryListSize ; ++n ) 266 { 267 vector<HitItem>* pQueryResultVector = aQueryListResultVectorVector[n]; 268 vector<HitItem>& rQueryResultVector = *pQueryResultVector; 269 270 int nItemCount = rQueryResultVector.size(); 271 for( int i = 0 ; i < nItemCount ; ++i ) 272 { 273 const HitItem& rItem = rQueryResultVector[ i ]; 274 set< rtl::OUString >::iterator it; 275 if( (it = aResultSet.find( rItem.m_aURL )) != aResultSet.end() ) 276 { 277 HitItem aItemCopy( rItem ); 278 aItemCopy.m_fScore /= nQueryListSize; // To get average score 279 if( n == 0 ) 280 { 281 // Use first pass to create entry 282 aIndexFolderResultVector.push_back( aItemCopy ); 283 284 #ifdef LOGGING 285 if( pFile ) 286 { 287 rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8)); 288 fprintf( pFile, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n, i, aItemCopy.m_fScore, rItem.m_fScore, tmp.getStr() ); 289 } 290 #endif 291 } 292 else 293 { 294 // Find entry in vector 295 int nCount = aIndexFolderResultVector.size(); 296 for( int j = 0 ; j < nCount ; ++j ) 297 { 298 HitItem& rFindItem = aIndexFolderResultVector[ j ]; 299 if( rFindItem.m_aURL.equals( aItemCopy.m_aURL ) ) 300 { 301 #ifdef LOGGING 302 if( pFile ) 303 { 304 rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8)); 305 fprintf( pFile, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n, i, 306 rFindItem.m_fScore, aItemCopy.m_fScore, rFindItem.m_fScore + aItemCopy.m_fScore, tmp.getStr() ); 307 } 308 #endif 309 310 rFindItem.m_fScore += aItemCopy.m_fScore; 311 break; 312 } 313 } 314 } 315 } 316 } 317 318 delete pQueryResultVector; 319 } 320 321 sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() ); 322 } 323 324 vector<HitItem>* pIndexFolderHitItemVector = new vector<HitItem>( aIndexFolderResultVector ); 325 aIndexFolderResultVectorVector.push_back( pIndexFolderHitItemVector ); 326 aIndexFolderResultVector.clear(); 327 } 328 catch( const Exception& ) 329 { 330 } 331 332 ++iDir; 333 334 if( bTemporary ) 335 aIndexFolderIt.deleteTempIndexFolder( idxDir ); 336 337 } // Iterator 338 339 340 int nVectorCount = aIndexFolderResultVectorVector.size(); 341 vector<HitItem>::size_type* pCurrentVectorIndex = new vector<HitItem>::size_type[nVectorCount]; 342 for( int j = 0 ; j < nVectorCount ; ++j ) 343 pCurrentVectorIndex[j] = 0; 344 345 #ifdef LOGGING 346 if( pFile ) 347 { 348 for( int k = 0 ; k < nVectorCount ; ++k ) 349 { 350 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k]; 351 int nItemCount = rIndexFolderVector.size(); 352 353 fprintf( pFile, "Vector %d, %d elements\n", k, nItemCount ); 354 355 for( int i = 0 ; i < nItemCount ; ++i ) 356 { 357 const HitItem& rItem = rIndexFolderVector[ i ]; 358 rtl::OString tmp(rtl::OUStringToOString(rItem.m_aURL, RTL_TEXTENCODING_UTF8)); 359 fprintf( pFile, " Item_vector%d, %d/%d: score=%f, URL=%s\n", k, i, nItemCount, rItem.m_fScore, tmp.getStr() ); 360 } 361 } 362 } 363 #endif 364 365 sal_Int32 nTotalHitCount = m_aURLParameter.get_hitCount(); 366 sal_Int32 nHitCount = 0; 367 while( nHitCount < nTotalHitCount ) 368 { 369 int iVectorWithBestScore = -1; 370 float fBestScore = 0.0; 371 for( int k = 0 ; k < nVectorCount ; ++k ) 372 { 373 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k]; 374 if( pCurrentVectorIndex[k] < rIndexFolderVector.size() ) 375 { 376 const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[k] ]; 377 378 if( fBestScore < rItem.m_fScore ) 379 { 380 fBestScore = rItem.m_fScore; 381 iVectorWithBestScore = k; 382 } 383 } 384 } 385 386 if( iVectorWithBestScore == -1 ) // No item left at all 387 break; 388 389 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[iVectorWithBestScore]; 390 const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[iVectorWithBestScore] ]; 391 392 pCurrentVectorIndex[iVectorWithBestScore]++; 393 394 aCompleteResultVector.push_back( rItem.m_aURL ); 395 ++nHitCount; 396 } 397 398 delete[] pCurrentVectorIndex; 399 for( int n = 0 ; n < nVectorCount ; ++n ) 400 { 401 vector<HitItem>* pIndexFolderVector = aIndexFolderResultVectorVector[n]; 402 delete pIndexFolderVector; 403 } 404 405 #ifdef LOGGING 406 fclose( pFile ); 407 #endif 408 } 409 410 sal_Int32 replIdx = rtl::OUString::createFromAscii( "#HLP#" ).getLength(); 411 rtl::OUString replWith = rtl::OUString::createFromAscii( "vnd.sun.star.help://" ); 412 413 int nResultCount = aCompleteResultVector.size(); 414 for( int r = 0 ; r < nResultCount ; ++r ) 415 { 416 rtl::OUString aURL = aCompleteResultVector[r]; 417 rtl::OUString aResultStr = replWith + aURL.copy(replIdx); 418 m_aPath.push_back( aResultStr ); 419 } 420 421 m_aItems.resize( m_aPath.size() ); 422 m_aIdents.resize( m_aPath.size() ); 423 424 Command aCommand; 425 aCommand.Name = rtl::OUString::createFromAscii( "getPropertyValues" ); 426 aCommand.Argument <<= m_sProperty; 427 428 for( m_nRow = 0; sal::static_int_cast<sal_uInt32>( m_nRow ) < m_aPath.size(); ++m_nRow ) 429 { 430 m_aPath[m_nRow] = 431 m_aPath[m_nRow] + 432 rtl::OUString::createFromAscii( "?Language=" ) + 433 m_aURLParameter.get_language() + 434 rtl::OUString::createFromAscii( "&System=" ) + 435 m_aURLParameter.get_system(); 436 437 uno::Reference< XContent > content = queryContent(); 438 if( content.is() ) 439 { 440 uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY ); 441 cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( 0 ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>= 442 } 443 } 444 m_nRow = 0xffffffff; 445 } 446