1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_i18npool.hxx" 26 27 #include <breakiteratorImpl.hxx> 28 #include <unicode/uchar.h> 29 #include <rtl/ustrbuf.hxx> 30 31 using namespace ::com::sun::star::uno; 32 using namespace ::com::sun::star::lang; 33 using namespace ::rtl; 34 35 namespace com { namespace sun { namespace star { namespace i18n { 36 37 BreakIteratorImpl::BreakIteratorImpl( const Reference < XMultiServiceFactory >& rxMSF ) : xMSF( rxMSF ) 38 { 39 } 40 41 BreakIteratorImpl::BreakIteratorImpl() 42 { 43 } 44 45 BreakIteratorImpl::~BreakIteratorImpl() 46 { 47 // Clear lookuptable 48 for (size_t l = 0; l < lookupTable.size(); l++) 49 delete lookupTable[l]; 50 lookupTable.clear(); 51 } 52 53 #define LBI getLocaleSpecificBreakIterator(rLocale) 54 55 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharacters( const OUString& Text, sal_Int32 nStartPos, 56 const Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone ) 57 throw(RuntimeException) 58 { 59 if (nCount < 0) throw RuntimeException(); 60 61 return LBI->nextCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone); 62 } 63 64 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharacters( const OUString& Text, sal_Int32 nStartPos, 65 const Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone ) 66 throw(RuntimeException) 67 { 68 if (nCount < 0) throw RuntimeException(); 69 70 return LBI->previousCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone); 71 } 72 73 #define isZWSP(c) (ch == 0x200B) 74 75 static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len, sal_Int16 rWordType, sal_Bool bDirection) 76 { 77 sal_uInt32 ch=0; 78 sal_Int32 pos=nPos; 79 switch (rWordType) { 80 case WordType::ANYWORD_IGNOREWHITESPACES: 81 if (bDirection) 82 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos; 83 else 84 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos; 85 break; 86 case WordType::DICTIONARY_WORD: 87 if (bDirection) 88 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch) || 89 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos; 90 else 91 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch) || 92 ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos; 93 break; 94 case WordType::WORD_COUNT: 95 if (bDirection) 96 while (nPos < len && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos; 97 else 98 while (nPos > 0 && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos; 99 break; 100 } 101 return nPos; 102 } 103 104 Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 nStartPos, 105 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException) 106 { 107 sal_Int32 len = Text.getLength(); 108 if( nStartPos < 0 || len == 0 ) 109 result.endPos = result.startPos = 0; 110 else if (nStartPos >= len) 111 result.endPos = result.startPos = len; 112 else { 113 result = LBI->nextWord(Text, nStartPos, rLocale, rWordType); 114 115 nStartPos = skipSpace(Text, result.startPos, len, rWordType, sal_True); 116 117 if ( nStartPos != result.startPos) { 118 if( nStartPos >= len ) 119 result.startPos = result.endPos = len; 120 else { 121 result = LBI->getWordBoundary(Text, nStartPos, rLocale, rWordType, sal_True); 122 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts 123 if (result.startPos < nStartPos) result.startPos = nStartPos; 124 } 125 } 126 } 127 return result; 128 } 129 130 static inline sal_Bool SAL_CALL isCJK( const Locale& rLocale ) { 131 return rLocale.Language.equalsAscii("zh") || rLocale.Language.equalsAscii("ja") || rLocale.Language.equalsAscii("ko"); 132 } 133 134 Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos, 135 const Locale& rLocale, sal_Int16 rWordType) throw(RuntimeException) 136 { 137 sal_Int32 len = Text.getLength(); 138 if( nStartPos <= 0 || len == 0 ) { 139 result.endPos = result.startPos = 0; 140 return result; 141 } else if (nStartPos > len) { 142 result.endPos = result.startPos = len; 143 return result; 144 } 145 146 sal_Int32 nPos = skipSpace(Text, nStartPos, len, rWordType, sal_False); 147 148 // if some spaces are skiped, and the script type is Asian with no CJK rLocale, we have to return 149 // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary. 150 result.startPos = nPos; 151 if (nPos != nStartPos && nPos > 0 && !isCJK(rLocale) && getScriptClass(Text.iterateCodePoints(&nPos, -1)) == ScriptType::ASIAN) { 152 result.endPos = -1; 153 return result; 154 } 155 156 return LBI->previousWord(Text, result.startPos, rLocale, rWordType); 157 } 158 159 160 Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_Int32 nPos, const Locale& rLocale, 161 sal_Int16 rWordType, sal_Bool bDirection ) throw(RuntimeException) 162 { 163 sal_Int32 len = Text.getLength(); 164 if( nPos < 0 || len == 0 ) 165 result.endPos = result.startPos = 0; 166 else if (nPos > len) 167 result.endPos = result.startPos = len; 168 else { 169 sal_Int32 next, prev; 170 next = skipSpace(Text, nPos, len, rWordType, sal_True); 171 prev = skipSpace(Text, nPos, len, rWordType, sal_False); 172 if (prev == 0 && next == len) { 173 result.endPos = result.startPos = nPos; 174 } else if (prev == 0 && ! bDirection) { 175 result.endPos = result.startPos = 0; 176 } else if (next == len && bDirection) { 177 result.endPos = result.startPos = len; 178 } else { 179 if (next != prev) { 180 if (next == nPos && next != len) 181 bDirection = sal_True; 182 else if (prev == nPos && prev != 0) 183 bDirection = sal_False; 184 else 185 nPos = bDirection ? next : prev; 186 } 187 result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection); 188 } 189 } 190 return result; 191 } 192 193 sal_Bool SAL_CALL BreakIteratorImpl::isBeginWord( const OUString& Text, sal_Int32 nPos, 194 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException) 195 { 196 sal_Int32 len = Text.getLength(); 197 198 if (nPos < 0 || nPos >= len) return sal_False; 199 200 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_True); 201 202 if (tmp != nPos) return sal_False; 203 204 result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_True); 205 206 return result.startPos == nPos; 207 } 208 209 sal_Bool SAL_CALL BreakIteratorImpl::isEndWord( const OUString& Text, sal_Int32 nPos, 210 const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException) 211 { 212 sal_Int32 len = Text.getLength(); 213 214 if (nPos <= 0 || nPos > len) return sal_False; 215 216 sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_False); 217 218 if (tmp != nPos) return sal_False; 219 220 result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_False); 221 222 return result.endPos == nPos; 223 } 224 225 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfSentence( const OUString& Text, sal_Int32 nStartPos, 226 const Locale &rLocale ) throw(RuntimeException) 227 { 228 if (nStartPos < 0 || nStartPos > Text.getLength()) 229 return -1; 230 if (Text.getLength() == 0) return 0; 231 return LBI->beginOfSentence(Text, nStartPos, rLocale); 232 } 233 234 sal_Int32 SAL_CALL BreakIteratorImpl::endOfSentence( const OUString& Text, sal_Int32 nStartPos, 235 const Locale &rLocale ) throw(RuntimeException) 236 { 237 if (nStartPos < 0 || nStartPos > Text.getLength()) 238 return -1; 239 if (Text.getLength() == 0) return 0; 240 return LBI->endOfSentence(Text, nStartPos, rLocale); 241 } 242 243 LineBreakResults SAL_CALL BreakIteratorImpl::getLineBreak( const OUString& Text, sal_Int32 nStartPos, 244 const Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions, 245 const LineBreakUserOptions& bOptions ) throw(RuntimeException) 246 { 247 return LBI->getLineBreak(Text, nStartPos, rLocale, nMinBreakPos, hOptions, bOptions); 248 } 249 250 sal_Int16 SAL_CALL BreakIteratorImpl::getScriptType( const OUString& Text, sal_Int32 nPos ) 251 throw(RuntimeException) 252 { 253 return (nPos < 0 || nPos >= Text.getLength()) ? ScriptType::WEAK : 254 getScriptClass(Text.iterateCodePoints(&nPos, 0)); 255 } 256 257 258 /** Increments/decrements position first, then obtains character. 259 @return current position, may be -1 or text length if string was consumed. 260 */ 261 static sal_Int32 SAL_CALL iterateCodePoints(const OUString& Text, sal_Int32 &nStartPos, sal_Int32 inc, sal_uInt32& ch) { 262 sal_Int32 nLen = Text.getLength(); 263 if (nStartPos + inc < 0 || nStartPos + inc >= nLen) { 264 ch = 0; 265 nStartPos = nStartPos + inc < 0 ? -1 : nLen; 266 } else { 267 ch = Text.iterateCodePoints(&nStartPos, inc); 268 // Fix for #i80436#. 269 // erAck: 2009-06-30T21:52+0200 This logic looks somewhat 270 // suspicious as if it cures a symptom.. anyway, had to add 271 // nStartPos < Text.getLength() to silence the (correct) assertion 272 // in rtl_uString_iterateCodePoints() if Text was one character 273 // (codepoint) only, made up of a surrogate pair. 274 //if (inc > 0 && nStartPos < Text.getLength()) 275 // ch = Text.iterateCodePoints(&nStartPos, 0); 276 // With surrogates, nStartPos may actually point behind string 277 // now, even if inc is only +1 278 if (inc > 0) 279 ch = (nStartPos < nLen ? Text.iterateCodePoints(&nStartPos, 0) : 0); 280 } 281 return nStartPos; 282 } 283 284 285 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfScript( const OUString& Text, 286 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException) 287 { 288 if (nStartPos < 0 || nStartPos >= Text.getLength()) 289 return -1; 290 291 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) 292 return -1; 293 294 if (nStartPos == 0) return 0; 295 sal_uInt32 ch=0; 296 while (iterateCodePoints(Text, nStartPos, -1, ch) >= 0 && ScriptType == getScriptClass(ch)) { 297 if (nStartPos == 0) return 0; 298 } 299 300 return iterateCodePoints(Text, nStartPos, 1, ch); 301 } 302 303 sal_Int32 SAL_CALL BreakIteratorImpl::endOfScript( const OUString& Text, 304 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException) 305 { 306 if (nStartPos < 0 || nStartPos >= Text.getLength()) 307 return -1; 308 309 if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) 310 return -1; 311 312 sal_Int32 strLen = Text.getLength(); 313 sal_uInt32 ch=0; 314 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) { 315 sal_Int16 currentCharScriptType = getScriptClass(ch); 316 if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK) 317 break; 318 } 319 return nStartPos; 320 } 321 322 sal_Int32 SAL_CALL BreakIteratorImpl::previousScript( const OUString& Text, 323 sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException) 324 { 325 if (nStartPos < 0) 326 return -1; 327 if (nStartPos > Text.getLength()) 328 nStartPos = Text.getLength(); 329 330 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2; 331 332 sal_uInt32 ch=0; 333 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) { 334 if ((((numberOfChange % 2) == 0) ^ (ScriptType != getScriptClass(ch)))) 335 numberOfChange--; 336 else if (nStartPos == 0) { 337 if (numberOfChange > 0) 338 numberOfChange--; 339 if (nStartPos > 0) 340 Text.iterateCodePoints(&nStartPos, -1); 341 else 342 return -1; 343 } 344 } 345 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1; 346 } 347 348 sal_Int32 SAL_CALL BreakIteratorImpl::nextScript( const OUString& Text, sal_Int32 nStartPos, 349 sal_Int16 ScriptType ) throw(RuntimeException) 350 351 { 352 if (nStartPos < 0) 353 nStartPos = 0; 354 sal_Int32 strLen = Text.getLength(); 355 if (nStartPos > strLen) 356 return -1; 357 358 sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1; 359 360 sal_uInt32 ch=0; 361 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) { 362 sal_Int16 currentCharScriptType = getScriptClass(ch); 363 if ((numberOfChange == 1) ? (ScriptType == currentCharScriptType) : 364 (ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)) 365 numberOfChange--; 366 } 367 return numberOfChange == 0 ? nStartPos : -1; 368 } 369 370 sal_Int32 SAL_CALL BreakIteratorImpl::beginOfCharBlock( const OUString& Text, sal_Int32 nStartPos, 371 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException) 372 { 373 if (CharType == CharType::ANY_CHAR) return 0; 374 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1; 375 if (CharType != (sal_Int16)u_charType( Text.iterateCodePoints(&nStartPos, 0))) return -1; 376 377 sal_Int32 nPos=nStartPos; 378 while(nStartPos > 0 && CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nPos, -1))) { nStartPos=nPos; } 379 return nStartPos; // begin of char block is inclusive 380 } 381 382 sal_Int32 SAL_CALL BreakIteratorImpl::endOfCharBlock( const OUString& Text, sal_Int32 nStartPos, 383 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException) 384 { 385 sal_Int32 strLen = Text.getLength(); 386 387 if (CharType == CharType::ANY_CHAR) return strLen; // end of char block is exclusive 388 if (nStartPos < 0 || nStartPos >= strLen) return -1; 389 if (CharType != (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) return -1; 390 391 sal_uInt32 ch=0; 392 while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen && CharType == (sal_Int16)u_charType(ch)) {} 393 return nStartPos; // end of char block is exclusive 394 } 395 396 sal_Int32 SAL_CALL BreakIteratorImpl::nextCharBlock( const OUString& Text, sal_Int32 nStartPos, 397 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException) 398 { 399 if (CharType == CharType::ANY_CHAR) return -1; 400 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1; 401 402 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1; 403 sal_Int32 strLen = Text.getLength(); 404 405 sal_uInt32 ch=0; 406 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) { 407 if ((CharType != (sal_Int16)u_charType(ch)) ^ (numberOfChange == 1)) 408 numberOfChange--; 409 } 410 return numberOfChange == 0 ? nStartPos : -1; 411 } 412 413 sal_Int32 SAL_CALL BreakIteratorImpl::previousCharBlock( const OUString& Text, sal_Int32 nStartPos, 414 const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException) 415 { 416 if(CharType == CharType::ANY_CHAR) return -1; 417 if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1; 418 419 sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2; 420 421 sal_uInt32 ch=0; 422 while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) { 423 if (((numberOfChange % 2) == 0) ^ (CharType != (sal_Int16)u_charType(ch))) 424 numberOfChange--; 425 if (nStartPos == 0 && numberOfChange > 0) { 426 numberOfChange--; 427 if (numberOfChange == 0) return nStartPos; 428 } 429 } 430 return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1; 431 } 432 433 434 435 sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/, 436 sal_Int32 /*nPos*/, const Locale& /*rLocale*/ ) throw(RuntimeException) 437 { 438 return 0; 439 } 440 441 typedef struct { 442 UBlockCode from; 443 UBlockCode to; 444 sal_Int16 script; 445 } UBlock2Script; 446 447 // for a list of the UBLOCK_... values see: 448 // http://icu-project.org/apiref/icu4c/uchar_8h.html 449 // where enum UBlockCode is defined. 450 // See also http://www.unicode.org/charts/ for general reference 451 static UBlock2Script scriptList[] = { 452 {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK}, 453 {UBLOCK_BASIC_LATIN, UBLOCK_ARMENIAN, ScriptType::LATIN}, 454 {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX}, 455 {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN}, 456 {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN}, 457 {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX}, 458 {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN}, 459 {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX}, 460 {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN}, 461 {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN}, 462 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN}, 463 {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX}, 464 {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN}, 465 {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX}, 466 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN}, 467 {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN}, 468 {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN}, 469 {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN} 470 }; 471 472 #define scriptListCount sizeof (scriptList) / sizeof (UBlock2Script) 473 474 sal_Int16 BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar) 475 { 476 static sal_uInt32 lastChar = 0; 477 static sal_Int16 nRet = 0; 478 479 if (currentChar != lastChar) { 480 lastChar = currentChar; 481 482 //JP 21.9.2001: handle specific characters - always as weak 483 // definition of 1 - this breaks a word 484 // 2 - this can be inside a word 485 // 0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char. 486 if( 1 == currentChar || 2 == currentChar || 0x20 == currentChar || 0xA0 == currentChar) 487 nRet = ScriptType::WEAK; 488 // workaround for Coptic 489 else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar) 490 nRet = ScriptType::LATIN; 491 // work-around for ligatures (see http://www.unicode.org/charts/PDF/UFB00.pdf) 492 else if ((0xFB00 <= currentChar && currentChar <= 0xFB06) || 493 (0xFB13 <= currentChar && currentChar <= 0xFB17)) 494 nRet = ScriptType::LATIN; 495 else { 496 UBlockCode block=ublock_getCode(currentChar); 497 sal_uInt16 i; 498 for ( i = 0; i < scriptListCount; i++) { 499 if (block <= scriptList[i].to) break; 500 } 501 nRet=(i < scriptListCount && block >= scriptList[i].from) ? scriptList[i].script : ScriptType::WEAK; 502 } 503 } 504 return nRet; 505 } 506 507 static inline sal_Bool operator == (const Locale& l1, const Locale& l2) { 508 return l1.Language == l2.Language && l1.Country == l2.Country && l1.Variant == l2.Variant; 509 } 510 511 sal_Bool SAL_CALL BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocaleName) throw( RuntimeException ) 512 { 513 // to share service between same Language but different Country code, like zh_CN and zh_TW 514 for (size_t l = 0; l < lookupTable.size(); l++) { 515 lookupTableItem *listItem = lookupTable[l]; 516 if (aLocaleName == listItem->aLocale.Language) { 517 xBI = listItem->xBI; 518 return sal_True; 519 } 520 } 521 522 Reference < uno::XInterface > xI = xMSF->createInstance( 523 OUString::createFromAscii("com.sun.star.i18n.BreakIterator_") + aLocaleName); 524 525 if ( xI.is() ) { 526 xI->queryInterface( getCppuType((const Reference< XBreakIterator>*)0) ) >>= xBI; 527 if (xBI.is()) { 528 lookupTable.push_back(new lookupTableItem(Locale(aLocaleName, aLocaleName, aLocaleName), xBI)); 529 return sal_True; 530 } 531 } 532 return sal_False; 533 } 534 535 Reference < XBreakIterator > SAL_CALL 536 BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale) throw (RuntimeException) 537 { 538 if (xBI.is() && rLocale == aLocale) 539 return xBI; 540 else if (xMSF.is()) { 541 aLocale = rLocale; 542 543 for (size_t i = 0; i < lookupTable.size(); i++) { 544 lookupTableItem *listItem = lookupTable[i]; 545 if (rLocale == listItem->aLocale) 546 return xBI = listItem->xBI; 547 } 548 549 sal_Unicode under = (sal_Unicode)'_'; 550 551 sal_Int32 l = rLocale.Language.getLength(); 552 sal_Int32 c = rLocale.Country.getLength(); 553 sal_Int32 v = rLocale.Variant.getLength(); 554 OUStringBuffer aBuf(l+c+v+3); 555 556 if ((l > 0 && c > 0 && v > 0 && 557 // load service with name <base>_<lang>_<country>_<varian> 558 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append( 559 rLocale.Country).append(under).append(rLocale.Variant).makeStringAndClear())) || 560 (l > 0 && c > 0 && 561 // load service with name <base>_<lang>_<country> 562 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append( 563 rLocale.Country).makeStringAndClear())) || 564 (l > 0 && c > 0 && rLocale.Language.compareToAscii("zh") == 0 && 565 (rLocale.Country.compareToAscii("HK") == 0 || 566 rLocale.Country.compareToAscii("MO") == 0) && 567 // if the country code is HK or MO, one more step to try TW. 568 createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).appendAscii( 569 "TW").makeStringAndClear())) || 570 (l > 0 && 571 // load service with name <base>_<lang> 572 createLocaleSpecificBreakIterator(rLocale.Language)) || 573 // load default service with name <base>_Unicode 574 createLocaleSpecificBreakIterator(OUString::createFromAscii("Unicode"))) { 575 lookupTable.push_back( new lookupTableItem(aLocale, xBI) ); 576 return xBI; 577 } 578 } 579 throw RuntimeException(); 580 } 581 582 const sal_Char cBreakIterator[] = "com.sun.star.i18n.BreakIterator"; 583 584 OUString SAL_CALL 585 BreakIteratorImpl::getImplementationName(void) throw( RuntimeException ) 586 { 587 return OUString::createFromAscii(cBreakIterator); 588 } 589 590 sal_Bool SAL_CALL 591 BreakIteratorImpl::supportsService(const OUString& rServiceName) throw( RuntimeException ) 592 { 593 return !rServiceName.compareToAscii(cBreakIterator); 594 } 595 596 Sequence< OUString > SAL_CALL 597 BreakIteratorImpl::getSupportedServiceNames(void) throw( RuntimeException ) 598 { 599 Sequence< OUString > aRet(1); 600 aRet[0] = OUString::createFromAscii(cBreakIterator); 601 return aRet; 602 } 603 604 } } } } 605 606