1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 #if defined(_MSC_VER) && (_MSC_VER >= 1400) 24 #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance 25 #endif 26 27 #include <rtl/memory.h> 28 #include <osl/diagnose.h> 29 #include <osl/interlck.h> 30 #include <rtl/alloc.h> 31 #include <osl/mutex.h> 32 #include <osl/doublecheckedlocking.h> 33 #include <rtl/tencinfo.h> 34 35 #include <string.h> 36 #include <sal/alloca.h> 37 38 #include "hash.h" 39 #include "strimp.h" 40 #include "surrogates.h" 41 #include <rtl/ustring.h> 42 43 #include "rtl/math.h" 44 #include "rtl/tencinfo.h" 45 46 /* ======================================================================= */ 47 48 /* static data to be referenced by all empty strings 49 * the refCount is predefined to 1 and must never become 0 ! 50 */ 51 static rtl_uString const aImplEmpty_rtl_uString = 52 { 53 (sal_Int32) (SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32 refCount; */ 54 0, /*sal_Int32 length; */ 55 { 0 } /*sal_Unicode buffer[1];*/ 56 }; 57 58 /* ======================================================================= */ 59 60 #define IMPL_RTL_STRCODE sal_Unicode 61 #define IMPL_RTL_USTRCODE( c ) (c) 62 #define IMPL_RTL_STRNAME( n ) rtl_ustr_ ## n 63 64 #define IMPL_RTL_STRINGNAME( n ) rtl_uString_ ## n 65 #define IMPL_RTL_STRINGDATA rtl_uString 66 #define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_uString 67 #define IMPL_RTL_INTERN 68 static void internRelease (rtl_uString *pThis); 69 70 /* ======================================================================= */ 71 72 /* Include String/UString template code */ 73 74 #include "strtmpl.c" 75 76 sal_Int32 rtl_ustr_indexOfAscii_WithLength( 77 sal_Unicode const * str, sal_Int32 len, 78 char const * subStr, sal_Int32 subLen) 79 { 80 if (subLen > 0 && subLen <= len) { 81 sal_Int32 i; 82 for (i = 0; i <= len - subLen; ++i) { 83 if (rtl_ustr_asciil_reverseEquals_WithLength( 84 str + i, subStr, subLen)) 85 { 86 return i; 87 } 88 } 89 } 90 return -1; 91 } 92 93 sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength( 94 sal_Unicode const * str, sal_Int32 len, 95 char const * subStr, sal_Int32 subLen) 96 { 97 if (subLen > 0 && subLen <= len) { 98 sal_Int32 i; 99 for (i = len - subLen; i >= 0; --i) { 100 if (rtl_ustr_asciil_reverseEquals_WithLength( 101 str + i, subStr, subLen)) 102 { 103 return i; 104 } 105 } 106 } 107 return -1; 108 } 109 110 sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f) 111 { 112 rtl_uString * pResult = NULL; 113 sal_Int32 nLen; 114 rtl_math_doubleToUString( 115 &pResult, 0, 0, f, rtl_math_StringFormat_G, 116 RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 117 0, sal_True); 118 nLen = pResult->length; 119 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT); 120 rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode)); 121 rtl_uString_release(pResult); 122 return nLen; 123 } 124 125 sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d) 126 { 127 rtl_uString * pResult = NULL; 128 sal_Int32 nLen; 129 rtl_math_doubleToUString( 130 &pResult, 0, 0, d, rtl_math_StringFormat_G, 131 RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 132 0, sal_True); 133 nLen = pResult->length; 134 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE); 135 rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode)); 136 rtl_uString_release(pResult); 137 return nLen; 138 } 139 140 float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr) 141 { 142 return (float) rtl_math_uStringToDouble(pStr, 143 pStr + rtl_ustr_getLength(pStr), 144 '.', 0, 0, 0); 145 } 146 147 double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr) 148 { 149 return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.', 150 0, 0, 0); 151 } 152 153 /* ======================================================================= */ 154 155 sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1, 156 const sal_Char* pStr2 ) 157 { 158 sal_Int32 nRet; 159 while ( ((nRet = ((sal_Int32)(*pStr1))- 160 ((sal_Int32)((unsigned char)(*pStr2)))) == 0) && 161 *pStr2 ) 162 { 163 pStr1++; 164 pStr2++; 165 } 166 167 return nRet; 168 } 169 170 /* ----------------------------------------------------------------------- */ 171 172 sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1, 173 sal_Int32 nStr1Len, 174 const sal_Char* pStr2 ) 175 { 176 sal_Int32 nRet = 0; 177 while( ((nRet = (nStr1Len ? (sal_Int32)(*pStr1) : 0)- 178 ((sal_Int32)((unsigned char)(*pStr2)))) == 0) && 179 nStr1Len && *pStr2 ) 180 { 181 pStr1++; 182 pStr2++; 183 nStr1Len--; 184 } 185 186 return nRet; 187 } 188 189 /* ----------------------------------------------------------------------- */ 190 191 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1, 192 sal_Int32 nStr1Len, 193 const sal_Char* pStr2, 194 sal_Int32 nShortenedLength ) 195 { 196 const sal_Unicode* pStr1End = pStr1 + nStr1Len; 197 sal_Int32 nRet; 198 while ( (nShortenedLength > 0) && 199 (pStr1 < pStr1End) && *pStr2 ) 200 { 201 /* Check ASCII range */ 202 OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127"); 203 204 nRet = ((sal_Int32)*pStr1)- 205 ((sal_Int32)(unsigned char)*pStr2); 206 if ( nRet != 0 ) 207 return nRet; 208 209 nShortenedLength--; 210 pStr1++; 211 pStr2++; 212 } 213 214 if ( nShortenedLength <= 0 ) 215 return 0; 216 217 if ( *pStr2 ) 218 { 219 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" ); 220 // first is a substring of the second string => less (negative value) 221 nRet = -1; 222 } 223 else 224 { 225 // greater or equal 226 nRet = pStr1End - pStr1; 227 } 228 229 return nRet; 230 } 231 232 /* ----------------------------------------------------------------------- */ 233 234 sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1, 235 sal_Int32 nStr1Len, 236 const sal_Char* pStr2, 237 sal_Int32 nStr2Len ) 238 { 239 const sal_Unicode* pStr1Run = pStr1+nStr1Len; 240 const sal_Char* pStr2Run = pStr2+nStr2Len; 241 sal_Int32 nRet; 242 while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) ) 243 { 244 pStr1Run--; 245 pStr2Run--; 246 nRet = ((sal_Int32)*pStr1Run)-((sal_Int32)*pStr2Run); 247 if ( nRet ) 248 return nRet; 249 } 250 251 return nStr1Len - nStr2Len; 252 } 253 254 /* ----------------------------------------------------------------------- */ 255 256 sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1, 257 const sal_Char* pStr2, 258 sal_Int32 nStrLen ) 259 { 260 const sal_Unicode* pStr1Run = pStr1+nStrLen; 261 const sal_Char* pStr2Run = pStr2+nStrLen; 262 while ( pStr1 < pStr1Run ) 263 { 264 pStr1Run--; 265 pStr2Run--; 266 if( *pStr1Run != (sal_Unicode)*pStr2Run ) 267 return sal_False; 268 } 269 270 return sal_True; 271 } 272 273 /* ----------------------------------------------------------------------- */ 274 275 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1, 276 const sal_Char* pStr2 ) 277 { 278 sal_Int32 nRet; 279 sal_Int32 c1; 280 sal_Int32 c2; 281 do 282 { 283 /* If character between 'A' and 'Z', than convert it to lowercase */ 284 c1 = (sal_Int32)*pStr1; 285 c2 = (sal_Int32)((unsigned char)*pStr2); 286 if ( (c1 >= 65) && (c1 <= 90) ) 287 c1 += 32; 288 if ( (c2 >= 65) && (c2 <= 90) ) 289 c2 += 32; 290 nRet = c1-c2; 291 if ( nRet != 0 ) 292 return nRet; 293 294 pStr1++; 295 pStr2++; 296 } 297 while ( c2 ); 298 299 return 0; 300 } 301 302 /* ----------------------------------------------------------------------- */ 303 304 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1, 305 sal_Int32 nStr1Len, 306 const sal_Char* pStr2 ) 307 { 308 sal_Int32 nRet; 309 sal_Int32 c1; 310 sal_Int32 c2; 311 do 312 { 313 if ( !nStr1Len ) 314 return *pStr2 == '\0' ? 0 : -1; 315 316 /* If character between 'A' and 'Z', than convert it to lowercase */ 317 c1 = (sal_Int32)*pStr1; 318 c2 = (sal_Int32)((unsigned char)*pStr2); 319 if ( (c1 >= 65) && (c1 <= 90) ) 320 c1 += 32; 321 if ( (c2 >= 65) && (c2 <= 90) ) 322 c2 += 32; 323 nRet = c1-c2; 324 if ( nRet != 0 ) 325 return nRet; 326 327 pStr1++; 328 pStr2++; 329 nStr1Len--; 330 } 331 while( c2 ); 332 333 return 0; 334 } 335 336 sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths( 337 sal_Unicode const * first, sal_Int32 firstLen, 338 char const * second, sal_Int32 secondLen) 339 { 340 sal_Int32 i; 341 sal_Int32 len = firstLen < secondLen ? firstLen : secondLen; 342 for (i = 0; i < len; ++i) { 343 sal_Int32 c1 = *first++; 344 sal_Int32 c2 = (unsigned char) *second++; 345 sal_Int32 d; 346 if (c1 >= 65 && c1 <= 90) { 347 c1 += 32; 348 } 349 if (c2 >= 65 && c2 <= 90) { 350 c2 += 32; 351 } 352 d = c1 - c2; 353 if (d != 0) { 354 return d; 355 } 356 } 357 return firstLen - secondLen; 358 } 359 360 /* ----------------------------------------------------------------------- */ 361 362 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1, 363 sal_Int32 nStr1Len, 364 const sal_Char* pStr2, 365 sal_Int32 nShortenedLength ) 366 { 367 const sal_Unicode* pStr1End = pStr1 + nStr1Len; 368 sal_Int32 nRet; 369 sal_Int32 c1; 370 sal_Int32 c2; 371 while ( (nShortenedLength > 0) && 372 (pStr1 < pStr1End) && *pStr2 ) 373 { 374 /* Check ASCII range */ 375 OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127"); 376 377 /* If character between 'A' and 'Z', than convert it to lowercase */ 378 c1 = (sal_Int32)*pStr1; 379 c2 = (sal_Int32)((unsigned char)*pStr2); 380 if ( (c1 >= 65) && (c1 <= 90) ) 381 c1 += 32; 382 if ( (c2 >= 65) && (c2 <= 90) ) 383 c2 += 32; 384 nRet = c1-c2; 385 if ( nRet != 0 ) 386 return nRet; 387 388 nShortenedLength--; 389 pStr1++; 390 pStr2++; 391 } 392 393 if ( nShortenedLength <= 0 ) 394 return 0; 395 396 if ( *pStr2 ) 397 { 398 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" ); 399 // first is a substring of the second string => less (negative value) 400 nRet = -1; 401 } 402 else 403 { 404 // greater or equal 405 nRet = pStr1End - pStr1; 406 } 407 408 return nRet; 409 } 410 411 /* ----------------------------------------------------------------------- */ 412 413 void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis, 414 const sal_Char* pCharStr ) 415 { 416 sal_Int32 nLen; 417 418 if ( pCharStr ) 419 { 420 const sal_Char* pTempStr = pCharStr; 421 while( *pTempStr ) 422 pTempStr++; 423 nLen = pTempStr-pCharStr; 424 } 425 else 426 nLen = 0; 427 428 if ( !nLen ) 429 { 430 IMPL_RTL_STRINGNAME( new )( ppThis ); 431 return; 432 } 433 434 if ( *ppThis ) 435 IMPL_RTL_STRINGNAME( release )( *ppThis ); 436 437 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); 438 OSL_ASSERT(*ppThis != NULL); 439 if ( (*ppThis) ) 440 { 441 IMPL_RTL_STRCODE* pBuffer = (*ppThis)->buffer; 442 do 443 { 444 /* Check ASCII range */ 445 OSL_ENSURE( ((unsigned char)*pCharStr) <= 127, 446 "rtl_uString_newFromAscii() - Found ASCII char > 127" ); 447 448 *pBuffer = *pCharStr; 449 pBuffer++; 450 pCharStr++; 451 } 452 while ( *pCharStr ); 453 } 454 } 455 456 void SAL_CALL rtl_uString_newFromCodePoints( 457 rtl_uString ** newString, sal_uInt32 const * codePoints, 458 sal_Int32 codePointCount) 459 { 460 sal_Int32 n; 461 sal_Int32 i; 462 sal_Unicode * p; 463 OSL_ASSERT( 464 newString != NULL && 465 (codePoints != NULL || codePointCount == 0) && 466 codePointCount >= 0); 467 if (codePointCount == 0) { 468 rtl_uString_new(newString); 469 return; 470 } 471 if (*newString != NULL) { 472 rtl_uString_release(*newString); 473 } 474 n = codePointCount; 475 for (i = 0; i < codePointCount; ++i) { 476 OSL_ASSERT(codePoints[i] <= 0x10FFFF); 477 if (codePoints[i] >= 0x10000) { 478 ++n; 479 } 480 } 481 /* Builds on the assumption that sal_Int32 uses 32 bit two's complement 482 representation with wrap around (the necessary number of UTF-16 code 483 units will be no larger than 2 * SAL_MAX_INT32, represented as 484 sal_Int32 -2): */ 485 if (n < 0) { 486 *newString = NULL; 487 return; 488 } 489 *newString = rtl_uString_ImplAlloc(n); 490 if (*newString == NULL) { 491 return; 492 } 493 p = (*newString)->buffer; 494 for (i = 0; i < codePointCount; ++i) { 495 sal_uInt32 c = codePoints[i]; 496 if (c < 0x10000) { 497 *p++ = (sal_Unicode) c; 498 } else { 499 c -= 0x10000; 500 *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE); 501 *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE); 502 } 503 } 504 } 505 506 /* ======================================================================= */ 507 508 static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen ) 509 { 510 int n; 511 sal_uChar c; 512 const sal_Char* pEndStr; 513 514 n = 0; 515 pEndStr = pStr+nLen; 516 while ( pStr < pEndStr ) 517 { 518 c = (sal_uChar)*pStr; 519 520 if ( !(c & 0x80) ) 521 pStr++; 522 else if ( (c & 0xE0) == 0xC0 ) 523 pStr += 2; 524 else if ( (c & 0xF0) == 0xE0 ) 525 pStr += 3; 526 else if ( (c & 0xF8) == 0xF0 ) 527 pStr += 4; 528 else if ( (c & 0xFC) == 0xF8 ) 529 pStr += 5; 530 else if ( (c & 0xFE) == 0xFC ) 531 pStr += 6; 532 else 533 pStr++; 534 535 n++; 536 } 537 538 return n; 539 } 540 541 /* ----------------------------------------------------------------------- */ 542 543 static void rtl_string2UString_status( rtl_uString** ppThis, 544 const sal_Char* pStr, 545 sal_Int32 nLen, 546 rtl_TextEncoding eTextEncoding, 547 sal_uInt32 nCvtFlags, 548 sal_uInt32 *pInfo ) 549 { 550 OSL_ENSURE(rtl_isOctetTextEncoding(eTextEncoding), 551 "rtl_string2UString_status() - Wrong TextEncoding" ); 552 553 if ( !nLen ) 554 { 555 rtl_uString_new( ppThis ); 556 if (pInfo != NULL) { 557 *pInfo = 0; 558 } 559 } 560 else 561 { 562 if ( *ppThis ) 563 IMPL_RTL_STRINGNAME( release )( *ppThis ); 564 565 /* Optimization for US-ASCII */ 566 if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US ) 567 { 568 IMPL_RTL_STRCODE* pBuffer; 569 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); 570 if (*ppThis == NULL) { 571 if (pInfo != NULL) { 572 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 573 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 574 } 575 return; 576 } 577 pBuffer = (*ppThis)->buffer; 578 do 579 { 580 /* Check ASCII range */ 581 OSL_ENSURE( ((unsigned char)*pStr) <= 127, 582 "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" ); 583 584 *pBuffer = *pStr; 585 pBuffer++; 586 pStr++; 587 nLen--; 588 } 589 while ( nLen ); 590 if (pInfo != NULL) { 591 *pInfo = 0; 592 } 593 } 594 else 595 { 596 rtl_uString* pTemp; 597 rtl_uString* pTemp2 = NULL; 598 rtl_TextToUnicodeConverter hConverter; 599 sal_uInt32 nInfo; 600 sal_Size nSrcBytes; 601 sal_Size nDestChars; 602 sal_Size nNewLen; 603 604 /* Optimization for UTF-8 - we try to calculate the exact length */ 605 /* For all other encoding we try the maximum - and reallocate 606 the buffer if needed */ 607 if ( eTextEncoding == RTL_TEXTENCODING_UTF8 ) 608 { 609 nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen ); 610 /* Includes the string only ASCII, then we could copy 611 the buffer faster */ 612 if ( nNewLen == (sal_Size)nLen ) 613 { 614 IMPL_RTL_STRCODE* pBuffer; 615 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); 616 if (*ppThis == NULL) 617 { 618 if (pInfo != NULL) { 619 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 620 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 621 } 622 return; 623 } 624 pBuffer = (*ppThis)->buffer; 625 do 626 { 627 /* Check ASCII range */ 628 OSL_ENSURE( ((unsigned char)*pStr) <= 127, 629 "rtl_string2UString_status() - UTF8 test encoding is wrong" ); 630 631 *pBuffer = *pStr; 632 pBuffer++; 633 pStr++; 634 nLen--; 635 } 636 while ( nLen ); 637 if (pInfo != NULL) { 638 *pInfo = 0; 639 } 640 return; 641 } 642 } 643 else 644 nNewLen = nLen; 645 646 nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH; 647 hConverter = rtl_createTextToUnicodeConverter( eTextEncoding ); 648 649 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen ); 650 if (pTemp == NULL) { 651 if (pInfo != NULL) { 652 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 653 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 654 } 655 return; 656 } 657 nDestChars = rtl_convertTextToUnicode( hConverter, 0, 658 pStr, nLen, 659 pTemp->buffer, nNewLen, 660 nCvtFlags, 661 &nInfo, &nSrcBytes ); 662 663 /* Buffer not big enough, try again with enough space */ 664 /* Shouldn't be the case, but if we get textencoding which 665 could results in more unicode characters we have this 666 code here. Could be the case for apple encodings */ 667 while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL ) 668 { 669 rtl_freeMemory( pTemp ); 670 nNewLen += 8; 671 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen ); 672 if (pTemp == NULL) { 673 if (pInfo != NULL) { 674 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 675 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 676 } 677 return; 678 } 679 nDestChars = rtl_convertTextToUnicode( hConverter, 0, 680 pStr, nLen, 681 pTemp->buffer, nNewLen, 682 nCvtFlags, 683 &nInfo, &nSrcBytes ); 684 } 685 686 if (pInfo) 687 *pInfo = nInfo; 688 689 /* Set the buffer to the correct size or if there is too 690 much overhead, reallocate to the correct size */ 691 if ( nNewLen > nDestChars+8 ) 692 { 693 pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestChars ); 694 } 695 if (pTemp2 != NULL) 696 { 697 rtl_str_ImplCopy(pTemp2->buffer, pTemp->buffer, nDestChars); 698 rtl_freeMemory(pTemp); 699 pTemp = pTemp2; 700 } 701 else 702 { 703 pTemp->length = nDestChars; 704 pTemp->buffer[nDestChars] = 0; 705 } 706 707 rtl_destroyTextToUnicodeConverter( hConverter ); 708 *ppThis = pTemp; 709 710 /* Results the conversion in an empty buffer - 711 create an empty string */ 712 if ( pTemp && !nDestChars ) 713 rtl_uString_new( ppThis ); 714 } 715 } 716 } 717 718 void SAL_CALL rtl_string2UString( rtl_uString** ppThis, 719 const sal_Char* pStr, 720 sal_Int32 nLen, 721 rtl_TextEncoding eTextEncoding, 722 sal_uInt32 nCvtFlags ) 723 { 724 rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding, 725 nCvtFlags, NULL ); 726 } 727 728 /* ----------------------------------------------------------------------- */ 729 730 typedef enum { 731 CANNOT_RETURN, 732 CAN_RETURN = 1 733 } StrLifecycle; 734 735 static oslMutex 736 getInternMutex() 737 { 738 static oslMutex pPoolGuard = NULL; 739 if( !pPoolGuard ) 740 { 741 oslMutex pGlobalGuard; 742 pGlobalGuard = *osl_getGlobalMutex(); 743 osl_acquireMutex( pGlobalGuard ); 744 if( !pPoolGuard ) 745 { 746 oslMutex p = osl_createMutex(); 747 OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); 748 pPoolGuard = p; 749 } 750 osl_releaseMutex( pGlobalGuard ); 751 } 752 else 753 { 754 OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); 755 } 756 757 return pPoolGuard; 758 } 759 760 /* returns true if we found a dup in the pool */ 761 static void rtl_ustring_intern_internal( rtl_uString ** newStr, 762 rtl_uString * str, 763 StrLifecycle can_return ) 764 { 765 oslMutex pPoolMutex; 766 767 pPoolMutex = getInternMutex(); 768 769 osl_acquireMutex( pPoolMutex ); 770 771 *newStr = rtl_str_hash_intern (str, can_return); 772 773 osl_releaseMutex( pPoolMutex ); 774 775 if( can_return && *newStr != str ) 776 { /* we dupped, then found a match */ 777 rtl_freeMemory( str ); 778 } 779 } 780 781 void SAL_CALL rtl_uString_intern( rtl_uString ** newStr, 782 rtl_uString * str) 783 { 784 if (SAL_STRING_IS_INTERN(str)) 785 { 786 IMPL_RTL_AQUIRE( str ); 787 *newStr = str; 788 } 789 else 790 { 791 rtl_uString *pOrg = *newStr; 792 *newStr = NULL; 793 rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN ); 794 if (pOrg) 795 rtl_uString_release (pOrg); 796 } 797 } 798 799 void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr, 800 const sal_Char * str, 801 sal_Int32 len, 802 rtl_TextEncoding eTextEncoding, 803 sal_uInt32 convertFlags, 804 sal_uInt32 * pInfo ) 805 { 806 rtl_uString *scratch; 807 808 if (*newStr) 809 { 810 rtl_uString_release (*newStr); 811 *newStr = NULL; 812 } 813 814 if ( len < 256 ) 815 { // try various optimisations 816 if ( len < 0 ) 817 len = strlen( str ); 818 if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US ) 819 { 820 int i; 821 rtl_uString *pScratch; 822 pScratch = alloca( sizeof( rtl_uString ) 823 + len * sizeof (IMPL_RTL_STRCODE ) ); 824 for (i = 0; i < len; i++) 825 { 826 /* Check ASCII range */ 827 OSL_ENSURE( ((unsigned char)str[i]) <= 127, 828 "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" ); 829 pScratch->buffer[i] = str[i]; 830 } 831 pScratch->length = len; 832 rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN ); 833 return; 834 } 835 /* FIXME: we want a nice UTF-8 / alloca shortcut here */ 836 } 837 838 scratch = NULL; 839 rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags, 840 pInfo ); 841 if (!scratch) { 842 return; 843 } 844 rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN ); 845 } 846 847 static void 848 internRelease (rtl_uString *pThis) 849 { 850 oslMutex pPoolMutex; 851 852 rtl_uString *pFree = NULL; 853 if ( SAL_STRING_REFCOUNT( 854 osl_decrementInterlockedCount( &(pThis->refCount) ) ) == 0) 855 { 856 pPoolMutex = getInternMutex(); 857 osl_acquireMutex( pPoolMutex ); 858 859 rtl_str_hash_remove (pThis); 860 861 /* May have been separately acquired */ 862 if ( SAL_STRING_REFCOUNT( 863 osl_incrementInterlockedCount( &(pThis->refCount) ) ) == 1 ) 864 { 865 /* we got the last ref */ 866 pFree = pThis; 867 } 868 else /* very unusual */ 869 { 870 internRelease (pThis); 871 } 872 873 osl_releaseMutex( pPoolMutex ); 874 } 875 if (pFree) 876 rtl_freeMemory (pFree); 877 } 878 879 sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints( 880 rtl_uString const * string, sal_Int32 * indexUtf16, 881 sal_Int32 incrementCodePoints) 882 { 883 sal_Int32 n; 884 sal_Unicode cu; 885 sal_uInt32 cp; 886 OSL_ASSERT(string != NULL && indexUtf16 != NULL); 887 n = *indexUtf16; 888 OSL_ASSERT(n >= 0 && n <= string->length); 889 while (incrementCodePoints < 0) { 890 OSL_ASSERT(n > 0); 891 cu = string->buffer[--n]; 892 if (SAL_RTL_IS_LOW_SURROGATE(cu) && n != 0 && 893 SAL_RTL_IS_HIGH_SURROGATE(string->buffer[n - 1])) 894 { 895 --n; 896 } 897 ++incrementCodePoints; 898 } 899 OSL_ASSERT(n >= 0 && n < string->length); 900 cu = string->buffer[n]; 901 if (SAL_RTL_IS_HIGH_SURROGATE(cu) && string->length - n >= 2 && 902 SAL_RTL_IS_LOW_SURROGATE(string->buffer[n + 1])) 903 { 904 cp = SAL_RTL_COMBINE_SURROGATES(cu, string->buffer[n + 1]); 905 } else { 906 cp = cu; 907 } 908 while (incrementCodePoints > 0) { 909 OSL_ASSERT(n < string->length); 910 cu = string->buffer[n++]; 911 if (SAL_RTL_IS_HIGH_SURROGATE(cu) && n != string->length && 912 SAL_RTL_IS_LOW_SURROGATE(string->buffer[n])) 913 { 914 ++n; 915 } 916 --incrementCodePoints; 917 } 918 OSL_ASSERT(n >= 0 && n <= string->length); 919 *indexUtf16 = n; 920 return cp; 921 } 922 923 sal_Bool rtl_convertStringToUString( 924 rtl_uString ** target, char const * source, sal_Int32 length, 925 rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C() 926 { 927 sal_uInt32 info; 928 rtl_string2UString_status(target, source, length, encoding, flags, &info); 929 return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0); 930 } 931