/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org.  If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir #if defined(_MSC_VER) && (_MSC_VER >= 1400) 28*cdf0e10cSrcweir #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance 29*cdf0e10cSrcweir #endif 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include <rtl/memory.h> 32*cdf0e10cSrcweir #include <osl/diagnose.h> 33*cdf0e10cSrcweir #include <osl/interlck.h> 34*cdf0e10cSrcweir #include <rtl/alloc.h> 35*cdf0e10cSrcweir #include <osl/mutex.h> 36*cdf0e10cSrcweir #include <osl/doublecheckedlocking.h> 37*cdf0e10cSrcweir #include <rtl/tencinfo.h> 38*cdf0e10cSrcweir 39*cdf0e10cSrcweir #include <string.h> 40*cdf0e10cSrcweir #include <sal/alloca.h> 41*cdf0e10cSrcweir 42*cdf0e10cSrcweir #include "hash.h" 43*cdf0e10cSrcweir #include "strimp.h" 44*cdf0e10cSrcweir #include "surrogates.h" 45*cdf0e10cSrcweir #include <rtl/ustring.h> 46*cdf0e10cSrcweir 47*cdf0e10cSrcweir #include "rtl/math.h" 48*cdf0e10cSrcweir #include "rtl/tencinfo.h" 49*cdf0e10cSrcweir 50*cdf0e10cSrcweir /* ======================================================================= */ 51*cdf0e10cSrcweir 52*cdf0e10cSrcweir /* static data to be referenced by all empty strings 53*cdf0e10cSrcweir * the refCount is predefined to 1 and must never become 0 ! 
54*cdf0e10cSrcweir */ 55*cdf0e10cSrcweir static rtl_uString const aImplEmpty_rtl_uString = 56*cdf0e10cSrcweir { 57*cdf0e10cSrcweir (sal_Int32) (SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32 refCount; */ 58*cdf0e10cSrcweir 0, /*sal_Int32 length; */ 59*cdf0e10cSrcweir { 0 } /*sal_Unicode buffer[1];*/ 60*cdf0e10cSrcweir }; 61*cdf0e10cSrcweir 62*cdf0e10cSrcweir /* ======================================================================= */ 63*cdf0e10cSrcweir 64*cdf0e10cSrcweir #define IMPL_RTL_STRCODE sal_Unicode 65*cdf0e10cSrcweir #define IMPL_RTL_USTRCODE( c ) (c) 66*cdf0e10cSrcweir #define IMPL_RTL_STRNAME( n ) rtl_ustr_ ## n 67*cdf0e10cSrcweir 68*cdf0e10cSrcweir #define IMPL_RTL_STRINGNAME( n ) rtl_uString_ ## n 69*cdf0e10cSrcweir #define IMPL_RTL_STRINGDATA rtl_uString 70*cdf0e10cSrcweir #define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_uString 71*cdf0e10cSrcweir #define IMPL_RTL_INTERN 72*cdf0e10cSrcweir static void internRelease (rtl_uString *pThis); 73*cdf0e10cSrcweir 74*cdf0e10cSrcweir /* ======================================================================= */ 75*cdf0e10cSrcweir 76*cdf0e10cSrcweir /* Include String/UString template code */ 77*cdf0e10cSrcweir 78*cdf0e10cSrcweir #include "strtmpl.c" 79*cdf0e10cSrcweir 80*cdf0e10cSrcweir sal_Int32 rtl_ustr_indexOfAscii_WithLength( 81*cdf0e10cSrcweir sal_Unicode const * str, sal_Int32 len, 82*cdf0e10cSrcweir char const * subStr, sal_Int32 subLen) 83*cdf0e10cSrcweir { 84*cdf0e10cSrcweir if (subLen > 0 && subLen <= len) { 85*cdf0e10cSrcweir sal_Int32 i; 86*cdf0e10cSrcweir for (i = 0; i <= len - subLen; ++i) { 87*cdf0e10cSrcweir if (rtl_ustr_asciil_reverseEquals_WithLength( 88*cdf0e10cSrcweir str + i, subStr, subLen)) 89*cdf0e10cSrcweir { 90*cdf0e10cSrcweir return i; 91*cdf0e10cSrcweir } 92*cdf0e10cSrcweir } 93*cdf0e10cSrcweir } 94*cdf0e10cSrcweir return -1; 95*cdf0e10cSrcweir } 96*cdf0e10cSrcweir 97*cdf0e10cSrcweir sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength( 98*cdf0e10cSrcweir sal_Unicode const * str, 
sal_Int32 len, 99*cdf0e10cSrcweir char const * subStr, sal_Int32 subLen) 100*cdf0e10cSrcweir { 101*cdf0e10cSrcweir if (subLen > 0 && subLen <= len) { 102*cdf0e10cSrcweir sal_Int32 i; 103*cdf0e10cSrcweir for (i = len - subLen; i >= 0; --i) { 104*cdf0e10cSrcweir if (rtl_ustr_asciil_reverseEquals_WithLength( 105*cdf0e10cSrcweir str + i, subStr, subLen)) 106*cdf0e10cSrcweir { 107*cdf0e10cSrcweir return i; 108*cdf0e10cSrcweir } 109*cdf0e10cSrcweir } 110*cdf0e10cSrcweir } 111*cdf0e10cSrcweir return -1; 112*cdf0e10cSrcweir } 113*cdf0e10cSrcweir 114*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f) 115*cdf0e10cSrcweir { 116*cdf0e10cSrcweir rtl_uString * pResult = NULL; 117*cdf0e10cSrcweir sal_Int32 nLen; 118*cdf0e10cSrcweir rtl_math_doubleToUString( 119*cdf0e10cSrcweir &pResult, 0, 0, f, rtl_math_StringFormat_G, 120*cdf0e10cSrcweir RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 121*cdf0e10cSrcweir 0, sal_True); 122*cdf0e10cSrcweir nLen = pResult->length; 123*cdf0e10cSrcweir OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT); 124*cdf0e10cSrcweir rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode)); 125*cdf0e10cSrcweir rtl_uString_release(pResult); 126*cdf0e10cSrcweir return nLen; 127*cdf0e10cSrcweir } 128*cdf0e10cSrcweir 129*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d) 130*cdf0e10cSrcweir { 131*cdf0e10cSrcweir rtl_uString * pResult = NULL; 132*cdf0e10cSrcweir sal_Int32 nLen; 133*cdf0e10cSrcweir rtl_math_doubleToUString( 134*cdf0e10cSrcweir &pResult, 0, 0, d, rtl_math_StringFormat_G, 135*cdf0e10cSrcweir RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 136*cdf0e10cSrcweir 0, sal_True); 137*cdf0e10cSrcweir nLen = pResult->length; 138*cdf0e10cSrcweir OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE); 139*cdf0e10cSrcweir rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode)); 140*cdf0e10cSrcweir rtl_uString_release(pResult); 
141*cdf0e10cSrcweir return nLen; 142*cdf0e10cSrcweir } 143*cdf0e10cSrcweir 144*cdf0e10cSrcweir float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr) 145*cdf0e10cSrcweir { 146*cdf0e10cSrcweir return (float) rtl_math_uStringToDouble(pStr, 147*cdf0e10cSrcweir pStr + rtl_ustr_getLength(pStr), 148*cdf0e10cSrcweir '.', 0, 0, 0); 149*cdf0e10cSrcweir } 150*cdf0e10cSrcweir 151*cdf0e10cSrcweir double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr) 152*cdf0e10cSrcweir { 153*cdf0e10cSrcweir return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.', 154*cdf0e10cSrcweir 0, 0, 0); 155*cdf0e10cSrcweir } 156*cdf0e10cSrcweir 157*cdf0e10cSrcweir /* ======================================================================= */ 158*cdf0e10cSrcweir 159*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1, 160*cdf0e10cSrcweir const sal_Char* pStr2 ) 161*cdf0e10cSrcweir { 162*cdf0e10cSrcweir sal_Int32 nRet; 163*cdf0e10cSrcweir while ( ((nRet = ((sal_Int32)(*pStr1))- 164*cdf0e10cSrcweir ((sal_Int32)((unsigned char)(*pStr2)))) == 0) && 165*cdf0e10cSrcweir *pStr2 ) 166*cdf0e10cSrcweir { 167*cdf0e10cSrcweir pStr1++; 168*cdf0e10cSrcweir pStr2++; 169*cdf0e10cSrcweir } 170*cdf0e10cSrcweir 171*cdf0e10cSrcweir return nRet; 172*cdf0e10cSrcweir } 173*cdf0e10cSrcweir 174*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 175*cdf0e10cSrcweir 176*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1, 177*cdf0e10cSrcweir sal_Int32 nStr1Len, 178*cdf0e10cSrcweir const sal_Char* pStr2 ) 179*cdf0e10cSrcweir { 180*cdf0e10cSrcweir sal_Int32 nRet = 0; 181*cdf0e10cSrcweir while( ((nRet = (nStr1Len ? 
(sal_Int32)(*pStr1) : 0)- 182*cdf0e10cSrcweir ((sal_Int32)((unsigned char)(*pStr2)))) == 0) && 183*cdf0e10cSrcweir nStr1Len && *pStr2 ) 184*cdf0e10cSrcweir { 185*cdf0e10cSrcweir pStr1++; 186*cdf0e10cSrcweir pStr2++; 187*cdf0e10cSrcweir nStr1Len--; 188*cdf0e10cSrcweir } 189*cdf0e10cSrcweir 190*cdf0e10cSrcweir return nRet; 191*cdf0e10cSrcweir } 192*cdf0e10cSrcweir 193*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 194*cdf0e10cSrcweir 195*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1, 196*cdf0e10cSrcweir sal_Int32 nStr1Len, 197*cdf0e10cSrcweir const sal_Char* pStr2, 198*cdf0e10cSrcweir sal_Int32 nShortenedLength ) 199*cdf0e10cSrcweir { 200*cdf0e10cSrcweir const sal_Unicode* pStr1End = pStr1 + nStr1Len; 201*cdf0e10cSrcweir sal_Int32 nRet; 202*cdf0e10cSrcweir while ( (nShortenedLength > 0) && 203*cdf0e10cSrcweir (pStr1 < pStr1End) && *pStr2 ) 204*cdf0e10cSrcweir { 205*cdf0e10cSrcweir /* Check ASCII range */ 206*cdf0e10cSrcweir OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127"); 207*cdf0e10cSrcweir 208*cdf0e10cSrcweir nRet = ((sal_Int32)*pStr1)- 209*cdf0e10cSrcweir ((sal_Int32)(unsigned char)*pStr2); 210*cdf0e10cSrcweir if ( nRet != 0 ) 211*cdf0e10cSrcweir return nRet; 212*cdf0e10cSrcweir 213*cdf0e10cSrcweir nShortenedLength--; 214*cdf0e10cSrcweir pStr1++; 215*cdf0e10cSrcweir pStr2++; 216*cdf0e10cSrcweir } 217*cdf0e10cSrcweir 218*cdf0e10cSrcweir if ( nShortenedLength <= 0 ) 219*cdf0e10cSrcweir return 0; 220*cdf0e10cSrcweir 221*cdf0e10cSrcweir if ( *pStr2 ) 222*cdf0e10cSrcweir { 223*cdf0e10cSrcweir OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" ); 224*cdf0e10cSrcweir // first is a substring of the second string => less (negative value) 225*cdf0e10cSrcweir nRet = -1; 226*cdf0e10cSrcweir } 227*cdf0e10cSrcweir else 228*cdf0e10cSrcweir { 229*cdf0e10cSrcweir // greater or equal 230*cdf0e10cSrcweir nRet = pStr1End - pStr1; 231*cdf0e10cSrcweir } 
232*cdf0e10cSrcweir 233*cdf0e10cSrcweir return nRet; 234*cdf0e10cSrcweir } 235*cdf0e10cSrcweir 236*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 237*cdf0e10cSrcweir 238*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1, 239*cdf0e10cSrcweir sal_Int32 nStr1Len, 240*cdf0e10cSrcweir const sal_Char* pStr2, 241*cdf0e10cSrcweir sal_Int32 nStr2Len ) 242*cdf0e10cSrcweir { 243*cdf0e10cSrcweir const sal_Unicode* pStr1Run = pStr1+nStr1Len; 244*cdf0e10cSrcweir const sal_Char* pStr2Run = pStr2+nStr2Len; 245*cdf0e10cSrcweir sal_Int32 nRet; 246*cdf0e10cSrcweir while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) ) 247*cdf0e10cSrcweir { 248*cdf0e10cSrcweir pStr1Run--; 249*cdf0e10cSrcweir pStr2Run--; 250*cdf0e10cSrcweir nRet = ((sal_Int32)*pStr1Run)-((sal_Int32)*pStr2Run); 251*cdf0e10cSrcweir if ( nRet ) 252*cdf0e10cSrcweir return nRet; 253*cdf0e10cSrcweir } 254*cdf0e10cSrcweir 255*cdf0e10cSrcweir return nStr1Len - nStr2Len; 256*cdf0e10cSrcweir } 257*cdf0e10cSrcweir 258*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 259*cdf0e10cSrcweir 260*cdf0e10cSrcweir sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1, 261*cdf0e10cSrcweir const sal_Char* pStr2, 262*cdf0e10cSrcweir sal_Int32 nStrLen ) 263*cdf0e10cSrcweir { 264*cdf0e10cSrcweir const sal_Unicode* pStr1Run = pStr1+nStrLen; 265*cdf0e10cSrcweir const sal_Char* pStr2Run = pStr2+nStrLen; 266*cdf0e10cSrcweir while ( pStr1 < pStr1Run ) 267*cdf0e10cSrcweir { 268*cdf0e10cSrcweir pStr1Run--; 269*cdf0e10cSrcweir pStr2Run--; 270*cdf0e10cSrcweir if( *pStr1Run != (sal_Unicode)*pStr2Run ) 271*cdf0e10cSrcweir return sal_False; 272*cdf0e10cSrcweir } 273*cdf0e10cSrcweir 274*cdf0e10cSrcweir return sal_True; 275*cdf0e10cSrcweir } 276*cdf0e10cSrcweir 277*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 278*cdf0e10cSrcweir 
279*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1, 280*cdf0e10cSrcweir const sal_Char* pStr2 ) 281*cdf0e10cSrcweir { 282*cdf0e10cSrcweir sal_Int32 nRet; 283*cdf0e10cSrcweir sal_Int32 c1; 284*cdf0e10cSrcweir sal_Int32 c2; 285*cdf0e10cSrcweir do 286*cdf0e10cSrcweir { 287*cdf0e10cSrcweir /* If character between 'A' and 'Z', than convert it to lowercase */ 288*cdf0e10cSrcweir c1 = (sal_Int32)*pStr1; 289*cdf0e10cSrcweir c2 = (sal_Int32)((unsigned char)*pStr2); 290*cdf0e10cSrcweir if ( (c1 >= 65) && (c1 <= 90) ) 291*cdf0e10cSrcweir c1 += 32; 292*cdf0e10cSrcweir if ( (c2 >= 65) && (c2 <= 90) ) 293*cdf0e10cSrcweir c2 += 32; 294*cdf0e10cSrcweir nRet = c1-c2; 295*cdf0e10cSrcweir if ( nRet != 0 ) 296*cdf0e10cSrcweir return nRet; 297*cdf0e10cSrcweir 298*cdf0e10cSrcweir pStr1++; 299*cdf0e10cSrcweir pStr2++; 300*cdf0e10cSrcweir } 301*cdf0e10cSrcweir while ( c2 ); 302*cdf0e10cSrcweir 303*cdf0e10cSrcweir return 0; 304*cdf0e10cSrcweir } 305*cdf0e10cSrcweir 306*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 307*cdf0e10cSrcweir 308*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1, 309*cdf0e10cSrcweir sal_Int32 nStr1Len, 310*cdf0e10cSrcweir const sal_Char* pStr2 ) 311*cdf0e10cSrcweir { 312*cdf0e10cSrcweir sal_Int32 nRet; 313*cdf0e10cSrcweir sal_Int32 c1; 314*cdf0e10cSrcweir sal_Int32 c2; 315*cdf0e10cSrcweir do 316*cdf0e10cSrcweir { 317*cdf0e10cSrcweir if ( !nStr1Len ) 318*cdf0e10cSrcweir return *pStr2 == '\0' ? 
0 : -1; 319*cdf0e10cSrcweir 320*cdf0e10cSrcweir /* If character between 'A' and 'Z', than convert it to lowercase */ 321*cdf0e10cSrcweir c1 = (sal_Int32)*pStr1; 322*cdf0e10cSrcweir c2 = (sal_Int32)((unsigned char)*pStr2); 323*cdf0e10cSrcweir if ( (c1 >= 65) && (c1 <= 90) ) 324*cdf0e10cSrcweir c1 += 32; 325*cdf0e10cSrcweir if ( (c2 >= 65) && (c2 <= 90) ) 326*cdf0e10cSrcweir c2 += 32; 327*cdf0e10cSrcweir nRet = c1-c2; 328*cdf0e10cSrcweir if ( nRet != 0 ) 329*cdf0e10cSrcweir return nRet; 330*cdf0e10cSrcweir 331*cdf0e10cSrcweir pStr1++; 332*cdf0e10cSrcweir pStr2++; 333*cdf0e10cSrcweir nStr1Len--; 334*cdf0e10cSrcweir } 335*cdf0e10cSrcweir while( c2 ); 336*cdf0e10cSrcweir 337*cdf0e10cSrcweir return 0; 338*cdf0e10cSrcweir } 339*cdf0e10cSrcweir 340*cdf0e10cSrcweir sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths( 341*cdf0e10cSrcweir sal_Unicode const * first, sal_Int32 firstLen, 342*cdf0e10cSrcweir char const * second, sal_Int32 secondLen) 343*cdf0e10cSrcweir { 344*cdf0e10cSrcweir sal_Int32 i; 345*cdf0e10cSrcweir sal_Int32 len = firstLen < secondLen ? 
firstLen : secondLen; 346*cdf0e10cSrcweir for (i = 0; i < len; ++i) { 347*cdf0e10cSrcweir sal_Int32 c1 = *first++; 348*cdf0e10cSrcweir sal_Int32 c2 = (unsigned char) *second++; 349*cdf0e10cSrcweir sal_Int32 d; 350*cdf0e10cSrcweir if (c1 >= 65 && c1 <= 90) { 351*cdf0e10cSrcweir c1 += 32; 352*cdf0e10cSrcweir } 353*cdf0e10cSrcweir if (c2 >= 65 && c2 <= 90) { 354*cdf0e10cSrcweir c2 += 32; 355*cdf0e10cSrcweir } 356*cdf0e10cSrcweir d = c1 - c2; 357*cdf0e10cSrcweir if (d != 0) { 358*cdf0e10cSrcweir return d; 359*cdf0e10cSrcweir } 360*cdf0e10cSrcweir } 361*cdf0e10cSrcweir return firstLen - secondLen; 362*cdf0e10cSrcweir } 363*cdf0e10cSrcweir 364*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 365*cdf0e10cSrcweir 366*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1, 367*cdf0e10cSrcweir sal_Int32 nStr1Len, 368*cdf0e10cSrcweir const sal_Char* pStr2, 369*cdf0e10cSrcweir sal_Int32 nShortenedLength ) 370*cdf0e10cSrcweir { 371*cdf0e10cSrcweir const sal_Unicode* pStr1End = pStr1 + nStr1Len; 372*cdf0e10cSrcweir sal_Int32 nRet; 373*cdf0e10cSrcweir sal_Int32 c1; 374*cdf0e10cSrcweir sal_Int32 c2; 375*cdf0e10cSrcweir while ( (nShortenedLength > 0) && 376*cdf0e10cSrcweir (pStr1 < pStr1End) && *pStr2 ) 377*cdf0e10cSrcweir { 378*cdf0e10cSrcweir /* Check ASCII range */ 379*cdf0e10cSrcweir OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127"); 380*cdf0e10cSrcweir 381*cdf0e10cSrcweir /* If character between 'A' and 'Z', than convert it to lowercase */ 382*cdf0e10cSrcweir c1 = (sal_Int32)*pStr1; 383*cdf0e10cSrcweir c2 = (sal_Int32)((unsigned char)*pStr2); 384*cdf0e10cSrcweir if ( (c1 >= 65) && (c1 <= 90) ) 385*cdf0e10cSrcweir c1 += 32; 386*cdf0e10cSrcweir if ( (c2 >= 65) && (c2 <= 90) ) 387*cdf0e10cSrcweir c2 += 32; 388*cdf0e10cSrcweir nRet = c1-c2; 389*cdf0e10cSrcweir if ( nRet != 0 ) 390*cdf0e10cSrcweir return nRet; 391*cdf0e10cSrcweir 392*cdf0e10cSrcweir 
nShortenedLength--; 393*cdf0e10cSrcweir pStr1++; 394*cdf0e10cSrcweir pStr2++; 395*cdf0e10cSrcweir } 396*cdf0e10cSrcweir 397*cdf0e10cSrcweir if ( nShortenedLength <= 0 ) 398*cdf0e10cSrcweir return 0; 399*cdf0e10cSrcweir 400*cdf0e10cSrcweir if ( *pStr2 ) 401*cdf0e10cSrcweir { 402*cdf0e10cSrcweir OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" ); 403*cdf0e10cSrcweir // first is a substring of the second string => less (negative value) 404*cdf0e10cSrcweir nRet = -1; 405*cdf0e10cSrcweir } 406*cdf0e10cSrcweir else 407*cdf0e10cSrcweir { 408*cdf0e10cSrcweir // greater or equal 409*cdf0e10cSrcweir nRet = pStr1End - pStr1; 410*cdf0e10cSrcweir } 411*cdf0e10cSrcweir 412*cdf0e10cSrcweir return nRet; 413*cdf0e10cSrcweir } 414*cdf0e10cSrcweir 415*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 416*cdf0e10cSrcweir 417*cdf0e10cSrcweir void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis, 418*cdf0e10cSrcweir const sal_Char* pCharStr ) 419*cdf0e10cSrcweir { 420*cdf0e10cSrcweir sal_Int32 nLen; 421*cdf0e10cSrcweir 422*cdf0e10cSrcweir if ( pCharStr ) 423*cdf0e10cSrcweir { 424*cdf0e10cSrcweir const sal_Char* pTempStr = pCharStr; 425*cdf0e10cSrcweir while( *pTempStr ) 426*cdf0e10cSrcweir pTempStr++; 427*cdf0e10cSrcweir nLen = pTempStr-pCharStr; 428*cdf0e10cSrcweir } 429*cdf0e10cSrcweir else 430*cdf0e10cSrcweir nLen = 0; 431*cdf0e10cSrcweir 432*cdf0e10cSrcweir if ( !nLen ) 433*cdf0e10cSrcweir { 434*cdf0e10cSrcweir IMPL_RTL_STRINGNAME( new )( ppThis ); 435*cdf0e10cSrcweir return; 436*cdf0e10cSrcweir } 437*cdf0e10cSrcweir 438*cdf0e10cSrcweir if ( *ppThis ) 439*cdf0e10cSrcweir IMPL_RTL_STRINGNAME( release )( *ppThis ); 440*cdf0e10cSrcweir 441*cdf0e10cSrcweir *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); 442*cdf0e10cSrcweir OSL_ASSERT(*ppThis != NULL); 443*cdf0e10cSrcweir if ( (*ppThis) ) 444*cdf0e10cSrcweir { 445*cdf0e10cSrcweir IMPL_RTL_STRCODE* pBuffer = (*ppThis)->buffer; 446*cdf0e10cSrcweir do 447*cdf0e10cSrcweir { 
448*cdf0e10cSrcweir /* Check ASCII range */ 449*cdf0e10cSrcweir OSL_ENSURE( ((unsigned char)*pCharStr) <= 127, 450*cdf0e10cSrcweir "rtl_uString_newFromAscii() - Found ASCII char > 127" ); 451*cdf0e10cSrcweir 452*cdf0e10cSrcweir *pBuffer = *pCharStr; 453*cdf0e10cSrcweir pBuffer++; 454*cdf0e10cSrcweir pCharStr++; 455*cdf0e10cSrcweir } 456*cdf0e10cSrcweir while ( *pCharStr ); 457*cdf0e10cSrcweir } 458*cdf0e10cSrcweir } 459*cdf0e10cSrcweir 460*cdf0e10cSrcweir void SAL_CALL rtl_uString_newFromCodePoints( 461*cdf0e10cSrcweir rtl_uString ** newString, sal_uInt32 const * codePoints, 462*cdf0e10cSrcweir sal_Int32 codePointCount) 463*cdf0e10cSrcweir { 464*cdf0e10cSrcweir sal_Int32 n; 465*cdf0e10cSrcweir sal_Int32 i; 466*cdf0e10cSrcweir sal_Unicode * p; 467*cdf0e10cSrcweir OSL_ASSERT( 468*cdf0e10cSrcweir newString != NULL && 469*cdf0e10cSrcweir (codePoints != NULL || codePointCount == 0) && 470*cdf0e10cSrcweir codePointCount >= 0); 471*cdf0e10cSrcweir if (codePointCount == 0) { 472*cdf0e10cSrcweir rtl_uString_new(newString); 473*cdf0e10cSrcweir return; 474*cdf0e10cSrcweir } 475*cdf0e10cSrcweir if (*newString != NULL) { 476*cdf0e10cSrcweir rtl_uString_release(*newString); 477*cdf0e10cSrcweir } 478*cdf0e10cSrcweir n = codePointCount; 479*cdf0e10cSrcweir for (i = 0; i < codePointCount; ++i) { 480*cdf0e10cSrcweir OSL_ASSERT(codePoints[i] <= 0x10FFFF); 481*cdf0e10cSrcweir if (codePoints[i] >= 0x10000) { 482*cdf0e10cSrcweir ++n; 483*cdf0e10cSrcweir } 484*cdf0e10cSrcweir } 485*cdf0e10cSrcweir /* Builds on the assumption that sal_Int32 uses 32 bit two's complement 486*cdf0e10cSrcweir representation with wrap around (the necessary number of UTF-16 code 487*cdf0e10cSrcweir units will be no larger than 2 * SAL_MAX_INT32, represented as 488*cdf0e10cSrcweir sal_Int32 -2): */ 489*cdf0e10cSrcweir if (n < 0) { 490*cdf0e10cSrcweir *newString = NULL; 491*cdf0e10cSrcweir return; 492*cdf0e10cSrcweir } 493*cdf0e10cSrcweir *newString = rtl_uString_ImplAlloc(n); 494*cdf0e10cSrcweir if (*newString 
== NULL) { 495*cdf0e10cSrcweir return; 496*cdf0e10cSrcweir } 497*cdf0e10cSrcweir p = (*newString)->buffer; 498*cdf0e10cSrcweir for (i = 0; i < codePointCount; ++i) { 499*cdf0e10cSrcweir sal_uInt32 c = codePoints[i]; 500*cdf0e10cSrcweir if (c < 0x10000) { 501*cdf0e10cSrcweir *p++ = (sal_Unicode) c; 502*cdf0e10cSrcweir } else { 503*cdf0e10cSrcweir c -= 0x10000; 504*cdf0e10cSrcweir *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE); 505*cdf0e10cSrcweir *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE); 506*cdf0e10cSrcweir } 507*cdf0e10cSrcweir } 508*cdf0e10cSrcweir } 509*cdf0e10cSrcweir 510*cdf0e10cSrcweir /* ======================================================================= */ 511*cdf0e10cSrcweir 512*cdf0e10cSrcweir static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen ) 513*cdf0e10cSrcweir { 514*cdf0e10cSrcweir int n; 515*cdf0e10cSrcweir sal_uChar c; 516*cdf0e10cSrcweir const sal_Char* pEndStr; 517*cdf0e10cSrcweir 518*cdf0e10cSrcweir n = 0; 519*cdf0e10cSrcweir pEndStr = pStr+nLen; 520*cdf0e10cSrcweir while ( pStr < pEndStr ) 521*cdf0e10cSrcweir { 522*cdf0e10cSrcweir c = (sal_uChar)*pStr; 523*cdf0e10cSrcweir 524*cdf0e10cSrcweir if ( !(c & 0x80) ) 525*cdf0e10cSrcweir pStr++; 526*cdf0e10cSrcweir else if ( (c & 0xE0) == 0xC0 ) 527*cdf0e10cSrcweir pStr += 2; 528*cdf0e10cSrcweir else if ( (c & 0xF0) == 0xE0 ) 529*cdf0e10cSrcweir pStr += 3; 530*cdf0e10cSrcweir else if ( (c & 0xF8) == 0xF0 ) 531*cdf0e10cSrcweir pStr += 4; 532*cdf0e10cSrcweir else if ( (c & 0xFC) == 0xF8 ) 533*cdf0e10cSrcweir pStr += 5; 534*cdf0e10cSrcweir else if ( (c & 0xFE) == 0xFC ) 535*cdf0e10cSrcweir pStr += 6; 536*cdf0e10cSrcweir else 537*cdf0e10cSrcweir pStr++; 538*cdf0e10cSrcweir 539*cdf0e10cSrcweir n++; 540*cdf0e10cSrcweir } 541*cdf0e10cSrcweir 542*cdf0e10cSrcweir return n; 543*cdf0e10cSrcweir } 544*cdf0e10cSrcweir 545*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 546*cdf0e10cSrcweir 
547*cdf0e10cSrcweir static void rtl_string2UString_status( rtl_uString** ppThis, 548*cdf0e10cSrcweir const sal_Char* pStr, 549*cdf0e10cSrcweir sal_Int32 nLen, 550*cdf0e10cSrcweir rtl_TextEncoding eTextEncoding, 551*cdf0e10cSrcweir sal_uInt32 nCvtFlags, 552*cdf0e10cSrcweir sal_uInt32 *pInfo ) 553*cdf0e10cSrcweir { 554*cdf0e10cSrcweir OSL_ENSURE(rtl_isOctetTextEncoding(eTextEncoding), 555*cdf0e10cSrcweir "rtl_string2UString_status() - Wrong TextEncoding" ); 556*cdf0e10cSrcweir 557*cdf0e10cSrcweir if ( !nLen ) 558*cdf0e10cSrcweir { 559*cdf0e10cSrcweir rtl_uString_new( ppThis ); 560*cdf0e10cSrcweir if (pInfo != NULL) { 561*cdf0e10cSrcweir *pInfo = 0; 562*cdf0e10cSrcweir } 563*cdf0e10cSrcweir } 564*cdf0e10cSrcweir else 565*cdf0e10cSrcweir { 566*cdf0e10cSrcweir if ( *ppThis ) 567*cdf0e10cSrcweir IMPL_RTL_STRINGNAME( release )( *ppThis ); 568*cdf0e10cSrcweir 569*cdf0e10cSrcweir /* Optimization for US-ASCII */ 570*cdf0e10cSrcweir if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US ) 571*cdf0e10cSrcweir { 572*cdf0e10cSrcweir IMPL_RTL_STRCODE* pBuffer; 573*cdf0e10cSrcweir *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); 574*cdf0e10cSrcweir if (*ppThis == NULL) { 575*cdf0e10cSrcweir if (pInfo != NULL) { 576*cdf0e10cSrcweir *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 577*cdf0e10cSrcweir RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 578*cdf0e10cSrcweir } 579*cdf0e10cSrcweir return; 580*cdf0e10cSrcweir } 581*cdf0e10cSrcweir pBuffer = (*ppThis)->buffer; 582*cdf0e10cSrcweir do 583*cdf0e10cSrcweir { 584*cdf0e10cSrcweir /* Check ASCII range */ 585*cdf0e10cSrcweir OSL_ENSURE( ((unsigned char)*pStr) <= 127, 586*cdf0e10cSrcweir "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" ); 587*cdf0e10cSrcweir 588*cdf0e10cSrcweir *pBuffer = *pStr; 589*cdf0e10cSrcweir pBuffer++; 590*cdf0e10cSrcweir pStr++; 591*cdf0e10cSrcweir nLen--; 592*cdf0e10cSrcweir } 593*cdf0e10cSrcweir while ( nLen ); 594*cdf0e10cSrcweir if (pInfo != NULL) { 595*cdf0e10cSrcweir *pInfo = 0; 
596*cdf0e10cSrcweir } 597*cdf0e10cSrcweir } 598*cdf0e10cSrcweir else 599*cdf0e10cSrcweir { 600*cdf0e10cSrcweir rtl_uString* pTemp; 601*cdf0e10cSrcweir rtl_uString* pTemp2 = NULL; 602*cdf0e10cSrcweir rtl_TextToUnicodeConverter hConverter; 603*cdf0e10cSrcweir sal_uInt32 nInfo; 604*cdf0e10cSrcweir sal_Size nSrcBytes; 605*cdf0e10cSrcweir sal_Size nDestChars; 606*cdf0e10cSrcweir sal_Size nNewLen; 607*cdf0e10cSrcweir 608*cdf0e10cSrcweir /* Optimization for UTF-8 - we try to calculate the exact length */ 609*cdf0e10cSrcweir /* For all other encoding we try the maximum - and reallocate 610*cdf0e10cSrcweir the buffer if needed */ 611*cdf0e10cSrcweir if ( eTextEncoding == RTL_TEXTENCODING_UTF8 ) 612*cdf0e10cSrcweir { 613*cdf0e10cSrcweir nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen ); 614*cdf0e10cSrcweir /* Includes the string only ASCII, then we could copy 615*cdf0e10cSrcweir the buffer faster */ 616*cdf0e10cSrcweir if ( nNewLen == (sal_Size)nLen ) 617*cdf0e10cSrcweir { 618*cdf0e10cSrcweir IMPL_RTL_STRCODE* pBuffer; 619*cdf0e10cSrcweir *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); 620*cdf0e10cSrcweir if (*ppThis == NULL) 621*cdf0e10cSrcweir { 622*cdf0e10cSrcweir if (pInfo != NULL) { 623*cdf0e10cSrcweir *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 624*cdf0e10cSrcweir RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 625*cdf0e10cSrcweir } 626*cdf0e10cSrcweir return; 627*cdf0e10cSrcweir } 628*cdf0e10cSrcweir pBuffer = (*ppThis)->buffer; 629*cdf0e10cSrcweir do 630*cdf0e10cSrcweir { 631*cdf0e10cSrcweir /* Check ASCII range */ 632*cdf0e10cSrcweir OSL_ENSURE( ((unsigned char)*pStr) <= 127, 633*cdf0e10cSrcweir "rtl_string2UString_status() - UTF8 test encoding is wrong" ); 634*cdf0e10cSrcweir 635*cdf0e10cSrcweir *pBuffer = *pStr; 636*cdf0e10cSrcweir pBuffer++; 637*cdf0e10cSrcweir pStr++; 638*cdf0e10cSrcweir nLen--; 639*cdf0e10cSrcweir } 640*cdf0e10cSrcweir while ( nLen ); 641*cdf0e10cSrcweir if (pInfo != NULL) { 642*cdf0e10cSrcweir *pInfo = 0; 643*cdf0e10cSrcweir } 
644*cdf0e10cSrcweir return; 645*cdf0e10cSrcweir } 646*cdf0e10cSrcweir } 647*cdf0e10cSrcweir else 648*cdf0e10cSrcweir nNewLen = nLen; 649*cdf0e10cSrcweir 650*cdf0e10cSrcweir nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH; 651*cdf0e10cSrcweir hConverter = rtl_createTextToUnicodeConverter( eTextEncoding ); 652*cdf0e10cSrcweir 653*cdf0e10cSrcweir pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen ); 654*cdf0e10cSrcweir if (pTemp == NULL) { 655*cdf0e10cSrcweir if (pInfo != NULL) { 656*cdf0e10cSrcweir *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 657*cdf0e10cSrcweir RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 658*cdf0e10cSrcweir } 659*cdf0e10cSrcweir return; 660*cdf0e10cSrcweir } 661*cdf0e10cSrcweir nDestChars = rtl_convertTextToUnicode( hConverter, 0, 662*cdf0e10cSrcweir pStr, nLen, 663*cdf0e10cSrcweir pTemp->buffer, nNewLen, 664*cdf0e10cSrcweir nCvtFlags, 665*cdf0e10cSrcweir &nInfo, &nSrcBytes ); 666*cdf0e10cSrcweir 667*cdf0e10cSrcweir /* Buffer not big enough, try again with enough space */ 668*cdf0e10cSrcweir /* Shouldn't be the case, but if we get textencoding which 669*cdf0e10cSrcweir could results in more unicode characters we have this 670*cdf0e10cSrcweir code here. 
Could be the case for apple encodings */ 671*cdf0e10cSrcweir while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL ) 672*cdf0e10cSrcweir { 673*cdf0e10cSrcweir rtl_freeMemory( pTemp ); 674*cdf0e10cSrcweir nNewLen += 8; 675*cdf0e10cSrcweir pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen ); 676*cdf0e10cSrcweir if (pTemp == NULL) { 677*cdf0e10cSrcweir if (pInfo != NULL) { 678*cdf0e10cSrcweir *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 679*cdf0e10cSrcweir RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 680*cdf0e10cSrcweir } 681*cdf0e10cSrcweir return; 682*cdf0e10cSrcweir } 683*cdf0e10cSrcweir nDestChars = rtl_convertTextToUnicode( hConverter, 0, 684*cdf0e10cSrcweir pStr, nLen, 685*cdf0e10cSrcweir pTemp->buffer, nNewLen, 686*cdf0e10cSrcweir nCvtFlags, 687*cdf0e10cSrcweir &nInfo, &nSrcBytes ); 688*cdf0e10cSrcweir } 689*cdf0e10cSrcweir 690*cdf0e10cSrcweir if (pInfo) 691*cdf0e10cSrcweir *pInfo = nInfo; 692*cdf0e10cSrcweir 693*cdf0e10cSrcweir /* Set the buffer to the correct size or if there is too 694*cdf0e10cSrcweir much overhead, reallocate to the correct size */ 695*cdf0e10cSrcweir if ( nNewLen > nDestChars+8 ) 696*cdf0e10cSrcweir { 697*cdf0e10cSrcweir pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestChars ); 698*cdf0e10cSrcweir } 699*cdf0e10cSrcweir if (pTemp2 != NULL) 700*cdf0e10cSrcweir { 701*cdf0e10cSrcweir rtl_str_ImplCopy(pTemp2->buffer, pTemp->buffer, nDestChars); 702*cdf0e10cSrcweir rtl_freeMemory(pTemp); 703*cdf0e10cSrcweir pTemp = pTemp2; 704*cdf0e10cSrcweir } 705*cdf0e10cSrcweir else 706*cdf0e10cSrcweir { 707*cdf0e10cSrcweir pTemp->length = nDestChars; 708*cdf0e10cSrcweir pTemp->buffer[nDestChars] = 0; 709*cdf0e10cSrcweir } 710*cdf0e10cSrcweir 711*cdf0e10cSrcweir rtl_destroyTextToUnicodeConverter( hConverter ); 712*cdf0e10cSrcweir *ppThis = pTemp; 713*cdf0e10cSrcweir 714*cdf0e10cSrcweir /* Results the conversion in an empty buffer - 715*cdf0e10cSrcweir create an empty string */ 716*cdf0e10cSrcweir if ( pTemp && !nDestChars ) 717*cdf0e10cSrcweir rtl_uString_new( 
ppThis ); 718*cdf0e10cSrcweir } 719*cdf0e10cSrcweir } 720*cdf0e10cSrcweir } 721*cdf0e10cSrcweir 722*cdf0e10cSrcweir void SAL_CALL rtl_string2UString( rtl_uString** ppThis, 723*cdf0e10cSrcweir const sal_Char* pStr, 724*cdf0e10cSrcweir sal_Int32 nLen, 725*cdf0e10cSrcweir rtl_TextEncoding eTextEncoding, 726*cdf0e10cSrcweir sal_uInt32 nCvtFlags ) 727*cdf0e10cSrcweir { 728*cdf0e10cSrcweir rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding, 729*cdf0e10cSrcweir nCvtFlags, NULL ); 730*cdf0e10cSrcweir } 731*cdf0e10cSrcweir 732*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 733*cdf0e10cSrcweir 734*cdf0e10cSrcweir typedef enum { 735*cdf0e10cSrcweir CANNOT_RETURN, 736*cdf0e10cSrcweir CAN_RETURN = 1 737*cdf0e10cSrcweir } StrLifecycle; 738*cdf0e10cSrcweir 739*cdf0e10cSrcweir static oslMutex 740*cdf0e10cSrcweir getInternMutex() 741*cdf0e10cSrcweir { 742*cdf0e10cSrcweir static oslMutex pPoolGuard = NULL; 743*cdf0e10cSrcweir if( !pPoolGuard ) 744*cdf0e10cSrcweir { 745*cdf0e10cSrcweir oslMutex pGlobalGuard; 746*cdf0e10cSrcweir pGlobalGuard = *osl_getGlobalMutex(); 747*cdf0e10cSrcweir osl_acquireMutex( pGlobalGuard ); 748*cdf0e10cSrcweir if( !pPoolGuard ) 749*cdf0e10cSrcweir { 750*cdf0e10cSrcweir oslMutex p = osl_createMutex(); 751*cdf0e10cSrcweir OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); 752*cdf0e10cSrcweir pPoolGuard = p; 753*cdf0e10cSrcweir } 754*cdf0e10cSrcweir osl_releaseMutex( pGlobalGuard ); 755*cdf0e10cSrcweir } 756*cdf0e10cSrcweir else 757*cdf0e10cSrcweir { 758*cdf0e10cSrcweir OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); 759*cdf0e10cSrcweir } 760*cdf0e10cSrcweir 761*cdf0e10cSrcweir return pPoolGuard; 762*cdf0e10cSrcweir } 763*cdf0e10cSrcweir 764*cdf0e10cSrcweir /* returns true if we found a dup in the pool */ 765*cdf0e10cSrcweir static void rtl_ustring_intern_internal( rtl_uString ** newStr, 766*cdf0e10cSrcweir rtl_uString * str, 767*cdf0e10cSrcweir StrLifecycle can_return ) 768*cdf0e10cSrcweir { 769*cdf0e10cSrcweir 
oslMutex pPoolMutex; 770*cdf0e10cSrcweir 771*cdf0e10cSrcweir pPoolMutex = getInternMutex(); 772*cdf0e10cSrcweir 773*cdf0e10cSrcweir osl_acquireMutex( pPoolMutex ); 774*cdf0e10cSrcweir 775*cdf0e10cSrcweir *newStr = rtl_str_hash_intern (str, can_return); 776*cdf0e10cSrcweir 777*cdf0e10cSrcweir osl_releaseMutex( pPoolMutex ); 778*cdf0e10cSrcweir 779*cdf0e10cSrcweir if( can_return && *newStr != str ) 780*cdf0e10cSrcweir { /* we dupped, then found a match */ 781*cdf0e10cSrcweir rtl_freeMemory( str ); 782*cdf0e10cSrcweir } 783*cdf0e10cSrcweir } 784*cdf0e10cSrcweir 785*cdf0e10cSrcweir void SAL_CALL rtl_uString_intern( rtl_uString ** newStr, 786*cdf0e10cSrcweir rtl_uString * str) 787*cdf0e10cSrcweir { 788*cdf0e10cSrcweir if (SAL_STRING_IS_INTERN(str)) 789*cdf0e10cSrcweir { 790*cdf0e10cSrcweir IMPL_RTL_AQUIRE( str ); 791*cdf0e10cSrcweir *newStr = str; 792*cdf0e10cSrcweir } 793*cdf0e10cSrcweir else 794*cdf0e10cSrcweir { 795*cdf0e10cSrcweir rtl_uString *pOrg = *newStr; 796*cdf0e10cSrcweir *newStr = NULL; 797*cdf0e10cSrcweir rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN ); 798*cdf0e10cSrcweir if (pOrg) 799*cdf0e10cSrcweir rtl_uString_release (pOrg); 800*cdf0e10cSrcweir } 801*cdf0e10cSrcweir } 802*cdf0e10cSrcweir 803*cdf0e10cSrcweir void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr, 804*cdf0e10cSrcweir const sal_Char * str, 805*cdf0e10cSrcweir sal_Int32 len, 806*cdf0e10cSrcweir rtl_TextEncoding eTextEncoding, 807*cdf0e10cSrcweir sal_uInt32 convertFlags, 808*cdf0e10cSrcweir sal_uInt32 * pInfo ) 809*cdf0e10cSrcweir { 810*cdf0e10cSrcweir rtl_uString *scratch; 811*cdf0e10cSrcweir 812*cdf0e10cSrcweir if (*newStr) 813*cdf0e10cSrcweir { 814*cdf0e10cSrcweir rtl_uString_release (*newStr); 815*cdf0e10cSrcweir *newStr = NULL; 816*cdf0e10cSrcweir } 817*cdf0e10cSrcweir 818*cdf0e10cSrcweir if ( len < 256 ) 819*cdf0e10cSrcweir { // try various optimisations 820*cdf0e10cSrcweir if ( len < 0 ) 821*cdf0e10cSrcweir len = strlen( str ); 822*cdf0e10cSrcweir if ( eTextEncoding 
== RTL_TEXTENCODING_ASCII_US ) 823*cdf0e10cSrcweir { 824*cdf0e10cSrcweir int i; 825*cdf0e10cSrcweir rtl_uString *pScratch; 826*cdf0e10cSrcweir pScratch = alloca( sizeof( rtl_uString ) 827*cdf0e10cSrcweir + len * sizeof (IMPL_RTL_STRCODE ) ); 828*cdf0e10cSrcweir for (i = 0; i < len; i++) 829*cdf0e10cSrcweir { 830*cdf0e10cSrcweir /* Check ASCII range */ 831*cdf0e10cSrcweir OSL_ENSURE( ((unsigned char)str[i]) <= 127, 832*cdf0e10cSrcweir "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" ); 833*cdf0e10cSrcweir pScratch->buffer[i] = str[i]; 834*cdf0e10cSrcweir } 835*cdf0e10cSrcweir pScratch->length = len; 836*cdf0e10cSrcweir rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN ); 837*cdf0e10cSrcweir return; 838*cdf0e10cSrcweir } 839*cdf0e10cSrcweir /* FIXME: we want a nice UTF-8 / alloca shortcut here */ 840*cdf0e10cSrcweir } 841*cdf0e10cSrcweir 842*cdf0e10cSrcweir scratch = NULL; 843*cdf0e10cSrcweir rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags, 844*cdf0e10cSrcweir pInfo ); 845*cdf0e10cSrcweir if (!scratch) { 846*cdf0e10cSrcweir return; 847*cdf0e10cSrcweir } 848*cdf0e10cSrcweir rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN ); 849*cdf0e10cSrcweir } 850*cdf0e10cSrcweir 851*cdf0e10cSrcweir static void 852*cdf0e10cSrcweir internRelease (rtl_uString *pThis) 853*cdf0e10cSrcweir { 854*cdf0e10cSrcweir oslMutex pPoolMutex; 855*cdf0e10cSrcweir 856*cdf0e10cSrcweir rtl_uString *pFree = NULL; 857*cdf0e10cSrcweir if ( SAL_STRING_REFCOUNT( 858*cdf0e10cSrcweir osl_decrementInterlockedCount( &(pThis->refCount) ) ) == 0) 859*cdf0e10cSrcweir { 860*cdf0e10cSrcweir pPoolMutex = getInternMutex(); 861*cdf0e10cSrcweir osl_acquireMutex( pPoolMutex ); 862*cdf0e10cSrcweir 863*cdf0e10cSrcweir rtl_str_hash_remove (pThis); 864*cdf0e10cSrcweir 865*cdf0e10cSrcweir /* May have been separately acquired */ 866*cdf0e10cSrcweir if ( SAL_STRING_REFCOUNT( 867*cdf0e10cSrcweir osl_incrementInterlockedCount( 
&(pThis->refCount) ) ) == 1 ) 868*cdf0e10cSrcweir { 869*cdf0e10cSrcweir /* we got the last ref */ 870*cdf0e10cSrcweir pFree = pThis; 871*cdf0e10cSrcweir } 872*cdf0e10cSrcweir else /* very unusual */ 873*cdf0e10cSrcweir { 874*cdf0e10cSrcweir internRelease (pThis); 875*cdf0e10cSrcweir } 876*cdf0e10cSrcweir 877*cdf0e10cSrcweir osl_releaseMutex( pPoolMutex ); 878*cdf0e10cSrcweir } 879*cdf0e10cSrcweir if (pFree) 880*cdf0e10cSrcweir rtl_freeMemory (pFree); 881*cdf0e10cSrcweir } 882*cdf0e10cSrcweir 883*cdf0e10cSrcweir sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints( 884*cdf0e10cSrcweir rtl_uString const * string, sal_Int32 * indexUtf16, 885*cdf0e10cSrcweir sal_Int32 incrementCodePoints) 886*cdf0e10cSrcweir { 887*cdf0e10cSrcweir sal_Int32 n; 888*cdf0e10cSrcweir sal_Unicode cu; 889*cdf0e10cSrcweir sal_uInt32 cp; 890*cdf0e10cSrcweir OSL_ASSERT(string != NULL && indexUtf16 != NULL); 891*cdf0e10cSrcweir n = *indexUtf16; 892*cdf0e10cSrcweir OSL_ASSERT(n >= 0 && n <= string->length); 893*cdf0e10cSrcweir while (incrementCodePoints < 0) { 894*cdf0e10cSrcweir OSL_ASSERT(n > 0); 895*cdf0e10cSrcweir cu = string->buffer[--n]; 896*cdf0e10cSrcweir if (SAL_RTL_IS_LOW_SURROGATE(cu) && n != 0 && 897*cdf0e10cSrcweir SAL_RTL_IS_HIGH_SURROGATE(string->buffer[n - 1])) 898*cdf0e10cSrcweir { 899*cdf0e10cSrcweir --n; 900*cdf0e10cSrcweir } 901*cdf0e10cSrcweir ++incrementCodePoints; 902*cdf0e10cSrcweir } 903*cdf0e10cSrcweir OSL_ASSERT(n >= 0 && n < string->length); 904*cdf0e10cSrcweir cu = string->buffer[n]; 905*cdf0e10cSrcweir if (SAL_RTL_IS_HIGH_SURROGATE(cu) && string->length - n >= 2 && 906*cdf0e10cSrcweir SAL_RTL_IS_LOW_SURROGATE(string->buffer[n + 1])) 907*cdf0e10cSrcweir { 908*cdf0e10cSrcweir cp = SAL_RTL_COMBINE_SURROGATES(cu, string->buffer[n + 1]); 909*cdf0e10cSrcweir } else { 910*cdf0e10cSrcweir cp = cu; 911*cdf0e10cSrcweir } 912*cdf0e10cSrcweir while (incrementCodePoints > 0) { 913*cdf0e10cSrcweir OSL_ASSERT(n < string->length); 914*cdf0e10cSrcweir cu = string->buffer[n++]; 
915*cdf0e10cSrcweir if (SAL_RTL_IS_HIGH_SURROGATE(cu) && n != string->length && 916*cdf0e10cSrcweir SAL_RTL_IS_LOW_SURROGATE(string->buffer[n])) 917*cdf0e10cSrcweir { 918*cdf0e10cSrcweir ++n; 919*cdf0e10cSrcweir } 920*cdf0e10cSrcweir --incrementCodePoints; 921*cdf0e10cSrcweir } 922*cdf0e10cSrcweir OSL_ASSERT(n >= 0 && n <= string->length); 923*cdf0e10cSrcweir *indexUtf16 = n; 924*cdf0e10cSrcweir return cp; 925*cdf0e10cSrcweir } 926*cdf0e10cSrcweir 927*cdf0e10cSrcweir sal_Bool rtl_convertStringToUString( 928*cdf0e10cSrcweir rtl_uString ** target, char const * source, sal_Int32 length, 929*cdf0e10cSrcweir rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C() 930*cdf0e10cSrcweir { 931*cdf0e10cSrcweir sal_uInt32 info; 932*cdf0e10cSrcweir rtl_string2UString_status(target, source, length, encoding, flags, &info); 933*cdf0e10cSrcweir return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0); 934*cdf0e10cSrcweir } 935