xref: /AOO41X/main/sal/rtl/source/ustring.c (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir #if defined(_MSC_VER) && (_MSC_VER >= 1400)
28*cdf0e10cSrcweir #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
29*cdf0e10cSrcweir #endif
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <rtl/memory.h>
32*cdf0e10cSrcweir #include <osl/diagnose.h>
33*cdf0e10cSrcweir #include <osl/interlck.h>
34*cdf0e10cSrcweir #include <rtl/alloc.h>
35*cdf0e10cSrcweir #include <osl/mutex.h>
36*cdf0e10cSrcweir #include <osl/doublecheckedlocking.h>
37*cdf0e10cSrcweir #include <rtl/tencinfo.h>
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir #include <string.h>
40*cdf0e10cSrcweir #include <sal/alloca.h>
41*cdf0e10cSrcweir 
42*cdf0e10cSrcweir #include "hash.h"
43*cdf0e10cSrcweir #include "strimp.h"
44*cdf0e10cSrcweir #include "surrogates.h"
45*cdf0e10cSrcweir #include <rtl/ustring.h>
46*cdf0e10cSrcweir 
47*cdf0e10cSrcweir #include "rtl/math.h"
48*cdf0e10cSrcweir #include "rtl/tencinfo.h"
49*cdf0e10cSrcweir 
50*cdf0e10cSrcweir /* ======================================================================= */
51*cdf0e10cSrcweir 
/* Static data to be referenced by all empty strings.
 * The refCount field is predefined to 1, combined with the
 * SAL_STRING_INTERN_FLAG and SAL_STRING_STATIC_FLAG bits, and must never
 * become 0!  The length is 0 and the one-element buffer holds a single
 * zero code unit.
 */
static rtl_uString const aImplEmpty_rtl_uString =
{
    (sal_Int32) (SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32    refCount; */
    0,                                               /*sal_Int32    length;   */
    { 0 }                                            /*sal_Unicode  buffer[1];*/
};
61*cdf0e10cSrcweir 
62*cdf0e10cSrcweir /* ======================================================================= */
63*cdf0e10cSrcweir 
/* Parameters for the String/UString template code in strtmpl.c, which is
   included further down in this file.  They select the sal_Unicode flavour. */

/* Character (code unit) type the template code operates on. */
#define IMPL_RTL_STRCODE            sal_Unicode
/* Wraps a character value; for this flavour it is passed through unchanged. */
#define IMPL_RTL_USTRCODE( c )      (c)
/* Builds the name of a raw-buffer function: n becomes rtl_ustr_n. */
#define IMPL_RTL_STRNAME( n )       rtl_ustr_ ## n

/* Builds the name of a string-object function: n becomes rtl_uString_n. */
#define IMPL_RTL_STRINGNAME( n )    rtl_uString_ ## n
/* String object type used by the template code. */
#define IMPL_RTL_STRINGDATA         rtl_uString
/* Static instance that stands in for every empty string. */
#define IMPL_RTL_EMPTYSTRING        aImplEmpty_rtl_uString
/* NOTE(review): presumably enables the interning-aware paths of the
   template code -- confirm in strtmpl.c. */
#define IMPL_RTL_INTERN
/* Forward declaration only; the definition is not part of this section.
   NOTE(review): presumably required by the template code because
   IMPL_RTL_INTERN is defined -- confirm. */
static void internRelease (rtl_uString *pThis);
73*cdf0e10cSrcweir 
74*cdf0e10cSrcweir /* ======================================================================= */
75*cdf0e10cSrcweir 
76*cdf0e10cSrcweir /* Include String/UString template code */
77*cdf0e10cSrcweir 
78*cdf0e10cSrcweir #include "strtmpl.c"
79*cdf0e10cSrcweir 
80*cdf0e10cSrcweir sal_Int32 rtl_ustr_indexOfAscii_WithLength(
81*cdf0e10cSrcweir     sal_Unicode const * str, sal_Int32 len,
82*cdf0e10cSrcweir     char const * subStr, sal_Int32 subLen)
83*cdf0e10cSrcweir {
84*cdf0e10cSrcweir     if (subLen > 0 && subLen <= len) {
85*cdf0e10cSrcweir         sal_Int32 i;
86*cdf0e10cSrcweir         for (i = 0; i <= len - subLen; ++i) {
87*cdf0e10cSrcweir             if (rtl_ustr_asciil_reverseEquals_WithLength(
88*cdf0e10cSrcweir                     str + i, subStr, subLen))
89*cdf0e10cSrcweir             {
90*cdf0e10cSrcweir                 return i;
91*cdf0e10cSrcweir             }
92*cdf0e10cSrcweir         }
93*cdf0e10cSrcweir     }
94*cdf0e10cSrcweir     return -1;
95*cdf0e10cSrcweir }
96*cdf0e10cSrcweir 
97*cdf0e10cSrcweir sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength(
98*cdf0e10cSrcweir     sal_Unicode const * str, sal_Int32 len,
99*cdf0e10cSrcweir     char const * subStr, sal_Int32 subLen)
100*cdf0e10cSrcweir {
101*cdf0e10cSrcweir     if (subLen > 0 && subLen <= len) {
102*cdf0e10cSrcweir         sal_Int32 i;
103*cdf0e10cSrcweir         for (i = len - subLen; i >= 0; --i) {
104*cdf0e10cSrcweir             if (rtl_ustr_asciil_reverseEquals_WithLength(
105*cdf0e10cSrcweir                     str + i, subStr, subLen))
106*cdf0e10cSrcweir             {
107*cdf0e10cSrcweir                 return i;
108*cdf0e10cSrcweir             }
109*cdf0e10cSrcweir         }
110*cdf0e10cSrcweir     }
111*cdf0e10cSrcweir     return -1;
112*cdf0e10cSrcweir }
113*cdf0e10cSrcweir 
114*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f)
115*cdf0e10cSrcweir {
116*cdf0e10cSrcweir     rtl_uString * pResult = NULL;
117*cdf0e10cSrcweir     sal_Int32 nLen;
118*cdf0e10cSrcweir     rtl_math_doubleToUString(
119*cdf0e10cSrcweir         &pResult, 0, 0, f, rtl_math_StringFormat_G,
120*cdf0e10cSrcweir         RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
121*cdf0e10cSrcweir         0, sal_True);
122*cdf0e10cSrcweir     nLen = pResult->length;
123*cdf0e10cSrcweir     OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT);
124*cdf0e10cSrcweir     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
125*cdf0e10cSrcweir     rtl_uString_release(pResult);
126*cdf0e10cSrcweir     return nLen;
127*cdf0e10cSrcweir }
128*cdf0e10cSrcweir 
129*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d)
130*cdf0e10cSrcweir {
131*cdf0e10cSrcweir     rtl_uString * pResult = NULL;
132*cdf0e10cSrcweir     sal_Int32 nLen;
133*cdf0e10cSrcweir     rtl_math_doubleToUString(
134*cdf0e10cSrcweir         &pResult, 0, 0, d, rtl_math_StringFormat_G,
135*cdf0e10cSrcweir         RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
136*cdf0e10cSrcweir         0, sal_True);
137*cdf0e10cSrcweir     nLen = pResult->length;
138*cdf0e10cSrcweir     OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE);
139*cdf0e10cSrcweir     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
140*cdf0e10cSrcweir     rtl_uString_release(pResult);
141*cdf0e10cSrcweir     return nLen;
142*cdf0e10cSrcweir }
143*cdf0e10cSrcweir 
144*cdf0e10cSrcweir float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr)
145*cdf0e10cSrcweir {
146*cdf0e10cSrcweir     return (float) rtl_math_uStringToDouble(pStr,
147*cdf0e10cSrcweir                                             pStr + rtl_ustr_getLength(pStr),
148*cdf0e10cSrcweir                                             '.', 0, 0, 0);
149*cdf0e10cSrcweir }
150*cdf0e10cSrcweir 
151*cdf0e10cSrcweir double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr)
152*cdf0e10cSrcweir {
153*cdf0e10cSrcweir     return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.',
154*cdf0e10cSrcweir                                     0, 0, 0);
155*cdf0e10cSrcweir }
156*cdf0e10cSrcweir 
157*cdf0e10cSrcweir /* ======================================================================= */
158*cdf0e10cSrcweir 
159*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1,
160*cdf0e10cSrcweir                                            const sal_Char* pStr2 )
161*cdf0e10cSrcweir {
162*cdf0e10cSrcweir     sal_Int32 nRet;
163*cdf0e10cSrcweir     while ( ((nRet = ((sal_Int32)(*pStr1))-
164*cdf0e10cSrcweir                      ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
165*cdf0e10cSrcweir             *pStr2 )
166*cdf0e10cSrcweir     {
167*cdf0e10cSrcweir         pStr1++;
168*cdf0e10cSrcweir         pStr2++;
169*cdf0e10cSrcweir     }
170*cdf0e10cSrcweir 
171*cdf0e10cSrcweir     return nRet;
172*cdf0e10cSrcweir }
173*cdf0e10cSrcweir 
174*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
175*cdf0e10cSrcweir 
176*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1,
177*cdf0e10cSrcweir                                                       sal_Int32 nStr1Len,
178*cdf0e10cSrcweir                                                       const sal_Char* pStr2 )
179*cdf0e10cSrcweir {
180*cdf0e10cSrcweir 	sal_Int32 nRet = 0;
181*cdf0e10cSrcweir     while( ((nRet = (nStr1Len ? (sal_Int32)(*pStr1) : 0)-
182*cdf0e10cSrcweir                     ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
183*cdf0e10cSrcweir            nStr1Len && *pStr2 )
184*cdf0e10cSrcweir     {
185*cdf0e10cSrcweir         pStr1++;
186*cdf0e10cSrcweir         pStr2++;
187*cdf0e10cSrcweir         nStr1Len--;
188*cdf0e10cSrcweir     }
189*cdf0e10cSrcweir 
190*cdf0e10cSrcweir     return nRet;
191*cdf0e10cSrcweir }
192*cdf0e10cSrcweir 
193*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
194*cdf0e10cSrcweir 
195*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1,
196*cdf0e10cSrcweir                                                                sal_Int32 nStr1Len,
197*cdf0e10cSrcweir                                                                const sal_Char* pStr2,
198*cdf0e10cSrcweir                                                                sal_Int32 nShortenedLength )
199*cdf0e10cSrcweir {
200*cdf0e10cSrcweir     const sal_Unicode*  pStr1End = pStr1 + nStr1Len;
201*cdf0e10cSrcweir     sal_Int32           nRet;
202*cdf0e10cSrcweir     while ( (nShortenedLength > 0) &&
203*cdf0e10cSrcweir             (pStr1 < pStr1End) && *pStr2 )
204*cdf0e10cSrcweir     {
205*cdf0e10cSrcweir         /* Check ASCII range */
206*cdf0e10cSrcweir         OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
207*cdf0e10cSrcweir 
208*cdf0e10cSrcweir         nRet = ((sal_Int32)*pStr1)-
209*cdf0e10cSrcweir                ((sal_Int32)(unsigned char)*pStr2);
210*cdf0e10cSrcweir         if ( nRet != 0 )
211*cdf0e10cSrcweir             return nRet;
212*cdf0e10cSrcweir 
213*cdf0e10cSrcweir         nShortenedLength--;
214*cdf0e10cSrcweir         pStr1++;
215*cdf0e10cSrcweir         pStr2++;
216*cdf0e10cSrcweir     }
217*cdf0e10cSrcweir 
218*cdf0e10cSrcweir     if ( nShortenedLength <= 0 )
219*cdf0e10cSrcweir         return 0;
220*cdf0e10cSrcweir 
221*cdf0e10cSrcweir     if ( *pStr2 )
222*cdf0e10cSrcweir     {
223*cdf0e10cSrcweir         OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
224*cdf0e10cSrcweir         // first is a substring of the second string => less (negative value)
225*cdf0e10cSrcweir         nRet = -1;
226*cdf0e10cSrcweir     }
227*cdf0e10cSrcweir     else
228*cdf0e10cSrcweir     {
229*cdf0e10cSrcweir         // greater or equal
230*cdf0e10cSrcweir         nRet = pStr1End - pStr1;
231*cdf0e10cSrcweir     }
232*cdf0e10cSrcweir 
233*cdf0e10cSrcweir     return nRet;
234*cdf0e10cSrcweir }
235*cdf0e10cSrcweir 
236*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
237*cdf0e10cSrcweir 
238*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1,
239*cdf0e10cSrcweir                                                               sal_Int32 nStr1Len,
240*cdf0e10cSrcweir                                                               const sal_Char* pStr2,
241*cdf0e10cSrcweir                                                               sal_Int32 nStr2Len )
242*cdf0e10cSrcweir {
243*cdf0e10cSrcweir     const sal_Unicode*  pStr1Run = pStr1+nStr1Len;
244*cdf0e10cSrcweir     const sal_Char*     pStr2Run = pStr2+nStr2Len;
245*cdf0e10cSrcweir     sal_Int32           nRet;
246*cdf0e10cSrcweir     while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) )
247*cdf0e10cSrcweir     {
248*cdf0e10cSrcweir         pStr1Run--;
249*cdf0e10cSrcweir         pStr2Run--;
250*cdf0e10cSrcweir         nRet = ((sal_Int32)*pStr1Run)-((sal_Int32)*pStr2Run);
251*cdf0e10cSrcweir         if ( nRet )
252*cdf0e10cSrcweir             return nRet;
253*cdf0e10cSrcweir     }
254*cdf0e10cSrcweir 
255*cdf0e10cSrcweir     return nStr1Len - nStr2Len;
256*cdf0e10cSrcweir }
257*cdf0e10cSrcweir 
258*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
259*cdf0e10cSrcweir 
260*cdf0e10cSrcweir sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1,
261*cdf0e10cSrcweir                                                               const sal_Char* pStr2,
262*cdf0e10cSrcweir                                                               sal_Int32 nStrLen )
263*cdf0e10cSrcweir {
264*cdf0e10cSrcweir     const sal_Unicode*  pStr1Run = pStr1+nStrLen;
265*cdf0e10cSrcweir     const sal_Char*     pStr2Run = pStr2+nStrLen;
266*cdf0e10cSrcweir     while ( pStr1 < pStr1Run )
267*cdf0e10cSrcweir     {
268*cdf0e10cSrcweir         pStr1Run--;
269*cdf0e10cSrcweir         pStr2Run--;
270*cdf0e10cSrcweir 		if( *pStr1Run != (sal_Unicode)*pStr2Run )
271*cdf0e10cSrcweir 			return sal_False;
272*cdf0e10cSrcweir     }
273*cdf0e10cSrcweir 
274*cdf0e10cSrcweir     return sal_True;
275*cdf0e10cSrcweir }
276*cdf0e10cSrcweir 
277*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
278*cdf0e10cSrcweir 
279*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1,
280*cdf0e10cSrcweir                                                           const sal_Char* pStr2 )
281*cdf0e10cSrcweir {
282*cdf0e10cSrcweir     sal_Int32   nRet;
283*cdf0e10cSrcweir     sal_Int32   c1;
284*cdf0e10cSrcweir     sal_Int32   c2;
285*cdf0e10cSrcweir     do
286*cdf0e10cSrcweir     {
287*cdf0e10cSrcweir         /* If character between 'A' and 'Z', than convert it to lowercase */
288*cdf0e10cSrcweir         c1 = (sal_Int32)*pStr1;
289*cdf0e10cSrcweir         c2 = (sal_Int32)((unsigned char)*pStr2);
290*cdf0e10cSrcweir         if ( (c1 >= 65) && (c1 <= 90) )
291*cdf0e10cSrcweir             c1 += 32;
292*cdf0e10cSrcweir         if ( (c2 >= 65) && (c2 <= 90) )
293*cdf0e10cSrcweir             c2 += 32;
294*cdf0e10cSrcweir         nRet = c1-c2;
295*cdf0e10cSrcweir         if ( nRet != 0 )
296*cdf0e10cSrcweir             return nRet;
297*cdf0e10cSrcweir 
298*cdf0e10cSrcweir         pStr1++;
299*cdf0e10cSrcweir         pStr2++;
300*cdf0e10cSrcweir     }
301*cdf0e10cSrcweir     while ( c2 );
302*cdf0e10cSrcweir 
303*cdf0e10cSrcweir     return 0;
304*cdf0e10cSrcweir }
305*cdf0e10cSrcweir 
306*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
307*cdf0e10cSrcweir 
308*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
309*cdf0e10cSrcweir                                                                      sal_Int32 nStr1Len,
310*cdf0e10cSrcweir                                                                      const sal_Char* pStr2 )
311*cdf0e10cSrcweir {
312*cdf0e10cSrcweir     sal_Int32   nRet;
313*cdf0e10cSrcweir     sal_Int32   c1;
314*cdf0e10cSrcweir     sal_Int32   c2;
315*cdf0e10cSrcweir     do
316*cdf0e10cSrcweir     {
317*cdf0e10cSrcweir         if ( !nStr1Len )
318*cdf0e10cSrcweir             return *pStr2 == '\0' ? 0 : -1;
319*cdf0e10cSrcweir 
320*cdf0e10cSrcweir         /* If character between 'A' and 'Z', than convert it to lowercase */
321*cdf0e10cSrcweir         c1 = (sal_Int32)*pStr1;
322*cdf0e10cSrcweir         c2 = (sal_Int32)((unsigned char)*pStr2);
323*cdf0e10cSrcweir         if ( (c1 >= 65) && (c1 <= 90) )
324*cdf0e10cSrcweir             c1 += 32;
325*cdf0e10cSrcweir         if ( (c2 >= 65) && (c2 <= 90) )
326*cdf0e10cSrcweir             c2 += 32;
327*cdf0e10cSrcweir         nRet = c1-c2;
328*cdf0e10cSrcweir         if ( nRet != 0 )
329*cdf0e10cSrcweir             return nRet;
330*cdf0e10cSrcweir 
331*cdf0e10cSrcweir         pStr1++;
332*cdf0e10cSrcweir         pStr2++;
333*cdf0e10cSrcweir         nStr1Len--;
334*cdf0e10cSrcweir     }
335*cdf0e10cSrcweir     while( c2 );
336*cdf0e10cSrcweir 
337*cdf0e10cSrcweir     return 0;
338*cdf0e10cSrcweir }
339*cdf0e10cSrcweir 
340*cdf0e10cSrcweir sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths(
341*cdf0e10cSrcweir     sal_Unicode const * first, sal_Int32 firstLen,
342*cdf0e10cSrcweir     char const * second, sal_Int32 secondLen)
343*cdf0e10cSrcweir {
344*cdf0e10cSrcweir     sal_Int32 i;
345*cdf0e10cSrcweir     sal_Int32 len = firstLen < secondLen ? firstLen : secondLen;
346*cdf0e10cSrcweir     for (i = 0; i < len; ++i) {
347*cdf0e10cSrcweir         sal_Int32 c1 = *first++;
348*cdf0e10cSrcweir         sal_Int32 c2 = (unsigned char) *second++;
349*cdf0e10cSrcweir         sal_Int32 d;
350*cdf0e10cSrcweir         if (c1 >= 65 && c1 <= 90) {
351*cdf0e10cSrcweir             c1 += 32;
352*cdf0e10cSrcweir         }
353*cdf0e10cSrcweir         if (c2 >= 65 && c2 <= 90) {
354*cdf0e10cSrcweir             c2 += 32;
355*cdf0e10cSrcweir         }
356*cdf0e10cSrcweir         d = c1 - c2;
357*cdf0e10cSrcweir         if (d != 0) {
358*cdf0e10cSrcweir             return d;
359*cdf0e10cSrcweir         }
360*cdf0e10cSrcweir     }
361*cdf0e10cSrcweir     return firstLen - secondLen;
362*cdf0e10cSrcweir }
363*cdf0e10cSrcweir 
364*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
365*cdf0e10cSrcweir 
366*cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
367*cdf0e10cSrcweir                                                                               sal_Int32 nStr1Len,
368*cdf0e10cSrcweir                                                                               const sal_Char* pStr2,
369*cdf0e10cSrcweir                                                                               sal_Int32 nShortenedLength )
370*cdf0e10cSrcweir {
371*cdf0e10cSrcweir     const sal_Unicode*  pStr1End = pStr1 + nStr1Len;
372*cdf0e10cSrcweir     sal_Int32           nRet;
373*cdf0e10cSrcweir     sal_Int32           c1;
374*cdf0e10cSrcweir     sal_Int32           c2;
375*cdf0e10cSrcweir     while ( (nShortenedLength > 0) &&
376*cdf0e10cSrcweir             (pStr1 < pStr1End) && *pStr2 )
377*cdf0e10cSrcweir     {
378*cdf0e10cSrcweir         /* Check ASCII range */
379*cdf0e10cSrcweir         OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
380*cdf0e10cSrcweir 
381*cdf0e10cSrcweir         /* If character between 'A' and 'Z', than convert it to lowercase */
382*cdf0e10cSrcweir         c1 = (sal_Int32)*pStr1;
383*cdf0e10cSrcweir         c2 = (sal_Int32)((unsigned char)*pStr2);
384*cdf0e10cSrcweir         if ( (c1 >= 65) && (c1 <= 90) )
385*cdf0e10cSrcweir             c1 += 32;
386*cdf0e10cSrcweir         if ( (c2 >= 65) && (c2 <= 90) )
387*cdf0e10cSrcweir             c2 += 32;
388*cdf0e10cSrcweir         nRet = c1-c2;
389*cdf0e10cSrcweir         if ( nRet != 0 )
390*cdf0e10cSrcweir             return nRet;
391*cdf0e10cSrcweir 
392*cdf0e10cSrcweir         nShortenedLength--;
393*cdf0e10cSrcweir         pStr1++;
394*cdf0e10cSrcweir         pStr2++;
395*cdf0e10cSrcweir     }
396*cdf0e10cSrcweir 
397*cdf0e10cSrcweir     if ( nShortenedLength <= 0 )
398*cdf0e10cSrcweir         return 0;
399*cdf0e10cSrcweir 
400*cdf0e10cSrcweir     if ( *pStr2 )
401*cdf0e10cSrcweir     {
402*cdf0e10cSrcweir         OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
403*cdf0e10cSrcweir         // first is a substring of the second string => less (negative value)
404*cdf0e10cSrcweir         nRet = -1;
405*cdf0e10cSrcweir     }
406*cdf0e10cSrcweir     else
407*cdf0e10cSrcweir     {
408*cdf0e10cSrcweir         // greater or equal
409*cdf0e10cSrcweir         nRet = pStr1End - pStr1;
410*cdf0e10cSrcweir     }
411*cdf0e10cSrcweir 
412*cdf0e10cSrcweir     return nRet;
413*cdf0e10cSrcweir }
414*cdf0e10cSrcweir 
415*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
416*cdf0e10cSrcweir 
417*cdf0e10cSrcweir void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis,
418*cdf0e10cSrcweir                                         const sal_Char* pCharStr )
419*cdf0e10cSrcweir {
420*cdf0e10cSrcweir     sal_Int32 nLen;
421*cdf0e10cSrcweir 
422*cdf0e10cSrcweir     if ( pCharStr )
423*cdf0e10cSrcweir     {
424*cdf0e10cSrcweir         const sal_Char* pTempStr = pCharStr;
425*cdf0e10cSrcweir         while( *pTempStr )
426*cdf0e10cSrcweir             pTempStr++;
427*cdf0e10cSrcweir         nLen = pTempStr-pCharStr;
428*cdf0e10cSrcweir     }
429*cdf0e10cSrcweir     else
430*cdf0e10cSrcweir         nLen = 0;
431*cdf0e10cSrcweir 
432*cdf0e10cSrcweir     if ( !nLen )
433*cdf0e10cSrcweir     {
434*cdf0e10cSrcweir         IMPL_RTL_STRINGNAME( new )( ppThis );
435*cdf0e10cSrcweir         return;
436*cdf0e10cSrcweir     }
437*cdf0e10cSrcweir 
438*cdf0e10cSrcweir     if ( *ppThis )
439*cdf0e10cSrcweir         IMPL_RTL_STRINGNAME( release )( *ppThis );
440*cdf0e10cSrcweir 
441*cdf0e10cSrcweir     *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
442*cdf0e10cSrcweir     OSL_ASSERT(*ppThis != NULL);
443*cdf0e10cSrcweir     if ( (*ppThis) )
444*cdf0e10cSrcweir     {
445*cdf0e10cSrcweir         IMPL_RTL_STRCODE* pBuffer = (*ppThis)->buffer;
446*cdf0e10cSrcweir         do
447*cdf0e10cSrcweir         {
448*cdf0e10cSrcweir             /* Check ASCII range */
449*cdf0e10cSrcweir             OSL_ENSURE( ((unsigned char)*pCharStr) <= 127,
450*cdf0e10cSrcweir                         "rtl_uString_newFromAscii() - Found ASCII char > 127" );
451*cdf0e10cSrcweir 
452*cdf0e10cSrcweir             *pBuffer = *pCharStr;
453*cdf0e10cSrcweir             pBuffer++;
454*cdf0e10cSrcweir             pCharStr++;
455*cdf0e10cSrcweir         }
456*cdf0e10cSrcweir         while ( *pCharStr );
457*cdf0e10cSrcweir     }
458*cdf0e10cSrcweir }
459*cdf0e10cSrcweir 
460*cdf0e10cSrcweir void SAL_CALL rtl_uString_newFromCodePoints(
461*cdf0e10cSrcweir     rtl_uString ** newString, sal_uInt32 const * codePoints,
462*cdf0e10cSrcweir     sal_Int32 codePointCount)
463*cdf0e10cSrcweir {
464*cdf0e10cSrcweir     sal_Int32 n;
465*cdf0e10cSrcweir     sal_Int32 i;
466*cdf0e10cSrcweir     sal_Unicode * p;
467*cdf0e10cSrcweir     OSL_ASSERT(
468*cdf0e10cSrcweir         newString != NULL &&
469*cdf0e10cSrcweir         (codePoints != NULL || codePointCount == 0) &&
470*cdf0e10cSrcweir         codePointCount >= 0);
471*cdf0e10cSrcweir     if (codePointCount == 0) {
472*cdf0e10cSrcweir         rtl_uString_new(newString);
473*cdf0e10cSrcweir         return;
474*cdf0e10cSrcweir     }
475*cdf0e10cSrcweir     if (*newString != NULL) {
476*cdf0e10cSrcweir         rtl_uString_release(*newString);
477*cdf0e10cSrcweir     }
478*cdf0e10cSrcweir     n = codePointCount;
479*cdf0e10cSrcweir     for (i = 0; i < codePointCount; ++i) {
480*cdf0e10cSrcweir         OSL_ASSERT(codePoints[i] <= 0x10FFFF);
481*cdf0e10cSrcweir         if (codePoints[i] >= 0x10000) {
482*cdf0e10cSrcweir             ++n;
483*cdf0e10cSrcweir         }
484*cdf0e10cSrcweir     }
485*cdf0e10cSrcweir     /* Builds on the assumption that sal_Int32 uses 32 bit two's complement
486*cdf0e10cSrcweir        representation with wrap around (the necessary number of UTF-16 code
487*cdf0e10cSrcweir        units will be no larger than 2 * SAL_MAX_INT32, represented as
488*cdf0e10cSrcweir        sal_Int32 -2): */
489*cdf0e10cSrcweir     if (n < 0) {
490*cdf0e10cSrcweir         *newString = NULL;
491*cdf0e10cSrcweir         return;
492*cdf0e10cSrcweir     }
493*cdf0e10cSrcweir     *newString = rtl_uString_ImplAlloc(n);
494*cdf0e10cSrcweir     if (*newString == NULL) {
495*cdf0e10cSrcweir         return;
496*cdf0e10cSrcweir     }
497*cdf0e10cSrcweir     p = (*newString)->buffer;
498*cdf0e10cSrcweir     for (i = 0; i < codePointCount; ++i) {
499*cdf0e10cSrcweir         sal_uInt32 c = codePoints[i];
500*cdf0e10cSrcweir         if (c < 0x10000) {
501*cdf0e10cSrcweir             *p++ = (sal_Unicode) c;
502*cdf0e10cSrcweir         } else {
503*cdf0e10cSrcweir             c -= 0x10000;
504*cdf0e10cSrcweir             *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE);
505*cdf0e10cSrcweir             *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE);
506*cdf0e10cSrcweir         }
507*cdf0e10cSrcweir     }
508*cdf0e10cSrcweir }
509*cdf0e10cSrcweir 
510*cdf0e10cSrcweir /* ======================================================================= */
511*cdf0e10cSrcweir 
512*cdf0e10cSrcweir static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen )
513*cdf0e10cSrcweir {
514*cdf0e10cSrcweir     int             n;
515*cdf0e10cSrcweir     sal_uChar       c;
516*cdf0e10cSrcweir     const sal_Char* pEndStr;
517*cdf0e10cSrcweir 
518*cdf0e10cSrcweir     n = 0;
519*cdf0e10cSrcweir     pEndStr  = pStr+nLen;
520*cdf0e10cSrcweir     while ( pStr < pEndStr )
521*cdf0e10cSrcweir     {
522*cdf0e10cSrcweir         c = (sal_uChar)*pStr;
523*cdf0e10cSrcweir 
524*cdf0e10cSrcweir         if ( !(c & 0x80) )
525*cdf0e10cSrcweir             pStr++;
526*cdf0e10cSrcweir         else if ( (c & 0xE0) == 0xC0 )
527*cdf0e10cSrcweir             pStr += 2;
528*cdf0e10cSrcweir         else if ( (c & 0xF0) == 0xE0 )
529*cdf0e10cSrcweir             pStr += 3;
530*cdf0e10cSrcweir         else if ( (c & 0xF8) == 0xF0 )
531*cdf0e10cSrcweir             pStr += 4;
532*cdf0e10cSrcweir         else if ( (c & 0xFC) == 0xF8 )
533*cdf0e10cSrcweir             pStr += 5;
534*cdf0e10cSrcweir         else if ( (c & 0xFE) == 0xFC )
535*cdf0e10cSrcweir             pStr += 6;
536*cdf0e10cSrcweir         else
537*cdf0e10cSrcweir             pStr++;
538*cdf0e10cSrcweir 
539*cdf0e10cSrcweir         n++;
540*cdf0e10cSrcweir     }
541*cdf0e10cSrcweir 
542*cdf0e10cSrcweir     return n;
543*cdf0e10cSrcweir }
544*cdf0e10cSrcweir 
545*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
546*cdf0e10cSrcweir 
547*cdf0e10cSrcweir static void rtl_string2UString_status( rtl_uString** ppThis,
548*cdf0e10cSrcweir                                        const sal_Char* pStr,
549*cdf0e10cSrcweir                                        sal_Int32 nLen,
550*cdf0e10cSrcweir                                        rtl_TextEncoding eTextEncoding,
551*cdf0e10cSrcweir                                        sal_uInt32 nCvtFlags,
552*cdf0e10cSrcweir                                        sal_uInt32 *pInfo )
553*cdf0e10cSrcweir {
554*cdf0e10cSrcweir     OSL_ENSURE(rtl_isOctetTextEncoding(eTextEncoding),
555*cdf0e10cSrcweir                "rtl_string2UString_status() - Wrong TextEncoding" );
556*cdf0e10cSrcweir 
557*cdf0e10cSrcweir     if ( !nLen )
558*cdf0e10cSrcweir     {
559*cdf0e10cSrcweir         rtl_uString_new( ppThis );
560*cdf0e10cSrcweir         if (pInfo != NULL) {
561*cdf0e10cSrcweir             *pInfo = 0;
562*cdf0e10cSrcweir         }
563*cdf0e10cSrcweir     }
564*cdf0e10cSrcweir     else
565*cdf0e10cSrcweir     {
566*cdf0e10cSrcweir         if ( *ppThis )
567*cdf0e10cSrcweir             IMPL_RTL_STRINGNAME( release )( *ppThis );
568*cdf0e10cSrcweir 
569*cdf0e10cSrcweir         /* Optimization for US-ASCII */
570*cdf0e10cSrcweir         if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
571*cdf0e10cSrcweir         {
572*cdf0e10cSrcweir             IMPL_RTL_STRCODE* pBuffer;
573*cdf0e10cSrcweir             *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
574*cdf0e10cSrcweir             if (*ppThis == NULL) {
575*cdf0e10cSrcweir                 if (pInfo != NULL) {
576*cdf0e10cSrcweir                     *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
577*cdf0e10cSrcweir                         RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
578*cdf0e10cSrcweir                 }
579*cdf0e10cSrcweir                 return;
580*cdf0e10cSrcweir             }
581*cdf0e10cSrcweir             pBuffer = (*ppThis)->buffer;
582*cdf0e10cSrcweir             do
583*cdf0e10cSrcweir             {
584*cdf0e10cSrcweir                 /* Check ASCII range */
585*cdf0e10cSrcweir                 OSL_ENSURE( ((unsigned char)*pStr) <= 127,
586*cdf0e10cSrcweir                             "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
587*cdf0e10cSrcweir 
588*cdf0e10cSrcweir                 *pBuffer = *pStr;
589*cdf0e10cSrcweir                 pBuffer++;
590*cdf0e10cSrcweir                 pStr++;
591*cdf0e10cSrcweir                 nLen--;
592*cdf0e10cSrcweir             }
593*cdf0e10cSrcweir             while ( nLen );
594*cdf0e10cSrcweir             if (pInfo != NULL) {
595*cdf0e10cSrcweir                 *pInfo = 0;
596*cdf0e10cSrcweir             }
597*cdf0e10cSrcweir         }
598*cdf0e10cSrcweir         else
599*cdf0e10cSrcweir         {
600*cdf0e10cSrcweir             rtl_uString*                pTemp;
601*cdf0e10cSrcweir             rtl_uString*                pTemp2 = NULL;
602*cdf0e10cSrcweir             rtl_TextToUnicodeConverter  hConverter;
603*cdf0e10cSrcweir             sal_uInt32                  nInfo;
604*cdf0e10cSrcweir             sal_Size                    nSrcBytes;
605*cdf0e10cSrcweir             sal_Size                    nDestChars;
606*cdf0e10cSrcweir             sal_Size                    nNewLen;
607*cdf0e10cSrcweir 
608*cdf0e10cSrcweir             /* Optimization for UTF-8 - we try to calculate the exact length */
609*cdf0e10cSrcweir             /* For all other encoding we try the maximum - and reallocate
610*cdf0e10cSrcweir                the buffer if needed */
611*cdf0e10cSrcweir             if ( eTextEncoding == RTL_TEXTENCODING_UTF8 )
612*cdf0e10cSrcweir             {
613*cdf0e10cSrcweir                 nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen );
614*cdf0e10cSrcweir                 /* Includes the string only ASCII, then we could copy
615*cdf0e10cSrcweir                    the buffer faster */
616*cdf0e10cSrcweir                 if ( nNewLen == (sal_Size)nLen )
617*cdf0e10cSrcweir                 {
618*cdf0e10cSrcweir                     IMPL_RTL_STRCODE* pBuffer;
619*cdf0e10cSrcweir                     *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
620*cdf0e10cSrcweir                     if (*ppThis == NULL)
621*cdf0e10cSrcweir                     {
622*cdf0e10cSrcweir                         if (pInfo != NULL) {
623*cdf0e10cSrcweir                             *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
624*cdf0e10cSrcweir                                 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
625*cdf0e10cSrcweir                         }
626*cdf0e10cSrcweir                         return;
627*cdf0e10cSrcweir                     }
628*cdf0e10cSrcweir                     pBuffer = (*ppThis)->buffer;
629*cdf0e10cSrcweir                     do
630*cdf0e10cSrcweir                     {
631*cdf0e10cSrcweir                         /* Check ASCII range */
632*cdf0e10cSrcweir                         OSL_ENSURE( ((unsigned char)*pStr) <= 127,
633*cdf0e10cSrcweir                                     "rtl_string2UString_status() - UTF8 test encoding is wrong" );
634*cdf0e10cSrcweir 
635*cdf0e10cSrcweir                         *pBuffer = *pStr;
636*cdf0e10cSrcweir                         pBuffer++;
637*cdf0e10cSrcweir                         pStr++;
638*cdf0e10cSrcweir                         nLen--;
639*cdf0e10cSrcweir                     }
640*cdf0e10cSrcweir                     while ( nLen );
641*cdf0e10cSrcweir                     if (pInfo != NULL) {
642*cdf0e10cSrcweir                         *pInfo = 0;
643*cdf0e10cSrcweir                     }
644*cdf0e10cSrcweir                     return;
645*cdf0e10cSrcweir                 }
646*cdf0e10cSrcweir             }
647*cdf0e10cSrcweir             else
648*cdf0e10cSrcweir                 nNewLen = nLen;
649*cdf0e10cSrcweir 
650*cdf0e10cSrcweir             nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH;
651*cdf0e10cSrcweir             hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
652*cdf0e10cSrcweir 
653*cdf0e10cSrcweir             pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
654*cdf0e10cSrcweir             if (pTemp == NULL) {
655*cdf0e10cSrcweir                 if (pInfo != NULL) {
656*cdf0e10cSrcweir                     *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
657*cdf0e10cSrcweir                         RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
658*cdf0e10cSrcweir                 }
659*cdf0e10cSrcweir                 return;
660*cdf0e10cSrcweir             }
661*cdf0e10cSrcweir             nDestChars = rtl_convertTextToUnicode( hConverter, 0,
662*cdf0e10cSrcweir                                                    pStr, nLen,
663*cdf0e10cSrcweir                                                    pTemp->buffer, nNewLen,
664*cdf0e10cSrcweir                                                    nCvtFlags,
665*cdf0e10cSrcweir                                                    &nInfo, &nSrcBytes );
666*cdf0e10cSrcweir 
667*cdf0e10cSrcweir             /* Buffer not big enough, try again with enough space */
668*cdf0e10cSrcweir             /* Shouldn't be the case, but if we get textencoding which
669*cdf0e10cSrcweir                could results in more unicode characters we have this
670*cdf0e10cSrcweir                code here. Could be the case for apple encodings */
671*cdf0e10cSrcweir             while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL )
672*cdf0e10cSrcweir             {
673*cdf0e10cSrcweir                 rtl_freeMemory( pTemp );
674*cdf0e10cSrcweir                 nNewLen += 8;
675*cdf0e10cSrcweir                 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
676*cdf0e10cSrcweir                 if (pTemp == NULL) {
677*cdf0e10cSrcweir                     if (pInfo != NULL) {
678*cdf0e10cSrcweir                         *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
679*cdf0e10cSrcweir                             RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
680*cdf0e10cSrcweir                     }
681*cdf0e10cSrcweir                     return;
682*cdf0e10cSrcweir                 }
683*cdf0e10cSrcweir                 nDestChars = rtl_convertTextToUnicode( hConverter, 0,
684*cdf0e10cSrcweir                                                        pStr, nLen,
685*cdf0e10cSrcweir                                                        pTemp->buffer, nNewLen,
686*cdf0e10cSrcweir                                                        nCvtFlags,
687*cdf0e10cSrcweir                                                        &nInfo, &nSrcBytes );
688*cdf0e10cSrcweir             }
689*cdf0e10cSrcweir 
690*cdf0e10cSrcweir             if (pInfo)
691*cdf0e10cSrcweir                 *pInfo = nInfo;
692*cdf0e10cSrcweir 
693*cdf0e10cSrcweir             /* Set the buffer to the correct size or if there is too
694*cdf0e10cSrcweir                much overhead, reallocate to the correct size */
695*cdf0e10cSrcweir             if ( nNewLen > nDestChars+8 )
696*cdf0e10cSrcweir             {
697*cdf0e10cSrcweir                 pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestChars );
698*cdf0e10cSrcweir             }
699*cdf0e10cSrcweir             if (pTemp2 != NULL)
700*cdf0e10cSrcweir             {
701*cdf0e10cSrcweir                 rtl_str_ImplCopy(pTemp2->buffer, pTemp->buffer, nDestChars);
702*cdf0e10cSrcweir                 rtl_freeMemory(pTemp);
703*cdf0e10cSrcweir                 pTemp = pTemp2;
704*cdf0e10cSrcweir             }
705*cdf0e10cSrcweir             else
706*cdf0e10cSrcweir             {
707*cdf0e10cSrcweir                 pTemp->length = nDestChars;
708*cdf0e10cSrcweir                 pTemp->buffer[nDestChars] = 0;
709*cdf0e10cSrcweir             }
710*cdf0e10cSrcweir 
711*cdf0e10cSrcweir             rtl_destroyTextToUnicodeConverter( hConverter );
712*cdf0e10cSrcweir             *ppThis = pTemp;
713*cdf0e10cSrcweir 
714*cdf0e10cSrcweir             /* Results the conversion in an empty buffer -
715*cdf0e10cSrcweir                create an empty string */
716*cdf0e10cSrcweir             if ( pTemp && !nDestChars )
717*cdf0e10cSrcweir                 rtl_uString_new( ppThis );
718*cdf0e10cSrcweir         }
719*cdf0e10cSrcweir     }
720*cdf0e10cSrcweir }
721*cdf0e10cSrcweir 
722*cdf0e10cSrcweir void SAL_CALL rtl_string2UString( rtl_uString** ppThis,
723*cdf0e10cSrcweir                                   const sal_Char* pStr,
724*cdf0e10cSrcweir                                   sal_Int32 nLen,
725*cdf0e10cSrcweir                                   rtl_TextEncoding eTextEncoding,
726*cdf0e10cSrcweir                                   sal_uInt32 nCvtFlags )
727*cdf0e10cSrcweir {
728*cdf0e10cSrcweir     rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding,
729*cdf0e10cSrcweir                                nCvtFlags, NULL );
730*cdf0e10cSrcweir }
731*cdf0e10cSrcweir 
732*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
733*cdf0e10cSrcweir 
/*
 * Flag passed along with a string that is about to be interned.
 *
 * CANNOT_RETURN - used in this file for strings the interning code
 *                 does not own (the caller's string, or a stack
 *                 scratch buffer); such a string is never freed here.
 * CAN_RETURN    - used for a heap temporary created during conversion;
 *                 rtl_ustring_intern_internal() frees it when the pool
 *                 hands back a different string.
 */
typedef enum {
    CANNOT_RETURN = 0,
    CAN_RETURN    = 1
} StrLifecycle;
738*cdf0e10cSrcweir 
739*cdf0e10cSrcweir static oslMutex
740*cdf0e10cSrcweir getInternMutex()
741*cdf0e10cSrcweir {
742*cdf0e10cSrcweir     static oslMutex pPoolGuard = NULL;
743*cdf0e10cSrcweir     if( !pPoolGuard )
744*cdf0e10cSrcweir     {
745*cdf0e10cSrcweir         oslMutex pGlobalGuard;
746*cdf0e10cSrcweir         pGlobalGuard = *osl_getGlobalMutex();
747*cdf0e10cSrcweir         osl_acquireMutex( pGlobalGuard );
748*cdf0e10cSrcweir         if( !pPoolGuard )
749*cdf0e10cSrcweir         {
750*cdf0e10cSrcweir             oslMutex p = osl_createMutex();
751*cdf0e10cSrcweir             OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
752*cdf0e10cSrcweir             pPoolGuard = p;
753*cdf0e10cSrcweir         }
754*cdf0e10cSrcweir         osl_releaseMutex( pGlobalGuard );
755*cdf0e10cSrcweir     }
756*cdf0e10cSrcweir     else
757*cdf0e10cSrcweir     {
758*cdf0e10cSrcweir         OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
759*cdf0e10cSrcweir     }
760*cdf0e10cSrcweir 
761*cdf0e10cSrcweir     return pPoolGuard;
762*cdf0e10cSrcweir }
763*cdf0e10cSrcweir 
764*cdf0e10cSrcweir /* returns true if we found a dup in the pool */
765*cdf0e10cSrcweir static void rtl_ustring_intern_internal( rtl_uString ** newStr,
766*cdf0e10cSrcweir                                          rtl_uString  * str,
767*cdf0e10cSrcweir                                          StrLifecycle   can_return )
768*cdf0e10cSrcweir {
769*cdf0e10cSrcweir     oslMutex pPoolMutex;
770*cdf0e10cSrcweir 
771*cdf0e10cSrcweir     pPoolMutex = getInternMutex();
772*cdf0e10cSrcweir 
773*cdf0e10cSrcweir     osl_acquireMutex( pPoolMutex );
774*cdf0e10cSrcweir 
775*cdf0e10cSrcweir     *newStr = rtl_str_hash_intern (str, can_return);
776*cdf0e10cSrcweir 
777*cdf0e10cSrcweir     osl_releaseMutex( pPoolMutex );
778*cdf0e10cSrcweir 
779*cdf0e10cSrcweir     if( can_return && *newStr != str )
780*cdf0e10cSrcweir     { /* we dupped, then found a match */
781*cdf0e10cSrcweir         rtl_freeMemory( str );
782*cdf0e10cSrcweir     }
783*cdf0e10cSrcweir }
784*cdf0e10cSrcweir 
785*cdf0e10cSrcweir void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
786*cdf0e10cSrcweir                                   rtl_uString  * str)
787*cdf0e10cSrcweir {
788*cdf0e10cSrcweir     if (SAL_STRING_IS_INTERN(str))
789*cdf0e10cSrcweir     {
790*cdf0e10cSrcweir         IMPL_RTL_AQUIRE( str );
791*cdf0e10cSrcweir         *newStr = str;
792*cdf0e10cSrcweir     }
793*cdf0e10cSrcweir     else
794*cdf0e10cSrcweir     {
795*cdf0e10cSrcweir         rtl_uString *pOrg = *newStr;
796*cdf0e10cSrcweir         *newStr = NULL;
797*cdf0e10cSrcweir         rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN );
798*cdf0e10cSrcweir         if (pOrg)
799*cdf0e10cSrcweir             rtl_uString_release (pOrg);
800*cdf0e10cSrcweir     }
801*cdf0e10cSrcweir }
802*cdf0e10cSrcweir 
803*cdf0e10cSrcweir void SAL_CALL rtl_uString_internConvert( rtl_uString   ** newStr,
804*cdf0e10cSrcweir                                          const sal_Char * str,
805*cdf0e10cSrcweir                                          sal_Int32        len,
806*cdf0e10cSrcweir                                          rtl_TextEncoding eTextEncoding,
807*cdf0e10cSrcweir                                          sal_uInt32       convertFlags,
808*cdf0e10cSrcweir                                          sal_uInt32     * pInfo )
809*cdf0e10cSrcweir {
810*cdf0e10cSrcweir     rtl_uString *scratch;
811*cdf0e10cSrcweir 
812*cdf0e10cSrcweir     if (*newStr)
813*cdf0e10cSrcweir     {
814*cdf0e10cSrcweir         rtl_uString_release (*newStr);
815*cdf0e10cSrcweir         *newStr = NULL;
816*cdf0e10cSrcweir     }
817*cdf0e10cSrcweir 
818*cdf0e10cSrcweir     if ( len < 256 )
819*cdf0e10cSrcweir     { // try various optimisations
820*cdf0e10cSrcweir         if ( len < 0 )
821*cdf0e10cSrcweir             len = strlen( str );
822*cdf0e10cSrcweir         if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
823*cdf0e10cSrcweir         {
824*cdf0e10cSrcweir             int i;
825*cdf0e10cSrcweir             rtl_uString *pScratch;
826*cdf0e10cSrcweir             pScratch = alloca( sizeof( rtl_uString )
827*cdf0e10cSrcweir                                + len * sizeof (IMPL_RTL_STRCODE ) );
828*cdf0e10cSrcweir             for (i = 0; i < len; i++)
829*cdf0e10cSrcweir             {
830*cdf0e10cSrcweir                 /* Check ASCII range */
831*cdf0e10cSrcweir                 OSL_ENSURE( ((unsigned char)str[i]) <= 127,
832*cdf0e10cSrcweir                             "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
833*cdf0e10cSrcweir                 pScratch->buffer[i] = str[i];
834*cdf0e10cSrcweir             }
835*cdf0e10cSrcweir             pScratch->length = len;
836*cdf0e10cSrcweir             rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
837*cdf0e10cSrcweir             return;
838*cdf0e10cSrcweir         }
839*cdf0e10cSrcweir         /* FIXME: we want a nice UTF-8 / alloca shortcut here */
840*cdf0e10cSrcweir     }
841*cdf0e10cSrcweir 
842*cdf0e10cSrcweir     scratch = NULL;
843*cdf0e10cSrcweir     rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags,
844*cdf0e10cSrcweir                                pInfo );
845*cdf0e10cSrcweir     if (!scratch) {
846*cdf0e10cSrcweir         return;
847*cdf0e10cSrcweir     }
848*cdf0e10cSrcweir     rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN );
849*cdf0e10cSrcweir }
850*cdf0e10cSrcweir 
851*cdf0e10cSrcweir static void
852*cdf0e10cSrcweir internRelease (rtl_uString *pThis)
853*cdf0e10cSrcweir {
854*cdf0e10cSrcweir     oslMutex pPoolMutex;
855*cdf0e10cSrcweir 
856*cdf0e10cSrcweir     rtl_uString *pFree = NULL;
857*cdf0e10cSrcweir     if ( SAL_STRING_REFCOUNT(
858*cdf0e10cSrcweir              osl_decrementInterlockedCount( &(pThis->refCount) ) ) == 0)
859*cdf0e10cSrcweir     {
860*cdf0e10cSrcweir         pPoolMutex = getInternMutex();
861*cdf0e10cSrcweir         osl_acquireMutex( pPoolMutex );
862*cdf0e10cSrcweir 
863*cdf0e10cSrcweir         rtl_str_hash_remove (pThis);
864*cdf0e10cSrcweir 
865*cdf0e10cSrcweir         /* May have been separately acquired */
866*cdf0e10cSrcweir         if ( SAL_STRING_REFCOUNT(
867*cdf0e10cSrcweir                  osl_incrementInterlockedCount( &(pThis->refCount) ) ) == 1 )
868*cdf0e10cSrcweir         {
869*cdf0e10cSrcweir             /* we got the last ref */
870*cdf0e10cSrcweir             pFree = pThis;
871*cdf0e10cSrcweir         }
872*cdf0e10cSrcweir         else /* very unusual */
873*cdf0e10cSrcweir         {
874*cdf0e10cSrcweir             internRelease (pThis);
875*cdf0e10cSrcweir         }
876*cdf0e10cSrcweir 
877*cdf0e10cSrcweir         osl_releaseMutex( pPoolMutex );
878*cdf0e10cSrcweir     }
879*cdf0e10cSrcweir     if (pFree)
880*cdf0e10cSrcweir         rtl_freeMemory (pFree);
881*cdf0e10cSrcweir }
882*cdf0e10cSrcweir 
883*cdf0e10cSrcweir sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
884*cdf0e10cSrcweir     rtl_uString const * string, sal_Int32 * indexUtf16,
885*cdf0e10cSrcweir     sal_Int32 incrementCodePoints)
886*cdf0e10cSrcweir {
887*cdf0e10cSrcweir     sal_Int32 n;
888*cdf0e10cSrcweir     sal_Unicode cu;
889*cdf0e10cSrcweir     sal_uInt32 cp;
890*cdf0e10cSrcweir     OSL_ASSERT(string != NULL && indexUtf16 != NULL);
891*cdf0e10cSrcweir     n = *indexUtf16;
892*cdf0e10cSrcweir     OSL_ASSERT(n >= 0 && n <= string->length);
893*cdf0e10cSrcweir     while (incrementCodePoints < 0) {
894*cdf0e10cSrcweir         OSL_ASSERT(n > 0);
895*cdf0e10cSrcweir         cu = string->buffer[--n];
896*cdf0e10cSrcweir         if (SAL_RTL_IS_LOW_SURROGATE(cu) && n != 0 &&
897*cdf0e10cSrcweir             SAL_RTL_IS_HIGH_SURROGATE(string->buffer[n - 1]))
898*cdf0e10cSrcweir         {
899*cdf0e10cSrcweir             --n;
900*cdf0e10cSrcweir         }
901*cdf0e10cSrcweir         ++incrementCodePoints;
902*cdf0e10cSrcweir     }
903*cdf0e10cSrcweir     OSL_ASSERT(n >= 0 && n < string->length);
904*cdf0e10cSrcweir     cu = string->buffer[n];
905*cdf0e10cSrcweir     if (SAL_RTL_IS_HIGH_SURROGATE(cu) && string->length - n >= 2 &&
906*cdf0e10cSrcweir         SAL_RTL_IS_LOW_SURROGATE(string->buffer[n + 1]))
907*cdf0e10cSrcweir     {
908*cdf0e10cSrcweir         cp = SAL_RTL_COMBINE_SURROGATES(cu, string->buffer[n + 1]);
909*cdf0e10cSrcweir     } else {
910*cdf0e10cSrcweir         cp = cu;
911*cdf0e10cSrcweir     }
912*cdf0e10cSrcweir     while (incrementCodePoints > 0) {
913*cdf0e10cSrcweir         OSL_ASSERT(n < string->length);
914*cdf0e10cSrcweir         cu = string->buffer[n++];
915*cdf0e10cSrcweir         if (SAL_RTL_IS_HIGH_SURROGATE(cu) && n != string->length &&
916*cdf0e10cSrcweir             SAL_RTL_IS_LOW_SURROGATE(string->buffer[n]))
917*cdf0e10cSrcweir         {
918*cdf0e10cSrcweir             ++n;
919*cdf0e10cSrcweir         }
920*cdf0e10cSrcweir         --incrementCodePoints;
921*cdf0e10cSrcweir     }
922*cdf0e10cSrcweir     OSL_ASSERT(n >= 0 && n <= string->length);
923*cdf0e10cSrcweir     *indexUtf16 = n;
924*cdf0e10cSrcweir     return cp;
925*cdf0e10cSrcweir }
926*cdf0e10cSrcweir 
927*cdf0e10cSrcweir sal_Bool rtl_convertStringToUString(
928*cdf0e10cSrcweir     rtl_uString ** target, char const * source, sal_Int32 length,
929*cdf0e10cSrcweir     rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C()
930*cdf0e10cSrcweir {
931*cdf0e10cSrcweir     sal_uInt32 info;
932*cdf0e10cSrcweir     rtl_string2UString_status(target, source, length, encoding, flags, &info);
933*cdf0e10cSrcweir     return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0);
934*cdf0e10cSrcweir }
935