xref: /AOO41X/main/sal/textenc/tcvtutf8.c (revision 647f063d49501903f1667b75f5634541fc603283)
/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21*647f063dSAndrew Rist 
22*647f063dSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir #include "sal/types.h"
25cdf0e10cSrcweir #include "rtl/alloc.h"
26cdf0e10cSrcweir #include "rtl/textcvt.h"
27cdf0e10cSrcweir 
28cdf0e10cSrcweir #include "converter.h"
29cdf0e10cSrcweir #include "tenchelp.h"
30cdf0e10cSrcweir #include "unichars.h"
31cdf0e10cSrcweir 
32cdf0e10cSrcweir struct ImplUtf8ToUnicodeContext
33cdf0e10cSrcweir {
34cdf0e10cSrcweir     sal_uInt32 nUtf32;
35cdf0e10cSrcweir     int nShift;
36cdf0e10cSrcweir     sal_Bool bCheckBom;
37cdf0e10cSrcweir };
38cdf0e10cSrcweir 
39cdf0e10cSrcweir struct ImplUnicodeToUtf8Context
40cdf0e10cSrcweir {
41cdf0e10cSrcweir     sal_Unicode nHighSurrogate; /* 0xFFFF: write BOM */
42cdf0e10cSrcweir };
43cdf0e10cSrcweir 
ImplCreateUtf8ToUnicodeContext(void)44cdf0e10cSrcweir void * ImplCreateUtf8ToUnicodeContext(void)
45cdf0e10cSrcweir {
46cdf0e10cSrcweir     void * p = rtl_allocateMemory(sizeof (struct ImplUtf8ToUnicodeContext));
47cdf0e10cSrcweir     ImplResetUtf8ToUnicodeContext(p);
48cdf0e10cSrcweir     return p;
49cdf0e10cSrcweir }
50cdf0e10cSrcweir 
ImplResetUtf8ToUnicodeContext(void * pContext)51cdf0e10cSrcweir void ImplResetUtf8ToUnicodeContext(void * pContext)
52cdf0e10cSrcweir {
53cdf0e10cSrcweir     if (pContext != NULL)
54cdf0e10cSrcweir     {
55cdf0e10cSrcweir         ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift = -1;
56cdf0e10cSrcweir         ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom = sal_True;
57cdf0e10cSrcweir     }
58cdf0e10cSrcweir }
59cdf0e10cSrcweir 
/*
   Convert a buffer of UTF-8 bytes into UTF-16 code units.

   pData         non-null selects the Java UTF-8 flavour (a leading U+FEFF is
                 then never dropped); null selects plain UTF-8
   pContext      optional struct ImplUtf8ToUnicodeContext holding the state of
                 a sequence that was split across calls; may be null
   pSrcBuf       source bytes
   nSrcBytes     number of source bytes
   pDestBuf      destination buffer
   nDestChars    capacity of pDestBuf in sal_Unicode units
   nFlags        RTL_TEXTTOUNICODE_FLAGS_* controlling BOM handling, flushing
                 and (via ImplHandleBadInputTextToUnicodeConversion) the
                 treatment of bad input
   pInfo         if non-null, receives the accumulated
                 RTL_TEXTTOUNICODE_INFO_* bits
   pSrcCvtBytes  if non-null, receives the number of source bytes consumed

   Returns the number of sal_Unicode units written to pDestBuf.
 */
sal_Size ImplConvertUtf8ToUnicode(ImplTextConverterData const * pData,
                                  void * pContext, sal_Char const * pSrcBuf,
                                  sal_Size nSrcBytes, sal_Unicode * pDestBuf,
                                  sal_Size nDestChars, sal_uInt32 nFlags,
                                  sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes)
{
    /*
       This function is very liberal with the UTF-8 input.  Accepted are:
       - non-shortest forms (e.g., C0 41 instead of 41 to represent U+0041)
       - surrogates (e.g., ED A0 80 to represent U+D800)
       - encodings with up to six bytes (everything outside the range
         U+0000..10FFFF is considered "undefined")
       The first two of these points allow this routine to translate from both
       RTL_TEXTENCODING_UTF8 and RTL_TEXTENCODING_JAVA_UTF8.
     */

    int bJavaUtf8 = pData != NULL;
    sal_uInt32 nUtf32 = 0;
    int nShift = -1;
    sal_Bool bCheckBom = sal_True;
    sal_uInt32 nInfo = 0;
    sal_uChar const * pSrcBufPtr = (sal_uChar const *) pSrcBuf;
    sal_uChar const * pSrcBufEnd = pSrcBufPtr + nSrcBytes;
    sal_Unicode * pDestBufPtr = pDestBuf;
    sal_Unicode * pDestBufEnd = pDestBufPtr + nDestChars;

    /* Resume a sequence left unfinished by the previous call, if any. */
    if (pContext != NULL)
    {
        nUtf32 = ((struct ImplUtf8ToUnicodeContext *) pContext)->nUtf32;
        nShift = ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift;
        bCheckBom = ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom;
    }

    while (pSrcBufPtr < pSrcBufEnd)
    {
        sal_Bool bUndefined = sal_False;
        int bConsume = sal_True;
        sal_uInt32 nChar = *pSrcBufPtr++;
        if (nShift < 0)
        {
            /* Expecting a lead byte; its value fixes the sequence length. */
            if (nChar <= 0x7F)
            {
                nUtf32 = nChar;
                goto transform;
            }
            else if (nChar <= 0xBF)
            {
                /* Stray continuation byte. */
                goto bad_input;
            }
            else if (nChar <= 0xDF)
            {
                nUtf32 = (nChar & 0x1F) << 6;
                nShift = 0;
            }
            else if (nChar <= 0xEF)
            {
                nUtf32 = (nChar & 0x0F) << 12;
                nShift = 6;
            }
            else if (nChar <= 0xF7)
            {
                nUtf32 = (nChar & 0x07) << 18;
                nShift = 12;
            }
            else if (nChar <= 0xFB)
            {
                nUtf32 = (nChar & 0x03) << 24;
                nShift = 18;
            }
            else if (nChar <= 0xFD)
            {
                nUtf32 = (nChar & 0x01) << 30;
                nShift = 24;
            }
            else
            {
                /* 0xFE and 0xFF never start a sequence. */
                goto bad_input;
            }
        }
        else if ((nChar & 0xC0) == 0x80)
        {
            /* Continuation byte: merge its six payload bits. */
            nUtf32 |= (nChar & 0x3F) << nShift;
            if (nShift == 0)
                goto transform;
            else
                nShift -= 6;
        }
        else
        {
            /*
               This byte is preceded by a broken UTF-8 sequence; if this byte
               is neither in the range [0x80..0xBF] nor in the range
               [0xFE..0xFF], assume that this byte does not belong to that
               broken sequence, but instead starts a new, legal UTF-8
               sequence:
             */
            bConsume = nChar >= 0xFE;
            goto bad_input;
        }
        continue;

    transform:
        /* A complete code point is in nUtf32.  It is emitted unless it is a
           leading U+FEFF that the caller asked to have treated as a
           signature (never the case for Java UTF-8). */
        if (!bCheckBom || nUtf32 != 0xFEFF
            || (nFlags & RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE) == 0
            || bJavaUtf8)
        {
            if (nUtf32 <= 0xFFFF)
            {
                if (pDestBufPtr != pDestBufEnd)
                    *pDestBufPtr++ = (sal_Unicode) nUtf32;
                else
                    goto no_output;
            }
            else if (nUtf32 <= 0x10FFFF)
            {
                if (pDestBufEnd - pDestBufPtr >= 2)
                {
                    *pDestBufPtr++ = (sal_Unicode) ImplGetHighSurrogate(nUtf32);
                    *pDestBufPtr++ = (sal_Unicode) ImplGetLowSurrogate(nUtf32);
                }
                else
                    goto no_output;
            }
            else
            {
                /* Beyond U+10FFFF: reported as undefined rather than
                   invalid. */
                bUndefined = sal_True;
                goto bad_input;
            }
        }
        nShift = -1;
        bCheckBom = sal_False;
        continue;

    bad_input:
        switch (ImplHandleBadInputTextToUnicodeConversion(
                    bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
                    &nInfo))
        {
        case IMPL_BAD_INPUT_STOP:
            nShift = -1;
            bCheckBom = sal_False;
            /* Give the byte back if it may start a new sequence. */
            if (!bConsume)
                --pSrcBufPtr;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            nShift = -1;
            bCheckBom = sal_False;
            if (!bConsume)
                --pSrcBufPtr;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break;

    no_output:
        /* Un-read the byte that could not be processed; nShift is left as it
           is so the sequence state is written back to the context below. */
        --pSrcBufPtr;
        nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* The source ended in the middle of a sequence and nothing else went
       wrong: without FLUSH report that more input is needed, with FLUSH
       treat the truncated sequence as bad input. */
    if (nShift >= 0
        && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
                         | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
            nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
        else
            switch (ImplHandleBadInputTextToUnicodeConversion(
                        sal_False, sal_True, 0, nFlags, &pDestBufPtr,
                        pDestBufEnd, &nInfo))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nShift = -1;
                bCheckBom = sal_False;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                break;
            }
    }

    if (pContext != NULL)
    {
        ((struct ImplUtf8ToUnicodeContext *) pContext)->nUtf32 = nUtf32;
        ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift = nShift;
        ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom = bCheckBom;
    }
    if (pInfo != NULL)
        *pInfo = nInfo;
    if (pSrcCvtBytes != NULL)
        *pSrcCvtBytes = (sal_Char const *) pSrcBufPtr - pSrcBuf;
    return pDestBufPtr - pDestBuf;
}
248cdf0e10cSrcweir 
ImplCreateUnicodeToUtf8Context(void)249cdf0e10cSrcweir void * ImplCreateUnicodeToUtf8Context(void)
250cdf0e10cSrcweir {
251cdf0e10cSrcweir     void * p = rtl_allocateMemory(sizeof (struct ImplUnicodeToUtf8Context));
252cdf0e10cSrcweir     ImplResetUnicodeToUtf8Context(p);
253cdf0e10cSrcweir     return p;
254cdf0e10cSrcweir }
255cdf0e10cSrcweir 
ImplResetUnicodeToUtf8Context(void * pContext)256cdf0e10cSrcweir void ImplResetUnicodeToUtf8Context(void * pContext)
257cdf0e10cSrcweir {
258cdf0e10cSrcweir     if (pContext != NULL)
259cdf0e10cSrcweir         ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate = 0xFFFF;
260cdf0e10cSrcweir }
261cdf0e10cSrcweir 
/*
   Convert a buffer of UTF-16 code units into UTF-8 bytes.

   pData         non-null selects the Java UTF-8 flavour: no BOM is written,
                 surrogates are not combined but encoded individually as
                 three bytes each, and U+0000 is written as the two bytes
                 C0 80; null selects plain UTF-8
   pContext      optional struct ImplUnicodeToUtf8Context holding a pending
                 high surrogate / the "BOM not yet written" marker; may be
                 null
   pSrcBuf       source code units
   nSrcChars     number of source code units
   pDestBuf      destination buffer
   nDestBytes    capacity of pDestBuf in bytes
   nFlags        RTL_UNICODETOTEXT_FLAGS_* controlling BOM output, flushing
                 and (via ImplHandleBadInputUnicodeToTextConversion) the
                 treatment of bad input
   pInfo         if non-null, receives the accumulated
                 RTL_UNICODETOTEXT_INFO_* bits
   pSrcCvtChars  if non-null, receives the number of source code units
                 consumed

   Returns the number of bytes written to pDestBuf.
 */
sal_Size ImplConvertUnicodeToUtf8(ImplTextConverterData const * pData,
                                  void * pContext, sal_Unicode const * pSrcBuf,
                                  sal_Size nSrcChars, sal_Char * pDestBuf,
                                  sal_Size nDestBytes, sal_uInt32 nFlags,
                                  sal_uInt32 * pInfo, sal_Size* pSrcCvtChars)
{
    int bJavaUtf8 = pData != NULL;
    sal_Unicode nHighSurrogate = 0xFFFF;
    sal_uInt32 nInfo = 0;
    sal_Unicode const * pSrcBufPtr = pSrcBuf;
    sal_Unicode const * pSrcBufEnd = pSrcBufPtr + nSrcChars;
    sal_Char * pDestBufPtr = pDestBuf;
    sal_Char * pDestBufEnd = pDestBufPtr + nDestBytes;

    if (pContext != NULL)
        nHighSurrogate
            = ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate;

    /* 0xFFFF marks the very start of a conversion: emit the signature if
       requested, then switch to the normal "no surrogate pending" state. */
    if (nHighSurrogate == 0xFFFF)
    {
        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_GLOBAL_SIGNATURE) != 0
            && !bJavaUtf8)
        {
            if (pDestBufEnd - pDestBufPtr >= 3)
            {
                /* Write BOM (U+FEFF) as UTF-8: */
                *pDestBufPtr++ = (sal_Char) (unsigned char) 0xEF;
                *pDestBufPtr++ = (sal_Char) (unsigned char) 0xBB;
                *pDestBufPtr++ = (sal_Char) (unsigned char) 0xBF;
            }
            else
            {
                /* Leave the 0xFFFF marker in place so the BOM is retried on
                   the next call. */
                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                goto done;
            }
        }
        nHighSurrogate = 0;
    }

    while (pSrcBufPtr < pSrcBufEnd)
    {
        sal_uInt32 nChar = *pSrcBufPtr++;
        if (nHighSurrogate == 0)
        {
            if (ImplIsHighSurrogate(nChar) && !bJavaUtf8)
            {
                /* Hold the high surrogate back until its partner arrives. */
                nHighSurrogate = (sal_Unicode) nChar;
                continue;
            }
        }
        else if (ImplIsLowSurrogate(nChar) && !bJavaUtf8)
        {
            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
        }
        else
        {
            /* A pending high surrogate was not followed by a low one. */
            goto bad_input;
        }

        /* An unpaired low surrogate (plain UTF-8 only) or a noncharacter is
           rejected. */
        if ((ImplIsLowSurrogate(nChar) && !bJavaUtf8)
            || ImplIsNoncharacter(nChar))
            goto bad_input;

        if (nChar <= 0x7F && (!bJavaUtf8 || nChar != 0))
        {
            if (pDestBufPtr != pDestBufEnd)
                *pDestBufPtr++ = (sal_Char) nChar;
            else
                goto no_output;
        }
        else if (nChar <= 0x7FF)
        {
            /* Also reached for U+0000 in Java mode, yielding C0 80. */
            if (pDestBufEnd - pDestBufPtr >= 2)
            {
                *pDestBufPtr++ = (sal_Char) (0xC0 | (nChar >> 6));
                *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F));
            }
            else
                goto no_output;
        }
        else if (nChar <= 0xFFFF)
        {
            if (pDestBufEnd - pDestBufPtr >= 3)
            {
                *pDestBufPtr++ = (sal_Char) (0xE0 | (nChar >> 12));
                *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 6) & 0x3F));
                *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F));
            }
            else
                goto no_output;
        }
        else if (pDestBufEnd - pDestBufPtr >= 4)
        {
            *pDestBufPtr++ = (sal_Char) (0xF0 | (nChar >> 18));
            *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 12) & 0x3F));
            *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 6) & 0x3F));
            *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F));
        }
        else
            goto no_output;
        nHighSurrogate = 0;
        continue;

    bad_input:
        switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 0, nFlags,
                                                          &pDestBufPtr,
                                                          pDestBufEnd, &nInfo,
                                                          NULL, 0, NULL))
        {
        case IMPL_BAD_INPUT_STOP:
            nHighSurrogate = 0;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            nHighSurrogate = 0;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break;

    no_output:
        /* Un-read the code unit that could not be written; a pending high
           surrogate stays in nHighSurrogate and is saved to the context. */
        --pSrcBufPtr;
        nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* The source ended with a high surrogate still pending and nothing else
       went wrong.
       NOTE(review): the FLUSH test below has the opposite sense of the
       corresponding test in ImplConvertUtf8ToUnicode (there: "== 0" reports
       SRCBUFFERTOSMALL).  Behaviour is kept as found; confirm whether this
       is intended before changing it. */
    if (nHighSurrogate != 0
        && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
                         | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
            nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
        else
            switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 0,
                                                              nFlags,
                                                              &pDestBufPtr,
                                                              pDestBufEnd,
                                                              &nInfo, NULL, 0,
                                                              NULL))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nHighSurrogate = 0;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }
    }

 done:
    if (pContext != NULL)
        ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate
            = nHighSurrogate;
    if (pInfo != NULL)
        *pInfo = nInfo;
    if (pSrcCvtChars != NULL)
        *pSrcCvtChars = pSrcBufPtr - pSrcBuf;
    return pDestBufPtr - pDestBuf;
}
416