xref: /AOO41X/main/sal/textenc/tcvtutf8.c (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir #include "sal/types.h"
29*cdf0e10cSrcweir #include "rtl/alloc.h"
30*cdf0e10cSrcweir #include "rtl/textcvt.h"
31*cdf0e10cSrcweir 
32*cdf0e10cSrcweir #include "converter.h"
33*cdf0e10cSrcweir #include "tenchelp.h"
34*cdf0e10cSrcweir #include "unichars.h"
35*cdf0e10cSrcweir 
/*
 * Decoder state that ImplConvertUtf8ToUnicode carries from one call to the
 * next, so that a multi-byte UTF-8 sequence may be split across calls.
 */
struct ImplUtf8ToUnicodeContext
{
    /* Code point bits collected so far for the sequence in progress. */
    sal_uInt32 nUtf32;
    /* -1 while no sequence is in progress; otherwise the left shift to apply
       to the payload bits of the next continuation byte (0 = last byte). */
    int nShift;
    /* True until the first character (or bad input) has been handled; while
       true, a leading U+FEFF may be dropped as a signature. */
    sal_Bool bCheckBom;
};
42*cdf0e10cSrcweir 
/*
 * Encoder state that ImplConvertUnicodeToUtf8 carries from one call to the
 * next.
 */
struct ImplUnicodeToUtf8Context
{
    /* 0xFFFF: initial state, the BOM (if requested) is still to be written;
       0: no surrogate pending;
       anything else: a high surrogate waiting for its low surrogate. */
    sal_Unicode nHighSurrogate; /* 0xFFFF: write BOM */
};
47*cdf0e10cSrcweir 
48*cdf0e10cSrcweir void * ImplCreateUtf8ToUnicodeContext(void)
49*cdf0e10cSrcweir {
50*cdf0e10cSrcweir     void * p = rtl_allocateMemory(sizeof (struct ImplUtf8ToUnicodeContext));
51*cdf0e10cSrcweir     ImplResetUtf8ToUnicodeContext(p);
52*cdf0e10cSrcweir     return p;
53*cdf0e10cSrcweir }
54*cdf0e10cSrcweir 
55*cdf0e10cSrcweir void ImplResetUtf8ToUnicodeContext(void * pContext)
56*cdf0e10cSrcweir {
57*cdf0e10cSrcweir     if (pContext != NULL)
58*cdf0e10cSrcweir     {
59*cdf0e10cSrcweir         ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift = -1;
60*cdf0e10cSrcweir         ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom = sal_True;
61*cdf0e10cSrcweir     }
62*cdf0e10cSrcweir }
63*cdf0e10cSrcweir 
/*
 * Decode UTF-8 bytes from pSrcBuf into UTF-16 code units in pDestBuf.
 *
 * pData        NULL for plain UTF-8; non-NULL selects the Java UTF-8 variant
 *              (in which a leading U+FEFF is never dropped as a signature).
 * pContext     optional struct ImplUtf8ToUnicodeContext carrying the decoder
 *              state between calls; may be NULL, in which case each call
 *              starts from the initial state.
 * pSrcBuf      input bytes; nSrcBytes is their count.
 * pDestBuf     output buffer; nDestChars is its capacity in sal_Unicode units.
 * nFlags       RTL_TEXTTOUNICODE_FLAGS_* bits; GLOBAL_SIGNATURE and FLUSH are
 *              examined here, the rest is passed to the bad-input handler.
 * pInfo        if non-NULL, receives the accumulated RTL_TEXTTOUNICODE_INFO_*
 *              bits.
 * pSrcCvtBytes if non-NULL, receives the number of source bytes consumed.
 *
 * Returns the number of sal_Unicode units written to pDestBuf.
 */
sal_Size ImplConvertUtf8ToUnicode(ImplTextConverterData const * pData,
                                  void * pContext, sal_Char const * pSrcBuf,
                                  sal_Size nSrcBytes, sal_Unicode * pDestBuf,
                                  sal_Size nDestChars, sal_uInt32 nFlags,
                                  sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes)
{
    /*
       This function is very liberal with the UTF-8 input.  Accepted are:
       - non-shortest forms (e.g., C1 81 instead of 41 to represent U+0041)
       - surrogates (e.g., ED A0 80 to represent U+D800)
       - encodings with up to six bytes (everything outside the range
         U+0000..10FFFF is considered "undefined")
       The first two of these points allow this routine to translate from both
       RTL_TEXTENCODING_UTF8 and RTL_TEXTENCODING_JAVA_UTF8.
     */

    int bJavaUtf8 = pData != NULL;
    sal_uInt32 nUtf32 = 0;
    int nShift = -1;
    sal_Bool bCheckBom = sal_True;
    sal_uInt32 nInfo = 0;
    sal_uChar const * pSrcBufPtr = (sal_uChar const *) pSrcBuf;
    sal_uChar const * pSrcBufEnd = pSrcBufPtr + nSrcBytes;
    sal_Unicode * pDestBufPtr = pDestBuf;
    sal_Unicode * pDestBufEnd = pDestBufPtr + nDestChars;

    /* Resume from the state left behind by a previous call, if any: */
    if (pContext != NULL)
    {
        nUtf32 = ((struct ImplUtf8ToUnicodeContext *) pContext)->nUtf32;
        nShift = ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift;
        bCheckBom = ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom;
    }

    while (pSrcBufPtr < pSrcBufEnd)
    {
        sal_Bool bUndefined = sal_False;
        /* Whether the byte just read stays consumed when it turns out to be
           bad input: */
        int bConsume = sal_True;
        sal_uInt32 nChar = *pSrcBufPtr++;
        /* nShift < 0: expecting a lead byte.  The lead byte fixes the length
           of the sequence; its payload bits go to the top of nUtf32 and
           nShift becomes the shift for the next continuation byte.  (The
           final "else if" below pairs with this outer "if".) */
        if (nShift < 0)
            if (nChar <= 0x7F)
            {
                /* single byte */
                nUtf32 = nChar;
                goto transform;
            }
            else if (nChar <= 0xBF)
                /* continuation byte without a lead byte */
                goto bad_input;
            else if (nChar <= 0xDF)
            {
                /* lead byte of a two-byte sequence */
                nUtf32 = (nChar & 0x1F) << 6;
                nShift = 0;
            }
            else if (nChar <= 0xEF)
            {
                /* lead byte of a three-byte sequence */
                nUtf32 = (nChar & 0x0F) << 12;
                nShift = 6;
            }
            else if (nChar <= 0xF7)
            {
                /* lead byte of a four-byte sequence */
                nUtf32 = (nChar & 0x07) << 18;
                nShift = 12;
            }
            else if (nChar <= 0xFB)
            {
                /* lead byte of a five-byte sequence */
                nUtf32 = (nChar & 0x03) << 24;
                nShift = 18;
            }
            else if (nChar <= 0xFD)
            {
                /* lead byte of a six-byte sequence */
                nUtf32 = (nChar & 0x01) << 30;
                nShift = 24;
            }
            else
                /* FE and FF never start a sequence */
                goto bad_input;
        else if ((nChar & 0xC0) == 0x80)
        {
            /* Continuation byte: merge its six payload bits; a shift of zero
               means this was the last byte of the sequence. */
            nUtf32 |= (nChar & 0x3F) << nShift;
            if (nShift == 0)
                goto transform;
            else
                nShift -= 6;
        }
        else
        {
            /*
             This byte is preceded by a broken UTF-8 sequence; if this byte
             is neither in the range [0x80..0xBF] nor in the range
             [0xFE..0xFF], assume that this byte does not belong to that
             broken sequence, but instead starts a new, legal UTF-8 sequence:
             */
            bConsume = nChar >= 0xFE;
            goto bad_input;
        }
        continue;

    transform:
        /* A complete code point is in nUtf32.  It is written out unless it is
           a U+FEFF that is the very first character, the caller asked for
           signature handling, and this is not Java UTF-8; in that case it is
           silently dropped. */
        if (!bCheckBom || nUtf32 != 0xFEFF
            || (nFlags & RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE) == 0
            || bJavaUtf8)
        {
            if (nUtf32 <= 0xFFFF)
                if (pDestBufPtr != pDestBufEnd)
                    *pDestBufPtr++ = (sal_Unicode) nUtf32;
                else
                    goto no_output;
            else if (nUtf32 <= 0x10FFFF)
                /* needs a surrogate pair, i.e. two output units */
                if (pDestBufEnd - pDestBufPtr >= 2)
                {
                    *pDestBufPtr++ = (sal_Unicode) ImplGetHighSurrogate(nUtf32);
                    *pDestBufPtr++ = (sal_Unicode) ImplGetLowSurrogate(nUtf32);
                }
                else
                    goto no_output;
            else
            {
                /* beyond U+10FFFF: reported as undefined rather than
                   invalid */
                bUndefined = sal_True;
                goto bad_input;
            }
        }
        nShift = -1;
        bCheckBom = sal_False;
        continue;

    bad_input:
        /* Let the shared handler decide, based on nFlags, how to proceed.  In
           the STOP and CONTINUE cases the decoder returns to the initial
           state and, when bConsume is false, the current byte is pushed back
           so that it is read again as the start of a new sequence. */
        switch (ImplHandleBadInputTextToUnicodeConversion(
                    bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
                    &nInfo))
        {
        case IMPL_BAD_INPUT_STOP:
            nShift = -1;
            bCheckBom = sal_False;
            if (!bConsume)
                --pSrcBufPtr;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            nShift = -1;
            bCheckBom = sal_False;
            if (!bConsume)
                --pSrcBufPtr;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        /* reached from the STOP case: leave the conversion loop */
        break;

    no_output:
        /* Destination exhausted: un-read the current byte (decoder state is
           left untouched) and report the condition. */
        --pSrcBufPtr;
        nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* Input ended in the middle of a sequence and nothing else went wrong:
       without FLUSH, ask for more input; with FLUSH, the truncated sequence
       is treated as bad input. */
    if (nShift >= 0
        && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
                         | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
            nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
        else
            switch (ImplHandleBadInputTextToUnicodeConversion(
                        sal_False, sal_True, 0, nFlags, &pDestBufPtr,
                        pDestBufEnd, &nInfo))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nShift = -1;
                bCheckBom = sal_False;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                break;
            }
    }

    /* Save the decoder state for the next call and report the results: */
    if (pContext != NULL)
    {
        ((struct ImplUtf8ToUnicodeContext *) pContext)->nUtf32 = nUtf32;
        ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift = nShift;
        ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom = bCheckBom;
    }
    if (pInfo != NULL)
        *pInfo = nInfo;
    if (pSrcCvtBytes != NULL)
        *pSrcCvtBytes = (sal_Char const *) pSrcBufPtr - pSrcBuf;
    return pDestBufPtr - pDestBuf;
}
252*cdf0e10cSrcweir 
253*cdf0e10cSrcweir void * ImplCreateUnicodeToUtf8Context(void)
254*cdf0e10cSrcweir {
255*cdf0e10cSrcweir     void * p = rtl_allocateMemory(sizeof (struct ImplUnicodeToUtf8Context));
256*cdf0e10cSrcweir     ImplResetUnicodeToUtf8Context(p);
257*cdf0e10cSrcweir     return p;
258*cdf0e10cSrcweir }
259*cdf0e10cSrcweir 
260*cdf0e10cSrcweir void ImplResetUnicodeToUtf8Context(void * pContext)
261*cdf0e10cSrcweir {
262*cdf0e10cSrcweir     if (pContext != NULL)
263*cdf0e10cSrcweir         ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate = 0xFFFF;
264*cdf0e10cSrcweir }
265*cdf0e10cSrcweir 
/*
 * Encode UTF-16 code units from pSrcBuf as UTF-8 bytes in pDestBuf.
 *
 * pData        NULL for plain UTF-8; non-NULL selects the Java UTF-8 variant:
 *              no BOM is written, surrogates are not combined into one code
 *              point, and U+0000 is written as the two bytes C0 80.
 * pContext     optional struct ImplUnicodeToUtf8Context carrying the encoder
 *              state between calls; may be NULL, in which case each call
 *              starts from the initial state.
 * pSrcBuf      input code units; nSrcChars is their count.
 * pDestBuf     output buffer; nDestBytes is its capacity in bytes.
 * nFlags       RTL_UNICODETOTEXT_FLAGS_* bits; GLOBAL_SIGNATURE and FLUSH are
 *              examined here, the rest is passed to the bad-input handler.
 * pInfo        if non-NULL, receives the accumulated RTL_UNICODETOTEXT_INFO_*
 *              bits.
 * pSrcCvtChars if non-NULL, receives the number of source units consumed.
 *
 * Returns the number of bytes written to pDestBuf.
 */
sal_Size ImplConvertUnicodeToUtf8(ImplTextConverterData const * pData,
                                  void * pContext, sal_Unicode const * pSrcBuf,
                                  sal_Size nSrcChars, sal_Char * pDestBuf,
                                  sal_Size nDestBytes, sal_uInt32 nFlags,
                                  sal_uInt32 * pInfo, sal_Size* pSrcCvtChars)
{
    int bJavaUtf8 = pData != NULL;
    sal_Unicode nHighSurrogate = 0xFFFF;
    sal_uInt32 nInfo = 0;
    sal_Unicode const * pSrcBufPtr = pSrcBuf;
    sal_Unicode const * pSrcBufEnd = pSrcBufPtr + nSrcChars;
    sal_Char * pDestBufPtr = pDestBuf;
    sal_Char * pDestBufEnd = pDestBufPtr + nDestBytes;

    /* Resume from the state left behind by a previous call, if any: */
    if (pContext != NULL)
        nHighSurrogate
            = ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate;

    /* 0xFFFF marks the start of the text.  If a signature is requested (and
       this is not Java UTF-8) the BOM must fit completely; otherwise nothing
       is converted and the state stays at 0xFFFF, so that a later call with
       the same context tries again. */
    if (nHighSurrogate == 0xFFFF)
    {
        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_GLOBAL_SIGNATURE) != 0
            && !bJavaUtf8)
        {
            if (pDestBufEnd - pDestBufPtr >= 3)
            {
                /* Write BOM (U+FEFF) as UTF-8: */
                *pDestBufPtr++ = (sal_Char) (unsigned char) 0xEF;
                *pDestBufPtr++ = (sal_Char) (unsigned char) 0xBB;
                *pDestBufPtr++ = (sal_Char) (unsigned char) 0xBF;
            }
            else
            {
                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                goto done;
            }
        }
        nHighSurrogate = 0;
    }

    while (pSrcBufPtr < pSrcBufEnd)
    {
        sal_uInt32 nChar = *pSrcBufPtr++;
        if (nHighSurrogate == 0)
        {
            /* Outside Java mode a high surrogate is held back until the next
               unit arrives; in Java mode it falls through and is encoded on
               its own. */
            if (ImplIsHighSurrogate(nChar) && !bJavaUtf8)
            {
                nHighSurrogate = (sal_Unicode) nChar;
                continue;
            }
        }
        else if (ImplIsLowSurrogate(nChar) && !bJavaUtf8)
            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
        else
            /* A pending high surrogate is not followed by a low surrogate.
               Note that the current unit has already been consumed here. */
            goto bad_input;

        /* Reject an unpaired low surrogate (outside Java mode) and whatever
           ImplIsNoncharacter flags: */
        if ((ImplIsLowSurrogate(nChar) && !bJavaUtf8)
            || ImplIsNoncharacter(nChar))
            goto bad_input;

        /* Pick the encoding length by code point range.  In Java mode U+0000
           skips the one-byte branch and is written by the two-byte branch
           (giving C0 80). */
        if (nChar <= 0x7F && (!bJavaUtf8 || nChar != 0))
            if (pDestBufPtr != pDestBufEnd)
                *pDestBufPtr++ = (sal_Char) nChar;
            else
                goto no_output;
        else if (nChar <= 0x7FF)
            if (pDestBufEnd - pDestBufPtr >= 2)
            {
                *pDestBufPtr++ = (sal_Char) (0xC0 | (nChar >> 6));
                *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F));
            }
            else
                goto no_output;
        else if (nChar <= 0xFFFF)
            if (pDestBufEnd - pDestBufPtr >= 3)
            {
                *pDestBufPtr++ = (sal_Char) (0xE0 | (nChar >> 12));
                *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 6) & 0x3F));
                *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F));
            }
            else
                goto no_output;
        else if (pDestBufEnd - pDestBufPtr >= 4)
        {
            *pDestBufPtr++ = (sal_Char) (0xF0 | (nChar >> 18));
            *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 12) & 0x3F));
            *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 6) & 0x3F));
            *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F));
        }
        else
            goto no_output;
        nHighSurrogate = 0;
        continue;

    bad_input:
        /* Let the shared handler decide, based on nFlags, how to proceed; in
           the STOP and CONTINUE cases any pending high surrogate is
           dropped. */
        switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 0, nFlags,
                                                          &pDestBufPtr,
                                                          pDestBufEnd, &nInfo,
                                                          NULL, 0, NULL))
        {
        case IMPL_BAD_INPUT_STOP:
            nHighSurrogate = 0;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            nHighSurrogate = 0;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        /* reached from the STOP case: leave the conversion loop */
        break;

    no_output:
        /* Destination exhausted: un-read the current unit (a pending high
           surrogate stays in nHighSurrogate) and report the condition. */
        --pSrcBufPtr;
        nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* Input ended with a high surrogate still pending and nothing else went
       wrong.
       NOTE(review): the FLUSH test below has the opposite sense to the one
       at the end of ImplConvertUtf8ToUnicode: here FLUSH set yields
       SRCBUFFERTOSMALL and FLUSH clear invokes the bad-input handler.
       Confirm whether this is intended before changing it; callers may rely
       on the current behaviour. */
    if (nHighSurrogate != 0
        && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
                         | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
            nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
        else
            switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 0,
                                                              nFlags,
                                                              &pDestBufPtr,
                                                              pDestBufEnd,
                                                              &nInfo, NULL, 0,
                                                              NULL))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nHighSurrogate = 0;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }
    }

 done:
    /* Save the encoder state for the next call and report the results: */
    if (pContext != NULL)
        ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate
            = nHighSurrogate;
    if (pInfo != NULL)
        *pInfo = nInfo;
    if (pSrcCvtChars != NULL)
        *pSrcCvtChars = pSrcBufPtr - pSrcBuf;
    return pDestBufPtr - pDestBuf;
}
420