/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/



24cdf0e10cSrcweir #include "sal/types.h"
25cdf0e10cSrcweir #include "rtl/alloc.h"
26cdf0e10cSrcweir #include "rtl/textcvt.h"
27cdf0e10cSrcweir
28cdf0e10cSrcweir #include "converter.h"
29cdf0e10cSrcweir #include "tenchelp.h"
30cdf0e10cSrcweir #include "unichars.h"
31cdf0e10cSrcweir
32cdf0e10cSrcweir struct ImplUtf8ToUnicodeContext
33cdf0e10cSrcweir {
34cdf0e10cSrcweir sal_uInt32 nUtf32;
35cdf0e10cSrcweir int nShift;
36cdf0e10cSrcweir sal_Bool bCheckBom;
37cdf0e10cSrcweir };
38cdf0e10cSrcweir
39cdf0e10cSrcweir struct ImplUnicodeToUtf8Context
40cdf0e10cSrcweir {
41cdf0e10cSrcweir sal_Unicode nHighSurrogate; /* 0xFFFF: write BOM */
42cdf0e10cSrcweir };
43cdf0e10cSrcweir
ImplCreateUtf8ToUnicodeContext(void)44cdf0e10cSrcweir void * ImplCreateUtf8ToUnicodeContext(void)
45cdf0e10cSrcweir {
46cdf0e10cSrcweir void * p = rtl_allocateMemory(sizeof (struct ImplUtf8ToUnicodeContext));
47cdf0e10cSrcweir ImplResetUtf8ToUnicodeContext(p);
48cdf0e10cSrcweir return p;
49cdf0e10cSrcweir }
50cdf0e10cSrcweir
ImplResetUtf8ToUnicodeContext(void * pContext)51cdf0e10cSrcweir void ImplResetUtf8ToUnicodeContext(void * pContext)
52cdf0e10cSrcweir {
53cdf0e10cSrcweir if (pContext != NULL)
54cdf0e10cSrcweir {
55cdf0e10cSrcweir ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift = -1;
56cdf0e10cSrcweir ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom = sal_True;
57cdf0e10cSrcweir }
58cdf0e10cSrcweir }
59cdf0e10cSrcweir
/*
   Converts a buffer of UTF-8 bytes into sal_Unicode units.

   pData         non-null selects the "Java UTF-8" variant (a leading
                 U+FEFF is then never dropped); null selects plain UTF-8.
   pContext      optional struct ImplUtf8ToUnicodeContext; when non-null the
                 decoder state is loaded from it on entry and stored back
                 on exit, so a sequence may span several calls.
   pSrcBuf       source bytes.
   nSrcBytes     number of source bytes.
   pDestBuf      destination buffer.
   nDestChars    capacity of pDestBuf in sal_Unicode units.
   nFlags        RTL_TEXTTOUNICODE_FLAGS_* bits; this routine itself tests
                 GLOBAL_SIGNATURE and FLUSH and forwards all flags to
                 ImplHandleBadInputTextToUnicodeConversion.
   pInfo         optional; receives the accumulated RTL_TEXTTOUNICODE_INFO_*
                 bits.
   pSrcCvtBytes  optional; receives the number of source bytes consumed.

   Returns the number of sal_Unicode units written to pDestBuf.
 */
sal_Size ImplConvertUtf8ToUnicode(ImplTextConverterData const * pData,
                                  void * pContext, sal_Char const * pSrcBuf,
                                  sal_Size nSrcBytes, sal_Unicode * pDestBuf,
                                  sal_Size nDestChars, sal_uInt32 nFlags,
                                  sal_uInt32 * pInfo, sal_Size * pSrcCvtBytes)
{
    /*
       This function is very liberal with the UTF-8 input.  Accepted are:
       - non-shortest forms (e.g., C0 41 instead of 41 to represent U+0041)
       - surrogates (e.g., ED A0 80 to represent U+D800)
       - encodings with up to six bytes (everything outside the range
         U+0000..10FFFF is considered "undefined")
       The first two of these points allow this routine to translate from both
       RTL_TEXTENCODING_UTF8 and RTL_TEXTENCODING_JAVA_UTF8.
     */

    int bJavaUtf8 = pData != NULL;
    sal_uInt32 nUtf32 = 0;
    int nShift = -1;
    sal_Bool bCheckBom = sal_True;
    sal_uInt32 nInfo = 0;
    sal_uChar const * pSrcBufPtr = (sal_uChar const *) pSrcBuf;
    sal_uChar const * pSrcBufEnd = pSrcBufPtr + nSrcBytes;
    sal_Unicode * pDestBufPtr = pDestBuf;
    sal_Unicode * pDestBufEnd = pDestBufPtr + nDestChars;

    /* Resume from the state a previous call left behind, if any: */
    if (pContext != NULL)
    {
        nUtf32 = ((struct ImplUtf8ToUnicodeContext *) pContext)->nUtf32;
        nShift = ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift;
        bCheckBom = ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom;
    }

    while (pSrcBufPtr < pSrcBufEnd)
    {
        sal_Bool bUndefined = sal_False;
        int bConsume = sal_True;
        sal_uInt32 nChar = *pSrcBufPtr++;
        if (nShift < 0)
        {
            /*
               No sequence in progress: classify the lead byte.  For a
               multi-byte lead, store its payload bits at their final
               position and set nShift to the shift of the first
               continuation byte.
             */
            if (nChar <= 0x7F)
            {
                nUtf32 = nChar;
                goto transform;
            }
            else if (nChar <= 0xBF)
                goto bad_input; /* stray continuation byte */
            else if (nChar <= 0xDF)
            {
                nUtf32 = (nChar & 0x1F) << 6;
                nShift = 0;
            }
            else if (nChar <= 0xEF)
            {
                nUtf32 = (nChar & 0x0F) << 12;
                nShift = 6;
            }
            else if (nChar <= 0xF7)
            {
                nUtf32 = (nChar & 0x07) << 18;
                nShift = 12;
            }
            else if (nChar <= 0xFB)
            {
                nUtf32 = (nChar & 0x03) << 24;
                nShift = 18;
            }
            else if (nChar <= 0xFD)
            {
                nUtf32 = (nChar & 0x01) << 30;
                nShift = 24;
            }
            else
                goto bad_input; /* 0xFE and 0xFF never start a sequence */
        }
        else if ((nChar & 0xC0) == 0x80)
        {
            /* Continuation byte: merge its six payload bits. */
            nUtf32 |= (nChar & 0x3F) << nShift;
            if (nShift == 0)
                goto transform;
            else
                nShift -= 6;
        }
        else
        {
            /*
               This byte is preceded by a broken UTF-8 sequence; if this byte
               is neither in the range [0x80..0xBF] nor in the range
               [0xFE..0xFF], assume that this byte does not belong to that
               broken sequence, but instead starts a new, legal UTF-8 sequence:
             */
            bConsume = nChar >= 0xFE;
            goto bad_input;
        }
        continue;

    transform:
        /*
           A complete code point is in nUtf32.  It is written out unless it
           is U+FEFF as the very first character, the caller asked for
           signature handling, and this is not the Java variant -- in that
           case it is silently dropped.
         */
        if (!bCheckBom || nUtf32 != 0xFEFF
            || (nFlags & RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE) == 0
            || bJavaUtf8)
        {
            if (nUtf32 <= 0xFFFF)
            {
                if (pDestBufPtr != pDestBufEnd)
                    *pDestBufPtr++ = (sal_Unicode) nUtf32;
                else
                    goto no_output;
            }
            else if (nUtf32 <= 0x10FFFF)
            {
                /* Needs a surrogate pair, i.e. two destination units. */
                if (pDestBufEnd - pDestBufPtr >= 2)
                {
                    *pDestBufPtr++ = (sal_Unicode) ImplGetHighSurrogate(nUtf32);
                    *pDestBufPtr++ = (sal_Unicode) ImplGetLowSurrogate(nUtf32);
                }
                else
                    goto no_output;
            }
            else
            {
                /* Beyond U+10FFFF: report as "undefined" rather than invalid. */
                bUndefined = sal_True;
                goto bad_input;
            }
        }
        nShift = -1;
        bCheckBom = sal_False;
        continue;

    bad_input:
        switch (ImplHandleBadInputTextToUnicodeConversion(
                    bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
                    &nInfo))
        {
        case IMPL_BAD_INPUT_STOP:
            nShift = -1;
            bCheckBom = sal_False;
            if (!bConsume)
                --pSrcBufPtr; /* leave the byte for the next call */
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            nShift = -1;
            bCheckBom = sal_False;
            if (!bConsume)
                --pSrcBufPtr; /* re-read the byte as a new lead byte */
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break; /* reached after IMPL_BAD_INPUT_STOP: leave the loop */

    no_output:
        /*
           Destination exhausted: un-read the current byte.  nShift and
           nUtf32 are left untouched, so with a context the next call can
           re-read that byte and complete the same character.
         */
        --pSrcBufPtr;
        nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /*
       The source ended in the middle of a sequence (and nothing else went
       wrong): without FLUSH, report that more input is needed; with FLUSH,
       treat the truncated sequence as bad input.
     */
    if (nShift >= 0
        && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
                         | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
            nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
        else
            switch (ImplHandleBadInputTextToUnicodeConversion(
                        sal_False, sal_True, 0, nFlags, &pDestBufPtr,
                        pDestBufEnd, &nInfo))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nShift = -1;
                bCheckBom = sal_False;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                break;
            }
    }

    /* Persist the decoder state and report results: */
    if (pContext != NULL)
    {
        ((struct ImplUtf8ToUnicodeContext *) pContext)->nUtf32 = nUtf32;
        ((struct ImplUtf8ToUnicodeContext *) pContext)->nShift = nShift;
        ((struct ImplUtf8ToUnicodeContext *) pContext)->bCheckBom = bCheckBom;
    }
    if (pInfo != NULL)
        *pInfo = nInfo;
    if (pSrcCvtBytes != NULL)
        *pSrcCvtBytes = (sal_Char const *) pSrcBufPtr - pSrcBuf;
    return pDestBufPtr - pDestBuf;
}
248cdf0e10cSrcweir
ImplCreateUnicodeToUtf8Context(void)249cdf0e10cSrcweir void * ImplCreateUnicodeToUtf8Context(void)
250cdf0e10cSrcweir {
251cdf0e10cSrcweir void * p = rtl_allocateMemory(sizeof (struct ImplUnicodeToUtf8Context));
252cdf0e10cSrcweir ImplResetUnicodeToUtf8Context(p);
253cdf0e10cSrcweir return p;
254cdf0e10cSrcweir }
255cdf0e10cSrcweir
ImplResetUnicodeToUtf8Context(void * pContext)256cdf0e10cSrcweir void ImplResetUnicodeToUtf8Context(void * pContext)
257cdf0e10cSrcweir {
258cdf0e10cSrcweir if (pContext != NULL)
259cdf0e10cSrcweir ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate = 0xFFFF;
260cdf0e10cSrcweir }
261cdf0e10cSrcweir
/*
   Converts a buffer of sal_Unicode units into UTF-8 bytes.

   pData         non-null selects the "Java UTF-8" variant: no BOM is
                 written, surrogates are encoded individually as three-byte
                 sequences instead of being combined, and U+0000 is written
                 as the two-byte form.  Null selects plain UTF-8.
   pContext      optional struct ImplUnicodeToUtf8Context; when non-null the
                 pending-surrogate / BOM state is loaded from it on entry
                 and stored back on exit.
   pSrcBuf       source units.
   nSrcChars     number of source units.
   pDestBuf      destination buffer.
   nDestBytes    capacity of pDestBuf in bytes.
   nFlags        RTL_UNICODETOTEXT_FLAGS_* bits; this routine itself tests
                 GLOBAL_SIGNATURE and FLUSH and forwards all flags to
                 ImplHandleBadInputUnicodeToTextConversion.
   pInfo         optional; receives the accumulated RTL_UNICODETOTEXT_INFO_*
                 bits.
   pSrcCvtChars  optional; receives the number of source units consumed.

   Returns the number of bytes written to pDestBuf.
 */
sal_Size ImplConvertUnicodeToUtf8(ImplTextConverterData const * pData,
                                  void * pContext, sal_Unicode const * pSrcBuf,
                                  sal_Size nSrcChars, sal_Char * pDestBuf,
                                  sal_Size nDestBytes, sal_uInt32 nFlags,
                                  sal_uInt32 * pInfo, sal_Size* pSrcCvtChars)
{
    int bJavaUtf8 = pData != NULL;
    sal_Unicode nHighSurrogate = 0xFFFF;
    sal_uInt32 nInfo = 0;
    sal_Unicode const * pSrcBufPtr = pSrcBuf;
    sal_Unicode const * pSrcBufEnd = pSrcBufPtr + nSrcChars;
    sal_Char * pDestBufPtr = pDestBuf;
    sal_Char * pDestBufEnd = pDestBufPtr + nDestBytes;

    if (pContext != NULL)
        nHighSurrogate
            = ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate;

    /*
       0xFFFF means nothing has been converted yet (always the case when
       there is no context): emit the BOM if requested, then switch to the
       normal "no surrogate pending" state.
     */
    if (nHighSurrogate == 0xFFFF)
    {
        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_GLOBAL_SIGNATURE) != 0
            && !bJavaUtf8)
        {
            if (pDestBufEnd - pDestBufPtr >= 3)
            {
                /* Write BOM (U+FEFF) as UTF-8: */
                *pDestBufPtr++ = (sal_Char) (unsigned char) 0xEF;
                *pDestBufPtr++ = (sal_Char) (unsigned char) 0xBB;
                *pDestBufPtr++ = (sal_Char) (unsigned char) 0xBF;
            }
            else
            {
                /* State stays 0xFFFF so the BOM is retried next time. */
                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                goto done;
            }
        }
        nHighSurrogate = 0;
    }

    while (pSrcBufPtr < pSrcBufEnd)
    {
        sal_uInt32 nChar = *pSrcBufPtr++;
        if (nHighSurrogate == 0)
        {
            /* Remember a high surrogate and wait for its partner. */
            if (ImplIsHighSurrogate(nChar) && !bJavaUtf8)
            {
                nHighSurrogate = (sal_Unicode) nChar;
                continue;
            }
        }
        else if (ImplIsLowSurrogate(nChar) && !bJavaUtf8)
            nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
        else
            goto bad_input; /* pending high surrogate without a low one */

        /* A lone low surrogate (plain UTF-8 only) or a noncharacter: */
        if ((ImplIsLowSurrogate(nChar) && !bJavaUtf8)
            || ImplIsNoncharacter(nChar))
            goto bad_input;

        /*
           Emit 1..4 bytes depending on the magnitude of nChar.  In the Java
           variant U+0000 fails the first test and is therefore written in
           the two-byte form.
         */
        if (nChar <= 0x7F && (!bJavaUtf8 || nChar != 0))
        {
            if (pDestBufPtr != pDestBufEnd)
                *pDestBufPtr++ = (sal_Char) nChar;
            else
                goto no_output;
        }
        else if (nChar <= 0x7FF)
        {
            if (pDestBufEnd - pDestBufPtr >= 2)
            {
                *pDestBufPtr++ = (sal_Char) (0xC0 | (nChar >> 6));
                *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F));
            }
            else
                goto no_output;
        }
        else if (nChar <= 0xFFFF)
        {
            if (pDestBufEnd - pDestBufPtr >= 3)
            {
                *pDestBufPtr++ = (sal_Char) (0xE0 | (nChar >> 12));
                *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 6) & 0x3F));
                *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F));
            }
            else
                goto no_output;
        }
        else if (pDestBufEnd - pDestBufPtr >= 4)
        {
            *pDestBufPtr++ = (sal_Char) (0xF0 | (nChar >> 18));
            *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 12) & 0x3F));
            *pDestBufPtr++ = (sal_Char) (0x80 | ((nChar >> 6) & 0x3F));
            *pDestBufPtr++ = (sal_Char) (0x80 | (nChar & 0x3F));
        }
        else
            goto no_output;
        nHighSurrogate = 0;
        continue;

    bad_input:
        switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 0, nFlags,
                                                          &pDestBufPtr,
                                                          pDestBufEnd, &nInfo,
                                                          NULL, 0, NULL))
        {
        case IMPL_BAD_INPUT_STOP:
            nHighSurrogate = 0;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            nHighSurrogate = 0;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break; /* reached after IMPL_BAD_INPUT_STOP: leave the loop */

    no_output:
        /*
           Destination exhausted: un-read the current unit.  A pending high
           surrogate is kept, so with a context the next call can re-read
           the low surrogate and complete the pair.
         */
        --pSrcBufPtr;
        nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /*
       The source ended with a high surrogate still pending (and nothing
       else went wrong).
       NOTE(review): the FLUSH test here has the opposite polarity to the
       corresponding test in ImplConvertUtf8ToUnicode -- with FLUSH set this
       reports SRCBUFFERTOSMALL, without it the dangling surrogate is handed
       to the bad-input handler.  That looks inverted, but the behaviour is
       kept as is because callers may rely on it; confirm before changing.
     */
    if (nHighSurrogate != 0
        && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
                         | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
            nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
        else
            switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 0,
                                                              nFlags,
                                                              &pDestBufPtr,
                                                              pDestBufEnd,
                                                              &nInfo, NULL, 0,
                                                              NULL))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                nHighSurrogate = 0;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }
    }

 done:
    /* Persist the encoder state and report results: */
    if (pContext != NULL)
        ((struct ImplUnicodeToUtf8Context *) pContext)->nHighSurrogate
            = nHighSurrogate;
    if (pInfo != NULL)
        *pInfo = nInfo;
    if (pSrcCvtChars != NULL)
        *pSrcCvtChars = pSrcBufPtr - pSrcBuf;
    return pDestBufPtr - pDestBuf;
}
416