1*647f063dSAndrew Rist /**************************************************************
2cdf0e10cSrcweir *
3*647f063dSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*647f063dSAndrew Rist * or more contributor license agreements. See the NOTICE file
5*647f063dSAndrew Rist * distributed with this work for additional information
6*647f063dSAndrew Rist * regarding copyright ownership. The ASF licenses this file
7*647f063dSAndrew Rist * to you under the Apache License, Version 2.0 (the
8*647f063dSAndrew Rist * "License"); you may not use this file except in compliance
9*647f063dSAndrew Rist * with the License. You may obtain a copy of the License at
10cdf0e10cSrcweir *
11*647f063dSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0
12cdf0e10cSrcweir *
13*647f063dSAndrew Rist * Unless required by applicable law or agreed to in writing,
14*647f063dSAndrew Rist * software distributed under the License is distributed on an
15*647f063dSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*647f063dSAndrew Rist * KIND, either express or implied. See the License for the
17*647f063dSAndrew Rist * specific language governing permissions and limitations
18*647f063dSAndrew Rist * under the License.
19cdf0e10cSrcweir *
20*647f063dSAndrew Rist *************************************************************/
21*647f063dSAndrew Rist
22*647f063dSAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir #include "tenchelp.h"
25cdf0e10cSrcweir #include "unichars.h"
26cdf0e10cSrcweir #include "rtl/textcvt.h"
27cdf0e10cSrcweir
28cdf0e10cSrcweir /* ======================================================================= */
29cdf0e10cSrcweir
30cdf0e10cSrcweir /* The DBCS to Unicode conversion routines use a lead table for the first */
31cdf0e10cSrcweir /* byte, where we determine the trail table or, for single byte chars, the */
32cdf0e10cSrcweir /* unicode value. We have a separate table for each lead byte, because we */
33cdf0e10cSrcweir /* can then share many tables between different charset encodings. */
34cdf0e10cSrcweir
35cdf0e10cSrcweir /* ======================================================================= */
36cdf0e10cSrcweir
ImplDBCSToUnicode(const ImplTextConverterData * pData,void * pContext,const sal_Char * pSrcBuf,sal_Size nSrcBytes,sal_Unicode * pDestBuf,sal_Size nDestChars,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtBytes)37cdf0e10cSrcweir sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext,
38cdf0e10cSrcweir const sal_Char* pSrcBuf, sal_Size nSrcBytes,
39cdf0e10cSrcweir sal_Unicode* pDestBuf, sal_Size nDestChars,
40cdf0e10cSrcweir sal_uInt32 nFlags, sal_uInt32* pInfo,
41cdf0e10cSrcweir sal_Size* pSrcCvtBytes )
42cdf0e10cSrcweir {
43cdf0e10cSrcweir sal_uChar cLead;
44cdf0e10cSrcweir sal_uChar cTrail;
45cdf0e10cSrcweir sal_Unicode cConv;
46cdf0e10cSrcweir const ImplDBCSToUniLeadTab* pLeadEntry;
47cdf0e10cSrcweir const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData;
48cdf0e10cSrcweir const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab;
49cdf0e10cSrcweir sal_Unicode* pEndDestBuf;
50cdf0e10cSrcweir const sal_Char* pEndSrcBuf;
51cdf0e10cSrcweir
52cdf0e10cSrcweir (void) pContext; /* unused */
53cdf0e10cSrcweir
54cdf0e10cSrcweir *pInfo = 0;
55cdf0e10cSrcweir pEndDestBuf = pDestBuf+nDestChars;
56cdf0e10cSrcweir pEndSrcBuf = pSrcBuf+nSrcBytes;
57cdf0e10cSrcweir while ( pSrcBuf < pEndSrcBuf )
58cdf0e10cSrcweir {
59cdf0e10cSrcweir cLead = (sal_uChar)*pSrcBuf;
60cdf0e10cSrcweir
61cdf0e10cSrcweir /* get entry for the lead byte */
62cdf0e10cSrcweir pLeadEntry = pLeadTab+cLead;
63cdf0e10cSrcweir
64cdf0e10cSrcweir /* SingleByte char? */
65cdf0e10cSrcweir if (pLeadEntry->mpToUniTrailTab == NULL
66cdf0e10cSrcweir || cLead < pConvertData->mnLeadStart
67cdf0e10cSrcweir || cLead > pConvertData->mnLeadEnd)
68cdf0e10cSrcweir {
69cdf0e10cSrcweir cConv = pLeadEntry->mnUniChar;
70cdf0e10cSrcweir if ( !cConv && (cLead != 0) )
71cdf0e10cSrcweir {
72cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED;
73cdf0e10cSrcweir if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR )
74cdf0e10cSrcweir {
75cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
76cdf0e10cSrcweir break;
77cdf0e10cSrcweir }
78cdf0e10cSrcweir else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE )
79cdf0e10cSrcweir {
80cdf0e10cSrcweir pSrcBuf++;
81cdf0e10cSrcweir continue;
82cdf0e10cSrcweir }
83cdf0e10cSrcweir else
84cdf0e10cSrcweir cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags);
85cdf0e10cSrcweir }
86cdf0e10cSrcweir }
87cdf0e10cSrcweir else
88cdf0e10cSrcweir {
89cdf0e10cSrcweir /* Source buffer to small */
90cdf0e10cSrcweir if ( pSrcBuf +1 == pEndSrcBuf )
91cdf0e10cSrcweir {
92cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
93cdf0e10cSrcweir break;
94cdf0e10cSrcweir }
95cdf0e10cSrcweir
96cdf0e10cSrcweir pSrcBuf++;
97cdf0e10cSrcweir cTrail = (sal_uChar)*pSrcBuf;
98cdf0e10cSrcweir if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
99cdf0e10cSrcweir cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
100cdf0e10cSrcweir else
101cdf0e10cSrcweir cConv = 0;
102cdf0e10cSrcweir
103cdf0e10cSrcweir if ( !cConv )
104cdf0e10cSrcweir {
105cdf0e10cSrcweir /* EUDC Ranges */
106cdf0e10cSrcweir sal_uInt16 i;
107cdf0e10cSrcweir const ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab;
108cdf0e10cSrcweir for ( i = 0; i < pConvertData->mnEUDCCount; i++ )
109cdf0e10cSrcweir {
110cdf0e10cSrcweir if ( (cLead >= pEUDCTab->mnLeadStart) &&
111cdf0e10cSrcweir (cLead <= pEUDCTab->mnLeadEnd) )
112cdf0e10cSrcweir {
113cdf0e10cSrcweir sal_uInt16 nTrailCount = 0;
114cdf0e10cSrcweir if ( (cTrail >= pEUDCTab->mnTrail1Start) &&
115cdf0e10cSrcweir (cTrail <= pEUDCTab->mnTrail1End) )
116cdf0e10cSrcweir {
117cdf0e10cSrcweir cConv = pEUDCTab->mnUniStart+
118cdf0e10cSrcweir ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
119cdf0e10cSrcweir (cTrail-pEUDCTab->mnTrail1Start);
120cdf0e10cSrcweir break;
121cdf0e10cSrcweir }
122cdf0e10cSrcweir else
123cdf0e10cSrcweir {
124cdf0e10cSrcweir nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1;
125cdf0e10cSrcweir if ( (pEUDCTab->mnTrailCount >= 2) &&
126cdf0e10cSrcweir (cTrail >= pEUDCTab->mnTrail2Start) &&
127cdf0e10cSrcweir (cTrail <= pEUDCTab->mnTrail2End) )
128cdf0e10cSrcweir {
129cdf0e10cSrcweir cConv = pEUDCTab->mnUniStart+
130cdf0e10cSrcweir ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
131cdf0e10cSrcweir nTrailCount+
132cdf0e10cSrcweir (cTrail-pEUDCTab->mnTrail2Start);
133cdf0e10cSrcweir break;
134cdf0e10cSrcweir }
135cdf0e10cSrcweir else
136cdf0e10cSrcweir {
137cdf0e10cSrcweir nTrailCount = pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1;
138cdf0e10cSrcweir if ( (pEUDCTab->mnTrailCount >= 3) &&
139cdf0e10cSrcweir (cTrail >= pEUDCTab->mnTrail3Start) &&
140cdf0e10cSrcweir (cTrail <= pEUDCTab->mnTrail3End) )
141cdf0e10cSrcweir {
142cdf0e10cSrcweir cConv = pEUDCTab->mnUniStart+
143cdf0e10cSrcweir ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
144cdf0e10cSrcweir nTrailCount+
145cdf0e10cSrcweir (cTrail-pEUDCTab->mnTrail3Start);
146cdf0e10cSrcweir break;
147cdf0e10cSrcweir }
148cdf0e10cSrcweir }
149cdf0e10cSrcweir }
150cdf0e10cSrcweir }
151cdf0e10cSrcweir
152cdf0e10cSrcweir pEUDCTab++;
153cdf0e10cSrcweir }
154cdf0e10cSrcweir
155cdf0e10cSrcweir if ( !cConv )
156cdf0e10cSrcweir {
157cdf0e10cSrcweir /* Wir vergleichen den kompletten Trailbereich den wir */
158cdf0e10cSrcweir /* definieren, der normalerweise groesser sein kann als */
159cdf0e10cSrcweir /* der definierte. Dies machen wir, damit Erweiterungen von */
160cdf0e10cSrcweir /* uns nicht beruecksichtigten Encodings so weit wie */
161cdf0e10cSrcweir /* moeglich auch richtig zu behandeln, das double byte */
162cdf0e10cSrcweir /* characters auch als ein einzelner Character behandelt */
163cdf0e10cSrcweir /* wird. */
164cdf0e10cSrcweir if (cLead < pConvertData->mnLeadStart
165cdf0e10cSrcweir || cLead > pConvertData->mnLeadEnd
166cdf0e10cSrcweir || cTrail < pConvertData->mnTrailStart
167cdf0e10cSrcweir || cTrail > pConvertData->mnTrailEnd)
168cdf0e10cSrcweir {
169cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
170cdf0e10cSrcweir if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
171cdf0e10cSrcweir {
172cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
173cdf0e10cSrcweir break;
174cdf0e10cSrcweir }
175cdf0e10cSrcweir else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
176cdf0e10cSrcweir {
177cdf0e10cSrcweir pSrcBuf++;
178cdf0e10cSrcweir continue;
179cdf0e10cSrcweir }
180cdf0e10cSrcweir else
181cdf0e10cSrcweir cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
182cdf0e10cSrcweir }
183cdf0e10cSrcweir else
184cdf0e10cSrcweir {
185cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
186cdf0e10cSrcweir if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
187cdf0e10cSrcweir {
188cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
189cdf0e10cSrcweir break;
190cdf0e10cSrcweir }
191cdf0e10cSrcweir else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
192cdf0e10cSrcweir {
193cdf0e10cSrcweir pSrcBuf++;
194cdf0e10cSrcweir continue;
195cdf0e10cSrcweir }
196cdf0e10cSrcweir else
197cdf0e10cSrcweir cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
198cdf0e10cSrcweir }
199cdf0e10cSrcweir }
200cdf0e10cSrcweir }
201cdf0e10cSrcweir }
202cdf0e10cSrcweir
203cdf0e10cSrcweir if ( pDestBuf == pEndDestBuf )
204cdf0e10cSrcweir {
205cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
206cdf0e10cSrcweir break;
207cdf0e10cSrcweir }
208cdf0e10cSrcweir
209cdf0e10cSrcweir *pDestBuf = cConv;
210cdf0e10cSrcweir pDestBuf++;
211cdf0e10cSrcweir pSrcBuf++;
212cdf0e10cSrcweir }
213cdf0e10cSrcweir
214cdf0e10cSrcweir *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
215cdf0e10cSrcweir return (nDestChars - (pEndDestBuf-pDestBuf));
216cdf0e10cSrcweir }
217cdf0e10cSrcweir
218cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
219cdf0e10cSrcweir
ImplUnicodeToDBCS(const ImplTextConverterData * pData,void * pContext,const sal_Unicode * pSrcBuf,sal_Size nSrcChars,sal_Char * pDestBuf,sal_Size nDestBytes,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtChars)220cdf0e10cSrcweir sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext,
221cdf0e10cSrcweir const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
222cdf0e10cSrcweir sal_Char* pDestBuf, sal_Size nDestBytes,
223cdf0e10cSrcweir sal_uInt32 nFlags, sal_uInt32* pInfo,
224cdf0e10cSrcweir sal_Size* pSrcCvtChars )
225cdf0e10cSrcweir {
226cdf0e10cSrcweir sal_uInt16 cConv;
227cdf0e10cSrcweir sal_Unicode c;
228cdf0e10cSrcweir sal_uChar nHighChar;
229cdf0e10cSrcweir sal_uChar nLowChar;
230cdf0e10cSrcweir const ImplUniToDBCSHighTab* pHighEntry;
231cdf0e10cSrcweir const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData;
232cdf0e10cSrcweir const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab;
233cdf0e10cSrcweir sal_Char* pEndDestBuf;
234cdf0e10cSrcweir const sal_Unicode* pEndSrcBuf;
235cdf0e10cSrcweir
236cdf0e10cSrcweir sal_Bool bCheckRange = (pConvertData->mnLeadStart != 0
237cdf0e10cSrcweir || pConvertData->mnLeadEnd != 0xFF);
238cdf0e10cSrcweir /* this statement has the effect that this extra check is only done for
239cdf0e10cSrcweir EUC-KR, which uses the MS-949 tables, but does not support the full
240cdf0e10cSrcweir range of MS-949 */
241cdf0e10cSrcweir
242cdf0e10cSrcweir (void) pContext; /* unused */
243cdf0e10cSrcweir
244cdf0e10cSrcweir *pInfo = 0;
245cdf0e10cSrcweir pEndDestBuf = pDestBuf+nDestBytes;
246cdf0e10cSrcweir pEndSrcBuf = pSrcBuf+nSrcChars;
247cdf0e10cSrcweir while ( pSrcBuf < pEndSrcBuf )
248cdf0e10cSrcweir {
249cdf0e10cSrcweir c = *pSrcBuf;
250cdf0e10cSrcweir nHighChar = (sal_uChar)((c >> 8) & 0xFF);
251cdf0e10cSrcweir nLowChar = (sal_uChar)(c & 0xFF);
252cdf0e10cSrcweir
253cdf0e10cSrcweir /* get entry for the high byte */
254cdf0e10cSrcweir pHighEntry = pHighTab+nHighChar;
255cdf0e10cSrcweir
256cdf0e10cSrcweir /* is low byte in the table range */
257cdf0e10cSrcweir if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
258cdf0e10cSrcweir {
259cdf0e10cSrcweir cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
260cdf0e10cSrcweir if (bCheckRange && cConv > 0x7F
261cdf0e10cSrcweir && ((cConv >> 8) < pConvertData->mnLeadStart
262cdf0e10cSrcweir || (cConv >> 8) > pConvertData->mnLeadEnd
263cdf0e10cSrcweir || (cConv & 0xFF) < pConvertData->mnTrailStart
264cdf0e10cSrcweir || (cConv & 0xFF) > pConvertData->mnTrailEnd))
265cdf0e10cSrcweir cConv = 0;
266cdf0e10cSrcweir }
267cdf0e10cSrcweir else
268cdf0e10cSrcweir cConv = 0;
269cdf0e10cSrcweir
270cdf0e10cSrcweir if (cConv == 0 && c != 0)
271cdf0e10cSrcweir {
272cdf0e10cSrcweir /* Map to EUDC ranges: */
273cdf0e10cSrcweir ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab;
274cdf0e10cSrcweir sal_uInt32 i;
275cdf0e10cSrcweir for (i = 0; i < pConvertData->mnEUDCCount; ++i)
276cdf0e10cSrcweir {
277cdf0e10cSrcweir if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd)
278cdf0e10cSrcweir {
279cdf0e10cSrcweir sal_uInt32 nIndex = c - pEUDCTab->mnUniStart;
280cdf0e10cSrcweir sal_uInt32 nLeadOff
281cdf0e10cSrcweir = nIndex / pEUDCTab->mnTrailRangeCount;
282cdf0e10cSrcweir sal_uInt32 nTrailOff
283cdf0e10cSrcweir = nIndex % pEUDCTab->mnTrailRangeCount;
284cdf0e10cSrcweir sal_uInt32 nSize;
285cdf0e10cSrcweir cConv = (sal_uInt16)
286cdf0e10cSrcweir ((pEUDCTab->mnLeadStart + nLeadOff) << 8);
287cdf0e10cSrcweir nSize
288cdf0e10cSrcweir = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1;
289cdf0e10cSrcweir if (nTrailOff < nSize)
290cdf0e10cSrcweir {
291cdf0e10cSrcweir cConv |= pEUDCTab->mnTrail1Start + nTrailOff;
292cdf0e10cSrcweir break;
293cdf0e10cSrcweir }
294cdf0e10cSrcweir nTrailOff -= nSize;
295cdf0e10cSrcweir nSize
296cdf0e10cSrcweir = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1;
297cdf0e10cSrcweir if (nTrailOff < nSize)
298cdf0e10cSrcweir {
299cdf0e10cSrcweir cConv |= pEUDCTab->mnTrail2Start + nTrailOff;
300cdf0e10cSrcweir break;
301cdf0e10cSrcweir }
302cdf0e10cSrcweir nTrailOff -= nSize;
303cdf0e10cSrcweir cConv |= pEUDCTab->mnTrail3Start + nTrailOff;
304cdf0e10cSrcweir break;
305cdf0e10cSrcweir }
306cdf0e10cSrcweir pEUDCTab++;
307cdf0e10cSrcweir }
308cdf0e10cSrcweir
309cdf0e10cSrcweir /* FIXME
310cdf0e10cSrcweir * SB: Not sure why this is in here. Plus, it does not work as
311cdf0e10cSrcweir * intended when (c & 0xFF) == 0, because the next !cConv check
312cdf0e10cSrcweir * will then think c has not yet been converted...
313cdf0e10cSrcweir */
314cdf0e10cSrcweir if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START
315cdf0e10cSrcweir && c <= RTL_TEXTCVT_BYTE_PRIVATE_END)
316cdf0e10cSrcweir {
317cdf0e10cSrcweir if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
318cdf0e10cSrcweir cConv = (sal_Char)(sal_uChar)(c & 0xFF);
319cdf0e10cSrcweir }
320cdf0e10cSrcweir }
321cdf0e10cSrcweir
322cdf0e10cSrcweir if ( !cConv )
323cdf0e10cSrcweir {
324cdf0e10cSrcweir if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE )
325cdf0e10cSrcweir {
326cdf0e10cSrcweir /* !!! */
327cdf0e10cSrcweir }
328cdf0e10cSrcweir
329cdf0e10cSrcweir if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR )
330cdf0e10cSrcweir {
331cdf0e10cSrcweir /* !!! */
332cdf0e10cSrcweir }
333cdf0e10cSrcweir
334cdf0e10cSrcweir /* Handle undefined and surrogates characters */
335cdf0e10cSrcweir /* (all surrogates characters are undefined) */
336cdf0e10cSrcweir if (ImplHandleUndefinedUnicodeToTextChar(pData,
337cdf0e10cSrcweir &pSrcBuf,
338cdf0e10cSrcweir pEndSrcBuf,
339cdf0e10cSrcweir &pDestBuf,
340cdf0e10cSrcweir pEndDestBuf,
341cdf0e10cSrcweir nFlags,
342cdf0e10cSrcweir pInfo))
343cdf0e10cSrcweir continue;
344cdf0e10cSrcweir else
345cdf0e10cSrcweir break;
346cdf0e10cSrcweir }
347cdf0e10cSrcweir
348cdf0e10cSrcweir /* SingleByte */
349cdf0e10cSrcweir if ( !(cConv & 0xFF00) )
350cdf0e10cSrcweir {
351cdf0e10cSrcweir if ( pDestBuf == pEndDestBuf )
352cdf0e10cSrcweir {
353cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
354cdf0e10cSrcweir break;
355cdf0e10cSrcweir }
356cdf0e10cSrcweir
357cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
358cdf0e10cSrcweir pDestBuf++;
359cdf0e10cSrcweir }
360cdf0e10cSrcweir else
361cdf0e10cSrcweir {
362cdf0e10cSrcweir if ( pDestBuf+1 >= pEndDestBuf )
363cdf0e10cSrcweir {
364cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
365cdf0e10cSrcweir break;
366cdf0e10cSrcweir }
367cdf0e10cSrcweir
368cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF);
369cdf0e10cSrcweir pDestBuf++;
370cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
371cdf0e10cSrcweir pDestBuf++;
372cdf0e10cSrcweir }
373cdf0e10cSrcweir
374cdf0e10cSrcweir pSrcBuf++;
375cdf0e10cSrcweir }
376cdf0e10cSrcweir
377cdf0e10cSrcweir *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
378cdf0e10cSrcweir return (nDestBytes - (pEndDestBuf-pDestBuf));
379cdf0e10cSrcweir }
380cdf0e10cSrcweir
381cdf0e10cSrcweir /* ======================================================================= */
382cdf0e10cSrcweir
/* Offset between an EUC-JP lead byte and its index into the JIS lead */
/* tables; lead bytes below this value are outside the table range. */
383cdf0e10cSrcweir #define JIS_EUC_LEAD_OFF 0x80
/* Offset between an EUC-JP trail byte and the trail value stored in the */
/* JIS tables; trail bytes below this value are outside the table range. */
384cdf0e10cSrcweir #define JIS_EUC_TRAIL_OFF 0x80
385cdf0e10cSrcweir
386cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
387cdf0e10cSrcweir
/* Convert an EUC-JP encoded byte buffer to Unicode.                          */
/*                                                                            */
/* Handled forms:                                                             */
/*   00-7F              copied unchanged                                      */
/*   8E + A1-DF         mapped to U+FF61 + (byte - A1)                        */
/*   8F + lead + trail  looked up in mpJIS0212ToUniLeadTab                    */
/*   lead + trail       looked up in mpJIS0208ToUniLeadTab                    */
/*                                                                            */
/* pData        Conversion tables; accessed as ImplEUCJPConvertData.          */
/* pContext     Unused.                                                       */
/* pSrcBuf      Source bytes, nSrcBytes long.                                 */
/* pDestBuf     Destination buffer with room for nDestChars sal_Unicode.      */
/* nFlags       RTL_TEXTTOUNICODE_FLAGS_* selecting how invalid byte          */
/*              sequences and undefined multi byte characters are treated.    */
/* pInfo        Reset to 0, then receives RTL_TEXTTOUNICODE_INFO_* bits.      */
/* pSrcCvtBytes Receives the number of source bytes consumed.                 */
/*                                                                            */
/* Returns the number of sal_Unicode values written to pDestBuf.              */
/*                                                                            */
/* If the destination buffer is full, or the source ends inside a multi byte  */
/* character, that character is not counted in *pSrcCvtBytes. A truncated     */
/* source only sets RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL (no ERROR bit).   */
/* NOTE(review): when stopping with INVALID/MBUNDEFINED + ERROR the bytes     */
/* before the last byte of the offending character are still counted as       */
/* consumed (unchanged behaviour).                                            */
sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData,
                             void* pContext,
                             const sal_Char* pSrcBuf, sal_Size nSrcBytes,
                             sal_Unicode* pDestBuf, sal_Size nDestChars,
                             sal_uInt32 nFlags, sal_uInt32* pInfo,
                             sal_Size* pSrcCvtBytes )
{
    sal_uChar                   c;
    sal_uChar                   cLead = '\0';
    sal_uChar                   cTrail = '\0';
    sal_Unicode                 cConv;
    const ImplDBCSToUniLeadTab* pLeadEntry;
    const ImplDBCSToUniLeadTab* pLeadTab;
    const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
    sal_Unicode*                pEndDestBuf;
    const sal_Char*             pEndSrcBuf;
    const sal_Char*             pCharStart;

    (void) pContext; /* unused */

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestChars;
    pEndSrcBuf  = pSrcBuf+nSrcBytes;
    while ( pSrcBuf < pEndSrcBuf )
    {
        /* remember where this character starts, see the dest buffer check */
        pCharStart = pSrcBuf;
        c = (sal_uChar)*pSrcBuf;

        /* ASCII */
        if ( c <= 0x7F )
            cConv = c;
        else
        {
            /* SS2 - Half-width katakana */
            /* 8E + A1-DF */
            if ( c == 0x8E )
            {
                /* Source buffer too small */
                if ( pSrcBuf + 1 == pEndSrcBuf )
                {
                    *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
                    break;
                }

                pSrcBuf++;
                cLead  = 0x8E;
                cTrail = (sal_uChar)*pSrcBuf;
                if ( (cTrail >= 0xA1) && (cTrail <= 0xDF) )
                    cConv = (sal_Unicode)(0xFF61+(cTrail-0xA1));
                else
                    cConv = 0;
            }
            else
            {
                /* SS3 - JIS 0212-1990 */
                /* 8F + A1-FE + A1-FE */
                if ( c == 0x8F )
                {
                    /* Source buffer too small */
                    if (pEndSrcBuf - pSrcBuf < 3)
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
                        break;
                    }

                    pSrcBuf++;
                    cLead = (sal_uChar)*pSrcBuf;
                    pSrcBuf++;
                    cTrail = (sal_uChar)*pSrcBuf;
                    pLeadTab = pConvertData->mpJIS0212ToUniLeadTab;
                }
                /* CodeSet 2 JIS 0208-1997 */
                /* A1-FE + A1-FE */
                else
                {
                    /* Source buffer too small */
                    if ( pSrcBuf + 1 == pEndSrcBuf )
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
                        break;
                    }

                    cLead = c;
                    pSrcBuf++;
                    cTrail = (sal_uChar)*pSrcBuf;
                    pLeadTab = pConvertData->mpJIS0208ToUniLeadTab;
                }

                /* Bytes below the offsets are outside the table range. */
                /* cLead and cTrail are left untouched here, because    */
                /* the invalid/undefined decision below needs the raw   */
                /* byte values.                                         */
                cConv = 0;
                if ( (cLead >= JIS_EUC_LEAD_OFF) && (cTrail >= JIS_EUC_TRAIL_OFF) )
                {
                    sal_uChar nTrailIdx = (sal_uChar)(cTrail-JIS_EUC_TRAIL_OFF);

                    pLeadEntry = pLeadTab+(cLead-JIS_EUC_LEAD_OFF);
                    if ( (nTrailIdx >= pLeadEntry->mnTrailStart) && (nTrailIdx <= pLeadEntry->mnTrailEnd) )
                        cConv = pLeadEntry->mpToUniTrailTab[nTrailIdx-pLeadEntry->mnTrailStart];
                }
            }

            if ( !cConv )
            {
                /* We compare against the complete lead/trail range that  */
                /* the encoding defines, which can be larger than the     */
                /* range that actually has mappings. This way extensions  */
                /* of the encoding that we do not know about are still    */
                /* handled as well as possible: the multi byte character  */
                /* is treated as one (undefined) character.               */
                if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
                {
                    *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
                    if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                        break;
                    }
                    else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
                    {
                        pSrcBuf++;
                        continue;
                    }
                    else
                        cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                }
                else
                {
                    *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
                    if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                        break;
                    }
                    else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
                    {
                        pSrcBuf++;
                        continue;
                    }
                    else
                        cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                }
            }
        }

        if ( pDestBuf == pEndDestBuf )
        {
            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
            /* Nothing was written for this character, so do not report */
            /* any of its bytes as converted. */
            pSrcBuf = pCharStart;
            break;
        }

        *pDestBuf = cConv;
        pDestBuf++;
        pSrcBuf++;
    }

    *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
    return (nDestChars - (pEndDestBuf-pDestBuf));
}
550cdf0e10cSrcweir
551cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
552cdf0e10cSrcweir
/* Convert a Unicode buffer to an EUC-JP encoded byte buffer.                 */
/*                                                                            */
/* Handled forms:                                                             */
/*   U+0000-U+007F   written as one byte                                      */
/*   U+FF61-U+FF9F   written as 8E + (A1 + offset)                            */
/*   others          looked up in mpUniToJIS0208HighTab (result | 0x8080,     */
/*                   two bytes), then in mpUniToJIS0212HighTab                */
/*                   (result | 0x8F8080, three bytes)                         */
/* Characters found in neither table are passed to                            */
/* ImplHandleUndefinedUnicodeToTextChar.                                      */
/*                                                                            */
/* pData        Conversion tables; accessed as ImplEUCJPConvertData.          */
/* pContext     Unused.                                                       */
/* pSrcBuf      Source characters, nSrcChars long.                            */
/* pDestBuf     Destination buffer with room for nDestBytes bytes.            */
/* nFlags       RTL_UNICODETOTEXT_FLAGS_*; only handed on to                  */
/*              ImplHandleUndefinedUnicodeToTextChar.                         */
/* pInfo        Reset to 0, then receives RTL_UNICODETOTEXT_INFO_* bits.      */
/* pSrcCvtChars Receives the number of source characters consumed.            */
/*                                                                            */
/* Returns the number of bytes written to pDestBuf. A character is only       */
/* written if all of its bytes fit into the destination buffer.               */
sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData,
                             void* pContext,
                             const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
                             sal_Char* pDestBuf, sal_Size nDestBytes,
                             sal_uInt32 nFlags, sal_uInt32* pInfo,
                             sal_Size* pSrcCvtChars )
{
    sal_uInt32                  cConv;
    sal_Unicode                 c;
    sal_uChar                   nHighChar;
    sal_uChar                   nLowChar;
    const ImplUniToDBCSHighTab* pHighEntry;
    const ImplUniToDBCSHighTab* pHighTab;
    const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
    sal_Char*                   pEndDestBuf;
    const sal_Unicode*          pEndSrcBuf;

    (void) pContext; /* unused */

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestBytes;
    pEndSrcBuf  = pSrcBuf+nSrcChars;
    while ( pSrcBuf < pEndSrcBuf )
    {
        c = *pSrcBuf;

        /* ASCII */
        if ( c <= 0x7F )
            cConv = c;
        /* Half-width katakana */
        else if ( (c >= 0xFF61) && (c <= 0xFF9F) )
            cConv = 0x8E00+0xA1+(c-0xFF61);
        else
        {
            nHighChar = (sal_uChar)((c >> 8) & 0xFF);
            nLowChar = (sal_uChar)(c & 0xFF);

            /* JIS 0208 */
            pHighTab = pConvertData->mpUniToJIS0208HighTab;
            pHighEntry = pHighTab+nHighChar;
            if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
            {
                cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
                /* table values lack the high bits of the EUC bytes */
                if (cConv != 0)
                    cConv |= 0x8080;
            }
            else
                cConv = 0;

            /* JIS 0212 */
            if ( !cConv )
            {
                pHighTab = pConvertData->mpUniToJIS0212HighTab;
                pHighEntry = pHighTab+nHighChar;
                if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
                {
                    cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
                    /* add the 8F prefix byte and the high bits */
                    if (cConv != 0)
                        cConv |= 0x8F8080;
                }
            }

            if ( !cConv )
            {
                /* Handle undefined and surrogates characters */
                /* (all surrogates characters are undefined) */
                /* The helper receives the buffer positions by address; */
                /* this loop does not advance pSrcBuf itself on this    */
                /* path.                                                */
                if (ImplHandleUndefinedUnicodeToTextChar(pData,
                                                         &pSrcBuf,
                                                         pEndSrcBuf,
                                                         &pDestBuf,
                                                         pEndDestBuf,
                                                         nFlags,
                                                         pInfo))
                    continue;
                else
                    break;
            }
        }

        /* The remaining space is compared as a difference, so that no */
        /* pointer beyond the end of the destination buffer is formed. */

        /* SingleByte */
        if ( !(cConv & 0xFFFF00) )
        {
            if ( pDestBuf == pEndDestBuf )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
            pDestBuf++;
        }
        /* DoubleByte */
        else if ( !(cConv & 0xFF0000) )
        {
            if ( pEndDestBuf - pDestBuf < 2 )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF);
            pDestBuf++;
            *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
            pDestBuf++;
        }
        /* TripleByte */
        else
        {
            if ( pEndDestBuf - pDestBuf < 3 )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 16) & 0xFF);
            pDestBuf++;
            *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF);
            pDestBuf++;
            *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
            pDestBuf++;
        }

        pSrcBuf++;
    }

    *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
    return (nDestBytes - (pEndDestBuf-pDestBuf));
}
690