1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir #include "tenchelp.h" 29*cdf0e10cSrcweir #include "unichars.h" 30*cdf0e10cSrcweir #include "rtl/textcvt.h" 31*cdf0e10cSrcweir 32*cdf0e10cSrcweir /* ======================================================================= */ 33*cdf0e10cSrcweir 34*cdf0e10cSrcweir /* DBCS to Unicode conversion routine use a lead table for the first byte, */ 35*cdf0e10cSrcweir /* where we determine the trail table or for single byte chars the unicode */ 36*cdf0e10cSrcweir /* value. We have for all lead byte a separate table, because we can */ 37*cdf0e10cSrcweir /* then share many tables for diffrent charset encodings. */ 38*cdf0e10cSrcweir 39*cdf0e10cSrcweir /* ======================================================================= */ 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext, 42*cdf0e10cSrcweir const sal_Char* pSrcBuf, sal_Size nSrcBytes, 43*cdf0e10cSrcweir sal_Unicode* pDestBuf, sal_Size nDestChars, 44*cdf0e10cSrcweir sal_uInt32 nFlags, sal_uInt32* pInfo, 45*cdf0e10cSrcweir sal_Size* pSrcCvtBytes ) 46*cdf0e10cSrcweir { 47*cdf0e10cSrcweir sal_uChar cLead; 48*cdf0e10cSrcweir sal_uChar cTrail; 49*cdf0e10cSrcweir sal_Unicode cConv; 50*cdf0e10cSrcweir const ImplDBCSToUniLeadTab* pLeadEntry; 51*cdf0e10cSrcweir const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData; 52*cdf0e10cSrcweir const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab; 53*cdf0e10cSrcweir sal_Unicode* pEndDestBuf; 54*cdf0e10cSrcweir const sal_Char* pEndSrcBuf; 55*cdf0e10cSrcweir 56*cdf0e10cSrcweir (void) pContext; /* unused */ 57*cdf0e10cSrcweir 58*cdf0e10cSrcweir *pInfo = 0; 59*cdf0e10cSrcweir pEndDestBuf = pDestBuf+nDestChars; 60*cdf0e10cSrcweir pEndSrcBuf = pSrcBuf+nSrcBytes; 61*cdf0e10cSrcweir while ( pSrcBuf < pEndSrcBuf ) 62*cdf0e10cSrcweir { 63*cdf0e10cSrcweir cLead = (sal_uChar)*pSrcBuf; 64*cdf0e10cSrcweir 65*cdf0e10cSrcweir /* get entry for the lead byte */ 66*cdf0e10cSrcweir pLeadEntry = pLeadTab+cLead; 67*cdf0e10cSrcweir 68*cdf0e10cSrcweir /* SingleByte char? */ 69*cdf0e10cSrcweir if (pLeadEntry->mpToUniTrailTab == NULL 70*cdf0e10cSrcweir || cLead < pConvertData->mnLeadStart 71*cdf0e10cSrcweir || cLead > pConvertData->mnLeadEnd) 72*cdf0e10cSrcweir { 73*cdf0e10cSrcweir cConv = pLeadEntry->mnUniChar; 74*cdf0e10cSrcweir if ( !cConv && (cLead != 0) ) 75*cdf0e10cSrcweir { 76*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED; 77*cdf0e10cSrcweir if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR ) 78*cdf0e10cSrcweir { 79*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 80*cdf0e10cSrcweir break; 81*cdf0e10cSrcweir } 82*cdf0e10cSrcweir else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE ) 83*cdf0e10cSrcweir { 84*cdf0e10cSrcweir pSrcBuf++; 85*cdf0e10cSrcweir continue; 86*cdf0e10cSrcweir } 87*cdf0e10cSrcweir else 88*cdf0e10cSrcweir cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags); 89*cdf0e10cSrcweir } 90*cdf0e10cSrcweir } 91*cdf0e10cSrcweir else 92*cdf0e10cSrcweir { 93*cdf0e10cSrcweir /* Source buffer to small */ 94*cdf0e10cSrcweir if ( pSrcBuf +1 == pEndSrcBuf ) 95*cdf0e10cSrcweir { 96*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 97*cdf0e10cSrcweir break; 98*cdf0e10cSrcweir } 99*cdf0e10cSrcweir 100*cdf0e10cSrcweir pSrcBuf++; 101*cdf0e10cSrcweir cTrail = (sal_uChar)*pSrcBuf; 102*cdf0e10cSrcweir if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) ) 103*cdf0e10cSrcweir cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart]; 104*cdf0e10cSrcweir else 105*cdf0e10cSrcweir cConv = 0; 106*cdf0e10cSrcweir 107*cdf0e10cSrcweir if ( !cConv ) 108*cdf0e10cSrcweir { 109*cdf0e10cSrcweir /* EUDC Ranges */ 110*cdf0e10cSrcweir sal_uInt16 i; 111*cdf0e10cSrcweir const ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab; 112*cdf0e10cSrcweir for ( i = 0; i < pConvertData->mnEUDCCount; i++ ) 113*cdf0e10cSrcweir { 114*cdf0e10cSrcweir if ( (cLead >= pEUDCTab->mnLeadStart) && 115*cdf0e10cSrcweir (cLead <= pEUDCTab->mnLeadEnd) ) 116*cdf0e10cSrcweir { 117*cdf0e10cSrcweir sal_uInt16 nTrailCount = 0; 118*cdf0e10cSrcweir if ( (cTrail >= pEUDCTab->mnTrail1Start) && 119*cdf0e10cSrcweir (cTrail <= pEUDCTab->mnTrail1End) ) 120*cdf0e10cSrcweir { 121*cdf0e10cSrcweir cConv = pEUDCTab->mnUniStart+ 122*cdf0e10cSrcweir ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ 123*cdf0e10cSrcweir (cTrail-pEUDCTab->mnTrail1Start); 124*cdf0e10cSrcweir break; 125*cdf0e10cSrcweir } 126*cdf0e10cSrcweir else 127*cdf0e10cSrcweir { 128*cdf0e10cSrcweir nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1; 129*cdf0e10cSrcweir if ( (pEUDCTab->mnTrailCount >= 2) && 130*cdf0e10cSrcweir (cTrail >= pEUDCTab->mnTrail2Start) && 131*cdf0e10cSrcweir (cTrail <= pEUDCTab->mnTrail2End) ) 132*cdf0e10cSrcweir { 133*cdf0e10cSrcweir cConv = pEUDCTab->mnUniStart+ 134*cdf0e10cSrcweir ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ 135*cdf0e10cSrcweir nTrailCount+ 136*cdf0e10cSrcweir (cTrail-pEUDCTab->mnTrail2Start); 137*cdf0e10cSrcweir break; 138*cdf0e10cSrcweir } 139*cdf0e10cSrcweir else 140*cdf0e10cSrcweir { 141*cdf0e10cSrcweir nTrailCount = pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1; 142*cdf0e10cSrcweir if ( (pEUDCTab->mnTrailCount >= 3) && 143*cdf0e10cSrcweir (cTrail >= pEUDCTab->mnTrail3Start) && 144*cdf0e10cSrcweir (cTrail <= pEUDCTab->mnTrail3End) ) 145*cdf0e10cSrcweir { 146*cdf0e10cSrcweir cConv = pEUDCTab->mnUniStart+ 147*cdf0e10cSrcweir ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ 148*cdf0e10cSrcweir nTrailCount+ 149*cdf0e10cSrcweir (cTrail-pEUDCTab->mnTrail3Start); 150*cdf0e10cSrcweir break; 151*cdf0e10cSrcweir } 152*cdf0e10cSrcweir } 153*cdf0e10cSrcweir } 154*cdf0e10cSrcweir } 155*cdf0e10cSrcweir 156*cdf0e10cSrcweir pEUDCTab++; 157*cdf0e10cSrcweir } 158*cdf0e10cSrcweir 159*cdf0e10cSrcweir if ( !cConv ) 160*cdf0e10cSrcweir { 161*cdf0e10cSrcweir /* Wir vergleichen den kompletten Trailbereich den wir */ 162*cdf0e10cSrcweir /* definieren, der normalerweise groesser sein kann als */ 163*cdf0e10cSrcweir /* der definierte. Dies machen wir, damit Erweiterungen von */ 164*cdf0e10cSrcweir /* uns nicht beruecksichtigten Encodings so weit wie */ 165*cdf0e10cSrcweir /* moeglich auch richtig zu behandeln, das double byte */ 166*cdf0e10cSrcweir /* characters auch als ein einzelner Character behandelt */ 167*cdf0e10cSrcweir /* wird. */ 168*cdf0e10cSrcweir if (cLead < pConvertData->mnLeadStart 169*cdf0e10cSrcweir || cLead > pConvertData->mnLeadEnd 170*cdf0e10cSrcweir || cTrail < pConvertData->mnTrailStart 171*cdf0e10cSrcweir || cTrail > pConvertData->mnTrailEnd) 172*cdf0e10cSrcweir { 173*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; 174*cdf0e10cSrcweir if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) 175*cdf0e10cSrcweir { 176*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 177*cdf0e10cSrcweir break; 178*cdf0e10cSrcweir } 179*cdf0e10cSrcweir else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) 180*cdf0e10cSrcweir { 181*cdf0e10cSrcweir pSrcBuf++; 182*cdf0e10cSrcweir continue; 183*cdf0e10cSrcweir } 184*cdf0e10cSrcweir else 185*cdf0e10cSrcweir cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 186*cdf0e10cSrcweir } 187*cdf0e10cSrcweir else 188*cdf0e10cSrcweir { 189*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED; 190*cdf0e10cSrcweir if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR ) 191*cdf0e10cSrcweir { 192*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 193*cdf0e10cSrcweir break; 194*cdf0e10cSrcweir } 195*cdf0e10cSrcweir else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE ) 196*cdf0e10cSrcweir { 197*cdf0e10cSrcweir pSrcBuf++; 198*cdf0e10cSrcweir continue; 199*cdf0e10cSrcweir } 200*cdf0e10cSrcweir else 201*cdf0e10cSrcweir cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 202*cdf0e10cSrcweir } 203*cdf0e10cSrcweir } 204*cdf0e10cSrcweir } 205*cdf0e10cSrcweir } 206*cdf0e10cSrcweir 207*cdf0e10cSrcweir if ( pDestBuf == pEndDestBuf ) 208*cdf0e10cSrcweir { 209*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 210*cdf0e10cSrcweir break; 211*cdf0e10cSrcweir } 212*cdf0e10cSrcweir 213*cdf0e10cSrcweir *pDestBuf = cConv; 214*cdf0e10cSrcweir pDestBuf++; 215*cdf0e10cSrcweir pSrcBuf++; 216*cdf0e10cSrcweir } 217*cdf0e10cSrcweir 218*cdf0e10cSrcweir *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); 219*cdf0e10cSrcweir return (nDestChars - (pEndDestBuf-pDestBuf)); 220*cdf0e10cSrcweir } 221*cdf0e10cSrcweir 222*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 223*cdf0e10cSrcweir 224*cdf0e10cSrcweir sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext, 225*cdf0e10cSrcweir const sal_Unicode* pSrcBuf, sal_Size nSrcChars, 226*cdf0e10cSrcweir sal_Char* pDestBuf, sal_Size nDestBytes, 227*cdf0e10cSrcweir sal_uInt32 nFlags, sal_uInt32* pInfo, 228*cdf0e10cSrcweir sal_Size* pSrcCvtChars ) 229*cdf0e10cSrcweir { 230*cdf0e10cSrcweir sal_uInt16 cConv; 231*cdf0e10cSrcweir sal_Unicode c; 232*cdf0e10cSrcweir sal_uChar nHighChar; 233*cdf0e10cSrcweir sal_uChar nLowChar; 234*cdf0e10cSrcweir const ImplUniToDBCSHighTab* pHighEntry; 235*cdf0e10cSrcweir const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData; 236*cdf0e10cSrcweir const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab; 237*cdf0e10cSrcweir sal_Char* pEndDestBuf; 238*cdf0e10cSrcweir const sal_Unicode* pEndSrcBuf; 239*cdf0e10cSrcweir 240*cdf0e10cSrcweir sal_Bool bCheckRange = (pConvertData->mnLeadStart != 0 241*cdf0e10cSrcweir || pConvertData->mnLeadEnd != 0xFF); 242*cdf0e10cSrcweir /* this statement has the effect that this extra check is only done for 243*cdf0e10cSrcweir EUC-KR, which uses the MS-949 tables, but does not support the full 244*cdf0e10cSrcweir range of MS-949 */ 245*cdf0e10cSrcweir 246*cdf0e10cSrcweir (void) pContext; /* unused */ 247*cdf0e10cSrcweir 248*cdf0e10cSrcweir *pInfo = 0; 249*cdf0e10cSrcweir pEndDestBuf = pDestBuf+nDestBytes; 250*cdf0e10cSrcweir pEndSrcBuf = pSrcBuf+nSrcChars; 251*cdf0e10cSrcweir while ( pSrcBuf < pEndSrcBuf ) 252*cdf0e10cSrcweir { 253*cdf0e10cSrcweir c = *pSrcBuf; 254*cdf0e10cSrcweir nHighChar = (sal_uChar)((c >> 8) & 0xFF); 255*cdf0e10cSrcweir nLowChar = (sal_uChar)(c & 0xFF); 256*cdf0e10cSrcweir 257*cdf0e10cSrcweir /* get entry for the high byte */ 258*cdf0e10cSrcweir pHighEntry = pHighTab+nHighChar; 259*cdf0e10cSrcweir 260*cdf0e10cSrcweir /* is low byte in the table range */ 261*cdf0e10cSrcweir if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) 262*cdf0e10cSrcweir { 263*cdf0e10cSrcweir cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; 264*cdf0e10cSrcweir if (bCheckRange && cConv > 0x7F 265*cdf0e10cSrcweir && ((cConv >> 8) < pConvertData->mnLeadStart 266*cdf0e10cSrcweir || (cConv >> 8) > pConvertData->mnLeadEnd 267*cdf0e10cSrcweir || (cConv & 0xFF) < pConvertData->mnTrailStart 268*cdf0e10cSrcweir || (cConv & 0xFF) > pConvertData->mnTrailEnd)) 269*cdf0e10cSrcweir cConv = 0; 270*cdf0e10cSrcweir } 271*cdf0e10cSrcweir else 272*cdf0e10cSrcweir cConv = 0; 273*cdf0e10cSrcweir 274*cdf0e10cSrcweir if (cConv == 0 && c != 0) 275*cdf0e10cSrcweir { 276*cdf0e10cSrcweir /* Map to EUDC ranges: */ 277*cdf0e10cSrcweir ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab; 278*cdf0e10cSrcweir sal_uInt32 i; 279*cdf0e10cSrcweir for (i = 0; i < pConvertData->mnEUDCCount; ++i) 280*cdf0e10cSrcweir { 281*cdf0e10cSrcweir if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd) 282*cdf0e10cSrcweir { 283*cdf0e10cSrcweir sal_uInt32 nIndex = c - pEUDCTab->mnUniStart; 284*cdf0e10cSrcweir sal_uInt32 nLeadOff 285*cdf0e10cSrcweir = nIndex / pEUDCTab->mnTrailRangeCount; 286*cdf0e10cSrcweir sal_uInt32 nTrailOff 287*cdf0e10cSrcweir = nIndex % pEUDCTab->mnTrailRangeCount; 288*cdf0e10cSrcweir sal_uInt32 nSize; 289*cdf0e10cSrcweir cConv = (sal_uInt16) 290*cdf0e10cSrcweir ((pEUDCTab->mnLeadStart + nLeadOff) << 8); 291*cdf0e10cSrcweir nSize 292*cdf0e10cSrcweir = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1; 293*cdf0e10cSrcweir if (nTrailOff < nSize) 294*cdf0e10cSrcweir { 295*cdf0e10cSrcweir cConv |= pEUDCTab->mnTrail1Start + nTrailOff; 296*cdf0e10cSrcweir break; 297*cdf0e10cSrcweir } 298*cdf0e10cSrcweir nTrailOff -= nSize; 299*cdf0e10cSrcweir nSize 300*cdf0e10cSrcweir = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1; 301*cdf0e10cSrcweir if (nTrailOff < nSize) 302*cdf0e10cSrcweir { 303*cdf0e10cSrcweir cConv |= pEUDCTab->mnTrail2Start + nTrailOff; 304*cdf0e10cSrcweir break; 305*cdf0e10cSrcweir } 306*cdf0e10cSrcweir nTrailOff -= nSize; 307*cdf0e10cSrcweir cConv |= pEUDCTab->mnTrail3Start + nTrailOff; 308*cdf0e10cSrcweir break; 309*cdf0e10cSrcweir } 310*cdf0e10cSrcweir pEUDCTab++; 311*cdf0e10cSrcweir } 312*cdf0e10cSrcweir 313*cdf0e10cSrcweir /* FIXME 314*cdf0e10cSrcweir * SB: Not sure why this is in here. Plus, it does not work as 315*cdf0e10cSrcweir * intended when (c & 0xFF) == 0, because the next !cConv check 316*cdf0e10cSrcweir * will then think c has not yet been converted... 317*cdf0e10cSrcweir */ 318*cdf0e10cSrcweir if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START 319*cdf0e10cSrcweir && c <= RTL_TEXTCVT_BYTE_PRIVATE_END) 320*cdf0e10cSrcweir { 321*cdf0e10cSrcweir if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 ) 322*cdf0e10cSrcweir cConv = (sal_Char)(sal_uChar)(c & 0xFF); 323*cdf0e10cSrcweir } 324*cdf0e10cSrcweir } 325*cdf0e10cSrcweir 326*cdf0e10cSrcweir if ( !cConv ) 327*cdf0e10cSrcweir { 328*cdf0e10cSrcweir if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) 329*cdf0e10cSrcweir { 330*cdf0e10cSrcweir /* !!! */ 331*cdf0e10cSrcweir } 332*cdf0e10cSrcweir 333*cdf0e10cSrcweir if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR ) 334*cdf0e10cSrcweir { 335*cdf0e10cSrcweir /* !!! */ 336*cdf0e10cSrcweir } 337*cdf0e10cSrcweir 338*cdf0e10cSrcweir /* Handle undefined and surrogates characters */ 339*cdf0e10cSrcweir /* (all surrogates characters are undefined) */ 340*cdf0e10cSrcweir if (ImplHandleUndefinedUnicodeToTextChar(pData, 341*cdf0e10cSrcweir &pSrcBuf, 342*cdf0e10cSrcweir pEndSrcBuf, 343*cdf0e10cSrcweir &pDestBuf, 344*cdf0e10cSrcweir pEndDestBuf, 345*cdf0e10cSrcweir nFlags, 346*cdf0e10cSrcweir pInfo)) 347*cdf0e10cSrcweir continue; 348*cdf0e10cSrcweir else 349*cdf0e10cSrcweir break; 350*cdf0e10cSrcweir } 351*cdf0e10cSrcweir 352*cdf0e10cSrcweir /* SingleByte */ 353*cdf0e10cSrcweir if ( !(cConv & 0xFF00) ) 354*cdf0e10cSrcweir { 355*cdf0e10cSrcweir if ( pDestBuf == pEndDestBuf ) 356*cdf0e10cSrcweir { 357*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 358*cdf0e10cSrcweir break; 359*cdf0e10cSrcweir } 360*cdf0e10cSrcweir 361*cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); 362*cdf0e10cSrcweir pDestBuf++; 363*cdf0e10cSrcweir } 364*cdf0e10cSrcweir else 365*cdf0e10cSrcweir { 366*cdf0e10cSrcweir if ( pDestBuf+1 >= pEndDestBuf ) 367*cdf0e10cSrcweir { 368*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 369*cdf0e10cSrcweir break; 370*cdf0e10cSrcweir } 371*cdf0e10cSrcweir 372*cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF); 373*cdf0e10cSrcweir pDestBuf++; 374*cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); 375*cdf0e10cSrcweir pDestBuf++; 376*cdf0e10cSrcweir } 377*cdf0e10cSrcweir 378*cdf0e10cSrcweir pSrcBuf++; 379*cdf0e10cSrcweir } 380*cdf0e10cSrcweir 381*cdf0e10cSrcweir *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); 382*cdf0e10cSrcweir return (nDestBytes - (pEndDestBuf-pDestBuf)); 383*cdf0e10cSrcweir } 384*cdf0e10cSrcweir 385*cdf0e10cSrcweir /* ======================================================================= */ 386*cdf0e10cSrcweir 387*cdf0e10cSrcweir #define JIS_EUC_LEAD_OFF 0x80 388*cdf0e10cSrcweir #define JIS_EUC_TRAIL_OFF 0x80 389*cdf0e10cSrcweir 390*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 391*cdf0e10cSrcweir 392*cdf0e10cSrcweir sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData, 393*cdf0e10cSrcweir void* pContext, 394*cdf0e10cSrcweir const sal_Char* pSrcBuf, sal_Size nSrcBytes, 395*cdf0e10cSrcweir sal_Unicode* pDestBuf, sal_Size nDestChars, 396*cdf0e10cSrcweir sal_uInt32 nFlags, sal_uInt32* pInfo, 397*cdf0e10cSrcweir sal_Size* pSrcCvtBytes ) 398*cdf0e10cSrcweir { 399*cdf0e10cSrcweir sal_uChar c; 400*cdf0e10cSrcweir sal_uChar cLead = '\0'; 401*cdf0e10cSrcweir sal_uChar cTrail = '\0'; 402*cdf0e10cSrcweir sal_Unicode cConv; 403*cdf0e10cSrcweir const ImplDBCSToUniLeadTab* pLeadEntry; 404*cdf0e10cSrcweir const ImplDBCSToUniLeadTab* pLeadTab; 405*cdf0e10cSrcweir const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData; 406*cdf0e10cSrcweir sal_Unicode* pEndDestBuf; 407*cdf0e10cSrcweir const sal_Char* pEndSrcBuf; 408*cdf0e10cSrcweir 409*cdf0e10cSrcweir (void) pContext; /* unused */ 410*cdf0e10cSrcweir 411*cdf0e10cSrcweir *pInfo = 0; 412*cdf0e10cSrcweir pEndDestBuf = pDestBuf+nDestChars; 413*cdf0e10cSrcweir pEndSrcBuf = pSrcBuf+nSrcBytes; 414*cdf0e10cSrcweir while ( pSrcBuf < pEndSrcBuf ) 415*cdf0e10cSrcweir { 416*cdf0e10cSrcweir c = (sal_uChar)*pSrcBuf; 417*cdf0e10cSrcweir 418*cdf0e10cSrcweir /* ASCII */ 419*cdf0e10cSrcweir if ( c <= 0x7F ) 420*cdf0e10cSrcweir cConv = c; 421*cdf0e10cSrcweir else 422*cdf0e10cSrcweir { 423*cdf0e10cSrcweir /* SS2 - Half-width katakana */ 424*cdf0e10cSrcweir /* 8E + A1-DF */ 425*cdf0e10cSrcweir if ( c == 0x8E ) 426*cdf0e10cSrcweir { 427*cdf0e10cSrcweir /* Source buffer to small */ 428*cdf0e10cSrcweir if ( pSrcBuf + 1 == pEndSrcBuf ) 429*cdf0e10cSrcweir { 430*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 431*cdf0e10cSrcweir break; 432*cdf0e10cSrcweir } 433*cdf0e10cSrcweir 434*cdf0e10cSrcweir pSrcBuf++; 435*cdf0e10cSrcweir c = (sal_uChar)*pSrcBuf; 436*cdf0e10cSrcweir if ( (c >= 0xA1) && (c <= 0xDF) ) 437*cdf0e10cSrcweir cConv = 0xFF61+(c-0xA1); 438*cdf0e10cSrcweir else 439*cdf0e10cSrcweir { 440*cdf0e10cSrcweir cConv = 0; 441*cdf0e10cSrcweir cLead = 0x8E; 442*cdf0e10cSrcweir cTrail = c; 443*cdf0e10cSrcweir } 444*cdf0e10cSrcweir } 445*cdf0e10cSrcweir else 446*cdf0e10cSrcweir { 447*cdf0e10cSrcweir /* SS3 - JIS 0212-1990 */ 448*cdf0e10cSrcweir /* 8F + A1-FE + A1-FE */ 449*cdf0e10cSrcweir if ( c == 0x8F ) 450*cdf0e10cSrcweir { 451*cdf0e10cSrcweir /* Source buffer to small */ 452*cdf0e10cSrcweir if (pEndSrcBuf - pSrcBuf < 3) 453*cdf0e10cSrcweir { 454*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 455*cdf0e10cSrcweir break; 456*cdf0e10cSrcweir } 457*cdf0e10cSrcweir 458*cdf0e10cSrcweir pSrcBuf++; 459*cdf0e10cSrcweir cLead = (sal_uChar)*pSrcBuf; 460*cdf0e10cSrcweir pSrcBuf++; 461*cdf0e10cSrcweir cTrail = (sal_uChar)*pSrcBuf; 462*cdf0e10cSrcweir pLeadTab = pConvertData->mpJIS0212ToUniLeadTab; 463*cdf0e10cSrcweir } 464*cdf0e10cSrcweir /* CodeSet 2 JIS 0208-1997 */ 465*cdf0e10cSrcweir /* A1-FE + A1-FE */ 466*cdf0e10cSrcweir else 467*cdf0e10cSrcweir { 468*cdf0e10cSrcweir /* Source buffer to small */ 469*cdf0e10cSrcweir if ( pSrcBuf + 1 == pEndSrcBuf ) 470*cdf0e10cSrcweir { 471*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 472*cdf0e10cSrcweir break; 473*cdf0e10cSrcweir } 474*cdf0e10cSrcweir 475*cdf0e10cSrcweir cLead = c; 476*cdf0e10cSrcweir pSrcBuf++; 477*cdf0e10cSrcweir cTrail = (sal_uChar)*pSrcBuf; 478*cdf0e10cSrcweir pLeadTab = pConvertData->mpJIS0208ToUniLeadTab; 479*cdf0e10cSrcweir } 480*cdf0e10cSrcweir 481*cdf0e10cSrcweir /* Undefined Range */ 482*cdf0e10cSrcweir if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) ) 483*cdf0e10cSrcweir cConv = 0; 484*cdf0e10cSrcweir else 485*cdf0e10cSrcweir { 486*cdf0e10cSrcweir cLead -= JIS_EUC_LEAD_OFF; 487*cdf0e10cSrcweir cTrail -= JIS_EUC_TRAIL_OFF; 488*cdf0e10cSrcweir pLeadEntry = pLeadTab+cLead; 489*cdf0e10cSrcweir if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) ) 490*cdf0e10cSrcweir cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart]; 491*cdf0e10cSrcweir else 492*cdf0e10cSrcweir cConv = 0; 493*cdf0e10cSrcweir } 494*cdf0e10cSrcweir } 495*cdf0e10cSrcweir 496*cdf0e10cSrcweir if ( !cConv ) 497*cdf0e10cSrcweir { 498*cdf0e10cSrcweir /* Wir vergleichen den kompletten Trailbereich den wir */ 499*cdf0e10cSrcweir /* definieren, der normalerweise groesser sein kann als */ 500*cdf0e10cSrcweir /* der definierte. Dies machen wir, damit Erweiterungen von */ 501*cdf0e10cSrcweir /* uns nicht beruecksichtigten Encodings so weit wie */ 502*cdf0e10cSrcweir /* moeglich auch richtig zu behandeln, das double byte */ 503*cdf0e10cSrcweir /* characters auch als ein einzelner Character behandelt */ 504*cdf0e10cSrcweir /* wird. */ 505*cdf0e10cSrcweir if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) ) 506*cdf0e10cSrcweir { 507*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; 508*cdf0e10cSrcweir if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) 509*cdf0e10cSrcweir { 510*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 511*cdf0e10cSrcweir break; 512*cdf0e10cSrcweir } 513*cdf0e10cSrcweir else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) 514*cdf0e10cSrcweir { 515*cdf0e10cSrcweir pSrcBuf++; 516*cdf0e10cSrcweir continue; 517*cdf0e10cSrcweir } 518*cdf0e10cSrcweir else 519*cdf0e10cSrcweir cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 520*cdf0e10cSrcweir } 521*cdf0e10cSrcweir else 522*cdf0e10cSrcweir { 523*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED; 524*cdf0e10cSrcweir if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR ) 525*cdf0e10cSrcweir { 526*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 527*cdf0e10cSrcweir break; 528*cdf0e10cSrcweir } 529*cdf0e10cSrcweir else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE ) 530*cdf0e10cSrcweir { 531*cdf0e10cSrcweir pSrcBuf++; 532*cdf0e10cSrcweir continue; 533*cdf0e10cSrcweir } 534*cdf0e10cSrcweir else 535*cdf0e10cSrcweir cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 536*cdf0e10cSrcweir } 537*cdf0e10cSrcweir } 538*cdf0e10cSrcweir } 539*cdf0e10cSrcweir 540*cdf0e10cSrcweir if ( pDestBuf == pEndDestBuf ) 541*cdf0e10cSrcweir { 542*cdf0e10cSrcweir *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 543*cdf0e10cSrcweir break; 544*cdf0e10cSrcweir } 545*cdf0e10cSrcweir 546*cdf0e10cSrcweir *pDestBuf = cConv; 547*cdf0e10cSrcweir pDestBuf++; 548*cdf0e10cSrcweir pSrcBuf++; 549*cdf0e10cSrcweir } 550*cdf0e10cSrcweir 551*cdf0e10cSrcweir *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); 552*cdf0e10cSrcweir return (nDestChars - (pEndDestBuf-pDestBuf)); 553*cdf0e10cSrcweir } 554*cdf0e10cSrcweir 555*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 556*cdf0e10cSrcweir 557*cdf0e10cSrcweir sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData, 558*cdf0e10cSrcweir void* pContext, 559*cdf0e10cSrcweir const sal_Unicode* pSrcBuf, sal_Size nSrcChars, 560*cdf0e10cSrcweir sal_Char* pDestBuf, sal_Size nDestBytes, 561*cdf0e10cSrcweir sal_uInt32 nFlags, sal_uInt32* pInfo, 562*cdf0e10cSrcweir sal_Size* pSrcCvtChars ) 563*cdf0e10cSrcweir { 564*cdf0e10cSrcweir sal_uInt32 cConv; 565*cdf0e10cSrcweir sal_Unicode c; 566*cdf0e10cSrcweir sal_uChar nHighChar; 567*cdf0e10cSrcweir sal_uChar nLowChar; 568*cdf0e10cSrcweir const ImplUniToDBCSHighTab* pHighEntry; 569*cdf0e10cSrcweir const ImplUniToDBCSHighTab* pHighTab; 570*cdf0e10cSrcweir const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData; 571*cdf0e10cSrcweir sal_Char* pEndDestBuf; 572*cdf0e10cSrcweir const sal_Unicode* pEndSrcBuf; 573*cdf0e10cSrcweir 574*cdf0e10cSrcweir (void) pContext; /* unused */ 575*cdf0e10cSrcweir 576*cdf0e10cSrcweir *pInfo = 0; 577*cdf0e10cSrcweir pEndDestBuf = pDestBuf+nDestBytes; 578*cdf0e10cSrcweir pEndSrcBuf = pSrcBuf+nSrcChars; 579*cdf0e10cSrcweir while ( pSrcBuf < pEndSrcBuf ) 580*cdf0e10cSrcweir { 581*cdf0e10cSrcweir c = *pSrcBuf; 582*cdf0e10cSrcweir 583*cdf0e10cSrcweir /* ASCII */ 584*cdf0e10cSrcweir if ( c <= 0x7F ) 585*cdf0e10cSrcweir cConv = c; 586*cdf0e10cSrcweir /* Half-width katakana */ 587*cdf0e10cSrcweir else if ( (c >= 0xFF61) && (c <= 0xFF9F) ) 588*cdf0e10cSrcweir cConv = 0x8E00+0xA1+(c-0xFF61); 589*cdf0e10cSrcweir else 590*cdf0e10cSrcweir { 591*cdf0e10cSrcweir nHighChar = (sal_uChar)((c >> 8) & 0xFF); 592*cdf0e10cSrcweir nLowChar = (sal_uChar)(c & 0xFF); 593*cdf0e10cSrcweir 594*cdf0e10cSrcweir /* JIS 0208 */ 595*cdf0e10cSrcweir pHighTab = pConvertData->mpUniToJIS0208HighTab; 596*cdf0e10cSrcweir pHighEntry = pHighTab+nHighChar; 597*cdf0e10cSrcweir if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) 598*cdf0e10cSrcweir { 599*cdf0e10cSrcweir cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; 600*cdf0e10cSrcweir if (cConv != 0) 601*cdf0e10cSrcweir cConv |= 0x8080; 602*cdf0e10cSrcweir } 603*cdf0e10cSrcweir else 604*cdf0e10cSrcweir cConv = 0; 605*cdf0e10cSrcweir 606*cdf0e10cSrcweir /* JIS 0212 */ 607*cdf0e10cSrcweir if ( !cConv ) 608*cdf0e10cSrcweir { 609*cdf0e10cSrcweir pHighTab = pConvertData->mpUniToJIS0212HighTab; 610*cdf0e10cSrcweir pHighEntry = pHighTab+nHighChar; 611*cdf0e10cSrcweir if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) 612*cdf0e10cSrcweir { 613*cdf0e10cSrcweir cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; 614*cdf0e10cSrcweir if (cConv != 0) 615*cdf0e10cSrcweir cConv |= 0x8F8080; 616*cdf0e10cSrcweir } 617*cdf0e10cSrcweir 618*cdf0e10cSrcweir if ( !cConv ) 619*cdf0e10cSrcweir { 620*cdf0e10cSrcweir if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) 621*cdf0e10cSrcweir { 622*cdf0e10cSrcweir /* !!! */ 623*cdf0e10cSrcweir } 624*cdf0e10cSrcweir 625*cdf0e10cSrcweir if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR ) 626*cdf0e10cSrcweir { 627*cdf0e10cSrcweir /* !!! */ 628*cdf0e10cSrcweir } 629*cdf0e10cSrcweir 630*cdf0e10cSrcweir /* Handle undefined and surrogates characters */ 631*cdf0e10cSrcweir /* (all surrogates characters are undefined) */ 632*cdf0e10cSrcweir if (ImplHandleUndefinedUnicodeToTextChar(pData, 633*cdf0e10cSrcweir &pSrcBuf, 634*cdf0e10cSrcweir pEndSrcBuf, 635*cdf0e10cSrcweir &pDestBuf, 636*cdf0e10cSrcweir pEndDestBuf, 637*cdf0e10cSrcweir nFlags, 638*cdf0e10cSrcweir pInfo)) 639*cdf0e10cSrcweir continue; 640*cdf0e10cSrcweir else 641*cdf0e10cSrcweir break; 642*cdf0e10cSrcweir } 643*cdf0e10cSrcweir } 644*cdf0e10cSrcweir } 645*cdf0e10cSrcweir 646*cdf0e10cSrcweir /* SingleByte */ 647*cdf0e10cSrcweir if ( !(cConv & 0xFFFF00) ) 648*cdf0e10cSrcweir { 649*cdf0e10cSrcweir if ( pDestBuf == pEndDestBuf ) 650*cdf0e10cSrcweir { 651*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 652*cdf0e10cSrcweir break; 653*cdf0e10cSrcweir } 654*cdf0e10cSrcweir 655*cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); 656*cdf0e10cSrcweir pDestBuf++; 657*cdf0e10cSrcweir } 658*cdf0e10cSrcweir /* DoubleByte */ 659*cdf0e10cSrcweir else if ( !(cConv & 0xFF0000) ) 660*cdf0e10cSrcweir { 661*cdf0e10cSrcweir if ( pDestBuf+1 >= pEndDestBuf ) 662*cdf0e10cSrcweir { 663*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 664*cdf0e10cSrcweir break; 665*cdf0e10cSrcweir } 666*cdf0e10cSrcweir 667*cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF); 668*cdf0e10cSrcweir pDestBuf++; 669*cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); 670*cdf0e10cSrcweir pDestBuf++; 671*cdf0e10cSrcweir } 672*cdf0e10cSrcweir else 673*cdf0e10cSrcweir { 674*cdf0e10cSrcweir if ( pDestBuf+2 >= pEndDestBuf ) 675*cdf0e10cSrcweir { 676*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 677*cdf0e10cSrcweir break; 678*cdf0e10cSrcweir } 679*cdf0e10cSrcweir 680*cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 16) & 0xFF); 681*cdf0e10cSrcweir pDestBuf++; 682*cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF); 683*cdf0e10cSrcweir pDestBuf++; 684*cdf0e10cSrcweir *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); 685*cdf0e10cSrcweir pDestBuf++; 686*cdf0e10cSrcweir } 687*cdf0e10cSrcweir 688*cdf0e10cSrcweir pSrcBuf++; 689*cdf0e10cSrcweir } 690*cdf0e10cSrcweir 691*cdf0e10cSrcweir *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); 692*cdf0e10cSrcweir return (nDestBytes - (pEndDestBuf-pDestBuf)); 693*cdf0e10cSrcweir } 694