1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #include "convertbig5hkscs.h" 25 #include "context.h" 26 #include "converter.h" 27 #include "tenchelp.h" 28 #include "unichars.h" 29 #include "osl/diagnose.h" 30 #include "rtl/alloc.h" 31 #include "rtl/textcvt.h" 32 #include "sal/types.h" 33 34 typedef struct 35 { 36 sal_Int32 m_nRow; /* 0--255; 0 means none */ 37 } ImplBig5HkscsToUnicodeContext; 38 39 void * ImplCreateBig5HkscsToUnicodeContext(void) 40 { 41 void * pContext 42 = rtl_allocateMemory(sizeof (ImplBig5HkscsToUnicodeContext)); 43 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0; 44 return pContext; 45 } 46 47 void ImplResetBig5HkscsToUnicodeContext(void * pContext) 48 { 49 if (pContext) 50 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = 0; 51 } 52 53 sal_Size ImplConvertBig5HkscsToUnicode(ImplTextConverterData const * pData, 54 void * pContext, 55 sal_Char const * pSrcBuf, 56 sal_Size nSrcBytes, 57 sal_Unicode * pDestBuf, 58 sal_Size nDestChars, 59 sal_uInt32 nFlags, 60 sal_uInt32 * pInfo, 61 sal_Size * pSrcCvtBytes) 62 { 63 sal_uInt16 const * pBig5Hkscs2001Data 64 = ((ImplBig5HkscsConverterData const *) pData)-> 65 m_pBig5Hkscs2001ToUnicodeData; 66 sal_Int32 const * pBig5Hkscs2001RowOffsets 67 = ((ImplBig5HkscsConverterData const *) pData)-> 68 m_pBig5Hkscs2001ToUnicodeRowOffsets; 69 ImplDBCSToUniLeadTab const * pBig5Data 70 = ((ImplBig5HkscsConverterData const *) pData)-> 71 m_pBig5ToUnicodeData; 72 sal_Int32 nRow = 0; 73 sal_uInt32 nInfo = 0; 74 sal_Size nConverted = 0; 75 sal_Unicode * pDestBufPtr = pDestBuf; 76 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; 77 78 if (pContext) 79 nRow = ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow; 80 81 for (; nConverted < nSrcBytes; ++nConverted) 82 { 83 sal_Bool bUndefined = sal_True; 84 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; 85 if (nRow == 0) 86 if (nChar < 0x80) 87 if (pDestBufPtr != pDestBufEnd) 88 *pDestBufPtr++ = (sal_Unicode) nChar; 89 else 90 goto no_output; 91 else if (nChar >= 0x81 && nChar <= 0xFE) 92 nRow = nChar; 93 else 94 { 95 bUndefined = sal_False; 96 goto bad_input; 97 } 98 else 99 if ((nChar >= 0x40 && nChar <= 0x7E) 100 || (nChar >= 0xA1 && nChar <= 0xFE)) 101 { 102 sal_uInt32 nUnicode = 0xFFFF; 103 sal_Int32 nOffset = pBig5Hkscs2001RowOffsets[nRow]; 104 sal_uInt32 nFirst=0; 105 sal_uInt32 nLast=0; 106 if (nOffset != -1) 107 { 108 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++]; 109 nFirst = nFirstLast & 0xFF; 110 nLast = nFirstLast >> 8; 111 if (nChar >= nFirst && nChar <= nLast) 112 nUnicode 113 = pBig5Hkscs2001Data[nOffset + (nChar - nFirst)]; 114 } 115 if (nUnicode == 0xFFFF) 116 { 117 sal_uInt32 nFirst = pBig5Data[nRow].mnTrailStart; 118 if (nChar >= nFirst 119 && nChar <= pBig5Data[nRow].mnTrailEnd) 120 { 121 nUnicode 122 = pBig5Data[nRow].mpToUniTrailTab[nChar - nFirst]; 123 if (nUnicode == 0) 124 nUnicode = 0xFFFF; 125 OSL_VERIFY(!ImplIsHighSurrogate(nUnicode)); 126 } 127 } 128 if (nUnicode == 0xFFFF) 129 { 130 ImplDBCSEUDCData const * p 131 = ((ImplBig5HkscsConverterData const *) pData)-> 132 m_pEudcData; 133 sal_uInt32 nCount 134 = ((ImplBig5HkscsConverterData const *) pData)-> 135 m_nEudcCount; 136 sal_uInt32 i; 137 for (i = 0; i < nCount; ++i) 138 { 139 if (nRow >= p->mnLeadStart && nRow <= p->mnLeadEnd) 140 { 141 if (nChar < p->mnTrail1Start) 142 break; 143 if (nChar <= p->mnTrail1End) 144 { 145 nUnicode 146 = p->mnUniStart 147 + (nRow - p->mnLeadStart) 148 * p->mnTrailRangeCount 149 + (nChar - p->mnTrail1Start); 150 break; 151 } 152 if (p->mnTrailCount < 2 153 || nChar < p->mnTrail2Start) 154 break; 155 if (nChar <= p->mnTrail2End) 156 { 157 nUnicode 158 = p->mnUniStart 159 + (nRow - p->mnLeadStart) 160 * p->mnTrailRangeCount 161 + (nChar - p->mnTrail2Start) 162 + (p->mnTrail1End - p->mnTrail1Start 163 + 1); 164 break; 165 } 166 if (p->mnTrailCount < 3 167 || nChar < p->mnTrail3Start) 168 break; 169 if (nChar <= p->mnTrail3End) 170 { 171 nUnicode 172 = p->mnUniStart 173 + (nRow - p->mnLeadStart) 174 * p->mnTrailRangeCount 175 + (nChar - p->mnTrail3Start) 176 + (p->mnTrail1End - p->mnTrail1Start 177 + 1) 178 + (p->mnTrail2End - p->mnTrail2Start 179 + 1); 180 break; 181 } 182 break; 183 } 184 ++p; 185 } 186 OSL_VERIFY(!ImplIsHighSurrogate(nUnicode)); 187 } 188 if (nUnicode == 0xFFFF) 189 goto bad_input; 190 if (ImplIsHighSurrogate(nUnicode)) 191 if (pDestBufEnd - pDestBufPtr >= 2) 192 { 193 nOffset += nLast - nFirst + 1; 194 nFirst = pBig5Hkscs2001Data[nOffset++]; 195 *pDestBufPtr++ = (sal_Unicode) nUnicode; 196 *pDestBufPtr++ 197 = (sal_Unicode) pBig5Hkscs2001Data[ 198 nOffset + (nChar - nFirst)]; 199 } 200 else 201 goto no_output; 202 else 203 if (pDestBufPtr != pDestBufEnd) 204 *pDestBufPtr++ = (sal_Unicode) nUnicode; 205 else 206 goto no_output; 207 nRow = 0; 208 } 209 else 210 { 211 bUndefined = sal_False; 212 goto bad_input; 213 } 214 continue; 215 216 bad_input: 217 switch (ImplHandleBadInputTextToUnicodeConversion( 218 bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, 219 &nInfo)) 220 { 221 case IMPL_BAD_INPUT_STOP: 222 nRow = 0; 223 break; 224 225 case IMPL_BAD_INPUT_CONTINUE: 226 nRow = 0; 227 continue; 228 229 case IMPL_BAD_INPUT_NO_OUTPUT: 230 goto no_output; 231 } 232 break; 233 234 no_output: 235 --pSrcBuf; 236 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 237 break; 238 } 239 240 if (nRow != 0 241 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR 242 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) 243 == 0) 244 { 245 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) 246 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 247 else 248 switch (ImplHandleBadInputTextToUnicodeConversion( 249 sal_False, sal_True, 0, nFlags, &pDestBufPtr, 250 pDestBufEnd, &nInfo)) 251 { 252 case IMPL_BAD_INPUT_STOP: 253 case IMPL_BAD_INPUT_CONTINUE: 254 nRow = 0; 255 break; 256 257 case IMPL_BAD_INPUT_NO_OUTPUT: 258 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 259 break; 260 } 261 } 262 263 if (pContext) 264 ((ImplBig5HkscsToUnicodeContext *) pContext)->m_nRow = nRow; 265 if (pInfo) 266 *pInfo = nInfo; 267 if (pSrcCvtBytes) 268 *pSrcCvtBytes = nConverted; 269 270 return pDestBufPtr - pDestBuf; 271 } 272 273 sal_Size ImplConvertUnicodeToBig5Hkscs(ImplTextConverterData const * pData, 274 void * pContext, 275 sal_Unicode const * pSrcBuf, 276 sal_Size nSrcChars, 277 sal_Char * pDestBuf, 278 sal_Size nDestBytes, 279 sal_uInt32 nFlags, 280 sal_uInt32 * pInfo, 281 sal_Size * pSrcCvtChars) 282 { 283 sal_uInt16 const * pBig5Hkscs2001Data 284 = ((ImplBig5HkscsConverterData const *) pData)-> 285 m_pUnicodeToBig5Hkscs2001Data; 286 sal_Int32 const * pBig5Hkscs2001PageOffsets 287 = ((ImplBig5HkscsConverterData const *) pData)-> 288 m_pUnicodeToBig5Hkscs2001PageOffsets; 289 sal_Int32 const * pBig5Hkscs2001PlaneOffsets 290 = ((ImplBig5HkscsConverterData const *) pData)-> 291 m_pUnicodeToBig5Hkscs2001PlaneOffsets; 292 ImplUniToDBCSHighTab const * pBig5Data 293 = ((ImplBig5HkscsConverterData const *) pData)-> 294 m_pUnicodeToBig5Data; 295 sal_Unicode nHighSurrogate = 0; 296 sal_uInt32 nInfo = 0; 297 sal_Size nConverted = 0; 298 sal_Char * pDestBufPtr = pDestBuf; 299 sal_Char * pDestBufEnd = pDestBuf + nDestBytes; 300 301 if (pContext) 302 nHighSurrogate 303 = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate; 304 305 for (; nConverted < nSrcChars; ++nConverted) 306 { 307 sal_Bool bUndefined = sal_True; 308 sal_uInt32 nChar = *pSrcBuf++; 309 if (nHighSurrogate == 0) 310 { 311 if (ImplIsHighSurrogate(nChar)) 312 { 313 nHighSurrogate = (sal_Unicode) nChar; 314 continue; 315 } 316 } 317 else if (ImplIsLowSurrogate(nChar)) 318 nChar = ImplCombineSurrogates(nHighSurrogate, nChar); 319 else 320 { 321 bUndefined = sal_False; 322 goto bad_input; 323 } 324 325 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) 326 { 327 bUndefined = sal_False; 328 goto bad_input; 329 } 330 331 if (nChar < 0x80) 332 if (pDestBufPtr != pDestBufEnd) 333 *pDestBufPtr++ = (sal_Char) nChar; 334 else 335 goto no_output; 336 else 337 { 338 sal_uInt32 nBytes = 0; 339 sal_Int32 nOffset = pBig5Hkscs2001PlaneOffsets[nChar >> 16]; 340 if (nOffset != -1) 341 { 342 nOffset 343 = pBig5Hkscs2001PageOffsets[nOffset + ((nChar & 0xFF00) 344 >> 8)]; 345 if (nOffset != -1) 346 { 347 sal_uInt32 nFirstLast = pBig5Hkscs2001Data[nOffset++]; 348 sal_uInt32 nFirst = nFirstLast & 0xFF; 349 sal_uInt32 nLast = nFirstLast >> 8; 350 sal_uInt32 nIndex = nChar & 0xFF; 351 if (nIndex >= nFirst && nIndex <= nLast) 352 { 353 nBytes 354 = pBig5Hkscs2001Data[nOffset + (nIndex - nFirst)]; 355 } 356 } 357 } 358 if (nBytes == 0) 359 { 360 sal_uInt32 nIndex1 = nChar >> 8; 361 if (nIndex1 < 0x100) 362 { 363 sal_uInt32 nIndex2 = nChar & 0xFF; 364 sal_uInt32 nFirst = pBig5Data[nIndex1].mnLowStart; 365 if (nIndex2 >= nFirst 366 && nIndex2 <= pBig5Data[nIndex1].mnLowEnd) 367 nBytes = pBig5Data[nIndex1]. 368 mpToUniTrailTab[nIndex2 - nFirst]; 369 } 370 } 371 if (nBytes == 0) 372 { 373 ImplDBCSEUDCData const * p 374 = ((ImplBig5HkscsConverterData const *) pData)-> 375 m_pEudcData; 376 sal_uInt32 nCount 377 = ((ImplBig5HkscsConverterData const *) pData)-> 378 m_nEudcCount; 379 sal_uInt32 i; 380 for (i = 0; i < nCount; ++i) { 381 if (nChar >= p->mnUniStart && nChar <= p->mnUniEnd) 382 { 383 sal_uInt32 nIndex = nChar - p->mnUniStart; 384 sal_uInt32 nLeadOff = nIndex / p->mnTrailRangeCount; 385 sal_uInt32 nTrailOff = nIndex % p->mnTrailRangeCount; 386 sal_uInt32 nSize; 387 nBytes = (p->mnLeadStart + nLeadOff) << 8; 388 nSize = p->mnTrail1End - p->mnTrail1Start + 1; 389 if (nTrailOff < nSize) 390 { 391 nBytes |= p->mnTrail1Start + nTrailOff; 392 break; 393 } 394 nTrailOff -= nSize; 395 nSize = p->mnTrail2End - p->mnTrail2Start + 1; 396 if (nTrailOff < nSize) 397 { 398 nBytes |= p->mnTrail2Start + nTrailOff; 399 break; 400 } 401 nTrailOff -= nSize; 402 nBytes |= p->mnTrail3Start + nTrailOff; 403 break; 404 } 405 ++p; 406 } 407 } 408 if (nBytes == 0) 409 goto bad_input; 410 if (pDestBufEnd - pDestBufPtr >= 2) 411 { 412 *pDestBufPtr++ = (sal_Char) (nBytes >> 8); 413 *pDestBufPtr++ = (sal_Char) (nBytes & 0xFF); 414 } 415 else 416 goto no_output; 417 } 418 nHighSurrogate = 0; 419 continue; 420 421 bad_input: 422 switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, 423 nChar, 424 nFlags, 425 &pDestBufPtr, 426 pDestBufEnd, 427 &nInfo, 428 NULL, 429 0, 430 NULL)) 431 { 432 case IMPL_BAD_INPUT_STOP: 433 nHighSurrogate = 0; 434 break; 435 436 case IMPL_BAD_INPUT_CONTINUE: 437 nHighSurrogate = 0; 438 continue; 439 440 case IMPL_BAD_INPUT_NO_OUTPUT: 441 goto no_output; 442 } 443 break; 444 445 no_output: 446 --pSrcBuf; 447 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 448 break; 449 } 450 451 if (nHighSurrogate != 0 452 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR 453 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) 454 == 0) 455 { 456 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) 457 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; 458 else 459 switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 460 0, 461 nFlags, 462 &pDestBufPtr, 463 pDestBufEnd, 464 &nInfo, 465 NULL, 466 0, 467 NULL)) 468 { 469 case IMPL_BAD_INPUT_STOP: 470 case IMPL_BAD_INPUT_CONTINUE: 471 nHighSurrogate = 0; 472 break; 473 474 case IMPL_BAD_INPUT_NO_OUTPUT: 475 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 476 break; 477 } 478 } 479 480 if (pContext) 481 ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate 482 = nHighSurrogate; 483 if (pInfo) 484 *pInfo = nInfo; 485 if (pSrcCvtChars) 486 *pSrcCvtChars = nConverted; 487 488 return pDestBufPtr - pDestBuf; 489 } 490