1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #include "converteuctw.h" 25 #include "context.h" 26 #include "converter.h" 27 #include "tenchelp.h" 28 #include "unichars.h" 29 #include "rtl/alloc.h" 30 #include "rtl/textcvt.h" 31 #include "sal/types.h" 32 33 typedef enum 34 { 35 IMPL_EUC_TW_TO_UNICODE_STATE_0, 36 IMPL_EUC_TW_TO_UNICODE_STATE_1, 37 IMPL_EUC_TW_TO_UNICODE_STATE_2_1, 38 IMPL_EUC_TW_TO_UNICODE_STATE_2_2, 39 IMPL_EUC_TW_TO_UNICODE_STATE_2_3 40 } ImplEucTwToUnicodeState; 41 42 typedef struct 43 { 44 ImplEucTwToUnicodeState m_eState; 45 sal_Int32 m_nPlane; /* 0--15 */ 46 sal_Int32 m_nRow; /* 0--93 */ 47 } ImplEucTwToUnicodeContext; 48 49 void * ImplCreateEucTwToUnicodeContext(void) 50 { 51 void * pContext = rtl_allocateMemory(sizeof (ImplEucTwToUnicodeContext)); 52 ((ImplEucTwToUnicodeContext *) pContext)->m_eState 53 = IMPL_EUC_TW_TO_UNICODE_STATE_0; 54 return pContext; 55 } 56 57 void ImplResetEucTwToUnicodeContext(void * pContext) 58 { 59 if (pContext) 60 ((ImplEucTwToUnicodeContext *) pContext)->m_eState 61 = IMPL_EUC_TW_TO_UNICODE_STATE_0; 62 } 63 64 sal_Size ImplConvertEucTwToUnicode(ImplTextConverterData const * pData, 65 void * pContext, 66 sal_Char const * pSrcBuf, 67 sal_Size nSrcBytes, 68 sal_Unicode * pDestBuf, 69 sal_Size nDestChars, 70 sal_uInt32 nFlags, 71 sal_uInt32 * pInfo, 72 sal_Size * pSrcCvtBytes) 73 { 74 sal_uInt16 const * pCns116431992Data 75 = ((ImplEucTwConverterData const *) pData)-> 76 m_pCns116431992ToUnicodeData; 77 sal_Int32 const * pCns116431992RowOffsets 78 = ((ImplEucTwConverterData const *) pData)-> 79 m_pCns116431992ToUnicodeRowOffsets; 80 sal_Int32 const * pCns116431992PlaneOffsets 81 = ((ImplEucTwConverterData const *) pData)-> 82 m_pCns116431992ToUnicodePlaneOffsets; 83 ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; 84 sal_Int32 nPlane = 0; 85 sal_Int32 nRow = 0; 86 sal_uInt32 nInfo = 0; 87 sal_Size nConverted = 0; 88 sal_Unicode * pDestBufPtr = pDestBuf; 89 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; 90 91 if (pContext) 92 { 93 eState = ((ImplEucTwToUnicodeContext *) pContext)->m_eState; 94 nPlane = ((ImplEucTwToUnicodeContext *) pContext)->m_nPlane; 95 nRow = ((ImplEucTwToUnicodeContext *) pContext)->m_nRow; 96 } 97 98 for (; nConverted < nSrcBytes; ++nConverted) 99 { 100 sal_Bool bUndefined = sal_True; 101 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; 102 switch (eState) 103 { 104 case IMPL_EUC_TW_TO_UNICODE_STATE_0: 105 if (nChar < 0x80) 106 if (pDestBufPtr != pDestBufEnd) 107 *pDestBufPtr++ = (sal_Unicode) nChar; 108 else 109 goto no_output; 110 else if (nChar >= 0xA1 && nChar <= 0xFE) 111 { 112 nRow = nChar - 0xA1; 113 eState = IMPL_EUC_TW_TO_UNICODE_STATE_1; 114 } 115 else if (nChar == 0x8E) 116 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1; 117 else 118 { 119 bUndefined = sal_False; 120 goto bad_input; 121 } 122 break; 123 124 case IMPL_EUC_TW_TO_UNICODE_STATE_1: 125 if (nChar >= 0xA1 && nChar <= 0xFE) 126 { 127 nPlane = 0; 128 goto transform; 129 } 130 else 131 { 132 bUndefined = sal_False; 133 goto bad_input; 134 } 135 break; 136 137 case IMPL_EUC_TW_TO_UNICODE_STATE_2_1: 138 if (nChar >= 0xA1 && nChar <= 0xB0) 139 { 140 nPlane = nChar - 0xA1; 141 ++eState; 142 } 143 else 144 { 145 bUndefined = sal_False; 146 goto bad_input; 147 } 148 break; 149 150 case IMPL_EUC_TW_TO_UNICODE_STATE_2_2: 151 if (nChar >= 0xA1 && nChar <= 0xFE) 152 { 153 nRow = nChar - 0xA1; 154 ++eState; 155 } 156 else 157 { 158 bUndefined = sal_False; 159 goto bad_input; 160 } 161 break; 162 163 case IMPL_EUC_TW_TO_UNICODE_STATE_2_3: 164 if (nChar >= 0xA1 && nChar <= 0xFE) 165 goto transform; 166 else 167 { 168 bUndefined = sal_False; 169 goto bad_input; 170 } 171 break; 172 } 173 continue; 174 175 transform: 176 { 177 sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane]; 178 if (nPlaneOffset == -1) 179 goto bad_input; 180 else 181 { 182 sal_Int32 nOffset 183 = pCns116431992RowOffsets[nPlaneOffset + nRow]; 184 if (nOffset == -1) 185 goto bad_input; 186 else 187 { 188 sal_uInt32 nFirstLast = pCns116431992Data[nOffset++]; 189 sal_uInt32 nFirst = nFirstLast & 0xFF; 190 sal_uInt32 nLast = nFirstLast >> 8; 191 nChar -= 0xA0; 192 if (nChar >= nFirst && nChar <= nLast) 193 { 194 sal_uInt32 nUnicode 195 = pCns116431992Data[nOffset + (nChar - nFirst)]; 196 if (nUnicode == 0xFFFF) 197 goto bad_input; 198 else if (ImplIsHighSurrogate(nUnicode)) 199 if (pDestBufEnd - pDestBufPtr >= 2) 200 { 201 nOffset += nLast - nFirst + 1; 202 nFirst = pCns116431992Data[nOffset++]; 203 *pDestBufPtr++ = (sal_Unicode) nUnicode; 204 *pDestBufPtr++ 205 = (sal_Unicode) 206 pCns116431992Data[ 207 nOffset + (nChar - nFirst)]; 208 } 209 else 210 goto no_output; 211 else 212 if (pDestBufPtr != pDestBufEnd) 213 *pDestBufPtr++ = (sal_Unicode) nUnicode; 214 else 215 goto no_output; 216 } 217 else 218 goto bad_input; 219 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; 220 } 221 } 222 continue; 223 } 224 225 bad_input: 226 switch (ImplHandleBadInputTextToUnicodeConversion( 227 bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, 228 &nInfo)) 229 { 230 case IMPL_BAD_INPUT_STOP: 231 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; 232 break; 233 234 case IMPL_BAD_INPUT_CONTINUE: 235 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; 236 continue; 237 238 case IMPL_BAD_INPUT_NO_OUTPUT: 239 goto no_output; 240 } 241 break; 242 243 no_output: 244 --pSrcBuf; 245 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 246 break; 247 } 248 249 if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0 250 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR 251 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) 252 == 0) 253 { 254 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) 255 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 256 else 257 switch (ImplHandleBadInputTextToUnicodeConversion( 258 sal_False, sal_True, 0, nFlags, &pDestBufPtr, 259 pDestBufEnd, &nInfo)) 260 { 261 case IMPL_BAD_INPUT_STOP: 262 case IMPL_BAD_INPUT_CONTINUE: 263 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; 264 break; 265 266 case IMPL_BAD_INPUT_NO_OUTPUT: 267 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 268 break; 269 } 270 } 271 272 if (pContext) 273 { 274 ((ImplEucTwToUnicodeContext *) pContext)->m_eState = eState; 275 ((ImplEucTwToUnicodeContext *) pContext)->m_nPlane = nPlane; 276 ((ImplEucTwToUnicodeContext *) pContext)->m_nRow = nRow; 277 } 278 if (pInfo) 279 *pInfo = nInfo; 280 if (pSrcCvtBytes) 281 *pSrcCvtBytes = nConverted; 282 283 return pDestBufPtr - pDestBuf; 284 } 285 286 sal_Size ImplConvertUnicodeToEucTw(ImplTextConverterData const * pData, 287 void * pContext, 288 sal_Unicode const * pSrcBuf, 289 sal_Size nSrcChars, 290 sal_Char * pDestBuf, 291 sal_Size nDestBytes, 292 sal_uInt32 nFlags, 293 sal_uInt32 * pInfo, 294 sal_Size * pSrcCvtChars) 295 { 296 sal_uInt8 const * pCns116431992Data 297 = ((ImplEucTwConverterData const *) pData)-> 298 m_pUnicodeToCns116431992Data; 299 sal_Int32 const * pCns116431992PageOffsets 300 = ((ImplEucTwConverterData const *) pData)-> 301 m_pUnicodeToCns116431992PageOffsets; 302 sal_Int32 const * pCns116431992PlaneOffsets 303 = ((ImplEucTwConverterData const *) pData)-> 304 m_pUnicodeToCns116431992PlaneOffsets; 305 sal_Unicode nHighSurrogate = 0; 306 sal_uInt32 nInfo = 0; 307 sal_Size nConverted = 0; 308 sal_Char * pDestBufPtr = pDestBuf; 309 sal_Char * pDestBufEnd = pDestBuf + nDestBytes; 310 311 if (pContext) 312 nHighSurrogate 313 = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate; 314 315 for (; nConverted < nSrcChars; ++nConverted) 316 { 317 sal_Bool bUndefined = sal_True; 318 sal_uInt32 nChar = *pSrcBuf++; 319 if (nHighSurrogate == 0) 320 { 321 if (ImplIsHighSurrogate(nChar)) 322 { 323 nHighSurrogate = (sal_Unicode) nChar; 324 continue; 325 } 326 } 327 else if (ImplIsLowSurrogate(nChar)) 328 nChar = ImplCombineSurrogates(nHighSurrogate, nChar); 329 else 330 { 331 bUndefined = sal_False; 332 goto bad_input; 333 } 334 335 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) 336 { 337 bUndefined = sal_False; 338 goto bad_input; 339 } 340 341 if (nChar < 0x80) 342 if (pDestBufPtr != pDestBufEnd) 343 *pDestBufPtr++ = (sal_Char) nChar; 344 else 345 goto no_output; 346 else 347 { 348 sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16]; 349 sal_uInt32 nFirst; 350 sal_uInt32 nLast; 351 sal_uInt32 nPlane; 352 if (nOffset == -1) 353 goto bad_input; 354 nOffset 355 = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)]; 356 if (nOffset == -1) 357 goto bad_input; 358 nFirst = pCns116431992Data[nOffset++]; 359 nLast = pCns116431992Data[nOffset++]; 360 nChar &= 0xFF; 361 if (nChar < nFirst || nChar > nLast) 362 goto bad_input; 363 nOffset += 3 * (nChar - nFirst); 364 nPlane = pCns116431992Data[nOffset++]; 365 if (nPlane == 0) 366 goto bad_input; 367 if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 2 : 4)) 368 goto no_output; 369 if (nPlane != 1) 370 { 371 *pDestBufPtr++ = (sal_Char) (unsigned char) 0x8E; 372 *pDestBufPtr++ = (sal_Char) (0xA0 + nPlane); 373 } 374 *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset++]); 375 *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset]); 376 } 377 nHighSurrogate = 0; 378 continue; 379 380 bad_input: 381 switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, 382 nChar, 383 nFlags, 384 &pDestBufPtr, 385 pDestBufEnd, 386 &nInfo, 387 NULL, 388 0, 389 NULL)) 390 { 391 case IMPL_BAD_INPUT_STOP: 392 nHighSurrogate = 0; 393 break; 394 395 case IMPL_BAD_INPUT_CONTINUE: 396 nHighSurrogate = 0; 397 continue; 398 399 case IMPL_BAD_INPUT_NO_OUTPUT: 400 goto no_output; 401 } 402 break; 403 404 no_output: 405 --pSrcBuf; 406 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 407 break; 408 } 409 410 if (nHighSurrogate != 0 411 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR 412 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) 413 == 0) 414 { 415 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) 416 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; 417 else 418 switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 419 0, 420 nFlags, 421 &pDestBufPtr, 422 pDestBufEnd, 423 &nInfo, 424 NULL, 425 0, 426 NULL)) 427 { 428 case IMPL_BAD_INPUT_STOP: 429 case IMPL_BAD_INPUT_CONTINUE: 430 nHighSurrogate = 0; 431 break; 432 433 case IMPL_BAD_INPUT_NO_OUTPUT: 434 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 435 break; 436 } 437 } 438 439 if (pContext) 440 ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate 441 = nHighSurrogate; 442 if (pInfo) 443 *pInfo = nInfo; 444 if (pSrcCvtChars) 445 *pSrcCvtChars = nConverted; 446 447 return pDestBufPtr - pDestBuf; 448 } 449