1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #include "convertiso2022kr.h" 25 #include "context.h" 26 #include "converter.h" 27 #include "tenchelp.h" 28 #include "unichars.h" 29 #include "rtl/alloc.h" 30 #include "rtl/textcvt.h" 31 #include "sal/types.h" 32 33 typedef enum /* order is important: */ 34 { 35 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII, 36 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001, 37 IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2, 38 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC, 39 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR, 40 IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN 41 } ImplIso2022KrToUnicodeState; 42 43 typedef struct 44 { 45 ImplIso2022KrToUnicodeState m_eState; 46 sal_uInt32 m_nRow; 47 } ImplIso2022KrToUnicodeContext; 48 49 typedef enum 50 { 51 IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE, 52 IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII, 53 IMPL_UNICODE_TO_ISO_2022_KR_SET_1001 54 } ImplUnicodeToIso2022KrSet; 55 56 typedef struct 57 { 58 sal_Unicode m_nHighSurrogate; 59 ImplUnicodeToIso2022KrSet m_eSet; 60 } ImplUnicodeToIso2022KrContext; 61 62 void * ImplCreateIso2022KrToUnicodeContext(void) 63 { 64 void * pContext 65 = rtl_allocateMemory(sizeof (ImplIso2022KrToUnicodeContext)); 66 ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState 67 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; 68 return pContext; 69 } 70 71 void ImplResetIso2022KrToUnicodeContext(void * pContext) 72 { 73 if (pContext) 74 ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState 75 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; 76 } 77 78 sal_Size ImplConvertIso2022KrToUnicode(ImplTextConverterData const * pData, 79 void * pContext, 80 sal_Char const * pSrcBuf, 81 sal_Size nSrcBytes, 82 sal_Unicode * pDestBuf, 83 sal_Size nDestChars, 84 sal_uInt32 nFlags, 85 sal_uInt32 * pInfo, 86 sal_Size * pSrcCvtBytes) 87 { 88 ImplDBCSToUniLeadTab const * pKsX1001Data 89 = ((ImplIso2022KrConverterData const *) pData)-> 90 m_pKsX1001ToUnicodeData; 91 ImplIso2022KrToUnicodeState eState 92 = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; 93 sal_uInt32 nRow = 0; 94 sal_uInt32 nInfo = 0; 95 sal_Size nConverted = 0; 96 sal_Unicode * pDestBufPtr = pDestBuf; 97 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; 98 99 if (pContext) 100 { 101 eState = ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState; 102 nRow = ((ImplIso2022KrToUnicodeContext *) pContext)->m_nRow; 103 } 104 105 for (; nConverted < nSrcBytes; ++nConverted) 106 { 107 sal_Bool bUndefined = sal_True; 108 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; 109 switch (eState) 110 { 111 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII: 112 if (nChar == 0x0E) /* SO */ 113 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001; 114 else if (nChar == 0x1B) /* ESC */ 115 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC; 116 else if (nChar < 0x80) 117 if (pDestBufPtr != pDestBufEnd) 118 *pDestBufPtr++ = (sal_Unicode) nChar; 119 else 120 goto no_output; 121 else 122 { 123 bUndefined = sal_False; 124 goto bad_input; 125 } 126 break; 127 128 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001: 129 if (nChar == 0x0F) /* SI */ 130 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; 131 else if (nChar >= 0x21 && nChar <= 0x7E) 132 { 133 nRow = nChar + 0x80; 134 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2; 135 } 136 else 137 { 138 bUndefined = sal_False; 139 goto bad_input; 140 } 141 break; 142 143 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001_2: 144 if (nChar >= 0x21 && nChar <= 0x7E) 145 { 146 sal_uInt16 nUnicode = 0; 147 sal_uInt32 nFirst = pKsX1001Data[nRow].mnTrailStart; 148 nChar += 0x80; 149 if (nChar >= nFirst && nChar <= pKsX1001Data[nRow].mnTrailEnd) 150 nUnicode = pKsX1001Data[nRow]. 151 mpToUniTrailTab[nChar - nFirst]; 152 if (nUnicode != 0) 153 if (pDestBufPtr != pDestBufEnd) 154 { 155 *pDestBufPtr++ = (sal_Unicode) nUnicode; 156 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001; 157 } 158 else 159 goto no_output; 160 else 161 goto bad_input; 162 } 163 else 164 { 165 bUndefined = sal_False; 166 goto bad_input; 167 } 168 break; 169 170 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC: 171 if (nChar == 0x24) /* $ */ 172 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR; 173 else 174 { 175 bUndefined = sal_False; 176 goto bad_input; 177 } 178 break; 179 180 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR: 181 if (nChar == 0x29) /* ) */ 182 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN; 183 else 184 { 185 bUndefined = sal_False; 186 goto bad_input; 187 } 188 break; 189 190 case IMPL_ISO_2022_KR_TO_UNICODE_STATE_ESC_DOLLAR_RPAREN: 191 if (nChar == 0x43) /* C */ 192 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; 193 else 194 { 195 bUndefined = sal_False; 196 goto bad_input; 197 } 198 break; 199 } 200 continue; 201 202 bad_input: 203 switch (ImplHandleBadInputTextToUnicodeConversion( 204 bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, 205 &nInfo)) 206 { 207 case IMPL_BAD_INPUT_STOP: 208 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; 209 break; 210 211 case IMPL_BAD_INPUT_CONTINUE: 212 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; 213 continue; 214 215 case IMPL_BAD_INPUT_NO_OUTPUT: 216 goto no_output; 217 } 218 break; 219 220 no_output: 221 --pSrcBuf; 222 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 223 break; 224 } 225 226 if (eState > IMPL_ISO_2022_KR_TO_UNICODE_STATE_1001 227 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR 228 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) 229 == 0) 230 { 231 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) 232 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 233 else 234 switch (ImplHandleBadInputTextToUnicodeConversion( 235 sal_False, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, 236 &nInfo)) 237 { 238 case IMPL_BAD_INPUT_STOP: 239 case IMPL_BAD_INPUT_CONTINUE: 240 eState = IMPL_ISO_2022_KR_TO_UNICODE_STATE_ASCII; 241 break; 242 243 case IMPL_BAD_INPUT_NO_OUTPUT: 244 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 245 break; 246 } 247 } 248 249 if (pContext) 250 { 251 ((ImplIso2022KrToUnicodeContext *) pContext)->m_eState = eState; 252 ((ImplIso2022KrToUnicodeContext *) pContext)->m_nRow = nRow; 253 } 254 if (pInfo) 255 *pInfo = nInfo; 256 if (pSrcCvtBytes) 257 *pSrcCvtBytes = nConverted; 258 259 return pDestBufPtr - pDestBuf; 260 } 261 262 void * ImplCreateUnicodeToIso2022KrContext(void) 263 { 264 void * pContext 265 = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022KrContext)); 266 ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate = 0; 267 ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet 268 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE; 269 return pContext; 270 } 271 272 void ImplResetUnicodeToIso2022KrContext(void * pContext) 273 { 274 if (pContext) 275 { 276 ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate = 0; 277 ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet 278 = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE; 279 } 280 } 281 282 sal_Size ImplConvertUnicodeToIso2022Kr(ImplTextConverterData const * pData, 283 void * pContext, 284 sal_Unicode const * pSrcBuf, 285 sal_Size nSrcChars, 286 sal_Char * pDestBuf, 287 sal_Size nDestBytes, 288 sal_uInt32 nFlags, 289 sal_uInt32 * pInfo, 290 sal_Size * pSrcCvtChars) 291 { 292 ImplUniToDBCSHighTab const * pKsX1001Data 293 = ((ImplIso2022KrConverterData const *) pData)-> 294 m_pUnicodeToKsX1001Data; 295 sal_Unicode nHighSurrogate = 0; 296 ImplUnicodeToIso2022KrSet eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE; 297 sal_uInt32 nInfo = 0; 298 sal_Size nConverted = 0; 299 sal_Char * pDestBufPtr = pDestBuf; 300 sal_Char * pDestBufEnd = pDestBuf + nDestBytes; 301 sal_Bool bWritten; 302 303 if (pContext) 304 { 305 nHighSurrogate 306 = ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate; 307 eSet = ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet; 308 } 309 310 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_NONE) 311 { 312 if (pDestBufEnd - pDestBufPtr >= 4) 313 { 314 *pDestBufPtr++ = 0x1B; /* ESC */ 315 *pDestBufPtr++ = 0x24; /* $ */ 316 *pDestBufPtr++ = 0x29; /* ) */ 317 *pDestBufPtr++ = 0x43; /* C */ 318 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; 319 } 320 else 321 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 322 } 323 324 if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0) 325 for (; nConverted < nSrcChars; ++nConverted) 326 { 327 sal_Bool bUndefined = sal_True; 328 sal_uInt32 nChar = *pSrcBuf++; 329 if (nHighSurrogate == 0) 330 { 331 if (ImplIsHighSurrogate(nChar)) 332 { 333 nHighSurrogate = (sal_Unicode) nChar; 334 continue; 335 } 336 } 337 else if (ImplIsLowSurrogate(nChar)) 338 nChar = ImplCombineSurrogates(nHighSurrogate, nChar); 339 else 340 { 341 bUndefined = sal_False; 342 goto bad_input; 343 } 344 345 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) 346 { 347 bUndefined = sal_False; 348 goto bad_input; 349 } 350 351 if (nChar == 0x0A || nChar == 0x0D) /* LF, CR */ 352 { 353 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001) 354 { 355 if (pDestBufPtr != pDestBufEnd) 356 { 357 *pDestBufPtr++ = 0x0F; /* SI */ 358 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; 359 } 360 else 361 goto no_output; 362 } 363 if (pDestBufPtr != pDestBufEnd) 364 *pDestBufPtr++ = (sal_Char) nChar; 365 else 366 goto no_output; 367 } 368 else if (nChar == 0x0E || nChar == 0x0F || nChar == 0x1B) 369 goto bad_input; 370 else if (nChar < 0x80) 371 { 372 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001) 373 { 374 if (pDestBufPtr != pDestBufEnd) 375 { 376 *pDestBufPtr++ = 0x0F; /* SI */ 377 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; 378 } 379 else 380 goto no_output; 381 } 382 if (pDestBufPtr != pDestBufEnd) 383 *pDestBufPtr++ = (sal_Char) nChar; 384 else 385 goto no_output; 386 } 387 else 388 { 389 sal_uInt16 nBytes = 0; 390 sal_uInt32 nIndex1 = nChar >> 8; 391 if (nIndex1 < 0x100) 392 { 393 sal_uInt32 nIndex2 = nChar & 0xFF; 394 sal_uInt32 nFirst = pKsX1001Data[nIndex1].mnLowStart; 395 if (nIndex2 >= nFirst 396 && nIndex2 <= pKsX1001Data[nIndex1].mnLowEnd) 397 nBytes = pKsX1001Data[nIndex1]. 398 mpToUniTrailTab[nIndex2 - nFirst]; 399 } 400 if (nBytes != 0) 401 { 402 if (eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII) 403 { 404 if (pDestBufPtr != pDestBufEnd) 405 { 406 *pDestBufPtr++ = 0x0E; /* SO */ 407 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_1001; 408 } 409 else 410 goto no_output; 411 } 412 if (pDestBufEnd - pDestBufPtr >= 2) 413 { 414 *pDestBufPtr++ = (sal_Char) ((nBytes >> 8) & 0x7F); 415 *pDestBufPtr++ = (sal_Char) (nBytes & 0x7F); 416 } 417 else 418 goto no_output; 419 } 420 else 421 goto bad_input; 422 } 423 nHighSurrogate = 0; 424 continue; 425 426 bad_input: 427 switch (ImplHandleBadInputUnicodeToTextConversion( 428 bUndefined, 429 nChar, 430 nFlags, 431 &pDestBufPtr, 432 pDestBufEnd, 433 &nInfo, 434 "\x0F", /* SI */ 435 eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ? 0 : 1, 436 &bWritten)) 437 { 438 case IMPL_BAD_INPUT_STOP: 439 nHighSurrogate = 0; 440 break; 441 442 case IMPL_BAD_INPUT_CONTINUE: 443 if (bWritten) 444 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; 445 nHighSurrogate = 0; 446 continue; 447 448 case IMPL_BAD_INPUT_NO_OUTPUT: 449 goto no_output; 450 } 451 break; 452 453 no_output: 454 --pSrcBuf; 455 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 456 break; 457 } 458 459 if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR 460 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) 461 == 0) 462 { 463 sal_Bool bFlush = sal_True; 464 if (nHighSurrogate != 0) 465 { 466 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) 467 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; 468 else 469 switch (ImplHandleBadInputUnicodeToTextConversion( 470 sal_False, 471 0, 472 nFlags, 473 &pDestBufPtr, 474 pDestBufEnd, 475 &nInfo, 476 "\x0F", /* SI */ 477 eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII ? 478 0 : 1, 479 &bWritten)) 480 { 481 case IMPL_BAD_INPUT_STOP: 482 nHighSurrogate = 0; 483 bFlush = sal_False; 484 break; 485 486 case IMPL_BAD_INPUT_CONTINUE: 487 if (bWritten) 488 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; 489 nHighSurrogate = 0; 490 break; 491 492 case IMPL_BAD_INPUT_NO_OUTPUT: 493 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 494 break; 495 } 496 } 497 if (bFlush 498 && eSet == IMPL_UNICODE_TO_ISO_2022_KR_SET_1001 499 && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) 500 { 501 if (pDestBufPtr != pDestBufEnd) 502 { 503 *pDestBufPtr++ = 0x0F; /* SI */ 504 eSet = IMPL_UNICODE_TO_ISO_2022_KR_SET_ASCII; 505 } 506 else 507 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 508 } 509 } 510 511 if (pContext) 512 { 513 ((ImplUnicodeToIso2022KrContext *) pContext)->m_nHighSurrogate 514 = nHighSurrogate; 515 ((ImplUnicodeToIso2022KrContext *) pContext)->m_eSet = eSet; 516 } 517 if (pInfo) 518 *pInfo = nInfo; 519 if (pSrcCvtChars) 520 *pSrcCvtChars = nConverted; 521 522 return pDestBufPtr - pDestBuf; 523 } 524