1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #include "tenchelp.h" 25 #include "unichars.h" 26 27 #ifndef _RTL_ALLOC_H 28 #include "rtl/alloc.h" 29 #endif 30 #include "rtl/textcvt.h" 31 32 /* ======================================================================= */ 33 34 static sal_uChar const aImplBase64Tab[64] = 35 { 36 /* A-Z */ 37 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 38 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 39 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 40 0x58, 0x59, 0x5A, 41 /* a-z */ 42 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 43 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 44 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 45 0x78, 0x79, 0x7A, 46 /* 0-9,+,/ */ 47 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 48 0x38, 0x39, 0x2B, 0x2F 49 }; 50 51 /* Index in Base64Tab or 0xFF, when is a invalid character */ 52 static sal_uChar const aImplBase64IndexTab[128] = 53 { 54 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00-0x07 */ 55 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08-0x0F */ 56 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10-0x17 */ 57 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18-0x1F */ 58 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20-0x27 !"#$%&' */ 59 0xFF, 0xFF, 0xFF, 62, 0xFF, 0xFF, 0xFF, 63, /* 0x28-0x2F ()*+,-./ */ 60 52, 53, 54, 55, 56, 57, 58, 59, /* 0x30-0x37 01234567 */ 61 60, 61, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F 89:;<=>? */ 62 0xFF, 0, 1, 2, 3, 4, 5, 6, /* 0x40-0x47 @ABCDEFG */ 63 7, 8, 9, 10, 11, 12, 13, 14, /* 0x48-0x4F HIJKLMNO */ 64 15, 16, 17, 18, 19, 20, 21, 22, /* 0x50-0x57 PQRSTUVW */ 65 23, 24, 25, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5F XYZ[\]^_ */ 66 0xFF, 26, 27, 28, 29, 30, 31, 32, /* 0x60-0x67 `abcdefg */ 67 33, 34, 35, 36, 37, 38, 39, 40, /* 0x68-0x6F hijklmno */ 68 41, 42, 43, 44, 45, 46, 47, 48, /* 0x70-0x77 pqrstuvw */ 69 49, 50, 51, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF /* 0x78-0x7F xyz{|}~ */ 70 }; 71 72 static sal_uChar const aImplMustShiftTab[128] = 73 { 74 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x07 */ 75 1, 0, 0, 1, 0, 1, 1, 1, /* 0x08-0x0F 0x09 == HTAB, 0x0A == LF 0x0C == CR */ 76 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x17 */ 77 1, 1, 1, 1, 1, 1, 1, 1, /* 0x18-0x1F */ 78 0, 1, 1, 1, 1, 1, 1, 0, /* 0x20-0x27 !"#$%&' */ 79 0, 0, 1, 1, 0, 1, 0, 0, /* 0x28-0x2F ()*+,-./ */ 80 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30-0x37 01234567 */ 81 0, 0, 0, 1, 1, 1, 1, 0, /* 0x38-0x3F 89:;<=>? */ 82 1, 0, 0, 0, 0, 0, 0, 0, /* 0x40-0x47 @ABCDEFG */ 83 0, 0, 0, 0, 0, 0, 0, 0, /* 0x48-0x4F HIJKLMNO */ 84 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50-0x57 PQRSTUVW */ 85 0, 0, 0, 1, 1, 1, 1, 1, /* 0x58-0x5F XYZ[\]^_ */ 86 1, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x67 `abcdefg */ 87 0, 0, 0, 0, 0, 0, 0, 0, /* 0x68-0x6F hijklmno */ 88 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70-0x77 pqrstuvw */ 89 0, 0, 0, 1, 1, 1, 1, 1 /* 0x78-0x7F xyz{|}~ */ 90 }; 91 92 /* + */ 93 #define IMPL_SHIFT_IN_CHAR 0x2B 94 /* - */ 95 #define IMPL_SHIFT_OUT_CHAR 0x2D 96 97 /* ----------------------------------------------------------------------- */ 98 99 typedef struct 100 { 101 int mbShifted; 102 int mbFirst; 103 int mbWroteOne; 104 sal_uInt32 mnBitBuffer; 105 sal_uInt32 mnBufferBits; 106 } ImplUTF7ToUCContextData; 107 108 /* ----------------------------------------------------------------------- */ 109 110 void* ImplUTF7CreateUTF7TextToUnicodeContext( void ) 111 { 112 ImplUTF7ToUCContextData* pContextData; 113 pContextData = (ImplUTF7ToUCContextData*)rtl_allocateMemory( sizeof( ImplUTF7ToUCContextData ) ); 114 pContextData->mbShifted = sal_False; 115 pContextData->mbFirst = sal_False; 116 pContextData->mbWroteOne = sal_False; 117 pContextData->mnBitBuffer = 0; 118 pContextData->mnBufferBits = 0; 119 return (void*)pContextData; 120 } 121 122 /* ----------------------------------------------------------------------- */ 123 124 void ImplUTF7DestroyTextToUnicodeContext( void* pContext ) 125 { 126 rtl_freeMemory( pContext ); 127 } 128 129 /* ----------------------------------------------------------------------- */ 130 131 void ImplUTF7ResetTextToUnicodeContext( void* pContext ) 132 { 133 ImplUTF7ToUCContextData* pContextData = (ImplUTF7ToUCContextData*)pContext; 134 pContextData->mbShifted = sal_False; 135 pContextData->mbFirst = sal_False; 136 pContextData->mbWroteOne = sal_False; 137 pContextData->mnBitBuffer = 0; 138 pContextData->mnBufferBits = 0; 139 } 140 141 /* ----------------------------------------------------------------------- */ 142 143 sal_Size ImplUTF7ToUnicode( const ImplTextConverterData* pData, void* pContext, 144 const sal_Char* pSrcBuf, sal_Size nSrcBytes, 145 sal_Unicode* pDestBuf, sal_Size nDestChars, 146 sal_uInt32 nFlags, sal_uInt32* pInfo, 147 sal_Size* pSrcCvtBytes ) 148 { 149 ImplUTF7ToUCContextData* pContextData = (ImplUTF7ToUCContextData*)pContext; 150 sal_uChar c ='\0'; 151 sal_uChar nBase64Value = 0; 152 int bEnd = sal_False; 153 int bShifted; 154 int bFirst; 155 int bWroteOne; 156 int bBase64End; 157 sal_uInt32 nBitBuffer; 158 sal_uInt32 nBitBufferTemp; 159 sal_uInt32 nBufferBits; 160 sal_Unicode* pEndDestBuf; 161 const sal_Char* pEndSrcBuf; 162 163 (void) pData; /* unused */ 164 165 /* !!! Implementation not finnished !!! 166 if ( pContextData ) 167 { 168 bShifted = pContextData->mbShifted; 169 bFirst = pContextData->mbFirst; 170 bWroteOne = pContextData->mbWroteOne; 171 nBitBuffer = pContextData->mnBitBuffer; 172 nBufferBits = pContextData->mnBufferBits; 173 } 174 else 175 */ 176 { 177 bShifted = sal_False; 178 bFirst = sal_False; 179 bWroteOne = sal_False; 180 nBitBuffer = 0; 181 nBufferBits = 0; 182 } 183 184 *pInfo = 0; 185 pEndDestBuf = pDestBuf+nDestChars; 186 pEndSrcBuf = pSrcBuf+nSrcBytes; 187 do 188 { 189 if ( pSrcBuf < pEndSrcBuf ) 190 { 191 c = (sal_uChar)*pSrcBuf; 192 193 /* End, when not a base64 character */ 194 bBase64End = sal_False; 195 if ( c <= 0x7F ) 196 { 197 nBase64Value = aImplBase64IndexTab[c]; 198 if ( nBase64Value == 0xFF ) 199 bBase64End = sal_True; 200 } 201 } 202 else 203 { 204 bEnd = sal_True; 205 bBase64End = sal_True; 206 } 207 208 if ( bShifted ) 209 { 210 if ( bBase64End ) 211 { 212 bShifted = sal_False; 213 214 /* If the character causing us to drop out was SHIFT_IN */ 215 /* or SHIFT_OUT, it may be a special escape for SHIFT_IN. */ 216 /* The test for SHIFT_IN is not necessary, but allows */ 217 /* an alternate form of UTF-7 where SHIFT_IN is escaped */ 218 /* by SHIFT_IN. This only works for some values of */ 219 /* SHIFT_IN. It is so implemented, because this comes */ 220 /* from the officel unicode book (The Unicode Standard, */ 221 /* Version 2.0) and so I think, that someone of the */ 222 /* world has used this feature. */ 223 if ( !bEnd ) 224 { 225 if ( (c == IMPL_SHIFT_IN_CHAR) || (c == IMPL_SHIFT_OUT_CHAR) ) 226 { 227 /* If no base64 character, and the terminating */ 228 /* character of the shift sequence was the */ 229 /* SHIFT_OUT_CHAR, then it't a special escape */ 230 /* for SHIFT_IN_CHAR. */ 231 if ( bFirst && (c == IMPL_SHIFT_OUT_CHAR) ) 232 { 233 if ( pDestBuf >= pEndDestBuf ) 234 { 235 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 236 break; 237 } 238 *pDestBuf = IMPL_SHIFT_IN_CHAR; 239 pDestBuf++; 240 bWroteOne = sal_True; 241 } 242 243 /* Skip character */ 244 pSrcBuf++; 245 if ( pSrcBuf < pEndSrcBuf ) 246 c = (sal_uChar)*pSrcBuf; 247 else 248 bEnd = sal_True; 249 } 250 } 251 252 /* Empty sequence not allowed, so when we don't write one */ 253 /* valid char, then the sequence is corrupt */ 254 if ( !bWroteOne ) 255 { 256 /* When no more bytes in the source buffer, then */ 257 /* this buffer may be to small */ 258 if ( bEnd ) 259 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 260 else 261 { 262 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; 263 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) 264 { 265 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 266 break; 267 } 268 /* We insert here no default char, because I think */ 269 /* this is better to ignore this */ 270 } 271 } 272 } 273 else 274 { 275 /* Add 6 Bits from character to the bit buffer */ 276 nBufferBits += 6; 277 nBitBuffer |= ((sal_uInt32)(nBase64Value & 0x3F)) << (32-nBufferBits); 278 bFirst = sal_False; 279 } 280 281 /* Extract as many full 16 bit characters as possible from the */ 282 /* bit buffer. */ 283 while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 16) ) 284 { 285 nBitBufferTemp = nBitBuffer >> (32-16); 286 *pDestBuf = (sal_Unicode)((nBitBufferTemp) & 0xFFFF); 287 pDestBuf++; 288 nBitBuffer <<= 16; 289 nBufferBits -= 16; 290 bWroteOne = sal_True; 291 } 292 293 if ( nBufferBits >= 16 ) 294 { 295 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 296 break; 297 } 298 299 if ( bBase64End ) 300 { 301 /* Sequence ended and we have some bits, then the */ 302 /* sequence is corrupted */ 303 if ( nBufferBits && nBitBuffer ) 304 { 305 /* When no more bytes in the source buffer, then */ 306 /* this buffer may be to small */ 307 if ( bEnd ) 308 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 309 else 310 { 311 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; 312 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) 313 { 314 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 315 break; 316 } 317 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) 318 { 319 if ( pDestBuf >= pEndDestBuf ) 320 { 321 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 322 break; 323 } 324 *pDestBuf++ 325 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 326 } 327 } 328 329 } 330 331 nBitBuffer = 0; 332 nBufferBits = 0; 333 } 334 } 335 336 if ( !bEnd ) 337 { 338 if ( !bShifted ) 339 { 340 if ( c == IMPL_SHIFT_IN_CHAR ) 341 { 342 bShifted = sal_True; 343 bFirst = sal_True; 344 bWroteOne = sal_False; 345 } 346 else 347 { 348 /* No direct encoded charcater, then the buffer is */ 349 /* corrupt */ 350 if ( c > 0x7F ) 351 { 352 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; 353 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) 354 { 355 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 356 break; 357 } 358 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) 359 { 360 if ( pDestBuf >= pEndDestBuf ) 361 { 362 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 363 break; 364 } 365 *pDestBuf++ 366 = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 367 } 368 } 369 370 /* Write char to unicode buffer */ 371 if ( pDestBuf >= pEndDestBuf ) 372 { 373 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 374 break; 375 } 376 *pDestBuf = c; 377 pDestBuf++; 378 } 379 } 380 381 pSrcBuf++; 382 } 383 } 384 while ( !bEnd ); 385 386 if ( pContextData ) 387 { 388 pContextData->mbShifted = bShifted; 389 pContextData->mbFirst = bFirst; 390 pContextData->mbWroteOne = bWroteOne; 391 pContextData->mnBitBuffer = nBitBuffer; 392 pContextData->mnBufferBits = nBufferBits; 393 } 394 395 *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); 396 return (nDestChars - (pEndDestBuf-pDestBuf)); 397 } 398 399 /* ======================================================================= */ 400 401 typedef struct 402 { 403 int mbShifted; 404 sal_uInt32 mnBitBuffer; 405 sal_uInt32 mnBufferBits; 406 } ImplUTF7FromUCContextData; 407 408 /* ----------------------------------------------------------------------- */ 409 410 void* ImplUTF7CreateUnicodeToTextContext( void ) 411 { 412 ImplUTF7FromUCContextData* pContextData; 413 pContextData = (ImplUTF7FromUCContextData*)rtl_allocateMemory( sizeof( ImplUTF7FromUCContextData ) ); 414 pContextData->mbShifted = sal_False; 415 pContextData->mnBitBuffer = 0; 416 pContextData->mnBufferBits = 0; 417 return (void*)pContextData; 418 } 419 420 /* ----------------------------------------------------------------------- */ 421 422 void ImplUTF7DestroyUnicodeToTextContext( void* pContext ) 423 { 424 rtl_freeMemory( pContext ); 425 } 426 427 /* ----------------------------------------------------------------------- */ 428 429 void ImplUTF7ResetUnicodeToTextContext( void* pContext ) 430 { 431 ImplUTF7FromUCContextData* pContextData = (ImplUTF7FromUCContextData*)pContext; 432 pContextData->mbShifted = sal_False; 433 pContextData->mnBitBuffer = 0; 434 pContextData->mnBufferBits = 0; 435 } 436 437 /* ----------------------------------------------------------------------- */ 438 439 sal_Size ImplUnicodeToUTF7( const ImplTextConverterData* pData, void* pContext, 440 const sal_Unicode* pSrcBuf, sal_Size nSrcChars, 441 sal_Char* pDestBuf, sal_Size nDestBytes, 442 sal_uInt32 nFlags, sal_uInt32* pInfo, 443 sal_Size* pSrcCvtChars ) 444 { 445 ImplUTF7FromUCContextData* pContextData = (ImplUTF7FromUCContextData*)pContext; 446 sal_Unicode c = '\0'; 447 int bEnd = sal_False; 448 int bShifted; 449 int bNeedShift; 450 sal_uInt32 nBitBuffer; 451 sal_uInt32 nBitBufferTemp; 452 sal_uInt32 nBufferBits; 453 sal_Char* pEndDestBuf; 454 const sal_Unicode* pEndSrcBuf; 455 456 (void) pData; /* unused */ 457 (void) nFlags; /* unused */ 458 459 /* !!! Implementation not finnished !!! 460 if ( pContextData ) 461 { 462 bShifted = pContextData->mbShifted; 463 nBitBuffer = pContextData->mnBitBuffer; 464 nBufferBits = pContextData->mnBufferBits; 465 } 466 else 467 */ 468 { 469 bShifted = sal_False; 470 nBitBuffer = 0; 471 nBufferBits = 0; 472 } 473 474 *pInfo = 0; 475 pEndDestBuf = pDestBuf+nDestBytes; 476 pEndSrcBuf = pSrcBuf+nSrcChars; 477 do 478 { 479 if ( pSrcBuf < pEndSrcBuf ) 480 { 481 c = *pSrcBuf; 482 483 bNeedShift = (c > 0x7F) || aImplMustShiftTab[c]; 484 if ( bNeedShift && !bShifted ) 485 { 486 if ( pDestBuf >= pEndDestBuf ) 487 { 488 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 489 break; 490 } 491 *pDestBuf = IMPL_SHIFT_IN_CHAR; 492 pDestBuf++; 493 /* Special case handling for SHIFT_IN_CHAR */ 494 if ( c == IMPL_SHIFT_IN_CHAR ) 495 { 496 if ( pDestBuf >= pEndDestBuf ) 497 { 498 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 499 break; 500 } 501 *pDestBuf = IMPL_SHIFT_OUT_CHAR; 502 pDestBuf++; 503 } 504 else 505 bShifted = sal_True; 506 } 507 } 508 else 509 { 510 bEnd = sal_True; 511 bNeedShift = sal_False; 512 } 513 514 if ( bShifted ) 515 { 516 /* Write the character to the bit buffer, or pad the bit */ 517 /* buffer out to a full base64 character */ 518 if ( bNeedShift ) 519 { 520 nBufferBits += 16; 521 nBitBuffer |= ((sal_uInt32)c) << (32-nBufferBits); 522 } 523 else 524 nBufferBits += (6-(nBufferBits%6))%6; 525 526 /* Flush out as many full base64 characters as possible */ 527 while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 6) ) 528 { 529 nBitBufferTemp = nBitBuffer >> (32-6); 530 *pDestBuf = aImplBase64Tab[nBitBufferTemp]; 531 pDestBuf++; 532 nBitBuffer <<= 6; 533 nBufferBits -= 6; 534 } 535 536 if ( nBufferBits >= 6 ) 537 { 538 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 539 break; 540 } 541 542 /* Write SHIFT_OUT_CHAR, when needed */ 543 if ( !bNeedShift ) 544 { 545 if ( pDestBuf >= pEndDestBuf ) 546 { 547 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 548 break; 549 } 550 *pDestBuf = IMPL_SHIFT_OUT_CHAR; 551 pDestBuf++; 552 bShifted = sal_False; 553 } 554 } 555 556 if ( !bEnd ) 557 { 558 /* Character can be directly endcoded */ 559 if ( !bNeedShift ) 560 { 561 if ( pDestBuf >= pEndDestBuf ) 562 { 563 *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 564 break; 565 } 566 *pDestBuf = (sal_Char)(sal_uChar)c; 567 pDestBuf++; 568 } 569 570 pSrcBuf++; 571 } 572 } 573 while ( !bEnd ); 574 575 if ( pContextData ) 576 { 577 pContextData->mbShifted = bShifted; 578 pContextData->mnBitBuffer = nBitBuffer; 579 pContextData->mnBufferBits = nBufferBits; 580 } 581 582 *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); 583 return (nDestBytes - (pEndDestBuf-pDestBuf)); 584 } 585