1*75272fefSAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*75272fefSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*75272fefSAndrew Rist * or more contributor license agreements. See the NOTICE file 5*75272fefSAndrew Rist * distributed with this work for additional information 6*75272fefSAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*75272fefSAndrew Rist * to you under the Apache License, Version 2.0 (the 8*75272fefSAndrew Rist * "License"); you may not use this file except in compliance 9*75272fefSAndrew Rist * with the License. You may obtain a copy of the License at 10cdf0e10cSrcweir * 11*75272fefSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12cdf0e10cSrcweir * 13*75272fefSAndrew Rist * Unless required by applicable law or agreed to in writing, 14*75272fefSAndrew Rist * software distributed under the License is distributed on an 15*75272fefSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*75272fefSAndrew Rist * KIND, either express or implied. See the License for the 17*75272fefSAndrew Rist * specific language governing permissions and limitations 18*75272fefSAndrew Rist * under the License. 19cdf0e10cSrcweir * 20*75272fefSAndrew Rist *************************************************************/ 21*75272fefSAndrew Rist 22*75272fefSAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // prevent internal compiler error with MSVC6SP3 25cdf0e10cSrcweir #include <utility> 26cdf0e10cSrcweir #include <i18nutil/widthfolding.hxx> 27cdf0e10cSrcweir #include <i18nutil/x_rtl_ustring.h> 28cdf0e10cSrcweir #include "widthfolding_data.h" 29cdf0e10cSrcweir 30cdf0e10cSrcweir using namespace com::sun::star::uno; 31cdf0e10cSrcweir using namespace rtl; 32cdf0e10cSrcweir 33cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n { 34cdf0e10cSrcweir 35cdf0e10cSrcweir sal_Unicode widthfolding::decompose_ja_voiced_sound_marksChar2Char (sal_Unicode inChar) 36cdf0e10cSrcweir { 37cdf0e10cSrcweir if (0x30a0 <= inChar && inChar <= 0x30ff) { 38cdf0e10cSrcweir sal_Int16 i = inChar - 0x3040; 39cdf0e10cSrcweir if (decomposition_table[i].decomposited_character_1) 40cdf0e10cSrcweir return 0xFFFF; 41cdf0e10cSrcweir } 42cdf0e10cSrcweir return inChar; 43cdf0e10cSrcweir } 44cdf0e10cSrcweir 45cdf0e10cSrcweir /** 46cdf0e10cSrcweir * Decompose Japanese specific voiced and semi-voiced sound marks. 47cdf0e10cSrcweir */ 48cdf0e10cSrcweir OUString widthfolding::decompose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset ) 49cdf0e10cSrcweir { 50cdf0e10cSrcweir // Create a string buffer which can hold nCount * 2 + 1 characters. 51cdf0e10cSrcweir // Its size may become double of nCount. 52cdf0e10cSrcweir rtl_uString * newStr; 53cdf0e10cSrcweir x_rtl_uString_new_WithLength( &newStr, nCount * 2 ); // defined in x_rtl_ustring.h The reference count is 0 now. 54cdf0e10cSrcweir 55cdf0e10cSrcweir sal_Int32 *p = NULL; 56cdf0e10cSrcweir sal_Int32 position = 0; 57cdf0e10cSrcweir if (useOffset) { 58cdf0e10cSrcweir // Allocate double of nCount length to offset argument. 59cdf0e10cSrcweir offset.realloc( nCount * 2 ); 60cdf0e10cSrcweir p = offset.getArray(); 61cdf0e10cSrcweir position = startPos; 62cdf0e10cSrcweir } 63cdf0e10cSrcweir 64cdf0e10cSrcweir // Prepare pointers of unicode character arrays. 65cdf0e10cSrcweir const sal_Unicode* src = inStr.getStr() + startPos; 66cdf0e10cSrcweir sal_Unicode* dst = newStr->buffer; 67cdf0e10cSrcweir 68cdf0e10cSrcweir // Decomposition: GA --> KA + voice-mark 69cdf0e10cSrcweir while (nCount -- > 0) { 70cdf0e10cSrcweir sal_Unicode c = *src++; 71cdf0e10cSrcweir // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) 72cdf0e10cSrcweir // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) 73cdf0e10cSrcweir // Hiragana is not applied to decomposition. 74cdf0e10cSrcweir // Only Katakana is applied to decomposition 75cdf0e10cSrcweir if (0x30a0 <= c && c <= 0x30ff) { 76cdf0e10cSrcweir int i = int(c - 0x3040); 77cdf0e10cSrcweir sal_Unicode first = decomposition_table[i].decomposited_character_1; 78cdf0e10cSrcweir if (first != 0x0000) { 79cdf0e10cSrcweir *dst ++ = first; 80cdf0e10cSrcweir *dst ++ = decomposition_table[i].decomposited_character_2; // second 81cdf0e10cSrcweir if (useOffset) { 82cdf0e10cSrcweir *p ++ = position; 83cdf0e10cSrcweir *p ++ = position ++; 84cdf0e10cSrcweir } 85cdf0e10cSrcweir continue; 86cdf0e10cSrcweir } 87cdf0e10cSrcweir } 88cdf0e10cSrcweir *dst ++ = c; 89cdf0e10cSrcweir if (useOffset) 90cdf0e10cSrcweir *p ++ = position ++; 91cdf0e10cSrcweir } 92cdf0e10cSrcweir *dst = (sal_Unicode) 0; 93cdf0e10cSrcweir 94cdf0e10cSrcweir newStr->length = sal_Int32(dst - newStr->buffer); 95cdf0e10cSrcweir if (useOffset) 96cdf0e10cSrcweir offset.realloc(newStr->length); 97cdf0e10cSrcweir return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 98cdf0e10cSrcweir } 99cdf0e10cSrcweir 100cdf0e10cSrcweir oneToOneMapping& widthfolding::getfull2halfTable(void) 101cdf0e10cSrcweir { 102cdf0e10cSrcweir static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_NORMAL); 103cdf0e10cSrcweir table.makeIndex(); 104cdf0e10cSrcweir return table; 105cdf0e10cSrcweir } 106cdf0e10cSrcweir 107cdf0e10cSrcweir /** 108cdf0e10cSrcweir * Compose Japanese specific voiced and semi-voiced sound marks. 109cdf0e10cSrcweir */ 110cdf0e10cSrcweir OUString widthfolding::compose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset, sal_Int32 nFlags ) 111cdf0e10cSrcweir { 112cdf0e10cSrcweir // Create a string buffer which can hold nCount + 1 characters. 113cdf0e10cSrcweir // Its size may become equal to nCount or smaller. 114cdf0e10cSrcweir // The reference count is 0 now. 115cdf0e10cSrcweir rtl_uString * newStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h 116cdf0e10cSrcweir 117cdf0e10cSrcweir // Prepare pointers of unicode character arrays. 118cdf0e10cSrcweir const sal_Unicode* src = inStr.getStr() + startPos; 119cdf0e10cSrcweir sal_Unicode* dst = newStr->buffer; 120cdf0e10cSrcweir 121cdf0e10cSrcweir // This conversion algorithm requires at least one character. 122cdf0e10cSrcweir if (nCount > 0) { 123cdf0e10cSrcweir 124cdf0e10cSrcweir // .. .. KA VOICE .. .. 125cdf0e10cSrcweir // ^ ^ 126cdf0e10cSrcweir // previousChar currentChar 127cdf0e10cSrcweir // ^ 128cdf0e10cSrcweir // position 129cdf0e10cSrcweir // 130cdf0e10cSrcweir // will be converted to 131cdf0e10cSrcweir // .. .. GA .. .. 132cdf0e10cSrcweir 133cdf0e10cSrcweir sal_Int32 *p = NULL; 134cdf0e10cSrcweir sal_Int32 position = 0; 135cdf0e10cSrcweir if (useOffset) { 136cdf0e10cSrcweir // Allocate nCount length to offset argument. 137cdf0e10cSrcweir offset.realloc( nCount ); 138cdf0e10cSrcweir p = offset.getArray(); 139cdf0e10cSrcweir position = startPos; 140cdf0e10cSrcweir } 141cdf0e10cSrcweir 142cdf0e10cSrcweir // 143cdf0e10cSrcweir sal_Unicode previousChar = *src ++; 144cdf0e10cSrcweir sal_Unicode currentChar; 145cdf0e10cSrcweir 146cdf0e10cSrcweir // Composition: KA + voice-mark --> GA 147cdf0e10cSrcweir while (-- nCount > 0) { 148cdf0e10cSrcweir currentChar = *src ++; 149cdf0e10cSrcweir // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) 150cdf0e10cSrcweir // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) 151cdf0e10cSrcweir // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 152cdf0e10cSrcweir // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 153cdf0e10cSrcweir // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK 154cdf0e10cSrcweir // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 155cdf0e10cSrcweir int j = currentChar - 0x3099; // 0x3099, 0x309a, 0x309b, 0x309c ? 156cdf0e10cSrcweir 157cdf0e10cSrcweir if (2 <= j && j <= 3) // 0x309b or 0x309c 158cdf0e10cSrcweir j -= 2; 159cdf0e10cSrcweir 160cdf0e10cSrcweir if (0 <= j && j <= 1) { 161cdf0e10cSrcweir // 0 addresses a code point regarding 0x3099 or 0x309b (voiced sound mark), 162cdf0e10cSrcweir // 1 is 0x309a or 0x309c (semi-voiced sound mark) 163cdf0e10cSrcweir int i = int(previousChar - 0x3040); // i acts as an index of array 164cdf0e10cSrcweir sal_Bool bCompose = sal_False; 165cdf0e10cSrcweir 166cdf0e10cSrcweir if (0 <= i && i <= (0x30ff - 0x3040) && composition_table[i][j]) 167cdf0e10cSrcweir bCompose = sal_True; 168cdf0e10cSrcweir 169cdf0e10cSrcweir // not to use combined KATAKANA LETTER VU 170cdf0e10cSrcweir if ( previousChar == 0x30a6 && (nFlags & WIDTHFOLDNIG_DONT_USE_COMBINED_VU) ) 171cdf0e10cSrcweir bCompose = sal_False; 172cdf0e10cSrcweir 173cdf0e10cSrcweir if( bCompose ){ 174cdf0e10cSrcweir if (useOffset) { 175cdf0e10cSrcweir position ++; 176cdf0e10cSrcweir *p ++ = position ++; 177cdf0e10cSrcweir } 178cdf0e10cSrcweir *dst ++ = composition_table[i][j]; 179cdf0e10cSrcweir previousChar = *src ++; 180cdf0e10cSrcweir nCount --; 181cdf0e10cSrcweir continue; 182cdf0e10cSrcweir } 183cdf0e10cSrcweir } 184cdf0e10cSrcweir if (useOffset) 185cdf0e10cSrcweir *p ++ = position ++; 186cdf0e10cSrcweir *dst ++ = previousChar; 187cdf0e10cSrcweir previousChar = currentChar; 188cdf0e10cSrcweir } 189cdf0e10cSrcweir 190cdf0e10cSrcweir if (nCount == 0) { 191cdf0e10cSrcweir if (useOffset) 192cdf0e10cSrcweir *p = position; 193cdf0e10cSrcweir *dst ++ = previousChar; 194cdf0e10cSrcweir } 195cdf0e10cSrcweir 196cdf0e10cSrcweir *dst = (sal_Unicode) 0; 197cdf0e10cSrcweir 198cdf0e10cSrcweir newStr->length = sal_Int32(dst - newStr->buffer); 199cdf0e10cSrcweir } 200cdf0e10cSrcweir if (useOffset) 201cdf0e10cSrcweir offset.realloc(newStr->length); 202cdf0e10cSrcweir return OUString( newStr ); // defined in rtl/usrting. The reference count is increased from 0 to 1. 203cdf0e10cSrcweir } 204cdf0e10cSrcweir 205cdf0e10cSrcweir oneToOneMapping& widthfolding::gethalf2fullTable(void) 206cdf0e10cSrcweir { 207cdf0e10cSrcweir static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_NORMAL); 208cdf0e10cSrcweir table.makeIndex(); 209cdf0e10cSrcweir return table; 210cdf0e10cSrcweir } 211cdf0e10cSrcweir 212cdf0e10cSrcweir sal_Unicode widthfolding::getCompositionChar(sal_Unicode c1, sal_Unicode c2) 213cdf0e10cSrcweir { 214cdf0e10cSrcweir return composition_table[c1 - 0x3040][c2 - 0x3099]; 215cdf0e10cSrcweir } 216cdf0e10cSrcweir 217cdf0e10cSrcweir 218cdf0e10cSrcweir oneToOneMapping& widthfolding::getfull2halfTableForASC() 219cdf0e10cSrcweir { 220cdf0e10cSrcweir static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_ASC_FUNCTION); 221cdf0e10cSrcweir table.makeIndex(); 222cdf0e10cSrcweir 223cdf0e10cSrcweir // bluedwarf: dirty hack! 224cdf0e10cSrcweir // There is an exception. Additional conversion is required following: 225cdf0e10cSrcweir // 0xFFE5 (FULLWIDTH YEN SIGN) --> 0x005C (REVERSE SOLIDUS) 226cdf0e10cSrcweir // 227cdf0e10cSrcweir // See the following page for detail: 228cdf0e10cSrcweir // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions 229cdf0e10cSrcweir int i, j, high, low; 230cdf0e10cSrcweir int n = sizeof(full2halfASCException) / sizeof(UnicodePairWithFlag); 231cdf0e10cSrcweir for( i = 0; i < n; i++ ) 232cdf0e10cSrcweir { 233cdf0e10cSrcweir high = (full2halfASCException[i].first >> 8) & 0xFF; 234cdf0e10cSrcweir low = (full2halfASCException[i].first) & 0xFF; 235cdf0e10cSrcweir 236cdf0e10cSrcweir if( !table.mpIndex[high] ) 237cdf0e10cSrcweir { 238cdf0e10cSrcweir table.mpIndex[high] = new UnicodePairWithFlag*[256]; 239cdf0e10cSrcweir 240cdf0e10cSrcweir for( j = 0; j < 256; j++ ) 241cdf0e10cSrcweir table.mpIndex[high][j] = NULL; 242cdf0e10cSrcweir } 243cdf0e10cSrcweir table.mpIndex[high][low] = &full2halfASCException[i]; 244cdf0e10cSrcweir } 245cdf0e10cSrcweir 246cdf0e10cSrcweir return table; 247cdf0e10cSrcweir } 248cdf0e10cSrcweir 249cdf0e10cSrcweir oneToOneMapping& widthfolding::gethalf2fullTableForJIS() 250cdf0e10cSrcweir { 251cdf0e10cSrcweir static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_JIS_FUNCTION); 252cdf0e10cSrcweir table.makeIndex(); 253cdf0e10cSrcweir 254cdf0e10cSrcweir // bluedwarf: dirty hack! 255cdf0e10cSrcweir // There are some exceptions. Additional conversion are required following: 256cdf0e10cSrcweir // 0x0022 (QUOTATION MARK) --> 0x201D (RIGHT DOUBLE QUOTATION MARK) 257cdf0e10cSrcweir // 0x0027 (APOSTROPHE) --> 0x2019 (RIGHT SINGLE QUOTATION MARK) 258cdf0e10cSrcweir // 0x005C (REVERSE SOLIDUS) --> 0xFFE5 (FULLWIDTH YEN SIGN) 259cdf0e10cSrcweir // 0x0060 (GRAVE ACCENT) --> 0x2018 (LEFT SINGLE QUOTATION MARK) 260cdf0e10cSrcweir // 261cdf0e10cSrcweir // See the following page for detail: 262cdf0e10cSrcweir // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions 263cdf0e10cSrcweir int i, j, high, low; 264cdf0e10cSrcweir int n = sizeof(half2fullJISException) / sizeof(UnicodePairWithFlag); 265cdf0e10cSrcweir for( i = 0; i < n; i++ ) 266cdf0e10cSrcweir { 267cdf0e10cSrcweir high = (half2fullJISException[i].first >> 8) & 0xFF; 268cdf0e10cSrcweir low = (half2fullJISException[i].first) & 0xFF; 269cdf0e10cSrcweir 270cdf0e10cSrcweir if( !table.mpIndex[high] ) 271cdf0e10cSrcweir { 272cdf0e10cSrcweir table.mpIndex[high] = new UnicodePairWithFlag*[256]; 273cdf0e10cSrcweir 274cdf0e10cSrcweir for( j = 0; j < 256; j++ ) 275cdf0e10cSrcweir table.mpIndex[high][j] = NULL; 276cdf0e10cSrcweir } 277cdf0e10cSrcweir table.mpIndex[high][low] = &half2fullJISException[i]; 278cdf0e10cSrcweir } 279cdf0e10cSrcweir 280cdf0e10cSrcweir return table; 281cdf0e10cSrcweir } 282cdf0e10cSrcweir 283cdf0e10cSrcweir oneToOneMapping& widthfolding::getfullKana2halfKanaTable() 284cdf0e10cSrcweir { 285cdf0e10cSrcweir static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_KATAKANA_ONLY); 286cdf0e10cSrcweir table.makeIndex(); 287cdf0e10cSrcweir return table; 288cdf0e10cSrcweir } 289cdf0e10cSrcweir 290cdf0e10cSrcweir oneToOneMapping& widthfolding::gethalfKana2fullKanaTable() 291cdf0e10cSrcweir { 292cdf0e10cSrcweir static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_KATAKANA_ONLY); 293cdf0e10cSrcweir table.makeIndex(); 294cdf0e10cSrcweir return table; 295cdf0e10cSrcweir } 296cdf0e10cSrcweir 297cdf0e10cSrcweir } } } } 298