1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp> 29*cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp> 30*cdf0e10cSrcweir #include <i18nutil/unicode.hxx> 31*cdf0e10cSrcweir #include "unicode_data.h" 32*cdf0e10cSrcweir 33*cdf0e10cSrcweir using namespace ::com::sun::star::i18n; 34*cdf0e10cSrcweir 35*cdf0e10cSrcweir static ScriptTypeList defaultTypeList[] = { 36*cdf0e10cSrcweir { UnicodeScript_kBasicLatin, 37*cdf0e10cSrcweir UnicodeScript_kBasicLatin, 38*cdf0e10cSrcweir UnicodeScript_kBasicLatin }, // 0, 39*cdf0e10cSrcweir { UnicodeScript_kLatin1Supplement, 40*cdf0e10cSrcweir UnicodeScript_kLatin1Supplement, 41*cdf0e10cSrcweir UnicodeScript_kLatin1Supplement },// 1, 42*cdf0e10cSrcweir { UnicodeScript_kLatinExtendedA, 43*cdf0e10cSrcweir UnicodeScript_kLatinExtendedA, 44*cdf0e10cSrcweir UnicodeScript_kLatinExtendedA }, // 2, 45*cdf0e10cSrcweir { UnicodeScript_kLatinExtendedB, 46*cdf0e10cSrcweir UnicodeScript_kLatinExtendedB, 47*cdf0e10cSrcweir UnicodeScript_kLatinExtendedB }, // 3, 48*cdf0e10cSrcweir { UnicodeScript_kIPAExtension, 49*cdf0e10cSrcweir UnicodeScript_kIPAExtension, 50*cdf0e10cSrcweir UnicodeScript_kIPAExtension }, // 4, 51*cdf0e10cSrcweir { UnicodeScript_kSpacingModifier, 52*cdf0e10cSrcweir UnicodeScript_kSpacingModifier, 53*cdf0e10cSrcweir UnicodeScript_kSpacingModifier }, // 5, 54*cdf0e10cSrcweir { UnicodeScript_kCombiningDiacritical, 55*cdf0e10cSrcweir UnicodeScript_kCombiningDiacritical, 56*cdf0e10cSrcweir UnicodeScript_kCombiningDiacritical }, // 6, 57*cdf0e10cSrcweir { UnicodeScript_kGreek, 58*cdf0e10cSrcweir UnicodeScript_kGreek, 59*cdf0e10cSrcweir UnicodeScript_kGreek }, // 7, 60*cdf0e10cSrcweir { UnicodeScript_kCyrillic, 61*cdf0e10cSrcweir UnicodeScript_kCyrillic, 62*cdf0e10cSrcweir UnicodeScript_kCyrillic }, // 8, 63*cdf0e10cSrcweir { UnicodeScript_kArmenian, 64*cdf0e10cSrcweir UnicodeScript_kArmenian, 65*cdf0e10cSrcweir UnicodeScript_kArmenian }, // 9, 66*cdf0e10cSrcweir { UnicodeScript_kHebrew, 67*cdf0e10cSrcweir UnicodeScript_kHebrew, 68*cdf0e10cSrcweir UnicodeScript_kHebrew }, // 10, 69*cdf0e10cSrcweir { UnicodeScript_kArabic, 70*cdf0e10cSrcweir UnicodeScript_kArabic, 71*cdf0e10cSrcweir UnicodeScript_kArabic }, // 11, 72*cdf0e10cSrcweir { UnicodeScript_kSyriac, 73*cdf0e10cSrcweir UnicodeScript_kSyriac, 74*cdf0e10cSrcweir UnicodeScript_kSyriac }, // 12, 75*cdf0e10cSrcweir { UnicodeScript_kThaana, 76*cdf0e10cSrcweir UnicodeScript_kThaana, 77*cdf0e10cSrcweir UnicodeScript_kThaana }, // 13, 78*cdf0e10cSrcweir { UnicodeScript_kDevanagari, 79*cdf0e10cSrcweir UnicodeScript_kDevanagari, 80*cdf0e10cSrcweir UnicodeScript_kDevanagari }, // 14, 81*cdf0e10cSrcweir { UnicodeScript_kBengali, 82*cdf0e10cSrcweir UnicodeScript_kBengali, 83*cdf0e10cSrcweir UnicodeScript_kBengali }, // 15, 84*cdf0e10cSrcweir { UnicodeScript_kGurmukhi, 85*cdf0e10cSrcweir UnicodeScript_kGurmukhi, 86*cdf0e10cSrcweir UnicodeScript_kGurmukhi }, // 16, 87*cdf0e10cSrcweir { UnicodeScript_kGujarati, 88*cdf0e10cSrcweir UnicodeScript_kGujarati, 89*cdf0e10cSrcweir UnicodeScript_kGujarati }, // 17, 90*cdf0e10cSrcweir { UnicodeScript_kOriya, 91*cdf0e10cSrcweir UnicodeScript_kOriya, 92*cdf0e10cSrcweir UnicodeScript_kOriya }, // 18, 93*cdf0e10cSrcweir { UnicodeScript_kTamil, 94*cdf0e10cSrcweir UnicodeScript_kTamil, 95*cdf0e10cSrcweir UnicodeScript_kTamil }, // 19, 96*cdf0e10cSrcweir { UnicodeScript_kTelugu, 97*cdf0e10cSrcweir UnicodeScript_kTelugu, 98*cdf0e10cSrcweir UnicodeScript_kTelugu }, // 20, 99*cdf0e10cSrcweir { UnicodeScript_kKannada, 100*cdf0e10cSrcweir UnicodeScript_kKannada, 101*cdf0e10cSrcweir UnicodeScript_kKannada }, // 21, 102*cdf0e10cSrcweir { UnicodeScript_kMalayalam, 103*cdf0e10cSrcweir UnicodeScript_kMalayalam, 104*cdf0e10cSrcweir UnicodeScript_kMalayalam }, // 22, 105*cdf0e10cSrcweir { UnicodeScript_kSinhala, 106*cdf0e10cSrcweir UnicodeScript_kSinhala, 107*cdf0e10cSrcweir UnicodeScript_kSinhala }, // 23, 108*cdf0e10cSrcweir { UnicodeScript_kThai, 109*cdf0e10cSrcweir UnicodeScript_kThai, 110*cdf0e10cSrcweir UnicodeScript_kThai }, // 24, 111*cdf0e10cSrcweir { UnicodeScript_kLao, 112*cdf0e10cSrcweir UnicodeScript_kLao, 113*cdf0e10cSrcweir UnicodeScript_kLao }, // 25, 114*cdf0e10cSrcweir { UnicodeScript_kTibetan, 115*cdf0e10cSrcweir UnicodeScript_kTibetan, 116*cdf0e10cSrcweir UnicodeScript_kTibetan }, // 26, 117*cdf0e10cSrcweir { UnicodeScript_kMyanmar, 118*cdf0e10cSrcweir UnicodeScript_kMyanmar, 119*cdf0e10cSrcweir UnicodeScript_kMyanmar }, // 27, 120*cdf0e10cSrcweir { UnicodeScript_kGeorgian, 121*cdf0e10cSrcweir UnicodeScript_kGeorgian, 122*cdf0e10cSrcweir UnicodeScript_kGeorgian }, // 28, 123*cdf0e10cSrcweir { UnicodeScript_kHangulJamo, 124*cdf0e10cSrcweir UnicodeScript_kHangulJamo, 125*cdf0e10cSrcweir UnicodeScript_kHangulJamo }, // 29, 126*cdf0e10cSrcweir { UnicodeScript_kEthiopic, 127*cdf0e10cSrcweir UnicodeScript_kEthiopic, 128*cdf0e10cSrcweir UnicodeScript_kEthiopic }, // 30, 129*cdf0e10cSrcweir { UnicodeScript_kCherokee, 130*cdf0e10cSrcweir UnicodeScript_kCherokee, 131*cdf0e10cSrcweir UnicodeScript_kCherokee }, // 31, 132*cdf0e10cSrcweir { UnicodeScript_kUnifiedCanadianAboriginalSyllabics, 133*cdf0e10cSrcweir UnicodeScript_kUnifiedCanadianAboriginalSyllabics, 134*cdf0e10cSrcweir UnicodeScript_kUnifiedCanadianAboriginalSyllabics }, // 32, 135*cdf0e10cSrcweir { UnicodeScript_kOgham, 136*cdf0e10cSrcweir UnicodeScript_kOgham, 137*cdf0e10cSrcweir UnicodeScript_kOgham }, // 33, 138*cdf0e10cSrcweir { UnicodeScript_kRunic, 139*cdf0e10cSrcweir UnicodeScript_kRunic, 140*cdf0e10cSrcweir UnicodeScript_kRunic }, // 34, 141*cdf0e10cSrcweir { UnicodeScript_kKhmer, 142*cdf0e10cSrcweir UnicodeScript_kKhmer, 143*cdf0e10cSrcweir UnicodeScript_kKhmer }, // 35, 144*cdf0e10cSrcweir { UnicodeScript_kMongolian, 145*cdf0e10cSrcweir UnicodeScript_kMongolian, 146*cdf0e10cSrcweir UnicodeScript_kMongolian }, // 36, 147*cdf0e10cSrcweir { UnicodeScript_kLatinExtendedAdditional, 148*cdf0e10cSrcweir UnicodeScript_kLatinExtendedAdditional, 149*cdf0e10cSrcweir UnicodeScript_kLatinExtendedAdditional }, // 37, 150*cdf0e10cSrcweir { UnicodeScript_kGreekExtended, 151*cdf0e10cSrcweir UnicodeScript_kGreekExtended, 152*cdf0e10cSrcweir UnicodeScript_kGreekExtended }, // 38, 153*cdf0e10cSrcweir { UnicodeScript_kGeneralPunctuation, 154*cdf0e10cSrcweir UnicodeScript_kGeneralPunctuation, 155*cdf0e10cSrcweir UnicodeScript_kGeneralPunctuation }, // 39, 156*cdf0e10cSrcweir { UnicodeScript_kSuperSubScript, 157*cdf0e10cSrcweir UnicodeScript_kSuperSubScript, 158*cdf0e10cSrcweir UnicodeScript_kSuperSubScript }, // 40, 159*cdf0e10cSrcweir { UnicodeScript_kCurrencySymbolScript, 160*cdf0e10cSrcweir UnicodeScript_kCurrencySymbolScript, 161*cdf0e10cSrcweir UnicodeScript_kCurrencySymbolScript }, // 41, 162*cdf0e10cSrcweir { UnicodeScript_kSymbolCombiningMark, 163*cdf0e10cSrcweir UnicodeScript_kSymbolCombiningMark, 164*cdf0e10cSrcweir UnicodeScript_kSymbolCombiningMark }, // 42, 165*cdf0e10cSrcweir { UnicodeScript_kLetterlikeSymbol, 166*cdf0e10cSrcweir UnicodeScript_kLetterlikeSymbol, 167*cdf0e10cSrcweir UnicodeScript_kLetterlikeSymbol }, // 43, 168*cdf0e10cSrcweir { UnicodeScript_kNumberForm, 169*cdf0e10cSrcweir UnicodeScript_kNumberForm, 170*cdf0e10cSrcweir UnicodeScript_kNumberForm }, // 44, 171*cdf0e10cSrcweir { UnicodeScript_kArrow, 172*cdf0e10cSrcweir UnicodeScript_kArrow, 173*cdf0e10cSrcweir UnicodeScript_kArrow }, // 45, 174*cdf0e10cSrcweir { UnicodeScript_kMathOperator, 175*cdf0e10cSrcweir UnicodeScript_kMathOperator, 176*cdf0e10cSrcweir UnicodeScript_kMathOperator }, // 46, 177*cdf0e10cSrcweir { UnicodeScript_kMiscTechnical, 178*cdf0e10cSrcweir UnicodeScript_kMiscTechnical, 179*cdf0e10cSrcweir UnicodeScript_kMiscTechnical }, // 47, 180*cdf0e10cSrcweir { UnicodeScript_kControlPicture, 181*cdf0e10cSrcweir UnicodeScript_kControlPicture, 182*cdf0e10cSrcweir UnicodeScript_kControlPicture }, // 48, 183*cdf0e10cSrcweir { UnicodeScript_kOpticalCharacter, 184*cdf0e10cSrcweir UnicodeScript_kOpticalCharacter, 185*cdf0e10cSrcweir UnicodeScript_kOpticalCharacter }, // 49, 186*cdf0e10cSrcweir { UnicodeScript_kEnclosedAlphanumeric, 187*cdf0e10cSrcweir UnicodeScript_kEnclosedAlphanumeric, 188*cdf0e10cSrcweir UnicodeScript_kEnclosedAlphanumeric }, // 50, 189*cdf0e10cSrcweir { UnicodeScript_kBoxDrawing, 190*cdf0e10cSrcweir UnicodeScript_kBoxDrawing, 191*cdf0e10cSrcweir UnicodeScript_kBoxDrawing }, // 51, 192*cdf0e10cSrcweir { UnicodeScript_kBlockElement, 193*cdf0e10cSrcweir UnicodeScript_kBlockElement, 194*cdf0e10cSrcweir UnicodeScript_kBlockElement }, // 52, 195*cdf0e10cSrcweir { UnicodeScript_kGeometricShape, 196*cdf0e10cSrcweir UnicodeScript_kGeometricShape, 197*cdf0e10cSrcweir UnicodeScript_kGeometricShape }, // 53, 198*cdf0e10cSrcweir { UnicodeScript_kMiscSymbol, 199*cdf0e10cSrcweir UnicodeScript_kMiscSymbol, 200*cdf0e10cSrcweir UnicodeScript_kMiscSymbol }, // 54, 201*cdf0e10cSrcweir { UnicodeScript_kDingbat, 202*cdf0e10cSrcweir UnicodeScript_kDingbat, 203*cdf0e10cSrcweir UnicodeScript_kDingbat }, // 55, 204*cdf0e10cSrcweir { UnicodeScript_kBraillePatterns, 205*cdf0e10cSrcweir UnicodeScript_kBraillePatterns, 206*cdf0e10cSrcweir UnicodeScript_kBraillePatterns }, // 56, 207*cdf0e10cSrcweir { UnicodeScript_kCJKRadicalsSupplement, 208*cdf0e10cSrcweir UnicodeScript_kCJKRadicalsSupplement, 209*cdf0e10cSrcweir UnicodeScript_kCJKRadicalsSupplement }, // 57, 210*cdf0e10cSrcweir { UnicodeScript_kKangxiRadicals, 211*cdf0e10cSrcweir UnicodeScript_kKangxiRadicals, 212*cdf0e10cSrcweir UnicodeScript_kKangxiRadicals }, // 58, 213*cdf0e10cSrcweir { UnicodeScript_kIdeographicDescriptionCharacters, 214*cdf0e10cSrcweir UnicodeScript_kIdeographicDescriptionCharacters, 215*cdf0e10cSrcweir UnicodeScript_kIdeographicDescriptionCharacters }, // 59, 216*cdf0e10cSrcweir { UnicodeScript_kCJKSymbolPunctuation, 217*cdf0e10cSrcweir UnicodeScript_kCJKSymbolPunctuation, 218*cdf0e10cSrcweir UnicodeScript_kCJKSymbolPunctuation }, // 60, 219*cdf0e10cSrcweir { UnicodeScript_kHiragana, 220*cdf0e10cSrcweir UnicodeScript_kHiragana, 221*cdf0e10cSrcweir UnicodeScript_kHiragana }, // 61, 222*cdf0e10cSrcweir { UnicodeScript_kKatakana, 223*cdf0e10cSrcweir UnicodeScript_kKatakana, 224*cdf0e10cSrcweir UnicodeScript_kKatakana }, // 62, 225*cdf0e10cSrcweir { UnicodeScript_kBopomofo, 226*cdf0e10cSrcweir UnicodeScript_kBopomofo, 227*cdf0e10cSrcweir UnicodeScript_kBopomofo }, // 63, 228*cdf0e10cSrcweir { UnicodeScript_kHangulCompatibilityJamo, 229*cdf0e10cSrcweir UnicodeScript_kHangulCompatibilityJamo, 230*cdf0e10cSrcweir UnicodeScript_kHangulCompatibilityJamo }, // 64, 231*cdf0e10cSrcweir { UnicodeScript_kKanbun, 232*cdf0e10cSrcweir UnicodeScript_kKanbun, 233*cdf0e10cSrcweir UnicodeScript_kKanbun }, // 65, 234*cdf0e10cSrcweir { UnicodeScript_kBopomofoExtended, 235*cdf0e10cSrcweir UnicodeScript_kBopomofoExtended, 236*cdf0e10cSrcweir UnicodeScript_kBopomofoExtended }, // 66, 237*cdf0e10cSrcweir { UnicodeScript_kEnclosedCJKLetterMonth, 238*cdf0e10cSrcweir UnicodeScript_kEnclosedCJKLetterMonth, 239*cdf0e10cSrcweir UnicodeScript_kEnclosedCJKLetterMonth }, // 67, 240*cdf0e10cSrcweir { UnicodeScript_kCJKCompatibility, 241*cdf0e10cSrcweir UnicodeScript_kCJKCompatibility, 242*cdf0e10cSrcweir UnicodeScript_kCJKCompatibility }, // 68, 243*cdf0e10cSrcweir { UnicodeScript_k_CJKUnifiedIdeographsExtensionA, 244*cdf0e10cSrcweir UnicodeScript_k_CJKUnifiedIdeographsExtensionA, 245*cdf0e10cSrcweir UnicodeScript_k_CJKUnifiedIdeographsExtensionA }, // 69, 246*cdf0e10cSrcweir { UnicodeScript_kCJKUnifiedIdeograph, 247*cdf0e10cSrcweir UnicodeScript_kCJKUnifiedIdeograph, 248*cdf0e10cSrcweir UnicodeScript_kCJKUnifiedIdeograph }, // 70, 249*cdf0e10cSrcweir { UnicodeScript_kYiSyllables, 250*cdf0e10cSrcweir UnicodeScript_kYiSyllables, 251*cdf0e10cSrcweir UnicodeScript_kYiSyllables }, // 71, 252*cdf0e10cSrcweir { UnicodeScript_kYiRadicals, 253*cdf0e10cSrcweir UnicodeScript_kYiRadicals, 254*cdf0e10cSrcweir UnicodeScript_kYiRadicals }, // 72, 255*cdf0e10cSrcweir { UnicodeScript_kHangulSyllable, 256*cdf0e10cSrcweir UnicodeScript_kHangulSyllable, 257*cdf0e10cSrcweir UnicodeScript_kHangulSyllable }, // 73, 258*cdf0e10cSrcweir { UnicodeScript_kHighSurrogate, 259*cdf0e10cSrcweir UnicodeScript_kHighSurrogate, 260*cdf0e10cSrcweir UnicodeScript_kHighSurrogate }, // 74, 261*cdf0e10cSrcweir { UnicodeScript_kHighPrivateUseSurrogate, 262*cdf0e10cSrcweir UnicodeScript_kHighPrivateUseSurrogate, 263*cdf0e10cSrcweir UnicodeScript_kHighPrivateUseSurrogate }, // 75, 264*cdf0e10cSrcweir { UnicodeScript_kLowSurrogate, 265*cdf0e10cSrcweir UnicodeScript_kLowSurrogate, 266*cdf0e10cSrcweir UnicodeScript_kLowSurrogate }, // 76, 267*cdf0e10cSrcweir { UnicodeScript_kPrivateUse, 268*cdf0e10cSrcweir UnicodeScript_kPrivateUse, 269*cdf0e10cSrcweir UnicodeScript_kPrivateUse }, // 77, 270*cdf0e10cSrcweir { UnicodeScript_kCJKCompatibilityIdeograph, 271*cdf0e10cSrcweir UnicodeScript_kCJKCompatibilityIdeograph, 272*cdf0e10cSrcweir UnicodeScript_kCJKCompatibilityIdeograph }, // 78, 273*cdf0e10cSrcweir { UnicodeScript_kAlphabeticPresentation, 274*cdf0e10cSrcweir UnicodeScript_kAlphabeticPresentation, 275*cdf0e10cSrcweir UnicodeScript_kAlphabeticPresentation }, // 79, 276*cdf0e10cSrcweir { UnicodeScript_kArabicPresentationA, 277*cdf0e10cSrcweir UnicodeScript_kArabicPresentationA, 278*cdf0e10cSrcweir UnicodeScript_kArabicPresentationA }, // 80, 279*cdf0e10cSrcweir { UnicodeScript_kCombiningHalfMark, 280*cdf0e10cSrcweir UnicodeScript_kCombiningHalfMark, 281*cdf0e10cSrcweir UnicodeScript_kCombiningHalfMark }, // 81, 282*cdf0e10cSrcweir { UnicodeScript_kCJKCompatibilityForm, 283*cdf0e10cSrcweir UnicodeScript_kCJKCompatibilityForm, 284*cdf0e10cSrcweir UnicodeScript_kCJKCompatibilityForm }, // 82, 285*cdf0e10cSrcweir { UnicodeScript_kSmallFormVariant, 286*cdf0e10cSrcweir UnicodeScript_kSmallFormVariant, 287*cdf0e10cSrcweir UnicodeScript_kSmallFormVariant }, // 83, 288*cdf0e10cSrcweir { UnicodeScript_kArabicPresentationB, 289*cdf0e10cSrcweir UnicodeScript_kArabicPresentationB, 290*cdf0e10cSrcweir UnicodeScript_kArabicPresentationB }, // 84, 291*cdf0e10cSrcweir { UnicodeScript_kNoScript, 292*cdf0e10cSrcweir UnicodeScript_kNoScript, 293*cdf0e10cSrcweir UnicodeScript_kNoScript }, // 85, 294*cdf0e10cSrcweir { UnicodeScript_kHalfwidthFullwidthForm, 295*cdf0e10cSrcweir UnicodeScript_kHalfwidthFullwidthForm, 296*cdf0e10cSrcweir UnicodeScript_kHalfwidthFullwidthForm }, // 86, 297*cdf0e10cSrcweir { UnicodeScript_kScriptCount, 298*cdf0e10cSrcweir UnicodeScript_kScriptCount, 299*cdf0e10cSrcweir UnicodeScript_kNoScript } // 87, 300*cdf0e10cSrcweir }; 301*cdf0e10cSrcweir 302*cdf0e10cSrcweir sal_Int16 SAL_CALL 303*cdf0e10cSrcweir unicode::getUnicodeScriptType( const sal_Unicode ch, ScriptTypeList* typeList, sal_Int16 unknownType ) { 304*cdf0e10cSrcweir 305*cdf0e10cSrcweir if (!typeList) { 306*cdf0e10cSrcweir typeList = defaultTypeList; 307*cdf0e10cSrcweir unknownType = UnicodeScript_kNoScript; 308*cdf0e10cSrcweir } 309*cdf0e10cSrcweir 310*cdf0e10cSrcweir sal_Int16 i = 0, type = typeList[0].to; 311*cdf0e10cSrcweir while (type < UnicodeScript_kScriptCount && ch > UnicodeScriptType[type][UnicodeScriptTypeTo]) { 312*cdf0e10cSrcweir type = typeList[++i].to; 313*cdf0e10cSrcweir } 314*cdf0e10cSrcweir 315*cdf0e10cSrcweir return (type < UnicodeScript_kScriptCount && 316*cdf0e10cSrcweir ch >= UnicodeScriptType[typeList[i].from][UnicodeScriptTypeFrom]) ? 317*cdf0e10cSrcweir typeList[i].value : unknownType; 318*cdf0e10cSrcweir } 319*cdf0e10cSrcweir 320*cdf0e10cSrcweir sal_Bool SAL_CALL 321*cdf0e10cSrcweir unicode::isUnicodeScriptType( const sal_Unicode ch, sal_Int16 type) { 322*cdf0e10cSrcweir return ch >= UnicodeScriptType[type][UnicodeScriptTypeFrom] && 323*cdf0e10cSrcweir ch <= UnicodeScriptType[type][UnicodeScriptTypeTo]; 324*cdf0e10cSrcweir } 325*cdf0e10cSrcweir 326*cdf0e10cSrcweir sal_Unicode SAL_CALL 327*cdf0e10cSrcweir unicode::getUnicodeScriptStart( UnicodeScript type) { 328*cdf0e10cSrcweir return UnicodeScriptType[type][UnicodeScriptTypeFrom]; 329*cdf0e10cSrcweir } 330*cdf0e10cSrcweir 331*cdf0e10cSrcweir sal_Unicode SAL_CALL 332*cdf0e10cSrcweir unicode::getUnicodeScriptEnd( UnicodeScript type) { 333*cdf0e10cSrcweir return UnicodeScriptType[type][UnicodeScriptTypeTo]; 334*cdf0e10cSrcweir } 335*cdf0e10cSrcweir 336*cdf0e10cSrcweir sal_Int16 SAL_CALL 337*cdf0e10cSrcweir unicode::getUnicodeType( const sal_Unicode ch ) { 338*cdf0e10cSrcweir static sal_Unicode c = 0x00; 339*cdf0e10cSrcweir static sal_Int16 r = 0x00; 340*cdf0e10cSrcweir 341*cdf0e10cSrcweir if (ch == c) return r; 342*cdf0e10cSrcweir else c = ch; 343*cdf0e10cSrcweir 344*cdf0e10cSrcweir sal_Int16 address = UnicodeTypeIndex[ch >> 8]; 345*cdf0e10cSrcweir return r = (sal_Int16)((address < UnicodeTypeNumberBlock) ? UnicodeTypeBlockValue[address] : 346*cdf0e10cSrcweir UnicodeTypeValue[((address - UnicodeTypeNumberBlock) << 8) + (ch & 0xff)]); 347*cdf0e10cSrcweir } 348*cdf0e10cSrcweir 349*cdf0e10cSrcweir sal_uInt8 SAL_CALL 350*cdf0e10cSrcweir unicode::getUnicodeDirection( const sal_Unicode ch ) { 351*cdf0e10cSrcweir static sal_Unicode c = 0x00; 352*cdf0e10cSrcweir static sal_uInt8 r = 0x00; 353*cdf0e10cSrcweir 354*cdf0e10cSrcweir if (ch == c) return r; 355*cdf0e10cSrcweir else c = ch; 356*cdf0e10cSrcweir 357*cdf0e10cSrcweir sal_Int16 address = UnicodeDirectionIndex[ch >> 8]; 358*cdf0e10cSrcweir return r = ((address < UnicodeDirectionNumberBlock) ? UnicodeDirectionBlockValue[address] : 359*cdf0e10cSrcweir UnicodeDirectionValue[((address - UnicodeDirectionNumberBlock) << 8) + (ch & 0xff)]); 360*cdf0e10cSrcweir 361*cdf0e10cSrcweir } 362*cdf0e10cSrcweir 363*cdf0e10cSrcweir #define bit(name) (1 << name) 364*cdf0e10cSrcweir 365*cdf0e10cSrcweir #define UPPERMASK bit(UnicodeType::UPPERCASE_LETTER) 366*cdf0e10cSrcweir 367*cdf0e10cSrcweir #define LOWERMASK bit(UnicodeType::LOWERCASE_LETTER) 368*cdf0e10cSrcweir 369*cdf0e10cSrcweir #define TITLEMASK bit(UnicodeType::TITLECASE_LETTER) 370*cdf0e10cSrcweir 371*cdf0e10cSrcweir #define DIGITMASK bit(UnicodeType::DECIMAL_DIGIT_NUMBER)|\ 372*cdf0e10cSrcweir bit(UnicodeType::LETTER_NUMBER)|\ 373*cdf0e10cSrcweir bit(UnicodeType::OTHER_NUMBER) 374*cdf0e10cSrcweir 375*cdf0e10cSrcweir #define ALPHAMASK UPPERMASK|LOWERMASK|TITLEMASK|\ 376*cdf0e10cSrcweir bit(UnicodeType::MODIFIER_LETTER)|\ 377*cdf0e10cSrcweir bit(UnicodeType::OTHER_LETTER) 378*cdf0e10cSrcweir 379*cdf0e10cSrcweir #define BASEMASK DIGITMASK|ALPHAMASK|\ 380*cdf0e10cSrcweir bit(UnicodeType::NON_SPACING_MARK)|\ 381*cdf0e10cSrcweir bit(UnicodeType::ENCLOSING_MARK)|\ 382*cdf0e10cSrcweir bit(UnicodeType::COMBINING_SPACING_MARK) 383*cdf0e10cSrcweir 384*cdf0e10cSrcweir #define SPACEMASK bit(UnicodeType::SPACE_SEPARATOR)|\ 385*cdf0e10cSrcweir bit(UnicodeType::LINE_SEPARATOR)|\ 386*cdf0e10cSrcweir bit(UnicodeType::PARAGRAPH_SEPARATOR) 387*cdf0e10cSrcweir 388*cdf0e10cSrcweir #define PUNCTUATIONMASK bit(UnicodeType::DASH_PUNCTUATION)|\ 389*cdf0e10cSrcweir bit(UnicodeType::INITIAL_PUNCTUATION)|\ 390*cdf0e10cSrcweir bit(UnicodeType::FINAL_PUNCTUATION)|\ 391*cdf0e10cSrcweir bit(UnicodeType::CONNECTOR_PUNCTUATION)|\ 392*cdf0e10cSrcweir bit(UnicodeType::OTHER_PUNCTUATION) 393*cdf0e10cSrcweir 394*cdf0e10cSrcweir #define SYMBOLMASK bit(UnicodeType::MATH_SYMBOL)|\ 395*cdf0e10cSrcweir bit(UnicodeType::CURRENCY_SYMBOL)|\ 396*cdf0e10cSrcweir bit(UnicodeType::MODIFIER_SYMBOL)|\ 397*cdf0e10cSrcweir bit(UnicodeType::OTHER_SYMBOL) 398*cdf0e10cSrcweir 399*cdf0e10cSrcweir #define PRINTMASK BASEMASK|SPACEMASK|PUNCTUATIONMASK|SYMBOLMASK 400*cdf0e10cSrcweir 401*cdf0e10cSrcweir #define CONTROLMASK bit(UnicodeType::CONTROL)|\ 402*cdf0e10cSrcweir bit(UnicodeType::FORMAT)|\ 403*cdf0e10cSrcweir bit(UnicodeType::LINE_SEPARATOR)|\ 404*cdf0e10cSrcweir bit(UnicodeType::PARAGRAPH_SEPARATOR) 405*cdf0e10cSrcweir 406*cdf0e10cSrcweir #define IsType(func, mask) \ 407*cdf0e10cSrcweir sal_Bool SAL_CALL func( const sal_Unicode ch) {\ 408*cdf0e10cSrcweir return (bit(getUnicodeType(ch)) & (mask)) != 0;\ 409*cdf0e10cSrcweir } 410*cdf0e10cSrcweir 411*cdf0e10cSrcweir IsType(unicode::isUpper, UPPERMASK) 412*cdf0e10cSrcweir IsType(unicode::isLower, LOWERMASK) 413*cdf0e10cSrcweir IsType(unicode::isTitle, DIGITMASK) 414*cdf0e10cSrcweir IsType(unicode::isControl, CONTROLMASK) 415*cdf0e10cSrcweir IsType(unicode::isPrint, PRINTMASK) 416*cdf0e10cSrcweir IsType(unicode::isAlpha, ALPHAMASK) 417*cdf0e10cSrcweir IsType(unicode::isDigit, DIGITMASK) 418*cdf0e10cSrcweir IsType(unicode::isAlphaDigit, ALPHAMASK|DIGITMASK) 419*cdf0e10cSrcweir IsType(unicode::isSpace, SPACEMASK) 420*cdf0e10cSrcweir IsType(unicode::isBase, BASEMASK) 421*cdf0e10cSrcweir IsType(unicode::isPunctuation, PUNCTUATIONMASK) 422*cdf0e10cSrcweir 423*cdf0e10cSrcweir #define CONTROLSPACE bit(0x09)|bit(0x0a)|bit(0x0b)|bit(0x0c)|bit(0x0d)|\ 424*cdf0e10cSrcweir bit(0x1c)|bit(0x1d)|bit(0x1e)|bit(0x1f) 425*cdf0e10cSrcweir 426*cdf0e10cSrcweir sal_Bool SAL_CALL unicode::isWhiteSpace( const sal_Unicode ch) { 427*cdf0e10cSrcweir return (ch != 0xa0 && isSpace(ch)) || (ch <= 0x1F && (bit(ch) & (CONTROLSPACE))); 428*cdf0e10cSrcweir } 429*cdf0e10cSrcweir 430*cdf0e10cSrcweir sal_Int32 SAL_CALL unicode::getCharType( const sal_Unicode ch ) 431*cdf0e10cSrcweir { 432*cdf0e10cSrcweir using namespace ::com::sun::star::i18n::KCharacterType; 433*cdf0e10cSrcweir 434*cdf0e10cSrcweir switch ( getUnicodeType( ch ) ) { 435*cdf0e10cSrcweir // Upper 436*cdf0e10cSrcweir case UnicodeType::UPPERCASE_LETTER : 437*cdf0e10cSrcweir return UPPER|LETTER|PRINTABLE|BASE_FORM; 438*cdf0e10cSrcweir 439*cdf0e10cSrcweir // Lower 440*cdf0e10cSrcweir case UnicodeType::LOWERCASE_LETTER : 441*cdf0e10cSrcweir return LOWER|LETTER|PRINTABLE|BASE_FORM; 442*cdf0e10cSrcweir 443*cdf0e10cSrcweir // Title 444*cdf0e10cSrcweir case UnicodeType::TITLECASE_LETTER : 445*cdf0e10cSrcweir return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM; 446*cdf0e10cSrcweir 447*cdf0e10cSrcweir // Letter 448*cdf0e10cSrcweir case UnicodeType::MODIFIER_LETTER : 449*cdf0e10cSrcweir case UnicodeType::OTHER_LETTER : 450*cdf0e10cSrcweir return LETTER|PRINTABLE|BASE_FORM; 451*cdf0e10cSrcweir 452*cdf0e10cSrcweir // Digit 453*cdf0e10cSrcweir case UnicodeType::DECIMAL_DIGIT_NUMBER: 454*cdf0e10cSrcweir case UnicodeType::LETTER_NUMBER: 455*cdf0e10cSrcweir case UnicodeType::OTHER_NUMBER: 456*cdf0e10cSrcweir return DIGIT|PRINTABLE|BASE_FORM; 457*cdf0e10cSrcweir 458*cdf0e10cSrcweir // Base 459*cdf0e10cSrcweir case UnicodeType::NON_SPACING_MARK: 460*cdf0e10cSrcweir case UnicodeType::ENCLOSING_MARK: 461*cdf0e10cSrcweir case UnicodeType::COMBINING_SPACING_MARK: 462*cdf0e10cSrcweir return BASE_FORM|PRINTABLE; 463*cdf0e10cSrcweir 464*cdf0e10cSrcweir // Print 465*cdf0e10cSrcweir case UnicodeType::SPACE_SEPARATOR: 466*cdf0e10cSrcweir 467*cdf0e10cSrcweir case UnicodeType::DASH_PUNCTUATION: 468*cdf0e10cSrcweir case UnicodeType::INITIAL_PUNCTUATION: 469*cdf0e10cSrcweir case UnicodeType::FINAL_PUNCTUATION: 470*cdf0e10cSrcweir case UnicodeType::CONNECTOR_PUNCTUATION: 471*cdf0e10cSrcweir case UnicodeType::OTHER_PUNCTUATION: 472*cdf0e10cSrcweir 473*cdf0e10cSrcweir case UnicodeType::MATH_SYMBOL: 474*cdf0e10cSrcweir case UnicodeType::CURRENCY_SYMBOL: 475*cdf0e10cSrcweir case UnicodeType::MODIFIER_SYMBOL: 476*cdf0e10cSrcweir case UnicodeType::OTHER_SYMBOL: 477*cdf0e10cSrcweir return PRINTABLE; 478*cdf0e10cSrcweir 479*cdf0e10cSrcweir // Control 480*cdf0e10cSrcweir case UnicodeType::CONTROL: 481*cdf0e10cSrcweir case UnicodeType::FORMAT: 482*cdf0e10cSrcweir return CONTROL; 483*cdf0e10cSrcweir 484*cdf0e10cSrcweir case UnicodeType::LINE_SEPARATOR: 485*cdf0e10cSrcweir case UnicodeType::PARAGRAPH_SEPARATOR: 486*cdf0e10cSrcweir return CONTROL|PRINTABLE; 487*cdf0e10cSrcweir 488*cdf0e10cSrcweir // for all others 489*cdf0e10cSrcweir default: 490*cdf0e10cSrcweir return 0; 491*cdf0e10cSrcweir } 492*cdf0e10cSrcweir } 493*cdf0e10cSrcweir 494*cdf0e10cSrcweir 495