/**************************************************************
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 *************************************************************/



#include <com/sun/star/i18n/UnicodeType.hpp>
#include <com/sun/star/i18n/KCharacterType.hpp>
#include <i18nutil/unicode.hxx>
#include "unicode_data.h"

using namespace ::com::sun::star::i18n;

// Row of the default table whose three ScriptTypeList members all name the
// same UnicodeScript enumerator.
#define SELF_MAPPED_SCRIPT(script) \
    { UnicodeScript_##script, UnicodeScript_##script, UnicodeScript_##script }

// Table used by unicode::getUnicodeScriptType() when the caller passes no
// list of its own.  Every row but the last maps one script onto itself;
// the trailing comment on each row is its index in the array.
static ScriptTypeList defaultTypeList[] = {
    SELF_MAPPED_SCRIPT(kBasicLatin),                        // 0
    SELF_MAPPED_SCRIPT(kLatin1Supplement),                  // 1
    SELF_MAPPED_SCRIPT(kLatinExtendedA),                    // 2
    SELF_MAPPED_SCRIPT(kLatinExtendedB),                    // 3
    SELF_MAPPED_SCRIPT(kIPAExtension),                      // 4
    SELF_MAPPED_SCRIPT(kSpacingModifier),                   // 5
    SELF_MAPPED_SCRIPT(kCombiningDiacritical),              // 6
    SELF_MAPPED_SCRIPT(kGreek),                             // 7
    SELF_MAPPED_SCRIPT(kCyrillic),                          // 8
    SELF_MAPPED_SCRIPT(kArmenian),                          // 9
    SELF_MAPPED_SCRIPT(kHebrew),                            // 10
    SELF_MAPPED_SCRIPT(kArabic),                            // 11
    SELF_MAPPED_SCRIPT(kSyriac),                            // 12
    SELF_MAPPED_SCRIPT(kThaana),                            // 13
    SELF_MAPPED_SCRIPT(kDevanagari),                        // 14
    SELF_MAPPED_SCRIPT(kBengali),                           // 15
    SELF_MAPPED_SCRIPT(kGurmukhi),                          // 16
    SELF_MAPPED_SCRIPT(kGujarati),                          // 17
    SELF_MAPPED_SCRIPT(kOriya),                             // 18
    SELF_MAPPED_SCRIPT(kTamil),                             // 19
    SELF_MAPPED_SCRIPT(kTelugu),                            // 20
    SELF_MAPPED_SCRIPT(kKannada),                           // 21
    SELF_MAPPED_SCRIPT(kMalayalam),                         // 22
    SELF_MAPPED_SCRIPT(kSinhala),                           // 23
    SELF_MAPPED_SCRIPT(kThai),                              // 24
    SELF_MAPPED_SCRIPT(kLao),                               // 25
    SELF_MAPPED_SCRIPT(kTibetan),                           // 26
    SELF_MAPPED_SCRIPT(kMyanmar),                           // 27
    SELF_MAPPED_SCRIPT(kGeorgian),                          // 28
    SELF_MAPPED_SCRIPT(kHangulJamo),                        // 29
    SELF_MAPPED_SCRIPT(kEthiopic),                          // 30
    SELF_MAPPED_SCRIPT(kCherokee),                          // 31
    SELF_MAPPED_SCRIPT(kUnifiedCanadianAboriginalSyllabics),// 32
    SELF_MAPPED_SCRIPT(kOgham),                             // 33
    SELF_MAPPED_SCRIPT(kRunic),                             // 34
    SELF_MAPPED_SCRIPT(kKhmer),                             // 35
    SELF_MAPPED_SCRIPT(kMongolian),                         // 36
    SELF_MAPPED_SCRIPT(kLatinExtendedAdditional),           // 37
    SELF_MAPPED_SCRIPT(kGreekExtended),                     // 38
    SELF_MAPPED_SCRIPT(kGeneralPunctuation),                // 39
    SELF_MAPPED_SCRIPT(kSuperSubScript),                    // 40
    SELF_MAPPED_SCRIPT(kCurrencySymbolScript),              // 41
    SELF_MAPPED_SCRIPT(kSymbolCombiningMark),               // 42
    SELF_MAPPED_SCRIPT(kLetterlikeSymbol),                  // 43
    SELF_MAPPED_SCRIPT(kNumberForm),                        // 44
    SELF_MAPPED_SCRIPT(kArrow),                             // 45
    SELF_MAPPED_SCRIPT(kMathOperator),                      // 46
    SELF_MAPPED_SCRIPT(kMiscTechnical),                     // 47
    SELF_MAPPED_SCRIPT(kControlPicture),                    // 48
    SELF_MAPPED_SCRIPT(kOpticalCharacter),                  // 49
    SELF_MAPPED_SCRIPT(kEnclosedAlphanumeric),              // 50
    SELF_MAPPED_SCRIPT(kBoxDrawing),                        // 51
    SELF_MAPPED_SCRIPT(kBlockElement),                      // 52
    SELF_MAPPED_SCRIPT(kGeometricShape),                    // 53
    SELF_MAPPED_SCRIPT(kMiscSymbol),                        // 54
    SELF_MAPPED_SCRIPT(kDingbat),                           // 55
    SELF_MAPPED_SCRIPT(kBraillePatterns),                   // 56
    SELF_MAPPED_SCRIPT(kCJKRadicalsSupplement),             // 57
    SELF_MAPPED_SCRIPT(kKangxiRadicals),                    // 58
    SELF_MAPPED_SCRIPT(kIdeographicDescriptionCharacters),  // 59
    SELF_MAPPED_SCRIPT(kCJKSymbolPunctuation),              // 60
    SELF_MAPPED_SCRIPT(kHiragana),                          // 61
    SELF_MAPPED_SCRIPT(kKatakana),                          // 62
    SELF_MAPPED_SCRIPT(kBopomofo),                          // 63
    SELF_MAPPED_SCRIPT(kHangulCompatibilityJamo),           // 64
    SELF_MAPPED_SCRIPT(kKanbun),                            // 65
    SELF_MAPPED_SCRIPT(kBopomofoExtended),                  // 66
    SELF_MAPPED_SCRIPT(kEnclosedCJKLetterMonth),            // 67
    SELF_MAPPED_SCRIPT(kCJKCompatibility),                  // 68
    SELF_MAPPED_SCRIPT(k_CJKUnifiedIdeographsExtensionA),   // 69
    SELF_MAPPED_SCRIPT(kCJKUnifiedIdeograph),               // 70
    SELF_MAPPED_SCRIPT(kYiSyllables),                       // 71
    SELF_MAPPED_SCRIPT(kYiRadicals),                        // 72
    SELF_MAPPED_SCRIPT(kHangulSyllable),                    // 73
    SELF_MAPPED_SCRIPT(kHighSurrogate),                     // 74
    SELF_MAPPED_SCRIPT(kHighPrivateUseSurrogate),           // 75
    SELF_MAPPED_SCRIPT(kLowSurrogate),                      // 76
    SELF_MAPPED_SCRIPT(kPrivateUse),                        // 77
    SELF_MAPPED_SCRIPT(kCJKCompatibilityIdeograph),         // 78
    SELF_MAPPED_SCRIPT(kAlphabeticPresentation),            // 79
    SELF_MAPPED_SCRIPT(kArabicPresentationA),               // 80
    SELF_MAPPED_SCRIPT(kCombiningHalfMark),                 // 81
    SELF_MAPPED_SCRIPT(kCJKCompatibilityForm),              // 82
    SELF_MAPPED_SCRIPT(kSmallFormVariant),                  // 83
    SELF_MAPPED_SCRIPT(kArabicPresentationB),               // 84
    SELF_MAPPED_SCRIPT(kNoScript),                          // 85
    SELF_MAPPED_SCRIPT(kHalfwidthFullwidthForm),            // 86
    // Terminating row: its first two members are kScriptCount and its third
    // is kNoScript.  getUnicodeScriptType() stops scanning once an entry's
    // `to` member is no longer below UnicodeScript_kScriptCount.
    { UnicodeScript_kScriptCount,
      UnicodeScript_kScriptCount,
      UnicodeScript_kNoScript }                             // 87
};

#undef SELF_MAPPED_SCRIPT

/**
 * Maps a character to a script value using a list of script ranges.
 *
 * @param ch           character to classify
 * @param typeList     list to scan; when null, defaultTypeList is used and
 *                     unknownType is replaced by UnicodeScript_kNoScript.
 *                     The scan only stops on an entry whose `to` member is
 *                     not below UnicodeScript_kScriptCount, so the list must
 *                     contain such an entry.
 * @param unknownType  value returned when no entry covers ch
 * @return the `value` member of the first entry whose `to` script ends at or
 *         after ch, provided ch is not below the start of that entry's
 *         `from` script; otherwise unknownType
 */
sal_Int16 SAL_CALL
unicode::getUnicodeScriptType( const sal_Unicode ch, ScriptTypeList* typeList, sal_Int16 unknownType ) {

    if (!typeList) {
        unknownType = UnicodeScript_kNoScript;
        typeList = defaultTypeList;
    }

    // Walk forward until the terminator is hit or the current entry's
    // upper script boundary reaches ch.
    sal_Int16 pos = 0;
    sal_Int16 upperScript = typeList[pos].to;
    for (;;) {
        if (upperScript >= UnicodeScript_kScriptCount)
            break;
        if (ch <= UnicodeScriptType[upperScript][UnicodeScriptTypeTo])
            break;
        ++pos;
        upperScript = typeList[pos].to;
    }

    // Ran off the end of the list: nothing covers ch.
    if (upperScript >= UnicodeScript_kScriptCount)
        return unknownType;

    // ch lies in a gap below the start of the candidate entry.
    if (ch < UnicodeScriptType[typeList[pos].from][UnicodeScriptTypeFrom])
        return unknownType;

    return typeList[pos].value;
}

/**
 * Tests whether ch lies inside the code range recorded for a script.
 *
 * @param ch    character to test
 * @param type  row index into UnicodeScriptType; not range-checked here
 * @return non-zero when ch is within [from, to] of that row, both ends
 *         inclusive
 */
sal_Bool SAL_CALL
unicode::isUnicodeScriptType( const sal_Unicode ch, sal_Int16 type) {
    return !(ch < UnicodeScriptType[type][UnicodeScriptTypeFrom] ||
             ch > UnicodeScriptType[type][UnicodeScriptTypeTo]);
}

/**
 * Returns the lower bound stored for a script in UnicodeScriptType.
 *
 * @param type  script used as row index; not range-checked here
 * @return the UnicodeScriptTypeFrom column of that row
 */
sal_Unicode SAL_CALL
unicode::getUnicodeScriptStart( UnicodeScript type) {
    const sal_Unicode firstChar = UnicodeScriptType[type][UnicodeScriptTypeFrom];
    return firstChar;
}

/**
 * Returns the upper bound stored for a script in UnicodeScriptType.
 *
 * @param type  script used as row index; not range-checked here
 * @return the UnicodeScriptTypeTo column of that row
 */
sal_Unicode SAL_CALL
unicode::getUnicodeScriptEnd( UnicodeScript type) {
    const sal_Unicode lastChar = UnicodeScriptType[type][UnicodeScriptTypeTo];
    return lastChar;
}

/**
 * Returns the type number stored for a character in the Unicode type tables
 * (the values callers compare against the UnicodeType constants).
 *
 * The lookup has two levels: UnicodeTypeIndex is indexed by the high byte of
 * ch.  An index entry below UnicodeTypeNumberBlock means the whole 256-char
 * page shares one value taken from UnicodeTypeBlockValue; any other entry
 * selects a 256-entry page inside UnicodeTypeValue, indexed by the low byte.
 *
 * The former one-entry static cache was removed.  It started out as
 * (char 0 -> result 0), so U+0000 was answered with 0 without consulting the
 * tables until some other character had been looked up, and the two
 * unsynchronised statics could pair one character with another character's
 * result under concurrent calls.  The lookup is two array reads, so the
 * cache bought nothing worth those risks.
 *
 * @param ch  character to classify
 * @return the table value for ch
 */
sal_Int16 SAL_CALL
unicode::getUnicodeType( const sal_Unicode ch ) {
    const sal_Int16 address = UnicodeTypeIndex[ch >> 8];
    return (sal_Int16)((address < UnicodeTypeNumberBlock) ? UnicodeTypeBlockValue[address] :
        UnicodeTypeValue[((address - UnicodeTypeNumberBlock) << 8) + (ch & 0xff)]);
}

/**
 * Returns the value stored for a character in the Unicode direction tables
 * (presumably a bidirectional category code; the encoding is defined by the
 * generated data, not here).
 *
 * The lookup has two levels: UnicodeDirectionIndex is indexed by the high
 * byte of ch.  An index entry below UnicodeDirectionNumberBlock means the
 * whole 256-char page shares one value from UnicodeDirectionBlockValue; any
 * other entry selects a 256-entry page inside UnicodeDirectionValue, indexed
 * by the low byte.
 *
 * The former one-entry static cache was removed.  It started out as
 * (char 0 -> result 0), so U+0000 was answered with 0 without consulting the
 * tables until some other character had been looked up, and the two
 * unsynchronised statics could pair one character with another character's
 * result under concurrent calls.
 *
 * @param ch  character to look up
 * @return the table value for ch
 */
sal_uInt8 SAL_CALL
unicode::getUnicodeDirection( const sal_Unicode ch ) {
    const sal_Int16 address = UnicodeDirectionIndex[ch >> 8];
    return ((address < UnicodeDirectionNumberBlock) ? UnicodeDirectionBlockValue[address] :
        UnicodeDirectionValue[((address - UnicodeDirectionNumberBlock) << 8) + (ch & 0xff)]);
}

// Single-bit mask for a small non-negative number (a UnicodeType value, or a
// control-character code below 0x20).  The argument is parenthesised so that
// an expression argument is shifted as a whole, and the constant is unsigned
// so that bit(0x1f) does not shift into the sign bit of an int.
#define bit(name)   (1u << (name))

// Every multi-term mask below is wrapped in parentheses so it can be combined
// with & or other operators without changing the grouping of its terms.

#define UPPERMASK   bit(UnicodeType::UPPERCASE_LETTER)

#define LOWERMASK   bit(UnicodeType::LOWERCASE_LETTER)

#define TITLEMASK   bit(UnicodeType::TITLECASE_LETTER)

// Decimal digits, letter numbers and other numbers.
#define DIGITMASK   (bit(UnicodeType::DECIMAL_DIGIT_NUMBER)|\
            bit(UnicodeType::LETTER_NUMBER)|\
            bit(UnicodeType::OTHER_NUMBER))

// All letter categories.
#define ALPHAMASK   (UPPERMASK|LOWERMASK|TITLEMASK|\
            bit(UnicodeType::MODIFIER_LETTER)|\
            bit(UnicodeType::OTHER_LETTER))

// Digits, letters and the three mark categories.
#define BASEMASK    (DIGITMASK|ALPHAMASK|\
            bit(UnicodeType::NON_SPACING_MARK)|\
            bit(UnicodeType::ENCLOSING_MARK)|\
            bit(UnicodeType::COMBINING_SPACING_MARK))

// The three separator categories.
#define SPACEMASK   (bit(UnicodeType::SPACE_SEPARATOR)|\
            bit(UnicodeType::LINE_SEPARATOR)|\
            bit(UnicodeType::PARAGRAPH_SEPARATOR))

#define PUNCTUATIONMASK (bit(UnicodeType::DASH_PUNCTUATION)|\
            bit(UnicodeType::INITIAL_PUNCTUATION)|\
            bit(UnicodeType::FINAL_PUNCTUATION)|\
            bit(UnicodeType::CONNECTOR_PUNCTUATION)|\
            bit(UnicodeType::OTHER_PUNCTUATION))

#define SYMBOLMASK  (bit(UnicodeType::MATH_SYMBOL)|\
            bit(UnicodeType::CURRENCY_SYMBOL)|\
            bit(UnicodeType::MODIFIER_SYMBOL)|\
            bit(UnicodeType::OTHER_SYMBOL))

#define PRINTMASK   (BASEMASK|SPACEMASK|PUNCTUATIONMASK|SYMBOLMASK)

// Line and paragraph separators are members of both SPACEMASK and CONTROLMASK.
#define CONTROLMASK (bit(UnicodeType::CONTROL)|\
            bit(UnicodeType::FORMAT)|\
            bit(UnicodeType::LINE_SEPARATOR)|\
            bit(UnicodeType::PARAGRAPH_SEPARATOR))

// Defines the predicate `func`: it returns non-zero when the type number that
// getUnicodeType() reports for ch has its bit set in `mask`.
#define IsType(func, mask)  \
sal_Bool SAL_CALL func( const sal_Unicode ch) {\
    return (bit(getUnicodeType(ch)) & (mask)) != 0;\
}

IsType(unicode::isUpper, UPPERMASK)
IsType(unicode::isLower, LOWERMASK)
// isTitle must test the title-case bit; it used DIGITMASK before, which made
// it report digits instead of title-case letters.
IsType(unicode::isTitle, TITLEMASK)
IsType(unicode::isControl, CONTROLMASK)
IsType(unicode::isPrint, PRINTMASK)
IsType(unicode::isAlpha, ALPHAMASK)
IsType(unicode::isDigit, DIGITMASK)
IsType(unicode::isAlphaDigit, ALPHAMASK|DIGITMASK)
IsType(unicode::isSpace, SPACEMASK)
IsType(unicode::isBase, BASEMASK)
IsType(unicode::isPunctuation, PUNCTUATIONMASK)

// Bit set of the control codes counted as white space:
// 0x09-0x0d and 0x1c-0x1f.
#define CONTROLSPACE    bit(0x09)|bit(0x0a)|bit(0x0b)|bit(0x0c)|bit(0x0d)|\
            bit(0x1c)|bit(0x1d)|bit(0x1e)|bit(0x1f)

/**
 * Tests whether a character counts as white space.
 *
 * @param ch  character to test
 * @return non-zero when isSpace(ch) holds and ch is not 0xa0, or when ch is
 *         one of the control codes listed in CONTROLSPACE
 */
sal_Bool SAL_CALL unicode::isWhiteSpace( const sal_Unicode ch) {
    // Separator characters qualify, with 0xa0 explicitly excluded.
    if (ch != 0xa0 && isSpace(ch))
        return true;

    // Only codes up to 0x1F can be looked up in the CONTROLSPACE bit set.
    if (ch > 0x1F)
        return false;

    return (bit(ch) & (CONTROLSPACE)) != 0;
}

/**
 * Translates the UnicodeType value of a character into a combination of
 * KCharacterType flags.
 *
 * @param ch  character to classify
 * @return the OR-ed KCharacterType flags for the character's type, or 0 for
 *         any type that is not listed in the switch below
 */
sal_Int32 SAL_CALL unicode::getCharType( const sal_Unicode ch )
{
    using namespace ::com::sun::star::i18n::KCharacterType;

    // Stays 0 for every type without an explicit case.
    sal_Int32 nFlags = 0;

    switch ( getUnicodeType( ch ) ) {
    // Cased letters
    case UnicodeType::UPPERCASE_LETTER :
        nFlags = UPPER|LETTER|PRINTABLE|BASE_FORM;
        break;

    case UnicodeType::LOWERCASE_LETTER :
        nFlags = LOWER|LETTER|PRINTABLE|BASE_FORM;
        break;

    case UnicodeType::TITLECASE_LETTER :
        nFlags = TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
        break;

    // Letters without case
    case UnicodeType::MODIFIER_LETTER :
    case UnicodeType::OTHER_LETTER :
        nFlags = LETTER|PRINTABLE|BASE_FORM;
        break;

    // Numbers of any kind
    case UnicodeType::DECIMAL_DIGIT_NUMBER:
    case UnicodeType::LETTER_NUMBER:
    case UnicodeType::OTHER_NUMBER:
        nFlags = DIGIT|PRINTABLE|BASE_FORM;
        break;

    // Marks
    case UnicodeType::NON_SPACING_MARK:
    case UnicodeType::ENCLOSING_MARK:
    case UnicodeType::COMBINING_SPACING_MARK:
        nFlags = BASE_FORM|PRINTABLE;
        break;

    // Space separator, punctuation and symbols: printable only
    case UnicodeType::SPACE_SEPARATOR:
    case UnicodeType::DASH_PUNCTUATION:
    case UnicodeType::INITIAL_PUNCTUATION:
    case UnicodeType::FINAL_PUNCTUATION:
    case UnicodeType::CONNECTOR_PUNCTUATION:
    case UnicodeType::OTHER_PUNCTUATION:
    case UnicodeType::MATH_SYMBOL:
    case UnicodeType::CURRENCY_SYMBOL:
    case UnicodeType::MODIFIER_SYMBOL:
    case UnicodeType::OTHER_SYMBOL:
        nFlags = PRINTABLE;
        break;

    // Control and format characters
    case UnicodeType::CONTROL:
    case UnicodeType::FORMAT:
        nFlags = CONTROL;
        break;

    // Line and paragraph separators are both control and printable
    case UnicodeType::LINE_SEPARATOR:
    case UnicodeType::PARAGRAPH_SEPARATOR:
        nFlags = CONTROL|PRINTABLE;
        break;

    default:
        break;
    }

    return nFlags;
}


