1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #include "unichars.h" 25 #include "osl/diagnose.h" 26 #include "sal/types.h" 27 28 int ImplIsNoncharacter(sal_uInt32 nUtf32) 29 { 30 /* All code points that are noncharacters, as of Unicode 3.1.1. */ 31 return (nUtf32 >= 0xFDD0 && nUtf32 <= 0xFDEF) 32 || (nUtf32 & 0xFFFF) >= 0xFFFE 33 || nUtf32 > 0x10FFFF; 34 } 35 36 int ImplIsControlOrFormat(sal_uInt32 nUtf32) 37 { 38 /* All code points of <http://www.unicode.org/Public/UNIDATA/ 39 UnicodeData.txt>, Version 3.1.1, that have a General Category of Cc 40 (Other, Control) or Cf (Other, Format). 41 */ 42 return nUtf32 <= 0x001F 43 || (nUtf32 >= 0x007F && nUtf32 <= 0x009F) 44 || nUtf32 == 0x070F /* SYRIAC ABBREVIATION MARK */ 45 || nUtf32 == 0x180B /* MONGOLIAN FREE VARIATION SELECTOR ONE */ 46 || nUtf32 == 0x180C /* MONGOLIAN FREE VARIATION SELECTOR TWO */ 47 || nUtf32 == 0x180D /* MONGOLIAN FREE VARIATION SELECTOR THREE */ 48 || nUtf32 == 0x180E /* MONGOLIAN VOWEL SEPARATOR */ 49 || nUtf32 == 0x200C /* ZERO WIDTH NON-JOINER */ 50 || nUtf32 == 0x200D /* ZERO WIDTH JOINER */ 51 || nUtf32 == 0x200E /* LEFT-TO-RIGHT MARK */ 52 || nUtf32 == 0x200F /* RIGHT-TO-LEFT MARK */ 53 || nUtf32 == 0x202A /* LEFT-TO-RIGHT EMBEDDING */ 54 || nUtf32 == 0x202B /* RIGHT-TO-LEFT EMBEDDING */ 55 || nUtf32 == 0x202C /* POP DIRECTIONAL FORMATTING */ 56 || nUtf32 == 0x202D /* LEFT-TO-RIGHT OVERRIDE */ 57 || nUtf32 == 0x202E /* RIGHT-TO-LEFT OVERRIDE */ 58 || nUtf32 == 0x206A /* INHIBIT SYMMETRIC SWAPPING */ 59 || nUtf32 == 0x206B /* ACTIVATE SYMMETRIC SWAPPING */ 60 || nUtf32 == 0x206C /* INHIBIT ARABIC FORM SHAPING */ 61 || nUtf32 == 0x206D /* ACTIVATE ARABIC FORM SHAPING */ 62 || nUtf32 == 0x206E /* NATIONAL DIGIT SHAPES */ 63 || nUtf32 == 0x206F /* NOMINAL DIGIT SHAPES */ 64 || nUtf32 == 0xFEFF /* ZERO WIDTH NO-BREAK SPACE */ 65 || nUtf32 == 0xFFF9 /* INTERLINEAR ANNOTATION ANCHOR */ 66 || nUtf32 == 0xFFFA /* INTERLINEAR ANNOTATION SEPARATOR */ 67 || nUtf32 == 0xFFFB /* INTERLINEAR ANNOTATION TERMINATOR */ 68 || nUtf32 == 0x1D173 /* MUSICAL SYMBOL BEGIN BEAM */ 69 || nUtf32 == 0x1D174 /* MUSICAL SYMBOL END BEAM */ 70 || nUtf32 == 0x1D175 /* MUSICAL SYMBOL BEGIN TIE */ 71 || nUtf32 == 0x1D176 /* MUSICAL SYMBOL END TIE */ 72 || nUtf32 == 0x1D177 /* MUSICAL SYMBOL BEGIN SLUR */ 73 || nUtf32 == 0x1D178 /* MUSICAL SYMBOL END SLUR */ 74 || nUtf32 == 0x1D179 /* MUSICAL SYMBOL BEGIN PHRASE */ 75 || nUtf32 == 0x1D17A /* MUSICAL SYMBOL END PHRASE */ 76 || nUtf32 == 0xE0001 /* LANGUAGE TAG */ 77 || (nUtf32 >= 0xE0020 && nUtf32 <= 0xE007F); 78 } 79 80 int ImplIsHighSurrogate(sal_uInt32 nUtf32) 81 { 82 /* All code points that are high-surrogates, as of Unicode 3.1.1. */ 83 return nUtf32 >= 0xD800 && nUtf32 <= 0xDBFF; 84 } 85 86 int ImplIsLowSurrogate(sal_uInt32 nUtf32) 87 { 88 /* All code points that are low-surrogates, as of Unicode 3.1.1. */ 89 return nUtf32 >= 0xDC00 && nUtf32 <= 0xDFFF; 90 } 91 92 int ImplIsPrivateUse(sal_uInt32 nUtf32) 93 { 94 /* All code points of <http://www.unicode.org/Public/UNIDATA/ 95 UnicodeData.txt>, Version 3.1.1, that have a General Category of Co 96 (Other, Private Use). 97 */ 98 return (nUtf32 >= 0xE000 && nUtf32 <= 0xF8FF) 99 || (nUtf32 >= 0xF0000 && nUtf32 <= 0xFFFFD) 100 || (nUtf32 >= 0x100000 && nUtf32 <= 0x10FFFD); 101 } 102 103 int ImplIsZeroWidth(sal_uInt32 nUtf32) 104 { 105 /* All code points of <http://www.unicode.org/Public/UNIDATA/ 106 UnicodeData.txt>, Version 3.1.1, that have "ZERO WIDTH" in their 107 Character name. 108 */ 109 return nUtf32 == 0x200B /* ZERO WIDTH SPACE */ 110 || nUtf32 == 0x200C /* ZERO WIDTH NON-JOINER */ 111 || nUtf32 == 0x200D /* ZERO WIDTH JOINER */ 112 || nUtf32 == 0xFEFF; /* ZEOR WIDTH NO-BREAK SPACE */ 113 } 114 115 sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32) 116 { 117 OSL_ENSURE(nUtf32 >= 0x10000, "specification violation"); 118 return ((nUtf32 - 0x10000) >> 10) | 0xD800; 119 } 120 121 sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32) 122 { 123 OSL_ENSURE(nUtf32 >= 0x10000, "specification violation"); 124 return ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00; 125 } 126 127 sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow) 128 { 129 OSL_ENSURE(ImplIsHighSurrogate(nHigh) && ImplIsLowSurrogate(nLow), 130 "specification violation"); 131 return (((nHigh & 0x3FF) << 10) | (nLow & 0x3FF)) + 0x10000; 132 } 133