1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #include "tenchelp.h" 25 #include "unichars.h" 26 #include "rtl/textcvt.h" 27 #include "sal/types.h" 28 29 static sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, 30 sal_Char * pBuf, 31 sal_Size nMaxLen); 32 33 static sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, 34 sal_Char * pBuf, 35 sal_Size nMaxLen); 36 37 static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags); 38 39 sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, 40 sal_Char * pBuf, 41 sal_Size nMaxLen) 42 { 43 if (nMaxLen == 0) 44 return sal_False; 45 switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) 46 { 47 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: 48 *pBuf = 0x00; 49 break; 50 51 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: 52 default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */ 53 *pBuf = 0x3F; 54 break; 55 56 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: 57 *pBuf = 0x5F; 58 break; 59 } 60 return sal_True; 61 } 62 63 sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, 64 sal_Char * pBuf, 65 sal_Size nMaxLen) 66 { 67 if (nMaxLen == 0) 68 return sal_False; 69 switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) 70 { 71 case RTL_UNICODETOTEXT_FLAGS_INVALID_0: 72 *pBuf = 0x00; 73 break; 74 75 case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: 76 default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */ 77 *pBuf = 0x3F; 78 break; 79 80 case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: 81 *pBuf = 0x5F; 82 break; 83 } 84 return sal_True; 85 } 86 87 int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags ) 88 { 89 return 90 ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0 91 && ImplIsZeroWidth(c)) 92 || ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0 93 && ImplIsControlOrFormat(c)) 94 || ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0 95 && ImplIsPrivateUse(c)); 96 } 97 98 /* ======================================================================= */ 99 100 sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags) 101 { 102 return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) 103 == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ? 104 RTL_TEXTCVT_BYTE_PRIVATE_START + cChar : 105 RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 106 } 107 108 /* ----------------------------------------------------------------------- */ 109 110 sal_Bool 111 ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData, 112 sal_Unicode const ** ppSrcBuf, 113 sal_Unicode const * pEndSrcBuf, 114 sal_Char ** ppDestBuf, 115 sal_Char const * pEndDestBuf, 116 sal_uInt32 nFlags, 117 sal_uInt32 * pInfo) 118 { 119 sal_Unicode c = **ppSrcBuf; 120 121 (void) pData; /* unused */ 122 123 /* Should the private character map to one byte */ 124 if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) ) 125 { 126 if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 ) 127 { 128 **ppDestBuf = (sal_Char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START); 129 (*ppDestBuf)++; 130 (*ppSrcBuf)++; 131 return sal_True; 132 } 133 } 134 135 /* Should this character ignored (Private, Non Spacing, Control) */ 136 if ( ImplIsUnicodeIgnoreChar( c, nFlags ) ) 137 { 138 (*ppSrcBuf)++; 139 return sal_True; 140 } 141 142 /* Surrogates Characters should result in */ 143 /* one replacement character */ 144 if (ImplIsHighSurrogate(c)) 145 { 146 if ( *ppSrcBuf == pEndSrcBuf ) 147 { 148 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; 149 return sal_False; 150 } 151 152 c = *((*ppSrcBuf)+1); 153 if (ImplIsLowSurrogate(c)) 154 (*ppSrcBuf)++; 155 else 156 { 157 *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID; 158 if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR ) 159 { 160 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; 161 return sal_False; 162 } 163 else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE ) 164 { 165 (*ppSrcBuf)++; 166 return sal_True; 167 } 168 else if (ImplGetInvalidAsciiMultiByte(nFlags, 169 *ppDestBuf, 170 pEndDestBuf - *ppDestBuf)) 171 { 172 ++*ppSrcBuf; 173 ++*ppDestBuf; 174 return sal_True; 175 } 176 else 177 { 178 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR 179 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 180 return sal_False; 181 } 182 } 183 } 184 185 *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED; 186 if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR ) 187 { 188 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; 189 return sal_False; 190 } 191 else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE ) 192 (*ppSrcBuf)++; 193 else if (ImplGetUndefinedAsciiMultiByte(nFlags, 194 *ppDestBuf, 195 pEndDestBuf - *ppDestBuf)) 196 { 197 ++*ppSrcBuf; 198 ++*ppDestBuf; 199 } 200 else 201 { 202 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR 203 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 204 return sal_False; 205 } 206 207 return sal_True; 208 } 209 210