1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #include "tenchelp.h" 29 #include "unichars.h" 30 #include "rtl/textcvt.h" 31 #include "sal/types.h" 32 33 static sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, 34 sal_Char * pBuf, 35 sal_Size nMaxLen); 36 37 static sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, 38 sal_Char * pBuf, 39 sal_Size nMaxLen); 40 41 static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags); 42 43 sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, 44 sal_Char * pBuf, 45 sal_Size nMaxLen) 46 { 47 if (nMaxLen == 0) 48 return sal_False; 49 switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) 50 { 51 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: 52 *pBuf = 0x00; 53 break; 54 55 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: 56 default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */ 57 *pBuf = 0x3F; 58 break; 59 60 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: 61 *pBuf = 0x5F; 62 break; 63 } 64 return sal_True; 65 } 66 67 sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, 68 sal_Char * pBuf, 69 sal_Size nMaxLen) 70 { 71 if (nMaxLen == 0) 72 return sal_False; 73 switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) 74 { 75 case RTL_UNICODETOTEXT_FLAGS_INVALID_0: 76 *pBuf = 0x00; 77 break; 78 79 case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: 80 default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */ 81 *pBuf = 0x3F; 82 break; 83 84 case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: 85 *pBuf = 0x5F; 86 break; 87 } 88 return sal_True; 89 } 90 91 int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags ) 92 { 93 return 94 ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0 95 && ImplIsZeroWidth(c)) 96 || ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0 97 && ImplIsControlOrFormat(c)) 98 || ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0 99 && ImplIsPrivateUse(c)); 100 } 101 102 /* ======================================================================= */ 103 104 sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags) 105 { 106 return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) 107 == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ? 108 RTL_TEXTCVT_BYTE_PRIVATE_START + cChar : 109 RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 110 } 111 112 /* ----------------------------------------------------------------------- */ 113 114 sal_Bool 115 ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData, 116 sal_Unicode const ** ppSrcBuf, 117 sal_Unicode const * pEndSrcBuf, 118 sal_Char ** ppDestBuf, 119 sal_Char const * pEndDestBuf, 120 sal_uInt32 nFlags, 121 sal_uInt32 * pInfo) 122 { 123 sal_Unicode c = **ppSrcBuf; 124 125 (void) pData; /* unused */ 126 127 /* Should the private character map to one byte */ 128 if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) ) 129 { 130 if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 ) 131 { 132 **ppDestBuf = (sal_Char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START); 133 (*ppDestBuf)++; 134 (*ppSrcBuf)++; 135 return sal_True; 136 } 137 } 138 139 /* Should this character ignored (Private, Non Spacing, Control) */ 140 if ( ImplIsUnicodeIgnoreChar( c, nFlags ) ) 141 { 142 (*ppSrcBuf)++; 143 return sal_True; 144 } 145 146 /* Surrogates Characters should result in */ 147 /* one replacement character */ 148 if (ImplIsHighSurrogate(c)) 149 { 150 if ( *ppSrcBuf == pEndSrcBuf ) 151 { 152 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; 153 return sal_False; 154 } 155 156 c = *((*ppSrcBuf)+1); 157 if (ImplIsLowSurrogate(c)) 158 (*ppSrcBuf)++; 159 else 160 { 161 *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID; 162 if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR ) 163 { 164 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; 165 return sal_False; 166 } 167 else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE ) 168 { 169 (*ppSrcBuf)++; 170 return sal_True; 171 } 172 else if (ImplGetInvalidAsciiMultiByte(nFlags, 173 *ppDestBuf, 174 pEndDestBuf - *ppDestBuf)) 175 { 176 ++*ppSrcBuf; 177 ++*ppDestBuf; 178 return sal_True; 179 } 180 else 181 { 182 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR 183 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 184 return sal_False; 185 } 186 } 187 } 188 189 *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED; 190 if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR ) 191 { 192 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; 193 return sal_False; 194 } 195 else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE ) 196 (*ppSrcBuf)++; 197 else if (ImplGetUndefinedAsciiMultiByte(nFlags, 198 *ppDestBuf, 199 pEndDestBuf - *ppDestBuf)) 200 { 201 ++*ppSrcBuf; 202 ++*ppDestBuf; 203 } 204 else 205 { 206 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR 207 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 208 return sal_False; 209 } 210 211 return sal_True; 212 } 213 214