1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_sal.hxx" 26 27 #include "context.h" 28 #include "converter.h" 29 #include "convertsinglebytetobmpunicode.hxx" 30 #include "unichars.h" 31 32 #include "osl/diagnose.h" 33 #include "rtl/textcvt.h" 34 #include "sal/types.h" 35 36 #include <cstddef> 37 38 sal_Size rtl_textenc_convertSingleByteToBmpUnicode( 39 ImplTextConverterData const * data, void *, sal_Char const * srcBuf, 40 sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars, 41 sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes) 42 { 43 sal_Unicode const * map = static_cast< 44 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( 45 data)->byteToUnicode; 46 sal_uInt32 infoFlags = 0; 47 sal_Size converted = 0; 48 sal_Unicode * destBufPtr = destBuf; 49 sal_Unicode * destBufEnd = destBuf + destChars; 50 for (; converted < srcBytes; ++converted) { 51 bool undefined = true; 52 sal_Char b = *srcBuf++; 53 sal_Unicode c = map[static_cast< sal_uInt8 >(b)]; 54 if (c == 0xFFFF) { 55 goto bad_input; 56 } 57 if (destBufEnd - destBufPtr < 1) { 58 goto no_output; 59 } 60 *destBufPtr++ = c; 61 continue; 62 bad_input: 63 switch (ImplHandleBadInputTextToUnicodeConversion( 64 undefined, false, b, flags, &destBufPtr, destBufEnd, 65 &infoFlags)) 66 { 67 case IMPL_BAD_INPUT_STOP: 68 break; 69 70 case IMPL_BAD_INPUT_CONTINUE: 71 continue; 72 73 case IMPL_BAD_INPUT_NO_OUTPUT: 74 goto no_output; 75 } 76 break; 77 no_output: 78 --srcBuf; 79 infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 80 break; 81 } 82 if (info != 0) { 83 *info = infoFlags; 84 } 85 if (srcCvtBytes != 0) { 86 *srcCvtBytes = converted; 87 } 88 return destBufPtr - destBuf; 89 } 90 91 sal_Size rtl_textenc_convertBmpUnicodeToSingleByte( 92 ImplTextConverterData const * data, void * context, 93 sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf, 94 sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info, 95 sal_Size * srcCvtChars) 96 { 97 std::size_t entries = static_cast< 98 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( 99 data)->unicodeToByteEntries; 100 rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast< 101 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( 102 data)->unicodeToByte; 103 sal_Unicode highSurrogate = 0; 104 sal_uInt32 infoFlags = 0; 105 sal_Size converted = 0; 106 sal_Char * destBufPtr = destBuf; 107 sal_Char * destBufEnd = destBuf + destBytes; 108 if (context != 0) { 109 highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)-> 110 m_nHighSurrogate; 111 } 112 for (; converted < srcChars; ++converted) { 113 bool undefined = true; 114 sal_uInt32 c = *srcBuf++; 115 if (highSurrogate == 0) { 116 if (ImplIsHighSurrogate(c)) { 117 highSurrogate = static_cast< sal_Unicode >(c); 118 continue; 119 } 120 } else if (ImplIsLowSurrogate(c)) { 121 c = ImplCombineSurrogates(highSurrogate, c); 122 } else { 123 undefined = false; 124 goto bad_input; 125 } 126 if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) { 127 undefined = false; 128 goto bad_input; 129 } 130 // Linearly searching through the ranges if probably fastest, assuming 131 // that most converted characters belong to the ASCII subset: 132 for (std::size_t i = 0; i < entries; ++i) { 133 if (c < ranges[i].unicode) { 134 break; 135 } else if (c <= sal::static_int_cast< sal_uInt32 >( 136 ranges[i].unicode + ranges[i].range)) 137 { 138 if (destBufEnd - destBufPtr < 1) { 139 goto no_output; 140 } 141 *destBufPtr++ = static_cast< sal_Char >( 142 ranges[i].byte + (c - ranges[i].unicode)); 143 goto done; 144 } 145 } 146 goto bad_input; 147 done: 148 highSurrogate = 0; 149 continue; 150 bad_input: 151 switch (ImplHandleBadInputUnicodeToTextConversion( 152 undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0, 153 0, 0)) 154 { 155 case IMPL_BAD_INPUT_STOP: 156 highSurrogate = 0; 157 break; 158 159 case IMPL_BAD_INPUT_CONTINUE: 160 highSurrogate = 0; 161 continue; 162 163 case IMPL_BAD_INPUT_NO_OUTPUT: 164 goto no_output; 165 } 166 break; 167 no_output: 168 --srcBuf; 169 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 170 break; 171 } 172 if (highSurrogate != 0 173 && ((infoFlags 174 & (RTL_UNICODETOTEXT_INFO_ERROR 175 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) 176 == 0)) 177 { 178 if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) { 179 infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; 180 } else { 181 switch (ImplHandleBadInputUnicodeToTextConversion( 182 false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0, 183 0, 0)) 184 { 185 case IMPL_BAD_INPUT_STOP: 186 case IMPL_BAD_INPUT_CONTINUE: 187 highSurrogate = 0; 188 break; 189 190 case IMPL_BAD_INPUT_NO_OUTPUT: 191 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 192 break; 193 } 194 } 195 } 196 if (context != 0) { 197 static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate 198 = highSurrogate; 199 } 200 if (info != 0) { 201 *info = infoFlags; 202 } 203 if (srcCvtChars != 0) { 204 *srcCvtChars = converted; 205 } 206 return destBufPtr - destBuf; 207 } 208