1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_i18npool.hxx" 26 27 #include <cclass_unicode.hxx> 28 #include <unicode/uchar.h> 29 #include <rtl/math.hxx> 30 #include <rtl/ustring.hxx> 31 #include <com/sun/star/i18n/KParseTokens.hpp> 32 #include <com/sun/star/i18n/KParseType.hpp> 33 #include <com/sun/star/i18n/UnicodeType.hpp> 34 #include <com/sun/star/i18n/XLocaleData.hpp> 35 #include <com/sun/star/i18n/NativeNumberMode.hpp> 36 37 #include <string.h> // memcpy() 38 39 using namespace ::com::sun::star::uno; 40 using namespace ::com::sun::star::lang; 41 using namespace ::rtl; 42 43 namespace com { namespace sun { namespace star { namespace i18n { 44 45 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL = 0x00000000; 46 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR = 0x00000001; 47 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL = 0x00000002; 48 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD = 0x00000004; 49 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE = 0x00000008; 50 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING = 0x00000010; 51 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020; 52 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL = 0x00000040; 53 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD = 0x00000080; 54 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP = 0x00000100; 55 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE = 0x00000200; 56 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP = 0x00000400; 57 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP = 0x00000800; 58 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN = 0x00001000; 59 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE = 0x00002000; 60 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT = 0x00004000; 61 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP = 0x20000000; 62 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP = 0x40000000; 63 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED = 0x80000000; 64 65 #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT) 66 67 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]* 68 69 const sal_uInt8 cclass_Unicode::nDefCnt = 128; 70 const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] = 71 { 72 // (...) == Calc formula compiler specific, commented out and modified 73 74 /* \0 */ TOKEN_EXCLUDED, 75 TOKEN_ILLEGAL, 76 TOKEN_ILLEGAL, 77 TOKEN_ILLEGAL, 78 TOKEN_ILLEGAL, 79 TOKEN_ILLEGAL, 80 TOKEN_ILLEGAL, 81 TOKEN_ILLEGAL, 82 TOKEN_ILLEGAL, 83 /* 9 \t */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL) 84 TOKEN_ILLEGAL, 85 /* 11 \v */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL) 86 TOKEN_ILLEGAL, 87 TOKEN_ILLEGAL, 88 TOKEN_ILLEGAL, 89 TOKEN_ILLEGAL, 90 TOKEN_ILLEGAL, 91 TOKEN_ILLEGAL, 92 TOKEN_ILLEGAL, 93 TOKEN_ILLEGAL, 94 TOKEN_ILLEGAL, 95 TOKEN_ILLEGAL, 96 TOKEN_ILLEGAL, 97 TOKEN_ILLEGAL, 98 TOKEN_ILLEGAL, 99 TOKEN_ILLEGAL, 100 TOKEN_ILLEGAL, 101 TOKEN_ILLEGAL, 102 TOKEN_ILLEGAL, 103 TOKEN_ILLEGAL, 104 TOKEN_ILLEGAL, 105 TOKEN_ILLEGAL, 106 /* 32 */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 107 /* 33 ! */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 108 /* 34 " */ TOKEN_CHAR_STRING | TOKEN_STRING_SEP, 109 /* 35 # */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD_SEP) 110 /* 36 $ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD) 111 /* 37 % */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_VALUE) 112 /* 38 & */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 113 /* 39 ' */ TOKEN_NAME_SEP, 114 /* 40 ( */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 115 /* 41 ) */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 116 /* 42 * */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 117 /* 43 + */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN, 118 /* 44 , */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_VALUE | TOKEN_VALUE) 119 /* 45 - */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN, 120 /* 46 . */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE) 121 /* 47 / */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 122 //for ( i = 48; i < 58; i++ ) 123 /* 48 0 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 124 /* 49 1 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 125 /* 50 2 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 126 /* 51 3 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 127 /* 52 4 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 128 /* 53 5 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 129 /* 54 6 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 130 /* 55 7 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 131 /* 56 8 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 132 /* 57 9 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD, 133 /* 58 : */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD) 134 /* 59 ; */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 135 /* 60 < */ TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 136 /* 61 = */ TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 137 /* 62 > */ TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 138 /* 63 ? */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD) 139 /* 64 @ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 140 //for ( i = 65; i < 91; i++ ) 141 /* 65 A */ TOKEN_CHAR_WORD | TOKEN_WORD, 142 /* 66 B */ TOKEN_CHAR_WORD | TOKEN_WORD, 143 /* 67 C */ TOKEN_CHAR_WORD | TOKEN_WORD, 144 /* 68 D */ TOKEN_CHAR_WORD | TOKEN_WORD, 145 /* 69 E */ TOKEN_CHAR_WORD | TOKEN_WORD, 146 /* 70 F */ TOKEN_CHAR_WORD | TOKEN_WORD, 147 /* 71 G */ TOKEN_CHAR_WORD | TOKEN_WORD, 148 /* 72 H */ TOKEN_CHAR_WORD | TOKEN_WORD, 149 /* 73 I */ TOKEN_CHAR_WORD | TOKEN_WORD, 150 /* 74 J */ TOKEN_CHAR_WORD | TOKEN_WORD, 151 /* 75 K */ TOKEN_CHAR_WORD | TOKEN_WORD, 152 /* 76 L */ TOKEN_CHAR_WORD | TOKEN_WORD, 153 /* 77 M */ TOKEN_CHAR_WORD | TOKEN_WORD, 154 /* 78 N */ TOKEN_CHAR_WORD | TOKEN_WORD, 155 /* 79 O */ TOKEN_CHAR_WORD | TOKEN_WORD, 156 /* 80 P */ TOKEN_CHAR_WORD | TOKEN_WORD, 157 /* 81 Q */ TOKEN_CHAR_WORD | TOKEN_WORD, 158 /* 82 R */ TOKEN_CHAR_WORD | TOKEN_WORD, 159 /* 83 S */ TOKEN_CHAR_WORD | TOKEN_WORD, 160 /* 84 T */ TOKEN_CHAR_WORD | TOKEN_WORD, 161 /* 85 U */ TOKEN_CHAR_WORD | TOKEN_WORD, 162 /* 86 V */ TOKEN_CHAR_WORD | TOKEN_WORD, 163 /* 87 W */ TOKEN_CHAR_WORD | TOKEN_WORD, 164 /* 88 X */ TOKEN_CHAR_WORD | TOKEN_WORD, 165 /* 89 Y */ TOKEN_CHAR_WORD | TOKEN_WORD, 166 /* 90 Z */ TOKEN_CHAR_WORD | TOKEN_WORD, 167 /* 91 [ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 168 /* 92 \ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 169 /* 93 ] */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 170 /* 94 ^ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, 171 /* 95 _ */ TOKEN_CHAR_WORD | TOKEN_WORD, 172 /* 96 ` */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 173 //for ( i = 97; i < 123; i++ ) 174 /* 97 a */ TOKEN_CHAR_WORD | TOKEN_WORD, 175 /* 98 b */ TOKEN_CHAR_WORD | TOKEN_WORD, 176 /* 99 c */ TOKEN_CHAR_WORD | TOKEN_WORD, 177 /* 100 d */ TOKEN_CHAR_WORD | TOKEN_WORD, 178 /* 101 e */ TOKEN_CHAR_WORD | TOKEN_WORD, 179 /* 102 f */ TOKEN_CHAR_WORD | TOKEN_WORD, 180 /* 103 g */ TOKEN_CHAR_WORD | TOKEN_WORD, 181 /* 104 h */ TOKEN_CHAR_WORD | TOKEN_WORD, 182 /* 105 i */ TOKEN_CHAR_WORD | TOKEN_WORD, 183 /* 106 j */ TOKEN_CHAR_WORD | TOKEN_WORD, 184 /* 107 k */ TOKEN_CHAR_WORD | TOKEN_WORD, 185 /* 108 l */ TOKEN_CHAR_WORD | TOKEN_WORD, 186 /* 109 m */ TOKEN_CHAR_WORD | TOKEN_WORD, 187 /* 110 n */ TOKEN_CHAR_WORD | TOKEN_WORD, 188 /* 111 o */ TOKEN_CHAR_WORD | TOKEN_WORD, 189 /* 112 p */ TOKEN_CHAR_WORD | TOKEN_WORD, 190 /* 113 q */ TOKEN_CHAR_WORD | TOKEN_WORD, 191 /* 114 r */ TOKEN_CHAR_WORD | TOKEN_WORD, 192 /* 115 s */ TOKEN_CHAR_WORD | TOKEN_WORD, 193 /* 116 t */ TOKEN_CHAR_WORD | TOKEN_WORD, 194 /* 117 u */ TOKEN_CHAR_WORD | TOKEN_WORD, 195 /* 118 v */ TOKEN_CHAR_WORD | TOKEN_WORD, 196 /* 119 w */ TOKEN_CHAR_WORD | TOKEN_WORD, 197 /* 120 x */ TOKEN_CHAR_WORD | TOKEN_WORD, 198 /* 121 y */ TOKEN_CHAR_WORD | TOKEN_WORD, 199 /* 122 z */ TOKEN_CHAR_WORD | TOKEN_WORD, 200 /* 123 { */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 201 /* 124 | */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 202 /* 125 } */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 203 /* 126 ~ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED) 204 /* 127 */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP // (TOKEN_ILLEGAL // UNUSED) 205 }; 206 207 208 const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] = 209 { 210 /* \0 */ KParseTokens::ASC_OTHER, 211 KParseTokens::ASC_CONTROL, 212 KParseTokens::ASC_CONTROL, 213 KParseTokens::ASC_CONTROL, 214 KParseTokens::ASC_CONTROL, 215 KParseTokens::ASC_CONTROL, 216 KParseTokens::ASC_CONTROL, 217 KParseTokens::ASC_CONTROL, 218 KParseTokens::ASC_CONTROL, 219 /* 9 \t */ KParseTokens::ASC_CONTROL, 220 KParseTokens::ASC_CONTROL, 221 /* 11 \v */ KParseTokens::ASC_CONTROL, 222 KParseTokens::ASC_CONTROL, 223 KParseTokens::ASC_CONTROL, 224 KParseTokens::ASC_CONTROL, 225 KParseTokens::ASC_CONTROL, 226 KParseTokens::ASC_CONTROL, 227 KParseTokens::ASC_CONTROL, 228 KParseTokens::ASC_CONTROL, 229 KParseTokens::ASC_CONTROL, 230 KParseTokens::ASC_CONTROL, 231 KParseTokens::ASC_CONTROL, 232 KParseTokens::ASC_CONTROL, 233 KParseTokens::ASC_CONTROL, 234 KParseTokens::ASC_CONTROL, 235 KParseTokens::ASC_CONTROL, 236 KParseTokens::ASC_CONTROL, 237 KParseTokens::ASC_CONTROL, 238 KParseTokens::ASC_CONTROL, 239 KParseTokens::ASC_CONTROL, 240 KParseTokens::ASC_CONTROL, 241 KParseTokens::ASC_CONTROL, 242 /* 32 */ KParseTokens::ASC_OTHER, 243 /* 33 ! */ KParseTokens::ASC_OTHER, 244 /* 34 " */ KParseTokens::ASC_OTHER, 245 /* 35 # */ KParseTokens::ASC_OTHER, 246 /* 36 $ */ KParseTokens::ASC_DOLLAR, 247 /* 37 % */ KParseTokens::ASC_OTHER, 248 /* 38 & */ KParseTokens::ASC_OTHER, 249 /* 39 ' */ KParseTokens::ASC_OTHER, 250 /* 40 ( */ KParseTokens::ASC_OTHER, 251 /* 41 ) */ KParseTokens::ASC_OTHER, 252 /* 42 * */ KParseTokens::ASC_OTHER, 253 /* 43 + */ KParseTokens::ASC_OTHER, 254 /* 44 , */ KParseTokens::ASC_OTHER, 255 /* 45 - */ KParseTokens::ASC_OTHER, 256 /* 46 . */ KParseTokens::ASC_DOT, 257 /* 47 / */ KParseTokens::ASC_OTHER, 258 //for ( i = 48; i < 58; i++ ) 259 /* 48 0 */ KParseTokens::ASC_DIGIT, 260 /* 49 1 */ KParseTokens::ASC_DIGIT, 261 /* 50 2 */ KParseTokens::ASC_DIGIT, 262 /* 51 3 */ KParseTokens::ASC_DIGIT, 263 /* 52 4 */ KParseTokens::ASC_DIGIT, 264 /* 53 5 */ KParseTokens::ASC_DIGIT, 265 /* 54 6 */ KParseTokens::ASC_DIGIT, 266 /* 55 7 */ KParseTokens::ASC_DIGIT, 267 /* 56 8 */ KParseTokens::ASC_DIGIT, 268 /* 57 9 */ KParseTokens::ASC_DIGIT, 269 /* 58 : */ KParseTokens::ASC_COLON, 270 /* 59 ; */ KParseTokens::ASC_OTHER, 271 /* 60 < */ KParseTokens::ASC_OTHER, 272 /* 61 = */ KParseTokens::ASC_OTHER, 273 /* 62 > */ KParseTokens::ASC_OTHER, 274 /* 63 ? */ KParseTokens::ASC_OTHER, 275 /* 64 @ */ KParseTokens::ASC_OTHER, 276 //for ( i = 65; i < 91; i++ ) 277 /* 65 A */ KParseTokens::ASC_UPALPHA, 278 /* 66 B */ KParseTokens::ASC_UPALPHA, 279 /* 67 C */ KParseTokens::ASC_UPALPHA, 280 /* 68 D */ KParseTokens::ASC_UPALPHA, 281 /* 69 E */ KParseTokens::ASC_UPALPHA, 282 /* 70 F */ KParseTokens::ASC_UPALPHA, 283 /* 71 G */ KParseTokens::ASC_UPALPHA, 284 /* 72 H */ KParseTokens::ASC_UPALPHA, 285 /* 73 I */ KParseTokens::ASC_UPALPHA, 286 /* 74 J */ KParseTokens::ASC_UPALPHA, 287 /* 75 K */ KParseTokens::ASC_UPALPHA, 288 /* 76 L */ KParseTokens::ASC_UPALPHA, 289 /* 77 M */ KParseTokens::ASC_UPALPHA, 290 /* 78 N */ KParseTokens::ASC_UPALPHA, 291 /* 79 O */ KParseTokens::ASC_UPALPHA, 292 /* 80 P */ KParseTokens::ASC_UPALPHA, 293 /* 81 Q */ KParseTokens::ASC_UPALPHA, 294 /* 82 R */ KParseTokens::ASC_UPALPHA, 295 /* 83 S */ KParseTokens::ASC_UPALPHA, 296 /* 84 T */ KParseTokens::ASC_UPALPHA, 297 /* 85 U */ KParseTokens::ASC_UPALPHA, 298 /* 86 V */ KParseTokens::ASC_UPALPHA, 299 /* 87 W */ KParseTokens::ASC_UPALPHA, 300 /* 88 X */ KParseTokens::ASC_UPALPHA, 301 /* 89 Y */ KParseTokens::ASC_UPALPHA, 302 /* 90 Z */ KParseTokens::ASC_UPALPHA, 303 /* 91 [ */ KParseTokens::ASC_OTHER, 304 /* 92 \ */ KParseTokens::ASC_OTHER, 305 /* 93 ] */ KParseTokens::ASC_OTHER, 306 /* 94 ^ */ KParseTokens::ASC_OTHER, 307 /* 95 _ */ KParseTokens::ASC_UNDERSCORE, 308 /* 96 ` */ KParseTokens::ASC_OTHER, 309 //for ( i = 97; i < 123; i++ ) 310 /* 97 a */ KParseTokens::ASC_LOALPHA, 311 /* 98 b */ KParseTokens::ASC_LOALPHA, 312 /* 99 c */ KParseTokens::ASC_LOALPHA, 313 /* 100 d */ KParseTokens::ASC_LOALPHA, 314 /* 101 e */ KParseTokens::ASC_LOALPHA, 315 /* 102 f */ KParseTokens::ASC_LOALPHA, 316 /* 103 g */ KParseTokens::ASC_LOALPHA, 317 /* 104 h */ KParseTokens::ASC_LOALPHA, 318 /* 105 i */ KParseTokens::ASC_LOALPHA, 319 /* 106 j */ KParseTokens::ASC_LOALPHA, 320 /* 107 k */ KParseTokens::ASC_LOALPHA, 321 /* 108 l */ KParseTokens::ASC_LOALPHA, 322 /* 109 m */ KParseTokens::ASC_LOALPHA, 323 /* 110 n */ KParseTokens::ASC_LOALPHA, 324 /* 111 o */ KParseTokens::ASC_LOALPHA, 325 /* 112 p */ KParseTokens::ASC_LOALPHA, 326 /* 113 q */ KParseTokens::ASC_LOALPHA, 327 /* 114 r */ KParseTokens::ASC_LOALPHA, 328 /* 115 s */ KParseTokens::ASC_LOALPHA, 329 /* 116 t */ KParseTokens::ASC_LOALPHA, 330 /* 117 u */ KParseTokens::ASC_LOALPHA, 331 /* 118 v */ KParseTokens::ASC_LOALPHA, 332 /* 119 w */ KParseTokens::ASC_LOALPHA, 333 /* 120 x */ KParseTokens::ASC_LOALPHA, 334 /* 121 y */ KParseTokens::ASC_LOALPHA, 335 /* 122 z */ KParseTokens::ASC_LOALPHA, 336 /* 123 { */ KParseTokens::ASC_OTHER, 337 /* 124 | */ KParseTokens::ASC_OTHER, 338 /* 125 } */ KParseTokens::ASC_OTHER, 339 /* 126 ~ */ KParseTokens::ASC_OTHER, 340 /* 127 */ KParseTokens::ASC_OTHER 341 }; 342 343 344 // static 345 const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c ) 346 { 347 if ( !pStr ) 348 return NULL; 349 while ( *pStr ) 350 { 351 if ( *pStr == c ) 352 return pStr; 353 pStr++; 354 } 355 return NULL; 356 } 357 358 359 sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos ) 360 { 361 sal_Unicode c = aStr[nPos]; 362 if ( c < nDefCnt ) 363 return pParseTokensType[ sal_uInt8(c) ]; 364 else 365 { 366 367 //! all KParseTokens::UNI_... must be matched 368 switch ( u_charType( (sal_uInt32) c ) ) 369 { 370 case U_UPPERCASE_LETTER : 371 return KParseTokens::UNI_UPALPHA; 372 case U_LOWERCASE_LETTER : 373 return KParseTokens::UNI_LOALPHA; 374 case U_TITLECASE_LETTER : 375 return KParseTokens::UNI_TITLE_ALPHA; 376 case U_MODIFIER_LETTER : 377 return KParseTokens::UNI_MODIFIER_LETTER; 378 case U_OTHER_LETTER : 379 // Non_Spacing_Mark could not be as leading character 380 if (nPos == 0) break; 381 // fall through, treat it as Other_Letter. 382 case U_NON_SPACING_MARK : 383 return KParseTokens::UNI_OTHER_LETTER; 384 case U_DECIMAL_DIGIT_NUMBER : 385 return KParseTokens::UNI_DIGIT; 386 case U_LETTER_NUMBER : 387 return KParseTokens::UNI_LETTER_NUMBER; 388 case U_OTHER_NUMBER : 389 return KParseTokens::UNI_OTHER_NUMBER; 390 } 391 392 return KParseTokens::UNI_OTHER; 393 } 394 } 395 396 sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale ) 397 { 398 sal_Bool bChanged = (aParserLocale.Language != rLocale.Language 399 || aParserLocale.Country != rLocale.Country 400 || aParserLocale.Variant != rLocale.Variant); 401 if ( bChanged ) 402 { 403 aParserLocale.Language = rLocale.Language; 404 aParserLocale.Country = rLocale.Country; 405 aParserLocale.Variant = rLocale.Variant; 406 } 407 if ( !xLocaleData.is() && xMSF.is() ) 408 { 409 Reference < 410 XInterface > xI = 411 xMSF->createInstance( OUString( 412 RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) ); 413 if ( xI.is() ) 414 { 415 Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) ); 416 x >>= xLocaleData; 417 } 418 } 419 return bChanged; 420 } 421 422 423 void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType, 424 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType, 425 const OUString& userDefinedCharactersCont ) 426 { 427 bool bIntlEqual = (rLocale.Language == aParserLocale.Language && 428 rLocale.Country == aParserLocale.Country && 429 rLocale.Variant == aParserLocale.Variant); 430 if ( !pTable || !bIntlEqual || 431 startCharTokenType != nStartTypes || 432 contCharTokenType != nContTypes || 433 userDefinedCharactersStart != aStartChars || 434 userDefinedCharactersCont != aContChars ) 435 initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart, 436 contCharTokenType, userDefinedCharactersCont ); 437 } 438 439 440 void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType, 441 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType, 442 const OUString& userDefinedCharactersCont ) 443 { 444 // (Re)Init 445 setupInternational( rLocale ); 446 // Memory of pTable is reused. 447 if ( !pTable ) 448 pTable = new UPT_FLAG_TYPE[nDefCnt]; 449 memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt ); 450 // Start and cont tables only need reallocation if different length. 451 if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() ) 452 { 453 delete [] pStart; 454 pStart = NULL; 455 } 456 if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() ) 457 { 458 delete [] pCont; 459 pCont = NULL; 460 } 461 nStartTypes = startCharTokenType; 462 nContTypes = contCharTokenType; 463 aStartChars = userDefinedCharactersStart; 464 aContChars = userDefinedCharactersCont; 465 466 // specials 467 if( xLocaleData.is() ) 468 { 469 LocaleDataItem aItem = 470 xLocaleData->getLocaleItem( aParserLocale ); 471 //!TODO: theoretically separators may be a string, adjustment would have to be 472 //! done here and in parsing and in ::rtl::math::stringToDouble() 473 cGroupSep = aItem.thousandSeparator.getStr()[0]; 474 cDecimalSep = aItem.decimalSeparator.getStr()[0]; 475 } 476 477 if ( cGroupSep < nDefCnt ) 478 pTable[cGroupSep] |= TOKEN_VALUE; 479 if ( cDecimalSep < nDefCnt ) 480 pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE; 481 482 // Modify characters according to KParseTokens definitions. 483 { 484 using namespace KParseTokens; 485 sal_uInt8 i; 486 487 if ( !(nStartTypes & ASC_UPALPHA) ) 488 for ( i = 65; i < 91; i++ ) 489 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character 490 if ( !(nContTypes & ASC_UPALPHA) ) 491 for ( i = 65; i < 91; i++ ) 492 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character 493 494 if ( !(nStartTypes & ASC_LOALPHA) ) 495 for ( i = 97; i < 123; i++ ) 496 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character 497 if ( !(nContTypes & ASC_LOALPHA) ) 498 for ( i = 97; i < 123; i++ ) 499 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character 500 501 if ( nStartTypes & ASC_DIGIT ) 502 for ( i = 48; i < 58; i++ ) 503 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character 504 if ( !(nContTypes & ASC_DIGIT) ) 505 for ( i = 48; i < 58; i++ ) 506 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character 507 508 if ( !(nStartTypes & ASC_UNDERSCORE) ) 509 pTable[95] &= ~TOKEN_CHAR_WORD; // not allowed as start character 510 if ( !(nContTypes & ASC_UNDERSCORE) ) 511 pTable[95] &= ~TOKEN_WORD; // not allowed as cont character 512 513 if ( nStartTypes & ASC_DOLLAR ) 514 pTable[36] |= TOKEN_CHAR_WORD; // allowed as start character 515 if ( nContTypes & ASC_DOLLAR ) 516 pTable[36] |= TOKEN_WORD; // allowed as cont character 517 518 if ( nStartTypes & ASC_DOT ) 519 pTable[46] |= TOKEN_CHAR_WORD; // allowed as start character 520 if ( nContTypes & ASC_DOT ) 521 pTable[46] |= TOKEN_WORD; // allowed as cont character 522 523 if ( nStartTypes & ASC_COLON ) 524 pTable[58] |= TOKEN_CHAR_WORD; // allowed as start character 525 if ( nContTypes & ASC_COLON ) 526 pTable[58] |= TOKEN_WORD; // allowed as cont character 527 528 if ( nStartTypes & ASC_CONTROL ) 529 for ( i = 1; i < 32; i++ ) 530 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character 531 if ( nContTypes & ASC_CONTROL ) 532 for ( i = 1; i < 32; i++ ) 533 pTable[i] |= TOKEN_WORD; // allowed as cont character 534 535 if ( nStartTypes & ASC_ANY_BUT_CONTROL ) 536 for ( i = 32; i < nDefCnt; i++ ) 537 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character 538 if ( nContTypes & ASC_ANY_BUT_CONTROL ) 539 for ( i = 32; i < nDefCnt; i++ ) 540 pTable[i] |= TOKEN_WORD; // allowed as cont character 541 542 } 543 544 // Merge in (positively override with) user defined characters. 545 // StartChars 546 sal_Int32 nLen = aStartChars.getLength(); 547 if ( nLen ) 548 { 549 if ( !pStart ) 550 pStart = new UPT_FLAG_TYPE[ nLen ]; 551 const sal_Unicode* p = aStartChars.getStr(); 552 for ( sal_Int32 j=0; j<nLen; j++, p++ ) 553 { 554 pStart[j] = TOKEN_CHAR_WORD; 555 if ( *p < nDefCnt ) 556 pTable[*p] |= TOKEN_CHAR_WORD; 557 } 558 } 559 // ContChars 560 nLen = aContChars.getLength(); 561 if ( nLen ) 562 { 563 if ( !pCont ) 564 pCont = new UPT_FLAG_TYPE[ nLen ]; 565 const sal_Unicode* p = aContChars.getStr(); 566 for ( sal_Int32 j=0; j<nLen; j++ ) 567 { 568 pCont[j] = TOKEN_WORD; 569 if ( *p < nDefCnt ) 570 pTable[*p] |= TOKEN_WORD; 571 } 572 } 573 } 574 575 576 void cclass_Unicode::destroyParserTable() 577 { 578 if ( pCont ) 579 delete [] pCont; 580 if ( pStart ) 581 delete [] pStart; 582 if ( pTable ) 583 delete [] pTable; 584 } 585 586 587 UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos ) 588 { 589 UPT_FLAG_TYPE nMask; 590 sal_Unicode c = aStr[nPos]; 591 if ( c < nDefCnt ) 592 nMask = pTable[ sal_uInt8(c) ]; 593 else 594 nMask = getFlagsExtended( aStr, nPos ); 595 switch ( eState ) 596 { 597 case ssGetChar : 598 case ssRewindFromValue : 599 case ssIgnoreLeadingInRewind : 600 case ssGetWordFirstChar : 601 if ( !(nMask & TOKEN_CHAR_WORD) ) 602 { 603 nMask |= getStartCharsFlags( c ); 604 if ( nMask & TOKEN_CHAR_WORD ) 605 nMask &= ~TOKEN_EXCLUDED; 606 } 607 break; 608 case ssGetValue : 609 case ssGetWord : 610 if ( !(nMask & TOKEN_WORD) ) 611 { 612 nMask |= getContCharsFlags( c ); 613 if ( nMask & TOKEN_WORD ) 614 nMask &= ~TOKEN_EXCLUDED; 615 } 616 break; 617 default: 618 ; // other cases aren't needed, no compiler warning 619 } 620 return nMask; 621 } 622 623 624 UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos ) 625 { 626 sal_Unicode c = aStr[nPos]; 627 if ( c == cGroupSep ) 628 return TOKEN_VALUE; 629 else if ( c == cDecimalSep ) 630 return TOKEN_CHAR_VALUE | TOKEN_VALUE; 631 using namespace i18n; 632 bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar || 633 eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind); 634 sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes); 635 636 //! all KParseTokens::UNI_... must be matched 637 switch ( u_charType( (sal_uInt32) c ) ) 638 { 639 case U_UPPERCASE_LETTER : 640 return (nTypes & KParseTokens::UNI_UPALPHA) ? 641 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 642 TOKEN_ILLEGAL; 643 case U_LOWERCASE_LETTER : 644 return (nTypes & KParseTokens::UNI_LOALPHA) ? 645 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 646 TOKEN_ILLEGAL; 647 case U_TITLECASE_LETTER : 648 return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ? 649 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 650 TOKEN_ILLEGAL; 651 case U_MODIFIER_LETTER : 652 return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ? 653 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 654 TOKEN_ILLEGAL; 655 case U_NON_SPACING_MARK : 656 case U_COMBINING_SPACING_MARK : 657 // Non_Spacing_Mark can't be a leading character, 658 // nor can a spacing combining mark. 659 if (bStart) 660 return TOKEN_ILLEGAL; 661 // fall through, treat it as Other_Letter. 662 case U_OTHER_LETTER : 663 return (nTypes & KParseTokens::UNI_OTHER_LETTER) ? 664 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 665 TOKEN_ILLEGAL; 666 case U_DECIMAL_DIGIT_NUMBER : 667 return ((nTypes & KParseTokens::UNI_DIGIT) ? 668 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 669 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; 670 case U_LETTER_NUMBER : 671 return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ? 672 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 673 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; 674 case U_OTHER_NUMBER : 675 return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ? 676 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) : 677 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS; 678 case U_SPACE_SEPARATOR : 679 return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ? 680 TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) )); 681 } 682 683 return TOKEN_ILLEGAL; 684 } 685 686 687 UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c ) 688 { 689 if ( pStart ) 690 { 691 const sal_Unicode* pStr = aStartChars.getStr(); 692 const sal_Unicode* p = StrChr( pStr, c ); 693 if ( p ) 694 return pStart[ p - pStr ]; 695 } 696 return TOKEN_ILLEGAL; 697 } 698 699 700 UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c ) 701 { 702 if ( pCont ) 703 { 704 const sal_Unicode* pStr = aContChars.getStr(); 705 const sal_Unicode* p = StrChr( pStr, c ); 706 if ( p ) 707 return pCont[ p - pStr ]; 708 } 709 return TOKEN_ILLEGAL; 710 } 711 712 713 void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType ) 714 { 715 using namespace i18n; 716 const sal_Unicode* const pTextStart = rText.getStr() + nPos; 717 eState = ssGetChar; 718 719 //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue! 720 const sal_Unicode* pSym = pTextStart; 721 const sal_Unicode* pSrc = pSym; 722 OUString aSymbol; 723 sal_Unicode c = *pSrc; 724 sal_Unicode cLast = 0; 725 int nDecSeps = 0; 726 bool bQuote = false; 727 bool bMightBeWord = true; 728 bool bMightBeWordLast = true; 729 //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue! 730 731 while ( (c != 0) && (eState != ssStop) ) 732 { 733 UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart ); 734 if ( nMask & TOKEN_EXCLUDED ) 735 eState = ssBounce; 736 if ( bMightBeWord ) 737 { // only relevant for ssGetValue fall back 738 if ( eState == ssGetChar || eState == ssRewindFromValue || 739 eState == ssIgnoreLeadingInRewind ) 740 bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0); 741 else 742 bMightBeWord = ((nMask & TOKEN_WORD) != 0); 743 } 744 sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart ); 745 pSrc++; 746 switch (eState) 747 { 748 case ssGetChar : 749 case ssRewindFromValue : 750 case ssIgnoreLeadingInRewind : 751 { 752 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue 753 && eState != ssIgnoreLeadingInRewind ) 754 { //! must be first, may fall back to ssGetWord via bMightBeWord 755 eState = ssGetValue; 756 if ( nMask & TOKEN_VALUE_DIGIT ) 757 { 758 if ( 128 <= c ) 759 r.TokenType = KParseType::UNI_NUMBER; 760 else 761 r.TokenType = KParseType::ASC_NUMBER; 762 } 763 else if ( c == cDecimalSep ) 764 { 765 if ( *pSrc ) 766 ++nDecSeps; 767 else 768 eState = ssRewindFromValue; 769 // retry for ONE_SINGLE_CHAR or others 770 } 771 } 772 else if ( nMask & TOKEN_CHAR_WORD ) 773 { 774 eState = ssGetWord; 775 r.TokenType = KParseType::IDENTNAME; 776 } 777 else if ( nMask & TOKEN_NAME_SEP ) 778 { 779 eState = ssGetWordFirstChar; 780 bQuote = true; 781 pSym++; 782 nParseTokensType = 0; // will be taken of first real character 783 r.TokenType = KParseType::SINGLE_QUOTE_NAME; 784 } 785 else if ( nMask & TOKEN_CHAR_STRING ) 786 { 787 eState = ssGetString; 788 pSym++; 789 nParseTokensType = 0; // will be taken of first real character 790 r.TokenType = KParseType::DOUBLE_QUOTE_STRING; 791 } 792 else if ( nMask & TOKEN_CHAR_DONTCARE ) 793 { 794 if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS ) 795 { 796 if (eState == ssRewindFromValue) 797 eState = ssIgnoreLeadingInRewind; 798 r.LeadingWhiteSpace++; 799 pSym++; 800 nParseTokensType = 0; // wait until real character 801 bMightBeWord = true; 802 } 803 else 804 eState = ssBounce; 805 } 806 else if ( nMask & TOKEN_CHAR_BOOL ) 807 { 808 eState = ssGetBool; 809 r.TokenType = KParseType::BOOLEAN; 810 } 811 else if ( nMask & TOKEN_CHAR ) 812 { //! must be last 813 eState = ssStop; 814 r.TokenType = KParseType::ONE_SINGLE_CHAR; 815 } 816 else 817 eState = ssBounce; // not known 818 } 819 break; 820 case ssGetValue : 821 { 822 if ( nMask & TOKEN_VALUE_DIGIT ) 823 { 824 if ( 128 <= c ) 825 r.TokenType = KParseType::UNI_NUMBER; 826 else if ( r.TokenType != KParseType::UNI_NUMBER ) 827 r.TokenType = KParseType::ASC_NUMBER; 828 } 829 if ( nMask & TOKEN_VALUE ) 830 { 831 if ( c == cDecimalSep && ++nDecSeps > 1 ) 832 { 833 if ( pSrc - pTextStart == 2 ) 834 eState = ssRewindFromValue; 835 // consecutive separators 836 else 837 eState = ssStopBack; 838 } 839 // else keep it going 840 } 841 else if ( c == 'E' || c == 'e' ) 842 { 843 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart ); 844 if ( nNext & TOKEN_VALUE_EXP ) 845 ; // keep it going 846 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) ) 847 { // might be a numerical name (1.2efg) 848 eState = ssGetWord; 849 r.TokenType = KParseType::IDENTNAME; 850 } 851 else 852 eState = ssStopBack; 853 } 854 else if ( nMask & TOKEN_VALUE_SIGN ) 855 { 856 if ( (cLast == 'E') || (cLast == 'e') ) 857 { 858 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart ); 859 if ( nNext & TOKEN_VALUE_EXP_VALUE ) 860 ; // keep it going 861 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) ) 862 { // might be a numerical name (1.2e+fg) 863 eState = ssGetWord; 864 r.TokenType = KParseType::IDENTNAME; 865 } 866 else 867 eState = ssStopBack; 868 } 869 else if ( bMightBeWord ) 870 { // might be a numerical name (1.2+fg) 871 eState = ssGetWord; 872 r.TokenType = KParseType::IDENTNAME; 873 } 874 else 875 eState = ssStopBack; 876 } 877 else if ( bMightBeWord && (nMask & TOKEN_WORD) ) 878 { // might be a numerical name (1995.A1) 879 eState = ssGetWord; 880 r.TokenType = KParseType::IDENTNAME; 881 } 882 else 883 eState = ssStopBack; 884 } 885 break; 886 case ssGetWordFirstChar : 887 eState = ssGetWord; 888 // fall thru 889 case ssGetWord : 890 { 891 if ( nMask & TOKEN_WORD ) 892 ; // keep it going 893 else if ( nMask & TOKEN_NAME_SEP ) 894 { 895 if ( bQuote ) 896 { 897 if ( cLast == '\\' ) 898 { // escaped 899 aSymbol += OUString( pSym, pSrc - pSym - 2 ); 900 aSymbol += OUString( &c, 1); 901 } 902 else 903 { 904 eState = ssStop; 905 aSymbol += OUString( pSym, pSrc - pSym - 1 ); 906 } 907 pSym = pSrc; 908 } 909 else 910 eState = ssStopBack; 911 } 912 else if ( bQuote ) 913 ; // keep it going 914 else 915 eState = ssStopBack; 916 } 917 break; 918 case ssGetString : 919 { 920 if ( nMask & TOKEN_STRING_SEP ) 921 { 922 if ( cLast == '\\' ) 923 { // escaped 924 aSymbol += OUString( pSym, pSrc - pSym - 2 ); 925 aSymbol += OUString( &c, 1); 926 } 927 else if ( c == *pSrc && 928 !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) ) 929 { // "" => literal " escaped 930 aSymbol += OUString( pSym, pSrc - pSym ); 931 pSrc++; 932 } 933 else 934 { 935 eState = ssStop; 936 aSymbol += OUString( pSym, pSrc - pSym - 1 ); 937 } 938 pSym = pSrc; 939 } 940 } 941 break; 942 case ssGetBool : 943 { 944 if ( (nMask & TOKEN_BOOL) ) 945 eState = ssStop; // maximum 2: <, >, <>, <=, >= 946 else 947 eState = ssStopBack; 948 } 949 break; 950 case ssStopBack : 951 case ssBounce : 952 case ssStop : 953 ; // nothing, no compiler warning 954 break; 955 } 956 if ( eState == ssRewindFromValue ) 957 { 958 r = ParseResult(); 959 pSym = pTextStart; 960 pSrc = pSym; 961 aSymbol = OUString(); 962 c = *pSrc; 963 cLast = 0; 964 nDecSeps = 0; 965 bQuote = false; 966 bMightBeWord = true; 967 bMightBeWordLast = true; 968 } 969 else 970 { 971 if ( !(r.TokenType & nTokenType) ) 972 { 973 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER)) 974 && (nTokenType & KParseType::IDENTNAME) && bMightBeWord ) 975 ; // keep a number that might be a word 976 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) ) 977 ; // keep ignored white space 978 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) ) 979 ; // keep uncertain value 980 else 981 eState = ssBounce; 982 } 983 if ( eState == ssBounce ) 984 { 985 r.TokenType = 0; 986 eState = ssStopBack; 987 } 988 if ( eState == ssStopBack ) 989 { // put back 990 pSrc--; 991 bMightBeWord = bMightBeWordLast; 992 eState = ssStop; 993 } 994 if ( eState != ssStop ) 995 { 996 if ( !r.StartFlags ) 997 r.StartFlags |= nParseTokensType; 998 else 999 r.ContFlags |= nParseTokensType; 1000 } 1001 bMightBeWordLast = bMightBeWord; 1002 cLast = c; 1003 c = *pSrc; 1004 } 1005 } 1006 // r.CharLen is the length in characters (not code points) of the parsed 1007 // token not including any leading white space, change this calculation if 1008 // multi-code-point Unicode characters are to be supported. 1009 r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace; 1010 r.EndPos = nPos + (pSrc - pTextStart); 1011 if ( r.TokenType & KParseType::ASC_NUMBER ) 1012 { 1013 r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace, 1014 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL ); 1015 if ( bMightBeWord ) 1016 r.TokenType |= KParseType::IDENTNAME; 1017 } 1018 else if ( r.TokenType & KParseType::UNI_NUMBER ) 1019 { 1020 if ( !xNatNumSup.is() ) 1021 { 1022 #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier" 1023 if ( xMSF.is() ) 1024 { 1025 xNatNumSup = Reference< XNativeNumberSupplier > ( 1026 xMSF->createInstance( OUString( 1027 RTL_CONSTASCII_USTRINGPARAM( 1028 NATIVENUMBERSUPPLIER_SERVICENAME ) ) ), 1029 UNO_QUERY ); 1030 } 1031 if ( !xNatNumSup.is() ) 1032 { 1033 throw RuntimeException( OUString( 1034 #ifdef DBG_UTIL 1035 RTL_CONSTASCII_USTRINGPARAM( 1036 "cclass_Unicode::parseText: can't instanciate " 1037 NATIVENUMBERSUPPLIER_SERVICENAME ) 1038 #endif 1039 ), *this ); 1040 } 1041 #undef NATIVENUMBERSUPPLIER_SERVICENAME 1042 } 1043 OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos + 1044 r.LeadingWhiteSpace ); 1045 // transliterate to ASCII 1046 aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale, 1047 NativeNumberMode::NATNUM0 ); 1048 r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL ); 1049 if ( bMightBeWord ) 1050 r.TokenType |= KParseType::IDENTNAME; 1051 } 1052 else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) ) 1053 { 1054 if ( pSym < pSrc ) 1055 { //! open quote 1056 aSymbol += OUString( pSym, pSrc - pSym ); 1057 r.TokenType |= KParseType::MISSING_QUOTE; 1058 } 1059 r.DequotedNameOrString = aSymbol; 1060 } 1061 } 1062 1063 } } } } 1064