1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_svtools.hxx" 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include <svtools/syntaxhighlight.hxx> 32*cdf0e10cSrcweir 33*cdf0e10cSrcweir #include <unotools/charclass.hxx> 34*cdf0e10cSrcweir #include <tools/debug.hxx> 35*cdf0e10cSrcweir 36*cdf0e10cSrcweir 37*cdf0e10cSrcweir // ########################################################################## 38*cdf0e10cSrcweir // ATTENTION: all these words needs to be in small caps 39*cdf0e10cSrcweir // ########################################################################## 40*cdf0e10cSrcweir static const char* strListBasicKeyWords[] = { 41*cdf0e10cSrcweir "access", 42*cdf0e10cSrcweir "alias", 43*cdf0e10cSrcweir "and", 44*cdf0e10cSrcweir "any", 45*cdf0e10cSrcweir "append", 46*cdf0e10cSrcweir "as", 47*cdf0e10cSrcweir "base", 48*cdf0e10cSrcweir "binary", 49*cdf0e10cSrcweir "boolean", 50*cdf0e10cSrcweir "byref", 51*cdf0e10cSrcweir "byte", 52*cdf0e10cSrcweir "byval", 53*cdf0e10cSrcweir "call", 54*cdf0e10cSrcweir "case", 55*cdf0e10cSrcweir "cdecl", 56*cdf0e10cSrcweir "classmodule", 57*cdf0e10cSrcweir "close", 58*cdf0e10cSrcweir "compare", 59*cdf0e10cSrcweir "compatible", 60*cdf0e10cSrcweir "const", 61*cdf0e10cSrcweir "currency", 62*cdf0e10cSrcweir "date", 63*cdf0e10cSrcweir "declare", 64*cdf0e10cSrcweir "defbool", 65*cdf0e10cSrcweir "defcur", 66*cdf0e10cSrcweir "defdate", 67*cdf0e10cSrcweir "defdbl", 68*cdf0e10cSrcweir "deferr", 69*cdf0e10cSrcweir "defint", 70*cdf0e10cSrcweir "deflng", 71*cdf0e10cSrcweir "defobj", 72*cdf0e10cSrcweir "defsng", 73*cdf0e10cSrcweir "defstr", 74*cdf0e10cSrcweir "defvar", 75*cdf0e10cSrcweir "dim", 76*cdf0e10cSrcweir "do", 77*cdf0e10cSrcweir "double", 78*cdf0e10cSrcweir "each", 79*cdf0e10cSrcweir "else", 80*cdf0e10cSrcweir "elseif", 81*cdf0e10cSrcweir "end", 82*cdf0e10cSrcweir "end enum", 83*cdf0e10cSrcweir "end function", 84*cdf0e10cSrcweir "end if", 85*cdf0e10cSrcweir "end select", 86*cdf0e10cSrcweir "end sub", 87*cdf0e10cSrcweir "end type", 88*cdf0e10cSrcweir "endif", 89*cdf0e10cSrcweir "enum", 90*cdf0e10cSrcweir "eqv", 91*cdf0e10cSrcweir "erase", 92*cdf0e10cSrcweir "error", 93*cdf0e10cSrcweir "exit", 94*cdf0e10cSrcweir "explicit", 95*cdf0e10cSrcweir "for", 96*cdf0e10cSrcweir "function", 97*cdf0e10cSrcweir "get", 98*cdf0e10cSrcweir "global", 99*cdf0e10cSrcweir "gosub", 100*cdf0e10cSrcweir "goto", 101*cdf0e10cSrcweir "if", 102*cdf0e10cSrcweir "imp", 103*cdf0e10cSrcweir "implements", 104*cdf0e10cSrcweir "in", 105*cdf0e10cSrcweir "input", 106*cdf0e10cSrcweir "integer", 107*cdf0e10cSrcweir "is", 108*cdf0e10cSrcweir "let", 109*cdf0e10cSrcweir "lib", 110*cdf0e10cSrcweir "like", 111*cdf0e10cSrcweir "line", 112*cdf0e10cSrcweir "line input", 113*cdf0e10cSrcweir "local", 114*cdf0e10cSrcweir "lock", 115*cdf0e10cSrcweir "long", 116*cdf0e10cSrcweir "loop", 117*cdf0e10cSrcweir "lprint", 118*cdf0e10cSrcweir "lset", 119*cdf0e10cSrcweir "mod", 120*cdf0e10cSrcweir "name", 121*cdf0e10cSrcweir "new", 122*cdf0e10cSrcweir "next", 123*cdf0e10cSrcweir "not", 124*cdf0e10cSrcweir "object", 125*cdf0e10cSrcweir "on", 126*cdf0e10cSrcweir "open", 127*cdf0e10cSrcweir "option", 128*cdf0e10cSrcweir "optional", 129*cdf0e10cSrcweir "or", 130*cdf0e10cSrcweir "output", 131*cdf0e10cSrcweir "preserve", 132*cdf0e10cSrcweir "print", 133*cdf0e10cSrcweir "private", 134*cdf0e10cSrcweir "property", 135*cdf0e10cSrcweir "public", 136*cdf0e10cSrcweir "random", 137*cdf0e10cSrcweir "read", 138*cdf0e10cSrcweir "redim", 139*cdf0e10cSrcweir "rem", 140*cdf0e10cSrcweir "resume", 141*cdf0e10cSrcweir "return", 142*cdf0e10cSrcweir "rset", 143*cdf0e10cSrcweir "select", 144*cdf0e10cSrcweir "set", 145*cdf0e10cSrcweir "shared", 146*cdf0e10cSrcweir "single", 147*cdf0e10cSrcweir "static", 148*cdf0e10cSrcweir "step", 149*cdf0e10cSrcweir "stop", 150*cdf0e10cSrcweir "string", 151*cdf0e10cSrcweir "sub", 152*cdf0e10cSrcweir "system", 153*cdf0e10cSrcweir "text", 154*cdf0e10cSrcweir "then", 155*cdf0e10cSrcweir "to", 156*cdf0e10cSrcweir "type", 157*cdf0e10cSrcweir "typeof", 158*cdf0e10cSrcweir "until", 159*cdf0e10cSrcweir "variant", 160*cdf0e10cSrcweir "wend", 161*cdf0e10cSrcweir "while", 162*cdf0e10cSrcweir "with", 163*cdf0e10cSrcweir "write", 164*cdf0e10cSrcweir "xor" 165*cdf0e10cSrcweir }; 166*cdf0e10cSrcweir 167*cdf0e10cSrcweir 168*cdf0e10cSrcweir static const char* strListSqlKeyWords[] = { 169*cdf0e10cSrcweir "all", 170*cdf0e10cSrcweir "and", 171*cdf0e10cSrcweir "any", 172*cdf0e10cSrcweir "as", 173*cdf0e10cSrcweir "asc", 174*cdf0e10cSrcweir "avg", 175*cdf0e10cSrcweir "between", 176*cdf0e10cSrcweir "by", 177*cdf0e10cSrcweir "cast", 178*cdf0e10cSrcweir "corresponding", 179*cdf0e10cSrcweir "count", 180*cdf0e10cSrcweir "create", 181*cdf0e10cSrcweir "cross", 182*cdf0e10cSrcweir "delete", 183*cdf0e10cSrcweir "desc", 184*cdf0e10cSrcweir "distinct", 185*cdf0e10cSrcweir "drop", 186*cdf0e10cSrcweir "escape", 187*cdf0e10cSrcweir "except", 188*cdf0e10cSrcweir "exists", 189*cdf0e10cSrcweir "false", 190*cdf0e10cSrcweir "from", 191*cdf0e10cSrcweir "full", 192*cdf0e10cSrcweir "global", 193*cdf0e10cSrcweir "group", 194*cdf0e10cSrcweir "having", 195*cdf0e10cSrcweir "in", 196*cdf0e10cSrcweir "inner", 197*cdf0e10cSrcweir "insert", 198*cdf0e10cSrcweir "intersect", 199*cdf0e10cSrcweir "into", 200*cdf0e10cSrcweir "is", 201*cdf0e10cSrcweir "join", 202*cdf0e10cSrcweir "left", 203*cdf0e10cSrcweir "like", 204*cdf0e10cSrcweir "local", 205*cdf0e10cSrcweir "match", 206*cdf0e10cSrcweir "max", 207*cdf0e10cSrcweir "min", 208*cdf0e10cSrcweir "natural", 209*cdf0e10cSrcweir "not", 210*cdf0e10cSrcweir "null", 211*cdf0e10cSrcweir "on", 212*cdf0e10cSrcweir "or", 213*cdf0e10cSrcweir "order", 214*cdf0e10cSrcweir "outer", 215*cdf0e10cSrcweir "right", 216*cdf0e10cSrcweir "select", 217*cdf0e10cSrcweir "set", 218*cdf0e10cSrcweir "some", 219*cdf0e10cSrcweir "sum", 220*cdf0e10cSrcweir "table", 221*cdf0e10cSrcweir "temporary", 222*cdf0e10cSrcweir "true", 223*cdf0e10cSrcweir "union", 224*cdf0e10cSrcweir "unique", 225*cdf0e10cSrcweir "unknown", 226*cdf0e10cSrcweir "update", 227*cdf0e10cSrcweir "using", 228*cdf0e10cSrcweir "values", 229*cdf0e10cSrcweir "where" 230*cdf0e10cSrcweir }; 231*cdf0e10cSrcweir 232*cdf0e10cSrcweir 233*cdf0e10cSrcweir extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 ) 234*cdf0e10cSrcweir { 235*cdf0e10cSrcweir return strcmp( (char *)arg1, *(char **)arg2 ); 236*cdf0e10cSrcweir } 237*cdf0e10cSrcweir 238*cdf0e10cSrcweir 239*cdf0e10cSrcweir class LetterTable 240*cdf0e10cSrcweir { 241*cdf0e10cSrcweir bool IsLetterTab[256]; 242*cdf0e10cSrcweir 243*cdf0e10cSrcweir public: 244*cdf0e10cSrcweir LetterTable( void ); 245*cdf0e10cSrcweir 246*cdf0e10cSrcweir inline bool isLetter( sal_Unicode c ) 247*cdf0e10cSrcweir { 248*cdf0e10cSrcweir bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c ); 249*cdf0e10cSrcweir return bRet; 250*cdf0e10cSrcweir } 251*cdf0e10cSrcweir bool isLetterUnicode( sal_Unicode c ); 252*cdf0e10cSrcweir }; 253*cdf0e10cSrcweir 254*cdf0e10cSrcweir class BasicSimpleCharClass 255*cdf0e10cSrcweir { 256*cdf0e10cSrcweir static LetterTable aLetterTable; 257*cdf0e10cSrcweir 258*cdf0e10cSrcweir public: 259*cdf0e10cSrcweir static sal_Bool isAlpha( sal_Unicode c, bool bCompatible ) 260*cdf0e10cSrcweir { 261*cdf0e10cSrcweir sal_Bool bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 262*cdf0e10cSrcweir || (bCompatible && aLetterTable.isLetter( c )); 263*cdf0e10cSrcweir return bRet; 264*cdf0e10cSrcweir } 265*cdf0e10cSrcweir 266*cdf0e10cSrcweir static sal_Bool isDigit( sal_Unicode c ) 267*cdf0e10cSrcweir { 268*cdf0e10cSrcweir sal_Bool bRet = (c >= '0' && c <= '9'); 269*cdf0e10cSrcweir return bRet; 270*cdf0e10cSrcweir } 271*cdf0e10cSrcweir 272*cdf0e10cSrcweir static sal_Bool isAlphaNumeric( sal_Unicode c, bool bCompatible ) 273*cdf0e10cSrcweir { 274*cdf0e10cSrcweir sal_Bool bRet = isDigit( c ) || isAlpha( c, bCompatible ); 275*cdf0e10cSrcweir return bRet; 276*cdf0e10cSrcweir } 277*cdf0e10cSrcweir }; 278*cdf0e10cSrcweir 279*cdf0e10cSrcweir LetterTable BasicSimpleCharClass::aLetterTable; 280*cdf0e10cSrcweir 281*cdf0e10cSrcweir LetterTable::LetterTable( void ) 282*cdf0e10cSrcweir { 283*cdf0e10cSrcweir for( int i = 0 ; i < 256 ; ++i ) 284*cdf0e10cSrcweir IsLetterTab[i] = false; 285*cdf0e10cSrcweir 286*cdf0e10cSrcweir IsLetterTab[0xC0] = true; // ?, CAPITAL LETTER A WITH GRAVE ACCENT 287*cdf0e10cSrcweir IsLetterTab[0xC1] = true; // ?, CAPITAL LETTER A WITH ACUTE ACCENT 288*cdf0e10cSrcweir IsLetterTab[0xC2] = true; // ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT 289*cdf0e10cSrcweir IsLetterTab[0xC3] = true; // ?, CAPITAL LETTER A WITH TILDE 290*cdf0e10cSrcweir IsLetterTab[0xC4] = true; // ?, CAPITAL LETTER A WITH DIAERESIS 291*cdf0e10cSrcweir IsLetterTab[0xC5] = true; // ?, CAPITAL LETTER A WITH RING ABOVE 292*cdf0e10cSrcweir IsLetterTab[0xC6] = true; // ?, CAPITAL LIGATURE AE 293*cdf0e10cSrcweir IsLetterTab[0xC7] = true; // ?, CAPITAL LETTER C WITH CEDILLA 294*cdf0e10cSrcweir IsLetterTab[0xC8] = true; // ?, CAPITAL LETTER E WITH GRAVE ACCENT 295*cdf0e10cSrcweir IsLetterTab[0xC9] = true; // ?, CAPITAL LETTER E WITH ACUTE ACCENT 296*cdf0e10cSrcweir IsLetterTab[0xCA] = true; // ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT 297*cdf0e10cSrcweir IsLetterTab[0xCB] = true; // ?, CAPITAL LETTER E WITH DIAERESIS 298*cdf0e10cSrcweir IsLetterTab[0xCC] = true; // ?, CAPITAL LETTER I WITH GRAVE ACCENT 299*cdf0e10cSrcweir IsLetterTab[0xCD] = true; // ?, CAPITAL LETTER I WITH ACUTE ACCENT 300*cdf0e10cSrcweir IsLetterTab[0xCE] = true; // ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT 301*cdf0e10cSrcweir IsLetterTab[0xCF] = true; // ?, CAPITAL LETTER I WITH DIAERESIS 302*cdf0e10cSrcweir IsLetterTab[0xD0] = true; // ?, CAPITAL LETTER ETH 303*cdf0e10cSrcweir IsLetterTab[0xD1] = true; // ?, CAPITAL LETTER N WITH TILDE 304*cdf0e10cSrcweir IsLetterTab[0xD2] = true; // ?, CAPITAL LETTER O WITH GRAVE ACCENT 305*cdf0e10cSrcweir IsLetterTab[0xD3] = true; // ?, CAPITAL LETTER O WITH ACUTE ACCENT 306*cdf0e10cSrcweir IsLetterTab[0xD4] = true; // ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT 307*cdf0e10cSrcweir IsLetterTab[0xD5] = true; // ?, CAPITAL LETTER O WITH TILDE 308*cdf0e10cSrcweir IsLetterTab[0xD6] = true; // ?, CAPITAL LETTER O WITH DIAERESIS 309*cdf0e10cSrcweir IsLetterTab[0xD8] = true; // ?, CAPITAL LETTER O WITH STROKE 310*cdf0e10cSrcweir IsLetterTab[0xD9] = true; // ?, CAPITAL LETTER U WITH GRAVE ACCENT 311*cdf0e10cSrcweir IsLetterTab[0xDA] = true; // ?, CAPITAL LETTER U WITH ACUTE ACCENT 312*cdf0e10cSrcweir IsLetterTab[0xDB] = true; // ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT 313*cdf0e10cSrcweir IsLetterTab[0xDC] = true; // ?, CAPITAL LETTER U WITH DIAERESIS 314*cdf0e10cSrcweir IsLetterTab[0xDD] = true; // ?, CAPITAL LETTER Y WITH ACUTE ACCENT 315*cdf0e10cSrcweir IsLetterTab[0xDE] = true; // ?, CAPITAL LETTER THORN 316*cdf0e10cSrcweir IsLetterTab[0xDF] = true; // ?, SMALL LETTER SHARP S 317*cdf0e10cSrcweir IsLetterTab[0xE0] = true; // ?, SMALL LETTER A WITH GRAVE ACCENT 318*cdf0e10cSrcweir IsLetterTab[0xE1] = true; // ?, SMALL LETTER A WITH ACUTE ACCENT 319*cdf0e10cSrcweir IsLetterTab[0xE2] = true; // ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT 320*cdf0e10cSrcweir IsLetterTab[0xE3] = true; // ?, SMALL LETTER A WITH TILDE 321*cdf0e10cSrcweir IsLetterTab[0xE4] = true; // ?, SMALL LETTER A WITH DIAERESIS 322*cdf0e10cSrcweir IsLetterTab[0xE5] = true; // ?, SMALL LETTER A WITH RING ABOVE 323*cdf0e10cSrcweir IsLetterTab[0xE6] = true; // ?, SMALL LIGATURE AE 324*cdf0e10cSrcweir IsLetterTab[0xE7] = true; // ?, SMALL LETTER C WITH CEDILLA 325*cdf0e10cSrcweir IsLetterTab[0xE8] = true; // ?, SMALL LETTER E WITH GRAVE ACCENT 326*cdf0e10cSrcweir IsLetterTab[0xE9] = true; // ?, SMALL LETTER E WITH ACUTE ACCENT 327*cdf0e10cSrcweir IsLetterTab[0xEA] = true; // ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT 328*cdf0e10cSrcweir IsLetterTab[0xEB] = true; // ?, SMALL LETTER E WITH DIAERESIS 329*cdf0e10cSrcweir IsLetterTab[0xEC] = true; // ?, SMALL LETTER I WITH GRAVE ACCENT 330*cdf0e10cSrcweir IsLetterTab[0xED] = true; // ?, SMALL LETTER I WITH ACUTE ACCENT 331*cdf0e10cSrcweir IsLetterTab[0xEE] = true; // ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT 332*cdf0e10cSrcweir IsLetterTab[0xEF] = true; // ?, SMALL LETTER I WITH DIAERESIS 333*cdf0e10cSrcweir IsLetterTab[0xF0] = true; // ?, SMALL LETTER ETH 334*cdf0e10cSrcweir IsLetterTab[0xF1] = true; // ?, SMALL LETTER N WITH TILDE 335*cdf0e10cSrcweir IsLetterTab[0xF2] = true; // ?, SMALL LETTER O WITH GRAVE ACCENT 336*cdf0e10cSrcweir IsLetterTab[0xF3] = true; // ?, SMALL LETTER O WITH ACUTE ACCENT 337*cdf0e10cSrcweir IsLetterTab[0xF4] = true; // ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT 338*cdf0e10cSrcweir IsLetterTab[0xF5] = true; // ?, SMALL LETTER O WITH TILDE 339*cdf0e10cSrcweir IsLetterTab[0xF6] = true; // ?, SMALL LETTER O WITH DIAERESIS 340*cdf0e10cSrcweir IsLetterTab[0xF8] = true; // ?, SMALL LETTER O WITH OBLIQUE BAR 341*cdf0e10cSrcweir IsLetterTab[0xF9] = true; // ?, SMALL LETTER U WITH GRAVE ACCENT 342*cdf0e10cSrcweir IsLetterTab[0xFA] = true; // ?, SMALL LETTER U WITH ACUTE ACCENT 343*cdf0e10cSrcweir IsLetterTab[0xFB] = true; // ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT 344*cdf0e10cSrcweir IsLetterTab[0xFC] = true; // ?, SMALL LETTER U WITH DIAERESIS 345*cdf0e10cSrcweir IsLetterTab[0xFD] = true; // ?, SMALL LETTER Y WITH ACUTE ACCENT 346*cdf0e10cSrcweir IsLetterTab[0xFE] = true; // ?, SMALL LETTER THORN 347*cdf0e10cSrcweir IsLetterTab[0xFF] = true; // � , SMALL LETTER Y WITH DIAERESIS 348*cdf0e10cSrcweir } 349*cdf0e10cSrcweir 350*cdf0e10cSrcweir bool LetterTable::isLetterUnicode( sal_Unicode c ) 351*cdf0e10cSrcweir { 352*cdf0e10cSrcweir static CharClass* pCharClass = NULL; 353*cdf0e10cSrcweir if( pCharClass == NULL ) 354*cdf0e10cSrcweir pCharClass = new CharClass( Application::GetSettings().GetLocale() ); 355*cdf0e10cSrcweir String aStr( c ); 356*cdf0e10cSrcweir bool bRet = pCharClass->isLetter( aStr, 0 ); 357*cdf0e10cSrcweir return bRet; 358*cdf0e10cSrcweir } 359*cdf0e10cSrcweir 360*cdf0e10cSrcweir // Hilfsfunktion: Zeichen-Flag Testen 361*cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags ) 362*cdf0e10cSrcweir { 363*cdf0e10cSrcweir bool bRet = false; 364*cdf0e10cSrcweir if( c != 0 && c <= 255 ) 365*cdf0e10cSrcweir { 366*cdf0e10cSrcweir bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 ); 367*cdf0e10cSrcweir } 368*cdf0e10cSrcweir else if( c > 255 ) 369*cdf0e10cSrcweir { 370*cdf0e10cSrcweir bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0 371*cdf0e10cSrcweir ? BasicSimpleCharClass::isAlpha( c, true ) : false; 372*cdf0e10cSrcweir } 373*cdf0e10cSrcweir return bRet; 374*cdf0e10cSrcweir } 375*cdf0e10cSrcweir 376*cdf0e10cSrcweir void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount ) 377*cdf0e10cSrcweir { 378*cdf0e10cSrcweir ppListKeyWords = ppKeyWords; 379*cdf0e10cSrcweir nKeyWordCount = nCount; 380*cdf0e10cSrcweir } 381*cdf0e10cSrcweir 382*cdf0e10cSrcweir // Neues Token holen 383*cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, 384*cdf0e10cSrcweir /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos ) 385*cdf0e10cSrcweir { 386*cdf0e10cSrcweir reType = TT_UNKNOWN; 387*cdf0e10cSrcweir 388*cdf0e10cSrcweir // Position merken 389*cdf0e10cSrcweir rpStartPos = mpActualPos; 390*cdf0e10cSrcweir 391*cdf0e10cSrcweir // Zeichen untersuchen 392*cdf0e10cSrcweir sal_Unicode c = peekChar(); 393*cdf0e10cSrcweir if( c == CHAR_EOF ) 394*cdf0e10cSrcweir return sal_False; 395*cdf0e10cSrcweir 396*cdf0e10cSrcweir // Zeichen lesen 397*cdf0e10cSrcweir getChar(); 398*cdf0e10cSrcweir 399*cdf0e10cSrcweir //*** Alle Moeglichkeiten durchgehen *** 400*cdf0e10cSrcweir // Space? 401*cdf0e10cSrcweir if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) ) 402*cdf0e10cSrcweir { 403*cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True ) 404*cdf0e10cSrcweir getChar(); 405*cdf0e10cSrcweir 406*cdf0e10cSrcweir reType = TT_WHITESPACE; 407*cdf0e10cSrcweir } 408*cdf0e10cSrcweir 409*cdf0e10cSrcweir // Identifier? 410*cdf0e10cSrcweir else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) ) 411*cdf0e10cSrcweir { 412*cdf0e10cSrcweir sal_Bool bIdentifierChar; 413*cdf0e10cSrcweir do 414*cdf0e10cSrcweir { 415*cdf0e10cSrcweir // Naechstes Zeichen holen 416*cdf0e10cSrcweir c = peekChar(); 417*cdf0e10cSrcweir bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER ); 418*cdf0e10cSrcweir if( bIdentifierChar ) 419*cdf0e10cSrcweir getChar(); 420*cdf0e10cSrcweir } 421*cdf0e10cSrcweir while( bIdentifierChar ); 422*cdf0e10cSrcweir 423*cdf0e10cSrcweir reType = TT_IDENTIFIER; 424*cdf0e10cSrcweir 425*cdf0e10cSrcweir // Schluesselwort-Tabelle 426*cdf0e10cSrcweir if (ppListKeyWords != NULL) 427*cdf0e10cSrcweir { 428*cdf0e10cSrcweir int nCount = mpActualPos - rpStartPos; 429*cdf0e10cSrcweir 430*cdf0e10cSrcweir // No keyword if string contains char > 255 431*cdf0e10cSrcweir bool bCanBeKeyword = true; 432*cdf0e10cSrcweir for( int i = 0 ; i < nCount ; i++ ) 433*cdf0e10cSrcweir { 434*cdf0e10cSrcweir if( rpStartPos[i] > 255 ) 435*cdf0e10cSrcweir { 436*cdf0e10cSrcweir bCanBeKeyword = false; 437*cdf0e10cSrcweir break; 438*cdf0e10cSrcweir } 439*cdf0e10cSrcweir } 440*cdf0e10cSrcweir 441*cdf0e10cSrcweir if( bCanBeKeyword ) 442*cdf0e10cSrcweir { 443*cdf0e10cSrcweir String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) ); 444*cdf0e10cSrcweir ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US ); 445*cdf0e10cSrcweir aByteStr.ToLowerAscii(); 446*cdf0e10cSrcweir if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ), 447*cdf0e10cSrcweir compare_strings ) ) 448*cdf0e10cSrcweir { 449*cdf0e10cSrcweir reType = TT_KEYWORDS; 450*cdf0e10cSrcweir 451*cdf0e10cSrcweir if ( aByteStr.Equals( "rem" ) ) 452*cdf0e10cSrcweir { 453*cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 454*cdf0e10cSrcweir sal_Unicode cPeek = peekChar(); 455*cdf0e10cSrcweir while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) 456*cdf0e10cSrcweir { 457*cdf0e10cSrcweir c = getChar(); 458*cdf0e10cSrcweir cPeek = peekChar(); 459*cdf0e10cSrcweir } 460*cdf0e10cSrcweir 461*cdf0e10cSrcweir reType = TT_COMMENT; 462*cdf0e10cSrcweir } 463*cdf0e10cSrcweir } 464*cdf0e10cSrcweir } 465*cdf0e10cSrcweir } 466*cdf0e10cSrcweir } 467*cdf0e10cSrcweir 468*cdf0e10cSrcweir // Operator? 469*cdf0e10cSrcweir // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there 470*cdf0e10cSrcweir else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) ) 471*cdf0e10cSrcweir { 472*cdf0e10cSrcweir // paramters for SQL view 473*cdf0e10cSrcweir if ( (c==':') || (c=='?')) 474*cdf0e10cSrcweir { 475*cdf0e10cSrcweir if (c!='?') 476*cdf0e10cSrcweir { 477*cdf0e10cSrcweir sal_Bool bIdentifierChar; 478*cdf0e10cSrcweir do 479*cdf0e10cSrcweir { 480*cdf0e10cSrcweir // Naechstes Zeichen holen 481*cdf0e10cSrcweir c = peekChar(); 482*cdf0e10cSrcweir bIdentifierChar = BasicSimpleCharClass::isAlpha( c, true ); 483*cdf0e10cSrcweir if( bIdentifierChar ) 484*cdf0e10cSrcweir getChar(); 485*cdf0e10cSrcweir } 486*cdf0e10cSrcweir while( bIdentifierChar ); 487*cdf0e10cSrcweir } 488*cdf0e10cSrcweir reType = TT_PARAMETER; 489*cdf0e10cSrcweir } 490*cdf0e10cSrcweir else if ((c=='-')) 491*cdf0e10cSrcweir { 492*cdf0e10cSrcweir sal_Unicode cPeekNext = peekChar(); 493*cdf0e10cSrcweir if (cPeekNext=='-') 494*cdf0e10cSrcweir { 495*cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 496*cdf0e10cSrcweir while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) 497*cdf0e10cSrcweir { 498*cdf0e10cSrcweir getChar(); 499*cdf0e10cSrcweir cPeekNext = peekChar(); 500*cdf0e10cSrcweir } 501*cdf0e10cSrcweir reType = TT_COMMENT; 502*cdf0e10cSrcweir } 503*cdf0e10cSrcweir } 504*cdf0e10cSrcweir else if (c=='/') 505*cdf0e10cSrcweir { 506*cdf0e10cSrcweir sal_Unicode cPeekNext = peekChar(); 507*cdf0e10cSrcweir if (cPeekNext=='/') 508*cdf0e10cSrcweir { 509*cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 510*cdf0e10cSrcweir while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) 511*cdf0e10cSrcweir { 512*cdf0e10cSrcweir getChar(); 513*cdf0e10cSrcweir cPeekNext = peekChar(); 514*cdf0e10cSrcweir } 515*cdf0e10cSrcweir reType = TT_COMMENT; 516*cdf0e10cSrcweir } 517*cdf0e10cSrcweir } 518*cdf0e10cSrcweir else 519*cdf0e10cSrcweir { 520*cdf0e10cSrcweir // Kommentar ? 521*cdf0e10cSrcweir if ( c == '\'' ) 522*cdf0e10cSrcweir { 523*cdf0e10cSrcweir c = getChar(); // '/' entfernen 524*cdf0e10cSrcweir 525*cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 526*cdf0e10cSrcweir sal_Unicode cPeek = c; 527*cdf0e10cSrcweir while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) 528*cdf0e10cSrcweir { 529*cdf0e10cSrcweir getChar(); 530*cdf0e10cSrcweir cPeek = peekChar(); 531*cdf0e10cSrcweir } 532*cdf0e10cSrcweir 533*cdf0e10cSrcweir reType = TT_COMMENT; 534*cdf0e10cSrcweir } 535*cdf0e10cSrcweir 536*cdf0e10cSrcweir // Echter Operator, kann hier einfach behandelt werden, 537*cdf0e10cSrcweir // da nicht der wirkliche Operator, wie z.B. += interessiert, 538*cdf0e10cSrcweir // sondern nur die Tatsache, dass es sich um einen handelt. 539*cdf0e10cSrcweir if( reType != TT_COMMENT ) 540*cdf0e10cSrcweir { 541*cdf0e10cSrcweir reType = TT_OPERATOR; 542*cdf0e10cSrcweir } 543*cdf0e10cSrcweir 544*cdf0e10cSrcweir } 545*cdf0e10cSrcweir } 546*cdf0e10cSrcweir 547*cdf0e10cSrcweir // Objekt-Trenner? Muss vor Number abgehandelt werden 548*cdf0e10cSrcweir else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) ) 549*cdf0e10cSrcweir { 550*cdf0e10cSrcweir reType = TT_OPERATOR; 551*cdf0e10cSrcweir } 552*cdf0e10cSrcweir 553*cdf0e10cSrcweir // Zahl? 554*cdf0e10cSrcweir else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True ) 555*cdf0e10cSrcweir { 556*cdf0e10cSrcweir reType = TT_NUMBER; 557*cdf0e10cSrcweir 558*cdf0e10cSrcweir // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert 559*cdf0e10cSrcweir int nRadix = 10; 560*cdf0e10cSrcweir 561*cdf0e10cSrcweir // Ist es eine Hex- oder Oct-Zahl? 562*cdf0e10cSrcweir if( c == '&' ) 563*cdf0e10cSrcweir { 564*cdf0e10cSrcweir // Octal? 565*cdf0e10cSrcweir if( peekChar() == 'o' || peekChar() == 'O' ) 566*cdf0e10cSrcweir { 567*cdf0e10cSrcweir // o entfernen 568*cdf0e10cSrcweir getChar(); 569*cdf0e10cSrcweir nRadix = 8; // Octal-Basis 570*cdf0e10cSrcweir 571*cdf0e10cSrcweir // Alle Ziffern einlesen 572*cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) ) 573*cdf0e10cSrcweir c = getChar(); 574*cdf0e10cSrcweir } 575*cdf0e10cSrcweir // Hex? 576*cdf0e10cSrcweir else if( peekChar() == 'h' || peekChar() == 'H' ) 577*cdf0e10cSrcweir { 578*cdf0e10cSrcweir // x entfernen 579*cdf0e10cSrcweir getChar(); 580*cdf0e10cSrcweir nRadix = 16; // Hex-Basis 581*cdf0e10cSrcweir 582*cdf0e10cSrcweir // Alle Ziffern einlesen und puffern 583*cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) ) 584*cdf0e10cSrcweir c = getChar(); 585*cdf0e10cSrcweir } 586*cdf0e10cSrcweir else 587*cdf0e10cSrcweir { 588*cdf0e10cSrcweir reType = TT_OPERATOR; 589*cdf0e10cSrcweir } 590*cdf0e10cSrcweir } 591*cdf0e10cSrcweir 592*cdf0e10cSrcweir // Wenn nicht Oct oder Hex als double ansehen 593*cdf0e10cSrcweir if( reType == TT_NUMBER && nRadix == 10 ) 594*cdf0e10cSrcweir { 595*cdf0e10cSrcweir // Flag, ob das letzte Zeichen ein Exponent war 596*cdf0e10cSrcweir sal_Bool bAfterExpChar = sal_False; 597*cdf0e10cSrcweir 598*cdf0e10cSrcweir // Alle Ziffern einlesen 599*cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) || 600*cdf0e10cSrcweir (bAfterExpChar && peekChar() == '+' ) || 601*cdf0e10cSrcweir (bAfterExpChar && peekChar() == '-' ) ) 602*cdf0e10cSrcweir // Nach Exponent auch +/- OK 603*cdf0e10cSrcweir { 604*cdf0e10cSrcweir c = getChar(); // Zeichen lesen 605*cdf0e10cSrcweir bAfterExpChar = ( c == 'e' || c == 'E' ); 606*cdf0e10cSrcweir } 607*cdf0e10cSrcweir } 608*cdf0e10cSrcweir 609*cdf0e10cSrcweir // reType = TT_NUMBER; 610*cdf0e10cSrcweir } 611*cdf0e10cSrcweir 612*cdf0e10cSrcweir // String? 613*cdf0e10cSrcweir else if( testCharFlags( c, CHAR_START_STRING ) == sal_True ) 614*cdf0e10cSrcweir { 615*cdf0e10cSrcweir // Merken, welches Zeichen den String eroeffnet hat 616*cdf0e10cSrcweir sal_Unicode cEndString = c; 617*cdf0e10cSrcweir if( c == '[' ) 618*cdf0e10cSrcweir cEndString = ']'; 619*cdf0e10cSrcweir 620*cdf0e10cSrcweir // Alle Ziffern einlesen und puffern 621*cdf0e10cSrcweir while( peekChar() != cEndString ) 622*cdf0e10cSrcweir { 623*cdf0e10cSrcweir // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht 624*cdf0e10cSrcweir if( peekChar() == CHAR_EOF ) 625*cdf0e10cSrcweir { 626*cdf0e10cSrcweir // ERROR: unterminated string literal 627*cdf0e10cSrcweir reType = TT_ERROR; 628*cdf0e10cSrcweir break; 629*cdf0e10cSrcweir } 630*cdf0e10cSrcweir c = getChar(); 631*cdf0e10cSrcweir if( testCharFlags( c, CHAR_EOL ) == sal_True ) 632*cdf0e10cSrcweir { 633*cdf0e10cSrcweir // ERROR: unterminated string literal 634*cdf0e10cSrcweir reType = TT_ERROR; 635*cdf0e10cSrcweir break; 636*cdf0e10cSrcweir } 637*cdf0e10cSrcweir } 638*cdf0e10cSrcweir 639*cdf0e10cSrcweir // Zeichen lesen 640*cdf0e10cSrcweir if( reType != TT_ERROR ) 641*cdf0e10cSrcweir { 642*cdf0e10cSrcweir getChar(); 643*cdf0e10cSrcweir if( cEndString == ']' ) 644*cdf0e10cSrcweir reType = TT_IDENTIFIER; 645*cdf0e10cSrcweir else 646*cdf0e10cSrcweir reType = TT_STRING; 647*cdf0e10cSrcweir } 648*cdf0e10cSrcweir } 649*cdf0e10cSrcweir 650*cdf0e10cSrcweir // Zeilenende? 651*cdf0e10cSrcweir else if( testCharFlags( c, CHAR_EOL ) == sal_True ) 652*cdf0e10cSrcweir { 653*cdf0e10cSrcweir // Falls ein weiteres anderes EOL-Char folgt, weg damit 654*cdf0e10cSrcweir sal_Unicode cNext = peekChar(); 655*cdf0e10cSrcweir if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True ) 656*cdf0e10cSrcweir getChar(); 657*cdf0e10cSrcweir 658*cdf0e10cSrcweir // Positions-Daten auf Zeilen-Beginn setzen 659*cdf0e10cSrcweir nCol = 0; 660*cdf0e10cSrcweir nLine++; 661*cdf0e10cSrcweir 662*cdf0e10cSrcweir reType = TT_EOL; 663*cdf0e10cSrcweir } 664*cdf0e10cSrcweir 665*cdf0e10cSrcweir // Alles andere bleibt TT_UNKNOWN 666*cdf0e10cSrcweir 667*cdf0e10cSrcweir 668*cdf0e10cSrcweir // End-Position eintragen 669*cdf0e10cSrcweir rpEndPos = mpActualPos; 670*cdf0e10cSrcweir return sal_True; 671*cdf0e10cSrcweir } 672*cdf0e10cSrcweir 673*cdf0e10cSrcweir String SimpleTokenizer_Impl::getTokStr 674*cdf0e10cSrcweir ( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos ) 675*cdf0e10cSrcweir { 676*cdf0e10cSrcweir return String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) ); 677*cdf0e10cSrcweir } 678*cdf0e10cSrcweir 679*cdf0e10cSrcweir #ifdef DBG_UTIL 680*cdf0e10cSrcweir // TEST: Token ausgeben 681*cdf0e10cSrcweir String SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType, 682*cdf0e10cSrcweir /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos ) 683*cdf0e10cSrcweir { 684*cdf0e10cSrcweir String aOut; 685*cdf0e10cSrcweir switch( eType ) 686*cdf0e10cSrcweir { 687*cdf0e10cSrcweir case TT_UNKNOWN: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break; 688*cdf0e10cSrcweir case TT_IDENTIFIER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break; 689*cdf0e10cSrcweir case TT_WHITESPACE: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break; 690*cdf0e10cSrcweir case TT_NUMBER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break; 691*cdf0e10cSrcweir case TT_STRING: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break; 692*cdf0e10cSrcweir case TT_EOL: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break; 693*cdf0e10cSrcweir case TT_COMMENT: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break; 694*cdf0e10cSrcweir case TT_ERROR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break; 695*cdf0e10cSrcweir case TT_OPERATOR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break; 696*cdf0e10cSrcweir case TT_KEYWORDS: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break; 697*cdf0e10cSrcweir case TT_PARAMETER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break; 698*cdf0e10cSrcweir } 699*cdf0e10cSrcweir if( eType != TT_EOL ) 700*cdf0e10cSrcweir { 701*cdf0e10cSrcweir aOut += String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) ); 702*cdf0e10cSrcweir } 703*cdf0e10cSrcweir aOut += String( RTL_CONSTASCII_USTRINGPARAM("\n") ); 704*cdf0e10cSrcweir return aOut; 705*cdf0e10cSrcweir } 706*cdf0e10cSrcweir #endif 707*cdf0e10cSrcweir 708*cdf0e10cSrcweir SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang) 709*cdf0e10cSrcweir { 710*cdf0e10cSrcweir memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) ); 711*cdf0e10cSrcweir 712*cdf0e10cSrcweir // Zeichen-Tabelle fuellen 713*cdf0e10cSrcweir sal_uInt16 i; 714*cdf0e10cSrcweir 715*cdf0e10cSrcweir // Zulaessige Zeichen fuer Identifier 716*cdf0e10cSrcweir sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ); 717*cdf0e10cSrcweir for( i = 'a' ; i <= 'z' ; i++ ) 718*cdf0e10cSrcweir aCharTypeTab[i] |= nHelpMask; 719*cdf0e10cSrcweir for( i = 'A' ; i <= 'Z' ; i++ ) 720*cdf0e10cSrcweir aCharTypeTab[i] |= nHelpMask; 721*cdf0e10cSrcweir // '_' extra eintragen 722*cdf0e10cSrcweir aCharTypeTab[(int)'_'] |= nHelpMask; 723*cdf0e10cSrcweir // AB 23.6.97: '$' ist auch erlaubt 724*cdf0e10cSrcweir aCharTypeTab[(int)'$'] |= nHelpMask; 725*cdf0e10cSrcweir 726*cdf0e10cSrcweir // Ziffern (Identifier und Number ist moeglich) 727*cdf0e10cSrcweir nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER | 728*cdf0e10cSrcweir CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER ); 729*cdf0e10cSrcweir for( i = '0' ; i <= '9' ; i++ ) 730*cdf0e10cSrcweir aCharTypeTab[i] |= nHelpMask; 731*cdf0e10cSrcweir 732*cdf0e10cSrcweir // e und E sowie . von Hand ergaenzen 733*cdf0e10cSrcweir aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER; 734*cdf0e10cSrcweir aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER; 735*cdf0e10cSrcweir aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER ); 736*cdf0e10cSrcweir aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER; 737*cdf0e10cSrcweir 738*cdf0e10cSrcweir // Hex-Ziffern 739*cdf0e10cSrcweir for( i = 'a' ; i <= 'f' ; i++ ) 740*cdf0e10cSrcweir aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; 741*cdf0e10cSrcweir for( i = 'A' ; i <= 'F' ; i++ ) 742*cdf0e10cSrcweir aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; 743*cdf0e10cSrcweir 744*cdf0e10cSrcweir // Oct-Ziffern 745*cdf0e10cSrcweir for( i = '0' ; i <= '7' ; i++ ) 746*cdf0e10cSrcweir aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER; 747*cdf0e10cSrcweir 748*cdf0e10cSrcweir // String-Beginn/End-Zeichen 749*cdf0e10cSrcweir aCharTypeTab[(int)'\''] |= CHAR_START_STRING; 750*cdf0e10cSrcweir aCharTypeTab[(int)'\"'] |= CHAR_START_STRING; 751*cdf0e10cSrcweir aCharTypeTab[(int)'['] |= CHAR_START_STRING; 752*cdf0e10cSrcweir aCharTypeTab[(int)'`'] |= CHAR_START_STRING; 753*cdf0e10cSrcweir 754*cdf0e10cSrcweir // Operator-Zeichen 755*cdf0e10cSrcweir aCharTypeTab[(int)'!'] |= CHAR_OPERATOR; 756*cdf0e10cSrcweir aCharTypeTab[(int)'%'] |= CHAR_OPERATOR; 757*cdf0e10cSrcweir // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140 758*cdf0e10cSrcweir aCharTypeTab[(int)'('] |= CHAR_OPERATOR; 759*cdf0e10cSrcweir aCharTypeTab[(int)')'] |= CHAR_OPERATOR; 760*cdf0e10cSrcweir aCharTypeTab[(int)'*'] |= CHAR_OPERATOR; 761*cdf0e10cSrcweir aCharTypeTab[(int)'+'] |= CHAR_OPERATOR; 762*cdf0e10cSrcweir aCharTypeTab[(int)','] |= CHAR_OPERATOR; 763*cdf0e10cSrcweir aCharTypeTab[(int)'-'] |= CHAR_OPERATOR; 764*cdf0e10cSrcweir aCharTypeTab[(int)'/'] |= CHAR_OPERATOR; 765*cdf0e10cSrcweir aCharTypeTab[(int)':'] |= CHAR_OPERATOR; 766*cdf0e10cSrcweir aCharTypeTab[(int)'<'] |= CHAR_OPERATOR; 767*cdf0e10cSrcweir aCharTypeTab[(int)'='] |= CHAR_OPERATOR; 768*cdf0e10cSrcweir aCharTypeTab[(int)'>'] |= CHAR_OPERATOR; 769*cdf0e10cSrcweir aCharTypeTab[(int)'?'] |= CHAR_OPERATOR; 770*cdf0e10cSrcweir aCharTypeTab[(int)'^'] |= CHAR_OPERATOR; 771*cdf0e10cSrcweir aCharTypeTab[(int)'|'] |= CHAR_OPERATOR; 772*cdf0e10cSrcweir aCharTypeTab[(int)'~'] |= CHAR_OPERATOR; 773*cdf0e10cSrcweir aCharTypeTab[(int)'{'] |= CHAR_OPERATOR; 774*cdf0e10cSrcweir aCharTypeTab[(int)'}'] |= CHAR_OPERATOR; 775*cdf0e10cSrcweir // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826 776*cdf0e10cSrcweir aCharTypeTab[(int)']'] |= CHAR_OPERATOR; 777*cdf0e10cSrcweir aCharTypeTab[(int)';'] |= CHAR_OPERATOR; 778*cdf0e10cSrcweir 779*cdf0e10cSrcweir // Space 780*cdf0e10cSrcweir aCharTypeTab[(int)' ' ] |= CHAR_SPACE; 781*cdf0e10cSrcweir aCharTypeTab[(int)'\t'] |= CHAR_SPACE; 782*cdf0e10cSrcweir 783*cdf0e10cSrcweir // Zeilen-Ende-Zeichen 784*cdf0e10cSrcweir aCharTypeTab[(int)'\r'] |= CHAR_EOL; 785*cdf0e10cSrcweir aCharTypeTab[(int)'\n'] |= CHAR_EOL; 786*cdf0e10cSrcweir 787*cdf0e10cSrcweir ppListKeyWords = NULL; 788*cdf0e10cSrcweir } 789*cdf0e10cSrcweir 790*cdf0e10cSrcweir SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void ) 791*cdf0e10cSrcweir { 792*cdf0e10cSrcweir } 793*cdf0e10cSrcweir 794*cdf0e10cSrcweir SimpleTokenizer_Impl* getSimpleTokenizer( void ) 795*cdf0e10cSrcweir { 796*cdf0e10cSrcweir static SimpleTokenizer_Impl* pSimpleTokenizer = NULL; 797*cdf0e10cSrcweir if( !pSimpleTokenizer ) 798*cdf0e10cSrcweir pSimpleTokenizer = new SimpleTokenizer_Impl(); 799*cdf0e10cSrcweir return pSimpleTokenizer; 800*cdf0e10cSrcweir } 801*cdf0e10cSrcweir 802*cdf0e10cSrcweir // Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul 803*cdf0e10cSrcweir sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource ) 804*cdf0e10cSrcweir { 805*cdf0e10cSrcweir // Position auf den Anfang des Source-Strings setzen 806*cdf0e10cSrcweir mpStringBegin = mpActualPos = aSource->GetBuffer(); 807*cdf0e10cSrcweir 808*cdf0e10cSrcweir // Zeile und Spalte initialisieren 809*cdf0e10cSrcweir nLine = nParseLine; 810*cdf0e10cSrcweir nCol = 0L; 811*cdf0e10cSrcweir 812*cdf0e10cSrcweir // Variablen fuer die Out-Parameter 813*cdf0e10cSrcweir TokenTypes eType; 814*cdf0e10cSrcweir const sal_Unicode* pStartPos; 815*cdf0e10cSrcweir const sal_Unicode* pEndPos; 816*cdf0e10cSrcweir 817*cdf0e10cSrcweir // Schleife ueber alle Tokens 818*cdf0e10cSrcweir sal_uInt16 nTokenCount = 0; 819*cdf0e10cSrcweir while( getNextToken( eType, pStartPos, pEndPos ) ) 820*cdf0e10cSrcweir nTokenCount++; 821*cdf0e10cSrcweir 822*cdf0e10cSrcweir return nTokenCount; 823*cdf0e10cSrcweir } 824*cdf0e10cSrcweir 825*cdf0e10cSrcweir void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine, 826*cdf0e10cSrcweir /*out*/HighlightPortions& portions ) 827*cdf0e10cSrcweir { 828*cdf0e10cSrcweir // Position auf den Anfang des Source-Strings setzen 829*cdf0e10cSrcweir mpStringBegin = mpActualPos = rLine.GetBuffer(); 830*cdf0e10cSrcweir 831*cdf0e10cSrcweir // Zeile und Spalte initialisieren 832*cdf0e10cSrcweir nLine = nParseLine; 833*cdf0e10cSrcweir nCol = 0L; 834*cdf0e10cSrcweir 835*cdf0e10cSrcweir // Variablen fuer die Out-Parameter 836*cdf0e10cSrcweir TokenTypes eType; 837*cdf0e10cSrcweir const sal_Unicode* pStartPos; 838*cdf0e10cSrcweir const sal_Unicode* pEndPos; 839*cdf0e10cSrcweir 840*cdf0e10cSrcweir // Schleife ueber alle Tokens 841*cdf0e10cSrcweir while( getNextToken( eType, pStartPos, pEndPos ) ) 842*cdf0e10cSrcweir { 843*cdf0e10cSrcweir HighlightPortion portion; 844*cdf0e10cSrcweir 845*cdf0e10cSrcweir portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin); 846*cdf0e10cSrcweir portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin); 847*cdf0e10cSrcweir portion.tokenType = eType; 848*cdf0e10cSrcweir 849*cdf0e10cSrcweir portions.push_back(portion); 850*cdf0e10cSrcweir } 851*cdf0e10cSrcweir } 852*cdf0e10cSrcweir 853*cdf0e10cSrcweir 854*cdf0e10cSrcweir ////////////////////////////////////////////////////////////////////////// 855*cdf0e10cSrcweir // Implementierung des SyntaxHighlighter 856*cdf0e10cSrcweir 857*cdf0e10cSrcweir SyntaxHighlighter::SyntaxHighlighter() 858*cdf0e10cSrcweir { 859*cdf0e10cSrcweir m_pSimpleTokenizer = 0; 860*cdf0e10cSrcweir m_pKeyWords = NULL; 861*cdf0e10cSrcweir m_nKeyWordCount = 0; 862*cdf0e10cSrcweir } 863*cdf0e10cSrcweir 864*cdf0e10cSrcweir SyntaxHighlighter::~SyntaxHighlighter() 865*cdf0e10cSrcweir { 866*cdf0e10cSrcweir delete m_pSimpleTokenizer; 867*cdf0e10cSrcweir delete m_pKeyWords; 868*cdf0e10cSrcweir } 869*cdf0e10cSrcweir 870*cdf0e10cSrcweir void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ ) 871*cdf0e10cSrcweir { 872*cdf0e10cSrcweir eLanguage = eLanguage_; 873*cdf0e10cSrcweir delete m_pSimpleTokenizer; 874*cdf0e10cSrcweir m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage); 875*cdf0e10cSrcweir 876*cdf0e10cSrcweir switch (eLanguage) 877*cdf0e10cSrcweir { 878*cdf0e10cSrcweir case HIGHLIGHT_BASIC: 879*cdf0e10cSrcweir m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords, 880*cdf0e10cSrcweir sizeof( strListBasicKeyWords ) / sizeof( char* )); 881*cdf0e10cSrcweir break; 882*cdf0e10cSrcweir case HIGHLIGHT_SQL: 883*cdf0e10cSrcweir m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords, 884*cdf0e10cSrcweir sizeof( strListSqlKeyWords ) / sizeof( char* )); 885*cdf0e10cSrcweir break; 886*cdf0e10cSrcweir default: 887*cdf0e10cSrcweir m_pSimpleTokenizer->setKeyWords( NULL, 0 ); 888*cdf0e10cSrcweir } 889*cdf0e10cSrcweir } 890*cdf0e10cSrcweir 891*cdf0e10cSrcweir const Range SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference, 892*cdf0e10cSrcweir const String* pChangedLines, sal_uInt32 nArrayLength) 893*cdf0e10cSrcweir { 894*cdf0e10cSrcweir (void)nLineCountDifference; 895*cdf0e10cSrcweir 896*cdf0e10cSrcweir for( sal_uInt32 i=0 ; i < nArrayLength ; i++ ) 897*cdf0e10cSrcweir m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]); 898*cdf0e10cSrcweir 899*cdf0e10cSrcweir return Range( nLine, nLine + nArrayLength-1 ); 900*cdf0e10cSrcweir } 901*cdf0e10cSrcweir 902*cdf0e10cSrcweir void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const String& rLine, 903*cdf0e10cSrcweir /*out*/HighlightPortions& portions ) 904*cdf0e10cSrcweir { 905*cdf0e10cSrcweir m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions ); 906*cdf0e10cSrcweir } 907