1*5900e8ecSAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*5900e8ecSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*5900e8ecSAndrew Rist * or more contributor license agreements. See the NOTICE file 5*5900e8ecSAndrew Rist * distributed with this work for additional information 6*5900e8ecSAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*5900e8ecSAndrew Rist * to you under the Apache License, Version 2.0 (the 8*5900e8ecSAndrew Rist * "License"); you may not use this file except in compliance 9*5900e8ecSAndrew Rist * with the License. You may obtain a copy of the License at 10cdf0e10cSrcweir * 11*5900e8ecSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12cdf0e10cSrcweir * 13*5900e8ecSAndrew Rist * Unless required by applicable law or agreed to in writing, 14*5900e8ecSAndrew Rist * software distributed under the License is distributed on an 15*5900e8ecSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*5900e8ecSAndrew Rist * KIND, either express or implied. See the License for the 17*5900e8ecSAndrew Rist * specific language governing permissions and limitations 18*5900e8ecSAndrew Rist * under the License. 19cdf0e10cSrcweir * 20*5900e8ecSAndrew Rist *************************************************************/ 21*5900e8ecSAndrew Rist 22*5900e8ecSAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 25cdf0e10cSrcweir #include "precompiled_svtools.hxx" 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include <svtools/syntaxhighlight.hxx> 28cdf0e10cSrcweir 29cdf0e10cSrcweir #include <unotools/charclass.hxx> 30cdf0e10cSrcweir #include <tools/debug.hxx> 31cdf0e10cSrcweir 32cdf0e10cSrcweir 33cdf0e10cSrcweir // ########################################################################## 34cdf0e10cSrcweir // ATTENTION: all these words needs to be in small caps 35cdf0e10cSrcweir // ########################################################################## 36cdf0e10cSrcweir static const char* strListBasicKeyWords[] = { 37cdf0e10cSrcweir "access", 38cdf0e10cSrcweir "alias", 39cdf0e10cSrcweir "and", 40cdf0e10cSrcweir "any", 41cdf0e10cSrcweir "append", 42cdf0e10cSrcweir "as", 43cdf0e10cSrcweir "base", 44cdf0e10cSrcweir "binary", 45cdf0e10cSrcweir "boolean", 46cdf0e10cSrcweir "byref", 47cdf0e10cSrcweir "byte", 48cdf0e10cSrcweir "byval", 49cdf0e10cSrcweir "call", 50cdf0e10cSrcweir "case", 51cdf0e10cSrcweir "cdecl", 52cdf0e10cSrcweir "classmodule", 53cdf0e10cSrcweir "close", 54cdf0e10cSrcweir "compare", 55cdf0e10cSrcweir "compatible", 56cdf0e10cSrcweir "const", 57cdf0e10cSrcweir "currency", 58cdf0e10cSrcweir "date", 59cdf0e10cSrcweir "declare", 60cdf0e10cSrcweir "defbool", 61cdf0e10cSrcweir "defcur", 62cdf0e10cSrcweir "defdate", 63cdf0e10cSrcweir "defdbl", 64cdf0e10cSrcweir "deferr", 65cdf0e10cSrcweir "defint", 66cdf0e10cSrcweir "deflng", 67cdf0e10cSrcweir "defobj", 68cdf0e10cSrcweir "defsng", 69cdf0e10cSrcweir "defstr", 70cdf0e10cSrcweir "defvar", 71cdf0e10cSrcweir "dim", 72cdf0e10cSrcweir "do", 73cdf0e10cSrcweir "double", 74cdf0e10cSrcweir "each", 75cdf0e10cSrcweir "else", 76cdf0e10cSrcweir "elseif", 77cdf0e10cSrcweir "end", 78cdf0e10cSrcweir "end enum", 79cdf0e10cSrcweir "end function", 80cdf0e10cSrcweir "end if", 81cdf0e10cSrcweir "end select", 82cdf0e10cSrcweir "end sub", 83cdf0e10cSrcweir "end type", 84cdf0e10cSrcweir "endif", 85cdf0e10cSrcweir "enum", 86cdf0e10cSrcweir "eqv", 87cdf0e10cSrcweir "erase", 88cdf0e10cSrcweir "error", 89cdf0e10cSrcweir "exit", 90cdf0e10cSrcweir "explicit", 91cdf0e10cSrcweir "for", 92cdf0e10cSrcweir "function", 93cdf0e10cSrcweir "get", 94cdf0e10cSrcweir "global", 95cdf0e10cSrcweir "gosub", 96cdf0e10cSrcweir "goto", 97cdf0e10cSrcweir "if", 98cdf0e10cSrcweir "imp", 99cdf0e10cSrcweir "implements", 100cdf0e10cSrcweir "in", 101cdf0e10cSrcweir "input", 102cdf0e10cSrcweir "integer", 103cdf0e10cSrcweir "is", 104cdf0e10cSrcweir "let", 105cdf0e10cSrcweir "lib", 106cdf0e10cSrcweir "like", 107cdf0e10cSrcweir "line", 108cdf0e10cSrcweir "line input", 109cdf0e10cSrcweir "local", 110cdf0e10cSrcweir "lock", 111cdf0e10cSrcweir "long", 112cdf0e10cSrcweir "loop", 113cdf0e10cSrcweir "lprint", 114cdf0e10cSrcweir "lset", 115cdf0e10cSrcweir "mod", 116cdf0e10cSrcweir "name", 117cdf0e10cSrcweir "new", 118cdf0e10cSrcweir "next", 119cdf0e10cSrcweir "not", 120cdf0e10cSrcweir "object", 121cdf0e10cSrcweir "on", 122cdf0e10cSrcweir "open", 123cdf0e10cSrcweir "option", 124cdf0e10cSrcweir "optional", 125cdf0e10cSrcweir "or", 126cdf0e10cSrcweir "output", 127cdf0e10cSrcweir "preserve", 128cdf0e10cSrcweir "print", 129cdf0e10cSrcweir "private", 130cdf0e10cSrcweir "property", 131cdf0e10cSrcweir "public", 132cdf0e10cSrcweir "random", 133cdf0e10cSrcweir "read", 134cdf0e10cSrcweir "redim", 135cdf0e10cSrcweir "rem", 136cdf0e10cSrcweir "resume", 137cdf0e10cSrcweir "return", 138cdf0e10cSrcweir "rset", 139cdf0e10cSrcweir "select", 140cdf0e10cSrcweir "set", 141cdf0e10cSrcweir "shared", 142cdf0e10cSrcweir "single", 143cdf0e10cSrcweir "static", 144cdf0e10cSrcweir "step", 145cdf0e10cSrcweir "stop", 146cdf0e10cSrcweir "string", 147cdf0e10cSrcweir "sub", 148cdf0e10cSrcweir "system", 149cdf0e10cSrcweir "text", 150cdf0e10cSrcweir "then", 151cdf0e10cSrcweir "to", 152cdf0e10cSrcweir "type", 153cdf0e10cSrcweir "typeof", 154cdf0e10cSrcweir "until", 155cdf0e10cSrcweir "variant", 156cdf0e10cSrcweir "wend", 157cdf0e10cSrcweir "while", 158cdf0e10cSrcweir "with", 159cdf0e10cSrcweir "write", 160cdf0e10cSrcweir "xor" 161cdf0e10cSrcweir }; 162cdf0e10cSrcweir 163cdf0e10cSrcweir 164cdf0e10cSrcweir static const char* strListSqlKeyWords[] = { 165cdf0e10cSrcweir "all", 166cdf0e10cSrcweir "and", 167cdf0e10cSrcweir "any", 168cdf0e10cSrcweir "as", 169cdf0e10cSrcweir "asc", 170cdf0e10cSrcweir "avg", 171cdf0e10cSrcweir "between", 172cdf0e10cSrcweir "by", 173cdf0e10cSrcweir "cast", 174cdf0e10cSrcweir "corresponding", 175cdf0e10cSrcweir "count", 176cdf0e10cSrcweir "create", 177cdf0e10cSrcweir "cross", 178cdf0e10cSrcweir "delete", 179cdf0e10cSrcweir "desc", 180cdf0e10cSrcweir "distinct", 181cdf0e10cSrcweir "drop", 182cdf0e10cSrcweir "escape", 183cdf0e10cSrcweir "except", 184cdf0e10cSrcweir "exists", 185cdf0e10cSrcweir "false", 186cdf0e10cSrcweir "from", 187cdf0e10cSrcweir "full", 188cdf0e10cSrcweir "global", 189cdf0e10cSrcweir "group", 190cdf0e10cSrcweir "having", 191cdf0e10cSrcweir "in", 192cdf0e10cSrcweir "inner", 193cdf0e10cSrcweir "insert", 194cdf0e10cSrcweir "intersect", 195cdf0e10cSrcweir "into", 196cdf0e10cSrcweir "is", 197cdf0e10cSrcweir "join", 198cdf0e10cSrcweir "left", 199cdf0e10cSrcweir "like", 200cdf0e10cSrcweir "local", 201cdf0e10cSrcweir "match", 202cdf0e10cSrcweir "max", 203cdf0e10cSrcweir "min", 204cdf0e10cSrcweir "natural", 205cdf0e10cSrcweir "not", 206cdf0e10cSrcweir "null", 207cdf0e10cSrcweir "on", 208cdf0e10cSrcweir "or", 209cdf0e10cSrcweir "order", 210cdf0e10cSrcweir "outer", 211cdf0e10cSrcweir "right", 212cdf0e10cSrcweir "select", 213cdf0e10cSrcweir "set", 214cdf0e10cSrcweir "some", 215cdf0e10cSrcweir "sum", 216cdf0e10cSrcweir "table", 217cdf0e10cSrcweir "temporary", 218cdf0e10cSrcweir "true", 219cdf0e10cSrcweir "union", 220cdf0e10cSrcweir "unique", 221cdf0e10cSrcweir "unknown", 222cdf0e10cSrcweir "update", 223cdf0e10cSrcweir "using", 224cdf0e10cSrcweir "values", 225cdf0e10cSrcweir "where" 226cdf0e10cSrcweir }; 227cdf0e10cSrcweir 228cdf0e10cSrcweir 229cdf0e10cSrcweir extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 ) 230cdf0e10cSrcweir { 231cdf0e10cSrcweir return strcmp( (char *)arg1, *(char **)arg2 ); 232cdf0e10cSrcweir } 233cdf0e10cSrcweir 234cdf0e10cSrcweir 235cdf0e10cSrcweir class LetterTable 236cdf0e10cSrcweir { 237cdf0e10cSrcweir bool IsLetterTab[256]; 238cdf0e10cSrcweir 239cdf0e10cSrcweir public: 240cdf0e10cSrcweir LetterTable( void ); 241cdf0e10cSrcweir 242cdf0e10cSrcweir inline bool isLetter( sal_Unicode c ) 243cdf0e10cSrcweir { 244cdf0e10cSrcweir bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c ); 245cdf0e10cSrcweir return bRet; 246cdf0e10cSrcweir } 247cdf0e10cSrcweir bool isLetterUnicode( sal_Unicode c ); 248cdf0e10cSrcweir }; 249cdf0e10cSrcweir 250cdf0e10cSrcweir class BasicSimpleCharClass 251cdf0e10cSrcweir { 252cdf0e10cSrcweir static LetterTable aLetterTable; 253cdf0e10cSrcweir 254cdf0e10cSrcweir public: 255cdf0e10cSrcweir static sal_Bool isAlpha( sal_Unicode c, bool bCompatible ) 256cdf0e10cSrcweir { 257cdf0e10cSrcweir sal_Bool bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 258cdf0e10cSrcweir || (bCompatible && aLetterTable.isLetter( c )); 259cdf0e10cSrcweir return bRet; 260cdf0e10cSrcweir } 261cdf0e10cSrcweir 262cdf0e10cSrcweir static sal_Bool isDigit( sal_Unicode c ) 263cdf0e10cSrcweir { 264cdf0e10cSrcweir sal_Bool bRet = (c >= '0' && c <= '9'); 265cdf0e10cSrcweir return bRet; 266cdf0e10cSrcweir } 267cdf0e10cSrcweir 268cdf0e10cSrcweir static sal_Bool isAlphaNumeric( sal_Unicode c, bool bCompatible ) 269cdf0e10cSrcweir { 270cdf0e10cSrcweir sal_Bool bRet = isDigit( c ) || isAlpha( c, bCompatible ); 271cdf0e10cSrcweir return bRet; 272cdf0e10cSrcweir } 273cdf0e10cSrcweir }; 274cdf0e10cSrcweir 275cdf0e10cSrcweir LetterTable BasicSimpleCharClass::aLetterTable; 276cdf0e10cSrcweir 277cdf0e10cSrcweir LetterTable::LetterTable( void ) 278cdf0e10cSrcweir { 279cdf0e10cSrcweir for( int i = 0 ; i < 256 ; ++i ) 280cdf0e10cSrcweir IsLetterTab[i] = false; 281cdf0e10cSrcweir 282cdf0e10cSrcweir IsLetterTab[0xC0] = true; // ?, CAPITAL LETTER A WITH GRAVE ACCENT 283cdf0e10cSrcweir IsLetterTab[0xC1] = true; // ?, CAPITAL LETTER A WITH ACUTE ACCENT 284cdf0e10cSrcweir IsLetterTab[0xC2] = true; // ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT 285cdf0e10cSrcweir IsLetterTab[0xC3] = true; // ?, CAPITAL LETTER A WITH TILDE 286cdf0e10cSrcweir IsLetterTab[0xC4] = true; // ?, CAPITAL LETTER A WITH DIAERESIS 287cdf0e10cSrcweir IsLetterTab[0xC5] = true; // ?, CAPITAL LETTER A WITH RING ABOVE 288cdf0e10cSrcweir IsLetterTab[0xC6] = true; // ?, CAPITAL LIGATURE AE 289cdf0e10cSrcweir IsLetterTab[0xC7] = true; // ?, CAPITAL LETTER C WITH CEDILLA 290cdf0e10cSrcweir IsLetterTab[0xC8] = true; // ?, CAPITAL LETTER E WITH GRAVE ACCENT 291cdf0e10cSrcweir IsLetterTab[0xC9] = true; // ?, CAPITAL LETTER E WITH ACUTE ACCENT 292cdf0e10cSrcweir IsLetterTab[0xCA] = true; // ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT 293cdf0e10cSrcweir IsLetterTab[0xCB] = true; // ?, CAPITAL LETTER E WITH DIAERESIS 294cdf0e10cSrcweir IsLetterTab[0xCC] = true; // ?, CAPITAL LETTER I WITH GRAVE ACCENT 295cdf0e10cSrcweir IsLetterTab[0xCD] = true; // ?, CAPITAL LETTER I WITH ACUTE ACCENT 296cdf0e10cSrcweir IsLetterTab[0xCE] = true; // ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT 297cdf0e10cSrcweir IsLetterTab[0xCF] = true; // ?, CAPITAL LETTER I WITH DIAERESIS 298cdf0e10cSrcweir IsLetterTab[0xD0] = true; // ?, CAPITAL LETTER ETH 299cdf0e10cSrcweir IsLetterTab[0xD1] = true; // ?, CAPITAL LETTER N WITH TILDE 300cdf0e10cSrcweir IsLetterTab[0xD2] = true; // ?, CAPITAL LETTER O WITH GRAVE ACCENT 301cdf0e10cSrcweir IsLetterTab[0xD3] = true; // ?, CAPITAL LETTER O WITH ACUTE ACCENT 302cdf0e10cSrcweir IsLetterTab[0xD4] = true; // ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT 303cdf0e10cSrcweir IsLetterTab[0xD5] = true; // ?, CAPITAL LETTER O WITH TILDE 304cdf0e10cSrcweir IsLetterTab[0xD6] = true; // ?, CAPITAL LETTER O WITH DIAERESIS 305cdf0e10cSrcweir IsLetterTab[0xD8] = true; // ?, CAPITAL LETTER O WITH STROKE 306cdf0e10cSrcweir IsLetterTab[0xD9] = true; // ?, CAPITAL LETTER U WITH GRAVE ACCENT 307cdf0e10cSrcweir IsLetterTab[0xDA] = true; // ?, CAPITAL LETTER U WITH ACUTE ACCENT 308cdf0e10cSrcweir IsLetterTab[0xDB] = true; // ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT 309cdf0e10cSrcweir IsLetterTab[0xDC] = true; // ?, CAPITAL LETTER U WITH DIAERESIS 310cdf0e10cSrcweir IsLetterTab[0xDD] = true; // ?, CAPITAL LETTER Y WITH ACUTE ACCENT 311cdf0e10cSrcweir IsLetterTab[0xDE] = true; // ?, CAPITAL LETTER THORN 312cdf0e10cSrcweir IsLetterTab[0xDF] = true; // ?, SMALL LETTER SHARP S 313cdf0e10cSrcweir IsLetterTab[0xE0] = true; // ?, SMALL LETTER A WITH GRAVE ACCENT 314cdf0e10cSrcweir IsLetterTab[0xE1] = true; // ?, SMALL LETTER A WITH ACUTE ACCENT 315cdf0e10cSrcweir IsLetterTab[0xE2] = true; // ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT 316cdf0e10cSrcweir IsLetterTab[0xE3] = true; // ?, SMALL LETTER A WITH TILDE 317cdf0e10cSrcweir IsLetterTab[0xE4] = true; // ?, SMALL LETTER A WITH DIAERESIS 318cdf0e10cSrcweir IsLetterTab[0xE5] = true; // ?, SMALL LETTER A WITH RING ABOVE 319cdf0e10cSrcweir IsLetterTab[0xE6] = true; // ?, SMALL LIGATURE AE 320cdf0e10cSrcweir IsLetterTab[0xE7] = true; // ?, SMALL LETTER C WITH CEDILLA 321cdf0e10cSrcweir IsLetterTab[0xE8] = true; // ?, SMALL LETTER E WITH GRAVE ACCENT 322cdf0e10cSrcweir IsLetterTab[0xE9] = true; // ?, SMALL LETTER E WITH ACUTE ACCENT 323cdf0e10cSrcweir IsLetterTab[0xEA] = true; // ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT 324cdf0e10cSrcweir IsLetterTab[0xEB] = true; // ?, SMALL LETTER E WITH DIAERESIS 325cdf0e10cSrcweir IsLetterTab[0xEC] = true; // ?, SMALL LETTER I WITH GRAVE ACCENT 326cdf0e10cSrcweir IsLetterTab[0xED] = true; // ?, SMALL LETTER I WITH ACUTE ACCENT 327cdf0e10cSrcweir IsLetterTab[0xEE] = true; // ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT 328cdf0e10cSrcweir IsLetterTab[0xEF] = true; // ?, SMALL LETTER I WITH DIAERESIS 329cdf0e10cSrcweir IsLetterTab[0xF0] = true; // ?, SMALL LETTER ETH 330cdf0e10cSrcweir IsLetterTab[0xF1] = true; // ?, SMALL LETTER N WITH TILDE 331cdf0e10cSrcweir IsLetterTab[0xF2] = true; // ?, SMALL LETTER O WITH GRAVE ACCENT 332cdf0e10cSrcweir IsLetterTab[0xF3] = true; // ?, SMALL LETTER O WITH ACUTE ACCENT 333cdf0e10cSrcweir IsLetterTab[0xF4] = true; // ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT 334cdf0e10cSrcweir IsLetterTab[0xF5] = true; // ?, SMALL LETTER O WITH TILDE 335cdf0e10cSrcweir IsLetterTab[0xF6] = true; // ?, SMALL LETTER O WITH DIAERESIS 336cdf0e10cSrcweir IsLetterTab[0xF8] = true; // ?, SMALL LETTER O WITH OBLIQUE BAR 337cdf0e10cSrcweir IsLetterTab[0xF9] = true; // ?, SMALL LETTER U WITH GRAVE ACCENT 338cdf0e10cSrcweir IsLetterTab[0xFA] = true; // ?, SMALL LETTER U WITH ACUTE ACCENT 339cdf0e10cSrcweir IsLetterTab[0xFB] = true; // ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT 340cdf0e10cSrcweir IsLetterTab[0xFC] = true; // ?, SMALL LETTER U WITH DIAERESIS 341cdf0e10cSrcweir IsLetterTab[0xFD] = true; // ?, SMALL LETTER Y WITH ACUTE ACCENT 342cdf0e10cSrcweir IsLetterTab[0xFE] = true; // ?, SMALL LETTER THORN 343cdf0e10cSrcweir IsLetterTab[0xFF] = true; // � , SMALL LETTER Y WITH DIAERESIS 344cdf0e10cSrcweir } 345cdf0e10cSrcweir 346cdf0e10cSrcweir bool LetterTable::isLetterUnicode( sal_Unicode c ) 347cdf0e10cSrcweir { 348cdf0e10cSrcweir static CharClass* pCharClass = NULL; 349cdf0e10cSrcweir if( pCharClass == NULL ) 350cdf0e10cSrcweir pCharClass = new CharClass( Application::GetSettings().GetLocale() ); 351cdf0e10cSrcweir String aStr( c ); 352cdf0e10cSrcweir bool bRet = pCharClass->isLetter( aStr, 0 ); 353cdf0e10cSrcweir return bRet; 354cdf0e10cSrcweir } 355cdf0e10cSrcweir 356cdf0e10cSrcweir // Hilfsfunktion: Zeichen-Flag Testen 357cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags ) 358cdf0e10cSrcweir { 359cdf0e10cSrcweir bool bRet = false; 360cdf0e10cSrcweir if( c != 0 && c <= 255 ) 361cdf0e10cSrcweir { 362cdf0e10cSrcweir bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 ); 363cdf0e10cSrcweir } 364cdf0e10cSrcweir else if( c > 255 ) 365cdf0e10cSrcweir { 366cdf0e10cSrcweir bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0 367cdf0e10cSrcweir ? BasicSimpleCharClass::isAlpha( c, true ) : false; 368cdf0e10cSrcweir } 369cdf0e10cSrcweir return bRet; 370cdf0e10cSrcweir } 371cdf0e10cSrcweir 372cdf0e10cSrcweir void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount ) 373cdf0e10cSrcweir { 374cdf0e10cSrcweir ppListKeyWords = ppKeyWords; 375cdf0e10cSrcweir nKeyWordCount = nCount; 376cdf0e10cSrcweir } 377cdf0e10cSrcweir 378cdf0e10cSrcweir // Neues Token holen 379cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, 380cdf0e10cSrcweir /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos ) 381cdf0e10cSrcweir { 382cdf0e10cSrcweir reType = TT_UNKNOWN; 383cdf0e10cSrcweir 384cdf0e10cSrcweir // Position merken 385cdf0e10cSrcweir rpStartPos = mpActualPos; 386cdf0e10cSrcweir 387cdf0e10cSrcweir // Zeichen untersuchen 388cdf0e10cSrcweir sal_Unicode c = peekChar(); 389cdf0e10cSrcweir if( c == CHAR_EOF ) 390cdf0e10cSrcweir return sal_False; 391cdf0e10cSrcweir 392cdf0e10cSrcweir // Zeichen lesen 393cdf0e10cSrcweir getChar(); 394cdf0e10cSrcweir 395cdf0e10cSrcweir //*** Alle Moeglichkeiten durchgehen *** 396cdf0e10cSrcweir // Space? 397cdf0e10cSrcweir if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) ) 398cdf0e10cSrcweir { 399cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True ) 400cdf0e10cSrcweir getChar(); 401cdf0e10cSrcweir 402cdf0e10cSrcweir reType = TT_WHITESPACE; 403cdf0e10cSrcweir } 404cdf0e10cSrcweir 405cdf0e10cSrcweir // Identifier? 406cdf0e10cSrcweir else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) ) 407cdf0e10cSrcweir { 408cdf0e10cSrcweir sal_Bool bIdentifierChar; 409cdf0e10cSrcweir do 410cdf0e10cSrcweir { 411cdf0e10cSrcweir // Naechstes Zeichen holen 412cdf0e10cSrcweir c = peekChar(); 413cdf0e10cSrcweir bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER ); 414cdf0e10cSrcweir if( bIdentifierChar ) 415cdf0e10cSrcweir getChar(); 416cdf0e10cSrcweir } 417cdf0e10cSrcweir while( bIdentifierChar ); 418cdf0e10cSrcweir 419cdf0e10cSrcweir reType = TT_IDENTIFIER; 420cdf0e10cSrcweir 421cdf0e10cSrcweir // Schluesselwort-Tabelle 422cdf0e10cSrcweir if (ppListKeyWords != NULL) 423cdf0e10cSrcweir { 424cdf0e10cSrcweir int nCount = mpActualPos - rpStartPos; 425cdf0e10cSrcweir 426cdf0e10cSrcweir // No keyword if string contains char > 255 427cdf0e10cSrcweir bool bCanBeKeyword = true; 428cdf0e10cSrcweir for( int i = 0 ; i < nCount ; i++ ) 429cdf0e10cSrcweir { 430cdf0e10cSrcweir if( rpStartPos[i] > 255 ) 431cdf0e10cSrcweir { 432cdf0e10cSrcweir bCanBeKeyword = false; 433cdf0e10cSrcweir break; 434cdf0e10cSrcweir } 435cdf0e10cSrcweir } 436cdf0e10cSrcweir 437cdf0e10cSrcweir if( bCanBeKeyword ) 438cdf0e10cSrcweir { 439cdf0e10cSrcweir String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) ); 440cdf0e10cSrcweir ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US ); 441cdf0e10cSrcweir aByteStr.ToLowerAscii(); 442cdf0e10cSrcweir if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ), 443cdf0e10cSrcweir compare_strings ) ) 444cdf0e10cSrcweir { 445cdf0e10cSrcweir reType = TT_KEYWORDS; 446cdf0e10cSrcweir 447cdf0e10cSrcweir if ( aByteStr.Equals( "rem" ) ) 448cdf0e10cSrcweir { 449cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 450cdf0e10cSrcweir sal_Unicode cPeek = peekChar(); 451cdf0e10cSrcweir while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) 452cdf0e10cSrcweir { 453cdf0e10cSrcweir c = getChar(); 454cdf0e10cSrcweir cPeek = peekChar(); 455cdf0e10cSrcweir } 456cdf0e10cSrcweir 457cdf0e10cSrcweir reType = TT_COMMENT; 458cdf0e10cSrcweir } 459cdf0e10cSrcweir } 460cdf0e10cSrcweir } 461cdf0e10cSrcweir } 462cdf0e10cSrcweir } 463cdf0e10cSrcweir 464cdf0e10cSrcweir // Operator? 465cdf0e10cSrcweir // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there 466cdf0e10cSrcweir else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) ) 467cdf0e10cSrcweir { 468cdf0e10cSrcweir // paramters for SQL view 469cdf0e10cSrcweir if ( (c==':') || (c=='?')) 470cdf0e10cSrcweir { 471cdf0e10cSrcweir if (c!='?') 472cdf0e10cSrcweir { 473cdf0e10cSrcweir sal_Bool bIdentifierChar; 474cdf0e10cSrcweir do 475cdf0e10cSrcweir { 476cdf0e10cSrcweir // Naechstes Zeichen holen 477cdf0e10cSrcweir c = peekChar(); 478cdf0e10cSrcweir bIdentifierChar = BasicSimpleCharClass::isAlpha( c, true ); 479cdf0e10cSrcweir if( bIdentifierChar ) 480cdf0e10cSrcweir getChar(); 481cdf0e10cSrcweir } 482cdf0e10cSrcweir while( bIdentifierChar ); 483cdf0e10cSrcweir } 484cdf0e10cSrcweir reType = TT_PARAMETER; 485cdf0e10cSrcweir } 486cdf0e10cSrcweir else if ((c=='-')) 487cdf0e10cSrcweir { 488cdf0e10cSrcweir sal_Unicode cPeekNext = peekChar(); 489cdf0e10cSrcweir if (cPeekNext=='-') 490cdf0e10cSrcweir { 491cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 492cdf0e10cSrcweir while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) 493cdf0e10cSrcweir { 494cdf0e10cSrcweir getChar(); 495cdf0e10cSrcweir cPeekNext = peekChar(); 496cdf0e10cSrcweir } 497cdf0e10cSrcweir reType = TT_COMMENT; 498cdf0e10cSrcweir } 499cdf0e10cSrcweir } 500cdf0e10cSrcweir else if (c=='/') 501cdf0e10cSrcweir { 502cdf0e10cSrcweir sal_Unicode cPeekNext = peekChar(); 503cdf0e10cSrcweir if (cPeekNext=='/') 504cdf0e10cSrcweir { 505cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 506cdf0e10cSrcweir while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) 507cdf0e10cSrcweir { 508cdf0e10cSrcweir getChar(); 509cdf0e10cSrcweir cPeekNext = peekChar(); 510cdf0e10cSrcweir } 511cdf0e10cSrcweir reType = TT_COMMENT; 512cdf0e10cSrcweir } 513cdf0e10cSrcweir } 514cdf0e10cSrcweir else 515cdf0e10cSrcweir { 516cdf0e10cSrcweir // Kommentar ? 517cdf0e10cSrcweir if ( c == '\'' ) 518cdf0e10cSrcweir { 519cdf0e10cSrcweir c = getChar(); // '/' entfernen 520cdf0e10cSrcweir 521cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 522cdf0e10cSrcweir sal_Unicode cPeek = c; 523cdf0e10cSrcweir while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) 524cdf0e10cSrcweir { 525cdf0e10cSrcweir getChar(); 526cdf0e10cSrcweir cPeek = peekChar(); 527cdf0e10cSrcweir } 528cdf0e10cSrcweir 529cdf0e10cSrcweir reType = TT_COMMENT; 530cdf0e10cSrcweir } 531cdf0e10cSrcweir 532cdf0e10cSrcweir // Echter Operator, kann hier einfach behandelt werden, 533cdf0e10cSrcweir // da nicht der wirkliche Operator, wie z.B. += interessiert, 534cdf0e10cSrcweir // sondern nur die Tatsache, dass es sich um einen handelt. 535cdf0e10cSrcweir if( reType != TT_COMMENT ) 536cdf0e10cSrcweir { 537cdf0e10cSrcweir reType = TT_OPERATOR; 538cdf0e10cSrcweir } 539cdf0e10cSrcweir 540cdf0e10cSrcweir } 541cdf0e10cSrcweir } 542cdf0e10cSrcweir 543cdf0e10cSrcweir // Objekt-Trenner? Muss vor Number abgehandelt werden 544cdf0e10cSrcweir else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) ) 545cdf0e10cSrcweir { 546cdf0e10cSrcweir reType = TT_OPERATOR; 547cdf0e10cSrcweir } 548cdf0e10cSrcweir 549cdf0e10cSrcweir // Zahl? 550cdf0e10cSrcweir else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True ) 551cdf0e10cSrcweir { 552cdf0e10cSrcweir reType = TT_NUMBER; 553cdf0e10cSrcweir 554cdf0e10cSrcweir // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert 555cdf0e10cSrcweir int nRadix = 10; 556cdf0e10cSrcweir 557cdf0e10cSrcweir // Ist es eine Hex- oder Oct-Zahl? 558cdf0e10cSrcweir if( c == '&' ) 559cdf0e10cSrcweir { 560cdf0e10cSrcweir // Octal? 561cdf0e10cSrcweir if( peekChar() == 'o' || peekChar() == 'O' ) 562cdf0e10cSrcweir { 563cdf0e10cSrcweir // o entfernen 564cdf0e10cSrcweir getChar(); 565cdf0e10cSrcweir nRadix = 8; // Octal-Basis 566cdf0e10cSrcweir 567cdf0e10cSrcweir // Alle Ziffern einlesen 568cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) ) 569cdf0e10cSrcweir c = getChar(); 570cdf0e10cSrcweir } 571cdf0e10cSrcweir // Hex? 572cdf0e10cSrcweir else if( peekChar() == 'h' || peekChar() == 'H' ) 573cdf0e10cSrcweir { 574cdf0e10cSrcweir // x entfernen 575cdf0e10cSrcweir getChar(); 576cdf0e10cSrcweir nRadix = 16; // Hex-Basis 577cdf0e10cSrcweir 578cdf0e10cSrcweir // Alle Ziffern einlesen und puffern 579cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) ) 580cdf0e10cSrcweir c = getChar(); 581cdf0e10cSrcweir } 582cdf0e10cSrcweir else 583cdf0e10cSrcweir { 584cdf0e10cSrcweir reType = TT_OPERATOR; 585cdf0e10cSrcweir } 586cdf0e10cSrcweir } 587cdf0e10cSrcweir 588cdf0e10cSrcweir // Wenn nicht Oct oder Hex als double ansehen 589cdf0e10cSrcweir if( reType == TT_NUMBER && nRadix == 10 ) 590cdf0e10cSrcweir { 591cdf0e10cSrcweir // Flag, ob das letzte Zeichen ein Exponent war 592cdf0e10cSrcweir sal_Bool bAfterExpChar = sal_False; 593cdf0e10cSrcweir 594cdf0e10cSrcweir // Alle Ziffern einlesen 595cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) || 596cdf0e10cSrcweir (bAfterExpChar && peekChar() == '+' ) || 597cdf0e10cSrcweir (bAfterExpChar && peekChar() == '-' ) ) 598cdf0e10cSrcweir // Nach Exponent auch +/- OK 599cdf0e10cSrcweir { 600cdf0e10cSrcweir c = getChar(); // Zeichen lesen 601cdf0e10cSrcweir bAfterExpChar = ( c == 'e' || c == 'E' ); 602cdf0e10cSrcweir } 603cdf0e10cSrcweir } 604cdf0e10cSrcweir 605cdf0e10cSrcweir // reType = TT_NUMBER; 606cdf0e10cSrcweir } 607cdf0e10cSrcweir 608cdf0e10cSrcweir // String? 609cdf0e10cSrcweir else if( testCharFlags( c, CHAR_START_STRING ) == sal_True ) 610cdf0e10cSrcweir { 611cdf0e10cSrcweir // Merken, welches Zeichen den String eroeffnet hat 612cdf0e10cSrcweir sal_Unicode cEndString = c; 613cdf0e10cSrcweir if( c == '[' ) 614cdf0e10cSrcweir cEndString = ']'; 615cdf0e10cSrcweir 616cdf0e10cSrcweir // Alle Ziffern einlesen und puffern 617cdf0e10cSrcweir while( peekChar() != cEndString ) 618cdf0e10cSrcweir { 619cdf0e10cSrcweir // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht 620cdf0e10cSrcweir if( peekChar() == CHAR_EOF ) 621cdf0e10cSrcweir { 622cdf0e10cSrcweir // ERROR: unterminated string literal 623cdf0e10cSrcweir reType = TT_ERROR; 624cdf0e10cSrcweir break; 625cdf0e10cSrcweir } 626cdf0e10cSrcweir c = getChar(); 627cdf0e10cSrcweir if( testCharFlags( c, CHAR_EOL ) == sal_True ) 628cdf0e10cSrcweir { 629cdf0e10cSrcweir // ERROR: unterminated string literal 630cdf0e10cSrcweir reType = TT_ERROR; 631cdf0e10cSrcweir break; 632cdf0e10cSrcweir } 633cdf0e10cSrcweir } 634cdf0e10cSrcweir 635cdf0e10cSrcweir // Zeichen lesen 636cdf0e10cSrcweir if( reType != TT_ERROR ) 637cdf0e10cSrcweir { 638cdf0e10cSrcweir getChar(); 639cdf0e10cSrcweir if( cEndString == ']' ) 640cdf0e10cSrcweir reType = TT_IDENTIFIER; 641cdf0e10cSrcweir else 642cdf0e10cSrcweir reType = TT_STRING; 643cdf0e10cSrcweir } 644cdf0e10cSrcweir } 645cdf0e10cSrcweir 646cdf0e10cSrcweir // Zeilenende? 647cdf0e10cSrcweir else if( testCharFlags( c, CHAR_EOL ) == sal_True ) 648cdf0e10cSrcweir { 649cdf0e10cSrcweir // Falls ein weiteres anderes EOL-Char folgt, weg damit 650cdf0e10cSrcweir sal_Unicode cNext = peekChar(); 651cdf0e10cSrcweir if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True ) 652cdf0e10cSrcweir getChar(); 653cdf0e10cSrcweir 654cdf0e10cSrcweir // Positions-Daten auf Zeilen-Beginn setzen 655cdf0e10cSrcweir nCol = 0; 656cdf0e10cSrcweir nLine++; 657cdf0e10cSrcweir 658cdf0e10cSrcweir reType = TT_EOL; 659cdf0e10cSrcweir } 660cdf0e10cSrcweir 661cdf0e10cSrcweir // Alles andere bleibt TT_UNKNOWN 662cdf0e10cSrcweir 663cdf0e10cSrcweir 664cdf0e10cSrcweir // End-Position eintragen 665cdf0e10cSrcweir rpEndPos = mpActualPos; 666cdf0e10cSrcweir return sal_True; 667cdf0e10cSrcweir } 668cdf0e10cSrcweir 669cdf0e10cSrcweir String SimpleTokenizer_Impl::getTokStr 670cdf0e10cSrcweir ( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos ) 671cdf0e10cSrcweir { 672cdf0e10cSrcweir return String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) ); 673cdf0e10cSrcweir } 674cdf0e10cSrcweir 675cdf0e10cSrcweir #ifdef DBG_UTIL 676cdf0e10cSrcweir // TEST: Token ausgeben 677cdf0e10cSrcweir String SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType, 678cdf0e10cSrcweir /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos ) 679cdf0e10cSrcweir { 680cdf0e10cSrcweir String aOut; 681cdf0e10cSrcweir switch( eType ) 682cdf0e10cSrcweir { 683cdf0e10cSrcweir case TT_UNKNOWN: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break; 684cdf0e10cSrcweir case TT_IDENTIFIER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break; 685cdf0e10cSrcweir case TT_WHITESPACE: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break; 686cdf0e10cSrcweir case TT_NUMBER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break; 687cdf0e10cSrcweir case TT_STRING: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break; 688cdf0e10cSrcweir case TT_EOL: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break; 689cdf0e10cSrcweir case TT_COMMENT: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break; 690cdf0e10cSrcweir case TT_ERROR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break; 691cdf0e10cSrcweir case TT_OPERATOR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break; 692cdf0e10cSrcweir case TT_KEYWORDS: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break; 693cdf0e10cSrcweir case TT_PARAMETER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break; 694cdf0e10cSrcweir } 695cdf0e10cSrcweir if( eType != TT_EOL ) 696cdf0e10cSrcweir { 697cdf0e10cSrcweir aOut += String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) ); 698cdf0e10cSrcweir } 699cdf0e10cSrcweir aOut += String( RTL_CONSTASCII_USTRINGPARAM("\n") ); 700cdf0e10cSrcweir return aOut; 701cdf0e10cSrcweir } 702cdf0e10cSrcweir #endif 703cdf0e10cSrcweir 704cdf0e10cSrcweir SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang) 705cdf0e10cSrcweir { 706cdf0e10cSrcweir memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) ); 707cdf0e10cSrcweir 708cdf0e10cSrcweir // Zeichen-Tabelle fuellen 709cdf0e10cSrcweir sal_uInt16 i; 710cdf0e10cSrcweir 711cdf0e10cSrcweir // Zulaessige Zeichen fuer Identifier 712cdf0e10cSrcweir sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ); 713cdf0e10cSrcweir for( i = 'a' ; i <= 'z' ; i++ ) 714cdf0e10cSrcweir aCharTypeTab[i] |= nHelpMask; 715cdf0e10cSrcweir for( i = 'A' ; i <= 'Z' ; i++ ) 716cdf0e10cSrcweir aCharTypeTab[i] |= nHelpMask; 717cdf0e10cSrcweir // '_' extra eintragen 718cdf0e10cSrcweir aCharTypeTab[(int)'_'] |= nHelpMask; 719cdf0e10cSrcweir // AB 23.6.97: '$' ist auch erlaubt 720cdf0e10cSrcweir aCharTypeTab[(int)'$'] |= nHelpMask; 721cdf0e10cSrcweir 722cdf0e10cSrcweir // Ziffern (Identifier und Number ist moeglich) 723cdf0e10cSrcweir nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER | 724cdf0e10cSrcweir CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER ); 725cdf0e10cSrcweir for( i = '0' ; i <= '9' ; i++ ) 726cdf0e10cSrcweir aCharTypeTab[i] |= nHelpMask; 727cdf0e10cSrcweir 728cdf0e10cSrcweir // e und E sowie . von Hand ergaenzen 729cdf0e10cSrcweir aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER; 730cdf0e10cSrcweir aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER; 731cdf0e10cSrcweir aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER ); 732cdf0e10cSrcweir aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER; 733cdf0e10cSrcweir 734cdf0e10cSrcweir // Hex-Ziffern 735cdf0e10cSrcweir for( i = 'a' ; i <= 'f' ; i++ ) 736cdf0e10cSrcweir aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; 737cdf0e10cSrcweir for( i = 'A' ; i <= 'F' ; i++ ) 738cdf0e10cSrcweir aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; 739cdf0e10cSrcweir 740cdf0e10cSrcweir // Oct-Ziffern 741cdf0e10cSrcweir for( i = '0' ; i <= '7' ; i++ ) 742cdf0e10cSrcweir aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER; 743cdf0e10cSrcweir 744cdf0e10cSrcweir // String-Beginn/End-Zeichen 745cdf0e10cSrcweir aCharTypeTab[(int)'\''] |= CHAR_START_STRING; 746cdf0e10cSrcweir aCharTypeTab[(int)'\"'] |= CHAR_START_STRING; 747cdf0e10cSrcweir aCharTypeTab[(int)'['] |= CHAR_START_STRING; 748cdf0e10cSrcweir aCharTypeTab[(int)'`'] |= CHAR_START_STRING; 749cdf0e10cSrcweir 750cdf0e10cSrcweir // Operator-Zeichen 751cdf0e10cSrcweir aCharTypeTab[(int)'!'] |= CHAR_OPERATOR; 752cdf0e10cSrcweir aCharTypeTab[(int)'%'] |= CHAR_OPERATOR; 753cdf0e10cSrcweir // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140 754cdf0e10cSrcweir aCharTypeTab[(int)'('] |= CHAR_OPERATOR; 755cdf0e10cSrcweir aCharTypeTab[(int)')'] |= CHAR_OPERATOR; 756cdf0e10cSrcweir aCharTypeTab[(int)'*'] |= CHAR_OPERATOR; 757cdf0e10cSrcweir aCharTypeTab[(int)'+'] |= CHAR_OPERATOR; 758cdf0e10cSrcweir aCharTypeTab[(int)','] |= CHAR_OPERATOR; 759cdf0e10cSrcweir aCharTypeTab[(int)'-'] |= CHAR_OPERATOR; 760cdf0e10cSrcweir aCharTypeTab[(int)'/'] |= CHAR_OPERATOR; 761cdf0e10cSrcweir aCharTypeTab[(int)':'] |= CHAR_OPERATOR; 762cdf0e10cSrcweir aCharTypeTab[(int)'<'] |= CHAR_OPERATOR; 763cdf0e10cSrcweir aCharTypeTab[(int)'='] |= CHAR_OPERATOR; 764cdf0e10cSrcweir aCharTypeTab[(int)'>'] |= CHAR_OPERATOR; 765cdf0e10cSrcweir aCharTypeTab[(int)'?'] |= CHAR_OPERATOR; 766cdf0e10cSrcweir aCharTypeTab[(int)'^'] |= CHAR_OPERATOR; 767cdf0e10cSrcweir aCharTypeTab[(int)'|'] |= CHAR_OPERATOR; 768cdf0e10cSrcweir aCharTypeTab[(int)'~'] |= CHAR_OPERATOR; 769cdf0e10cSrcweir aCharTypeTab[(int)'{'] |= CHAR_OPERATOR; 770cdf0e10cSrcweir aCharTypeTab[(int)'}'] |= CHAR_OPERATOR; 771cdf0e10cSrcweir // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826 772cdf0e10cSrcweir aCharTypeTab[(int)']'] |= CHAR_OPERATOR; 773cdf0e10cSrcweir aCharTypeTab[(int)';'] |= CHAR_OPERATOR; 774cdf0e10cSrcweir 775cdf0e10cSrcweir // Space 776cdf0e10cSrcweir aCharTypeTab[(int)' ' ] |= CHAR_SPACE; 777cdf0e10cSrcweir aCharTypeTab[(int)'\t'] |= CHAR_SPACE; 778cdf0e10cSrcweir 779cdf0e10cSrcweir // Zeilen-Ende-Zeichen 780cdf0e10cSrcweir aCharTypeTab[(int)'\r'] |= CHAR_EOL; 781cdf0e10cSrcweir aCharTypeTab[(int)'\n'] |= CHAR_EOL; 782cdf0e10cSrcweir 783cdf0e10cSrcweir ppListKeyWords = NULL; 784cdf0e10cSrcweir } 785cdf0e10cSrcweir 786cdf0e10cSrcweir SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void ) 787cdf0e10cSrcweir { 788cdf0e10cSrcweir } 789cdf0e10cSrcweir 790cdf0e10cSrcweir SimpleTokenizer_Impl* getSimpleTokenizer( void ) 791cdf0e10cSrcweir { 792cdf0e10cSrcweir static SimpleTokenizer_Impl* pSimpleTokenizer = NULL; 793cdf0e10cSrcweir if( !pSimpleTokenizer ) 794cdf0e10cSrcweir pSimpleTokenizer = new SimpleTokenizer_Impl(); 795cdf0e10cSrcweir return pSimpleTokenizer; 796cdf0e10cSrcweir } 797cdf0e10cSrcweir 798cdf0e10cSrcweir // Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul 799cdf0e10cSrcweir sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource ) 800cdf0e10cSrcweir { 801cdf0e10cSrcweir // Position auf den Anfang des Source-Strings setzen 802cdf0e10cSrcweir mpStringBegin = mpActualPos = aSource->GetBuffer(); 803cdf0e10cSrcweir 804cdf0e10cSrcweir // Zeile und Spalte initialisieren 805cdf0e10cSrcweir nLine = nParseLine; 806cdf0e10cSrcweir nCol = 0L; 807cdf0e10cSrcweir 808cdf0e10cSrcweir // Variablen fuer die Out-Parameter 809cdf0e10cSrcweir TokenTypes eType; 810cdf0e10cSrcweir const sal_Unicode* pStartPos; 811cdf0e10cSrcweir const sal_Unicode* pEndPos; 812cdf0e10cSrcweir 813cdf0e10cSrcweir // Schleife ueber alle Tokens 814cdf0e10cSrcweir sal_uInt16 nTokenCount = 0; 815cdf0e10cSrcweir while( getNextToken( eType, pStartPos, pEndPos ) ) 816cdf0e10cSrcweir nTokenCount++; 817cdf0e10cSrcweir 818cdf0e10cSrcweir return nTokenCount; 819cdf0e10cSrcweir } 820cdf0e10cSrcweir 821cdf0e10cSrcweir void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine, 822cdf0e10cSrcweir /*out*/HighlightPortions& portions ) 823cdf0e10cSrcweir { 824cdf0e10cSrcweir // Position auf den Anfang des Source-Strings setzen 825cdf0e10cSrcweir mpStringBegin = mpActualPos = rLine.GetBuffer(); 826cdf0e10cSrcweir 827cdf0e10cSrcweir // Zeile und Spalte initialisieren 828cdf0e10cSrcweir nLine = nParseLine; 829cdf0e10cSrcweir nCol = 0L; 830cdf0e10cSrcweir 831cdf0e10cSrcweir // Variablen fuer die Out-Parameter 832cdf0e10cSrcweir TokenTypes eType; 833cdf0e10cSrcweir const sal_Unicode* pStartPos; 834cdf0e10cSrcweir const sal_Unicode* pEndPos; 835cdf0e10cSrcweir 836cdf0e10cSrcweir // Schleife ueber alle Tokens 837cdf0e10cSrcweir while( getNextToken( eType, pStartPos, pEndPos ) ) 838cdf0e10cSrcweir { 839cdf0e10cSrcweir HighlightPortion portion; 840cdf0e10cSrcweir 841cdf0e10cSrcweir portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin); 842cdf0e10cSrcweir portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin); 843cdf0e10cSrcweir portion.tokenType = eType; 844cdf0e10cSrcweir 845cdf0e10cSrcweir portions.push_back(portion); 846cdf0e10cSrcweir } 847cdf0e10cSrcweir } 848cdf0e10cSrcweir 849cdf0e10cSrcweir 850cdf0e10cSrcweir ////////////////////////////////////////////////////////////////////////// 851cdf0e10cSrcweir // Implementierung des SyntaxHighlighter 852cdf0e10cSrcweir 853cdf0e10cSrcweir SyntaxHighlighter::SyntaxHighlighter() 854cdf0e10cSrcweir { 855cdf0e10cSrcweir m_pSimpleTokenizer = 0; 856cdf0e10cSrcweir m_pKeyWords = NULL; 857cdf0e10cSrcweir m_nKeyWordCount = 0; 858cdf0e10cSrcweir } 859cdf0e10cSrcweir 860cdf0e10cSrcweir SyntaxHighlighter::~SyntaxHighlighter() 861cdf0e10cSrcweir { 862cdf0e10cSrcweir delete m_pSimpleTokenizer; 863cdf0e10cSrcweir delete m_pKeyWords; 864cdf0e10cSrcweir } 865cdf0e10cSrcweir 866cdf0e10cSrcweir void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ ) 867cdf0e10cSrcweir { 868cdf0e10cSrcweir eLanguage = eLanguage_; 869cdf0e10cSrcweir delete m_pSimpleTokenizer; 870cdf0e10cSrcweir m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage); 871cdf0e10cSrcweir 872cdf0e10cSrcweir switch (eLanguage) 873cdf0e10cSrcweir { 874cdf0e10cSrcweir case HIGHLIGHT_BASIC: 875cdf0e10cSrcweir m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords, 876cdf0e10cSrcweir sizeof( strListBasicKeyWords ) / sizeof( char* )); 877cdf0e10cSrcweir break; 878cdf0e10cSrcweir case HIGHLIGHT_SQL: 879cdf0e10cSrcweir m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords, 880cdf0e10cSrcweir sizeof( strListSqlKeyWords ) / sizeof( char* )); 881cdf0e10cSrcweir break; 882cdf0e10cSrcweir default: 883cdf0e10cSrcweir m_pSimpleTokenizer->setKeyWords( NULL, 0 ); 884cdf0e10cSrcweir } 885cdf0e10cSrcweir } 886cdf0e10cSrcweir 887cdf0e10cSrcweir const Range SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference, 888cdf0e10cSrcweir const String* pChangedLines, sal_uInt32 nArrayLength) 889cdf0e10cSrcweir { 890cdf0e10cSrcweir (void)nLineCountDifference; 891cdf0e10cSrcweir 892cdf0e10cSrcweir for( sal_uInt32 i=0 ; i < nArrayLength ; i++ ) 893cdf0e10cSrcweir m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]); 894cdf0e10cSrcweir 895cdf0e10cSrcweir return Range( nLine, nLine + nArrayLength-1 ); 896cdf0e10cSrcweir } 897cdf0e10cSrcweir 898cdf0e10cSrcweir void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const String& rLine, 899cdf0e10cSrcweir /*out*/HighlightPortions& portions ) 900cdf0e10cSrcweir { 901cdf0e10cSrcweir m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions ); 902cdf0e10cSrcweir } 903