xref: /AOO41X/main/svtools/source/edit/syntaxhighlight.cxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_svtools.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <svtools/syntaxhighlight.hxx>
32*cdf0e10cSrcweir 
33*cdf0e10cSrcweir #include <unotools/charclass.hxx>
34*cdf0e10cSrcweir #include <tools/debug.hxx>
35*cdf0e10cSrcweir 
36*cdf0e10cSrcweir 
// ##########################################################################
// ATTENTION: all these words need to be in lower case and in ascending
// strcmp() order, because the tables are looked up with bsearch()
// ##########################################################################
// Keyword table for BASIC highlighting.
// Entries are lower case and kept in ascending strcmp() order (one row per
// initial letter, long rows wrapped); the order of entries must not change.
static const char* strListBasicKeyWords[] = {
	"access", "alias", "and", "any", "append", "as",
	"base", "binary", "boolean", "byref", "byte", "byval",
	"call", "case", "cdecl", "classmodule", "close", "compare", "compatible",
	"const", "currency",
	"date", "declare", "defbool", "defcur", "defdate", "defdbl", "deferr",
	"defint", "deflng", "defobj", "defsng", "defstr", "defvar", "dim", "do",
	"double",
	"each", "else", "elseif", "end", "end enum", "end function", "end if",
	"end select", "end sub", "end type", "endif", "enum", "eqv", "erase",
	"error", "exit", "explicit",
	"for", "function",
	"get", "global", "gosub", "goto",
	"if", "imp", "implements", "in", "input", "integer", "is",
	"let", "lib", "like", "line", "line input", "local", "lock", "long",
	"loop", "lprint", "lset",
	"mod",
	"name", "new", "next", "not",
	"object", "on", "open", "option", "optional", "or", "output",
	"preserve", "print", "private", "property", "public",
	"random", "read", "redim", "rem", "resume", "return", "rset",
	"select", "set", "shared", "single", "static", "step", "stop", "string",
	"sub", "system",
	"text", "then", "to", "type", "typeof",
	"until",
	"variant",
	"wend", "while", "with", "write",
	"xor"
};
166*cdf0e10cSrcweir 
167*cdf0e10cSrcweir 
// Keyword table for SQL highlighting.
// Entries are lower case and kept in ascending strcmp() order (one row per
// initial letter); the order of entries must not change.
static const char* strListSqlKeyWords[] = {
	"all", "and", "any", "as", "asc", "avg",
	"between", "by",
	"cast", "corresponding", "count", "create", "cross",
	"delete", "desc", "distinct", "drop",
	"escape", "except", "exists",
	"false", "from", "full",
	"global", "group",
	"having",
	"in", "inner", "insert", "intersect", "into", "is",
	"join",
	"left", "like", "local",
	"match", "max", "min",
	"natural", "not", "null",
	"on", "or", "order", "outer",
	"right",
	"select", "set", "some", "sum",
	"table", "temporary", "true",
	"union", "unique", "unknown", "update", "using",
	"values",
	"where"
};
231*cdf0e10cSrcweir 
232*cdf0e10cSrcweir 
233*cdf0e10cSrcweir extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
234*cdf0e10cSrcweir {
235*cdf0e10cSrcweir 	return strcmp( (char *)arg1, *(char **)arg2 );
236*cdf0e10cSrcweir }
237*cdf0e10cSrcweir 
238*cdf0e10cSrcweir 
239*cdf0e10cSrcweir class LetterTable
240*cdf0e10cSrcweir {
241*cdf0e10cSrcweir 	bool		IsLetterTab[256];
242*cdf0e10cSrcweir 
243*cdf0e10cSrcweir public:
244*cdf0e10cSrcweir 	LetterTable( void );
245*cdf0e10cSrcweir 
246*cdf0e10cSrcweir 	inline bool isLetter( sal_Unicode c )
247*cdf0e10cSrcweir 	{
248*cdf0e10cSrcweir 		bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c );
249*cdf0e10cSrcweir 		return bRet;
250*cdf0e10cSrcweir 	}
251*cdf0e10cSrcweir 	bool isLetterUnicode( sal_Unicode c );
252*cdf0e10cSrcweir };
253*cdf0e10cSrcweir 
254*cdf0e10cSrcweir class BasicSimpleCharClass
255*cdf0e10cSrcweir {
256*cdf0e10cSrcweir 	static LetterTable aLetterTable;
257*cdf0e10cSrcweir 
258*cdf0e10cSrcweir public:
259*cdf0e10cSrcweir 	static sal_Bool isAlpha( sal_Unicode c, bool bCompatible )
260*cdf0e10cSrcweir 	{
261*cdf0e10cSrcweir 		sal_Bool bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
262*cdf0e10cSrcweir 					|| (bCompatible && aLetterTable.isLetter( c ));
263*cdf0e10cSrcweir 		return bRet;
264*cdf0e10cSrcweir 	}
265*cdf0e10cSrcweir 
266*cdf0e10cSrcweir 	static sal_Bool isDigit( sal_Unicode c )
267*cdf0e10cSrcweir 	{
268*cdf0e10cSrcweir 		sal_Bool bRet = (c >= '0' && c <= '9');
269*cdf0e10cSrcweir 		return bRet;
270*cdf0e10cSrcweir 	}
271*cdf0e10cSrcweir 
272*cdf0e10cSrcweir 	static sal_Bool isAlphaNumeric( sal_Unicode c, bool bCompatible )
273*cdf0e10cSrcweir 	{
274*cdf0e10cSrcweir 		sal_Bool bRet = isDigit( c ) || isAlpha( c, bCompatible );
275*cdf0e10cSrcweir 		return bRet;
276*cdf0e10cSrcweir 	}
277*cdf0e10cSrcweir };
278*cdf0e10cSrcweir 
279*cdf0e10cSrcweir LetterTable BasicSimpleCharClass::aLetterTable;
280*cdf0e10cSrcweir 
281*cdf0e10cSrcweir LetterTable::LetterTable( void )
282*cdf0e10cSrcweir {
283*cdf0e10cSrcweir 	for( int i = 0 ; i < 256 ; ++i )
284*cdf0e10cSrcweir 		IsLetterTab[i] = false;
285*cdf0e10cSrcweir 
286*cdf0e10cSrcweir 	IsLetterTab[0xC0] = true;	// ?, CAPITAL LETTER A WITH GRAVE ACCENT
287*cdf0e10cSrcweir 	IsLetterTab[0xC1] = true;	// ?, CAPITAL LETTER A WITH ACUTE ACCENT
288*cdf0e10cSrcweir 	IsLetterTab[0xC2] = true;	// ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
289*cdf0e10cSrcweir 	IsLetterTab[0xC3] = true;	// ?, CAPITAL LETTER A WITH TILDE
290*cdf0e10cSrcweir 	IsLetterTab[0xC4] = true;	// ?, CAPITAL LETTER A WITH DIAERESIS
291*cdf0e10cSrcweir 	IsLetterTab[0xC5] = true;	// ?, CAPITAL LETTER A WITH RING ABOVE
292*cdf0e10cSrcweir 	IsLetterTab[0xC6] = true;	// ?, CAPITAL LIGATURE AE
293*cdf0e10cSrcweir 	IsLetterTab[0xC7] = true;	// ?, CAPITAL LETTER C WITH CEDILLA
294*cdf0e10cSrcweir 	IsLetterTab[0xC8] = true;	// ?, CAPITAL LETTER E WITH GRAVE ACCENT
295*cdf0e10cSrcweir 	IsLetterTab[0xC9] = true;	// ?, CAPITAL LETTER E WITH ACUTE ACCENT
296*cdf0e10cSrcweir 	IsLetterTab[0xCA] = true;	// ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
297*cdf0e10cSrcweir 	IsLetterTab[0xCB] = true;	// ?, CAPITAL LETTER E WITH DIAERESIS
298*cdf0e10cSrcweir 	IsLetterTab[0xCC] = true;	// ?, CAPITAL LETTER I WITH GRAVE ACCENT
299*cdf0e10cSrcweir 	IsLetterTab[0xCD] = true;	// ?, CAPITAL LETTER I WITH ACUTE ACCENT
300*cdf0e10cSrcweir 	IsLetterTab[0xCE] = true;	// ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
301*cdf0e10cSrcweir 	IsLetterTab[0xCF] = true;	// ?, CAPITAL LETTER I WITH DIAERESIS
302*cdf0e10cSrcweir 	IsLetterTab[0xD0] = true;	// ?, CAPITAL LETTER ETH
303*cdf0e10cSrcweir 	IsLetterTab[0xD1] = true;	// ?, CAPITAL LETTER N WITH TILDE
304*cdf0e10cSrcweir 	IsLetterTab[0xD2] = true;	// ?, CAPITAL LETTER O WITH GRAVE ACCENT
305*cdf0e10cSrcweir 	IsLetterTab[0xD3] = true;	// ?, CAPITAL LETTER O WITH ACUTE ACCENT
306*cdf0e10cSrcweir 	IsLetterTab[0xD4] = true;	// ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
307*cdf0e10cSrcweir 	IsLetterTab[0xD5] = true;	// ?, CAPITAL LETTER O WITH TILDE
308*cdf0e10cSrcweir 	IsLetterTab[0xD6] = true;	// ?, CAPITAL LETTER O WITH DIAERESIS
309*cdf0e10cSrcweir 	IsLetterTab[0xD8] = true;	// ?, CAPITAL LETTER O WITH STROKE
310*cdf0e10cSrcweir 	IsLetterTab[0xD9] = true;	// ?, CAPITAL LETTER U WITH GRAVE ACCENT
311*cdf0e10cSrcweir 	IsLetterTab[0xDA] = true;	// ?, CAPITAL LETTER U WITH ACUTE ACCENT
312*cdf0e10cSrcweir 	IsLetterTab[0xDB] = true;	// ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
313*cdf0e10cSrcweir 	IsLetterTab[0xDC] = true;	// ?, CAPITAL LETTER U WITH DIAERESIS
314*cdf0e10cSrcweir 	IsLetterTab[0xDD] = true;	// ?, CAPITAL LETTER Y WITH ACUTE ACCENT
315*cdf0e10cSrcweir 	IsLetterTab[0xDE] = true;	// ?, CAPITAL LETTER THORN
316*cdf0e10cSrcweir 	IsLetterTab[0xDF] = true;	// ?, SMALL LETTER SHARP S
317*cdf0e10cSrcweir 	IsLetterTab[0xE0] = true;	// ?, SMALL LETTER A WITH GRAVE ACCENT
318*cdf0e10cSrcweir 	IsLetterTab[0xE1] = true;	// ?, SMALL LETTER A WITH ACUTE ACCENT
319*cdf0e10cSrcweir 	IsLetterTab[0xE2] = true;	// ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT
320*cdf0e10cSrcweir 	IsLetterTab[0xE3] = true;	// ?, SMALL LETTER A WITH TILDE
321*cdf0e10cSrcweir 	IsLetterTab[0xE4] = true;	// ?, SMALL LETTER A WITH DIAERESIS
322*cdf0e10cSrcweir 	IsLetterTab[0xE5] = true;	// ?, SMALL LETTER A WITH RING ABOVE
323*cdf0e10cSrcweir 	IsLetterTab[0xE6] = true;	// ?, SMALL LIGATURE AE
324*cdf0e10cSrcweir 	IsLetterTab[0xE7] = true;	// ?, SMALL LETTER C WITH CEDILLA
325*cdf0e10cSrcweir 	IsLetterTab[0xE8] = true;	// ?, SMALL LETTER E WITH GRAVE ACCENT
326*cdf0e10cSrcweir 	IsLetterTab[0xE9] = true;	// ?, SMALL LETTER E WITH ACUTE ACCENT
327*cdf0e10cSrcweir 	IsLetterTab[0xEA] = true;	// ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT
328*cdf0e10cSrcweir 	IsLetterTab[0xEB] = true;	// ?, SMALL LETTER E WITH DIAERESIS
329*cdf0e10cSrcweir 	IsLetterTab[0xEC] = true;	// ?, SMALL LETTER I WITH GRAVE ACCENT
330*cdf0e10cSrcweir 	IsLetterTab[0xED] = true;	// ?, SMALL LETTER I WITH ACUTE ACCENT
331*cdf0e10cSrcweir 	IsLetterTab[0xEE] = true;	// ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT
332*cdf0e10cSrcweir 	IsLetterTab[0xEF] = true;	// ?, SMALL LETTER I WITH DIAERESIS
333*cdf0e10cSrcweir 	IsLetterTab[0xF0] = true;	// ?, SMALL LETTER ETH
334*cdf0e10cSrcweir 	IsLetterTab[0xF1] = true;	// ?, SMALL LETTER N WITH TILDE
335*cdf0e10cSrcweir 	IsLetterTab[0xF2] = true;	// ?, SMALL LETTER O WITH GRAVE ACCENT
336*cdf0e10cSrcweir 	IsLetterTab[0xF3] = true;	// ?, SMALL LETTER O WITH ACUTE ACCENT
337*cdf0e10cSrcweir 	IsLetterTab[0xF4] = true;	// ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT
338*cdf0e10cSrcweir 	IsLetterTab[0xF5] = true;	// ?, SMALL LETTER O WITH TILDE
339*cdf0e10cSrcweir 	IsLetterTab[0xF6] = true;	// ?, SMALL LETTER O WITH DIAERESIS
340*cdf0e10cSrcweir 	IsLetterTab[0xF8] = true;	// ?, SMALL LETTER O WITH OBLIQUE BAR
341*cdf0e10cSrcweir 	IsLetterTab[0xF9] = true;	// ?, SMALL LETTER U WITH GRAVE ACCENT
342*cdf0e10cSrcweir 	IsLetterTab[0xFA] = true;	// ?, SMALL LETTER U WITH ACUTE ACCENT
343*cdf0e10cSrcweir 	IsLetterTab[0xFB] = true;	// ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT
344*cdf0e10cSrcweir 	IsLetterTab[0xFC] = true;	// ?, SMALL LETTER U WITH DIAERESIS
345*cdf0e10cSrcweir 	IsLetterTab[0xFD] = true;	// ?, SMALL LETTER Y WITH ACUTE ACCENT
346*cdf0e10cSrcweir 	IsLetterTab[0xFE] = true;	// ?, SMALL LETTER THORN
347*cdf0e10cSrcweir 	IsLetterTab[0xFF] = true;	// � , SMALL LETTER Y WITH DIAERESIS
348*cdf0e10cSrcweir }
349*cdf0e10cSrcweir 
350*cdf0e10cSrcweir bool LetterTable::isLetterUnicode( sal_Unicode c )
351*cdf0e10cSrcweir {
352*cdf0e10cSrcweir 	static CharClass* pCharClass = NULL;
353*cdf0e10cSrcweir 	if( pCharClass == NULL )
354*cdf0e10cSrcweir 		pCharClass = new CharClass( Application::GetSettings().GetLocale() );
355*cdf0e10cSrcweir 	String aStr( c );
356*cdf0e10cSrcweir 	bool bRet = pCharClass->isLetter( aStr, 0 );
357*cdf0e10cSrcweir 	return bRet;
358*cdf0e10cSrcweir }
359*cdf0e10cSrcweir 
360*cdf0e10cSrcweir // Hilfsfunktion: Zeichen-Flag Testen
361*cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags )
362*cdf0e10cSrcweir {
363*cdf0e10cSrcweir 	bool bRet = false;
364*cdf0e10cSrcweir 	if( c != 0 && c <= 255 )
365*cdf0e10cSrcweir 	{
366*cdf0e10cSrcweir 		bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 );
367*cdf0e10cSrcweir 	}
368*cdf0e10cSrcweir 	else if( c > 255 )
369*cdf0e10cSrcweir 	{
370*cdf0e10cSrcweir 		bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0
371*cdf0e10cSrcweir 			? BasicSimpleCharClass::isAlpha( c, true ) : false;
372*cdf0e10cSrcweir 	}
373*cdf0e10cSrcweir 	return bRet;
374*cdf0e10cSrcweir }
375*cdf0e10cSrcweir 
376*cdf0e10cSrcweir void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount )
377*cdf0e10cSrcweir {
378*cdf0e10cSrcweir 	ppListKeyWords = ppKeyWords;
379*cdf0e10cSrcweir 	nKeyWordCount = nCount;
380*cdf0e10cSrcweir }
381*cdf0e10cSrcweir 
382*cdf0e10cSrcweir // Neues Token holen
383*cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
384*cdf0e10cSrcweir 	/*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
385*cdf0e10cSrcweir {
386*cdf0e10cSrcweir 	reType = TT_UNKNOWN;
387*cdf0e10cSrcweir 
388*cdf0e10cSrcweir 	// Position merken
389*cdf0e10cSrcweir 	rpStartPos = mpActualPos;
390*cdf0e10cSrcweir 
391*cdf0e10cSrcweir 	// Zeichen untersuchen
392*cdf0e10cSrcweir 	sal_Unicode c = peekChar();
393*cdf0e10cSrcweir 	if( c == CHAR_EOF )
394*cdf0e10cSrcweir 		return sal_False;
395*cdf0e10cSrcweir 
396*cdf0e10cSrcweir 	// Zeichen lesen
397*cdf0e10cSrcweir 	getChar();
398*cdf0e10cSrcweir 
399*cdf0e10cSrcweir 	//*** Alle Moeglichkeiten durchgehen ***
400*cdf0e10cSrcweir 	// Space?
401*cdf0e10cSrcweir 	if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) )
402*cdf0e10cSrcweir 	{
403*cdf0e10cSrcweir 		while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True )
404*cdf0e10cSrcweir 			getChar();
405*cdf0e10cSrcweir 
406*cdf0e10cSrcweir 		reType = TT_WHITESPACE;
407*cdf0e10cSrcweir 	}
408*cdf0e10cSrcweir 
409*cdf0e10cSrcweir 	// Identifier?
410*cdf0e10cSrcweir 	else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) )
411*cdf0e10cSrcweir 	{
412*cdf0e10cSrcweir 		sal_Bool bIdentifierChar;
413*cdf0e10cSrcweir 		do
414*cdf0e10cSrcweir 		{
415*cdf0e10cSrcweir 			// Naechstes Zeichen holen
416*cdf0e10cSrcweir 			c = peekChar();
417*cdf0e10cSrcweir 			bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
418*cdf0e10cSrcweir 			if( bIdentifierChar )
419*cdf0e10cSrcweir 				getChar();
420*cdf0e10cSrcweir 		}
421*cdf0e10cSrcweir 		while( bIdentifierChar );
422*cdf0e10cSrcweir 
423*cdf0e10cSrcweir 		reType = TT_IDENTIFIER;
424*cdf0e10cSrcweir 
425*cdf0e10cSrcweir 		// Schluesselwort-Tabelle
426*cdf0e10cSrcweir 		if (ppListKeyWords != NULL)
427*cdf0e10cSrcweir 		{
428*cdf0e10cSrcweir 			int nCount = mpActualPos - rpStartPos;
429*cdf0e10cSrcweir 
430*cdf0e10cSrcweir 			// No keyword if string contains char > 255
431*cdf0e10cSrcweir 			bool bCanBeKeyword = true;
432*cdf0e10cSrcweir 			for( int i = 0 ; i < nCount ; i++ )
433*cdf0e10cSrcweir 			{
434*cdf0e10cSrcweir 				if( rpStartPos[i] > 255 )
435*cdf0e10cSrcweir 				{
436*cdf0e10cSrcweir 					bCanBeKeyword = false;
437*cdf0e10cSrcweir 					break;
438*cdf0e10cSrcweir 				}
439*cdf0e10cSrcweir 			}
440*cdf0e10cSrcweir 
441*cdf0e10cSrcweir 			if( bCanBeKeyword )
442*cdf0e10cSrcweir 			{
443*cdf0e10cSrcweir 				String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) );
444*cdf0e10cSrcweir 				ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US );
445*cdf0e10cSrcweir 				aByteStr.ToLowerAscii();
446*cdf0e10cSrcweir 				if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
447*cdf0e10cSrcweir 																		compare_strings ) )
448*cdf0e10cSrcweir 				{
449*cdf0e10cSrcweir 					reType = TT_KEYWORDS;
450*cdf0e10cSrcweir 
451*cdf0e10cSrcweir 					if ( aByteStr.Equals( "rem" ) )
452*cdf0e10cSrcweir 					{
453*cdf0e10cSrcweir 						// Alle Zeichen bis Zeilen-Ende oder EOF entfernen
454*cdf0e10cSrcweir 						sal_Unicode cPeek = peekChar();
455*cdf0e10cSrcweir 						while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
456*cdf0e10cSrcweir 						{
457*cdf0e10cSrcweir 							c = getChar();
458*cdf0e10cSrcweir 							cPeek = peekChar();
459*cdf0e10cSrcweir 						}
460*cdf0e10cSrcweir 
461*cdf0e10cSrcweir 						reType = TT_COMMENT;
462*cdf0e10cSrcweir 					}
463*cdf0e10cSrcweir 				}
464*cdf0e10cSrcweir 			}
465*cdf0e10cSrcweir 		}
466*cdf0e10cSrcweir 	}
467*cdf0e10cSrcweir 
468*cdf0e10cSrcweir 	// Operator?
469*cdf0e10cSrcweir 	// only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
470*cdf0e10cSrcweir 	else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) )
471*cdf0e10cSrcweir 	{
472*cdf0e10cSrcweir 		// paramters for SQL view
473*cdf0e10cSrcweir 		if ( (c==':') || (c=='?'))
474*cdf0e10cSrcweir 		{
475*cdf0e10cSrcweir 			if (c!='?')
476*cdf0e10cSrcweir 			{
477*cdf0e10cSrcweir 				sal_Bool bIdentifierChar;
478*cdf0e10cSrcweir 				do
479*cdf0e10cSrcweir 				{
480*cdf0e10cSrcweir 					// Naechstes Zeichen holen
481*cdf0e10cSrcweir 					c = peekChar();
482*cdf0e10cSrcweir 					bIdentifierChar =  BasicSimpleCharClass::isAlpha( c, true );
483*cdf0e10cSrcweir 					if( bIdentifierChar )
484*cdf0e10cSrcweir 						getChar();
485*cdf0e10cSrcweir 				}
486*cdf0e10cSrcweir 				while( bIdentifierChar );
487*cdf0e10cSrcweir 			}
488*cdf0e10cSrcweir 			reType = TT_PARAMETER;
489*cdf0e10cSrcweir 		}
490*cdf0e10cSrcweir 		else if ((c=='-'))
491*cdf0e10cSrcweir 		{
492*cdf0e10cSrcweir 			sal_Unicode cPeekNext = peekChar();
493*cdf0e10cSrcweir 			if (cPeekNext=='-')
494*cdf0e10cSrcweir 			{
495*cdf0e10cSrcweir 				// Alle Zeichen bis Zeilen-Ende oder EOF entfernen
496*cdf0e10cSrcweir 				while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
497*cdf0e10cSrcweir 				{
498*cdf0e10cSrcweir 					getChar();
499*cdf0e10cSrcweir 					cPeekNext = peekChar();
500*cdf0e10cSrcweir 				}
501*cdf0e10cSrcweir 				reType = TT_COMMENT;
502*cdf0e10cSrcweir 			}
503*cdf0e10cSrcweir 		}
504*cdf0e10cSrcweir        else if (c=='/')
505*cdf0e10cSrcweir        {
506*cdf0e10cSrcweir            sal_Unicode cPeekNext = peekChar();
507*cdf0e10cSrcweir            if (cPeekNext=='/')
508*cdf0e10cSrcweir            {
509*cdf0e10cSrcweir                // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
510*cdf0e10cSrcweir                while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
511*cdf0e10cSrcweir                {
512*cdf0e10cSrcweir                    getChar();
513*cdf0e10cSrcweir                    cPeekNext = peekChar();
514*cdf0e10cSrcweir                }
515*cdf0e10cSrcweir                reType = TT_COMMENT;
516*cdf0e10cSrcweir            }
517*cdf0e10cSrcweir        }
518*cdf0e10cSrcweir 		else
519*cdf0e10cSrcweir 		{
520*cdf0e10cSrcweir 			// Kommentar ?
521*cdf0e10cSrcweir 			if ( c == '\'' )
522*cdf0e10cSrcweir 			{
523*cdf0e10cSrcweir 				c = getChar();	// '/' entfernen
524*cdf0e10cSrcweir 
525*cdf0e10cSrcweir 				// Alle Zeichen bis Zeilen-Ende oder EOF entfernen
526*cdf0e10cSrcweir 				sal_Unicode cPeek = c;
527*cdf0e10cSrcweir 				while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
528*cdf0e10cSrcweir 				{
529*cdf0e10cSrcweir 					getChar();
530*cdf0e10cSrcweir 					cPeek = peekChar();
531*cdf0e10cSrcweir 				}
532*cdf0e10cSrcweir 
533*cdf0e10cSrcweir 				reType = TT_COMMENT;
534*cdf0e10cSrcweir 			}
535*cdf0e10cSrcweir 
536*cdf0e10cSrcweir 			// Echter Operator, kann hier einfach behandelt werden,
537*cdf0e10cSrcweir 			// da nicht der wirkliche Operator, wie z.B. += interessiert,
538*cdf0e10cSrcweir 			// sondern nur die Tatsache, dass es sich um einen handelt.
539*cdf0e10cSrcweir 			if( reType != TT_COMMENT )
540*cdf0e10cSrcweir 			{
541*cdf0e10cSrcweir 				reType = TT_OPERATOR;
542*cdf0e10cSrcweir 			}
543*cdf0e10cSrcweir 
544*cdf0e10cSrcweir 		}
545*cdf0e10cSrcweir 	}
546*cdf0e10cSrcweir 
547*cdf0e10cSrcweir 	// Objekt-Trenner? Muss vor Number abgehandelt werden
548*cdf0e10cSrcweir 	else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
549*cdf0e10cSrcweir 	{
550*cdf0e10cSrcweir 		reType = TT_OPERATOR;
551*cdf0e10cSrcweir 	}
552*cdf0e10cSrcweir 
553*cdf0e10cSrcweir 	// Zahl?
554*cdf0e10cSrcweir 	else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True )
555*cdf0e10cSrcweir 	{
556*cdf0e10cSrcweir 		reType = TT_NUMBER;
557*cdf0e10cSrcweir 
558*cdf0e10cSrcweir 		// Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert
559*cdf0e10cSrcweir 		int nRadix = 10;
560*cdf0e10cSrcweir 
561*cdf0e10cSrcweir 		// Ist es eine Hex- oder Oct-Zahl?
562*cdf0e10cSrcweir 		if( c == '&' )
563*cdf0e10cSrcweir 		{
564*cdf0e10cSrcweir 			// Octal?
565*cdf0e10cSrcweir 			if( peekChar() == 'o' || peekChar() == 'O' )
566*cdf0e10cSrcweir 			{
567*cdf0e10cSrcweir 				// o entfernen
568*cdf0e10cSrcweir 				getChar();
569*cdf0e10cSrcweir 				nRadix = 8; 	// Octal-Basis
570*cdf0e10cSrcweir 
571*cdf0e10cSrcweir 				// Alle Ziffern einlesen
572*cdf0e10cSrcweir 				while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
573*cdf0e10cSrcweir 					c = getChar();
574*cdf0e10cSrcweir 			}
575*cdf0e10cSrcweir 			// Hex?
576*cdf0e10cSrcweir 			else if( peekChar() == 'h' || peekChar() == 'H' )
577*cdf0e10cSrcweir 			{
578*cdf0e10cSrcweir 				// x entfernen
579*cdf0e10cSrcweir 				getChar();
580*cdf0e10cSrcweir 				nRadix = 16;	 // Hex-Basis
581*cdf0e10cSrcweir 
582*cdf0e10cSrcweir 				// Alle Ziffern einlesen und puffern
583*cdf0e10cSrcweir 				while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
584*cdf0e10cSrcweir 					c = getChar();
585*cdf0e10cSrcweir 			}
586*cdf0e10cSrcweir 			else
587*cdf0e10cSrcweir 			{
588*cdf0e10cSrcweir 				reType = TT_OPERATOR;
589*cdf0e10cSrcweir 			}
590*cdf0e10cSrcweir 		}
591*cdf0e10cSrcweir 
592*cdf0e10cSrcweir 		// Wenn nicht Oct oder Hex als double ansehen
593*cdf0e10cSrcweir 		if( reType == TT_NUMBER && nRadix == 10 )
594*cdf0e10cSrcweir 		{
595*cdf0e10cSrcweir 			// Flag, ob das letzte Zeichen ein Exponent war
596*cdf0e10cSrcweir 			sal_Bool bAfterExpChar = sal_False;
597*cdf0e10cSrcweir 
598*cdf0e10cSrcweir 			// Alle Ziffern einlesen
599*cdf0e10cSrcweir 			while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
600*cdf0e10cSrcweir 					(bAfterExpChar && peekChar() == '+' ) ||
601*cdf0e10cSrcweir 					(bAfterExpChar && peekChar() == '-' ) )
602*cdf0e10cSrcweir 					// Nach Exponent auch +/- OK
603*cdf0e10cSrcweir 			{
604*cdf0e10cSrcweir 				c = getChar();					// Zeichen lesen
605*cdf0e10cSrcweir 				bAfterExpChar = ( c == 'e' || c == 'E' );
606*cdf0e10cSrcweir 			}
607*cdf0e10cSrcweir 		}
608*cdf0e10cSrcweir 
609*cdf0e10cSrcweir 		// reType = TT_NUMBER;
610*cdf0e10cSrcweir 	}
611*cdf0e10cSrcweir 
612*cdf0e10cSrcweir 	// String?
613*cdf0e10cSrcweir 	else if( testCharFlags( c, CHAR_START_STRING ) == sal_True )
614*cdf0e10cSrcweir 	{
615*cdf0e10cSrcweir 		// Merken, welches Zeichen den String eroeffnet hat
616*cdf0e10cSrcweir 		sal_Unicode cEndString = c;
617*cdf0e10cSrcweir 		if( c == '[' )
618*cdf0e10cSrcweir 			cEndString = ']';
619*cdf0e10cSrcweir 
620*cdf0e10cSrcweir 		// Alle Ziffern einlesen und puffern
621*cdf0e10cSrcweir 		while( peekChar() != cEndString )
622*cdf0e10cSrcweir 		{
623*cdf0e10cSrcweir 			// #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht
624*cdf0e10cSrcweir 			if( peekChar() == CHAR_EOF )
625*cdf0e10cSrcweir 			{
626*cdf0e10cSrcweir 				// ERROR: unterminated string literal
627*cdf0e10cSrcweir 				reType = TT_ERROR;
628*cdf0e10cSrcweir 				break;
629*cdf0e10cSrcweir 			}
630*cdf0e10cSrcweir 			c = getChar();
631*cdf0e10cSrcweir 			if( testCharFlags( c, CHAR_EOL ) == sal_True )
632*cdf0e10cSrcweir 			{
633*cdf0e10cSrcweir 				// ERROR: unterminated string literal
634*cdf0e10cSrcweir 				reType = TT_ERROR;
635*cdf0e10cSrcweir 				break;
636*cdf0e10cSrcweir 			}
637*cdf0e10cSrcweir 		}
638*cdf0e10cSrcweir 
639*cdf0e10cSrcweir 		//	Zeichen lesen
640*cdf0e10cSrcweir 		if( reType != TT_ERROR )
641*cdf0e10cSrcweir 		{
642*cdf0e10cSrcweir 			getChar();
643*cdf0e10cSrcweir 			if( cEndString == ']' )
644*cdf0e10cSrcweir 				reType = TT_IDENTIFIER;
645*cdf0e10cSrcweir 			else
646*cdf0e10cSrcweir 				reType = TT_STRING;
647*cdf0e10cSrcweir 		}
648*cdf0e10cSrcweir 	}
649*cdf0e10cSrcweir 
650*cdf0e10cSrcweir 	// Zeilenende?
651*cdf0e10cSrcweir 	else if( testCharFlags( c, CHAR_EOL ) == sal_True )
652*cdf0e10cSrcweir 	{
653*cdf0e10cSrcweir 		// Falls ein weiteres anderes EOL-Char folgt, weg damit
654*cdf0e10cSrcweir 		sal_Unicode cNext = peekChar();
655*cdf0e10cSrcweir 		if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True )
656*cdf0e10cSrcweir 			getChar();
657*cdf0e10cSrcweir 
658*cdf0e10cSrcweir 		// Positions-Daten auf Zeilen-Beginn setzen
659*cdf0e10cSrcweir 		nCol = 0;
660*cdf0e10cSrcweir 		nLine++;
661*cdf0e10cSrcweir 
662*cdf0e10cSrcweir 		reType = TT_EOL;
663*cdf0e10cSrcweir 	}
664*cdf0e10cSrcweir 
665*cdf0e10cSrcweir 	// Alles andere bleibt TT_UNKNOWN
666*cdf0e10cSrcweir 
667*cdf0e10cSrcweir 
668*cdf0e10cSrcweir 	// End-Position eintragen
669*cdf0e10cSrcweir 	rpEndPos = mpActualPos;
670*cdf0e10cSrcweir 	return sal_True;
671*cdf0e10cSrcweir }
672*cdf0e10cSrcweir 
673*cdf0e10cSrcweir String SimpleTokenizer_Impl::getTokStr
674*cdf0e10cSrcweir 	( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
675*cdf0e10cSrcweir {
676*cdf0e10cSrcweir 	return String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) );
677*cdf0e10cSrcweir }
678*cdf0e10cSrcweir 
679*cdf0e10cSrcweir #ifdef DBG_UTIL
680*cdf0e10cSrcweir // TEST: Token ausgeben
681*cdf0e10cSrcweir String SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType,
682*cdf0e10cSrcweir 	/*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
683*cdf0e10cSrcweir {
684*cdf0e10cSrcweir 	String aOut;
685*cdf0e10cSrcweir 	switch( eType )
686*cdf0e10cSrcweir 	{
687*cdf0e10cSrcweir 		case TT_UNKNOWN:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break;
688*cdf0e10cSrcweir 		case TT_IDENTIFIER:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break;
689*cdf0e10cSrcweir 		case TT_WHITESPACE:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break;
690*cdf0e10cSrcweir 		case TT_NUMBER:		aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break;
691*cdf0e10cSrcweir 		case TT_STRING:		aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break;
692*cdf0e10cSrcweir 		case TT_EOL:		aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break;
693*cdf0e10cSrcweir 		case TT_COMMENT:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break;
694*cdf0e10cSrcweir 		case TT_ERROR:		aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break;
695*cdf0e10cSrcweir 		case TT_OPERATOR:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break;
696*cdf0e10cSrcweir 		case TT_KEYWORDS:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break;
697*cdf0e10cSrcweir 		case TT_PARAMETER:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break;
698*cdf0e10cSrcweir 	}
699*cdf0e10cSrcweir 	if( eType != TT_EOL )
700*cdf0e10cSrcweir 	{
701*cdf0e10cSrcweir 		aOut += String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) );
702*cdf0e10cSrcweir 	}
703*cdf0e10cSrcweir 	aOut += String( RTL_CONSTASCII_USTRINGPARAM("\n") );
704*cdf0e10cSrcweir 	return aOut;
705*cdf0e10cSrcweir }
706*cdf0e10cSrcweir #endif
707*cdf0e10cSrcweir 
708*cdf0e10cSrcweir SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang)
709*cdf0e10cSrcweir {
710*cdf0e10cSrcweir 	memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
711*cdf0e10cSrcweir 
712*cdf0e10cSrcweir 	// Zeichen-Tabelle fuellen
713*cdf0e10cSrcweir 	sal_uInt16 i;
714*cdf0e10cSrcweir 
715*cdf0e10cSrcweir 	// Zulaessige Zeichen fuer Identifier
716*cdf0e10cSrcweir 	sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
717*cdf0e10cSrcweir 	for( i = 'a' ; i <= 'z' ; i++ )
718*cdf0e10cSrcweir 		aCharTypeTab[i] |= nHelpMask;
719*cdf0e10cSrcweir 	for( i = 'A' ; i <= 'Z' ; i++ )
720*cdf0e10cSrcweir 		aCharTypeTab[i] |= nHelpMask;
721*cdf0e10cSrcweir 	// '_' extra eintragen
722*cdf0e10cSrcweir 	aCharTypeTab[(int)'_'] |= nHelpMask;
723*cdf0e10cSrcweir 	// AB 23.6.97: '$' ist auch erlaubt
724*cdf0e10cSrcweir 	aCharTypeTab[(int)'$'] |= nHelpMask;
725*cdf0e10cSrcweir 
726*cdf0e10cSrcweir 	// Ziffern (Identifier und Number ist moeglich)
727*cdf0e10cSrcweir 	nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
728*cdf0e10cSrcweir 						 CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
729*cdf0e10cSrcweir 	for( i = '0' ; i <= '9' ; i++ )
730*cdf0e10cSrcweir 		aCharTypeTab[i] |= nHelpMask;
731*cdf0e10cSrcweir 
732*cdf0e10cSrcweir 	// e und E sowie . von Hand ergaenzen
733*cdf0e10cSrcweir 	aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
734*cdf0e10cSrcweir 	aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
735*cdf0e10cSrcweir 	aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
736*cdf0e10cSrcweir 	aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
737*cdf0e10cSrcweir 
738*cdf0e10cSrcweir 	// Hex-Ziffern
739*cdf0e10cSrcweir 	for( i = 'a' ; i <= 'f' ; i++ )
740*cdf0e10cSrcweir 		aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
741*cdf0e10cSrcweir 	for( i = 'A' ; i <= 'F' ; i++ )
742*cdf0e10cSrcweir 		aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
743*cdf0e10cSrcweir 
744*cdf0e10cSrcweir 	// Oct-Ziffern
745*cdf0e10cSrcweir 	for( i = '0' ; i <= '7' ; i++ )
746*cdf0e10cSrcweir 		aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
747*cdf0e10cSrcweir 
748*cdf0e10cSrcweir 	// String-Beginn/End-Zeichen
749*cdf0e10cSrcweir 	aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
750*cdf0e10cSrcweir 	aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
751*cdf0e10cSrcweir 	aCharTypeTab[(int)'[']  |= CHAR_START_STRING;
752*cdf0e10cSrcweir 	aCharTypeTab[(int)'`']  |= CHAR_START_STRING;
753*cdf0e10cSrcweir 
754*cdf0e10cSrcweir 	// Operator-Zeichen
755*cdf0e10cSrcweir 	aCharTypeTab[(int)'!'] |= CHAR_OPERATOR;
756*cdf0e10cSrcweir 	aCharTypeTab[(int)'%'] |= CHAR_OPERATOR;
757*cdf0e10cSrcweir 	// aCharTypeTab[(int)'&'] |= CHAR_OPERATOR;		Removed because of #i14140
758*cdf0e10cSrcweir 	aCharTypeTab[(int)'('] |= CHAR_OPERATOR;
759*cdf0e10cSrcweir 	aCharTypeTab[(int)')'] |= CHAR_OPERATOR;
760*cdf0e10cSrcweir 	aCharTypeTab[(int)'*'] |= CHAR_OPERATOR;
761*cdf0e10cSrcweir 	aCharTypeTab[(int)'+'] |= CHAR_OPERATOR;
762*cdf0e10cSrcweir 	aCharTypeTab[(int)','] |= CHAR_OPERATOR;
763*cdf0e10cSrcweir 	aCharTypeTab[(int)'-'] |= CHAR_OPERATOR;
764*cdf0e10cSrcweir 	aCharTypeTab[(int)'/'] |= CHAR_OPERATOR;
765*cdf0e10cSrcweir 	aCharTypeTab[(int)':'] |= CHAR_OPERATOR;
766*cdf0e10cSrcweir 	aCharTypeTab[(int)'<'] |= CHAR_OPERATOR;
767*cdf0e10cSrcweir 	aCharTypeTab[(int)'='] |= CHAR_OPERATOR;
768*cdf0e10cSrcweir 	aCharTypeTab[(int)'>'] |= CHAR_OPERATOR;
769*cdf0e10cSrcweir 	aCharTypeTab[(int)'?'] |= CHAR_OPERATOR;
770*cdf0e10cSrcweir 	aCharTypeTab[(int)'^'] |= CHAR_OPERATOR;
771*cdf0e10cSrcweir 	aCharTypeTab[(int)'|'] |= CHAR_OPERATOR;
772*cdf0e10cSrcweir 	aCharTypeTab[(int)'~'] |= CHAR_OPERATOR;
773*cdf0e10cSrcweir 	aCharTypeTab[(int)'{'] |= CHAR_OPERATOR;
774*cdf0e10cSrcweir 	aCharTypeTab[(int)'}'] |= CHAR_OPERATOR;
775*cdf0e10cSrcweir 	// aCharTypeTab[(int)'['] |= CHAR_OPERATOR;		Removed because of #i17826
776*cdf0e10cSrcweir 	aCharTypeTab[(int)']'] |= CHAR_OPERATOR;
777*cdf0e10cSrcweir 	aCharTypeTab[(int)';'] |= CHAR_OPERATOR;
778*cdf0e10cSrcweir 
779*cdf0e10cSrcweir 	// Space
780*cdf0e10cSrcweir 	aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
781*cdf0e10cSrcweir 	aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
782*cdf0e10cSrcweir 
783*cdf0e10cSrcweir 	// Zeilen-Ende-Zeichen
784*cdf0e10cSrcweir 	aCharTypeTab[(int)'\r'] |= CHAR_EOL;
785*cdf0e10cSrcweir 	aCharTypeTab[(int)'\n'] |= CHAR_EOL;
786*cdf0e10cSrcweir 
787*cdf0e10cSrcweir 	ppListKeyWords = NULL;
788*cdf0e10cSrcweir }
789*cdf0e10cSrcweir 
790*cdf0e10cSrcweir SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void )
791*cdf0e10cSrcweir {
792*cdf0e10cSrcweir }
793*cdf0e10cSrcweir 
794*cdf0e10cSrcweir SimpleTokenizer_Impl* getSimpleTokenizer( void )
795*cdf0e10cSrcweir {
796*cdf0e10cSrcweir 	static SimpleTokenizer_Impl* pSimpleTokenizer = NULL;
797*cdf0e10cSrcweir 	if( !pSimpleTokenizer )
798*cdf0e10cSrcweir 		pSimpleTokenizer = new SimpleTokenizer_Impl();
799*cdf0e10cSrcweir 	return pSimpleTokenizer;
800*cdf0e10cSrcweir }
801*cdf0e10cSrcweir 
802*cdf0e10cSrcweir // Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul
803*cdf0e10cSrcweir sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource )
804*cdf0e10cSrcweir {
805*cdf0e10cSrcweir 	// Position auf den Anfang des Source-Strings setzen
806*cdf0e10cSrcweir 	mpStringBegin = mpActualPos = aSource->GetBuffer();
807*cdf0e10cSrcweir 
808*cdf0e10cSrcweir 	// Zeile und Spalte initialisieren
809*cdf0e10cSrcweir 	nLine = nParseLine;
810*cdf0e10cSrcweir 	nCol = 0L;
811*cdf0e10cSrcweir 
812*cdf0e10cSrcweir 	// Variablen fuer die Out-Parameter
813*cdf0e10cSrcweir 	TokenTypes eType;
814*cdf0e10cSrcweir 	const sal_Unicode* pStartPos;
815*cdf0e10cSrcweir 	const sal_Unicode* pEndPos;
816*cdf0e10cSrcweir 
817*cdf0e10cSrcweir 	// Schleife ueber alle Tokens
818*cdf0e10cSrcweir 	sal_uInt16 nTokenCount = 0;
819*cdf0e10cSrcweir 	while( getNextToken( eType, pStartPos, pEndPos ) )
820*cdf0e10cSrcweir 		nTokenCount++;
821*cdf0e10cSrcweir 
822*cdf0e10cSrcweir 	return nTokenCount;
823*cdf0e10cSrcweir }
824*cdf0e10cSrcweir 
825*cdf0e10cSrcweir void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine,
826*cdf0e10cSrcweir 													/*out*/HighlightPortions& portions  )
827*cdf0e10cSrcweir {
828*cdf0e10cSrcweir 	// Position auf den Anfang des Source-Strings setzen
829*cdf0e10cSrcweir 	mpStringBegin = mpActualPos = rLine.GetBuffer();
830*cdf0e10cSrcweir 
831*cdf0e10cSrcweir 	// Zeile und Spalte initialisieren
832*cdf0e10cSrcweir 	nLine = nParseLine;
833*cdf0e10cSrcweir 	nCol = 0L;
834*cdf0e10cSrcweir 
835*cdf0e10cSrcweir 	// Variablen fuer die Out-Parameter
836*cdf0e10cSrcweir 	TokenTypes eType;
837*cdf0e10cSrcweir 	const sal_Unicode* pStartPos;
838*cdf0e10cSrcweir 	const sal_Unicode* pEndPos;
839*cdf0e10cSrcweir 
840*cdf0e10cSrcweir 	// Schleife ueber alle Tokens
841*cdf0e10cSrcweir 	while( getNextToken( eType, pStartPos, pEndPos ) )
842*cdf0e10cSrcweir 	{
843*cdf0e10cSrcweir 		HighlightPortion portion;
844*cdf0e10cSrcweir 
845*cdf0e10cSrcweir 		portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin);
846*cdf0e10cSrcweir 		portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin);
847*cdf0e10cSrcweir 		portion.tokenType = eType;
848*cdf0e10cSrcweir 
849*cdf0e10cSrcweir         portions.push_back(portion);
850*cdf0e10cSrcweir 	}
851*cdf0e10cSrcweir }
852*cdf0e10cSrcweir 
853*cdf0e10cSrcweir 
854*cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////////
855*cdf0e10cSrcweir // Implementierung des SyntaxHighlighter
856*cdf0e10cSrcweir 
857*cdf0e10cSrcweir SyntaxHighlighter::SyntaxHighlighter()
858*cdf0e10cSrcweir {
859*cdf0e10cSrcweir 	m_pSimpleTokenizer = 0;
860*cdf0e10cSrcweir 	m_pKeyWords = NULL;
861*cdf0e10cSrcweir 	m_nKeyWordCount = 0;
862*cdf0e10cSrcweir }
863*cdf0e10cSrcweir 
864*cdf0e10cSrcweir SyntaxHighlighter::~SyntaxHighlighter()
865*cdf0e10cSrcweir {
866*cdf0e10cSrcweir 	delete m_pSimpleTokenizer;
867*cdf0e10cSrcweir 	delete m_pKeyWords;
868*cdf0e10cSrcweir }
869*cdf0e10cSrcweir 
870*cdf0e10cSrcweir void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
871*cdf0e10cSrcweir {
872*cdf0e10cSrcweir 	eLanguage = eLanguage_;
873*cdf0e10cSrcweir 	delete m_pSimpleTokenizer;
874*cdf0e10cSrcweir 	m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage);
875*cdf0e10cSrcweir 
876*cdf0e10cSrcweir 	switch (eLanguage)
877*cdf0e10cSrcweir 	{
878*cdf0e10cSrcweir 		case HIGHLIGHT_BASIC:
879*cdf0e10cSrcweir 			m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords,
880*cdf0e10cSrcweir 											sizeof( strListBasicKeyWords ) / sizeof( char* ));
881*cdf0e10cSrcweir 			break;
882*cdf0e10cSrcweir 		case HIGHLIGHT_SQL:
883*cdf0e10cSrcweir 			m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords,
884*cdf0e10cSrcweir 											sizeof( strListSqlKeyWords ) / sizeof( char* ));
885*cdf0e10cSrcweir 			break;
886*cdf0e10cSrcweir 		default:
887*cdf0e10cSrcweir 			m_pSimpleTokenizer->setKeyWords( NULL, 0 );
888*cdf0e10cSrcweir 	}
889*cdf0e10cSrcweir }
890*cdf0e10cSrcweir 
891*cdf0e10cSrcweir const Range SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference,
892*cdf0e10cSrcweir 								const String* pChangedLines, sal_uInt32 nArrayLength)
893*cdf0e10cSrcweir {
894*cdf0e10cSrcweir     (void)nLineCountDifference;
895*cdf0e10cSrcweir 
896*cdf0e10cSrcweir 	for( sal_uInt32 i=0 ; i < nArrayLength ; i++ )
897*cdf0e10cSrcweir 		m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]);
898*cdf0e10cSrcweir 
899*cdf0e10cSrcweir 	return Range( nLine, nLine + nArrayLength-1 );
900*cdf0e10cSrcweir }
901*cdf0e10cSrcweir 
902*cdf0e10cSrcweir void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const String& rLine,
903*cdf0e10cSrcweir 											/*out*/HighlightPortions& portions )
904*cdf0e10cSrcweir {
905*cdf0e10cSrcweir 	m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions );
906*cdf0e10cSrcweir }
907