xref: /AOO41X/main/writerfilter/source/rtftok/RTFScanner.lex (revision 3eeae0359da67b6bee32b2a1a66cfad47ee86e25)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 /* compile with flex++ -8 -f -+ -Sflex.skl -ortfparser.cxx rtfparser.lex */
23 %option yylineno
24 %{
25   //#include <io.h>
26 #include <math.h>
27 #include <string.h>
28 #include <osl/file.h>
29 #include <assert.h>
30 #include <vector>
31 
32 #if defined (UNX)
33   #define stricmp strcasecmp
34 #endif
35 
createRTFScanner(class writerfilter::rtftok::RTFInputSource & inputSource,writerfilter::rtftok::RTFScannerHandler & eventHandler)36 writerfilter::rtftok::RTFScanner* writerfilter::rtftok::RTFScanner::createRTFScanner(class writerfilter::rtftok::RTFInputSource& inputSource, writerfilter::rtftok::RTFScannerHandler &eventHandler)
37 {
38   return new yyFlexLexer(&inputSource, eventHandler);
39 }
40 
41 
42 
extern "C" {
//int isatty(int fd) { return 0; }

/* End-of-input hook with C linkage: always answers 1, which by flex
   convention means there is no further input to switch to. */
int yywrap(void)
{
  return 1;
}
}
47 
48 /*
49 oslFileHandle yy_osl_in=NULL;
50 #define YY_INPUT(buf,result,max_size) \
51 {\
52 {\
53     assert(yy_osl_in!=NULL);\
54     sal_Bool isEOF;\
55     oslFileError ret=osl_isEndOfFile( yy_osl_in, &isEOF );\
56     assert(ret==osl_File_E_None);\
57     if (isEOF)\
58     {\
59         result=YY_NULL;\
60     }\
61     else\
62     {\
63     sal_uInt64 bytesRead;\
64     ret=osl_readFile( yy_osl_in, buf, max_size, &bytesRead);\
65     assert(ret==osl_File_E_None);\
66     result = bytesRead; \
67     }\
68 }\
69 }
70 */
71 
72 //extern RtfTokenizer* this;
split_ctrl(char * _yytext,char * token,char * value)73 void yyFlexLexer::split_ctrl(char *_yytext, char* token, char *value)
74    {
75      int i=0; // skip first '\'
76      while(_yytext[i]!=0 && (_yytext[i]=='\r' || _yytext[i]=='\n')) i++;
77      while(_yytext[i]!=0 && (_yytext[i]<'A' || (_yytext[i]>'Z' && _yytext[i]<'a') || _yytext[i]>'z')) i++;
78      while(_yytext[i]!=0 && _yytext[i]>='A') *(token++)=_yytext[i++];
79      *token=0;
80      while(_yytext[i]!=0 && _yytext[i]>' ') *(value++)=_yytext[i++];
81      *value=0;
82    }
83 
raise_ctrl(char * _yytext)84  void yyFlexLexer::raise_ctrl(char* _yytext)
85    {
86      char token[50];
87      char value[50];
88      split_ctrl(_yytext, token, value);
89      eventHandler.ctrl(token, value);
90    }
91 
raise_dest(char * _yytext)92  void yyFlexLexer::raise_dest(char* _yytext)
93    {
94      char token[50];
95      char value[50];
96      split_ctrl(_yytext, token, value);
97      eventHandler.dest(token, value);
98    }
99 
/*
 * Keywords that raise_destOrCtrl() reports as destinations when they directly
 * follow an opening brace. The table is looked up with bsearch(), so the
 * entries must stay in ascending order.
 * NOTE(review): "footer" and "header" each appear twice; this is harmless for
 * the lookup but may be a typo for another keyword - confirm before changing.
 */
static const char* _destctrls[] = {
  "aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor", "atndate",
  "atnicn", "atnid", "atnparent", "atnref", "atntime", "atrfend",
  "atrfstart", "author",
  "background", "bkmkend", "bkmkstart", "buptim",
  "category", "colortbl", "comment", "company", "creatim",
  "datafield", "do", "doccomm", "docvar", "dptxbxtext",
  "falt", "fchars", "ffdeftext", "ffentrymcr", "ffexitmcr", "ffformat",
  "ffhelptext", "ffl", "ffname", "ffstattext", "field", "file",
  "filetbl", "fldinst", "fldrslt", "fldtype", "fname", "fontemb",
  "fontfile", "fonttbl", "footer", "footer", "footerf", "footerl",
  "footnote", "formfield", "ftncn", "ftnsep", "ftnsepc",
  "g", "generator", "gridtbl",
  "header", "header", "headerf", "headerl", "htmltag",
  "info",
  "keycode", "keywords",
  "lchars", "levelnumbers", "leveltext", "lfolevel", "list", "listlevel",
  "listname", "listoverride", "listoverridetable", "listtable", "listtext",
  "manager", "mhtmltag",
  "nesttableprops", "nextfile", "nonesttables", "nonshppict",
  "objalias", "objclass", "objdata", "object", "objname", "objsect",
  "objtime", "oldcprops", "oldpprops", "oldsprops", "oldtprops", "operator",
  "panose", "pgp", "pgptbl", "picprop", "pict", "pn",
  "pnseclvl", "pntext", "pntxta", "pntxtb", "printim", "private",
  "pwd", "pxe",
  "result", "revtbl", "revtim", "rsidtbl", "rtf", "rxe",
  "shp", "shpgrp", "shpinst", "shppict", "shprslt", "shptxt",
  "sn", "sp", "stylesheet", "subject", "sv",
  "tc", "template", "title", "txe",
  "ud", "upr", "urtf", "userprops",
  "xe"
};

/* Number of entries in _destctrls. */
#define _num_of_destctrls (sizeof(_destctrls)/sizeof(_destctrls[0]))
240 
raise_destOrCtrl(char * _yytext)241  void yyFlexLexer::raise_destOrCtrl(char* _yytext)
242    {
243      char token[50];
244      char value[50];
245      split_ctrl(_yytext, token, value);
246      char* result=(char*)bsearch(token, _destctrls, _num_of_destctrls, 20, (int (*)(const void*, const void*))stricmp);
247      if (result)
248        {
249      eventHandler.dest(token, value);
250        }
251      else
252        {
253      eventHandler.lbrace();
254      eventHandler.ctrl(token, value);
255        }
256    }
257 
258 %}
259 
260 %%
\{\\upr\{" "? { /* skip upr destination */
  /* The pattern has already consumed "{\upr{". Read and discard input until
     the brace level that starts at 1 drops back to 0, or until EOF. */
  int depth=1;
  while (depth>0)
    {
      const int ch=yyinput();
      if (ch==EOF)
        break;
      if (ch=='}')
        --depth;
      else if (ch=='{')
        ++depth;
    }
  /* A single opening brace is reported for what follows the skipped part. */
  eventHandler.lbrace();
  num_chars+=yyleng;
}
272 
273 
\\bin(("+"|"-")?[0-9]*)?" "? { /* \binN: N raw bytes follow the control word */
  raise_dest(yytext);
  num_chars+=yyleng;
  /* yytext+4 skips "\bin". A missing count parses as 0; a negative count
     (e.g. "\bin-1") is treated as 0 as well, otherwise the loop below would
     never reach its end condition. */
  int len=atoi(yytext+4);
  if (len<0)
    len=0;
  num_chars+=len;
   //   pictureBytes=2*len;
  while ( len>0 )
    {
      int c = yyinput();
      if (c==EOF)
        break; /* truncated input: do not hand EOF to the handler as a data byte */
      eventHandler.addBinData((unsigned char)c);
      len--;
    }
  /* Close the destination that raise_dest() opened above. */
  eventHandler.rbrace();
}
288 
\{[\r\n]*\\\*\\[A-Za-z]+(("+"|"-")?[0-9]*)?" "? {
        /* "{\*\keyword": starred destination, always reported as a destination */
        raise_dest(yytext);
        num_chars += yyleng;
}
\{[\r\n]*\\[A-Za-z]+(("+"|"-")?[0-9]*)?" "? { /* dest word */
        /* "{\keyword": raise_destOrCtrl() decides whether this is a
           destination or an opening brace followed by a control word. */
        raise_destOrCtrl(yytext);
        /* Count the consumed characters like every other rule does. */
        num_chars+=yyleng;
}
\\[A-Za-z]+(("+"|"-")?[0-9]*)?" "? {
        /* "\keyword[param]": ordinary control word; it also clears star_flag */
        raise_ctrl(yytext);
        star_flag = 0;
        num_chars += yyleng;
}
\\\'[A-Fa-f0-9][A-Fa-f0-9] {
  /* "\'hh": the whole lexeme is handed to the handler for decoding */
  eventHandler.addHexChar(yytext);
  num_chars += yyleng;
}
\\\* { /* star */
    /* Matches the two characters "\*". The previous pattern `\\*` was read by
       flex as "zero or more backslashes", so it never matched the asterisk
       and set star_flag for stray backslashes instead. */
    star_flag=1;
    num_chars+=yyleng;
}
\{ {
        /* group start */
        eventHandler.lbrace();
        num_chars += yyleng;
}
\} {
        /* group end */
        eventHandler.rbrace();
        num_chars += yyleng;
}
\\\| {
  /* "\|" is only counted, nothing is reported */
  num_chars += yyleng;
}
\\~ {
  /* "\~" is reported as U+00A0 */
  num_chars += yyleng;
  eventHandler.addCharU(0xa0);
}
\\- {
  /* "\-" is only counted */
  num_chars += yyleng;
}
\\_ {
  /* "\_" is only counted */
  num_chars += yyleng;
}
\\\: {
  /* "\:" is only counted */
  num_chars += yyleng;
}
\n {
  /* line feed: advances the line counter, produces no text */
  ++num_lines;
  num_chars += yyleng;
}
\r {
  /* carriage return produces no text */
  num_chars += yyleng;
}
\t {
  /* tab produces no text */
  num_chars += yyleng;
}
" "(" "+) {
  /* two or more blanks are reported as one run with its length */
  eventHandler.addSpaces(yyleng);
  num_chars += yyleng;
}
. {
  /* any other single character is passed through as text */
  eventHandler.addChar(yytext[0]);
  num_chars += yyleng;
}
327 %%
328