xref: /AOO41X/main/l10ntools/source/tagtest.cxx (revision 1ecadb572e7010ff3b3382ad9bf179dbc6efadbb)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_l10ntools.hxx"
30 #include <tools/string.hxx>
31 #include "tagtest.hxx"
32 
33 #if OSL_DEBUG_LEVEL > 1
34 #include <stdio.h>
35 #endif
36 
37 #include "gsicheck.hxx"
38 
// Bit-flag helpers.
// Every macro argument is wrapped in parentheses so that compound arguments
// such as (FLAG_A | FLAG_B) are evaluated as a unit; without them
// RESET_FLAG( x, A | B ) would expand to  x &= ~A | B  and clear the wrong bits.
#define HAS_FLAG( nFlags, nFlag )		( ( (nFlags) & (nFlag) ) != 0 )
#define SET_FLAG( nFlags, nFlag )		( (nFlags) |= (nFlag) )
#define RESET_FLAG( nFlags, nFlag )		( (nFlags) &= ~(nFlag) )	// ~ = bitwise NOT
42 
43 
44 
45 TokenInfo::TokenInfo( TokenId pnId, sal_uInt16 nP, String paStr, ParserMessageList &rErrorList )
46 : bClosed(sal_False)
47 , bCloseTag(sal_False)
48 , bIsBroken(sal_False)
49 , bHasBeenFixed(sal_False)
50 , bDone(sal_False)
51 , aTokenString( paStr )
52 , nId( pnId )
53 , nPos(nP)
54 {
55     if ( nId == TAG_COMMONSTART || nId == TAG_COMMONEND )
56         SplitTag( rErrorList );
57 }
58 
// States of the state machine that TokenInfo::SplitTag uses to split a tag
// into its name and properties.
enum tagcheck
{
    TC_START,
    TC_HAS_TAG_NAME,
    TC_HAS_PROP_NAME_EQ,
    TC_HAS_PROP_NAME_EQ_SP,
    TC_HAS_PROP_NAME_SP,
    TC_INSIDE_STRING,
    TC_PROP_FINISHED,
    TC_CLOSED,
    TC_CLOSED_SPACE,
    TC_CLOSETAG,
    TC_CLOSETAG_HAS_TAG_NAME,
    TC_FINISHED,
    TC_ERROR
};
60 
61 /*
62                                                       \<  link  href  =  \"text\"  name  =  \"C\"  \>
63 START               ' ' ->  HAS_TAG_NAME
START               '/' ->  CLOSED      - non-empty Portion (tag name precedes the /)
START               '/' ->  CLOSETAG    - no Portion (starting with /)
66 START               '>' ->  FINISHED
67 HAS_TAG_NAME        '=' ->  HAS_PROP_NAME_EQ
68 HAS_TAG_NAME        ' ' ->  HAS_PROP_NAME_SP
69 HAS_TAG_NAME        '/' ->  CLOSED
70 HAS_TAG_NAME        '>' ->  FINISHED
71 HAS_PROP_NAME_SP    '=' ->  HAS_PROP_NAME_EQ
72 HAS_PROP_NAME_EQ    ' ' ->  HAS_PROP_NAME_EQ_SP
73 HAS_PROP_NAME_EQ    '"' ->  INSIDE_STRING
74 HAS_PROP_NAME_EQ_SP '"' ->  INSIDE_STRING
75 INSIDE_STRING       ' ' ->  INSIDE_STRING
76 INSIDE_STRING       '=' ->  INSIDE_STRING
77 INSIDE_STRING       '>' ->  INSIDE_STRING
78 INSIDE_STRING       '"' ->  PROP_FINISHED
79 PROP_FINISHED       ' ' ->  HAS_TAG_NAME
80 PROP_FINISHED       '/' ->  CLOSED
81 PROP_FINISHED       '>' ->  FINISHED
82 CLOSED              ' ' ->  CLOSED_SPACE
83 CLOSED              '>' ->  FINISHED
84 CLOSED_SPACE        '>' ->  FINISHED
85 
86 CLOSETAG            ' ' ->  CLOSETAG_HAS_TAG_NAME
87 CLOSETAG            '>' ->  FINISHED
88 CLOSETAG_HAS_TAG_NAME  '>' ->  FINISHED
89 
90 */
91 void TokenInfo::SplitTag( ParserMessageList &rErrorList )
92 {
93     sal_uInt16 nLastPos = 2;    // skip initial  \<
94     sal_uInt16 nCheckPos = nLastPos;
95     String aDelims( String::CreateFromAscii( " \\=>/" ) );
96     String aPortion;
97     String aValue;      // store the value of a property
98     ByteString aName;   // store the name of a property/tag
99     sal_Bool bCheckName = sal_False;
100     sal_Bool bCheckEmpty = sal_False;
101     sal_Unicode cDelim;
102     tagcheck aState = TC_START;
103 
104     // skip blanks
105     while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ')
106         nLastPos++;
107 
108     nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos );
109     while ( nCheckPos != STRING_NOTFOUND && !( aState == TC_FINISHED || aState == TC_ERROR ) )
110     {
111         aPortion = aTokenString.Copy( nLastPos, nCheckPos-nLastPos );
112 
113         if ( aTokenString.GetChar( nCheckPos ) == '\\' )
114             nCheckPos++;
115 
116         cDelim = aTokenString.GetChar( nCheckPos );
117         nCheckPos++;
118 
119         switch ( aState )
120         {
121 //            START           ' ' ->  HAS_TAG_NAME
122 //            START           '/' ->  CLOSED
123 //            START           '>' ->  FINISHED
124             case TC_START:
125                 aTagName = aPortion;
126                 switch ( cDelim )
127                 {
128                     case ' ':  aState = TC_HAS_TAG_NAME;
129                                bCheckName = sal_True;
130                                break;
131                     case '/':
132                         {
133                             if ( aPortion.Len() == 0 )
134                             {
135                                 aState = TC_CLOSETAG;
136                             }
137                             else
138                             {
139                                 aState = TC_CLOSED;
140                                 bCheckName = sal_True;
141                             }
142                         }
143                         break;
144                     case '>':  aState = TC_FINISHED;
145                                bCheckName = sal_True;
146                                break;
147                     default:   aState = TC_ERROR;
148                 }
149                 break;
150 
151 //            HAS_TAG_NAME    '=' ->  HAS_PROP_NAME_EQ
152 //            HAS_TAG_NAME    ' ' ->  HAS_PROP_NAME_SP
153 //            HAS_TAG_NAME    '/' ->  CLOSED
154 //            HAS_TAG_NAME    '>' ->  FINISHED
155             case TC_HAS_TAG_NAME:
156                 switch ( cDelim )
157                 {
158                     case '=':  aState = TC_HAS_PROP_NAME_EQ;
159                                bCheckName = sal_True;
160                                break;
161                     case ' ':  aState = TC_HAS_PROP_NAME_SP;
162                                bCheckName = sal_True;
163                                break;
164                     case '/':  aState = TC_CLOSED;
165                                bCheckEmpty = sal_True;
166                                break;
167                     case '>':  aState = TC_FINISHED;
168                                bCheckEmpty = sal_True;
169                                break;
170                     default:   aState = TC_ERROR;
171                 }
172                 break;
173 
174 //            HAS_PROP_NAME_SP    '=' ->  HAS_PROP_NAME_EQ
175             case TC_HAS_PROP_NAME_SP:
176                 switch ( cDelim )
177                 {
178                     case '=':  aState = TC_HAS_PROP_NAME_EQ;
179                                bCheckEmpty = sal_True;
180                                break;
181                     default:   aState = TC_ERROR;
182                 }
183                 break;
184 
185 //            HAS_PROP_NAME_EQ    ' ' ->  HAS_PROP_NAME_EQ_SP
186 //            HAS_PROP_NAME_EQ    '"' ->  INSIDE_STRING
187             case TC_HAS_PROP_NAME_EQ:
188                 switch ( cDelim )
189                 {
190                     case ' ':  aState = TC_HAS_PROP_NAME_EQ_SP;
191                                bCheckEmpty = sal_True;
192                                break;
193                     case '\"': aState = TC_INSIDE_STRING;
194                                bCheckEmpty = sal_True;
195                                aValue.Erase();
196                                break;
197                     default:   aState = TC_ERROR;
198                 }
199                 break;
200 
201 //            HAS_PROP_NAME_EQ_SP '"' ->  INSIDE_STRING
202             case TC_HAS_PROP_NAME_EQ_SP:
203                 switch ( cDelim )
204                 {
205                     case '\"': aState = TC_INSIDE_STRING;
206                                bCheckEmpty = sal_True;
207                                aValue.Erase();
208                                break;
209                     default:   aState = TC_ERROR;
210                 }
211                 break;
212 
213 //            INSIDE_STRING    *  ->  INSIDE_STRING
214 //            INSIDE_STRING   '"' ->  PROP_FINISHED
215             case TC_INSIDE_STRING:
216                 switch ( cDelim )
217                 {
218                     case '\"':
219                         {
220                             aState = TC_PROP_FINISHED;
221                             aValue += aPortion;
222                             if ( aProperties.find( aName ) == aProperties.end() )
223                             {
224                                 if ( !IsPropertyValueValid( aName, aValue ) )
225                                 {
226                                     rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' has invalid value '").Append(ByteString( aValue, RTL_TEXTENCODING_UTF8 )).Append("' "), *this );
227                                     bIsBroken = sal_True;
228                                 }
229                                 aProperties[ aName ] = aValue;
230                             }
231                             else
232                             {
233                                 rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' defined twice "), *this );
234                                 bIsBroken = sal_True;
235                             }
236                         }
237                                break;
238                     default:
239                         {
240                             aState = TC_INSIDE_STRING;
241                             aValue += aPortion;
242                             aValue += cDelim;
243                         }
244                 }
245                 break;
246 
247 //            PROP_FINISHED   ' ' ->  HAS_TAG_NAME
248 //            PROP_FINISHED   '/' ->  CLOSED
249 //            PROP_FINISHED   '>' ->  FINISHED
250             case TC_PROP_FINISHED:
251                 switch ( cDelim )
252                 {
253                     case ' ': aState = TC_HAS_TAG_NAME;
254                                bCheckEmpty = sal_True;
255                                break;
256                     case '/': aState = TC_CLOSED;
257                                bCheckEmpty = sal_True;
258                                break;
259                     case '>': aState = TC_FINISHED;
260                                bCheckEmpty = sal_True;
261                                break;
262                     default:   aState = TC_ERROR;
263                 }
264                 break;
265 
266 //            CLOSED          ' ' ->  CLOSED_SPACE
267 //            CLOSED          '>' ->  FINISHED
268             case TC_CLOSED:
269                 switch ( cDelim )
270                 {
271                     case ' ': aState = TC_CLOSED_SPACE;
272                                bCheckEmpty = sal_True;
273                                bClosed = sal_True;
274                                break;
275                     case '>': aState = TC_FINISHED;
276                                bCheckEmpty = sal_True;
277                                break;
278                     default:   aState = TC_ERROR;
279                 }
280                 break;
281 
282 //            CLOSED_SPACE    '>' ->  FINISHED
283             case TC_CLOSED_SPACE:
284                 switch ( cDelim )
285                 {
286                     case '>': aState = TC_FINISHED;
287                                bCheckEmpty = sal_True;
288                                break;
289                     default:   aState = TC_ERROR;
290                 }
291                 break;
292 
293 // CLOSETAG            ' ' ->  CLOSETAG_HAS_TAG_NAME
294 // CLOSETAG            '>' ->  FINISHED
295             case TC_CLOSETAG:
296                 bCloseTag = sal_True;
297                 switch ( cDelim )
298                 {
299                     case ' ': aState = TC_CLOSETAG_HAS_TAG_NAME;
300                                aTagName = aPortion;
301                                bCheckName = sal_True;
302                                break;
303                     case '>': aState = TC_FINISHED;
304                                aTagName = aPortion;
305                                bCheckName = sal_True;
306                                break;
307                     default:   aState = TC_ERROR;
308                 }
309                 break;
310 
311 // CLOSETAG_HAS_TAG_NAME       '>' ->  FINISHED
312             case TC_CLOSETAG_HAS_TAG_NAME:
313                 switch ( cDelim )
314                 {
315                     case '>': aState = TC_FINISHED;
316                                bCheckEmpty = sal_True;
317                                break;
318                     default:   aState = TC_ERROR;
319                 }
320                 break;
321 
322 
323             default: rErrorList.AddError( 99, "Internal error Parsing Tag ", *this );
324                      bIsBroken = sal_True;
325 
326         }
327 
328         if ( bCheckName )
329         {
330             if ( aPortion.Len() == 0 )
331             {
332                 rErrorList.AddError( 25, "Tag/Property name missing ", *this );
333                 bIsBroken = sal_True;
334             }
335             else
336             {
337                 aName = ByteString( aPortion, RTL_TEXTENCODING_UTF8 );
338                 // "a-zA-Z_-.0-9"
339                 xub_StrLen nCount;
340                 sal_Bool bBroken = sal_False;
341                 const sal_Char* aBuf = aName.GetBuffer();
342                 for ( nCount = 0 ; !bBroken && nCount < aName.Len() ; nCount++ )
343                 {
344                     bBroken = ! (   ( aBuf[nCount] >= 'a' && aBuf[nCount] <= 'z' )
345                                 ||( aBuf[nCount] >= 'A' && aBuf[nCount] <= 'Z' )
346                                 ||( aBuf[nCount] >= '0' && aBuf[nCount] <= '9' )
347                                 ||( aBuf[nCount] == '_' )
348                                 ||( aBuf[nCount] == '-' )
349                                 ||( aBuf[nCount] == '.' )
350                                 );
351                 }
352 
353                 if ( bBroken )
354                 {
355                     rErrorList.AddError( 25, "Found illegal character in Tag/Property name ", *this );
356                     bIsBroken = sal_True;
357                 }
358             }
359 
360             bCheckName = sal_False;
361         }
362 
363         if ( bCheckEmpty )
364         {
365             if ( aPortion.Len() )
366             {
367                 rErrorList.AddError( 25, ByteString("Found displaced characters '").Append(ByteString( aPortion, RTL_TEXTENCODING_UTF8 )).Append("' in Tag "), *this );
368                 bIsBroken = sal_True;
369             }
370             bCheckEmpty = sal_False;
371         }
372 
373 
374         nLastPos = nCheckPos;
375 
376         // skip further blanks
377         if ( cDelim == ' ' && aState != TC_INSIDE_STRING )
378             while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ')
379                 nLastPos++;
380 
381         nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos );
382     }
383     if ( aState != TC_FINISHED )
384     {
385         rErrorList.AddError( 25, "Parsing error in Tag ", *this );
386         bIsBroken = sal_True;
387     }
388 }
389 
390 sal_Bool TokenInfo::IsPropertyRelevant( const ByteString &aName, const String &aValue ) const
391 {
392     if ( aTagName.EqualsAscii( "alt" ) && aName.Equals( "xml-lang" ) )
393         return sal_False;
394     if ( aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "visibility" ) && aValue.EqualsAscii("visible") )
395         return sal_False;
396     if ( aTagName.EqualsAscii( "image" ) && (aName.Equals( "width" ) || aName.Equals( "height" )) )
397         return sal_False;
398 
399     return sal_True;
400 }
401 
402 sal_Bool TokenInfo::IsPropertyValueValid( const ByteString &aName, const String &aValue ) const
403 {
404 /*  removed due to i56740
405     if ( aTagName.EqualsAscii( "switchinline" ) && aName.Equals( "select" ) )
406     {
407         return aValue.EqualsAscii("sys") ||
408                aValue.EqualsAscii("appl") ||
409                aValue.EqualsAscii("distrib");
410     } */
411     if ( aTagName.EqualsAscii( "caseinline" ) && aName.Equals( "select" ) )
412     {
413         return /*!aValue.EqualsAscii("OS2") &&  removed due to i56740 */
414                !aValue.EqualsAscii("");
415     }
416 
417     // we don't know any better so we assume it to be OK
418     return sal_True;
419 }
420 
421 sal_Bool TokenInfo::IsPropertyInvariant( const ByteString &aName, const String &aValue ) const
422 {
423     if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "name" ) )
424         return sal_False;
425     if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "href" ) )
426     {   // check for external reference
427         if (  aValue.Copy( 0, 5 ).EqualsIgnoreCaseAscii( "http:" )
428            || aValue.Copy( 0, 6 ).EqualsIgnoreCaseAscii( "https:" )
429            || aValue.Copy( 0, 4 ).EqualsIgnoreCaseAscii( "ftp:" ) )
430             return sal_False;
431         else
432             return sal_True;
433     }
434     return sal_True;
435 }
436 
437 sal_Bool TokenInfo::IsPropertyFixable( const ByteString &aName ) const
438 {
439     // name everything that is allowed to be fixed automatically here
440     if ( (aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "hid" ))
441       || (aTagName.EqualsAscii( "link" ) && aName.Equals( "href" ))
442       || (aTagName.EqualsAscii( "alt" ) && aName.Equals( "id" ))
443       || (aTagName.EqualsAscii( "variable" ) && aName.Equals( "id" ))
444       || (aTagName.EqualsAscii( "image" ) && aName.Equals( "src" ))
445       || (aTagName.EqualsAscii( "image" ) && aName.Equals( "id" ) ))
446         return sal_True;
447     return sal_False;
448 }
449 
450 sal_Bool TokenInfo::MatchesTranslation( TokenInfo& rInfo, sal_Bool bGenErrors, ParserMessageList &rErrorList, sal_Bool bFixTags ) const
451 {
452     // check if tags are equal
453     // check if all existing properties are in the translation as well and
454     // wether they have a matching content (the same in most cases)
455 
456     if ( nId != rInfo.nId )
457         return sal_False;
458 
459     if ( !aTagName.Equals( rInfo.aTagName ) )
460         return sal_False;
461 
462     // If one of the tags has formating errors already it does make no sense to check here, so return right away
463     if ( bGenErrors && ( bIsBroken || rInfo.bIsBroken ) )
464         return sal_True;
465 
466 	StringHashMap::const_iterator iProp;
467 	for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp )
468     {
469         if ( rInfo.aProperties.find( iProp->first ) != rInfo.aProperties.end() )
470         {
471             if ( IsPropertyRelevant( iProp->first, iProp->second ) || IsPropertyRelevant( iProp->first, rInfo.aProperties.find( iProp->first )->second ) )
472             {
473                 if ( IsPropertyInvariant( iProp->first, iProp->second ) )
474                 {
475                     if ( !rInfo.aProperties.find( iProp->first )->second.Equals( iProp->second ) )
476                     {
477                         if ( bGenErrors )
478                         {
479                             if ( bFixTags && IsPropertyFixable( iProp->first ) )
480                             {
481                                 rInfo.aProperties.find( iProp->first )->second = iProp->second;
482                                 rInfo.SetHasBeenFixed();
483                                 rErrorList.AddWarning( 25, ByteString("Property '").Append(iProp->first).Append("': FIXED different value in Translation "), *this );
484                             }
485                             else
486                                 rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("': value different in Translation "), *this );
487                         }
488                         else return sal_False;
489                     }
490                 }
491             }
492         }
493         else
494         {
495             if ( IsPropertyRelevant( iProp->first, iProp->second ) )
496             {
497                 if ( bGenErrors )
498                     rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("' missing in Translation "), *this );
499                 else return sal_False;
500             }
501         }
502 	}
503 	for( iProp = rInfo.aProperties.begin() ; iProp != rInfo.aProperties.end(); ++iProp )
504     {
505         if ( aProperties.find( iProp->first ) == aProperties.end() )
506         {
507             if ( IsPropertyRelevant( iProp->first, iProp->second ) )
508             {
509                 if ( bGenErrors )
510                     rErrorList.AddError( 25, ByteString("Extra Property '").Append(iProp->first).Append("' in Translation "), rInfo );
511                 else return sal_False;
512             }
513         }
514 	}
515 
516     // if we reach here eather
517     //   the tags match completely or
518     //   the tags match but not the properties and we generated errors for that
519     return sal_True;
520 }
521 
522 String TokenInfo::GetTagName() const
523 {
524     return aTagName;
525 }
526 
527 String TokenInfo::MakeTag() const
528 {
529     String aRet;
530     aRet.AppendAscii("\\<");
531     if ( bCloseTag )
532         aRet.AppendAscii("/");
533     aRet.Append( GetTagName() );
534 	StringHashMap::const_iterator iProp;
535 
536 	for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp )
537     {
538         aRet.AppendAscii(" ");
539         aRet.Append( String( iProp->first, RTL_TEXTENCODING_UTF8 ) );
540         aRet.AppendAscii("=\\\"");
541         aRet.Append( iProp->second );
542         aRet.AppendAscii("\\\"");
543     }
544     if ( bClosed )
545         aRet.AppendAscii("/");
546     aRet.AppendAscii("\\>");
547     return aRet;
548 }
549 
550 
551 void ParserMessageList::AddError( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag )
552 {
553     Insert( new ParserError( nErrorNr, aErrorText, rTag ), LIST_APPEND );
554 }
555 
556 void ParserMessageList::AddWarning( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag )
557 {
558     Insert( new ParserWarning( nErrorNr, aErrorText, rTag ), LIST_APPEND );
559 }
560 
561 sal_Bool ParserMessageList::HasErrors()
562 {
563     sal_uInt16 i;
564     for ( i=0 ; i < Count() ; i++ )
565         if ( GetObject( i )->IsError() )
566             return sal_True;
567     return sal_False;
568 }
569 
570 struct Tag
571 {
572     String GetName() const { return String::CreateFromAscii( pName ); };
573 	const char* pName;
574 	TokenId nTag;
575 };
576 
577 
// Table mapping the literal text of the known tags to their token ids.
// SimpleParser::GetNextToken and SimpleParser::GetLexem scan this table from
// the top until they find a match or reach the TAG_UNKNOWN_TAG entry, so that
// entry has to stay the last one.
// The old style <#...> tags are kept below as commented-out entries.
static const Tag aKnownTags[] =
{
/*  commenting oldstyle tags
//	{ "<#GROUP_FORMAT>", TAG_GROUP_FORMAT },
	{ "<#BOLD>", TAG_BOLDON },
	{ "<#/BOLD>", TAG_BOLDOFF },
	{ "<#ITALIC>", TAG_ITALICON },
	{ "<#/ITALIC>", TAG_ITALICOFF },
	{ "<#UNDER>", TAG_UNDERLINEON },
	{ "<#/UNDER>", TAG_UNDERLINEOFF },

//	{ "<#GROUP_NOTALLOWED>", TAG_GROUP_NOTALLOWED },
	{ "<#HELPID>", TAG_HELPID },
	{ "<#MODIFY>", TAG_MODIFY },
	{ "<#REFNR>", TAG_REFNR },

//	{ "<#GROUP_STRUCTURE>", TAG_GROUP_STRUCTURE },
	{ "<#NAME>", TAG_NAME },
	{ "<#HREF>", TAG_HREF },
	{ "<#AVIS>", TAG_AVIS },
	{ "<#AHID>", TAG_AHID },
	{ "<#AEND>", TAG_AEND },

	{ "<#TITEL>", TAG_TITEL },
	{ "<#KEY>", TAG_KEY },
	{ "<#INDEX>", TAG_INDEX },

	{ "<#REFSTART>", TAG_REFSTART },

	{ "<#GRAPHIC>", TAG_GRAPHIC },
	{ "<#NEXTVERSION>", TAG_NEXTVERSION },

    //	{ "<#GROUP_SYSSWITCH>", TAG_GROUP_SYSSWITCH },
	{ "<#WIN>", TAG_WIN },
	{ "<#UNIX>", TAG_UNIX },
	{ "<#MAC>", TAG_MAC },
	{ "<#OS2>", TAG_OS2 },

//	{ "<#GROUP_PROGSWITCH>", TAG_GROUP_PROGSWITCH },
	{ "<#WRITER>", TAG_WRITER },
	{ "<#CALC>", TAG_CALC },
	{ "<#DRAW>", TAG_DRAW },
	{ "<#IMPRESS>", TAG_IMPRESS },
	{ "<#SCHEDULE>", TAG_SCHEDULE },
	{ "<#IMAGE>", TAG_IMAGE },
	{ "<#MATH>", TAG_MATH },
	{ "<#CHART>", TAG_CHART },
	{ "<#OFFICE>", TAG_OFFICE },
  */
	// $[...] style variables
//	{ "<#TAG_GROUP_META>", TAG_GROUP_META },
	{ "$[officefullname]", TAG_OFFICEFULLNAME },
	{ "$[officename]", TAG_OFFICENAME },
	{ "$[officepath]", TAG_OFFICEPATH },
	{ "$[officeversion]", TAG_OFFICEVERSION },
	{ "$[portalname]", TAG_PORTALNAME },
	{ "$[portalfullname]", TAG_PORTALFULLNAME },
	{ "$[portalpath]", TAG_PORTALPATH },
	{ "$[portalversion]", TAG_PORTALVERSION },
	{ "$[portalshortname]", TAG_PORTALSHORTNAME },
/*  commenting oldstyle tags
//	{ "<#TAG_GROUP_SINGLE>", TAG_GROUP_SINGLE },
	{ "<#REFINSERT>", TAG_REFINSERT },

//	{ "<#GROUP_MULTI>", TAG_GROUP_MULTI },
	{ "<#END>", TAG_END },
	{ "<#ELSE>", TAG_ELSE },
	{ "<#VERSIONEND>", TAG_VERSIONEND },
	{ "<#ENDGRAPHIC>", TAG_ENDGRAPHIC },*/
	// display names for the \< ... \> style start and end tags
	{ "<Common Tag>", TAG_COMMONSTART },
	{ "</Common Tag>", TAG_COMMONEND },

    { "<no more tags>", TAG_NOMORETAGS },
	// end-of-table marker; the lookups stop here
	{ "", TAG_UNKNOWN_TAG },
};
652 
653 
654 SimpleParser::SimpleParser()
655 : nPos( 0 )
656 , aNextTag( TAG_NOMORETAGS, TOK_INVALIDPOS )
657 {
658 }
659 
660 void SimpleParser::Parse( String PaSource )
661 {
662     aSource = PaSource;
663 	nPos = 0;
664 	aLastToken.Erase();
665     aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
666 	aTokenList.Clear();
667 };
668 
669 TokenInfo SimpleParser::GetNextToken( ParserMessageList &rErrorList )
670 {
671     TokenInfo aResult;
672     sal_uInt16 nTokenStartPos = 0;
673     if ( aNextTag.nId != TAG_NOMORETAGS )
674     {
675         aResult = aNextTag;
676         aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
677     }
678     else
679     {
680 	    aLastToken = GetNextTokenString( rErrorList, nTokenStartPos );
681 	    if ( aLastToken.Len() == 0 )
682 		    return TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
683 
684         // do we have a \< ... \> style tag?
685         if ( aLastToken.Copy(0,2).EqualsAscii( "\\<" ) )
686         {
687             // check for paired \" \"
688             bool bEven = true;
689             sal_uInt16 nQuotePos = 0;
690     	    sal_uInt16 nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"" );
691     	    sal_uInt16 nQuotedBackPos = aLastToken.SearchAscii( "\\\\" );    // this is only to kick out quoted backslashes
692             while ( nQuotedQuotesPos != STRING_NOTFOUND )
693             {
694                 if ( nQuotedBackPos <= nQuotedQuotesPos )
695                     nQuotePos = nQuotedBackPos+2;
696                 else
697                 {
698                     nQuotePos = nQuotedQuotesPos+2;
699                     bEven = !bEven;
700                 }
701 		        nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"", nQuotePos );
702     	        nQuotedBackPos = aLastToken.SearchAscii( "\\\\", nQuotePos );    // this is only to kick out quoted backslashes
703             }
704             if ( !bEven )
705             {
706         		rErrorList.AddError( 24, "Missing quotes ( \\\" ) in Tag", TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken ) );
707             }
708 
709             // check if we have an end-tag or a start-tag
710             sal_uInt16 nNonBlankStartPos,nNonBlankEndPos;
711             nNonBlankStartPos = 2;
712             while ( aLastToken.GetChar(nNonBlankStartPos) == ' ' )
713                 nNonBlankStartPos++;
714             if ( aLastToken.GetChar(nNonBlankStartPos) == '/' )
715                 aResult = TokenInfo( TAG_COMMONEND, nTokenStartPos, aLastToken, rErrorList );
716             else
717             {
718                 aResult = TokenInfo( TAG_COMMONSTART, nTokenStartPos, aLastToken, rErrorList );
719                 nNonBlankEndPos = aLastToken.Len() -3;
720                 while ( aLastToken.GetChar(nNonBlankEndPos) == ' ' )
721                     nNonBlankEndPos--;
722                 if ( aLastToken.GetChar( nNonBlankEndPos ) == '/' )
723                     aNextTag = TokenInfo( TAG_COMMONEND, nTokenStartPos, String::CreateFromAscii("\\</").Append(aResult.GetTagName()).AppendAscii("\\>"), rErrorList );
724             }
725         }
726         else
727         {
728 	        sal_uInt16 i = 0;
729 	        while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG &&
730 		        aLastToken != aKnownTags[i].GetName() )
731 		        i++;
732             aResult = TokenInfo( aKnownTags[i].nTag, nTokenStartPos );
733         }
734     }
735 
736     if ( aResult.nId == TAG_UNKNOWN_TAG )
737         aResult = TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken );
738 	aTokenList.Insert( aResult, LIST_APPEND );
739 	return aResult;
740 }
741 
742 String SimpleParser::GetNextTokenString( ParserMessageList &rErrorList, sal_uInt16 &rTagStartPos )
743 {
744 //	sal_uInt16 nStyle1StartPos = aSource.SearchAscii( "<#", nPos );
745 	sal_uInt16 nStyle2StartPos = aSource.SearchAscii( "$[", nPos );
746 	sal_uInt16 nStyle3StartPos = aSource.SearchAscii( "\\<", nPos );
747 	sal_uInt16 nStyle4StartPos = aSource.SearchAscii( "\\\\", nPos );    // this is only to kick out quoted backslashes
748 
749     rTagStartPos = 0;
750 
751 /* removing since a \<... is not likely
752     // check if the tag starts with a letter to avoid things like <> <= ... >
753     while ( STRING_NOTFOUND != nStyle3StartPos && !( aSource.Copy( nStyle3StartPos+2, 1 ).IsAlphaAscii() || aSource.GetChar( nStyle3StartPos+2 ) == '/' ) )
754     	nStyle3StartPos = aSource.SearchAscii( "\\<", nStyle3StartPos+1 );
755 */
756     if ( STRING_NOTFOUND == nStyle2StartPos && STRING_NOTFOUND == nStyle3StartPos )
757         return String();  // no more tokens
758 
759 	if ( nStyle4StartPos < nStyle2StartPos && nStyle4StartPos <= nStyle3StartPos )  // <= to make sure \\ is always handled first
760     {   // Skip quoted Backslash
761         nPos = nStyle4StartPos +2;
762         return GetNextTokenString( rErrorList, rTagStartPos );
763     }
764 
765 /*	if ( nStyle1StartPos < nStyle2StartPos && nStyle1StartPos <= nStyle3StartPos )  // <= to make sure our spechial tags are recognized before all others
766     {	// test for <# ... > style tokens
767 	    sal_uInt16 nEndPos = aSource.SearchAscii( ">", nStyle1StartPos );
768         if ( nEndPos == STRING_NOTFOUND )
769         {   // Token is incomplete. Skip start and search for better ones
770             nPos = nStyle1StartPos +2;
771             return GetNextTokenString( rErrorList, rTagStartPos );
772         }
773 	    nPos = nEndPos;
774         rTagStartPos = nStyle1StartPos;
775 	    return aSource.Copy( nStyle1StartPos, nEndPos-nStyle1StartPos +1 ).ToUpperAscii();
776     }
777     else*/ if ( nStyle2StartPos < nStyle3StartPos )
778 	{	// test for $[ ... ] style tokens
779 		sal_uInt16 nEndPos = aSource.SearchAscii( "]", nStyle2StartPos);
780         if ( nEndPos == STRING_NOTFOUND )
781         {   // Token is incomplete. Skip start and search for better ones
782             nPos = nStyle2StartPos +2;
783             return GetNextTokenString( rErrorList, rTagStartPos );
784         }
785 		nPos = nEndPos;
786         rTagStartPos = nStyle2StartPos;
787 		return aSource.Copy( nStyle2StartPos, nEndPos-nStyle2StartPos +1 );
788 	}
789     else
790 	{	// test for \< ... \> style tokens
791     	sal_uInt16 nEndPos = aSource.SearchAscii( "\\>", nStyle3StartPos);
792     	sal_uInt16 nQuotedBackPos = aSource.SearchAscii( "\\\\", nStyle3StartPos );    // this is only to kick out quoted backslashes
793         while ( nQuotedBackPos <= nEndPos && nQuotedBackPos != STRING_NOTFOUND )
794         {
795 		    nEndPos = aSource.SearchAscii( "\\>", nQuotedBackPos +2);
796     	    nQuotedBackPos = aSource.SearchAscii( "\\\\", nQuotedBackPos +2 );    // this is only to kick out quoted backslashes
797         }
798         if ( nEndPos == STRING_NOTFOUND )
799         {   // Token is incomplete. Skip start and search for better ones
800             nPos = nStyle3StartPos +2;
801 		    ByteString sTmp( "Tag Start '\\<' without Tag End '\\>': " );
802         	rErrorList.AddError( 24, "Tag Start '\\<' without Tag End '\\>'", TokenInfo( TAG_UNKNOWN_TAG, nStyle3StartPos, aSource.Copy( nStyle3StartPos-10, 20 ) ) );
803             return GetNextTokenString( rErrorList, rTagStartPos );
804         }
805         // check for paired quoted "    -->   \"sometext\"
806 
807         nPos = nEndPos;
808         rTagStartPos = nStyle3StartPos;
809 		return aSource.Copy( nStyle3StartPos, nEndPos-nStyle3StartPos +2 );
810 	}
811 }
812 
813 String SimpleParser::GetLexem( TokenInfo const &aToken )
814 {
815     if ( aToken.aTokenString.Len() )
816         return aToken.aTokenString;
817     else
818     {
819         sal_uInt16 i = 0;
820 	    while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG &&
821 		    aKnownTags[i].nTag != aToken.nId )
822 		    i++;
823 
824         return aKnownTags[i].GetName();
825     }
826 }
827 
828 TokenParser::TokenParser()
829 : pErrorList( NULL )
830 {}
831 
832 void TokenParser::Parse( const String &aCode, ParserMessageList* pList )
833 {
834     pErrorList = pList;
835 
836 	//Scanner initialisieren
837 	aParser.Parse( aCode );
838 
839 	//erstes Symbol holen
840 	aTag = aParser.GetNextToken( *pErrorList );
841 
842 	nPfCaseOptions = 0;
843 	nAppCaseOptions = 0;
844 	bPfCaseActive = sal_False;
845 	bAppCaseActive = sal_False;
846 
847 	nActiveRefTypes = 0;
848 
849     //Ausfuehren der Start-Produktion
850 	Paragraph();
851 
852 	//Es wurde nicht die ganze Kette abgearbeitet, bisher ist aber
853 	//kein Fehler aufgetreten
854 	//=> es wurde ein einleitendes Tag vergessen
855 	if ( aTag.nId != TAG_NOMORETAGS )
856 	{
857 		switch ( aTag.nId )
858 		{
859 			case TAG_END:
860 				{
861 					ParseError( 3, "Extra Tag <#END>. Switch or <#HREF> expected.", aTag );
862 				}
863 				break;
864 			case TAG_BOLDOFF:
865 				{
866 					ParseError( 4, "<#BOLD> expected before <#/BOLD>.", aTag );
867 				}
868 				break;
869 			case TAG_ITALICOFF:
870 				{
871 					ParseError( 5, "<#ITALIC> expected before <#/ITALIC>.", aTag );
872 				}
873 				break;
874 			case TAG_UNDERLINEOFF:
875 				{
876 					ParseError( 17, "<#UNDER> expected before <#/UNDER>.", aTag );
877 				}
878 				break;
879 /*			case TAG_MISSPARENTHESIS:
880 				{
881 					ParseError( 14, "missing closing parenthesis '>'", aTag );
882 				}
883 				break;*/
884 			case TAG_AEND:
885 				{
886 					ParseError( 5, "Extra Tag <#AEND>. <#AVIS> or <#AHID> expected.", aTag );
887 				}
888 				break;
889 			case TAG_ELSE:
890 				{
891 					ParseError( 16, "Application-tag or platform-tag expected before <#ELSE>.", aTag );
892 				}
893 				break;
894 			case TAG_UNKNOWN_TAG:
895 				{
896 					ParseError( 6, "unknown Tag", aTag );
897 				}
898 				break;
899 			default:
900 				{
901 					ParseError( 6, "unexpected Tag", aTag );
902 				}
903 		}
904 	}
905     pErrorList = NULL;
906 }
907 
908 void TokenParser::Paragraph()
909 {
910 	switch ( aTag.nId )
911 	{
912 		case TAG_GRAPHIC:
913 		case TAG_NEXTVERSION:
914 			{
915 				TagRef();
916 				Paragraph();
917 			}
918 			break;
919 		case TAG_AVIS:
920 		case TAG_AHID:
921 			{
922 				TagRef();
923 				Paragraph();
924 			}
925 			break;
926 		case TAG_HELPID:
927 			{
928 				SimpleTag();
929 				Paragraph();
930 			}
931 			break;
932 		case TAG_OFFICEFULLNAME:
933 		case TAG_OFFICENAME:
934 		case TAG_OFFICEPATH:
935 		case TAG_OFFICEVERSION:
936 		case TAG_PORTALNAME:
937 		case TAG_PORTALFULLNAME:
938 		case TAG_PORTALPATH:
939 		case TAG_PORTALVERSION:
940 		case TAG_PORTALSHORTNAME:
941 			{
942 				SimpleTag();
943 				Paragraph();
944 			}
945 			break;
946 		case TAG_REFINSERT:
947 			{
948 				SimpleTag();
949 				Paragraph();
950 			}
951 			break;
952 		case TAG_BOLDON:
953 		case TAG_ITALICON:
954 		case TAG_UNDERLINEON:
955 		case TAG_COMMONSTART:
956 			{
957 				TagPair();
958 				Paragraph();
959 			}
960 			break;
961 		case TAG_HREF:
962 		case TAG_NAME:
963 		case TAG_KEY:
964 		case TAG_INDEX:
965         case TAG_TITEL:
966         case TAG_REFSTART:
967             {
968 				TagRef();
969 				Paragraph();
970 			}
971 			break;
972 		case TAG_OS2:
973 		case TAG_WIN:
974 		case TAG_UNIX:
975 		case TAG_MAC: //...
976 			{
977 				if ( ! bPfCaseActive )
978 				{
979                     //PfCases duerfen nicht verschachtelt sein:
980 					bPfCaseActive = sal_True;
981 					PfCase();
982 
983 					//So jetzt kann wieder ein PfCase kommen:
984 					bPfCaseActive = sal_False;
985 					Paragraph();
986 				}
987 			}
988 			break;
989 		case TAG_WRITER:
990 		case TAG_CALC:
991 		case TAG_DRAW:
992 		case TAG_IMPRESS:
993 		case TAG_SCHEDULE:
994 		case TAG_IMAGE:
995 		case TAG_MATH:
996 		case TAG_CHART:
997 		case TAG_OFFICE:
998 			{
999 				if ( !bAppCaseActive )
1000 				{
1001                     //AppCases duerfen nicht verschachtelt sein:
1002 					bAppCaseActive = sal_True;
1003 					AppCase();
1004 
1005                     //jetzt koennen wieder AppCases kommen:
1006 					bAppCaseActive = sal_False;
1007 					Paragraph();
1008 				}
1009 			}
1010 			break;
1011 
1012 		//Case TAG_BOLDOFF, TAG_ITALICOFF, TAG_BUNDERLINE, TAG_END
1013 			//nichts tun wg. epsilon-Prod.
1014 	}
1015 }
1016 
1017 void TokenParser::PfCase()
1018 {
1019 
1020 	//Produktion:
1021 	//PfCase -> PfCaseBegin Paragraph (PfCase | PfCaseEnd)
1022 
1023 	PfCaseBegin();
1024 
1025 	//Jetzt ist eine PfCase-Produktion aktiv:
1026 	Paragraph();
1027 	switch ( aTag.nId )
1028 	{
1029 		case TAG_ELSE:
1030 		case TAG_END:
1031 			{
1032 				CaseEnd();
1033 			}
1034 			break;
1035 		case TAG_OS2:
1036 		case TAG_WIN:
1037 		case TAG_UNIX:
1038 		case TAG_MAC: //First (PfBegin)
1039 			{
1040 				PfCase();
1041 			}
1042 			break;
1043 		default:
1044 			ParseError( 8, "<#ELSE> or <#END> or platform-tag expected.", aTag );
1045 	}
1046     //Die gemerkten Tags wieder loeschen fuer naechstes PfCase:
1047 	nPfCaseOptions = 0;
1048 }
1049 
1050 void TokenParser::PfCaseBegin()
1051 {
1052 	switch ( aTag.nId )
1053 	{
1054 		case TAG_OS2:
1055 		case TAG_WIN:
1056 		case TAG_UNIX:
1057 		case TAG_MAC:
1058 			{
1059 				//Token darf noch nicht vorgekommen sein im
1060 				//aktuellen Plattform-Case:
1061 				if ( !HAS_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) ) )
1062 				{
1063 					SET_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) );
1064 					match( aTag, aTag );
1065 				}
1066 				else {
1067 					ParseError( 9, "Tag defined twice in the same platform-case", aTag );
1068 				}
1069 			}
1070 	}
1071 }
1072 
1073 void TokenParser::AppCase()
1074 {
1075 
1076 	//Produktion:
1077 	//AppCase -> AppCaseBegin Paragraph (AppCase | AppCaseEnd)
1078 
1079 
1080 	AppCaseBegin();
1081 
1082 	Paragraph();
1083 
1084 	switch ( aTag.nId )
1085 	{
1086 		case TAG_ELSE:
1087 		case TAG_END:
1088 			{
1089 				CaseEnd();
1090 			}
1091 			break;
1092 		case TAG_WRITER:
1093 		case TAG_DRAW:
1094 		case TAG_CALC:
1095 		case TAG_IMAGE:
1096 		case TAG_MATH:
1097 		case TAG_CHART:
1098 		case TAG_OFFICE:
1099 		case TAG_IMPRESS:
1100 		case TAG_SCHEDULE: //First (AppBegin)
1101 			{
1102 				AppCase();
1103 			}
1104 			break;
1105 		default:
1106 			ParseError( 1, "<#ELSE> or <#END> or application-case-tag expected.", aTag );
1107 		}
1108 
1109     //Die gemerkten Tags wieder loeschen fuer naechstes AppCase:
1110 	nAppCaseOptions = 0;
1111 }
1112 
1113 void TokenParser::AppCaseBegin()
1114 {
1115 	switch ( aTag.nId )
1116 	{
1117 		case TAG_WRITER:
1118 		case TAG_DRAW:
1119 		case TAG_CALC:
1120 		case TAG_IMAGE:
1121 		case TAG_MATH:
1122 		case TAG_CHART:
1123 		case TAG_OFFICE:
1124 		case TAG_IMPRESS:
1125 		case TAG_SCHEDULE:
1126 			{
1127 				//Token darf noch nicht vorgekommen sein im
1128 				//aktuellen Plattform-Case:
1129 				if ( !HAS_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) ) )
1130 				{
1131 					SET_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) );
1132 					match( aTag, aTag );
1133 				}
1134 				else {
1135 					ParseError( 13, "Tag defined twice in the same application-case.", aTag );
1136 				}
1137 			}
1138 	}
1139 }
1140 
1141 void TokenParser::CaseEnd()
1142 {
1143 	//Produktion:
1144 	//CaseEnd -> <#ELSE> Paragraph <#END> | <#END>
1145 
1146 	switch ( aTag.nId )
1147 	{
1148 		case TAG_ELSE:
1149 		{
1150 			match( aTag, TAG_ELSE );
1151 			Paragraph();
1152 			match( aTag, TAG_END );
1153 		}
1154 		break;
1155 		case TAG_END:
1156 		{
1157 			match( aTag, TAG_END );
1158 		}
1159 		break;
1160 		default:
1161 			ParseError( 2, "<#ELSE> or <#END> expected.", aTag );
1162 	}
1163 }
1164 
1165 void TokenParser::SimpleTag()
1166 {
1167 
1168 	switch ( aTag.nId )
1169 	{
1170 		case TAG_HELPID:
1171 			{
1172 				match( aTag, TAG_HELPID );
1173     		}
1174 			break;
1175 		case TAG_OFFICEFULLNAME:
1176 		case TAG_OFFICENAME:
1177 		case TAG_OFFICEPATH:
1178 		case TAG_OFFICEVERSION:
1179 		case TAG_PORTALNAME:
1180 		case TAG_PORTALFULLNAME:
1181 		case TAG_PORTALPATH:
1182 		case TAG_PORTALVERSION:
1183 		case TAG_PORTALSHORTNAME:
1184 
1185         case TAG_REFINSERT:
1186 			{
1187 				match( aTag, aTag );
1188     		}
1189 			break;
1190 		default:
1191 			ParseError( 15, "[<#SimpleTag>] expected.", aTag );
1192 	}
1193 }
1194 
1195 void TokenParser::TagPair()
1196 {
1197 	switch ( aTag.nId )
1198 	{
1199 		case TAG_BOLDON:
1200 			{
1201 				match( aTag, TAG_BOLDON );
1202 				Paragraph();
1203 				match( aTag, TAG_BOLDOFF );
1204 			}
1205 			break;
1206 		case TAG_ITALICON:
1207 			{
1208 				match( aTag, TAG_ITALICON );
1209 				Paragraph();
1210 				match( aTag, TAG_ITALICOFF );
1211 			}
1212 			break;
1213 		case TAG_UNDERLINEON:
1214 			{
1215 				match( aTag, TAG_UNDERLINEON );
1216 				Paragraph();
1217 				match( aTag, TAG_UNDERLINEOFF );
1218 			}
1219 			break;
1220 		case TAG_COMMONSTART:
1221 			{
1222                 //remember tag so we can give the original tag in case of an error
1223                 TokenInfo aEndTag( aTag );
1224                 aEndTag.nId = TAG_COMMONEND;
1225 				match( aTag, TAG_COMMONSTART );
1226 				Paragraph();
1227 				match( aTag, aEndTag );
1228 			}
1229 			break;
1230 		default:
1231 			ParseError( 10, "<#BOLD>, <#ITALIC>, <#UNDER> expected.", aTag );
1232 	}
1233 }
1234 
1235 
1236 void TokenParser::TagRef()
1237 {
1238 	switch ( aTag.nId )
1239 	{
1240 		case TAG_GRAPHIC:
1241 		case TAG_NEXTVERSION:
1242             {
1243 				if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1244 				{
1245 					TokenId aThisToken = aTag.nId;
1246 					SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1247 					match( aTag, aTag );
1248 					Paragraph();
1249                     if ( aThisToken == TAG_GRAPHIC )
1250     					match( aTag, TAG_ENDGRAPHIC );
1251                     else
1252     					match( aTag, TAG_VERSIONEND );
1253                     // don't reset since alowed only once per paragraph
1254 					// RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1255 				}
1256 				else
1257 				{
1258 					ParseError( 11, "Tags <#GRAPHIC>,<#NEXTVERSION> allowed only once per paragraph at", aTag );
1259 				}
1260             }
1261 			break;
1262 		case TAG_AVIS:
1263 		case TAG_AHID:
1264 			{
1265 				if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1266 				{
1267 					TokenId aThisToken = aTag.nId;
1268 					SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1269 					match( aTag, aTag );
1270 					Paragraph();
1271 					match( aTag, TAG_AEND );
1272 					RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1273 				}
1274 				else
1275 				{
1276 					ParseError( 11, "Nested <#AHID>,<#AVIS> not allowed.", aTag );
1277 				}
1278 			}
1279 			break;
1280 		case TAG_HREF:
1281 		case TAG_NAME:
1282 			{
1283 
1284 			}
1285 			// NOBREAK
1286 		case TAG_KEY:
1287 		case TAG_INDEX:
1288 		case TAG_TITEL:
1289         case TAG_REFSTART:
1290 			{
1291 				if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1292 				{
1293 					TokenId aThisToken = aTag.nId;
1294 					match( aTag, aTag );
1295 					if ( aThisToken != TAG_NAME )
1296 					{	// TAG_NAME has no TAG_END
1297 						SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1298 						Paragraph();
1299 						match( aTag, TAG_END );
1300 						RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1301 					}
1302 				}
1303 				else
1304 				{
1305 					ParseError( 11, "Nested <#HREF>,<#NAME> or <#KEY> not allowed.", aTag );
1306 				}
1307 			}
1308 			break;
1309 		default:
1310 			ParseError( 12, "<#HREF>,<#NAME> or <#KEY> expected.", aTag );
1311 	}
1312 }
1313 
1314 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenId &aExpectedToken )
1315 {
1316     return match( aCurrentToken, TokenInfo( aExpectedToken, TOK_INVALIDPOS ) );
1317 }
1318 
1319 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenInfo &rExpectedToken )
1320 {
1321     TokenInfo aExpectedToken( rExpectedToken );
1322 	if ( aCurrentToken.nId == aExpectedToken.nId )
1323 	{
1324         if ( ( aCurrentToken.nId == TAG_COMMONEND
1325                && aCurrentToken.GetTagName().Equals( aExpectedToken.GetTagName() ) )
1326              || aCurrentToken.nId != TAG_COMMONEND )
1327         {
1328 		    aTag = aParser.GetNextToken( *pErrorList );
1329 		    return sal_True;
1330         }
1331 	}
1332 
1333     if ( aExpectedToken.nId == TAG_COMMONEND )
1334     {
1335         aExpectedToken.aTokenString.Insert( String::CreateFromAscii( "Close tag for " ), 0 );
1336     }
1337 
1338     ByteString sTmp( "Expected Symbol" );
1339     if ( aCurrentToken.nId == TAG_NOMORETAGS )
1340     {
1341 		ParseError( 7, sTmp, aExpectedToken );
1342     }
1343     else
1344     {
1345 		sTmp += ": ";
1346 		sTmp += ByteString( aParser.GetLexem( aExpectedToken ), RTL_TEXTENCODING_UTF8 );
1347 		sTmp += " near ";
1348 		ParseError( 7, sTmp, aCurrentToken );
1349     }
1350 	return sal_False;
1351 }
1352 
1353 void TokenParser::ParseError( sal_uInt16 nErrNr, ByteString aErrMsg, const TokenInfo &rTag )
1354 {
1355 	pErrorList->AddError( nErrNr, aErrMsg, rTag);
1356 
1357     // Das Fehlerhafte Tag ueberspringen
1358 	aTag = aParser.GetNextToken( *pErrorList );
1359 }
1360 
1361 
1362 ParserMessage::ParserMessage( sal_uInt16 PnErrorNr, ByteString PaErrorText, const TokenInfo &rTag )
1363 		: nErrorNr( PnErrorNr )
1364 		, aErrorText( PaErrorText )
1365 		, nTagBegin( 0 )
1366 		, nTagLength( 0 )
1367 {
1368     String aLexem( SimpleParser::GetLexem( rTag ) );
1369     aErrorText.Append(": ");
1370 	aErrorText += ByteString( aLexem, RTL_TEXTENCODING_UTF8 );
1371     if ( rTag.nId == TAG_NOMORETAGS )
1372         aErrorText.Append(" at end of line ");
1373     else if ( rTag.nPos != TOK_INVALIDPOS )
1374     {
1375         aErrorText.Append(" at Position ");
1376         aErrorText.Append( ByteString::CreateFromInt32( rTag.nPos ) );
1377     }
1378     nTagBegin = rTag.nPos;
1379 	nTagLength = aLexem.Len();
1380 }
1381 
1382 ParserError::ParserError( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag )
1383 : ParserMessage( ErrorNr, ErrorText, rTag )
1384 {}
1385 
1386 ParserWarning::ParserWarning( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag )
1387 : ParserMessage( ErrorNr, ErrorText, rTag )
1388 {}
1389 
1390 sal_Bool LingTest::IsTagMandatory( TokenInfo const &aToken, TokenId &aMetaTokens )
1391 {
1392 	TokenId aTokenId = aToken.nId;
1393 	TokenId aTokenGroup = TAG_GROUP( aTokenId );
1394 	if ( TAG_GROUP_PROGSWITCH == aTokenGroup
1395 		|| TAG_REFINSERT == aTokenId
1396 		|| TAG_REFSTART == aTokenId
1397 		|| TAG_NAME == aTokenId
1398 		|| TAG_HREF == aTokenId
1399 		|| TAG_AVIS == aTokenId
1400 		|| TAG_AHID == aTokenId
1401 		|| TAG_GRAPHIC == aTokenId
1402 		|| TAG_NEXTVERSION == aTokenId
1403         || ( TAG_GROUP_META == aTokenGroup && (aMetaTokens & aTokenId) == aTokenId ) )
1404     {
1405         if ( TAG_GROUP_META == aTokenGroup )
1406             aMetaTokens |= aTokenId;
1407         return sal_True;
1408     }
1409     else if (   TAG_COMMONSTART == aTokenId
1410 		     || TAG_COMMONEND == aTokenId )
1411     {
1412         String aTagName = aToken.GetTagName();
1413         return !(aTagName.EqualsIgnoreCaseAscii( "comment" )
1414               || aTagName.EqualsIgnoreCaseAscii( "bookmark_value" )
1415               || aTagName.EqualsIgnoreCaseAscii( "emph" )
1416               || aTagName.EqualsIgnoreCaseAscii( "item" )
1417               || aTagName.EqualsIgnoreCaseAscii( "br" ) );
1418     }
1419     return sal_False;
1420 }
1421 
1422 void LingTest::CheckTags( TokenList &aReference, TokenList &aTestee, sal_Bool bFixTags )
1423 {
1424 	sal_uLong i=0,j=0;
1425 	// Clean old Warnings
1426 	while ( aCompareWarningList.Count() )
1427 	{
1428 		delete aCompareWarningList.GetCurObject();
1429 		aCompareWarningList.Remove();
1430 	}
1431 
1432     /* in xml tags, do not require the following tags
1433         comment
1434         bookmark_value
1435         emph
1436         item
1437         br
1438     */
1439 
1440     // filter uninteresting Tags
1441     TokenId aMetaTokens = 0;
1442 	for ( i=0 ; i < aReference.Count() ; i++ )
1443 	{
1444 		if ( !IsTagMandatory( aReference.GetObject( i ), aMetaTokens ) )
1445 			aReference.GetObject( i ).SetDone();
1446 	}
1447 
1448     aMetaTokens = 0;
1449 	for ( i=0 ; i < aTestee.Count() ; i++ )
1450 	{
1451 		if ( !IsTagMandatory( aTestee.GetObject( i ), aMetaTokens ) )
1452 			aTestee.GetObject( i ).SetDone();
1453 	}
1454 
1455     // remove all matching tags
1456 	for ( i=0 ; i < aReference.Count() ; i++ )
1457     {
1458         if ( aReference.GetObject( i ).IsDone() )
1459             continue;
1460 
1461     	sal_Bool bTagFound = sal_False;
1462     	for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ )
1463         {
1464             if ( aTestee.GetObject( j ).IsDone() )
1465                 continue;
1466 
1467 		    if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_False, aCompareWarningList ) )
1468             {
1469 			    aReference.GetObject( i ).SetDone();
1470 			    aTestee.GetObject( j ).SetDone();
1471                 bTagFound = sal_True;
1472             }
1473         }
1474     }
1475 
1476     sal_Bool bCanFix = sal_True;
1477 
1478     if ( bFixTags )
1479     {
1480         // we fix only if its a really simple case
1481         sal_uInt16 nTagCount = 0;
1482 	    for ( i=0 ; i < aReference.Count() ; i++ )
1483             if ( !aReference.GetObject( i ).IsDone() )
1484                 nTagCount++;
1485         if ( nTagCount > 1 )
1486             bCanFix = sal_False;
1487 
1488         nTagCount = 0;
1489 	    for ( i=0 ; i < aTestee.Count() ; i++ )
1490             if ( !aTestee.GetObject( i ).IsDone() )
1491                 nTagCount++;
1492         if ( nTagCount > 1 )
1493             bCanFix = sal_False;
1494     }
1495 
1496     // generate errors for tags that have differing attributes
1497 	for ( i=0 ; i < aReference.Count() ; i++ )
1498     {
1499         if ( aReference.GetObject( i ).IsDone() )
1500             continue;
1501 
1502     	sal_Bool bTagFound = sal_False;
1503     	for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ )
1504         {
1505             if ( aTestee.GetObject( j ).IsDone() )
1506                 continue;
1507 
1508 		    if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_True, aCompareWarningList, bCanFix && bFixTags ) )
1509             {
1510 			    aReference.GetObject( i ).SetDone();
1511 			    aTestee.GetObject( j ).SetDone();
1512                 bTagFound = sal_True;
1513             }
1514         }
1515     }
1516 
1517     // list remaining tags as errors
1518 	for ( i=0 ; i < aReference.Count() ; i++ )
1519     {
1520         if ( aReference.GetObject( i ).IsDone() )
1521             continue;
1522 
1523         aCompareWarningList.AddError( 20, "Missing Tag in Translation", aReference.GetObject( i ) );
1524     }
1525 	for ( i=0 ; i < aTestee.Count() ; i++ )
1526     {
1527         if ( aTestee.GetObject( i ).IsDone() )
1528             continue;
1529 
1530 		aCompareWarningList.AddError( 21, "Extra Tag in Translation", aTestee.GetObject( i ) );
1531     }
1532 
1533 	for ( i=0 ; i < aReference.Count() ; i++ )
1534         aReference.GetObject( i ).SetDone( sal_False );
1535 
1536 	for ( i=0 ; i < aTestee.Count() ; i++ )
1537         aTestee.GetObject( i ).SetDone( sal_False );
1538 }
1539 
1540 void LingTest::CheckReference( GSILine *aReference )
1541 {
1542 	aReferenceParser.Parse( aReference->GetUText(), aReference->GetMessageList() );
1543 }
1544 
1545 void LingTest::CheckTestee( GSILine *aTestee, sal_Bool bHasSourceLine, sal_Bool bFixTags )
1546 {
1547     aFixedTestee = aTestee->GetUText();
1548 	aTesteeParser.Parse( aFixedTestee, aTestee->GetMessageList() );
1549 
1550     if ( bHasSourceLine )
1551 	    CheckTags( aReferenceParser.GetTokenList(), aTesteeParser.GetTokenList(), bFixTags );
1552 
1553     if ( bFixTags )
1554     {
1555         TokenList& aTesteeTokens = aTesteeParser.GetTokenList();
1556         sal_Bool bFixesDone = sal_False;
1557         // count backwards to allow replacing from right to left
1558         int i;
1559 	    for ( i=aTesteeTokens.Count()-1 ; i>=0 ; i-- )
1560         {
1561             if ( aTesteeTokens.GetObject( i ).HasBeenFixed() )
1562             {
1563                 bFixesDone = sal_True;
1564                 aFixedTestee.Replace( aTesteeTokens.GetObject( i ).nPos, aTesteeTokens.GetObject( i ).aTokenString.Len(), aTesteeTokens.GetObject( i ).MakeTag() );
1565             }
1566         }
1567         if ( bFixesDone )
1568         {
1569             aTestee->SetUText( aFixedTestee );
1570             aTestee->SetFixed();
1571         }
1572     }
1573 }
1574 
1575