1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_l10ntools.hxx" 26 #include <tools/string.hxx> 27 #include "tagtest.hxx" 28 29 #if OSL_DEBUG_LEVEL > 1 30 #include <stdio.h> 31 #endif 32 33 #include "gsicheck.hxx" 34 35 #define HAS_FLAG( nFlags, nFlag ) ( ( nFlags & nFlag ) != 0 ) 36 #define SET_FLAG( nFlags, nFlag ) ( nFlags |= nFlag ) 37 #define RESET_FLAG( nFlags, nFlag ) ( nFlags &= ~nFlag ) // ~ = Bitweises NOT 38 39 40 41 TokenInfo::TokenInfo( TokenId pnId, sal_uInt16 nP, String paStr, ParserMessageList &rErrorList ) 42 : bClosed(sal_False) 43 , bCloseTag(sal_False) 44 , bIsBroken(sal_False) 45 , bHasBeenFixed(sal_False) 46 , bDone(sal_False) 47 , aTokenString( paStr ) 48 , nId( pnId ) 49 , nPos(nP) 50 { 51 if ( nId == TAG_COMMONSTART || nId == TAG_COMMONEND ) 52 SplitTag( rErrorList ); 53 } 54 55 enum tagcheck { TC_START, TC_HAS_TAG_NAME, TC_HAS_PROP_NAME_EQ, TC_HAS_PROP_NAME_EQ_SP, TC_HAS_PROP_NAME_SP, TC_INSIDE_STRING, TC_PROP_FINISHED, TC_CLOSED, TC_CLOSED_SPACE, TC_CLOSETAG, TC_CLOSETAG_HAS_TAG_NAME, TC_FINISHED, TC_ERROR }; 56 57 /* 58 \< link href = \"text\" name = \"C\" \> 59 START ' ' -> HAS_TAG_NAME 60 START '/' -> CLOSED 61 START '/' -> CLOSETAG - no Portion (starting with /) 62 START '>' -> FINISHED 63 HAS_TAG_NAME '=' -> HAS_PROP_NAME_EQ 64 HAS_TAG_NAME ' ' -> HAS_PROP_NAME_SP 65 HAS_TAG_NAME '/' -> CLOSED 66 HAS_TAG_NAME '>' -> FINISHED 67 HAS_PROP_NAME_SP '=' -> HAS_PROP_NAME_EQ 68 HAS_PROP_NAME_EQ ' ' -> HAS_PROP_NAME_EQ_SP 69 HAS_PROP_NAME_EQ '"' -> INSIDE_STRING 70 HAS_PROP_NAME_EQ_SP '"' -> INSIDE_STRING 71 INSIDE_STRING ' ' -> INSIDE_STRING 72 INSIDE_STRING '=' -> INSIDE_STRING 73 INSIDE_STRING '>' -> INSIDE_STRING 74 INSIDE_STRING '"' -> PROP_FINISHED 75 PROP_FINISHED ' ' -> HAS_TAG_NAME 76 PROP_FINISHED '/' -> CLOSED 77 PROP_FINISHED '>' -> FINISHED 78 CLOSED ' ' -> CLOSED_SPACE 79 CLOSED '>' -> FINISHED 80 CLOSED_SPACE '>' -> FINISHED 81 82 CLOSETAG ' ' -> CLOSETAG_HAS_TAG_NAME 83 CLOSETAG '>' -> FINISHED 84 CLOSETAG_HAS_TAG_NAME '>' -> FINISHED 85 86 */ 87 void TokenInfo::SplitTag( ParserMessageList &rErrorList ) 88 { 89 sal_uInt16 nLastPos = 2; // skip initial \< 90 sal_uInt16 nCheckPos = nLastPos; 91 String aDelims( String::CreateFromAscii( " \\=>/" ) ); 92 String aPortion; 93 String aValue; // store the value of a property 94 ByteString aName; // store the name of a property/tag 95 sal_Bool bCheckName = sal_False; 96 sal_Bool bCheckEmpty = sal_False; 97 sal_Unicode cDelim; 98 tagcheck aState = TC_START; 99 100 // skip blanks 101 while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ') 102 nLastPos++; 103 104 nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos ); 105 while ( nCheckPos != STRING_NOTFOUND && !( aState == TC_FINISHED || aState == TC_ERROR ) ) 106 { 107 aPortion = aTokenString.Copy( nLastPos, nCheckPos-nLastPos ); 108 109 if ( aTokenString.GetChar( nCheckPos ) == '\\' ) 110 nCheckPos++; 111 112 cDelim = aTokenString.GetChar( nCheckPos ); 113 nCheckPos++; 114 115 switch ( aState ) 116 { 117 // START ' ' -> HAS_TAG_NAME 118 // START '/' -> CLOSED 119 // START '>' -> FINISHED 120 case TC_START: 121 aTagName = aPortion; 122 switch ( cDelim ) 123 { 124 case ' ': aState = TC_HAS_TAG_NAME; 125 bCheckName = sal_True; 126 break; 127 case '/': 128 { 129 if ( aPortion.Len() == 0 ) 130 { 131 aState = TC_CLOSETAG; 132 } 133 else 134 { 135 aState = TC_CLOSED; 136 bCheckName = sal_True; 137 } 138 } 139 break; 140 case '>': aState = TC_FINISHED; 141 bCheckName = sal_True; 142 break; 143 default: aState = TC_ERROR; 144 } 145 break; 146 147 // HAS_TAG_NAME '=' -> HAS_PROP_NAME_EQ 148 // HAS_TAG_NAME ' ' -> HAS_PROP_NAME_SP 149 // HAS_TAG_NAME '/' -> CLOSED 150 // HAS_TAG_NAME '>' -> FINISHED 151 case TC_HAS_TAG_NAME: 152 switch ( cDelim ) 153 { 154 case '=': aState = TC_HAS_PROP_NAME_EQ; 155 bCheckName = sal_True; 156 break; 157 case ' ': aState = TC_HAS_PROP_NAME_SP; 158 bCheckName = sal_True; 159 break; 160 case '/': aState = TC_CLOSED; 161 bCheckEmpty = sal_True; 162 break; 163 case '>': aState = TC_FINISHED; 164 bCheckEmpty = sal_True; 165 break; 166 default: aState = TC_ERROR; 167 } 168 break; 169 170 // HAS_PROP_NAME_SP '=' -> HAS_PROP_NAME_EQ 171 case TC_HAS_PROP_NAME_SP: 172 switch ( cDelim ) 173 { 174 case '=': aState = TC_HAS_PROP_NAME_EQ; 175 bCheckEmpty = sal_True; 176 break; 177 default: aState = TC_ERROR; 178 } 179 break; 180 181 // HAS_PROP_NAME_EQ ' ' -> HAS_PROP_NAME_EQ_SP 182 // HAS_PROP_NAME_EQ '"' -> INSIDE_STRING 183 case TC_HAS_PROP_NAME_EQ: 184 switch ( cDelim ) 185 { 186 case ' ': aState = TC_HAS_PROP_NAME_EQ_SP; 187 bCheckEmpty = sal_True; 188 break; 189 case '\"': aState = TC_INSIDE_STRING; 190 bCheckEmpty = sal_True; 191 aValue.Erase(); 192 break; 193 default: aState = TC_ERROR; 194 } 195 break; 196 197 // HAS_PROP_NAME_EQ_SP '"' -> INSIDE_STRING 198 case TC_HAS_PROP_NAME_EQ_SP: 199 switch ( cDelim ) 200 { 201 case '\"': aState = TC_INSIDE_STRING; 202 bCheckEmpty = sal_True; 203 aValue.Erase(); 204 break; 205 default: aState = TC_ERROR; 206 } 207 break; 208 209 // INSIDE_STRING * -> INSIDE_STRING 210 // INSIDE_STRING '"' -> PROP_FINISHED 211 case TC_INSIDE_STRING: 212 switch ( cDelim ) 213 { 214 case '\"': 215 { 216 aState = TC_PROP_FINISHED; 217 aValue += aPortion; 218 if ( aProperties.find( aName ) == aProperties.end() ) 219 { 220 if ( !IsPropertyValueValid( aName, aValue ) ) 221 { 222 rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' has invalid value '").Append(ByteString( aValue, RTL_TEXTENCODING_UTF8 )).Append("' "), *this ); 223 bIsBroken = sal_True; 224 } 225 aProperties[ aName ] = aValue; 226 } 227 else 228 { 229 rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' defined twice "), *this ); 230 bIsBroken = sal_True; 231 } 232 } 233 break; 234 default: 235 { 236 aState = TC_INSIDE_STRING; 237 aValue += aPortion; 238 aValue += cDelim; 239 } 240 } 241 break; 242 243 // PROP_FINISHED ' ' -> HAS_TAG_NAME 244 // PROP_FINISHED '/' -> CLOSED 245 // PROP_FINISHED '>' -> FINISHED 246 case TC_PROP_FINISHED: 247 switch ( cDelim ) 248 { 249 case ' ': aState = TC_HAS_TAG_NAME; 250 bCheckEmpty = sal_True; 251 break; 252 case '/': aState = TC_CLOSED; 253 bCheckEmpty = sal_True; 254 break; 255 case '>': aState = TC_FINISHED; 256 bCheckEmpty = sal_True; 257 break; 258 default: aState = TC_ERROR; 259 } 260 break; 261 262 // CLOSED ' ' -> CLOSED_SPACE 263 // CLOSED '>' -> FINISHED 264 case TC_CLOSED: 265 switch ( cDelim ) 266 { 267 case ' ': aState = TC_CLOSED_SPACE; 268 bCheckEmpty = sal_True; 269 bClosed = sal_True; 270 break; 271 case '>': aState = TC_FINISHED; 272 bCheckEmpty = sal_True; 273 break; 274 default: aState = TC_ERROR; 275 } 276 break; 277 278 // CLOSED_SPACE '>' -> FINISHED 279 case TC_CLOSED_SPACE: 280 switch ( cDelim ) 281 { 282 case '>': aState = TC_FINISHED; 283 bCheckEmpty = sal_True; 284 break; 285 default: aState = TC_ERROR; 286 } 287 break; 288 289 // CLOSETAG ' ' -> CLOSETAG_HAS_TAG_NAME 290 // CLOSETAG '>' -> FINISHED 291 case TC_CLOSETAG: 292 bCloseTag = sal_True; 293 switch ( cDelim ) 294 { 295 case ' ': aState = TC_CLOSETAG_HAS_TAG_NAME; 296 aTagName = aPortion; 297 bCheckName = sal_True; 298 break; 299 case '>': aState = TC_FINISHED; 300 aTagName = aPortion; 301 bCheckName = sal_True; 302 break; 303 default: aState = TC_ERROR; 304 } 305 break; 306 307 // CLOSETAG_HAS_TAG_NAME '>' -> FINISHED 308 case TC_CLOSETAG_HAS_TAG_NAME: 309 switch ( cDelim ) 310 { 311 case '>': aState = TC_FINISHED; 312 bCheckEmpty = sal_True; 313 break; 314 default: aState = TC_ERROR; 315 } 316 break; 317 318 319 default: rErrorList.AddError( 99, "Internal error Parsing Tag ", *this ); 320 bIsBroken = sal_True; 321 322 } 323 324 if ( bCheckName ) 325 { 326 if ( aPortion.Len() == 0 ) 327 { 328 rErrorList.AddError( 25, "Tag/Property name missing ", *this ); 329 bIsBroken = sal_True; 330 } 331 else 332 { 333 aName = ByteString( aPortion, RTL_TEXTENCODING_UTF8 ); 334 // "a-zA-Z_-.0-9" 335 xub_StrLen nCount; 336 sal_Bool bBroken = sal_False; 337 const sal_Char* aBuf = aName.GetBuffer(); 338 for ( nCount = 0 ; !bBroken && nCount < aName.Len() ; nCount++ ) 339 { 340 bBroken = ! ( ( aBuf[nCount] >= 'a' && aBuf[nCount] <= 'z' ) 341 ||( aBuf[nCount] >= 'A' && aBuf[nCount] <= 'Z' ) 342 ||( aBuf[nCount] >= '0' && aBuf[nCount] <= '9' ) 343 ||( aBuf[nCount] == '_' ) 344 ||( aBuf[nCount] == '-' ) 345 ||( aBuf[nCount] == '.' ) 346 ); 347 } 348 349 if ( bBroken ) 350 { 351 rErrorList.AddError( 25, "Found illegal character in Tag/Property name ", *this ); 352 bIsBroken = sal_True; 353 } 354 } 355 356 bCheckName = sal_False; 357 } 358 359 if ( bCheckEmpty ) 360 { 361 if ( aPortion.Len() ) 362 { 363 rErrorList.AddError( 25, ByteString("Found displaced characters '").Append(ByteString( aPortion, RTL_TEXTENCODING_UTF8 )).Append("' in Tag "), *this ); 364 bIsBroken = sal_True; 365 } 366 bCheckEmpty = sal_False; 367 } 368 369 370 nLastPos = nCheckPos; 371 372 // skip further blanks 373 if ( cDelim == ' ' && aState != TC_INSIDE_STRING ) 374 while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ') 375 nLastPos++; 376 377 nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos ); 378 } 379 if ( aState != TC_FINISHED ) 380 { 381 rErrorList.AddError( 25, "Parsing error in Tag ", *this ); 382 bIsBroken = sal_True; 383 } 384 } 385 386 sal_Bool TokenInfo::IsPropertyRelevant( const ByteString &aName, const String &aValue ) const 387 { 388 if ( aTagName.EqualsAscii( "alt" ) && aName.Equals( "xml-lang" ) ) 389 return sal_False; 390 if ( aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "visibility" ) && aValue.EqualsAscii("visible") ) 391 return sal_False; 392 if ( aTagName.EqualsAscii( "image" ) && (aName.Equals( "width" ) || aName.Equals( "height" )) ) 393 return sal_False; 394 395 return sal_True; 396 } 397 398 sal_Bool TokenInfo::IsPropertyValueValid( const ByteString &aName, const String &aValue ) const 399 { 400 /* removed due to i56740 401 if ( aTagName.EqualsAscii( "switchinline" ) && aName.Equals( "select" ) ) 402 { 403 return aValue.EqualsAscii("sys") || 404 aValue.EqualsAscii("appl") || 405 aValue.EqualsAscii("distrib"); 406 } */ 407 if ( aTagName.EqualsAscii( "caseinline" ) && aName.Equals( "select" ) ) 408 { 409 return /*!aValue.EqualsAscii("OS2") && removed due to i56740 */ 410 !aValue.EqualsAscii(""); 411 } 412 413 // we don't know any better so we assume it to be OK 414 return sal_True; 415 } 416 417 sal_Bool TokenInfo::IsPropertyInvariant( const ByteString &aName, const String &aValue ) const 418 { 419 if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "name" ) ) 420 return sal_False; 421 if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "href" ) ) 422 { // check for external reference 423 if ( aValue.Copy( 0, 5 ).EqualsIgnoreCaseAscii( "http:" ) 424 || aValue.Copy( 0, 6 ).EqualsIgnoreCaseAscii( "https:" ) 425 || aValue.Copy( 0, 4 ).EqualsIgnoreCaseAscii( "ftp:" ) ) 426 return sal_False; 427 else 428 return sal_True; 429 } 430 return sal_True; 431 } 432 433 sal_Bool TokenInfo::IsPropertyFixable( const ByteString &aName ) const 434 { 435 // name everything that is allowed to be fixed automatically here 436 if ( (aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "hid" )) 437 || (aTagName.EqualsAscii( "link" ) && aName.Equals( "href" )) 438 || (aTagName.EqualsAscii( "alt" ) && aName.Equals( "id" )) 439 || (aTagName.EqualsAscii( "variable" ) && aName.Equals( "id" )) 440 || (aTagName.EqualsAscii( "image" ) && aName.Equals( "src" )) 441 || (aTagName.EqualsAscii( "image" ) && aName.Equals( "id" ) )) 442 return sal_True; 443 return sal_False; 444 } 445 446 sal_Bool TokenInfo::MatchesTranslation( TokenInfo& rInfo, sal_Bool bGenErrors, ParserMessageList &rErrorList, sal_Bool bFixTags ) const 447 { 448 // check if tags are equal 449 // check if all existing properties are in the translation as well and 450 // wether they have a matching content (the same in most cases) 451 452 if ( nId != rInfo.nId ) 453 return sal_False; 454 455 if ( !aTagName.Equals( rInfo.aTagName ) ) 456 return sal_False; 457 458 // If one of the tags has formating errors already it does make no sense to check here, so return right away 459 if ( bGenErrors && ( bIsBroken || rInfo.bIsBroken ) ) 460 return sal_True; 461 462 StringHashMap::const_iterator iProp; 463 for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp ) 464 { 465 if ( rInfo.aProperties.find( iProp->first ) != rInfo.aProperties.end() ) 466 { 467 if ( IsPropertyRelevant( iProp->first, iProp->second ) || IsPropertyRelevant( iProp->first, rInfo.aProperties.find( iProp->first )->second ) ) 468 { 469 if ( IsPropertyInvariant( iProp->first, iProp->second ) ) 470 { 471 if ( !rInfo.aProperties.find( iProp->first )->second.Equals( iProp->second ) ) 472 { 473 if ( bGenErrors ) 474 { 475 if ( bFixTags && IsPropertyFixable( iProp->first ) ) 476 { 477 rInfo.aProperties.find( iProp->first )->second = iProp->second; 478 rInfo.SetHasBeenFixed(); 479 rErrorList.AddWarning( 25, ByteString("Property '").Append(iProp->first).Append("': FIXED different value in Translation "), *this ); 480 } 481 else 482 rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("': value different in Translation "), *this ); 483 } 484 else return sal_False; 485 } 486 } 487 } 488 } 489 else 490 { 491 if ( IsPropertyRelevant( iProp->first, iProp->second ) ) 492 { 493 if ( bGenErrors ) 494 rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("' missing in Translation "), *this ); 495 else return sal_False; 496 } 497 } 498 } 499 for( iProp = rInfo.aProperties.begin() ; iProp != rInfo.aProperties.end(); ++iProp ) 500 { 501 if ( aProperties.find( iProp->first ) == aProperties.end() ) 502 { 503 if ( IsPropertyRelevant( iProp->first, iProp->second ) ) 504 { 505 if ( bGenErrors ) 506 rErrorList.AddError( 25, ByteString("Extra Property '").Append(iProp->first).Append("' in Translation "), rInfo ); 507 else return sal_False; 508 } 509 } 510 } 511 512 // if we reach here eather 513 // the tags match completely or 514 // the tags match but not the properties and we generated errors for that 515 return sal_True; 516 } 517 518 String TokenInfo::GetTagName() const 519 { 520 return aTagName; 521 } 522 523 String TokenInfo::MakeTag() const 524 { 525 String aRet; 526 aRet.AppendAscii("\\<"); 527 if ( bCloseTag ) 528 aRet.AppendAscii("/"); 529 aRet.Append( GetTagName() ); 530 StringHashMap::const_iterator iProp; 531 532 for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp ) 533 { 534 aRet.AppendAscii(" "); 535 aRet.Append( String( iProp->first, RTL_TEXTENCODING_UTF8 ) ); 536 aRet.AppendAscii("=\\\""); 537 aRet.Append( iProp->second ); 538 aRet.AppendAscii("\\\""); 539 } 540 if ( bClosed ) 541 aRet.AppendAscii("/"); 542 aRet.AppendAscii("\\>"); 543 return aRet; 544 } 545 546 547 void ParserMessageList::AddError( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag ) 548 { 549 Insert( new ParserError( nErrorNr, aErrorText, rTag ), LIST_APPEND ); 550 } 551 552 void ParserMessageList::AddWarning( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag ) 553 { 554 Insert( new ParserWarning( nErrorNr, aErrorText, rTag ), LIST_APPEND ); 555 } 556 557 sal_Bool ParserMessageList::HasErrors() 558 { 559 sal_uInt16 i; 560 for ( i=0 ; i < Count() ; i++ ) 561 if ( GetObject( i )->IsError() ) 562 return sal_True; 563 return sal_False; 564 } 565 566 struct Tag 567 { 568 String GetName() const { return String::CreateFromAscii( pName ); }; 569 const char* pName; 570 TokenId nTag; 571 }; 572 573 574 static const Tag aKnownTags[] = 575 { 576 /* commenting oldstyle tags 577 // { "<#GROUP_FORMAT>", TAG_GROUP_FORMAT }, 578 { "<#BOLD>", TAG_BOLDON }, 579 { "<#/BOLD>", TAG_BOLDOFF }, 580 { "<#ITALIC>", TAG_ITALICON }, 581 { "<#/ITALIC>", TAG_ITALICOFF }, 582 { "<#UNDER>", TAG_UNDERLINEON }, 583 { "<#/UNDER>", TAG_UNDERLINEOFF }, 584 585 // { "<#GROUP_NOTALLOWED>", TAG_GROUP_NOTALLOWED }, 586 { "<#HELPID>", TAG_HELPID }, 587 { "<#MODIFY>", TAG_MODIFY }, 588 { "<#REFNR>", TAG_REFNR }, 589 590 // { "<#GROUP_STRUCTURE>", TAG_GROUP_STRUCTURE }, 591 { "<#NAME>", TAG_NAME }, 592 { "<#HREF>", TAG_HREF }, 593 { "<#AVIS>", TAG_AVIS }, 594 { "<#AHID>", TAG_AHID }, 595 { "<#AEND>", TAG_AEND }, 596 597 { "<#TITEL>", TAG_TITEL }, 598 { "<#KEY>", TAG_KEY }, 599 { "<#INDEX>", TAG_INDEX }, 600 601 { "<#REFSTART>", TAG_REFSTART }, 602 603 { "<#GRAPHIC>", TAG_GRAPHIC }, 604 { "<#NEXTVERSION>", TAG_NEXTVERSION }, 605 606 // { "<#GROUP_SYSSWITCH>", TAG_GROUP_SYSSWITCH }, 607 { "<#WIN>", TAG_WIN }, 608 { "<#UNIX>", TAG_UNIX }, 609 { "<#MAC>", TAG_MAC }, 610 { "<#OS2>", TAG_OS2 }, 611 612 // { "<#GROUP_PROGSWITCH>", TAG_GROUP_PROGSWITCH }, 613 { "<#WRITER>", TAG_WRITER }, 614 { "<#CALC>", TAG_CALC }, 615 { "<#DRAW>", TAG_DRAW }, 616 { "<#IMPRESS>", TAG_IMPRESS }, 617 { "<#SCHEDULE>", TAG_SCHEDULE }, 618 { "<#IMAGE>", TAG_IMAGE }, 619 { "<#MATH>", TAG_MATH }, 620 { "<#CHART>", TAG_CHART }, 621 { "<#OFFICE>", TAG_OFFICE }, 622 */ 623 // { "<#TAG_GROUP_META>", TAG_GROUP_META }, 624 { "$[officefullname]", TAG_OFFICEFULLNAME }, 625 { "$[officename]", TAG_OFFICENAME }, 626 { "$[officepath]", TAG_OFFICEPATH }, 627 { "$[officeversion]", TAG_OFFICEVERSION }, 628 { "$[portalname]", TAG_PORTALNAME }, 629 { "$[portalfullname]", TAG_PORTALFULLNAME }, 630 { "$[portalpath]", TAG_PORTALPATH }, 631 { "$[portalversion]", TAG_PORTALVERSION }, 632 { "$[portalshortname]", TAG_PORTALSHORTNAME }, 633 /* commenting oldstyle tags 634 // { "<#TAG_GROUP_SINGLE>", TAG_GROUP_SINGLE }, 635 { "<#REFINSERT>", TAG_REFINSERT }, 636 637 // { "<#GROUP_MULTI>", TAG_GROUP_MULTI }, 638 { "<#END>", TAG_END }, 639 { "<#ELSE>", TAG_ELSE }, 640 { "<#VERSIONEND>", TAG_VERSIONEND }, 641 { "<#ENDGRAPHIC>", TAG_ENDGRAPHIC },*/ 642 { "<Common Tag>", TAG_COMMONSTART }, 643 { "</Common Tag>", TAG_COMMONEND }, 644 645 { "<no more tags>", TAG_NOMORETAGS }, 646 { "", TAG_UNKNOWN_TAG }, 647 }; 648 649 650 SimpleParser::SimpleParser() 651 : nPos( 0 ) 652 , aNextTag( TAG_NOMORETAGS, TOK_INVALIDPOS ) 653 { 654 } 655 656 void SimpleParser::Parse( String PaSource ) 657 { 658 aSource = PaSource; 659 nPos = 0; 660 aLastToken.Erase(); 661 aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS ); 662 aTokenList.Clear(); 663 }; 664 665 TokenInfo SimpleParser::GetNextToken( ParserMessageList &rErrorList ) 666 { 667 TokenInfo aResult; 668 sal_uInt16 nTokenStartPos = 0; 669 if ( aNextTag.nId != TAG_NOMORETAGS ) 670 { 671 aResult = aNextTag; 672 aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS ); 673 } 674 else 675 { 676 aLastToken = GetNextTokenString( rErrorList, nTokenStartPos ); 677 if ( aLastToken.Len() == 0 ) 678 return TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS ); 679 680 // do we have a \< ... \> style tag? 681 if ( aLastToken.Copy(0,2).EqualsAscii( "\\<" ) ) 682 { 683 // check for paired \" \" 684 bool bEven = true; 685 sal_uInt16 nQuotePos = 0; 686 sal_uInt16 nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"" ); 687 sal_uInt16 nQuotedBackPos = aLastToken.SearchAscii( "\\\\" ); // this is only to kick out quoted backslashes 688 while ( nQuotedQuotesPos != STRING_NOTFOUND ) 689 { 690 if ( nQuotedBackPos <= nQuotedQuotesPos ) 691 nQuotePos = nQuotedBackPos+2; 692 else 693 { 694 nQuotePos = nQuotedQuotesPos+2; 695 bEven = !bEven; 696 } 697 nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"", nQuotePos ); 698 nQuotedBackPos = aLastToken.SearchAscii( "\\\\", nQuotePos ); // this is only to kick out quoted backslashes 699 } 700 if ( !bEven ) 701 { 702 rErrorList.AddError( 24, "Missing quotes ( \\\" ) in Tag", TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken ) ); 703 } 704 705 // check if we have an end-tag or a start-tag 706 sal_uInt16 nNonBlankStartPos,nNonBlankEndPos; 707 nNonBlankStartPos = 2; 708 while ( aLastToken.GetChar(nNonBlankStartPos) == ' ' ) 709 nNonBlankStartPos++; 710 if ( aLastToken.GetChar(nNonBlankStartPos) == '/' ) 711 aResult = TokenInfo( TAG_COMMONEND, nTokenStartPos, aLastToken, rErrorList ); 712 else 713 { 714 aResult = TokenInfo( TAG_COMMONSTART, nTokenStartPos, aLastToken, rErrorList ); 715 nNonBlankEndPos = aLastToken.Len() -3; 716 while ( aLastToken.GetChar(nNonBlankEndPos) == ' ' ) 717 nNonBlankEndPos--; 718 if ( aLastToken.GetChar( nNonBlankEndPos ) == '/' ) 719 aNextTag = TokenInfo( TAG_COMMONEND, nTokenStartPos, String::CreateFromAscii("\\</").Append(aResult.GetTagName()).AppendAscii("\\>"), rErrorList ); 720 } 721 } 722 else 723 { 724 sal_uInt16 i = 0; 725 while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG && 726 aLastToken != aKnownTags[i].GetName() ) 727 i++; 728 aResult = TokenInfo( aKnownTags[i].nTag, nTokenStartPos ); 729 } 730 } 731 732 if ( aResult.nId == TAG_UNKNOWN_TAG ) 733 aResult = TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken ); 734 aTokenList.Insert( aResult, LIST_APPEND ); 735 return aResult; 736 } 737 738 String SimpleParser::GetNextTokenString( ParserMessageList &rErrorList, sal_uInt16 &rTagStartPos ) 739 { 740 // sal_uInt16 nStyle1StartPos = aSource.SearchAscii( "<#", nPos ); 741 sal_uInt16 nStyle2StartPos = aSource.SearchAscii( "$[", nPos ); 742 sal_uInt16 nStyle3StartPos = aSource.SearchAscii( "\\<", nPos ); 743 sal_uInt16 nStyle4StartPos = aSource.SearchAscii( "\\\\", nPos ); // this is only to kick out quoted backslashes 744 745 rTagStartPos = 0; 746 747 /* removing since a \<... is not likely 748 // check if the tag starts with a letter to avoid things like <> <= ... > 749 while ( STRING_NOTFOUND != nStyle3StartPos && !( aSource.Copy( nStyle3StartPos+2, 1 ).IsAlphaAscii() || aSource.GetChar( nStyle3StartPos+2 ) == '/' ) ) 750 nStyle3StartPos = aSource.SearchAscii( "\\<", nStyle3StartPos+1 ); 751 */ 752 if ( STRING_NOTFOUND == nStyle2StartPos && STRING_NOTFOUND == nStyle3StartPos ) 753 return String(); // no more tokens 754 755 if ( nStyle4StartPos < nStyle2StartPos && nStyle4StartPos <= nStyle3StartPos ) // <= to make sure \\ is always handled first 756 { // Skip quoted Backslash 757 nPos = nStyle4StartPos +2; 758 return GetNextTokenString( rErrorList, rTagStartPos ); 759 } 760 761 /* if ( nStyle1StartPos < nStyle2StartPos && nStyle1StartPos <= nStyle3StartPos ) // <= to make sure our spechial tags are recognized before all others 762 { // test for <# ... > style tokens 763 sal_uInt16 nEndPos = aSource.SearchAscii( ">", nStyle1StartPos ); 764 if ( nEndPos == STRING_NOTFOUND ) 765 { // Token is incomplete. Skip start and search for better ones 766 nPos = nStyle1StartPos +2; 767 return GetNextTokenString( rErrorList, rTagStartPos ); 768 } 769 nPos = nEndPos; 770 rTagStartPos = nStyle1StartPos; 771 return aSource.Copy( nStyle1StartPos, nEndPos-nStyle1StartPos +1 ).ToUpperAscii(); 772 } 773 else*/ if ( nStyle2StartPos < nStyle3StartPos ) 774 { // test for $[ ... ] style tokens 775 sal_uInt16 nEndPos = aSource.SearchAscii( "]", nStyle2StartPos); 776 if ( nEndPos == STRING_NOTFOUND ) 777 { // Token is incomplete. Skip start and search for better ones 778 nPos = nStyle2StartPos +2; 779 return GetNextTokenString( rErrorList, rTagStartPos ); 780 } 781 nPos = nEndPos; 782 rTagStartPos = nStyle2StartPos; 783 return aSource.Copy( nStyle2StartPos, nEndPos-nStyle2StartPos +1 ); 784 } 785 else 786 { // test for \< ... \> style tokens 787 sal_uInt16 nEndPos = aSource.SearchAscii( "\\>", nStyle3StartPos); 788 sal_uInt16 nQuotedBackPos = aSource.SearchAscii( "\\\\", nStyle3StartPos ); // this is only to kick out quoted backslashes 789 while ( nQuotedBackPos <= nEndPos && nQuotedBackPos != STRING_NOTFOUND ) 790 { 791 nEndPos = aSource.SearchAscii( "\\>", nQuotedBackPos +2); 792 nQuotedBackPos = aSource.SearchAscii( "\\\\", nQuotedBackPos +2 ); // this is only to kick out quoted backslashes 793 } 794 if ( nEndPos == STRING_NOTFOUND ) 795 { // Token is incomplete. Skip start and search for better ones 796 nPos = nStyle3StartPos +2; 797 ByteString sTmp( "Tag Start '\\<' without Tag End '\\>': " ); 798 rErrorList.AddError( 24, "Tag Start '\\<' without Tag End '\\>'", TokenInfo( TAG_UNKNOWN_TAG, nStyle3StartPos, aSource.Copy( nStyle3StartPos-10, 20 ) ) ); 799 return GetNextTokenString( rErrorList, rTagStartPos ); 800 } 801 // check for paired quoted " --> \"sometext\" 802 803 nPos = nEndPos; 804 rTagStartPos = nStyle3StartPos; 805 return aSource.Copy( nStyle3StartPos, nEndPos-nStyle3StartPos +2 ); 806 } 807 } 808 809 String SimpleParser::GetLexem( TokenInfo const &aToken ) 810 { 811 if ( aToken.aTokenString.Len() ) 812 return aToken.aTokenString; 813 else 814 { 815 sal_uInt16 i = 0; 816 while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG && 817 aKnownTags[i].nTag != aToken.nId ) 818 i++; 819 820 return aKnownTags[i].GetName(); 821 } 822 } 823 824 TokenParser::TokenParser() 825 : pErrorList( NULL ) 826 {} 827 828 void TokenParser::Parse( const String &aCode, ParserMessageList* pList ) 829 { 830 pErrorList = pList; 831 832 //Scanner initialisieren 833 aParser.Parse( aCode ); 834 835 //erstes Symbol holen 836 aTag = aParser.GetNextToken( *pErrorList ); 837 838 nPfCaseOptions = 0; 839 nAppCaseOptions = 0; 840 bPfCaseActive = sal_False; 841 bAppCaseActive = sal_False; 842 843 nActiveRefTypes = 0; 844 845 //Ausfuehren der Start-Produktion 846 Paragraph(); 847 848 //Es wurde nicht die ganze Kette abgearbeitet, bisher ist aber 849 //kein Fehler aufgetreten 850 //=> es wurde ein einleitendes Tag vergessen 851 if ( aTag.nId != TAG_NOMORETAGS ) 852 { 853 switch ( aTag.nId ) 854 { 855 case TAG_END: 856 { 857 ParseError( 3, "Extra Tag <#END>. Switch or <#HREF> expected.", aTag ); 858 } 859 break; 860 case TAG_BOLDOFF: 861 { 862 ParseError( 4, "<#BOLD> expected before <#/BOLD>.", aTag ); 863 } 864 break; 865 case TAG_ITALICOFF: 866 { 867 ParseError( 5, "<#ITALIC> expected before <#/ITALIC>.", aTag ); 868 } 869 break; 870 case TAG_UNDERLINEOFF: 871 { 872 ParseError( 17, "<#UNDER> expected before <#/UNDER>.", aTag ); 873 } 874 break; 875 /* case TAG_MISSPARENTHESIS: 876 { 877 ParseError( 14, "missing closing parenthesis '>'", aTag ); 878 } 879 break;*/ 880 case TAG_AEND: 881 { 882 ParseError( 5, "Extra Tag <#AEND>. <#AVIS> or <#AHID> expected.", aTag ); 883 } 884 break; 885 case TAG_ELSE: 886 { 887 ParseError( 16, "Application-tag or platform-tag expected before <#ELSE>.", aTag ); 888 } 889 break; 890 case TAG_UNKNOWN_TAG: 891 { 892 ParseError( 6, "unknown Tag", aTag ); 893 } 894 break; 895 default: 896 { 897 ParseError( 6, "unexpected Tag", aTag ); 898 } 899 } 900 } 901 pErrorList = NULL; 902 } 903 904 void TokenParser::Paragraph() 905 { 906 switch ( aTag.nId ) 907 { 908 case TAG_GRAPHIC: 909 case TAG_NEXTVERSION: 910 { 911 TagRef(); 912 Paragraph(); 913 } 914 break; 915 case TAG_AVIS: 916 case TAG_AHID: 917 { 918 TagRef(); 919 Paragraph(); 920 } 921 break; 922 case TAG_HELPID: 923 { 924 SimpleTag(); 925 Paragraph(); 926 } 927 break; 928 case TAG_OFFICEFULLNAME: 929 case TAG_OFFICENAME: 930 case TAG_OFFICEPATH: 931 case TAG_OFFICEVERSION: 932 case TAG_PORTALNAME: 933 case TAG_PORTALFULLNAME: 934 case TAG_PORTALPATH: 935 case TAG_PORTALVERSION: 936 case TAG_PORTALSHORTNAME: 937 { 938 SimpleTag(); 939 Paragraph(); 940 } 941 break; 942 case TAG_REFINSERT: 943 { 944 SimpleTag(); 945 Paragraph(); 946 } 947 break; 948 case TAG_BOLDON: 949 case TAG_ITALICON: 950 case TAG_UNDERLINEON: 951 case TAG_COMMONSTART: 952 { 953 TagPair(); 954 Paragraph(); 955 } 956 break; 957 case TAG_HREF: 958 case TAG_NAME: 959 case TAG_KEY: 960 case TAG_INDEX: 961 case TAG_TITEL: 962 case TAG_REFSTART: 963 { 964 TagRef(); 965 Paragraph(); 966 } 967 break; 968 case TAG_OS2: 969 case TAG_WIN: 970 case TAG_UNIX: 971 case TAG_MAC: //... 972 { 973 if ( ! bPfCaseActive ) 974 { 975 //PfCases duerfen nicht verschachtelt sein: 976 bPfCaseActive = sal_True; 977 PfCase(); 978 979 //So jetzt kann wieder ein PfCase kommen: 980 bPfCaseActive = sal_False; 981 Paragraph(); 982 } 983 } 984 break; 985 case TAG_WRITER: 986 case TAG_CALC: 987 case TAG_DRAW: 988 case TAG_IMPRESS: 989 case TAG_SCHEDULE: 990 case TAG_IMAGE: 991 case TAG_MATH: 992 case TAG_CHART: 993 case TAG_OFFICE: 994 { 995 if ( !bAppCaseActive ) 996 { 997 //AppCases duerfen nicht verschachtelt sein: 998 bAppCaseActive = sal_True; 999 AppCase(); 1000 1001 //jetzt koennen wieder AppCases kommen: 1002 bAppCaseActive = sal_False; 1003 Paragraph(); 1004 } 1005 } 1006 break; 1007 1008 //Case TAG_BOLDOFF, TAG_ITALICOFF, TAG_BUNDERLINE, TAG_END 1009 //nichts tun wg. epsilon-Prod. 1010 } 1011 } 1012 1013 void TokenParser::PfCase() 1014 { 1015 1016 //Produktion: 1017 //PfCase -> PfCaseBegin Paragraph (PfCase | PfCaseEnd) 1018 1019 PfCaseBegin(); 1020 1021 //Jetzt ist eine PfCase-Produktion aktiv: 1022 Paragraph(); 1023 switch ( aTag.nId ) 1024 { 1025 case TAG_ELSE: 1026 case TAG_END: 1027 { 1028 CaseEnd(); 1029 } 1030 break; 1031 case TAG_OS2: 1032 case TAG_WIN: 1033 case TAG_UNIX: 1034 case TAG_MAC: //First (PfBegin) 1035 { 1036 PfCase(); 1037 } 1038 break; 1039 default: 1040 ParseError( 8, "<#ELSE> or <#END> or platform-tag expected.", aTag ); 1041 } 1042 //Die gemerkten Tags wieder loeschen fuer naechstes PfCase: 1043 nPfCaseOptions = 0; 1044 } 1045 1046 void TokenParser::PfCaseBegin() 1047 { 1048 switch ( aTag.nId ) 1049 { 1050 case TAG_OS2: 1051 case TAG_WIN: 1052 case TAG_UNIX: 1053 case TAG_MAC: 1054 { 1055 //Token darf noch nicht vorgekommen sein im 1056 //aktuellen Plattform-Case: 1057 if ( !HAS_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) ) ) 1058 { 1059 SET_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) ); 1060 match( aTag, aTag ); 1061 } 1062 else { 1063 ParseError( 9, "Tag defined twice in the same platform-case", aTag ); 1064 } 1065 } 1066 } 1067 } 1068 1069 void TokenParser::AppCase() 1070 { 1071 1072 //Produktion: 1073 //AppCase -> AppCaseBegin Paragraph (AppCase | AppCaseEnd) 1074 1075 1076 AppCaseBegin(); 1077 1078 Paragraph(); 1079 1080 switch ( aTag.nId ) 1081 { 1082 case TAG_ELSE: 1083 case TAG_END: 1084 { 1085 CaseEnd(); 1086 } 1087 break; 1088 case TAG_WRITER: 1089 case TAG_DRAW: 1090 case TAG_CALC: 1091 case TAG_IMAGE: 1092 case TAG_MATH: 1093 case TAG_CHART: 1094 case TAG_OFFICE: 1095 case TAG_IMPRESS: 1096 case TAG_SCHEDULE: //First (AppBegin) 1097 { 1098 AppCase(); 1099 } 1100 break; 1101 default: 1102 ParseError( 1, "<#ELSE> or <#END> or application-case-tag expected.", aTag ); 1103 } 1104 1105 //Die gemerkten Tags wieder loeschen fuer naechstes AppCase: 1106 nAppCaseOptions = 0; 1107 } 1108 1109 void TokenParser::AppCaseBegin() 1110 { 1111 switch ( aTag.nId ) 1112 { 1113 case TAG_WRITER: 1114 case TAG_DRAW: 1115 case TAG_CALC: 1116 case TAG_IMAGE: 1117 case TAG_MATH: 1118 case TAG_CHART: 1119 case TAG_OFFICE: 1120 case TAG_IMPRESS: 1121 case TAG_SCHEDULE: 1122 { 1123 //Token darf noch nicht vorgekommen sein im 1124 //aktuellen Plattform-Case: 1125 if ( !HAS_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) ) ) 1126 { 1127 SET_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) ); 1128 match( aTag, aTag ); 1129 } 1130 else { 1131 ParseError( 13, "Tag defined twice in the same application-case.", aTag ); 1132 } 1133 } 1134 } 1135 } 1136 1137 void TokenParser::CaseEnd() 1138 { 1139 //Produktion: 1140 //CaseEnd -> <#ELSE> Paragraph <#END> | <#END> 1141 1142 switch ( aTag.nId ) 1143 { 1144 case TAG_ELSE: 1145 { 1146 match( aTag, TAG_ELSE ); 1147 Paragraph(); 1148 match( aTag, TAG_END ); 1149 } 1150 break; 1151 case TAG_END: 1152 { 1153 match( aTag, TAG_END ); 1154 } 1155 break; 1156 default: 1157 ParseError( 2, "<#ELSE> or <#END> expected.", aTag ); 1158 } 1159 } 1160 1161 void TokenParser::SimpleTag() 1162 { 1163 1164 switch ( aTag.nId ) 1165 { 1166 case TAG_HELPID: 1167 { 1168 match( aTag, TAG_HELPID ); 1169 } 1170 break; 1171 case TAG_OFFICEFULLNAME: 1172 case TAG_OFFICENAME: 1173 case TAG_OFFICEPATH: 1174 case TAG_OFFICEVERSION: 1175 case TAG_PORTALNAME: 1176 case TAG_PORTALFULLNAME: 1177 case TAG_PORTALPATH: 1178 case TAG_PORTALVERSION: 1179 case TAG_PORTALSHORTNAME: 1180 1181 case TAG_REFINSERT: 1182 { 1183 match( aTag, aTag ); 1184 } 1185 break; 1186 default: 1187 ParseError( 15, "[<#SimpleTag>] expected.", aTag ); 1188 } 1189 } 1190 1191 void TokenParser::TagPair() 1192 { 1193 switch ( aTag.nId ) 1194 { 1195 case TAG_BOLDON: 1196 { 1197 match( aTag, TAG_BOLDON ); 1198 Paragraph(); 1199 match( aTag, TAG_BOLDOFF ); 1200 } 1201 break; 1202 case TAG_ITALICON: 1203 { 1204 match( aTag, TAG_ITALICON ); 1205 Paragraph(); 1206 match( aTag, TAG_ITALICOFF ); 1207 } 1208 break; 1209 case TAG_UNDERLINEON: 1210 { 1211 match( aTag, TAG_UNDERLINEON ); 1212 Paragraph(); 1213 match( aTag, TAG_UNDERLINEOFF ); 1214 } 1215 break; 1216 case TAG_COMMONSTART: 1217 { 1218 //remember tag so we can give the original tag in case of an error 1219 TokenInfo aEndTag( aTag ); 1220 aEndTag.nId = TAG_COMMONEND; 1221 match( aTag, TAG_COMMONSTART ); 1222 Paragraph(); 1223 match( aTag, aEndTag ); 1224 } 1225 break; 1226 default: 1227 ParseError( 10, "<#BOLD>, <#ITALIC>, <#UNDER> expected.", aTag ); 1228 } 1229 } 1230 1231 1232 void TokenParser::TagRef() 1233 { 1234 switch ( aTag.nId ) 1235 { 1236 case TAG_GRAPHIC: 1237 case TAG_NEXTVERSION: 1238 { 1239 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) ) 1240 { 1241 TokenId aThisToken = aTag.nId; 1242 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1243 match( aTag, aTag ); 1244 Paragraph(); 1245 if ( aThisToken == TAG_GRAPHIC ) 1246 match( aTag, TAG_ENDGRAPHIC ); 1247 else 1248 match( aTag, TAG_VERSIONEND ); 1249 // don't reset since alowed only once per paragraph 1250 // RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1251 } 1252 else 1253 { 1254 ParseError( 11, "Tags <#GRAPHIC>,<#NEXTVERSION> allowed only once per paragraph at", aTag ); 1255 } 1256 } 1257 break; 1258 case TAG_AVIS: 1259 case TAG_AHID: 1260 { 1261 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) ) 1262 { 1263 TokenId aThisToken = aTag.nId; 1264 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1265 match( aTag, aTag ); 1266 Paragraph(); 1267 match( aTag, TAG_AEND ); 1268 RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1269 } 1270 else 1271 { 1272 ParseError( 11, "Nested <#AHID>,<#AVIS> not allowed.", aTag ); 1273 } 1274 } 1275 break; 1276 case TAG_HREF: 1277 case TAG_NAME: 1278 { 1279 1280 } 1281 // NOBREAK 1282 case TAG_KEY: 1283 case TAG_INDEX: 1284 case TAG_TITEL: 1285 case TAG_REFSTART: 1286 { 1287 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) ) 1288 { 1289 TokenId aThisToken = aTag.nId; 1290 match( aTag, aTag ); 1291 if ( aThisToken != TAG_NAME ) 1292 { // TAG_NAME has no TAG_END 1293 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1294 Paragraph(); 1295 match( aTag, TAG_END ); 1296 RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1297 } 1298 } 1299 else 1300 { 1301 ParseError( 11, "Nested <#HREF>,<#NAME> or <#KEY> not allowed.", aTag ); 1302 } 1303 } 1304 break; 1305 default: 1306 ParseError( 12, "<#HREF>,<#NAME> or <#KEY> expected.", aTag ); 1307 } 1308 } 1309 1310 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenId &aExpectedToken ) 1311 { 1312 return match( aCurrentToken, TokenInfo( aExpectedToken, TOK_INVALIDPOS ) ); 1313 } 1314 1315 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenInfo &rExpectedToken ) 1316 { 1317 TokenInfo aExpectedToken( rExpectedToken ); 1318 if ( aCurrentToken.nId == aExpectedToken.nId ) 1319 { 1320 if ( ( aCurrentToken.nId == TAG_COMMONEND 1321 && aCurrentToken.GetTagName().Equals( aExpectedToken.GetTagName() ) ) 1322 || aCurrentToken.nId != TAG_COMMONEND ) 1323 { 1324 aTag = aParser.GetNextToken( *pErrorList ); 1325 return sal_True; 1326 } 1327 } 1328 1329 if ( aExpectedToken.nId == TAG_COMMONEND ) 1330 { 1331 aExpectedToken.aTokenString.Insert( String::CreateFromAscii( "Close tag for " ), 0 ); 1332 } 1333 1334 ByteString sTmp( "Expected Symbol" ); 1335 if ( aCurrentToken.nId == TAG_NOMORETAGS ) 1336 { 1337 ParseError( 7, sTmp, aExpectedToken ); 1338 } 1339 else 1340 { 1341 sTmp += ": "; 1342 sTmp += ByteString( aParser.GetLexem( aExpectedToken ), RTL_TEXTENCODING_UTF8 ); 1343 sTmp += " near "; 1344 ParseError( 7, sTmp, aCurrentToken ); 1345 } 1346 return sal_False; 1347 } 1348 1349 void TokenParser::ParseError( sal_uInt16 nErrNr, ByteString aErrMsg, const TokenInfo &rTag ) 1350 { 1351 pErrorList->AddError( nErrNr, aErrMsg, rTag); 1352 1353 // Das Fehlerhafte Tag ueberspringen 1354 aTag = aParser.GetNextToken( *pErrorList ); 1355 } 1356 1357 1358 ParserMessage::ParserMessage( sal_uInt16 PnErrorNr, ByteString PaErrorText, const TokenInfo &rTag ) 1359 : nErrorNr( PnErrorNr ) 1360 , aErrorText( PaErrorText ) 1361 , nTagBegin( 0 ) 1362 , nTagLength( 0 ) 1363 { 1364 String aLexem( SimpleParser::GetLexem( rTag ) ); 1365 aErrorText.Append(": "); 1366 aErrorText += ByteString( aLexem, RTL_TEXTENCODING_UTF8 ); 1367 if ( rTag.nId == TAG_NOMORETAGS ) 1368 aErrorText.Append(" at end of line "); 1369 else if ( rTag.nPos != TOK_INVALIDPOS ) 1370 { 1371 aErrorText.Append(" at Position "); 1372 aErrorText.Append( ByteString::CreateFromInt32( rTag.nPos ) ); 1373 } 1374 nTagBegin = rTag.nPos; 1375 nTagLength = aLexem.Len(); 1376 } 1377 1378 ParserError::ParserError( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag ) 1379 : ParserMessage( ErrorNr, ErrorText, rTag ) 1380 {} 1381 1382 ParserWarning::ParserWarning( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag ) 1383 : ParserMessage( ErrorNr, ErrorText, rTag ) 1384 {} 1385 1386 sal_Bool LingTest::IsTagMandatory( TokenInfo const &aToken, TokenId &aMetaTokens ) 1387 { 1388 TokenId aTokenId = aToken.nId; 1389 TokenId aTokenGroup = TAG_GROUP( aTokenId ); 1390 if ( TAG_GROUP_PROGSWITCH == aTokenGroup 1391 || TAG_REFINSERT == aTokenId 1392 || TAG_REFSTART == aTokenId 1393 || TAG_NAME == aTokenId 1394 || TAG_HREF == aTokenId 1395 || TAG_AVIS == aTokenId 1396 || TAG_AHID == aTokenId 1397 || TAG_GRAPHIC == aTokenId 1398 || TAG_NEXTVERSION == aTokenId 1399 || ( TAG_GROUP_META == aTokenGroup && (aMetaTokens & aTokenId) == aTokenId ) ) 1400 { 1401 if ( TAG_GROUP_META == aTokenGroup ) 1402 aMetaTokens |= aTokenId; 1403 return sal_True; 1404 } 1405 else if ( TAG_COMMONSTART == aTokenId 1406 || TAG_COMMONEND == aTokenId ) 1407 { 1408 String aTagName = aToken.GetTagName(); 1409 return !(aTagName.EqualsIgnoreCaseAscii( "comment" ) 1410 || aTagName.EqualsIgnoreCaseAscii( "bookmark_value" ) 1411 || aTagName.EqualsIgnoreCaseAscii( "emph" ) 1412 || aTagName.EqualsIgnoreCaseAscii( "item" ) 1413 || aTagName.EqualsIgnoreCaseAscii( "br" ) ); 1414 } 1415 return sal_False; 1416 } 1417 1418 void LingTest::CheckTags( TokenList &aReference, TokenList &aTestee, sal_Bool bFixTags ) 1419 { 1420 sal_uLong i=0,j=0; 1421 // Clean old Warnings 1422 while ( aCompareWarningList.Count() ) 1423 { 1424 delete aCompareWarningList.GetCurObject(); 1425 aCompareWarningList.Remove(); 1426 } 1427 1428 /* in xml tags, do not require the following tags 1429 comment 1430 bookmark_value 1431 emph 1432 item 1433 br 1434 */ 1435 1436 // filter uninteresting Tags 1437 TokenId aMetaTokens = 0; 1438 for ( i=0 ; i < aReference.Count() ; i++ ) 1439 { 1440 if ( !IsTagMandatory( aReference.GetObject( i ), aMetaTokens ) ) 1441 aReference.GetObject( i ).SetDone(); 1442 } 1443 1444 aMetaTokens = 0; 1445 for ( i=0 ; i < aTestee.Count() ; i++ ) 1446 { 1447 if ( !IsTagMandatory( aTestee.GetObject( i ), aMetaTokens ) ) 1448 aTestee.GetObject( i ).SetDone(); 1449 } 1450 1451 // remove all matching tags 1452 for ( i=0 ; i < aReference.Count() ; i++ ) 1453 { 1454 if ( aReference.GetObject( i ).IsDone() ) 1455 continue; 1456 1457 sal_Bool bTagFound = sal_False; 1458 for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ ) 1459 { 1460 if ( aTestee.GetObject( j ).IsDone() ) 1461 continue; 1462 1463 if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_False, aCompareWarningList ) ) 1464 { 1465 aReference.GetObject( i ).SetDone(); 1466 aTestee.GetObject( j ).SetDone(); 1467 bTagFound = sal_True; 1468 } 1469 } 1470 } 1471 1472 sal_Bool bCanFix = sal_True; 1473 1474 if ( bFixTags ) 1475 { 1476 // we fix only if its a really simple case 1477 sal_uInt16 nTagCount = 0; 1478 for ( i=0 ; i < aReference.Count() ; i++ ) 1479 if ( !aReference.GetObject( i ).IsDone() ) 1480 nTagCount++; 1481 if ( nTagCount > 1 ) 1482 bCanFix = sal_False; 1483 1484 nTagCount = 0; 1485 for ( i=0 ; i < aTestee.Count() ; i++ ) 1486 if ( !aTestee.GetObject( i ).IsDone() ) 1487 nTagCount++; 1488 if ( nTagCount > 1 ) 1489 bCanFix = sal_False; 1490 } 1491 1492 // generate errors for tags that have differing attributes 1493 for ( i=0 ; i < aReference.Count() ; i++ ) 1494 { 1495 if ( aReference.GetObject( i ).IsDone() ) 1496 continue; 1497 1498 sal_Bool bTagFound = sal_False; 1499 for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ ) 1500 { 1501 if ( aTestee.GetObject( j ).IsDone() ) 1502 continue; 1503 1504 if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_True, aCompareWarningList, bCanFix && bFixTags ) ) 1505 { 1506 aReference.GetObject( i ).SetDone(); 1507 aTestee.GetObject( j ).SetDone(); 1508 bTagFound = sal_True; 1509 } 1510 } 1511 } 1512 1513 // list remaining tags as errors 1514 for ( i=0 ; i < aReference.Count() ; i++ ) 1515 { 1516 if ( aReference.GetObject( i ).IsDone() ) 1517 continue; 1518 1519 aCompareWarningList.AddError( 20, "Missing Tag in Translation", aReference.GetObject( i ) ); 1520 } 1521 for ( i=0 ; i < aTestee.Count() ; i++ ) 1522 { 1523 if ( aTestee.GetObject( i ).IsDone() ) 1524 continue; 1525 1526 aCompareWarningList.AddError( 21, "Extra Tag in Translation", aTestee.GetObject( i ) ); 1527 } 1528 1529 for ( i=0 ; i < aReference.Count() ; i++ ) 1530 aReference.GetObject( i ).SetDone( sal_False ); 1531 1532 for ( i=0 ; i < aTestee.Count() ; i++ ) 1533 aTestee.GetObject( i ).SetDone( sal_False ); 1534 } 1535 1536 void LingTest::CheckReference( GSILine *aReference ) 1537 { 1538 aReferenceParser.Parse( aReference->GetUText(), aReference->GetMessageList() ); 1539 } 1540 1541 void LingTest::CheckTestee( GSILine *aTestee, sal_Bool bHasSourceLine, sal_Bool bFixTags ) 1542 { 1543 aFixedTestee = aTestee->GetUText(); 1544 aTesteeParser.Parse( aFixedTestee, aTestee->GetMessageList() ); 1545 1546 if ( bHasSourceLine ) 1547 CheckTags( aReferenceParser.GetTokenList(), aTesteeParser.GetTokenList(), bFixTags ); 1548 1549 if ( bFixTags ) 1550 { 1551 TokenList& aTesteeTokens = aTesteeParser.GetTokenList(); 1552 sal_Bool bFixesDone = sal_False; 1553 // count backwards to allow replacing from right to left 1554 int i; 1555 for ( i=aTesteeTokens.Count()-1 ; i>=0 ; i-- ) 1556 { 1557 if ( aTesteeTokens.GetObject( i ).HasBeenFixed() ) 1558 { 1559 bFixesDone = sal_True; 1560 aFixedTestee.Replace( aTesteeTokens.GetObject( i ).nPos, aTesteeTokens.GetObject( i ).aTokenString.Len(), aTesteeTokens.GetObject( i ).MakeTag() ); 1561 } 1562 } 1563 if ( bFixesDone ) 1564 { 1565 aTestee->SetUText( aFixedTestee ); 1566 aTestee->SetFixed(); 1567 } 1568 } 1569 } 1570 1571