1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_l10ntools.hxx" 30 #include <tools/string.hxx> 31 #include "tagtest.hxx" 32 33 #if OSL_DEBUG_LEVEL > 1 34 #include <stdio.h> 35 #endif 36 37 #include "gsicheck.hxx" 38 39 #define HAS_FLAG( nFlags, nFlag ) ( ( nFlags & nFlag ) != 0 ) 40 #define SET_FLAG( nFlags, nFlag ) ( nFlags |= nFlag ) 41 #define RESET_FLAG( nFlags, nFlag ) ( nFlags &= ~nFlag ) // ~ = Bitweises NOT 42 43 44 45 TokenInfo::TokenInfo( TokenId pnId, sal_uInt16 nP, String paStr, ParserMessageList &rErrorList ) 46 : bClosed(sal_False) 47 , bCloseTag(sal_False) 48 , bIsBroken(sal_False) 49 , bHasBeenFixed(sal_False) 50 , bDone(sal_False) 51 , aTokenString( paStr ) 52 , nId( pnId ) 53 , nPos(nP) 54 { 55 if ( nId == TAG_COMMONSTART || nId == TAG_COMMONEND ) 56 SplitTag( rErrorList ); 57 } 58 59 enum tagcheck { TC_START, TC_HAS_TAG_NAME, TC_HAS_PROP_NAME_EQ, TC_HAS_PROP_NAME_EQ_SP, TC_HAS_PROP_NAME_SP, TC_INSIDE_STRING, TC_PROP_FINISHED, TC_CLOSED, TC_CLOSED_SPACE, TC_CLOSETAG, TC_CLOSETAG_HAS_TAG_NAME, TC_FINISHED, TC_ERROR }; 60 61 /* 62 \< link href = \"text\" name = \"C\" \> 63 START ' ' -> HAS_TAG_NAME 64 START '/' -> CLOSED 65 START '/' -> CLOSETAG - no Portion (starting with /) 66 START '>' -> FINISHED 67 HAS_TAG_NAME '=' -> HAS_PROP_NAME_EQ 68 HAS_TAG_NAME ' ' -> HAS_PROP_NAME_SP 69 HAS_TAG_NAME '/' -> CLOSED 70 HAS_TAG_NAME '>' -> FINISHED 71 HAS_PROP_NAME_SP '=' -> HAS_PROP_NAME_EQ 72 HAS_PROP_NAME_EQ ' ' -> HAS_PROP_NAME_EQ_SP 73 HAS_PROP_NAME_EQ '"' -> INSIDE_STRING 74 HAS_PROP_NAME_EQ_SP '"' -> INSIDE_STRING 75 INSIDE_STRING ' ' -> INSIDE_STRING 76 INSIDE_STRING '=' -> INSIDE_STRING 77 INSIDE_STRING '>' -> INSIDE_STRING 78 INSIDE_STRING '"' -> PROP_FINISHED 79 PROP_FINISHED ' ' -> HAS_TAG_NAME 80 PROP_FINISHED '/' -> CLOSED 81 PROP_FINISHED '>' -> FINISHED 82 CLOSED ' ' -> CLOSED_SPACE 83 CLOSED '>' -> FINISHED 84 CLOSED_SPACE '>' -> FINISHED 85 86 CLOSETAG ' ' -> CLOSETAG_HAS_TAG_NAME 87 CLOSETAG '>' -> FINISHED 88 CLOSETAG_HAS_TAG_NAME '>' -> FINISHED 89 90 */ 91 void TokenInfo::SplitTag( ParserMessageList &rErrorList ) 92 { 93 sal_uInt16 nLastPos = 2; // skip initial \< 94 sal_uInt16 nCheckPos = nLastPos; 95 String aDelims( String::CreateFromAscii( " \\=>/" ) ); 96 String aPortion; 97 String aValue; // store the value of a property 98 ByteString aName; // store the name of a property/tag 99 sal_Bool bCheckName = sal_False; 100 sal_Bool bCheckEmpty = sal_False; 101 sal_Unicode cDelim; 102 tagcheck aState = TC_START; 103 104 // skip blanks 105 while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ') 106 nLastPos++; 107 108 nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos ); 109 while ( nCheckPos != STRING_NOTFOUND && !( aState == TC_FINISHED || aState == TC_ERROR ) ) 110 { 111 aPortion = aTokenString.Copy( nLastPos, nCheckPos-nLastPos ); 112 113 if ( aTokenString.GetChar( nCheckPos ) == '\\' ) 114 nCheckPos++; 115 116 cDelim = aTokenString.GetChar( nCheckPos ); 117 nCheckPos++; 118 119 switch ( aState ) 120 { 121 // START ' ' -> HAS_TAG_NAME 122 // START '/' -> CLOSED 123 // START '>' -> FINISHED 124 case TC_START: 125 aTagName = aPortion; 126 switch ( cDelim ) 127 { 128 case ' ': aState = TC_HAS_TAG_NAME; 129 bCheckName = sal_True; 130 break; 131 case '/': 132 { 133 if ( aPortion.Len() == 0 ) 134 { 135 aState = TC_CLOSETAG; 136 } 137 else 138 { 139 aState = TC_CLOSED; 140 bCheckName = sal_True; 141 } 142 } 143 break; 144 case '>': aState = TC_FINISHED; 145 bCheckName = sal_True; 146 break; 147 default: aState = TC_ERROR; 148 } 149 break; 150 151 // HAS_TAG_NAME '=' -> HAS_PROP_NAME_EQ 152 // HAS_TAG_NAME ' ' -> HAS_PROP_NAME_SP 153 // HAS_TAG_NAME '/' -> CLOSED 154 // HAS_TAG_NAME '>' -> FINISHED 155 case TC_HAS_TAG_NAME: 156 switch ( cDelim ) 157 { 158 case '=': aState = TC_HAS_PROP_NAME_EQ; 159 bCheckName = sal_True; 160 break; 161 case ' ': aState = TC_HAS_PROP_NAME_SP; 162 bCheckName = sal_True; 163 break; 164 case '/': aState = TC_CLOSED; 165 bCheckEmpty = sal_True; 166 break; 167 case '>': aState = TC_FINISHED; 168 bCheckEmpty = sal_True; 169 break; 170 default: aState = TC_ERROR; 171 } 172 break; 173 174 // HAS_PROP_NAME_SP '=' -> HAS_PROP_NAME_EQ 175 case TC_HAS_PROP_NAME_SP: 176 switch ( cDelim ) 177 { 178 case '=': aState = TC_HAS_PROP_NAME_EQ; 179 bCheckEmpty = sal_True; 180 break; 181 default: aState = TC_ERROR; 182 } 183 break; 184 185 // HAS_PROP_NAME_EQ ' ' -> HAS_PROP_NAME_EQ_SP 186 // HAS_PROP_NAME_EQ '"' -> INSIDE_STRING 187 case TC_HAS_PROP_NAME_EQ: 188 switch ( cDelim ) 189 { 190 case ' ': aState = TC_HAS_PROP_NAME_EQ_SP; 191 bCheckEmpty = sal_True; 192 break; 193 case '\"': aState = TC_INSIDE_STRING; 194 bCheckEmpty = sal_True; 195 aValue.Erase(); 196 break; 197 default: aState = TC_ERROR; 198 } 199 break; 200 201 // HAS_PROP_NAME_EQ_SP '"' -> INSIDE_STRING 202 case TC_HAS_PROP_NAME_EQ_SP: 203 switch ( cDelim ) 204 { 205 case '\"': aState = TC_INSIDE_STRING; 206 bCheckEmpty = sal_True; 207 aValue.Erase(); 208 break; 209 default: aState = TC_ERROR; 210 } 211 break; 212 213 // INSIDE_STRING * -> INSIDE_STRING 214 // INSIDE_STRING '"' -> PROP_FINISHED 215 case TC_INSIDE_STRING: 216 switch ( cDelim ) 217 { 218 case '\"': 219 { 220 aState = TC_PROP_FINISHED; 221 aValue += aPortion; 222 if ( aProperties.find( aName ) == aProperties.end() ) 223 { 224 if ( !IsPropertyValueValid( aName, aValue ) ) 225 { 226 rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' has invalid value '").Append(ByteString( aValue, RTL_TEXTENCODING_UTF8 )).Append("' "), *this ); 227 bIsBroken = sal_True; 228 } 229 aProperties[ aName ] = aValue; 230 } 231 else 232 { 233 rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' defined twice "), *this ); 234 bIsBroken = sal_True; 235 } 236 } 237 break; 238 default: 239 { 240 aState = TC_INSIDE_STRING; 241 aValue += aPortion; 242 aValue += cDelim; 243 } 244 } 245 break; 246 247 // PROP_FINISHED ' ' -> HAS_TAG_NAME 248 // PROP_FINISHED '/' -> CLOSED 249 // PROP_FINISHED '>' -> FINISHED 250 case TC_PROP_FINISHED: 251 switch ( cDelim ) 252 { 253 case ' ': aState = TC_HAS_TAG_NAME; 254 bCheckEmpty = sal_True; 255 break; 256 case '/': aState = TC_CLOSED; 257 bCheckEmpty = sal_True; 258 break; 259 case '>': aState = TC_FINISHED; 260 bCheckEmpty = sal_True; 261 break; 262 default: aState = TC_ERROR; 263 } 264 break; 265 266 // CLOSED ' ' -> CLOSED_SPACE 267 // CLOSED '>' -> FINISHED 268 case TC_CLOSED: 269 switch ( cDelim ) 270 { 271 case ' ': aState = TC_CLOSED_SPACE; 272 bCheckEmpty = sal_True; 273 bClosed = sal_True; 274 break; 275 case '>': aState = TC_FINISHED; 276 bCheckEmpty = sal_True; 277 break; 278 default: aState = TC_ERROR; 279 } 280 break; 281 282 // CLOSED_SPACE '>' -> FINISHED 283 case TC_CLOSED_SPACE: 284 switch ( cDelim ) 285 { 286 case '>': aState = TC_FINISHED; 287 bCheckEmpty = sal_True; 288 break; 289 default: aState = TC_ERROR; 290 } 291 break; 292 293 // CLOSETAG ' ' -> CLOSETAG_HAS_TAG_NAME 294 // CLOSETAG '>' -> FINISHED 295 case TC_CLOSETAG: 296 bCloseTag = sal_True; 297 switch ( cDelim ) 298 { 299 case ' ': aState = TC_CLOSETAG_HAS_TAG_NAME; 300 aTagName = aPortion; 301 bCheckName = sal_True; 302 break; 303 case '>': aState = TC_FINISHED; 304 aTagName = aPortion; 305 bCheckName = sal_True; 306 break; 307 default: aState = TC_ERROR; 308 } 309 break; 310 311 // CLOSETAG_HAS_TAG_NAME '>' -> FINISHED 312 case TC_CLOSETAG_HAS_TAG_NAME: 313 switch ( cDelim ) 314 { 315 case '>': aState = TC_FINISHED; 316 bCheckEmpty = sal_True; 317 break; 318 default: aState = TC_ERROR; 319 } 320 break; 321 322 323 default: rErrorList.AddError( 99, "Internal error Parsing Tag ", *this ); 324 bIsBroken = sal_True; 325 326 } 327 328 if ( bCheckName ) 329 { 330 if ( aPortion.Len() == 0 ) 331 { 332 rErrorList.AddError( 25, "Tag/Property name missing ", *this ); 333 bIsBroken = sal_True; 334 } 335 else 336 { 337 aName = ByteString( aPortion, RTL_TEXTENCODING_UTF8 ); 338 // "a-zA-Z_-.0-9" 339 xub_StrLen nCount; 340 sal_Bool bBroken = sal_False; 341 const sal_Char* aBuf = aName.GetBuffer(); 342 for ( nCount = 0 ; !bBroken && nCount < aName.Len() ; nCount++ ) 343 { 344 bBroken = ! ( ( aBuf[nCount] >= 'a' && aBuf[nCount] <= 'z' ) 345 ||( aBuf[nCount] >= 'A' && aBuf[nCount] <= 'Z' ) 346 ||( aBuf[nCount] >= '0' && aBuf[nCount] <= '9' ) 347 ||( aBuf[nCount] == '_' ) 348 ||( aBuf[nCount] == '-' ) 349 ||( aBuf[nCount] == '.' ) 350 ); 351 } 352 353 if ( bBroken ) 354 { 355 rErrorList.AddError( 25, "Found illegal character in Tag/Property name ", *this ); 356 bIsBroken = sal_True; 357 } 358 } 359 360 bCheckName = sal_False; 361 } 362 363 if ( bCheckEmpty ) 364 { 365 if ( aPortion.Len() ) 366 { 367 rErrorList.AddError( 25, ByteString("Found displaced characters '").Append(ByteString( aPortion, RTL_TEXTENCODING_UTF8 )).Append("' in Tag "), *this ); 368 bIsBroken = sal_True; 369 } 370 bCheckEmpty = sal_False; 371 } 372 373 374 nLastPos = nCheckPos; 375 376 // skip further blanks 377 if ( cDelim == ' ' && aState != TC_INSIDE_STRING ) 378 while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ') 379 nLastPos++; 380 381 nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos ); 382 } 383 if ( aState != TC_FINISHED ) 384 { 385 rErrorList.AddError( 25, "Parsing error in Tag ", *this ); 386 bIsBroken = sal_True; 387 } 388 } 389 390 sal_Bool TokenInfo::IsPropertyRelevant( const ByteString &aName, const String &aValue ) const 391 { 392 if ( aTagName.EqualsAscii( "alt" ) && aName.Equals( "xml-lang" ) ) 393 return sal_False; 394 if ( aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "visibility" ) && aValue.EqualsAscii("visible") ) 395 return sal_False; 396 if ( aTagName.EqualsAscii( "image" ) && (aName.Equals( "width" ) || aName.Equals( "height" )) ) 397 return sal_False; 398 399 return sal_True; 400 } 401 402 sal_Bool TokenInfo::IsPropertyValueValid( const ByteString &aName, const String &aValue ) const 403 { 404 /* removed due to i56740 405 if ( aTagName.EqualsAscii( "switchinline" ) && aName.Equals( "select" ) ) 406 { 407 return aValue.EqualsAscii("sys") || 408 aValue.EqualsAscii("appl") || 409 aValue.EqualsAscii("distrib"); 410 } */ 411 if ( aTagName.EqualsAscii( "caseinline" ) && aName.Equals( "select" ) ) 412 { 413 return /*!aValue.EqualsAscii("OS2") && removed due to i56740 */ 414 !aValue.EqualsAscii(""); 415 } 416 417 // we don't know any better so we assume it to be OK 418 return sal_True; 419 } 420 421 sal_Bool TokenInfo::IsPropertyInvariant( const ByteString &aName, const String &aValue ) const 422 { 423 if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "name" ) ) 424 return sal_False; 425 if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "href" ) ) 426 { // check for external reference 427 if ( aValue.Copy( 0, 5 ).EqualsIgnoreCaseAscii( "http:" ) 428 || aValue.Copy( 0, 6 ).EqualsIgnoreCaseAscii( "https:" ) 429 || aValue.Copy( 0, 4 ).EqualsIgnoreCaseAscii( "ftp:" ) ) 430 return sal_False; 431 else 432 return sal_True; 433 } 434 return sal_True; 435 } 436 437 sal_Bool TokenInfo::IsPropertyFixable( const ByteString &aName ) const 438 { 439 // name everything that is allowed to be fixed automatically here 440 if ( (aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "hid" )) 441 || (aTagName.EqualsAscii( "link" ) && aName.Equals( "href" )) 442 || (aTagName.EqualsAscii( "alt" ) && aName.Equals( "id" )) 443 || (aTagName.EqualsAscii( "variable" ) && aName.Equals( "id" )) 444 || (aTagName.EqualsAscii( "image" ) && aName.Equals( "src" )) 445 || (aTagName.EqualsAscii( "image" ) && aName.Equals( "id" ) )) 446 return sal_True; 447 return sal_False; 448 } 449 450 sal_Bool TokenInfo::MatchesTranslation( TokenInfo& rInfo, sal_Bool bGenErrors, ParserMessageList &rErrorList, sal_Bool bFixTags ) const 451 { 452 // check if tags are equal 453 // check if all existing properties are in the translation as well and 454 // wether they have a matching content (the same in most cases) 455 456 if ( nId != rInfo.nId ) 457 return sal_False; 458 459 if ( !aTagName.Equals( rInfo.aTagName ) ) 460 return sal_False; 461 462 // If one of the tags has formating errors already it does make no sense to check here, so return right away 463 if ( bGenErrors && ( bIsBroken || rInfo.bIsBroken ) ) 464 return sal_True; 465 466 StringHashMap::const_iterator iProp; 467 for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp ) 468 { 469 if ( rInfo.aProperties.find( iProp->first ) != rInfo.aProperties.end() ) 470 { 471 if ( IsPropertyRelevant( iProp->first, iProp->second ) || IsPropertyRelevant( iProp->first, rInfo.aProperties.find( iProp->first )->second ) ) 472 { 473 if ( IsPropertyInvariant( iProp->first, iProp->second ) ) 474 { 475 if ( !rInfo.aProperties.find( iProp->first )->second.Equals( iProp->second ) ) 476 { 477 if ( bGenErrors ) 478 { 479 if ( bFixTags && IsPropertyFixable( iProp->first ) ) 480 { 481 rInfo.aProperties.find( iProp->first )->second = iProp->second; 482 rInfo.SetHasBeenFixed(); 483 rErrorList.AddWarning( 25, ByteString("Property '").Append(iProp->first).Append("': FIXED different value in Translation "), *this ); 484 } 485 else 486 rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("': value different in Translation "), *this ); 487 } 488 else return sal_False; 489 } 490 } 491 } 492 } 493 else 494 { 495 if ( IsPropertyRelevant( iProp->first, iProp->second ) ) 496 { 497 if ( bGenErrors ) 498 rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("' missing in Translation "), *this ); 499 else return sal_False; 500 } 501 } 502 } 503 for( iProp = rInfo.aProperties.begin() ; iProp != rInfo.aProperties.end(); ++iProp ) 504 { 505 if ( aProperties.find( iProp->first ) == aProperties.end() ) 506 { 507 if ( IsPropertyRelevant( iProp->first, iProp->second ) ) 508 { 509 if ( bGenErrors ) 510 rErrorList.AddError( 25, ByteString("Extra Property '").Append(iProp->first).Append("' in Translation "), rInfo ); 511 else return sal_False; 512 } 513 } 514 } 515 516 // if we reach here eather 517 // the tags match completely or 518 // the tags match but not the properties and we generated errors for that 519 return sal_True; 520 } 521 522 String TokenInfo::GetTagName() const 523 { 524 return aTagName; 525 } 526 527 String TokenInfo::MakeTag() const 528 { 529 String aRet; 530 aRet.AppendAscii("\\<"); 531 if ( bCloseTag ) 532 aRet.AppendAscii("/"); 533 aRet.Append( GetTagName() ); 534 StringHashMap::const_iterator iProp; 535 536 for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp ) 537 { 538 aRet.AppendAscii(" "); 539 aRet.Append( String( iProp->first, RTL_TEXTENCODING_UTF8 ) ); 540 aRet.AppendAscii("=\\\""); 541 aRet.Append( iProp->second ); 542 aRet.AppendAscii("\\\""); 543 } 544 if ( bClosed ) 545 aRet.AppendAscii("/"); 546 aRet.AppendAscii("\\>"); 547 return aRet; 548 } 549 550 551 void ParserMessageList::AddError( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag ) 552 { 553 Insert( new ParserError( nErrorNr, aErrorText, rTag ), LIST_APPEND ); 554 } 555 556 void ParserMessageList::AddWarning( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag ) 557 { 558 Insert( new ParserWarning( nErrorNr, aErrorText, rTag ), LIST_APPEND ); 559 } 560 561 sal_Bool ParserMessageList::HasErrors() 562 { 563 sal_uInt16 i; 564 for ( i=0 ; i < Count() ; i++ ) 565 if ( GetObject( i )->IsError() ) 566 return sal_True; 567 return sal_False; 568 } 569 570 struct Tag 571 { 572 String GetName() const { return String::CreateFromAscii( pName ); }; 573 const char* pName; 574 TokenId nTag; 575 }; 576 577 578 static const Tag aKnownTags[] = 579 { 580 /* commenting oldstyle tags 581 // { "<#GROUP_FORMAT>", TAG_GROUP_FORMAT }, 582 { "<#BOLD>", TAG_BOLDON }, 583 { "<#/BOLD>", TAG_BOLDOFF }, 584 { "<#ITALIC>", TAG_ITALICON }, 585 { "<#/ITALIC>", TAG_ITALICOFF }, 586 { "<#UNDER>", TAG_UNDERLINEON }, 587 { "<#/UNDER>", TAG_UNDERLINEOFF }, 588 589 // { "<#GROUP_NOTALLOWED>", TAG_GROUP_NOTALLOWED }, 590 { "<#HELPID>", TAG_HELPID }, 591 { "<#MODIFY>", TAG_MODIFY }, 592 { "<#REFNR>", TAG_REFNR }, 593 594 // { "<#GROUP_STRUCTURE>", TAG_GROUP_STRUCTURE }, 595 { "<#NAME>", TAG_NAME }, 596 { "<#HREF>", TAG_HREF }, 597 { "<#AVIS>", TAG_AVIS }, 598 { "<#AHID>", TAG_AHID }, 599 { "<#AEND>", TAG_AEND }, 600 601 { "<#TITEL>", TAG_TITEL }, 602 { "<#KEY>", TAG_KEY }, 603 { "<#INDEX>", TAG_INDEX }, 604 605 { "<#REFSTART>", TAG_REFSTART }, 606 607 { "<#GRAPHIC>", TAG_GRAPHIC }, 608 { "<#NEXTVERSION>", TAG_NEXTVERSION }, 609 610 // { "<#GROUP_SYSSWITCH>", TAG_GROUP_SYSSWITCH }, 611 { "<#WIN>", TAG_WIN }, 612 { "<#UNIX>", TAG_UNIX }, 613 { "<#MAC>", TAG_MAC }, 614 { "<#OS2>", TAG_OS2 }, 615 616 // { "<#GROUP_PROGSWITCH>", TAG_GROUP_PROGSWITCH }, 617 { "<#WRITER>", TAG_WRITER }, 618 { "<#CALC>", TAG_CALC }, 619 { "<#DRAW>", TAG_DRAW }, 620 { "<#IMPRESS>", TAG_IMPRESS }, 621 { "<#SCHEDULE>", TAG_SCHEDULE }, 622 { "<#IMAGE>", TAG_IMAGE }, 623 { "<#MATH>", TAG_MATH }, 624 { "<#CHART>", TAG_CHART }, 625 { "<#OFFICE>", TAG_OFFICE }, 626 */ 627 // { "<#TAG_GROUP_META>", TAG_GROUP_META }, 628 { "$[officefullname]", TAG_OFFICEFULLNAME }, 629 { "$[officename]", TAG_OFFICENAME }, 630 { "$[officepath]", TAG_OFFICEPATH }, 631 { "$[officeversion]", TAG_OFFICEVERSION }, 632 { "$[portalname]", TAG_PORTALNAME }, 633 { "$[portalfullname]", TAG_PORTALFULLNAME }, 634 { "$[portalpath]", TAG_PORTALPATH }, 635 { "$[portalversion]", TAG_PORTALVERSION }, 636 { "$[portalshortname]", TAG_PORTALSHORTNAME }, 637 /* commenting oldstyle tags 638 // { "<#TAG_GROUP_SINGLE>", TAG_GROUP_SINGLE }, 639 { "<#REFINSERT>", TAG_REFINSERT }, 640 641 // { "<#GROUP_MULTI>", TAG_GROUP_MULTI }, 642 { "<#END>", TAG_END }, 643 { "<#ELSE>", TAG_ELSE }, 644 { "<#VERSIONEND>", TAG_VERSIONEND }, 645 { "<#ENDGRAPHIC>", TAG_ENDGRAPHIC },*/ 646 { "<Common Tag>", TAG_COMMONSTART }, 647 { "</Common Tag>", TAG_COMMONEND }, 648 649 { "<no more tags>", TAG_NOMORETAGS }, 650 { "", TAG_UNKNOWN_TAG }, 651 }; 652 653 654 SimpleParser::SimpleParser() 655 : nPos( 0 ) 656 , aNextTag( TAG_NOMORETAGS, TOK_INVALIDPOS ) 657 { 658 } 659 660 void SimpleParser::Parse( String PaSource ) 661 { 662 aSource = PaSource; 663 nPos = 0; 664 aLastToken.Erase(); 665 aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS ); 666 aTokenList.Clear(); 667 }; 668 669 TokenInfo SimpleParser::GetNextToken( ParserMessageList &rErrorList ) 670 { 671 TokenInfo aResult; 672 sal_uInt16 nTokenStartPos = 0; 673 if ( aNextTag.nId != TAG_NOMORETAGS ) 674 { 675 aResult = aNextTag; 676 aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS ); 677 } 678 else 679 { 680 aLastToken = GetNextTokenString( rErrorList, nTokenStartPos ); 681 if ( aLastToken.Len() == 0 ) 682 return TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS ); 683 684 // do we have a \< ... \> style tag? 685 if ( aLastToken.Copy(0,2).EqualsAscii( "\\<" ) ) 686 { 687 // check for paired \" \" 688 bool bEven = true; 689 sal_uInt16 nQuotePos = 0; 690 sal_uInt16 nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"" ); 691 sal_uInt16 nQuotedBackPos = aLastToken.SearchAscii( "\\\\" ); // this is only to kick out quoted backslashes 692 while ( nQuotedQuotesPos != STRING_NOTFOUND ) 693 { 694 if ( nQuotedBackPos <= nQuotedQuotesPos ) 695 nQuotePos = nQuotedBackPos+2; 696 else 697 { 698 nQuotePos = nQuotedQuotesPos+2; 699 bEven = !bEven; 700 } 701 nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"", nQuotePos ); 702 nQuotedBackPos = aLastToken.SearchAscii( "\\\\", nQuotePos ); // this is only to kick out quoted backslashes 703 } 704 if ( !bEven ) 705 { 706 rErrorList.AddError( 24, "Missing quotes ( \\\" ) in Tag", TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken ) ); 707 } 708 709 // check if we have an end-tag or a start-tag 710 sal_uInt16 nNonBlankStartPos,nNonBlankEndPos; 711 nNonBlankStartPos = 2; 712 while ( aLastToken.GetChar(nNonBlankStartPos) == ' ' ) 713 nNonBlankStartPos++; 714 if ( aLastToken.GetChar(nNonBlankStartPos) == '/' ) 715 aResult = TokenInfo( TAG_COMMONEND, nTokenStartPos, aLastToken, rErrorList ); 716 else 717 { 718 aResult = TokenInfo( TAG_COMMONSTART, nTokenStartPos, aLastToken, rErrorList ); 719 nNonBlankEndPos = aLastToken.Len() -3; 720 while ( aLastToken.GetChar(nNonBlankEndPos) == ' ' ) 721 nNonBlankEndPos--; 722 if ( aLastToken.GetChar( nNonBlankEndPos ) == '/' ) 723 aNextTag = TokenInfo( TAG_COMMONEND, nTokenStartPos, String::CreateFromAscii("\\</").Append(aResult.GetTagName()).AppendAscii("\\>"), rErrorList ); 724 } 725 } 726 else 727 { 728 sal_uInt16 i = 0; 729 while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG && 730 aLastToken != aKnownTags[i].GetName() ) 731 i++; 732 aResult = TokenInfo( aKnownTags[i].nTag, nTokenStartPos ); 733 } 734 } 735 736 if ( aResult.nId == TAG_UNKNOWN_TAG ) 737 aResult = TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken ); 738 aTokenList.Insert( aResult, LIST_APPEND ); 739 return aResult; 740 } 741 742 String SimpleParser::GetNextTokenString( ParserMessageList &rErrorList, sal_uInt16 &rTagStartPos ) 743 { 744 // sal_uInt16 nStyle1StartPos = aSource.SearchAscii( "<#", nPos ); 745 sal_uInt16 nStyle2StartPos = aSource.SearchAscii( "$[", nPos ); 746 sal_uInt16 nStyle3StartPos = aSource.SearchAscii( "\\<", nPos ); 747 sal_uInt16 nStyle4StartPos = aSource.SearchAscii( "\\\\", nPos ); // this is only to kick out quoted backslashes 748 749 rTagStartPos = 0; 750 751 /* removing since a \<... is not likely 752 // check if the tag starts with a letter to avoid things like <> <= ... > 753 while ( STRING_NOTFOUND != nStyle3StartPos && !( aSource.Copy( nStyle3StartPos+2, 1 ).IsAlphaAscii() || aSource.GetChar( nStyle3StartPos+2 ) == '/' ) ) 754 nStyle3StartPos = aSource.SearchAscii( "\\<", nStyle3StartPos+1 ); 755 */ 756 if ( STRING_NOTFOUND == nStyle2StartPos && STRING_NOTFOUND == nStyle3StartPos ) 757 return String(); // no more tokens 758 759 if ( nStyle4StartPos < nStyle2StartPos && nStyle4StartPos <= nStyle3StartPos ) // <= to make sure \\ is always handled first 760 { // Skip quoted Backslash 761 nPos = nStyle4StartPos +2; 762 return GetNextTokenString( rErrorList, rTagStartPos ); 763 } 764 765 /* if ( nStyle1StartPos < nStyle2StartPos && nStyle1StartPos <= nStyle3StartPos ) // <= to make sure our spechial tags are recognized before all others 766 { // test for <# ... > style tokens 767 sal_uInt16 nEndPos = aSource.SearchAscii( ">", nStyle1StartPos ); 768 if ( nEndPos == STRING_NOTFOUND ) 769 { // Token is incomplete. Skip start and search for better ones 770 nPos = nStyle1StartPos +2; 771 return GetNextTokenString( rErrorList, rTagStartPos ); 772 } 773 nPos = nEndPos; 774 rTagStartPos = nStyle1StartPos; 775 return aSource.Copy( nStyle1StartPos, nEndPos-nStyle1StartPos +1 ).ToUpperAscii(); 776 } 777 else*/ if ( nStyle2StartPos < nStyle3StartPos ) 778 { // test for $[ ... ] style tokens 779 sal_uInt16 nEndPos = aSource.SearchAscii( "]", nStyle2StartPos); 780 if ( nEndPos == STRING_NOTFOUND ) 781 { // Token is incomplete. Skip start and search for better ones 782 nPos = nStyle2StartPos +2; 783 return GetNextTokenString( rErrorList, rTagStartPos ); 784 } 785 nPos = nEndPos; 786 rTagStartPos = nStyle2StartPos; 787 return aSource.Copy( nStyle2StartPos, nEndPos-nStyle2StartPos +1 ); 788 } 789 else 790 { // test for \< ... \> style tokens 791 sal_uInt16 nEndPos = aSource.SearchAscii( "\\>", nStyle3StartPos); 792 sal_uInt16 nQuotedBackPos = aSource.SearchAscii( "\\\\", nStyle3StartPos ); // this is only to kick out quoted backslashes 793 while ( nQuotedBackPos <= nEndPos && nQuotedBackPos != STRING_NOTFOUND ) 794 { 795 nEndPos = aSource.SearchAscii( "\\>", nQuotedBackPos +2); 796 nQuotedBackPos = aSource.SearchAscii( "\\\\", nQuotedBackPos +2 ); // this is only to kick out quoted backslashes 797 } 798 if ( nEndPos == STRING_NOTFOUND ) 799 { // Token is incomplete. Skip start and search for better ones 800 nPos = nStyle3StartPos +2; 801 ByteString sTmp( "Tag Start '\\<' without Tag End '\\>': " ); 802 rErrorList.AddError( 24, "Tag Start '\\<' without Tag End '\\>'", TokenInfo( TAG_UNKNOWN_TAG, nStyle3StartPos, aSource.Copy( nStyle3StartPos-10, 20 ) ) ); 803 return GetNextTokenString( rErrorList, rTagStartPos ); 804 } 805 // check for paired quoted " --> \"sometext\" 806 807 nPos = nEndPos; 808 rTagStartPos = nStyle3StartPos; 809 return aSource.Copy( nStyle3StartPos, nEndPos-nStyle3StartPos +2 ); 810 } 811 } 812 813 String SimpleParser::GetLexem( TokenInfo const &aToken ) 814 { 815 if ( aToken.aTokenString.Len() ) 816 return aToken.aTokenString; 817 else 818 { 819 sal_uInt16 i = 0; 820 while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG && 821 aKnownTags[i].nTag != aToken.nId ) 822 i++; 823 824 return aKnownTags[i].GetName(); 825 } 826 } 827 828 TokenParser::TokenParser() 829 : pErrorList( NULL ) 830 {} 831 832 void TokenParser::Parse( const String &aCode, ParserMessageList* pList ) 833 { 834 pErrorList = pList; 835 836 //Scanner initialisieren 837 aParser.Parse( aCode ); 838 839 //erstes Symbol holen 840 aTag = aParser.GetNextToken( *pErrorList ); 841 842 nPfCaseOptions = 0; 843 nAppCaseOptions = 0; 844 bPfCaseActive = sal_False; 845 bAppCaseActive = sal_False; 846 847 nActiveRefTypes = 0; 848 849 //Ausfuehren der Start-Produktion 850 Paragraph(); 851 852 //Es wurde nicht die ganze Kette abgearbeitet, bisher ist aber 853 //kein Fehler aufgetreten 854 //=> es wurde ein einleitendes Tag vergessen 855 if ( aTag.nId != TAG_NOMORETAGS ) 856 { 857 switch ( aTag.nId ) 858 { 859 case TAG_END: 860 { 861 ParseError( 3, "Extra Tag <#END>. Switch or <#HREF> expected.", aTag ); 862 } 863 break; 864 case TAG_BOLDOFF: 865 { 866 ParseError( 4, "<#BOLD> expected before <#/BOLD>.", aTag ); 867 } 868 break; 869 case TAG_ITALICOFF: 870 { 871 ParseError( 5, "<#ITALIC> expected before <#/ITALIC>.", aTag ); 872 } 873 break; 874 case TAG_UNDERLINEOFF: 875 { 876 ParseError( 17, "<#UNDER> expected before <#/UNDER>.", aTag ); 877 } 878 break; 879 /* case TAG_MISSPARENTHESIS: 880 { 881 ParseError( 14, "missing closing parenthesis '>'", aTag ); 882 } 883 break;*/ 884 case TAG_AEND: 885 { 886 ParseError( 5, "Extra Tag <#AEND>. <#AVIS> or <#AHID> expected.", aTag ); 887 } 888 break; 889 case TAG_ELSE: 890 { 891 ParseError( 16, "Application-tag or platform-tag expected before <#ELSE>.", aTag ); 892 } 893 break; 894 case TAG_UNKNOWN_TAG: 895 { 896 ParseError( 6, "unknown Tag", aTag ); 897 } 898 break; 899 default: 900 { 901 ParseError( 6, "unexpected Tag", aTag ); 902 } 903 } 904 } 905 pErrorList = NULL; 906 } 907 908 void TokenParser::Paragraph() 909 { 910 switch ( aTag.nId ) 911 { 912 case TAG_GRAPHIC: 913 case TAG_NEXTVERSION: 914 { 915 TagRef(); 916 Paragraph(); 917 } 918 break; 919 case TAG_AVIS: 920 case TAG_AHID: 921 { 922 TagRef(); 923 Paragraph(); 924 } 925 break; 926 case TAG_HELPID: 927 { 928 SimpleTag(); 929 Paragraph(); 930 } 931 break; 932 case TAG_OFFICEFULLNAME: 933 case TAG_OFFICENAME: 934 case TAG_OFFICEPATH: 935 case TAG_OFFICEVERSION: 936 case TAG_PORTALNAME: 937 case TAG_PORTALFULLNAME: 938 case TAG_PORTALPATH: 939 case TAG_PORTALVERSION: 940 case TAG_PORTALSHORTNAME: 941 { 942 SimpleTag(); 943 Paragraph(); 944 } 945 break; 946 case TAG_REFINSERT: 947 { 948 SimpleTag(); 949 Paragraph(); 950 } 951 break; 952 case TAG_BOLDON: 953 case TAG_ITALICON: 954 case TAG_UNDERLINEON: 955 case TAG_COMMONSTART: 956 { 957 TagPair(); 958 Paragraph(); 959 } 960 break; 961 case TAG_HREF: 962 case TAG_NAME: 963 case TAG_KEY: 964 case TAG_INDEX: 965 case TAG_TITEL: 966 case TAG_REFSTART: 967 { 968 TagRef(); 969 Paragraph(); 970 } 971 break; 972 case TAG_OS2: 973 case TAG_WIN: 974 case TAG_UNIX: 975 case TAG_MAC: //... 976 { 977 if ( ! bPfCaseActive ) 978 { 979 //PfCases duerfen nicht verschachtelt sein: 980 bPfCaseActive = sal_True; 981 PfCase(); 982 983 //So jetzt kann wieder ein PfCase kommen: 984 bPfCaseActive = sal_False; 985 Paragraph(); 986 } 987 } 988 break; 989 case TAG_WRITER: 990 case TAG_CALC: 991 case TAG_DRAW: 992 case TAG_IMPRESS: 993 case TAG_SCHEDULE: 994 case TAG_IMAGE: 995 case TAG_MATH: 996 case TAG_CHART: 997 case TAG_OFFICE: 998 { 999 if ( !bAppCaseActive ) 1000 { 1001 //AppCases duerfen nicht verschachtelt sein: 1002 bAppCaseActive = sal_True; 1003 AppCase(); 1004 1005 //jetzt koennen wieder AppCases kommen: 1006 bAppCaseActive = sal_False; 1007 Paragraph(); 1008 } 1009 } 1010 break; 1011 1012 //Case TAG_BOLDOFF, TAG_ITALICOFF, TAG_BUNDERLINE, TAG_END 1013 //nichts tun wg. epsilon-Prod. 1014 } 1015 } 1016 1017 void TokenParser::PfCase() 1018 { 1019 1020 //Produktion: 1021 //PfCase -> PfCaseBegin Paragraph (PfCase | PfCaseEnd) 1022 1023 PfCaseBegin(); 1024 1025 //Jetzt ist eine PfCase-Produktion aktiv: 1026 Paragraph(); 1027 switch ( aTag.nId ) 1028 { 1029 case TAG_ELSE: 1030 case TAG_END: 1031 { 1032 CaseEnd(); 1033 } 1034 break; 1035 case TAG_OS2: 1036 case TAG_WIN: 1037 case TAG_UNIX: 1038 case TAG_MAC: //First (PfBegin) 1039 { 1040 PfCase(); 1041 } 1042 break; 1043 default: 1044 ParseError( 8, "<#ELSE> or <#END> or platform-tag expected.", aTag ); 1045 } 1046 //Die gemerkten Tags wieder loeschen fuer naechstes PfCase: 1047 nPfCaseOptions = 0; 1048 } 1049 1050 void TokenParser::PfCaseBegin() 1051 { 1052 switch ( aTag.nId ) 1053 { 1054 case TAG_OS2: 1055 case TAG_WIN: 1056 case TAG_UNIX: 1057 case TAG_MAC: 1058 { 1059 //Token darf noch nicht vorgekommen sein im 1060 //aktuellen Plattform-Case: 1061 if ( !HAS_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) ) ) 1062 { 1063 SET_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) ); 1064 match( aTag, aTag ); 1065 } 1066 else { 1067 ParseError( 9, "Tag defined twice in the same platform-case", aTag ); 1068 } 1069 } 1070 } 1071 } 1072 1073 void TokenParser::AppCase() 1074 { 1075 1076 //Produktion: 1077 //AppCase -> AppCaseBegin Paragraph (AppCase | AppCaseEnd) 1078 1079 1080 AppCaseBegin(); 1081 1082 Paragraph(); 1083 1084 switch ( aTag.nId ) 1085 { 1086 case TAG_ELSE: 1087 case TAG_END: 1088 { 1089 CaseEnd(); 1090 } 1091 break; 1092 case TAG_WRITER: 1093 case TAG_DRAW: 1094 case TAG_CALC: 1095 case TAG_IMAGE: 1096 case TAG_MATH: 1097 case TAG_CHART: 1098 case TAG_OFFICE: 1099 case TAG_IMPRESS: 1100 case TAG_SCHEDULE: //First (AppBegin) 1101 { 1102 AppCase(); 1103 } 1104 break; 1105 default: 1106 ParseError( 1, "<#ELSE> or <#END> or application-case-tag expected.", aTag ); 1107 } 1108 1109 //Die gemerkten Tags wieder loeschen fuer naechstes AppCase: 1110 nAppCaseOptions = 0; 1111 } 1112 1113 void TokenParser::AppCaseBegin() 1114 { 1115 switch ( aTag.nId ) 1116 { 1117 case TAG_WRITER: 1118 case TAG_DRAW: 1119 case TAG_CALC: 1120 case TAG_IMAGE: 1121 case TAG_MATH: 1122 case TAG_CHART: 1123 case TAG_OFFICE: 1124 case TAG_IMPRESS: 1125 case TAG_SCHEDULE: 1126 { 1127 //Token darf noch nicht vorgekommen sein im 1128 //aktuellen Plattform-Case: 1129 if ( !HAS_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) ) ) 1130 { 1131 SET_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) ); 1132 match( aTag, aTag ); 1133 } 1134 else { 1135 ParseError( 13, "Tag defined twice in the same application-case.", aTag ); 1136 } 1137 } 1138 } 1139 } 1140 1141 void TokenParser::CaseEnd() 1142 { 1143 //Produktion: 1144 //CaseEnd -> <#ELSE> Paragraph <#END> | <#END> 1145 1146 switch ( aTag.nId ) 1147 { 1148 case TAG_ELSE: 1149 { 1150 match( aTag, TAG_ELSE ); 1151 Paragraph(); 1152 match( aTag, TAG_END ); 1153 } 1154 break; 1155 case TAG_END: 1156 { 1157 match( aTag, TAG_END ); 1158 } 1159 break; 1160 default: 1161 ParseError( 2, "<#ELSE> or <#END> expected.", aTag ); 1162 } 1163 } 1164 1165 void TokenParser::SimpleTag() 1166 { 1167 1168 switch ( aTag.nId ) 1169 { 1170 case TAG_HELPID: 1171 { 1172 match( aTag, TAG_HELPID ); 1173 } 1174 break; 1175 case TAG_OFFICEFULLNAME: 1176 case TAG_OFFICENAME: 1177 case TAG_OFFICEPATH: 1178 case TAG_OFFICEVERSION: 1179 case TAG_PORTALNAME: 1180 case TAG_PORTALFULLNAME: 1181 case TAG_PORTALPATH: 1182 case TAG_PORTALVERSION: 1183 case TAG_PORTALSHORTNAME: 1184 1185 case TAG_REFINSERT: 1186 { 1187 match( aTag, aTag ); 1188 } 1189 break; 1190 default: 1191 ParseError( 15, "[<#SimpleTag>] expected.", aTag ); 1192 } 1193 } 1194 1195 void TokenParser::TagPair() 1196 { 1197 switch ( aTag.nId ) 1198 { 1199 case TAG_BOLDON: 1200 { 1201 match( aTag, TAG_BOLDON ); 1202 Paragraph(); 1203 match( aTag, TAG_BOLDOFF ); 1204 } 1205 break; 1206 case TAG_ITALICON: 1207 { 1208 match( aTag, TAG_ITALICON ); 1209 Paragraph(); 1210 match( aTag, TAG_ITALICOFF ); 1211 } 1212 break; 1213 case TAG_UNDERLINEON: 1214 { 1215 match( aTag, TAG_UNDERLINEON ); 1216 Paragraph(); 1217 match( aTag, TAG_UNDERLINEOFF ); 1218 } 1219 break; 1220 case TAG_COMMONSTART: 1221 { 1222 //remember tag so we can give the original tag in case of an error 1223 TokenInfo aEndTag( aTag ); 1224 aEndTag.nId = TAG_COMMONEND; 1225 match( aTag, TAG_COMMONSTART ); 1226 Paragraph(); 1227 match( aTag, aEndTag ); 1228 } 1229 break; 1230 default: 1231 ParseError( 10, "<#BOLD>, <#ITALIC>, <#UNDER> expected.", aTag ); 1232 } 1233 } 1234 1235 1236 void TokenParser::TagRef() 1237 { 1238 switch ( aTag.nId ) 1239 { 1240 case TAG_GRAPHIC: 1241 case TAG_NEXTVERSION: 1242 { 1243 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) ) 1244 { 1245 TokenId aThisToken = aTag.nId; 1246 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1247 match( aTag, aTag ); 1248 Paragraph(); 1249 if ( aThisToken == TAG_GRAPHIC ) 1250 match( aTag, TAG_ENDGRAPHIC ); 1251 else 1252 match( aTag, TAG_VERSIONEND ); 1253 // don't reset since alowed only once per paragraph 1254 // RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1255 } 1256 else 1257 { 1258 ParseError( 11, "Tags <#GRAPHIC>,<#NEXTVERSION> allowed only once per paragraph at", aTag ); 1259 } 1260 } 1261 break; 1262 case TAG_AVIS: 1263 case TAG_AHID: 1264 { 1265 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) ) 1266 { 1267 TokenId aThisToken = aTag.nId; 1268 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1269 match( aTag, aTag ); 1270 Paragraph(); 1271 match( aTag, TAG_AEND ); 1272 RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1273 } 1274 else 1275 { 1276 ParseError( 11, "Nested <#AHID>,<#AVIS> not allowed.", aTag ); 1277 } 1278 } 1279 break; 1280 case TAG_HREF: 1281 case TAG_NAME: 1282 { 1283 1284 } 1285 // NOBREAK 1286 case TAG_KEY: 1287 case TAG_INDEX: 1288 case TAG_TITEL: 1289 case TAG_REFSTART: 1290 { 1291 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) ) 1292 { 1293 TokenId aThisToken = aTag.nId; 1294 match( aTag, aTag ); 1295 if ( aThisToken != TAG_NAME ) 1296 { // TAG_NAME has no TAG_END 1297 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1298 Paragraph(); 1299 match( aTag, TAG_END ); 1300 RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) ); 1301 } 1302 } 1303 else 1304 { 1305 ParseError( 11, "Nested <#HREF>,<#NAME> or <#KEY> not allowed.", aTag ); 1306 } 1307 } 1308 break; 1309 default: 1310 ParseError( 12, "<#HREF>,<#NAME> or <#KEY> expected.", aTag ); 1311 } 1312 } 1313 1314 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenId &aExpectedToken ) 1315 { 1316 return match( aCurrentToken, TokenInfo( aExpectedToken, TOK_INVALIDPOS ) ); 1317 } 1318 1319 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenInfo &rExpectedToken ) 1320 { 1321 TokenInfo aExpectedToken( rExpectedToken ); 1322 if ( aCurrentToken.nId == aExpectedToken.nId ) 1323 { 1324 if ( ( aCurrentToken.nId == TAG_COMMONEND 1325 && aCurrentToken.GetTagName().Equals( aExpectedToken.GetTagName() ) ) 1326 || aCurrentToken.nId != TAG_COMMONEND ) 1327 { 1328 aTag = aParser.GetNextToken( *pErrorList ); 1329 return sal_True; 1330 } 1331 } 1332 1333 if ( aExpectedToken.nId == TAG_COMMONEND ) 1334 { 1335 aExpectedToken.aTokenString.Insert( String::CreateFromAscii( "Close tag for " ), 0 ); 1336 } 1337 1338 ByteString sTmp( "Expected Symbol" ); 1339 if ( aCurrentToken.nId == TAG_NOMORETAGS ) 1340 { 1341 ParseError( 7, sTmp, aExpectedToken ); 1342 } 1343 else 1344 { 1345 sTmp += ": "; 1346 sTmp += ByteString( aParser.GetLexem( aExpectedToken ), RTL_TEXTENCODING_UTF8 ); 1347 sTmp += " near "; 1348 ParseError( 7, sTmp, aCurrentToken ); 1349 } 1350 return sal_False; 1351 } 1352 1353 void TokenParser::ParseError( sal_uInt16 nErrNr, ByteString aErrMsg, const TokenInfo &rTag ) 1354 { 1355 pErrorList->AddError( nErrNr, aErrMsg, rTag); 1356 1357 // Das Fehlerhafte Tag ueberspringen 1358 aTag = aParser.GetNextToken( *pErrorList ); 1359 } 1360 1361 1362 ParserMessage::ParserMessage( sal_uInt16 PnErrorNr, ByteString PaErrorText, const TokenInfo &rTag ) 1363 : nErrorNr( PnErrorNr ) 1364 , aErrorText( PaErrorText ) 1365 , nTagBegin( 0 ) 1366 , nTagLength( 0 ) 1367 { 1368 String aLexem( SimpleParser::GetLexem( rTag ) ); 1369 aErrorText.Append(": "); 1370 aErrorText += ByteString( aLexem, RTL_TEXTENCODING_UTF8 ); 1371 if ( rTag.nId == TAG_NOMORETAGS ) 1372 aErrorText.Append(" at end of line "); 1373 else if ( rTag.nPos != TOK_INVALIDPOS ) 1374 { 1375 aErrorText.Append(" at Position "); 1376 aErrorText.Append( ByteString::CreateFromInt32( rTag.nPos ) ); 1377 } 1378 nTagBegin = rTag.nPos; 1379 nTagLength = aLexem.Len(); 1380 } 1381 1382 ParserError::ParserError( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag ) 1383 : ParserMessage( ErrorNr, ErrorText, rTag ) 1384 {} 1385 1386 ParserWarning::ParserWarning( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag ) 1387 : ParserMessage( ErrorNr, ErrorText, rTag ) 1388 {} 1389 1390 sal_Bool LingTest::IsTagMandatory( TokenInfo const &aToken, TokenId &aMetaTokens ) 1391 { 1392 TokenId aTokenId = aToken.nId; 1393 TokenId aTokenGroup = TAG_GROUP( aTokenId ); 1394 if ( TAG_GROUP_PROGSWITCH == aTokenGroup 1395 || TAG_REFINSERT == aTokenId 1396 || TAG_REFSTART == aTokenId 1397 || TAG_NAME == aTokenId 1398 || TAG_HREF == aTokenId 1399 || TAG_AVIS == aTokenId 1400 || TAG_AHID == aTokenId 1401 || TAG_GRAPHIC == aTokenId 1402 || TAG_NEXTVERSION == aTokenId 1403 || ( TAG_GROUP_META == aTokenGroup && (aMetaTokens & aTokenId) == aTokenId ) ) 1404 { 1405 if ( TAG_GROUP_META == aTokenGroup ) 1406 aMetaTokens |= aTokenId; 1407 return sal_True; 1408 } 1409 else if ( TAG_COMMONSTART == aTokenId 1410 || TAG_COMMONEND == aTokenId ) 1411 { 1412 String aTagName = aToken.GetTagName(); 1413 return !(aTagName.EqualsIgnoreCaseAscii( "comment" ) 1414 || aTagName.EqualsIgnoreCaseAscii( "bookmark_value" ) 1415 || aTagName.EqualsIgnoreCaseAscii( "emph" ) 1416 || aTagName.EqualsIgnoreCaseAscii( "item" ) 1417 || aTagName.EqualsIgnoreCaseAscii( "br" ) ); 1418 } 1419 return sal_False; 1420 } 1421 1422 void LingTest::CheckTags( TokenList &aReference, TokenList &aTestee, sal_Bool bFixTags ) 1423 { 1424 sal_uLong i=0,j=0; 1425 // Clean old Warnings 1426 while ( aCompareWarningList.Count() ) 1427 { 1428 delete aCompareWarningList.GetCurObject(); 1429 aCompareWarningList.Remove(); 1430 } 1431 1432 /* in xml tags, do not require the following tags 1433 comment 1434 bookmark_value 1435 emph 1436 item 1437 br 1438 */ 1439 1440 // filter uninteresting Tags 1441 TokenId aMetaTokens = 0; 1442 for ( i=0 ; i < aReference.Count() ; i++ ) 1443 { 1444 if ( !IsTagMandatory( aReference.GetObject( i ), aMetaTokens ) ) 1445 aReference.GetObject( i ).SetDone(); 1446 } 1447 1448 aMetaTokens = 0; 1449 for ( i=0 ; i < aTestee.Count() ; i++ ) 1450 { 1451 if ( !IsTagMandatory( aTestee.GetObject( i ), aMetaTokens ) ) 1452 aTestee.GetObject( i ).SetDone(); 1453 } 1454 1455 // remove all matching tags 1456 for ( i=0 ; i < aReference.Count() ; i++ ) 1457 { 1458 if ( aReference.GetObject( i ).IsDone() ) 1459 continue; 1460 1461 sal_Bool bTagFound = sal_False; 1462 for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ ) 1463 { 1464 if ( aTestee.GetObject( j ).IsDone() ) 1465 continue; 1466 1467 if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_False, aCompareWarningList ) ) 1468 { 1469 aReference.GetObject( i ).SetDone(); 1470 aTestee.GetObject( j ).SetDone(); 1471 bTagFound = sal_True; 1472 } 1473 } 1474 } 1475 1476 sal_Bool bCanFix = sal_True; 1477 1478 if ( bFixTags ) 1479 { 1480 // we fix only if its a really simple case 1481 sal_uInt16 nTagCount = 0; 1482 for ( i=0 ; i < aReference.Count() ; i++ ) 1483 if ( !aReference.GetObject( i ).IsDone() ) 1484 nTagCount++; 1485 if ( nTagCount > 1 ) 1486 bCanFix = sal_False; 1487 1488 nTagCount = 0; 1489 for ( i=0 ; i < aTestee.Count() ; i++ ) 1490 if ( !aTestee.GetObject( i ).IsDone() ) 1491 nTagCount++; 1492 if ( nTagCount > 1 ) 1493 bCanFix = sal_False; 1494 } 1495 1496 // generate errors for tags that have differing attributes 1497 for ( i=0 ; i < aReference.Count() ; i++ ) 1498 { 1499 if ( aReference.GetObject( i ).IsDone() ) 1500 continue; 1501 1502 sal_Bool bTagFound = sal_False; 1503 for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ ) 1504 { 1505 if ( aTestee.GetObject( j ).IsDone() ) 1506 continue; 1507 1508 if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_True, aCompareWarningList, bCanFix && bFixTags ) ) 1509 { 1510 aReference.GetObject( i ).SetDone(); 1511 aTestee.GetObject( j ).SetDone(); 1512 bTagFound = sal_True; 1513 } 1514 } 1515 } 1516 1517 // list remaining tags as errors 1518 for ( i=0 ; i < aReference.Count() ; i++ ) 1519 { 1520 if ( aReference.GetObject( i ).IsDone() ) 1521 continue; 1522 1523 aCompareWarningList.AddError( 20, "Missing Tag in Translation", aReference.GetObject( i ) ); 1524 } 1525 for ( i=0 ; i < aTestee.Count() ; i++ ) 1526 { 1527 if ( aTestee.GetObject( i ).IsDone() ) 1528 continue; 1529 1530 aCompareWarningList.AddError( 21, "Extra Tag in Translation", aTestee.GetObject( i ) ); 1531 } 1532 1533 for ( i=0 ; i < aReference.Count() ; i++ ) 1534 aReference.GetObject( i ).SetDone( sal_False ); 1535 1536 for ( i=0 ; i < aTestee.Count() ; i++ ) 1537 aTestee.GetObject( i ).SetDone( sal_False ); 1538 } 1539 1540 void LingTest::CheckReference( GSILine *aReference ) 1541 { 1542 aReferenceParser.Parse( aReference->GetUText(), aReference->GetMessageList() ); 1543 } 1544 1545 void LingTest::CheckTestee( GSILine *aTestee, sal_Bool bHasSourceLine, sal_Bool bFixTags ) 1546 { 1547 aFixedTestee = aTestee->GetUText(); 1548 aTesteeParser.Parse( aFixedTestee, aTestee->GetMessageList() ); 1549 1550 if ( bHasSourceLine ) 1551 CheckTags( aReferenceParser.GetTokenList(), aTesteeParser.GetTokenList(), bFixTags ); 1552 1553 if ( bFixTags ) 1554 { 1555 TokenList& aTesteeTokens = aTesteeParser.GetTokenList(); 1556 sal_Bool bFixesDone = sal_False; 1557 // count backwards to allow replacing from right to left 1558 int i; 1559 for ( i=aTesteeTokens.Count()-1 ; i>=0 ; i-- ) 1560 { 1561 if ( aTesteeTokens.GetObject( i ).HasBeenFixed() ) 1562 { 1563 bFixesDone = sal_True; 1564 aFixedTestee.Replace( aTesteeTokens.GetObject( i ).nPos, aTesteeTokens.GetObject( i ).aTokenString.Len(), aTesteeTokens.GetObject( i ).MakeTag() ); 1565 } 1566 } 1567 if ( bFixesDone ) 1568 { 1569 aTestee->SetUText( aFixedTestee ); 1570 aTestee->SetFixed(); 1571 } 1572 } 1573 } 1574 1575