xref: /AOO41X/main/l10ntools/inc/tagtest.hxx (revision 1ecadb572e7010ff3b3382ad9bf179dbc6efadbb)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #ifndef _TAGTEST_HXX_
29 #define _TAGTEST_HXX_
30 
31 #include <tools/string.hxx>
32 #include <tools/list.hxx>
33 #include <hash_map> /* std::hashmap*/
34 
35 class GSILine;
36 
37 typedef sal_uInt16 TokenId;
38 
39 #define TOK_INVALIDPOS  sal_uInt16( 0xFFFF )
40 
41 class ParserMessage;
42 
43 DECLARE_LIST( Impl_ParserMessageList, ParserMessage* )
44 class ParserMessageList;
45 
46 
47 struct equalByteString{
48         bool operator()( const ByteString& rKey1, const ByteString& rKey2 ) const {
49             return rKey1.CompareTo( rKey2 )==COMPARE_EQUAL;
50     }
51 };
52 struct lessByteString{
53         bool operator()( const ByteString& rKey1, const ByteString& rKey2 ) const {
54             return rKey1.CompareTo( rKey2 )==COMPARE_LESS;
55     }
56 };
57 
58 struct hashByteString{
59     size_t operator()( const ByteString& rName ) const{
60                 std::hash< const char* > myHash;
61                 return myHash( rName.GetBuffer() );
62     }
63 };
64 
65 
66 
67 typedef std::hash_map<ByteString , String , hashByteString,equalByteString>
68                                 StringHashMap;
69 
70 class TokenInfo
71 {
72 private:
73     void SplitTag( ParserMessageList &rErrorList );
74 
75     String aTagName;
76     StringHashMap aProperties;
77     sal_Bool bClosed;    // tag is closed  <sdnf/>
78     sal_Bool bCloseTag;  // tag is close Tag  </sdnf>
79 
80 
81     sal_Bool bIsBroken;
82     sal_Bool bHasBeenFixed;
83     sal_Bool bDone;
84 
85 public:
86 
87 	String aTokenString;
88 	TokenId nId;
89     sal_uInt16 nPos;            // Position in String
90 
91     TokenInfo():bClosed(sal_False),bCloseTag(sal_False),bIsBroken(sal_False),bHasBeenFixed(sal_False),bDone(sal_False),nId( 0 ){;}
92 explicit    TokenInfo( TokenId pnId, sal_uInt16 nP ):bClosed(sal_False),bCloseTag(sal_False),bIsBroken(sal_False),bHasBeenFixed(sal_False),bDone(sal_False),nId( pnId ),nPos(nP){;}
93 explicit    TokenInfo( TokenId pnId, sal_uInt16 nP, String paStr ):bClosed(sal_False),bCloseTag(sal_False),bIsBroken(sal_False),bHasBeenFixed(sal_False),bDone(sal_False),aTokenString( paStr ),nId( pnId ),nPos(nP) {;}
94 explicit    TokenInfo( TokenId pnId, sal_uInt16 nP, String paStr, ParserMessageList &rErrorList );
95 
96 	String GetTagName() const;
97 
98     String MakeTag() const;
99 
100     /**
101         Is the property to be ignored or does it have the default value anyways
102     **/
103     sal_Bool IsPropertyRelevant( const ByteString &aName, const String &aValue ) const;
104     sal_Bool IsPropertyValueValid( const ByteString &aName, const String &aValue ) const;
105     /**
106         Does the property contain the same value for all languages
107         e.g.: the href in a link tag
108     **/
109     sal_Bool IsPropertyInvariant( const ByteString &aName, const String &aValue ) const;
110     /**
111         a subset of IsPropertyInvariant but containing only those that are fixable
112         we dont wat to fix e.g.: ahelp :: visibility
113     **/
114     sal_Bool IsPropertyFixable( const ByteString &aName ) const;
115     sal_Bool MatchesTranslation( TokenInfo& rInfo, sal_Bool bGenErrors, ParserMessageList &rErrorList, sal_Bool bFixTags = sal_False ) const;
116 
117     sal_Bool IsDone() const { return bDone; }
118     void SetDone( sal_Bool bNew = sal_True ) { bDone = bNew; }
119 
120     sal_Bool HasBeenFixed() const { return bHasBeenFixed; }
121     void SetHasBeenFixed( sal_Bool bNew = sal_True ) { bHasBeenFixed = bNew; }
122 };
123 
124 
125 class ParserMessageList : public Impl_ParserMessageList
126 {
127 public:
128     void AddError( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag );
129     void AddWarning( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag );
130 
131     sal_Bool HasErrors();
132 };
133 
134 
// A tag id keeps its group number in the bits selected by TAG_GROUPMASK
// (the top four bits of a 16 bit value); the remaining bits identify the
// tag inside that group.
#define TAG_GROUPMASK				0xF000
#define TAG_GROUPSHIFT				12

// Group number of a tag id. The argument is parenthesised so that an
// expression such as  a | b  is masked as a whole instead of only its last
// operand.
#define TAG_GROUP( nTag )			(( (nTag) & TAG_GROUPMASK ) >> TAG_GROUPSHIFT )
// Tag id with the group bits cleared ( ~ is the bitwise NOT ).
#define TAG_NOGROUP( nTag )			( (nTag) & ~TAG_GROUPMASK )
140 
// Tag ids. Every id is built as ( group << TAG_GROUPSHIFT ) | value.

#define TAG_NOMORETAGS              0x0

// ---- group: format ---------------------------------------------------------
// The ...ON and ...OFF ids of one format differ only in the TAG_ON bit.
#define TAG_GROUP_FORMAT            0x1
#define TAG_ON                      0x100
#define TAG_BOLDON                  ( ( TAG_GROUP_FORMAT << TAG_GROUPSHIFT ) | TAG_ON | 0x001 )
#define TAG_BOLDOFF                 ( ( TAG_GROUP_FORMAT << TAG_GROUPSHIFT ) | 0x001 )
#define TAG_ITALICON                ( ( TAG_GROUP_FORMAT << TAG_GROUPSHIFT ) | TAG_ON | 0x002 )
#define TAG_ITALICOFF               ( ( TAG_GROUP_FORMAT << TAG_GROUPSHIFT ) | 0x002 )
#define TAG_UNDERLINEON             ( ( TAG_GROUP_FORMAT << TAG_GROUPSHIFT ) | TAG_ON | 0x004 )
#define TAG_UNDERLINEOFF            ( ( TAG_GROUP_FORMAT << TAG_GROUPSHIFT ) | 0x004 )

// ---- group: not allowed ----------------------------------------------------
#define TAG_GROUP_NOTALLOWED        0x2
#define TAG_HELPID                  ( ( TAG_GROUP_NOTALLOWED << TAG_GROUPSHIFT ) | 0x001 )
#define TAG_MODIFY                  ( ( TAG_GROUP_NOTALLOWED << TAG_GROUPSHIFT ) | 0x002 )
#define TAG_REFNR                   ( ( TAG_GROUP_NOTALLOWED << TAG_GROUPSHIFT ) | 0x004 )

// ---- group: structure ------------------------------------------------------
#define TAG_GROUP_STRUCTURE         0x3
#define TAG_NAME                    ( ( TAG_GROUP_STRUCTURE << TAG_GROUPSHIFT ) | 0x001 )
#define TAG_HREF                    ( ( TAG_GROUP_STRUCTURE << TAG_GROUPSHIFT ) | 0x002 )
#define TAG_AVIS                    ( ( TAG_GROUP_STRUCTURE << TAG_GROUPSHIFT ) | 0x004 )
#define TAG_AHID                    ( ( TAG_GROUP_STRUCTURE << TAG_GROUPSHIFT ) | 0x008 )

#define TAG_TITEL                   ( ( TAG_GROUP_STRUCTURE << TAG_GROUPSHIFT ) | 0x020 )
#define TAG_KEY                     ( ( TAG_GROUP_STRUCTURE << TAG_GROUPSHIFT ) | 0x040 )
#define TAG_INDEX                   ( ( TAG_GROUP_STRUCTURE << TAG_GROUPSHIFT ) | 0x080 )

#define TAG_REFSTART                ( ( TAG_GROUP_STRUCTURE << TAG_GROUPSHIFT ) | 0x100 )

#define TAG_GRAPHIC                 ( ( TAG_GROUP_STRUCTURE << TAG_GROUPSHIFT ) | 0x200 )
#define TAG_NEXTVERSION             ( ( TAG_GROUP_STRUCTURE << TAG_GROUPSHIFT ) | 0x400 )

// ---- group: system switch --------------------------------------------------
#define TAG_GROUP_SYSSWITCH         0x4
#define TAG_WIN                     ( ( TAG_GROUP_SYSSWITCH << TAG_GROUPSHIFT ) | 0x001 )
#define TAG_UNIX                    ( ( TAG_GROUP_SYSSWITCH << TAG_GROUPSHIFT ) | 0x002 )
#define TAG_MAC                     ( ( TAG_GROUP_SYSSWITCH << TAG_GROUPSHIFT ) | 0x004 )
#define TAG_OS2                     ( ( TAG_GROUP_SYSSWITCH << TAG_GROUPSHIFT ) | 0x008 )

// ---- group: program switch -------------------------------------------------
#define TAG_GROUP_PROGSWITCH        0x5
#define TAG_WRITER                  ( ( TAG_GROUP_PROGSWITCH << TAG_GROUPSHIFT ) | 0x001 )
#define TAG_CALC                    ( ( TAG_GROUP_PROGSWITCH << TAG_GROUPSHIFT ) | 0x002 )
#define TAG_DRAW                    ( ( TAG_GROUP_PROGSWITCH << TAG_GROUPSHIFT ) | 0x004 )
#define TAG_IMPRESS                 ( ( TAG_GROUP_PROGSWITCH << TAG_GROUPSHIFT ) | 0x008 )
#define TAG_SCHEDULE                ( ( TAG_GROUP_PROGSWITCH << TAG_GROUPSHIFT ) | 0x010 )
#define TAG_IMAGE                   ( ( TAG_GROUP_PROGSWITCH << TAG_GROUPSHIFT ) | 0x020 )
#define TAG_MATH                    ( ( TAG_GROUP_PROGSWITCH << TAG_GROUPSHIFT ) | 0x040 )
#define TAG_CHART                   ( ( TAG_GROUP_PROGSWITCH << TAG_GROUPSHIFT ) | 0x080 )
#define TAG_OFFICE                  ( ( TAG_GROUP_PROGSWITCH << TAG_GROUPSHIFT ) | 0x100 )


// ---- group: meta -----------------------------------------------------------
#define TAG_GROUP_META              0x6
#define TAG_OFFICEFULLNAME          ( ( TAG_GROUP_META << TAG_GROUPSHIFT ) | 0x001 )
#define TAG_OFFICENAME              ( ( TAG_GROUP_META << TAG_GROUPSHIFT ) | 0x002 )
#define TAG_OFFICEPATH              ( ( TAG_GROUP_META << TAG_GROUPSHIFT ) | 0x004 )
#define TAG_OFFICEVERSION           ( ( TAG_GROUP_META << TAG_GROUPSHIFT ) | 0x008 )
#define TAG_PORTALNAME              ( ( TAG_GROUP_META << TAG_GROUPSHIFT ) | 0x010 )
#define TAG_PORTALFULLNAME          ( ( TAG_GROUP_META << TAG_GROUPSHIFT ) | 0x020 )
#define TAG_PORTALPATH              ( ( TAG_GROUP_META << TAG_GROUPSHIFT ) | 0x040 )
#define TAG_PORTALVERSION           ( ( TAG_GROUP_META << TAG_GROUPSHIFT ) | 0x080 )
#define TAG_PORTALSHORTNAME         ( ( TAG_GROUP_META << TAG_GROUPSHIFT ) | 0x100 )


// ---- group: single ---------------------------------------------------------
#define TAG_GROUP_SINGLE            0x7
#define TAG_REFINSERT               ( ( TAG_GROUP_SINGLE << TAG_GROUPSHIFT ) | 0x001 )


// ---- group: multi ----------------------------------------------------------
#define TAG_GROUP_MULTI             0x8
#define TAG_END                     ( ( TAG_GROUP_MULTI << TAG_GROUPSHIFT ) | 0x010 )
#define TAG_ELSE                    ( ( TAG_GROUP_MULTI << TAG_GROUPSHIFT ) | 0x020 )
#define TAG_AEND                    ( ( TAG_GROUP_MULTI << TAG_GROUPSHIFT ) | 0x040 )
#define TAG_VERSIONEND              ( ( TAG_GROUP_MULTI << TAG_GROUPSHIFT ) | 0x080 )
#define TAG_ENDGRAPHIC              ( ( TAG_GROUP_MULTI << TAG_GROUPSHIFT ) | 0x100 )

// ---- group: misc -----------------------------------------------------------
#define TAG_GROUP_MISC              0x9
#define TAG_COMMONSTART             ( ( TAG_GROUP_MISC << TAG_GROUPSHIFT ) | 0x001 )
#define TAG_COMMONEND               ( ( TAG_GROUP_MISC << TAG_GROUPSHIFT ) | 0x002 )

// Note: the unknown-tag id is built from TAG_GROUP_MULTI, not TAG_GROUP_MISC.
#define TAG_UNKNOWN_TAG             ( ( TAG_GROUP_MULTI << TAG_GROUPSHIFT ) | 0x800 )
218 
219 DECLARE_LIST( TokenListImpl, TokenInfo* )
220 
221 class TokenList : private TokenListImpl
222 {
223 private:
224 
225     TokenList&   operator =( const TokenList& rList );
226 //                { TokenListImpl::operator =( rList ); return *this; }
227 
228 
229 public:
230 	using TokenListImpl::Count;
231 
232 
233     TokenList() : TokenListImpl(){};
234     ~TokenList(){ Clear(); };
235 
236 	void		Clear()
237 		{
238 			for ( sal_uLong i = 0 ; i < Count() ; i++ )
239 				delete TokenListImpl::GetObject( i );
240 			TokenListImpl::Clear();
241 		}
242 	void		Insert( TokenInfo p, sal_uLong nIndex = LIST_APPEND )
243 		{ TokenListImpl::Insert( new TokenInfo(p), nIndex ); }
244 /*    TokenInfo		Remove( sal_uLong nIndex )
245 		{
246 			TokenInfo aT = GetObject( nIndex );
247 			delete TokenListImpl::GetObject( nIndex );
248 			TokenListImpl::Remove( nIndex );
249 			return aT;
250 		}*/
251 //    TokenInfo		Remove( TokenInfo p ){ return Remove( GetPos( p ) ); }
252 //    TokenInfo		GetCurObject() const { return *TokenListImpl::GetCurObject(); }
253     TokenInfo&		GetObject( sal_uLong nIndex ) const
254 		{
255 //			if ( TokenListImpl::GetObject(nIndex) )
256 				return *TokenListImpl::GetObject(nIndex);
257 //			else
258 //				return TokenInfo();
259 		}
260 /*    sal_uLong		GetPos( const TokenInfo p ) const
261 		{
262 			for ( sal_uLong i = 0 ; i < Count() ; i++ )
263 				if ( p == GetObject( i ) )
264 					return i;
265 			return LIST_ENTRY_NOTFOUND;
266 		}*/
267 
268     TokenList( const TokenList& rList );
269 /*		{
270 			for ( sal_uLong i = 0 ; i < rList.Count() ; i++ )
271 			{
272 				Insert( rList.GetObject( i ), LIST_APPEND );
273 			}
274 		}*/
275 };
276 
277 class ParserMessage
278 {
279 	sal_uInt16 nErrorNr;
280 	ByteString aErrorText;
281 	sal_uInt16 nTagBegin,nTagLength;
282 
283 protected:
284     ParserMessage( sal_uInt16 PnErrorNr, ByteString PaErrorText, const TokenInfo &rTag );
285 public:
286 
287 	sal_uInt16 GetErrorNr() { return nErrorNr; }
288 	ByteString GetErrorText() { return aErrorText; }
289 
290 	sal_uInt16 GetTagBegin() { return nTagBegin; }
291 	sal_uInt16 GetTagLength() { return nTagLength; }
292 
293     virtual ~ParserMessage() {}
294     virtual sal_Bool IsError() =0;
295     virtual ByteString Prefix() =0;
296 };
297 
298 class ParserError : public ParserMessage
299 {
300 public:
301     ParserError( sal_uInt16 PnErrorNr, ByteString PaErrorText, const TokenInfo &rTag );
302 
303     virtual sal_Bool IsError() {return sal_True;};
304     virtual ByteString Prefix() {return "Error:"; };
305 };
306 
307 class ParserWarning : public ParserMessage
308 {
309 public:
310     ParserWarning( sal_uInt16 PnErrorNr, ByteString PaErrorText, const TokenInfo &rTag );
311 
312     virtual sal_Bool IsError() {return sal_False;};
313     virtual ByteString Prefix() {return "Warning:"; };
314 };
315 
316 class SimpleParser
317 {
318 private:
319 	sal_uInt16 nPos;
320 	String aSource;
321 	String aLastToken;
322 	TokenList aTokenList;
323 
324     TokenInfo aNextTag;     // to store closetag in case of combined tags like <br/>
325 
326 	String GetNextTokenString( ParserMessageList &rErrorList, sal_uInt16 &rTokeStartPos );
327 
328 public:
329 	SimpleParser();
330 	void Parse( String PaSource );
331 	TokenInfo GetNextToken( ParserMessageList &rErrorList );
332 	static String GetLexem( TokenInfo const &aToken );
333 	TokenList& GetTokenList(){ return aTokenList; }
334 };
335 
336 class TokenParser
337 {
338     sal_Bool match( const TokenInfo &aCurrentToken, const TokenId &aExpectedToken );
339 	sal_Bool match( const TokenInfo &aCurrentToken, const TokenInfo &aExpectedToken );
340 	void ParseError( sal_uInt16 nErrNr, ByteString aErrMsg, const TokenInfo &rTag );
341 	void Paragraph();
342 	void PfCase();
343 	void PfCaseBegin();
344 	void AppCase();
345 	void AppCaseBegin();
346 	void CaseEnd();
347 	void SimpleTag();
348 	void TagPair();
349 	void TagRef();
350 
351 	SimpleParser aParser;
352 	TokenInfo aTag;
353 
354 	TokenId nPfCaseOptions;
355 	TokenId nAppCaseOptions;
356 	sal_Bool bPfCaseActive ,bAppCaseActive;
357 
358 	TokenId nActiveRefTypes;
359 
360 	ParserMessageList *pErrorList;
361 
362 public:
363 	TokenParser();
364 	void Parse( const String &aCode, ParserMessageList* pList );
365 //	ParserMessageList& GetErrors(){ return aErrorList; }
366 //	sal_Bool HasErrors(){ return ( aErrorList.Count() > 0 ); }
367 	TokenList& GetTokenList(){ return aParser.GetTokenList(); }
368 };
369 
370 class LingTest
371 {
372 private:
373 	TokenParser aReferenceParser;
374 	TokenParser aTesteeParser;
375 	ParserMessageList aCompareWarningList;
376 	void CheckTags( TokenList &aReference, TokenList &aTestee, sal_Bool bFixTags );
377     sal_Bool IsTagMandatory( TokenInfo const &aToken, TokenId &aMetaTokens );
378     String aFixedTestee;
379 public:
380 	void CheckReference( GSILine *aReference );
381 	void CheckTestee( GSILine *aTestee, sal_Bool bHasSourceLine, sal_Bool bFixTags );
382 
383 //	ParserMessageList& GetReferenceErrors(){ return aReferenceParser.GetErrors(); }
384 //	sal_Bool HasReferenceErrors(){ return aReferenceParser.HasErrors(); }
385 
386 //	ParserMessageList& GetTesteeErrors(){ return aTesteeParser.GetErrors(); }
387 //	sal_Bool HasTesteeErrors(){ return aTesteeParser.HasErrors(); }
388 
389 	ParserMessageList& GetCompareWarnings(){ return aCompareWarningList; }
390 	sal_Bool HasCompareWarnings(){ return ( aCompareWarningList.Count() > 0 ); }
391 
392     String GetFixedTestee(){ return aFixedTestee; }
393 };
394 
395 #endif
396 
397