xref: /AOO41X/main/l10ntools/inc/xmlparse.hxx (revision 983d4c8a2545ff349deb9b45349fc5e9e80c6c2f)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #ifndef BOOTSTRP_XMLPARSE_HXX
25 #define BOOTSTRP_XMLPARSE_HXX
26 
27 #include <signal.h>
28 #include <expat.h>
29 #include <rtl/ustring.hxx>
30 #include <rtl/ustrbuf.hxx>
31 #include "tools/string.hxx"
32 #include "tools/list.hxx"
33 #define ENABLE_BYTESTRING_STREAM_OPERATORS
34 #include "tools/stream.hxx"
35 #include "tools/isofallback.hxx"
36 #include "export.hxx"
37 #include "xmlutil.hxx"
38 
39 #include <fstream>
40 #include <iostream>
41 
42 class XMLParentNode;
43 class XMLElement;
44 
45 
46 using namespace ::rtl;
47 using namespace std;
48 
49 #include <hash_map> /* std::hashmap*/
50 #include <deque>    /* std::deque*/
51 #include <iterator> /* std::iterator*/
52 #include <list>     /* std::list*/
53 #include <vector>   /* std::vector*/
// Node type codes, one per concrete kind of XML node
#define XML_NODE_TYPE_FILE          0x001
#define XML_NODE_TYPE_ELEMENT       0x002
#define XML_NODE_TYPE_DATA          0x003
#define XML_NODE_TYPE_COMMENT       0x004
#define XML_NODE_TYPE_DEFAULT       0x005

// Number of slots in the language table XMLUtil::isoArray
#define MAX_LANGUAGES               99
60 
61 
62 //#define TESTDRIVER        /* use xml2gsi testclass */
63 //-------------------------------------------------------------------------
64 
65 /** Holds data of Attributes
66  */
67 class XMLAttribute : public String
68 {
69 private:
70     String sValue;
71 
72 public:
73     /// creates an attribute
XMLAttribute(const String & rName,const String & rValue)74     XMLAttribute(
75         const String &rName,    // attributes name
76         const String &rValue    // attributes data
77     )
78                 : String( rName ), sValue( rValue ) {}
79 
80     /// getting value of an attribue
GetValue()81     const String &GetValue() { return sValue; }
82 
setValue(const String & rValue)83     void setValue(const String &rValue){sValue=rValue;}
84 
85     /// returns true if two attributes are equal and have the same value
IsEqual(const XMLAttribute & rAttribute)86     sal_Bool IsEqual(
87         const XMLAttribute &rAttribute  // the attribute which has to be equal
88     )
89     {
90         return (( rAttribute == *this ) && ( rAttribute.sValue == sValue ));
91     }
92 };
93 
94 DECLARE_LIST( XMLAttributeList, XMLAttribute * )
95 
96 //-------------------------------------------------------------------------
97 
98 /** Virtual base to handle different kinds of XML nodes
99  */
100 class XMLNode
101 {
102 protected:
XMLNode()103     XMLNode() {}
104 
105 public:
106     virtual sal_uInt16 GetNodeType() = 0;
~XMLNode()107     virtual ~XMLNode() {}
108 };
109 
110 //-------------------------------------------------------------------------
111 
112 /** Virtual base to handle different kinds of child nodes
113  */
114 class XMLChildNode : public XMLNode
115 {
116 private:
117     XMLParentNode *pParent;
118 
119 protected:
120     XMLChildNode( XMLParentNode *pPar );
XMLChildNode()121     XMLChildNode():pParent( NULL ){};
122     XMLChildNode( const XMLChildNode& obj);
123     XMLChildNode& operator=(const XMLChildNode& obj);
124 public:
125     virtual sal_uInt16 GetNodeType() = 0;
126 
127     /// returns the parent of this node
GetParent()128     XMLParentNode *GetParent() { return pParent; }
~XMLChildNode()129     virtual ~XMLChildNode(){};
130 };
131 
132 DECLARE_LIST( XMLChildNodeList, XMLChildNode * )
133 
134 //-------------------------------------------------------------------------
135 
136 /** Virtual base to handle different kinds of parent nodes
137  */
138 class XMLData;
139 
140 class XMLParentNode : public XMLChildNode
141 {
142 private:
143     XMLChildNodeList *pChildList;
144     static int dbgcnt;
145     //int         nParentPos;
146 protected:
XMLParentNode(XMLParentNode * pPar)147     XMLParentNode( XMLParentNode *pPar )
148                 : XMLChildNode( pPar ), pChildList( NULL )
149               {
150               }
XMLParentNode()151     XMLParentNode(): pChildList(NULL){
152     }
153     /// Copyconstructor
154     XMLParentNode( const XMLParentNode& );
155 
156     XMLParentNode& operator=(const XMLParentNode& obj);
157     virtual ~XMLParentNode();
158 
159 
160 public:
161     virtual sal_uInt16 GetNodeType() = 0;
162 
163     /// returns child list of this node
GetChildList()164     XMLChildNodeList *GetChildList() { return pChildList; }
165 
166     /// adds a new child
167     void AddChild(
168         XMLChildNode *pChild    /// the new child
169     );
170 
171     void AddChild(
172         XMLChildNode *pChild , int pos  /// the new child
173     );
174 
175     virtual int GetPosition( ByteString id );
176     int RemoveChild( XMLElement *pRefElement );
177     void RemoveAndDeleteAllChilds();
178 
179     /// returns a child element which matches the given one
180     XMLElement *GetChildElement(
181         XMLElement *pRefElement // the reference elelement
182     );
183 };
184 
185 //-------------------------------------------------------------------------
186 
187 DECLARE_LIST( XMLStringList, XMLElement* )
188 
189 /// Mapping numeric Language code <-> XML Element
190 typedef std::hash_map< ByteString ,XMLElement* , hashByteString,equalByteString > LangHashMap;
191 
192 /// Mapping XML Element string identifier <-> Language Map
193 typedef std::hash_map<ByteString , LangHashMap* ,
194                       hashByteString,equalByteString>                   XMLHashMap;
195 
196 /// Mapping iso alpha string code <-> iso numeric code
197 typedef std::hash_map<ByteString, int, hashByteString,equalByteString>  HashMap;
198 
199 /// Mapping XML tag names <-> have localizable strings
200 typedef std::hash_map<ByteString , sal_Bool ,
201                       hashByteString,equalByteString>                   TagMap;
202 
203 /** Holds information of a XML file, is root node of tree
204  */
205 
206 
207 class XMLFile : public XMLParentNode
208 {
209 public:
210     XMLFile() ;
211     XMLFile(
212                 const String &rFileName // the file name, empty if created from memory stream
213     );
214     XMLFile( const XMLFile& obj ) ;
215     ~XMLFile();
216 
217     ByteString* GetGroupID(std::deque<ByteString> &groupid);
218     void        Print( XMLNode *pCur = NULL, sal_uInt16 nLevel = 0 );
219     virtual void SearchL10NElements( XMLParentNode *pCur, int pos = 0 );
220     void        Extract( XMLFile *pCur = NULL );
221     void        View();
222 //  void static Signal_handler(int signo);//void*,oslSignalInfo * pInfo);
223     void        showType(XMLParentNode* node);
224 
GetStrings()225     XMLHashMap* GetStrings(){return XMLStrings;}
226     sal_Bool        Write( ByteString &rFilename );
227     sal_Bool        Write( ofstream &rStream , XMLNode *pCur = NULL );
228 
229     bool        CheckExportStatus( XMLParentNode *pCur = NULL );// , int pos = 0 );
230 
231     XMLFile&    operator=(const XMLFile& obj);
232 
233     virtual sal_uInt16  GetNodeType();
234 
235     /// returns file name
GetName()236     const String &GetName() { return sFileName; }
SetName(const String & rFilename)237     void          SetName( const String &rFilename ) { sFileName = rFilename; }
SetFullName(const String & rFullFilename)238     void          SetFullName( const String &rFullFilename ) { sFullName = rFullFilename; }
getOrder()239     const std::vector<ByteString> getOrder(){ return order; }
240 
241 protected:
242     // writes a string as UTF8 with dos line ends to a given stream
243     void        WriteString( ofstream &rStream, const String &sString );
244 
245     // quotes the given text for writing to a file
246     void        QuotHTML( String &rString );
247 
248     void        InsertL10NElement( XMLElement* pElement);
249 
250     // DATA
251     String      sFileName;
252     String      sFullName;
253 
254     const ByteString ID,OLDREF,XML_LANG;
255 
256     TagMap      nodes_localize;
257     XMLHashMap* XMLStrings;
258 
259     std::vector <ByteString> order;
260 };
261 
262 /// An Utility class for XML
263 /// See RFC 3066 / #i8252# for ISO codes
264 class XMLUtil{
265 
266 public:
267     /// Quot the XML characters and replace \n \t
268     static void         QuotHTML( String &rString );
269 
270     /// UnQuot the XML characters and restore \n \t
271     static void         UnQuotHTML  ( String &rString );
272 
273     /// Return the numeric iso language code
274     //sal_uInt16                GetLangByIsoLang( const ByteString &rIsoLang );
275 
276     /// Return the alpha strings representation
277     ByteString          GetIsoLangByIndex( sal_uInt16 nIndex );
278 
279     static XMLUtil&     Instance();
280     ~XMLUtil();
281 
282     void         dump();
283 
284 private:
285     /// Mapping iso alpha string code <-> iso numeric code
286     HashMap      lMap;
287 
288     /// Mapping iso numeric code      <-> iso alpha string code
289     ByteString   isoArray[MAX_LANGUAGES];
290 
291     static void UnQuotData( String &rString );
292     static void UnQuotTags( String &rString );
293 
294     XMLUtil();
295     XMLUtil(const XMLUtil&);
296 
297 };
298 
299 
300 
301 //-------------------------------------------------------------------------
302 
303 /** Hold information of an element node
304  */
305 class XMLElement : public XMLParentNode
306 {
307 private:
308     String sElementName;
309     XMLAttributeList *pAttributes;
310     ByteString   project,
311                  filename,
312                  id,
313                  sOldRef,
314                  resourceType,
315                  languageId;
316     int          nPos;
317 
318 protected:
319     void Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement);
320 public:
321     /// create a element node
XMLElement()322     XMLElement(){}
XMLElement(const String & rName,XMLParentNode * Parent)323     XMLElement(
324         const String &rName,    // the element name
325         XMLParentNode *Parent   // parent node of this element
326     ):          XMLParentNode( Parent ),
327                 sElementName( rName ),
328                 pAttributes( NULL ),
329                 project(""),
330                 filename(""),
331                 id(""),
332                 sOldRef(""),
333                 resourceType(""),
334                 languageId(""),
335                 nPos(0)
336                 {
337                 }
338     ~XMLElement();
339     XMLElement(const XMLElement&);
340 
341     XMLElement& operator=(const XMLElement& obj);
342     /// returns node type XML_NODE_ELEMENT
343     virtual sal_uInt16 GetNodeType();
344 
345     /// returns element name
GetName()346     const String &GetName() { return sElementName; }
347 
348     /// returns list of attributes of this element
GetAttributeList()349     XMLAttributeList *GetAttributeList() { return pAttributes; }
350 
351     /// adds a new attribute to this element, typically used by parser
352     void AddAttribute( const String &rAttribute, const String &rValue );
353 
354     void ChangeLanguageTag( const String &rValue );
355     // Return a ASCII String representation of this object
356     OString ToOString();
357 
358     // Return a Unicode String representation of this object
359     OUString ToOUString();
360 
361     bool    Equals(OUString refStr);
362 
363     /// returns a attribute
364     XMLAttribute *GetAttribute(
365         const String &rName // the attribute name
366     );
SetProject(ByteString prj)367     void SetProject         ( ByteString prj        ){ project = prj;        }
SetFileName(ByteString fn)368     void SetFileName        ( ByteString fn         ){ filename = fn;        }
SetId(ByteString theId)369     void SetId              ( ByteString theId      ){ id = theId;           }
SetResourceType(ByteString rt)370     void SetResourceType    ( ByteString rt         ){ resourceType = rt;    }
SetLanguageId(ByteString lid)371     void SetLanguageId      ( ByteString lid        ){ languageId = lid;     }
SetPos(int nPos_in)372     void SetPos             ( int nPos_in           ){ nPos = nPos_in;       }
SetOldRef(ByteString sOldRef_in)373     void SetOldRef          ( ByteString sOldRef_in ){ sOldRef = sOldRef_in; }
374 
GetPos()375     virtual int        GetPos()         { return nPos;         }
GetProject()376     ByteString GetProject()     { return project;      }
GetFileName()377     ByteString GetFileName()    { return filename;     }
GetId()378     ByteString GetId()          { return id;           }
GetOldref()379     ByteString GetOldref()      { return sOldRef;      }
GetResourceType()380     ByteString GetResourceType(){ return resourceType; }
GetLanguageId()381     ByteString GetLanguageId()  { return languageId;   }
382 
383 
384 };
385 //-------------------------------------------------------------------------
386 
387 
388 /** Holds character data
389  */
390 class XMLData : public XMLChildNode
391 {
392 private:
393     String sData;
394     bool   isNewCreated;
395 
396 public:
397     /// create a data node
XMLData(const String & rData,XMLParentNode * Parent)398     XMLData(
399         const String &rData,    // the initial data
400         XMLParentNode *Parent   // the parent node of this data, typically a element node
401     )
402                 : XMLChildNode( Parent ), sData( rData ) , isNewCreated ( false ){}
XMLData(const String & rData,XMLParentNode * Parent,bool newCreated)403     XMLData(
404         const String &rData,    // the initial data
405         XMLParentNode *Parent,  // the parent node of this data, typically a element node
406         bool newCreated
407     )
408                 : XMLChildNode( Parent ), sData( rData ) , isNewCreated ( newCreated ){}
409 
410     XMLData(const XMLData& obj);
411 
412     XMLData& operator=(const XMLData& obj);
413     virtual sal_uInt16 GetNodeType();
414 
415     /// returns the data
GetData()416     const String &GetData() { return sData; }
417 
isNew()418     bool isNew() { return isNewCreated; }
419     /// adds new character data to the existing one
420     void AddData(
421         const String &rData // the new data
422     );
423 
424 
425 
426 };
427 
428 //-------------------------------------------------------------------------
429 
430 /** Holds comments
431  */
432 class XMLComment : public XMLChildNode
433 {
434 private:
435     String sComment;
436 
437 public:
438     /// create a comment node
XMLComment(const String & rComment,XMLParentNode * Parent)439     XMLComment(
440         const String &rComment, // the comment
441         XMLParentNode *Parent   // the parent node of this comemnt, typically a element node
442     )
443                 : XMLChildNode( Parent ), sComment( rComment ) {}
444 
445     virtual sal_uInt16 GetNodeType();
446 
447     XMLComment( const XMLComment& obj );
448 
449     XMLComment& operator=(const XMLComment& obj);
450 
451     /// returns the comment
GetComment()452     const String &GetComment()  { return sComment; }
453 };
454 
455 //-------------------------------------------------------------------------
456 
457 /** Holds additional file content like those for which no handler exists
458  */
459 class XMLDefault : public XMLChildNode
460 {
461 private:
462     String sDefault;
463 
464 public:
465     /// create a comment node
XMLDefault(const String & rDefault,XMLParentNode * Parent)466     XMLDefault(
467         const String &rDefault, // the comment
468         XMLParentNode *Parent   // the parent node of this comemnt, typically a element node
469     )
470                 : XMLChildNode( Parent ), sDefault( rDefault ) {}
471 
472     XMLDefault(const XMLDefault& obj);
473 
474     XMLDefault& operator=(const XMLDefault& obj);
475 
476     /// returns node type XML_NODE_TYPE_COMMENT
477     virtual sal_uInt16 GetNodeType();
478 
479     /// returns the comment
GetDefault()480     const String &GetDefault()  { return sDefault; }
481 };
482 
483 //-------------------------------------------------------------------------
484 
485 /** struct for error information, used by class SimpleXMLParser
486  */
487 struct XMLError {
488     XML_Error eCode;    // the error code
489     sal_uLong nLine;        // error line number
490     sal_uLong nColumn;      // error column number
491     String sMessage;    // readable error message
492 };
493 
494 //-------------------------------------------------------------------------
495 
496 /** validating xml parser, creates a document tree with xml nodes
497  */
498 
499 
500 class SimpleXMLParser
501 {
502 private:
503     XML_Parser aParser;
504     XMLError aErrorInformation;
505 
506     XMLFile *pXMLFile;
507     XMLParentNode *pCurNode;
508     XMLData *pCurData;
509 
510 
511     static void StartElementHandler( void *userData, const XML_Char *name, const XML_Char **atts );
512     static void EndElementHandler( void *userData, const XML_Char *name );
513     static void CharacterDataHandler( void *userData, const XML_Char *s, int len );
514     static void CommentHandler( void *userData, const XML_Char *data );
515     static void DefaultHandler( void *userData, const XML_Char *s, int len );
516 
517 
518     void StartElement( const XML_Char *name, const XML_Char **atts );
519     void EndElement( const XML_Char *name );
520     void CharacterData( const XML_Char *s, int len );
521     void Comment( const XML_Char *data );
522     void Default( const XML_Char *s, int len );
523 
524 
525 public:
526     /// creates a new parser
527     SimpleXMLParser();
528     ~SimpleXMLParser();
529 
530     /// parse a file, returns NULL on criticall errors
531     XMLFile *Execute(
532         const String &rFullFileName,
533         const String &rFileName,    // the file name
534         XMLFile *pXMLFileIn         // the XMLFile
535     );
536 
537     /// parse a memory stream, returns NULL on criticall errors
538     XMLFile *Execute(
539         SvMemoryStream *pStream // the stream
540     );
541 
542     /// returns an error struct
GetError()543     const XMLError &GetError() { return aErrorInformation; }
544 };
545 
546 #endif
547