1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_shell.hxx" 26 27 #ifndef XML_PARSER_HXX_INCLUDED 28 #include "internal/xml_parser.hxx" 29 #endif 30 #include "internal/i_xml_parser_event_handler.hxx" 31 32 #include <assert.h> 33 34 namespace /* private */ 35 { 36 37 //###################################################### 38 /* Extracts the local part of tag without 39 namespace decoration e.g. meta:creator -> creator */ 40 const XML_Char COLON = (XML_Char)':'; 41 42 const XML_Char* get_local_name(const XML_Char* rawname) 43 { 44 const XML_Char* p = rawname; 45 46 // go to the end 47 while (*p) p++; 48 49 // go back until the first ':' 50 while (*p != COLON && p > rawname) 51 p--; 52 53 // if we are on a colon one step forward 54 if (*p == COLON) 55 p++; 56 57 return p; 58 } 59 60 //################################################ 61 inline xml_parser* get_parser_instance(void* data) 62 { 63 return reinterpret_cast<xml_parser*>(XML_GetUserData( 64 reinterpret_cast<XML_Parser>(data))); 65 } 66 67 //################################################ 68 bool has_only_whitespaces(const XML_Char* s, int len) 69 { 70 const XML_Char* p = s; 71 for (int i = 0; i < len; i++) 72 if (*p++ != ' ') return false; 73 return true; 74 } 75 } 76 77 //################################################### 78 xml_parser::xml_parser(const XML_Char* EncodingName) : 79 document_handler_(0), 80 xml_parser_(XML_ParserCreate(EncodingName)) 81 { 82 init(); 83 } 84 85 //################################################### 86 xml_parser::~xml_parser() 87 { 88 XML_ParserFree(xml_parser_); 89 } 90 91 //################################################### 92 /* Callback functions will be called by the parser on 93 different events */ 94 95 //################################################### 96 extern "C" 97 { 98 99 static void xml_start_element_handler(void* UserData, const XML_Char* name, const XML_Char** atts) 100 { 101 assert(UserData != NULL); 102 103 xml_parser* pImpl = get_parser_instance(UserData); 104 105 i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 106 if (pDocHdl) 107 { 108 xml_tag_attribute_container_t attributes; 109 110 int i = 0; 111 112 while(atts[i]) 113 { 114 attributes[reinterpret_cast<const char_t*>(get_local_name(atts[i]))] = reinterpret_cast<const char_t*>(atts[i+1]); 115 i += 2; // skip to next pair 116 } 117 118 pDocHdl->start_element( 119 reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)), attributes); 120 } 121 } 122 123 //################################################### 124 static void xml_end_element_handler(void* UserData, const XML_Char* name) 125 { 126 assert(UserData); 127 128 xml_parser* pImpl = get_parser_instance(UserData); 129 i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 130 if (pDocHdl) 131 pDocHdl->end_element(reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name))); 132 } 133 134 //################################################### 135 static void xml_character_data_handler(void* UserData, const XML_Char* s, int len) 136 { 137 assert(UserData); 138 139 xml_parser* pImpl = get_parser_instance(UserData); 140 i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 141 if (pDocHdl) 142 { 143 if (has_only_whitespaces(s,len)) 144 pDocHdl->ignore_whitespace(string_t(reinterpret_cast<const char_t*>(s), len)); 145 else 146 pDocHdl->characters(string_t(reinterpret_cast<const char_t*>(s), len)); 147 } 148 } 149 150 //################################################### 151 static void xml_comment_handler(void* UserData, const XML_Char* Data) 152 { 153 assert(UserData); 154 155 xml_parser* pImpl = get_parser_instance(UserData); 156 i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 157 if (pDocHdl) 158 pDocHdl->comment(reinterpret_cast<const char_t*>(Data)); 159 } 160 161 } // extern "C" 162 163 //################################################### 164 void xml_parser::init() 165 { 166 XML_SetUserData(xml_parser_, this); 167 168 // we use the parser as handler argument, 169 // so we could use it if necessary, the 170 // UserData are usable anyway using 171 // XML_GetUserData(...) 172 XML_UseParserAsHandlerArg(xml_parser_); 173 174 XML_SetElementHandler( 175 xml_parser_, 176 xml_start_element_handler, 177 xml_end_element_handler); 178 179 XML_SetCharacterDataHandler( 180 xml_parser_, 181 xml_character_data_handler); 182 183 XML_SetCommentHandler( 184 xml_parser_, 185 xml_comment_handler); 186 } 187 188 //################################################### 189 void xml_parser::parse(const char* XmlData, size_t Length, bool IsFinal) 190 { 191 if (0 == XML_Parse(xml_parser_, XmlData, Length, IsFinal)) 192 throw xml_parser_exception( 193 (char*)XML_ErrorString(XML_GetErrorCode(xml_parser_)), 194 (int)XML_GetErrorCode(xml_parser_), 195 XML_GetCurrentLineNumber(xml_parser_), 196 XML_GetCurrentColumnNumber(xml_parser_), 197 XML_GetCurrentByteIndex(xml_parser_)); 198 } 199 200 //################################################### 201 void xml_parser::set_document_handler( 202 i_xml_parser_event_handler* event_handler) 203 { 204 document_handler_ = event_handler; 205 } 206 207 //################################################### 208 i_xml_parser_event_handler* xml_parser::get_document_handler() const 209 { 210 return document_handler_; 211 } 212