/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org.  If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
*
*************************************************************************/

#import "OOoContentDataParser.h"

// NSXMLParser delegate that gathers the character data found inside
// <text:p> elements and stores the result, paragraphs separated by a single
// blank, under kMDItemTextContent in the dictionary handed to
// -parseXML:intoDictionary:.
//
// Memory management is manual (retain/release). Both accumulation buffers
// are owned by this object while a parse is running and are always reset to
// nil right after being released, so no dangling pointer survives a parse.
@implementation OOoContentDataParser

- (id)init
{
    self = [super init];
    if (self != nil) {
        shouldReadCharacters = NO;
        textContent = nil;
        runningTextContent = nil;
    }
    return self;
}

// Private helper: releases both accumulation buffers, clears the pointers and
// stops character collection. Safe to call at any time because messaging nil
// is a no-op.
- (void)discardTextBuffers
{
    [runningTextContent release];
    runningTextContent = nil;
    [textContent release];
    textContent = nil;
    shouldReadCharacters = NO;
}

// Private helper: moves the text of the paragraph collected so far into
// textContent and drops the paragraph buffer. Empty paragraphs contribute
// nothing, and the separating blank is only inserted between two non-empty
// pieces, so the result never starts with a blank.
- (void)flushRunningText
{
    if ([runningTextContent length] > 0) {
        if (textContent == nil) {
            textContent = [[NSMutableString alloc] init];
        } else if ([textContent length] > 0) {
            // separate by whitespace
            [textContent appendString:@" "];
        }
        [textContent appendString:runningTextContent];
    }

    [runningTextContent release];
    runningTextContent = nil;
}

// Parses the XML in `data` synchronously. When the document ends without a
// parse error and any paragraph text was found, that text is stored in `dict`
// under kMDItemTextContent; otherwise `dict` is left untouched.
//
// `dict` is not retained; it only has to stay alive for the duration of
// this call.
- (void)parseXML:(NSData*)data intoDictionary:(NSMutableDictionary*)dict
{
    mdiValues = dict;

    // init parser settings; start from a clean state even when this object
    // is reused for several documents
    [self discardTextBuffers];

    NSXMLParser *parser = [[NSXMLParser alloc] initWithData:data];

    [parser setDelegate:self];
    [parser setShouldResolveExternalEntities:NO];
    [parser parse];

    [parser release];

    // Safety net: normally -parserDidEndDocument: or
    // -parser:parseErrorOccurred: has already cleaned up, but make sure
    // nothing is kept around if neither of them ran.
    [self discardTextBuffers];
}

// Starts collecting characters when a <text:p> element opens.
- (void)parser:(NSXMLParser *)parser didStartElement:(NSString *)elementName namespaceURI:(NSString *)namespaceURI qualifiedName:(NSString *)qualifiedName attributes:(NSDictionary *)attributeDict
{
    // all text content is stored inside <text:p> elements
    if (![elementName isEqualToString:@"text:p"]) {
        return;
    }

    // A <text:p> may open while another one is still being collected
    // (nested paragraph). Flush what has been gathered so far so the outer
    // buffer is neither leaked nor lost before a new one is started.
    // NOTE: text of the outer paragraph that follows the nested one is still
    // not collected; tracking that would need a nesting counter ivar.
    [self flushRunningText];

    runningTextContent = [[NSMutableString alloc] init];
    shouldReadCharacters = YES;
}

// Finishes the current paragraph when its </text:p> is reached.
- (void)parser:(NSXMLParser *)parser didEndElement:(NSString *)elementName namespaceURI:(NSString *)namespaceURI qualifiedName:(NSString *)qName
{
    // Only the end of the paragraph itself terminates collection. Closing
    // tags of inline children (for example </text:span>) must not, otherwise
    // the remainder of the paragraph would be dropped.
    if (![elementName isEqualToString:@"text:p"]) {
        return;
    }

    [self flushRunningText];
    shouldReadCharacters = NO;
}

// Appends character data to the current paragraph while inside <text:p>.
// The parser may deliver the text of one element in several pieces.
- (void)parser:(NSXMLParser *)parser foundCharacters:(NSString *)string
{
    if (!shouldReadCharacters) {
        return;
    }

    [runningTextContent appendString:string];
}

// Logs the parse error and throws away everything collected so far, so that
// nothing is stored for a document that could not be parsed completely.
- (void)parser:(NSXMLParser *)parser parseErrorOccurred:(NSError *)parseError
{
    // Use a constant format string: the error description is runtime data
    // and may itself contain '%' characters. NSInteger values are cast to
    // long so the specifiers are correct on 32 and 64 bit.
    NSLog(@"An error occured parsing the document. (Error %ld, Description: %@, Line: %ld, Column: %ld)",
          (long)[parseError code],
          [[parser parserError] localizedDescription],
          (long)[parser lineNumber],
          (long)[parser columnNumber]);

    [self discardTextBuffers];
}

// Stores the collected text (if any) in the target dictionary and releases
// the buffers.
- (void)parserDidEndDocument:(NSXMLParser *)parser
{
    if ([textContent length] > 0) {
        // store an immutable snapshot, not the mutable buffer itself
        [mdiValues setObject:[NSString stringWithString:textContent] forKey:(NSString*)kMDItemTextContent];
    }

    [self discardTextBuffers];
}

@end