xref: /AOO41X/main/extensions/source/macosx/spotlight/OOoContentDataParser.m (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir/*************************************************************************
2*cdf0e10cSrcweir*
3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir *
5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir *
7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir *
9*cdf0e10cSrcweir * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir *
11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir *
15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir *
21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir *
26*cdf0e10cSrcweir*************************************************************************/
27*cdf0e10cSrcweir
28*cdf0e10cSrcweir#import "OOoContentDataParser.h"
29*cdf0e10cSrcweir
// NSXMLParser delegate that collects the character data found inside
// <text:p> elements and stores the result, joined by single spaces, in the
// caller's dictionary under kMDItemTextContent.
//
// Memory management is manual retain/release. Both text buffers are owned by
// this object and are always set to nil right after being released, so a
// parse error, a repeated call to -parseXML:intoDictionary: or -dealloc can
// never release the same buffer twice.
@implementation OOoContentDataParser

- (id)init
{
    if ((self = [super init]) != nil) {
        shouldReadCharacters = NO;
        textContent = nil;
        runningTextContent = nil;
    }

    return self;
}

// Releases whatever buffers are still alive (e.g. after an aborted parse).
- (void)dealloc
{
    [runningTextContent release];
    [textContent release];

    [super dealloc];
}

// Private helper: drops both text buffers and stops character collection.
// Messaging nil is a no-op, so this is safe to call in any state.
- (void)resetTextBuffers
{
    [runningTextContent release];
    runningTextContent = nil;

    [textContent release];
    textContent = nil;

    shouldReadCharacters = NO;
}

// Private helper: moves the text of the paragraph currently being read into
// textContent and disposes of the per-paragraph buffer. Empty paragraphs add
// nothing, and a separating space is only inserted between two non-empty
// pieces of text.
- (void)flushRunningTextContent
{
    if (runningTextContent == nil) {
        return;
    }

    if ([runningTextContent length] > 0) {
        if (textContent == nil) {
            textContent = [[NSMutableString alloc] init];
        } else if ([textContent length] > 0) {
            // separate paragraphs by whitespace
            [textContent appendString:@" "];
        }
        [textContent appendString:runningTextContent];
    }

    [runningTextContent release];
    runningTextContent = nil;
}

// Parses the XML in `data` and, if any paragraph text was found, stores it in
// `dict` under kMDItemTextContent. `dict` is not retained; it is only
// referenced for the duration of this call.
- (void)parseXML:(NSData*)data intoDictionary:(NSMutableDictionary*)dict
{
    mdiValues = dict;

    // start from a clean state so the same instance can parse several documents
    [self resetTextBuffers];

    NSXMLParser *parser = [[NSXMLParser alloc] initWithData:data];

    [parser setDelegate:self];
    [parser setShouldResolveExternalEntities:NO];
    [parser parse];

    [parser release];

    // discard anything an incomplete parse left behind and forget the
    // non-retained dictionary so no stale pointer is kept around
    [self resetTextBuffers];
    mdiValues = nil;
}

// Delegate callback: begins collecting characters when a <text:p> opens.
- (void)parser:(NSXMLParser *)parser didStartElement:(NSString *)elementName namespaceURI:(NSString *)namespaceURI qualifiedName:(NSString *)qualifiedName attributes:(NSDictionary *)attributeDict
{
    // all text content is stored inside <text:p> elements
    if (![elementName isEqualToString:@"text:p"]) {
        return;
    }

    // A <text:p> nested inside another one: keep what was read so far instead
    // of leaking the previous buffer.
    // NOTE(review): text that follows the nested paragraph inside the outer
    // one is still not collected; tracking nesting depth would need a new ivar.
    [self flushRunningTextContent];

    runningTextContent = [[NSMutableString alloc] init];
    shouldReadCharacters = YES;
}

// Delegate callback: finishes the current paragraph when </text:p> is seen.
- (void)parser:(NSXMLParser *)parser didEndElement:(NSString *)elementName namespaceURI:(NSString *)namespaceURI qualifiedName:(NSString *)qName
{
    // Only the end of the paragraph itself terminates collection. Ends of
    // inline children (e.g. </text:span>) must not, otherwise the remainder
    // of the paragraph would be dropped.
    if (![elementName isEqualToString:@"text:p"]) {
        return;
    }

    [self flushRunningTextContent];
    shouldReadCharacters = NO;
}

// Delegate callback: appends character data while inside a <text:p>.
- (void)parser:(NSXMLParser *)parser foundCharacters:(NSString *)string
{
    if (!shouldReadCharacters) {
        return;
    }

    [runningTextContent appendString:string];
}

// Delegate callback: logs the parse error and throws away all text collected
// so far, so that nothing is stored for a broken document.
- (void)parser:(NSXMLParser *)parser parseErrorOccurred:(NSError *)parseError
{
    // Pass the values as NSLog arguments rather than using a pre-formatted
    // string as the format: a '%' inside the description would otherwise be
    // interpreted as a format specifier.
    NSLog(@"An error occured parsing the document. (Error %ld, Description: %@, Line: %ld, Column: %ld)",
          (long)[parseError code],
          [[parser parserError] localizedDescription],
          (long)[parser lineNumber],
          (long)[parser columnNumber]);

    [self resetTextBuffers];
}

// Delegate callback: publishes the collected text (if any) to the dictionary
// given to -parseXML:intoDictionary: and releases the buffers.
- (void)parserDidEndDocument:(NSXMLParser *)parser
{
    if ([textContent length] > 0) {
        [mdiValues setObject:[NSString stringWithString:textContent] forKey:(NSString*)kMDItemTextContent];
    }

    [self resetTextBuffers];
}

@end
134