xref: /AOO41X/main/xmerge/source/pexcel/java/org/openoffice/xmerge/converter/xml/sxc/pexcel/records/formula/TokenDecoder.java (revision 04ea5bd4910fe6337fe7d7c799027ca781f77c68)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml.sxc.pexcel.records.formula;
25 
26 import java.io.*;
27 import java.util.Vector;
28 import java.util.Enumeration;
29 
30 import org.openoffice.xmerge.util.Debug;
31 import org.openoffice.xmerge.util.EndianConverter;
32 import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.DefinedName;
33 import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.Workbook;
34 
35 /**
36  * The TokenDecoder decodes a byte[] to an equivalent <code>String</code>. The only
37  * public method apart from the default constructor is the getTokenVector method.
38  * This method takes an entire formula as a pexcel byte[] and decodes it into
39  * a series of <code>Token</code>s. It adds these to a <code>Vector</code> which
40  * is returned once all the tokens have been decoded. The Decoder supports
41  * the following tokens.<br><br>
42  *
43  * Operands     Floating point's, Cell references (absolute and relative),
44  *              cell ranges<br>
 * Operators    +,-,*,/,&lt;,&gt;,&lt;=,&gt;=,&lt;&gt;<br>
 * Functions    All pexcel fixed and variable argument functions
47  *
48  */
49 public class TokenDecoder {
50 
51     private TokenFactory tf;
52     private FunctionLookup fl;
53     private OperatorLookup operatorLookup;
54     private OperandLookup operandLookup;
55     private Workbook wb;
56 
57     /**
58      * Default Constructor initializes the <code>TokenFactory</code> for generating
59      * <code>Token</code> and the <code>SymbolLookup</code> for generating
60      * Strings from hex values.
61      */
TokenDecoder()62     public TokenDecoder() {
63         tf = new TokenFactory();
64         fl = new FunctionLookup();
65         operatorLookup = new OperatorLookup();
66         operandLookup = new OperandLookup();
67     }
68 
69     /**
70      * Sets global workbook data needed for defined names
71      */
setWorkbook(Workbook wb)72     public void setWorkbook(Workbook wb) {
73 
74         Debug.log(Debug.TRACE, "TokenDecoder : setWorkbook");
75         this.wb = wb;
76     }
77 
78     /**
79      * Returns a <code>Vector</code> of <code>Token</code> decoded from a
80      * byte[]. The byte[] is first converted to a
81      * <code>ByteArrayInputStream</code> as this is the easiest way of reading
82      * bytes.
83      *
84      * @param formula A Pocket Excel Formula byte[]
85      * @return A <code>Vector</code> of deoded <code>Token</code>
86      */
getTokenVector(byte[] formula)87     public Vector getTokenVector(byte[] formula) {
88 
89         Vector v = new Vector();
90 
91         ByteArrayInputStream bis = new ByteArrayInputStream(formula);
92         int b = 0 ;
93         Token t;
94 
95         while ((b = bis.read())!=-1)
96         {
97 
98 
99             switch (b) {
100 
101                 case TokenConstants.TAREA3D:
102                                 Debug.log(Debug.TRACE, "Decoded 3D Area Cell Reference: ");
103                                 v.add(read3DCellAreaRefToken(bis));
104                                 Debug.log(Debug.TRACE, "Decoded 3D Area Cell Reference: " + v.lastElement());
105                                 break;
106                 case TokenConstants.TREF3D:
107                                 Debug.log(Debug.TRACE, "Decoded 3D Cell Reference: ");
108                                 v.add(read3DCellRefToken(bis));
109                                 Debug.log(Debug.TRACE, "Decoded 3D Cell Reference: " + v.lastElement());
110                                 break;
111                 case TokenConstants.TREF :
112                                 v.add(readCellRefToken(bis));
113                                 Debug.log(Debug.TRACE, "Decoded Cell Reference: " + v.lastElement());
114                                 break;
115                 case TokenConstants.TAREA :
116                                 v.add(readCellAreaRefToken(bis));
117                                 Debug.log(Debug.TRACE, "Decoded Cell Area Reference: " + v.lastElement());
118                                 break;
119                 case TokenConstants.TNUM :
120                                 v.add(readNumToken(bis));
121                                 Debug.log(Debug.TRACE, "Decoded number : " + v.lastElement());
122                                 break;
123                 case TokenConstants.TFUNCVAR :
124                                 v.add(readFunctionVarToken(bis));
125                                 Debug.log(Debug.TRACE, "Decoded variable argument function: " + v.lastElement());
126                                 break;
127                 case TokenConstants.TFUNC :
128                                 v.add(readFunctionToken(bis));
129                                 Debug.log(Debug.TRACE, "Decoded function: " + v.lastElement());
130                                 break;
131                 case TokenConstants.TSTRING :
132                                 v.add(readStringToken(bis));
133                                 Debug.log(Debug.TRACE, "Decoded string: " + v.lastElement());
134                                 break;
135                 case TokenConstants.TNAME :
136                                 v.add(readNameToken(bis));
137                                 Debug.log(Debug.TRACE, "Decoded defined name: " + v.lastElement());
138                                 break;
139                 case TokenConstants.TUPLUS:
140                 case TokenConstants.TUMINUS:
141                 case TokenConstants.TPERCENT:
142                                 v.add(readOperatorToken(b, 1));
143                                 Debug.log(Debug.TRACE, "Decoded Unary operator : " + v.lastElement());
144                                 break;
145                 case TokenConstants.TADD :
146                 case TokenConstants.TSUB :
147                 case TokenConstants.TMUL :
148                 case TokenConstants.TDIV :
149                 case TokenConstants.TLESS :
150                 case TokenConstants.TLESSEQUALS :
151                 case TokenConstants.TEQUALS :
152                 case TokenConstants.TGTEQUALS :
153                 case TokenConstants.TGREATER :
154                 case TokenConstants.TNEQUALS :
155                                 v.add(readOperatorToken(b, 2));
156                                 Debug.log(Debug.TRACE, "Decoded Binary operator : " + v.lastElement());
157                                 break;
158 
159                 default :
160                                 Debug.log(Debug.TRACE, "Unrecognized byte : " + b);
161             }
162         }
163         return v;
164     }
165 
166     /**
167      * Converts a zero based integer to a char (eg. a=0, b=1).
168      * It assumes the integer is less than 26.
169      *
170      * @param i A 0 based index
171      * @return The equivalent character
172      */
int2Char(int i)173     private char int2Char(int i) {
174         return (char) ('A' + i);
175     }
176 
177     /**
178      * Reads a Cell Reference token from the <code>ByteArrayInputStream</code>
179      *
180      * @param bis The <code>ByteArrayInputStream</code> from which we read the
181      * bytes.
182      * @return The decoded String <code>Token</code>
183      */
readStringToken(ByteArrayInputStream bis)184     private Token readStringToken(ByteArrayInputStream bis) {
185 
186         int len = ((int)bis.read())*2;
187         int options = (int)bis.read();
188         Debug.log(Debug.TRACE,"String length is " + len + " and Options Flag is " + options);
189         byte [] stringBytes = new byte[len];
190         int numRead =0;
191         if ((numRead = bis.read(stringBytes, 0, len)) != len) {
192             Debug.log(Debug.TRACE,"Expected " + len + " bytes. Could only read " + numRead + " bytes.");
193             //throw new IOException("Expected " + len + " bytes. Could only read " + numRead + " bytes.");
194         }
195         StringBuffer outputString = new StringBuffer();
196         outputString.append('"');
197         try {
198             Debug.log(Debug.TRACE,"Using LE encoding");
199             outputString.append(new String(stringBytes, "UTF-16LE"));
200         } catch (IOException eIO) {
201             outputString.append(new String(stringBytes)); //fall back to default encoding
202         }
203         outputString.append('"');
204 
205         return (tf.getOperandToken(outputString.toString(), "STRING"));
206     }
207 
208     /**
209      * Reads a Defined Name  token from the <code>ByteArrayInputStream</code>
210      *
211      * @param bis The <code>ByteArrayInputStream</code> from which we read the
212      * bytes.
213      * @return The decoded Name <code>Token</code>
214      */
readNameToken(ByteArrayInputStream bis)215     private Token readNameToken(ByteArrayInputStream bis) {
216         byte buffer[] = new byte[2];
217         buffer[0] = (byte) bis.read();
218         buffer[1] = (byte) bis.read();
219         int nameIndex = EndianConverter.readShort(buffer);
220         bis.skip(12);       // the next 12 bytes are unused
221         Enumeration e = wb.getDefinedNames();
222         int i = 1;
223         while(i<nameIndex) {
224             e.nextElement();
225             i++;
226         }
227         Debug.log(Debug.TRACE,"Name index is " + nameIndex);
228         DefinedName dn = (DefinedName)e.nextElement();
229         Debug.log(Debug.TRACE,"DefinedName is " + dn.getName());
230         return (tf.getOperandToken(dn.getName(), "NAME"));
231     }
232 
233     /**
234      * Reads a Cell Reference token from the <code>ByteArrayInputStream</code>
235      *
236      * @param bis The <code>ByteArrayInputStream</code> from which we read the
237      * bytes.
238      * @return The decoded Cell Reference <code>Token</code>
239      */
readCellRefToken(ByteArrayInputStream bis)240     private Token readCellRefToken(ByteArrayInputStream bis) {
241 
242         byte buffer[] = new byte[2];
243         String outputString = new String();
244 
245         buffer[0] = (byte) bis.read();
246         buffer[1] = (byte) bis.read();
247         int formulaRow = EndianConverter.readShort(buffer);
248         int relativeFlags = (formulaRow & 0xC000)>>14;
249         formulaRow &= 0x3FFF;
250         int formulaCol = (byte) bis.read();
251 
252         outputString = int2CellStr(formulaRow, formulaCol, relativeFlags);
253 
254         return (tf.getOperandToken(outputString,"CELL_REFERENCE"));
255     }
256 
257     /**
258      * Reads a Cell Reference token from the <code>ByteArrayInputStream</code>
259      *
260      * @param bis The <code>ByteArrayInputStream</code> from which we read the
261      * bytes.
262      * @return The decoded Cell Reference <code>Token</code>
263      */
read3DCellRefToken(ByteArrayInputStream bis)264     private Token read3DCellRefToken(ByteArrayInputStream bis) {
265 
266         byte buffer[] = new byte[2];
267         String outputString = new String();
268 
269         bis.skip(10);
270 
271         buffer[0] = (byte) bis.read();
272         buffer[1] = (byte) bis.read();
273         int Sheet1 = EndianConverter.readShort(buffer);
274         buffer[0] = (byte) bis.read();
275         buffer[1] = (byte) bis.read();
276         int Sheet2 = EndianConverter.readShort(buffer);
277 
278         buffer[0] = (byte) bis.read();
279         buffer[1] = (byte) bis.read();
280         int formulaRow = EndianConverter.readShort(buffer);
281         int relativeFlags = (formulaRow & 0xC000)>>14;
282         formulaRow &= 0x3FFF;
283         int formulaCol = (byte) bis.read();
284         String cellRef = "." + int2CellStr(formulaRow, formulaCol, relativeFlags);
285         if(Sheet1 == Sheet2) {
286             outputString = "$" + wb.getSheetName(Sheet1) + cellRef;
287         } else {
288             outputString = "$" + wb.getSheetName(Sheet1) + cellRef + ":$" + wb.getSheetName(Sheet2) + cellRef;
289         }
290 
291         return (tf.getOperandToken(outputString,"3D_CELL_REFERENCE"));
292     }
293 
294     /**
295      * Reads a Cell Reference token from the <code>ByteArrayInputStream</code>
296      *
297      * @param bis The <code>ByteArrayInputStream</code> from which we read the
298      * bytes.
299      * @return The decoded Cell Reference <code>Token</code>
300      */
read3DCellAreaRefToken(ByteArrayInputStream bis)301     private Token read3DCellAreaRefToken(ByteArrayInputStream bis) {
302 
303         byte buffer[] = new byte[2];
304         String outputString = new String();
305 
306         bis.skip(10);
307 
308         buffer[0] = (byte) bis.read();
309         buffer[1] = (byte) bis.read();
310         int Sheet1 = EndianConverter.readShort(buffer);
311         buffer[0] = (byte) bis.read();
312         buffer[1] = (byte) bis.read();
313         int Sheet2 = EndianConverter.readShort(buffer);
314 
315         buffer[0] = (byte) bis.read();
316         buffer[1] = (byte) bis.read();
317         int formulaRow1 = EndianConverter.readShort(buffer);
318         int relativeFlags1 = (formulaRow1 & 0xC000)>>14;
319         formulaRow1 &= 0x3FFF;
320 
321         buffer[0] = (byte) bis.read();
322         buffer[1] = (byte) bis.read();
323         int formulaRow2 = EndianConverter.readShort(buffer);
324         int relativeFlags2 = (formulaRow2 & 0xC000)>>14;
325         formulaRow2 &= 0x3FFF;
326 
327         int formulaCol1 = (byte) bis.read();
328         int formulaCol2 = (byte) bis.read();
329 
330         String cellRef1 = "." + int2CellStr(formulaRow1, formulaCol1, relativeFlags1);
331         String cellRef2 = int2CellStr(formulaRow2, formulaCol2, relativeFlags2);
332 
333         if(Sheet1 == Sheet2) {
334             outputString = "$" + wb.getSheetName(Sheet1) + cellRef1 + ":" + cellRef2;
335         } else {
336             outputString = "$" + wb.getSheetName(Sheet1) + cellRef1 + ":$" + wb.getSheetName(Sheet2) + "." + cellRef2;
337         }
338 
339         return (tf.getOperandToken(outputString,"3D_CELL_AREA_REFERENCE"));
340     }
341 
342     /**
343      * Converts a row and col 0 based index to a spreadsheet cell reference.
344      * It also has a relativeFlags which indicates whether or not the
345      * Cell Reference is relative or absolute (Absolute is denoted with '$')
346      *
347      * 00 = absolute row, absolute col
348      * 01 = absolute row, relative col
349      * 10 = relative row, absolute col
350      * 11 = relative row, relative col
351      *
352      * @param row The cell reference 0 based index to the row
353      * @param col The cell reference 0 based index to the row
354      * @param relativeFlags Flags indicating addressing of row and column
355      * @return A <code>String</code> representing a cell reference
356      */
int2CellStr(int row, int col, int relativeFlags)357     private String int2CellStr(int row, int col, int relativeFlags) {
358        String outputString = "";
359        int firstChar = (col + 1) / 26;
360 
361        if((relativeFlags & 1) == 0) {
362            outputString += "$";
363        }
364 
365         if(firstChar>0) {
366                 int secondChar = (col + 1) % 26;
367                 outputString += Character.toString(int2Char(firstChar - 1)) + Character.toString(int2Char(secondChar - 1));
368         } else {
369                 outputString += Character.toString(int2Char(col));
370         }
371        if((relativeFlags & 2) == 0) {
372            outputString += "$";
373        }
374        outputString += Integer.toString(row+1);
375        return outputString;
376     }
377 
378     /**
379      * Reads a Cell Area Reference (cell range) <code>Token</code> from
380      * the <code>ByteArrayInputStream</code>
381      *
382      * @param bis The <code>ByteArrayInputStream</code> from which we read the
383      * bytes.
384      * @return The equivalent Cell Area Reference (cell range)
385      * <code>Token</code>
386      */
readCellAreaRefToken(ByteArrayInputStream bis)387     private Token readCellAreaRefToken(ByteArrayInputStream bis) {
388         byte buffer[] = new byte[2];
389         int formulaRow1, formulaRow2;
390         int formulaCol1, formulaCol2;
391 
392         String outputString = new String();
393 
394         buffer[0] = (byte) bis.read();
395         buffer[1] = (byte) bis.read();
396         formulaRow1 = EndianConverter.readShort(buffer);
397         int relativeFlags1 = (formulaRow1 & 0xC000)>>14;
398         formulaRow1 &= 0x3FFF;
399         buffer[0] = (byte) bis.read();
400         buffer[1] = (byte) bis.read();
401         formulaRow2 = EndianConverter.readShort(buffer);
402         int relativeFlags2 = (formulaRow2 & 0xC000)>>14;
403         formulaRow2 &= 0x3FFF;
404 
405         formulaCol1 = (byte) bis.read();
406         formulaCol2 = (byte) bis.read();
407 
408         outputString = int2CellStr(formulaRow1, formulaCol1, relativeFlags1);
409         outputString += (":" + int2CellStr(formulaRow2, formulaCol2, relativeFlags2));
410 
411         return (tf.getOperandToken(outputString,"CELL_AREA_REFERENCE"));
412     }
413 
414 
415     /**
416      * Reads a Number (floating point) token from the <code>ByteArrayInputStream</code>
417      *
418      * @param bis The <code>ByteArrayInputStream</code> from which we read the
419      * bytes.
420      * @return The decoded Integer <code>Token</code>
421      */
readNumToken(ByteArrayInputStream bis)422     private Token readNumToken(ByteArrayInputStream bis) {
423 
424         byte numBuffer[] = new byte[8];
425 
426         for(int j=0;j<8;j++) {
427                 numBuffer[j]=(byte) bis.read();
428         }
429 
430         return (tf.getOperandToken(Double.toString(EndianConverter.readDouble(numBuffer)),"NUMBER"));
431     }
432 
433     /**
434      * Read an Operator token from the <code>ByteArrayInputStream</code>
435      *
436      * @param b A Pocket Excel number representing an operator.
437      * @param args The number of arguments this operator takes.
438      * @return The decoded Operator <code>Token</code>
439      */
readOperatorToken(int b, int args)440     private Token readOperatorToken(int b, int args) {
441 
442         Token t;
443 
444         if(b==TokenConstants.TUPLUS) {
445             t = tf.getOperatorToken("+", args);
446         } else if(b==TokenConstants.TUMINUS) {
447             t = tf.getOperatorToken("-", args);
448         } else {
449             t = tf.getOperatorToken(operatorLookup.getStringFromID(b), args);
450         }
451         return t;
452      }
453 
454     /**
455      * Read a Function token from the <code>ByteArrayInputStream</code>
456      * This function can have any number of arguments and this number is read
457      * in with the record
458      *
459      * @param bis The <code>ByteArrayInputStream</code> from which we read the
460      * bytes.
461      * @return The decoded variable argument Function <code>Token</code>
462      */
readFunctionVarToken(ByteArrayInputStream bis)463     private Token readFunctionVarToken(ByteArrayInputStream bis) {
464 
465         int numArgs = 0;
466         numArgs = bis.read();
467         byte buffer[] = new byte[2];
468         buffer[0] = (byte) bis.read();
469         buffer[1] = (byte) bis.read();
470         int functionID = EndianConverter.readShort(buffer);
471         return (tf.getFunctionToken(fl.getStringFromID(functionID),numArgs));
472     }
473 
474     /**
475      * Read a Function token from the <code>ByteArrayInputStream</code>
476      * This function has a fixed number of arguments which it will get
477      * from <code>FunctionLookup</code>.
478      *
479      * @param bis The <code>ByteArrayInputStream</code> from which we read the
480      * bytes.
481      * @return The decoded fixed argument Function <code>Token</code>
482      */
readFunctionToken(ByteArrayInputStream bis)483     private Token readFunctionToken(ByteArrayInputStream bis) {
484 
485         byte buffer[] = new byte[2];
486         buffer[0] = (byte) bis.read();
487         buffer[1] = (byte) bis.read();
488         int functionID = EndianConverter.readShort(buffer);
489         String functionName = fl.getStringFromID(functionID);
490         return (tf.getFunctionToken(functionName,fl.getArgCountFromString(functionName)));
491     }
492 
493 }
494