xref: /AOO41X/main/sw/source/filter/ww8/dump/msvbasic.cxx (revision efeef26f81c84063fb0a91bde3856d4a51172d90)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_sw.hxx"
26 
27 
28 #include <string.h>     // memset(), ...
29 #ifndef UNX
30 #include <io.h>         // access()
31 #endif
32 #include <msvbasic.hxx>
33 
34 /* class VBA_Impl:
35  * The VBA class provides a set of methods to handle Visual Basic For
36  * Applications streams, the constructor is given the root ole2 stream
37  * of the document, Open reads the VBA project file and figures out
38  * the number of VBA streams, and the offset of the data within them.
39  * Decompress decompresses a particular numbered stream, NoStreams returns
40  * this number, and StreamName can give you the stream's name. Decompress
41  * will call Output when it has a 4096 byte collection of data to output,
42  * and also with the final remainder of data if there is still some left
43  * at the end of decompression. Output is virtual to allow custom handling
44  * of each chunk of decompressed data. So inherit from this to do something
45  * useful with the data.
46  *
47  * cmc
48  * */
49 
ReadPString(SvStorageStreamRef & xVBAProject)50 sal_uInt8 VBA_Impl::ReadPString(SvStorageStreamRef &xVBAProject)
51 {
52     sal_uInt16 idlen;
53     sal_uInt8 type=0;
54     *xVBAProject >> idlen;
55     sal_uInt8 out;
56     int i=0;
57     if (idlen < 6)
58     {
59         type=0;
60         xVBAProject->SeekRel(-2);
61         return(type);
62     }
63 
64     for(i=0;i<idlen/2;i++)
65     {
66         *xVBAProject >> out;
67         xVBAProject->SeekRel(1);
68         if (i==2)
69         {
70             type=out;
71             if ((type != 'G') && (type != 'C'))
72                 type=0;
73             if (type == 0)
74             {
75                 xVBAProject->SeekRel(-8);
76                 break;
77             }
78         }
79     }
80 
81 
82     return(type);
83 }
84 
ConfirmFixedOctect(SvStorageStreamRef & xVBAProject)85 void VBA_Impl::ConfirmFixedOctect(SvStorageStreamRef &xVBAProject)
86 {
87     static const sal_uInt8 stest[8] =
88         {
89         0x06, 0x02, 0x01, 0x00, 0x08, 0x02, 0x00, 0x00
90         };
91 
92     sal_uInt8 test[8];
93     xVBAProject->Read(test,8);
94     if (memcmp(stest,test,8) != 0)
95         DBG_WARNING("Found a different octect, please report");
96 }
97 
Confirm12Zeros(SvStorageStreamRef & xVBAProject)98 void VBA_Impl::Confirm12Zeros(SvStorageStreamRef &xVBAProject)
99 {
100     static const sal_uInt8 stest[12]={0};
101     sal_uInt8 test[12];
102     xVBAProject->Read(test,12);
103     if (memcmp(stest,test,12) != 0)
104         DBG_WARNING("Found a Non Zero block, please report");
105 }
106 
ConfirmHalfWayMarker(SvStorageStreamRef & xVBAProject)107 void VBA_Impl::ConfirmHalfWayMarker(SvStorageStreamRef &xVBAProject)
108 {
109     static const sal_uInt8 stest[12]={0,0,0,0,0,0,0,0,0,0,1,0};
110     sal_uInt8 test[12];
111     xVBAProject->Read(test,12);
112     if (memcmp(stest,test,12) != 0)
113         DBG_WARNING("Found a different halfway marker, please report");
114 }
115 
ConfirmFixedMiddle(SvStorageStreamRef & xVBAProject)116 void VBA_Impl::ConfirmFixedMiddle(SvStorageStreamRef &xVBAProject)
117 {
118     static const sal_uInt8 stest[20] =
119     {
120         0x00, 0x00, 0xe1, 0x2e, 0x45, 0x0d, 0x8f, 0xe0,
121         0x1a, 0x10, 0x85, 0x2e, 0x02, 0x60, 0x8c, 0x4d,
122         0x0b, 0xb4, 0x00, 0x00
123     };
124 
125     sal_uInt8 test[20];
126     xVBAProject->Read(test,20);
127     if (memcmp(stest,test,20) != 0)
128     {
129         DBG_WARNING("Found a different middle marker, please report");
130         xVBAProject->SeekRel(-20);
131     }
132 }
133 
ConfirmFixedMiddle2(SvStorageStreamRef & xVBAProject)134 void VBA_Impl::ConfirmFixedMiddle2(SvStorageStreamRef &xVBAProject)
135 {
136     static const sal_uInt8 stest[20] =
137     {
138         0x00, 0x00, 0x2e, 0xc9, 0x27, 0x8e, 0x64, 0x12,
139         0x1c, 0x10, 0x8a, 0x2f, 0x04, 0x02, 0x24, 0x00,
140         0x9c, 0x02, 0x00, 0x00
141     };
142 
143     sal_uInt8 test[20];
144     xVBAProject->Read(test,20);
145     if (memcmp(stest,test,20) != 0)
146         {
147         DBG_WARNING("Found a different middle2 marker, please report");
148         xVBAProject->SeekRel(-20);
149         }
150 }
151 
152 
Output(int nLen,const sal_uInt8 * pData)153 void VBA_Impl::Output( int nLen, const sal_uInt8 *pData)
154 {
155     sVBAString += String( (const sal_Char *)pData, nLen );
156 /*
157 //For debugging purposes
158     for(int i=0;i<len;i++)
159         *pOut << data[i];
160 */
161 }
162 
163 
ReadVBAProject(const SvStorageRef & rxVBAStorage)164 int VBA_Impl::ReadVBAProject(const SvStorageRef &rxVBAStorage)
165     {
166     SvStorageStreamRef xVBAProject;
167     xVBAProject = rxVBAStorage->OpenStream(
168                     String::CreateFromAscii( "_VBA_PROJECT" ),
169                     STREAM_STD_READ | STREAM_NOCREATE );
170 
171     if( !xVBAProject.Is() || SVSTREAM_OK != xVBAProject->GetError() )
172     {
173         DBG_WARNING("Not able to find vba project, cannot find macros");
174         return(0);
175     }
176     xVBAProject->SetNumberFormatInt( NUMBERFORMAT_INT_LITTLEENDIAN );
177 
178     //*pOut << hex;
179     sal_uInt8 header[30] =
180     {
181         0xcc, 0x61, 0x5e, 0x00, 0x00, 0x01, 0x00, 0xff,
182         0x07, 0x04, 0x00, 0x00, 0x09, 0x04, 0x00, 0x00,
183         0xe4, 0x04, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
184         0x00, 0x00, 0x00, 0x00, 0x01, 0x00
185     };
186     sal_uInt8 headerin[30];
187 
188     xVBAProject->Read(headerin,30);
189     if (memcmp(header,headerin,30) != 0)
190         DBG_WARNING("Warning VBA header is different, please report");
191     sal_uInt16 value;
192     *xVBAProject >> value;
193     //*pOut << "Trigger value 1 is " << value << endl;
194     sal_uInt16 svalue;
195     *xVBAProject >> svalue;
196     if (svalue != 0x02)
197         DBG_WARNING("Warning VBA number is different, please report");
198 
199     int count=0;
200     sal_uInt8 testc=0;
201 
202     //*pOut << "Other strings after the middle are..." << endl;
203     //There appears to be almost any number of strings acceptable
204     //most begin with */G , and sometimes with
205     //*/C. Those with G always have a trailer of 12 bytes, those
206     //with C come in pairs, the first with no trailer, and the
207     //second with one of 12 bytes. The following code attemts
208     //to read these strings and ends when it reaches a sequence of
209     //bytes which fails a test to be a valid string. So this
210     //while loop here is the particular piece of code which is
211     //very suspect and likely to be the cause of any crashes and
212     //problems.
213     while ((testc = ReadPString(xVBAProject)) != 0)
214     {
215         //*pOut << endl;
216         //*pOut << "testcharacter is " << testc << endl;
217         switch (testc)
218         {
219             case 'C':
220                 count++;
221                 if (count == 2)
222                 {
223                     Confirm12Zeros(xVBAProject);
224                     count=0;
225                 }
226                 break;
227             default:
228             case 'G':
229                 Confirm12Zeros(xVBAProject);
230                 break;
231         }
232     }
233 
234     //appears to be a fixed 20 byte sequence here, and then the strings
235     //continue
236     ConfirmFixedMiddle(xVBAProject);
237 
238     count=0;
239     testc=0;
240 
241     while ((testc = ReadPString(xVBAProject)) != 0)
242     {
243         //*pOut << endl;
244         //*pOut << "testcharacter is " << testc << endl;
245         switch (testc)
246         {
247             case 'C':
248                 count++;
249                 if (count == 2)
250                 {
251                     Confirm12Zeros(xVBAProject);
252                     count=0;
253                 }
254                 break;
255             default:
256             case 'G':
257                 Confirm12Zeros(xVBAProject);
258                 break;
259         }
260     }
261 
262     //there *may* be another different 20byte fixed string
263     ConfirmFixedMiddle2(xVBAProject);
264 
265     //*pOut << "testc is " << testc << endl;
266     //*pOut << "position is " << xVBAProject->Tell() << endl;
267 
268     sal_uInt16 nModules;
269     *xVBAProject >> nModules;
270 
271     //begin section, this section isn't really 100% correct
272     //*pOut << nModules << hex << " vba modules" << endl;
273     xVBAProject->SeekRel(2*nModules);
274     xVBAProject->SeekRel(4);
275     //*pOut << "position is " << xVBAProject->Tell() << endl;
276     ConfirmFixedOctect(xVBAProject);
277 
278     sal_uInt16 junksize;
279     while(junksize != 0xFFFF)
280     {
281         xVBAProject->Read(&junksize,2); // usually 18 02, sometimes 1e 02
282         //but sometimes its a run of numbers until 0xffff, gagh!!!
283         //*pOut << "position is " << xVBAProject->Tell() << "len is "
284         //  << junksize << endl;
285     }
286 
287     sal_uInt16 ftest;
288     *xVBAProject >> ftest;
289     if (ftest != 0xFFFF)
290         xVBAProject->SeekRel(ftest);
291     *xVBAProject >> ftest;
292     if (ftest != 0xFFFF)
293         xVBAProject->SeekRel(ftest);
294 
295     xVBAProject->SeekRel(100);
296     //*pOut << "position is " << xVBAProject->Tell() << endl;
297     //end section
298 
299 
300     *xVBAProject >> nOffsets;
301     pOffsets = new VBAOffset_Impl[nOffsets];
302     int i;
303     for (i=0;i<nOffsets;i++)
304         {
305         sal_uInt8 discard;
306         sal_uInt16 len;
307         *xVBAProject >> len;
308         int j;
309         for (j=0;j<len/2;j++)
310             {
311             *xVBAProject >> discard;
312             pOffsets[i].sName += discard;
313             *xVBAProject >> discard;
314             }
315         *xVBAProject >> len;
316         xVBAProject->SeekRel(len);
317 
318         //begin section, another problem area
319         *xVBAProject >> len;
320         if (len == 0xFFFF)
321         {
322             xVBAProject->SeekRel(2);
323             *xVBAProject >> len;
324             xVBAProject->SeekRel(len);
325         }
326         else
327             xVBAProject->SeekRel(len+2);
328         //
329         /* I have a theory that maybe you read a 16bit len, and
330          * if it has 0x02 for the second byte then it is a special
331          * token of its own that affects nothing else, otherwise
332          * it is a len of the following data. C. I must test this
333          * theory later.
334          */
335         //end section
336 
337         xVBAProject->SeekRel(8);
338         sal_uInt8 no_of_octects;
339         *xVBAProject >> no_of_octects;
340         for(j=0;j<no_of_octects;j++)
341             xVBAProject->SeekRel(8);
342         xVBAProject->SeekRel(6);
343 
344         *xVBAProject >> pOffsets[i].nOffset;
345         //*pOut << pOffsets[i].pName.GetStr() << " at 0x" << hex << pOffsets[i].nOffset << endl;
346         xVBAProject->SeekRel(2);
347         }
348 
349     //*pOut << endl;
350     return(nOffsets);
351     }
352 
Open(const String & rToplevel,const String & rSublevel)353 sal_Bool VBA_Impl::Open( const String &rToplevel,const String &rSublevel )
354 {
355     /* beginning test for vba stuff */
356     sal_Bool bRet = sal_False;
357     SvStorageRef xMacros= xStor->OpenStorage(rToplevel);
358     if( !xMacros.Is() || SVSTREAM_OK != xMacros->GetError() )
359     {
360         DBG_WARNING("No Macros Storage");
361     }
362     else
363     {
364         xVBA = xMacros->OpenStorage(rSublevel);
365         if( !xVBA.Is() || SVSTREAM_OK != xVBA->GetError() )
366         {
367             DBG_WARNING("No Visual Basic in Storage");
368         }
369         else
370         {
371             if (ReadVBAProject(xVBA))
372                 bRet = sal_True;
373         }
374     }
375     /* end test for vba stuff */
376     return bRet;
377 }
378 
Decompress(sal_uInt16 nIndex,int * pOverflow)379 const String &VBA_Impl::Decompress( sal_uInt16 nIndex, int *pOverflow)
380 {
381     SvStorageStreamRef xVBAStream;
382     sVBAString.Erase();
383 
384     DBG_ASSERT( nIndex < nOffsets, "Index out of range" );
385     xVBAStream = xVBA->OpenStream( pOffsets[nIndex].sName,
386                         STREAM_STD_READ | STREAM_NOCREATE );
387     if (pOverflow)
388         *pOverflow=0;
389     if( !xVBAStream.Is() || SVSTREAM_OK !=
390         xVBAStream->GetError() )
391     {
392         DBG_WARNING("Not able to open vb module ");
393 //      DBG_WARNING((pOffsets[nIndex].sName).GetStr());
394     }
395     else
396     {
397         xVBAStream->SetNumberFormatInt( NUMBERFORMAT_INT_LITTLEENDIAN );
398         DecompressVBA(nIndex,xVBAStream);
399         /*
400          * if len was too big for a single string set that variable ?
401          *  if ((len > XX) && (pOverflow))
402                 *pOverflow=1;
403          */
404         if (bCommented)
405         {
406             String sTempStringa(String::CreateFromAscii( "\x0D\x0A"));
407             String sTempStringb(String::CreateFromAscii( "\x0D\x0ARem "));
408             sVBAString.SearchAndReplaceAll(sTempStringa,sTempStringb);
409             sVBAString.InsertAscii("Rem ",0);
410         }
411     }
412     return sVBAString;
413 }
414 
415 
DecompressVBA(int nIndex,SvStorageStreamRef & xVBAStream)416 int VBA_Impl::DecompressVBA( int nIndex, SvStorageStreamRef &xVBAStream )
417 {
418     sal_uInt8 leadbyte;
419     unsigned int pos = 0;
420 
421     //*pOut << "jumping to " << hex << offsets[nIndex].offset << endl;
422     xVBAStream->Seek(pOffsets[nIndex].nOffset+3);
423 
424     int len;
425     sal_uInt16 token;
426     int distance, shift, clean=1;
427 
428     while(xVBAStream->Read(&leadbyte,1))
429         {
430         //*pOut << "reading 8 data unit block beginning with " << leadbyte << int(leadbyte) << " at pos " << xVBAStream->Tell() << " real pos " << pos << endl;
431         for(int position=0x01;position < 0x100;position=position<<1)
432             {
433             //we see if the leadbyte has flagged this location as a dataunit
434             //which is actually a token which must be looked up in the history
435             if (leadbyte & position)
436                 {
437                 *xVBAStream >> token;
438 
439                 if (clean == 0)
440                     clean=1;
441 
442                 //For some reason the division of the token into the length
443                 //field of the data to be inserted, and the distance back into
444                 //the history differs depending on how full the history is
445                 int pos2 = pos%WINDOWLEN;
446                 if (pos2 <= 0x10)
447                     shift = 12;
448                 else if (pos2 <= 0x20)
449                     shift = 11;
450                 else if (pos2 <= 0x40)
451                     shift = 10;
452                 else if (pos2 <= 0x80)
453                     shift = 9;
454                 else if (pos2 <= 0x100)
455                     shift = 8;
456                 else if (pos2 <= 0x200)
457                     shift = 7;
458                 else if (pos2 <= 0x400)
459                     shift = 6;
460                 else if (pos2 <= 0x800)
461                     shift = 5;
462                 else
463                     shift = 4;
464 
465                 int i;
466                 len=0;
467                 for(i=0;i<shift;i++)
468                     len |= token & (1<<i);
469 
470                 //*pOut << endl << "match lookup token " << int(token) << "len " << int(len) << endl;
471 
472                 len += 3;
473                 //*pOut << endl << "len is " << len << "shift is " << shift << endl;
474 
475                 distance = token >> shift;
476                 //*pOut << "distance token shift is " << distance << " " << int(token) << " " << shift << "pos is " << pos << " " << xVBAStream->Tell() << endl;
477 
478                 //read the len of data from the history, wrapping around the
479                 //WINDOWLEN boundary if necessary
480                 //data read from the history is also copied into the recent
481                 //part of the history as well.
482                 for (i = 0; i < len; i++)
483                     {
484                     unsigned char c;
485                     //*pOut << endl << (pos%WINDOWLEN)-distance-1 << " " << pos << " " << distance << endl;
486                     c = aHistory[(pos-distance-1)%WINDOWLEN];
487                     aHistory[pos%WINDOWLEN] = c;
488                     pos++;
489                     //*pOut << "real pos is " << pos << endl;
490                     //
491                     //temp removed
492                     //*pOut << c ;
493                     }
494                 }
495             else
496                 {
497                 // special boundary case code, not guarantueed to be correct
498                 // seems to work though, there is something wrong with the
499                 // compression scheme (or maybe a feature) where when
500                 // the data ends on a WINDOWLEN boundary and the excess
501                 // bytes in the 8 dataunit list are discarded, and not
502                 // interpreted as tokens or normal data.
503                 if ((pos != 0) && ((pos%WINDOWLEN) == 0) && (clean))
504                     {
505                     //*pOut << "at boundary position is " << position << " " << xVBAStream->Tell() << " pos is " << pos << endl;
506                     //if (position != 0x01)
507                     //*pOut << "must restart by eating remainder single byte data units" << endl;
508                     xVBAStream->SeekRel(2);
509                     clean=0;
510                     Output(WINDOWLEN,aHistory);
511                     break;
512                     }
513                 //This is the normal case for when the data unit is not a
514                 //token to be looked up, but instead some normal data which
515                 //can be output, and placed in the history.
516                 if (xVBAStream->Read(&aHistory[pos%WINDOWLEN],1))
517                 {
518                     pos++;
519                     //temp removed
520                     //*pOut << aHistory[pos++%WINDOWLEN];
521                 }
522                 if (clean == 0)
523                     clean=1;
524                 //*pOut << "pos is " << pos << " " << xVBAStream->Tell() << endl;
525                 }
526             }
527         }
528     if (pos%WINDOWLEN)
529         Output(pos%WINDOWLEN,aHistory);
530     return(pos);
531 }
532 
533