xref: /AOO41X/main/l10ntools/source/help/HelpIndexerTool.java (revision 8809db7a87f97847b57a57f4cd2b0104b2b83182)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package com.sun.star.help;
25 
26 import java.io.FileInputStream;
27 import java.io.FileOutputStream;
28 import java.util.Arrays;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.zip.ZipEntry;
32 import java.util.zip.ZipOutputStream;
33 import java.util.zip.CRC32;
34 import org.apache.lucene.analysis.standard.StandardAnalyzer;
35 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
36 import org.apache.lucene.analysis.Analyzer;
37 import org.apache.lucene.index.IndexWriter;
38 
39 import java.io.File;
40 import java.io.FileNotFoundException;
41 import java.io.IOException;
42 import java.util.Date;
43 
44 
45 /**
46    When this tool is used with long path names on Windows, that is paths which start
47    with \\?\, then the caller must make sure that the path is unique. This is achieved
48    by removing '.' and '..' from the path. Paths which are created by
49    osl_getSystemPathFromFileURL fulfill this requirement. This is necessary because
50    lucene is patched to not use File.getCanonicalPath. See long_path.patch in the lucene
51    module.
52  */
53 public class HelpIndexerTool
54 {
55     public HelpIndexerTool()
56     {
57     }
58 
59 
60     /**
61      * @param args the command line arguments
62      */
63     public static void main( String[] args )
64     {
65         boolean bExtensionMode = false;
66         mainImpl( args, bExtensionMode );
67     }
68 
69     public static void mainImpl( String[] args, boolean bExtensionMode )
70     {
71         String aDirToZipStr = "";
72         String aSrcDirStr = "";
73         String aLanguageStr = "";
74         String aModule = "";
75         String aTargetZipFileStr = "";
76         String aCfsName = "";
77         String aSegmentName = "";
78 
79         // Scan arguments
80         //If this tool is invoked in the build process for extensions help,
81         //then -extension must be set.
82         boolean bExtension = false;
83         boolean bLang = false;
84         boolean bMod = false;
85         boolean bZipDir = false;
86         boolean bSrcDir = false;
87         boolean bOutput = false;
88         boolean bCfsName = false;
89         boolean bSegmentName = false;
90 
91         int nArgCount = args.length;
92         for( int i = 0 ; i < nArgCount ; i++ )
93         {
94             if( "-extension".equals(args[i]) )
95             {
96                 bExtension = true;
97             }
98             else if( "-lang".equals(args[i]) )
99             {
100                 if( i + 1 < nArgCount )
101                 {
102                     aLanguageStr = args[i + 1];
103                     bLang = true;
104                 }
105                 i++;
106             }
107             else if( "-mod".equals(args[i]) )
108             {
109                 if( i + 1 < nArgCount )
110                 {
111                     aModule = args[i + 1];
112                     bMod = true;
113                 }
114                 i++;
115             }
116             else if( "-zipdir".equals(args[i]) )
117             {
118                 if( i + 1 < nArgCount )
119                 {
120                     aDirToZipStr = args[i + 1];
121                     bZipDir = true;
122                 }
123                 i++;
124             }
125             else if( "-srcdir".equals(args[i]) )
126             {
127                 if( i + 1 < nArgCount )
128                 {
129                     aSrcDirStr = args[i + 1];
130                     bSrcDir = true;
131                 }
132                 i++;
133             }
134             else if( "-o".equals(args[i]) )
135             {
136                 if( i + 1 < nArgCount )
137                 {
138                     aTargetZipFileStr = args[i + 1];
139                     bOutput = true;
140                 }
141                 i++;
142             }
143             else if( "-checkcfsandsegname".equals(args[i]) )
144             {
145                 if( i + 1 < nArgCount )
146                 {
147                     aCfsName = args[i + 1] + ".cfs";
148                     bCfsName = true;
149                 }
150                 i++;
151                 if( i + 1 < nArgCount )
152                 {
153                     aSegmentName = "segments" + args[i + 1];
154                     bSegmentName = true;
155                 }
156                 i++;
157                 if (!(bCfsName && bSegmentName))
158                 {
159                     System.out.println("Usage: HelpIndexer -checkcfsandsegname _0 _3 (2 arguments needed)");
160                     System.exit( -1 );
161                 }
162             }
163         }
164 
165         if( !bLang || !bMod || !bZipDir || (!bOutput && !bExtensionMode && !bExtension) )
166         {
167             if( bExtensionMode )
168                 return;
169 
170             System.out.println("Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -zipdir TempZipDir -o OutputZipFile");
171             System.out.println("Usage: HelpIndexer -extension -lang ISOLangCode -mod HelpModule -zipdir PathToLangDir");
172             System.exit( -1 );
173         }
174 
175         String aIndexDirName = aModule + ".idxl";
176         File aIndexDir = new File( aDirToZipStr + File.separator + aIndexDirName );
177         if( !bSrcDir )
178             aSrcDirStr = aDirToZipStr;
179         File aCaptionFilesDir = new File( aSrcDirStr + File.separator + "caption" );
180         File aContentFilesDir = new File( aSrcDirStr + File.separator + "content" );
181 
182         try
183         {
184             Date start = new Date();
185             Analyzer analyzer = aLanguageStr.equals("ja") ? (Analyzer)new CJKAnalyzer() : (Analyzer)new StandardAnalyzer();
186             IndexWriter writer = new IndexWriter( aIndexDir, analyzer, true );
187             if( !bExtensionMode )
188                 System.out.println( "Lucene: Indexing to directory '" + aIndexDir + "'..." );
189             int nRet = indexDocs( writer, aModule, bExtensionMode, aCaptionFilesDir, aContentFilesDir );
190             if( nRet != -1 )
191             {
192                 if( !bExtensionMode )
193                 {
194                     System.out.println();
195                     System.out.println( "Optimizing ..." );
196                 }
197                 writer.optimize();
198             }
199             writer.close();
200 
201             boolean bCfsFileOk = true;
202             boolean bSegmentFileOk = true;
203             if( bCfsName && bSegmentName && !bExtensionMode && nRet != -1 )
204             {
205                 String aCompleteCfsFileName = aDirToZipStr + File.separator + aIndexDirName + File.separator + aCfsName;
206                 String aCompleteSegmentFileName = aDirToZipStr + File.separator + aIndexDirName + File.separator + aSegmentName;
207                 File aCfsFile = new File( aCompleteCfsFileName );
208                 File aSegmentFile = new File( aCompleteSegmentFileName );
209                 bCfsFileOk = aCfsFile.exists();
210                 bSegmentFileOk = aSegmentFile.exists();
211                 System.out.println( "Checking cfs file " + aCfsName+ ": " + (bCfsFileOk ? "Found" : "Not found") );
212                 System.out.println( "Checking segment file " + aSegmentName+ ": " + (bSegmentFileOk ? "Found" : "Not found") );
213             }
214 
215             if( bExtensionMode || bExtension)
216             {
217                 if( !bSrcDir )
218                 {
219                     deleteRecursively( aCaptionFilesDir );
220                     deleteRecursively( aContentFilesDir );
221                 }
222             }
223             else
224             {
225                 if( nRet == -1 )
226                     deleteRecursively( aIndexDir );
227 
228                 if( bCfsFileOk && bSegmentFileOk )
229                     System.out.println( "Zipping ..." );
230                 File aDirToZipFile = new File( aDirToZipStr );
231                 createZipFile( aDirToZipFile, aTargetZipFileStr );
232                 deleteRecursively( aDirToZipFile );
233             }
234 
235             if( !bCfsFileOk )
236             {
237                 System.out.println( "cfs file check failed, terminating..." );
238                 System.exit( -1 );
239             }
240 
241             if( !bSegmentFileOk )
242             {
243                 System.out.println( "segment file check failed, terminating..." );
244                 System.exit( -1 );
245             }
246 
247             Date end = new Date();
248             if( !bExtensionMode )
249                 System.out.println(end.getTime() - start.getTime() + " total milliseconds");
250         }
251         catch (IOException e)
252         {
253             if( bExtensionMode )
254                 return;
255 
256             System.out.println(" caught a " + e.getClass() +
257                 "\n with message: " + e.getMessage());
258             System.exit( -1 );
259         }
260     }
261 
262     private static int indexDocs(IndexWriter writer, String aModule, boolean bExtensionMode,
263         File aCaptionFilesDir, File aContentFilesDir) throws IOException
264     {
265         if( !aCaptionFilesDir.canRead() || !aCaptionFilesDir.isDirectory() )
266         {
267             if( !bExtensionMode )
268                 System.out.println( "Not found: " + aCaptionFilesDir );
269             return -1;
270         }
271         if( !aContentFilesDir.canRead() || !aContentFilesDir.isDirectory() )
272         {
273             if( !bExtensionMode )
274                 System.out.println( "Not found: " + aContentFilesDir );
275             return -1;
276         }
277 
278         String[] aCaptionFiles = aCaptionFilesDir.list();
279         List aCaptionFilesList = Arrays.asList( aCaptionFiles );
280         HashSet aCaptionFilesHashSet = new HashSet( aCaptionFilesList );
281 
282         String[] aContentFiles = aContentFilesDir.list();
283         List aContentFilesList = Arrays.asList( aContentFiles );
284         HashSet aContentFilesHashSet = new HashSet( aContentFilesList );
285 
286         // Loop over caption files and find corresponding content file
287         if( !bExtensionMode )
288             System.out.println( "Indexing, adding files" );
289         int nCaptionFilesLen = aCaptionFiles.length;
290         for( int i = 0 ; i < nCaptionFilesLen ; i++ )
291         {
292             String aCaptionFileStr = aCaptionFiles[i];
293             File aCaptionFile = new File( aCaptionFilesDir, aCaptionFileStr );
294             File aContentFile = null;
295             if( aContentFilesHashSet.contains( aCaptionFileStr ) )
296                 aContentFile = new File( aContentFilesDir, aCaptionFileStr );
297 
298             if( !bExtensionMode )
299                 System.out.print( "." );
300             writer.addDocument( HelpFileDocument.Document( aModule, aCaptionFile, aContentFile ) );
301         }
302 
303         // Loop over content files to find remaining files not mapped to caption files
304         int nContentFilesLen = aContentFiles.length;
305         for( int i = 0 ; i < nContentFilesLen ; i++ )
306         {
307             String aContentFileStr = aContentFiles[i];
308             if( !aCaptionFilesHashSet.contains( aContentFileStr ) )
309             {
310                 // Not already handled in caption files loop
311                 File aCaptionFile = null;
312                 File aContentFile = new File( aContentFilesDir, aContentFileStr );
313                 if( !bExtensionMode )
314                     System.out.print( "." );
315                 writer.addDocument( HelpFileDocument.Document( aModule, aCaptionFile, aContentFile ) );
316             }
317         }
318         return 0;
319     }
320 
321     public static void createZipFile( File aDirToZip, String aTargetZipFileStr )
322             throws FileNotFoundException, IOException
323     {
324         FileOutputStream fos = new FileOutputStream( aTargetZipFileStr );
325         ZipOutputStream zos = new ZipOutputStream( fos );
326 
327         File[] aChildrenFiles = aDirToZip.listFiles();
328         int nFileCount = aChildrenFiles.length;
329         for( int i = 0 ; i < nFileCount ; i++ )
330             addToZipRecursively( zos, aChildrenFiles[i], null );
331 
332         zos.close();
333     }
334 
335     public static void addToZipRecursively( ZipOutputStream zos, File aFile, String aBasePath )
336             throws FileNotFoundException, IOException
337     {
338         if( aFile.isDirectory() )
339         {
340             String aDirName = aFile.getName();
341             if( aDirName.equalsIgnoreCase( "caption" ) || aDirName.equalsIgnoreCase( "content" ) )
342                 return;
343 
344             File[] aChildrenFiles = aFile.listFiles();
345             String aNewBasePath = "";
346             if( aBasePath != null )
347                 aNewBasePath += aBasePath + File.separator;
348             aNewBasePath += aDirName;
349 
350             int nFileCount = aChildrenFiles.length;
351             for( int i = 0 ; i < nFileCount ; i++ )
352                 addToZipRecursively( zos, aChildrenFiles[i], aNewBasePath );
353 
354             return;
355         }
356 
357         // No directory
358         // read contents of file we are going to put in the zip
359         int fileLength = (int) aFile.length();
360         FileInputStream fis = new FileInputStream( aFile );
361         byte[] wholeFile = new byte[fileLength];
362         int bytesRead = fis.read( wholeFile, 0, fileLength );
363         fis.close();
364 
365         String aFileName = aFile.getName();
366         String aEntryName = "";
367         if( aBasePath != null )
368             aEntryName += aBasePath + "/";
369         aEntryName += aFileName;
370         ZipEntry aZipEntry = new ZipEntry( aEntryName );
371         aZipEntry.setTime( aFile.lastModified() );
372         aZipEntry.setSize( fileLength );
373 
374         int nMethod = ( aFileName.toLowerCase().endsWith( ".jar" ) )
375                 ? ZipEntry.STORED : ZipEntry.DEFLATED;
376         aZipEntry.setMethod( nMethod );
377 
378         CRC32 tempCRC = new CRC32();
379         tempCRC.update( wholeFile, 0, wholeFile.length );
380         aZipEntry.setCrc( tempCRC.getValue() );
381 
382         // write the contents into the zip element
383         zos.putNextEntry( aZipEntry );
384         zos.write( wholeFile, 0, fileLength );
385         zos.closeEntry();
386     }
387 
388     static public boolean deleteRecursively( File aFile )
389     {
390         if( aFile.isDirectory() )
391         {
392             File[] aChildrenFiles = aFile.listFiles();
393             int nFileCount = aChildrenFiles.length;
394             for( int i = 0 ; i < nFileCount ; i++ )
395             {
396                 File aChildrenFile = aChildrenFiles[i];
397                 boolean bSuccess = deleteRecursively( aChildrenFile );
398                 if( !bSuccess )
399                     return false;
400             }
401         }
402 
403         return aFile.delete();
404     }
405 }
406 
407