xref: /AOO41X/main/l10ntools/source/gsiconv.cxx (revision 3cd96b95fb0ad23ccdd883f9b15a685c459d45ca)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_l10ntools.hxx"
26 #include <stdio.h>
27 #include <tools/fsys.hxx>
28 #include <tools/stream.hxx>
29 
30 // local includes
31 #include "utf8conv.hxx"
32 
33 #define GSI_FILE_UNKNOWN        0x0000
34 #define GSI_FILE_OLDSTYLE       0x0001
35 #define GSI_FILE_L10NFRAMEWORK  0x0002
36 
37 /*****************************************************************************/
GetGSIFileType(SvStream & rStream)38 sal_uInt16 GetGSIFileType( SvStream &rStream )
39 /*****************************************************************************/
40 {
41     sal_uInt16 nFileType = GSI_FILE_UNKNOWN;
42 
43     sal_uLong nPos( rStream.Tell());
44     rStream.Seek( STREAM_SEEK_TO_BEGIN );
45 
46     ByteString sLine;
47     while( !rStream.IsEof() && !sLine.Len())
48         rStream.ReadLine( sLine );
49 
50     if( sLine.Len()) {
51         if( sLine.Search( "($$)" ) != STRING_NOTFOUND )
52             nFileType = GSI_FILE_OLDSTYLE;
53         else
54             nFileType = GSI_FILE_L10NFRAMEWORK;
55     }
56 
57     rStream.Seek( nPos );
58 
59     return nFileType;
60 }
61 
62 /*****************************************************************************/
// Extracts the identifier of a GSI line.
//   old style:      the text in front of the first "($$)" (or tab) separator
//   l10n framework: the tab separated columns 0, 1, 4 and 5, joined by tabs
// Any other file type yields an empty string.
ByteString GetGSILineId( const ByteString &rLine, sal_uInt16 nFileType )
/*****************************************************************************/
{
    if ( nFileType == GSI_FILE_OLDSTYLE ) {
        // unify the separators first so that a plain token lookup works
        ByteString sNormalized( rLine );
        sNormalized.SearchAndReplaceAll( "($$)", "\t" );
        return sNormalized.GetToken( 0, '\t' );
    }

    if ( nFileType == GSI_FILE_L10NFRAMEWORK ) {
        // columns which together form the id of a line
        static const sal_uInt16 aIdColumns[] = { 0, 1, 4, 5 };
        const sal_uInt16 nColumns = sizeof( aIdColumns ) / sizeof( aIdColumns[ 0 ] );

        ByteString sKey;
        for ( sal_uInt16 n = 0; n < nColumns; n++ ) {
            if ( n )
                sKey += "\t";
            sKey += rLine.GetToken( aIdColumns[ n ], '\t' );
        }
        return sKey;
    }

    return ByteString();
}
86 
87 /*****************************************************************************/
// Extracts the language id of a GSI line.
//   old style:      third field (index 2) after replacing "($$)" by tabs
//   l10n framework: tab separated column 9
// Any other file type yields an empty string.
ByteString GetGSILineLangId( const ByteString &rLine, sal_uInt16 nFileType )
/*****************************************************************************/
{
    if ( nFileType == GSI_FILE_L10NFRAMEWORK )
        return rLine.GetToken( 9, '\t' );

    if ( nFileType == GSI_FILE_OLDSTYLE ) {
        // unify the separators first so that a plain token lookup works
        ByteString sNormalized( rLine );
        sNormalized.SearchAndReplaceAll( "($$)", "\t" );
        return sNormalized.GetToken( 2, '\t' );
    }

    return ByteString();
}
105 
106 /*****************************************************************************/
ConvertGSILine(sal_Bool bToUTF8,ByteString & rLine,rtl_TextEncoding nEncoding,sal_uInt16 nFileType)107 void ConvertGSILine( sal_Bool bToUTF8, ByteString &rLine,
108         rtl_TextEncoding nEncoding, sal_uInt16 nFileType )
109 /*****************************************************************************/
110 {
111     switch ( nFileType ) {
112         case GSI_FILE_OLDSTYLE:
113             if ( bToUTF8 )
114                 rLine = UTF8Converter::ConvertToUTF8( rLine, nEncoding );
115             else
116                 rLine = UTF8Converter::ConvertFromUTF8( rLine, nEncoding );
117         break;
118 
119         case GSI_FILE_L10NFRAMEWORK: {
120             ByteString sConverted;
121             for ( sal_uInt16 i = 0; i < rLine.GetTokenCount( '\t' ); i++ ) {
122                 ByteString sToken = rLine.GetToken( i, '\t' );
123                 if (( i > 9 ) && ( i < 14 )) {
124                     if( bToUTF8 )
125                         sToken = UTF8Converter::ConvertToUTF8( sToken, nEncoding );
126                     else
127                         sToken = UTF8Converter::ConvertFromUTF8( sToken, nEncoding );
128                 }
129                 if ( i )
130                     sConverted += "\t";
131                 sConverted += sToken;
132             }
133             rLine = sConverted;
134         }
135         break;
136     }
137 }
138 
139 /*****************************************************************************/
// Prints the usage text to stdout.
// The charset list names every charset accepted by main(), including
// MS_1257 and UTF8.
void Help()
/*****************************************************************************/
{
    static const char* const aHelpLines[] = {
        "\n",
        "gsiconv (c)1999 by StarOffice Entwicklungs GmbH\n",
        "===============================================\n",
        "\n",
        "gsiconv converts strings in GSI-Files (Gutschmitt Interface) from or to UTF-8\n",
        "\n",
        "Syntax: gsiconv (-t|-f langid charset)|(-p n) filename\n",
        "Switches: -t   => conversion from charset to UTF-8\n",
        "          -f   => conversion from UTF-8 to charset\n",
        "          -p n => creates several files with ca. n lines\n",
        "\n",
        "Allowed charsets:\n",
        "          MS_932  => Japanese\n",
        "          MS_936  => Chinese Simplified\n",
        "          MS_949  => Korean\n",
        "          MS_950  => Chinese Traditional\n",
        "          MS_1250 => East Europe\n",
        "          MS_1251 => Cyrillic\n",
        "          MS_1252 => West Europe\n",
        "          MS_1253 => Greek\n",
        "          MS_1254 => Turkish\n",
        "          MS_1255 => Hebrew\n",
        "          MS_1256 => Arabic\n",
        "          MS_1257 => Baltic\n",
        "          UTF8    => Unicode (UTF-8)\n",
        "\n",
        "Allowed langids:\n",
        "          1  => ENGLISH_US\n",
        "          3  => PORTUGUESE \n",
        "          4  => GERMAN_DE (new german style)\n",
        "          7  => RUSSIAN\n",
        "          30 => GREEK\n",
        "          31 => DUTCH\n",
        "          33 => FRENCH\n",
        "          34 => SPANISH\n",
        "          35 => FINNISH\n",
        "          36 => HUNGARIAN\n",
        "          39 => ITALIAN\n",
        "          42 => CZECH\n",
        "          44 => ENGLISH (UK)\n",
        "          45 => DANISH\n",
        "          46 => SWEDISH\n",
        "          47 => NORWEGIAN\n",
        "          49 => GERMAN (old german style)\n",
        "          55 => PORTUGUESE_BRAZILIAN\n",
        "          81 => JAPANESE\n",
        "          82 => KOREAN\n",
        "          86 => CHINESE_SIMPLIFIED\n",
        "          88 => CHINESE_TRADITIONAL\n",
        "          90 => TURKISH\n",
        "          96 => ARABIC\n",
        "          97 => HEBREW\n",
        "\n"
    };

    // none of the lines contains a conversion specification, so they are
    // written verbatim
    for ( size_t i = 0; i < sizeof( aHelpLines ) / sizeof( aHelpLines[ 0 ] ); ++i )
        fputs( aHelpLines[ i ], stdout );
}
195 
196 /*****************************************************************************/
197 #if defined(UNX) || defined(OS2)
main(int argc,char * argv[])198 int main( int argc, char *argv[] )
199 #else
200 int _cdecl main( int argc, char *argv[] )
201 #endif
202 /*****************************************************************************/
203 {
204     if (( argc != 5 ) && ( argc != 4 )) {
205         Help();
206         exit ( 0 );
207     }
208 
209     if ( argc == 4 ) {
210         if ( ByteString( argv[ 1 ] ) == "-p" ) {
211 
212             DirEntry aSource = DirEntry( String( argv[ 3 ], RTL_TEXTENCODING_ASCII_US ));
213             if ( !aSource.Exists()) {
214                 fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
215                 exit ( 2 );
216             }
217 
218             DirEntry aOutput( aSource );
219 
220             String sBase = aOutput.GetBase();
221             String sExt = aOutput.GetExtension();
222 
223             String sGSI( argv[ 3 ], RTL_TEXTENCODING_ASCII_US );
224             SvFileStream aGSI( sGSI, STREAM_STD_READ  );
225             if ( !aGSI.IsOpen()) {
226                 fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
227                 exit ( 3 );
228             }
229 
230             sal_uInt16 nFileType( GetGSIFileType( aGSI ));
231 
232             sal_uLong nMaxLines = (sal_uLong) ByteString( argv[ 2 ] ).ToInt64();
233             if ( !nMaxLines ) {
234                 fprintf( stderr, "\nERROR: Linecount must be at least 1!\n\n" );
235                 exit ( 3 );
236             }
237 
238             ByteString sGSILine;
239             ByteString sOldId;
240             sal_uLong nLine = 0;
241             sal_uLong nOutputFile = 1;
242 
243             String sOutput( sBase );
244             sOutput += String( "_", RTL_TEXTENCODING_ASCII_US );
245             sOutput += String::CreateFromInt64( nOutputFile );
246             if ( sExt.Len()) {
247                 sOutput += String( ".", RTL_TEXTENCODING_ASCII_US );
248                 sOutput += sExt;
249             }
250             nOutputFile ++;
251 
252             aOutput.SetName( sOutput );
253             SvFileStream aOutputStream( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC );
254 
255             while ( !aGSI.IsEof()) {
256 
257                 aGSI.ReadLine( sGSILine );
258                 ByteString sId( GetGSILineId( sGSILine, nFileType ));
259 
260                 nLine++;
261 
262                 if (( nLine >= nMaxLines ) && ( sId != sOldId )) {
263                     aOutputStream.Close();
264 
265                     ByteString sText( aOutput.GetFull(), gsl_getSystemTextEncoding());
266                     sText += " with ";
267                     sText += ByteString::CreateFromInt64( nLine );
268                     sText += " lines written.";
269 
270                     fprintf( stdout, "%s\n", sText.GetBuffer());
271                     String sOutput1( sBase );
272                     sOutput1 += String( "_", RTL_TEXTENCODING_ASCII_US );
273                     sOutput1 += String::CreateFromInt64( nOutputFile );
274                     if ( sExt.Len()) {
275                         sOutput1 += String( ".", RTL_TEXTENCODING_ASCII_US );
276                         sOutput1 += sExt;
277                     }
278                     nOutputFile ++;
279 
280                     aOutput.SetName( sOutput1 );
281 
282                     aOutputStream.Open( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC );
283                     nLine = 0;
284                 }
285 
286                 aOutputStream.WriteLine( sGSILine );
287 
288                 sOldId = sId;
289             }
290 
291             aGSI.Close();
292             aOutputStream.Close();
293 
294             ByteString sText( aOutput.GetFull(), RTL_TEXTENCODING_ASCII_US );
295             sText += " with ";
296             sText += ByteString::CreateFromInt64( nLine );
297             sText += " lines written.";
298         }
299         else {
300             Help();
301             exit( 1 );
302         }
303     }
304     else {
305         if ( ByteString( argv[ 1 ] ) == "-t" || ByteString( argv[ 1 ] ) == "-f" ) {
306             rtl_TextEncoding nEncoding;
307 
308             ByteString sCurLangId( argv[ 2 ] );
309 
310             ByteString sCharset( argv[ 3 ] );
311             sCharset.ToUpperAscii();
312 
313             if      ( sCharset == "MS_932" )    nEncoding = RTL_TEXTENCODING_MS_932;
314             else if ( sCharset == "MS_936" )    nEncoding = RTL_TEXTENCODING_MS_936;
315             else if ( sCharset == "MS_949" )    nEncoding = RTL_TEXTENCODING_MS_949;
316             else if ( sCharset == "MS_950" )    nEncoding = RTL_TEXTENCODING_MS_950;
317             else if ( sCharset == "MS_1250" )   nEncoding = RTL_TEXTENCODING_MS_1250;
318             else if ( sCharset == "MS_1251" )   nEncoding = RTL_TEXTENCODING_MS_1251;
319             else if ( sCharset == "MS_1252" )   nEncoding = RTL_TEXTENCODING_MS_1252;
320             else if ( sCharset == "MS_1253" )   nEncoding = RTL_TEXTENCODING_MS_1253;
321             else if ( sCharset == "MS_1254" )   nEncoding = RTL_TEXTENCODING_MS_1254;
322             else if ( sCharset == "MS_1255" )   nEncoding = RTL_TEXTENCODING_MS_1255;
323             else if ( sCharset == "MS_1256" )   nEncoding = RTL_TEXTENCODING_MS_1256;
324             else if ( sCharset == "MS_1257" )   nEncoding = RTL_TEXTENCODING_MS_1257;
325             else if ( sCharset == "UTF8" )      nEncoding = RTL_TEXTENCODING_UTF8;
326 
327             else {
328                 Help();
329                 exit ( 1 );
330             }
331 
332             DirEntry aSource = DirEntry( String( argv[ 4 ], RTL_TEXTENCODING_ASCII_US ));
333             if ( !aSource.Exists()) {
334                 fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
335                 exit ( 2 );
336             }
337 
338             String sGSI( argv[ 4 ], RTL_TEXTENCODING_ASCII_US );
339             SvFileStream aGSI( sGSI, STREAM_STD_READ );
340             if ( !aGSI.IsOpen()) {
341                 fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
342                 exit ( 3 );
343             }
344             sal_uInt16 nFileType( GetGSIFileType( aGSI ));
345 
346             ByteString sGSILine;
347             while ( !aGSI.IsEof()) {
348 
349                 aGSI.ReadLine( sGSILine );
350                 ByteString sLangId( GetGSILineLangId( sGSILine, nFileType ));
351                 if ( sLangId == sCurLangId )
352                     ConvertGSILine(( ByteString( argv[ 1 ] ) == "-t" ), sGSILine, nEncoding, nFileType );
353 
354                 fprintf( stdout, "%s\n", sGSILine.GetBuffer());
355             }
356 
357             aGSI.Close();
358         }
359         else {
360             Help();
361             exit( 1 );
362         }
363     }
364     return 0;
365 }
366