1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_l10ntools.hxx" 26 #include <stdio.h> 27 #include <tools/fsys.hxx> 28 #include <tools/stream.hxx> 29 30 // local includes 31 #include "utf8conv.hxx" 32 33 #define GSI_FILE_UNKNOWN 0x0000 34 #define GSI_FILE_OLDSTYLE 0x0001 35 #define GSI_FILE_L10NFRAMEWORK 0x0002 36 37 /*****************************************************************************/ 38 sal_uInt16 GetGSIFileType( SvStream &rStream ) 39 /*****************************************************************************/ 40 { 41 sal_uInt16 nFileType = GSI_FILE_UNKNOWN; 42 43 sal_uLong nPos( rStream.Tell()); 44 rStream.Seek( STREAM_SEEK_TO_BEGIN ); 45 46 ByteString sLine; 47 while( !rStream.IsEof() && !sLine.Len()) 48 rStream.ReadLine( sLine ); 49 50 if( sLine.Len()) { 51 if( sLine.Search( "($$)" ) != STRING_NOTFOUND ) 52 nFileType = GSI_FILE_OLDSTYLE; 53 else 54 nFileType = GSI_FILE_L10NFRAMEWORK; 55 } 56 57 rStream.Seek( nPos ); 58 59 return nFileType; 60 } 61 62 /*****************************************************************************/ 63 ByteString GetGSILineId( const ByteString &rLine, sal_uInt16 nFileType ) 64 /*****************************************************************************/ 65 { 66 ByteString sId; 67 switch ( nFileType ) { 68 case GSI_FILE_OLDSTYLE: 69 sId = rLine; 70 sId.SearchAndReplaceAll( "($$)", "\t" ); 71 sId = sId.GetToken( 0, '\t' ); 72 break; 73 74 case GSI_FILE_L10NFRAMEWORK: 75 sId = rLine.GetToken( 0, '\t' ); 76 sId += "\t"; 77 sId += rLine.GetToken( 1, '\t' ); 78 sId += "\t"; 79 sId += rLine.GetToken( 4, '\t' ); 80 sId += "\t"; 81 sId += rLine.GetToken( 5, '\t' ); 82 break; 83 } 84 return sId; 85 } 86 87 /*****************************************************************************/ 88 ByteString GetGSILineLangId( const ByteString &rLine, sal_uInt16 nFileType ) 89 /*****************************************************************************/ 90 { 91 ByteString sLangId; 92 switch ( nFileType ) { 93 case GSI_FILE_OLDSTYLE: 94 sLangId = rLine; 95 sLangId.SearchAndReplaceAll( "($$)", "\t" ); 96 sLangId = sLangId.GetToken( 2, '\t' ); 97 break; 98 99 case GSI_FILE_L10NFRAMEWORK: 100 sLangId = rLine.GetToken( 9, '\t' ); 101 break; 102 } 103 return sLangId; 104 } 105 106 /*****************************************************************************/ 107 void ConvertGSILine( sal_Bool bToUTF8, ByteString &rLine, 108 rtl_TextEncoding nEncoding, sal_uInt16 nFileType ) 109 /*****************************************************************************/ 110 { 111 switch ( nFileType ) { 112 case GSI_FILE_OLDSTYLE: 113 if ( bToUTF8 ) 114 rLine = UTF8Converter::ConvertToUTF8( rLine, nEncoding ); 115 else 116 rLine = UTF8Converter::ConvertFromUTF8( rLine, nEncoding ); 117 break; 118 119 case GSI_FILE_L10NFRAMEWORK: { 120 ByteString sConverted; 121 for ( sal_uInt16 i = 0; i < rLine.GetTokenCount( '\t' ); i++ ) { 122 ByteString sToken = rLine.GetToken( i, '\t' ); 123 if (( i > 9 ) && ( i < 14 )) { 124 if( bToUTF8 ) 125 sToken = UTF8Converter::ConvertToUTF8( sToken, nEncoding ); 126 else 127 sToken = UTF8Converter::ConvertFromUTF8( sToken, nEncoding ); 128 } 129 if ( i ) 130 sConverted += "\t"; 131 sConverted += sToken; 132 } 133 rLine = sConverted; 134 } 135 break; 136 } 137 } 138 139 /*****************************************************************************/ 140 void Help() 141 /*****************************************************************************/ 142 { 143 fprintf( stdout, "\n" ); 144 fprintf( stdout, "gsiconv (c)1999 by StarOffice Entwicklungs GmbH\n" ); 145 fprintf( stdout, "===============================================\n" ); 146 fprintf( stdout, "\n" ); 147 fprintf( stdout, "gsiconv converts strings in GSI-Files (Gutschmitt Interface) from or to UTF-8\n" ); 148 fprintf( stdout, "\n" ); 149 fprintf( stdout, "Syntax: gsiconv (-t|-f langid charset)|(-p n) filename\n" ); 150 fprintf( stdout, "Switches: -t => conversion from charset to UTF-8\n" ); 151 fprintf( stdout, " -f => conversion from UTF-8 to charset\n" ); 152 fprintf( stdout, " -p n => creates several files with ca. n lines\n" ); 153 fprintf( stdout, "\n" ); 154 fprintf( stdout, "Allowed charsets:\n" ); 155 fprintf( stdout, " MS_932 => Japanese\n" ); 156 fprintf( stdout, " MS_936 => Chinese Simplified\n" ); 157 fprintf( stdout, " MS_949 => Korean\n" ); 158 fprintf( stdout, " MS_950 => Chinese Traditional\n" ); 159 fprintf( stdout, " MS_1250 => East Europe\n" ); 160 fprintf( stdout, " MS_1251 => Cyrillic\n" ); 161 fprintf( stdout, " MS_1252 => West Europe\n" ); 162 fprintf( stdout, " MS_1253 => Greek\n" ); 163 fprintf( stdout, " MS_1254 => Turkish\n" ); 164 fprintf( stdout, " MS_1255 => Hebrew\n" ); 165 fprintf( stdout, " MS_1256 => Arabic\n" ); 166 fprintf( stdout, "\n" ); 167 fprintf( stdout, "Allowed langids:\n" ); 168 fprintf( stdout, " 1 => ENGLISH_US\n" ); 169 fprintf( stdout, " 3 => PORTUGUESE \n" ); 170 fprintf( stdout, " 4 => GERMAN_DE (new german style)\n" ); 171 fprintf( stdout, " 7 => RUSSIAN\n" ); 172 fprintf( stdout, " 30 => GREEK\n" ); 173 fprintf( stdout, " 31 => DUTCH\n" ); 174 fprintf( stdout, " 33 => FRENCH\n" ); 175 fprintf( stdout, " 34 => SPANISH\n" ); 176 fprintf( stdout, " 35 => FINNISH\n" ); 177 fprintf( stdout, " 36 => HUNGARIAN\n" ); 178 fprintf( stdout, " 39 => ITALIAN\n" ); 179 fprintf( stdout, " 42 => CZECH\n" ); 180 fprintf( stdout, " 44 => ENGLISH (UK)\n" ); 181 fprintf( stdout, " 45 => DANISH\n" ); 182 fprintf( stdout, " 46 => SWEDISH\n" ); 183 fprintf( stdout, " 47 => NORWEGIAN\n" ); 184 fprintf( stdout, " 49 => GERMAN (old german style)\n" ); 185 fprintf( stdout, " 55 => PORTUGUESE_BRAZILIAN\n" ); 186 fprintf( stdout, " 81 => JAPANESE\n" ); 187 fprintf( stdout, " 82 => KOREAN\n" ); 188 fprintf( stdout, " 86 => CHINESE_SIMPLIFIED\n" ); 189 fprintf( stdout, " 88 => CHINESE_TRADITIONAL\n" ); 190 fprintf( stdout, " 90 => TURKISH\n" ); 191 fprintf( stdout, " 96 => ARABIC\n" ); 192 fprintf( stdout, " 97 => HEBREW\n" ); 193 fprintf( stdout, "\n" ); 194 } 195 196 /*****************************************************************************/ 197 #if defined(UNX) || defined(OS2) 198 int main( int argc, char *argv[] ) 199 #else 200 int _cdecl main( int argc, char *argv[] ) 201 #endif 202 /*****************************************************************************/ 203 { 204 if (( argc != 5 ) && ( argc != 4 )) { 205 Help(); 206 exit ( 0 ); 207 } 208 209 if ( argc == 4 ) { 210 if ( ByteString( argv[ 1 ] ) == "-p" ) { 211 212 DirEntry aSource = DirEntry( String( argv[ 3 ], RTL_TEXTENCODING_ASCII_US )); 213 if ( !aSource.Exists()) { 214 fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); 215 exit ( 2 ); 216 } 217 218 DirEntry aOutput( aSource ); 219 220 String sBase = aOutput.GetBase(); 221 String sExt = aOutput.GetExtension(); 222 223 String sGSI( argv[ 3 ], RTL_TEXTENCODING_ASCII_US ); 224 SvFileStream aGSI( sGSI, STREAM_STD_READ ); 225 if ( !aGSI.IsOpen()) { 226 fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); 227 exit ( 3 ); 228 } 229 230 sal_uInt16 nFileType( GetGSIFileType( aGSI )); 231 232 sal_uLong nMaxLines = (sal_uLong) ByteString( argv[ 2 ] ).ToInt64(); 233 if ( !nMaxLines ) { 234 fprintf( stderr, "\nERROR: Linecount must be at least 1!\n\n" ); 235 exit ( 3 ); 236 } 237 238 ByteString sGSILine; 239 ByteString sOldId; 240 sal_uLong nLine = 0; 241 sal_uLong nOutputFile = 1; 242 243 String sOutput( sBase ); 244 sOutput += String( "_", RTL_TEXTENCODING_ASCII_US ); 245 sOutput += String::CreateFromInt64( nOutputFile ); 246 if ( sExt.Len()) { 247 sOutput += String( ".", RTL_TEXTENCODING_ASCII_US ); 248 sOutput += sExt; 249 } 250 nOutputFile ++; 251 252 aOutput.SetName( sOutput ); 253 SvFileStream aOutputStream( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC ); 254 255 while ( !aGSI.IsEof()) { 256 257 aGSI.ReadLine( sGSILine ); 258 ByteString sId( GetGSILineId( sGSILine, nFileType )); 259 260 nLine++; 261 262 if (( nLine >= nMaxLines ) && ( sId != sOldId )) { 263 aOutputStream.Close(); 264 265 ByteString sText( aOutput.GetFull(), gsl_getSystemTextEncoding()); 266 sText += " with "; 267 sText += ByteString::CreateFromInt64( nLine ); 268 sText += " lines written."; 269 270 fprintf( stdout, "%s\n", sText.GetBuffer()); 271 String sOutput1( sBase ); 272 sOutput1 += String( "_", RTL_TEXTENCODING_ASCII_US ); 273 sOutput1 += String::CreateFromInt64( nOutputFile ); 274 if ( sExt.Len()) { 275 sOutput1 += String( ".", RTL_TEXTENCODING_ASCII_US ); 276 sOutput1 += sExt; 277 } 278 nOutputFile ++; 279 280 aOutput.SetName( sOutput1 ); 281 282 aOutputStream.Open( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC ); 283 nLine = 0; 284 } 285 286 aOutputStream.WriteLine( sGSILine ); 287 288 sOldId = sId; 289 } 290 291 aGSI.Close(); 292 aOutputStream.Close(); 293 294 ByteString sText( aOutput.GetFull(), RTL_TEXTENCODING_ASCII_US ); 295 sText += " with "; 296 sText += ByteString::CreateFromInt64( nLine ); 297 sText += " lines written."; 298 } 299 else { 300 Help(); 301 exit( 1 ); 302 } 303 } 304 else { 305 if ( ByteString( argv[ 1 ] ) == "-t" || ByteString( argv[ 1 ] ) == "-f" ) { 306 rtl_TextEncoding nEncoding; 307 308 ByteString sCurLangId( argv[ 2 ] ); 309 310 ByteString sCharset( argv[ 3 ] ); 311 sCharset.ToUpperAscii(); 312 313 if ( sCharset == "MS_932" ) nEncoding = RTL_TEXTENCODING_MS_932; 314 else if ( sCharset == "MS_936" ) nEncoding = RTL_TEXTENCODING_MS_936; 315 else if ( sCharset == "MS_949" ) nEncoding = RTL_TEXTENCODING_MS_949; 316 else if ( sCharset == "MS_950" ) nEncoding = RTL_TEXTENCODING_MS_950; 317 else if ( sCharset == "MS_1250" ) nEncoding = RTL_TEXTENCODING_MS_1250; 318 else if ( sCharset == "MS_1251" ) nEncoding = RTL_TEXTENCODING_MS_1251; 319 else if ( sCharset == "MS_1252" ) nEncoding = RTL_TEXTENCODING_MS_1252; 320 else if ( sCharset == "MS_1253" ) nEncoding = RTL_TEXTENCODING_MS_1253; 321 else if ( sCharset == "MS_1254" ) nEncoding = RTL_TEXTENCODING_MS_1254; 322 else if ( sCharset == "MS_1255" ) nEncoding = RTL_TEXTENCODING_MS_1255; 323 else if ( sCharset == "MS_1256" ) nEncoding = RTL_TEXTENCODING_MS_1256; 324 else if ( sCharset == "MS_1257" ) nEncoding = RTL_TEXTENCODING_MS_1257; 325 else if ( sCharset == "UTF8" ) nEncoding = RTL_TEXTENCODING_UTF8; 326 327 else { 328 Help(); 329 exit ( 1 ); 330 } 331 332 DirEntry aSource = DirEntry( String( argv[ 4 ], RTL_TEXTENCODING_ASCII_US )); 333 if ( !aSource.Exists()) { 334 fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); 335 exit ( 2 ); 336 } 337 338 String sGSI( argv[ 4 ], RTL_TEXTENCODING_ASCII_US ); 339 SvFileStream aGSI( sGSI, STREAM_STD_READ ); 340 if ( !aGSI.IsOpen()) { 341 fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer()); 342 exit ( 3 ); 343 } 344 sal_uInt16 nFileType( GetGSIFileType( aGSI )); 345 346 ByteString sGSILine; 347 while ( !aGSI.IsEof()) { 348 349 aGSI.ReadLine( sGSILine ); 350 ByteString sLangId( GetGSILineLangId( sGSILine, nFileType )); 351 if ( sLangId == sCurLangId ) 352 ConvertGSILine(( ByteString( argv[ 1 ] ) == "-t" ), sGSILine, nEncoding, nFileType ); 353 354 fprintf( stdout, "%s\n", sGSILine.GetBuffer()); 355 } 356 357 aGSI.Close(); 358 } 359 else { 360 Help(); 361 exit( 1 ); 362 } 363 } 364 return 0; 365 } 366