/**************************************************************
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 *************************************************************/



// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_l10ntools.hxx"
#include <stdio.h>
#include <tools/fsys.hxx>
#include <tools/stream.hxx>

// local includes
#include "utf8conv.hxx"

// File type codes returned by GetGSIFileType() and passed as nFileType to
// the line helpers in this file.

// No non-empty line could be read from the stream.
#define GSI_FILE_UNKNOWN		0x0000
// The first non-empty line contains the "($$)" field separator.
#define GSI_FILE_OLDSTYLE		0x0001
// Any other non-empty first line; fields are addressed as tab-separated tokens.
#define GSI_FILE_L10NFRAMEWORK	0x0002

/*****************************************************************************/
// Determines the GSI flavour of rStream from its first non-empty line.
//
// The stream is rewound to its beginning for the check and is seeked back to
// the position it had on entry before the function returns.
//
// Returns GSI_FILE_OLDSTYLE if that line contains "($$)",
// GSI_FILE_L10NFRAMEWORK for any other non-empty line, and
// GSI_FILE_UNKNOWN if no non-empty line was found.
sal_uInt16 GetGSIFileType( SvStream &rStream )
/*****************************************************************************/
{
	const sal_uLong nSavedPos = rStream.Tell();
	rStream.Seek( STREAM_SEEK_TO_BEGIN );

	// skip leading empty lines
	ByteString sFirstLine;
	for ( ;; ) {
		if ( rStream.IsEof() || sFirstLine.Len())
			break;
		rStream.ReadLine( sFirstLine );
	}

	sal_uInt16 nDetected;
	if ( !sFirstLine.Len())
		nDetected = GSI_FILE_UNKNOWN;
	else if ( sFirstLine.Search( "($$)" ) == STRING_NOTFOUND )
		nDetected = GSI_FILE_L10NFRAMEWORK;
	else
		nDetected = GSI_FILE_OLDSTYLE;

	// leave the stream where the caller had it
	rStream.Seek( nSavedPos );

	return nDetected;
}

/*****************************************************************************/
// Extracts the identifier part of one GSI line.
//
// GSI_FILE_OLDSTYLE:      every "($$)" is replaced by a tab and the first
//                         tab-separated token is returned.
// GSI_FILE_L10NFRAMEWORK: tokens 0, 1, 4 and 5 of the tab-separated line are
//                         returned, joined by tabs.
// Any other nFileType yields an empty string.
ByteString GetGSILineId( const ByteString &rLine, sal_uInt16 nFileType )
/*****************************************************************************/
{
	if ( nFileType == GSI_FILE_OLDSTYLE ) {
		ByteString sTabbed( rLine );
		sTabbed.SearchAndReplaceAll( "($$)", "\t" );
		return sTabbed.GetToken( 0, '\t' );
	}

	if ( nFileType == GSI_FILE_L10NFRAMEWORK ) {
		// token indices that make up the id, in output order
		static const sal_uInt16 aIdColumns[] = { 0, 1, 4, 5 };
		const sal_uInt16 nIdColumns = sizeof( aIdColumns ) / sizeof( aIdColumns[ 0 ] );

		ByteString sKey;
		for ( sal_uInt16 k = 0; k < nIdColumns; k++ ) {
			if ( k )
				sKey += "\t";
			sKey += rLine.GetToken( aIdColumns[ k ], '\t' );
		}
		return sKey;
	}

	return ByteString();
}

/*****************************************************************************/
// Extracts the language id field of one GSI line.
//
// GSI_FILE_OLDSTYLE:      every "($$)" is replaced by a tab and token 2 of
//                         the result is returned.
// GSI_FILE_L10NFRAMEWORK: token 9 of the tab-separated line is returned.
// Any other nFileType yields an empty string.
ByteString GetGSILineLangId( const ByteString &rLine, sal_uInt16 nFileType )
/*****************************************************************************/
{
	if ( nFileType == GSI_FILE_L10NFRAMEWORK )
		return rLine.GetToken( 9, '\t' );

	if ( nFileType == GSI_FILE_OLDSTYLE ) {
		ByteString sTabbed( rLine );
		sTabbed.SearchAndReplaceAll( "($$)", "\t" );
		return sTabbed.GetToken( 2, '\t' );
	}

	return ByteString();
}

/*****************************************************************************/
// Converts one GSI line in place between nEncoding and UTF-8.
//
// bToUTF8    sal_True:  convert from nEncoding to UTF-8,
//            sal_False: convert from UTF-8 to nEncoding.
// rLine      the line to convert; receives the converted text.
// nEncoding  the non-UTF-8 side of the conversion.
// nFileType  GSI_FILE_OLDSTYLE converts the whole line;
//            GSI_FILE_L10NFRAMEWORK converts only the tab-separated tokens
//            with index 10 to 13 and copies all other tokens unchanged.
//            Lines of any other file type are left untouched.
void ConvertGSILine( sal_Bool bToUTF8, ByteString &rLine,
		rtl_TextEncoding nEncoding,	sal_uInt16 nFileType )
/*****************************************************************************/
{
	switch ( nFileType ) {
		case GSI_FILE_OLDSTYLE:
			if ( bToUTF8 )
				rLine = UTF8Converter::ConvertToUTF8( rLine, nEncoding );
			else
				rLine = UTF8Converter::ConvertFromUTF8( rLine, nEncoding );
		break;

		case GSI_FILE_L10NFRAMEWORK: {
			// rLine is not modified while the tokens are processed, so the
			// token count is determined once instead of on every iteration
			const sal_uInt16 nTokenCount = rLine.GetTokenCount( '\t' );

			ByteString sConverted;
			for ( sal_uInt16 i = 0; i < nTokenCount; i++ ) {
				ByteString sToken = rLine.GetToken( i, '\t' );
				// only tokens 10..13 are run through the converter
				if (( i > 9 ) && ( i < 14 )) {
					if( bToUTF8 )
						sToken = UTF8Converter::ConvertToUTF8( sToken, nEncoding );
					else
						sToken = UTF8Converter::ConvertFromUTF8( sToken, nEncoding );
				}
				// re-insert the separator in front of every token but the first
				if ( i )
					sConverted += "\t";
				sConverted += sToken;
			}
			rLine = sConverted;
		}
		break;
	}
}

/*****************************************************************************/
// Prints the usage text of gsiconv to stdout: syntax, switches, the charset
// names accepted for the charset argument and a list of language ids.
//
// The charset list is kept in sync with the names main() accepts, which
// include MS_1257 and UTF8.
void Help()
/*****************************************************************************/
{
	fprintf( stdout, "\n" );
	fprintf( stdout, "gsiconv (c)1999 by StarOffice Entwicklungs GmbH\n" );
	fprintf( stdout, "===============================================\n" );
	fprintf( stdout, "\n" );
	fprintf( stdout, "gsiconv converts strings in GSI-Files (Gutschmitt Interface) from or to UTF-8\n" );
	fprintf( stdout, "\n" );
	fprintf( stdout, "Syntax: gsiconv (-t|-f langid charset)|(-p n) filename\n" );
	fprintf( stdout, "Switches: -t   => conversion from charset to UTF-8\n" );
	fprintf( stdout, "          -f   => conversion from UTF-8 to charset\n" );
	fprintf( stdout, "          -p n => creates several files with ca. n lines\n" );
	fprintf( stdout, "\n" );
	fprintf( stdout, "Allowed charsets:\n" );
	fprintf( stdout, "          MS_932  => Japanese\n" );
	fprintf( stdout, "          MS_936  => Chinese Simplified\n" );
	fprintf( stdout, "          MS_949  => Korean\n" );
	fprintf( stdout, "          MS_950  => Chinese Traditional\n" );
	fprintf( stdout, "          MS_1250 => East Europe\n" );
	fprintf( stdout, "          MS_1251 => Cyrillic\n" );
	fprintf( stdout, "          MS_1252 => West Europe\n" );
	fprintf( stdout, "          MS_1253 => Greek\n" );
	fprintf( stdout, "          MS_1254 => Turkish\n" );
	fprintf( stdout, "          MS_1255 => Hebrew\n" );
	fprintf( stdout, "          MS_1256 => Arabic\n" );
	fprintf( stdout, "          MS_1257 => Baltic\n" );
	fprintf( stdout, "          UTF8    => Unicode (UTF-8)\n" );
	fprintf( stdout, "\n" );
	fprintf( stdout, "Allowed langids:\n" );
	fprintf( stdout, "          1  => ENGLISH_US\n" );
	fprintf( stdout, "          3  => PORTUGUESE \n" );
	fprintf( stdout, "          4  => GERMAN_DE (new german style)\n" );
	fprintf( stdout, "          7  => RUSSIAN\n" );
	fprintf( stdout, "          30 => GREEK\n" );
	fprintf( stdout, "          31 => DUTCH\n" );
	fprintf( stdout, "          33 => FRENCH\n" );
	fprintf( stdout, "          34 => SPANISH\n" );
	fprintf( stdout, "          35 => FINNISH\n" );
	fprintf( stdout, "          36 => HUNGARIAN\n" );
	fprintf( stdout, "          39 => ITALIAN\n" );
	fprintf( stdout, "          42 => CZECH\n" );
	fprintf( stdout, "          44 => ENGLISH (UK)\n" );
	fprintf( stdout, "          45 => DANISH\n" );
	fprintf( stdout, "          46 => SWEDISH\n" );
	fprintf( stdout, "          47 => NORWEGIAN\n" );
	fprintf( stdout, "          49 => GERMAN (old german style)\n" );
	fprintf( stdout, "          55 => PORTUGUESE_BRAZILIAN\n" );
	fprintf( stdout, "          81 => JAPANESE\n" );
	fprintf( stdout, "          82 => KOREAN\n" );
	fprintf( stdout, "          86 => CHINESE_SIMPLIFIED\n" );
	fprintf( stdout, "          88 => CHINESE_TRADITIONAL\n" );
	fprintf( stdout, "          90 => TURKISH\n" );
	fprintf( stdout, "          96 => ARABIC\n" );
	fprintf( stdout, "          97 => HEBREW\n" );
	fprintf( stdout, "\n" );
}

/*****************************************************************************/
#if defined(UNX) || defined(OS2)
int main( int argc, char *argv[] )
#else
int _cdecl main( int argc, char *argv[] )
#endif
/*****************************************************************************/
{
	if (( argc != 5 ) && ( argc != 4 )) {
		Help();
		exit ( 0 );
	}

	if ( argc == 4 ) {
		if ( ByteString( argv[ 1 ] ) == "-p" ) {

			DirEntry aSource = DirEntry( String( argv[ 3 ], RTL_TEXTENCODING_ASCII_US ));
			if ( !aSource.Exists()) {
				fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
				exit ( 2 );
			}

			DirEntry aOutput( aSource );

			String sBase = aOutput.GetBase();
			String sExt = aOutput.GetExtension();

			String sGSI( argv[ 3 ], RTL_TEXTENCODING_ASCII_US );
			SvFileStream aGSI( sGSI, STREAM_STD_READ  );
			if ( !aGSI.IsOpen()) {
				fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
				exit ( 3 );
			}

			sal_uInt16 nFileType( GetGSIFileType( aGSI ));

			sal_uLong nMaxLines = (sal_uLong) ByteString( argv[ 2 ] ).ToInt64();
			if ( !nMaxLines ) {
				fprintf( stderr, "\nERROR: Linecount must be at least 1!\n\n" );
				exit ( 3 );
			}

			ByteString sGSILine;
			ByteString sOldId;
			sal_uLong nLine = 0;
			sal_uLong nOutputFile = 1;

			String sOutput( sBase );
			sOutput += String( "_", RTL_TEXTENCODING_ASCII_US );
			sOutput += String::CreateFromInt64( nOutputFile );
			if ( sExt.Len()) {
				sOutput += String( ".", RTL_TEXTENCODING_ASCII_US );
				sOutput += sExt;
			}
			nOutputFile ++;

			aOutput.SetName( sOutput );
			SvFileStream aOutputStream( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC );

			while ( !aGSI.IsEof()) {

				aGSI.ReadLine( sGSILine );
				ByteString sId( GetGSILineId( sGSILine, nFileType ));

				nLine++;

				if (( nLine >= nMaxLines ) && ( sId != sOldId )) {
					aOutputStream.Close();

					ByteString sText( aOutput.GetFull(), gsl_getSystemTextEncoding());
					sText += " with ";
					sText += ByteString::CreateFromInt64( nLine );
					sText += " lines written.";

					fprintf( stdout, "%s\n", sText.GetBuffer());
					String sOutput1( sBase );
					sOutput1 += String( "_", RTL_TEXTENCODING_ASCII_US );
					sOutput1 += String::CreateFromInt64( nOutputFile );
					if ( sExt.Len()) {
						sOutput1 += String( ".", RTL_TEXTENCODING_ASCII_US );
						sOutput1 += sExt;
					}
					nOutputFile ++;

					aOutput.SetName( sOutput1 );

					aOutputStream.Open( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC );
					nLine = 0;
				}

				aOutputStream.WriteLine( sGSILine );

				sOldId = sId;
			}

			aGSI.Close();
			aOutputStream.Close();

			ByteString sText( aOutput.GetFull(), RTL_TEXTENCODING_ASCII_US );
			sText += " with ";
			sText += ByteString::CreateFromInt64( nLine );
			sText += " lines written.";
		}
		else {
			Help();
			exit( 1 );
		}
	}
	else {
		if ( ByteString( argv[ 1 ] ) == "-t" || ByteString( argv[ 1 ] ) == "-f" ) {
			rtl_TextEncoding nEncoding;

			ByteString sCurLangId( argv[ 2 ] );

			ByteString sCharset( argv[ 3 ] );
			sCharset.ToUpperAscii();

			if 		( sCharset == "MS_932" ) 	nEncoding = RTL_TEXTENCODING_MS_932;
			else if ( sCharset == "MS_936" ) 	nEncoding = RTL_TEXTENCODING_MS_936;
			else if ( sCharset == "MS_949" ) 	nEncoding = RTL_TEXTENCODING_MS_949;
			else if ( sCharset == "MS_950" ) 	nEncoding = RTL_TEXTENCODING_MS_950;
			else if ( sCharset == "MS_1250" ) 	nEncoding = RTL_TEXTENCODING_MS_1250;
			else if ( sCharset == "MS_1251" ) 	nEncoding = RTL_TEXTENCODING_MS_1251;
			else if ( sCharset == "MS_1252" ) 	nEncoding = RTL_TEXTENCODING_MS_1252;
			else if ( sCharset == "MS_1253" ) 	nEncoding = RTL_TEXTENCODING_MS_1253;
			else if ( sCharset == "MS_1254" ) 	nEncoding = RTL_TEXTENCODING_MS_1254;
			else if ( sCharset == "MS_1255" ) 	nEncoding = RTL_TEXTENCODING_MS_1255;
			else if ( sCharset == "MS_1256" ) 	nEncoding = RTL_TEXTENCODING_MS_1256;
			else if ( sCharset == "MS_1257" ) 	nEncoding = RTL_TEXTENCODING_MS_1257;
			else if ( sCharset == "UTF8" )		nEncoding = RTL_TEXTENCODING_UTF8;

			else {
				Help();
				exit ( 1 );
			}

			DirEntry aSource = DirEntry( String( argv[ 4 ], RTL_TEXTENCODING_ASCII_US ));
			if ( !aSource.Exists()) {
				fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
				exit ( 2 );
			}

			String sGSI( argv[ 4 ], RTL_TEXTENCODING_ASCII_US );
			SvFileStream aGSI( sGSI, STREAM_STD_READ );
			if ( !aGSI.IsOpen()) {
				fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
				exit ( 3 );
			}
			sal_uInt16 nFileType( GetGSIFileType( aGSI ));

			ByteString sGSILine;
			while ( !aGSI.IsEof()) {

				aGSI.ReadLine( sGSILine );
				ByteString sLangId( GetGSILineLangId( sGSILine, nFileType ));
				if ( sLangId == sCurLangId )
					ConvertGSILine(( ByteString( argv[ 1 ] ) == "-t" ), sGSILine, nEncoding, nFileType );

				fprintf( stdout, "%s\n", sGSILine.GetBuffer());
			}

			aGSI.Close();
		}
		else {
			Help();
			exit( 1 );
		}
	}
	return 0;
}
