/**************************************************************
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 *************************************************************/


// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_basic.hxx"

#include "sbcomp.hxx"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#if defined UNX
#include <stdlib.h>
#else
#include <math.h>   // atof()
#endif
#include <rtl/math.hxx>
#include <vcl/svapp.hxx>
#include <unotools/charclass.hxx>

#include <runtime.hxx>

/** Creates a scanner for the given source text.
 *
 * @param rBuf  source text to scan; copied into aBuf
 * @param p     StarBASIC instance stored in pBasic (GenError() reports
 *              through it when it is not NULL)
 */
SbiScanner::SbiScanner( const ::rtl::OUString& rBuf, StarBASIC* p ) : aBuf( rBuf )
{
	// Owner and current value/type
	pBasic    = p;
	nVal      = 0;
	eScanType = SbxVARIANT;

	// No line has been fetched yet; scanning starts at the buffer start
	pLine   = NULL;
	nBufPos = 0;
	nLine   = 0;
	nCol    = 0;
	nCol1   = 0;
	nCol2   = 0;

	// Column locking and error counting
	nCurCol1   = 0;
	nSavedCol1 = 0;
	nColLock   = 0;
	nErrors    = 0;

	// Flags that start out cleared
	bError                  = sal_False;
	bAbort                  = sal_False;
	bSpaces                 = sal_False;
	bNumber                 = sal_False;
	bSymbol                 = sal_False;
	bUsedForHilite          = sal_False;
	bCompatible             = sal_False;
	bVBASupportOn           = sal_False;
	bPrevLineExtentsComment = sal_False;

	// Flags that start out set
	bHash   = sal_True;
	bErrors = sal_True;
}

// Destructor; the body is intentionally empty.
SbiScanner::~SbiScanner()
{}

/** Increments the column lock counter. On the transition from unlocked to
 *  locked, the current nCol1 is saved in nSavedCol1 (GenError() reports that
 *  saved column while the lock is held).
 */
void SbiScanner::LockColumn()
{
	const bool bFirstLock = ( nColLock == 0 );
	++nColLock;
	if( bFirstLock )
		nSavedCol1 = nCol1;
}

/** Decrements the column lock counter; does nothing when it is already 0. */
void SbiScanner::UnlockColumn()
{
	if( nColLock == 0 )
		return;
	--nColLock;
}

/** Registers a compile error.
 *
 * When compiler errors are blocked (GetSbData()->bBlockCompilerError), only
 * the abort flag is set. Otherwise the first error of a statement is passed
 * to pBasic->CError() together with aError, the line and the column range;
 * further errors are only counted. Nothing is reported or counted while
 * bErrors is off.
 *
 * @param code  error code to report
 */
void SbiScanner::GenError( SbError code )
{
	if( GetSbData()->bBlockCompilerError )
	{
		bAbort = sal_True;
		return;
	}

	const bool bReportThisOne = !bError && bErrors;
	if( bReportThisOne )
	{
		// Report only one error per statement
		bError = sal_True;

		sal_Bool bContinue = sal_True;
		if( pBasic )
		{
			sal_uInt16 nStartCol = nColLock ? nSavedCol1 : nCol1;

			// EXPECTED / UNEXPECTED style errors always refer to the last
			// token, so its start column is used regardless of the lock
			const bool bRefersToLastToken =
				   code == SbERR_EXPECTED
				|| code == SbERR_UNEXPECTED
				|| code == SbERR_SYMBOL_EXPECTED
				|| code == SbERR_LABEL_EXPECTED;
			if( bRefersToLastToken )
			{
				nStartCol = nCol1;
				if( nStartCol > nCol2 )
					nCol2 = nStartCol;
			}
			bContinue = pBasic->CError( code, aError, nLine, nStartCol, nCol2 );
		}

		// Abort when CError() returned false or the error is one of the two
		// resource errors below
		if( !bContinue || code == SbERR_NO_MEMORY || code == SbERR_PROG_TOO_LARGE )
			bAbort = sal_True;
	}

	if( bErrors )
		nErrors++;
}

// Returns sal_True if a colon follows immediately at the scan position; the
// colon is consumed in that case.
// Used by SbiTokenizer::MayBeLabel() to recognize a label.

sal_Bool SbiScanner::DoesColonFollow()
{
	const bool bColonHere = pLine && ( *pLine == ':' );
	if( !bColonHere )
		return sal_False;

	// Step over the colon
	++pLine;
	++nCol;
	return sal_True;
}

// Checks for a legal type suffix.
// The position of the character in "%&!#@ $" is added to SbxINTEGER to form
// the data type; the blank only reserves a position and is never accepted.
// Returns SbxVARIANT when c is 0 or not a suffix character.

static SbxDataType GetSuffixType( sal_Unicode c )
{
	static String aSuffixesStr = String::CreateFromAscii( "%&!#@ $" );

	if( c == 0 || c == ' ' )
		return SbxVARIANT;

	const sal_uInt32 nPos = aSuffixesStr.Search( c );
	if( nPos == STRING_NOTFOUND )
		return SbxVARIANT;

	return SbxDataType( (sal_uInt16) nPos + SbxINTEGER );
}

// NextSym() (below) reads the next symbol into the members aSym, nVal and
// eScanType. It returns sal_False once the source buffer is exhausted.
// BUF_SIZE is the size, in characters, of the local buffer NextSym() uses to
// collect the characters of a number literal.
#define BUF_SIZE 80

namespace {

/** Tells whether the passed character counts as white space for the scanner:
    blank, horizontal tab or form feed. */
inline bool lclIsWhitespace( sal_Unicode cChar )
{
    switch( cChar )
    {
        case ' ':
        case '\t':
        case '\f':
            return true;
        default:
            return false;
    }
}

} // namespace

/** Scans the next symbol from the source buffer.
 *
 * Fetches a new line from aBuf when no line is pending, skips leading white
 * space and then classifies the text at the scan position as identifier,
 * decimal number, &H / &O number, quoted string or bracketed name, invalid
 * character, or operator. The results are left in the members aSym, nVal,
 * eScanType, bSymbol, bNumber, bSpaces and bHash; nCol1 and nCol2 receive
 * the first and last column of the symbol.
 *
 * At the end of a line aSym is set to '\n'. If the line ends in '_' (and the
 * scan column is not 0), scanning continues on the next line instead.
 * A comment ("'" or REM) is returned as the symbol "REM" and consumes the
 * rest of the line.
 *
 * @return sal_False when the source buffer is exhausted, sal_True otherwise
 */
sal_Bool SbiScanner::NextSym()
{
	// Remember the previous position; it is restored in the end-of-line case
	sal_uInt16 nOldLine = nLine;
	sal_uInt16 nOldCol1 = nCol1;
	sal_uInt16 nOldCol2 = nCol2;
	sal_Unicode buf[ BUF_SIZE ], *p = buf;
	bHash = sal_False;

	eScanType = SbxVARIANT;
	aSym.Erase();
	bSymbol =
	bNumber = bSpaces = sal_False;

	// No line pending? Then cut the next one out of the buffer.
	if( !pLine )
	{
		sal_Int32 n = nBufPos;
		sal_Int32 nLen = aBuf.getLength();
		if( nBufPos >= nLen )
			return sal_False;
		const sal_Unicode* p2 = aBuf.getStr();
		p2 += n;
		while( ( n < nLen ) && ( *p2 != '\n' ) && ( *p2 != '\r' ) )
			p2++, n++;
		// #163944# ignore trailing whitespace
		sal_Int32 nCopyEndPos = n;
		while( (nBufPos < nCopyEndPos) && lclIsWhitespace( aBuf[ nCopyEndPos - 1 ] ) )
			--nCopyEndPos;
		aLine = aBuf.copy( nBufPos, nCopyEndPos - nBufPos );
		if( n < nLen )
		{
			// Step over the line end; "\r\n" counts as one line end
			if( *p2 == '\r' && *( p2+1 ) == '\n' )
				n += 2;
			else
				n++;
		}
		nBufPos = n;
		pLine = aLine.getStr();
		nOldLine = ++nLine;
		nCol = nCol1 = nCol2 = nOldCol1 = nOldCol2 = 0;
		nColLock = 0;
	}

	// Skip leading white space
	while( lclIsWhitespace( *pLine ) )
		pLine++, nCol++, bSpaces = sal_True;

	nCol1 = nCol;

	// Nothing left on the line?
	if( !*pLine )
		goto eoln;

	// The previous line was a comment ending in " _": this line belongs to it
	if( bPrevLineExtentsComment )
		goto PrevLineCommentLbl;

	if( *pLine == '#' )
	{
		pLine++;
		nCol++;
		bHash = sal_True;
	}

	// Symbol? Then copy its characters.
	if( BasicSimpleCharClass::isAlpha( *pLine, bCompatible ) || *pLine == '_' )
	{
		// A '_' with nothing behind it is a line continuation
		if(	*pLine == '_' && !*(pLine+1) )
		{	pLine++;
			goto eoln;	}
		bSymbol = sal_True;
		short n = nCol;
		for ( ; (BasicSimpleCharClass::isAlphaNumeric( *pLine, bCompatible ) || ( *pLine == '_' ) ); pLine++ )
			nCol++;
		aSym = aLine.copy( n, nCol - n );

		// Special handling for "go to"
		if( bCompatible && *pLine && aSym.EqualsIgnoreCaseAscii( "go" ) )
		{
			const sal_Unicode* pTestLine = pLine;
			short nTestCol = nCol;
			while( lclIsWhitespace( *pTestLine ) )
			{
				pTestLine++;
				nTestCol++;
			}

			if( *pTestLine && *(pTestLine + 1) )
			{
				String aTestSym = aLine.copy( nTestCol, 2 );
				if( aTestSym.EqualsIgnoreCaseAscii( "to" ) )
				{
					aSym = String::CreateFromAscii( "goto" );
					pLine = pTestLine + 2;
					nCol = nTestCol + 2;
				}
			}
		}

		// Replace a trailing '_' by a blank when the end of the line follows
		// (it would otherwise be taken for a line continuation)
		if(	!bUsedForHilite && !*pLine && *(pLine-1) == '_' )
		{
			aSym.GetBufferAccess();		// #109693 force copy if necessary
			*((sal_Unicode*)(pLine-1)) = ' ';		// cast because of const
		}
		// Type suffix?
		// Do not test the exclamation mark when another symbol follows it
		else if( *pLine != '!' || !BasicSimpleCharClass::isAlpha( pLine[ 1 ], bCompatible ) )
		{
			SbxDataType t = GetSuffixType( *pLine );
			if( t != SbxVARIANT )
			{
				eScanType = t;
				pLine++;
				nCol++;
			}
		}
	}

	// Number? Then read and convert it.
	// NOTE(review): the "& 0xFF" masks here and the int -> char conversion
	// inside strchr() below look only at the low byte, so a character above
	// 0xFF whose low byte is a digit would be accepted -- confirm whether
	// that is intended.
	else if( BasicSimpleCharClass::isDigit( *pLine & 0xFF )
		|| ( *pLine == '.' && BasicSimpleCharClass::isDigit( *(pLine+1) & 0xFF ) ) )
	{
		short exp = 0;
		short comma = 0;
		eScanType = SbxDOUBLE;
		sal_Bool bBufOverflow = sal_False;
		while( strchr( "0123456789.DEde", *pLine ) && *pLine )
		{
			// Buffer full? -> keep scanning without storing.
			// One slot always stays free for the terminator.
			if( (p-buf) >= (BUF_SIZE-1) )
			{
				bBufOverflow = sal_True;
				pLine++, nCol++;
				continue;
			}
			// Decimal point or exponent?
			if( *pLine == '.' )
			{
				if( ++comma > 1 )
				{
					pLine++; nCol++; continue;
				}
				else *p++ = *pLine++, nCol++;
			}
			else if( strchr( "DdEe", *pLine ) )
			{
				if (++exp > 1)
				{
					pLine++; nCol++; continue;
				}
				// 'D' and 'E' are both stored as 'E'
				*p++ = 'E'; pLine++; nCol++;
				// Sign behind the exponent?
				if( *pLine == '+' )
					pLine++, nCol++;
				else
				if( *pLine == '-' )
				{
					// This is the second character stored in this iteration,
					// so the free space has to be checked again; otherwise the
					// write position could step over the "buffer full" mark
					// and later digits would be written past the end of buf.
					if( (p-buf) < (BUF_SIZE-1) )
						*p++ = *pLine;
					else
						bBufOverflow = sal_True;
					pLine++, nCol++;
				}
			}
			else
			{
				*p++ = *pLine++, nCol++;
			}
		}
		*p = 0;
		// NOTE(review): p points at the terminator here, so aSym ends up
		// empty for number literals -- confirm whether buf was intended.
		aSym = p; bNumber = sal_True;
		// More than one decimal point or exponent?
		if( comma > 1 || exp > 1 )
		{	aError = '.';
			GenError( SbERR_BAD_CHAR_IN_NUMBER );	}

		// #57844 use the rtl conversion with fixed separators
		nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', NULL, NULL );

		// Literals without decimal point and exponent become INTEGER or LONG
		// when the value fits
		if( !comma && !exp )
		{
			if( nVal >= SbxMININT && nVal <= SbxMAXINT )
				eScanType = SbxINTEGER;
			else
			if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG )
				eScanType = SbxLONG;
		}
		if( bBufOverflow )
			GenError( SbERR_MATH_OVERFLOW );

		// Type suffix?
		SbxDataType t = GetSuffixType( *pLine );
		if( t != SbxVARIANT )
		{
			eScanType = t;
			pLine++;
			nCol++;
		}
	}

	// Hex/octal number? Read and convert it.
	else if( *pLine == '&' )
	{
		pLine++; nCol++;
		sal_Unicode cmp1[] = { '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F', 0 };
		sal_Unicode cmp2[] = { '0', '1', '2', '3', '4', '5', '6', '7', 0 };
		sal_Unicode *cmp = cmp1;
		sal_Unicode base = 16;
		sal_Unicode ndig = 8;
		sal_Unicode xch  = *pLine++ & 0xFF; nCol++;
		switch( toupper( xch ) )
		{
			case 'O':
				cmp = cmp2; base = 8; ndig = 11; break;
			case 'H':
				break;
			default :
				// Treated as an operator. The function reports success as a
				// boolean, so return sal_True rather than a token constant.
				pLine--; nCol--; nCol1 = nCol-1; aSym = '&'; return sal_True;
		}
		bNumber = sal_True;
		// NOTE(review): the width of long differs between platforms, so a
		// literal such as &HFFFFFFFF may convert differently -- confirm.
		long l = 0;
		int i;
		sal_Bool bBufOverflow = sal_False;
		// Set of digits valid for the chosen base, built once for the loop
		String aValidDigits( cmp );
		while( BasicSimpleCharClass::isAlphaNumeric( *pLine & 0xFF, bCompatible ) )
		{
			sal_Unicode ch = sal::static_int_cast< sal_Unicode >(
				toupper( *pLine & 0xFF ) );
			pLine++; nCol++;
			// Buffer full? -> keep scanning without storing
			if( (p-buf) >= (BUF_SIZE-1) )
				bBufOverflow = sal_True;
			else if( aValidDigits.Search( ch ) != STRING_NOTFOUND )
				*p++ = ch;
			else
			{
				aError = ch;
				GenError( SbERR_BAD_CHAR_IN_NUMBER );
			}
		}
		*p = 0;
		for( p = buf; *p; p++ )
		{
			// '0'..'9' map to 0..9, 'A'..'F' to 10..15
			i = (*p & 0xFF) - '0';
			if( i > 9 ) i -= 7;
			l = ( l * base ) + i;
			if( !ndig-- )
			{
				GenError( SbERR_MATH_OVERFLOW ); break;
			}
		}
		// An '&' directly behind the digits is consumed as well
		if( *pLine == '&' ) pLine++, nCol++;
		nVal = (double) l;
		eScanType = ( l >= SbxMININT && l <= SbxMAXINT ) ? SbxINTEGER : SbxLONG;
		if( bBufOverflow )
			GenError( SbERR_MATH_OVERFLOW );
	}

	// Strings and bracketed names:
	else if( *pLine == '"' || *pLine == '[' )
	{
		sal_Unicode cSep = *pLine;
		if( cSep == '[' )
			bSymbol = sal_True, cSep = ']';
		short n = nCol+1;
		while( *pLine )
		{
			do pLine++, nCol++;
			while( *pLine && ( *pLine != cSep ) );
			if( *pLine == cSep )
			{
				pLine++; nCol++;
				// A doubled '"' stands for a quote inside the string; any
				// other character (or a ']') ends the literal
				if( *pLine != cSep || cSep == ']' ) break;
			} else aError = cSep, GenError( SbERR_EXPECTED );
		}
		// With VBA interop the [] characters are kept
		if ( cSep == ']' && bVBASupportOn )
			aSym = aLine.copy( n - 1, nCol - n  + 1);
		else
		{
			// If the line ends right behind the opening delimiter the count
			// would be -1, which copy() does not accept; use an empty copy
			sal_Int32 nCount = nCol - n - 1;
			if( nCount < 0 )
				nCount = 0;
			aSym = aLine.copy( n, nCount );
		}
		// Reduce doubled delimiters to single ones
		String s( cSep );
		s += cSep;
		sal_uInt16 nIdx = 0;
		do
		{
			nIdx = aSym.Search( s, nIdx );
			if( nIdx == STRING_NOTFOUND )
				break;
			aSym.Erase( nIdx, 1 );
			nIdx++;
		}
		while( true );
		// cSep can only be '"' or ']' here, so a non-bracket is a string
		if( cSep != ']' )
			eScanType = SbxSTRING;
	}
	// Invalid characters:
	else if( ( *pLine & 0xFF ) >= 0x7F )
	{
		GenError( SbERR_SYNTAX ); pLine++; nCol++;
	}
	// Everything else: one character, or one of "<>", "<=", ">=", ":="
	else
	{
		short n = 1;
		switch( *pLine++ )
		{
			case '<': if( *pLine == '>' || *pLine == '=' ) n = 2; break;
			case '>': if( *pLine == '=' ) n = 2; break;
			case ':': if( *pLine == '=' ) n = 2; break;
		}
		aSym = aLine.copy( nCol, n );
		pLine += n-1; nCol = nCol + n;
	}

	nCol2 = nCol-1;

PrevLineCommentLbl:
	// Comment?
	if( bPrevLineExtentsComment || (eScanType != SbxSTRING &&
		( aSym.GetBuffer()[0] == '\'' || aSym.EqualsIgnoreCaseAscii( "REM" ) ) ) )
	{
		bPrevLineExtentsComment = sal_False;
		aSym = String::CreateFromAscii( "REM" );
		sal_uInt16 nLen = String( pLine ).Len();
		// The continuation test reads the last two characters of the line.
		// They exist only if at least two characters lie between the start
		// of the line and its end; without this check a line consisting of
		// a single '_' would be read one position before its buffer.
		const sal_Int32 nCharsUpToEnd =
			static_cast< sal_Int32 >( pLine - aLine.getStr() ) + nLen;
		if( bCompatible && nCharsUpToEnd >= 2
			&& pLine[ nLen - 1 ] == '_' && pLine[ nLen - 2 ] == ' ' )
			bPrevLineExtentsComment = sal_True;
		// The comment covers the rest of the line
		nCol2 = nCol2 + nLen;
		pLine = NULL;
	}
	return sal_True;

	// Otherwise end of line: but check for '_' to see whether the line
	// is continued
eoln:
	if( nCol && *--pLine == '_' )
	{
		pLine = NULL;
		bool bRes = NextSym();
		if( bVBASupportOn && aSym.GetBuffer()[0] == '.' )
		{
			// object _
			//    .Method
			// ^^^  <- spaces is legal in MSO VBA
			OSL_TRACE("*** resetting bSpaces***");
			bSpaces = sal_False;
		}
		return bRes;
	}
	else
	{
		// Report the end of line at the position of the previous symbol
		pLine = NULL;
		nLine = nOldLine;
		nCol1 = nOldCol1;
		nCol2 = nOldCol2;
		aSym = '\n';
		nColLock = 0;
		return sal_True;
	}
}

// Definition of the static LetterTable member of BasicSimpleCharClass;
// its entries are filled in by the LetterTable constructor below.
LetterTable BasicSimpleCharClass::aLetterTable;

/** Fills IsLetterTab for the character codes 0..255.
 *
 * Only the accented letters in the range 0xC0..0xFF are flagged. The two
 * codes 0xD7 and 0xF7 inside that range (the multiplication and division
 * signs in ISO 8859-1) are not letters and stay unflagged, as does every
 * code below 0xC0.
 */
LetterTable::LetterTable( void )
{
	for( int nCode = 0 ; nCode < 256 ; ++nCode )
	{
		const bool bInAccentedRange = ( nCode >= 0xC0 );
		const bool bIsMathSign      = ( nCode == 0xD7 || nCode == 0xF7 );
		IsLetterTab[nCode] = bInAccentedRange && !bIsMathSign;
	}
}

/** Asks a CharClass whether the character is a letter.
 *
 * The CharClass is created on the first call, using the locale of the
 * application settings at that time, and is reused (never deleted) by all
 * later calls.
 *
 * @param c  character to test
 * @return   result of CharClass::isLetter() for a one-character string
 */
bool LetterTable::isLetterUnicode( sal_Unicode c )
{
	static CharClass* pCharClass = NULL;
	if( !pCharClass )
		pCharClass = new CharClass( Application::GetSettings().GetLocale() );

	String aSingleChar( c );
	return pCharClass->isLetter( aSingleChar, 0 );
}