xref: /AOO41X/main/lingucomponent/source/lingutil/lingutil.cxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_lingucomponent.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #if defined(WNT)
32*cdf0e10cSrcweir #include <tools/prewin.h>
33*cdf0e10cSrcweir #endif
34*cdf0e10cSrcweir 
35*cdf0e10cSrcweir #if defined(WNT)
36*cdf0e10cSrcweir #include <Windows.h>
37*cdf0e10cSrcweir #endif
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir #if defined(WNT)
40*cdf0e10cSrcweir #include <tools/postwin.h>
41*cdf0e10cSrcweir #endif
42*cdf0e10cSrcweir 
43*cdf0e10cSrcweir 
44*cdf0e10cSrcweir #include <osl/thread.h>
45*cdf0e10cSrcweir #include <osl/file.hxx>
46*cdf0e10cSrcweir #include <tools/debug.hxx>
47*cdf0e10cSrcweir #include <tools/urlobj.hxx>
48*cdf0e10cSrcweir #include <i18npool/mslangid.hxx>
49*cdf0e10cSrcweir #include <unotools/lingucfg.hxx>
50*cdf0e10cSrcweir #include <unotools/pathoptions.hxx>
51*cdf0e10cSrcweir #include <rtl/ustring.hxx>
52*cdf0e10cSrcweir #include <rtl/string.hxx>
53*cdf0e10cSrcweir #include <rtl/tencinfo.h>
54*cdf0e10cSrcweir #include <linguistic/misc.hxx>
55*cdf0e10cSrcweir 
56*cdf0e10cSrcweir #include <set>
57*cdf0e10cSrcweir #include <vector>
58*cdf0e10cSrcweir #include <string.h>
59*cdf0e10cSrcweir 
60*cdf0e10cSrcweir #include <lingutil.hxx>
61*cdf0e10cSrcweir #include <dictmgr.hxx>
62*cdf0e10cSrcweir 
63*cdf0e10cSrcweir 
64*cdf0e10cSrcweir 
65*cdf0e10cSrcweir 
66*cdf0e10cSrcweir using ::com::sun::star::lang::Locale;
67*cdf0e10cSrcweir using namespace ::com::sun::star;
68*cdf0e10cSrcweir 
69*cdf0e10cSrcweir #if 0
70*cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
71*cdf0e10cSrcweir 
72*cdf0e10cSrcweir String GetDirectoryPathFromFileURL( const String &rFileURL )
73*cdf0e10cSrcweir {
74*cdf0e10cSrcweir     // get file URL
75*cdf0e10cSrcweir     INetURLObject aURLObj;
76*cdf0e10cSrcweir     aURLObj.SetSmartProtocol( INET_PROT_FILE );
77*cdf0e10cSrcweir     aURLObj.SetSmartURL( rFileURL );
78*cdf0e10cSrcweir     aURLObj.removeSegment();
79*cdf0e10cSrcweir     DBG_ASSERT( !aURLObj.HasError(), "invalid URL" );
80*cdf0e10cSrcweir     String aRes = aURLObj.GetMainURL( INetURLObject::DECODE_TO_IURI );
81*cdf0e10cSrcweir     return aRes;
82*cdf0e10cSrcweir }
83*cdf0e10cSrcweir #endif
84*cdf0e10cSrcweir 
85*cdf0e10cSrcweir #if defined(WNT)
86*cdf0e10cSrcweir rtl::OString Win_GetShortPathName( const rtl::OUString &rLongPathName )
87*cdf0e10cSrcweir {
88*cdf0e10cSrcweir     rtl::OString aRes;
89*cdf0e10cSrcweir 
90*cdf0e10cSrcweir     sal_Unicode aShortBuffer[1024] = {0};
91*cdf0e10cSrcweir     sal_Int32   nShortBufSize = sizeof( aShortBuffer ) / sizeof( aShortBuffer[0] );
92*cdf0e10cSrcweir 
93*cdf0e10cSrcweir     // use the version of 'GetShortPathName' that can deal with Unicode...
94*cdf0e10cSrcweir     sal_Int32 nShortLen = GetShortPathNameW(
95*cdf0e10cSrcweir             reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
96*cdf0e10cSrcweir             reinterpret_cast<LPWSTR>( aShortBuffer ),
97*cdf0e10cSrcweir             nShortBufSize );
98*cdf0e10cSrcweir 
99*cdf0e10cSrcweir     if (nShortLen < nShortBufSize) // conversion successful?
100*cdf0e10cSrcweir         aRes = rtl::OString( OU2ENC( rtl::OUString( aShortBuffer, nShortLen ), osl_getThreadTextEncoding()) );
101*cdf0e10cSrcweir     else
102*cdf0e10cSrcweir         DBG_ERROR( "Win_GetShortPathName: buffer to short" );
103*cdf0e10cSrcweir 
104*cdf0e10cSrcweir     return aRes;
105*cdf0e10cSrcweir }
106*cdf0e10cSrcweir #endif //defined(WNT)
107*cdf0e10cSrcweir 
108*cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
109*cdf0e10cSrcweir 
110*cdf0e10cSrcweir // build list of old style diuctionaries (not as extensions) to use.
111*cdf0e10cSrcweir // User installed dictionaries (the ones residing in the user paths)
112*cdf0e10cSrcweir // will get precedence over system installed ones for the same language.
113*cdf0e10cSrcweir std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType )
114*cdf0e10cSrcweir {
115*cdf0e10cSrcweir     std::vector< SvtLinguConfigDictionaryEntry > aRes;
116*cdf0e10cSrcweir 
117*cdf0e10cSrcweir 	if (!pDicType)
118*cdf0e10cSrcweir 		return aRes;
119*cdf0e10cSrcweir 
120*cdf0e10cSrcweir 	rtl::OUString aFormatName;
121*cdf0e10cSrcweir 	String aDicExtension;
122*cdf0e10cSrcweir #ifdef SYSTEM_DICTS
123*cdf0e10cSrcweir 	rtl::OUString aSystemDir;
124*cdf0e10cSrcweir 	rtl::OUString aSystemPrefix;
125*cdf0e10cSrcweir 	rtl::OUString aSystemSuffix;
126*cdf0e10cSrcweir #endif
127*cdf0e10cSrcweir 	bool bSpell = false;
128*cdf0e10cSrcweir 	bool bHyph  = false;
129*cdf0e10cSrcweir 	bool bThes  = false;
130*cdf0e10cSrcweir     if (strcmp( pDicType, "DICT" ) == 0)
131*cdf0e10cSrcweir 	{
132*cdf0e10cSrcweir 		aFormatName		= A2OU("DICT_SPELL");
133*cdf0e10cSrcweir 		aDicExtension	= String::CreateFromAscii( ".dic" );
134*cdf0e10cSrcweir #ifdef SYSTEM_DICTS
135*cdf0e10cSrcweir 		aSystemDir		= A2OU( DICT_SYSTEM_DIR );
136*cdf0e10cSrcweir 		aSystemSuffix		= aDicExtension;
137*cdf0e10cSrcweir #endif
138*cdf0e10cSrcweir 		bSpell = true;
139*cdf0e10cSrcweir 	}
140*cdf0e10cSrcweir     else if (strcmp( pDicType, "HYPH" ) == 0)
141*cdf0e10cSrcweir 	{
142*cdf0e10cSrcweir 		aFormatName		= A2OU("DICT_HYPH");
143*cdf0e10cSrcweir 		aDicExtension	= String::CreateFromAscii( ".dic" );
144*cdf0e10cSrcweir #ifdef SYSTEM_DICTS
145*cdf0e10cSrcweir 		aSystemDir		= A2OU( HYPH_SYSTEM_DIR );
146*cdf0e10cSrcweir 		aSystemPrefix		= A2OU( "hyph_" );
147*cdf0e10cSrcweir 		aSystemSuffix		= aDicExtension;
148*cdf0e10cSrcweir #endif
149*cdf0e10cSrcweir 		bHyph = true;
150*cdf0e10cSrcweir 	}
151*cdf0e10cSrcweir     else if (strcmp( pDicType, "THES" ) == 0)
152*cdf0e10cSrcweir 	{
153*cdf0e10cSrcweir 		aFormatName		= A2OU("DICT_THES");
154*cdf0e10cSrcweir 		aDicExtension	= String::CreateFromAscii( ".dat" );
155*cdf0e10cSrcweir #ifdef SYSTEM_DICTS
156*cdf0e10cSrcweir 		aSystemDir		= A2OU( THES_SYSTEM_DIR );
157*cdf0e10cSrcweir 		aSystemPrefix		= A2OU( "th_" );
158*cdf0e10cSrcweir 		aSystemSuffix		= A2OU( "_v2.dat" );
159*cdf0e10cSrcweir #endif
160*cdf0e10cSrcweir 		bThes = true;
161*cdf0e10cSrcweir 	}
162*cdf0e10cSrcweir 
163*cdf0e10cSrcweir 
164*cdf0e10cSrcweir 	if (aFormatName.getLength() == 0 || aDicExtension.Len() == 0)
165*cdf0e10cSrcweir 		return aRes;
166*cdf0e10cSrcweir 
167*cdf0e10cSrcweir 	// set of languages to remember the language where it is already
168*cdf0e10cSrcweir 	// decided to make use of the dictionary.
169*cdf0e10cSrcweir 	std::set< LanguageType > aDicLangInUse;
170*cdf0e10cSrcweir 
171*cdf0e10cSrcweir #ifdef SYSTEM_DICTS
172*cdf0e10cSrcweir    osl::Directory aSystemDicts(aSystemDir);
173*cdf0e10cSrcweir    if (aSystemDicts.open() == osl::FileBase::E_None)
174*cdf0e10cSrcweir    {
175*cdf0e10cSrcweir        osl::DirectoryItem aItem;
176*cdf0e10cSrcweir        osl::FileStatus aFileStatus(FileStatusMask_FileURL);
177*cdf0e10cSrcweir        while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
178*cdf0e10cSrcweir        {
179*cdf0e10cSrcweir            aItem.getFileStatus(aFileStatus);
180*cdf0e10cSrcweir            rtl::OUString sPath = aFileStatus.getFileURL();
181*cdf0e10cSrcweir            if (sPath.lastIndexOf(aSystemSuffix) == sPath.getLength()-aSystemSuffix.getLength())
182*cdf0e10cSrcweir            {
183*cdf0e10cSrcweir                sal_Int32 nStartIndex = sPath.lastIndexOf(sal_Unicode('/')) + 1;
184*cdf0e10cSrcweir                if (!sPath.match(aSystemPrefix, nStartIndex))
185*cdf0e10cSrcweir                    continue;
186*cdf0e10cSrcweir                rtl::OUString sChunk = sPath.copy(0, sPath.getLength() - aSystemSuffix.getLength());
187*cdf0e10cSrcweir                sal_Int32 nIndex = nStartIndex + aSystemPrefix.getLength();
188*cdf0e10cSrcweir                rtl::OUString sLang = sChunk.getToken( 0, '_', nIndex );
189*cdf0e10cSrcweir                if (!sLang.getLength())
190*cdf0e10cSrcweir                    continue;
191*cdf0e10cSrcweir                rtl::OUString sRegion;
192*cdf0e10cSrcweir                if (nIndex != -1)
193*cdf0e10cSrcweir                    sRegion = sChunk.copy( nIndex, sChunk.getLength() - nIndex );
194*cdf0e10cSrcweir 
195*cdf0e10cSrcweir                // Thus we first get the language of the dictionary
196*cdf0e10cSrcweir                LanguageType nLang = MsLangId::convertIsoNamesToLanguage(
197*cdf0e10cSrcweir                   sLang, sRegion );
198*cdf0e10cSrcweir 
199*cdf0e10cSrcweir                if (aDicLangInUse.count( nLang ) == 0)
200*cdf0e10cSrcweir                {
201*cdf0e10cSrcweir                    // remember the new language in use
202*cdf0e10cSrcweir                    aDicLangInUse.insert( nLang );
203*cdf0e10cSrcweir 
204*cdf0e10cSrcweir                    // add the dictionary to the resulting vector
205*cdf0e10cSrcweir                    SvtLinguConfigDictionaryEntry aDicEntry;
206*cdf0e10cSrcweir                    aDicEntry.aLocations.realloc(1);
207*cdf0e10cSrcweir                    aDicEntry.aLocaleNames.realloc(1);
208*cdf0e10cSrcweir                    rtl::OUString aLocaleName( MsLangId::convertLanguageToIsoString( nLang ) );
209*cdf0e10cSrcweir                    aDicEntry.aLocations[0] = sPath;
210*cdf0e10cSrcweir                    aDicEntry.aFormatName = aFormatName;
211*cdf0e10cSrcweir                    aDicEntry.aLocaleNames[0] = aLocaleName;
212*cdf0e10cSrcweir                    aRes.push_back( aDicEntry );
213*cdf0e10cSrcweir                }
214*cdf0e10cSrcweir            }
215*cdf0e10cSrcweir        }
216*cdf0e10cSrcweir     }
217*cdf0e10cSrcweir 
218*cdf0e10cSrcweir #endif
219*cdf0e10cSrcweir 
220*cdf0e10cSrcweir     return aRes;
221*cdf0e10cSrcweir }
222*cdf0e10cSrcweir 
223*cdf0e10cSrcweir 
224*cdf0e10cSrcweir void MergeNewStyleDicsAndOldStyleDics(
225*cdf0e10cSrcweir 	std::list< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
226*cdf0e10cSrcweir 	const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
227*cdf0e10cSrcweir {
228*cdf0e10cSrcweir 	// get list of languages supported by new style dictionaries
229*cdf0e10cSrcweir 	std::set< LanguageType > aNewStyleLanguages;
230*cdf0e10cSrcweir 	std::list< SvtLinguConfigDictionaryEntry >::const_iterator aIt;
231*cdf0e10cSrcweir 	for (aIt = rNewStyleDics.begin() ;  aIt != rNewStyleDics.end();  ++aIt)
232*cdf0e10cSrcweir 	{
233*cdf0e10cSrcweir 		const uno::Sequence< rtl::OUString > aLocaleNames( aIt->aLocaleNames );
234*cdf0e10cSrcweir 		sal_Int32 nLocaleNames = aLocaleNames.getLength();
235*cdf0e10cSrcweir 		for (sal_Int32 k = 0;  k < nLocaleNames; ++k)
236*cdf0e10cSrcweir 		{
237*cdf0e10cSrcweir 			LanguageType nLang = MsLangId::convertIsoStringToLanguage( aLocaleNames[k] );
238*cdf0e10cSrcweir 			aNewStyleLanguages.insert( nLang );
239*cdf0e10cSrcweir 		}
240*cdf0e10cSrcweir 	}
241*cdf0e10cSrcweir 
242*cdf0e10cSrcweir 	// now check all old style dictionaries if they will add a not yet
243*cdf0e10cSrcweir 	// added language. If so add them to the resulting vector
244*cdf0e10cSrcweir 	std::vector< SvtLinguConfigDictionaryEntry >::const_iterator aIt2;
245*cdf0e10cSrcweir 	for (aIt2 = rOldStyleDics.begin();  aIt2 != rOldStyleDics.end();  ++aIt2)
246*cdf0e10cSrcweir 	{
247*cdf0e10cSrcweir 		sal_Int32 nOldStyleDics = aIt2->aLocaleNames.getLength();
248*cdf0e10cSrcweir 
249*cdf0e10cSrcweir 		// old style dics should only have one language listed...
250*cdf0e10cSrcweir 		DBG_ASSERT( nOldStyleDics, "old style dictionary with more then one language found!");
251*cdf0e10cSrcweir 		if (nOldStyleDics > 0)
252*cdf0e10cSrcweir 		{
253*cdf0e10cSrcweir 			LanguageType nLang = MsLangId::convertIsoStringToLanguage( aIt2->aLocaleNames[0] );
254*cdf0e10cSrcweir 
255*cdf0e10cSrcweir             if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_NONE)
256*cdf0e10cSrcweir             {
257*cdf0e10cSrcweir                 DBG_ERROR( "old style dictionary with invalid language found!" );
258*cdf0e10cSrcweir                 continue;
259*cdf0e10cSrcweir             }
260*cdf0e10cSrcweir 
261*cdf0e10cSrcweir 			// language not yet added?
262*cdf0e10cSrcweir 			if (aNewStyleLanguages.count( nLang ) == 0)
263*cdf0e10cSrcweir 				rNewStyleDics.push_back( *aIt2 );
264*cdf0e10cSrcweir 		}
265*cdf0e10cSrcweir 		else
266*cdf0e10cSrcweir 		{
267*cdf0e10cSrcweir 			DBG_ERROR( "old style dictionary with no language found!" );
268*cdf0e10cSrcweir 		}
269*cdf0e10cSrcweir 	}
270*cdf0e10cSrcweir }
271*cdf0e10cSrcweir 
272*cdf0e10cSrcweir 
273*cdf0e10cSrcweir rtl_TextEncoding getTextEncodingFromCharset(const sal_Char* pCharset)
274*cdf0e10cSrcweir {
275*cdf0e10cSrcweir     // default result: used to indicate that we failed to get the proper encoding
276*cdf0e10cSrcweir     rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW;
277*cdf0e10cSrcweir 
278*cdf0e10cSrcweir     if (pCharset)
279*cdf0e10cSrcweir     {
280*cdf0e10cSrcweir         eRet = rtl_getTextEncodingFromMimeCharset(pCharset);
281*cdf0e10cSrcweir         if (eRet == RTL_TEXTENCODING_DONTKNOW)
282*cdf0e10cSrcweir             eRet = rtl_getTextEncodingFromUnixCharset(pCharset);
283*cdf0e10cSrcweir         if (eRet == RTL_TEXTENCODING_DONTKNOW)
284*cdf0e10cSrcweir         {
285*cdf0e10cSrcweir             if (strcmp("ISCII-DEVANAGARI", pCharset) == 0)
286*cdf0e10cSrcweir                 eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI;
287*cdf0e10cSrcweir         }
288*cdf0e10cSrcweir     }
289*cdf0e10cSrcweir     return eRet;
290*cdf0e10cSrcweir }
291*cdf0e10cSrcweir 
292*cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
293*cdf0e10cSrcweir 
294