xref: /AOO41X/main/lingucomponent/source/lingutil/lingutil.cxx (revision b0844812064e3450486bf80ca0c2a3ca72742d45)
1*b0844812SAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*b0844812SAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*b0844812SAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*b0844812SAndrew Rist  * distributed with this work for additional information
6*b0844812SAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*b0844812SAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*b0844812SAndrew Rist  * "License"); you may not use this file except in compliance
9*b0844812SAndrew Rist  * with the License.  You may obtain a copy of the License at
10cdf0e10cSrcweir  *
11*b0844812SAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12cdf0e10cSrcweir  *
13*b0844812SAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*b0844812SAndrew Rist  * software distributed under the License is distributed on an
15*b0844812SAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*b0844812SAndrew Rist  * KIND, either express or implied.  See the License for the
17*b0844812SAndrew Rist  * specific language governing permissions and limitations
18*b0844812SAndrew Rist  * under the License.
19cdf0e10cSrcweir  *
20*b0844812SAndrew Rist  *************************************************************/
21*b0844812SAndrew Rist 
22*b0844812SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_lingucomponent.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #if defined(WNT)
28cdf0e10cSrcweir #include <tools/prewin.h>
29cdf0e10cSrcweir #endif
30cdf0e10cSrcweir 
31cdf0e10cSrcweir #if defined(WNT)
32cdf0e10cSrcweir #include <Windows.h>
33cdf0e10cSrcweir #endif
34cdf0e10cSrcweir 
35cdf0e10cSrcweir #if defined(WNT)
36cdf0e10cSrcweir #include <tools/postwin.h>
37cdf0e10cSrcweir #endif
38cdf0e10cSrcweir 
39cdf0e10cSrcweir 
40cdf0e10cSrcweir #include <osl/thread.h>
41cdf0e10cSrcweir #include <osl/file.hxx>
42cdf0e10cSrcweir #include <tools/debug.hxx>
43cdf0e10cSrcweir #include <tools/urlobj.hxx>
44cdf0e10cSrcweir #include <i18npool/mslangid.hxx>
45cdf0e10cSrcweir #include <unotools/lingucfg.hxx>
46cdf0e10cSrcweir #include <unotools/pathoptions.hxx>
47cdf0e10cSrcweir #include <rtl/ustring.hxx>
48cdf0e10cSrcweir #include <rtl/string.hxx>
49cdf0e10cSrcweir #include <rtl/tencinfo.h>
50cdf0e10cSrcweir #include <linguistic/misc.hxx>
51cdf0e10cSrcweir 
52cdf0e10cSrcweir #include <set>
53cdf0e10cSrcweir #include <vector>
54cdf0e10cSrcweir #include <string.h>
55cdf0e10cSrcweir 
56cdf0e10cSrcweir #include <lingutil.hxx>
57cdf0e10cSrcweir #include <dictmgr.hxx>
58cdf0e10cSrcweir 
59cdf0e10cSrcweir 
60cdf0e10cSrcweir 
61cdf0e10cSrcweir 
62cdf0e10cSrcweir using ::com::sun::star::lang::Locale;
63cdf0e10cSrcweir using namespace ::com::sun::star;
64cdf0e10cSrcweir 
65cdf0e10cSrcweir #if 0
66cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
67cdf0e10cSrcweir 
68cdf0e10cSrcweir String GetDirectoryPathFromFileURL( const String &rFileURL )
69cdf0e10cSrcweir {
70cdf0e10cSrcweir     // get file URL
71cdf0e10cSrcweir     INetURLObject aURLObj;
72cdf0e10cSrcweir     aURLObj.SetSmartProtocol( INET_PROT_FILE );
73cdf0e10cSrcweir     aURLObj.SetSmartURL( rFileURL );
74cdf0e10cSrcweir     aURLObj.removeSegment();
75cdf0e10cSrcweir     DBG_ASSERT( !aURLObj.HasError(), "invalid URL" );
76cdf0e10cSrcweir     String aRes = aURLObj.GetMainURL( INetURLObject::DECODE_TO_IURI );
77cdf0e10cSrcweir     return aRes;
78cdf0e10cSrcweir }
79cdf0e10cSrcweir #endif
80cdf0e10cSrcweir 
81cdf0e10cSrcweir #if defined(WNT)
82cdf0e10cSrcweir rtl::OString Win_GetShortPathName( const rtl::OUString &rLongPathName )
83cdf0e10cSrcweir {
84cdf0e10cSrcweir     rtl::OString aRes;
85cdf0e10cSrcweir 
86cdf0e10cSrcweir     sal_Unicode aShortBuffer[1024] = {0};
87cdf0e10cSrcweir     sal_Int32   nShortBufSize = sizeof( aShortBuffer ) / sizeof( aShortBuffer[0] );
88cdf0e10cSrcweir 
89cdf0e10cSrcweir     // use the version of 'GetShortPathName' that can deal with Unicode...
90cdf0e10cSrcweir     sal_Int32 nShortLen = GetShortPathNameW(
91cdf0e10cSrcweir             reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
92cdf0e10cSrcweir             reinterpret_cast<LPWSTR>( aShortBuffer ),
93cdf0e10cSrcweir             nShortBufSize );
94cdf0e10cSrcweir 
95cdf0e10cSrcweir     if (nShortLen < nShortBufSize) // conversion successful?
96cdf0e10cSrcweir         aRes = rtl::OString( OU2ENC( rtl::OUString( aShortBuffer, nShortLen ), osl_getThreadTextEncoding()) );
97cdf0e10cSrcweir     else
98cdf0e10cSrcweir         DBG_ERROR( "Win_GetShortPathName: buffer to short" );
99cdf0e10cSrcweir 
100cdf0e10cSrcweir     return aRes;
101cdf0e10cSrcweir }
102cdf0e10cSrcweir #endif //defined(WNT)
103cdf0e10cSrcweir 
104cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
105cdf0e10cSrcweir 
106cdf0e10cSrcweir // build list of old style diuctionaries (not as extensions) to use.
107cdf0e10cSrcweir // User installed dictionaries (the ones residing in the user paths)
108cdf0e10cSrcweir // will get precedence over system installed ones for the same language.
109cdf0e10cSrcweir std::vector< SvtLinguConfigDictionaryEntry > GetOldStyleDics( const char *pDicType )
110cdf0e10cSrcweir {
111cdf0e10cSrcweir     std::vector< SvtLinguConfigDictionaryEntry > aRes;
112cdf0e10cSrcweir 
113cdf0e10cSrcweir 	if (!pDicType)
114cdf0e10cSrcweir 		return aRes;
115cdf0e10cSrcweir 
116cdf0e10cSrcweir 	rtl::OUString aFormatName;
117cdf0e10cSrcweir 	String aDicExtension;
118cdf0e10cSrcweir #ifdef SYSTEM_DICTS
119cdf0e10cSrcweir 	rtl::OUString aSystemDir;
120cdf0e10cSrcweir 	rtl::OUString aSystemPrefix;
121cdf0e10cSrcweir 	rtl::OUString aSystemSuffix;
122cdf0e10cSrcweir #endif
123cdf0e10cSrcweir 	bool bSpell = false;
124cdf0e10cSrcweir 	bool bHyph  = false;
125cdf0e10cSrcweir 	bool bThes  = false;
126cdf0e10cSrcweir     if (strcmp( pDicType, "DICT" ) == 0)
127cdf0e10cSrcweir 	{
128cdf0e10cSrcweir 		aFormatName		= A2OU("DICT_SPELL");
129cdf0e10cSrcweir 		aDicExtension	= String::CreateFromAscii( ".dic" );
130cdf0e10cSrcweir #ifdef SYSTEM_DICTS
131cdf0e10cSrcweir 		aSystemDir		= A2OU( DICT_SYSTEM_DIR );
132cdf0e10cSrcweir 		aSystemSuffix		= aDicExtension;
133cdf0e10cSrcweir #endif
134cdf0e10cSrcweir 		bSpell = true;
135cdf0e10cSrcweir 	}
136cdf0e10cSrcweir     else if (strcmp( pDicType, "HYPH" ) == 0)
137cdf0e10cSrcweir 	{
138cdf0e10cSrcweir 		aFormatName		= A2OU("DICT_HYPH");
139cdf0e10cSrcweir 		aDicExtension	= String::CreateFromAscii( ".dic" );
140cdf0e10cSrcweir #ifdef SYSTEM_DICTS
141cdf0e10cSrcweir 		aSystemDir		= A2OU( HYPH_SYSTEM_DIR );
142cdf0e10cSrcweir 		aSystemPrefix		= A2OU( "hyph_" );
143cdf0e10cSrcweir 		aSystemSuffix		= aDicExtension;
144cdf0e10cSrcweir #endif
145cdf0e10cSrcweir 		bHyph = true;
146cdf0e10cSrcweir 	}
147cdf0e10cSrcweir     else if (strcmp( pDicType, "THES" ) == 0)
148cdf0e10cSrcweir 	{
149cdf0e10cSrcweir 		aFormatName		= A2OU("DICT_THES");
150cdf0e10cSrcweir 		aDicExtension	= String::CreateFromAscii( ".dat" );
151cdf0e10cSrcweir #ifdef SYSTEM_DICTS
152cdf0e10cSrcweir 		aSystemDir		= A2OU( THES_SYSTEM_DIR );
153cdf0e10cSrcweir 		aSystemPrefix		= A2OU( "th_" );
154cdf0e10cSrcweir 		aSystemSuffix		= A2OU( "_v2.dat" );
155cdf0e10cSrcweir #endif
156cdf0e10cSrcweir 		bThes = true;
157cdf0e10cSrcweir 	}
158cdf0e10cSrcweir 
159cdf0e10cSrcweir 
160cdf0e10cSrcweir 	if (aFormatName.getLength() == 0 || aDicExtension.Len() == 0)
161cdf0e10cSrcweir 		return aRes;
162cdf0e10cSrcweir 
163cdf0e10cSrcweir 	// set of languages to remember the language where it is already
164cdf0e10cSrcweir 	// decided to make use of the dictionary.
165cdf0e10cSrcweir 	std::set< LanguageType > aDicLangInUse;
166cdf0e10cSrcweir 
167cdf0e10cSrcweir #ifdef SYSTEM_DICTS
168cdf0e10cSrcweir    osl::Directory aSystemDicts(aSystemDir);
169cdf0e10cSrcweir    if (aSystemDicts.open() == osl::FileBase::E_None)
170cdf0e10cSrcweir    {
171cdf0e10cSrcweir        osl::DirectoryItem aItem;
172cdf0e10cSrcweir        osl::FileStatus aFileStatus(FileStatusMask_FileURL);
173cdf0e10cSrcweir        while (aSystemDicts.getNextItem(aItem) == osl::FileBase::E_None)
174cdf0e10cSrcweir        {
175cdf0e10cSrcweir            aItem.getFileStatus(aFileStatus);
176cdf0e10cSrcweir            rtl::OUString sPath = aFileStatus.getFileURL();
177cdf0e10cSrcweir            if (sPath.lastIndexOf(aSystemSuffix) == sPath.getLength()-aSystemSuffix.getLength())
178cdf0e10cSrcweir            {
179cdf0e10cSrcweir                sal_Int32 nStartIndex = sPath.lastIndexOf(sal_Unicode('/')) + 1;
180cdf0e10cSrcweir                if (!sPath.match(aSystemPrefix, nStartIndex))
181cdf0e10cSrcweir                    continue;
182cdf0e10cSrcweir                rtl::OUString sChunk = sPath.copy(0, sPath.getLength() - aSystemSuffix.getLength());
183cdf0e10cSrcweir                sal_Int32 nIndex = nStartIndex + aSystemPrefix.getLength();
184cdf0e10cSrcweir                rtl::OUString sLang = sChunk.getToken( 0, '_', nIndex );
185cdf0e10cSrcweir                if (!sLang.getLength())
186cdf0e10cSrcweir                    continue;
187cdf0e10cSrcweir                rtl::OUString sRegion;
188cdf0e10cSrcweir                if (nIndex != -1)
189cdf0e10cSrcweir                    sRegion = sChunk.copy( nIndex, sChunk.getLength() - nIndex );
190cdf0e10cSrcweir 
191cdf0e10cSrcweir                // Thus we first get the language of the dictionary
192cdf0e10cSrcweir                LanguageType nLang = MsLangId::convertIsoNamesToLanguage(
193cdf0e10cSrcweir                   sLang, sRegion );
194cdf0e10cSrcweir 
195cdf0e10cSrcweir                if (aDicLangInUse.count( nLang ) == 0)
196cdf0e10cSrcweir                {
197cdf0e10cSrcweir                    // remember the new language in use
198cdf0e10cSrcweir                    aDicLangInUse.insert( nLang );
199cdf0e10cSrcweir 
200cdf0e10cSrcweir                    // add the dictionary to the resulting vector
201cdf0e10cSrcweir                    SvtLinguConfigDictionaryEntry aDicEntry;
202cdf0e10cSrcweir                    aDicEntry.aLocations.realloc(1);
203cdf0e10cSrcweir                    aDicEntry.aLocaleNames.realloc(1);
204cdf0e10cSrcweir                    rtl::OUString aLocaleName( MsLangId::convertLanguageToIsoString( nLang ) );
205cdf0e10cSrcweir                    aDicEntry.aLocations[0] = sPath;
206cdf0e10cSrcweir                    aDicEntry.aFormatName = aFormatName;
207cdf0e10cSrcweir                    aDicEntry.aLocaleNames[0] = aLocaleName;
208cdf0e10cSrcweir                    aRes.push_back( aDicEntry );
209cdf0e10cSrcweir                }
210cdf0e10cSrcweir            }
211cdf0e10cSrcweir        }
212cdf0e10cSrcweir     }
213cdf0e10cSrcweir 
214cdf0e10cSrcweir #endif
215cdf0e10cSrcweir 
216cdf0e10cSrcweir     return aRes;
217cdf0e10cSrcweir }
218cdf0e10cSrcweir 
219cdf0e10cSrcweir 
220cdf0e10cSrcweir void MergeNewStyleDicsAndOldStyleDics(
221cdf0e10cSrcweir 	std::list< SvtLinguConfigDictionaryEntry > &rNewStyleDics,
222cdf0e10cSrcweir 	const std::vector< SvtLinguConfigDictionaryEntry > &rOldStyleDics )
223cdf0e10cSrcweir {
224cdf0e10cSrcweir 	// get list of languages supported by new style dictionaries
225cdf0e10cSrcweir 	std::set< LanguageType > aNewStyleLanguages;
226cdf0e10cSrcweir 	std::list< SvtLinguConfigDictionaryEntry >::const_iterator aIt;
227cdf0e10cSrcweir 	for (aIt = rNewStyleDics.begin() ;  aIt != rNewStyleDics.end();  ++aIt)
228cdf0e10cSrcweir 	{
229cdf0e10cSrcweir 		const uno::Sequence< rtl::OUString > aLocaleNames( aIt->aLocaleNames );
230cdf0e10cSrcweir 		sal_Int32 nLocaleNames = aLocaleNames.getLength();
231cdf0e10cSrcweir 		for (sal_Int32 k = 0;  k < nLocaleNames; ++k)
232cdf0e10cSrcweir 		{
233cdf0e10cSrcweir 			LanguageType nLang = MsLangId::convertIsoStringToLanguage( aLocaleNames[k] );
234cdf0e10cSrcweir 			aNewStyleLanguages.insert( nLang );
235cdf0e10cSrcweir 		}
236cdf0e10cSrcweir 	}
237cdf0e10cSrcweir 
238cdf0e10cSrcweir 	// now check all old style dictionaries if they will add a not yet
239cdf0e10cSrcweir 	// added language. If so add them to the resulting vector
240cdf0e10cSrcweir 	std::vector< SvtLinguConfigDictionaryEntry >::const_iterator aIt2;
241cdf0e10cSrcweir 	for (aIt2 = rOldStyleDics.begin();  aIt2 != rOldStyleDics.end();  ++aIt2)
242cdf0e10cSrcweir 	{
243cdf0e10cSrcweir 		sal_Int32 nOldStyleDics = aIt2->aLocaleNames.getLength();
244cdf0e10cSrcweir 
245cdf0e10cSrcweir 		// old style dics should only have one language listed...
246cdf0e10cSrcweir 		DBG_ASSERT( nOldStyleDics, "old style dictionary with more then one language found!");
247cdf0e10cSrcweir 		if (nOldStyleDics > 0)
248cdf0e10cSrcweir 		{
249cdf0e10cSrcweir 			LanguageType nLang = MsLangId::convertIsoStringToLanguage( aIt2->aLocaleNames[0] );
250cdf0e10cSrcweir 
251cdf0e10cSrcweir             if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_NONE)
252cdf0e10cSrcweir             {
253cdf0e10cSrcweir                 DBG_ERROR( "old style dictionary with invalid language found!" );
254cdf0e10cSrcweir                 continue;
255cdf0e10cSrcweir             }
256cdf0e10cSrcweir 
257cdf0e10cSrcweir 			// language not yet added?
258cdf0e10cSrcweir 			if (aNewStyleLanguages.count( nLang ) == 0)
259cdf0e10cSrcweir 				rNewStyleDics.push_back( *aIt2 );
260cdf0e10cSrcweir 		}
261cdf0e10cSrcweir 		else
262cdf0e10cSrcweir 		{
263cdf0e10cSrcweir 			DBG_ERROR( "old style dictionary with no language found!" );
264cdf0e10cSrcweir 		}
265cdf0e10cSrcweir 	}
266cdf0e10cSrcweir }
267cdf0e10cSrcweir 
268cdf0e10cSrcweir 
269cdf0e10cSrcweir rtl_TextEncoding getTextEncodingFromCharset(const sal_Char* pCharset)
270cdf0e10cSrcweir {
271cdf0e10cSrcweir     // default result: used to indicate that we failed to get the proper encoding
272cdf0e10cSrcweir     rtl_TextEncoding eRet = RTL_TEXTENCODING_DONTKNOW;
273cdf0e10cSrcweir 
274cdf0e10cSrcweir     if (pCharset)
275cdf0e10cSrcweir     {
276cdf0e10cSrcweir         eRet = rtl_getTextEncodingFromMimeCharset(pCharset);
277cdf0e10cSrcweir         if (eRet == RTL_TEXTENCODING_DONTKNOW)
278cdf0e10cSrcweir             eRet = rtl_getTextEncodingFromUnixCharset(pCharset);
279cdf0e10cSrcweir         if (eRet == RTL_TEXTENCODING_DONTKNOW)
280cdf0e10cSrcweir         {
281cdf0e10cSrcweir             if (strcmp("ISCII-DEVANAGARI", pCharset) == 0)
282cdf0e10cSrcweir                 eRet = RTL_TEXTENCODING_ISCII_DEVANAGARI;
283cdf0e10cSrcweir         }
284cdf0e10cSrcweir     }
285cdf0e10cSrcweir     return eRet;
286cdf0e10cSrcweir }
287cdf0e10cSrcweir 
288cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////
289cdf0e10cSrcweir 
290