1*32b1fd08SAndrew Rist /**************************************************************
2cdf0e10cSrcweir *
3*32b1fd08SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*32b1fd08SAndrew Rist * or more contributor license agreements. See the NOTICE file
5*32b1fd08SAndrew Rist * distributed with this work for additional information
6*32b1fd08SAndrew Rist * regarding copyright ownership. The ASF licenses this file
7*32b1fd08SAndrew Rist * to you under the Apache License, Version 2.0 (the
8*32b1fd08SAndrew Rist * "License"); you may not use this file except in compliance
9*32b1fd08SAndrew Rist * with the License. You may obtain a copy of the License at
10cdf0e10cSrcweir *
11*32b1fd08SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0
12cdf0e10cSrcweir *
13*32b1fd08SAndrew Rist * Unless required by applicable law or agreed to in writing,
14*32b1fd08SAndrew Rist * software distributed under the License is distributed on an
15*32b1fd08SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*32b1fd08SAndrew Rist * KIND, either express or implied. See the License for the
17*32b1fd08SAndrew Rist * specific language governing permissions and limitations
18*32b1fd08SAndrew Rist * under the License.
19cdf0e10cSrcweir *
20*32b1fd08SAndrew Rist *************************************************************/
21*32b1fd08SAndrew Rist
22*32b1fd08SAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir #include <stdlib.h>
25cdf0e10cSrcweir #include <stdio.h>
26cdf0e10cSrcweir #include <fcntl.h>
27cdf0e10cSrcweir #include <errno.h>
28cdf0e10cSrcweir #include <string.h>
29cdf0e10cSrcweir #include <unistd.h>
30cdf0e10cSrcweir #include <ctype.h>
31cdf0e10cSrcweir #include <sal/alloca.h>
32cdf0e10cSrcweir
33cdf0e10cSrcweir #include <rtl/ustring.hxx>
34cdf0e10cSrcweir
35cdf0e10cSrcweir #include <map>
36cdf0e10cSrcweir #include <string>
37cdf0e10cSrcweir
38cdf0e10cSrcweir /*****************************************************************************
39cdf0e10cSrcweir * typedefs
40cdf0e10cSrcweir *****************************************************************************/
41cdf0e10cSrcweir
42cdf0e10cSrcweir typedef std::map< const std::string, rtl_TextEncoding > EncodingMap;
43cdf0e10cSrcweir
44cdf0e10cSrcweir struct _pair {
45cdf0e10cSrcweir const char *key;
46cdf0e10cSrcweir rtl_TextEncoding value;
47cdf0e10cSrcweir };
48cdf0e10cSrcweir
49cdf0e10cSrcweir static int _pair_compare (const char *key, const _pair *pair);
50cdf0e10cSrcweir static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member );
51cdf0e10cSrcweir
52cdf0e10cSrcweir
53cdf0e10cSrcweir const _pair _ms_encoding_list[] = {
54cdf0e10cSrcweir { "0", RTL_TEXTENCODING_UTF8 },
55cdf0e10cSrcweir { "1250", RTL_TEXTENCODING_MS_1250 },
56cdf0e10cSrcweir { "1251", RTL_TEXTENCODING_MS_1251 },
57cdf0e10cSrcweir { "1252", RTL_TEXTENCODING_MS_1252 },
58cdf0e10cSrcweir { "1253", RTL_TEXTENCODING_MS_1253 },
59cdf0e10cSrcweir { "1254", RTL_TEXTENCODING_MS_1254 },
60cdf0e10cSrcweir { "1255", RTL_TEXTENCODING_MS_1255 },
61cdf0e10cSrcweir { "1256", RTL_TEXTENCODING_MS_1256 },
62cdf0e10cSrcweir { "1257", RTL_TEXTENCODING_MS_1257 },
63cdf0e10cSrcweir { "1258", RTL_TEXTENCODING_MS_1258 },
64cdf0e10cSrcweir { "874", RTL_TEXTENCODING_MS_874 },
65cdf0e10cSrcweir { "932", RTL_TEXTENCODING_MS_932 },
66cdf0e10cSrcweir { "936", RTL_TEXTENCODING_MS_936 },
67cdf0e10cSrcweir { "949", RTL_TEXTENCODING_MS_949 },
68cdf0e10cSrcweir { "950", RTL_TEXTENCODING_MS_950 }
69cdf0e10cSrcweir };
70cdf0e10cSrcweir
71cdf0e10cSrcweir
/*****************************************************************************
 * fgets that works with unix line ends on Windows
 *****************************************************************************/
75cdf0e10cSrcweir
/*
 * Reads characters from fp into s until n-1 characters are stored, a '\n'
 * has been stored (the newline is kept), or getc() reports EOF. The result
 * is terminated with '\0'.
 *
 * Returns s when at least one character was stored, NULL otherwise.
 */
char * my_fgets(char *s, int n, FILE *fp)
{
    int count = 0;

    while( count < n-1 )
    {
        const int ch = getc(fp);
        if( ch == EOF )
            break;

        s[count] = (char) ch;
        ++count;

        /* the newline ends the line but stays part of it */
        if( s[count-1] == '\n' )
            break;
    }

    /* nothing stored: end of input (or no room in the buffer) */
    if( count <= 0 )
        return NULL;

    s[count] = '\0';
    return s;
}
105cdf0e10cSrcweir
106cdf0e10cSrcweir /*****************************************************************************
107cdf0e10cSrcweir * compare function for binary search
108cdf0e10cSrcweir *****************************************************************************/
109cdf0e10cSrcweir
110cdf0e10cSrcweir static int
_pair_compare(const char * key,const _pair * pair)111cdf0e10cSrcweir _pair_compare (const char *key, const _pair *pair)
112cdf0e10cSrcweir {
113cdf0e10cSrcweir int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
114cdf0e10cSrcweir return result;
115cdf0e10cSrcweir }
116cdf0e10cSrcweir
117cdf0e10cSrcweir /*****************************************************************************
118cdf0e10cSrcweir * binary search on encoding tables
119cdf0e10cSrcweir *****************************************************************************/
120cdf0e10cSrcweir
121cdf0e10cSrcweir static const _pair*
_pair_search(const char * key,const _pair * base,unsigned int member)122cdf0e10cSrcweir _pair_search (const char *key, const _pair *base, unsigned int member )
123cdf0e10cSrcweir {
124cdf0e10cSrcweir unsigned int lower = 0;
125cdf0e10cSrcweir unsigned int upper = member;
126cdf0e10cSrcweir unsigned int current;
127cdf0e10cSrcweir int comparison;
128cdf0e10cSrcweir
129cdf0e10cSrcweir /* check for validity of input */
130cdf0e10cSrcweir if ( (key == NULL) || (base == NULL) || (member == 0) )
131cdf0e10cSrcweir return NULL;
132cdf0e10cSrcweir
133cdf0e10cSrcweir /* binary search */
134cdf0e10cSrcweir while ( lower < upper )
135cdf0e10cSrcweir {
136cdf0e10cSrcweir current = (lower + upper) / 2;
137cdf0e10cSrcweir comparison = _pair_compare( key, base + current );
138cdf0e10cSrcweir if (comparison < 0)
139cdf0e10cSrcweir upper = current;
140cdf0e10cSrcweir else
141cdf0e10cSrcweir if (comparison > 0)
142cdf0e10cSrcweir lower = current + 1;
143cdf0e10cSrcweir else
144cdf0e10cSrcweir return base + current;
145cdf0e10cSrcweir }
146cdf0e10cSrcweir
147cdf0e10cSrcweir return NULL;
148cdf0e10cSrcweir }
149cdf0e10cSrcweir
150cdf0e10cSrcweir
151cdf0e10cSrcweir /************************************************************************
152cdf0e10cSrcweir * read_encoding_table
153cdf0e10cSrcweir ************************************************************************/
154cdf0e10cSrcweir
read_encoding_table(char * file,EncodingMap & aEncodingMap)155cdf0e10cSrcweir void read_encoding_table(char * file, EncodingMap& aEncodingMap)
156cdf0e10cSrcweir {
157cdf0e10cSrcweir FILE * fp = fopen(file, "r");
158cdf0e10cSrcweir if ( ! fp ) {
159cdf0e10cSrcweir fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno));
160cdf0e10cSrcweir exit(2);
161cdf0e10cSrcweir }
162cdf0e10cSrcweir
163cdf0e10cSrcweir char buffer[512];
164cdf0e10cSrcweir while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) {
165cdf0e10cSrcweir
166cdf0e10cSrcweir // strip comment lines
167cdf0e10cSrcweir if ( buffer[0] == '#' )
168cdf0e10cSrcweir continue;
169cdf0e10cSrcweir
170cdf0e10cSrcweir // find end of language string
171cdf0e10cSrcweir char * cp;
172cdf0e10cSrcweir for ( cp = buffer; ! isspace(*cp); cp++ )
173cdf0e10cSrcweir ;
174cdf0e10cSrcweir *cp = '\0';
175cdf0e10cSrcweir
176cdf0e10cSrcweir // find start of codepage string
177cdf0e10cSrcweir for ( ++cp; isspace(*cp); ++cp )
178cdf0e10cSrcweir ;
179cdf0e10cSrcweir char * codepage = cp;
180cdf0e10cSrcweir
181cdf0e10cSrcweir // find end of codepage string
182cdf0e10cSrcweir for ( ++cp; ! isspace(*cp); ++cp )
183cdf0e10cSrcweir ;
184cdf0e10cSrcweir *cp = '\0';
185cdf0e10cSrcweir
186cdf0e10cSrcweir // find the correct mapping for codepage
187cdf0e10cSrcweir const unsigned int members = sizeof( _ms_encoding_list ) / sizeof( _pair );
188cdf0e10cSrcweir const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members );
189cdf0e10cSrcweir
190cdf0e10cSrcweir if ( encoding != NULL ) {
191cdf0e10cSrcweir const std::string language(buffer);
192cdf0e10cSrcweir aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) );
193cdf0e10cSrcweir }
194cdf0e10cSrcweir }
195cdf0e10cSrcweir
196cdf0e10cSrcweir fclose(fp);
197cdf0e10cSrcweir }
198cdf0e10cSrcweir
199cdf0e10cSrcweir /************************************************************************
200cdf0e10cSrcweir * print_legacy_mixed
201cdf0e10cSrcweir ************************************************************************/
202cdf0e10cSrcweir
print_legacy_mixed(FILE * ostream,const rtl::OUString & aString,const std::string & language,EncodingMap & aEncodingMap)203cdf0e10cSrcweir void print_legacy_mixed(
204cdf0e10cSrcweir FILE * ostream,
205cdf0e10cSrcweir const rtl::OUString& aString,
206cdf0e10cSrcweir const std::string& language,
207cdf0e10cSrcweir EncodingMap& aEncodingMap)
208cdf0e10cSrcweir {
209cdf0e10cSrcweir EncodingMap::iterator iter = aEncodingMap.find(language);
210cdf0e10cSrcweir
211cdf0e10cSrcweir if ( iter != aEncodingMap.end() ) {
212cdf0e10cSrcweir fputs(OUStringToOString(aString, iter->second).getStr(), ostream);
213cdf0e10cSrcweir } else {
214cdf0e10cSrcweir fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str());
215cdf0e10cSrcweir }
216cdf0e10cSrcweir }
217cdf0e10cSrcweir
218cdf0e10cSrcweir /************************************************************************
219cdf0e10cSrcweir * print_java_style
220cdf0e10cSrcweir ************************************************************************/
221cdf0e10cSrcweir
print_java_style(FILE * ostream,const rtl::OUString & aString)222cdf0e10cSrcweir void print_java_style(FILE * ostream, const rtl::OUString& aString)
223cdf0e10cSrcweir {
224cdf0e10cSrcweir int imax = aString.getLength();
225cdf0e10cSrcweir for (int i = 0; i < imax; i++) {
226cdf0e10cSrcweir sal_Unicode uc = aString[i];
227cdf0e10cSrcweir if ( uc < 128 ) {
228cdf0e10cSrcweir fprintf(ostream, "%c", (char) uc);
229cdf0e10cSrcweir } else {
230cdf0e10cSrcweir fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF );
231cdf0e10cSrcweir }
232cdf0e10cSrcweir }
233cdf0e10cSrcweir }
234cdf0e10cSrcweir
235cdf0e10cSrcweir /************************************************************************
236cdf0e10cSrcweir * main
237cdf0e10cSrcweir ************************************************************************/
238cdf0e10cSrcweir
main(int argc,char * const argv[])239cdf0e10cSrcweir int main( int argc, char * const argv[] )
240cdf0e10cSrcweir {
241cdf0e10cSrcweir EncodingMap aEncodingMap;
242cdf0e10cSrcweir
243cdf0e10cSrcweir FILE *istream = stdin;
244cdf0e10cSrcweir FILE *ostream = stdout;
245cdf0e10cSrcweir
246cdf0e10cSrcweir char *outfile = NULL;
247cdf0e10cSrcweir
248cdf0e10cSrcweir int errflg = 0;
249cdf0e10cSrcweir int argi;
250cdf0e10cSrcweir
251cdf0e10cSrcweir for( argi=1; argi < argc; argi++ )
252cdf0e10cSrcweir {
253cdf0e10cSrcweir if( argv[argi][0] == '-' && argv[argi][2] == '\0' )
254cdf0e10cSrcweir {
255cdf0e10cSrcweir switch(argv[argi][1]) {
256cdf0e10cSrcweir case 'o':
257cdf0e10cSrcweir if (argi+1 >= argc || argv[argi+1][0] == '-')
258cdf0e10cSrcweir {
259cdf0e10cSrcweir fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
260cdf0e10cSrcweir errflg++;
261cdf0e10cSrcweir break;
262cdf0e10cSrcweir }
263cdf0e10cSrcweir
264cdf0e10cSrcweir ++argi;
265cdf0e10cSrcweir outfile = argv[argi];
266cdf0e10cSrcweir break;
267cdf0e10cSrcweir case 't':
268cdf0e10cSrcweir if (argi+1 >= argc || argv[argi+1][0] == '-')
269cdf0e10cSrcweir {
270cdf0e10cSrcweir fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
271cdf0e10cSrcweir errflg++;
272cdf0e10cSrcweir break;
273cdf0e10cSrcweir }
274cdf0e10cSrcweir
275cdf0e10cSrcweir read_encoding_table(argv[++argi], aEncodingMap);
276cdf0e10cSrcweir break;
277cdf0e10cSrcweir default:
278cdf0e10cSrcweir fprintf(stderr, "Unrecognized option: -%c\n", argv[argi][1]);
279cdf0e10cSrcweir errflg++;
280cdf0e10cSrcweir }
281cdf0e10cSrcweir }
282cdf0e10cSrcweir else
283cdf0e10cSrcweir {
284cdf0e10cSrcweir break;
285cdf0e10cSrcweir }
286cdf0e10cSrcweir }
287cdf0e10cSrcweir
288cdf0e10cSrcweir if (errflg) {
289cdf0e10cSrcweir fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n");
290cdf0e10cSrcweir exit(2);
291cdf0e10cSrcweir }
292cdf0e10cSrcweir
293cdf0e10cSrcweir /* assign input file to stdin */
294cdf0e10cSrcweir if ( argi < argc )
295cdf0e10cSrcweir {
296cdf0e10cSrcweir istream = fopen(argv[argi], "r");
297cdf0e10cSrcweir if ( istream == NULL ) {
298cdf0e10cSrcweir fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno));
299cdf0e10cSrcweir exit(2);
300cdf0e10cSrcweir }
301cdf0e10cSrcweir }
302cdf0e10cSrcweir
303cdf0e10cSrcweir /* open output file if any */
304cdf0e10cSrcweir if ( outfile )
305cdf0e10cSrcweir {
306cdf0e10cSrcweir ostream = fopen(outfile, "w");
307cdf0e10cSrcweir if ( ostream == NULL ) {
308cdf0e10cSrcweir fprintf(stderr, "ulfconv: %s : %s\n", outfile, strerror(errno));
309cdf0e10cSrcweir fclose(istream);
310cdf0e10cSrcweir exit(2);
311cdf0e10cSrcweir }
312cdf0e10cSrcweir }
313cdf0e10cSrcweir
314cdf0e10cSrcweir /* read line by line from stdin */
315cdf0e10cSrcweir char buffer[65536];
316cdf0e10cSrcweir while ( NULL != fgets(buffer, sizeof(buffer), istream) ) {
317cdf0e10cSrcweir
318cdf0e10cSrcweir /* only handle lines containing " = " */
319cdf0e10cSrcweir char * cp = strstr(buffer, " = \"");
320cdf0e10cSrcweir if ( cp ) {
321cdf0e10cSrcweir rtl::OUString aString;
322cdf0e10cSrcweir
323cdf0e10cSrcweir /* find end of lang string */
324cdf0e10cSrcweir int n;
325cdf0e10cSrcweir for ( n=0; ! isspace(buffer[n]); n++ )
326cdf0e10cSrcweir ;
327cdf0e10cSrcweir
328cdf0e10cSrcweir std::string line = buffer;
329cdf0e10cSrcweir std::string lang(line, 0, n);
330cdf0e10cSrcweir
331cdf0e10cSrcweir cp += 4;
332cdf0e10cSrcweir rtl_string2UString( &aString.pData, cp, strrchr(cp, '\"') - cp,
333cdf0e10cSrcweir RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS );
334cdf0e10cSrcweir
335cdf0e10cSrcweir fprintf(ostream, "%s = \"", lang.c_str());
336cdf0e10cSrcweir
337cdf0e10cSrcweir if ( aEncodingMap.empty() ) {
338cdf0e10cSrcweir print_java_style(ostream, aString);
339cdf0e10cSrcweir } else {
340cdf0e10cSrcweir print_legacy_mixed(ostream, aString, lang, aEncodingMap);
341cdf0e10cSrcweir }
342cdf0e10cSrcweir
343cdf0e10cSrcweir fprintf(ostream, "\"\n");
344cdf0e10cSrcweir
345cdf0e10cSrcweir
346cdf0e10cSrcweir } else {
347cdf0e10cSrcweir fputs(buffer, ostream);
348cdf0e10cSrcweir }
349cdf0e10cSrcweir }
350cdf0e10cSrcweir
351cdf0e10cSrcweir fclose(ostream);
352cdf0e10cSrcweir fclose(istream);
353cdf0e10cSrcweir }
354