1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #include <stdlib.h> 25 #include <stdio.h> 26 #include <fcntl.h> 27 #include <errno.h> 28 #include <string.h> 29 #include <unistd.h> 30 #include <ctype.h> 31 #include <sal/alloca.h> 32 33 #include <rtl/ustring.hxx> 34 35 #include <map> 36 #include <string> 37 38 /***************************************************************************** 39 * typedefs 40 *****************************************************************************/ 41 42 typedef std::map< const std::string, rtl_TextEncoding > EncodingMap; 43 44 struct _pair { 45 const char *key; 46 rtl_TextEncoding value; 47 }; 48 49 static int _pair_compare (const char *key, const _pair *pair); 50 static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member ); 51 52 53 const _pair _ms_encoding_list[] = { 54 { "0", RTL_TEXTENCODING_UTF8 }, 55 { "1250", RTL_TEXTENCODING_MS_1250 }, 56 { "1251", RTL_TEXTENCODING_MS_1251 }, 57 { "1252", RTL_TEXTENCODING_MS_1252 }, 58 { "1253", RTL_TEXTENCODING_MS_1253 }, 59 { "1254", RTL_TEXTENCODING_MS_1254 }, 60 { "1255", RTL_TEXTENCODING_MS_1255 }, 61 { "1256", RTL_TEXTENCODING_MS_1256 }, 62 { "1257", RTL_TEXTENCODING_MS_1257 }, 63 { "1258", RTL_TEXTENCODING_MS_1258 }, 64 { "874", RTL_TEXTENCODING_MS_874 }, 65 { "932", RTL_TEXTENCODING_MS_932 }, 66 { "936", RTL_TEXTENCODING_MS_936 }, 67 { "949", RTL_TEXTENCODING_MS_949 }, 68 { "950", RTL_TEXTENCODING_MS_950 } 69 }; 70 71 72 /***************************************************************************** 73 * fgets that work with unix line ends on Windows 74 *****************************************************************************/ 75 76 char * my_fgets(char *s, int n, FILE *fp) 77 { 78 int i; 79 for( i=0; i < n-1; i++ ) 80 { 81 int c = getc(fp); 82 83 if( c == EOF ) 84 break; 85 86 s[i] = (char) c; 87 88 if( s[i] == '\n' ) 89 { 90 i++; 91 break; 92 } 93 } 94 95 if( i>0 ) 96 { 97 s[i] = '\0'; 98 return s; 99 } 100 else 101 { 102 return NULL; 103 } 104 } 105 106 /***************************************************************************** 107 * compare function for binary search 108 *****************************************************************************/ 109 110 static int 111 _pair_compare (const char *key, const _pair *pair) 112 { 113 int result = rtl_str_compareIgnoreAsciiCase( key, pair->key ); 114 return result; 115 } 116 117 /***************************************************************************** 118 * binary search on encoding tables 119 *****************************************************************************/ 120 121 static const _pair* 122 _pair_search (const char *key, const _pair *base, unsigned int member ) 123 { 124 unsigned int lower = 0; 125 unsigned int upper = member; 126 unsigned int current; 127 int comparison; 128 129 /* check for validity of input */ 130 if ( (key == NULL) || (base == NULL) || (member == 0) ) 131 return NULL; 132 133 /* binary search */ 134 while ( lower < upper ) 135 { 136 current = (lower + upper) / 2; 137 comparison = _pair_compare( key, base + current ); 138 if (comparison < 0) 139 upper = current; 140 else 141 if (comparison > 0) 142 lower = current + 1; 143 else 144 return base + current; 145 } 146 147 return NULL; 148 } 149 150 151 /************************************************************************ 152 * read_encoding_table 153 ************************************************************************/ 154 155 void read_encoding_table(char * file, EncodingMap& aEncodingMap) 156 { 157 FILE * fp = fopen(file, "r"); 158 if ( ! fp ) { 159 fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno)); 160 exit(2); 161 } 162 163 char buffer[512]; 164 while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) { 165 166 // strip comment lines 167 if ( buffer[0] == '#' ) 168 continue; 169 170 // find end of language string 171 char * cp; 172 for ( cp = buffer; ! isspace(*cp); cp++ ) 173 ; 174 *cp = '\0'; 175 176 // find start of codepage string 177 for ( ++cp; isspace(*cp); ++cp ) 178 ; 179 char * codepage = cp; 180 181 // find end of codepage string 182 for ( ++cp; ! isspace(*cp); ++cp ) 183 ; 184 *cp = '\0'; 185 186 // find the correct mapping for codepage 187 const unsigned int members = sizeof( _ms_encoding_list ) / sizeof( _pair ); 188 const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members ); 189 190 if ( encoding != NULL ) { 191 const std::string language(buffer); 192 aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) ); 193 } 194 } 195 196 fclose(fp); 197 } 198 199 /************************************************************************ 200 * print_legacy_mixed 201 ************************************************************************/ 202 203 void print_legacy_mixed( 204 FILE * ostream, 205 const rtl::OUString& aString, 206 const std::string& language, 207 EncodingMap& aEncodingMap) 208 { 209 EncodingMap::iterator iter = aEncodingMap.find(language); 210 211 if ( iter != aEncodingMap.end() ) { 212 fputs(OUStringToOString(aString, iter->second).getStr(), ostream); 213 } else { 214 fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str()); 215 } 216 } 217 218 /************************************************************************ 219 * print_java_style 220 ************************************************************************/ 221 222 void print_java_style(FILE * ostream, const rtl::OUString& aString) 223 { 224 int imax = aString.getLength(); 225 for (int i = 0; i < imax; i++) { 226 sal_Unicode uc = aString[i]; 227 if ( uc < 128 ) { 228 fprintf(ostream, "%c", (char) uc); 229 } else { 230 fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF ); 231 } 232 } 233 } 234 235 /************************************************************************ 236 * main 237 ************************************************************************/ 238 239 int main( int argc, char * const argv[] ) 240 { 241 EncodingMap aEncodingMap; 242 243 FILE *istream = stdin; 244 FILE *ostream = stdout; 245 246 char *outfile = NULL; 247 248 int errflg = 0; 249 int argi; 250 251 for( argi=1; argi < argc; argi++ ) 252 { 253 if( argv[argi][0] == '-' && argv[argi][2] == '\0' ) 254 { 255 switch(argv[argi][1]) { 256 case 'o': 257 if (argi+1 >= argc || argv[argi+1][0] == '-') 258 { 259 fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]); 260 errflg++; 261 break; 262 } 263 264 ++argi; 265 outfile = argv[argi]; 266 break; 267 case 't': 268 if (argi+1 >= argc || argv[argi+1][0] == '-') 269 { 270 fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]); 271 errflg++; 272 break; 273 } 274 275 read_encoding_table(argv[++argi], aEncodingMap); 276 break; 277 default: 278 fprintf(stderr, "Unrecognized option: -%c\n", argv[argi][1]); 279 errflg++; 280 } 281 } 282 else 283 { 284 break; 285 } 286 } 287 288 if (errflg) { 289 fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n"); 290 exit(2); 291 } 292 293 /* assign input file to stdin */ 294 if ( argi < argc ) 295 { 296 istream = fopen(argv[argi], "r"); 297 if ( istream == NULL ) { 298 fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno)); 299 exit(2); 300 } 301 } 302 303 /* open output file if any */ 304 if ( outfile ) 305 { 306 ostream = fopen(outfile, "w"); 307 if ( ostream == NULL ) { 308 fprintf(stderr, "ulfconv: %s : %s\n", outfile, strerror(errno)); 309 fclose(istream); 310 exit(2); 311 } 312 } 313 314 /* read line by line from stdin */ 315 char buffer[65536]; 316 while ( NULL != fgets(buffer, sizeof(buffer), istream) ) { 317 318 /* only handle lines containing " = " */ 319 char * cp = strstr(buffer, " = \""); 320 if ( cp ) { 321 rtl::OUString aString; 322 323 /* find end of lang string */ 324 int n; 325 for ( n=0; ! isspace(buffer[n]); n++ ) 326 ; 327 328 std::string line = buffer; 329 std::string lang(line, 0, n); 330 331 cp += 4; 332 rtl_string2UString( &aString.pData, cp, strrchr(cp, '\"') - cp, 333 RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS ); 334 335 fprintf(ostream, "%s = \"", lang.c_str()); 336 337 if ( aEncodingMap.empty() ) { 338 print_java_style(ostream, aString); 339 } else { 340 print_legacy_mixed(ostream, aString, lang, aEncodingMap); 341 } 342 343 fprintf(ostream, "\"\n"); 344 345 346 } else { 347 fputs(buffer, ostream); 348 } 349 } 350 351 fclose(ostream); 352 fclose(istream); 353 } 354