xref: /AOO41X/main/i18npool/source/breakiterator/gendict.cxx (revision 449ab281255486d6ec349c45a6ad7906d6939331)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26 
27 #include <stdio.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <sal/main.h>
31 #include <sal/types.h>
32 #include <rtl/strbuf.hxx>
33 #include <rtl/ustring.hxx>
34 
35 using namespace ::rtl;
36 
37 /* Main Procedure */
38 
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc,argv)39 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
40 {
41     FILE *sfp, *cfp;
42 
43     if (argc < 3) exit(-1);
44 
45     sfp = fopen(argv[1], "rb"); // open the source file for read;
46     if (sfp == NULL)
47     {
48         printf("Open the dictionary source file failed.");
49         return -1;
50     }
51 
52     // create the C source file to write
53     cfp = fopen(argv[2], "wb");
54     if (cfp == NULL) {
55         fclose(sfp);
56         printf("Can't create the C source file.");
57         return -1;
58     }
59 
60     fprintf(cfp, "/*\n");
61     fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
62     fprintf(cfp, " * All Rights Reserved.\n");
63     fprintf(cfp, " */\n\n");
64     fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n");
65     fprintf(cfp, "#include <sal/types.h>\n\n");
66     fprintf(cfp, "extern \"C\" {\n");
67 
68     sal_Int32 count, i, j;
69     sal_Int32 lenArrayCurr = 0, lenArrayCount = 0, lenArrayLen = 0, *lenArray = NULL, charArray[0x10000];
70     sal_Bool exist[0x10000];
71     for (i = 0; i < 0x10000; i++) {
72         exist[i] = sal_False;
73         charArray[i] = 0;
74     }
75 
76     // generate main dict. data array
77     fprintf(cfp, "static const sal_Unicode dataArea[] = {");
78     sal_Char str[1024];
79     sal_Unicode current = 0;
80     count = 0;
81     while (fgets(str, 1024, sfp)) {
82         // input file is in UTF-8 encoding
83         // don't convert last new line character to Ostr.
84         OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
85         const sal_Unicode *u = Ostr.getStr();
86 
87         sal_Int32 len = Ostr.getLength();
88 
89         i=0;
90         Ostr.iterateCodePoints(&i, 1);
91         if (len == i) continue; // skip one character word
92 
93         if (*u != current) {
94         if (*u < current)
95         printf("u %x, current %x, count %d, lenArrayCount %d\n", *u, current,
96                     sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArrayCount));
97         current = *u;
98         charArray[current] = lenArrayCount;
99         }
100 
101         if (lenArrayLen <= lenArrayCount+1)
102         lenArray = (sal_Int32*) realloc(lenArray, (lenArrayLen += 1000) * sizeof(sal_Int32));
103         lenArray[lenArrayCount++] = lenArrayCurr;
104 
105         exist[u[0]] = sal_True;
106         for (i = 1; i < len; i++) {     // start from second character,
107         exist[u[i]] = sal_True;     // since the first character is captured in charArray.
108         lenArrayCurr++;
109         if ((count++) % 0x10 == 0)
110             fprintf(cfp, "\n\t");
111         fprintf(cfp, "0x%04x, ", u[i]);
112         }
113     }
114     lenArray[lenArrayCount++] = lenArrayCurr; // store last ending pointer
115     charArray[current+1] = lenArrayCount;
116     fprintf(cfp, "\n};\n");
117 
118     // generate lenArray
119     fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t");
120     count = 1;
121     fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
122     for (i = 0; i < lenArrayCount; i++) {
123         fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[i]));
124         if (count == 0xf) {
125         count = 0;
126         fprintf(cfp, "\n\t");
127         } else count++;
128     }
129     fprintf(cfp, "\n};\n");
130 
131     free(lenArray);
132 
133     // generate index1 array
134     fprintf (cfp, "static const sal_Int16 index1[] = {\n\t");
135     sal_Int16 set[0x100];
136     count = 0;
137     for (i = 0; i < 0x100; i++) {
138         for (j = 0; j < 0x100; j++)
139         if (charArray[(i*0x100) + j] != 0)
140             break;
141 
142         fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
143         if ((i+1) % 0x10 == 0)
144         fprintf (cfp, "\n\t");
145     }
146     fprintf (cfp, "};\n");
147 
148     // generate index2 array
149     fprintf (cfp, "static const sal_Int32 index2[] = {\n\t");
150     sal_Int32 prev = 0;
151     for (i = 0; i < 0x100; i++) {
152         if (set[i] != 0xff) {
153         for (j = 0; j < 0x100; j++) {
154             sal_Int32 k = (i*0x100) + j;
155             if (prev != 0 && charArray[k] == 0) {
156             for (k++; k < 0x10000; k++)
157                 if (charArray[k] != 0)
158                 break;
159             }
160             prev = charArray[(i*0x100) + j];
161             fprintf(
162                 cfp, "0x%lx, ",
163                 sal::static_int_cast< unsigned long >(
164                     k < 0x10000 ? charArray[k] + 1 : 0));
165             if ((j+1) % 0x10 == 0)
166             fprintf (cfp, "\n\t");
167         }
168         fprintf (cfp, "\n\t");
169         }
170     }
171     fprintf (cfp, "\n};\n");
172 
173     // generate existMark array
174     count = 0;
175     fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t");
176     for (i = 0; i < 0x1FFF; i++) {
177         sal_uInt8 bit = 0;
178         for (j = 0; j < 8; j++)
179         if (exist[i * 8 + j])
180             bit |= 1 << j;
181         fprintf(cfp, "0x%02x, ", bit);
182         if (count == 0xf) {
183         count = 0;
184         fprintf(cfp, "\n\t");
185         } else count++;
186     }
187     fprintf (cfp, "\n};\n");
188 
189     // create function to return arrays
190     fprintf (cfp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
191     fprintf (cfp, "\tconst sal_Int16* getIndex1() { return index1; }\n");
192     fprintf (cfp, "\tconst sal_Int32* getIndex2() { return index2; }\n");
193     fprintf (cfp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n");
194     fprintf (cfp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
195     fprintf (cfp, "}\n");
196 
197     fclose(sfp);
198     fclose(cfp);
199 
200     return 0;
201 }   // End of main
202