xref: /AOO41X/main/sal/textenc/tenchelp.c (revision 1ecadb572e7010ff3b3382ad9bf179dbc6efadbb)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #include "tenchelp.h"
29 #include "unichars.h"
30 #include "rtl/textcvt.h"
31 #include "sal/types.h"
32 
33 static sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
34                                                sal_Char * pBuf,
35                                                sal_Size nMaxLen);
36 
37 static sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
38                                              sal_Char * pBuf,
39                                              sal_Size nMaxLen);
40 
41 static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags);
42 
43 sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
44                                         sal_Char * pBuf,
45                                         sal_Size nMaxLen)
46 {
47     if (nMaxLen == 0)
48         return sal_False;
49     switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
50     {
51     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0:
52         *pBuf = 0x00;
53         break;
54 
55     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK:
56     default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */
57         *pBuf = 0x3F;
58         break;
59 
60     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE:
61         *pBuf = 0x5F;
62         break;
63     }
64     return sal_True;
65 }
66 
67 sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
68                                       sal_Char * pBuf,
69                                       sal_Size nMaxLen)
70 {
71     if (nMaxLen == 0)
72         return sal_False;
73     switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
74     {
75     case RTL_UNICODETOTEXT_FLAGS_INVALID_0:
76         *pBuf = 0x00;
77         break;
78 
79     case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK:
80     default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */
81         *pBuf = 0x3F;
82         break;
83 
84     case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE:
85         *pBuf = 0x5F;
86         break;
87     }
88     return sal_True;
89 }
90 
91 int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags )
92 {
93     return
94         ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0
95          && ImplIsZeroWidth(c))
96         || ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0
97             && ImplIsControlOrFormat(c))
98         || ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0
99             && ImplIsPrivateUse(c));
100 }
101 
102 /* ======================================================================= */
103 
104 sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags)
105 {
106     return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK)
107                    == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ?
108                RTL_TEXTCVT_BYTE_PRIVATE_START + cChar :
109                RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
110 }
111 
112 /* ----------------------------------------------------------------------- */
113 
114 sal_Bool
115 ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData,
116                                      sal_Unicode const ** ppSrcBuf,
117                                      sal_Unicode const * pEndSrcBuf,
118                                      sal_Char ** ppDestBuf,
119                                      sal_Char const * pEndDestBuf,
120                                      sal_uInt32 nFlags,
121                                      sal_uInt32 * pInfo)
122 {
123     sal_Unicode c = **ppSrcBuf;
124 
125     (void) pData; /* unused */
126 
127     /* Should the private character map to one byte */
128     if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) )
129     {
130         if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
131         {
132             **ppDestBuf = (sal_Char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START);
133             (*ppDestBuf)++;
134             (*ppSrcBuf)++;
135             return sal_True;
136         }
137     }
138 
139     /* Should this character ignored (Private, Non Spacing, Control) */
140     if ( ImplIsUnicodeIgnoreChar( c, nFlags ) )
141     {
142         (*ppSrcBuf)++;
143         return sal_True;
144     }
145 
146     /* Surrogates Characters should result in */
147     /* one replacement character */
148     if (ImplIsHighSurrogate(c))
149     {
150         if ( *ppSrcBuf == pEndSrcBuf )
151         {
152             *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
153             return sal_False;
154         }
155 
156         c = *((*ppSrcBuf)+1);
157         if (ImplIsLowSurrogate(c))
158             (*ppSrcBuf)++;
159         else
160         {
161             *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID;
162             if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR )
163             {
164                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
165                 return sal_False;
166             }
167             else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE )
168             {
169                 (*ppSrcBuf)++;
170                 return sal_True;
171             }
172             else if (ImplGetInvalidAsciiMultiByte(nFlags,
173                                                   *ppDestBuf,
174                                                   pEndDestBuf - *ppDestBuf))
175             {
176                 ++*ppSrcBuf;
177                 ++*ppDestBuf;
178                 return sal_True;
179             }
180             else
181             {
182                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
183                               | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
184                 return sal_False;
185             }
186         }
187     }
188 
189     *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED;
190     if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR )
191     {
192         *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
193         return sal_False;
194     }
195     else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE )
196         (*ppSrcBuf)++;
197     else if (ImplGetUndefinedAsciiMultiByte(nFlags,
198                                             *ppDestBuf,
199                                             pEndDestBuf - *ppDestBuf))
200     {
201         ++*ppSrcBuf;
202         ++*ppDestBuf;
203     }
204     else
205     {
206         *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
207                       | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
208         return sal_False;
209     }
210 
211     return sal_True;
212 }
213 
214