xref: /AOO41X/main/tools/inc/tools/inetmime.hxx (revision 67e470dafe1997e73f56ff7ff4878983707e3e07)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 #ifndef TOOLS_INETMIME_HXX
24 #define TOOLS_INETMIME_HXX
25 
26 #include "tools/toolsdllapi.h"
27 #include <rtl/alloc.h>
28 #include <rtl/string.h>
29 #include "rtl/tencinfo.h"
30 #include <tools/debug.hxx>
31 #include <tools/errcode.hxx>
32 #include <tools/list.hxx>
33 #include <tools/string.hxx>
34 
35 class DateTime;
36 class INetContentTypeParameterList;
37 class INetMIMECharsetList_Impl;
38 class INetMIMEOutputSink;
39 
40 //============================================================================
41 class TOOLS_DLLPUBLIC INetMIME
42 {
43 public:
44     enum { SOFT_LINE_LENGTH_LIMIT = 76,
45            HARD_LINE_LENGTH_LIMIT = 998 };
46 
47     /** The various types of message header field bodies, with respect to
48         encoding and decoding them.
49 
50         @descr  At the moment, five different types of header fields suffice
51         to describe how to encode and decode any known message header field
52         body, but need for more types may arise in the future as new header
53         fields are introduced.
54 
55         @descr  The following is an exhaustive list of all the header fields
56         currently known to our implementation.  For every header field, it
57         includes a 'canonic' (with regard to capitalization) name, a grammar
58         rule for the body (using RFC 822 and RFC 2234 conventions), a list of
59         relevant sources of information, and the HeaderFieldType value to use
60         with that header field.  The list is based on RFC 2076 and draft-
61         palme-mailext-headers-02.txt (see also <http://www.dsv.su.se/~jpalme/
62         ietf/jp-ietf-home.html#anchor1003783>).
63 
64         Approved: address  ;RFC 1036; HEADER_FIELD_ADDRESS
65         bcc: #address  ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
66         cc: 1#address  ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
67         Comments: *text  ;RFCs 822, RFC 2047; HEADER_FIELD_TEXT
68         Content-Base: absoluteURI  ;RFC 2110; HEADER_FIELD_TEXT
69         Content-Description: *text  ;RFC 2045, RFC 2047; HEADER_FIELD_TEXT
70         Content-Disposition: disposition-type *(";" disposition-parm)
71             ;RFC 1806; HEADER_FIELD_STRUCTURED
72         Content-ID: msg-id  ;RFC 2045, RFC 2047; HEADER_FIELD_MESSAGE_ID
73         Content-Location: absoluteURI / relativeURI  ;RFC 2110;
74             HEADER_FIELD_TEXT
75         Content-Transfer-Encoding: mechanism  ;RFC 2045, RFC 2047;
76             HEADER_FIELD_STRUCTURED
77         Content-Type: type "/" subtype *(";" parameter)  ;RFC 2045, RFC 2047;
78             HEADER_FIELD_STRUCTURED
79         Control:  *text ;RFC 1036; HEADER_FIELD_TEXT
80         Date: date-time  ;RFC 822, RFC 1123, RFC 2047; HEADER_FIELD_STRUCTURED
81         Distribution: 1#atom  ;RFC 1036; HEADER_FIELD_STRUCTURED
82         Encrypted: 1#2word  ;RFC 822, RFC 2047; HEADER_FIELD_STRUCTURED
83         Expires: date-time  ;RFC 1036; HEADER_FIELD_STRUCTURED
84         Followup-To: 1#(atom *("." atom))  ;RFC 1036; HEADER_FIELD_STRUCTURED
85         From: mailbox / 1#mailbox  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
86         In-Reply-To: *(phrase / msg-id)  ;RFC 822, RFC 2047;
87             HEADER_FIELD_ADDRESS
88         Keywords: #phrase  ;RFC 822, RFC 2047; HEADER_FIELD_PHRASE
89         MIME-Version: 1*DIGIT "." 1*DIGIT  ;RFC 2045, RFC 2047;
90             HEADER_FIELD_STRUCTURED
91         Message-ID: msg-id  ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
92         Newsgroups: 1#(atom *("." atom))  ;RFC 1036, RFC 2047;
93             HEADER_FIELD_STRUCTURED
94         Organization: *text  ;RFC 1036; HEADER_FIELD_TEXT
95         Received: ["from" domain] ["by" domain] ["via" atom] *("with" atom)
96             ["id" msg-id] ["for" addr-spec] ";" date-time  ;RFC 822, RFC 1123,
97             RFC 2047; HEADER_FIELD_STRUCTURED
98         References: *(phrase / msg-id)  ;RFC 822, RFC 2047;
99             HEADER_FIELD_ADDRESS
100         Reply-To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
101         Resent-Date: date-time  ;RFC 822, RFC 1123, RFC 2047;
102             HEADER_FIELD_STRUCTURED
103         Resent-From: mailbox / 1#mailbox  ;RFC 822, RFC 2047;
104             HEADER_FIELD_ADDRESS
105         Resent-Message-ID: msg-id  ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
106         Resent-Reply-To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
107         Resent-Sender: mailbox  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
108         Resent-To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
109         Resent-bcc: #address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
110         Resent-cc: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
111         Return-path: route-addr / ("<" ">")  ;RFC 822, RFC 1123, RFC 2047;
112             HEADER_FIELD_STRUCTURED
113         Return-Receipt-To: address  ;Not Internet standard;
114             HEADER_FIELD_ADDRESS
115         Sender: mailbox  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
116         Subject: *text  ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
117         Summary: *text  ;RFC 1036; HEADER_FIELD_TEXT
118         To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
119         X-CHAOS-Marked: "YES" / "NO"  ;local; HEADER_FIELD_STRUCTURED
120         X-CHAOS-Read: "YES" / "NO"  ;local; HEADER_FIELD_STRUCTURED
121         X-CHAOS-Recipients: #*("<" atom word ">")  ;local;
122             HEADER_FIELD_STRUCTURED
123         X-CHAOS-Size: 1*DIGIT  ;local; HEADER_FIELD_STRUCTURED
124         X-Mailer: *text  ;Not Internet standard; HEADER_FIELD_TEXT
125         X-Mozilla-Status: 4HEXDIG  ;Mozilla; HEADER_FIELD_STRUCTURED
126         X-Newsreader: *text  ;Not Internet standard; HEADER_FIELD_TEXT
127         X-Priority: "1" / "2" / "3" / "4" / "5"  ;Not Internet standard;
128             HEADER_FIELD_STRUCTURED
129         Xref: sub-domain
130             1*((atom / string) *("." (atom / string)) ":" msg-number)
131             ;RFCs 1036, 2047, local; HEADER_FIELD_STRUCTURED
132      */
133     enum HeaderFieldType
134     {
135         HEADER_FIELD_TEXT,
136         HEADER_FIELD_STRUCTURED,
137         HEADER_FIELD_PHRASE,
138         HEADER_FIELD_MESSAGE_ID,
139         HEADER_FIELD_ADDRESS
140     };
141 
142     /** Check for US-ASCII character.
143 
144         @param nChar  Some UCS-4 character.
145 
146         @return  True if nChar is a US-ASCII character (0x00--0x7F).
147      */
148     static inline bool isUSASCII(sal_uInt32 nChar);
149 
150     /** Check for ISO 8859-1 character.
151 
152         @param nChar  Some UCS-4 character.
153 
154         @return  True if nChar is an ISO 8859-1 character (0x00--0xFF).
155      */
156     static inline bool isISO88591(sal_uInt32 nChar);
157 
158     /** Check for US-ASCII control character.
159 
160         @param nChar  Some UCS-4 character.
161 
162         @return  True if nChar is a US-ASCII control character (US-ASCII
163         0x00--0x1F or 0x7F).
164      */
165     static inline bool isControl(sal_uInt32 nChar);
166 
167     /** Check for US-ASCII white space character.
168 
169         @param nChar  Some UCS-4 character.
170 
171         @return  True if nChar is a US-ASCII white space character (US-ASCII
172         0x09 or 0x20).
173      */
174     static inline bool isWhiteSpace(sal_uInt32 nChar);
175 
176     /** Check for US-ASCII visible character.
177 
178         @param nChar  Some UCS-4 character.
179 
180         @return  True if nChar is a US-ASCII visible character (US-ASCII
181         0x21--0x7E).
182      */
183     static inline bool isVisible(sal_uInt32 nChar);
184 
185     /** Check for US-ASCII digit character.
186 
187         @param nChar  Some UCS-4 character.
188 
189         @return  True if nChar is a US-ASCII (decimal) digit character (US-
190         ASCII '0'--'9').
191      */
192     static inline bool isDigit(sal_uInt32 nChar);
193 
194     /** Check for US-ASCII canonic hexadecimal digit character.
195 
196         @param nChar  Some UCS-4 character.
197 
198         @return  True if nChar is a US-ASCII canonic (i.e., upper case)
199         hexadecimal digit character (US-ASCII '0'--'9' or 'A'--'F').
200      */
201     static inline bool isCanonicHexDigit(sal_uInt32 nChar);
202 
203     /** Check for US-ASCII hexadecimal digit character.
204 
205         @param nChar  Some UCS-4 character.
206 
207         @return  True if nChar is a US-ASCII hexadecimal digit character (US-
208         ASCII '0'--'9', 'A'--'F', 'a'--'f').
209      */
210     static inline bool isHexDigit(sal_uInt32 nChar);
211 
212     /** Check for US-ASCII upper case character.
213 
214         @param nChar  Some UCS-4 character.
215 
216         @return  True if nChar is a US-ASCII upper case alphabetic character
217         (US-ASCII 'A'--'Z').
218      */
219     static inline bool isUpperCase(sal_uInt32 nChar);
220 
221     /** Check for US-ASCII lower case character.
222 
223         @param nChar  Some UCS-4 character.
224 
225         @return  True if nChar is a US-ASCII lower case alphabetic character
226         (US-ASCII 'a'--'z').
227      */
228     static inline bool isLowerCase(sal_uInt32 nChar);
229 
230     /** Check for US-ASCII alphabetic character.
231 
232         @param nChar  Some UCS-4 character.
233 
234         @return  True if nChar is a US-ASCII alphabetic character (US-ASCII
235         'A'--'Z' or 'a'--'z').
236      */
237     static inline bool isAlpha(sal_uInt32 nChar);
238 
239     /** Check for US-ASCII alphanumeric character.
240 
241         @param nChar  Some UCS-4 character.
242 
243         @return  True if nChar is a US-ASCII alphanumeric character (US-ASCII
244         '0'--'9', 'A'--'Z' or 'a'--'z').
245      */
246     static inline bool isAlphanumeric(sal_uInt32 nChar);
247 
248     /** Check for US-ASCII Base 64 digit character.
249 
250         @param nChar  Some UCS-4 character.
251 
252         @return  True if nChar is a US-ASCII Base 64 digit character (US-ASCII
253         'A'--'Z', 'a'--'z', '0'--'9', '+', or '/').
254      */
255     static inline bool isBase64Digit(sal_uInt32 nChar);
256 
257     /** Check whether some character is valid within an RFC 822 <atom>.
258 
259         @param nChar  Some UCS-4 character.
260 
261         @return  True if nChar is valid within an RFC 822 <atom> (US-ASCII
262         'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
263         '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', or '~').
264      */
265     static bool isAtomChar(sal_uInt32 nChar);
266 
267     /** Check whether some character is valid within an RFC 2045 <token>.
268 
269         @param nChar  Some UCS-4 character.
270 
271         @return  True if nChar is valid within an RFC 2045 <token> (US-ASCII
272         'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
273         '-', '.', '^', '_', '`', '{', '|', '}', or '~').
274      */
275     static bool isTokenChar(sal_uInt32 nChar);
276 
277     /** Check whether some character is valid within an RFC 2047 <token>.
278 
279         @param nChar  Some UCS-4 character.
280 
281         @return  True if nChar is valid within an RFC 2047 <token> (US-ASCII
282         'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
283         '-', '^', '_', '`', '{', '|', '}', or '~').
284      */
285     static bool isEncodedWordTokenChar(sal_uInt32 nChar);
286 
287     /** Check whether some character is valid within an RFC 2060 <atom>.
288 
289         @param nChar  Some UCS-4 character.
290 
291         @return  True if nChar is valid within an RFC 2060 <atom> (US-ASCII
292         'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '&', ''', '+', ',', '-',
293         '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`',
294         '|', '}', or '~').
295      */
296     static bool isIMAPAtomChar(sal_uInt32 nChar);
297 
298     /** Translate a US-ASCII character to upper case.
299 
300         @param nChar  Some UCS-4 character.
301 
302         @return  If nChar is a US-ASCII lower case character (US-ASCII
303         'a'--'z'), return the corresponding US-ASCII upper case character (US-
304         ASCII 'A'--'Z'); otherwise, return nChar unchanged.
305      */
306     static inline sal_uInt32 toUpperCase(sal_uInt32 nChar);
307 
308     /** Translate a US-ASCII character to lower case.
309 
310         @param nChar  Some UCS-4 character.
311 
312         @return  If nChar is a US-ASCII upper case character (US-ASCII
313         'A'--'Z'), return the corresponding US-ASCII lower case character (US-
314         ASCII 'a'--'z'); otherwise, return nChar unchanged.
315      */
316     static inline sal_uInt32 toLowerCase(sal_uInt32 nChar);
317 
318     /** Get the digit weight of a US-ASCII character.
319 
320         @param nChar  Some UCS-4 character.
321 
322         @return  If nChar is a US-ASCII (decimal) digit character (US-ASCII
323         '0'--'9'), return the corresponding weight (0--9); otherwise,
324         return -1.
325      */
326     static inline int getWeight(sal_uInt32 nChar);
327 
328     /** Get the hexadecimal digit weight of a US-ASCII character.
329 
330         @param nChar  Some UCS-4 character.
331 
332         @return  If nChar is a US-ASCII hexadecimal digit character (US-ASCII
333         '0'--'9', 'A'--'F', or 'a'--'f'), return the corresponding weight
334         (0--15); otherwise, return -1.
335      */
336     static inline int getHexWeight(sal_uInt32 nChar);
337 
338     /** Get the Base 64 digit weight of a US-ASCII character.
339 
340         @param nChar  Some UCS-4 character.
341 
342         @return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
343         'A'--'Z', 'a'--'z', '0'--'9', '+', or '/'), return the
344         corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
345         character (US-ASCII '='), return -1; otherwise, return -2.
346      */
347     static inline int getBase64Weight(sal_uInt32 nChar);
348 
349     /** Get a decimal digit encoded as US-ASCII.
350 
351         @param nWeight  Must be in the range 0--9, inclusive.
352 
353         @return  The decimal digit corresponding to nWeight (US-ASCII
354         '0'--'9').
355      */
356     static sal_uInt32 getDigit(int nWeight);
357 
358     /** Get a hexadecimal digit encoded as US-ASCII.
359 
360         @param nWeight  Must be in the range 0--15, inclusive.
361 
362         @return  The canonic (i.e., upper case) hexadecimal digit
363         corresponding to nWeight (US-ASCII '0'--'9' or 'A'--'F').
364      */
365     static sal_uInt32 getHexDigit(int nWeight);
366 
367     /** Get a Base 64 digit encoded as US-ASCII.
368 
369         @param nWeight  Must be in the range 0--63, inclusive.
370 
371         @return  The Base 64 digit corresponding to nWeight (US-ASCII 'A'--
372         'Z', 'a'--'z', '0'--'9', '+' or '/').
373      */
374     static sal_uInt32 getBase64Digit(int nWeight);
375 
376     static inline bool isHighSurrogate(sal_uInt32 nUTF16);
377 
378     static inline bool isLowSurrogate(sal_uInt32 nUTF16);
379 
380     static inline sal_uInt32 toUTF32(sal_Unicode cHighSurrogate,
381                                      sal_Unicode cLowSurrogate);
382 
383     /** Check two US-ASCII strings for equality, ignoring case.
384 
385         @param pBegin1  Points to the start of the first string, must not be
386         null.
387 
388         @param pEnd1  Points past the end of the first string, must be >=
389         pBegin1.
390 
391         @param pBegin2  Points to the start of the second string, must not be
392         null.
393 
394         @param pEnd2  Points past the end of the second string, must be >=
395         pBegin2.
396 
397         @return  True if the two strings are equal, ignoring the case of US-
398         ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
399      */
400     static bool equalIgnoreCase(const sal_Char * pBegin1,
401                                 const sal_Char * pEnd1,
402                                 const sal_Char * pBegin2,
403                                 const sal_Char * pEnd2);
404 
405     /** Check two US-ASCII strings for equality, ignoring case.
406 
407         @param pBegin1  Points to the start of the first string, must not be
408         null.
409 
410         @param pEnd1  Points past the end of the first string, must be >=
411         pBegin1.
412 
413         @param pString2  Points to the start of the null terminated second
414         string, must not be null.
415 
416         @return  True if the two strings are equal, ignoring the case of US-
417         ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
418      */
419     static bool equalIgnoreCase(const sal_Char * pBegin1,
420                                 const sal_Char * pEnd1,
421                                 const sal_Char * pString2);
422 
423     /** Check two US-ASCII strings for equality, ignoring case.
424 
425         @param pBegin1  Points to the start of the first string, must not be
426         null.
427 
428         @param pEnd1  Points past the end of the first string, must be >=
429         pBegin1.
430 
431         @param pString2  Points to the start of the null terminated second
432         string, must not be null.
433 
434         @return  True if the two strings are equal, ignoring the case of US-
435         ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
436      */
437     static bool equalIgnoreCase(const sal_Unicode * pBegin1,
438                                 const sal_Unicode * pEnd1,
439                                 const sal_Char * pString2);
440 
441     /** Check two US-ASCII strings for equality, ignoring case.
442 
443         @param rString1  The first string.
444 
445         @param pString2  Points to the start of the null terminated second
446         string, must not be null.
447 
448         @return  True if the two strings are equal, ignoring the case of US-
449         ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
450      */
451     static inline bool equalIgnoreCase(const ByteString & rString1,
452                                        const sal_Char * pString2);
453 
454     static inline bool startsWithLineBreak(const sal_Char * pBegin,
455                                            const sal_Char * pEnd);
456 
457     static inline bool startsWithLineBreak(const sal_Unicode * pBegin,
458                                            const sal_Unicode * pEnd);
459 
460     static inline bool startsWithLineFolding(const sal_Char * pBegin,
461                                              const sal_Char * pEnd);
462 
463     static inline bool startsWithLineFolding(const sal_Unicode * pBegin,
464                                              const sal_Unicode * pEnd);
465 
466     static bool startsWithLinearWhiteSpace(const sal_Char * pBegin,
467                                            const sal_Char * pEnd);
468 
469     static const sal_Char * skipLinearWhiteSpace(const sal_Char * pBegin,
470                                                  const sal_Char * pEnd);
471 
472     static const sal_Unicode * skipLinearWhiteSpace(const sal_Unicode *
473                                                         pBegin,
474                                                     const sal_Unicode * pEnd);
475 
476     static const sal_Char * skipComment(const sal_Char * pBegin,
477                                         const sal_Char * pEnd);
478 
479     static const sal_Unicode * skipComment(const sal_Unicode * pBegin,
480                                            const sal_Unicode * pEnd);
481 
482     static const sal_Char * skipLinearWhiteSpaceComment(const sal_Char *
483                                                             pBegin,
484                                                         const sal_Char *
485                                                             pEnd);
486 
487     static const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
488                                                                pBegin,
489                                                            const sal_Unicode *
490                                                                pEnd);
491 
492     static inline bool needsQuotedStringEscape(sal_uInt32 nChar);
493 
494     static const sal_Char * skipQuotedString(const sal_Char * pBegin,
495                                              const sal_Char * pEnd);
496 
497     static const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
498                                                 const sal_Unicode * pEnd);
499 
500     static const sal_Char * scanAtom(const sal_Char * pBegin,
501                                      const sal_Char * pEnd);
502 
503     static const sal_Unicode * scanAtom(const sal_Unicode * pBegin,
504                                         const sal_Unicode * pEnd);
505 
506     static bool scanUnsigned(const sal_Char *& rBegin, const sal_Char * pEnd,
507                              bool bLeadingZeroes, sal_uInt32 & rValue);
508 
509     static bool scanUnsigned(const sal_Unicode *& rBegin,
510                              const sal_Unicode * pEnd, bool bLeadingZeroes,
511                              sal_uInt32 & rValue);
512 
513     static bool scanUnsignedHex(const sal_Char *& rBegin,
514                                 const sal_Char * pEnd, bool bLeadingZeroes,
515                                 sal_uInt32 & rValue);
516 
517     static bool scanUnsignedHex(const sal_Unicode *& rBegin,
518                                 const sal_Unicode * pEnd, bool bLeadingZeroes,
519                                 sal_uInt32 & rValue);
520 
521     static const sal_Char * scanQuotedBlock(const sal_Char * pBegin,
522                                             const sal_Char * pEnd,
523                                             sal_uInt32 nOpening,
524                                             sal_uInt32 nClosing,
525                                             sal_Size & rLength,
526                                             bool & rModify);
527 
528     static const sal_Unicode * scanQuotedBlock(const sal_Unicode * pBegin,
529                                                const sal_Unicode * pEnd,
530                                                sal_uInt32 nOpening,
531                                                sal_uInt32 nClosing,
532                                                sal_Size & rLength,
533                                                bool & rModify);
534 
535     static sal_Char const * scanParameters(sal_Char const * pBegin,
536                                            sal_Char const * pEnd,
537                                            INetContentTypeParameterList *
538                                                pParameters);
539 
540     static sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
541                                               sal_Unicode const * pEnd,
542                                               INetContentTypeParameterList *
543                                                   pParameters);
544 
545     static inline rtl_TextEncoding translateToMIME(rtl_TextEncoding
546                                                        eEncoding);
547 
548     static inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
549                                                          eEncoding);
550 
551     static const sal_Char * getCharsetName(rtl_TextEncoding eEncoding);
552 
553     static rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
554                                                const sal_Char * pEnd);
555 
556     static rtl_TextEncoding getCharsetEncoding(const sal_Unicode * pBegin,
557                                                const sal_Unicode * pEnd);
558 
559     static inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding);
560 
561     static INetMIMECharsetList_Impl *
562     createPreferredCharsetList(rtl_TextEncoding eEncoding);
563 
564     static sal_Unicode * convertToUnicode(const sal_Char * pBegin,
565                                           const sal_Char * pEnd,
566                                           rtl_TextEncoding eEncoding,
567                                           sal_Size & rSize);
568 
569     static sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
570                                          const sal_Unicode * pEnd,
571                                          rtl_TextEncoding eEncoding,
572                                          sal_Size & rSize);
573 
574     /** Get the number of octets required to encode a UCS-4 character using
575         UTF-8 encoding.
576 
577         @param nChar  Some UCS-4 character.
578 
579         @return  The number of octets required (in the range 1--6, inclusive).
580      */
581     static inline int getUTF8OctetCount(sal_uInt32 nChar);
582 
583     static inline void writeEscapeSequence(INetMIMEOutputSink & rSink,
584                                            sal_uInt32 nChar);
585 
586     static void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar);
587 
588     static void writeUnsigned(INetMIMEOutputSink & rSink, sal_uInt32 nValue,
589                               int nMinDigits = 1);
590 
591     static void writeDateTime(INetMIMEOutputSink & rSink,
592                               const DateTime & rUTC);
593 
594     static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
595                                      HeaderFieldType eType,
596                                      const ByteString & rBody,
597                                      rtl_TextEncoding ePreferredEncoding,
598                                      bool bInitialSpace = true);
599 
600     static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
601                                      HeaderFieldType eType,
602                                      const UniString & rBody,
603                                      rtl_TextEncoding ePreferredEncoding,
604                                      bool bInitialSpace = true);
605 
606     static bool translateUTF8Char(const sal_Char *& rBegin,
607                                   const sal_Char * pEnd,
608                                   rtl_TextEncoding eEncoding,
609                                   sal_uInt32 & rCharacter);
610 
611     static ByteString decodeUTF8(const ByteString & rText,
612                                  rtl_TextEncoding eEncoding);
613 
614     static UniString decodeHeaderFieldBody(HeaderFieldType eType,
615                                            const ByteString & rBody);
616 
617 // #i70651#: Prevent warnings on Mac OS X.
618 #ifdef MACOSX
619 #pragma GCC system_header
620 #endif
621 
622     /** Get the UTF-32 character at the head of a UTF-16 encoded string.
623 
624         @param rBegin  Points to the start of the UTF-16 encoded string, must
625         not be null.  On exit, it points past the first UTF-32 character's
626         encoding.
627 
628         @param pEnd  Points past the end of the UTF-16 encoded string, must be
629         strictly greater than rBegin.
630 
631         @return  The UCS-4 character at the head of the UTF-16 encoded string.
632         If the string does not start with the UTF-16 encoding of a UCS-4
633         character, the first UTF-16 value is returned.
634      */
635     static inline sal_uInt32 getUTF32Character(const sal_Unicode *& rBegin,
636                                                const sal_Unicode * pEnd);
637 
638     /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
639 
640         @param pBuffer  Points to a buffer, must not be null.
641 
642         @param nUTF32  A UTF-32 character, must be in the range 0..0x10FFFF.
643 
644         @return  A pointer past the UTF-16 characters put into the buffer
645         (i.e., pBuffer + 1 or pBuffer + 2).
646      */
647     static inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
648                                                   sal_uInt32 nUTF32);
649 };
650 
651 // static
isUSASCII(sal_uInt32 nChar)652 inline bool INetMIME::isUSASCII(sal_uInt32 nChar)
653 {
654     return nChar <= 0x7F;
655 }
656 
657 // static
isISO88591(sal_uInt32 nChar)658 inline bool INetMIME::isISO88591(sal_uInt32 nChar)
659 {
660     return nChar <= 0xFF;
661 }
662 
663 // static
isControl(sal_uInt32 nChar)664 inline bool INetMIME::isControl(sal_uInt32 nChar)
665 {
666     return nChar <= 0x1F || nChar == 0x7F;
667 }
668 
669 // static
isWhiteSpace(sal_uInt32 nChar)670 inline bool INetMIME::isWhiteSpace(sal_uInt32 nChar)
671 {
672     return nChar == '\t' || nChar == ' ';
673 }
674 
675 // static
isVisible(sal_uInt32 nChar)676 inline bool INetMIME::isVisible(sal_uInt32 nChar)
677 {
678     return nChar >= '!' && nChar <= '~';
679 }
680 
681 // static
isDigit(sal_uInt32 nChar)682 inline bool INetMIME::isDigit(sal_uInt32 nChar)
683 {
684     return nChar >= '0' && nChar <= '9';
685 }
686 
687 // static
isCanonicHexDigit(sal_uInt32 nChar)688 inline bool INetMIME::isCanonicHexDigit(sal_uInt32 nChar)
689 {
690     return isDigit(nChar) || (nChar >= 'A' && nChar <= 'F');
691 }
692 
693 // static
isHexDigit(sal_uInt32 nChar)694 inline bool INetMIME::isHexDigit(sal_uInt32 nChar)
695 {
696     return isCanonicHexDigit(nChar) || (nChar >= 'a' && nChar <= 'f');
697 }
698 
699 // static
isUpperCase(sal_uInt32 nChar)700 inline bool INetMIME::isUpperCase(sal_uInt32 nChar)
701 {
702     return nChar >= 'A' && nChar <= 'Z';
703 }
704 
705 // static
isLowerCase(sal_uInt32 nChar)706 inline bool INetMIME::isLowerCase(sal_uInt32 nChar)
707 {
708     return nChar >= 'a' && nChar <= 'z';
709 }
710 
711 // static
isAlpha(sal_uInt32 nChar)712 inline bool INetMIME::isAlpha(sal_uInt32 nChar)
713 {
714     return isUpperCase(nChar) || isLowerCase(nChar);
715 }
716 
717 // static
isAlphanumeric(sal_uInt32 nChar)718 inline bool INetMIME::isAlphanumeric(sal_uInt32 nChar)
719 {
720     return isAlpha(nChar) || isDigit(nChar);
721 }
722 
723 // static
isBase64Digit(sal_uInt32 nChar)724 inline bool INetMIME::isBase64Digit(sal_uInt32 nChar)
725 {
726     return isUpperCase(nChar) || isLowerCase(nChar) || isDigit(nChar)
727            || nChar == '+' || nChar == '/';
728 }
729 
730 // static
toUpperCase(sal_uInt32 nChar)731 inline sal_uInt32 INetMIME::toUpperCase(sal_uInt32 nChar)
732 {
733     return isLowerCase(nChar) ? nChar - ('a' - 'A') : nChar;
734 }
735 
736 // static
toLowerCase(sal_uInt32 nChar)737 inline sal_uInt32 INetMIME::toLowerCase(sal_uInt32 nChar)
738 {
739     return isUpperCase(nChar) ? nChar + ('a' - 'A') : nChar;
740 }
741 
742 // static
getWeight(sal_uInt32 nChar)743 inline int INetMIME::getWeight(sal_uInt32 nChar)
744 {
745     return isDigit(nChar) ? int(nChar - '0') : -1;
746 }
747 
748 // static
getHexWeight(sal_uInt32 nChar)749 inline int INetMIME::getHexWeight(sal_uInt32 nChar)
750 {
751     return isDigit(nChar) ? int(nChar - '0') :
752            nChar >= 'A' && nChar <= 'F' ? int(nChar - 'A' + 10) :
753            nChar >= 'a' && nChar <= 'f' ? int(nChar - 'a' + 10) : -1;
754 }
755 
756 // static
getBase64Weight(sal_uInt32 nChar)757 inline int INetMIME::getBase64Weight(sal_uInt32 nChar)
758 {
759     return isUpperCase(nChar) ? int(nChar - 'A') :
760            isLowerCase(nChar) ? int(nChar - 'a' + 26) :
761            isDigit(nChar) ? int(nChar - '0' + 52) :
762            nChar == '+' ? 62 :
763            nChar == '/' ? 63 :
764            nChar == '=' ? -1 : -2;
765 }
766 
767 // static
isHighSurrogate(sal_uInt32 nUTF16)768 inline bool INetMIME::isHighSurrogate(sal_uInt32 nUTF16)
769 {
770     return nUTF16 >= 0xD800 && nUTF16 <= 0xDBFF;
771 }
772 
773 // static
isLowSurrogate(sal_uInt32 nUTF16)774 inline bool INetMIME::isLowSurrogate(sal_uInt32 nUTF16)
775 {
776     return nUTF16 >= 0xDC00 && nUTF16 <= 0xDFFF;
777 }
778 
779 // static
toUTF32(sal_Unicode cHighSurrogate,sal_Unicode cLowSurrogate)780 inline sal_uInt32 INetMIME::toUTF32(sal_Unicode cHighSurrogate,
781                                     sal_Unicode cLowSurrogate)
782 {
783     DBG_ASSERT(isHighSurrogate(cHighSurrogate)
784                && isLowSurrogate(cLowSurrogate),
785                "INetMIME::toUTF32(): Bad chars");
786     return ((sal_uInt32(cHighSurrogate) & 0x3FF) << 10)
787                | (sal_uInt32(cLowSurrogate) & 0x3FF);
788 }
789 
790 // static
equalIgnoreCase(const ByteString & rString1,const sal_Char * pString2)791 inline bool INetMIME::equalIgnoreCase(const ByteString & rString1,
792                                       const sal_Char * pString2)
793 {
794     return equalIgnoreCase(rString1.GetBuffer(),
795                            rString1.GetBuffer() + rString1.Len(), pString2);
796 }
797 
798 // static
startsWithLineBreak(const sal_Char * pBegin,const sal_Char * pEnd)799 inline bool INetMIME::startsWithLineBreak(const sal_Char * pBegin,
800                                           const sal_Char * pEnd)
801 {
802     DBG_ASSERT(pBegin && pBegin <= pEnd,
803                "INetMIME::startsWithLineBreak(): Bad sequence");
804 
805     return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
806         // CR, LF
807 }
808 
809 // static
startsWithLineBreak(const sal_Unicode * pBegin,const sal_Unicode * pEnd)810 inline bool INetMIME::startsWithLineBreak(const sal_Unicode * pBegin,
811                                               const sal_Unicode * pEnd)
812 {
813     DBG_ASSERT(pBegin && pBegin <= pEnd,
814                "INetMIME::startsWithLineBreak(): Bad sequence");
815 
816     return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
817         // CR, LF
818 }
819 
820 // static
startsWithLineFolding(const sal_Char * pBegin,const sal_Char * pEnd)821 inline bool INetMIME::startsWithLineFolding(const sal_Char * pBegin,
822                                             const sal_Char * pEnd)
823 {
824     DBG_ASSERT(pBegin && pBegin <= pEnd,
825                "INetMIME::startsWithLineFolding(): Bad sequence");
826 
827     return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
828            && isWhiteSpace(pBegin[2]); // CR, LF
829 }
830 
831 // static
startsWithLineFolding(const sal_Unicode * pBegin,const sal_Unicode * pEnd)832 inline bool INetMIME::startsWithLineFolding(const sal_Unicode * pBegin,
833                                             const sal_Unicode * pEnd)
834 {
835     DBG_ASSERT(pBegin && pBegin <= pEnd,
836                "INetMIME::startsWithLineFolding(): Bad sequence");
837 
838     return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
839            && isWhiteSpace(pBegin[2]); // CR, LF
840 }
841 
842 // static
startsWithLinearWhiteSpace(const sal_Char * pBegin,const sal_Char * pEnd)843 inline bool INetMIME::startsWithLinearWhiteSpace(const sal_Char * pBegin,
844                                                  const sal_Char * pEnd)
845 {
846     DBG_ASSERT(pBegin && pBegin <= pEnd,
847                "INetMIME::startsWithLinearWhiteSpace(): Bad sequence");
848 
849     return pBegin != pEnd
850            && (isWhiteSpace(*pBegin) || startsWithLineFolding(pBegin, pEnd));
851 }
852 
853 // static
needsQuotedStringEscape(sal_uInt32 nChar)854 inline bool INetMIME::needsQuotedStringEscape(sal_uInt32 nChar)
855 {
856     return nChar == '"' || nChar == '\\';
857 }
858 
859 // static
translateToMIME(rtl_TextEncoding eEncoding)860 inline rtl_TextEncoding INetMIME::translateToMIME(rtl_TextEncoding eEncoding)
861 {
862 #if defined WNT
863     return eEncoding == RTL_TEXTENCODING_MS_1252 ?
864                RTL_TEXTENCODING_ISO_8859_1 : eEncoding;
865 #else // WNT
866     return eEncoding;
867 #endif // WNT
868 }
869 
870 // static
translateFromMIME(rtl_TextEncoding eEncoding)871 inline rtl_TextEncoding INetMIME::translateFromMIME(rtl_TextEncoding
872                                                         eEncoding)
873 {
874 #if defined WNT
875     return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
876                RTL_TEXTENCODING_MS_1252 : eEncoding;
877 #else // WNT
878     return eEncoding;
879 #endif // WNT
880 }
881 
882 // static
isMIMECharsetEncoding(rtl_TextEncoding eEncoding)883 inline bool INetMIME::isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
884 {
885     return ( rtl_isOctetTextEncoding(eEncoding) == sal_True );
886 }
887 
888 // static
getUTF8OctetCount(sal_uInt32 nChar)889 inline int INetMIME::getUTF8OctetCount(sal_uInt32 nChar)
890 {
891     DBG_ASSERT(nChar < 0x80000000, "INetMIME::getUTF8OctetCount(): Bad char");
892 
893     return nChar < 0x80 ? 1 :
894            nChar < 0x800 ? 2 :
895            nChar <= 0x10000 ? 3 :
896            nChar <= 0x200000 ? 4 :
897            nChar <= 0x4000000 ? 5 : 6;
898 }
899 
900 // static
getUTF32Character(const sal_Unicode * & rBegin,const sal_Unicode * pEnd)901 inline sal_uInt32 INetMIME::getUTF32Character(const sal_Unicode *& rBegin,
902                                               const sal_Unicode * pEnd)
903 {
904     DBG_ASSERT(rBegin && rBegin < pEnd,
905                "INetMIME::getUTF32Character(): Bad sequence");
906     if (rBegin + 1 < pEnd && rBegin[0] >= 0xD800 && rBegin[0] <= 0xDBFF
907         && rBegin[1] >= 0xDC00 && rBegin[1] <= 0xDFFF)
908     {
909         sal_uInt32 nUTF32 = sal_uInt32(*rBegin++ & 0x3FF) << 10;
910         return (nUTF32 | (*rBegin++ & 0x3FF)) + 0x10000;
911     }
912     else
913         return *rBegin++;
914 }
915 
916 // static
putUTF32Character(sal_Unicode * pBuffer,sal_uInt32 nUTF32)917 inline sal_Unicode * INetMIME::putUTF32Character(sal_Unicode * pBuffer,
918                                                  sal_uInt32 nUTF32)
919 {
920     DBG_ASSERT(nUTF32 <= 0x10FFFF, "INetMIME::putUTF32Character(): Bad char");
921     if (nUTF32 < 0x10000)
922         *pBuffer++ = sal_Unicode(nUTF32);
923     else
924     {
925         nUTF32 -= 0x10000;
926         *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
927         *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
928     }
929     return pBuffer;
930 }
931 
932 //============================================================================
933 class INetMIMEOutputSink
934 {
935 public:
936     static sal_uInt32 const NO_LINE_LENGTH_LIMIT = SAL_MAX_UINT32;
937 
938 private:
939     sal_uInt32 m_nColumn;
940     sal_uInt32 m_nLineLengthLimit;
941 
942 protected:
943     /** Write a sequence of octets.
944 
945         @param pBegin  Points to the start of the sequence, must not be null.
946 
947         @param pEnd  Points past the end of the sequence, must be >= pBegin.
948      */
949     virtual void writeSequence(const sal_Char * pBegin,
950                                const sal_Char * pEnd) = 0;
951 
952     /** Write a null terminated sequence of octets (without the terminating
953         null).
954 
955         @param pOctets  A null terminated sequence of octets, must not be
956         null.
957 
958         @return  The length of pOctets (without the terminating null).
959      */
960     virtual sal_Size writeSequence(const sal_Char * pSequence);
961 
962     /** Write a sequence of octets.
963 
964         @descr  The supplied sequence of UCS-4 characters is interpreted as a
965         sequence of octets.  It is an error if any of the elements of the
966         sequence has a numerical value greater than 255.
967 
968         @param pBegin  Points to the start of the sequence, must not be null.
969 
970         @param pEnd  Points past the end of the sequence, must be >= pBegin.
971      */
972     virtual void writeSequence(const sal_uInt32 * pBegin,
973                                const sal_uInt32 * pEnd);
974 
975     /** Write a sequence of octets.
976 
977         @descr  The supplied sequence of Unicode characters is interpreted as
978         a sequence of octets.  It is an error if any of the elements of the
979         sequence has a numerical value greater than 255.
980 
981         @param pBegin  Points to the start of the sequence, must not be null.
982 
983         @param pEnd  Points past the end of the sequence, must be >= pBegin.
984      */
985     virtual void writeSequence(const sal_Unicode * pBegin,
986                                const sal_Unicode * pEnd);
987 
988 public:
INetMIMEOutputSink(sal_uInt32 nTheColumn=0,sal_uInt32 nTheLineLengthLimit=INetMIME::SOFT_LINE_LENGTH_LIMIT)989     INetMIMEOutputSink(sal_uInt32 nTheColumn = 0,
990                        sal_uInt32 nTheLineLengthLimit
991                            = INetMIME::SOFT_LINE_LENGTH_LIMIT):
992         m_nColumn(nTheColumn), m_nLineLengthLimit(nTheLineLengthLimit) {}
993 
~INetMIMEOutputSink()994     virtual ~INetMIMEOutputSink() {}
995 
996     /** Get the current column.
997 
998         @return  The current column (starting from zero).
999      */
getColumn() const1000     sal_uInt32 getColumn() const { return m_nColumn; }
1001 
getLineLengthLimit() const1002     sal_uInt32 getLineLengthLimit() const { return m_nLineLengthLimit; }
1003 
setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)1004     void setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)
1005     { m_nLineLengthLimit = nTheLineLengthLimit; }
1006 
1007     virtual ErrCode getError() const;
1008 
1009     /** Write a sequence of octets.
1010 
1011         @param pBegin  Points to the start of the sequence, must not be null.
1012 
1013         @param pEnd  Points past the end of the sequence, must be >= pBegin.
1014      */
1015     inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
1016 
1017     /** Write a sequence of octets.
1018 
1019         @param pBegin  Points to the start of the sequence, must not be null.
1020 
1021         @param nLength  The length of the sequence.
1022      */
write(const sal_Char * pBegin,sal_Size nLength)1023     void write(const sal_Char * pBegin, sal_Size nLength)
1024     { write(pBegin, pBegin + nLength); }
1025 
1026     /** Write a sequence of octets.
1027 
1028         @descr  The supplied sequence of UCS-4 characters is interpreted as a
1029         sequence of octets.  It is an error if any of the elements of the
1030         sequence has a numerical value greater than 255.
1031 
1032         @param pBegin  Points to the start of the sequence, must not be null.
1033 
1034         @param pEnd  Points past the end of the sequence, must be >= pBegin.
1035      */
1036     inline void write(const sal_uInt32 * pBegin, const sal_uInt32 * pEnd);
1037 
1038     /** Write a sequence of octets.
1039 
1040         @descr  The supplied sequence of Unicode characters is interpreted as
1041         a sequence of octets.  It is an error if any of the elements of the
1042         sequence has a numerical value greater than 255.
1043 
1044         @param pBegin  Points to the start of the sequence, must not be null.
1045 
1046         @param pEnd  Points past the end of the sequence, must be >= pBegin.
1047      */
1048     inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
1049 
1050     /** Write a sequence of octets.
1051 
1052         @param rOctets  A ByteString, interpreted as a sequence of octets.
1053 
1054         @param nBegin  The offset of the first character to write.
1055 
1056         @param nEnd  The offset past the last character to write.
1057      */
1058     inline void write(const ByteString & rString, xub_StrLen nBegin,
1059                       xub_StrLen nEnd);
1060 
1061     /** Write a single octet.
1062 
1063         @param nOctet  Some octet.
1064 
1065         @return  This instance.
1066      */
1067     inline INetMIMEOutputSink & operator <<(sal_Char nOctet);
1068 
1069     /** Write a null terminated sequence of octets (without the terminating
1070         null).
1071 
1072         @param pOctets  A null terminated sequence of octets, must not be
1073         null.
1074 
1075         @return  This instance.
1076      */
1077     inline INetMIMEOutputSink & operator <<(const sal_Char * pOctets);
1078 
1079     /** Write a sequence of octets.
1080 
1081         @param rOctets  A ByteString, interpreted as a sequence of octets.
1082 
1083         @return  This instance.
1084      */
1085     inline INetMIMEOutputSink & operator <<(const ByteString & rOctets);
1086 
1087     /** Call a manipulator function.
1088 
1089         @param  pManipulator  A manipulator function.
1090 
1091         @return  Whatever the manipulator function returns.
1092      */
1093     INetMIMEOutputSink &
operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))1094     operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))
1095     { return pManipulator(*this); }
1096 
1097     /** Write a line end (CR LF).
1098      */
1099     void writeLineEnd();
1100 
1101     /** A manipulator function that writes a line end (CR LF).
1102 
1103         @param rSink  Some sink.
1104 
1105         @return  The sink rSink.
1106      */
1107     static inline INetMIMEOutputSink & endl(INetMIMEOutputSink & rSink);
1108 };
1109 
write(const sal_Char * pBegin,const sal_Char * pEnd)1110 inline void INetMIMEOutputSink::write(const sal_Char * pBegin,
1111                                       const sal_Char * pEnd)
1112 {
1113     writeSequence(pBegin, pEnd);
1114     m_nColumn += pEnd - pBegin;
1115 }
1116 
write(const sal_uInt32 * pBegin,const sal_uInt32 * pEnd)1117 inline void INetMIMEOutputSink::write(const sal_uInt32 * pBegin,
1118                                       const sal_uInt32 * pEnd)
1119 {
1120     writeSequence(pBegin, pEnd);
1121     m_nColumn += pEnd - pBegin;
1122 }
1123 
write(const sal_Unicode * pBegin,const sal_Unicode * pEnd)1124 inline void INetMIMEOutputSink::write(const sal_Unicode * pBegin,
1125                                       const sal_Unicode * pEnd)
1126 {
1127     writeSequence(pBegin, pEnd);
1128     m_nColumn += pEnd - pBegin;
1129 }
1130 
write(const ByteString & rOctets,xub_StrLen nBegin,xub_StrLen nEnd)1131 inline void INetMIMEOutputSink::write(const ByteString & rOctets,
1132                                       xub_StrLen nBegin, xub_StrLen nEnd)
1133 {
1134     writeSequence(rOctets.GetBuffer() + nBegin, rOctets.GetBuffer() + nEnd);
1135     m_nColumn += nEnd - nBegin;
1136 }
1137 
operator <<(sal_Char nOctet)1138 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(sal_Char nOctet)
1139 {
1140     writeSequence(&nOctet, &nOctet + 1);
1141     ++m_nColumn;
1142     return *this;
1143 }
1144 
operator <<(const sal_Char * pOctets)1145 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const sal_Char *
1146                                                                 pOctets)
1147 {
1148     m_nColumn += writeSequence(pOctets);
1149     return *this;
1150 }
1151 
operator <<(const ByteString & rOctets)1152 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const ByteString &
1153                                                                 rOctets)
1154 {
1155     writeSequence(rOctets.GetBuffer(), rOctets.GetBuffer() + rOctets.Len());
1156     m_nColumn += rOctets.Len();
1157     return *this;
1158 }
1159 
1160 // static
endl(INetMIMEOutputSink & rSink)1161 inline INetMIMEOutputSink & INetMIMEOutputSink::endl(INetMIMEOutputSink &
1162                                                          rSink)
1163 {
1164     rSink.writeLineEnd();
1165     return rSink;
1166 }
1167 
1168 // static
writeEscapeSequence(INetMIMEOutputSink & rSink,sal_uInt32 nChar)1169 inline void INetMIME::writeEscapeSequence(INetMIMEOutputSink & rSink,
1170                                           sal_uInt32 nChar)
1171 {
1172     DBG_ASSERT(nChar <= 0xFF, "INetMIME::writeEscapeSequence(): Bad char");
1173     rSink << '=' << sal_uInt8(getHexDigit(nChar >> 4))
1174           << sal_uInt8(getHexDigit(nChar & 15));
1175 }
1176 
1177 //============================================================================
1178 class INetMIMEStringOutputSink: public INetMIMEOutputSink
1179 {
1180     ByteString m_aBuffer;
1181     bool m_bOverflow;
1182 
1183     using INetMIMEOutputSink::writeSequence;
1184 
1185     virtual void writeSequence(const sal_Char * pBegin,
1186                                const sal_Char * pEnd);
1187 
1188 public:
INetMIMEStringOutputSink(sal_uInt32 nColumn=0,sal_uInt32 nLineLengthLimit=INetMIME::SOFT_LINE_LENGTH_LIMIT)1189     inline INetMIMEStringOutputSink(sal_uInt32 nColumn = 0,
1190                                     sal_uInt32 nLineLengthLimit
1191                                         = INetMIME::SOFT_LINE_LENGTH_LIMIT):
1192         INetMIMEOutputSink(nColumn, nLineLengthLimit), m_bOverflow(false) {}
1193 
1194     virtual ErrCode getError() const;
1195 
1196     inline ByteString takeBuffer();
1197 };
1198 
takeBuffer()1199 inline ByteString INetMIMEStringOutputSink::takeBuffer()
1200 {
1201     ByteString aTheBuffer = m_aBuffer;
1202     m_aBuffer.Erase();
1203     m_bOverflow = false;
1204     return aTheBuffer;
1205 }
1206 
1207 //============================================================================
1208 class INetMIMEUnicodeOutputSink: public INetMIMEOutputSink
1209 {
1210     UniString m_aBuffer;
1211     bool m_bOverflow;
1212 
1213     using INetMIMEOutputSink::writeSequence;
1214 
1215     virtual void writeSequence(const sal_Char * pBegin,
1216                                const sal_Char * pEnd);
1217 
1218     virtual void writeSequence(const sal_uInt32 * pBegin,
1219                                const sal_uInt32 * pEnd);
1220 
1221     virtual void writeSequence(const sal_Unicode * pBegin,
1222                                const sal_Unicode * pEnd);
1223 
1224 public:
INetMIMEUnicodeOutputSink(sal_uInt32 nColumn=0,sal_uInt32 nLineLengthLimit=INetMIME::SOFT_LINE_LENGTH_LIMIT)1225     inline INetMIMEUnicodeOutputSink(sal_uInt32 nColumn = 0,
1226                                      sal_uInt32 nLineLengthLimit
1227                                          = INetMIME::SOFT_LINE_LENGTH_LIMIT):
1228         INetMIMEOutputSink(nColumn, nLineLengthLimit), m_bOverflow(false) {}
1229 
1230     virtual ErrCode getError() const;
1231 
1232     inline UniString takeBuffer();
1233 };
1234 
takeBuffer()1235 inline UniString INetMIMEUnicodeOutputSink::takeBuffer()
1236 {
1237     UniString aTheBuffer = m_aBuffer;
1238     m_aBuffer.Erase();
1239     m_bOverflow = false;
1240     return aTheBuffer;
1241 }
1242 
1243 //============================================================================
1244 class INetMIMEEncodedWordOutputSink
1245 {
1246 public:
1247     enum Context { CONTEXT_TEXT = 1,
1248                    CONTEXT_COMMENT = 2,
1249                    CONTEXT_PHRASE = 4 };
1250 
1251     enum Space { SPACE_NO, SPACE_ENCODED, SPACE_ALWAYS };
1252 
1253 private:
1254     enum { BUFFER_SIZE = 256 };
1255 
1256     enum Coding { CODING_NONE, CODING_QUOTED, CODING_ENCODED,
1257                   CODING_ENCODED_TERMINATED };
1258 
1259     enum EncodedWordState { STATE_INITIAL, STATE_FIRST_EQUALS,
1260                             STATE_FIRST_QUESTION, STATE_CHARSET,
1261                             STATE_SECOND_QUESTION, STATE_ENCODING,
1262                             STATE_THIRD_QUESTION, STATE_ENCODED_TEXT,
1263                             STATE_FOURTH_QUESTION, STATE_SECOND_EQUALS,
1264                             STATE_BAD };
1265 
1266     INetMIMEOutputSink & m_rSink;
1267     Context m_eContext;
1268     Space m_eInitialSpace;
1269     sal_uInt32 m_nExtraSpaces;
1270     INetMIMECharsetList_Impl * m_pEncodingList;
1271     sal_Unicode * m_pBuffer;
1272     sal_uInt32 m_nBufferSize;
1273     sal_Unicode * m_pBufferEnd;
1274     Coding m_ePrevCoding;
1275     rtl_TextEncoding m_ePrevMIMEEncoding;
1276     Coding m_eCoding;
1277     sal_uInt32 m_nQuotedEscaped;
1278     EncodedWordState m_eEncodedWordState;
1279 
1280     inline bool needsEncodedWordEscape(sal_uInt32 nChar) const;
1281 
1282     void finish(bool bWriteTrailer);
1283 
1284 public:
1285     inline INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,
1286                                          Context eTheContext,
1287                                          Space eTheInitialSpace,
1288                                          rtl_TextEncoding ePreferredEncoding);
1289 
1290     ~INetMIMEEncodedWordOutputSink();
1291 
1292     INetMIMEEncodedWordOutputSink & operator <<(sal_uInt32 nChar);
1293 
1294     inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
1295 
1296     inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
1297 
1298     inline bool flush();
1299 };
1300 
INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,Context eTheContext,Space eTheInitialSpace,rtl_TextEncoding ePreferredEncoding)1301 inline INetMIMEEncodedWordOutputSink::INetMIMEEncodedWordOutputSink(
1302            INetMIMEOutputSink & rTheSink, Context eTheContext,
1303            Space eTheInitialSpace, rtl_TextEncoding ePreferredEncoding):
1304     m_rSink(rTheSink),
1305     m_eContext(eTheContext),
1306     m_eInitialSpace(eTheInitialSpace),
1307     m_nExtraSpaces(0),
1308     m_pEncodingList(INetMIME::createPreferredCharsetList(ePreferredEncoding)),
1309     m_ePrevCoding(CODING_NONE),
1310     m_eCoding(CODING_NONE),
1311     m_nQuotedEscaped(0),
1312     m_eEncodedWordState(STATE_INITIAL)
1313 {
1314     m_nBufferSize = BUFFER_SIZE;
1315     m_pBuffer = static_cast< sal_Unicode * >(rtl_allocateMemory(
1316                                                  m_nBufferSize
1317                                                      * sizeof (sal_Unicode)));
1318     m_pBufferEnd = m_pBuffer;
1319 }
1320 
write(const sal_Char * pBegin,const sal_Char * pEnd)1321 inline void INetMIMEEncodedWordOutputSink::write(const sal_Char * pBegin,
1322                                                  const sal_Char * pEnd)
1323 {
1324     DBG_ASSERT(pBegin && pBegin <= pEnd,
1325                "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1326 
1327     while (pBegin != pEnd)
1328         operator <<(*pBegin++);
1329 }
1330 
write(const sal_Unicode * pBegin,const sal_Unicode * pEnd)1331 inline void INetMIMEEncodedWordOutputSink::write(const sal_Unicode * pBegin,
1332                                                  const sal_Unicode * pEnd)
1333 {
1334     DBG_ASSERT(pBegin && pBegin <= pEnd,
1335                "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1336 
1337     while (pBegin != pEnd)
1338         operator <<(*pBegin++);
1339 }
1340 
flush()1341 inline bool INetMIMEEncodedWordOutputSink::flush()
1342 {
1343     finish(true);
1344     return m_ePrevCoding != CODING_NONE;
1345 }
1346 
1347 //============================================================================
1348 struct INetContentTypeParameter
1349 {
1350     /** The name of the attribute, in US-ASCII encoding and converted to lower
1351         case.  If a parameter value is split as described in RFC 2231, there
1352         will only be one item for the complete parameter, with the attribute
1353         name lacking any section suffix.
1354      */
1355     const ByteString m_sAttribute;
1356 
1357     /** The optional character set specification (see RFC 2231), in US-ASCII
1358         encoding and converted to lower case.
1359      */
1360     const ByteString m_sCharset;
1361 
1362     /** The optional language specification (see RFC 2231), in US-ASCII
1363         encoding and converted to lower case.
1364      */
1365     const ByteString m_sLanguage;
1366 
1367     /** The attribute value.  If the value is a quoted-string, it is
1368         'unpacked.'  If a character set is specified, and the value can be
1369         converted to Unicode, this is done.  Also, if no character set is
1370         specified, it is first tried to convert the value from UTF-8 encoding
1371         to Unicode, and if that doesn't work (because the value is not in
1372         UTF-8 encoding), it is converted from ISO-8859-1 encoding to Unicode
1373         (which will always work).  But if a character set is specified and the
1374         value cannot be converted from that character set to Unicode, special
1375         action is taken to produce a value that can possibly be transformed
1376         back into its original form:  Any 8-bit character from a non-encoded
1377         part of the original value is directly converted to Unicode
1378         (effectively handling it as if it was ISO-8859-1 encoded), and any
1379         8-bit character from an encoded part of the original value is mapped
1380         to the range U+F800..U+F8FF at the top of the Corporate Use Subarea
1381         within Unicode's Private Use Area (effectively adding 0xF800 to the
1382         character's numeric value).
1383      */
1384     const UniString m_sValue;
1385 
1386     /** This is true if the value is successfuly converted to Unicode, and
1387         false if the value is a special mixture of ISO-LATIN-1 characters and
1388         characters from Unicode's Private Use Area.
1389      */
1390     const bool m_bConverted;
1391 
1392     inline INetContentTypeParameter(const ByteString & rTheAttribute,
1393                                     const ByteString & rTheCharset,
1394                                     const ByteString & rTheLanguage,
1395                                     const UniString & rTheValue,
1396                                     bool bTheConverted);
1397 };
1398 
INetContentTypeParameter(const ByteString & rTheAttribute,const ByteString & rTheCharset,const ByteString & rTheLanguage,const UniString & rTheValue,bool bTheConverted)1399 inline INetContentTypeParameter::INetContentTypeParameter(const ByteString &
1400                                                               rTheAttribute,
1401                                                           const ByteString &
1402                                                               rTheCharset,
1403                                                           const ByteString &
1404                                                               rTheLanguage,
1405                                                           const UniString &
1406                                                               rTheValue,
1407                                                           bool bTheConverted):
1408     m_sAttribute(rTheAttribute),
1409     m_sCharset(rTheCharset),
1410     m_sLanguage(rTheLanguage),
1411     m_sValue(rTheValue),
1412     m_bConverted(bTheConverted)
1413 {}
1414 
1415 //============================================================================
1416 class TOOLS_DLLPUBLIC INetContentTypeParameterList: private List
1417 {
1418 public:
~INetContentTypeParameterList()1419     ~INetContentTypeParameterList() { Clear(); }
1420 
1421     using List::Count;
1422 
1423     void Clear();
1424 
Insert(INetContentTypeParameter * pParameter,sal_uIntPtr nIndex)1425     void Insert(INetContentTypeParameter * pParameter, sal_uIntPtr nIndex)
1426     { List::Insert(pParameter, nIndex); }
1427 
1428     inline const INetContentTypeParameter * GetObject(sal_uIntPtr nIndex) const;
1429 
1430     const INetContentTypeParameter * find(const ByteString & rAttribute)
1431         const;
1432 };
1433 
1434 inline const INetContentTypeParameter *
GetObject(sal_uIntPtr nIndex) const1435 INetContentTypeParameterList::GetObject(sal_uIntPtr nIndex) const
1436 {
1437     return static_cast< INetContentTypeParameter * >(List::GetObject(nIndex));
1438 }
1439 
1440 #endif // TOOLS_INETMIME_HXX
1441 
1442