xref: /AOO41X/main/sal/rtl/source/uri.cxx (revision 1ecadb572e7010ff3b3382ad9bf179dbc6efadbb)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_sal.hxx"
30 
31 #include "rtl/uri.h"
32 
33 #include "surrogates.h"
34 
35 #include "osl/diagnose.h"
36 #include "rtl/strbuf.hxx"
37 #include "rtl/textenc.h"
38 #include "rtl/textcvt.h"
39 #include "rtl/uri.h"
40 #include "rtl/ustrbuf.h"
41 #include "rtl/ustrbuf.hxx"
42 #include "rtl/ustring.h"
43 #include "rtl/ustring.hxx"
44 #include "sal/types.h"
45 
46 #include <cstddef>
47 
48 namespace {
49 
// Number of entries in one URI character-class table.  The tables are indexed
// by code point, so only values below this bound can be looked up (isValid
// rejects everything else).
std::size_t const nCharClassSize = 128;

// The character that introduces a "%XX" escape sequence.
sal_Unicode const cEscapePrefix = 0x25; // '%'
53 
54 inline bool isDigit(sal_uInt32 nUtf32)
55 {
56     return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9'
57 }
58 
59 inline bool isAlpha(sal_uInt32 nUtf32)
60 {
61     // 'A'--'Z', 'a'--'z'
62     return (
63             (nUtf32 >= 0x41 && nUtf32 <= 0x5A) ||
64             (nUtf32 >= 0x61 && nUtf32 <= 0x7A)
65            );
66 }
67 
68 inline bool isHighSurrogate(sal_uInt32 nUtf16)
69 {
70     return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
71 }
72 
73 inline bool isLowSurrogate(sal_uInt32 nUtf16)
74 {
75     return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
76 }
77 
78 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
79 {
80     return SAL_RTL_COMBINE_SURROGATES(high, low);
81 }
82 
83 inline int getHexWeight(sal_uInt32 nUtf32)
84 {
85     return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
86                static_cast< int >(nUtf32 - 0x30) :
87            nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
88                static_cast< int >(nUtf32 - 0x41 + 10) :
89            nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
90                static_cast< int >(nUtf32 - 0x61 + 10) :
91                -1; // not a hex digit
92 }
93 
94 inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
95 {
96     return nUtf32 < nCharClassSize && pCharClass[nUtf32];
97 }
98 
99 inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
100                          sal_Unicode cChar)
101 {
102     rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
103 }
104 
/* Tells how readUcs4 obtained the value it returned. */
enum EscapeType
{
    // The value was read literally (a single UTF-16 unit or a surrogate pair),
    // not from an escape sequence.
    EscapeNo,
    // The value is a character decoded from one or more "%XX" escape
    // sequences.
    EscapeChar,
    // The value is the raw octet of a single "%XX" escape sequence that could
    // not be decoded as a character in the requested charset.
    EscapeOctet
};
111 
/* Read any of the following:

   - sequence of escape sequences representing character from eCharset,
     translated to single UCS4 character; or

   - pair of UTF-16 surrogates, translated to single UCS4 character; or

   - single UTF-16 character, extended to UCS4 character.

   pBegin    in/out: *pBegin is the read position; it is advanced past
             everything that was consumed.  At least one UTF-16 unit is always
             consumed, and *pBegin is dereferenced without a bounds check, so
             the caller must guarantee *pBegin < pEnd.
   pEnd      end of the input.
   bEncoded  if false, "%XX" sequences are not interpreted as escapes.
   eCharset  charset used to interpret escaped octets above 0x7F.
   pType     out: how the returned value was obtained (see EscapeType).

   Returns the character read, or, for EscapeOctet, the value of the single
   escaped octet (in which case only that one escape sequence is consumed).
 */
sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
                    bool bEncoded, rtl_TextEncoding eCharset,
                    EscapeType * pType)
{
    sal_uInt32 nChar = *(*pBegin)++;
    int nWeight1;
    int nWeight2;
    // An escape is only recognized if '%' is followed by two hex digits:
    if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
        && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
        && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
    {
        *pBegin += 2;
        nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
        if (nChar <= 0x7F)
            // Octets up to 0x7F are taken as characters regardless of eCharset:
            *pType = EscapeChar;
        else if (eCharset == RTL_TEXTENCODING_UTF8)
        {
            // Hand-rolled UTF-8 decoding; nChar is a candidate lead octet:
            if (nChar >= 0xC0 && nChar <= 0xF4)
            {
                sal_uInt32 nEncoded; // code point accumulated so far
                int nShift; // bit position for the first continuation octet
                sal_uInt32 nMin; // smallest value legal for this length
                if (nChar <= 0xDF)
                {
                    nEncoded = (nChar & 0x1F) << 6;
                    nShift = 0;
                    nMin = 0x80;
                }
                else if (nChar <= 0xEF)
                {
                    nEncoded = (nChar & 0x0F) << 12;
                    nShift = 6;
                    nMin = 0x800;
                }
                else
                {
                    nEncoded = (nChar & 0x07) << 18;
                    nShift = 12;
                    nMin = 0x10000;
                }
                // Look ahead with p; *pBegin is only moved if the whole
                // sequence turns out to be valid:
                sal_Unicode const * p = *pBegin;
                bool bUTF8 = true;
                for (; nShift >= 0; nShift -= 6)
                {
                    // Each continuation octet must be an escape "%8X".."%BX"
                    // (first hex digit 8--11):
                    if (pEnd - p < 3 || p[0] != cEscapePrefix
                        || (nWeight1 = getHexWeight(p[1])) < 8
                        || nWeight1 > 11
                        || (nWeight2 = getHexWeight(p[2])) < 0)
                    {
                        bUTF8 = sal_False;
                        break;
                    }
                    p += 3;
                    // Add the six payload bits of the continuation octet:
                    nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
                }
                // Reject overlong forms (below nMin), surrogates, and values
                // above 0x10FFFF:
                if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
                    && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
                {
                    *pBegin = p;
                    *pType = EscapeChar;
                    return nEncoded;
                }
            }
            // Not a valid UTF-8 sequence; report just the first octet:
            *pType = EscapeOctet;
        }
        else
        {
            // Any other charset: collect octets in aBuf and retry the
            // conversion until it yields a character or fails for good.
            rtl::OStringBuffer aBuf;
            aBuf.append(static_cast< char >(nChar));
            rtl_TextToUnicodeConverter aConverter
                = rtl_createTextToUnicodeConverter(eCharset);
            // Look ahead with p; *pBegin is only moved on success:
            sal_Unicode const * p = *pBegin;
            for (;;)
            {
                sal_Unicode aDst[2];
                sal_uInt32 nInfo;
                sal_Size nConverted;
                sal_Size nDstSize = rtl_convertTextToUnicode(
                    aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
                    sizeof aDst / sizeof aDst[0],
                    (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
                     | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
                     | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
                    &nInfo, &nConverted);
                if (nInfo == 0)
                {
                    // The collected octets form exactly one character:
                    OSL_ASSERT(
                        nConverted
                        == sal::static_int_cast< sal_uInt32 >(
                            aBuf.getLength()));
                    rtl_destroyTextToUnicodeConverter(aConverter);
                    *pBegin = p;
                    *pType = EscapeChar;
                    OSL_ASSERT(
                        nDstSize == 1
                        || (nDstSize == 2 && isHighSurrogate(aDst[0])
                            && isLowSurrogate(aDst[1])));
                    return nDstSize == 1
                        ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
                }
                else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
                         && pEnd - p >= 3 && p[0] == cEscapePrefix
                         && (nWeight1 = getHexWeight(p[1])) >= 0
                         && (nWeight2 = getHexWeight(p[2])) >= 0)
                {
                    // More input is needed and the next item is another
                    // escaped octet; add it and retry:
                    p += 3;
                    aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
                }
                else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
                         && p != pEnd && *p <= 0x7F)
                {
                    // More input is needed and the next item is an unescaped
                    // character up to 0x7F; add it as an octet and retry:
                    aBuf.append(static_cast< char >(*p++));
                }
                else
                {
                    OSL_ASSERT(
                        (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
                        == 0);
                    break;
                }
            }
            // Conversion failed; report just the first octet:
            rtl_destroyTextToUnicodeConverter(aConverter);
            *pType = EscapeOctet;
        }
        return nChar;
    }
    else
    {
        // No escape sequence: return the unit itself, or combine it with a
        // directly following low surrogate if it is a high surrogate:
        *pType = EscapeNo;
        return isHighSurrogate(nChar) && *pBegin < pEnd
               && isLowSurrogate(**pBegin) ?
                   combineSurrogates(nChar, *(*pBegin)++) : nChar;
    }
}
255 
256 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
257 {
258     OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
259     if (nUtf32 <= 0xFFFF) {
260         writeUnicode(
261             pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
262     } else {
263         nUtf32 -= 0x10000;
264         writeUnicode(
265             pBuffer, pCapacity,
266             static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
267         writeUnicode(
268             pBuffer, pCapacity,
269             static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
270     }
271 }
272 
273 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
274                       sal_uInt32 nOctet)
275 {
276     OSL_ENSURE(nOctet <= 0xFF, "bad octet");
277 
278     static sal_Unicode const aHex[16]
279         = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
280             0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
281 
282     writeUnicode(pBuffer, pCapacity, cEscapePrefix);
283     writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
284     writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
285 }
286 
287 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
288                      sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
289 {
290     OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
291     if (eCharset == RTL_TEXTENCODING_UTF8) {
292         if (nUtf32 < 0x80)
293             writeEscapeOctet(pBuffer, pCapacity, nUtf32);
294         else if (nUtf32 < 0x800)
295         {
296             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
297             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
298         }
299         else if (nUtf32 < 0x10000)
300         {
301             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
302             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
303             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
304         }
305         else
306         {
307             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
308             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
309             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
310             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
311         }
312     } else {
313         rtl_UnicodeToTextConverter aConverter
314             = rtl_createUnicodeToTextConverter(eCharset);
315         sal_Unicode aSrc[2];
316         sal_Size nSrcSize;
317         if (nUtf32 <= 0xFFFF)
318         {
319             aSrc[0] = static_cast< sal_Unicode >(nUtf32);
320             nSrcSize = 1;
321         }
322         else
323         {
324             aSrc[0] = static_cast< sal_Unicode >(
325                 ((nUtf32 - 0x10000) >> 10) | 0xD800);
326             aSrc[1] = static_cast< sal_Unicode >(
327                 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
328             nSrcSize = 2;
329         }
330         sal_Char aDst[32]; // FIXME  random value
331         sal_uInt32 nInfo;
332         sal_Size nConverted;
333         sal_Size nDstSize = rtl_convertUnicodeToText(
334             aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
335             RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
336             | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
337             | RTL_UNICODETOTEXT_FLAGS_FLUSH,
338             &nInfo, &nConverted);
339         OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
340         rtl_destroyUnicodeToTextConverter(aConverter);
341         if (nInfo == 0) {
342             OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText");
343             for (sal_Size i = 0; i < nDstSize; ++i)
344                 writeEscapeOctet(pBuffer, pCapacity,
345                                  static_cast< unsigned char >(aDst[i]));
346                     // FIXME  all octets are escaped, even if there is no need
347         } else {
348             if (bStrict) {
349                 return false;
350             } else {
351                 writeUcs4(pBuffer, pCapacity, nUtf32);
352             }
353         }
354     }
355     return true;
356 }
357 
358 struct Component
359 {
360     sal_Unicode const * pBegin;
361     sal_Unicode const * pEnd;
362 
363     inline Component(): pBegin(0) {}
364 
365     inline bool isPresent() const { return pBegin != 0; }
366 
367     inline sal_Int32 getLength() const;
368 };
369 
370 inline sal_Int32 Component::getLength() const
371 {
372     OSL_ENSURE(isPresent(), "taking length of non-present component");
373     return static_cast< sal_Int32 >(pEnd - pBegin);
374 }
375 
/* The components of a URI reference as filled in by parseUriRef.  The
   delimiters are kept as part of the components they introduce or end. */
struct Components
{
    Component aScheme; // includes the trailing ':'
    Component aAuthority; // includes the leading "//"
    Component aPath; // always set by parseUriRef, but may be empty
    Component aQuery; // includes the leading '?'
    Component aFragment; // includes the leading '#'
};
384 
385 void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
386 {
387     // This algorithm is liberal and accepts various forms of illegal input.
388 
389     sal_Unicode const * pBegin = pUriRef->buffer;
390     sal_Unicode const * pEnd = pBegin + pUriRef->length;
391     sal_Unicode const * pPos = pBegin;
392 
393     if (pPos != pEnd && isAlpha(*pPos))
394         for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
395             if (*p == ':')
396             {
397                 pComponents->aScheme.pBegin = pBegin;
398                 pComponents->aScheme.pEnd = ++p;
399                 pPos = p;
400                 break;
401             }
402             else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-'
403                      && *p != '.')
404                 break;
405 
406     if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
407     {
408         pComponents->aAuthority.pBegin = pPos;
409         pPos += 2;
410         while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
411             ++pPos;
412         pComponents->aAuthority.pEnd = pPos;
413     }
414 
415     pComponents->aPath.pBegin = pPos;
416     while (pPos != pEnd && *pPos != '?' && * pPos != '#')
417         ++pPos;
418     pComponents->aPath.pEnd = pPos;
419 
420     if (pPos != pEnd && *pPos == '?')
421     {
422         pComponents->aQuery.pBegin = pPos++;
423         while (pPos != pEnd && * pPos != '#')
424             ++pPos;
425         pComponents->aQuery.pEnd = pPos;
426     }
427 
428     if (pPos != pEnd)
429     {
430         OSL_ASSERT(*pPos == '#');
431         pComponents->aFragment.pBegin = pPos;
432         pComponents->aFragment.pEnd = pEnd;
433     }
434 }
435 
436 rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath)
437 {
438     OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/');
439     OSL_ASSERT(rRelPath.isPresent());
440 
441     // The invariant of aBuffer is that it always starts and ends with a slash
442     // (until probably right at the end of the algorithm, when the last segment
443     // of rRelPath is added, which does not necessarily end in a slash):
444     rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
445         // XXX  numeric overflow
446 
447     // Segments "." and ".." within rBasePath are not conisdered special (but
448     // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
449     // bit unclear about this point:
450     sal_Int32 nFixed = 1;
451     sal_Unicode const * p = rBasePath.pBegin + 1;
452     for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
453         if (*q == '/')
454         {
455             if (
456                 (q - p == 1 && p[0] == '.') ||
457                 (q - p == 2 && p[0] == '.' && p[1] == '.')
458                )
459             {
460                 nFixed = q + 1 - rBasePath.pBegin;
461             }
462             p = q + 1;
463         }
464     aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin);
465 
466     p = rRelPath.pBegin;
467     if (p != rRelPath.pEnd)
468         for (;;)
469         {
470             sal_Unicode const * q = p;
471             sal_Unicode const * r;
472             for (;;)
473             {
474                 if (q == rRelPath.pEnd)
475                 {
476                     r = q;
477                     break;
478                 }
479                 if (*q == '/')
480                 {
481                     r = q + 1;
482                     break;
483                 }
484                 ++q;
485             }
486             if (q - p == 2 && p[0] == '.' && p[1] == '.')
487             {
488                 // Erroneous excess segments ".." within rRelPath are left
489                 // intact, as the examples in RFC 2396, section C.2, suggest:
490                 sal_Int32 i = aBuffer.getLength() - 1;
491                 if (i < nFixed)
492                 {
493                     aBuffer.append(p, r - p);
494                     nFixed += 3;
495                 }
496                 else
497                 {
498                     while (aBuffer.charAt(i - 1) != '/')
499                         --i;
500                     aBuffer.setLength(i);
501                 }
502             }
503             else if (q - p != 1 || *p != '.')
504                 aBuffer.append(p, r - p);
505             if (q == rRelPath.pEnd)
506                 break;
507             p = q + 1;
508         }
509 
510     return aBuffer.makeStringAndClear();
511 }
512 
513 }
514 
/* Return the character-class table for eCharClass: an array of nCharClassSize
   flags indexed by code point, where a non-zero entry marks a character that
   belongs to the class.  The tables are static, so the returned pointer stays
   valid.

   eCharClass is used directly as the row index.  The range check below is
   only an OSL_ENSURE diagnostic; the array is indexed regardless of its
   outcome, so callers must pass a valid value.
 */
sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
    SAL_THROW_EXTERN_C()
{
    // One row per character class, eight lines of sixteen entries each; the
    // trailing comments show the characters the entries of a line stand for.
    static sal_Bool const aCharClass[][nCharClassSize]
    = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
       }};
    // Diagnose a class value that is negative or beyond the last row:
    OSL_ENSURE(
        (eCharClass >= 0
         && (sal::static_int_cast< std::size_t >(eCharClass)
             < sizeof aCharClass / sizeof aCharClass[0])),
        "bad eCharClass");
    return aCharClass[eCharClass];
}
598 
599 void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
600                             rtl_UriEncodeMechanism eMechanism,
601                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
602     SAL_THROW_EXTERN_C()
603 {
604     OSL_ENSURE(!pCharClass[0x25], "bad pCharClass");
605         // make sure the percent sign is encoded...
606 
607     sal_Unicode const * p = pText->buffer;
608     sal_Unicode const * pEnd = p + pText->length;
609     sal_Int32 nCapacity = 0;
610     rtl_uString_new(pResult);
611     while (p < pEnd)
612     {
613         EscapeType eType;
614         sal_uInt32 nUtf32 = readUcs4(
615             &p, pEnd,
616             (eMechanism == rtl_UriEncodeKeepEscapes
617              || eMechanism == rtl_UriEncodeCheckEscapes
618              || eMechanism == rtl_UriEncodeStrictKeepEscapes),
619             eCharset, &eType);
620         switch (eType)
621         {
622         case EscapeNo:
623             if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
624                 writeUnicode(pResult, &nCapacity,
625                              static_cast< sal_Unicode >(nUtf32));
626             else if (!writeEscapeChar(
627                          pResult, &nCapacity, nUtf32, eCharset,
628                          (eMechanism == rtl_UriEncodeStrict
629                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
630             {
631                 rtl_uString_new(pResult);
632                 return;
633             }
634             break;
635 
636         case EscapeChar:
637             if (eMechanism == rtl_UriEncodeCheckEscapes
638                 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
639                 writeUnicode(pResult, &nCapacity,
640                              static_cast< sal_Unicode >(nUtf32));
641             else if (!writeEscapeChar(
642                          pResult, &nCapacity, nUtf32, eCharset,
643                          (eMechanism == rtl_UriEncodeStrict
644                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
645             {
646                 rtl_uString_new(pResult);
647                 return;
648             }
649             break;
650 
651         case EscapeOctet:
652             writeEscapeOctet(pResult, &nCapacity, nUtf32);
653             break;
654         }
655     }
656 }
657 
658 void SAL_CALL rtl_uriDecode(rtl_uString * pText,
659                             rtl_UriDecodeMechanism eMechanism,
660                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
661     SAL_THROW_EXTERN_C()
662 {
663     switch (eMechanism)
664     {
665     case rtl_UriDecodeNone:
666         rtl_uString_assign(pResult, pText);
667         break;
668 
669     case rtl_UriDecodeToIuri:
670         eCharset = RTL_TEXTENCODING_UTF8;
671     default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
672         {
673             sal_Unicode const * p = pText->buffer;
674             sal_Unicode const * pEnd = p + pText->length;
675             sal_Int32 nCapacity = 0;
676             rtl_uString_new(pResult);
677             while (p < pEnd)
678             {
679                 EscapeType eType;
680                 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
681                 switch (eType)
682                 {
683                 case EscapeChar:
684                     if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
685                     {
686                         writeEscapeOctet(pResult, &nCapacity, nUtf32);
687                         break;
688                     }
689                 case EscapeNo:
690                     writeUcs4(pResult, &nCapacity, nUtf32);
691                     break;
692 
693                 case EscapeOctet:
694                     if (eMechanism == rtl_UriDecodeStrict) {
695                         rtl_uString_new(pResult);
696                         return;
697                     }
698                     writeEscapeOctet(pResult, &nCapacity, nUtf32);
699                     break;
700                 }
701             }
702         }
703         break;
704     }
705 }
706 
707 sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
708                                          rtl_uString * pRelUriRef,
709                                          rtl_uString ** pResult,
710                                          rtl_uString ** pException)
711     SAL_THROW_EXTERN_C()
712 {
713     // If pRelUriRef starts with a scheme component it is an absolute URI
714     // reference, and we are done (i.e., this algorithm does not support
715     // backwards-compatible relative URIs starting with a scheme component, see
716     // RFC 2396, section 5.2, step 3):
717     Components aRelComponents;
718     parseUriRef(pRelUriRef, &aRelComponents);
719     if (aRelComponents.aScheme.isPresent())
720     {
721         rtl_uString_assign(pResult, pRelUriRef);
722         return true;
723     }
724 
725     // Parse pBaseUriRef; if the scheme component is not present or not valid,
726     // or the path component is not empty and starts with anything but a slash,
727     // an exception is raised:
728     Components aBaseComponents;
729     parseUriRef(pBaseUriRef, &aBaseComponents);
730     if (!aBaseComponents.aScheme.isPresent())
731     {
732         rtl::OUString aMessage(pBaseUriRef);
733         aMessage += rtl::OUString(
734                         RTL_CONSTASCII_USTRINGPARAM(
735                             " does not start with a scheme component"));
736         rtl_uString_assign(pException,
737                            const_cast< rtl::OUString & >(aMessage).pData);
738         return false;
739     }
740     if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
741         && *aBaseComponents.aPath.pBegin != '/')
742     {
743         rtl::OUString aMessage(pBaseUriRef);
744         aMessage += rtl::OUString(
745                         RTL_CONSTASCII_USTRINGPARAM(
746                             "path component does not start with slash"));
747         rtl_uString_assign(pException, aMessage.pData);
748         return false;
749     }
750 
751     // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
752     // into an absolute one (if the relative URI is a reference to the "current
753     // document," the "current document" is here taken to be the base URI):
754     rtl::OUStringBuffer aBuffer;
755     aBuffer.append(aBaseComponents.aScheme.pBegin,
756                    aBaseComponents.aScheme.getLength());
757     if (aRelComponents.aAuthority.isPresent())
758     {
759         aBuffer.append(aRelComponents.aAuthority.pBegin,
760                        aRelComponents.aAuthority.getLength());
761         aBuffer.append(aRelComponents.aPath.pBegin,
762                        aRelComponents.aPath.getLength());
763         if (aRelComponents.aQuery.isPresent())
764             aBuffer.append(aRelComponents.aQuery.pBegin,
765                            aRelComponents.aQuery.getLength());
766     }
767     else
768     {
769         if (aBaseComponents.aAuthority.isPresent())
770             aBuffer.append(aBaseComponents.aAuthority.pBegin,
771                            aBaseComponents.aAuthority.getLength());
772         if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
773             && !aRelComponents.aQuery.isPresent())
774         {
775             aBuffer.append(aBaseComponents.aPath.pBegin,
776                            aBaseComponents.aPath.getLength());
777             if (aBaseComponents.aQuery.isPresent())
778                 aBuffer.append(aBaseComponents.aQuery.pBegin,
779                                aBaseComponents.aQuery.getLength());
780         }
781         else
782         {
783             if (*aRelComponents.aPath.pBegin == '/')
784                 aBuffer.append(aRelComponents.aPath.pBegin,
785                                aRelComponents.aPath.getLength());
786             else
787                 aBuffer.append(joinPaths(aBaseComponents.aPath,
788                                          aRelComponents.aPath));
789             if (aRelComponents.aQuery.isPresent())
790                 aBuffer.append(aRelComponents.aQuery.pBegin,
791                                aRelComponents.aQuery.getLength());
792         }
793     }
794     if (aRelComponents.aFragment.isPresent())
795         aBuffer.append(aRelComponents.aFragment.pBegin,
796                        aRelComponents.aFragment.getLength());
797     rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
798     return true;
799 }
800