xref: /AOO41X/main/sal/rtl/source/uri.cxx (revision cdf0e10c4e3984b49a9502b011690b615761d4a3)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_sal.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include "rtl/uri.h"
32*cdf0e10cSrcweir 
33*cdf0e10cSrcweir #include "surrogates.h"
34*cdf0e10cSrcweir 
35*cdf0e10cSrcweir #include "osl/diagnose.h"
36*cdf0e10cSrcweir #include "rtl/strbuf.hxx"
37*cdf0e10cSrcweir #include "rtl/textenc.h"
38*cdf0e10cSrcweir #include "rtl/textcvt.h"
39*cdf0e10cSrcweir #include "rtl/uri.h"
40*cdf0e10cSrcweir #include "rtl/ustrbuf.h"
41*cdf0e10cSrcweir #include "rtl/ustrbuf.hxx"
42*cdf0e10cSrcweir #include "rtl/ustring.h"
43*cdf0e10cSrcweir #include "rtl/ustring.hxx"
44*cdf0e10cSrcweir #include "sal/types.h"
45*cdf0e10cSrcweir 
46*cdf0e10cSrcweir #include <cstddef>
47*cdf0e10cSrcweir 
48*cdf0e10cSrcweir namespace {
49*cdf0e10cSrcweir 
// Number of entries in one URI character-class table.  Only code points
// below this bound can ever be members of a character class.
const std::size_t nCharClassSize = 128;
51*cdf0e10cSrcweir 
52*cdf0e10cSrcweir sal_Unicode const cEscapePrefix = 0x25; // '%'
53*cdf0e10cSrcweir 
54*cdf0e10cSrcweir inline bool isDigit(sal_uInt32 nUtf32)
55*cdf0e10cSrcweir {
56*cdf0e10cSrcweir     return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9'
57*cdf0e10cSrcweir }
58*cdf0e10cSrcweir 
59*cdf0e10cSrcweir inline bool isAlpha(sal_uInt32 nUtf32)
60*cdf0e10cSrcweir {
61*cdf0e10cSrcweir     // 'A'--'Z', 'a'--'z'
62*cdf0e10cSrcweir     return (
63*cdf0e10cSrcweir             (nUtf32 >= 0x41 && nUtf32 <= 0x5A) ||
64*cdf0e10cSrcweir             (nUtf32 >= 0x61 && nUtf32 <= 0x7A)
65*cdf0e10cSrcweir            );
66*cdf0e10cSrcweir }
67*cdf0e10cSrcweir 
68*cdf0e10cSrcweir inline bool isHighSurrogate(sal_uInt32 nUtf16)
69*cdf0e10cSrcweir {
70*cdf0e10cSrcweir     return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
71*cdf0e10cSrcweir }
72*cdf0e10cSrcweir 
73*cdf0e10cSrcweir inline bool isLowSurrogate(sal_uInt32 nUtf16)
74*cdf0e10cSrcweir {
75*cdf0e10cSrcweir     return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
76*cdf0e10cSrcweir }
77*cdf0e10cSrcweir 
78*cdf0e10cSrcweir inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
79*cdf0e10cSrcweir {
80*cdf0e10cSrcweir     return SAL_RTL_COMBINE_SURROGATES(high, low);
81*cdf0e10cSrcweir }
82*cdf0e10cSrcweir 
83*cdf0e10cSrcweir inline int getHexWeight(sal_uInt32 nUtf32)
84*cdf0e10cSrcweir {
85*cdf0e10cSrcweir     return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
86*cdf0e10cSrcweir                static_cast< int >(nUtf32 - 0x30) :
87*cdf0e10cSrcweir            nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
88*cdf0e10cSrcweir                static_cast< int >(nUtf32 - 0x41 + 10) :
89*cdf0e10cSrcweir            nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
90*cdf0e10cSrcweir                static_cast< int >(nUtf32 - 0x61 + 10) :
91*cdf0e10cSrcweir                -1; // not a hex digit
92*cdf0e10cSrcweir }
93*cdf0e10cSrcweir 
94*cdf0e10cSrcweir inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
95*cdf0e10cSrcweir {
96*cdf0e10cSrcweir     return nUtf32 < nCharClassSize && pCharClass[nUtf32];
97*cdf0e10cSrcweir }
98*cdf0e10cSrcweir 
99*cdf0e10cSrcweir inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
100*cdf0e10cSrcweir                          sal_Unicode cChar)
101*cdf0e10cSrcweir {
102*cdf0e10cSrcweir     rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
103*cdf0e10cSrcweir }
104*cdf0e10cSrcweir 
// How a character returned by readUcs4() was spelled in the input.
enum EscapeType
{
    // Taken literally from the input; no escape sequence was decoded.
    EscapeNo = 0,

    // One or more "%XX" escape sequences that decoded to a whole character.
    EscapeChar = 1,

    // A single "%XX" escape sequence whose octet could not be decoded to a
    // character in the requested charset.
    EscapeOctet = 2
};
111*cdf0e10cSrcweir 
112*cdf0e10cSrcweir /* Read any of the following:
113*cdf0e10cSrcweir 
114*cdf0e10cSrcweir    - sequence of escape sequences representing character from eCharset,
115*cdf0e10cSrcweir      translated to single UCS4 character; or
116*cdf0e10cSrcweir 
117*cdf0e10cSrcweir    - pair of UTF-16 surrogates, translated to single UCS4 character; or
118*cdf0e10cSrcweir 
119*cdf0e10cSrcweir    _ single UTF-16 character, extended to UCS4 character.
120*cdf0e10cSrcweir  */
121*cdf0e10cSrcweir sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
122*cdf0e10cSrcweir                     bool bEncoded, rtl_TextEncoding eCharset,
123*cdf0e10cSrcweir                     EscapeType * pType)
124*cdf0e10cSrcweir {
125*cdf0e10cSrcweir     sal_uInt32 nChar = *(*pBegin)++;
126*cdf0e10cSrcweir     int nWeight1;
127*cdf0e10cSrcweir     int nWeight2;
128*cdf0e10cSrcweir     if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
129*cdf0e10cSrcweir         && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
130*cdf0e10cSrcweir         && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
131*cdf0e10cSrcweir     {
132*cdf0e10cSrcweir         *pBegin += 2;
133*cdf0e10cSrcweir         nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
134*cdf0e10cSrcweir         if (nChar <= 0x7F)
135*cdf0e10cSrcweir             *pType = EscapeChar;
136*cdf0e10cSrcweir         else if (eCharset == RTL_TEXTENCODING_UTF8)
137*cdf0e10cSrcweir         {
138*cdf0e10cSrcweir             if (nChar >= 0xC0 && nChar <= 0xF4)
139*cdf0e10cSrcweir             {
140*cdf0e10cSrcweir                 sal_uInt32 nEncoded;
141*cdf0e10cSrcweir                 int nShift;
142*cdf0e10cSrcweir                 sal_uInt32 nMin;
143*cdf0e10cSrcweir                 if (nChar <= 0xDF)
144*cdf0e10cSrcweir                 {
145*cdf0e10cSrcweir                     nEncoded = (nChar & 0x1F) << 6;
146*cdf0e10cSrcweir                     nShift = 0;
147*cdf0e10cSrcweir                     nMin = 0x80;
148*cdf0e10cSrcweir                 }
149*cdf0e10cSrcweir                 else if (nChar <= 0xEF)
150*cdf0e10cSrcweir                 {
151*cdf0e10cSrcweir                     nEncoded = (nChar & 0x0F) << 12;
152*cdf0e10cSrcweir                     nShift = 6;
153*cdf0e10cSrcweir                     nMin = 0x800;
154*cdf0e10cSrcweir                 }
155*cdf0e10cSrcweir                 else
156*cdf0e10cSrcweir                 {
157*cdf0e10cSrcweir                     nEncoded = (nChar & 0x07) << 18;
158*cdf0e10cSrcweir                     nShift = 12;
159*cdf0e10cSrcweir                     nMin = 0x10000;
160*cdf0e10cSrcweir                 }
161*cdf0e10cSrcweir                 sal_Unicode const * p = *pBegin;
162*cdf0e10cSrcweir                 bool bUTF8 = true;
163*cdf0e10cSrcweir                 for (; nShift >= 0; nShift -= 6)
164*cdf0e10cSrcweir                 {
165*cdf0e10cSrcweir                     if (pEnd - p < 3 || p[0] != cEscapePrefix
166*cdf0e10cSrcweir                         || (nWeight1 = getHexWeight(p[1])) < 8
167*cdf0e10cSrcweir                         || nWeight1 > 11
168*cdf0e10cSrcweir                         || (nWeight2 = getHexWeight(p[2])) < 0)
169*cdf0e10cSrcweir                     {
170*cdf0e10cSrcweir                         bUTF8 = sal_False;
171*cdf0e10cSrcweir                         break;
172*cdf0e10cSrcweir                     }
173*cdf0e10cSrcweir                     p += 3;
174*cdf0e10cSrcweir                     nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
175*cdf0e10cSrcweir                 }
176*cdf0e10cSrcweir                 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
177*cdf0e10cSrcweir                     && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
178*cdf0e10cSrcweir                 {
179*cdf0e10cSrcweir                     *pBegin = p;
180*cdf0e10cSrcweir                     *pType = EscapeChar;
181*cdf0e10cSrcweir                     return nEncoded;
182*cdf0e10cSrcweir                 }
183*cdf0e10cSrcweir             }
184*cdf0e10cSrcweir             *pType = EscapeOctet;
185*cdf0e10cSrcweir         }
186*cdf0e10cSrcweir         else
187*cdf0e10cSrcweir         {
188*cdf0e10cSrcweir             rtl::OStringBuffer aBuf;
189*cdf0e10cSrcweir             aBuf.append(static_cast< char >(nChar));
190*cdf0e10cSrcweir             rtl_TextToUnicodeConverter aConverter
191*cdf0e10cSrcweir                 = rtl_createTextToUnicodeConverter(eCharset);
192*cdf0e10cSrcweir             sal_Unicode const * p = *pBegin;
193*cdf0e10cSrcweir             for (;;)
194*cdf0e10cSrcweir             {
195*cdf0e10cSrcweir                 sal_Unicode aDst[2];
196*cdf0e10cSrcweir                 sal_uInt32 nInfo;
197*cdf0e10cSrcweir                 sal_Size nConverted;
198*cdf0e10cSrcweir                 sal_Size nDstSize = rtl_convertTextToUnicode(
199*cdf0e10cSrcweir                     aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
200*cdf0e10cSrcweir                     sizeof aDst / sizeof aDst[0],
201*cdf0e10cSrcweir                     (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
202*cdf0e10cSrcweir                      | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
203*cdf0e10cSrcweir                      | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
204*cdf0e10cSrcweir                     &nInfo, &nConverted);
205*cdf0e10cSrcweir                 if (nInfo == 0)
206*cdf0e10cSrcweir                 {
207*cdf0e10cSrcweir                     OSL_ASSERT(
208*cdf0e10cSrcweir                         nConverted
209*cdf0e10cSrcweir                         == sal::static_int_cast< sal_uInt32 >(
210*cdf0e10cSrcweir                             aBuf.getLength()));
211*cdf0e10cSrcweir                     rtl_destroyTextToUnicodeConverter(aConverter);
212*cdf0e10cSrcweir                     *pBegin = p;
213*cdf0e10cSrcweir                     *pType = EscapeChar;
214*cdf0e10cSrcweir                     OSL_ASSERT(
215*cdf0e10cSrcweir                         nDstSize == 1
216*cdf0e10cSrcweir                         || (nDstSize == 2 && isHighSurrogate(aDst[0])
217*cdf0e10cSrcweir                             && isLowSurrogate(aDst[1])));
218*cdf0e10cSrcweir                     return nDstSize == 1
219*cdf0e10cSrcweir                         ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
220*cdf0e10cSrcweir                 }
221*cdf0e10cSrcweir                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
222*cdf0e10cSrcweir                          && pEnd - p >= 3 && p[0] == cEscapePrefix
223*cdf0e10cSrcweir                          && (nWeight1 = getHexWeight(p[1])) >= 0
224*cdf0e10cSrcweir                          && (nWeight2 = getHexWeight(p[2])) >= 0)
225*cdf0e10cSrcweir                 {
226*cdf0e10cSrcweir                     p += 3;
227*cdf0e10cSrcweir                     aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
228*cdf0e10cSrcweir                 }
229*cdf0e10cSrcweir                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
230*cdf0e10cSrcweir                          && p != pEnd && *p <= 0x7F)
231*cdf0e10cSrcweir                 {
232*cdf0e10cSrcweir                     aBuf.append(static_cast< char >(*p++));
233*cdf0e10cSrcweir                 }
234*cdf0e10cSrcweir                 else
235*cdf0e10cSrcweir                 {
236*cdf0e10cSrcweir                     OSL_ASSERT(
237*cdf0e10cSrcweir                         (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
238*cdf0e10cSrcweir                         == 0);
239*cdf0e10cSrcweir                     break;
240*cdf0e10cSrcweir                 }
241*cdf0e10cSrcweir             }
242*cdf0e10cSrcweir             rtl_destroyTextToUnicodeConverter(aConverter);
243*cdf0e10cSrcweir             *pType = EscapeOctet;
244*cdf0e10cSrcweir         }
245*cdf0e10cSrcweir         return nChar;
246*cdf0e10cSrcweir     }
247*cdf0e10cSrcweir     else
248*cdf0e10cSrcweir     {
249*cdf0e10cSrcweir         *pType = EscapeNo;
250*cdf0e10cSrcweir         return isHighSurrogate(nChar) && *pBegin < pEnd
251*cdf0e10cSrcweir                && isLowSurrogate(**pBegin) ?
252*cdf0e10cSrcweir                    combineSurrogates(nChar, *(*pBegin)++) : nChar;
253*cdf0e10cSrcweir     }
254*cdf0e10cSrcweir }
255*cdf0e10cSrcweir 
256*cdf0e10cSrcweir void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
257*cdf0e10cSrcweir {
258*cdf0e10cSrcweir     OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
259*cdf0e10cSrcweir     if (nUtf32 <= 0xFFFF) {
260*cdf0e10cSrcweir         writeUnicode(
261*cdf0e10cSrcweir             pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
262*cdf0e10cSrcweir     } else {
263*cdf0e10cSrcweir         nUtf32 -= 0x10000;
264*cdf0e10cSrcweir         writeUnicode(
265*cdf0e10cSrcweir             pBuffer, pCapacity,
266*cdf0e10cSrcweir             static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
267*cdf0e10cSrcweir         writeUnicode(
268*cdf0e10cSrcweir             pBuffer, pCapacity,
269*cdf0e10cSrcweir             static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
270*cdf0e10cSrcweir     }
271*cdf0e10cSrcweir }
272*cdf0e10cSrcweir 
273*cdf0e10cSrcweir void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
274*cdf0e10cSrcweir                       sal_uInt32 nOctet)
275*cdf0e10cSrcweir {
276*cdf0e10cSrcweir     OSL_ENSURE(nOctet <= 0xFF, "bad octet");
277*cdf0e10cSrcweir 
278*cdf0e10cSrcweir     static sal_Unicode const aHex[16]
279*cdf0e10cSrcweir         = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
280*cdf0e10cSrcweir             0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
281*cdf0e10cSrcweir 
282*cdf0e10cSrcweir     writeUnicode(pBuffer, pCapacity, cEscapePrefix);
283*cdf0e10cSrcweir     writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
284*cdf0e10cSrcweir     writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
285*cdf0e10cSrcweir }
286*cdf0e10cSrcweir 
287*cdf0e10cSrcweir bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
288*cdf0e10cSrcweir                      sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
289*cdf0e10cSrcweir {
290*cdf0e10cSrcweir     OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
291*cdf0e10cSrcweir     if (eCharset == RTL_TEXTENCODING_UTF8) {
292*cdf0e10cSrcweir         if (nUtf32 < 0x80)
293*cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, nUtf32);
294*cdf0e10cSrcweir         else if (nUtf32 < 0x800)
295*cdf0e10cSrcweir         {
296*cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
297*cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
298*cdf0e10cSrcweir         }
299*cdf0e10cSrcweir         else if (nUtf32 < 0x10000)
300*cdf0e10cSrcweir         {
301*cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
302*cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
303*cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
304*cdf0e10cSrcweir         }
305*cdf0e10cSrcweir         else
306*cdf0e10cSrcweir         {
307*cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
308*cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
309*cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
310*cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
311*cdf0e10cSrcweir         }
312*cdf0e10cSrcweir     } else {
313*cdf0e10cSrcweir         rtl_UnicodeToTextConverter aConverter
314*cdf0e10cSrcweir             = rtl_createUnicodeToTextConverter(eCharset);
315*cdf0e10cSrcweir         sal_Unicode aSrc[2];
316*cdf0e10cSrcweir         sal_Size nSrcSize;
317*cdf0e10cSrcweir         if (nUtf32 <= 0xFFFF)
318*cdf0e10cSrcweir         {
319*cdf0e10cSrcweir             aSrc[0] = static_cast< sal_Unicode >(nUtf32);
320*cdf0e10cSrcweir             nSrcSize = 1;
321*cdf0e10cSrcweir         }
322*cdf0e10cSrcweir         else
323*cdf0e10cSrcweir         {
324*cdf0e10cSrcweir             aSrc[0] = static_cast< sal_Unicode >(
325*cdf0e10cSrcweir                 ((nUtf32 - 0x10000) >> 10) | 0xD800);
326*cdf0e10cSrcweir             aSrc[1] = static_cast< sal_Unicode >(
327*cdf0e10cSrcweir                 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
328*cdf0e10cSrcweir             nSrcSize = 2;
329*cdf0e10cSrcweir         }
330*cdf0e10cSrcweir         sal_Char aDst[32]; // FIXME  random value
331*cdf0e10cSrcweir         sal_uInt32 nInfo;
332*cdf0e10cSrcweir         sal_Size nConverted;
333*cdf0e10cSrcweir         sal_Size nDstSize = rtl_convertUnicodeToText(
334*cdf0e10cSrcweir             aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
335*cdf0e10cSrcweir             RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
336*cdf0e10cSrcweir             | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
337*cdf0e10cSrcweir             | RTL_UNICODETOTEXT_FLAGS_FLUSH,
338*cdf0e10cSrcweir             &nInfo, &nConverted);
339*cdf0e10cSrcweir         OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
340*cdf0e10cSrcweir         rtl_destroyUnicodeToTextConverter(aConverter);
341*cdf0e10cSrcweir         if (nInfo == 0) {
342*cdf0e10cSrcweir             OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText");
343*cdf0e10cSrcweir             for (sal_Size i = 0; i < nDstSize; ++i)
344*cdf0e10cSrcweir                 writeEscapeOctet(pBuffer, pCapacity,
345*cdf0e10cSrcweir                                  static_cast< unsigned char >(aDst[i]));
346*cdf0e10cSrcweir                     // FIXME  all octets are escaped, even if there is no need
347*cdf0e10cSrcweir         } else {
348*cdf0e10cSrcweir             if (bStrict) {
349*cdf0e10cSrcweir                 return false;
350*cdf0e10cSrcweir             } else {
351*cdf0e10cSrcweir                 writeUcs4(pBuffer, pCapacity, nUtf32);
352*cdf0e10cSrcweir             }
353*cdf0e10cSrcweir         }
354*cdf0e10cSrcweir     }
355*cdf0e10cSrcweir     return true;
356*cdf0e10cSrcweir }
357*cdf0e10cSrcweir 
358*cdf0e10cSrcweir struct Component
359*cdf0e10cSrcweir {
360*cdf0e10cSrcweir     sal_Unicode const * pBegin;
361*cdf0e10cSrcweir     sal_Unicode const * pEnd;
362*cdf0e10cSrcweir 
363*cdf0e10cSrcweir     inline Component(): pBegin(0) {}
364*cdf0e10cSrcweir 
365*cdf0e10cSrcweir     inline bool isPresent() const { return pBegin != 0; }
366*cdf0e10cSrcweir 
367*cdf0e10cSrcweir     inline sal_Int32 getLength() const;
368*cdf0e10cSrcweir };
369*cdf0e10cSrcweir 
370*cdf0e10cSrcweir inline sal_Int32 Component::getLength() const
371*cdf0e10cSrcweir {
372*cdf0e10cSrcweir     OSL_ENSURE(isPresent(), "taking length of non-present component");
373*cdf0e10cSrcweir     return static_cast< sal_Int32 >(pEnd - pBegin);
374*cdf0e10cSrcweir }
375*cdf0e10cSrcweir 
376*cdf0e10cSrcweir struct Components
377*cdf0e10cSrcweir {
378*cdf0e10cSrcweir     Component aScheme;
379*cdf0e10cSrcweir     Component aAuthority;
380*cdf0e10cSrcweir     Component aPath;
381*cdf0e10cSrcweir     Component aQuery;
382*cdf0e10cSrcweir     Component aFragment;
383*cdf0e10cSrcweir };
384*cdf0e10cSrcweir 
385*cdf0e10cSrcweir void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
386*cdf0e10cSrcweir {
387*cdf0e10cSrcweir     // This algorithm is liberal and accepts various forms of illegal input.
388*cdf0e10cSrcweir 
389*cdf0e10cSrcweir     sal_Unicode const * pBegin = pUriRef->buffer;
390*cdf0e10cSrcweir     sal_Unicode const * pEnd = pBegin + pUriRef->length;
391*cdf0e10cSrcweir     sal_Unicode const * pPos = pBegin;
392*cdf0e10cSrcweir 
393*cdf0e10cSrcweir     if (pPos != pEnd && isAlpha(*pPos))
394*cdf0e10cSrcweir         for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
395*cdf0e10cSrcweir             if (*p == ':')
396*cdf0e10cSrcweir             {
397*cdf0e10cSrcweir                 pComponents->aScheme.pBegin = pBegin;
398*cdf0e10cSrcweir                 pComponents->aScheme.pEnd = ++p;
399*cdf0e10cSrcweir                 pPos = p;
400*cdf0e10cSrcweir                 break;
401*cdf0e10cSrcweir             }
402*cdf0e10cSrcweir             else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-'
403*cdf0e10cSrcweir                      && *p != '.')
404*cdf0e10cSrcweir                 break;
405*cdf0e10cSrcweir 
406*cdf0e10cSrcweir     if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
407*cdf0e10cSrcweir     {
408*cdf0e10cSrcweir         pComponents->aAuthority.pBegin = pPos;
409*cdf0e10cSrcweir         pPos += 2;
410*cdf0e10cSrcweir         while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
411*cdf0e10cSrcweir             ++pPos;
412*cdf0e10cSrcweir         pComponents->aAuthority.pEnd = pPos;
413*cdf0e10cSrcweir     }
414*cdf0e10cSrcweir 
415*cdf0e10cSrcweir     pComponents->aPath.pBegin = pPos;
416*cdf0e10cSrcweir     while (pPos != pEnd && *pPos != '?' && * pPos != '#')
417*cdf0e10cSrcweir         ++pPos;
418*cdf0e10cSrcweir     pComponents->aPath.pEnd = pPos;
419*cdf0e10cSrcweir 
420*cdf0e10cSrcweir     if (pPos != pEnd && *pPos == '?')
421*cdf0e10cSrcweir     {
422*cdf0e10cSrcweir         pComponents->aQuery.pBegin = pPos++;
423*cdf0e10cSrcweir         while (pPos != pEnd && * pPos != '#')
424*cdf0e10cSrcweir             ++pPos;
425*cdf0e10cSrcweir         pComponents->aQuery.pEnd = pPos;
426*cdf0e10cSrcweir     }
427*cdf0e10cSrcweir 
428*cdf0e10cSrcweir     if (pPos != pEnd)
429*cdf0e10cSrcweir     {
430*cdf0e10cSrcweir         OSL_ASSERT(*pPos == '#');
431*cdf0e10cSrcweir         pComponents->aFragment.pBegin = pPos;
432*cdf0e10cSrcweir         pComponents->aFragment.pEnd = pEnd;
433*cdf0e10cSrcweir     }
434*cdf0e10cSrcweir }
435*cdf0e10cSrcweir 
436*cdf0e10cSrcweir rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath)
437*cdf0e10cSrcweir {
438*cdf0e10cSrcweir     OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/');
439*cdf0e10cSrcweir     OSL_ASSERT(rRelPath.isPresent());
440*cdf0e10cSrcweir 
441*cdf0e10cSrcweir     // The invariant of aBuffer is that it always starts and ends with a slash
442*cdf0e10cSrcweir     // (until probably right at the end of the algorithm, when the last segment
443*cdf0e10cSrcweir     // of rRelPath is added, which does not necessarily end in a slash):
444*cdf0e10cSrcweir     rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
445*cdf0e10cSrcweir         // XXX  numeric overflow
446*cdf0e10cSrcweir 
447*cdf0e10cSrcweir     // Segments "." and ".." within rBasePath are not conisdered special (but
448*cdf0e10cSrcweir     // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
449*cdf0e10cSrcweir     // bit unclear about this point:
450*cdf0e10cSrcweir     sal_Int32 nFixed = 1;
451*cdf0e10cSrcweir     sal_Unicode const * p = rBasePath.pBegin + 1;
452*cdf0e10cSrcweir     for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
453*cdf0e10cSrcweir         if (*q == '/')
454*cdf0e10cSrcweir         {
455*cdf0e10cSrcweir             if (
456*cdf0e10cSrcweir                 (q - p == 1 && p[0] == '.') ||
457*cdf0e10cSrcweir                 (q - p == 2 && p[0] == '.' && p[1] == '.')
458*cdf0e10cSrcweir                )
459*cdf0e10cSrcweir             {
460*cdf0e10cSrcweir                 nFixed = q + 1 - rBasePath.pBegin;
461*cdf0e10cSrcweir             }
462*cdf0e10cSrcweir             p = q + 1;
463*cdf0e10cSrcweir         }
464*cdf0e10cSrcweir     aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin);
465*cdf0e10cSrcweir 
466*cdf0e10cSrcweir     p = rRelPath.pBegin;
467*cdf0e10cSrcweir     if (p != rRelPath.pEnd)
468*cdf0e10cSrcweir         for (;;)
469*cdf0e10cSrcweir         {
470*cdf0e10cSrcweir             sal_Unicode const * q = p;
471*cdf0e10cSrcweir             sal_Unicode const * r;
472*cdf0e10cSrcweir             for (;;)
473*cdf0e10cSrcweir             {
474*cdf0e10cSrcweir                 if (q == rRelPath.pEnd)
475*cdf0e10cSrcweir                 {
476*cdf0e10cSrcweir                     r = q;
477*cdf0e10cSrcweir                     break;
478*cdf0e10cSrcweir                 }
479*cdf0e10cSrcweir                 if (*q == '/')
480*cdf0e10cSrcweir                 {
481*cdf0e10cSrcweir                     r = q + 1;
482*cdf0e10cSrcweir                     break;
483*cdf0e10cSrcweir                 }
484*cdf0e10cSrcweir                 ++q;
485*cdf0e10cSrcweir             }
486*cdf0e10cSrcweir             if (q - p == 2 && p[0] == '.' && p[1] == '.')
487*cdf0e10cSrcweir             {
488*cdf0e10cSrcweir                 // Erroneous excess segments ".." within rRelPath are left
489*cdf0e10cSrcweir                 // intact, as the examples in RFC 2396, section C.2, suggest:
490*cdf0e10cSrcweir                 sal_Int32 i = aBuffer.getLength() - 1;
491*cdf0e10cSrcweir                 if (i < nFixed)
492*cdf0e10cSrcweir                 {
493*cdf0e10cSrcweir                     aBuffer.append(p, r - p);
494*cdf0e10cSrcweir                     nFixed += 3;
495*cdf0e10cSrcweir                 }
496*cdf0e10cSrcweir                 else
497*cdf0e10cSrcweir                 {
498*cdf0e10cSrcweir                     while (aBuffer.charAt(i - 1) != '/')
499*cdf0e10cSrcweir                         --i;
500*cdf0e10cSrcweir                     aBuffer.setLength(i);
501*cdf0e10cSrcweir                 }
502*cdf0e10cSrcweir             }
503*cdf0e10cSrcweir             else if (q - p != 1 || *p != '.')
504*cdf0e10cSrcweir                 aBuffer.append(p, r - p);
505*cdf0e10cSrcweir             if (q == rRelPath.pEnd)
506*cdf0e10cSrcweir                 break;
507*cdf0e10cSrcweir             p = q + 1;
508*cdf0e10cSrcweir         }
509*cdf0e10cSrcweir 
510*cdf0e10cSrcweir     return aBuffer.makeStringAndClear();
511*cdf0e10cSrcweir }
512*cdf0e10cSrcweir 
513*cdf0e10cSrcweir }
514*cdf0e10cSrcweir 
515*cdf0e10cSrcweir sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
516*cdf0e10cSrcweir     SAL_THROW_EXTERN_C()
517*cdf0e10cSrcweir {
518*cdf0e10cSrcweir     static sal_Bool const aCharClass[][nCharClassSize]
519*cdf0e10cSrcweir     = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
520*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
522*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
523*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
524*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
525*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
526*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /*pqrstuvwxyz{|}~ */
527*cdf0e10cSrcweir        },
528*cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
529*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530*cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
531*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
532*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
533*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
534*cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
535*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
536*cdf0e10cSrcweir        },
537*cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
538*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
539*cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
540*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
541*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
542*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
543*cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
544*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
545*cdf0e10cSrcweir        },
546*cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
547*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
548*cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
549*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
550*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
551*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
552*cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
553*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
554*cdf0e10cSrcweir        },
555*cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
556*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
557*cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
558*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
559*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
560*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
561*cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
562*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
563*cdf0e10cSrcweir        },
564*cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
565*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
566*cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
567*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
568*cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
569*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
570*cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
571*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
572*cdf0e10cSrcweir        },
573*cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
574*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575*cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
576*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
577*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
578*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
579*cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
580*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
581*cdf0e10cSrcweir        },
582*cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
583*cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
584*cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
585*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
586*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
587*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
588*cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
589*cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
590*cdf0e10cSrcweir        }};
591*cdf0e10cSrcweir     OSL_ENSURE(
592*cdf0e10cSrcweir         (eCharClass >= 0
593*cdf0e10cSrcweir          && (sal::static_int_cast< std::size_t >(eCharClass)
594*cdf0e10cSrcweir              < sizeof aCharClass / sizeof aCharClass[0])),
595*cdf0e10cSrcweir         "bad eCharClass");
596*cdf0e10cSrcweir     return aCharClass[eCharClass];
597*cdf0e10cSrcweir }
598*cdf0e10cSrcweir 
599*cdf0e10cSrcweir void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
600*cdf0e10cSrcweir                             rtl_UriEncodeMechanism eMechanism,
601*cdf0e10cSrcweir                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
602*cdf0e10cSrcweir     SAL_THROW_EXTERN_C()
603*cdf0e10cSrcweir {
604*cdf0e10cSrcweir     OSL_ENSURE(!pCharClass[0x25], "bad pCharClass");
605*cdf0e10cSrcweir         // make sure the percent sign is encoded...
606*cdf0e10cSrcweir 
607*cdf0e10cSrcweir     sal_Unicode const * p = pText->buffer;
608*cdf0e10cSrcweir     sal_Unicode const * pEnd = p + pText->length;
609*cdf0e10cSrcweir     sal_Int32 nCapacity = 0;
610*cdf0e10cSrcweir     rtl_uString_new(pResult);
611*cdf0e10cSrcweir     while (p < pEnd)
612*cdf0e10cSrcweir     {
613*cdf0e10cSrcweir         EscapeType eType;
614*cdf0e10cSrcweir         sal_uInt32 nUtf32 = readUcs4(
615*cdf0e10cSrcweir             &p, pEnd,
616*cdf0e10cSrcweir             (eMechanism == rtl_UriEncodeKeepEscapes
617*cdf0e10cSrcweir              || eMechanism == rtl_UriEncodeCheckEscapes
618*cdf0e10cSrcweir              || eMechanism == rtl_UriEncodeStrictKeepEscapes),
619*cdf0e10cSrcweir             eCharset, &eType);
620*cdf0e10cSrcweir         switch (eType)
621*cdf0e10cSrcweir         {
622*cdf0e10cSrcweir         case EscapeNo:
623*cdf0e10cSrcweir             if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
624*cdf0e10cSrcweir                 writeUnicode(pResult, &nCapacity,
625*cdf0e10cSrcweir                              static_cast< sal_Unicode >(nUtf32));
626*cdf0e10cSrcweir             else if (!writeEscapeChar(
627*cdf0e10cSrcweir                          pResult, &nCapacity, nUtf32, eCharset,
628*cdf0e10cSrcweir                          (eMechanism == rtl_UriEncodeStrict
629*cdf0e10cSrcweir                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
630*cdf0e10cSrcweir             {
631*cdf0e10cSrcweir                 rtl_uString_new(pResult);
632*cdf0e10cSrcweir                 return;
633*cdf0e10cSrcweir             }
634*cdf0e10cSrcweir             break;
635*cdf0e10cSrcweir 
636*cdf0e10cSrcweir         case EscapeChar:
637*cdf0e10cSrcweir             if (eMechanism == rtl_UriEncodeCheckEscapes
638*cdf0e10cSrcweir                 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
639*cdf0e10cSrcweir                 writeUnicode(pResult, &nCapacity,
640*cdf0e10cSrcweir                              static_cast< sal_Unicode >(nUtf32));
641*cdf0e10cSrcweir             else if (!writeEscapeChar(
642*cdf0e10cSrcweir                          pResult, &nCapacity, nUtf32, eCharset,
643*cdf0e10cSrcweir                          (eMechanism == rtl_UriEncodeStrict
644*cdf0e10cSrcweir                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
645*cdf0e10cSrcweir             {
646*cdf0e10cSrcweir                 rtl_uString_new(pResult);
647*cdf0e10cSrcweir                 return;
648*cdf0e10cSrcweir             }
649*cdf0e10cSrcweir             break;
650*cdf0e10cSrcweir 
651*cdf0e10cSrcweir         case EscapeOctet:
652*cdf0e10cSrcweir             writeEscapeOctet(pResult, &nCapacity, nUtf32);
653*cdf0e10cSrcweir             break;
654*cdf0e10cSrcweir         }
655*cdf0e10cSrcweir     }
656*cdf0e10cSrcweir }
657*cdf0e10cSrcweir 
658*cdf0e10cSrcweir void SAL_CALL rtl_uriDecode(rtl_uString * pText,
659*cdf0e10cSrcweir                             rtl_UriDecodeMechanism eMechanism,
660*cdf0e10cSrcweir                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
661*cdf0e10cSrcweir     SAL_THROW_EXTERN_C()
662*cdf0e10cSrcweir {
663*cdf0e10cSrcweir     switch (eMechanism)
664*cdf0e10cSrcweir     {
665*cdf0e10cSrcweir     case rtl_UriDecodeNone:
666*cdf0e10cSrcweir         rtl_uString_assign(pResult, pText);
667*cdf0e10cSrcweir         break;
668*cdf0e10cSrcweir 
669*cdf0e10cSrcweir     case rtl_UriDecodeToIuri:
670*cdf0e10cSrcweir         eCharset = RTL_TEXTENCODING_UTF8;
671*cdf0e10cSrcweir     default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
672*cdf0e10cSrcweir         {
673*cdf0e10cSrcweir             sal_Unicode const * p = pText->buffer;
674*cdf0e10cSrcweir             sal_Unicode const * pEnd = p + pText->length;
675*cdf0e10cSrcweir             sal_Int32 nCapacity = 0;
676*cdf0e10cSrcweir             rtl_uString_new(pResult);
677*cdf0e10cSrcweir             while (p < pEnd)
678*cdf0e10cSrcweir             {
679*cdf0e10cSrcweir                 EscapeType eType;
680*cdf0e10cSrcweir                 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
681*cdf0e10cSrcweir                 switch (eType)
682*cdf0e10cSrcweir                 {
683*cdf0e10cSrcweir                 case EscapeChar:
684*cdf0e10cSrcweir                     if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
685*cdf0e10cSrcweir                     {
686*cdf0e10cSrcweir                         writeEscapeOctet(pResult, &nCapacity, nUtf32);
687*cdf0e10cSrcweir                         break;
688*cdf0e10cSrcweir                     }
689*cdf0e10cSrcweir                 case EscapeNo:
690*cdf0e10cSrcweir                     writeUcs4(pResult, &nCapacity, nUtf32);
691*cdf0e10cSrcweir                     break;
692*cdf0e10cSrcweir 
693*cdf0e10cSrcweir                 case EscapeOctet:
694*cdf0e10cSrcweir                     if (eMechanism == rtl_UriDecodeStrict) {
695*cdf0e10cSrcweir                         rtl_uString_new(pResult);
696*cdf0e10cSrcweir                         return;
697*cdf0e10cSrcweir                     }
698*cdf0e10cSrcweir                     writeEscapeOctet(pResult, &nCapacity, nUtf32);
699*cdf0e10cSrcweir                     break;
700*cdf0e10cSrcweir                 }
701*cdf0e10cSrcweir             }
702*cdf0e10cSrcweir         }
703*cdf0e10cSrcweir         break;
704*cdf0e10cSrcweir     }
705*cdf0e10cSrcweir }
706*cdf0e10cSrcweir 
707*cdf0e10cSrcweir sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
708*cdf0e10cSrcweir                                          rtl_uString * pRelUriRef,
709*cdf0e10cSrcweir                                          rtl_uString ** pResult,
710*cdf0e10cSrcweir                                          rtl_uString ** pException)
711*cdf0e10cSrcweir     SAL_THROW_EXTERN_C()
712*cdf0e10cSrcweir {
713*cdf0e10cSrcweir     // If pRelUriRef starts with a scheme component it is an absolute URI
714*cdf0e10cSrcweir     // reference, and we are done (i.e., this algorithm does not support
715*cdf0e10cSrcweir     // backwards-compatible relative URIs starting with a scheme component, see
716*cdf0e10cSrcweir     // RFC 2396, section 5.2, step 3):
717*cdf0e10cSrcweir     Components aRelComponents;
718*cdf0e10cSrcweir     parseUriRef(pRelUriRef, &aRelComponents);
719*cdf0e10cSrcweir     if (aRelComponents.aScheme.isPresent())
720*cdf0e10cSrcweir     {
721*cdf0e10cSrcweir         rtl_uString_assign(pResult, pRelUriRef);
722*cdf0e10cSrcweir         return true;
723*cdf0e10cSrcweir     }
724*cdf0e10cSrcweir 
725*cdf0e10cSrcweir     // Parse pBaseUriRef; if the scheme component is not present or not valid,
726*cdf0e10cSrcweir     // or the path component is not empty and starts with anything but a slash,
727*cdf0e10cSrcweir     // an exception is raised:
728*cdf0e10cSrcweir     Components aBaseComponents;
729*cdf0e10cSrcweir     parseUriRef(pBaseUriRef, &aBaseComponents);
730*cdf0e10cSrcweir     if (!aBaseComponents.aScheme.isPresent())
731*cdf0e10cSrcweir     {
732*cdf0e10cSrcweir         rtl::OUString aMessage(pBaseUriRef);
733*cdf0e10cSrcweir         aMessage += rtl::OUString(
734*cdf0e10cSrcweir                         RTL_CONSTASCII_USTRINGPARAM(
735*cdf0e10cSrcweir                             " does not start with a scheme component"));
736*cdf0e10cSrcweir         rtl_uString_assign(pException,
737*cdf0e10cSrcweir                            const_cast< rtl::OUString & >(aMessage).pData);
738*cdf0e10cSrcweir         return false;
739*cdf0e10cSrcweir     }
740*cdf0e10cSrcweir     if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
741*cdf0e10cSrcweir         && *aBaseComponents.aPath.pBegin != '/')
742*cdf0e10cSrcweir     {
743*cdf0e10cSrcweir         rtl::OUString aMessage(pBaseUriRef);
744*cdf0e10cSrcweir         aMessage += rtl::OUString(
745*cdf0e10cSrcweir                         RTL_CONSTASCII_USTRINGPARAM(
746*cdf0e10cSrcweir                             "path component does not start with slash"));
747*cdf0e10cSrcweir         rtl_uString_assign(pException, aMessage.pData);
748*cdf0e10cSrcweir         return false;
749*cdf0e10cSrcweir     }
750*cdf0e10cSrcweir 
751*cdf0e10cSrcweir     // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
752*cdf0e10cSrcweir     // into an absolute one (if the relative URI is a reference to the "current
753*cdf0e10cSrcweir     // document," the "current document" is here taken to be the base URI):
754*cdf0e10cSrcweir     rtl::OUStringBuffer aBuffer;
755*cdf0e10cSrcweir     aBuffer.append(aBaseComponents.aScheme.pBegin,
756*cdf0e10cSrcweir                    aBaseComponents.aScheme.getLength());
757*cdf0e10cSrcweir     if (aRelComponents.aAuthority.isPresent())
758*cdf0e10cSrcweir     {
759*cdf0e10cSrcweir         aBuffer.append(aRelComponents.aAuthority.pBegin,
760*cdf0e10cSrcweir                        aRelComponents.aAuthority.getLength());
761*cdf0e10cSrcweir         aBuffer.append(aRelComponents.aPath.pBegin,
762*cdf0e10cSrcweir                        aRelComponents.aPath.getLength());
763*cdf0e10cSrcweir         if (aRelComponents.aQuery.isPresent())
764*cdf0e10cSrcweir             aBuffer.append(aRelComponents.aQuery.pBegin,
765*cdf0e10cSrcweir                            aRelComponents.aQuery.getLength());
766*cdf0e10cSrcweir     }
767*cdf0e10cSrcweir     else
768*cdf0e10cSrcweir     {
769*cdf0e10cSrcweir         if (aBaseComponents.aAuthority.isPresent())
770*cdf0e10cSrcweir             aBuffer.append(aBaseComponents.aAuthority.pBegin,
771*cdf0e10cSrcweir                            aBaseComponents.aAuthority.getLength());
772*cdf0e10cSrcweir         if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
773*cdf0e10cSrcweir             && !aRelComponents.aQuery.isPresent())
774*cdf0e10cSrcweir         {
775*cdf0e10cSrcweir             aBuffer.append(aBaseComponents.aPath.pBegin,
776*cdf0e10cSrcweir                            aBaseComponents.aPath.getLength());
777*cdf0e10cSrcweir             if (aBaseComponents.aQuery.isPresent())
778*cdf0e10cSrcweir                 aBuffer.append(aBaseComponents.aQuery.pBegin,
779*cdf0e10cSrcweir                                aBaseComponents.aQuery.getLength());
780*cdf0e10cSrcweir         }
781*cdf0e10cSrcweir         else
782*cdf0e10cSrcweir         {
783*cdf0e10cSrcweir             if (*aRelComponents.aPath.pBegin == '/')
784*cdf0e10cSrcweir                 aBuffer.append(aRelComponents.aPath.pBegin,
785*cdf0e10cSrcweir                                aRelComponents.aPath.getLength());
786*cdf0e10cSrcweir             else
787*cdf0e10cSrcweir                 aBuffer.append(joinPaths(aBaseComponents.aPath,
788*cdf0e10cSrcweir                                          aRelComponents.aPath));
789*cdf0e10cSrcweir             if (aRelComponents.aQuery.isPresent())
790*cdf0e10cSrcweir                 aBuffer.append(aRelComponents.aQuery.pBegin,
791*cdf0e10cSrcweir                                aRelComponents.aQuery.getLength());
792*cdf0e10cSrcweir         }
793*cdf0e10cSrcweir     }
794*cdf0e10cSrcweir     if (aRelComponents.aFragment.isPresent())
795*cdf0e10cSrcweir         aBuffer.append(aRelComponents.aFragment.pBegin,
796*cdf0e10cSrcweir                        aRelComponents.aFragment.getLength());
797*cdf0e10cSrcweir     rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
798*cdf0e10cSrcweir     return true;
799*cdf0e10cSrcweir }
800