1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_sal.hxx" 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include "rtl/uri.h" 32*cdf0e10cSrcweir 33*cdf0e10cSrcweir #include "surrogates.h" 34*cdf0e10cSrcweir 35*cdf0e10cSrcweir #include "osl/diagnose.h" 36*cdf0e10cSrcweir #include "rtl/strbuf.hxx" 37*cdf0e10cSrcweir #include "rtl/textenc.h" 38*cdf0e10cSrcweir #include "rtl/textcvt.h" 39*cdf0e10cSrcweir #include "rtl/uri.h" 40*cdf0e10cSrcweir #include "rtl/ustrbuf.h" 41*cdf0e10cSrcweir #include "rtl/ustrbuf.hxx" 42*cdf0e10cSrcweir #include "rtl/ustring.h" 43*cdf0e10cSrcweir #include "rtl/ustring.hxx" 44*cdf0e10cSrcweir #include "sal/types.h" 45*cdf0e10cSrcweir 46*cdf0e10cSrcweir #include <cstddef> 47*cdf0e10cSrcweir 48*cdf0e10cSrcweir namespace { 49*cdf0e10cSrcweir 50*cdf0e10cSrcweir std::size_t const nCharClassSize = 128; 51*cdf0e10cSrcweir 52*cdf0e10cSrcweir sal_Unicode const cEscapePrefix = 0x25; // '%' 53*cdf0e10cSrcweir 54*cdf0e10cSrcweir inline bool isDigit(sal_uInt32 nUtf32) 55*cdf0e10cSrcweir { 56*cdf0e10cSrcweir return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9' 57*cdf0e10cSrcweir } 58*cdf0e10cSrcweir 59*cdf0e10cSrcweir inline bool isAlpha(sal_uInt32 nUtf32) 60*cdf0e10cSrcweir { 61*cdf0e10cSrcweir // 'A'--'Z', 'a'--'z' 62*cdf0e10cSrcweir return ( 63*cdf0e10cSrcweir (nUtf32 >= 0x41 && nUtf32 <= 0x5A) || 64*cdf0e10cSrcweir (nUtf32 >= 0x61 && nUtf32 <= 0x7A) 65*cdf0e10cSrcweir ); 66*cdf0e10cSrcweir } 67*cdf0e10cSrcweir 68*cdf0e10cSrcweir inline bool isHighSurrogate(sal_uInt32 nUtf16) 69*cdf0e10cSrcweir { 70*cdf0e10cSrcweir return SAL_RTL_IS_HIGH_SURROGATE(nUtf16); 71*cdf0e10cSrcweir } 72*cdf0e10cSrcweir 73*cdf0e10cSrcweir inline bool isLowSurrogate(sal_uInt32 nUtf16) 74*cdf0e10cSrcweir { 75*cdf0e10cSrcweir return SAL_RTL_IS_LOW_SURROGATE(nUtf16); 76*cdf0e10cSrcweir } 77*cdf0e10cSrcweir 78*cdf0e10cSrcweir inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) 79*cdf0e10cSrcweir { 80*cdf0e10cSrcweir return SAL_RTL_COMBINE_SURROGATES(high, low); 81*cdf0e10cSrcweir } 82*cdf0e10cSrcweir 83*cdf0e10cSrcweir inline int getHexWeight(sal_uInt32 nUtf32) 84*cdf0e10cSrcweir { 85*cdf0e10cSrcweir return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9' 86*cdf0e10cSrcweir static_cast< int >(nUtf32 - 0x30) : 87*cdf0e10cSrcweir nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F' 88*cdf0e10cSrcweir static_cast< int >(nUtf32 - 0x41 + 10) : 89*cdf0e10cSrcweir nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f' 90*cdf0e10cSrcweir static_cast< int >(nUtf32 - 0x61 + 10) : 91*cdf0e10cSrcweir -1; // not a hex digit 92*cdf0e10cSrcweir } 93*cdf0e10cSrcweir 94*cdf0e10cSrcweir inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32) 95*cdf0e10cSrcweir { 96*cdf0e10cSrcweir return nUtf32 < nCharClassSize && pCharClass[nUtf32]; 97*cdf0e10cSrcweir } 98*cdf0e10cSrcweir 99*cdf0e10cSrcweir inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity, 100*cdf0e10cSrcweir sal_Unicode cChar) 101*cdf0e10cSrcweir { 102*cdf0e10cSrcweir rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1); 103*cdf0e10cSrcweir } 104*cdf0e10cSrcweir 105*cdf0e10cSrcweir enum EscapeType 106*cdf0e10cSrcweir { 107*cdf0e10cSrcweir EscapeNo, 108*cdf0e10cSrcweir EscapeChar, 109*cdf0e10cSrcweir EscapeOctet 110*cdf0e10cSrcweir }; 111*cdf0e10cSrcweir 112*cdf0e10cSrcweir /* Read any of the following: 113*cdf0e10cSrcweir 114*cdf0e10cSrcweir - sequence of escape sequences representing character from eCharset, 115*cdf0e10cSrcweir translated to single UCS4 character; or 116*cdf0e10cSrcweir 117*cdf0e10cSrcweir - pair of UTF-16 surrogates, translated to single UCS4 character; or 118*cdf0e10cSrcweir 119*cdf0e10cSrcweir _ single UTF-16 character, extended to UCS4 character. 120*cdf0e10cSrcweir */ 121*cdf0e10cSrcweir sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, 122*cdf0e10cSrcweir bool bEncoded, rtl_TextEncoding eCharset, 123*cdf0e10cSrcweir EscapeType * pType) 124*cdf0e10cSrcweir { 125*cdf0e10cSrcweir sal_uInt32 nChar = *(*pBegin)++; 126*cdf0e10cSrcweir int nWeight1; 127*cdf0e10cSrcweir int nWeight2; 128*cdf0e10cSrcweir if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2 129*cdf0e10cSrcweir && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0 130*cdf0e10cSrcweir && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0) 131*cdf0e10cSrcweir { 132*cdf0e10cSrcweir *pBegin += 2; 133*cdf0e10cSrcweir nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2); 134*cdf0e10cSrcweir if (nChar <= 0x7F) 135*cdf0e10cSrcweir *pType = EscapeChar; 136*cdf0e10cSrcweir else if (eCharset == RTL_TEXTENCODING_UTF8) 137*cdf0e10cSrcweir { 138*cdf0e10cSrcweir if (nChar >= 0xC0 && nChar <= 0xF4) 139*cdf0e10cSrcweir { 140*cdf0e10cSrcweir sal_uInt32 nEncoded; 141*cdf0e10cSrcweir int nShift; 142*cdf0e10cSrcweir sal_uInt32 nMin; 143*cdf0e10cSrcweir if (nChar <= 0xDF) 144*cdf0e10cSrcweir { 145*cdf0e10cSrcweir nEncoded = (nChar & 0x1F) << 6; 146*cdf0e10cSrcweir nShift = 0; 147*cdf0e10cSrcweir nMin = 0x80; 148*cdf0e10cSrcweir } 149*cdf0e10cSrcweir else if (nChar <= 0xEF) 150*cdf0e10cSrcweir { 151*cdf0e10cSrcweir nEncoded = (nChar & 0x0F) << 12; 152*cdf0e10cSrcweir nShift = 6; 153*cdf0e10cSrcweir nMin = 0x800; 154*cdf0e10cSrcweir } 155*cdf0e10cSrcweir else 156*cdf0e10cSrcweir { 157*cdf0e10cSrcweir nEncoded = (nChar & 0x07) << 18; 158*cdf0e10cSrcweir nShift = 12; 159*cdf0e10cSrcweir nMin = 0x10000; 160*cdf0e10cSrcweir } 161*cdf0e10cSrcweir sal_Unicode const * p = *pBegin; 162*cdf0e10cSrcweir bool bUTF8 = true; 163*cdf0e10cSrcweir for (; nShift >= 0; nShift -= 6) 164*cdf0e10cSrcweir { 165*cdf0e10cSrcweir if (pEnd - p < 3 || p[0] != cEscapePrefix 166*cdf0e10cSrcweir || (nWeight1 = getHexWeight(p[1])) < 8 167*cdf0e10cSrcweir || nWeight1 > 11 168*cdf0e10cSrcweir || (nWeight2 = getHexWeight(p[2])) < 0) 169*cdf0e10cSrcweir { 170*cdf0e10cSrcweir bUTF8 = sal_False; 171*cdf0e10cSrcweir break; 172*cdf0e10cSrcweir } 173*cdf0e10cSrcweir p += 3; 174*cdf0e10cSrcweir nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift; 175*cdf0e10cSrcweir } 176*cdf0e10cSrcweir if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded) 177*cdf0e10cSrcweir && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF) 178*cdf0e10cSrcweir { 179*cdf0e10cSrcweir *pBegin = p; 180*cdf0e10cSrcweir *pType = EscapeChar; 181*cdf0e10cSrcweir return nEncoded; 182*cdf0e10cSrcweir } 183*cdf0e10cSrcweir } 184*cdf0e10cSrcweir *pType = EscapeOctet; 185*cdf0e10cSrcweir } 186*cdf0e10cSrcweir else 187*cdf0e10cSrcweir { 188*cdf0e10cSrcweir rtl::OStringBuffer aBuf; 189*cdf0e10cSrcweir aBuf.append(static_cast< char >(nChar)); 190*cdf0e10cSrcweir rtl_TextToUnicodeConverter aConverter 191*cdf0e10cSrcweir = rtl_createTextToUnicodeConverter(eCharset); 192*cdf0e10cSrcweir sal_Unicode const * p = *pBegin; 193*cdf0e10cSrcweir for (;;) 194*cdf0e10cSrcweir { 195*cdf0e10cSrcweir sal_Unicode aDst[2]; 196*cdf0e10cSrcweir sal_uInt32 nInfo; 197*cdf0e10cSrcweir sal_Size nConverted; 198*cdf0e10cSrcweir sal_Size nDstSize = rtl_convertTextToUnicode( 199*cdf0e10cSrcweir aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst, 200*cdf0e10cSrcweir sizeof aDst / sizeof aDst[0], 201*cdf0e10cSrcweir (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR 202*cdf0e10cSrcweir | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR 203*cdf0e10cSrcweir | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR), 204*cdf0e10cSrcweir &nInfo, &nConverted); 205*cdf0e10cSrcweir if (nInfo == 0) 206*cdf0e10cSrcweir { 207*cdf0e10cSrcweir OSL_ASSERT( 208*cdf0e10cSrcweir nConverted 209*cdf0e10cSrcweir == sal::static_int_cast< sal_uInt32 >( 210*cdf0e10cSrcweir aBuf.getLength())); 211*cdf0e10cSrcweir rtl_destroyTextToUnicodeConverter(aConverter); 212*cdf0e10cSrcweir *pBegin = p; 213*cdf0e10cSrcweir *pType = EscapeChar; 214*cdf0e10cSrcweir OSL_ASSERT( 215*cdf0e10cSrcweir nDstSize == 1 216*cdf0e10cSrcweir || (nDstSize == 2 && isHighSurrogate(aDst[0]) 217*cdf0e10cSrcweir && isLowSurrogate(aDst[1]))); 218*cdf0e10cSrcweir return nDstSize == 1 219*cdf0e10cSrcweir ? aDst[0] : combineSurrogates(aDst[0], aDst[1]); 220*cdf0e10cSrcweir } 221*cdf0e10cSrcweir else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL 222*cdf0e10cSrcweir && pEnd - p >= 3 && p[0] == cEscapePrefix 223*cdf0e10cSrcweir && (nWeight1 = getHexWeight(p[1])) >= 0 224*cdf0e10cSrcweir && (nWeight2 = getHexWeight(p[2])) >= 0) 225*cdf0e10cSrcweir { 226*cdf0e10cSrcweir p += 3; 227*cdf0e10cSrcweir aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2)); 228*cdf0e10cSrcweir } 229*cdf0e10cSrcweir else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL 230*cdf0e10cSrcweir && p != pEnd && *p <= 0x7F) 231*cdf0e10cSrcweir { 232*cdf0e10cSrcweir aBuf.append(static_cast< char >(*p++)); 233*cdf0e10cSrcweir } 234*cdf0e10cSrcweir else 235*cdf0e10cSrcweir { 236*cdf0e10cSrcweir OSL_ASSERT( 237*cdf0e10cSrcweir (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL) 238*cdf0e10cSrcweir == 0); 239*cdf0e10cSrcweir break; 240*cdf0e10cSrcweir } 241*cdf0e10cSrcweir } 242*cdf0e10cSrcweir rtl_destroyTextToUnicodeConverter(aConverter); 243*cdf0e10cSrcweir *pType = EscapeOctet; 244*cdf0e10cSrcweir } 245*cdf0e10cSrcweir return nChar; 246*cdf0e10cSrcweir } 247*cdf0e10cSrcweir else 248*cdf0e10cSrcweir { 249*cdf0e10cSrcweir *pType = EscapeNo; 250*cdf0e10cSrcweir return isHighSurrogate(nChar) && *pBegin < pEnd 251*cdf0e10cSrcweir && isLowSurrogate(**pBegin) ? 252*cdf0e10cSrcweir combineSurrogates(nChar, *(*pBegin)++) : nChar; 253*cdf0e10cSrcweir } 254*cdf0e10cSrcweir } 255*cdf0e10cSrcweir 256*cdf0e10cSrcweir void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32) 257*cdf0e10cSrcweir { 258*cdf0e10cSrcweir OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char"); 259*cdf0e10cSrcweir if (nUtf32 <= 0xFFFF) { 260*cdf0e10cSrcweir writeUnicode( 261*cdf0e10cSrcweir pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32)); 262*cdf0e10cSrcweir } else { 263*cdf0e10cSrcweir nUtf32 -= 0x10000; 264*cdf0e10cSrcweir writeUnicode( 265*cdf0e10cSrcweir pBuffer, pCapacity, 266*cdf0e10cSrcweir static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800)); 267*cdf0e10cSrcweir writeUnicode( 268*cdf0e10cSrcweir pBuffer, pCapacity, 269*cdf0e10cSrcweir static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00)); 270*cdf0e10cSrcweir } 271*cdf0e10cSrcweir } 272*cdf0e10cSrcweir 273*cdf0e10cSrcweir void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity, 274*cdf0e10cSrcweir sal_uInt32 nOctet) 275*cdf0e10cSrcweir { 276*cdf0e10cSrcweir OSL_ENSURE(nOctet <= 0xFF, "bad octet"); 277*cdf0e10cSrcweir 278*cdf0e10cSrcweir static sal_Unicode const aHex[16] 279*cdf0e10cSrcweir = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 280*cdf0e10cSrcweir 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */ 281*cdf0e10cSrcweir 282*cdf0e10cSrcweir writeUnicode(pBuffer, pCapacity, cEscapePrefix); 283*cdf0e10cSrcweir writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]); 284*cdf0e10cSrcweir writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]); 285*cdf0e10cSrcweir } 286*cdf0e10cSrcweir 287*cdf0e10cSrcweir bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity, 288*cdf0e10cSrcweir sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict) 289*cdf0e10cSrcweir { 290*cdf0e10cSrcweir OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char"); 291*cdf0e10cSrcweir if (eCharset == RTL_TEXTENCODING_UTF8) { 292*cdf0e10cSrcweir if (nUtf32 < 0x80) 293*cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, nUtf32); 294*cdf0e10cSrcweir else if (nUtf32 < 0x800) 295*cdf0e10cSrcweir { 296*cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0); 297*cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); 298*cdf0e10cSrcweir } 299*cdf0e10cSrcweir else if (nUtf32 < 0x10000) 300*cdf0e10cSrcweir { 301*cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0); 302*cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80); 303*cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); 304*cdf0e10cSrcweir } 305*cdf0e10cSrcweir else 306*cdf0e10cSrcweir { 307*cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0); 308*cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80); 309*cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80); 310*cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); 311*cdf0e10cSrcweir } 312*cdf0e10cSrcweir } else { 313*cdf0e10cSrcweir rtl_UnicodeToTextConverter aConverter 314*cdf0e10cSrcweir = rtl_createUnicodeToTextConverter(eCharset); 315*cdf0e10cSrcweir sal_Unicode aSrc[2]; 316*cdf0e10cSrcweir sal_Size nSrcSize; 317*cdf0e10cSrcweir if (nUtf32 <= 0xFFFF) 318*cdf0e10cSrcweir { 319*cdf0e10cSrcweir aSrc[0] = static_cast< sal_Unicode >(nUtf32); 320*cdf0e10cSrcweir nSrcSize = 1; 321*cdf0e10cSrcweir } 322*cdf0e10cSrcweir else 323*cdf0e10cSrcweir { 324*cdf0e10cSrcweir aSrc[0] = static_cast< sal_Unicode >( 325*cdf0e10cSrcweir ((nUtf32 - 0x10000) >> 10) | 0xD800); 326*cdf0e10cSrcweir aSrc[1] = static_cast< sal_Unicode >( 327*cdf0e10cSrcweir ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00); 328*cdf0e10cSrcweir nSrcSize = 2; 329*cdf0e10cSrcweir } 330*cdf0e10cSrcweir sal_Char aDst[32]; // FIXME random value 331*cdf0e10cSrcweir sal_uInt32 nInfo; 332*cdf0e10cSrcweir sal_Size nConverted; 333*cdf0e10cSrcweir sal_Size nDstSize = rtl_convertUnicodeToText( 334*cdf0e10cSrcweir aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst, 335*cdf0e10cSrcweir RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR 336*cdf0e10cSrcweir | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR 337*cdf0e10cSrcweir | RTL_UNICODETOTEXT_FLAGS_FLUSH, 338*cdf0e10cSrcweir &nInfo, &nConverted); 339*cdf0e10cSrcweir OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0); 340*cdf0e10cSrcweir rtl_destroyUnicodeToTextConverter(aConverter); 341*cdf0e10cSrcweir if (nInfo == 0) { 342*cdf0e10cSrcweir OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText"); 343*cdf0e10cSrcweir for (sal_Size i = 0; i < nDstSize; ++i) 344*cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, 345*cdf0e10cSrcweir static_cast< unsigned char >(aDst[i])); 346*cdf0e10cSrcweir // FIXME all octets are escaped, even if there is no need 347*cdf0e10cSrcweir } else { 348*cdf0e10cSrcweir if (bStrict) { 349*cdf0e10cSrcweir return false; 350*cdf0e10cSrcweir } else { 351*cdf0e10cSrcweir writeUcs4(pBuffer, pCapacity, nUtf32); 352*cdf0e10cSrcweir } 353*cdf0e10cSrcweir } 354*cdf0e10cSrcweir } 355*cdf0e10cSrcweir return true; 356*cdf0e10cSrcweir } 357*cdf0e10cSrcweir 358*cdf0e10cSrcweir struct Component 359*cdf0e10cSrcweir { 360*cdf0e10cSrcweir sal_Unicode const * pBegin; 361*cdf0e10cSrcweir sal_Unicode const * pEnd; 362*cdf0e10cSrcweir 363*cdf0e10cSrcweir inline Component(): pBegin(0) {} 364*cdf0e10cSrcweir 365*cdf0e10cSrcweir inline bool isPresent() const { return pBegin != 0; } 366*cdf0e10cSrcweir 367*cdf0e10cSrcweir inline sal_Int32 getLength() const; 368*cdf0e10cSrcweir }; 369*cdf0e10cSrcweir 370*cdf0e10cSrcweir inline sal_Int32 Component::getLength() const 371*cdf0e10cSrcweir { 372*cdf0e10cSrcweir OSL_ENSURE(isPresent(), "taking length of non-present component"); 373*cdf0e10cSrcweir return static_cast< sal_Int32 >(pEnd - pBegin); 374*cdf0e10cSrcweir } 375*cdf0e10cSrcweir 376*cdf0e10cSrcweir struct Components 377*cdf0e10cSrcweir { 378*cdf0e10cSrcweir Component aScheme; 379*cdf0e10cSrcweir Component aAuthority; 380*cdf0e10cSrcweir Component aPath; 381*cdf0e10cSrcweir Component aQuery; 382*cdf0e10cSrcweir Component aFragment; 383*cdf0e10cSrcweir }; 384*cdf0e10cSrcweir 385*cdf0e10cSrcweir void parseUriRef(rtl_uString const * pUriRef, Components * pComponents) 386*cdf0e10cSrcweir { 387*cdf0e10cSrcweir // This algorithm is liberal and accepts various forms of illegal input. 388*cdf0e10cSrcweir 389*cdf0e10cSrcweir sal_Unicode const * pBegin = pUriRef->buffer; 390*cdf0e10cSrcweir sal_Unicode const * pEnd = pBegin + pUriRef->length; 391*cdf0e10cSrcweir sal_Unicode const * pPos = pBegin; 392*cdf0e10cSrcweir 393*cdf0e10cSrcweir if (pPos != pEnd && isAlpha(*pPos)) 394*cdf0e10cSrcweir for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p) 395*cdf0e10cSrcweir if (*p == ':') 396*cdf0e10cSrcweir { 397*cdf0e10cSrcweir pComponents->aScheme.pBegin = pBegin; 398*cdf0e10cSrcweir pComponents->aScheme.pEnd = ++p; 399*cdf0e10cSrcweir pPos = p; 400*cdf0e10cSrcweir break; 401*cdf0e10cSrcweir } 402*cdf0e10cSrcweir else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-' 403*cdf0e10cSrcweir && *p != '.') 404*cdf0e10cSrcweir break; 405*cdf0e10cSrcweir 406*cdf0e10cSrcweir if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') 407*cdf0e10cSrcweir { 408*cdf0e10cSrcweir pComponents->aAuthority.pBegin = pPos; 409*cdf0e10cSrcweir pPos += 2; 410*cdf0e10cSrcweir while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#') 411*cdf0e10cSrcweir ++pPos; 412*cdf0e10cSrcweir pComponents->aAuthority.pEnd = pPos; 413*cdf0e10cSrcweir } 414*cdf0e10cSrcweir 415*cdf0e10cSrcweir pComponents->aPath.pBegin = pPos; 416*cdf0e10cSrcweir while (pPos != pEnd && *pPos != '?' && * pPos != '#') 417*cdf0e10cSrcweir ++pPos; 418*cdf0e10cSrcweir pComponents->aPath.pEnd = pPos; 419*cdf0e10cSrcweir 420*cdf0e10cSrcweir if (pPos != pEnd && *pPos == '?') 421*cdf0e10cSrcweir { 422*cdf0e10cSrcweir pComponents->aQuery.pBegin = pPos++; 423*cdf0e10cSrcweir while (pPos != pEnd && * pPos != '#') 424*cdf0e10cSrcweir ++pPos; 425*cdf0e10cSrcweir pComponents->aQuery.pEnd = pPos; 426*cdf0e10cSrcweir } 427*cdf0e10cSrcweir 428*cdf0e10cSrcweir if (pPos != pEnd) 429*cdf0e10cSrcweir { 430*cdf0e10cSrcweir OSL_ASSERT(*pPos == '#'); 431*cdf0e10cSrcweir pComponents->aFragment.pBegin = pPos; 432*cdf0e10cSrcweir pComponents->aFragment.pEnd = pEnd; 433*cdf0e10cSrcweir } 434*cdf0e10cSrcweir } 435*cdf0e10cSrcweir 436*cdf0e10cSrcweir rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath) 437*cdf0e10cSrcweir { 438*cdf0e10cSrcweir OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/'); 439*cdf0e10cSrcweir OSL_ASSERT(rRelPath.isPresent()); 440*cdf0e10cSrcweir 441*cdf0e10cSrcweir // The invariant of aBuffer is that it always starts and ends with a slash 442*cdf0e10cSrcweir // (until probably right at the end of the algorithm, when the last segment 443*cdf0e10cSrcweir // of rRelPath is added, which does not necessarily end in a slash): 444*cdf0e10cSrcweir rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength()); 445*cdf0e10cSrcweir // XXX numeric overflow 446*cdf0e10cSrcweir 447*cdf0e10cSrcweir // Segments "." and ".." within rBasePath are not conisdered special (but 448*cdf0e10cSrcweir // are also not removed by ".." segments within rRelPath), RFC 2396 seems a 449*cdf0e10cSrcweir // bit unclear about this point: 450*cdf0e10cSrcweir sal_Int32 nFixed = 1; 451*cdf0e10cSrcweir sal_Unicode const * p = rBasePath.pBegin + 1; 452*cdf0e10cSrcweir for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q) 453*cdf0e10cSrcweir if (*q == '/') 454*cdf0e10cSrcweir { 455*cdf0e10cSrcweir if ( 456*cdf0e10cSrcweir (q - p == 1 && p[0] == '.') || 457*cdf0e10cSrcweir (q - p == 2 && p[0] == '.' && p[1] == '.') 458*cdf0e10cSrcweir ) 459*cdf0e10cSrcweir { 460*cdf0e10cSrcweir nFixed = q + 1 - rBasePath.pBegin; 461*cdf0e10cSrcweir } 462*cdf0e10cSrcweir p = q + 1; 463*cdf0e10cSrcweir } 464*cdf0e10cSrcweir aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin); 465*cdf0e10cSrcweir 466*cdf0e10cSrcweir p = rRelPath.pBegin; 467*cdf0e10cSrcweir if (p != rRelPath.pEnd) 468*cdf0e10cSrcweir for (;;) 469*cdf0e10cSrcweir { 470*cdf0e10cSrcweir sal_Unicode const * q = p; 471*cdf0e10cSrcweir sal_Unicode const * r; 472*cdf0e10cSrcweir for (;;) 473*cdf0e10cSrcweir { 474*cdf0e10cSrcweir if (q == rRelPath.pEnd) 475*cdf0e10cSrcweir { 476*cdf0e10cSrcweir r = q; 477*cdf0e10cSrcweir break; 478*cdf0e10cSrcweir } 479*cdf0e10cSrcweir if (*q == '/') 480*cdf0e10cSrcweir { 481*cdf0e10cSrcweir r = q + 1; 482*cdf0e10cSrcweir break; 483*cdf0e10cSrcweir } 484*cdf0e10cSrcweir ++q; 485*cdf0e10cSrcweir } 486*cdf0e10cSrcweir if (q - p == 2 && p[0] == '.' && p[1] == '.') 487*cdf0e10cSrcweir { 488*cdf0e10cSrcweir // Erroneous excess segments ".." within rRelPath are left 489*cdf0e10cSrcweir // intact, as the examples in RFC 2396, section C.2, suggest: 490*cdf0e10cSrcweir sal_Int32 i = aBuffer.getLength() - 1; 491*cdf0e10cSrcweir if (i < nFixed) 492*cdf0e10cSrcweir { 493*cdf0e10cSrcweir aBuffer.append(p, r - p); 494*cdf0e10cSrcweir nFixed += 3; 495*cdf0e10cSrcweir } 496*cdf0e10cSrcweir else 497*cdf0e10cSrcweir { 498*cdf0e10cSrcweir while (aBuffer.charAt(i - 1) != '/') 499*cdf0e10cSrcweir --i; 500*cdf0e10cSrcweir aBuffer.setLength(i); 501*cdf0e10cSrcweir } 502*cdf0e10cSrcweir } 503*cdf0e10cSrcweir else if (q - p != 1 || *p != '.') 504*cdf0e10cSrcweir aBuffer.append(p, r - p); 505*cdf0e10cSrcweir if (q == rRelPath.pEnd) 506*cdf0e10cSrcweir break; 507*cdf0e10cSrcweir p = q + 1; 508*cdf0e10cSrcweir } 509*cdf0e10cSrcweir 510*cdf0e10cSrcweir return aBuffer.makeStringAndClear(); 511*cdf0e10cSrcweir } 512*cdf0e10cSrcweir 513*cdf0e10cSrcweir } 514*cdf0e10cSrcweir 515*cdf0e10cSrcweir sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass) 516*cdf0e10cSrcweir SAL_THROW_EXTERN_C() 517*cdf0e10cSrcweir { 518*cdf0e10cSrcweir static sal_Bool const aCharClass[][nCharClassSize] 519*cdf0e10cSrcweir = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */ 520*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 521*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/ 522*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/ 523*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/ 524*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/ 525*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/ 526*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */ 527*cdf0e10cSrcweir }, 528*cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */ 529*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 530*cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/ 531*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/ 532*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 533*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 534*cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 535*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 536*cdf0e10cSrcweir }, 537*cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */ 538*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 539*cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 540*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/ 541*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 542*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 543*cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 544*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 545*cdf0e10cSrcweir }, 546*cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */ 547*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 548*cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 549*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ 550*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 551*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 552*cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 553*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 554*cdf0e10cSrcweir }, 555*cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */ 556*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 557*cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 558*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ 559*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 560*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 561*cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 562*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 563*cdf0e10cSrcweir }, 564*cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */ 565*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 566*cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 567*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ 568*cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 569*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 570*cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 571*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 572*cdf0e10cSrcweir }, 573*cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */ 574*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 575*cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 576*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/ 577*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 578*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 579*cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 580*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 581*cdf0e10cSrcweir }, 582*cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */ 583*cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 584*cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/ 585*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/ 586*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 587*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 588*cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 589*cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 590*cdf0e10cSrcweir }}; 591*cdf0e10cSrcweir OSL_ENSURE( 592*cdf0e10cSrcweir (eCharClass >= 0 593*cdf0e10cSrcweir && (sal::static_int_cast< std::size_t >(eCharClass) 594*cdf0e10cSrcweir < sizeof aCharClass / sizeof aCharClass[0])), 595*cdf0e10cSrcweir "bad eCharClass"); 596*cdf0e10cSrcweir return aCharClass[eCharClass]; 597*cdf0e10cSrcweir } 598*cdf0e10cSrcweir 599*cdf0e10cSrcweir void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass, 600*cdf0e10cSrcweir rtl_UriEncodeMechanism eMechanism, 601*cdf0e10cSrcweir rtl_TextEncoding eCharset, rtl_uString ** pResult) 602*cdf0e10cSrcweir SAL_THROW_EXTERN_C() 603*cdf0e10cSrcweir { 604*cdf0e10cSrcweir OSL_ENSURE(!pCharClass[0x25], "bad pCharClass"); 605*cdf0e10cSrcweir // make sure the percent sign is encoded... 606*cdf0e10cSrcweir 607*cdf0e10cSrcweir sal_Unicode const * p = pText->buffer; 608*cdf0e10cSrcweir sal_Unicode const * pEnd = p + pText->length; 609*cdf0e10cSrcweir sal_Int32 nCapacity = 0; 610*cdf0e10cSrcweir rtl_uString_new(pResult); 611*cdf0e10cSrcweir while (p < pEnd) 612*cdf0e10cSrcweir { 613*cdf0e10cSrcweir EscapeType eType; 614*cdf0e10cSrcweir sal_uInt32 nUtf32 = readUcs4( 615*cdf0e10cSrcweir &p, pEnd, 616*cdf0e10cSrcweir (eMechanism == rtl_UriEncodeKeepEscapes 617*cdf0e10cSrcweir || eMechanism == rtl_UriEncodeCheckEscapes 618*cdf0e10cSrcweir || eMechanism == rtl_UriEncodeStrictKeepEscapes), 619*cdf0e10cSrcweir eCharset, &eType); 620*cdf0e10cSrcweir switch (eType) 621*cdf0e10cSrcweir { 622*cdf0e10cSrcweir case EscapeNo: 623*cdf0e10cSrcweir if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F 624*cdf0e10cSrcweir writeUnicode(pResult, &nCapacity, 625*cdf0e10cSrcweir static_cast< sal_Unicode >(nUtf32)); 626*cdf0e10cSrcweir else if (!writeEscapeChar( 627*cdf0e10cSrcweir pResult, &nCapacity, nUtf32, eCharset, 628*cdf0e10cSrcweir (eMechanism == rtl_UriEncodeStrict 629*cdf0e10cSrcweir || eMechanism == rtl_UriEncodeStrictKeepEscapes))) 630*cdf0e10cSrcweir { 631*cdf0e10cSrcweir rtl_uString_new(pResult); 632*cdf0e10cSrcweir return; 633*cdf0e10cSrcweir } 634*cdf0e10cSrcweir break; 635*cdf0e10cSrcweir 636*cdf0e10cSrcweir case EscapeChar: 637*cdf0e10cSrcweir if (eMechanism == rtl_UriEncodeCheckEscapes 638*cdf0e10cSrcweir && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F 639*cdf0e10cSrcweir writeUnicode(pResult, &nCapacity, 640*cdf0e10cSrcweir static_cast< sal_Unicode >(nUtf32)); 641*cdf0e10cSrcweir else if (!writeEscapeChar( 642*cdf0e10cSrcweir pResult, &nCapacity, nUtf32, eCharset, 643*cdf0e10cSrcweir (eMechanism == rtl_UriEncodeStrict 644*cdf0e10cSrcweir || eMechanism == rtl_UriEncodeStrictKeepEscapes))) 645*cdf0e10cSrcweir { 646*cdf0e10cSrcweir rtl_uString_new(pResult); 647*cdf0e10cSrcweir return; 648*cdf0e10cSrcweir } 649*cdf0e10cSrcweir break; 650*cdf0e10cSrcweir 651*cdf0e10cSrcweir case EscapeOctet: 652*cdf0e10cSrcweir writeEscapeOctet(pResult, &nCapacity, nUtf32); 653*cdf0e10cSrcweir break; 654*cdf0e10cSrcweir } 655*cdf0e10cSrcweir } 656*cdf0e10cSrcweir } 657*cdf0e10cSrcweir 658*cdf0e10cSrcweir void SAL_CALL rtl_uriDecode(rtl_uString * pText, 659*cdf0e10cSrcweir rtl_UriDecodeMechanism eMechanism, 660*cdf0e10cSrcweir rtl_TextEncoding eCharset, rtl_uString ** pResult) 661*cdf0e10cSrcweir SAL_THROW_EXTERN_C() 662*cdf0e10cSrcweir { 663*cdf0e10cSrcweir switch (eMechanism) 664*cdf0e10cSrcweir { 665*cdf0e10cSrcweir case rtl_UriDecodeNone: 666*cdf0e10cSrcweir rtl_uString_assign(pResult, pText); 667*cdf0e10cSrcweir break; 668*cdf0e10cSrcweir 669*cdf0e10cSrcweir case rtl_UriDecodeToIuri: 670*cdf0e10cSrcweir eCharset = RTL_TEXTENCODING_UTF8; 671*cdf0e10cSrcweir default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict 672*cdf0e10cSrcweir { 673*cdf0e10cSrcweir sal_Unicode const * p = pText->buffer; 674*cdf0e10cSrcweir sal_Unicode const * pEnd = p + pText->length; 675*cdf0e10cSrcweir sal_Int32 nCapacity = 0; 676*cdf0e10cSrcweir rtl_uString_new(pResult); 677*cdf0e10cSrcweir while (p < pEnd) 678*cdf0e10cSrcweir { 679*cdf0e10cSrcweir EscapeType eType; 680*cdf0e10cSrcweir sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType); 681*cdf0e10cSrcweir switch (eType) 682*cdf0e10cSrcweir { 683*cdf0e10cSrcweir case EscapeChar: 684*cdf0e10cSrcweir if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri) 685*cdf0e10cSrcweir { 686*cdf0e10cSrcweir writeEscapeOctet(pResult, &nCapacity, nUtf32); 687*cdf0e10cSrcweir break; 688*cdf0e10cSrcweir } 689*cdf0e10cSrcweir case EscapeNo: 690*cdf0e10cSrcweir writeUcs4(pResult, &nCapacity, nUtf32); 691*cdf0e10cSrcweir break; 692*cdf0e10cSrcweir 693*cdf0e10cSrcweir case EscapeOctet: 694*cdf0e10cSrcweir if (eMechanism == rtl_UriDecodeStrict) { 695*cdf0e10cSrcweir rtl_uString_new(pResult); 696*cdf0e10cSrcweir return; 697*cdf0e10cSrcweir } 698*cdf0e10cSrcweir writeEscapeOctet(pResult, &nCapacity, nUtf32); 699*cdf0e10cSrcweir break; 700*cdf0e10cSrcweir } 701*cdf0e10cSrcweir } 702*cdf0e10cSrcweir } 703*cdf0e10cSrcweir break; 704*cdf0e10cSrcweir } 705*cdf0e10cSrcweir } 706*cdf0e10cSrcweir 707*cdf0e10cSrcweir sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef, 708*cdf0e10cSrcweir rtl_uString * pRelUriRef, 709*cdf0e10cSrcweir rtl_uString ** pResult, 710*cdf0e10cSrcweir rtl_uString ** pException) 711*cdf0e10cSrcweir SAL_THROW_EXTERN_C() 712*cdf0e10cSrcweir { 713*cdf0e10cSrcweir // If pRelUriRef starts with a scheme component it is an absolute URI 714*cdf0e10cSrcweir // reference, and we are done (i.e., this algorithm does not support 715*cdf0e10cSrcweir // backwards-compatible relative URIs starting with a scheme component, see 716*cdf0e10cSrcweir // RFC 2396, section 5.2, step 3): 717*cdf0e10cSrcweir Components aRelComponents; 718*cdf0e10cSrcweir parseUriRef(pRelUriRef, &aRelComponents); 719*cdf0e10cSrcweir if (aRelComponents.aScheme.isPresent()) 720*cdf0e10cSrcweir { 721*cdf0e10cSrcweir rtl_uString_assign(pResult, pRelUriRef); 722*cdf0e10cSrcweir return true; 723*cdf0e10cSrcweir } 724*cdf0e10cSrcweir 725*cdf0e10cSrcweir // Parse pBaseUriRef; if the scheme component is not present or not valid, 726*cdf0e10cSrcweir // or the path component is not empty and starts with anything but a slash, 727*cdf0e10cSrcweir // an exception is raised: 728*cdf0e10cSrcweir Components aBaseComponents; 729*cdf0e10cSrcweir parseUriRef(pBaseUriRef, &aBaseComponents); 730*cdf0e10cSrcweir if (!aBaseComponents.aScheme.isPresent()) 731*cdf0e10cSrcweir { 732*cdf0e10cSrcweir rtl::OUString aMessage(pBaseUriRef); 733*cdf0e10cSrcweir aMessage += rtl::OUString( 734*cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM( 735*cdf0e10cSrcweir " does not start with a scheme component")); 736*cdf0e10cSrcweir rtl_uString_assign(pException, 737*cdf0e10cSrcweir const_cast< rtl::OUString & >(aMessage).pData); 738*cdf0e10cSrcweir return false; 739*cdf0e10cSrcweir } 740*cdf0e10cSrcweir if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd 741*cdf0e10cSrcweir && *aBaseComponents.aPath.pBegin != '/') 742*cdf0e10cSrcweir { 743*cdf0e10cSrcweir rtl::OUString aMessage(pBaseUriRef); 744*cdf0e10cSrcweir aMessage += rtl::OUString( 745*cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM( 746*cdf0e10cSrcweir "path component does not start with slash")); 747*cdf0e10cSrcweir rtl_uString_assign(pException, aMessage.pData); 748*cdf0e10cSrcweir return false; 749*cdf0e10cSrcweir } 750*cdf0e10cSrcweir 751*cdf0e10cSrcweir // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI 752*cdf0e10cSrcweir // into an absolute one (if the relative URI is a reference to the "current 753*cdf0e10cSrcweir // document," the "current document" is here taken to be the base URI): 754*cdf0e10cSrcweir rtl::OUStringBuffer aBuffer; 755*cdf0e10cSrcweir aBuffer.append(aBaseComponents.aScheme.pBegin, 756*cdf0e10cSrcweir aBaseComponents.aScheme.getLength()); 757*cdf0e10cSrcweir if (aRelComponents.aAuthority.isPresent()) 758*cdf0e10cSrcweir { 759*cdf0e10cSrcweir aBuffer.append(aRelComponents.aAuthority.pBegin, 760*cdf0e10cSrcweir aRelComponents.aAuthority.getLength()); 761*cdf0e10cSrcweir aBuffer.append(aRelComponents.aPath.pBegin, 762*cdf0e10cSrcweir aRelComponents.aPath.getLength()); 763*cdf0e10cSrcweir if (aRelComponents.aQuery.isPresent()) 764*cdf0e10cSrcweir aBuffer.append(aRelComponents.aQuery.pBegin, 765*cdf0e10cSrcweir aRelComponents.aQuery.getLength()); 766*cdf0e10cSrcweir } 767*cdf0e10cSrcweir else 768*cdf0e10cSrcweir { 769*cdf0e10cSrcweir if (aBaseComponents.aAuthority.isPresent()) 770*cdf0e10cSrcweir aBuffer.append(aBaseComponents.aAuthority.pBegin, 771*cdf0e10cSrcweir aBaseComponents.aAuthority.getLength()); 772*cdf0e10cSrcweir if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd 773*cdf0e10cSrcweir && !aRelComponents.aQuery.isPresent()) 774*cdf0e10cSrcweir { 775*cdf0e10cSrcweir aBuffer.append(aBaseComponents.aPath.pBegin, 776*cdf0e10cSrcweir aBaseComponents.aPath.getLength()); 777*cdf0e10cSrcweir if (aBaseComponents.aQuery.isPresent()) 778*cdf0e10cSrcweir aBuffer.append(aBaseComponents.aQuery.pBegin, 779*cdf0e10cSrcweir aBaseComponents.aQuery.getLength()); 780*cdf0e10cSrcweir } 781*cdf0e10cSrcweir else 782*cdf0e10cSrcweir { 783*cdf0e10cSrcweir if (*aRelComponents.aPath.pBegin == '/') 784*cdf0e10cSrcweir aBuffer.append(aRelComponents.aPath.pBegin, 785*cdf0e10cSrcweir aRelComponents.aPath.getLength()); 786*cdf0e10cSrcweir else 787*cdf0e10cSrcweir aBuffer.append(joinPaths(aBaseComponents.aPath, 788*cdf0e10cSrcweir aRelComponents.aPath)); 789*cdf0e10cSrcweir if (aRelComponents.aQuery.isPresent()) 790*cdf0e10cSrcweir aBuffer.append(aRelComponents.aQuery.pBegin, 791*cdf0e10cSrcweir aRelComponents.aQuery.getLength()); 792*cdf0e10cSrcweir } 793*cdf0e10cSrcweir } 794*cdf0e10cSrcweir if (aRelComponents.aFragment.isPresent()) 795*cdf0e10cSrcweir aBuffer.append(aRelComponents.aFragment.pBegin, 796*cdf0e10cSrcweir aRelComponents.aFragment.getLength()); 797*cdf0e10cSrcweir rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData); 798*cdf0e10cSrcweir return true; 799*cdf0e10cSrcweir } 800