1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_sal.hxx" 30 31 #include "rtl/uri.h" 32 33 #include "surrogates.h" 34 35 #include "osl/diagnose.h" 36 #include "rtl/strbuf.hxx" 37 #include "rtl/textenc.h" 38 #include "rtl/textcvt.h" 39 #include "rtl/uri.h" 40 #include "rtl/ustrbuf.h" 41 #include "rtl/ustrbuf.hxx" 42 #include "rtl/ustring.h" 43 #include "rtl/ustring.hxx" 44 #include "sal/types.h" 45 46 #include <cstddef> 47 48 namespace { 49 50 std::size_t const nCharClassSize = 128; 51 52 sal_Unicode const cEscapePrefix = 0x25; // '%' 53 54 inline bool isDigit(sal_uInt32 nUtf32) 55 { 56 return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9' 57 } 58 59 inline bool isAlpha(sal_uInt32 nUtf32) 60 { 61 // 'A'--'Z', 'a'--'z' 62 return ( 63 (nUtf32 >= 0x41 && nUtf32 <= 0x5A) || 64 (nUtf32 >= 0x61 && nUtf32 <= 0x7A) 65 ); 66 } 67 68 inline bool isHighSurrogate(sal_uInt32 nUtf16) 69 { 70 return SAL_RTL_IS_HIGH_SURROGATE(nUtf16); 71 } 72 73 inline bool isLowSurrogate(sal_uInt32 nUtf16) 74 { 75 return SAL_RTL_IS_LOW_SURROGATE(nUtf16); 76 } 77 78 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) 79 { 80 return SAL_RTL_COMBINE_SURROGATES(high, low); 81 } 82 83 inline int getHexWeight(sal_uInt32 nUtf32) 84 { 85 return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9' 86 static_cast< int >(nUtf32 - 0x30) : 87 nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F' 88 static_cast< int >(nUtf32 - 0x41 + 10) : 89 nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f' 90 static_cast< int >(nUtf32 - 0x61 + 10) : 91 -1; // not a hex digit 92 } 93 94 inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32) 95 { 96 return nUtf32 < nCharClassSize && pCharClass[nUtf32]; 97 } 98 99 inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity, 100 sal_Unicode cChar) 101 { 102 rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1); 103 } 104 105 enum EscapeType 106 { 107 EscapeNo, 108 EscapeChar, 109 EscapeOctet 110 }; 111 112 /* Read any of the following: 113 114 - sequence of escape sequences representing character from eCharset, 115 translated to single UCS4 character; or 116 117 - pair of UTF-16 surrogates, translated to single UCS4 character; or 118 119 _ single UTF-16 character, extended to UCS4 character. 120 */ 121 sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, 122 bool bEncoded, rtl_TextEncoding eCharset, 123 EscapeType * pType) 124 { 125 sal_uInt32 nChar = *(*pBegin)++; 126 int nWeight1; 127 int nWeight2; 128 if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2 129 && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0 130 && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0) 131 { 132 *pBegin += 2; 133 nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2); 134 if (nChar <= 0x7F) 135 *pType = EscapeChar; 136 else if (eCharset == RTL_TEXTENCODING_UTF8) 137 { 138 if (nChar >= 0xC0 && nChar <= 0xF4) 139 { 140 sal_uInt32 nEncoded; 141 int nShift; 142 sal_uInt32 nMin; 143 if (nChar <= 0xDF) 144 { 145 nEncoded = (nChar & 0x1F) << 6; 146 nShift = 0; 147 nMin = 0x80; 148 } 149 else if (nChar <= 0xEF) 150 { 151 nEncoded = (nChar & 0x0F) << 12; 152 nShift = 6; 153 nMin = 0x800; 154 } 155 else 156 { 157 nEncoded = (nChar & 0x07) << 18; 158 nShift = 12; 159 nMin = 0x10000; 160 } 161 sal_Unicode const * p = *pBegin; 162 bool bUTF8 = true; 163 for (; nShift >= 0; nShift -= 6) 164 { 165 if (pEnd - p < 3 || p[0] != cEscapePrefix 166 || (nWeight1 = getHexWeight(p[1])) < 8 167 || nWeight1 > 11 168 || (nWeight2 = getHexWeight(p[2])) < 0) 169 { 170 bUTF8 = sal_False; 171 break; 172 } 173 p += 3; 174 nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift; 175 } 176 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded) 177 && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF) 178 { 179 *pBegin = p; 180 *pType = EscapeChar; 181 return nEncoded; 182 } 183 } 184 *pType = EscapeOctet; 185 } 186 else 187 { 188 rtl::OStringBuffer aBuf; 189 aBuf.append(static_cast< char >(nChar)); 190 rtl_TextToUnicodeConverter aConverter 191 = rtl_createTextToUnicodeConverter(eCharset); 192 sal_Unicode const * p = *pBegin; 193 for (;;) 194 { 195 sal_Unicode aDst[2]; 196 sal_uInt32 nInfo; 197 sal_Size nConverted; 198 sal_Size nDstSize = rtl_convertTextToUnicode( 199 aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst, 200 sizeof aDst / sizeof aDst[0], 201 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR 202 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR 203 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR), 204 &nInfo, &nConverted); 205 if (nInfo == 0) 206 { 207 OSL_ASSERT( 208 nConverted 209 == sal::static_int_cast< sal_uInt32 >( 210 aBuf.getLength())); 211 rtl_destroyTextToUnicodeConverter(aConverter); 212 *pBegin = p; 213 *pType = EscapeChar; 214 OSL_ASSERT( 215 nDstSize == 1 216 || (nDstSize == 2 && isHighSurrogate(aDst[0]) 217 && isLowSurrogate(aDst[1]))); 218 return nDstSize == 1 219 ? aDst[0] : combineSurrogates(aDst[0], aDst[1]); 220 } 221 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL 222 && pEnd - p >= 3 && p[0] == cEscapePrefix 223 && (nWeight1 = getHexWeight(p[1])) >= 0 224 && (nWeight2 = getHexWeight(p[2])) >= 0) 225 { 226 p += 3; 227 aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2)); 228 } 229 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL 230 && p != pEnd && *p <= 0x7F) 231 { 232 aBuf.append(static_cast< char >(*p++)); 233 } 234 else 235 { 236 OSL_ASSERT( 237 (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL) 238 == 0); 239 break; 240 } 241 } 242 rtl_destroyTextToUnicodeConverter(aConverter); 243 *pType = EscapeOctet; 244 } 245 return nChar; 246 } 247 else 248 { 249 *pType = EscapeNo; 250 return isHighSurrogate(nChar) && *pBegin < pEnd 251 && isLowSurrogate(**pBegin) ? 252 combineSurrogates(nChar, *(*pBegin)++) : nChar; 253 } 254 } 255 256 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32) 257 { 258 OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char"); 259 if (nUtf32 <= 0xFFFF) { 260 writeUnicode( 261 pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32)); 262 } else { 263 nUtf32 -= 0x10000; 264 writeUnicode( 265 pBuffer, pCapacity, 266 static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800)); 267 writeUnicode( 268 pBuffer, pCapacity, 269 static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00)); 270 } 271 } 272 273 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity, 274 sal_uInt32 nOctet) 275 { 276 OSL_ENSURE(nOctet <= 0xFF, "bad octet"); 277 278 static sal_Unicode const aHex[16] 279 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 280 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */ 281 282 writeUnicode(pBuffer, pCapacity, cEscapePrefix); 283 writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]); 284 writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]); 285 } 286 287 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity, 288 sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict) 289 { 290 OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char"); 291 if (eCharset == RTL_TEXTENCODING_UTF8) { 292 if (nUtf32 < 0x80) 293 writeEscapeOctet(pBuffer, pCapacity, nUtf32); 294 else if (nUtf32 < 0x800) 295 { 296 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0); 297 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); 298 } 299 else if (nUtf32 < 0x10000) 300 { 301 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0); 302 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80); 303 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); 304 } 305 else 306 { 307 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0); 308 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80); 309 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80); 310 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); 311 } 312 } else { 313 rtl_UnicodeToTextConverter aConverter 314 = rtl_createUnicodeToTextConverter(eCharset); 315 sal_Unicode aSrc[2]; 316 sal_Size nSrcSize; 317 if (nUtf32 <= 0xFFFF) 318 { 319 aSrc[0] = static_cast< sal_Unicode >(nUtf32); 320 nSrcSize = 1; 321 } 322 else 323 { 324 aSrc[0] = static_cast< sal_Unicode >( 325 ((nUtf32 - 0x10000) >> 10) | 0xD800); 326 aSrc[1] = static_cast< sal_Unicode >( 327 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00); 328 nSrcSize = 2; 329 } 330 sal_Char aDst[32]; // FIXME random value 331 sal_uInt32 nInfo; 332 sal_Size nConverted; 333 sal_Size nDstSize = rtl_convertUnicodeToText( 334 aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst, 335 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR 336 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR 337 | RTL_UNICODETOTEXT_FLAGS_FLUSH, 338 &nInfo, &nConverted); 339 OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0); 340 rtl_destroyUnicodeToTextConverter(aConverter); 341 if (nInfo == 0) { 342 OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText"); 343 for (sal_Size i = 0; i < nDstSize; ++i) 344 writeEscapeOctet(pBuffer, pCapacity, 345 static_cast< unsigned char >(aDst[i])); 346 // FIXME all octets are escaped, even if there is no need 347 } else { 348 if (bStrict) { 349 return false; 350 } else { 351 writeUcs4(pBuffer, pCapacity, nUtf32); 352 } 353 } 354 } 355 return true; 356 } 357 358 struct Component 359 { 360 sal_Unicode const * pBegin; 361 sal_Unicode const * pEnd; 362 363 inline Component(): pBegin(0) {} 364 365 inline bool isPresent() const { return pBegin != 0; } 366 367 inline sal_Int32 getLength() const; 368 }; 369 370 inline sal_Int32 Component::getLength() const 371 { 372 OSL_ENSURE(isPresent(), "taking length of non-present component"); 373 return static_cast< sal_Int32 >(pEnd - pBegin); 374 } 375 376 struct Components 377 { 378 Component aScheme; 379 Component aAuthority; 380 Component aPath; 381 Component aQuery; 382 Component aFragment; 383 }; 384 385 void parseUriRef(rtl_uString const * pUriRef, Components * pComponents) 386 { 387 // This algorithm is liberal and accepts various forms of illegal input. 388 389 sal_Unicode const * pBegin = pUriRef->buffer; 390 sal_Unicode const * pEnd = pBegin + pUriRef->length; 391 sal_Unicode const * pPos = pBegin; 392 393 if (pPos != pEnd && isAlpha(*pPos)) 394 for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p) 395 if (*p == ':') 396 { 397 pComponents->aScheme.pBegin = pBegin; 398 pComponents->aScheme.pEnd = ++p; 399 pPos = p; 400 break; 401 } 402 else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-' 403 && *p != '.') 404 break; 405 406 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') 407 { 408 pComponents->aAuthority.pBegin = pPos; 409 pPos += 2; 410 while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#') 411 ++pPos; 412 pComponents->aAuthority.pEnd = pPos; 413 } 414 415 pComponents->aPath.pBegin = pPos; 416 while (pPos != pEnd && *pPos != '?' && * pPos != '#') 417 ++pPos; 418 pComponents->aPath.pEnd = pPos; 419 420 if (pPos != pEnd && *pPos == '?') 421 { 422 pComponents->aQuery.pBegin = pPos++; 423 while (pPos != pEnd && * pPos != '#') 424 ++pPos; 425 pComponents->aQuery.pEnd = pPos; 426 } 427 428 if (pPos != pEnd) 429 { 430 OSL_ASSERT(*pPos == '#'); 431 pComponents->aFragment.pBegin = pPos; 432 pComponents->aFragment.pEnd = pEnd; 433 } 434 } 435 436 rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath) 437 { 438 OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/'); 439 OSL_ASSERT(rRelPath.isPresent()); 440 441 // The invariant of aBuffer is that it always starts and ends with a slash 442 // (until probably right at the end of the algorithm, when the last segment 443 // of rRelPath is added, which does not necessarily end in a slash): 444 rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength()); 445 // XXX numeric overflow 446 447 // Segments "." and ".." within rBasePath are not conisdered special (but 448 // are also not removed by ".." segments within rRelPath), RFC 2396 seems a 449 // bit unclear about this point: 450 sal_Int32 nFixed = 1; 451 sal_Unicode const * p = rBasePath.pBegin + 1; 452 for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q) 453 if (*q == '/') 454 { 455 if ( 456 (q - p == 1 && p[0] == '.') || 457 (q - p == 2 && p[0] == '.' && p[1] == '.') 458 ) 459 { 460 nFixed = q + 1 - rBasePath.pBegin; 461 } 462 p = q + 1; 463 } 464 aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin); 465 466 p = rRelPath.pBegin; 467 if (p != rRelPath.pEnd) 468 for (;;) 469 { 470 sal_Unicode const * q = p; 471 sal_Unicode const * r; 472 for (;;) 473 { 474 if (q == rRelPath.pEnd) 475 { 476 r = q; 477 break; 478 } 479 if (*q == '/') 480 { 481 r = q + 1; 482 break; 483 } 484 ++q; 485 } 486 if (q - p == 2 && p[0] == '.' && p[1] == '.') 487 { 488 // Erroneous excess segments ".." within rRelPath are left 489 // intact, as the examples in RFC 2396, section C.2, suggest: 490 sal_Int32 i = aBuffer.getLength() - 1; 491 if (i < nFixed) 492 { 493 aBuffer.append(p, r - p); 494 nFixed += 3; 495 } 496 else 497 { 498 while (aBuffer.charAt(i - 1) != '/') 499 --i; 500 aBuffer.setLength(i); 501 } 502 } 503 else if (q - p != 1 || *p != '.') 504 aBuffer.append(p, r - p); 505 if (q == rRelPath.pEnd) 506 break; 507 p = q + 1; 508 } 509 510 return aBuffer.makeStringAndClear(); 511 } 512 513 } 514 515 sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass) 516 SAL_THROW_EXTERN_C() 517 { 518 static sal_Bool const aCharClass[][nCharClassSize] 519 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */ 520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/ 522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/ 523 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/ 524 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/ 525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/ 526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */ 527 }, 528 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */ 529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 530 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/ 531 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/ 532 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 533 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 534 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 536 }, 537 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */ 538 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 539 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 540 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/ 541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 542 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 543 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 544 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 545 }, 546 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */ 547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 548 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 549 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ 550 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 551 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 552 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 553 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 554 }, 555 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */ 556 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 557 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 558 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ 559 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 560 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 561 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 562 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 563 }, 564 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */ 565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 566 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 567 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ 568 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 569 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 570 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 571 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 572 }, 573 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */ 574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 575 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 576 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/ 577 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 578 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 579 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 580 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 581 }, 582 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */ 583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 584 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/ 585 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/ 586 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 587 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 588 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 589 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 590 }}; 591 OSL_ENSURE( 592 (eCharClass >= 0 593 && (sal::static_int_cast< std::size_t >(eCharClass) 594 < sizeof aCharClass / sizeof aCharClass[0])), 595 "bad eCharClass"); 596 return aCharClass[eCharClass]; 597 } 598 599 void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass, 600 rtl_UriEncodeMechanism eMechanism, 601 rtl_TextEncoding eCharset, rtl_uString ** pResult) 602 SAL_THROW_EXTERN_C() 603 { 604 OSL_ENSURE(!pCharClass[0x25], "bad pCharClass"); 605 // make sure the percent sign is encoded... 606 607 sal_Unicode const * p = pText->buffer; 608 sal_Unicode const * pEnd = p + pText->length; 609 sal_Int32 nCapacity = 0; 610 rtl_uString_new(pResult); 611 while (p < pEnd) 612 { 613 EscapeType eType; 614 sal_uInt32 nUtf32 = readUcs4( 615 &p, pEnd, 616 (eMechanism == rtl_UriEncodeKeepEscapes 617 || eMechanism == rtl_UriEncodeCheckEscapes 618 || eMechanism == rtl_UriEncodeStrictKeepEscapes), 619 eCharset, &eType); 620 switch (eType) 621 { 622 case EscapeNo: 623 if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F 624 writeUnicode(pResult, &nCapacity, 625 static_cast< sal_Unicode >(nUtf32)); 626 else if (!writeEscapeChar( 627 pResult, &nCapacity, nUtf32, eCharset, 628 (eMechanism == rtl_UriEncodeStrict 629 || eMechanism == rtl_UriEncodeStrictKeepEscapes))) 630 { 631 rtl_uString_new(pResult); 632 return; 633 } 634 break; 635 636 case EscapeChar: 637 if (eMechanism == rtl_UriEncodeCheckEscapes 638 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F 639 writeUnicode(pResult, &nCapacity, 640 static_cast< sal_Unicode >(nUtf32)); 641 else if (!writeEscapeChar( 642 pResult, &nCapacity, nUtf32, eCharset, 643 (eMechanism == rtl_UriEncodeStrict 644 || eMechanism == rtl_UriEncodeStrictKeepEscapes))) 645 { 646 rtl_uString_new(pResult); 647 return; 648 } 649 break; 650 651 case EscapeOctet: 652 writeEscapeOctet(pResult, &nCapacity, nUtf32); 653 break; 654 } 655 } 656 } 657 658 void SAL_CALL rtl_uriDecode(rtl_uString * pText, 659 rtl_UriDecodeMechanism eMechanism, 660 rtl_TextEncoding eCharset, rtl_uString ** pResult) 661 SAL_THROW_EXTERN_C() 662 { 663 switch (eMechanism) 664 { 665 case rtl_UriDecodeNone: 666 rtl_uString_assign(pResult, pText); 667 break; 668 669 case rtl_UriDecodeToIuri: 670 eCharset = RTL_TEXTENCODING_UTF8; 671 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict 672 { 673 sal_Unicode const * p = pText->buffer; 674 sal_Unicode const * pEnd = p + pText->length; 675 sal_Int32 nCapacity = 0; 676 rtl_uString_new(pResult); 677 while (p < pEnd) 678 { 679 EscapeType eType; 680 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType); 681 switch (eType) 682 { 683 case EscapeChar: 684 if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri) 685 { 686 writeEscapeOctet(pResult, &nCapacity, nUtf32); 687 break; 688 } 689 case EscapeNo: 690 writeUcs4(pResult, &nCapacity, nUtf32); 691 break; 692 693 case EscapeOctet: 694 if (eMechanism == rtl_UriDecodeStrict) { 695 rtl_uString_new(pResult); 696 return; 697 } 698 writeEscapeOctet(pResult, &nCapacity, nUtf32); 699 break; 700 } 701 } 702 } 703 break; 704 } 705 } 706 707 sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef, 708 rtl_uString * pRelUriRef, 709 rtl_uString ** pResult, 710 rtl_uString ** pException) 711 SAL_THROW_EXTERN_C() 712 { 713 // If pRelUriRef starts with a scheme component it is an absolute URI 714 // reference, and we are done (i.e., this algorithm does not support 715 // backwards-compatible relative URIs starting with a scheme component, see 716 // RFC 2396, section 5.2, step 3): 717 Components aRelComponents; 718 parseUriRef(pRelUriRef, &aRelComponents); 719 if (aRelComponents.aScheme.isPresent()) 720 { 721 rtl_uString_assign(pResult, pRelUriRef); 722 return true; 723 } 724 725 // Parse pBaseUriRef; if the scheme component is not present or not valid, 726 // or the path component is not empty and starts with anything but a slash, 727 // an exception is raised: 728 Components aBaseComponents; 729 parseUriRef(pBaseUriRef, &aBaseComponents); 730 if (!aBaseComponents.aScheme.isPresent()) 731 { 732 rtl::OUString aMessage(pBaseUriRef); 733 aMessage += rtl::OUString( 734 RTL_CONSTASCII_USTRINGPARAM( 735 " does not start with a scheme component")); 736 rtl_uString_assign(pException, 737 const_cast< rtl::OUString & >(aMessage).pData); 738 return false; 739 } 740 if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd 741 && *aBaseComponents.aPath.pBegin != '/') 742 { 743 rtl::OUString aMessage(pBaseUriRef); 744 aMessage += rtl::OUString( 745 RTL_CONSTASCII_USTRINGPARAM( 746 "path component does not start with slash")); 747 rtl_uString_assign(pException, aMessage.pData); 748 return false; 749 } 750 751 // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI 752 // into an absolute one (if the relative URI is a reference to the "current 753 // document," the "current document" is here taken to be the base URI): 754 rtl::OUStringBuffer aBuffer; 755 aBuffer.append(aBaseComponents.aScheme.pBegin, 756 aBaseComponents.aScheme.getLength()); 757 if (aRelComponents.aAuthority.isPresent()) 758 { 759 aBuffer.append(aRelComponents.aAuthority.pBegin, 760 aRelComponents.aAuthority.getLength()); 761 aBuffer.append(aRelComponents.aPath.pBegin, 762 aRelComponents.aPath.getLength()); 763 if (aRelComponents.aQuery.isPresent()) 764 aBuffer.append(aRelComponents.aQuery.pBegin, 765 aRelComponents.aQuery.getLength()); 766 } 767 else 768 { 769 if (aBaseComponents.aAuthority.isPresent()) 770 aBuffer.append(aBaseComponents.aAuthority.pBegin, 771 aBaseComponents.aAuthority.getLength()); 772 if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd 773 && !aRelComponents.aQuery.isPresent()) 774 { 775 aBuffer.append(aBaseComponents.aPath.pBegin, 776 aBaseComponents.aPath.getLength()); 777 if (aBaseComponents.aQuery.isPresent()) 778 aBuffer.append(aBaseComponents.aQuery.pBegin, 779 aBaseComponents.aQuery.getLength()); 780 } 781 else 782 { 783 if (*aRelComponents.aPath.pBegin == '/') 784 aBuffer.append(aRelComponents.aPath.pBegin, 785 aRelComponents.aPath.getLength()); 786 else 787 aBuffer.append(joinPaths(aBaseComponents.aPath, 788 aRelComponents.aPath)); 789 if (aRelComponents.aQuery.isPresent()) 790 aBuffer.append(aRelComponents.aQuery.pBegin, 791 aRelComponents.aQuery.getLength()); 792 } 793 } 794 if (aRelComponents.aFragment.isPresent()) 795 aBuffer.append(aRelComponents.aFragment.pBegin, 796 aRelComponents.aFragment.getLength()); 797 rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData); 798 return true; 799 } 800