1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_sal.hxx" 26 27 #include "rtl/uri.h" 28 29 #include "surrogates.h" 30 31 #include "osl/diagnose.h" 32 #include "rtl/strbuf.hxx" 33 #include "rtl/textenc.h" 34 #include "rtl/textcvt.h" 35 #include "rtl/uri.h" 36 #include "rtl/ustrbuf.h" 37 #include "rtl/ustrbuf.hxx" 38 #include "rtl/ustring.h" 39 #include "rtl/ustring.hxx" 40 #include "sal/types.h" 41 42 #include <cstddef> 43 44 namespace { 45 46 std::size_t const nCharClassSize = 128; 47 48 sal_Unicode const cEscapePrefix = 0x25; // '%' 49 50 inline bool isDigit(sal_uInt32 nUtf32) 51 { 52 return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9' 53 } 54 55 inline bool isAlpha(sal_uInt32 nUtf32) 56 { 57 // 'A'--'Z', 'a'--'z' 58 return ( 59 (nUtf32 >= 0x41 && nUtf32 <= 0x5A) || 60 (nUtf32 >= 0x61 && nUtf32 <= 0x7A) 61 ); 62 } 63 64 inline bool isHighSurrogate(sal_uInt32 nUtf16) 65 { 66 return SAL_RTL_IS_HIGH_SURROGATE(nUtf16); 67 } 68 69 inline bool isLowSurrogate(sal_uInt32 nUtf16) 70 { 71 return SAL_RTL_IS_LOW_SURROGATE(nUtf16); 72 } 73 74 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) 75 { 76 return SAL_RTL_COMBINE_SURROGATES(high, low); 77 } 78 79 inline int getHexWeight(sal_uInt32 nUtf32) 80 { 81 return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9' 82 static_cast< int >(nUtf32 - 0x30) : 83 nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F' 84 static_cast< int >(nUtf32 - 0x41 + 10) : 85 nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f' 86 static_cast< int >(nUtf32 - 0x61 + 10) : 87 -1; // not a hex digit 88 } 89 90 inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32) 91 { 92 return nUtf32 < nCharClassSize && pCharClass[nUtf32]; 93 } 94 95 inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity, 96 sal_Unicode cChar) 97 { 98 rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1); 99 } 100 101 enum EscapeType 102 { 103 EscapeNo, 104 EscapeChar, 105 EscapeOctet 106 }; 107 108 /* Read any of the following: 109 110 - sequence of escape sequences representing character from eCharset, 111 translated to single UCS4 character; or 112 113 - pair of UTF-16 surrogates, translated to single UCS4 character; or 114 115 _ single UTF-16 character, extended to UCS4 character. 116 */ 117 sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, 118 bool bEncoded, rtl_TextEncoding eCharset, 119 EscapeType * pType) 120 { 121 sal_uInt32 nChar = *(*pBegin)++; 122 int nWeight1; 123 int nWeight2; 124 if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2 125 && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0 126 && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0) 127 { 128 *pBegin += 2; 129 nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2); 130 if (nChar <= 0x7F) 131 *pType = EscapeChar; 132 else if (eCharset == RTL_TEXTENCODING_UTF8) 133 { 134 if (nChar >= 0xC0 && nChar <= 0xF4) 135 { 136 sal_uInt32 nEncoded; 137 int nShift; 138 sal_uInt32 nMin; 139 if (nChar <= 0xDF) 140 { 141 nEncoded = (nChar & 0x1F) << 6; 142 nShift = 0; 143 nMin = 0x80; 144 } 145 else if (nChar <= 0xEF) 146 { 147 nEncoded = (nChar & 0x0F) << 12; 148 nShift = 6; 149 nMin = 0x800; 150 } 151 else 152 { 153 nEncoded = (nChar & 0x07) << 18; 154 nShift = 12; 155 nMin = 0x10000; 156 } 157 sal_Unicode const * p = *pBegin; 158 bool bUTF8 = true; 159 for (; nShift >= 0; nShift -= 6) 160 { 161 if (pEnd - p < 3 || p[0] != cEscapePrefix 162 || (nWeight1 = getHexWeight(p[1])) < 8 163 || nWeight1 > 11 164 || (nWeight2 = getHexWeight(p[2])) < 0) 165 { 166 bUTF8 = sal_False; 167 break; 168 } 169 p += 3; 170 nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift; 171 } 172 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded) 173 && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF) 174 { 175 *pBegin = p; 176 *pType = EscapeChar; 177 return nEncoded; 178 } 179 } 180 *pType = EscapeOctet; 181 } 182 else 183 { 184 rtl::OStringBuffer aBuf; 185 aBuf.append(static_cast< char >(nChar)); 186 rtl_TextToUnicodeConverter aConverter 187 = rtl_createTextToUnicodeConverter(eCharset); 188 sal_Unicode const * p = *pBegin; 189 for (;;) 190 { 191 sal_Unicode aDst[2]; 192 sal_uInt32 nInfo; 193 sal_Size nConverted; 194 sal_Size nDstSize = rtl_convertTextToUnicode( 195 aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst, 196 sizeof aDst / sizeof aDst[0], 197 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR 198 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR 199 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR), 200 &nInfo, &nConverted); 201 if (nInfo == 0) 202 { 203 OSL_ASSERT( 204 nConverted 205 == sal::static_int_cast< sal_uInt32 >( 206 aBuf.getLength())); 207 rtl_destroyTextToUnicodeConverter(aConverter); 208 *pBegin = p; 209 *pType = EscapeChar; 210 OSL_ASSERT( 211 nDstSize == 1 212 || (nDstSize == 2 && isHighSurrogate(aDst[0]) 213 && isLowSurrogate(aDst[1]))); 214 return nDstSize == 1 215 ? aDst[0] : combineSurrogates(aDst[0], aDst[1]); 216 } 217 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL 218 && pEnd - p >= 3 && p[0] == cEscapePrefix 219 && (nWeight1 = getHexWeight(p[1])) >= 0 220 && (nWeight2 = getHexWeight(p[2])) >= 0) 221 { 222 p += 3; 223 aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2)); 224 } 225 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL 226 && p != pEnd && *p <= 0x7F) 227 { 228 aBuf.append(static_cast< char >(*p++)); 229 } 230 else 231 { 232 OSL_ASSERT( 233 (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL) 234 == 0); 235 break; 236 } 237 } 238 rtl_destroyTextToUnicodeConverter(aConverter); 239 *pType = EscapeOctet; 240 } 241 return nChar; 242 } 243 else 244 { 245 *pType = EscapeNo; 246 return isHighSurrogate(nChar) && *pBegin < pEnd 247 && isLowSurrogate(**pBegin) ? 248 combineSurrogates(nChar, *(*pBegin)++) : nChar; 249 } 250 } 251 252 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32) 253 { 254 OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char"); 255 if (nUtf32 <= 0xFFFF) { 256 writeUnicode( 257 pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32)); 258 } else { 259 nUtf32 -= 0x10000; 260 writeUnicode( 261 pBuffer, pCapacity, 262 static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800)); 263 writeUnicode( 264 pBuffer, pCapacity, 265 static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00)); 266 } 267 } 268 269 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity, 270 sal_uInt32 nOctet) 271 { 272 OSL_ENSURE(nOctet <= 0xFF, "bad octet"); 273 274 static sal_Unicode const aHex[16] 275 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 276 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */ 277 278 writeUnicode(pBuffer, pCapacity, cEscapePrefix); 279 writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]); 280 writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]); 281 } 282 283 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity, 284 sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict) 285 { 286 OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char"); 287 if (eCharset == RTL_TEXTENCODING_UTF8) { 288 if (nUtf32 < 0x80) 289 writeEscapeOctet(pBuffer, pCapacity, nUtf32); 290 else if (nUtf32 < 0x800) 291 { 292 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0); 293 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); 294 } 295 else if (nUtf32 < 0x10000) 296 { 297 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0); 298 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80); 299 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); 300 } 301 else 302 { 303 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0); 304 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80); 305 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80); 306 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80); 307 } 308 } else { 309 rtl_UnicodeToTextConverter aConverter 310 = rtl_createUnicodeToTextConverter(eCharset); 311 sal_Unicode aSrc[2]; 312 sal_Size nSrcSize; 313 if (nUtf32 <= 0xFFFF) 314 { 315 aSrc[0] = static_cast< sal_Unicode >(nUtf32); 316 nSrcSize = 1; 317 } 318 else 319 { 320 aSrc[0] = static_cast< sal_Unicode >( 321 ((nUtf32 - 0x10000) >> 10) | 0xD800); 322 aSrc[1] = static_cast< sal_Unicode >( 323 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00); 324 nSrcSize = 2; 325 } 326 sal_Char aDst[32]; // FIXME random value 327 sal_uInt32 nInfo; 328 sal_Size nConverted; 329 sal_Size nDstSize = rtl_convertUnicodeToText( 330 aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst, 331 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR 332 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR 333 | RTL_UNICODETOTEXT_FLAGS_FLUSH, 334 &nInfo, &nConverted); 335 OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0); 336 rtl_destroyUnicodeToTextConverter(aConverter); 337 if (nInfo == 0) { 338 OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText"); 339 for (sal_Size i = 0; i < nDstSize; ++i) 340 writeEscapeOctet(pBuffer, pCapacity, 341 static_cast< unsigned char >(aDst[i])); 342 // FIXME all octets are escaped, even if there is no need 343 } else { 344 if (bStrict) { 345 return false; 346 } else { 347 writeUcs4(pBuffer, pCapacity, nUtf32); 348 } 349 } 350 } 351 return true; 352 } 353 354 struct Component 355 { 356 sal_Unicode const * pBegin; 357 sal_Unicode const * pEnd; 358 359 inline Component(): pBegin(0) {} 360 361 inline bool isPresent() const { return pBegin != 0; } 362 363 inline sal_Int32 getLength() const; 364 }; 365 366 inline sal_Int32 Component::getLength() const 367 { 368 OSL_ENSURE(isPresent(), "taking length of non-present component"); 369 return static_cast< sal_Int32 >(pEnd - pBegin); 370 } 371 372 struct Components 373 { 374 Component aScheme; 375 Component aAuthority; 376 Component aPath; 377 Component aQuery; 378 Component aFragment; 379 }; 380 381 void parseUriRef(rtl_uString const * pUriRef, Components * pComponents) 382 { 383 // This algorithm is liberal and accepts various forms of illegal input. 384 385 sal_Unicode const * pBegin = pUriRef->buffer; 386 sal_Unicode const * pEnd = pBegin + pUriRef->length; 387 sal_Unicode const * pPos = pBegin; 388 389 if (pPos != pEnd && isAlpha(*pPos)) 390 for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p) 391 if (*p == ':') 392 { 393 pComponents->aScheme.pBegin = pBegin; 394 pComponents->aScheme.pEnd = ++p; 395 pPos = p; 396 break; 397 } 398 else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-' 399 && *p != '.') 400 break; 401 402 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') 403 { 404 pComponents->aAuthority.pBegin = pPos; 405 pPos += 2; 406 while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#') 407 ++pPos; 408 pComponents->aAuthority.pEnd = pPos; 409 } 410 411 pComponents->aPath.pBegin = pPos; 412 while (pPos != pEnd && *pPos != '?' && * pPos != '#') 413 ++pPos; 414 pComponents->aPath.pEnd = pPos; 415 416 if (pPos != pEnd && *pPos == '?') 417 { 418 pComponents->aQuery.pBegin = pPos++; 419 while (pPos != pEnd && * pPos != '#') 420 ++pPos; 421 pComponents->aQuery.pEnd = pPos; 422 } 423 424 if (pPos != pEnd) 425 { 426 OSL_ASSERT(*pPos == '#'); 427 pComponents->aFragment.pBegin = pPos; 428 pComponents->aFragment.pEnd = pEnd; 429 } 430 } 431 432 rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath) 433 { 434 OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/'); 435 OSL_ASSERT(rRelPath.isPresent()); 436 437 // The invariant of aBuffer is that it always starts and ends with a slash 438 // (until probably right at the end of the algorithm, when the last segment 439 // of rRelPath is added, which does not necessarily end in a slash): 440 rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength()); 441 // XXX numeric overflow 442 443 // Segments "." and ".." within rBasePath are not conisdered special (but 444 // are also not removed by ".." segments within rRelPath), RFC 2396 seems a 445 // bit unclear about this point: 446 sal_Int32 nFixed = 1; 447 sal_Unicode const * p = rBasePath.pBegin + 1; 448 for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q) 449 if (*q == '/') 450 { 451 if ( 452 (q - p == 1 && p[0] == '.') || 453 (q - p == 2 && p[0] == '.' && p[1] == '.') 454 ) 455 { 456 nFixed = q + 1 - rBasePath.pBegin; 457 } 458 p = q + 1; 459 } 460 aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin); 461 462 p = rRelPath.pBegin; 463 if (p != rRelPath.pEnd) 464 for (;;) 465 { 466 sal_Unicode const * q = p; 467 sal_Unicode const * r; 468 for (;;) 469 { 470 if (q == rRelPath.pEnd) 471 { 472 r = q; 473 break; 474 } 475 if (*q == '/') 476 { 477 r = q + 1; 478 break; 479 } 480 ++q; 481 } 482 if (q - p == 2 && p[0] == '.' && p[1] == '.') 483 { 484 // Erroneous excess segments ".." within rRelPath are left 485 // intact, as the examples in RFC 2396, section C.2, suggest: 486 sal_Int32 i = aBuffer.getLength() - 1; 487 if (i < nFixed) 488 { 489 aBuffer.append(p, r - p); 490 nFixed += 3; 491 } 492 else 493 { 494 while (aBuffer.charAt(i - 1) != '/') 495 --i; 496 aBuffer.setLength(i); 497 } 498 } 499 else if (q - p != 1 || *p != '.') 500 aBuffer.append(p, r - p); 501 if (q == rRelPath.pEnd) 502 break; 503 p = q + 1; 504 } 505 506 return aBuffer.makeStringAndClear(); 507 } 508 509 } 510 511 sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass) 512 SAL_THROW_EXTERN_C() 513 { 514 static sal_Bool const aCharClass[][nCharClassSize] 515 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */ 516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/ 518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/ 519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/ 520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/ 521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/ 522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */ 523 }, 524 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */ 525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 526 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/ 527 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/ 528 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 529 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 530 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 531 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 532 }, 533 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */ 534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 535 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 536 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/ 537 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 538 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 539 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 540 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 541 }, 542 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */ 543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 544 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 545 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ 546 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 547 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 548 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 549 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 550 }, 551 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */ 552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 553 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 554 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ 555 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 556 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 557 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 558 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 559 }, 560 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */ 561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 562 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 563 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/ 564 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 565 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 566 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 567 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 568 }, 569 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */ 570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 571 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/ 572 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/ 573 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 574 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 575 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 576 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 577 }, 578 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */ 579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 580 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/ 581 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/ 582 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/ 583 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/ 584 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/ 585 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */ 586 }}; 587 OSL_ENSURE( 588 (eCharClass >= 0 589 && (sal::static_int_cast< std::size_t >(eCharClass) 590 < sizeof aCharClass / sizeof aCharClass[0])), 591 "bad eCharClass"); 592 return aCharClass[eCharClass]; 593 } 594 595 void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass, 596 rtl_UriEncodeMechanism eMechanism, 597 rtl_TextEncoding eCharset, rtl_uString ** pResult) 598 SAL_THROW_EXTERN_C() 599 { 600 OSL_ENSURE(!pCharClass[0x25], "bad pCharClass"); 601 // make sure the percent sign is encoded... 602 603 sal_Unicode const * p = pText->buffer; 604 sal_Unicode const * pEnd = p + pText->length; 605 sal_Int32 nCapacity = 0; 606 rtl_uString_new(pResult); 607 while (p < pEnd) 608 { 609 EscapeType eType; 610 sal_uInt32 nUtf32 = readUcs4( 611 &p, pEnd, 612 (eMechanism == rtl_UriEncodeKeepEscapes 613 || eMechanism == rtl_UriEncodeCheckEscapes 614 || eMechanism == rtl_UriEncodeStrictKeepEscapes), 615 eCharset, &eType); 616 switch (eType) 617 { 618 case EscapeNo: 619 if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F 620 writeUnicode(pResult, &nCapacity, 621 static_cast< sal_Unicode >(nUtf32)); 622 else if (!writeEscapeChar( 623 pResult, &nCapacity, nUtf32, eCharset, 624 (eMechanism == rtl_UriEncodeStrict 625 || eMechanism == rtl_UriEncodeStrictKeepEscapes))) 626 { 627 rtl_uString_new(pResult); 628 return; 629 } 630 break; 631 632 case EscapeChar: 633 if (eMechanism == rtl_UriEncodeCheckEscapes 634 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F 635 writeUnicode(pResult, &nCapacity, 636 static_cast< sal_Unicode >(nUtf32)); 637 else if (!writeEscapeChar( 638 pResult, &nCapacity, nUtf32, eCharset, 639 (eMechanism == rtl_UriEncodeStrict 640 || eMechanism == rtl_UriEncodeStrictKeepEscapes))) 641 { 642 rtl_uString_new(pResult); 643 return; 644 } 645 break; 646 647 case EscapeOctet: 648 writeEscapeOctet(pResult, &nCapacity, nUtf32); 649 break; 650 } 651 } 652 } 653 654 void SAL_CALL rtl_uriDecode(rtl_uString * pText, 655 rtl_UriDecodeMechanism eMechanism, 656 rtl_TextEncoding eCharset, rtl_uString ** pResult) 657 SAL_THROW_EXTERN_C() 658 { 659 switch (eMechanism) 660 { 661 case rtl_UriDecodeNone: 662 rtl_uString_assign(pResult, pText); 663 break; 664 665 case rtl_UriDecodeToIuri: 666 eCharset = RTL_TEXTENCODING_UTF8; 667 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict 668 { 669 sal_Unicode const * p = pText->buffer; 670 sal_Unicode const * pEnd = p + pText->length; 671 sal_Int32 nCapacity = 0; 672 rtl_uString_new(pResult); 673 while (p < pEnd) 674 { 675 EscapeType eType; 676 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType); 677 switch (eType) 678 { 679 case EscapeChar: 680 if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri) 681 { 682 writeEscapeOctet(pResult, &nCapacity, nUtf32); 683 break; 684 } 685 case EscapeNo: 686 writeUcs4(pResult, &nCapacity, nUtf32); 687 break; 688 689 case EscapeOctet: 690 if (eMechanism == rtl_UriDecodeStrict) { 691 rtl_uString_new(pResult); 692 return; 693 } 694 writeEscapeOctet(pResult, &nCapacity, nUtf32); 695 break; 696 } 697 } 698 } 699 break; 700 } 701 } 702 703 sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef, 704 rtl_uString * pRelUriRef, 705 rtl_uString ** pResult, 706 rtl_uString ** pException) 707 SAL_THROW_EXTERN_C() 708 { 709 // If pRelUriRef starts with a scheme component it is an absolute URI 710 // reference, and we are done (i.e., this algorithm does not support 711 // backwards-compatible relative URIs starting with a scheme component, see 712 // RFC 2396, section 5.2, step 3): 713 Components aRelComponents; 714 parseUriRef(pRelUriRef, &aRelComponents); 715 if (aRelComponents.aScheme.isPresent()) 716 { 717 rtl_uString_assign(pResult, pRelUriRef); 718 return true; 719 } 720 721 // Parse pBaseUriRef; if the scheme component is not present or not valid, 722 // or the path component is not empty and starts with anything but a slash, 723 // an exception is raised: 724 Components aBaseComponents; 725 parseUriRef(pBaseUriRef, &aBaseComponents); 726 if (!aBaseComponents.aScheme.isPresent()) 727 { 728 rtl::OUString aMessage(pBaseUriRef); 729 aMessage += rtl::OUString( 730 RTL_CONSTASCII_USTRINGPARAM( 731 " does not start with a scheme component")); 732 rtl_uString_assign(pException, 733 const_cast< rtl::OUString & >(aMessage).pData); 734 return false; 735 } 736 if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd 737 && *aBaseComponents.aPath.pBegin != '/') 738 { 739 rtl::OUString aMessage(pBaseUriRef); 740 aMessage += rtl::OUString( 741 RTL_CONSTASCII_USTRINGPARAM( 742 "path component does not start with slash")); 743 rtl_uString_assign(pException, aMessage.pData); 744 return false; 745 } 746 747 // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI 748 // into an absolute one (if the relative URI is a reference to the "current 749 // document," the "current document" is here taken to be the base URI): 750 rtl::OUStringBuffer aBuffer; 751 aBuffer.append(aBaseComponents.aScheme.pBegin, 752 aBaseComponents.aScheme.getLength()); 753 if (aRelComponents.aAuthority.isPresent()) 754 { 755 aBuffer.append(aRelComponents.aAuthority.pBegin, 756 aRelComponents.aAuthority.getLength()); 757 aBuffer.append(aRelComponents.aPath.pBegin, 758 aRelComponents.aPath.getLength()); 759 if (aRelComponents.aQuery.isPresent()) 760 aBuffer.append(aRelComponents.aQuery.pBegin, 761 aRelComponents.aQuery.getLength()); 762 } 763 else 764 { 765 if (aBaseComponents.aAuthority.isPresent()) 766 aBuffer.append(aBaseComponents.aAuthority.pBegin, 767 aBaseComponents.aAuthority.getLength()); 768 if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd 769 && !aRelComponents.aQuery.isPresent()) 770 { 771 aBuffer.append(aBaseComponents.aPath.pBegin, 772 aBaseComponents.aPath.getLength()); 773 if (aBaseComponents.aQuery.isPresent()) 774 aBuffer.append(aBaseComponents.aQuery.pBegin, 775 aBaseComponents.aQuery.getLength()); 776 } 777 else 778 { 779 if (*aRelComponents.aPath.pBegin == '/') 780 aBuffer.append(aRelComponents.aPath.pBegin, 781 aRelComponents.aPath.getLength()); 782 else 783 aBuffer.append(joinPaths(aBaseComponents.aPath, 784 aRelComponents.aPath)); 785 if (aRelComponents.aQuery.isPresent()) 786 aBuffer.append(aRelComponents.aQuery.pBegin, 787 aRelComponents.aQuery.getLength()); 788 } 789 } 790 if (aRelComponents.aFragment.isPresent()) 791 aBuffer.append(aRelComponents.aFragment.pBegin, 792 aRelComponents.aFragment.getLength()); 793 rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData); 794 return true; 795 } 796