1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_svl.hxx" 26 #include <svl/urihelper.hxx> 27 #include <com/sun/star/beans/XPropertySet.hpp> 28 #include "com/sun/star/lang/WrappedTargetRuntimeException.hpp" 29 #include "com/sun/star/lang/XMultiComponentFactory.hpp" 30 #include "com/sun/star/ucb/Command.hpp" 31 #include <com/sun/star/ucb/FileSystemNotation.hpp> 32 #include "com/sun/star/ucb/IllegalIdentifierException.hpp" 33 #include "com/sun/star/ucb/UnsupportedCommandException.hpp" 34 #include "com/sun/star/ucb/XCommandEnvironment.hpp" 35 #include "com/sun/star/ucb/XCommandProcessor.hpp" 36 #include "com/sun/star/ucb/XContent.hpp" 37 #include "com/sun/star/ucb/XContentIdentifierFactory.hpp" 38 #include "com/sun/star/ucb/XContentProvider.hpp" 39 #include <com/sun/star/ucb/XContentProviderManager.hpp> 40 #include "com/sun/star/uno/Any.hxx" 41 #include "com/sun/star/uno/Exception.hpp" 42 #include "com/sun/star/uno/Reference.hxx" 43 #include "com/sun/star/uno/RuntimeException.hpp" 44 #include "com/sun/star/uno/Sequence.hxx" 45 #include "com/sun/star/uno/XComponentContext.hpp" 46 #include "com/sun/star/uno/XInterface.hpp" 47 #include "com/sun/star/uri/UriReferenceFactory.hpp" 48 #include "com/sun/star/uri/XUriReference.hpp" 49 #include "com/sun/star/uri/XUriReferenceFactory.hpp" 50 #include "cppuhelper/exc_hlp.hxx" 51 #include "comphelper/processfactory.hxx" 52 #include "osl/diagnose.h" 53 #include "rtl/ustrbuf.hxx" 54 #include "rtl/ustring.h" 55 #include "rtl/ustring.hxx" 56 #include "sal/types.h" 57 #include <tools/debug.hxx> 58 #include <tools/inetmime.hxx> 59 #include <ucbhelper/contentbroker.hxx> 60 #include <unotools/charclass.hxx> 61 #include "rtl/instance.hxx" 62 63 namespace unnamed_svl_urihelper {} 64 using namespace unnamed_svl_urihelper; 65 // unnamed namespaces don't work well yet... 66 67 namespace css = com::sun::star; 68 using namespace com::sun::star; 69 70 //============================================================================ 71 // 72 // SmartRel2Abs 73 // 74 //============================================================================ 75 76 namespace unnamed_svl_urihelper { 77 78 inline UniString toUniString(ByteString const & rString) 79 { 80 return UniString(rString, RTL_TEXTENCODING_ISO_8859_1); 81 } 82 83 inline UniString toUniString(UniString const & rString) 84 { 85 return rString; 86 } 87 88 template< typename Str > 89 inline UniString SmartRel2Abs_Impl(INetURLObject const & rTheBaseURIRef, 90 Str const & rTheRelURIRef, 91 Link const & rMaybeFileHdl, 92 bool bCheckFileExists, 93 bool bIgnoreFragment, 94 INetURLObject::EncodeMechanism 95 eEncodeMechanism, 96 INetURLObject::DecodeMechanism 97 eDecodeMechanism, 98 rtl_TextEncoding eCharset, 99 bool bRelativeNonURIs, 100 INetURLObject::FSysStyle eStyle) 101 { 102 // Backwards compatibility: 103 if (rTheRelURIRef.Len() != 0 && rTheRelURIRef.GetChar(0) == '#') 104 return toUniString(rTheRelURIRef); 105 106 INetURLObject aAbsURIRef; 107 if (rTheBaseURIRef.HasError()) 108 aAbsURIRef. 109 SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle); 110 else 111 { 112 bool bWasAbsolute; 113 aAbsURIRef = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef, 114 bWasAbsolute, 115 bIgnoreFragment, 116 eEncodeMechanism, 117 eCharset, 118 bRelativeNonURIs, 119 eStyle); 120 if (bCheckFileExists 121 && !bWasAbsolute 122 && (aAbsURIRef.GetProtocol() == INET_PROT_FILE)) 123 { 124 INetURLObject aNonFileURIRef; 125 aNonFileURIRef.SetSmartURL(rTheRelURIRef, 126 eEncodeMechanism, 127 eCharset, 128 eStyle); 129 if (!aNonFileURIRef.HasError() 130 && aNonFileURIRef.GetProtocol() != INET_PROT_FILE) 131 { 132 bool bMaybeFile = false; 133 if (rMaybeFileHdl.IsSet()) 134 { 135 UniString aFilePath(toUniString(rTheRelURIRef)); 136 bMaybeFile = rMaybeFileHdl.Call(&aFilePath) != 0; 137 } 138 if (!bMaybeFile) 139 aAbsURIRef = aNonFileURIRef; 140 } 141 } 142 } 143 return aAbsURIRef.GetMainURL(eDecodeMechanism, eCharset); 144 } 145 146 } 147 148 UniString 149 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef, 150 ByteString const & rTheRelURIRef, 151 Link const & rMaybeFileHdl, 152 bool bCheckFileExists, 153 bool bIgnoreFragment, 154 INetURLObject::EncodeMechanism eEncodeMechanism, 155 INetURLObject::DecodeMechanism eDecodeMechanism, 156 rtl_TextEncoding eCharset, 157 bool bRelativeNonURIs, 158 INetURLObject::FSysStyle eStyle) 159 { 160 return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl, 161 bCheckFileExists, bIgnoreFragment, 162 eEncodeMechanism, eDecodeMechanism, eCharset, 163 bRelativeNonURIs, eStyle); 164 } 165 166 UniString 167 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef, 168 UniString const & rTheRelURIRef, 169 Link const & rMaybeFileHdl, 170 bool bCheckFileExists, 171 bool bIgnoreFragment, 172 INetURLObject::EncodeMechanism eEncodeMechanism, 173 INetURLObject::DecodeMechanism eDecodeMechanism, 174 rtl_TextEncoding eCharset, 175 bool bRelativeNonURIs, 176 INetURLObject::FSysStyle eStyle) 177 { 178 return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl, 179 bCheckFileExists, bIgnoreFragment, 180 eEncodeMechanism, eDecodeMechanism, eCharset, 181 bRelativeNonURIs, eStyle); 182 } 183 184 //============================================================================ 185 // 186 // SetMaybeFileHdl 187 // 188 //============================================================================ 189 190 namespace { struct MaybeFileHdl : public rtl::Static< Link, MaybeFileHdl > {}; } 191 192 void URIHelper::SetMaybeFileHdl(Link const & rTheMaybeFileHdl) 193 { 194 MaybeFileHdl::get() = rTheMaybeFileHdl; 195 } 196 197 //============================================================================ 198 // 199 // GetMaybeFileHdl 200 // 201 //============================================================================ 202 203 Link URIHelper::GetMaybeFileHdl() 204 { 205 return MaybeFileHdl::get(); 206 } 207 208 namespace { 209 210 bool isAbsoluteHierarchicalUriReference( 211 css::uno::Reference< css::uri::XUriReference > const & uriReference) 212 { 213 return uriReference.is() && uriReference->isAbsolute() 214 && uriReference->isHierarchical() && !uriReference->hasRelativePath(); 215 } 216 217 // To improve performance, assume that if for any prefix URL of a given 218 // hierarchical URL either a UCB content cannot be created, or the UCB content 219 // does not support the getCasePreservingURL command, then this will hold for 220 // any other prefix URL of the given URL, too: 221 enum Result { Success, GeneralFailure, SpecificFailure }; 222 223 Result normalizePrefix( 224 css::uno::Reference< css::ucb::XContentProvider > const & broker, 225 rtl::OUString const & uri, rtl::OUString * normalized) 226 { 227 OSL_ASSERT(broker.is() && normalized != 0); 228 css::uno::Reference< css::ucb::XContent > content; 229 try { 230 content = broker->queryContent( 231 css::uno::Reference< css::ucb::XContentIdentifierFactory >( 232 broker, css::uno::UNO_QUERY_THROW)->createContentIdentifier( 233 uri)); 234 } catch (css::ucb::IllegalIdentifierException &) {} 235 if (!content.is()) { 236 return GeneralFailure; 237 } 238 try { 239 #if OSL_DEBUG_LEVEL > 0 240 bool ok = 241 #endif 242 (css::uno::Reference< css::ucb::XCommandProcessor >( 243 content, css::uno::UNO_QUERY_THROW)->execute( 244 css::ucb::Command( 245 rtl::OUString( 246 RTL_CONSTASCII_USTRINGPARAM( 247 "getCasePreservingURL")), 248 -1, css::uno::Any()), 249 0, 250 css::uno::Reference< css::ucb::XCommandEnvironment >()) 251 >>= *normalized); 252 OSL_ASSERT(ok); 253 } catch (css::uno::RuntimeException &) { 254 throw; 255 } catch (css::ucb::UnsupportedCommandException &) { 256 return GeneralFailure; 257 } catch (css::uno::Exception &) { 258 return SpecificFailure; 259 } 260 return Success; 261 } 262 263 rtl::OUString normalize( 264 css::uno::Reference< css::ucb::XContentProvider > const & broker, 265 css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory, 266 rtl::OUString const & uriReference) 267 { 268 // normalizePrefix can potentially fail (a typically example being a file 269 // URL that denotes a non-existing resource); in such a case, try to 270 // normalize as long a prefix of the given URL as possible (i.e., normalize 271 // all the existing directories within the path): 272 rtl::OUString normalized; 273 sal_Int32 n = uriReference.indexOf('#'); 274 normalized = n == -1 ? uriReference : uriReference.copy(0, n); 275 switch (normalizePrefix(broker, normalized, &normalized)) { 276 case Success: 277 return n == -1 ? normalized : normalized + uriReference.copy(n); 278 case GeneralFailure: 279 return uriReference; 280 case SpecificFailure: 281 default: 282 break; 283 } 284 css::uno::Reference< css::uri::XUriReference > ref( 285 uriFactory->parse(uriReference)); 286 if (!isAbsoluteHierarchicalUriReference(ref)) { 287 return uriReference; 288 } 289 sal_Int32 count = ref->getPathSegmentCount(); 290 if (count < 2) { 291 return uriReference; 292 } 293 rtl::OUStringBuffer head(ref->getScheme()); 294 head.append(static_cast< sal_Unicode >(':')); 295 if (ref->hasAuthority()) { 296 head.appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 297 head.append(ref->getAuthority()); 298 } 299 for (sal_Int32 i = count - 1; i > 0; --i) { 300 rtl::OUStringBuffer buf(head); 301 for (sal_Int32 j = 0; j < i; ++j) { 302 buf.append(static_cast< sal_Unicode >('/')); 303 buf.append(ref->getPathSegment(j)); 304 } 305 normalized = buf.makeStringAndClear(); 306 if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure) 307 { 308 buf.append(normalized); 309 css::uno::Reference< css::uri::XUriReference > preRef( 310 uriFactory->parse(normalized)); 311 if (!isAbsoluteHierarchicalUriReference(preRef)) { 312 // This could only happen if something is inconsistent: 313 break; 314 } 315 sal_Int32 preCount = preRef->getPathSegmentCount(); 316 // normalizePrefix may have added or removed a final slash: 317 if (preCount != i) { 318 if (preCount == i - 1) { 319 buf.append(static_cast< sal_Unicode >('/')); 320 } else if (preCount - 1 == i && buf.getLength() > 0 321 && buf.charAt(buf.getLength() - 1) == '/') 322 { 323 buf.setLength(buf.getLength() - 1); 324 } else { 325 // This could only happen if something is inconsistent: 326 break; 327 } 328 } 329 for (sal_Int32 j = i; j < count; ++j) { 330 buf.append(static_cast< sal_Unicode >('/')); 331 buf.append(ref->getPathSegment(j)); 332 } 333 if (ref->hasQuery()) { 334 buf.append(static_cast< sal_Unicode >('?')); 335 buf.append(ref->getQuery()); 336 } 337 if (ref->hasFragment()) { 338 buf.append(static_cast< sal_Unicode >('#')); 339 buf.append(ref->getFragment()); 340 } 341 return buf.makeStringAndClear(); 342 } 343 } 344 return uriReference; 345 } 346 347 } 348 349 css::uno::Reference< css::uri::XUriReference > 350 URIHelper::normalizedMakeRelative( 351 css::uno::Reference< css::uno::XComponentContext > const & context, 352 rtl::OUString const & baseUriReference, rtl::OUString const & uriReference) 353 { 354 OSL_ASSERT(context.is()); 355 css::uno::Reference< css::lang::XMultiComponentFactory > componentFactory( 356 context->getServiceManager()); 357 if (!componentFactory.is()) { 358 throw css::uno::RuntimeException( 359 rtl::OUString( 360 RTL_CONSTASCII_USTRINGPARAM( 361 "component context has no service manager")), 362 css::uno::Reference< css::uno::XInterface >()); 363 } 364 css::uno::Sequence< css::uno::Any > args(2); 365 args[0] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Local")); 366 args[1] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Office")); 367 css::uno::Reference< css::ucb::XContentProvider > broker; 368 try { 369 broker = css::uno::Reference< css::ucb::XContentProvider >( 370 componentFactory->createInstanceWithArgumentsAndContext( 371 rtl::OUString( 372 RTL_CONSTASCII_USTRINGPARAM( 373 "com.sun.star.ucb.UniversalContentBroker")), 374 args, context), 375 css::uno::UNO_QUERY_THROW); 376 } catch (css::uno::RuntimeException &) { 377 throw; 378 } catch (css::uno::Exception &) { 379 css::uno::Any exception(cppu::getCaughtException()); 380 throw css::lang::WrappedTargetRuntimeException( 381 rtl::OUString( 382 RTL_CONSTASCII_USTRINGPARAM( 383 "creating com.sun.star.ucb.UniversalContentBroker failed")), 384 css::uno::Reference< css::uno::XInterface >(), 385 exception); 386 } 387 css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory( 388 css::uri::UriReferenceFactory::create(context)); 389 return uriFactory->makeRelative( 390 uriFactory->parse(normalize(broker, uriFactory, baseUriReference)), 391 uriFactory->parse(normalize(broker, uriFactory, uriReference)), true, 392 true, false); 393 } 394 395 rtl::OUString URIHelper::simpleNormalizedMakeRelative( 396 rtl::OUString const & baseUriReference, rtl::OUString const & uriReference) 397 { 398 com::sun::star::uno::Reference< com::sun::star::uri::XUriReference > rel( 399 URIHelper::normalizedMakeRelative( 400 com::sun::star::uno::Reference< 401 com::sun::star::uno::XComponentContext >( 402 (com::sun::star::uno::Reference< 403 com::sun::star::beans::XPropertySet >( 404 comphelper::getProcessServiceFactory(), 405 com::sun::star::uno::UNO_QUERY_THROW)-> 406 getPropertyValue( 407 rtl::OUString( 408 RTL_CONSTASCII_USTRINGPARAM("DefaultContext")))), 409 com::sun::star::uno::UNO_QUERY_THROW), 410 baseUriReference, uriReference)); 411 return rel.is() ? rel->getUriReference() : uriReference; 412 } 413 414 //============================================================================ 415 // 416 // FindFirstURLInText 417 // 418 //============================================================================ 419 420 namespace unnamed_svl_urihelper { 421 422 inline xub_StrLen nextChar(UniString const & rStr, xub_StrLen nPos) 423 { 424 return INetMIME::isHighSurrogate(rStr.GetChar(nPos)) 425 && rStr.Len() - nPos >= 2 426 && INetMIME::isLowSurrogate(rStr.GetChar(nPos + 1)) ? 427 nPos + 2 : nPos + 1; 428 } 429 430 bool isBoundary1(CharClass const & rCharClass, UniString const & rStr, 431 xub_StrLen nPos, xub_StrLen nEnd) 432 { 433 if (nPos == nEnd) 434 return true; 435 if (rCharClass.isLetterNumeric(rStr, nPos)) 436 return false; 437 switch (rStr.GetChar(nPos)) 438 { 439 case '$': 440 case '%': 441 case '&': 442 case '-': 443 case '/': 444 case '@': 445 case '\\': 446 return false; 447 default: 448 return true; 449 } 450 } 451 452 bool isBoundary2(CharClass const & rCharClass, UniString const & rStr, 453 xub_StrLen nPos, xub_StrLen nEnd) 454 { 455 if (nPos == nEnd) 456 return true; 457 if (rCharClass.isLetterNumeric(rStr, nPos)) 458 return false; 459 switch (rStr.GetChar(nPos)) 460 { 461 case '!': 462 case '#': 463 case '$': 464 case '%': 465 case '&': 466 case '\'': 467 case '*': 468 case '+': 469 case '-': 470 case '/': 471 case '=': 472 case '?': 473 case '@': 474 case '^': 475 case '_': 476 case '`': 477 case '{': 478 case '|': 479 case '}': 480 case '~': 481 return false; 482 default: 483 return true; 484 } 485 } 486 487 bool checkWChar(CharClass const & rCharClass, UniString const & rStr, 488 xub_StrLen * pPos, xub_StrLen * pEnd, bool bBackslash = false, 489 bool bPipe = false) 490 { 491 sal_Unicode c = rStr.GetChar(*pPos); 492 if (INetMIME::isUSASCII(c)) 493 { 494 static sal_uInt8 const aMap[128] 495 = { 0, 0, 0, 0, 0, 0, 0, 0, 496 0, 0, 0, 0, 0, 0, 0, 0, 497 0, 0, 0, 0, 0, 0, 0, 0, 498 0, 0, 0, 0, 0, 0, 0, 0, 499 0, 1, 0, 0, 4, 4, 4, 1, // !"#$%&' 500 1, 1, 1, 1, 1, 4, 1, 4, // ()*+,-./ 501 4, 4, 4, 4, 4, 4, 4, 4, // 01234567 502 4, 4, 1, 1, 0, 1, 0, 1, // 89:;<=>? 503 4, 4, 4, 4, 4, 4, 4, 4, // @ABCDEFG 504 4, 4, 4, 4, 4, 4, 4, 4, // HIJKLMNO 505 4, 4, 4, 4, 4, 4, 4, 4, // PQRSTUVW 506 4, 4, 4, 1, 2, 1, 0, 1, // XYZ[\]^_ 507 0, 4, 4, 4, 4, 4, 4, 4, // `abcdefg 508 4, 4, 4, 4, 4, 4, 4, 4, // hijklmno 509 4, 4, 4, 4, 4, 4, 4, 4, // pqrstuvw 510 4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~ 511 switch (aMap[c]) 512 { 513 default: // not uric 514 return false; 515 516 case 1: // uric 517 ++(*pPos); 518 return true; 519 520 case 2: // "\" 521 if (bBackslash) 522 { 523 *pEnd = ++(*pPos); 524 return true; 525 } 526 else 527 return false; 528 529 case 3: // "|" 530 if (bPipe) 531 { 532 *pEnd = ++(*pPos); 533 return true; 534 } 535 else 536 return false; 537 538 case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see 539 // isBoundary1) 540 *pEnd = ++(*pPos); 541 return true; 542 } 543 } 544 else if (rCharClass.isLetterNumeric(rStr, *pPos)) 545 { 546 *pEnd = *pPos = nextChar(rStr, *pPos); 547 return true; 548 } 549 else 550 return false; 551 } 552 553 sal_uInt32 scanDomain(UniString const & rStr, xub_StrLen * pPos, 554 xub_StrLen nEnd) 555 { 556 sal_Unicode const * pBuffer = rStr.GetBuffer(); 557 sal_Unicode const * p = pBuffer + *pPos; 558 sal_uInt32 nLabels = INetURLObject::scanDomain(p, pBuffer + nEnd, false); 559 *pPos = sal::static_int_cast< xub_StrLen >(p - pBuffer); 560 return nLabels; 561 } 562 563 } 564 565 UniString 566 URIHelper::FindFirstURLInText(UniString const & rText, 567 xub_StrLen & rBegin, 568 xub_StrLen & rEnd, 569 CharClass const & rCharClass, 570 INetURLObject::EncodeMechanism eMechanism, 571 rtl_TextEncoding eCharset, 572 INetURLObject::FSysStyle eStyle) 573 { 574 if (!(rBegin <= rEnd && rEnd <= rText.Len())) 575 return UniString(); 576 577 // Search for the first substring of [rBegin..rEnd[ that matches any of the 578 // following productions (for which the appropriate style bit is set in 579 // eStyle, if applicable). 580 // 581 // 1st Production (known scheme): 582 // \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar] 583 // \B1 584 // 585 // 2nd Production (file): 586 // \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1 587 // 588 // 3rd Production (ftp): 589 // \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1 590 // 591 // 4th Production (http): 592 // \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1 593 // 594 // 5th Production (mailto): 595 // \B2 local-part "@" domain \B1 596 // 597 // 6th Production (UNC file): 598 // \B1 "\\" domain "\" *(wchar / "\") \B1 599 // 600 // 7th Production (DOS file): 601 // \B1 ALPHA ":\" *(wchar / "\") \B1 602 // 603 // 8th Production (Unix-like DOS file): 604 // \B1 ALPHA ":/" *(wchar / "\") \B1 605 // 606 // The productions use the following auxiliary rules. 607 // 608 // local-part = atom *("." atom) 609 // atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" 610 // / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" 611 // / "~") 612 // domain = label *("." label) 613 // label = alphanum [*(alphanum / "-") alphanum] 614 // alphanum = ALPHA / DIGIT 615 // wchar = <any uric character (ignoring the escaped rule), or "%", or 616 // a letter or digit (according to rCharClass)> 617 // 618 // "\B1" (boundary 1) stands for the beginning or end of the block of text, 619 // or a character that is neither (a) a letter or digit (according to 620 // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\". 621 // (FIXME: What was the rationale for this set of punctuation characters?) 622 // 623 // "\B2" (boundary 2) stands for the beginning or end of the block of text, 624 // or a character that is neither (a) a letter or digit (according to 625 // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-", 626 // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC 627 // 822 <atom> character, or "@" from \B1's set above). 628 // 629 // Productions 1--4, and 6--8 try to find a maximum-length match, but they 630 // stop at the first <wchar> character that is a "\B1" character which is 631 // only followed by "\B1" characters (taking "\" and "|" characters into 632 // account appropriately). Production 5 simply tries to find a maximum- 633 // length match. 634 // 635 // Productions 1--4 use the given eMechanism and eCharset. Productions 5--9 636 // use ENCODE_ALL. 637 // 638 // Productions 6--9 are only applicable if the FSYS_DOS bit is set in 639 // eStyle. 640 641 bool bBoundary1 = true; 642 bool bBoundary2 = true; 643 for (xub_StrLen nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos)) 644 { 645 sal_Unicode c = rText.GetChar(nPos); 646 if (bBoundary1) 647 { 648 if (INetMIME::isAlpha(c)) 649 { 650 xub_StrLen i = nPos; 651 INetProtocol eScheme 652 = INetURLObject::CompareProtocolScheme(UniString(rText, i, 653 rEnd)); 654 if (eScheme == INET_PROT_FILE) // 2nd 655 { 656 while (rText.GetChar(i++) != ':') ; 657 xub_StrLen nPrefixEnd = i; 658 xub_StrLen nUriEnd = i; 659 while (i != rEnd 660 && checkWChar(rCharClass, rText, &i, &nUriEnd, true, 661 true)) ; 662 if (i != nPrefixEnd && rText.GetChar(i) == '#') 663 { 664 ++i; 665 while (i != rEnd 666 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 667 } 668 if (nUriEnd != nPrefixEnd 669 && isBoundary1(rCharClass, rText, nUriEnd, rEnd)) 670 { 671 INetURLObject aUri(UniString(rText, nPos, 672 nUriEnd - nPos), 673 INET_PROT_FILE, eMechanism, eCharset, 674 eStyle); 675 if (!aUri.HasError()) 676 { 677 rBegin = nPos; 678 rEnd = nUriEnd; 679 return 680 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI); 681 } 682 } 683 } 684 else if (eScheme != INET_PROT_NOT_VALID) // 1st 685 { 686 while (rText.GetChar(i++) != ':') ; 687 xub_StrLen nPrefixEnd = i; 688 xub_StrLen nUriEnd = i; 689 while (i != rEnd 690 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 691 if (i != nPrefixEnd && rText.GetChar(i) == '#') 692 { 693 ++i; 694 while (i != rEnd 695 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 696 } 697 if (nUriEnd != nPrefixEnd 698 && (isBoundary1(rCharClass, rText, nUriEnd, rEnd) 699 || rText.GetChar(nUriEnd) == '\\')) 700 { 701 INetURLObject aUri(UniString(rText, nPos, 702 nUriEnd - nPos), 703 INET_PROT_HTTP, eMechanism, 704 eCharset); 705 if (!aUri.HasError()) 706 { 707 rBegin = nPos; 708 rEnd = nUriEnd; 709 return 710 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI); 711 } 712 } 713 } 714 715 // 3rd, 4th: 716 i = nPos; 717 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd); 718 if (nLabels >= 3 719 && rText.GetChar(nPos + 3) == '.' 720 && (((rText.GetChar(nPos) == 'w' 721 || rText.GetChar(nPos) == 'W') 722 && (rText.GetChar(nPos + 1) == 'w' 723 || rText.GetChar(nPos + 1) == 'W') 724 && (rText.GetChar(nPos + 2) == 'w' 725 || rText.GetChar(nPos + 2) == 'W')) 726 || ((rText.GetChar(nPos) == 'f' 727 || rText.GetChar(nPos) == 'F') 728 && (rText.GetChar(nPos + 1) == 't' 729 || rText.GetChar(nPos + 1) == 'T') 730 && (rText.GetChar(nPos + 2) == 'p' 731 || rText.GetChar(nPos + 2) == 'P')))) 732 // (note that rText.GetChar(nPos + 3) is guaranteed to be 733 // valid) 734 { 735 xub_StrLen nUriEnd = i; 736 if (i != rEnd && rText.GetChar(i) == '/') 737 { 738 nUriEnd = ++i; 739 while (i != rEnd 740 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 741 } 742 if (i != rEnd && rText.GetChar(i) == '#') 743 { 744 ++i; 745 while (i != rEnd 746 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 747 } 748 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd) 749 || rText.GetChar(nUriEnd) == '\\') 750 { 751 INetURLObject aUri(UniString(rText, nPos, 752 nUriEnd - nPos), 753 INET_PROT_HTTP, eMechanism, 754 eCharset); 755 if (!aUri.HasError()) 756 { 757 rBegin = nPos; 758 rEnd = nUriEnd; 759 return 760 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI); 761 } 762 } 763 } 764 765 if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 3 766 && rText.GetChar(nPos + 1) == ':' 767 && (rText.GetChar(nPos + 2) == '/' 768 || rText.GetChar(nPos + 2) == '\\')) // 7th, 8th 769 { 770 i = nPos + 3; 771 xub_StrLen nUriEnd = i; 772 while (i != rEnd 773 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 774 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)) 775 { 776 INetURLObject aUri(UniString(rText, nPos, 777 nUriEnd - nPos), 778 INET_PROT_FILE, 779 INetURLObject::ENCODE_ALL, 780 RTL_TEXTENCODING_UTF8, 781 INetURLObject::FSYS_DOS); 782 if (!aUri.HasError()) 783 { 784 rBegin = nPos; 785 rEnd = nUriEnd; 786 return 787 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI); 788 } 789 } 790 } 791 } 792 else if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 2 793 && rText.GetChar(nPos) == '\\' 794 && rText.GetChar(nPos + 1) == '\\') // 6th 795 { 796 xub_StrLen i = nPos + 2; 797 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd); 798 if (nLabels >= 1 && i != rEnd && rText.GetChar(i) == '\\') 799 { 800 xub_StrLen nUriEnd = ++i; 801 while (i != rEnd 802 && checkWChar(rCharClass, rText, &i, &nUriEnd, 803 true)) ; 804 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)) 805 { 806 INetURLObject aUri(UniString(rText, nPos, 807 nUriEnd - nPos), 808 INET_PROT_FILE, 809 INetURLObject::ENCODE_ALL, 810 RTL_TEXTENCODING_UTF8, 811 INetURLObject::FSYS_DOS); 812 if (!aUri.HasError()) 813 { 814 rBegin = nPos; 815 rEnd = nUriEnd; 816 return 817 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI); 818 } 819 } 820 } 821 } 822 } 823 if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th 824 { 825 bool bDot = false; 826 for (xub_StrLen i = nPos + 1; i != rEnd; ++i) 827 { 828 sal_Unicode c2 = rText.GetChar(i); 829 if (INetMIME::isAtomChar(c2)) 830 bDot = false; 831 else if (bDot) 832 break; 833 else if (c2 == '.') 834 bDot = true; 835 else 836 { 837 if (c2 == '@') 838 { 839 ++i; 840 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd); 841 if (nLabels >= 1 842 && isBoundary1(rCharClass, rText, i, rEnd)) 843 { 844 INetURLObject aUri(UniString(rText, nPos, i - nPos), 845 INET_PROT_MAILTO, 846 INetURLObject::ENCODE_ALL); 847 if (!aUri.HasError()) 848 { 849 rBegin = nPos; 850 rEnd = i; 851 return aUri.GetMainURL( 852 INetURLObject::DECODE_TO_IURI); 853 } 854 } 855 } 856 break; 857 } 858 } 859 } 860 bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd); 861 bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd); 862 } 863 rBegin = rEnd; 864 return UniString(); 865 } 866 867 //============================================================================ 868 // 869 // removePassword 870 // 871 //============================================================================ 872 873 UniString 874 URIHelper::removePassword(UniString const & rURI, 875 INetURLObject::EncodeMechanism eEncodeMechanism, 876 INetURLObject::DecodeMechanism eDecodeMechanism, 877 rtl_TextEncoding eCharset) 878 { 879 INetURLObject aObj(rURI, eEncodeMechanism, eCharset); 880 return aObj.HasError() ? 881 rURI : 882 String(aObj.GetURLNoPass(eDecodeMechanism, eCharset)); 883 } 884 885 //============================================================================ 886 // 887 // queryFSysStyle 888 // 889 //============================================================================ 890 891 INetURLObject::FSysStyle URIHelper::queryFSysStyle(UniString const & rFileUrl, 892 bool bAddConvenienceStyles) 893 throw (uno::RuntimeException) 894 { 895 ::ucbhelper::ContentBroker const * pBroker = ::ucbhelper::ContentBroker::get(); 896 uno::Reference< ucb::XContentProviderManager > xManager; 897 if (pBroker) 898 xManager = pBroker->getContentProviderManagerInterface(); 899 uno::Reference< beans::XPropertySet > xProperties; 900 if (xManager.is()) 901 xProperties 902 = uno::Reference< beans::XPropertySet >( 903 xManager->queryContentProvider(rFileUrl), uno::UNO_QUERY); 904 sal_Int32 nNotation = ucb::FileSystemNotation::UNKNOWN_NOTATION; 905 if (xProperties.is()) 906 try 907 { 908 xProperties->getPropertyValue(rtl::OUString( 909 RTL_CONSTASCII_USTRINGPARAM( 910 "FileSystemNotation"))) 911 >>= nNotation; 912 } 913 catch (beans::UnknownPropertyException const &) {} 914 catch (lang::WrappedTargetException const &) {} 915 916 // The following code depends on the fact that the 917 // com::sun::star::ucb::FileSystemNotation constants range from UNKNOWN to 918 // MAC, without any holes. The table below has two entries per notation, 919 // the first is used if bAddConvenienceStyles == false, while the second 920 // is used if bAddConvenienceStyles == true: 921 static INetURLObject::FSysStyle const aMap[][2] 922 = { { INetURLObject::FSysStyle(0), 923 INetURLObject::FSYS_DETECT }, 924 // UNKNOWN 925 { INetURLObject::FSYS_UNX, 926 INetURLObject::FSysStyle(INetURLObject::FSYS_VOS 927 | INetURLObject::FSYS_UNX) }, 928 // UNIX 929 { INetURLObject::FSYS_DOS, 930 INetURLObject::FSysStyle(INetURLObject::FSYS_VOS 931 | INetURLObject::FSYS_UNX 932 | INetURLObject::FSYS_DOS) }, 933 // DOS 934 { INetURLObject::FSYS_MAC, 935 INetURLObject::FSysStyle(INetURLObject::FSYS_VOS 936 | INetURLObject::FSYS_UNX 937 | INetURLObject::FSYS_MAC) } }; 938 return aMap[nNotation < ucb::FileSystemNotation::UNKNOWN_NOTATION 939 || nNotation > ucb::FileSystemNotation::MAC_NOTATION ? 940 0 : 941 nNotation 942 - ucb::FileSystemNotation::UNKNOWN_NOTATION] 943 [bAddConvenienceStyles]; 944 } 945