/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/



// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_ucb.hxx"
#include <regexp.hxx>

#include <cstddef>

#include "osl/diagnose.h"
#include <com/sun/star/lang/IllegalArgumentException.hpp>
#include <rtl/ustrbuf.hxx>
#include <rtl/ustring.hxx>

namespace unnamed_ucb_regexp {} using namespace unnamed_ucb_regexp;
    // unnamed namespaces don't work well yet...
37 38 using namespace com::sun::star; 39 using namespace ucb_impl; 40 41 //============================================================================ 42 // 43 // Regexp 44 // 45 //============================================================================ 46 47 inline Regexp::Regexp(Kind eTheKind, rtl::OUString const & rThePrefix, 48 bool bTheEmptyDomain, rtl::OUString const & rTheInfix, 49 bool bTheTranslation, 50 rtl::OUString const & rTheReversePrefix): 51 m_eKind(eTheKind), 52 m_aPrefix(rThePrefix), 53 m_aInfix(rTheInfix), 54 m_aReversePrefix(rTheReversePrefix), 55 m_bEmptyDomain(bTheEmptyDomain), 56 m_bTranslation(bTheTranslation) 57 { 58 OSL_ASSERT(m_eKind == KIND_DOMAIN 59 || !m_bEmptyDomain && m_aInfix.getLength() == 0); 60 OSL_ASSERT(m_bTranslation || m_aReversePrefix.getLength() == 0); 61 } 62 63 //============================================================================ 64 namespace unnamed_ucb_regexp { 65 66 bool matchStringIgnoreCase(sal_Unicode const ** pBegin, 67 sal_Unicode const * pEnd, 68 rtl::OUString const & rString) 69 { 70 sal_Unicode const * p = *pBegin; 71 72 sal_Unicode const * q = rString.getStr(); 73 sal_Unicode const * qEnd = q + rString.getLength(); 74 75 if (pEnd - p < qEnd - q) 76 return false; 77 78 while (q != qEnd) 79 { 80 sal_Unicode c1 = *p++; 81 sal_Unicode c2 = *q++; 82 if (c1 >= 'a' && c1 <= 'z') 83 c1 -= 'a' - 'A'; 84 if (c2 >= 'a' && c2 <= 'z') 85 c2 -= 'a' - 'A'; 86 if (c1 != c2) 87 return false; 88 } 89 90 *pBegin = p; 91 return true; 92 } 93 94 } 95 96 bool Regexp::matches(rtl::OUString const & rString, 97 rtl::OUString * pTranslation, bool * pTranslated) const 98 { 99 sal_Unicode const * pBegin = rString.getStr(); 100 sal_Unicode const * pEnd = pBegin + rString.getLength(); 101 102 bool bMatches = false; 103 104 sal_Unicode const * p = pBegin; 105 if (matchStringIgnoreCase(&p, pEnd, m_aPrefix)) 106 { 107 sal_Unicode const * pBlock1Begin = p; 108 sal_Unicode const * pBlock1End = pEnd; 109 110 sal_Unicode const * 
pBlock2Begin = 0; 111 sal_Unicode const * pBlock2End = 0; 112 113 switch (m_eKind) 114 { 115 case KIND_PREFIX: 116 bMatches = true; 117 break; 118 119 case KIND_AUTHORITY: 120 bMatches = p == pEnd || *p == '/' || *p == '?' || *p == '#'; 121 break; 122 123 case KIND_DOMAIN: 124 if (!m_bEmptyDomain) 125 { 126 if (p == pEnd || *p == '/' || *p == '?' || *p == '#') 127 break; 128 ++p; 129 } 130 for (;;) 131 { 132 sal_Unicode const * q = p; 133 if (matchStringIgnoreCase(&q, pEnd, m_aInfix) 134 && (q == pEnd || *q == '/' || *q == '?' || *q == '#')) 135 { 136 bMatches = true; 137 pBlock1End = p; 138 pBlock2Begin = q; 139 pBlock2End = pEnd; 140 break; 141 } 142 143 if (p == pEnd) 144 break; 145 146 sal_Unicode c = *p++; 147 if (c == '/' || c == '?' || c == '#') 148 break; 149 } 150 break; 151 } 152 153 if (bMatches) 154 { 155 if (m_bTranslation) 156 { 157 if (pTranslation) 158 { 159 rtl::OUStringBuffer aBuffer(m_aReversePrefix); 160 aBuffer.append(pBlock1Begin, pBlock1End - pBlock1Begin); 161 aBuffer.append(m_aInfix); 162 aBuffer.append(pBlock2Begin, pBlock2End - pBlock2Begin); 163 *pTranslation = aBuffer.makeStringAndClear(); 164 } 165 if (pTranslated) 166 *pTranslated = true; 167 } 168 else 169 { 170 if (pTranslation) 171 *pTranslation = rString; 172 if (pTranslated) 173 *pTranslated = false; 174 } 175 } 176 } 177 178 return bMatches; 179 } 180 181 //============================================================================ 182 namespace unnamed_ucb_regexp { 183 184 inline bool isAlpha(sal_Unicode c) 185 { 186 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); 187 } 188 189 inline bool isDigit(sal_Unicode c) 190 { 191 return c >= '0' && c <= '9'; 192 } 193 194 bool isScheme(rtl::OUString const & rString, bool bColon) 195 { 196 // Return true if rString matches <scheme> (plus a trailing ":" if bColon 197 // is true) from RFC 2396: 198 sal_Unicode const * p = rString.getStr(); 199 sal_Unicode const * pEnd = p + rString.getLength(); 200 if (p != pEnd && isAlpha(*p)) 
201 for (++p;;) 202 { 203 if (p == pEnd) 204 return !bColon; 205 sal_Unicode c = *p++; 206 if (!(isAlpha(c) || isDigit(c) 207 || c == '+' || c == '-' || c == '.')) 208 return bColon && c == ':' && p == pEnd; 209 } 210 return false; 211 } 212 213 void appendStringLiteral(rtl::OUStringBuffer * pBuffer, 214 rtl::OUString const & rString) 215 { 216 OSL_ASSERT(pBuffer); 217 218 pBuffer->append(sal_Unicode('"')); 219 sal_Unicode const * p = rString.getStr(); 220 sal_Unicode const * pEnd = p + rString.getLength(); 221 while (p != pEnd) 222 { 223 sal_Unicode c = *p++; 224 if (c == '"' || c == '\\') 225 pBuffer->append(sal_Unicode('\\')); 226 pBuffer->append(c); 227 } 228 pBuffer->append(sal_Unicode('"')); 229 } 230 231 } 232 233 rtl::OUString Regexp::getRegexp(bool bReverse) const 234 { 235 if (m_bTranslation) 236 { 237 rtl::OUStringBuffer aBuffer; 238 if (bReverse) 239 { 240 if (m_aReversePrefix.getLength() != 0) 241 appendStringLiteral(&aBuffer, m_aReversePrefix); 242 } 243 else 244 { 245 if (m_aPrefix.getLength() != 0) 246 appendStringLiteral(&aBuffer, m_aPrefix); 247 } 248 switch (m_eKind) 249 { 250 case KIND_PREFIX: 251 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("(.*)")); 252 break; 253 254 case KIND_AUTHORITY: 255 aBuffer. 256 appendAscii(RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)")); 257 break; 258 259 case KIND_DOMAIN: 260 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([^/?#]")); 261 aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+')); 262 if (m_aInfix.getLength() != 0) 263 appendStringLiteral(&aBuffer, m_aInfix); 264 aBuffer. 
265 appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?)")); 266 break; 267 } 268 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("->")); 269 if (bReverse) 270 { 271 if (m_aPrefix.getLength() != 0) 272 appendStringLiteral(&aBuffer, m_aPrefix); 273 } 274 else 275 { 276 if (m_aReversePrefix.getLength() != 0) 277 appendStringLiteral(&aBuffer, m_aReversePrefix); 278 } 279 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\1")); 280 return aBuffer.makeStringAndClear(); 281 } 282 else if (m_eKind == KIND_PREFIX && isScheme(m_aPrefix, true)) 283 return m_aPrefix.copy(0, m_aPrefix.getLength() - 1); 284 else 285 { 286 rtl::OUStringBuffer aBuffer; 287 if (m_aPrefix.getLength() != 0) 288 appendStringLiteral(&aBuffer, m_aPrefix); 289 switch (m_eKind) 290 { 291 case KIND_PREFIX: 292 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM(".*")); 293 break; 294 295 case KIND_AUTHORITY: 296 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")); 297 break; 298 299 case KIND_DOMAIN: 300 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("[^/?#]")); 301 aBuffer.append(sal_Unicode(m_bEmptyDomain ? 
'*' : '+')); 302 if (m_aInfix.getLength() != 0) 303 appendStringLiteral(&aBuffer, m_aInfix); 304 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")); 305 break; 306 } 307 return aBuffer.makeStringAndClear(); 308 } 309 } 310 311 //============================================================================ 312 namespace unnamed_ucb_regexp { 313 314 bool matchString(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, 315 sal_Char const * pString, size_t nStringLength) 316 { 317 sal_Unicode const * p = *pBegin; 318 319 sal_uChar const * q = reinterpret_cast< sal_uChar const * >(pString); 320 sal_uChar const * qEnd = q + nStringLength; 321 322 if (pEnd - p < qEnd - q) 323 return false; 324 325 while (q != qEnd) 326 { 327 sal_Unicode c1 = *p++; 328 sal_Unicode c2 = *q++; 329 if (c1 != c2) 330 return false; 331 } 332 333 *pBegin = p; 334 return true; 335 } 336 337 bool scanStringLiteral(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, 338 rtl::OUString * pString) 339 { 340 sal_Unicode const * p = *pBegin; 341 342 if (p == pEnd || *p++ != '"') 343 return false; 344 345 rtl::OUStringBuffer aBuffer; 346 for (;;) 347 { 348 if (p == pEnd) 349 return false; 350 sal_Unicode c = *p++; 351 if (c == '"') 352 break; 353 if (c == '\\') 354 { 355 if (p == pEnd) 356 return false; 357 c = *p++; 358 if (c != '"' && c != '\\') 359 return false; 360 } 361 aBuffer.append(c); 362 } 363 364 *pBegin = p; 365 *pString = aBuffer.makeStringAndClear(); 366 return true; 367 } 368 369 } 370 371 Regexp Regexp::parse(rtl::OUString const & rRegexp) 372 { 373 // Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*' 374 // where <scheme> is as defined in RFC 2396: 375 if (isScheme(rRegexp, false)) 376 return Regexp(Regexp::KIND_PREFIX, 377 rRegexp 378 + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(":")), 379 false, 380 rtl::OUString(), 381 false, 382 rtl::OUString()); 383 384 sal_Unicode const * p = rRegexp.getStr(); 385 sal_Unicode const * pEnd = p + rRegexp.getLength(); 
386 387 rtl::OUString aPrefix; 388 scanStringLiteral(&p, pEnd, &aPrefix); 389 390 if (p == pEnd) 391 throw lang::IllegalArgumentException(); 392 393 if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(".*"))) 394 { 395 if (p != pEnd) 396 throw lang::IllegalArgumentException(); 397 398 return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(), 399 false, rtl::OUString()); 400 } 401 else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("(.*)->"))) 402 { 403 rtl::OUString aReversePrefix; 404 scanStringLiteral(&p, pEnd, &aReversePrefix); 405 406 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1")) 407 || p != pEnd) 408 throw lang::IllegalArgumentException(); 409 410 return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(), 411 true, aReversePrefix); 412 } 413 else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"))) 414 { 415 if (p != pEnd) 416 throw lang::IllegalArgumentException(); 417 418 return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(), 419 false, rtl::OUString()); 420 } 421 else if (matchString(&p, pEnd, 422 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->"))) 423 { 424 rtl::OUString aReversePrefix; 425 if (!(scanStringLiteral(&p, pEnd, &aReversePrefix) 426 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1")) 427 && p == pEnd)) 428 throw lang::IllegalArgumentException(); 429 430 return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(), 431 true, aReversePrefix); 432 } 433 else 434 { 435 bool bOpen = false; 436 if (p != pEnd && *p == '(') 437 { 438 ++p; 439 bOpen = true; 440 } 441 442 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("[^/?#]"))) 443 throw lang::IllegalArgumentException(); 444 445 if (p == pEnd || (*p != '*' && *p != '+')) 446 throw lang::IllegalArgumentException(); 447 bool bEmptyDomain = *p++ == '*'; 448 449 rtl::OUString aInfix; 450 scanStringLiteral(&p, pEnd, &aInfix); 451 452 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"))) 453 throw 
lang::IllegalArgumentException(); 454 455 rtl::OUString aReversePrefix; 456 if (bOpen 457 && !(matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(")->")) 458 && scanStringLiteral(&p, pEnd, &aReversePrefix) 459 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1")))) 460 throw lang::IllegalArgumentException(); 461 462 if (p != pEnd) 463 throw lang::IllegalArgumentException(); 464 465 return Regexp(Regexp::KIND_DOMAIN, aPrefix, bEmptyDomain, aInfix, 466 bOpen, aReversePrefix); 467 } 468 } 469 470