xref: /AOO41X/main/tools/source/fsys/urlobj.cxx (revision 89b56da77b74925c286b3e777681ba8dda16bf41)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_tools.hxx"
26 #include <tools/urlobj.hxx>
27 #include <tools/debug.hxx>
28 #include <tools/inetmime.hxx>
29 #include "com/sun/star/uno/Reference.hxx"
30 #include "com/sun/star/util/XStringWidth.hpp"
31 #include "osl/diagnose.h"
32 #include "osl/file.hxx"
33 #include "rtl/string.h"
34 #include "rtl/textenc.h"
35 #include "rtl/ustring.hxx"
36 #include "sal/types.h"
37 
38 #ifndef INCLUDED_ALGORITHM
39 #include <algorithm>
40 #define INCLUDED_ALGORITHM
41 #endif
42 #ifndef INCLUDED_LIMITS
43 #include <limits>
44 #define INCLUDED_LIMITS
45 #endif
46 
47 #include <string.h>
48 
49 namespace unnamed_tools_urlobj {} using namespace unnamed_tools_urlobj;
50     // unnamed namespaces don't work well yet...
51 
52 using namespace com::sun;
53 
54 //============================================================================
55 //
56 //  INetURLObject
57 //
58 //============================================================================
59 
60 /* The URI grammar (using RFC 2234 conventions).
61 
62    Constructs of the form
63        {reference <rule1> using rule2}
64    stand for a rule matching the given rule1 specified in the given reference,
65    encoded to URI syntax using rule2 (as specified in this URI grammar).
66 
67 
68    ; RFC 1738, RFC 2396, RFC 2732, private
69    login = [user [":" password] "@"] hostport
70    user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
71    password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
72    hostport = host [":" port]
73    host = incomplete-hostname / hostname / IPv4address / IPv6reference
74    incomplete-hostname = *(domainlabel ".") domainlabel
75    hostname = *(domainlabel ".") toplabel ["."]
76    domainlabel = alphanum [*(alphanum / "-") alphanum]
77    toplabel = ALPHA [*(alphanum / "-") alphanum]
78    IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
79    IPv6reference = "[" hexpart [":" IPv4address] "]"
80    hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
81    hexseq = hex4 *(":" hex4)
82    hex4 = 1*4HEXDIG
83    port = *DIGIT
84    escaped = "%" HEXDIG HEXDIG
85    reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
86    mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
87    alphanum = ALPHA / DIGIT
88    unreserved = alphanum / mark
89    uric = escaped / reserved / unreserved
90    pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"
91 
92 
93    ; RFC 1738, RFC 2396
94    ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
95    segment = *pchar
96 
97 
98    ; RFC 1738, RFC 2396
99    http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
100    segment = *(pchar / ";")
101 
102 
103    ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&>
104    file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
105    segment = *pchar
106    netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}
107 
108 
109    ; RFC 2368, RFC 2396
110    mailto-url = "MAILTO:" [to] [headers]
111    to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
112    headers = "?" header *("&" header)
113    header = hname "=" hvalue
114    hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
115    hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
116 
117 
118    ; private (see RFC 1738, RFC 2396)
119    vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
120    segment = *(pchar / ";")
121 
122 
123    ; RFC 1738, RFC 2396, RFC 2732
124    news-url = "NEWS:" grouppart
125    grouppart = "*" / group / article
126    group = alpha *(alphanum / "+" / "-" / "." / "_")
127    article = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "?" / "_" / "~") "@" host
128 
129 
130    ; private
131    private-url = "PRIVATE:" path ["?" *uric]
132    path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
133 
134 
135    ; private
136    vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
137    name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
138    segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
139 
140 
141    ; private
142    https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
143    segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
144 
145 
146    ; private
147    slot-url = "SLOT:" path ["?" *uric]
148    path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
149 
150 
151    ; private
152    macro-url = "MACRO:" path ["?" *uric]
153    path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
154 
155 
156    ; private
157    javascript-url = "JAVASCRIPT:" *uric
158 
159 
160    ; private (see RFC 2192)
161    imap-url = "IMAP://" user [";AUTH=" auth] "@" hostport "/" segment *("/" segment) ["/;UID=" nz_number]
162    user = 1*{RFC 2060 <CHAR8> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "=" / "_" / "~")}
163    auth = {RFC 2060 <atom> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "+" / "," / "-" / "." / "=" / "_" / "~")}
164    segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / "=" / "@" / "_" / "~")
165    nz_number = {RFC 2060 <nz_number> using *DIGIT}
166 
167 
168    ; private
169    pop3-url = "POP3://" login ["/" ["<" *uric ">"]]
170 
171 
172    ; RFC 2397
173    data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
174    mediatype = [type "/" subtype] *(";" attribute "=" value)
175    type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
176    subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
177    attribute = {RFC 2045 <attribute> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
178    value = {RFC 2045 <value> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
179 
180 
181    ; RFC 2392, RFC 2396
182    cid-url = "CID:" {RFC 822 <addr-spec> using *uric}
183 
184 
185    ; private
186    out-url = "OUT:///~" name ["/" *uric]
187    name = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "?" / "@" / "_" / "~")
188 
189 
190    ; private
191    vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
192    reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
193 
194    ; private
195    vim-url = "VIM://" 1*vimc [":" *vimc] ["/" [("INBOX" message) / ("NEWSGROUPS" ["/" [1*vimc message]])]]
196    message = ["/" [1*vimc [":" 1*DIGIT "." 1*DIGIT "." 1*DIGIT]]]
197    vimc = ("=" HEXDIG HEXDIG) / alphanum
198 
199 
200    ; private
201    uno-url = ".UNO:" path ["?" *uric]
202    path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
203 
204 
205    ; private
206    component-url = ".COMPONENT:" path ["?" *uric]
207    path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
208 
209 
210    ; private
211    vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
212    reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
213 
214 
215    ; RFC 2255
216    ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
217    dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
218    attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
219    filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
220    extension = ["!"] ["X-"] extoken ["=" exvalue]
221    extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
222    exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
223 
224 
225    ; private
226    db-url = "DB:" *uric
227 
228 
229    ; private
230    vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
231    opaque_part = uric_no_slash *uric
232    uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
233 
234 
235    ; private
236    vnd-sun-star-odma-url = "VND.SUN.STAR.ODMA:" ["/" *uric_no_slash]
237    uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
238 
239 
240    ; RFC 1738
241    telnet-url = "TELNET://" login ["/"]
242 
243 
244    ; private
245    vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
246    opaque_part = uric_no_slash *uric
247    uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
248 
249 
250    ; private
251    vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
252    segment = *pchar
253 
254 
255    ; private
256    unknown-url = scheme ":" 1*uric
257    scheme = ALPHA *(alphanum / "+" / "-" / ".")
258 
259 
260    ; private (http://ubiqx.org/cifs/Appendix-D.html):
261    smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
262    segment = *(pchar / ";")
263  */
264 
265 //============================================================================
clear()266 inline sal_Int32 INetURLObject::SubString::clear()
267 {
268     sal_Int32 nDelta = -m_nLength;
269     m_nBegin = -1;
270     m_nLength = 0;
271     return nDelta;
272 }
273 
set(rtl::OUStringBuffer & rString,rtl::OUString const & rSubString)274 inline sal_Int32 INetURLObject::SubString::set(rtl::OUStringBuffer & rString,
275                                        rtl::OUString const & rSubString)
276 {
277     rtl::OUString sTemp(rString.makeStringAndClear());
278     sal_Int32 nDelta = set(sTemp, rSubString);
279     rString.append(sTemp);
280     return nDelta;
281 }
282 
set(rtl::OUString & rString,rtl::OUString const & rSubString)283 inline sal_Int32 INetURLObject::SubString::set(rtl::OUString & rString,
284                                        rtl::OUString const & rSubString)
285 {
286     sal_Int32 nDelta = rSubString.getLength() - m_nLength;
287 
288     rString = rString.replaceAt(m_nBegin, m_nLength, rSubString);
289 
290     m_nLength = rSubString.getLength();
291     return nDelta;
292 }
293 
set(rtl::OUStringBuffer & rString,rtl::OUString const & rSubString,sal_Int32 nTheBegin)294 inline sal_Int32 INetURLObject::SubString::set(rtl::OUStringBuffer & rString,
295                                        rtl::OUString const & rSubString,
296                                                sal_Int32 nTheBegin)
297 {
298     m_nBegin = nTheBegin;
299     return set(rString, rSubString);
300 }
301 
302 //============================================================================
operator +=(sal_Int32 nDelta)303 inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta)
304 {
305     if (isPresent())
306         m_nBegin = m_nBegin + nDelta;
307 }
308 
309 //============================================================================
compare(SubString const & rOther,rtl::OUStringBuffer const & rThisString,rtl::OUStringBuffer const & rOtherString) const310 int INetURLObject::SubString::compare(SubString const & rOther,
311                                       rtl::OUStringBuffer const & rThisString,
312                                       rtl::OUStringBuffer const & rOtherString) const
313 {
314     sal_Int32 len = std::min(m_nLength, rOther.m_nLength);
315     sal_Unicode const * p1 = rThisString.getStr() + m_nBegin;
316     sal_Unicode const * end = p1 + len;
317     sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin;
318     while (p1 != end) {
319         if (*p1 < *p2) {
320             return -1;
321         } else if (*p1 > *p2) {
322             return 1;
323         }
324         ++p1;
325         ++p2;
326     }
327     return m_nLength < rOther.m_nLength ? -1
328         : m_nLength > rOther.m_nLength ? 1
329         : 0;
330 }
331 
332 //============================================================================
// Static, per-scheme description record.  One instance per INetProtocol value
// is stored in aSchemeInfoMap, which initialises the members positionally, so
// the member order below must not be changed without updating that table.
333 struct INetURLObject::SchemeInfo
334 {
    // Scheme name as stored in aSchemeInfoMap, e.g. "ftp".
335     sal_Char const * m_pScheme;
    // Scheme name plus the separator that starts a URL of this scheme, e.g.
    // "ftp://" or "mailto:" in aSchemeInfoMap.
336     sal_Char const * m_pPrefix;
    // Port number associated with the scheme (21 for "ftp", 80 for "http" in
    // aSchemeInfoMap); 0 for entries without one.
337     sal_uInt16 m_nDefaultPort;
    // NOTE(review): the flags below presumably state which URL components the
    // scheme's syntax may contain (authority, user, auth, password, host,
    // port, hierarchical path, query) -- confirm against the parsing code,
    // which is outside this excerpt.
338     bool m_bAuthority;
339     bool m_bUser;
340     bool m_bAuth;
341     bool m_bPassword;
342     bool m_bHost;
343     bool m_bPort;
344     bool m_bHierarchical;
345     bool m_bQuery;
346 };
347 
348 //============================================================================
// Describes one recognised URL prefix: the prefix text, a second
// ("translated") prefix text, the INetProtocol it belongs to and a
// classification of the prefix.
// NOTE(review): the table of PrefixInfo records and the code that relies on
// the relative order of the Kind enumerators are outside this excerpt.
349 struct INetURLObject::PrefixInfo
350 {
    // Classification of a prefix.  The numeric order of the enumerators is
    // relied upon elsewhere, so do not reorder them.
351     enum Kind { OFFICIAL, INTERNAL, EXTERNAL, ALIAS }; // order is important!
352 
353     sal_Char const * m_pPrefix;
354     sal_Char const * m_pTranslatedPrefix;
355     INetProtocol m_eScheme;
356     Kind m_eKind;
357 };
358 
359 //============================================================================
// Table of SchemeInfo records with INET_PROT_END entries; getSchemeInfo()
// indexes it directly with an INetProtocol value, so the rows must stay in
// the order of the INetProtocol enumerators.
//
// Each row is initialised positionally in the member order of SchemeInfo:
//   scheme, prefix, default port,
//   authority, user, auth, password, host, port, hierarchical, query
360 static INetURLObject::SchemeInfo const aSchemeInfoMap[INET_PROT_END]
361     = { { "", "", 0, false, false, false, false, false, false, false,
362           false },
363         { "ftp", "ftp://", 21, true, true, false, true, true, true, true,
364           false },
365         { "http", "http://", 80, true, false, false, false, true, true,
366           true, true },
367         { "file", "file://", 0, true, false, false, false, true, false,
368           true, false },
369         { "mailto", "mailto:", 0, false, false, false, false, false,
370           false, false, true },
371         { "vnd.sun.star.webdav", "vnd.sun.star.webdav://", 80, true, false,
372           false, false, true, true, true, true },
373         { "news", "news:", 0, false, false, false, false, false, false, false,
374           false },
375         { "private", "private:", 0, false, false, false, false, false,
376           false, false, true },
377         { "vnd.sun.star.help", "vnd.sun.star.help://", 0, true, false, false,
378           false, false, false, true, true },
379         { "https", "https://", 443, true, false, false, false, true, true,
380           true, true },
381         { "slot", "slot:", 0, false, false, false, false, false, false,
382           false, true },
383         { "macro", "macro:", 0, false, false, false, false, false, false,
384           false, true },
385         { "javascript", "javascript:", 0, false, false, false, false,
386           false, false, false, false },
387         { "imap", "imap://", 143, true, true, true, false, true, true,
388           true, false },
389         { "pop3", "pop3://", 110, true, true, false, true, true, true,
390           false, false },
391         { "data", "data:", 0, false, false, false, false, false, false,
392           false, false },
393         { "cid", "cid:", 0, false, false, false, false, false, false,
394           false, false },
395         { "out", "out://", 0, true, false, false, false, false, false,
396           false, false },
397         { "vnd.sun.star.hier", "vnd.sun.star.hier:", 0, true, false, false,
398           false, false, false, true, false },
399         { "vim", "vim://", 0, true, true, false, true, false, false, true,
400           false },
401         { ".uno", ".uno:", 0, false, false, false, false, false, false,
402           false, true },
403         { ".component", ".component:", 0, false, false, false, false,
404           false, false, false, true },
405         { "vnd.sun.star.pkg", "vnd.sun.star.pkg://", 0, true, false, false,
406           false, false, false, true, true },
407         { "ldap", "ldap://", 389, true, false, false, false, true, true,
408           false, true },
409         { "db", "db:", 0, false, false, false, false, false, false, false,
410           false },
411         { "vnd.sun.star.cmd", "vnd.sun.star.cmd:", 0, false, false, false,
412           false, false, false, false, false },
413         { "vnd.sun.star.odma", "vnd.sun.star.odma:", 0, false, false, false,
414           false, false, false, true, false },
415         { "telnet", "telnet://", 23, true, true, false, true, true, true, true,
416           false },
417         { "vnd.sun.star.expand", "vnd.sun.star.expand:", 0, false, false, false,
418           false, false, false, false, false },
419         { "vnd.sun.star.tdoc", "vnd.sun.star.tdoc:", 0, false, false, false,
420           false, false, false, true, false },
        // Entry without a fixed scheme name or prefix.
        // NOTE(review): presumably the slot for generic/unknown schemes --
        // confirm against the INetProtocol enumeration.
421         { "", "", 0, false, false, false, false, true, true, true, false },
422         { "smb", "smb://", 139, true, true, false, true, true, true, true,
423           true },
424         { "hid", "hid:", 0, false, false, false, false, false, false,
425         false, true } };
426 
427 // static
428 inline INetURLObject::SchemeInfo const &
getSchemeInfo(INetProtocol eTheScheme)429 INetURLObject::getSchemeInfo(INetProtocol eTheScheme)
430 {
431     return aSchemeInfoMap[eTheScheme];
432 };
433 
434 //============================================================================
getSchemeInfo() const435 inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo() const
436 {
437     return getSchemeInfo(m_eScheme);
438 }
439 
440 //============================================================================
441 // static
appendEscape(rtl::OUStringBuffer & rTheText,sal_Char cEscapePrefix,sal_uInt32 nOctet)442 inline void INetURLObject::appendEscape(rtl::OUStringBuffer & rTheText,
443                                         sal_Char cEscapePrefix,
444                                         sal_uInt32 nOctet)
445 {
446     rTheText.append(sal_Unicode(cEscapePrefix));
447     rTheText.append(sal_Unicode(INetMIME::getHexDigit(int(nOctet >> 4))));
448     rTheText.append(sal_Unicode(INetMIME::getHexDigit(int(nOctet & 15))));
449 }
450 
451 //============================================================================
452 namespace unnamed_tools_urlobj {
453 
// Two-letter aliases for the INetURLObject::PART_* constants.  They exist
// only to keep the rows of aMustEncodeMap (below) short enough to be laid out
// as aligned columns; each alias has exactly the value of the constant it is
// assigned from.
454 enum
455 {
456     PA = INetURLObject::PART_OBSOLETE_NORMAL,
457     PB = INetURLObject::PART_OBSOLETE_FILE,
458     PC = INetURLObject::PART_OBSOLETE_PARAM,
459     PD = INetURLObject::PART_USER_PASSWORD,
460     PE = INetURLObject::PART_IMAP_ACHAR,
461     PF = INetURLObject::PART_VIM,
462     PG = INetURLObject::PART_HOST_EXTRA,
463     PH = INetURLObject::PART_FPATH,
464     PI = INetURLObject::PART_AUTHORITY,
465     PJ = INetURLObject::PART_PATH_SEGMENTS_EXTRA,
466     PK = INetURLObject::PART_REL_SEGMENT_EXTRA,
467     PL = INetURLObject::PART_URIC,
468     PM = INetURLObject::PART_HTTP_PATH,
469     PN = INetURLObject::PART_FILE_SEGMENT_EXTRA,
470     PO = INetURLObject::PART_MESSAGE_ID,
471     PP = INetURLObject::PART_MESSAGE_ID_PATH,
472     PQ = INetURLObject::PART_MAILTO,
473     PR = INetURLObject::PART_PATH_BEFORE_QUERY,
474     PS = INetURLObject::PART_PCHAR,
475     PT = INetURLObject::PART_FRAGMENT,
476     PU = INetURLObject::PART_VISIBLE,
477     PV = INetURLObject::PART_VISIBLE_NONSPECIAL,
478     PW = INetURLObject::PART_CREATEFRAGMENT,
479     PX = INetURLObject::PART_UNO_PARAM_VALUE,
480     PY = INetURLObject::PART_UNAMBIGUOUS,
481     PZ = INetURLObject::PART_URIC_NO_SLASH,
482     P1 = INetURLObject::PART_HTTP_QUERY,
483     P2 = INetURLObject::PART_NEWS_ARTICLE_LOCALPART
484 };
485 
// Per-character table consulted by mustEncode(), indexed by US-ASCII code
// (0..127).  Each entry is the sum of the part aliases (PA..P2, see the enum
// above) listed for that character.
//
// Despite the table's name, a part that IS listed for a character means the
// character need NOT be encoded in that part: mustEncode() returns true
// exactly when the entry has no bit in common with the requested part.
//
// The first 32 entries (0x00..0x1F) and the last entry (0x7F) are 0, i.e.
// those characters must be encoded in every part.  The remaining rows are
// labelled with the character they describe.
486 static sal_uInt32 const aMustEncodeMap[128]
487     = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 /*   */                                                                         PY,
490 /* ! */       PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
491 /* " */                                                             PU+PV      +PY,
492 /* # */                                                             PU,
493 /* $ */          PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
494 /* % */                                                             PU,
495 /* & */ PA+PB+PC+PD+PE      +PH+PI+PJ+PK+PL+PM+PN+PO+PP   +PR+PS+PT+PU+PV+PW+PX   +PZ+P1+P2,
496 /* ' */          PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
497 /* ( */          PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
498 /* ) */          PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
499 /* * */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
500 /* + */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX   +PZ+P1+P2,
501 /* , */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW      +PZ+P1+P2,
502 /* - */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
503 /* . */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
504 /* / */ PA+PB+PC            +PH   +PJ   +PL+PM      +PP+PQ+PR   +PT+PU+PV   +PX         +P2,
505 /* 0 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
506 /* 1 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
507 /* 2 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
508 /* 3 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
509 /* 4 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
510 /* 5 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
511 /* 6 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
512 /* 7 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
513 /* 8 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
514 /* 9 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
515 /* : */    PB+PC            +PH+PI+PJ   +PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX   +PZ+P1+P2,
516 /* ; */       PC+PD            +PI+PJ+PK+PL+PM   +PO+PP+PQ+PR   +PT+PU   +PW      +PZ+P1+P2,
517 /* < */       PC                                 +PO+PP            +PU+PV      +PY,
518 /* = */ PA+PB+PC+PD+PE      +PH+PI+PJ+PK+PL+PM+PN         +PR+PS+PT+PU+PV+PW      +PZ+P1+P2,
519 /* > */       PC                                 +PO+PP            +PU+PV      +PY,
520 /* ? */       PC                        +PL                     +PT+PU   +PW+PX   +PZ   +P2,
521 /* @ */       PC            +PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1,
522 /* A */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
523 /* B */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
524 /* C */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
525 /* D */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
526 /* E */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
527 /* F */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
528 /* G */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
529 /* H */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
530 /* I */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
531 /* J */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
532 /* K */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
533 /* L */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
534 /* M */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
535 /* N */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
536 /* O */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
537 /* P */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
538 /* Q */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
539 /* R */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
540 /* S */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
541 /* T */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
542 /* U */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
543 /* V */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
544 /* W */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
545 /* X */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
546 /* Y */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
547 /* Z */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
548 /* [ */                                  PL                        +PU+PV   +PX,
549 /* \ */    PB                                                      +PU+PV      +PY,
550 /* ] */                                  PL                        +PU+PV   +PX,
551 /* ^ */                                                             PU+PV      +PY,
552 /* _ */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
553 /* ` */                                                             PU+PV      +PY,
554 /* a */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
555 /* b */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
556 /* c */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
557 /* d */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
558 /* e */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
559 /* f */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
560 /* g */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
561 /* h */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
562 /* i */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
563 /* j */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
564 /* k */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
565 /* l */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
566 /* m */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
567 /* n */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
568 /* o */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
569 /* p */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
570 /* q */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
571 /* r */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
572 /* s */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
573 /* t */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
574 /* u */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
575 /* v */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
576 /* w */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
577 /* x */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
578 /* y */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
579 /* z */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
580 /* { */                                                             PU+PV      +PY,
581 /* | */    PB+PC                              +PN               +PT+PU+PV      +PY,
582 /* } */                                                             PU+PV      +PY,
583 /* ~ */ PA+PB+PC+PD+PE      +PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ  +P2,
584         0 };
585 
mustEncode(sal_uInt32 nUTF32,INetURLObject::Part ePart)586 inline bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart)
587 {
588     return !INetMIME::isUSASCII(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart);
589 }
590 
591 }
592 
593 //============================================================================
setInvalid()594 void INetURLObject::setInvalid()
595 {
596     m_aAbsURIRef.setLength(0);
597     m_eScheme = INET_PROT_NOT_VALID;
598     m_aScheme.clear();
599     m_aUser.clear();
600     m_aAuth.clear();
601     m_aHost.clear();
602     m_aPort.clear();
603     m_aPath.clear();
604     m_aQuery.clear();
605     m_aFragment.clear();
606 }
607 
608 //============================================================================
609 
610 namespace unnamed_tools_urlobj {
611 
612 INetURLObject::FSysStyle
guessFSysStyleByCounting(sal_Unicode const * pBegin,sal_Unicode const * pEnd,INetURLObject::FSysStyle eStyle)613 guessFSysStyleByCounting(sal_Unicode const * pBegin,
614                          sal_Unicode const * pEnd,
615                          INetURLObject::FSysStyle eStyle)
616 {
617     DBG_ASSERT(eStyle
618                    & (INetURLObject::FSYS_UNX
619                           | INetURLObject::FSYS_DOS
620                           | INetURLObject::FSYS_MAC),
621                "guessFSysStyleByCounting(): Bad style");
622     DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd
623                && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(),
624                "guessFSysStyleByCounting(): Too big");
625     sal_Int32 nSlashCount
626         = eStyle & INetURLObject::FSYS_UNX ?
627               0 : std::numeric_limits< sal_Int32 >::min();
628     sal_Int32 nBackslashCount
629         = eStyle & INetURLObject::FSYS_DOS ?
630               0 : std::numeric_limits< sal_Int32 >::min();
631     sal_Int32 nColonCount
632         = eStyle & INetURLObject::FSYS_MAC ?
633               0 : std::numeric_limits< sal_Int32 >::min();
634     while (pBegin != pEnd)
635         switch (*pBegin++)
636         {
637             case '/':
638                 ++nSlashCount;
639                 break;
640 
641             case '\\':
642                 ++nBackslashCount;
643                 break;
644 
645             case ':':
646                 ++nColonCount;
647                 break;
648         }
649     return nSlashCount >= nBackslashCount ?
650                nSlashCount >= nColonCount ?
651                    INetURLObject::FSYS_UNX : INetURLObject::FSYS_MAC :
652                nBackslashCount >= nColonCount ?
653                    INetURLObject::FSYS_DOS : INetURLObject::FSYS_MAC;
654 }
655 
parseScheme(sal_Unicode const ** begin,sal_Unicode const * end,sal_uInt32 fragmentDelimiter)656 rtl::OUString parseScheme(
657     sal_Unicode const ** begin, sal_Unicode const * end,
658     sal_uInt32 fragmentDelimiter)
659 {
660     sal_Unicode const * p = *begin;
661     if (p != end && INetMIME::isAlpha(*p)) {
662         do {
663             ++p;
664         } while (p != end
665                  && (INetMIME::isAlphanumeric(*p) || *p == '+' || *p == '-'
666                      || *p == '.'));
667         // #i34835# To avoid problems with Windows file paths like "C:\foo",
668         // do not accept generic schemes that are only one character long:
669         if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
670             && p - *begin >= 2)
671         {
672             rtl::OUString scheme(
673                 rtl::OUString(*begin, p - *begin).toAsciiLowerCase());
674             *begin = p + 1;
675             return scheme;
676         }
677     }
678     return rtl::OUString();
679 }
680 
681 }
682 
setAbsURIRef(rtl::OUString const & rTheAbsURIRef,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,bool bSmart,FSysStyle eStyle)683 bool INetURLObject::setAbsURIRef(rtl::OUString const & rTheAbsURIRef,
684                                  bool bOctets,
685                                  EncodeMechanism eMechanism,
686                                  rtl_TextEncoding eCharset,
687                                  bool bSmart,
688                                  FSysStyle eStyle)
689 {
690     sal_Unicode const * pPos = rTheAbsURIRef.getStr();
691     sal_Unicode const * pEnd = pPos + rTheAbsURIRef.getLength();
692 
693     setInvalid();
694 
695     sal_uInt32 nFragmentDelimiter = '#';
696 
697     rtl::OUStringBuffer aSynAbsURIRef;
698 
699     // Parse <scheme>:
700     sal_Unicode const * p = pPos;
701     PrefixInfo const * pPrefix = getPrefix(p, pEnd);
702     if (pPrefix)
703     {
704         pPos = p;
705         m_eScheme = pPrefix->m_eScheme;
706 
707         rtl::OUString sTemp(rtl::OUString::createFromAscii(pPrefix->m_eKind
708                                                  >= PrefixInfo::EXTERNAL ?
709                                              pPrefix->m_pTranslatedPrefix :
710                                              pPrefix->m_pPrefix));
711         aSynAbsURIRef.append(sTemp);
712         m_aScheme = SubString( 0, sTemp.indexOf(static_cast< sal_Unicode >(':')) );
713     }
714     else
715     {
716         if (bSmart)
717         {
718             // For scheme detection, the first (if any) of the following
719             // productions that matches the input string (and for which the
720             // appropriate style bit is set in eStyle, if applicable)
721             // determines the scheme. The productions use the auxiliary rules
722             //
723             //    domain = label *("." label)
724             //    label = alphanum [*(alphanum / "-") alphanum]
725             //    alphanum = ALPHA / DIGIT
726             //    IPv6reference = "[" IPv6address "]"
727             //    IPv6address = hexpart [":" IPv4address]
728             //    IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
729             //    hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
730             //    hexseq = hex4 *(":" hex4)
731             //    hex4 = 1*4HEXDIG
732             //    UCS4 = <any UCS4 character>
733             //
734             // 1st Production (known scheme):
735             //    <one of the known schemes, ignoring case> ":" *UCS4
736             //
737             // 2nd Production (mailto):
738             //    domain "@" domain
739             //
740             // 3rd Production (ftp):
741             //    "FTP" 2*("." label) ["/" *UCS4]
742             //
743             // 4th Production (http):
744             //    label 2*("." label) ["/" *UCS4]
745             //
746             // 5th Production (file):
747             //    "//" (domain / IPv6reference) ["/" *UCS4]
748             //
749             // 6th Production (Unix file):
750             //    "/" *UCS4
751             //
752             // 7th Production (UNC file; FSYS_DOS only):
753             //    "\\" domain ["\" *UCS4]
754             //
755             // 8th Production (Unix-like DOS file; FSYS_DOS only):
756             //    ALPHA ":" ["/" *UCS4]
757             //
758             // 9th Production (DOS file; FSYS_DOS only):
759             //    ALPHA ":" ["\" *UCS4]
760             //
761             // For the 'non URL' file productions 6--9, the interpretation of
762             // the input as a (degenerate) URI is turned off, i.e., escape
763             // sequences and fragments are never detected as such, but are
764             // taken as literal characters.
765 
766             sal_Unicode const * p1 = pPos;
767             if (eStyle & FSYS_DOS
768                 && pEnd - p1 >= 2
769                 && INetMIME::isAlpha(p1[0])
770                 && p1[1] == ':'
771                 && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\'))
772             {
773                 m_eScheme = INET_PROT_FILE; // 8th, 9th
774                 eMechanism = ENCODE_ALL;
775                 nFragmentDelimiter = 0x80000000;
776             }
777             else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/')
778             {
779                 p1 += 2;
780                 if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd))
781                     && (p1 == pEnd || *p1 == '/'))
782                     m_eScheme = INET_PROT_FILE; // 5th
783             }
784             else if (p1 != pEnd && *p1 == '/')
785             {
786                 m_eScheme = INET_PROT_FILE; // 6th
787                 eMechanism = ENCODE_ALL;
788                 nFragmentDelimiter = 0x80000000;
789             }
790             else if (eStyle & FSYS_DOS
791                      && pEnd - p1 >= 2
792                      && p1[0] == '\\'
793                      && p1[1] == '\\')
794             {
795                 p1 += 2;
796                 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
797                     p1, pEnd - p1, '\\');
798                 sal_Unicode const * pe = n == -1 ? pEnd : p1 + n;
799                 if (
800                     parseHostOrNetBiosName(
801                         p1, pe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW,
802                         true, NULL) ||
803                     (scanDomain(p1, pe) > 0 && p1 == pe)
804                    )
805                 {
806                     m_eScheme = INET_PROT_FILE; // 7th
807                     eMechanism = ENCODE_ALL;
808                     nFragmentDelimiter = 0x80000000;
809                 }
810             }
811             else
812             {
813                 sal_Unicode const * pDomainEnd = p1;
814                 sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd);
815                 if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@')
816                 {
817                     ++pDomainEnd;
818                     if (scanDomain(pDomainEnd, pEnd) > 0
819                         && pDomainEnd == pEnd)
820                         m_eScheme = INET_PROT_MAILTO; // 2nd
821                 }
822                 else if (nLabels >= 3
823                          && (pDomainEnd == pEnd || *pDomainEnd == '/'))
824                     m_eScheme
825                         = pDomainEnd - p1 >= 4
826                           && (p1[0] == 'f' || p1[0] == 'F')
827                           && (p1[1] == 't' || p1[1] == 'T')
828                           && (p1[2] == 'p' || p1[2] == 'P')
829                           && p1[3] == '.' ?
830                               INET_PROT_FTP : INET_PROT_HTTP; // 3rd, 4th
831             }
832         }
833 
834         rtl::OUString aSynScheme;
835         if (m_eScheme == INET_PROT_NOT_VALID) {
836             sal_Unicode const * p1 = pPos;
837             aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
838             if (aSynScheme.getLength() > 0)
839             {
840                 m_eScheme = INET_PROT_GENERIC;
841                 pPos = p1;
842             }
843         }
844 
845         if (bSmart && m_eScheme == INET_PROT_NOT_VALID && pPos != pEnd
846             && *pPos != nFragmentDelimiter)
847         {
848             m_eScheme = m_eSmartScheme;
849         }
850 
851         if (m_eScheme == INET_PROT_NOT_VALID)
852         {
853             setInvalid();
854             return false;
855         }
856 
857         if (m_eScheme != INET_PROT_GENERIC) {
858             aSynScheme = rtl::OUString::createFromAscii(getSchemeInfo().m_pScheme);
859         }
860         m_aScheme.set(aSynAbsURIRef, aSynScheme, aSynAbsURIRef.getLength());
861         aSynAbsURIRef.append(sal_Unicode(':'));
862     }
863 
864     sal_Char cEscapePrefix = getEscapePrefix();
865     sal_uInt32 nSegmentDelimiter = '/';
866     sal_uInt32 nAltSegmentDelimiter = 0x80000000;
867     bool bSkippedInitialSlash = false;
868 
869     // Parse //<user>;AUTH=<auth>@<host>:<port> or
870     // //<user>:<password>@<host>:<port> or
871     // //<reg_name>
872     if (getSchemeInfo().m_bAuthority)
873     {
874         sal_Unicode const * pUserInfoBegin = 0;
875         sal_Unicode const * pUserInfoEnd = 0;
876         sal_Unicode const * pHostPortBegin = 0;
877         sal_Unicode const * pHostPortEnd = 0;
878 
879         switch (m_eScheme)
880         {
881             case INET_PROT_VND_SUN_STAR_HELP:
882             {
883                 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
884                 {
885                     setInvalid();
886                     return false;
887                 }
888                 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
889                 rtl::OUStringBuffer aSynAuthority;
890                 while (pPos < pEnd
891                        && *pPos != '/' && *pPos != '?'
892                        && *pPos != nFragmentDelimiter)
893                 {
894                     EscapeType eEscapeType;
895                     sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
896                                                  cEscapePrefix, eMechanism,
897                                                  eCharset, eEscapeType);
898                     appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets,
899                                PART_AUTHORITY, cEscapePrefix, eCharset,
900                                false);
901                 }
902                 m_aHost.set(aSynAbsURIRef,
903                             aSynAuthority.makeStringAndClear(),
904                             aSynAbsURIRef.getLength());
905                     // misusing m_aHost to store the authority
906                 break;
907             }
908 
909             case INET_PROT_VND_SUN_STAR_HIER:
910             {
911                 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
912                 {
913                     pPos += 2;
914                     aSynAbsURIRef.
915                         appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
916                     rtl::OUStringBuffer aSynAuthority;
917                     while (pPos < pEnd
918                            && *pPos != '/' && *pPos != '?'
919                            && *pPos != nFragmentDelimiter)
920                     {
921                         EscapeType eEscapeType;
922                         sal_uInt32 nUTF32 = getUTF32(pPos,
923                                                      pEnd,
924                                                      bOctets,
925                                                      cEscapePrefix,
926                                                      eMechanism,
927                                                      eCharset,
928                                                      eEscapeType);
929                         appendUCS4(aSynAuthority,
930                                    nUTF32,
931                                    eEscapeType,
932                                    bOctets,
933                                    PART_AUTHORITY,
934                                    cEscapePrefix,
935                                    eCharset,
936                                    false);
937                     }
938                     if (aSynAuthority.getLength() == 0)
939                     {
940                         setInvalid();
941                         return false;
942                     }
943                     m_aHost.set(aSynAbsURIRef,
944                                 aSynAuthority.makeStringAndClear(),
945                                 aSynAbsURIRef.getLength());
946                         // misusing m_aHost to store the authority
947                 }
948                 break;
949             }
950 
951             case INET_PROT_VND_SUN_STAR_PKG:
952             {
953                 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
954                 {
955                     setInvalid();
956                     return false;
957                 }
958                 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
959                 rtl::OUStringBuffer aSynAuthority;
960                 while (pPos < pEnd
961                        && *pPos != '/' && *pPos != '?'
962                        && *pPos != nFragmentDelimiter)
963                 {
964                     EscapeType eEscapeType;
965                     sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
966                                                  cEscapePrefix, eMechanism,
967                                                  eCharset, eEscapeType);
968                     appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets,
969                                PART_AUTHORITY, cEscapePrefix, eCharset,
970                                false);
971                 }
972                 if (aSynAuthority.getLength() == 0)
973                 {
974                     setInvalid();
975                     return false;
976                 }
977                 m_aHost.set(aSynAbsURIRef,
978                             aSynAuthority.makeStringAndClear(),
979                             aSynAbsURIRef.getLength());
980                     // misusing m_aHost to store the authority
981                 break;
982             }
983 
984             case INET_PROT_FILE:
985                 if (bSmart)
986                 {
987                     // The first of the following seven productions that
988                     // matches the rest of the input string (and for which the
989                     // appropriate style bit is set in eStyle, if applicable)
990                     // determines the used notation.  The productions use the
991                     // auxiliary rules
992                     //
993                     //    domain = label *("." label)
994                     //    label = alphanum [*(alphanum / "-") alphanum]
995                     //    alphanum = ALPHA / DIGIT
996                     //    IPv6reference = "[" IPv6address "]"
997                     //    IPv6address = hexpart [":" IPv4address]
998                     //    IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
999                     //    hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
1000                     //    hexseq = hex4 *(":" hex4)
1001                     //    hex4 = 1*4HEXDIG
1002                     //    path = <any UCS4 character except "#">
1003                     //    UCS4 = <any UCS4 character>
1004 
1005                     // 1st Production (URL):
1006                     //    "//" [domain / IPv6reference] ["/" *path]
1007                     //        ["#" *UCS4]
1008                     //  becomes
1009                     //    "file://" domain "/" *path ["#" *UCS4]
1010                     if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1011                     {
1012                         sal_Unicode const * p1 = pPos + 2;
1013                         while (p1 != pEnd && *p1 != '/' &&
1014                                *p1 != nFragmentDelimiter)
1015                         {
1016                             ++p1;
1017                         }
1018                         if (parseHostOrNetBiosName(
1019                                 pPos + 2, p1, bOctets, ENCODE_ALL,
1020                                 RTL_TEXTENCODING_DONTKNOW, true, NULL))
1021                         {
1022                             aSynAbsURIRef.
1023                                 appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1024                             pHostPortBegin = pPos + 2;
1025                             pHostPortEnd = p1;
1026                             pPos = p1;
1027                             break;
1028                         }
1029                     }
1030 
1031                     // 2nd Production (MS IE generated 1; FSYS_DOS only):
1032                     //    "//" ALPHA ":" ["/" *path] ["#" *UCS4]
1033                     //  becomes
1034                     //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1035                     //  replacing "\" by "/" within <*path>
1036                     //
1037                     // 3rd Production (MS IE generated 2; FSYS_DOS only):
1038                     //    "//" ALPHA ":" ["\" *path] ["#" *UCS4]
1039                     //  becomes
1040                     //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1041                     //  replacing "\" by "/" within <*path>
1042                     //
1043                     // 4th Production (miscounted slashes):
1044                     //    "//" *path ["#" *UCS4]
1045                     //  becomes
1046                     //    "file:///" *path ["#" *UCS4]
1047                     if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1048                     {
1049                         aSynAbsURIRef.
1050                             appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1051                         pPos += 2;
1052                         bSkippedInitialSlash = true;
1053                         if ((eStyle & FSYS_DOS) != 0
1054                             && pEnd - pPos >= 2
1055                             && INetMIME::isAlpha(pPos[0])
1056                             && pPos[1] == ':'
1057                             && (pEnd - pPos == 2
1058                                 || pPos[2] == '/' || pPos[2] == '\\'))
1059                             nAltSegmentDelimiter = '\\';
1060                         break;
1061                     }
1062 
1063                     // 5th Production (Unix):
1064                     //    "/" *path ["#" *UCS4]
1065                     //  becomes
1066                     //    "file:///" *path ["#" *UCS4]
1067                     if (pPos < pEnd && *pPos == '/')
1068                     {
1069                         aSynAbsURIRef.
1070                             appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1071                         break;
1072                     }
1073 
1074                     // 6th Production (UNC; FSYS_DOS only):
1075                     //    "\\" domain ["\" *path] ["#" *UCS4]
1076                     //  becomes
1077                     //    "file://" domain "/" *path ["#" *UCS4]
1078                     //  replacing "\" by "/" within <*path>
1079                     if (eStyle & FSYS_DOS
1080                         && pEnd - pPos >= 2
1081                         && pPos[0] == '\\'
1082                         && pPos[1] == '\\')
1083                     {
1084                         sal_Unicode const * p1 = pPos + 2;
1085                         sal_Unicode const * pe = p1;
1086                         while (pe < pEnd && *pe != '\\' &&
1087                                *pe != nFragmentDelimiter)
1088                         {
1089                             ++pe;
1090                         }
1091                         if (
1092                              parseHostOrNetBiosName(
1093                                 p1, pe, bOctets, ENCODE_ALL,
1094                                 RTL_TEXTENCODING_DONTKNOW, true, NULL) ||
1095                              (scanDomain(p1, pe) > 0 && p1 == pe)
1096                            )
1097                         {
1098                             aSynAbsURIRef.
1099                                 appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1100                             pHostPortBegin = pPos + 2;
1101                             pHostPortEnd = pe;
1102                             pPos = pe;
1103                             nSegmentDelimiter = '\\';
1104                             break;
1105                         }
1106                     }
1107 
1108                     // 7th Production (Unix-like DOS; FSYS_DOS only):
1109                     //    ALPHA ":" ["/" *path] ["#" *UCS4]
1110                     //  becomes
1111                     //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1112                     //  replacing "\" by "/" within <*path>
1113                     //
1114                     // 8th Production (DOS; FSYS_DOS only):
1115                     //    ALPHA ":" ["\" *path] ["#" *UCS4]
1116                     //  becomes
1117                     //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1118                     //  replacing "\" by "/" within <*path>
1119                     if (eStyle & FSYS_DOS
1120                         && pEnd - pPos >= 2
1121                         && INetMIME::isAlpha(pPos[0])
1122                         && pPos[1] == ':'
1123                         && (pEnd - pPos == 2
1124                             || pPos[2] == '/'
1125                             || pPos[2] == '\\'))
1126                     {
1127                         aSynAbsURIRef.
1128                             appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1129                         nAltSegmentDelimiter = '\\';
1130                         bSkippedInitialSlash = true;
1131                         break;
1132                     }
1133 
1134                     // 9th Production (any):
1135                     //    *path ["#" *UCS4]
1136                     //  becomes
1137                     //    "file:///" *path ["#" *UCS4]
1138                     //  replacing the delimiter by "/" within <*path>.  The
1139                     //  delimiter is that character from the set { "/", "\",
1140                     //  ":" } which appears most often in <*path> (if FSYS_UNX
1141                     //  is not among the style bits, "/" is removed from the
1142                     //  set; if FSYS_DOS is not among the style bits, "\" is
1143                     //  removed from the set; if FSYS_MAC is not among the
1144                     //  style bits, ":" is removed from the set).  If two or
1145                     //  more characters appear the same number of times, the
1146                     //  character mentioned first in that set is chosen.  If
1147                     //  the first character of <*path> is the delimiter, that
1148                     //  character is not copied.
1149                     if (eStyle & (FSYS_UNX | FSYS_DOS | FSYS_MAC))
1150                     {
1151                         aSynAbsURIRef.
1152                             appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1153                         switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
1154                         {
1155                             case FSYS_UNX:
1156                                 nSegmentDelimiter = '/';
1157                                 break;
1158 
1159                             case FSYS_DOS:
1160                                 nSegmentDelimiter = '\\';
1161                                 break;
1162 
1163                             case FSYS_MAC:
1164                                 nSegmentDelimiter = ':';
1165                                 break;
1166 
1167                             default:
1168                                 DBG_ERROR(
1169                                     "INetURLObject::setAbsURIRef():"
1170                                         " Bad guessFSysStyleByCounting");
1171                                 break;
1172                         }
1173                         bSkippedInitialSlash
1174                             = pPos != pEnd && *pPos != nSegmentDelimiter;
1175                         break;
1176                     }
1177                 }
1178             default:
1179             {
1180                 // For INET_PROT_FILE, allow an empty authority ("//") to be
1181                 // missing if the following path starts with an explicit "/"
1182                 // (Java is notorious in generating such file URLs, so be
1183                 // liberal here):
1184                 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1185                     pPos += 2;
1186                 else if (!bSmart
1187                          && !(m_eScheme == INET_PROT_FILE
1188                               && pPos != pEnd && *pPos == '/'))
1189                 {
1190                     setInvalid();
1191                     return false;
1192                 }
1193                 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1194 
1195                 sal_Unicode const * pAuthority = pPos;
1196                 sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1197                 while (pPos < pEnd && *pPos != '/' && *pPos != c
1198                        && *pPos != nFragmentDelimiter)
1199                     ++pPos;
1200                 if (getSchemeInfo().m_bUser)
1201                     if (getSchemeInfo().m_bHost)
1202                     {
1203                         sal_Unicode const * p1 = pAuthority;
1204                         while (p1 < pPos && *p1 != '@')
1205                             ++p1;
1206                         if (p1 == pPos)
1207                         {
1208                             pHostPortBegin = pAuthority;
1209                             pHostPortEnd = pPos;
1210                         }
1211                         else
1212                         {
1213                             pUserInfoBegin = pAuthority;
1214                             pUserInfoEnd = p1;
1215                             pHostPortBegin = p1 + 1;
1216                             pHostPortEnd = pPos;
1217                         }
1218                     }
1219                     else
1220                     {
1221                         pUserInfoBegin = pAuthority;
1222                         pUserInfoEnd = pPos;
1223                     }
1224                 else if (getSchemeInfo().m_bHost)
1225                 {
1226                     pHostPortBegin = pAuthority;
1227                     pHostPortEnd = pPos;
1228                 }
1229                 else if (pPos != pAuthority)
1230                 {
1231                     setInvalid();
1232                     return false;
1233                 }
1234                 break;
1235             }
1236         }
1237 
1238         if (pUserInfoBegin)
1239         {
1240             Part ePart = m_eScheme == INET_PROT_IMAP ?
1241                              PART_IMAP_ACHAR :
1242                          m_eScheme == INET_PROT_VIM ?
1243                              PART_VIM :
1244                              PART_USER_PASSWORD;
1245             bool bSupportsPassword = getSchemeInfo().m_bPassword;
1246             bool bSupportsAuth
1247                 = !bSupportsPassword && getSchemeInfo().m_bAuth;
1248             bool bHasAuth = false;
1249             rtl::OUStringBuffer aSynUser;
1250             sal_Unicode const * p1 = pUserInfoBegin;
1251             while (p1 < pUserInfoEnd)
1252             {
1253                 EscapeType eEscapeType;
1254                 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1255                                              cEscapePrefix, eMechanism,
1256                                              eCharset, eEscapeType);
1257                 if (eEscapeType == ESCAPE_NO)
1258                 {
1259                     if (nUTF32 == ':' && bSupportsPassword)
1260                     {
1261                         bHasAuth = true;
1262                         break;
1263                     }
1264                     else if (nUTF32 == ';' && bSupportsAuth
1265                              && pUserInfoEnd - p1
1266                                     > RTL_CONSTASCII_LENGTH("auth=")
1267                              && INetMIME::equalIgnoreCase(
1268                                     p1,
1269                                     p1 + RTL_CONSTASCII_LENGTH("auth="),
1270                                     "auth="))
1271                     {
1272                         p1 += RTL_CONSTASCII_LENGTH("auth=");
1273                         bHasAuth = true;
1274                         break;
1275                     }
1276                 }
1277                 appendUCS4(aSynUser, nUTF32, eEscapeType, bOctets, ePart,
1278                            cEscapePrefix, eCharset, false);
1279             }
1280             m_aUser.set(aSynAbsURIRef, aSynUser.makeStringAndClear(),
1281                 aSynAbsURIRef.getLength());
1282             if (bHasAuth)
1283             {
1284                 if (bSupportsPassword)
1285                 {
1286                     aSynAbsURIRef.append(sal_Unicode(':'));
1287                     rtl::OUStringBuffer aSynAuth;
1288                     while (p1 < pUserInfoEnd)
1289                     {
1290                         EscapeType eEscapeType;
1291                         sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1292                                                      cEscapePrefix,
1293                                                      eMechanism, eCharset,
1294                                                      eEscapeType);
1295                         appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets,
1296                                    ePart, cEscapePrefix, eCharset, false);
1297                     }
1298                     m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1299                         aSynAbsURIRef.getLength());
1300                 }
1301                 else
1302                 {
1303                     aSynAbsURIRef.
1304                         appendAscii(RTL_CONSTASCII_STRINGPARAM(";AUTH="));
1305                     rtl::OUStringBuffer aSynAuth;
1306                     while (p1 < pUserInfoEnd)
1307                     {
1308                         EscapeType eEscapeType;
1309                         sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1310                                                      cEscapePrefix,
1311                                                      eMechanism, eCharset,
1312                                                      eEscapeType);
1313                         if (!INetMIME::isIMAPAtomChar(nUTF32))
1314                         {
1315                             setInvalid();
1316                             return false;
1317                         }
1318                         appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets,
1319                                    ePart, cEscapePrefix, eCharset, false);
1320                     }
1321                     m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1322                         aSynAbsURIRef.getLength());
1323                 }
1324             }
1325             if (pHostPortBegin)
1326                 aSynAbsURIRef.append(sal_Unicode('@'));
1327         }
1328 
1329         if (pHostPortBegin)
1330         {
1331             sal_Unicode const * pPort = pHostPortEnd;
1332             if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd )
1333             {
1334                 sal_Unicode const * p1 = pHostPortEnd - 1;
1335                 while (p1 > pHostPortBegin && INetMIME::isDigit(*p1))
1336                     --p1;
1337                 if (*p1 == ':')
1338                     pPort = p1;
1339             }
1340             bool bNetBiosName = false;
1341             switch (m_eScheme)
1342             {
1343                 case INET_PROT_FILE:
1344                     // If the host equals "LOCALHOST" (unencoded and ignoring
1345                     // case), turn it into an empty host:
1346                     if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort,
1347                                                   "localhost"))
1348                         pHostPortBegin = pPort;
1349                     bNetBiosName = true;
1350                     break;
1351 
1352                 case INET_PROT_LDAP:
1353                 case INET_PROT_SMB:
1354                     if (pHostPortBegin == pPort && pPort != pHostPortEnd)
1355                     {
1356                         setInvalid();
1357                         return false;
1358                     }
1359                     break;
1360                 default:
1361                     if (pHostPortBegin == pPort)
1362                     {
1363                         setInvalid();
1364                         return false;
1365                     }
1366                     break;
1367             }
1368             rtl::OUStringBuffer aSynHost;
1369             if (!parseHostOrNetBiosName(
1370                     pHostPortBegin, pPort, bOctets, eMechanism, eCharset,
1371                     bNetBiosName, &aSynHost))
1372             {
1373                 setInvalid();
1374                 return false;
1375             }
1376             m_aHost.set(aSynAbsURIRef, aSynHost.makeStringAndClear(),
1377                 aSynAbsURIRef.getLength());
1378             if (pPort != pHostPortEnd)
1379             {
1380                 aSynAbsURIRef.append(sal_Unicode(':'));
1381                 m_aPort.set(aSynAbsURIRef,
1382                     rtl::OUString(pPort + 1, pHostPortEnd - (pPort + 1)),
1383                     aSynAbsURIRef.getLength());
1384             }
1385         }
1386     }
1387 
1388     // Parse <path>
1389     rtl::OUStringBuffer aSynPath;
1390     if (!parsePath(m_eScheme, &pPos, pEnd, bOctets, eMechanism, eCharset,
1391                    bSkippedInitialSlash, nSegmentDelimiter,
1392                    nAltSegmentDelimiter,
1393                    getSchemeInfo().m_bQuery ? '?' : 0x80000000,
1394                    nFragmentDelimiter, aSynPath))
1395     {
1396         setInvalid();
1397         return false;
1398     }
1399     m_aPath.set(aSynAbsURIRef, aSynPath.makeStringAndClear(),
1400         aSynAbsURIRef.getLength());
1401 
1402     // Parse ?<query>
1403     if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?')
1404     {
1405         aSynAbsURIRef.append(sal_Unicode('?'));
1406         rtl::OUStringBuffer aSynQuery;
1407         for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;)
1408         {
1409             EscapeType eEscapeType;
1410             sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, cEscapePrefix,
1411                                          eMechanism, eCharset, eEscapeType);
1412             appendUCS4(aSynQuery, nUTF32, eEscapeType, bOctets,
1413                        PART_URIC, cEscapePrefix, eCharset, true);
1414         }
1415         m_aQuery.set(aSynAbsURIRef, aSynQuery.makeStringAndClear(),
1416             aSynAbsURIRef.getLength());
1417     }
1418 
1419     // Parse #<fragment>
1420     if (pPos < pEnd && *pPos == nFragmentDelimiter)
1421     {
1422         aSynAbsURIRef.append(sal_Unicode(nFragmentDelimiter));
1423         rtl::OUStringBuffer aSynFragment;
1424         for (++pPos; pPos < pEnd;)
1425         {
1426             EscapeType eEscapeType;
1427             sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, cEscapePrefix,
1428                                          eMechanism, eCharset, eEscapeType);
1429             appendUCS4(aSynFragment, nUTF32, eEscapeType, bOctets, PART_URIC,
1430                        cEscapePrefix, eCharset, true);
1431         }
1432         m_aFragment.set(aSynAbsURIRef, aSynFragment.makeStringAndClear(),
1433             aSynAbsURIRef.getLength());
1434     }
1435 
1436     if (pPos != pEnd)
1437     {
1438         setInvalid();
1439         return false;
1440     }
1441 
1442     m_aAbsURIRef = aSynAbsURIRef;
1443 
1444     return true;
1445 }
1446 
1447 //============================================================================
/** Resolves a URI reference against this object, which acts as the base URL.
 *
 *  Input that carries its own scheme (or, in smart mode, looks like an
 *  absolute DOS/UNC file system path) is parsed on its own.  Everything else
 *  is merged with the scheme, authority, path and query of the base, and the
 *  synthesized string is then parsed into a new INetURLObject.
 *
 *  @param rTheRelURIRef    The reference to resolve.
 *  @param bOctets          Forwarded unchanged to getUTF32(), appendUCS4(),
 *                          parseHostOrNetBiosName() and setAbsURIRef().
 *  @param rTheAbsURIRef    Receives the result.  It is only assigned on the
 *                          paths that return true.
 *  @param rWasAbsolute     Set to true if the input was treated as absolute
 *                          (own scheme, or recognized DOS/UNC path); set to
 *                          false otherwise, including on every failure path.
 *  @param eMechanism       Encoding mechanism handed to getUTF32(); replaced
 *                          by ENCODE_ALL when the input is treated as a
 *                          relative non-URI (see bRelativeNonURIs).
 *  @param eCharset         Text encoding handed to getUTF32()/appendUCS4().
 *  @param bIgnoreFragment  If true, no fragment ends up in the result.
 *  @param bSmart           Enables the file system path heuristics for input
 *                          without a scheme.
 *  @param bRelativeNonURIs Only evaluated for scheme-less input in smart
 *                          mode: disables the query and fragment delimiters.
 *                          Forced to true when the base is a file URL and the
 *                          input is guessed to be a DOS or Mac path.
 *  @param eStyle           File system style bits; tested for FSYS_DOS and
 *                          passed on to guessFSysStyleByCounting() and
 *                          setAbsURIRef().
 *  @return true if an error-free absolute URL could be produced.
 */
convertRelToAbs(rtl::OUString const & rTheRelURIRef,bool bOctets,INetURLObject & rTheAbsURIRef,bool & rWasAbsolute,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,bool bIgnoreFragment,bool bSmart,bool bRelativeNonURIs,FSysStyle eStyle) const1448 bool INetURLObject::convertRelToAbs(rtl::OUString const & rTheRelURIRef,
1449                                     bool bOctets,
1450                                     INetURLObject & rTheAbsURIRef,
1451                                     bool & rWasAbsolute,
1452                                     EncodeMechanism eMechanism,
1453                                     rtl_TextEncoding eCharset,
1454                                     bool bIgnoreFragment, bool bSmart,
1455                                     bool bRelativeNonURIs, FSysStyle eStyle)
1456     const
1457 {
1458     sal_Unicode const * p = rTheRelURIRef.getStr();
1459     sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();
1460 
         // Scheme detection works on the scratch cursor pPrefixBegin; p itself
         // keeps pointing at the start of the input.  First the table of known
         // prefixes is consulted, then the generic scheme syntax.
1461     sal_Unicode const * pPrefixBegin = p;
1462     PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
1463     bool hasScheme = pPrefix != 0;
1464     if (!hasScheme) {
1465         pPrefixBegin = p;
1466         hasScheme = parseScheme(&pPrefixBegin, pEnd, '#').getLength() > 0;
1467     }
1468 
         // Delimiters used while scanning the input.  0x80000000 serves as
         // "no such delimiter" (presumably because no character value read
         // from the input can ever compare equal to it).
1469     sal_uInt32 nSegmentDelimiter = '/';
1470     sal_uInt32 nQueryDelimiter
1471         = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1472     sal_uInt32 nFragmentDelimiter = '#';
1473     Part ePart = PART_VISIBLE;
1474 
1475     if (!hasScheme && bSmart)
1476     {
1477         // If the input matches any of the following productions (for which
1478         // the appropriate style bit is set in eStyle), it is assumed to be an
1479         // absolute file system path, rather than a relative URI reference.
1480         // (This is only a subset of the productions used for scheme detection
1481         // in INetURLObject::setAbsURIRef(), because most of those productions
1482         // interfere with the syntax of relative URI references.)  The
1483         // productions use the auxiliary rules
1484         //
1485         //    domain = label *("." label)
1486         //    label = alphanum [*(alphanum / "-") alphanum]
1487         //    alphanum = ALPHA / DIGIT
1488         //    UCS4 = <any UCS4 character>
1489         //
1490         // 1st Production (UNC file; FSYS_DOS only):
1491         //    "\\" domain ["\" *UCS4]
1492         //
1493         // 2nd Production (Unix-like DOS file; FSYS_DOS only):
1494         //    ALPHA ":" ["/" *UCS4]
1495         //
1496         // 3rd Production (DOS file; FSYS_DOS only):
1497         //    ALPHA ":" ["\" *UCS4]
1498         if (eStyle & FSYS_DOS)
1499         {
1500             bool bFSys = false;
1501             sal_Unicode const * q = p;
1502             if (pEnd - q >= 2
1503                 && INetMIME::isAlpha(q[0])
1504                 && q[1] == ':'
1505                 && (pEnd - q == 2 || q[2] == '/' || q[2] == '\\'))
1506                 bFSys = true; // 2nd, 3rd
1507             else if (pEnd - q >= 2 && q[0] == '\\' && q[1] == '\\')
1508             {
1509                 q += 2;
1510                 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
1511                     q, pEnd - q, '\\');
1512                 sal_Unicode const * qe = n == -1 ? pEnd : q + n;
                     // Passing NULL as the output buffer: only the validity
                     // of the host part [q, qe) is of interest here.
1513                 if (parseHostOrNetBiosName(
1514                         q, qe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW,
1515                         true, NULL))
1516                 {
1517                     bFSys = true; // 1st
1518                 }
1519             }
1520             if (bFSys)
1521             {
                     // Let the smart parser turn the path into a URL.  If it
                     // reports an error, fall through and treat the input as
                     // a relative reference after all.
1522                 INetURLObject aNewURI;
1523                 aNewURI.setAbsURIRef(rTheRelURIRef, bOctets, eMechanism,
1524                                      eCharset, true, eStyle);
1525                 if (!aNewURI.HasError())
1526                 {
1527                     rTheAbsURIRef = aNewURI;
1528                     rWasAbsolute = true;
1529                     return true;
1530                 }
1531             }
1532         }
1533 
1534         // When the base URL is a file URL, accept relative file system paths
1535         // using "\" or ":" as delimiter (and ignoring URI conventions for "%"
1536         // and "#"), as well as relative URIs using "/" as delimiter:
1537         if (m_eScheme == INET_PROT_FILE)
1538             switch (guessFSysStyleByCounting(p, pEnd, eStyle))
1539             {
1540                 case FSYS_UNX:
1541                     nSegmentDelimiter = '/';
1542                     break;
1543 
1544                 case FSYS_DOS:
1545                     nSegmentDelimiter = '\\';
1546                     bRelativeNonURIs = true;
1547                     break;
1548 
1549                 case FSYS_MAC:
1550                     nSegmentDelimiter = ':';
1551                     bRelativeNonURIs = true;
1552                     break;
1553 
1554                 default:
1555                     DBG_ERROR("INetURLObject::convertRelToAbs():"
1556                                   " Bad guessFSysStyleByCounting");
1557                     break;
1558             }
1559 
             // Non-URI input: nothing is a query or fragment delimiter, and
             // the encoding mechanism is switched to ENCODE_ALL.
1560         if (bRelativeNonURIs)
1561         {
1562             eMechanism = ENCODE_ALL;
1563             nQueryDelimiter = 0x80000000;
1564             nFragmentDelimiter = 0x80000000;
1565             ePart = PART_VISIBLE_NONSPECIAL;
1566         }
1567     }
1568 
1569     // If the relative URI has the same scheme as the base URI, and that
1570     // scheme is hierarchical, then ignore its presence in the relative
1571     // URI in order to be backward compatible (cf. RFC 2396 section 5.2
1572     // step 3):
1573     if (pPrefix && pPrefix->m_eScheme == m_eScheme
1574         && getSchemeInfo().m_bHierarchical)
1575     {
1576         hasScheme = false;
             // Advance p past the scheme, including the terminating ':'.
1577         while (p != pEnd && *p++ != ':') ;
1578     }
1579     rWasAbsolute = hasScheme;
1580 
1581     // Fast solution for non-relative URIs:
1582     if (hasScheme)
1583     {
1584         INetURLObject aNewURI(rTheRelURIRef, eMechanism, eCharset);
1585         if (aNewURI.HasError())
1586         {
1587             rWasAbsolute = false;
1588             return false;
1589         }
1590 
1591         if (bIgnoreFragment)
1592             aNewURI.clearFragment();
1593         rTheAbsURIRef = aNewURI;
1594         return true;
1595     }
1596 
         // From here on the result is synthesized piece by piece into
         // aSynAbsURIRef.  eState records which part of the input p currently
         // points into:
         //   STATE_AUTH      nothing consumed yet (authority not yet decided)
         //   STATE_ABS_PATH  p is just past the '/' that starts an absolute path
         //   STATE_REL_PATH  p is at the start of a relative path
         //   STATE_FRAGMENT  p is just past the fragment delimiter
         //   STATE_DONE      the input is exhausted
1597     enum State { STATE_AUTH, STATE_ABS_PATH, STATE_REL_PATH, STATE_FRAGMENT,
1598                  STATE_DONE };
1599 
1600     rtl::OUStringBuffer aSynAbsURIRef;
1601     // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
1602     // is empty ("") in that case, so take the scheme from m_aAbsURIRef
1603     if (m_eScheme != INET_PROT_GENERIC)
1604     {
1605         aSynAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1606     }
1607     else
1608     {
1609         sal_Unicode const * pSchemeBegin
1610             = m_aAbsURIRef.getStr();
1611         sal_Unicode const * pSchemeEnd = pSchemeBegin;
             // NOTE(review): this scan has no bound other than finding ':'; it
             // relies on m_aAbsURIRef containing a colon whenever m_eScheme is
             // INET_PROT_GENERIC -- confirm that invariant holds.
1612         while (pSchemeEnd[0] != ':')
1613         {
1614             ++pSchemeEnd;
1615         }
1616         aSynAbsURIRef.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
1617     }
1618     aSynAbsURIRef.append(sal_Unicode(':'));
1619 
1620     sal_Char cEscapePrefix = getEscapePrefix();
1621 
1622     State eState = STATE_AUTH;
         // bSameDoc stays true only if the input contributes neither an
         // authority nor a path, i.e. it is empty or consists of a fragment
         // only; in that case the path and query of the base are reused below.
1623     bool bSameDoc = true;
1624 
1625     if (getSchemeInfo().m_bAuthority)
1626     {
1627         if (pEnd - p >= 2 && p[0] == '/' && p[1] == '/')
1628         {
                 // The input brings its own authority: copy it up to the next
                 // unescaped segment or fragment delimiter.  getUTF32() has
                 // already moved p past that delimiter when the loop breaks.
1629             aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1630             p += 2;
1631             eState = STATE_ABS_PATH;
1632             bSameDoc = false;
1633             while (p != pEnd)
1634             {
1635                 EscapeType eEscapeType;
1636                 sal_uInt32 nUTF32
1637                     = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1638                                eCharset, eEscapeType);
1639                 if (eEscapeType == ESCAPE_NO)
1640                 {
1641                     if (nUTF32 == nSegmentDelimiter)
1642                         break;
1643                     else if (nUTF32 == nFragmentDelimiter)
1644                     {
1645                         eState = STATE_FRAGMENT;
1646                         break;
1647                     }
1648                 }
1649                 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
1650                            PART_VISIBLE, cEscapePrefix, eCharset, true);
1651             }
1652         }
1653         else
1654         {
                 // No authority in the input: inherit the one of the base.
1655             SubString aAuthority(getAuthority());
1656             aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1657                                      + aAuthority.getBegin(),
1658                                  aAuthority.getLength());
1659         }
1660     }
1661 
         // Still undecided: classify the remaining input by its first
         // character.
1662     if (eState == STATE_AUTH)
1663     {
1664         if (p == pEnd)
1665             eState = STATE_DONE;
1666         else if (*p == nFragmentDelimiter)
1667         {
1668             ++p;
1669             eState = STATE_FRAGMENT;
1670         }
1671         else if (*p == nSegmentDelimiter)
1672         {
1673             ++p;
1674             eState = STATE_ABS_PATH;
1675             bSameDoc = false;
1676         }
1677         else
1678         {
1679             eState = STATE_REL_PATH;
1680             bSameDoc = false;
1681         }
1682     }
1683 
1684     if (eState == STATE_ABS_PATH)
1685     {
             // Absolute path: copy the rest of the input verbatim (no "." or
             // ".." processing), mapping the segment delimiter to '/' and
             // stopping at an unescaped fragment delimiter.
1686         aSynAbsURIRef.append(sal_Unicode('/'));
1687         eState = STATE_DONE;
1688         while (p != pEnd)
1689         {
1690             EscapeType eEscapeType;
1691             sal_uInt32 nUTF32
1692                 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1693                            eCharset, eEscapeType);
1694             if (eEscapeType == ESCAPE_NO)
1695             {
1696                 if (nUTF32 == nFragmentDelimiter)
1697                 {
1698                     eState = STATE_FRAGMENT;
1699                     break;
1700                 }
1701                 else if (nUTF32 == nSegmentDelimiter)
1702                     nUTF32 = '/';
1703             }
1704             appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1705                        cEscapePrefix, eCharset, true);
1706         }
1707     }
1708     else if (eState == STATE_REL_PATH)
1709     {
1710         if (!getSchemeInfo().m_bHierarchical)
1711         {
1712             // Detect cases where a relative input could not be made absolute
1713             // because the given base URL is broken (most probably because it is
1714             // empty):
1715             OSL_ASSERT(!HasError());
1716             rWasAbsolute = false;
1717             return false;
1718         }
1719 
             // Start from the base path with its last segment removed, i.e.
             // everything up to and including the last '/'.
1720         sal_Unicode const * pBasePathBegin
1721             = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1722         sal_Unicode const * pBasePathEnd
1723             = pBasePathBegin + m_aPath.getLength();
1724         while (pBasePathEnd != pBasePathBegin)
1725             if (*(--pBasePathEnd) == '/')
1726             {
1727                 ++pBasePathEnd;
1728                 break;
1729             }
1730 
1731         sal_Int32 nPathBegin = aSynAbsURIRef.getLength();
1732         aSynAbsURIRef.append(pBasePathBegin, pBasePathEnd - pBasePathBegin);
1733         DBG_ASSERT(aSynAbsURIRef.getLength() > nPathBegin
1734                  && aSynAbsURIRef.charAt(aSynAbsURIRef.getLength() - 1) == '/',
1735                  "INetURLObject::convertRelToAbs(): Bad base path");
1736 
             // Process the relative path one segment per iteration.
1737         while (p != pEnd && *p != nQueryDelimiter && *p != nFragmentDelimiter)
1738         {
1739             if (*p == '.')
1740             {
                     // A "." segment is dropped together with a following
                     // segment delimiter.
1741                 if (pEnd - p == 1
1742                     || p[1] == nSegmentDelimiter
1743                     || p[1] == nQueryDelimiter
1744                     || p[1] == nFragmentDelimiter)
1745                 {
1746                     ++p;
1747                     if (p != pEnd && *p == nSegmentDelimiter)
1748                         ++p;
1749                     continue;
1750                 }
                     // A ".." segment removes the last segment collected so
                     // far, but only while more than the leading "/" is left;
                     // otherwise it falls through and is copied like any
                     // other segment.
1751                 else if (pEnd - p >= 2
1752                          && p[1] == '.'
1753                          && (pEnd - p == 2
1754                              || p[2] == nSegmentDelimiter
1755                              || p[2] == nQueryDelimiter
1756                              || p[2] == nFragmentDelimiter)
1757                          && aSynAbsURIRef.getLength() - nPathBegin > 1)
1758                 {
1759                     p += 2;
1760                     if (p != pEnd && *p == nSegmentDelimiter)
1761                         ++p;
1762 
                         // Search backwards for the '/' in front of the last
                         // segment, skipping the trailing '/' itself.
1763                     sal_Int32 i = aSynAbsURIRef.getLength() - 2;
1764                     while (i > nPathBegin && aSynAbsURIRef.charAt(i) != '/')
1765                         --i;
1766                     aSynAbsURIRef.setLength(i + 1);
1767                     DBG_ASSERT(
1768                         aSynAbsURIRef.getLength() > nPathBegin
1769                         && aSynAbsURIRef.charAt(aSynAbsURIRef.getLength() - 1)
1770                                == '/',
1771                         "INetURLObject::convertRelToAbs(): Bad base path");
1772                     continue;
1773                 }
1774             }
1775 
                 // Ordinary segment: copy it, then emit '/' for a following
                 // segment delimiter.
1776             while (p != pEnd
1777                    && *p != nSegmentDelimiter
1778                    && *p != nQueryDelimiter
1779                    && *p != nFragmentDelimiter)
1780             {
1781                 EscapeType eEscapeType;
1782                 sal_uInt32 nUTF32
1783                     = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1784                                eCharset, eEscapeType);
1785                 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1786                            cEscapePrefix, eCharset, true);
1787             }
1788             if (p != pEnd && *p == nSegmentDelimiter)
1789             {
1790                 aSynAbsURIRef.append(sal_Unicode('/'));
1791                 ++p;
1792             }
1793         }
1794 
             // Whatever remains in front of the fragment delimiter (the query
             // part, including its delimiter) is copied as is.
1795         while (p != pEnd && *p != nFragmentDelimiter)
1796         {
1797             EscapeType eEscapeType;
1798             sal_uInt32 nUTF32
1799                 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1800                            eCharset, eEscapeType);
1801             appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1802                        cEscapePrefix, eCharset, true);
1803         }
1804 
1805         if (p == pEnd)
1806             eState = STATE_DONE;
1807         else
1808         {
                 // Skip the fragment delimiter.
1809             ++p;
1810             eState = STATE_FRAGMENT;
1811         }
1812     }
1813     else if (bSameDoc)
1814     {
             // Empty or fragment-only reference: keep the path of the base
             // and, if present, its query together with the preceding '?'
             // (hence the "- 1" / "+ 1").
1815         aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
1816                              m_aPath.getLength());
1817         if (m_aQuery.isPresent())
1818             aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1819                                      + m_aQuery.getBegin() - 1,
1820                                  m_aQuery.getLength() + 1);
1821     }
1822 
         // Copy the fragment of the input, unless the caller asked to drop it.
1823     if (eState == STATE_FRAGMENT && !bIgnoreFragment)
1824     {
1825         aSynAbsURIRef.append(sal_Unicode('#'));
1826         while (p != pEnd)
1827         {
1828             EscapeType eEscapeType;
1829             sal_uInt32 nUTF32
1830                 = getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1831                            eCharset, eEscapeType);
1832             appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
1833                        PART_VISIBLE, cEscapePrefix, eCharset, true);
1834         }
1835     }
1836 
         // Parse the synthesized string; this validates the result.
1837     INetURLObject aNewURI(aSynAbsURIRef.makeStringAndClear());
1838     if (aNewURI.HasError())
1839     {
1840         // Detect cases where a relative input could not be made absolute
1841         // because the given base URL is broken (most probably because it is
1842         // empty):
1843         OSL_ASSERT(!HasError());
1844         rWasAbsolute = false;
1845         return false;
1846     }
1847 
1848     rTheAbsURIRef = aNewURI;
1849     return true;
1850 }
1851 
1852 //============================================================================
convertAbsToRel(rtl::OUString const & rTheAbsURIRef,bool bOctets,rtl::OUString & rTheRelURIRef,EncodeMechanism eEncodeMechanism,DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset,FSysStyle eStyle) const1853 bool INetURLObject::convertAbsToRel(rtl::OUString const & rTheAbsURIRef,
1854                                     bool bOctets, rtl::OUString & rTheRelURIRef,
1855                                     EncodeMechanism eEncodeMechanism,
1856                                     DecodeMechanism eDecodeMechanism,
1857                                     rtl_TextEncoding eCharset,
1858                                     FSysStyle eStyle) const
1859 {
1860     // Check for hierarchical base URL:
1861     if (!getSchemeInfo().m_bHierarchical)
1862     {
1863         rTheRelURIRef
1864             = decode(rTheAbsURIRef,
1865                      getEscapePrefix(CompareProtocolScheme(rTheAbsURIRef)),
1866                      eDecodeMechanism, eCharset);
1867         return false;
1868     }
1869 
1870     // Convert the input (absolute or relative URI ref) to an absolute URI
1871     // ref:
1872     INetURLObject aSubject;
1873     bool bWasAbsolute;
1874     if (!convertRelToAbs(rTheAbsURIRef, bOctets, aSubject, bWasAbsolute,
1875                          eEncodeMechanism, eCharset, false, false, false,
1876                          eStyle))
1877     {
1878         rTheRelURIRef
1879             = decode(rTheAbsURIRef,
1880                      getEscapePrefix(CompareProtocolScheme(rTheAbsURIRef)),
1881                      eDecodeMechanism, eCharset);
1882         return false;
1883     }
1884 
1885     // Check for differing scheme or authority parts:
1886     if ((m_aScheme.compare(
1887              aSubject.m_aScheme, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1888          != 0)
1889         || (m_aUser.compare(
1890                 aSubject.m_aUser, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1891             != 0)
1892         || (m_aAuth.compare(
1893                 aSubject.m_aAuth, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1894             != 0)
1895         || (m_aHost.compare(
1896                 aSubject.m_aHost, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1897             != 0)
1898         || (m_aPort.compare(
1899                 aSubject.m_aPort, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1900             != 0))
1901     {
1902         rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1903         return false;
1904     }
1905 
1906     sal_Unicode const * pBasePathBegin
1907         = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1908     sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength();
1909     sal_Unicode const * pSubjectPathBegin
1910         = aSubject.m_aAbsURIRef.getStr() + aSubject.m_aPath.getBegin();
1911     sal_Unicode const * pSubjectPathEnd
1912         = pSubjectPathBegin + aSubject.m_aPath.getLength();
1913 
1914     // Make nMatch point past the last matching slash, or past the end of the
1915     // paths, in case they are equal:
1916     sal_Unicode const * pSlash = 0;
1917     sal_Unicode const * p1 = pBasePathBegin;
1918     sal_Unicode const * p2 = pSubjectPathBegin;
1919     for (;;)
1920     {
1921         if (p1 == pBasePathEnd || p2 == pSubjectPathEnd)
1922         {
1923             if (p1 == pBasePathEnd && p2 == pSubjectPathEnd)
1924                 pSlash = p1;
1925             break;
1926         }
1927 
1928         sal_Unicode c = *p1++;
1929         if (c != *p2++)
1930             break;
1931         if (c == '/')
1932             pSlash = p1;
1933     }
1934     if (!pSlash)
1935     {
1936         // One of the paths does not start with '/':
1937         rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1938         return false;
1939     }
1940     sal_Int32 nMatch = pSlash - pBasePathBegin;
1941 
1942     // If the two URLs are DOS file URLs starting with different volumes
1943     // (e.g., file:///a:/... and file:///b:/...), the subject is not made
1944     // relative (it could be, but some people do not like that):
1945     if (m_eScheme == INET_PROT_FILE
1946         && nMatch <= 1
1947         && hasDosVolume(eStyle)
1948         && aSubject.hasDosVolume(eStyle)) //TODO! ok to use eStyle for these?
1949     {
1950         rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1951         return false;
1952     }
1953 
1954     // For every slash in the base path after nMatch, a prefix of "../" is
1955     // added to the new relative URL (if the common prefix of the two paths is
1956     // only "/"---but see handling of file URLs above---, the complete subject
1957     // path could go into the new relative URL instead, but some people don't
1958     // like that):
1959     rtl::OUStringBuffer aSynRelURIRef;
1960 //  if (nMatch <= 1) nMatch = 0; else // see comment above
1961     for (sal_Unicode const * p = pBasePathBegin + nMatch; p != pBasePathEnd;
1962          ++p)
1963     {
1964         if (*p == '/')
1965             aSynRelURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("../"));
1966     }
1967 
1968     // If the new relative URL would start with "//" (i.e., it would be
1969     // mistaken for a relative URL starting with an authority part), or if the
1970     // new relative URL would neither be empty nor start with <"/"> nor start
1971     // with <1*rseg> (i.e., it could be mistaken for an absolute URL starting
1972     // with a scheme part), then the new relative URL is prefixed with "./":
1973     if (aSynRelURIRef.getLength() == 0)
1974     {
1975         if (pSubjectPathEnd - pSubjectPathBegin >= nMatch + 2
1976             && pSubjectPathBegin[nMatch] == '/'
1977             && pSubjectPathBegin[nMatch + 1] == '/')
1978         {
1979             aSynRelURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("./"));
1980         }
1981         else
1982         {
1983             for (sal_Unicode const * p = pSubjectPathBegin + nMatch;
1984                  p != pSubjectPathEnd && *p != '/'; ++p)
1985             {
1986                 if (mustEncode(*p, PART_REL_SEGMENT_EXTRA))
1987                 {
1988                     aSynRelURIRef.
1989                         appendAscii(RTL_CONSTASCII_STRINGPARAM("./"));
1990                     break;
1991                 }
1992             }
1993         }
1994     }
1995 
1996     // The remainder of the subject path, starting at nMatch, is appended to
1997     // the new relative URL:
1998     sal_Char cEscapePrefix = getEscapePrefix();
1999     aSynRelURIRef.append(decode(pSubjectPathBegin + nMatch, pSubjectPathEnd,
2000                             cEscapePrefix, eDecodeMechanism, eCharset));
2001 
2002     // If the subject has defined query or fragment parts, they are appended
2003     // to the new relative URL:
2004     if (aSubject.m_aQuery.isPresent())
2005     {
2006         aSynRelURIRef.append(sal_Unicode('?'));
2007         aSynRelURIRef.append(aSubject.decode(aSubject.m_aQuery, cEscapePrefix,
2008                                          eDecodeMechanism, eCharset));
2009     }
2010     if (aSubject.m_aFragment.isPresent())
2011     {
2012         aSynRelURIRef.append(sal_Unicode('#'));
2013         aSynRelURIRef.append(aSubject.decode(aSubject.m_aFragment,
2014             cEscapePrefix, eDecodeMechanism, eCharset));
2015     }
2016 
2017     rTheRelURIRef = aSynRelURIRef.makeStringAndClear();
2018     return true;
2019 }
2020 
2021 //============================================================================
2022 // static
convertIntToExt(rtl::OUString const & rTheIntURIRef,bool bOctets,rtl::OUString & rTheExtURIRef,DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset)2023 bool INetURLObject::convertIntToExt(rtl::OUString const & rTheIntURIRef,
2024                                     bool bOctets, rtl::OUString & rTheExtURIRef,
2025                                     DecodeMechanism eDecodeMechanism,
2026                                     rtl_TextEncoding eCharset)
2027 {
2028     sal_Char cEscapePrefix
2029         = getEscapePrefix(CompareProtocolScheme(rTheIntURIRef));
2030     rtl::OUString aSynExtURIRef(encodeText(rTheIntURIRef, bOctets, PART_VISIBLE,
2031                                        cEscapePrefix, NOT_CANONIC, eCharset,
2032                                        true));
2033     sal_Unicode const * pBegin = aSynExtURIRef.getStr();
2034     sal_Unicode const * pEnd = pBegin + aSynExtURIRef.getLength();
2035     sal_Unicode const * p = pBegin;
2036     PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2037     bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::INTERNAL;
2038     if (bConvert)
2039     {
2040         aSynExtURIRef =
2041             aSynExtURIRef.replaceAt(0, p - pBegin,
2042                 rtl::OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2043     }
2044     rTheExtURIRef = decode(aSynExtURIRef, cEscapePrefix, eDecodeMechanism,
2045                            eCharset);
2046     return bConvert;
2047 }
2048 
2049 //============================================================================
2050 // static
convertExtToInt(rtl::OUString const & rTheExtURIRef,bool bOctets,rtl::OUString & rTheIntURIRef,DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset)2051 bool INetURLObject::convertExtToInt(rtl::OUString const & rTheExtURIRef,
2052                                     bool bOctets, rtl::OUString & rTheIntURIRef,
2053                                     DecodeMechanism eDecodeMechanism,
2054                                     rtl_TextEncoding eCharset)
2055 {
2056     sal_Char cEscapePrefix
2057         = getEscapePrefix(CompareProtocolScheme(rTheExtURIRef));
2058     rtl::OUString aSynIntURIRef(encodeText(rTheExtURIRef, bOctets, PART_VISIBLE,
2059                                        cEscapePrefix, NOT_CANONIC, eCharset,
2060                                        true));
2061     sal_Unicode const * pBegin = aSynIntURIRef.getStr();
2062     sal_Unicode const * pEnd = pBegin + aSynIntURIRef.getLength();
2063     sal_Unicode const * p = pBegin;
2064     PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2065     bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::EXTERNAL;
2066     if (bConvert)
2067     {
2068         aSynIntURIRef =
2069             aSynIntURIRef.replaceAt(0, p - pBegin,
2070                 rtl::OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2071     }
2072     rTheIntURIRef = decode(aSynIntURIRef, cEscapePrefix, eDecodeMechanism,
2073                            eCharset);
2074     return bConvert;
2075 }
2076 
2077 //============================================================================
2078 // static
2079 INetURLObject::PrefixInfo const *
getPrefix(sal_Unicode const * & rBegin,sal_Unicode const * pEnd)2080 INetURLObject::getPrefix(sal_Unicode const *& rBegin,
2081                          sal_Unicode const * pEnd)
2082 {
2083     static PrefixInfo const aMap[]
2084         = { // dummy entry at front needed, because pLast may point here:
2085             { 0, 0, INET_PROT_NOT_VALID, PrefixInfo::INTERNAL },
2086             { ".component:", "staroffice.component:", INET_PROT_COMPONENT,
2087               PrefixInfo::INTERNAL },
2088             { ".uno:", "staroffice.uno:", INET_PROT_UNO,
2089               PrefixInfo::INTERNAL },
2090             { "cid:", 0, INET_PROT_CID, PrefixInfo::OFFICIAL },
2091             { "data:", 0, INET_PROT_DATA, PrefixInfo::OFFICIAL },
2092             { "db:", "staroffice.db:", INET_PROT_DB, PrefixInfo::INTERNAL },
2093             { "file:", 0, INET_PROT_FILE, PrefixInfo::OFFICIAL },
2094             { "ftp:", 0, INET_PROT_FTP, PrefixInfo::OFFICIAL },
2095             { "hid:", "staroffice.hid:", INET_PROT_HID,
2096               PrefixInfo::INTERNAL },
2097             { "http:", 0, INET_PROT_HTTP, PrefixInfo::OFFICIAL },
2098             { "https:", 0, INET_PROT_HTTPS, PrefixInfo::OFFICIAL },
2099             { "imap:", 0, INET_PROT_IMAP, PrefixInfo::OFFICIAL },
2100             { "javascript:", 0, INET_PROT_JAVASCRIPT, PrefixInfo::OFFICIAL },
2101             { "ldap:", 0, INET_PROT_LDAP, PrefixInfo::OFFICIAL },
2102             { "macro:", "staroffice.macro:", INET_PROT_MACRO,
2103               PrefixInfo::INTERNAL },
2104             { "mailto:", 0, INET_PROT_MAILTO, PrefixInfo::OFFICIAL },
2105             { "news:", 0, INET_PROT_NEWS, PrefixInfo::OFFICIAL },
2106             { "out:", "staroffice.out:", INET_PROT_OUT,
2107               PrefixInfo::INTERNAL },
2108             { "pop3:", "staroffice.pop3:", INET_PROT_POP3,
2109               PrefixInfo::INTERNAL },
2110             { "private:", "staroffice.private:", INET_PROT_PRIV_SOFFICE,
2111               PrefixInfo::INTERNAL },
2112             { "private:factory/", "staroffice.factory:",
2113               INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2114             { "private:helpid/", "staroffice.helpid:", INET_PROT_PRIV_SOFFICE,
2115               PrefixInfo::INTERNAL },
2116             { "private:java/", "staroffice.java:", INET_PROT_PRIV_SOFFICE,
2117               PrefixInfo::INTERNAL },
2118             { "private:searchfolder:", "staroffice.searchfolder:",
2119               INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2120             { "private:trashcan:", "staroffice.trashcan:",
2121               INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2122             { "slot:", "staroffice.slot:", INET_PROT_SLOT,
2123               PrefixInfo::INTERNAL },
2124             { "smb:", 0, INET_PROT_SMB, PrefixInfo::OFFICIAL },
2125             { "staroffice.component:", ".component:", INET_PROT_COMPONENT,
2126               PrefixInfo::EXTERNAL },
2127             { "staroffice.db:", "db:", INET_PROT_DB, PrefixInfo::EXTERNAL },
2128             { "staroffice.factory:", "private:factory/",
2129               INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2130             { "staroffice.helpid:", "private:helpid/", INET_PROT_PRIV_SOFFICE,
2131               PrefixInfo::EXTERNAL },
2132             { "staroffice.hid:", "hid:", INET_PROT_HID,
2133               PrefixInfo::EXTERNAL },
2134             { "staroffice.java:", "private:java/", INET_PROT_PRIV_SOFFICE,
2135               PrefixInfo::EXTERNAL },
2136             { "staroffice.macro:", "macro:", INET_PROT_MACRO,
2137               PrefixInfo::EXTERNAL },
2138             { "staroffice.out:", "out:", INET_PROT_OUT,
2139               PrefixInfo::EXTERNAL },
2140             { "staroffice.pop3:", "pop3:", INET_PROT_POP3,
2141               PrefixInfo::EXTERNAL },
2142             { "staroffice.private:", "private:", INET_PROT_PRIV_SOFFICE,
2143               PrefixInfo::EXTERNAL },
2144             { "staroffice.searchfolder:", "private:searchfolder:",
2145               INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2146             { "staroffice.slot:", "slot:", INET_PROT_SLOT,
2147               PrefixInfo::EXTERNAL },
2148             { "staroffice.trashcan:", "private:trashcan:",
2149               INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2150             { "staroffice.uno:", ".uno:", INET_PROT_UNO,
2151               PrefixInfo::EXTERNAL },
2152             { "staroffice.vim:", "vim:", INET_PROT_VIM,
2153               PrefixInfo::EXTERNAL },
2154             { "staroffice:", "private:", INET_PROT_PRIV_SOFFICE,
2155               PrefixInfo::EXTERNAL },
2156             { "telnet:", 0, INET_PROT_TELNET, PrefixInfo::OFFICIAL },
2157             { "vim:", "staroffice.vim:", INET_PROT_VIM,
2158               PrefixInfo::INTERNAL },
2159             { "vnd.sun.star.cmd:", 0, INET_PROT_VND_SUN_STAR_CMD,
2160               PrefixInfo::OFFICIAL },
2161             { "vnd.sun.star.expand:", 0, INET_PROT_VND_SUN_STAR_EXPAND,
2162               PrefixInfo::OFFICIAL },
2163             { "vnd.sun.star.help:", 0, INET_PROT_VND_SUN_STAR_HELP,
2164               PrefixInfo::OFFICIAL },
2165             { "vnd.sun.star.hier:", 0, INET_PROT_VND_SUN_STAR_HIER,
2166               PrefixInfo::OFFICIAL },
2167             { "vnd.sun.star.odma:", 0, INET_PROT_VND_SUN_STAR_ODMA,
2168               PrefixInfo::OFFICIAL },
2169             { "vnd.sun.star.pkg:", 0, INET_PROT_VND_SUN_STAR_PKG,
2170               PrefixInfo::OFFICIAL },
2171             { "vnd.sun.star.tdoc:", 0, INET_PROT_VND_SUN_STAR_TDOC,
2172               PrefixInfo::OFFICIAL },
2173             { "vnd.sun.star.webdav:", 0, INET_PROT_VND_SUN_STAR_WEBDAV,
2174               PrefixInfo::OFFICIAL } };
2175     PrefixInfo const * pFirst = aMap + 1;
2176     PrefixInfo const * pLast = aMap + sizeof aMap / sizeof (PrefixInfo) - 1;
2177     PrefixInfo const * pMatch = 0;
2178     sal_Unicode const * pMatched = rBegin;
2179     sal_Unicode const * p = rBegin;
2180     sal_Int32 i = 0;
2181     for (; pFirst < pLast; ++i)
2182     {
2183         if (pFirst->m_pPrefix[i] == '\0')
2184         {
2185             pMatch = pFirst++;
2186             pMatched = p;
2187         }
2188         if (p >= pEnd)
2189             break;
2190         sal_uInt32 nChar = INetMIME::toLowerCase(*p++);
2191         while (pFirst <= pLast && sal_uChar(pFirst->m_pPrefix[i]) < nChar)
2192             ++pFirst;
2193         while (pFirst <= pLast && sal_uChar(pLast->m_pPrefix[i]) > nChar)
2194             --pLast;
2195     }
2196     if (pFirst == pLast)
2197     {
2198         sal_Char const * q = pFirst->m_pPrefix + i;
2199         while (p < pEnd && *q != '\0'
2200                && INetMIME::toLowerCase(*p) == sal_uChar(*q))
2201         {
2202             ++p;
2203             ++q;
2204         }
2205         if (*q == '\0')
2206         {
2207             rBegin = p;
2208             return pFirst;
2209         }
2210     }
2211     rBegin = pMatched;
2212     return pMatch;
2213 }
2214 
2215 //============================================================================
getAuthorityBegin() const2216 sal_Int32 INetURLObject::getAuthorityBegin() const
2217 {
2218     DBG_ASSERT(getSchemeInfo().m_bAuthority,
2219                "INetURLObject::getAuthority(): Bad scheme");
2220     sal_Int32 nBegin;
2221     if (m_aUser.isPresent())
2222         nBegin = m_aUser.getBegin();
2223     else if (m_aHost.isPresent())
2224         nBegin = m_aHost.getBegin();
2225     else
2226         nBegin = m_aPath.getBegin();
2227     nBegin -= RTL_CONSTASCII_LENGTH("//");
2228     DBG_ASSERT(m_aAbsURIRef.charAt(nBegin) == '/'
2229                && m_aAbsURIRef.charAt(nBegin + 1) == '/',
2230                "INetURLObject::getAuthority(): Bad authority");
2231     return nBegin;
2232 }
2233 
2234 //============================================================================
getAuthority() const2235 INetURLObject::SubString INetURLObject::getAuthority() const
2236 {
2237     sal_Int32 nBegin = getAuthorityBegin();
2238     sal_Int32 nEnd = m_aPort.isPresent() ? m_aPort.getEnd() :
2239                       m_aHost.isPresent() ? m_aHost.getEnd() :
2240                       m_aAuth.isPresent() ? m_aAuth.getEnd() :
2241                       m_aUser.isPresent() ? m_aUser.getEnd() :
2242                           nBegin + RTL_CONSTASCII_LENGTH("//");
2243     return SubString(nBegin, nEnd - nBegin);
2244 }
2245 
2246 //============================================================================
setUser(rtl::OUString const & rTheUser,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)2247 bool INetURLObject::setUser(rtl::OUString const & rTheUser,
2248                             bool bOctets, EncodeMechanism eMechanism,
2249                             rtl_TextEncoding eCharset)
2250 {
2251     if (
2252          !getSchemeInfo().m_bUser ||
2253          (m_eScheme == INET_PROT_IMAP && rTheUser.getLength() == 0)
2254        )
2255     {
2256         return false;
2257     }
2258 
2259     rtl::OUString aNewUser(encodeText(rTheUser, bOctets,
2260                                   m_eScheme == INET_PROT_IMAP ?
2261                                       PART_IMAP_ACHAR :
2262                                   m_eScheme == INET_PROT_VIM ?
2263                                       PART_VIM :
2264                                       PART_USER_PASSWORD,
2265                                   getEscapePrefix(), eMechanism, eCharset,
2266                                   false));
2267     sal_Int32 nDelta;
2268     if (m_aUser.isPresent())
2269         nDelta = m_aUser.set(m_aAbsURIRef, aNewUser);
2270     else if (m_aHost.isPresent())
2271     {
2272         m_aAbsURIRef.insert(m_aHost.getBegin(), sal_Unicode('@'));
2273         nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aHost.getBegin()) + 1;
2274     }
2275     else if (getSchemeInfo().m_bHost)
2276         return false;
2277     else
2278         nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aPath.getBegin());
2279     m_aAuth += nDelta;
2280     m_aHost += nDelta;
2281     m_aPort += nDelta;
2282     m_aPath += nDelta;
2283     m_aQuery += nDelta;
2284     m_aFragment += nDelta;
2285     return true;
2286 }
2287 
2288 namespace
2289 {
lcl_Erase(rtl::OUStringBuffer & rBuf,sal_Int32 index,sal_Int32 count)2290     void lcl_Erase(rtl::OUStringBuffer &rBuf, sal_Int32 index, sal_Int32 count)
2291     {
2292         rtl::OUString sTemp(rBuf.makeStringAndClear());
2293         rBuf.append(sTemp.replaceAt(index, count, rtl::OUString()));
2294     }
2295 }
2296 
2297 //============================================================================
clearPassword()2298 bool INetURLObject::clearPassword()
2299 {
2300     if (!getSchemeInfo().m_bPassword)
2301         return false;
2302     if (m_aAuth.isPresent())
2303     {
2304         lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() - 1,
2305             m_aAuth.getLength() + 1);
2306         sal_Int32 nDelta = m_aAuth.clear() - 1;
2307         m_aHost += nDelta;
2308         m_aPort += nDelta;
2309         m_aPath += nDelta;
2310         m_aQuery += nDelta;
2311         m_aFragment += nDelta;
2312     }
2313     return true;
2314 }
2315 
2316 //============================================================================
setPassword(rtl::OUString const & rThePassword,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)2317 bool INetURLObject::setPassword(rtl::OUString const & rThePassword,
2318                                 bool bOctets, EncodeMechanism eMechanism,
2319                                 rtl_TextEncoding eCharset)
2320 {
2321     if (!getSchemeInfo().m_bPassword)
2322         return false;
2323     rtl::OUString aNewAuth(encodeText(rThePassword, bOctets,
2324                                   m_eScheme == INET_PROT_VIM ?
2325                                       PART_VIM : PART_USER_PASSWORD,
2326                                   getEscapePrefix(), eMechanism, eCharset,
2327                                   false));
2328     sal_Int32 nDelta;
2329     if (m_aAuth.isPresent())
2330         nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth);
2331     else if (m_aUser.isPresent())
2332     {
2333         m_aAbsURIRef.insert(m_aUser.getEnd(), sal_Unicode(':'));
2334         nDelta
2335             = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aUser.getEnd() + 1) + 1;
2336     }
2337     else if (m_aHost.isPresent())
2338     {
2339         m_aAbsURIRef.insert(m_aHost.getBegin(),
2340             rtl::OUString::createFromAscii(":@"));
2341         m_aUser.set(m_aAbsURIRef, rtl::OUString(), m_aHost.getBegin());
2342         nDelta
2343             = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aHost.getBegin() + 1) + 2;
2344     }
2345     else if (getSchemeInfo().m_bHost)
2346         return false;
2347     else
2348     {
2349         m_aAbsURIRef.insert(m_aPath.getBegin(), sal_Unicode(':'));
2350         m_aUser.set(m_aAbsURIRef, rtl::OUString(), m_aPath.getBegin());
2351         nDelta
2352             = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aPath.getBegin() + 1) + 1;
2353     }
2354     m_aHost += nDelta;
2355     m_aPort += nDelta;
2356     m_aPath += nDelta;
2357     m_aQuery += nDelta;
2358     m_aFragment += nDelta;
2359     return true;
2360 }
2361 
2362 //============================================================================
2363 // static
parseHost(sal_Unicode const * & rBegin,sal_Unicode const * pEnd,rtl::OUString & rCanonic)2364 bool INetURLObject::parseHost(
2365     sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
2366     rtl::OUString & rCanonic)
2367 {
2368     // RFC 2373 is inconsistent about how to write an IPv6 address in which an
2369     // IPv4 address directly follows the abbreviating "::".  The ABNF in
2370     // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly
2371     // mentions "::13:1.68.3".  This algorithm accepts both variants:
2372     enum State { STATE_INITIAL, STATE_LABEL, STATE_LABEL_HYPHEN,
2373                  STATE_LABEL_DOT, STATE_TOPLABEL, STATE_TOPLABEL_HYPHEN,
2374                  STATE_TOPLABEL_DOT, STATE_IP4, STATE_IP4_DOT, STATE_IP6,
2375                  STATE_IP6_COLON, STATE_IP6_2COLON, STATE_IP6_3COLON,
2376                  STATE_IP6_HEXSEQ1, STATE_IP6_HEXSEQ1_COLON,
2377                  STATE_IP6_HEXSEQ1_MAYBE_IP4, STATE_IP6_HEXSEQ2,
2378                  STATE_IP6_HEXSEQ2_COLON, STATE_IP6_HEXSEQ2_MAYBE_IP4,
2379                  STATE_IP6_IP4, STATE_IP6_IP4_DOT, STATE_IP6_DONE };
2380     rtl::OUStringBuffer aTheCanonic;
2381     sal_uInt32 nNumber = 0;
2382     int nDigits = 0;
2383     int nOctets = 0;
2384     State eState = STATE_INITIAL;
2385     sal_Unicode const * p = rBegin;
2386     for (; p != pEnd; ++p)
2387         switch (eState)
2388         {
2389             case STATE_INITIAL:
2390                 if (*p == '[')
2391                 {
2392                     aTheCanonic.append(sal_Unicode('['));
2393                     eState = STATE_IP6;
2394                 }
2395                 else if (INetMIME::isAlpha(*p))
2396                     eState = STATE_TOPLABEL;
2397                 else if (INetMIME::isDigit(*p))
2398                 {
2399                     nNumber = INetMIME::getWeight(*p);
2400                     nDigits = 1;
2401                     nOctets = 1;
2402                     eState = STATE_IP4;
2403                 }
2404                 else
2405                     goto done;
2406                 break;
2407 
2408             case STATE_LABEL:
2409                 if (*p == '.')
2410                     eState = STATE_LABEL_DOT;
2411                 else if (*p == '-')
2412                     eState = STATE_LABEL_HYPHEN;
2413                 else if (!INetMIME::isAlphanumeric(*p))
2414                     goto done;
2415                 break;
2416 
2417             case STATE_LABEL_HYPHEN:
2418                 if (INetMIME::isAlphanumeric(*p))
2419                     eState = STATE_LABEL;
2420                 else if (*p != '-')
2421                     goto done;
2422                 break;
2423 
2424             case STATE_LABEL_DOT:
2425                 if (INetMIME::isAlpha(*p))
2426                     eState = STATE_TOPLABEL;
2427                 else if (INetMIME::isDigit(*p))
2428                     eState = STATE_LABEL;
2429                 else
2430                     goto done;
2431                 break;
2432 
2433             case STATE_TOPLABEL:
2434                 if (*p == '.')
2435                     eState = STATE_TOPLABEL_DOT;
2436                 else if (*p == '-')
2437                     eState = STATE_TOPLABEL_HYPHEN;
2438                 else if (!INetMIME::isAlphanumeric(*p))
2439                     goto done;
2440                 break;
2441 
2442             case STATE_TOPLABEL_HYPHEN:
2443                 if (INetMIME::isAlphanumeric(*p))
2444                     eState = STATE_TOPLABEL;
2445                 else if (*p != '-')
2446                     goto done;
2447                 break;
2448 
2449             case STATE_TOPLABEL_DOT:
2450                 if (INetMIME::isAlpha(*p))
2451                     eState = STATE_TOPLABEL;
2452                 else if (INetMIME::isDigit(*p))
2453                     eState = STATE_LABEL;
2454                 else
2455                     goto done;
2456                 break;
2457 
2458             case STATE_IP4:
2459                 if (*p == '.')
2460                     if (nOctets < 4)
2461                     {
2462                         aTheCanonic.append(
2463                             rtl::OUString::valueOf(sal_Int32(nNumber)));
2464                         aTheCanonic.append(sal_Unicode('.'));
2465                         ++nOctets;
2466                         eState = STATE_IP4_DOT;
2467                     }
2468                     else
2469                         eState = STATE_LABEL_DOT;
2470                 else if (*p == '-')
2471                     eState = STATE_LABEL_HYPHEN;
2472                 else if (INetMIME::isAlpha(*p))
2473                     eState = STATE_LABEL;
2474                 else if (INetMIME::isDigit(*p))
2475                     if (nDigits < 3)
2476                     {
2477                         nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2478                         ++nDigits;
2479                     }
2480                     else
2481                         eState = STATE_LABEL;
2482                 else
2483                     goto done;
2484                 break;
2485 
2486             case STATE_IP4_DOT:
2487                 if (INetMIME::isAlpha(*p))
2488                     eState = STATE_TOPLABEL;
2489                 else if (INetMIME::isDigit(*p))
2490                 {
2491                     nNumber = INetMIME::getWeight(*p);
2492                     nDigits = 1;
2493                     eState = STATE_IP4;
2494                 }
2495                 else
2496                     goto done;
2497                 break;
2498 
2499             case STATE_IP6:
2500                 if (*p == ':')
2501                     eState = STATE_IP6_COLON;
2502                 else if (INetMIME::isHexDigit(*p))
2503                 {
2504                     nNumber = INetMIME::getHexWeight(*p);
2505                     nDigits = 1;
2506                     eState = STATE_IP6_HEXSEQ1;
2507                 }
2508                 else
2509                     goto done;
2510                 break;
2511 
2512             case STATE_IP6_COLON:
2513                 if (*p == ':')
2514                 {
2515                     aTheCanonic.appendAscii(RTL_CONSTASCII_STRINGPARAM("::"));
2516                     eState = STATE_IP6_2COLON;
2517                 }
2518                 else
2519                     goto done;
2520                 break;
2521 
2522             case STATE_IP6_2COLON:
2523                 if (*p == ']')
2524                     eState = STATE_IP6_DONE;
2525                 else if (*p == ':')
2526                 {
2527                     aTheCanonic.append(sal_Unicode(':'));
2528                     eState = STATE_IP6_3COLON;
2529                 }
2530                 else if (INetMIME::isDigit(*p))
2531                 {
2532                     nNumber = INetMIME::getWeight(*p);
2533                     nDigits = 1;
2534                     eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2535                 }
2536                 else if (INetMIME::isHexDigit(*p))
2537                 {
2538                     nNumber = INetMIME::getHexWeight(*p);
2539                     nDigits = 1;
2540                     eState = STATE_IP6_HEXSEQ2;
2541                 }
2542                 else
2543                     goto done;
2544                 break;
2545 
2546             case STATE_IP6_3COLON:
2547                 if (INetMIME::isDigit(*p))
2548                 {
2549                     nNumber = INetMIME::getWeight(*p);
2550                     nDigits = 1;
2551                     nOctets = 1;
2552                     eState = STATE_IP6_IP4;
2553                 }
2554                 else
2555                     goto done;
2556                 break;
2557 
2558             case STATE_IP6_HEXSEQ1:
2559                 if (*p == ']')
2560                 {
2561                     aTheCanonic.append(
2562                         rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2563                     eState = STATE_IP6_DONE;
2564                 }
2565                 else if (*p == ':')
2566                 {
2567                     aTheCanonic.append(
2568                         rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2569                     aTheCanonic.append(sal_Unicode(':'));
2570                     eState = STATE_IP6_HEXSEQ1_COLON;
2571                 }
2572                 else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2573                 {
2574                     nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2575                     ++nDigits;
2576                 }
2577                 else
2578                     goto done;
2579                 break;
2580 
2581             case STATE_IP6_HEXSEQ1_COLON:
2582                 if (*p == ':')
2583                 {
2584                     aTheCanonic.append(sal_Unicode(':'));
2585                     eState = STATE_IP6_2COLON;
2586                 }
2587                 else if (INetMIME::isDigit(*p))
2588                 {
2589                     nNumber = INetMIME::getWeight(*p);
2590                     nDigits = 1;
2591                     eState = STATE_IP6_HEXSEQ1_MAYBE_IP4;
2592                 }
2593                 else if (INetMIME::isHexDigit(*p))
2594                 {
2595                     nNumber = INetMIME::getHexWeight(*p);
2596                     nDigits = 1;
2597                     eState = STATE_IP6_HEXSEQ1;
2598                 }
2599                 else
2600                     goto done;
2601                 break;
2602 
2603             case STATE_IP6_HEXSEQ1_MAYBE_IP4:
2604                 if (*p == ']')
2605                 {
2606                     aTheCanonic.append(
2607                         rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2608                     eState = STATE_IP6_DONE;
2609                 }
2610                 else if (*p == ':')
2611                 {
2612                     aTheCanonic.append(
2613                         rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2614                     aTheCanonic.append(sal_Unicode(':'));
2615                     eState = STATE_IP6_HEXSEQ1_COLON;
2616                 }
2617                 else if (*p == '.')
2618                 {
2619                     nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2620                                   + (nNumber & 15);
2621                     aTheCanonic.append(
2622                         rtl::OUString::valueOf(sal_Int32(nNumber)));
2623                     aTheCanonic.append(sal_Unicode('.'));
2624                     nOctets = 2;
2625                     eState = STATE_IP6_IP4_DOT;
2626                 }
2627                 else if (INetMIME::isDigit(*p) && nDigits < 3)
2628                 {
2629                     nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2630                     ++nDigits;
2631                 }
2632                 else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2633                 {
2634                     nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2635                     ++nDigits;
2636                     eState = STATE_IP6_HEXSEQ1;
2637                 }
2638                 else
2639                     goto done;
2640                 break;
2641 
2642             case STATE_IP6_HEXSEQ2:
2643                 if (*p == ']')
2644                 {
2645                     aTheCanonic.append(
2646                         rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2647                     eState = STATE_IP6_DONE;
2648                 }
2649                 else if (*p == ':')
2650                 {
2651                     aTheCanonic.append(
2652                         rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2653                     aTheCanonic.append(sal_Unicode(':'));
2654                     eState = STATE_IP6_HEXSEQ2_COLON;
2655                 }
2656                 else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2657                 {
2658                     nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2659                     ++nDigits;
2660                 }
2661                 else
2662                     goto done;
2663                 break;
2664 
2665             case STATE_IP6_HEXSEQ2_COLON:
2666                 if (INetMIME::isDigit(*p))
2667                 {
2668                     nNumber = INetMIME::getWeight(*p);
2669                     nDigits = 1;
2670                     eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2671                 }
2672                 else if (INetMIME::isHexDigit(*p))
2673                 {
2674                     nNumber = INetMIME::getHexWeight(*p);
2675                     nDigits = 1;
2676                     eState = STATE_IP6_HEXSEQ2;
2677                 }
2678                 else
2679                     goto done;
2680                 break;
2681 
2682             case STATE_IP6_HEXSEQ2_MAYBE_IP4:
2683                 if (*p == ']')
2684                 {
2685                     aTheCanonic.append(
2686                         rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2687                     eState = STATE_IP6_DONE;
2688                 }
2689                 else if (*p == ':')
2690                 {
2691                     aTheCanonic.append(
2692                         rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2693                     aTheCanonic.append(sal_Unicode(':'));
2694                     eState = STATE_IP6_HEXSEQ2_COLON;
2695                 }
2696                 else if (*p == '.')
2697                 {
2698                     nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2699                                   + (nNumber & 15);
2700                     aTheCanonic.append(
2701                         rtl::OUString::valueOf(sal_Int32(nNumber)));
2702                     aTheCanonic.append(sal_Unicode('.'));
2703                     nOctets = 2;
2704                     eState = STATE_IP6_IP4_DOT;
2705                 }
2706                 else if (INetMIME::isDigit(*p) && nDigits < 3)
2707                 {
2708                     nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2709                     ++nDigits;
2710                 }
2711                 else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2712                 {
2713                     nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2714                     ++nDigits;
2715                     eState = STATE_IP6_HEXSEQ2;
2716                 }
2717                 else
2718                     goto done;
2719                 break;
2720 
2721             case STATE_IP6_IP4:
2722                 if (*p == ']')
2723                     if (nOctets == 4)
2724                     {
2725                         aTheCanonic.append(
2726                             rtl::OUString::valueOf(sal_Int32(nNumber)));
2727                         eState = STATE_IP6_DONE;
2728                     }
2729                     else
2730                         goto done;
2731                 else if (*p == '.')
2732                     if (nOctets < 4)
2733                     {
2734                         aTheCanonic.append(
2735                             rtl::OUString::valueOf(sal_Int32(nNumber)));
2736                         aTheCanonic.append(sal_Unicode('.'));
2737                         ++nOctets;
2738                         eState = STATE_IP6_IP4_DOT;
2739                     }
2740                     else
2741                         goto done;
2742                 else if (INetMIME::isDigit(*p) && nDigits < 3)
2743                 {
2744                     nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2745                     ++nDigits;
2746                 }
2747                 else
2748                     goto done;
2749                 break;
2750 
2751             case STATE_IP6_IP4_DOT:
2752                 if (INetMIME::isDigit(*p))
2753                 {
2754                     nNumber = INetMIME::getWeight(*p);
2755                     nDigits = 1;
2756                     eState = STATE_IP6_IP4;
2757                 }
2758                 else
2759                     goto done;
2760                 break;
2761 
2762             case STATE_IP6_DONE:
2763                 goto done;
2764         }
2765  done:
2766     switch (eState)
2767     {
2768         case STATE_LABEL:
2769         case STATE_TOPLABEL:
2770         case STATE_TOPLABEL_DOT:
2771             aTheCanonic.setLength(0);
2772             aTheCanonic.append(rBegin, p - rBegin);
2773             rBegin = p;
2774             rCanonic = aTheCanonic.makeStringAndClear();
2775             return true;
2776 
2777         case STATE_IP4:
2778             if (nOctets == 4)
2779             {
2780                 aTheCanonic.append(
2781                     rtl::OUString::valueOf(sal_Int32(nNumber)));
2782                 rBegin = p;
2783                 rCanonic = aTheCanonic.makeStringAndClear();
2784                 return true;
2785             }
2786             return false;
2787 
2788         case STATE_IP6_DONE:
2789             aTheCanonic.append(sal_Unicode(']'));
2790             rBegin = p;
2791             rCanonic = aTheCanonic.makeStringAndClear();
2792             return true;
2793 
2794         default:
2795             return false;
2796     }
2797 }
2798 
2799 //============================================================================
2800 // static
parseHostOrNetBiosName(sal_Unicode const * pBegin,sal_Unicode const * pEnd,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,bool bNetBiosName,rtl::OUStringBuffer * pCanonic)2801 bool INetURLObject::parseHostOrNetBiosName(
2802     sal_Unicode const * pBegin, sal_Unicode const * pEnd, bool bOctets,
2803     EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName,
2804     rtl::OUStringBuffer* pCanonic)
2805 {
2806     rtl::OUString aTheCanonic;
2807     if (pBegin < pEnd)
2808     {
2809         sal_Unicode const * p = pBegin;
2810         if (!parseHost(p, pEnd, aTheCanonic) || p != pEnd)
2811         {
2812             if (bNetBiosName)
2813             {
2814                 rtl::OUStringBuffer buf;
2815                 while (pBegin < pEnd)
2816                 {
2817                     EscapeType eEscapeType;
2818                     sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets, '%',
2819                                                  eMechanism, eCharset,
2820                                                  eEscapeType);
2821                     if (!INetMIME::isVisible(nUTF32))
2822                         return false;
2823                     if (!INetMIME::isAlphanumeric(nUTF32))
2824                         switch (nUTF32)
2825                         {
2826                         case '"':
2827                         case '*':
2828                         case '+':
2829                         case ',':
2830                         case '/':
2831                         case ':':
2832                         case ';':
2833                         case '<':
2834                         case '=':
2835                         case '>':
2836                         case '?':
2837                         case '[':
2838                         case '\\':
2839                         case ']':
2840                         case '`':
2841                         case '|':
2842                             return false;;
2843                         }
2844                     if (pCanonic != NULL) {
2845                         appendUCS4(
2846                             buf, nUTF32, eEscapeType, bOctets, PART_URIC, '%',
2847                             eCharset, true);
2848                     }
2849                 }
2850                 aTheCanonic = buf.makeStringAndClear();
2851             }
2852             else
2853                 return false;
2854         }
2855     }
2856     if (pCanonic != NULL) {
2857         *pCanonic = aTheCanonic;
2858     }
2859     return true;
2860 }
2861 
2862 //============================================================================
2863 // static
encodeHostPort(rtl::OUString const & rTheHostPort,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)2864 rtl::OUString INetURLObject::encodeHostPort(rtl::OUString const & rTheHostPort,
2865                                         bool bOctets,
2866                                         EncodeMechanism eMechanism,
2867                                         rtl_TextEncoding eCharset)
2868 {
2869     sal_Int32 nPort = rTheHostPort.getLength();
2870     if (nPort != 0)
2871     {
2872         sal_Int32 i = nPort - 1;
2873         while (i != 0 && INetMIME::isDigit(rTheHostPort.getStr()[i]))
2874             --i;
2875         if (rTheHostPort.getStr()[i] == ':')
2876             nPort = i;
2877     }
2878     rtl::OUString aResult(encodeText(rTheHostPort.copy(0, nPort), bOctets,
2879                                  PART_HOST_EXTRA, '%', eMechanism, eCharset,
2880                                  true));
2881     aResult += rTheHostPort.copy(nPort);
2882     return aResult;
2883 }
2884 
2885 //============================================================================
setHost(rtl::OUString const & rTheHost,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)2886 bool INetURLObject::setHost(rtl::OUString const & rTheHost, bool bOctets,
2887                             EncodeMechanism eMechanism,
2888                             rtl_TextEncoding eCharset)
2889 {
2890     if (!getSchemeInfo().m_bHost)
2891         return false;
2892     rtl::OUStringBuffer aSynHost(rTheHost);
2893     bool bNetBiosName = false;
2894     switch (m_eScheme)
2895     {
2896         case INET_PROT_FILE:
2897             {
2898                 rtl::OUString sTemp(aSynHost);
2899                 if (sTemp.equalsIgnoreAsciiCaseAsciiL(
2900                     RTL_CONSTASCII_STRINGPARAM("localhost")))
2901                 {
2902                     aSynHost.setLength(0);
2903                 }
2904                 bNetBiosName = true;
2905             }
2906             break;
2907         case INET_PROT_LDAP:
2908             if (aSynHost.getLength() == 0 && m_aPort.isPresent())
2909                 return false;
2910             break;
2911 
2912         default:
2913             if (aSynHost.getLength() == 0)
2914                 return false;
2915             break;
2916     }
2917     if (!parseHostOrNetBiosName(
2918             aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
2919             bOctets, eMechanism, eCharset, bNetBiosName, &aSynHost))
2920         return false;
2921     sal_Int32 nDelta = m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear());
2922     m_aPort += nDelta;
2923     m_aPath += nDelta;
2924     m_aQuery += nDelta;
2925     m_aFragment += nDelta;
2926     return true;
2927 }
2928 
2929 //============================================================================
2930 // static
parsePath(INetProtocol eScheme,sal_Unicode const ** pBegin,sal_Unicode const * pEnd,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,bool bSkippedInitialSlash,sal_uInt32 nSegmentDelimiter,sal_uInt32 nAltSegmentDelimiter,sal_uInt32 nQueryDelimiter,sal_uInt32 nFragmentDelimiter,rtl::OUStringBuffer & rSynPath)2931 bool INetURLObject::parsePath(INetProtocol eScheme,
2932                               sal_Unicode const ** pBegin,
2933                               sal_Unicode const * pEnd,
2934                               bool bOctets,
2935                               EncodeMechanism eMechanism,
2936                               rtl_TextEncoding eCharset,
2937                               bool bSkippedInitialSlash,
2938                               sal_uInt32 nSegmentDelimiter,
2939                               sal_uInt32 nAltSegmentDelimiter,
2940                               sal_uInt32 nQueryDelimiter,
2941                               sal_uInt32 nFragmentDelimiter,
2942                               rtl::OUStringBuffer &rSynPath)
2943 {
2944     DBG_ASSERT(pBegin, "INetURLObject::parsePath(): Null output param");
2945 
2946     sal_Unicode const * pPos = *pBegin;
2947     rtl::OUStringBuffer aTheSynPath;
2948 
2949     switch (eScheme)
2950     {
2951         case INET_PROT_NOT_VALID:
2952             return false;
2953 
2954         case INET_PROT_FTP:
2955         case INET_PROT_IMAP:
2956             if (pPos < pEnd && *pPos != '/')
2957                 return false;
2958             while (pPos < pEnd && *pPos != nFragmentDelimiter)
2959             {
2960                 EscapeType eEscapeType;
2961                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
2962                                              '%', eMechanism,
2963                                              eCharset, eEscapeType);
2964                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
2965                            PART_HTTP_PATH, '%', eCharset, true);
2966             }
2967             if (aTheSynPath.getLength() == 0)
2968                 aTheSynPath.append(sal_Unicode('/'));
2969             break;
2970 
2971         case INET_PROT_HTTP:
2972         case INET_PROT_VND_SUN_STAR_WEBDAV:
2973         case INET_PROT_HTTPS:
2974         case INET_PROT_SMB:
2975             if (pPos < pEnd && *pPos != '/')
2976                 return false;
2977             while (pPos < pEnd && *pPos != nQueryDelimiter
2978                    && *pPos != nFragmentDelimiter)
2979             {
2980                 EscapeType eEscapeType;
2981                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
2982                                              '%', eMechanism,
2983                                              eCharset, eEscapeType);
2984                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
2985                            PART_HTTP_PATH, '%', eCharset, true);
2986             }
2987             if (aTheSynPath.getLength() == 0)
2988                 aTheSynPath.append(sal_Unicode('/'));
2989             break;
2990 
2991         case INET_PROT_FILE:
2992         {
2993             if (bSkippedInitialSlash)
2994                 aTheSynPath.append(sal_Unicode('/'));
2995             else if (pPos < pEnd
2996                      && *pPos != nSegmentDelimiter
2997                      && *pPos != nAltSegmentDelimiter)
2998                 return false;
2999             while (pPos < pEnd && *pPos != nFragmentDelimiter)
3000             {
3001                 EscapeType eEscapeType;
3002                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3003                                              '%', eMechanism,
3004                                              eCharset, eEscapeType);
3005                 if (eEscapeType == ESCAPE_NO)
3006                 {
3007                     if (nUTF32 == nSegmentDelimiter
3008                         || nUTF32 == nAltSegmentDelimiter)
3009                     {
3010                         aTheSynPath.append(sal_Unicode('/'));
3011                         continue;
3012                     }
3013                     else if (nUTF32 == '|'
3014                              && (pPos == pEnd
3015                                  || *pPos == nFragmentDelimiter
3016                                  || *pPos == nSegmentDelimiter
3017                                  || *pPos == nAltSegmentDelimiter)
3018                              && aTheSynPath.getLength() == 2
3019                              && INetMIME::isAlpha(aTheSynPath.charAt(1)))
3020                     {
3021                         // A first segment of <ALPHA "|"> is translated to
3022                         // <ALPHA ":">:
3023                         aTheSynPath.append(sal_Unicode(':'));
3024                         continue;
3025                     }
3026                 }
3027                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3028                            PART_PCHAR, '%', eCharset, true);
3029             }
3030             if (aTheSynPath.getLength() == 0)
3031                 aTheSynPath.append(sal_Unicode('/'));
3032             break;
3033         }
3034 
3035         case INET_PROT_MAILTO:
3036             while (pPos < pEnd && *pPos != nQueryDelimiter
3037                    && *pPos != nFragmentDelimiter)
3038             {
3039                 EscapeType eEscapeType;
3040                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3041                                              '%', eMechanism,
3042                                              eCharset, eEscapeType);
3043                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3044                            PART_MAILTO, '%', eCharset, true);
3045             }
3046             break;
3047 
3048         case INET_PROT_NEWS:
3049             if (pPos == pEnd || *pPos == nQueryDelimiter
3050                 || *pPos == nFragmentDelimiter)
3051                 return false;
3052 
3053             // Match <"*">:
3054             if (*pPos == '*'
3055                 && (pEnd - pPos == 1 || pPos[1] == nQueryDelimiter
3056                     || pPos[1] == nFragmentDelimiter))
3057             {
3058                 ++pPos;
3059                 aTheSynPath.append(sal_Unicode('*'));
3060                 break;
3061             }
3062 
3063             // Match <group>:
3064             if (INetMIME::isAlpha(*pPos))
3065                 for (sal_Unicode const * p = pPos + 1;; ++p)
3066                     if (p == pEnd || *p == nQueryDelimiter
3067                         || *p == nFragmentDelimiter)
3068                     {
3069                         aTheSynPath.setLength(0);
3070                         aTheSynPath.append(pPos, p - pPos);
3071                         pPos = p;
3072                         goto done;
3073                     }
3074                     else if (!INetMIME::isAlphanumeric(*p) && *p != '+'
3075                              && *p != '-' && *p != '.' && *p != '_')
3076                         break;
3077 
3078             // Match <article>:
3079             for (;;)
3080             {
3081                 if (pPos == pEnd || *pPos == nQueryDelimiter
3082                     || *pPos == nFragmentDelimiter)
3083                     return false;
3084                 if (*pPos == '@')
3085                     break;
3086                 EscapeType eEscapeType;
3087                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, '%',
3088                                              eMechanism, eCharset, eEscapeType);
3089                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3090                            PART_NEWS_ARTICLE_LOCALPART, '%', eCharset, true);
3091             }
3092             if (aTheSynPath.getLength() == 0)
3093                 return false;
3094             ++pPos;
3095             aTheSynPath.append(sal_Unicode('@'));
3096             {
3097                 sal_Unicode const * p = pPos;
3098                 while (p < pEnd && *pPos != nQueryDelimiter
3099                        && *pPos != nFragmentDelimiter)
3100                     ++p;
3101                 rtl::OUString aCanonic;
3102                 if (!parseHost(pPos, p, aCanonic))
3103                     return false;
3104                 aTheSynPath.append(aCanonic);
3105             }
3106 
3107         done:
3108             break;
3109 
3110         case INET_PROT_POP3:
3111             while (pPos < pEnd && *pPos != nFragmentDelimiter)
3112             {
3113                 EscapeType eEscapeType;
3114                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3115                                              '%', eMechanism,
3116                                              eCharset, eEscapeType);
3117                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3118                            PART_MESSAGE_ID_PATH, '%', eCharset,
3119                            true);
3120             }
3121             break;
3122 
3123         case INET_PROT_PRIV_SOFFICE:
3124         case INET_PROT_SLOT:
3125         case INET_PROT_HID:
3126         case INET_PROT_MACRO:
3127         case INET_PROT_UNO:
3128         case INET_PROT_COMPONENT:
3129         case INET_PROT_LDAP:
3130             while (pPos < pEnd && *pPos != nQueryDelimiter
3131                    && *pPos != nFragmentDelimiter)
3132             {
3133                 EscapeType eEscapeType;
3134                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3135                                              '%', eMechanism,
3136                                              eCharset, eEscapeType);
3137                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3138                            PART_PATH_BEFORE_QUERY, '%', eCharset,
3139                            true);
3140             }
3141             break;
3142 
3143         case INET_PROT_VND_SUN_STAR_HELP:
3144             if (pPos == pEnd
3145                 || *pPos == nQueryDelimiter
3146                 || *pPos == nFragmentDelimiter)
3147                 aTheSynPath.append(sal_Unicode('/'));
3148             else
3149             {
3150                 if (*pPos != '/')
3151                     return false;
3152                 while (pPos < pEnd && *pPos != nQueryDelimiter
3153                        && *pPos != nFragmentDelimiter)
3154                 {
3155                     EscapeType eEscapeType;
3156                     sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3157                                                  '%', eMechanism,
3158                                                  eCharset, eEscapeType);
3159                     appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3160                                PART_HTTP_PATH, '%', eCharset, true);
3161                 }
3162             }
3163             break;
3164 
3165         case INET_PROT_JAVASCRIPT:
3166         case INET_PROT_DATA:
3167         case INET_PROT_CID:
3168         case INET_PROT_DB:
3169             while (pPos < pEnd && *pPos != nFragmentDelimiter)
3170             {
3171                 EscapeType eEscapeType;
3172                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3173                                              '%', eMechanism,
3174                                              eCharset, eEscapeType);
3175                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3176                            PART_URIC, '%', eCharset, true);
3177             }
3178             break;
3179 
3180         case INET_PROT_OUT:
3181             if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '~')
3182                 return false;
3183             aTheSynPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("/~"));
3184             while (pPos < pEnd && *pPos != nFragmentDelimiter)
3185             {
3186                 EscapeType eEscapeType;
3187                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3188                                              '%', eMechanism,
3189                                              eCharset, eEscapeType);
3190                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3191                            PART_URIC, '%', eCharset, true);
3192             }
3193             break;
3194 
3195         case INET_PROT_VND_SUN_STAR_HIER:
3196         case INET_PROT_VND_SUN_STAR_PKG:
3197             if (pPos < pEnd && *pPos != '/'
3198                 && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter)
3199                 return false;
3200             while (pPos < pEnd && *pPos != nQueryDelimiter
3201                    && *pPos != nFragmentDelimiter)
3202             {
3203                 EscapeType eEscapeType;
3204                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3205                                              '%', eMechanism,
3206                                              eCharset, eEscapeType);
3207                 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
3208                     aTheSynPath.append(sal_Unicode('/'));
3209                 else
3210                     appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3211                                PART_PCHAR, '%', eCharset, false);
3212             }
3213             if (aTheSynPath.getLength() == 0)
3214                 aTheSynPath.append(sal_Unicode('/'));
3215             break;
3216 
3217         case INET_PROT_VIM:
3218         {
3219 /* test had to be taken out to make parsePath static; ok since INET_PROT_VIM is
3220    obsolete, anyway
3221             if (m_aUser.isEmpty())
3222                 return false;
3223 */
3224             sal_Unicode const * pPathEnd = pPos;
3225             while (pPathEnd < pEnd && *pPathEnd != nFragmentDelimiter)
3226                 ++pPathEnd;
3227             aTheSynPath.append(sal_Unicode('/'));
3228             if (pPos == pPathEnd)
3229                 break;
3230             else if (*pPos++ != '/')
3231                 return false;
3232             if (pPos == pPathEnd)
3233                 break;
3234             while (pPos < pPathEnd && *pPos != '/')
3235             {
3236                 EscapeType eEscapeType;
3237                 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3238                                              '=', eMechanism,
3239                                              eCharset, eEscapeType);
3240                 appendUCS4(aTheSynPath,
3241                            eEscapeType == ESCAPE_NO ?
3242                                INetMIME::toLowerCase(nUTF32) : nUTF32,
3243                            eEscapeType, bOctets, PART_VIM, '=',
3244                            eCharset, false);
3245             }
3246             bool bInbox;
3247             rtl::OUString sCompare(aTheSynPath);
3248             if (sCompare.equalsAscii("/inbox"))
3249                 bInbox = true;
3250             else if (sCompare.equalsAscii("/newsgroups"))
3251                 bInbox = false;
3252             else
3253                 return false;
3254             aTheSynPath.append(sal_Unicode('/'));
3255             if (pPos == pPathEnd)
3256                 break;
3257             else if (*pPos++ != '/')
3258                 return false;
3259             if (!bInbox)
3260             {
3261                 bool bEmpty = true;
3262                 while (pPos < pPathEnd && *pPos != '/')
3263                 {
3264                     EscapeType eEscapeType;
3265                     sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3266                                                  '=', eMechanism,
3267                                                  eCharset, eEscapeType);
3268                     appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3269                                PART_VIM, '=', eCharset, false);
3270                     bEmpty = false;
3271                 }
3272                 if (bEmpty)
3273                     return false;
3274                 aTheSynPath.append(sal_Unicode('/'));
3275                 if (pPos == pPathEnd)
3276                     break;
3277                 else if (*pPos++ != '/')
3278                     return false;
3279             }
3280             bool bEmpty = true;
3281             while (pPos < pPathEnd && *pPos != ':')
3282             {
3283                 EscapeType eEscapeType;
3284                 sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3285                                              '=', eMechanism,
3286                                              eCharset, eEscapeType);
3287                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3288                            PART_VIM, '=', eCharset, false);
3289                 bEmpty = false;
3290             }
3291             if (bEmpty)
3292                 return false;
3293             if (pPos == pPathEnd)
3294                 break;
3295             else if (*pPos++ != ':')
3296                 return false;
3297             aTheSynPath.append(sal_Unicode(':'));
3298             for (int i = 0; i < 3; ++i)
3299             {
3300                 if (i != 0)
3301                 {
3302                     if (pPos == pPathEnd || *pPos++ != '.')
3303                         return false;
3304                     aTheSynPath.append(sal_Unicode('.'));
3305                 }
3306                 bEmpty = true;
3307                 while (pPos < pPathEnd && *pPos != '.')
3308                 {
3309                     EscapeType eEscapeType;
3310                     sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3311                                                  '=', eMechanism,
3312                                                  eCharset, eEscapeType);
3313                     if (!INetMIME::isDigit(nUTF32))
3314                         return false;
3315                     aTheSynPath.append(sal_Unicode(nUTF32));
3316                     bEmpty = false;
3317                 }
3318                 if (bEmpty)
3319                     return false;
3320             }
3321             if (pPos != pPathEnd)
3322                 return false;
3323             break;
3324         }
3325 
3326         case INET_PROT_VND_SUN_STAR_CMD:
3327         case INET_PROT_VND_SUN_STAR_EXPAND:
3328         {
3329             if (pPos == pEnd || *pPos == nFragmentDelimiter)
3330                 return false;
3331             Part ePart = PART_URIC_NO_SLASH;
3332             while (pPos != pEnd && *pPos != nFragmentDelimiter)
3333             {
3334                 EscapeType eEscapeType;
3335                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3336                                              '%', eMechanism,
3337                                              eCharset, eEscapeType);
3338                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, ePart,
3339                            '%', eCharset, true);
3340                 ePart = PART_URIC;
3341             }
3342             break;
3343         }
3344 
3345         case INET_PROT_VND_SUN_STAR_ODMA:
3346             if (pPos < pEnd)
3347             {
3348                 if (*pPos == '/')
3349                     ++pPos;
3350                 else
3351                     return false;
3352             }
3353             aTheSynPath.append(sal_Unicode('/'));
3354             while (pPos < pEnd && *pPos != nFragmentDelimiter)
3355             {
3356                 EscapeType eEscapeType;
3357                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3358                                              '%', eMechanism,
3359                                              eCharset, eEscapeType);
3360                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3361                            PART_URIC_NO_SLASH, '%', eCharset, true);
3362             }
3363             break;
3364 
3365         case INET_PROT_TELNET:
3366             if (pPos < pEnd)
3367             {
3368                 if (*pPos != '/' || pEnd - pPos > 1)
3369                     return false;
3370                 ++pPos;
3371             }
3372             aTheSynPath.append(sal_Unicode('/'));
3373             break;
3374 
3375         case INET_PROT_VND_SUN_STAR_TDOC:
3376             if (pPos == pEnd || *pPos != '/')
3377                 return false;
3378             while (pPos < pEnd && *pPos != nFragmentDelimiter)
3379             {
3380                 EscapeType eEscapeType;
3381                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3382                                              '%', eMechanism,
3383                                              eCharset, eEscapeType);
3384                 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
3385                     aTheSynPath.append(sal_Unicode('/'));
3386                 else
3387                     appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3388                                PART_PCHAR, '%', eCharset, false);
3389             }
3390             break;
3391 
3392         case INET_PROT_GENERIC:
3393             while (pPos < pEnd && *pPos != nFragmentDelimiter)
3394             {
3395                 EscapeType eEscapeType;
3396                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3397                                              '%', eMechanism,
3398                                              eCharset, eEscapeType);
3399                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3400                            PART_URIC, '%', eCharset, true);
3401             }
3402             if (aTheSynPath.getLength() == 0)
3403                 return false;
3404             break;
3405         default:
3406             OSL_ASSERT(false);
3407             break;
3408     }
3409 
3410     *pBegin = pPos;
3411     rSynPath = aTheSynPath;
3412     return true;
3413 }
3414 
3415 //============================================================================
setPath(rtl::OUString const & rThePath,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)3416 bool INetURLObject::setPath(rtl::OUString const & rThePath, bool bOctets,
3417                             EncodeMechanism eMechanism,
3418                             rtl_TextEncoding eCharset)
3419 {
3420     rtl::OUStringBuffer aSynPath;
3421     sal_Unicode const * p = rThePath.getStr();
3422     sal_Unicode const * pEnd = p + rThePath.getLength();
3423     if (!parsePath(m_eScheme, &p, pEnd, bOctets, eMechanism, eCharset, false,
3424                    '/', 0x80000000, 0x80000000, 0x80000000, aSynPath)
3425         || p != pEnd)
3426         return false;
3427     sal_Int32 nDelta = m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear());
3428     m_aQuery += nDelta;
3429     m_aFragment += nDelta;
3430     return true;
3431 }
3432 
3433 //============================================================================
checkHierarchical() const3434 bool INetURLObject::checkHierarchical() const {
3435     if (m_eScheme == INET_PROT_VND_SUN_STAR_EXPAND) {
3436         OSL_ENSURE(
3437             false, "INetURLObject::checkHierarchical vnd.sun.star.expand");
3438         return true;
3439     } else {
3440         return getSchemeInfo().m_bHierarchical;
3441     }
3442 }
3443 
3444 //============================================================================
appendSegment(rtl::OUString const & rTheSegment,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)3445 bool INetURLObject::appendSegment(rtl::OUString const & rTheSegment,
3446                                   bool bOctets, EncodeMechanism eMechanism,
3447                                   rtl_TextEncoding eCharset)
3448 {
3449     return insertName(rTheSegment, bOctets, false, LAST_SEGMENT, true,
3450                       eMechanism, eCharset);
3451 }
3452 
3453 //============================================================================
getSegment(sal_Int32 nIndex,bool bIgnoreFinalSlash) const3454 INetURLObject::SubString INetURLObject::getSegment(sal_Int32 nIndex,
3455                                                    bool bIgnoreFinalSlash)
3456     const
3457 {
3458     DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3459                "INetURLObject::getSegment(): Bad index");
3460 
3461     if (!checkHierarchical())
3462         return SubString();
3463 
3464     sal_Unicode const * pPathBegin
3465         = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3466     sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3467     sal_Unicode const * pSegBegin;
3468     sal_Unicode const * pSegEnd;
3469     if (nIndex == LAST_SEGMENT)
3470     {
3471         pSegEnd = pPathEnd;
3472         if (bIgnoreFinalSlash && pSegEnd > pPathBegin && pSegEnd[-1] == '/')
3473             --pSegEnd;
3474         if (pSegEnd <= pPathBegin)
3475             return SubString();
3476         pSegBegin = pSegEnd - 1;
3477         while (pSegBegin > pPathBegin && *pSegBegin != '/')
3478             --pSegBegin;
3479     }
3480     else
3481     {
3482         pSegBegin = pPathBegin;
3483         while (nIndex-- > 0)
3484             do
3485             {
3486                 ++pSegBegin;
3487                 if (pSegBegin >= pPathEnd)
3488                     return SubString();
3489             }
3490             while (*pSegBegin != '/');
3491         pSegEnd = pSegBegin + 1;
3492         while (pSegEnd < pPathEnd && *pSegEnd != '/')
3493             ++pSegEnd;
3494     }
3495 
3496     return SubString(pSegBegin - m_aAbsURIRef.getStr(),
3497                      pSegEnd - pSegBegin);
3498 }
3499 
3500 //============================================================================
insertName(rtl::OUString const & rTheName,bool bOctets,bool bAppendFinalSlash,sal_Int32 nIndex,bool bIgnoreFinalSlash,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)3501 bool INetURLObject::insertName(rtl::OUString const & rTheName, bool bOctets,
3502                                bool bAppendFinalSlash, sal_Int32 nIndex,
3503                                bool bIgnoreFinalSlash,
3504                                EncodeMechanism eMechanism,
3505                                rtl_TextEncoding eCharset)
3506 {
3507     DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3508                "INetURLObject::insertName(): Bad index");
3509 
3510     if (!checkHierarchical())
3511         return false;
3512 
3513     sal_Unicode const * pPathBegin
3514         = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3515     sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3516     sal_Unicode const * pPrefixEnd;
3517     bool bInsertSlash;
3518     sal_Unicode const * pSuffixBegin;
3519     if (nIndex == LAST_SEGMENT)
3520     {
3521         pPrefixEnd = pPathEnd;
3522         if (bIgnoreFinalSlash && pPrefixEnd > pPathBegin &&
3523             pPrefixEnd[-1] == '/')
3524         {
3525             --pPrefixEnd;
3526         }
3527         bInsertSlash = bAppendFinalSlash;
3528         pSuffixBegin = pPathEnd;
3529     }
3530     else if (nIndex == 0)
3531     {
3532         pPrefixEnd = pPathBegin;
3533         bInsertSlash =
3534             (pPathBegin < pPathEnd && *pPathBegin != '/') ||
3535             (pPathBegin == pPathEnd && bAppendFinalSlash);
3536         pSuffixBegin =
3537             (pPathEnd - pPathBegin == 1 && *pPathBegin == '/' &&
3538              !bAppendFinalSlash && bIgnoreFinalSlash)
3539             ? pPathEnd : pPathBegin;
3540     }
3541     else
3542     {
3543         pPrefixEnd = pPathBegin;
3544         sal_Unicode const * pEnd = pPathEnd;
3545         if (bIgnoreFinalSlash && pEnd > pPathBegin && pEnd[-1] == '/')
3546             --pEnd;
3547         bool bSkip = pPrefixEnd < pEnd && *pPrefixEnd == '/';
3548         bInsertSlash = false;
3549         pSuffixBegin = pPathEnd;
3550         while (nIndex-- > 0)
3551             for (;;)
3552             {
3553                 if (bSkip)
3554                     ++pPrefixEnd;
3555                 bSkip = true;
3556                 if (pPrefixEnd >= pEnd)
3557                 {
3558                     if (nIndex == 0)
3559                     {
3560                         bInsertSlash = bAppendFinalSlash;
3561                         break;
3562                     }
3563                     else
3564                         return false;
3565                 }
3566                 if (*pPrefixEnd == '/')
3567                 {
3568                     pSuffixBegin = pPrefixEnd;
3569                     break;
3570                 }
3571             }
3572     }
3573 
3574     rtl::OUStringBuffer aNewPath;
3575     aNewPath.append(pPathBegin, pPrefixEnd - pPathBegin);
3576     aNewPath.append(sal_Unicode('/'));
3577     aNewPath.append(encodeText(rTheName, bOctets, PART_PCHAR, getEscapePrefix(),
3578                            eMechanism, eCharset, true));
3579     if (bInsertSlash) {
3580         aNewPath.append(sal_Unicode('/'));
3581     }
3582     aNewPath.append(pSuffixBegin, pPathEnd - pSuffixBegin);
3583 
3584     return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
3585         RTL_TEXTENCODING_UTF8);
3586 }
3587 
3588 //============================================================================
clearQuery()3589 bool INetURLObject::clearQuery()
3590 {
3591     if (HasError())
3592         return false;
3593     if (m_aQuery.isPresent())
3594     {
3595         lcl_Erase(m_aAbsURIRef, m_aQuery.getBegin() - 1,
3596             m_aQuery.getLength() + 1);
3597         m_aFragment += m_aQuery.clear() - 1;
3598     }
3599     return false;
3600 }
3601 
3602 //============================================================================
setQuery(rtl::OUString const & rTheQuery,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)3603 bool INetURLObject::setQuery(rtl::OUString const & rTheQuery, bool bOctets,
3604                              EncodeMechanism eMechanism,
3605                              rtl_TextEncoding eCharset)
3606 {
3607     if (!getSchemeInfo().m_bQuery)
3608         return false;
3609     rtl::OUString aNewQuery(encodeText(rTheQuery, bOctets, PART_URIC,
3610                                    getEscapePrefix(), eMechanism, eCharset,
3611                                    true));
3612     sal_Int32 nDelta;
3613     if (m_aQuery.isPresent())
3614         nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery);
3615     else
3616     {
3617         m_aAbsURIRef.insert(m_aPath.getEnd(), sal_Unicode('?'));
3618         nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery, m_aPath.getEnd() + 1)
3619                      + 1;
3620     }
3621     m_aFragment += nDelta;
3622     return true;
3623 }
3624 
3625 //============================================================================
clearFragment()3626 bool INetURLObject::clearFragment()
3627 {
3628     if (HasError())
3629         return false;
3630     if (m_aFragment.isPresent())
3631     {
3632         m_aAbsURIRef.setLength(m_aFragment.getBegin() - 1);
3633         m_aFragment.clear();
3634     }
3635     return true;
3636 }
3637 
3638 //============================================================================
setFragment(rtl::OUString const & rTheFragment,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)3639 bool INetURLObject::setFragment(rtl::OUString const & rTheFragment,
3640                                 bool bOctets, EncodeMechanism eMechanism,
3641                                 rtl_TextEncoding eCharset)
3642 {
3643     if (HasError())
3644         return false;
3645     rtl::OUString aNewFragment(encodeText(rTheFragment, bOctets, PART_URIC,
3646                                       getEscapePrefix(), eMechanism,
3647                                       eCharset, true));
3648     if (m_aFragment.isPresent())
3649         m_aFragment.set(m_aAbsURIRef, aNewFragment);
3650     else
3651     {
3652         m_aAbsURIRef.append(sal_Unicode('#'));
3653         m_aFragment.set(m_aAbsURIRef, aNewFragment, m_aAbsURIRef.getLength());
3654     }
3655     return true;
3656 }
3657 
3658 //============================================================================
getFTPType() const3659 INetURLObject::FTPType INetURLObject::getFTPType() const
3660 {
3661     if (m_eScheme == INET_PROT_FTP
3662         && m_aPath.getLength() >= RTL_CONSTASCII_LENGTH(";type=") + 1
3663         && rtl::OUString(m_aAbsURIRef).copy(
3664             m_aPath.getEnd() - (RTL_CONSTASCII_LENGTH(";type=") + 1),
3665             RTL_CONSTASCII_LENGTH(";type=")).equalsIgnoreAsciiCaseAscii(";type="))
3666         switch (m_aAbsURIRef.charAt(m_aPath.getEnd()))
3667         {
3668             case 'A':
3669             case 'a':
3670                 return FTP_TYPE_A;
3671 
3672             case 'D':
3673             case 'd':
3674                 return FTP_TYPE_D;
3675 
3676             case 'I':
3677             case 'i':
3678                 return FTP_TYPE_I;
3679         }
3680     return FTP_TYPE_NONE;
3681 }
3682 
3683 //============================================================================
hasDosVolume(FSysStyle eStyle) const3684 bool INetURLObject::hasDosVolume(FSysStyle eStyle) const
3685 {
3686     sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3687     return (eStyle & FSYS_DOS) != 0
3688            && m_aPath.getLength() >= 3
3689            && p[0] == '/'
3690            && INetMIME::isAlpha(p[1])
3691            && p[2] == ':'
3692            && (m_aPath.getLength() == 3 || p[3] == '/');
3693 }
3694 
3695 //============================================================================
getIMAPUID() const3696 sal_uInt32 INetURLObject::getIMAPUID() const
3697 {
3698     if (m_eScheme == INET_PROT_IMAP
3699         && m_aPath.getLength() >= RTL_CONSTASCII_LENGTH("/;uid=") + 1)
3700     {
3701         sal_Unicode const * pBegin = m_aAbsURIRef.getStr()
3702                                          + m_aPath.getBegin()
3703                                          + RTL_CONSTASCII_LENGTH("/;uid=");
3704         sal_Unicode const * pEnd = pBegin + m_aPath.getLength();
3705         sal_Unicode const * p = pEnd;
3706         while (p > pBegin && INetMIME::isDigit(p[-1]))
3707             --p;
3708         if (p < pEnd && *--p != '0'
3709             && rtl::OUString(m_aAbsURIRef).copy(
3710                 p - RTL_CONSTASCII_LENGTH("/;uid=") - m_aAbsURIRef.getStr(),
3711                 RTL_CONSTASCII_LENGTH("/;uid=")).equalsIgnoreAsciiCaseAscii("/;uid=")
3712            )
3713         {
3714             sal_uInt32 nUID;
3715             if (INetMIME::scanUnsigned(p, pEnd, false, nUID))
3716                 return nUID;
3717         }
3718     }
3719     return 0;
3720 }
3721 
3722 //============================================================================
3723 // static
encodeText(sal_Unicode const * pBegin,sal_Unicode const * pEnd,bool bOctets,Part ePart,sal_Char cEscapePrefix,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,bool bKeepVisibleEscapes)3724 rtl::OUString INetURLObject::encodeText(sal_Unicode const * pBegin,
3725                                     sal_Unicode const * pEnd, bool bOctets,
3726                                     Part ePart, sal_Char cEscapePrefix,
3727                                     EncodeMechanism eMechanism,
3728                                     rtl_TextEncoding eCharset,
3729                                     bool bKeepVisibleEscapes)
3730 {
3731     rtl::OUStringBuffer aResult;
3732     while (pBegin < pEnd)
3733     {
3734         EscapeType eEscapeType;
3735         sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets, cEscapePrefix,
3736                                      eMechanism, eCharset, eEscapeType);
3737         appendUCS4(aResult, nUTF32, eEscapeType, bOctets, ePart,
3738                    cEscapePrefix, eCharset, bKeepVisibleEscapes);
3739     }
3740     return aResult.makeStringAndClear();
3741 }
3742 
3743 //============================================================================
3744 // static
decode(sal_Unicode const * pBegin,sal_Unicode const * pEnd,sal_Char cEscapePrefix,DecodeMechanism eMechanism,rtl_TextEncoding eCharset)3745 rtl::OUString INetURLObject::decode(sal_Unicode const * pBegin,
3746                                 sal_Unicode const * pEnd,
3747                                 sal_Char cEscapePrefix,
3748                                 DecodeMechanism eMechanism,
3749                                 rtl_TextEncoding eCharset)
3750 {
3751     switch (eMechanism)
3752     {
3753         case NO_DECODE:
3754             return rtl::OUString(pBegin, pEnd - pBegin);
3755 
3756         case DECODE_TO_IURI:
3757             eCharset = RTL_TEXTENCODING_UTF8;
3758             break;
3759 
3760         default:
3761             break;
3762     }
3763     rtl::OUStringBuffer aResult;
3764     while (pBegin < pEnd)
3765     {
3766         EscapeType eEscapeType;
3767         sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, false, cEscapePrefix,
3768                                      WAS_ENCODED, eCharset, eEscapeType);
3769         switch (eEscapeType)
3770         {
3771             case ESCAPE_NO:
3772                 aResult.append(sal_Unicode(nUTF32));
3773                 break;
3774 
3775             case ESCAPE_OCTET:
3776                 appendEscape(aResult, cEscapePrefix, nUTF32);
3777                 break;
3778 
3779             case ESCAPE_UTF32:
3780                 if (
3781                      INetMIME::isUSASCII(nUTF32) &&
3782                      (
3783                        eMechanism == DECODE_TO_IURI ||
3784                        (
3785                          eMechanism == DECODE_UNAMBIGUOUS &&
3786                          mustEncode(nUTF32, PART_UNAMBIGUOUS)
3787                        )
3788                      )
3789                    )
3790                 {
3791                     appendEscape(aResult, cEscapePrefix, nUTF32);
3792                 }
3793                 else
3794                     aResult.append(sal_Unicode(nUTF32));
3795                 break;
3796         }
3797     }
3798     return aResult.makeStringAndClear();
3799 }
3800 
3801 //============================================================================
GetURLNoPass(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const3802 rtl::OUString INetURLObject::GetURLNoPass(DecodeMechanism eMechanism,
3803                                       rtl_TextEncoding eCharset) const
3804 {
3805     INetURLObject aTemp(*this);
3806     aTemp.clearPassword();
3807     return aTemp.GetMainURL(eMechanism, eCharset);
3808 }
3809 
3810 //============================================================================
GetURLNoMark(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const3811 rtl::OUString INetURLObject::GetURLNoMark(DecodeMechanism eMechanism,
3812                                       rtl_TextEncoding eCharset) const
3813 {
3814     INetURLObject aTemp(*this);
3815     aTemp.clearFragment();
3816     return aTemp.GetMainURL(eMechanism, eCharset);
3817 }
3818 
3819 //============================================================================
// Build a display form of this URL that is shortened to fit a given width.
//
// rStringWidth  measures candidate strings via queryStringWidth(); expected
//               to be a non-null reference (checked with OSL_ENSURE).
// nWidth        the width that a measured candidate must not exceed.
// eMechanism, eCharset
//               handed on to decode() for every part that is decoded.
//
// The result starts with the scheme and ':'.  For hierarchical schemes,
// segments of the authority/path range are then tried alternately from the
// end and from the start; a segment is kept only while the trial string
// (with "..." for the omitted middle and "?..." or "#..." standing in for a
// query or fragment) still measures <= nWidth.  If no segment could be kept,
// the whole range is appended undivided.  Query and fragment are then
// appended in full.  If the outcome is still too wide it is cut from the
// end, one character at a time, with "..." appended.
3820 rtl::OUString
getAbbreviated(star::uno::Reference<star::util::XStringWidth> const & rStringWidth,sal_Int32 nWidth,DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const3821 INetURLObject::getAbbreviated(
3822     star::uno::Reference< star::util::XStringWidth > const & rStringWidth,
3823     sal_Int32 nWidth,
3824     DecodeMechanism eMechanism,
3825     rtl_TextEncoding eCharset)
3826     const
3827 {
3828     OSL_ENSURE(rStringWidth.is(), "specification violation");
3829     sal_Char cEscapePrefix = getEscapePrefix();
3830     rtl::OUStringBuffer aBuffer;
3831     // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
3832     // is empty ("") in that case, so take the scheme from m_aAbsURIRef
3833     if (m_eScheme != INET_PROT_GENERIC)
3834     {
3835         aBuffer.appendAscii(getSchemeInfo().m_pScheme);
3836     }
3837     else
3838     {
3839         if (m_aAbsURIRef)
3840         {
3841             sal_Unicode const * pSchemeBegin
3842                 = m_aAbsURIRef.getStr();
3843             sal_Unicode const * pSchemeEnd = pSchemeBegin;
3844 
              // NOTE(review): this scan has no end check; it relies on the
              // stored URL containing a ':' -- confirm that this always
              // holds for INET_PROT_GENERIC objects.
3845             while (pSchemeEnd[0] != ':')
3846             {
3847                 ++pSchemeEnd;
3848             }
3849             aBuffer.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
3850         }
3851     }
3852     aBuffer.append(static_cast< sal_Unicode >(':'));
         // [pCoreBegin, pCoreEnd) is the range that may be abbreviated: it
         // starts at the authority if the scheme has one, otherwise at the
         // path, and ends where the path ends.
3853     bool bAuthority = getSchemeInfo().m_bAuthority;
3854     sal_Unicode const * pCoreBegin
3855         = m_aAbsURIRef.getStr() + (bAuthority ? getAuthorityBegin() :
3856                                                    m_aPath.getBegin());
3857     sal_Unicode const * pCoreEnd
3858         = m_aAbsURIRef.getStr() + m_aPath.getBegin() + m_aPath.getLength();
         // Set as soon as at least one segment has been accepted below.
3859     bool bSegment = false;
3860     if (getSchemeInfo().m_bHierarchical)
3861     {
             // Placeholder used only in the trial strings: stands for a
             // query or, if there is none, a fragment.
3862         rtl::OUString aRest;
3863         if (m_aQuery.isPresent())
3864             aRest = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("?..."));
3865         else if (m_aFragment.isPresent())
3866             aRest = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("#..."));
             // aTrailer collects the accepted segments from the end;
             // accepted segments from the start go directly into aBuffer.
             // [pBegin, pEnd) is what has not been accepted from either
             // side; pPrefixBegin / pSuffixEnd mark how far each side has
             // been tried.
3867         rtl::OUStringBuffer aTrailer;
3868         sal_Unicode const * pBegin = pCoreBegin;
3869         sal_Unicode const * pEnd = pCoreEnd;
3870         sal_Unicode const * pPrefixBegin = pBegin;
3871         sal_Unicode const * pSuffixEnd = pEnd;
3872         bool bPrefix = true;
3873         bool bSuffix = true;
             // Each round first tries to keep one more segment at the end,
             // then one more at the start.  A side is given up as soon as
             // its trial string measures wider than nWidth.
3874         do
3875         {
3876             if (bSuffix)
3877             {
                     // Find the '/' that starts the last untried segment; a
                     // final '/' of the whole range is kept with that segment.
3878                 sal_Unicode const * p = pSuffixEnd - 1;
3879                 if (pSuffixEnd == pCoreEnd && *p == '/')
3880                     --p;
3881                 while (*p != '/')
3882                     --p;
3883                 if (bAuthority && p == pCoreBegin + 1)
3884                     --p;
3885                 rtl::OUString
3886                     aSegment(decode(p + (p == pBegin && pBegin != pCoreBegin ?
3887                                              1 : 0),
3888                                     pSuffixEnd,
3889                                     cEscapePrefix,
3890                                     eMechanism,
3891                                     eCharset));
3892                 pSuffixEnd = p;
                     // Trial string: accepted start, "..." if something is
                     // left out, this segment, accepted end, placeholder.
3893                 rtl::OUStringBuffer aResult(aBuffer);
3894                 if (pSuffixEnd != pBegin)
3895                     aResult.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
3896                 aResult.append(aSegment);
3897                 aResult.append(aTrailer);
3898                 aResult.append(aRest);
3899                 if (rStringWidth->
3900                             queryStringWidth(aResult.makeStringAndClear())
3901                         <= nWidth)
3902                 {
3903                     aTrailer.insert(0, aSegment);
3904                     bSegment = true;
3905                     pEnd = pSuffixEnd;
3906                 }
3907                 else
3908                     bSuffix = false;
3909                 if (pPrefixBegin > pSuffixEnd)
3910                     pPrefixBegin = pSuffixEnd;
3911                 if (pBegin == pEnd)
3912                     break;
3913             }
3914             if (bPrefix)
3915             {
                     // Find the end of the first untried segment; at the
                     // very start of an authority the leading "//" is
                     // stepped over first.
3916                 sal_Unicode const * p
3917                     = pPrefixBegin
3918                           + (bAuthority && pPrefixBegin == pCoreBegin ? 2 :
3919                                                                         1);
3920                 OSL_ASSERT(p <= pEnd);
3921                 while (p < pEnd && *p != '/')
3922                     ++p;
3923                 if (p == pCoreEnd - 1 && *p == '/')
3924                     ++p;
3925                 rtl::OUString
3926                     aSegment(decode(pPrefixBegin
3927                                         + (pPrefixBegin == pCoreBegin ? 0 :
3928                                                                         1),
3929                                     p == pEnd ? p : p + 1,
3930                                     cEscapePrefix,
3931                                     eMechanism,
3932                                     eCharset));
3933                 pPrefixBegin = p;
                     // Trial string: accepted start, this segment, "..." if
                     // something is left out, accepted end, placeholder.
3934                 rtl::OUStringBuffer aResult(aBuffer);
3935                 aResult.append(aSegment);
3936                 if (pPrefixBegin != pEnd)
3937                     aResult.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
3938                 aResult.append(aTrailer);
3939                 aResult.append(aRest);
3940                 if (rStringWidth->
3941                             queryStringWidth(aResult.makeStringAndClear())
3942                         <= nWidth)
3943                 {
3944                     aBuffer.append(aSegment);
3945                     bSegment = true;
3946                     pBegin = pPrefixBegin;
3947                 }
3948                 else
3949                     bPrefix = false;
3950                 if (pPrefixBegin > pSuffixEnd)
3951                     pSuffixEnd = pPrefixBegin;
3952                 if (pBegin == pEnd)
3953                     break;
3954             }
3955         }
3956         while (bPrefix || bSuffix);
3957         if (bSegment)
3958         {
                 // Mark the omitted middle, if any, then add the accepted end.
3959             if (pPrefixBegin != pBegin || pSuffixEnd != pEnd)
3960                 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
3961             aBuffer.append(aTrailer);
3962         }
3963     }
         // No segment was accepted (or the scheme is not hierarchical): use
         // the whole decoded range undivided.
3964     if (!bSegment)
3965         aBuffer.append(decode(pCoreBegin,
3966                               pCoreEnd,
3967                               cEscapePrefix,
3968                               eMechanism,
3969                               eCharset));
         // Query and fragment are appended in full, not as placeholders.
3970     if (m_aQuery.isPresent())
3971     {
3972         aBuffer.append(static_cast< sal_Unicode >('?'));
3973         aBuffer.append(decode(m_aQuery, cEscapePrefix, eMechanism, eCharset));
3974     }
3975     if (m_aFragment.isPresent())
3976     {
3977         aBuffer.append(static_cast< sal_Unicode >('#'));
3978         aBuffer.
3979             append(decode(m_aFragment, cEscapePrefix, eMechanism, eCharset));
3980     }
         // Last resort: if the assembled string is still too wide, keep ever
         // shorter leading parts followed by "...".  Once nothing of the
         // original text is left (i == 0), shorten the remaining "..."
         // itself until it fits or the buffer is empty.
3981     if (aBuffer.getLength() != 0)
3982     {
3983         rtl::OUStringBuffer aResult(aBuffer);
3984         if (rStringWidth->queryStringWidth(aResult.makeStringAndClear())
3985                 > nWidth)
3986             for (sal_Int32 i = aBuffer.getLength();;)
3987             {
3988                 if (i == 0)
3989                 {
3990                     aBuffer.setLength(aBuffer.getLength() - 1);
3991                     if (aBuffer.getLength() == 0)
3992                         break;
3993                 }
3994                 else
3995                 {
3996                     aBuffer.setLength(--i);
3997                     aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
3998                 }
3999                 aResult = aBuffer;
4000                 if (rStringWidth->
4001                             queryStringWidth(aResult.makeStringAndClear())
4002                         <= nWidth)
4003                     break;
4004             }
4005     }
4006     return aBuffer.makeStringAndClear();
4007 }
4008 
4009 //============================================================================
operator ==(INetURLObject const & rObject) const4010 bool INetURLObject::operator ==(INetURLObject const & rObject) const
4011 {
4012     if (m_eScheme != rObject.m_eScheme)
4013         return false;
4014     if (m_eScheme == INET_PROT_NOT_VALID)
4015         return (m_aAbsURIRef == rObject.m_aAbsURIRef) != false;
4016     if ((m_aScheme.compare(
4017              rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef)
4018          != 0)
4019         || GetUser(NO_DECODE) != rObject.GetUser(NO_DECODE)
4020         || GetPass(NO_DECODE) != rObject.GetPass(NO_DECODE)
4021         || !GetHost(NO_DECODE).equalsIgnoreAsciiCase(
4022             rObject.GetHost(NO_DECODE))
4023         || GetPort() != rObject.GetPort()
4024         || HasParam() != rObject.HasParam()
4025         || GetParam(NO_DECODE) != rObject.GetParam(NO_DECODE)
4026         || GetMsgId(NO_DECODE) != rObject.GetMsgId(NO_DECODE))
4027         return false;
4028     rtl::OUString aPath1(GetURLPath(NO_DECODE));
4029     rtl::OUString aPath2(rObject.GetURLPath(NO_DECODE));
4030     switch (m_eScheme)
4031     {
4032         case INET_PROT_FILE:
4033         {
4034             // If the URL paths of two file URLs only differ in that one has a
4035             // final '/' and the other has not, take the two paths as
4036             // equivalent (this could be usefull for other schemes, too):
4037             sal_Int32 nLength = aPath1.getLength();
4038             switch (nLength - aPath2.getLength())
4039             {
4040                 case -1:
4041                     if (aPath2.getStr()[nLength] != '/')
4042                         return false;
4043                     break;
4044 
4045                 case 0:
4046                     break;
4047 
4048                 case 1:
4049                     if (aPath1.getStr()[--nLength] != '/')
4050                         return false;
4051                     break;
4052 
4053                 default:
4054                     return false;
4055             }
4056             return aPath1.compareTo(aPath2, nLength) == 0;
4057         }
4058 
4059         default:
4060             return (aPath1 == aPath2) != false;
4061     }
4062 }
4063 
4064 //============================================================================
operator <(INetURLObject const & rObject) const4065 bool INetURLObject::operator <(INetURLObject const & rObject) const
4066 {
4067     sal_Int32 nCompare = m_aScheme.compare(
4068         rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef);
4069     if (nCompare < 0) {
4070         return true;
4071     } else if (nCompare > 0) {
4072         return false;
4073     }
4074     sal_uInt32 nPort1 = GetPort();
4075     sal_uInt32 nPort2 = rObject.GetPort();
4076     if (nPort1 < nPort2)
4077         return true;
4078     else if (nPort1 > nPort2)
4079         return false;
4080     nCompare = GetUser(NO_DECODE).compareTo(rObject.GetUser(NO_DECODE));
4081     if (nCompare < 0)
4082         return true;
4083     else if (nCompare > 0)
4084         return false;
4085     nCompare = GetPass(NO_DECODE).compareTo(rObject.GetPass(NO_DECODE));
4086     if (nCompare < 0)
4087         return true;
4088     else if (nCompare > 0)
4089         return false;
4090     nCompare = GetHost(NO_DECODE).compareTo(rObject.GetHost(NO_DECODE));
4091     if (nCompare < 0)
4092         return true;
4093     else if (nCompare > 0)
4094         return false;
4095     const rtl::OUString &rPath1(GetURLPath(NO_DECODE));
4096     const rtl::OUString &rPath2(rObject.GetURLPath(NO_DECODE));
4097     nCompare = rPath1.compareTo(rPath2);
4098     if (nCompare < 0)
4099         return true;
4100     else if (nCompare > 0)
4101         return false;
4102     nCompare = GetParam(NO_DECODE).compareTo(rObject.GetParam(NO_DECODE));
4103     if (nCompare < 0)
4104         return true;
4105     else if (nCompare > 0)
4106         return false;
4107     return GetMsgId(NO_DECODE).compareTo(rObject.GetMsgId(NO_DECODE)) < 0;
4108 }
4109 
4110 //============================================================================
ConcatData(INetProtocol eTheScheme,rtl::OUString const & rTheUser,rtl::OUString const & rThePassword,rtl::OUString const & rTheHost,sal_uInt32 nThePort,rtl::OUString const & rThePath,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)4111 bool INetURLObject::ConcatData(INetProtocol eTheScheme,
4112                                rtl::OUString const & rTheUser,
4113                                rtl::OUString const & rThePassword,
4114                                rtl::OUString const & rTheHost,
4115                                sal_uInt32 nThePort,
4116                                rtl::OUString const & rThePath,
4117                                EncodeMechanism eMechanism,
4118                                rtl_TextEncoding eCharset)
4119 {
4120     setInvalid();
4121     m_eScheme = eTheScheme;
4122     if (HasError() || m_eScheme == INET_PROT_GENERIC)
4123         return false;
4124     m_aAbsURIRef.setLength(0);
4125     m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
4126     m_aAbsURIRef.append(sal_Unicode(':'));
4127     if (getSchemeInfo().m_bAuthority)
4128     {
4129         m_aAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
4130         bool bUserInfo = false;
4131         if (getSchemeInfo().m_bUser)
4132         {
4133             if (m_eScheme == INET_PROT_IMAP && rTheUser.getLength() == 0)
4134             {
4135                 setInvalid();
4136                 return false;
4137             }
4138             if (rTheUser.getLength() != 0)
4139             {
4140                 m_aUser.set(m_aAbsURIRef,
4141                             encodeText(rTheUser, false,
4142                                        m_eScheme == INET_PROT_IMAP ?
4143                                            PART_IMAP_ACHAR :
4144                                        m_eScheme == INET_PROT_VIM ?
4145                                            PART_VIM :
4146                                            PART_USER_PASSWORD,
4147                                        getEscapePrefix(), eMechanism,
4148                                        eCharset, false),
4149                             m_aAbsURIRef.getLength());
4150                 bUserInfo = true;
4151             }
4152         }
4153         else if (rTheUser.getLength() != 0)
4154         {
4155             setInvalid();
4156             return false;
4157         }
4158         if (rThePassword.getLength() != 0)
4159         {
4160             if (getSchemeInfo().m_bPassword)
4161             {
4162                 m_aAbsURIRef.append(sal_Unicode(':'));
4163                 m_aAuth.set(m_aAbsURIRef,
4164                             encodeText(rThePassword, false,
4165                                        m_eScheme == INET_PROT_VIM ?
4166                                            PART_VIM : PART_USER_PASSWORD,
4167                                        getEscapePrefix(), eMechanism,
4168                                        eCharset, false),
4169                             m_aAbsURIRef.getLength());
4170                 bUserInfo = true;
4171             }
4172             else
4173             {
4174                 setInvalid();
4175                 return false;
4176             }
4177         }
4178         if (bUserInfo && getSchemeInfo().m_bHost)
4179             m_aAbsURIRef.append(sal_Unicode('@'));
4180         if (getSchemeInfo().m_bHost)
4181         {
4182             rtl::OUStringBuffer aSynHost(rTheHost);
4183             bool bNetBiosName = false;
4184             switch (m_eScheme)
4185             {
4186                 case INET_PROT_FILE:
4187                     {
4188                         rtl::OUString sTemp(aSynHost);
4189                         if (sTemp.equalsIgnoreAsciiCaseAsciiL(
4190                             RTL_CONSTASCII_STRINGPARAM("localhost")))
4191                         {
4192                             aSynHost.setLength(0);
4193                         }
4194                         bNetBiosName = true;
4195                     }
4196                     break;
4197 
4198                 case INET_PROT_LDAP:
4199                     if (aSynHost.getLength() == 0 && nThePort != 0)
4200                     {
4201                         setInvalid();
4202                         return false;
4203                     }
4204                     break;
4205 
4206                 default:
4207                     if (aSynHost.getLength() == 0)
4208                     {
4209                         setInvalid();
4210                         return false;
4211                     }
4212                     break;
4213             }
4214             if (!parseHostOrNetBiosName(
4215                     aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
4216                     false, eMechanism, eCharset, bNetBiosName, &aSynHost))
4217             {
4218                 setInvalid();
4219                 return false;
4220             }
4221             m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear(),
4222                 m_aAbsURIRef.getLength());
4223             if (nThePort != 0)
4224             {
4225                 if (getSchemeInfo().m_bPort)
4226                 {
4227                     m_aAbsURIRef.append(sal_Unicode(':'));
4228                     m_aPort.set(m_aAbsURIRef,
4229                                 rtl::OUString::valueOf(sal_Int64(nThePort)),
4230                                 m_aAbsURIRef.getLength());
4231                 }
4232                 else
4233                 {
4234                     setInvalid();
4235                     return false;
4236                 }
4237             }
4238         }
4239         else if (rTheHost.getLength() != 0 || nThePort != 0)
4240         {
4241             setInvalid();
4242             return false;
4243         }
4244     }
4245     rtl::OUStringBuffer aSynPath;
4246     sal_Unicode const * p = rThePath.getStr();
4247     sal_Unicode const * pEnd = p + rThePath.getLength();
4248     if (!parsePath(m_eScheme, &p, pEnd, false, eMechanism, eCharset, false, '/',
4249                    0x80000000, 0x80000000, 0x80000000, aSynPath)
4250         || p != pEnd)
4251     {
4252         setInvalid();
4253         return false;
4254     }
4255     m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear(),
4256         m_aAbsURIRef.getLength());
4257     return true;
4258 }
4259 
4260 //============================================================================
4261 // static
GetAbsURL(rtl::OUString const & rTheBaseURIRef,rtl::OUString const & rTheRelURIRef,bool bIgnoreFragment,EncodeMechanism eEncodeMechanism,DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset,FSysStyle eStyle)4262 rtl::OUString INetURLObject::GetAbsURL(rtl::OUString const & rTheBaseURIRef,
4263                                        rtl::OUString const & rTheRelURIRef,
4264                                        bool bIgnoreFragment,
4265                                        EncodeMechanism eEncodeMechanism,
4266                                        DecodeMechanism eDecodeMechanism,
4267                                        rtl_TextEncoding eCharset,
4268                                        FSysStyle eStyle)
4269 {
4270     // Backwards compatibility:
4271     if (rTheRelURIRef.getLength() == 0 || rTheRelURIRef[0] == '#')
4272         return rTheRelURIRef;
4273 
4274     INetURLObject aTheAbsURIRef;
4275     bool bWasAbsolute;
4276     return INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
4277             convertRelToAbs(rTheRelURIRef, false, aTheAbsURIRef,
4278                             bWasAbsolute, eEncodeMechanism,
4279                             eCharset, bIgnoreFragment, false,
4280                             false, eStyle)
4281            || eEncodeMechanism != WAS_ENCODED
4282            || eDecodeMechanism != DECODE_TO_IURI
4283            || eCharset != RTL_TEXTENCODING_UTF8 ?
4284                aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) :
4285                rTheRelURIRef;
4286 }
4287 
4288 //============================================================================
getExternalURL(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const4289 rtl::OUString INetURLObject::getExternalURL(DecodeMechanism eMechanism,
4290                                         rtl_TextEncoding eCharset) const
4291 {
4292     rtl::OUString aTheExtURIRef;
4293     translateToExternal(
4294         rtl::OUString(m_aAbsURIRef), aTheExtURIRef, eMechanism, eCharset);
4295     return aTheExtURIRef;
4296 }
4297 
4298 //============================================================================
4299 // static
GetScheme(INetProtocol eTheScheme)4300 rtl::OUString INetURLObject::GetScheme(INetProtocol eTheScheme)
4301 {
4302     return rtl::OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pPrefix);
4303 }
4304 
4305 //============================================================================
4306 // static
CompareProtocolScheme(rtl::OUString const & rTheAbsURIRef)4307 INetProtocol INetURLObject::CompareProtocolScheme(rtl::OUString const &
4308                                                       rTheAbsURIRef)
4309 {
4310     sal_Unicode const * p = rTheAbsURIRef.getStr();
4311     PrefixInfo const * pPrefix = getPrefix(p, p + rTheAbsURIRef.getLength());
4312     return pPrefix ? pPrefix->m_eScheme : INET_PROT_NOT_VALID;
4313 }
4314 
4315 //============================================================================
hasPassword() const4316 bool INetURLObject::hasPassword() const
4317 {
4318     return m_aAuth.isPresent() && getSchemeInfo().m_bPassword;
4319 }
4320 
4321 //============================================================================
makeAuthCanonic()4322 void INetURLObject::makeAuthCanonic()
4323 {
4324     if (m_eScheme == INET_PROT_IMAP && m_aAuth.getLength() == 1
4325         && m_aAbsURIRef.charAt(m_aAuth.getBegin()) == '*')
4326     {
4327         lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin()
4328                                - RTL_CONSTASCII_LENGTH(";AUTH="),
4329                            RTL_CONSTASCII_LENGTH(";AUTH=*"));
4330         sal_Int32 nDelta = m_aAuth.clear() - RTL_CONSTASCII_LENGTH(";AUTH=");
4331         m_aPath += nDelta;
4332         m_aQuery += nDelta;
4333         m_aFragment += nDelta;
4334     }
4335 }
4336 
4337 //============================================================================
GetHostPort(DecodeMechanism eMechanism,rtl_TextEncoding eCharset)4338 rtl::OUString INetURLObject::GetHostPort(DecodeMechanism eMechanism,
4339                                      rtl_TextEncoding eCharset)
4340 {
4341     // Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and
4342     // PROT_VND_SUN_STAR_PKG misuse m_aHost:
4343     if (!getSchemeInfo().m_bHost)
4344         return rtl::OUString();
4345     rtl::OUStringBuffer aHostPort(decode(m_aHost, getEscapePrefix(),
4346         eMechanism, eCharset));
4347     if (m_aPort.isPresent())
4348     {
4349         aHostPort.append(sal_Unicode(':'));
4350         aHostPort.append(decode(m_aPort, getEscapePrefix(),
4351             eMechanism, eCharset));
4352     }
4353     return aHostPort.makeStringAndClear();
4354 }
4355 
4356 //============================================================================
GetPort() const4357 sal_uInt32 INetURLObject::GetPort() const
4358 {
4359     if (m_aPort.isPresent())
4360     {
4361         sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
4362         sal_Unicode const * pEnd = p + m_aPort.getLength();
4363         sal_uInt32 nThePort;
4364         if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
4365             return nThePort;
4366     }
4367     return 0;
4368 }
4369 
4370 //============================================================================
SetPort(sal_uInt32 nThePort)4371 bool INetURLObject::SetPort(sal_uInt32 nThePort)
4372 {
4373     if (getSchemeInfo().m_bPort && m_aHost.isPresent())
4374     {
4375         rtl::OUString aNewPort(rtl::OUString::valueOf(sal_Int64(nThePort)));
4376         sal_Int32 nDelta;
4377         if (m_aPort.isPresent())
4378             nDelta = m_aPort.set(m_aAbsURIRef, aNewPort);
4379         else
4380         {
4381             m_aAbsURIRef.insert(m_aHost.getEnd(), sal_Unicode(':'));
4382             nDelta = m_aPort.set(m_aAbsURIRef, aNewPort, m_aHost.getEnd() + 1)
4383                          + 1;
4384         }
4385         m_aPath += nDelta;
4386         m_aQuery += nDelta;
4387         m_aFragment += nDelta;
4388         return true;
4389     }
4390     return false;
4391 }
4392 
4393 //============================================================================
makePortCanonic()4394 void INetURLObject::makePortCanonic()
4395 {
4396     if (m_aPort.isPresent())
4397     {
4398         sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
4399         sal_Unicode const * pEnd = p + m_aPort.getLength();
4400         sal_uInt32 nThePort;
4401         if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
4402         {
4403             sal_Int32 nDelta;
4404             if (nThePort != 0 && nThePort == getSchemeInfo().m_nDefaultPort)
4405             {
4406                 lcl_Erase(m_aAbsURIRef, m_aPort.getBegin() - 1,
4407                                    m_aPort.getLength() + 1);
4408                 nDelta = m_aPort.clear() - 1;
4409             }
4410             else
4411                 nDelta = m_aPort.set(m_aAbsURIRef,
4412                                  rtl::OUString::valueOf(sal_Int64(nThePort)));
4413             m_aPath += nDelta;
4414             m_aQuery += nDelta;
4415             m_aFragment += nDelta;
4416         }
4417     }
4418 }
4419 
4420 //============================================================================
getSegmentCount(bool bIgnoreFinalSlash) const4421 sal_Int32 INetURLObject::getSegmentCount(bool bIgnoreFinalSlash) const
4422 {
4423     if (!checkHierarchical())
4424         return 0;
4425 
4426     sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4427     sal_Unicode const * pEnd = p + m_aPath.getLength();
4428     if (bIgnoreFinalSlash && pEnd > p && pEnd[-1] == '/')
4429         --pEnd;
4430     sal_Int32 n = p == pEnd || *p == '/' ? 0 : 1;
4431     while (p != pEnd)
4432         if (*p++ == '/')
4433             ++n;
4434     return n;
4435 }
4436 
4437 //============================================================================
removeSegment(sal_Int32 nIndex,bool bIgnoreFinalSlash)4438 bool INetURLObject::removeSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4439 {
4440     SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4441     if (!aSegment.isPresent())
4442         return false;
4443 
4444     rtl::OUStringBuffer aNewPath;
4445     aNewPath.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
4446                        aSegment.getBegin() - m_aPath.getBegin());
4447     if (bIgnoreFinalSlash && aSegment.getEnd() == m_aPath.getEnd())
4448         aNewPath.append(sal_Unicode('/'));
4449     else
4450         aNewPath.append(m_aAbsURIRef.getStr() + aSegment.getEnd(),
4451                         m_aPath.getEnd() - aSegment.getEnd());
4452     if (aNewPath.getLength() == 0 && !aSegment.isEmpty() &&
4453         m_aAbsURIRef[aSegment.getBegin()] == '/')
4454     {
4455         aNewPath.append(sal_Unicode('/'));
4456     }
4457 
4458     return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4459         RTL_TEXTENCODING_UTF8);
4460 }
4461 
4462 //============================================================================
getName(sal_Int32 nIndex,bool bIgnoreFinalSlash,DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const4463 rtl::OUString INetURLObject::getName(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4464                                  DecodeMechanism eMechanism,
4465                                  rtl_TextEncoding eCharset) const
4466 {
4467     SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4468     if (!aSegment.isPresent())
4469         return rtl::OUString();
4470 
4471     sal_Unicode const * pSegBegin
4472         = m_aAbsURIRef.getStr() + aSegment.getBegin();
4473     sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4474 
4475     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4476         ++pSegBegin;
4477     sal_Unicode const * p = pSegBegin;
4478     while (p != pSegEnd && *p != ';')
4479         ++p;
4480 
4481     return decode(pSegBegin, p, getEscapePrefix(), eMechanism, eCharset);
4482 }
4483 
4484 //============================================================================
setName(rtl::OUString const & rTheName,sal_Int32 nIndex,bool bIgnoreFinalSlash,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)4485 bool INetURLObject::setName(rtl::OUString const & rTheName, sal_Int32 nIndex,
4486                             bool bIgnoreFinalSlash,
4487                             EncodeMechanism eMechanism,
4488                             rtl_TextEncoding eCharset)
4489 {
4490     SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4491     if (!aSegment.isPresent())
4492         return false;
4493 
4494     sal_Unicode const * pPathBegin
4495         = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4496     sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4497     sal_Unicode const * pSegBegin
4498         = m_aAbsURIRef.getStr() + aSegment.getBegin();
4499     sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4500 
4501     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4502         ++pSegBegin;
4503     sal_Unicode const * p = pSegBegin;
4504     while (p != pSegEnd && *p != ';')
4505         ++p;
4506 
4507     rtl::OUStringBuffer aNewPath;
4508     aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4509     aNewPath.append(encodeText(rTheName, false, PART_PCHAR, getEscapePrefix(),
4510         eMechanism, eCharset, true));
4511     aNewPath.append(p, pPathEnd - p);
4512 
4513     return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4514         RTL_TEXTENCODING_UTF8);
4515 }
4516 
4517 //============================================================================
hasExtension(sal_Int32 nIndex,bool bIgnoreFinalSlash) const4518 bool INetURLObject::hasExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4519     const
4520 {
4521     SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4522     if (!aSegment.isPresent())
4523         return false;
4524 
4525     sal_Unicode const * pSegBegin
4526         = m_aAbsURIRef.getStr() + aSegment.getBegin();
4527     sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4528 
4529     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4530         ++pSegBegin;
4531     for (sal_Unicode const * p = pSegBegin; p != pSegEnd && *p != ';'; ++p)
4532         if (*p == '.' && p != pSegBegin)
4533             return true;
4534     return false;
4535 }
4536 
4537 //============================================================================
getBase(sal_Int32 nIndex,bool bIgnoreFinalSlash,DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const4538 rtl::OUString INetURLObject::getBase(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4539                                  DecodeMechanism eMechanism,
4540                                  rtl_TextEncoding eCharset) const
4541 {
4542     SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4543     if (!aSegment.isPresent())
4544         return rtl::OUString();
4545 
4546     sal_Unicode const * pSegBegin
4547         = m_aAbsURIRef.getStr() + aSegment.getBegin();
4548     sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4549 
4550     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4551         ++pSegBegin;
4552     sal_Unicode const * pExtension = 0;
4553     sal_Unicode const * p = pSegBegin;
4554     for (; p != pSegEnd && *p != ';'; ++p)
4555         if (*p == '.' && p != pSegBegin)
4556             pExtension = p;
4557     if (!pExtension)
4558         pExtension = p;
4559 
4560     return decode(pSegBegin, pExtension, getEscapePrefix(), eMechanism,
4561                   eCharset);
4562 }
4563 
4564 //============================================================================
setBase(rtl::OUString const & rTheBase,sal_Int32 nIndex,bool bIgnoreFinalSlash,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)4565 bool INetURLObject::setBase(rtl::OUString const & rTheBase, sal_Int32 nIndex,
4566                             bool bIgnoreFinalSlash,
4567                             EncodeMechanism eMechanism,
4568                             rtl_TextEncoding eCharset)
4569 {
4570     SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4571     if (!aSegment.isPresent())
4572         return false;
4573 
4574     sal_Unicode const * pPathBegin
4575         = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4576     sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4577     sal_Unicode const * pSegBegin
4578         = m_aAbsURIRef.getStr() + aSegment.getBegin();
4579     sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4580 
4581     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4582         ++pSegBegin;
4583     sal_Unicode const * pExtension = 0;
4584     sal_Unicode const * p = pSegBegin;
4585     for (; p != pSegEnd && *p != ';'; ++p)
4586         if (*p == '.' && p != pSegBegin)
4587             pExtension = p;
4588     if (!pExtension)
4589         pExtension = p;
4590 
4591     rtl::OUStringBuffer aNewPath;
4592     aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4593     aNewPath.append(encodeText(rTheBase, false, PART_PCHAR, getEscapePrefix(),
4594         eMechanism, eCharset, true));
4595     aNewPath.append(pExtension, pPathEnd - pExtension);
4596 
4597     return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4598         RTL_TEXTENCODING_UTF8);
4599 }
4600 
4601 //============================================================================
getExtension(sal_Int32 nIndex,bool bIgnoreFinalSlash,DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const4602 rtl::OUString INetURLObject::getExtension(sal_Int32 nIndex,
4603                                       bool bIgnoreFinalSlash,
4604                                       DecodeMechanism eMechanism,
4605                                       rtl_TextEncoding eCharset) const
4606 {
4607     SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4608     if (!aSegment.isPresent())
4609         return rtl::OUString();
4610 
4611     sal_Unicode const * pSegBegin
4612         = m_aAbsURIRef.getStr() + aSegment.getBegin();
4613     sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4614 
4615     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4616         ++pSegBegin;
4617     sal_Unicode const * pExtension = 0;
4618     sal_Unicode const * p = pSegBegin;
4619     for (; p != pSegEnd && *p != ';'; ++p)
4620         if (*p == '.' && p != pSegBegin)
4621             pExtension = p;
4622 
4623     if (!pExtension)
4624         return rtl::OUString();
4625 
4626     return decode(pExtension + 1, p, getEscapePrefix(), eMechanism, eCharset);
4627 }
4628 
4629 //============================================================================
setExtension(rtl::OUString const & rTheExtension,sal_Int32 nIndex,bool bIgnoreFinalSlash,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)4630 bool INetURLObject::setExtension(rtl::OUString const & rTheExtension,
4631                                  sal_Int32 nIndex, bool bIgnoreFinalSlash,
4632                                  EncodeMechanism eMechanism,
4633                                  rtl_TextEncoding eCharset)
4634 {
4635     SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4636     if (!aSegment.isPresent())
4637         return false;
4638 
4639     sal_Unicode const * pPathBegin
4640         = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4641     sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4642     sal_Unicode const * pSegBegin
4643         = m_aAbsURIRef.getStr() + aSegment.getBegin();
4644     sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4645 
4646     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4647         ++pSegBegin;
4648     sal_Unicode const * pExtension = 0;
4649     sal_Unicode const * p = pSegBegin;
4650     for (; p != pSegEnd && *p != ';'; ++p)
4651         if (*p == '.' && p != pSegBegin)
4652             pExtension = p;
4653     if (!pExtension)
4654         pExtension = p;
4655 
4656     rtl::OUStringBuffer aNewPath;
4657     aNewPath.append(pPathBegin, pExtension - pPathBegin);
4658     aNewPath.append(sal_Unicode('.'));
4659     aNewPath.append(encodeText(rTheExtension, false, PART_PCHAR,
4660         getEscapePrefix(), eMechanism, eCharset, true));
4661     aNewPath.append(p, pPathEnd - p);
4662 
4663     return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4664         RTL_TEXTENCODING_UTF8);
4665 }
4666 
4667 //============================================================================
removeExtension(sal_Int32 nIndex,bool bIgnoreFinalSlash)4668 bool INetURLObject::removeExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4669 {
4670     SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4671     if (!aSegment.isPresent())
4672         return false;
4673 
4674     sal_Unicode const * pPathBegin
4675         = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4676     sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4677     sal_Unicode const * pSegBegin
4678         = m_aAbsURIRef.getStr() + aSegment.getBegin();
4679     sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4680 
4681     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4682         ++pSegBegin;
4683     sal_Unicode const * pExtension = 0;
4684     sal_Unicode const * p = pSegBegin;
4685     for (; p != pSegEnd && *p != ';'; ++p)
4686         if (*p == '.' && p != pSegBegin)
4687             pExtension = p;
4688     if (!pExtension)
4689         return true;
4690 
4691     rtl::OUStringBuffer aNewPath;
4692     aNewPath.append(pPathBegin, pExtension - pPathBegin);
4693     aNewPath.append(p, pPathEnd - p);
4694 
4695     return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4696         RTL_TEXTENCODING_UTF8);
4697 }
4698 
4699 //============================================================================
hasFinalSlash() const4700 bool INetURLObject::hasFinalSlash() const
4701 {
4702     if (!checkHierarchical())
4703         return false;
4704 
4705     sal_Unicode const * pPathBegin
4706         = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4707     sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4708     return pPathEnd > pPathBegin && pPathEnd[-1] == '/';
4709 }
4710 
4711 //============================================================================
setFinalSlash()4712 bool INetURLObject::setFinalSlash()
4713 {
4714     if (!checkHierarchical())
4715         return false;
4716 
4717     sal_Unicode const * pPathBegin
4718         = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4719     sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4720     if (pPathEnd > pPathBegin && pPathEnd[-1] == '/')
4721         return true;
4722 
4723     rtl::OUStringBuffer aNewPath;
4724     aNewPath.append(pPathBegin, pPathEnd - pPathBegin);
4725     aNewPath.append(sal_Unicode('/'));
4726 
4727     return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4728         RTL_TEXTENCODING_UTF8);
4729 }
4730 
4731 //============================================================================
removeFinalSlash()4732 bool INetURLObject::removeFinalSlash()
4733 {
4734     if (!checkHierarchical())
4735         return false;
4736 
4737     sal_Unicode const * pPathBegin
4738         = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4739     sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4740     if (pPathEnd <= pPathBegin || pPathEnd[-1] != '/')
4741         return true;
4742 
4743     --pPathEnd;
4744     if (pPathEnd == pPathBegin && *pPathBegin == '/')
4745         return false;
4746     rtl::OUString aNewPath(pPathBegin, pPathEnd - pPathBegin);
4747 
4748     return setPath(aNewPath, false, NOT_CANONIC, RTL_TEXTENCODING_UTF8);
4749 }
4750 
4751 //============================================================================
4752 // static
createFragment(rtl::OUString const & rText)4753 rtl::OUString INetURLObject::createFragment(rtl::OUString const & rText)
4754 {
4755     rtl::OUString aFragment(rText);
4756     for (sal_Int32 i = 0; i < aFragment.getLength();)
4757     {
4758         sal_Unicode c = aFragment.getStr()[i];
4759         if (mustEncode(c, PART_CREATEFRAGMENT))
4760             aFragment = aFragment.replaceAt(i, 1, rtl::OUString());
4761         else
4762             ++i;
4763     }
4764     return aFragment;
4765 }
4766 
4767 //============================================================================
setFSysPath(rtl::OUString const & rFSysPath,FSysStyle eStyle)4768 bool INetURLObject::setFSysPath(rtl::OUString const & rFSysPath,
4769     FSysStyle eStyle)
4770 {
4771     sal_Unicode const * pFSysBegin = rFSysPath.getStr();
4772     sal_Unicode const * pFSysEnd = pFSysBegin + rFSysPath.getLength();
4773 
4774     switch ((eStyle & FSYS_VOS ? 1 : 0)
4775                 + (eStyle & FSYS_UNX ? 1 : 0)
4776                 + (eStyle & FSYS_DOS ? 1 : 0)
4777                 + (eStyle & FSYS_MAC ? 1 : 0))
4778     {
4779         case 0:
4780             return false;
4781 
4782         case 1:
4783             break;
4784 
4785         default:
4786             if (eStyle & FSYS_VOS
4787                 && pFSysEnd - pFSysBegin >= 2
4788                 && pFSysBegin[0] == '/'
4789                 && pFSysBegin[1] == '/')
4790             {
4791                 if (pFSysEnd - pFSysBegin >= 3
4792                     && pFSysBegin[2] == '.'
4793                     && (pFSysEnd - pFSysBegin == 3 || pFSysBegin[3] == '/'))
4794                 {
4795                     eStyle = FSYS_VOS; // Production T1
4796                     break;
4797                 }
4798 
4799                 sal_Unicode const * p = pFSysBegin + 2;
4800                 rtl::OUString aHost;
4801                 if (parseHost(p, pFSysEnd, aHost)
4802                     && (p == pFSysEnd || *p == '/'))
4803                 {
4804                     eStyle = FSYS_VOS; // Production T2
4805                     break;
4806                 }
4807             }
4808 
4809             if (eStyle & FSYS_DOS
4810                 && pFSysEnd - pFSysBegin >= 2
4811                 && pFSysBegin[0] == '\\'
4812                 && pFSysBegin[1] == '\\')
4813             {
4814                 sal_Unicode const * p = pFSysBegin + 2;
4815                 rtl::OUString aHost;
4816                 if (parseHost(p, pFSysEnd, aHost)
4817                     && (p == pFSysEnd || *p == '\\'))
4818                 {
4819                     eStyle = FSYS_DOS; // Production T3
4820                     break;
4821                 }
4822             }
4823 
4824             if (eStyle & FSYS_DOS
4825                 && pFSysEnd - pFSysBegin >= 2
4826                 && INetMIME::isAlpha(pFSysBegin[0])
4827                 && pFSysBegin[1] == ':'
4828                 && (pFSysEnd - pFSysBegin == 2
4829                     || pFSysBegin[2] == '/'
4830                     || pFSysBegin[2] == '\\'))
4831             {
4832                 eStyle = FSYS_DOS; // Productions T4, T5
4833                 break;
4834             }
4835 
4836             if (!(eStyle & (FSYS_UNX | FSYS_DOS | FSYS_MAC)))
4837                 return false;
4838 
4839             eStyle = guessFSysStyleByCounting(pFSysBegin, pFSysEnd, eStyle);
4840                 // Production T6
4841             break;
4842     }
4843 
4844     rtl::OUStringBuffer aSynAbsURIRef(rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("file://")));
4845 
4846     switch (eStyle)
4847     {
4848         case FSYS_VOS:
4849         {
4850             sal_Unicode const * p = pFSysBegin;
4851             if (pFSysEnd - p < 2 || *p++ != '/' || *p++ != '/')
4852                 return false;
4853             if (p != pFSysEnd && *p == '.'
4854                 && (pFSysEnd - p == 1 || p[1] == '/'))
4855                 ++p;
4856             for (; p != pFSysEnd; ++p)
4857                 switch (*p)
4858                 {
4859                     case '#':
4860                     case '%':
4861                         appendEscape(aSynAbsURIRef, '%', *p);
4862                         break;
4863 
4864                     default:
4865                         aSynAbsURIRef.append(*p);
4866                         break;
4867                 }
4868             break;
4869         }
4870 
4871         case FSYS_UNX:
4872         {
4873             sal_Unicode const * p = pFSysBegin;
4874             if (p != pFSysEnd && *p != '/')
4875                 return false;
4876             for (; p != pFSysEnd; ++p)
4877                 switch (*p)
4878                 {
4879                     case '|':
4880                     case '#':
4881                     case '%':
4882                         appendEscape(aSynAbsURIRef, '%', *p);
4883                         break;
4884 
4885                     default:
4886                         aSynAbsURIRef.append(*p);
4887                         break;
4888                 }
4889             break;
4890         }
4891 
4892         case FSYS_DOS:
4893         {
4894             sal_uInt32 nAltDelimiter = 0x80000000;
4895             sal_Unicode const * p = pFSysBegin;
4896             if (pFSysEnd - p >= 3 && p[0] == '\\' && p[1] == '\\')
4897                 p += 2;
4898             else
4899             {
4900                 aSynAbsURIRef.append(sal_Unicode('/'));
4901                 if (pFSysEnd - p >= 2
4902                     && INetMIME::isAlpha(p[0])
4903                     && p[1] == ':'
4904                     && (pFSysEnd - p == 2 || p[2] == '\\' || p[2] == '/'))
4905                     nAltDelimiter = '/';
4906             }
4907             for (; p != pFSysEnd; ++p)
4908                 if (*p == '\\' || *p == nAltDelimiter)
4909                     aSynAbsURIRef.append(sal_Unicode('/'));
4910                 else
4911                     switch (*p)
4912                     {
4913                         case '/':
4914                         case '#':
4915                         case '%':
4916                             appendEscape(aSynAbsURIRef, '%', *p);
4917                             break;
4918 
4919                         default:
4920                             aSynAbsURIRef.append(*p);
4921                             break;
4922                     }
4923             break;
4924         }
4925 
4926         case FSYS_MAC:
4927             aSynAbsURIRef.append(sal_Unicode('/'));
4928             {for (sal_Unicode const * p = pFSysBegin; p != pFSysEnd; ++p)
4929                 switch (*p)
4930                 {
4931                     case ':':
4932                         aSynAbsURIRef.append(sal_Unicode('/'));
4933                         break;
4934 
4935                     case '/':
4936                     case '|':
4937                     case '#':
4938                     case '%':
4939                         appendEscape(aSynAbsURIRef, '%', *p);
4940                         break;
4941 
4942                     default:
4943                         aSynAbsURIRef.append(*p);
4944                         break;
4945                 }
4946             }
4947             break;
4948 
4949         default:
4950             OSL_ASSERT(false);
4951             break;
4952     }
4953 
4954     INetURLObject aTemp(aSynAbsURIRef.makeStringAndClear(), WAS_ENCODED,
4955         RTL_TEXTENCODING_UTF8);
4956     if (aTemp.HasError())
4957         return false;
4958 
4959     *this = aTemp;
4960     return true;
4961 }
4962 
4963 //============================================================================
getFSysPath(FSysStyle eStyle,sal_Unicode * pDelimiter) const4964 rtl::OUString INetURLObject::getFSysPath(FSysStyle eStyle,
4965                                      sal_Unicode * pDelimiter) const
4966 {
4967     if (m_eScheme != INET_PROT_FILE)
4968         return rtl::OUString();
4969 
4970     if ((eStyle & FSYS_VOS ? 1 : 0)
4971                 + (eStyle & FSYS_UNX ? 1 : 0)
4972                 + (eStyle & FSYS_DOS ? 1 : 0)
4973                 + (eStyle & FSYS_MAC ? 1 : 0)
4974             > 1)
4975     {
4976         eStyle = eStyle & FSYS_VOS
4977                  && m_aHost.isPresent()
4978                  && m_aHost.getLength() > 0 ?
4979                      FSYS_VOS :
4980                  hasDosVolume(eStyle)
4981                  || ((eStyle & FSYS_DOS) != 0
4982                     && m_aHost.isPresent()
4983                     && m_aHost.getLength() > 0) ?
4984                      FSYS_DOS :
4985                  eStyle & FSYS_UNX
4986                  && (!m_aHost.isPresent() || m_aHost.getLength() == 0) ?
4987                      FSYS_UNX :
4988                      FSysStyle(0);
4989     }
4990 
4991     switch (eStyle)
4992     {
4993         case FSYS_VOS:
4994         {
4995             if (pDelimiter)
4996                 *pDelimiter = '/';
4997 
4998             rtl::OUStringBuffer aSynFSysPath;
4999             aSynFSysPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
5000             if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5001                 aSynFSysPath.append(decode(m_aHost, '%', DECODE_WITH_CHARSET,
5002                                        RTL_TEXTENCODING_UTF8));
5003             else
5004                 aSynFSysPath.append(sal_Unicode('.'));
5005             aSynFSysPath.append(decode(m_aPath, '%', DECODE_WITH_CHARSET,
5006                                    RTL_TEXTENCODING_UTF8));
5007             return aSynFSysPath.makeStringAndClear();
5008         }
5009 
5010         case FSYS_UNX:
5011         {
5012             if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5013                 return rtl::OUString();
5014 
5015             if (pDelimiter)
5016                 *pDelimiter = '/';
5017 
5018             return decode(m_aPath, '%', DECODE_WITH_CHARSET,
5019                           RTL_TEXTENCODING_UTF8);
5020         }
5021 
5022         case FSYS_DOS:
5023         {
5024             if (pDelimiter)
5025                 *pDelimiter = '\\';
5026 
5027             rtl::OUStringBuffer aSynFSysPath;
5028             if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5029             {
5030                 aSynFSysPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\\\"));
5031                 aSynFSysPath.append(decode(m_aHost, '%', DECODE_WITH_CHARSET,
5032                                        RTL_TEXTENCODING_UTF8));
5033                 aSynFSysPath.append(sal_Unicode('\\'));
5034             }
5035             sal_Unicode const * p
5036                 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
5037             sal_Unicode const * pEnd = p + m_aPath.getLength();
5038             DBG_ASSERT(p < pEnd && *p == '/',
5039                        "INetURLObject::getFSysPath(): Bad path");
5040             ++p;
5041             while (p < pEnd)
5042             {
5043                 EscapeType eEscapeType;
5044                 sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, '%', WAS_ENCODED,
5045                                              RTL_TEXTENCODING_UTF8,
5046                                              eEscapeType);
5047                 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
5048                     aSynFSysPath.append(sal_Unicode('\\'));
5049                 else
5050                     aSynFSysPath.appendUtf32(nUTF32);
5051             }
5052             return aSynFSysPath.makeStringAndClear();
5053         }
5054 
5055         case FSYS_MAC:
5056         {
5057             if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5058                 return rtl::OUString();
5059 
5060             if (pDelimiter)
5061                 *pDelimiter = ':';
5062 
5063             rtl::OUStringBuffer aSynFSysPath;
5064             sal_Unicode const * p
5065                 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
5066             sal_Unicode const * pEnd = p + m_aPath.getLength();
5067             DBG_ASSERT(p < pEnd && *p == '/',
5068                        "INetURLObject::getFSysPath(): Bad path");
5069             ++p;
5070             while (p < pEnd)
5071             {
5072                 EscapeType eEscapeType;
5073                 sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, '%', WAS_ENCODED,
5074                                              RTL_TEXTENCODING_UTF8,
5075                                              eEscapeType);
5076                 if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
5077                     aSynFSysPath.append(sal_Unicode(':'));
5078                 else
5079                     aSynFSysPath.appendUtf32(nUTF32);
5080             }
5081             return aSynFSysPath.makeStringAndClear();
5082         }
5083 
5084         default:
5085             return rtl::OUString();
5086     }
5087 }
5088 
5089 //============================================================================
HasMsgId() const5090 bool INetURLObject::HasMsgId() const
5091 {
5092     if (m_eScheme != INET_PROT_POP3)
5093         return false;
5094     sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
5095     sal_Unicode const * pEnd = p + m_aPath.getLength();
5096     for (; p < pEnd; ++p)
5097         if (*p == '<')
5098             return true;
5099     return false;
5100 }
5101 
5102 //============================================================================
GetMsgId(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const5103 rtl::OUString INetURLObject::GetMsgId(DecodeMechanism eMechanism,
5104                                   rtl_TextEncoding eCharset) const
5105 {
5106     if (m_eScheme != INET_PROT_POP3)
5107         return rtl::OUString();
5108     sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
5109     sal_Unicode const * pEnd = p + m_aPath.getLength();
5110     for (; p < pEnd; ++p)
5111         if (*p == '<')
5112             return decode(p, pEnd, getEscapePrefix(), eMechanism, eCharset);
5113     return rtl::OUString();
5114 }
5115 
5116 //============================================================================
5117 // static
appendUCS4Escape(rtl::OUStringBuffer & rTheText,sal_Char cEscapePrefix,sal_uInt32 nUCS4)5118 void INetURLObject::appendUCS4Escape(rtl::OUStringBuffer & rTheText,
5119                                      sal_Char cEscapePrefix, sal_uInt32 nUCS4)
5120 {
5121     DBG_ASSERT(nUCS4 < 0x80000000,
5122                "INetURLObject::appendUCS4Escape(): Bad char");
5123     if (nUCS4 < 0x80)
5124         appendEscape(rTheText, cEscapePrefix, nUCS4);
5125     else if (nUCS4 < 0x800)
5126     {
5127         appendEscape(rTheText, cEscapePrefix, nUCS4 >> 6 | 0xC0);
5128         appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5129     }
5130     else if (nUCS4 < 0x10000)
5131     {
5132         appendEscape(rTheText, cEscapePrefix, nUCS4 >> 12 | 0xE0);
5133         appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5134         appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5135     }
5136     else if (nUCS4 < 0x200000)
5137     {
5138         appendEscape(rTheText, cEscapePrefix, nUCS4 >> 18 | 0xF0);
5139         appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
5140         appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5141         appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5142     }
5143     else if (nUCS4 < 0x4000000)
5144     {
5145         appendEscape(rTheText, cEscapePrefix, nUCS4 >> 24 | 0xF8);
5146         appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 18 & 0x3F) | 0x80);
5147         appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
5148         appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5149         appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5150     }
5151     else
5152     {
5153         appendEscape(rTheText, cEscapePrefix, nUCS4 >> 30 | 0xFC);
5154         appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 24 & 0x3F) | 0x80);
5155         appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 18 & 0x3F) | 0x80);
5156         appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
5157         appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5158         appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5159     }
5160 }
5161 
5162 //============================================================================
5163 // static
appendUCS4(rtl::OUStringBuffer & rTheText,sal_uInt32 nUCS4,EscapeType eEscapeType,bool bOctets,Part ePart,sal_Char cEscapePrefix,rtl_TextEncoding eCharset,bool bKeepVisibleEscapes)5164 void INetURLObject::appendUCS4(rtl::OUStringBuffer& rTheText, sal_uInt32 nUCS4,
5165                                EscapeType eEscapeType, bool bOctets,
5166                                Part ePart, sal_Char cEscapePrefix,
5167                                rtl_TextEncoding eCharset,
5168                                bool bKeepVisibleEscapes)
5169 {
5170     bool bEscape;
5171     rtl_TextEncoding eTargetCharset = RTL_TEXTENCODING_DONTKNOW;
5172     switch (eEscapeType)
5173     {
5174         case ESCAPE_NO:
5175             if (mustEncode(nUCS4, ePart))
5176             {
5177                 bEscape = true;
5178                 eTargetCharset = bOctets ? RTL_TEXTENCODING_ISO_8859_1 :
5179                                            RTL_TEXTENCODING_UTF8;
5180             }
5181             else
5182                 bEscape = false;
5183             break;
5184 
5185         case ESCAPE_OCTET:
5186             bEscape = true;
5187             eTargetCharset = RTL_TEXTENCODING_ISO_8859_1;
5188             break;
5189 
5190         case ESCAPE_UTF32:
5191             if (mustEncode(nUCS4, ePart))
5192             {
5193                 bEscape = true;
5194                 eTargetCharset = eCharset;
5195             }
5196             else if (bKeepVisibleEscapes && INetMIME::isVisible(nUCS4))
5197             {
5198                 bEscape = true;
5199                 eTargetCharset = RTL_TEXTENCODING_ASCII_US;
5200             }
5201             else
5202                 bEscape = false;
5203             break;
5204         default:
5205             bEscape = false;
5206     }
5207 
5208     if (bEscape)
5209     {
5210         switch (eTargetCharset)
5211         {
5212             default:
5213                 DBG_ERROR("INetURLObject::appendUCS4(): Unsupported charset");
5214             case RTL_TEXTENCODING_ASCII_US:
5215             case RTL_TEXTENCODING_ISO_8859_1:
5216                 appendEscape(rTheText, cEscapePrefix, nUCS4);
5217                 break;
5218 
5219             case RTL_TEXTENCODING_UTF8:
5220                 appendUCS4Escape(rTheText, cEscapePrefix, nUCS4);
5221                 break;
5222         }
5223     }
5224     else
5225         rTheText.append(sal_Unicode(nUCS4));
5226 }
5227 
5228 //============================================================================
5229 // static
getUTF32(sal_Unicode const * & rBegin,sal_Unicode const * pEnd,bool bOctets,sal_Char cEscapePrefix,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,EscapeType & rEscapeType)5230 sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
5231                                    sal_Unicode const * pEnd, bool bOctets,
5232                                    sal_Char cEscapePrefix,
5233                                    EncodeMechanism eMechanism,
5234                                    rtl_TextEncoding eCharset,
5235                                    EscapeType & rEscapeType)
5236 {
5237     DBG_ASSERT(rBegin < pEnd, "INetURLObject::getUTF32(): Bad sequence");
5238     sal_uInt32 nUTF32 = bOctets ? *rBegin++ :
5239                                   INetMIME::getUTF32Character(rBegin, pEnd);
5240     switch (eMechanism)
5241     {
5242         case ENCODE_ALL:
5243             rEscapeType = ESCAPE_NO;
5244             break;
5245 
5246         case WAS_ENCODED:
5247         {
5248             int nWeight1;
5249             int nWeight2;
5250             if (nUTF32 == sal_uChar(cEscapePrefix) && rBegin + 1 < pEnd
5251                 && (nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0
5252                 && (nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0)
5253             {
5254                 rBegin += 2;
5255                 nUTF32 = nWeight1 << 4 | nWeight2;
5256                 switch (eCharset)
5257                 {
5258                     default:
5259                         DBG_ERROR(
5260                             "INetURLObject::getUTF32(): Unsupported charset");
5261                     case RTL_TEXTENCODING_ASCII_US:
5262                         rEscapeType = INetMIME::isUSASCII(nUTF32) ?
5263                                           ESCAPE_UTF32 : ESCAPE_OCTET;
5264                         break;
5265 
5266                     case RTL_TEXTENCODING_ISO_8859_1:
5267                         rEscapeType = ESCAPE_UTF32;
5268                         break;
5269 
5270                     case RTL_TEXTENCODING_UTF8:
5271                         if (INetMIME::isUSASCII(nUTF32))
5272                             rEscapeType = ESCAPE_UTF32;
5273                         else
5274                         {
5275                             if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4)
5276                             {
5277                                 sal_uInt32 nEncoded;
5278                                 int nShift;
5279                                 sal_uInt32 nMin;
5280                                 if (nUTF32 <= 0xDF)
5281                                 {
5282                                     nEncoded = (nUTF32 & 0x1F) << 6;
5283                                     nShift = 0;
5284                                     nMin = 0x80;
5285                                 }
5286                                 else if (nUTF32 <= 0xEF)
5287                                 {
5288                                     nEncoded = (nUTF32 & 0x0F) << 12;
5289                                     nShift = 6;
5290                                     nMin = 0x800;
5291                                 }
5292                                 else
5293                                 {
5294                                     nEncoded = (nUTF32 & 0x07) << 18;
5295                                     nShift = 12;
5296                                     nMin = 0x10000;
5297                                 }
5298                                 sal_Unicode const * p = rBegin;
5299                                 bool bUTF8 = true;
5300                                 for (;;)
5301                                 {
5302                                     if (pEnd - p < 3
5303                                         || p[0] != cEscapePrefix
5304                                         || (nWeight1
5305                                                = INetMIME::getHexWeight(p[1]))
5306                                                < 8
5307                                         || nWeight1 > 11
5308                                         || (nWeight2
5309                                                = INetMIME::getHexWeight(p[2]))
5310                                                < 0)
5311                                     {
5312                                         bUTF8 = false;
5313                                         break;
5314                                     }
5315                                     p += 3;
5316                                     nEncoded
5317                                         |= ((nWeight1 & 3) << 4 | nWeight2)
5318                                                << nShift;
5319                                     if (nShift == 0)
5320                                         break;
5321                                     nShift -= 6;
5322                                 }
5323                                 if (bUTF8 && nEncoded >= nMin
5324                                     && !INetMIME::isHighSurrogate(nEncoded)
5325                                     && !INetMIME::isLowSurrogate(nEncoded)
5326                                     && nEncoded <= 0x10FFFF)
5327                                 {
5328                                     rBegin = p;
5329                                     nUTF32 = nEncoded;
5330                                     rEscapeType = ESCAPE_UTF32;
5331                                     break;
5332                                 }
5333                             }
5334                             rEscapeType = ESCAPE_OCTET;
5335                         }
5336                         break;
5337                 }
5338             }
5339             else
5340                 rEscapeType = ESCAPE_NO;
5341             break;
5342         }
5343 
5344         case NOT_CANONIC:
5345         {
5346             int nWeight1;
5347             int nWeight2;
5348             if (nUTF32 == sal_uChar(cEscapePrefix) && rBegin + 1 < pEnd
5349                 && ((nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0)
5350                 && ((nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0))
5351             {
5352                 rBegin += 2;
5353                 nUTF32 = nWeight1 << 4 | nWeight2;
5354                 rEscapeType = ESCAPE_OCTET;
5355             }
5356             else
5357                 rEscapeType = ESCAPE_NO;
5358             break;
5359         }
5360     }
5361     return nUTF32;
5362 }
5363 
5364 //============================================================================
5365 // static
scanDomain(sal_Unicode const * & rBegin,sal_Unicode const * pEnd,bool bEager)5366 sal_uInt32 INetURLObject::scanDomain(sal_Unicode const *& rBegin,
5367                                      sal_Unicode const * pEnd,
5368                                      bool bEager)
5369 {
5370     enum State { STATE_DOT, STATE_LABEL, STATE_HYPHEN };
5371     State eState = STATE_DOT;
5372     sal_Int32 nLabels = 0;
5373     sal_Unicode const * pLastAlphanumeric = 0;
5374     for (sal_Unicode const * p = rBegin;; ++p)
5375         switch (eState)
5376         {
5377             case STATE_DOT:
5378                 if (p != pEnd && INetMIME::isAlphanumeric(*p))
5379                 {
5380                     ++nLabels;
5381                     eState = STATE_LABEL;
5382                     break;
5383                 }
5384                 if (bEager || nLabels == 0)
5385                     return 0;
5386                 rBegin = p - 1;
5387                 return nLabels;
5388 
5389             case STATE_LABEL:
5390                 if (p != pEnd)
5391                 {
5392                     if (INetMIME::isAlphanumeric(*p))
5393                         break;
5394                     else if (*p == '.')
5395                     {
5396                         eState = STATE_DOT;
5397                         break;
5398                     }
5399                     else if (*p == '-')
5400                     {
5401                         pLastAlphanumeric = p;
5402                         eState = STATE_HYPHEN;
5403                         break;
5404                     }
5405                 }
5406                 rBegin = p;
5407                 return nLabels;
5408 
5409             case STATE_HYPHEN:
5410                 if (p != pEnd)
5411                 {
5412                     if (INetMIME::isAlphanumeric(*p))
5413                     {
5414                         eState = STATE_LABEL;
5415                         break;
5416                     }
5417                     else if (*p == '-')
5418                         break;
5419                 }
5420                 if (bEager)
5421                     return 0;
5422                 rBegin = pLastAlphanumeric;
5423                 return nLabels;
5424         }
5425 }
5426 
5427 //============================================================================
5428 // static
scanIPv6reference(sal_Unicode const * & rBegin,sal_Unicode const * pEnd)5429 bool INetURLObject::scanIPv6reference(sal_Unicode const *& rBegin,
5430                                       sal_Unicode const * pEnd)
5431 {
5432     if (rBegin != pEnd && *rBegin == '[') {
5433         sal_Unicode const * p = rBegin + 1;
5434         //TODO: check for valid IPv6address (RFC 2373):
5435         while (p != pEnd && (INetMIME::isHexDigit(*p) || *p == ':' || *p == '.'))
5436         {
5437             ++p;
5438         }
5439         if (p != pEnd && *p == ']') {
5440             rBegin = p + 1;
5441             return true;
5442         }
5443     }
5444     return false;
5445 }
5446 
5447 //============================================================================
GetPartBeforeLastName(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const5448 rtl::OUString INetURLObject::GetPartBeforeLastName(DecodeMechanism eMechanism,
5449                                                rtl_TextEncoding eCharset)
5450     const
5451 {
5452     if (!checkHierarchical())
5453         return rtl::OUString();
5454     INetURLObject aTemp(*this);
5455     aTemp.clearFragment();
5456     aTemp.clearQuery();
5457     aTemp.removeSegment(LAST_SEGMENT, false);
5458     aTemp.setFinalSlash();
5459     return aTemp.GetMainURL(eMechanism, eCharset);
5460 }
5461 
5462 //============================================================================
GetLastName(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const5463 rtl::OUString INetURLObject::GetLastName(DecodeMechanism eMechanism,
5464                                      rtl_TextEncoding eCharset) const
5465 {
5466     return getName(LAST_SEGMENT, true, eMechanism, eCharset);
5467 }
5468 
5469 //============================================================================
GetFileExtension(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const5470 rtl::OUString INetURLObject::GetFileExtension(DecodeMechanism eMechanism,
5471                                           rtl_TextEncoding eCharset) const
5472 {
5473     return getExtension(LAST_SEGMENT, false, eMechanism, eCharset);
5474 }
5475 
5476 //============================================================================
CutLastName()5477 bool INetURLObject::CutLastName()
5478 {
5479     INetURLObject aTemp(*this);
5480     aTemp.clearFragment();
5481     aTemp.clearQuery();
5482     if (!aTemp.removeSegment(LAST_SEGMENT, false))
5483         return false;
5484     *this = aTemp;
5485     return true;
5486 }
5487 
5488 //============================================================================
PathToFileName() const5489 rtl::OUString INetURLObject::PathToFileName() const
5490 {
5491     if (m_eScheme != INET_PROT_FILE)
5492         return rtl::OUString();
5493     rtl::OUString aSystemPath;
5494     if (osl::FileBase::getSystemPathFromFileURL(
5495                 decode(m_aAbsURIRef.getStr(),
5496                        m_aAbsURIRef.getStr() + m_aPath.getEnd(),
5497                        getEscapePrefix(), NO_DECODE, RTL_TEXTENCODING_UTF8),
5498                 aSystemPath)
5499             != osl::FileBase::E_None)
5500         return rtl::OUString();
5501     return aSystemPath;
5502 }
5503 
5504 //============================================================================
GetFull() const5505 rtl::OUString INetURLObject::GetFull() const
5506 {
5507     INetURLObject aTemp(*this);
5508     aTemp.removeFinalSlash();
5509     return aTemp.PathToFileName();
5510 }
5511 
5512 //============================================================================
GetPath() const5513 rtl::OUString INetURLObject::GetPath() const
5514 {
5515     INetURLObject aTemp(*this);
5516     aTemp.removeSegment(LAST_SEGMENT, true);
5517     aTemp.removeFinalSlash();
5518     return aTemp.PathToFileName();
5519 }
5520 
5521 //============================================================================
SetBase(rtl::OUString const & rTheBase)5522 void INetURLObject::SetBase(rtl::OUString const & rTheBase)
5523 {
5524     setBase(rTheBase, LAST_SEGMENT, true, ENCODE_ALL);
5525 }
5526 
5527 //============================================================================
GetBase() const5528 rtl::OUString INetURLObject::GetBase() const
5529 {
5530     return getBase(LAST_SEGMENT, true, DECODE_WITH_CHARSET);
5531 }
5532 
5533 //============================================================================
SetName(rtl::OUString const & rTheName,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)5534 void INetURLObject::SetName(rtl::OUString const & rTheName,
5535                             EncodeMechanism eMechanism,
5536                             rtl_TextEncoding eCharset)
5537 {
5538     INetURLObject aTemp(*this);
5539     if (aTemp.removeSegment(LAST_SEGMENT, true)
5540         && aTemp.insertName(rTheName, false, LAST_SEGMENT, true, eMechanism,
5541                             eCharset))
5542         *this = aTemp;
5543 }
5544 
5545 //============================================================================
CutName(DecodeMechanism eMechanism,rtl_TextEncoding eCharset)5546 rtl::OUString INetURLObject::CutName(DecodeMechanism eMechanism,
5547                                  rtl_TextEncoding eCharset)
5548 {
5549     rtl::OUString aTheName(getName(LAST_SEGMENT, true, eMechanism, eCharset));
5550     return removeSegment(LAST_SEGMENT, true) ? aTheName : rtl::OUString();
5551 }
5552 
5553 //============================================================================
SetExtension(rtl::OUString const & rTheExtension,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)5554 void INetURLObject::SetExtension(rtl::OUString const & rTheExtension,
5555                                  EncodeMechanism eMechanism,
5556                                  rtl_TextEncoding eCharset)
5557 {
5558     setExtension(rTheExtension, LAST_SEGMENT, false, eMechanism, eCharset);
5559 }
5560 
5561 //============================================================================
CutExtension(DecodeMechanism eMechanism,rtl_TextEncoding eCharset)5562 rtl::OUString INetURLObject::CutExtension(DecodeMechanism eMechanism,
5563                                       rtl_TextEncoding eCharset)
5564 {
5565     rtl::OUString aTheExtension(getExtension(LAST_SEGMENT, false, eMechanism,
5566                                          eCharset));
5567     return removeExtension(LAST_SEGMENT, false)
5568         ? aTheExtension : rtl::OUString();
5569 }
5570 
5571 //============================================================================
IsCaseSensitive() const5572 bool INetURLObject::IsCaseSensitive() const
5573 {
5574     return true;
5575 }
5576