1 /* Based on nsURLParsers.cc from Mozilla
2 * -------------------------------------
3 * The contents of this file are subject to the Mozilla Public License Version
4 * 1.1 (the "License"); you may not use this file except in compliance with
5 * the License. You may obtain a copy of the License at
6 * http://www.mozilla.org/MPL/
7 *
8 * Software distributed under the License is distributed on an "AS IS" basis,
9 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10 * for the specific language governing rights and limitations under the
11 * License.
12 *
13 * The Original Code is mozilla.org code.
14 *
15 * The Initial Developer of the Original Code is
16 * Netscape Communications Corporation.
17 * Portions created by the Initial Developer are Copyright (C) 1998
18 * the Initial Developer. All Rights Reserved.
19 *
20 * Contributor(s):
21 * Darin Fisher (original author)
22 *
23 * Alternatively, the contents of this file may be used under the terms of
24 * either the GNU General Public License Version 2 or later (the "GPL"), or
25 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26 * in which case the provisions of the GPL or the LGPL are applicable instead
27 * of those above. If you wish to allow use of your version of this file only
28 * under the terms of either the GPL or the LGPL, and not to allow others to
29 * use your version of this file under the terms of the MPL, indicate your
30 * decision by deleting the provisions above and replace them with the notice
31 * and other provisions required by the GPL or the LGPL. If you do not delete
32 * the provisions above, a recipient may use your version of this file under
33 * the terms of any one of the MPL, the GPL or the LGPL.
34 *
35 * ***** END LICENSE BLOCK ***** */
36
37 #include "third_party/mozilla/url_parse.h"
38
39 #include <assert.h>
40 #include <ctype.h>
41 #include <stdlib.h>
42
43 #include "third_party/mozilla/url_parse_internal.h"
44
45 namespace openscreen {
46 namespace {
47
// Reports whether |ch| is one of the ASCII decimal digits '0' through '9',
// which are the only characters accepted inside a port number.
bool IsPortDigit(char ch) {
  return '0' <= ch && ch <= '9';
}
52
53 // Returns the offset of the next authority terminator in the input starting
54 // from start_offset. If no terminator is found, the return value will be equal
55 // to spec_len.
FindNextAuthorityTerminator(const char * spec,int start_offset,int spec_len)56 int FindNextAuthorityTerminator(const char* spec,
57 int start_offset,
58 int spec_len) {
59 for (int i = start_offset; i < spec_len; i++) {
60 if (IsAuthorityTerminator(spec[i]))
61 return i;
62 }
63 return spec_len; // Not found.
64 }
65
ParseUserInfo(const char * spec,const Component & user,Component * username,Component * password)66 void ParseUserInfo(const char* spec,
67 const Component& user,
68 Component* username,
69 Component* password) {
70 // Find the first colon in the user section, which separates the username and
71 // password.
72 int colon_offset = 0;
73 while (colon_offset < user.len && spec[user.begin + colon_offset] != ':')
74 colon_offset++;
75
76 if (colon_offset < user.len) {
77 // Found separator: <username>:<password>
78 *username = Component(user.begin, colon_offset);
79 *password = MakeRange(user.begin + colon_offset + 1, user.begin + user.len);
80 } else {
81 // No separator, treat everything as the username
82 *username = user;
83 *password = Component();
84 }
85 }
86
ParseServerInfo(const char * spec,const Component & serverinfo,Component * hostname,Component * port_num)87 void ParseServerInfo(const char* spec,
88 const Component& serverinfo,
89 Component* hostname,
90 Component* port_num) {
91 if (serverinfo.len == 0) {
92 // No server info, host name is empty.
93 hostname->reset();
94 port_num->reset();
95 return;
96 }
97
98 // If the host starts with a left-bracket, assume the entire host is an
99 // IPv6 literal. Otherwise, assume none of the host is an IPv6 literal.
100 // This assumption will be overridden if we find a right-bracket.
101 //
102 // Our IPv6 address canonicalization code requires both brackets to exist,
103 // but the ability to locate an incomplete address can still be useful.
104 int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1;
105 int colon = -1;
106
107 // Find the last right-bracket, and the last colon.
108 for (int i = serverinfo.begin; i < serverinfo.end(); i++) {
109 switch (spec[i]) {
110 case ']':
111 ipv6_terminator = i;
112 break;
113 case ':':
114 colon = i;
115 break;
116 }
117 }
118
119 if (colon > ipv6_terminator) {
120 // Found a port number: <hostname>:<port>
121 *hostname = MakeRange(serverinfo.begin, colon);
122 if (hostname->len == 0)
123 hostname->reset();
124 *port_num = MakeRange(colon + 1, serverinfo.end());
125 } else {
126 // No port: <hostname>
127 *hostname = serverinfo;
128 port_num->reset();
129 }
130 }
131
132 // Given an already-identified auth section, breaks it into its consituent
133 // parts. The port number will be parsed and the resulting integer will be
134 // filled into the given *port variable, or -1 if there is no port number or it
135 // is invalid.
DoParseAuthority(const char * spec,const Component & auth,Component * username,Component * password,Component * hostname,Component * port_num)136 void DoParseAuthority(const char* spec,
137 const Component& auth,
138 Component* username,
139 Component* password,
140 Component* hostname,
141 Component* port_num) {
142 assert(auth.is_valid());
143 if (auth.len == 0) {
144 username->reset();
145 password->reset();
146 hostname->reset();
147 port_num->reset();
148 return;
149 }
150
151 // Search backwards for @, which is the separator between the user info and
152 // the server info.
153 int i = auth.begin + auth.len - 1;
154 while (i > auth.begin && spec[i] != '@')
155 i--;
156
157 if (spec[i] == '@') {
158 // Found user info: <user-info>@<server-info>
159 ParseUserInfo(spec, Component(auth.begin, i - auth.begin), username,
160 password);
161 ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len), hostname,
162 port_num);
163 } else {
164 // No user info, everything is server info.
165 username->reset();
166 password->reset();
167 ParseServerInfo(spec, auth, hostname, port_num);
168 }
169 }
170
FindQueryAndRefParts(const char * spec,const Component & path,int * query_separator,int * ref_separator)171 inline void FindQueryAndRefParts(const char* spec,
172 const Component& path,
173 int* query_separator,
174 int* ref_separator) {
175 int path_end = path.begin + path.len;
176 for (int i = path.begin; i < path_end; i++) {
177 switch (spec[i]) {
178 case '?':
179 // Only match the query string if it precedes the reference fragment
180 // and when we haven't found one already.
181 if (*query_separator < 0)
182 *query_separator = i;
183 break;
184 case '#':
185 // Record the first # sign only.
186 if (*ref_separator < 0) {
187 *ref_separator = i;
188 return;
189 }
190 break;
191 }
192 }
193 }
194
ParsePath(const char * spec,const Component & path,Component * filepath,Component * query,Component * ref)195 void ParsePath(const char* spec,
196 const Component& path,
197 Component* filepath,
198 Component* query,
199 Component* ref) {
200 // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>
201
202 // Special case when there is no path.
203 if (path.len == -1) {
204 filepath->reset();
205 query->reset();
206 ref->reset();
207 return;
208 }
209 assert(path.len > 0);
210
211 // Search for first occurrence of either ? or #.
212 int query_separator = -1; // Index of the '?'
213 int ref_separator = -1; // Index of the '#'
214 FindQueryAndRefParts(spec, path, &query_separator, &ref_separator);
215
216 // Markers pointing to the character after each of these corresponding
217 // components. The code below words from the end back to the beginning,
218 // and will update these indices as it finds components that exist.
219 int file_end, query_end;
220
221 // Ref fragment: from the # to the end of the path.
222 int path_end = path.begin + path.len;
223 if (ref_separator >= 0) {
224 file_end = query_end = ref_separator;
225 *ref = MakeRange(ref_separator + 1, path_end);
226 } else {
227 file_end = query_end = path_end;
228 ref->reset();
229 }
230
231 // Query fragment: everything from the ? to the next boundary (either the end
232 // of the path or the ref fragment).
233 if (query_separator >= 0) {
234 file_end = query_separator;
235 *query = MakeRange(query_separator + 1, query_end);
236 } else {
237 query->reset();
238 }
239
240 // File path: treat an empty file path as no file path.
241 if (file_end != path.begin)
242 *filepath = MakeRange(path.begin, file_end);
243 else
244 filepath->reset();
245 }
246
DoExtractScheme(const char * url,int url_len,Component * scheme)247 bool DoExtractScheme(const char* url, int url_len, Component* scheme) {
248 // Skip leading whitespace and control characters.
249 int begin = 0;
250 while (begin < url_len && ShouldTrimFromURL(url[begin]))
251 begin++;
252 if (begin == url_len)
253 return false; // Input is empty or all whitespace.
254
255 // Find the first colon character.
256 for (int i = begin; i < url_len; i++) {
257 if (url[i] == ':') {
258 *scheme = MakeRange(begin, i);
259 return true;
260 }
261 }
262 return false; // No colon found: no scheme
263 }
264
265 // Fills in all members of the Parsed structure except for the scheme.
266 //
267 // |spec| is the full spec being parsed, of length |spec_len|.
268 // |after_scheme| is the character immediately following the scheme (after the
269 // colon) where we'll begin parsing.
270 //
271 // Compatability data points. I list "host", "path" extracted:
272 // Input IE6 Firefox Us
273 // ----- -------------- -------------- --------------
274 // http://foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"
275 // http:foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"
276 // http:/foo.com/ fail(*) "foo.com", "/" "foo.com", "/"
277 // http:\foo.com/ fail(*) "\foo.com", "/"(fail) "foo.com", "/"
278 // http:////foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"
279 //
280 // (*) Interestingly, although IE fails to load these URLs, its history
281 // canonicalizer handles them, meaning if you've been to the corresponding
282 // "http://foo.com/" link, it will be colored.
DoParseAfterScheme(const char * spec,int spec_len,int after_scheme,Parsed * parsed)283 void DoParseAfterScheme(const char* spec,
284 int spec_len,
285 int after_scheme,
286 Parsed* parsed) {
287 int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
288 int after_slashes = after_scheme + num_slashes;
289
290 // First split into two main parts, the authority (username, password, host,
291 // and port) and the full path (path, query, and reference).
292 Component authority;
293 Component full_path;
294
295 // Found "//<some data>", looks like an authority section. Treat everything
296 // from there to the next slash (or end of spec) to be the authority. Note
297 // that we ignore the number of slashes and treat it as the authority.
298 int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len);
299 authority = Component(after_slashes, end_auth - after_slashes);
300
301 if (end_auth == spec_len) // No beginning of path found.
302 full_path = Component();
303 else // Everything starting from the slash to the end is the path.
304 full_path = Component(end_auth, spec_len - end_auth);
305
306 // Now parse those two sub-parts.
307 DoParseAuthority(spec, authority, &parsed->username, &parsed->password,
308 &parsed->host, &parsed->port);
309 ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref);
310 }
311
312 // The main parsing function for standard URLs. Standard URLs have a scheme,
313 // host, path, etc.
DoParseStandardURL(const char * spec,int spec_len,Parsed * parsed)314 void DoParseStandardURL(const char* spec, int spec_len, Parsed* parsed) {
315 assert(spec_len >= 0);
316
317 // Strip leading & trailing spaces and control characters.
318 int begin = 0;
319 TrimURL(spec, &begin, &spec_len);
320
321 int after_scheme;
322 if (DoExtractScheme(spec, spec_len, &parsed->scheme)) {
323 after_scheme = parsed->scheme.end() + 1; // Skip past the colon.
324 } else {
325 // Say there's no scheme when there is no colon. We could also say that
326 // everything is the scheme. Both would produce an invalid URL, but this way
327 // seems less wrong in more cases.
328 parsed->scheme.reset();
329 after_scheme = begin;
330 }
331 DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
332 }
333
DoParseFileSystemURL(const char * spec,int spec_len,Parsed * parsed)334 void DoParseFileSystemURL(const char* spec, int spec_len, Parsed* parsed) {
335 assert(spec_len >= 0);
336
337 // Get the unused parts of the URL out of the way.
338 parsed->username.reset();
339 parsed->password.reset();
340 parsed->host.reset();
341 parsed->port.reset();
342 parsed->path.reset(); // May use this; reset for convenience.
343 parsed->ref.reset(); // May use this; reset for convenience.
344 parsed->query.reset(); // May use this; reset for convenience.
345 parsed->clear_inner_parsed(); // May use this; reset for convenience.
346
347 // Strip leading & trailing spaces and control characters.
348 int begin = 0;
349 TrimURL(spec, &begin, &spec_len);
350
351 // Handle empty specs or ones that contain only whitespace or control chars.
352 if (begin == spec_len) {
353 parsed->scheme.reset();
354 return;
355 }
356
357 int inner_start = -1;
358
359 // Extract the scheme. We also handle the case where there is no scheme.
360 if (DoExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
361 // Offset the results since we gave ExtractScheme a substring.
362 parsed->scheme.begin += begin;
363
364 if (parsed->scheme.end() == spec_len - 1)
365 return;
366
367 inner_start = parsed->scheme.end() + 1;
368 } else {
369 // No scheme found; that's not valid for filesystem URLs.
370 parsed->scheme.reset();
371 return;
372 }
373
374 Component inner_scheme;
375 const char* inner_spec = &spec[inner_start];
376 int inner_spec_len = spec_len - inner_start;
377
378 if (DoExtractScheme(inner_spec, inner_spec_len, &inner_scheme)) {
379 // Offset the results since we gave ExtractScheme a substring.
380 inner_scheme.begin += inner_start;
381
382 if (inner_scheme.end() == spec_len - 1)
383 return;
384 } else {
385 // No scheme found; that's not valid for filesystem URLs.
386 // The best we can do is return "filesystem://".
387 return;
388 }
389
390 Parsed inner_parsed;
391
392 if (CompareSchemeComponent(spec, inner_scheme, kFileScheme)) {
393 // File URLs are special.
394 ParseFileURL(inner_spec, inner_spec_len, &inner_parsed);
395 } else if (CompareSchemeComponent(spec, inner_scheme, kFileSystemScheme)) {
396 // Filesystem URLs don't nest.
397 return;
398 } else if (IsStandard(spec, inner_scheme)) {
399 // All "normal" URLs.
400 DoParseStandardURL(inner_spec, inner_spec_len, &inner_parsed);
401 } else {
402 return;
403 }
404
405 // All members of inner_parsed need to be offset by inner_start.
406 // If we had any scheme that supported nesting more than one level deep,
407 // we'd have to recurse into the inner_parsed's inner_parsed when
408 // adjusting by inner_start.
409 inner_parsed.scheme.begin += inner_start;
410 inner_parsed.username.begin += inner_start;
411 inner_parsed.password.begin += inner_start;
412 inner_parsed.host.begin += inner_start;
413 inner_parsed.port.begin += inner_start;
414 inner_parsed.query.begin += inner_start;
415 inner_parsed.ref.begin += inner_start;
416 inner_parsed.path.begin += inner_start;
417
418 // Query and ref move from inner_parsed to parsed.
419 parsed->query = inner_parsed.query;
420 inner_parsed.query.reset();
421 parsed->ref = inner_parsed.ref;
422 inner_parsed.ref.reset();
423
424 parsed->set_inner_parsed(inner_parsed);
425 if (!inner_parsed.scheme.is_valid() || !inner_parsed.path.is_valid() ||
426 inner_parsed.inner_parsed()) {
427 return;
428 }
429
430 // The path in inner_parsed should start with a slash, then have a filesystem
431 // type followed by a slash. From the first slash up to but excluding the
432 // second should be what it keeps; the rest goes to parsed. If the path ends
433 // before the second slash, it's still pretty clear what the user meant, so
434 // we'll let that through.
435 if (!IsURLSlash(spec[inner_parsed.path.begin])) {
436 return;
437 }
438 int inner_path_end = inner_parsed.path.begin + 1; // skip the leading slash
439 while (inner_path_end < spec_len && !IsURLSlash(spec[inner_path_end]))
440 ++inner_path_end;
441 parsed->path.begin = inner_path_end;
442 int new_inner_path_length = inner_path_end - inner_parsed.path.begin;
443 parsed->path.len = inner_parsed.path.len - new_inner_path_length;
444 parsed->inner_parsed()->path.len = new_inner_path_length;
445 }
446
447 // Initializes a path URL which is merely a scheme followed by a path. Examples
448 // include "about:foo" and "javascript:alert('bar');"
DoParsePathURL(const char * spec,int spec_len,bool trim_path_end,Parsed * parsed)449 void DoParsePathURL(const char* spec,
450 int spec_len,
451 bool trim_path_end,
452 Parsed* parsed) {
453 // Get the non-path and non-scheme parts of the URL out of the way, we never
454 // use them.
455 parsed->username.reset();
456 parsed->password.reset();
457 parsed->host.reset();
458 parsed->port.reset();
459 parsed->path.reset();
460 parsed->query.reset();
461 parsed->ref.reset();
462
463 // Strip leading & trailing spaces and control characters.
464 int scheme_begin = 0;
465 TrimURL(spec, &scheme_begin, &spec_len, trim_path_end);
466
467 // Handle empty specs or ones that contain only whitespace or control chars.
468 if (scheme_begin == spec_len) {
469 parsed->scheme.reset();
470 parsed->path.reset();
471 return;
472 }
473
474 int path_begin;
475 // Extract the scheme, with the path being everything following. We also
476 // handle the case where there is no scheme.
477 if (ExtractScheme(&spec[scheme_begin], spec_len - scheme_begin,
478 &parsed->scheme)) {
479 // Offset the results since we gave ExtractScheme a substring.
480 parsed->scheme.begin += scheme_begin;
481 path_begin = parsed->scheme.end() + 1;
482 } else {
483 // No scheme case.
484 parsed->scheme.reset();
485 path_begin = scheme_begin;
486 }
487
488 if (path_begin == spec_len)
489 return;
490 assert(path_begin < spec_len);
491
492 ParsePath(spec, MakeRange(path_begin, spec_len), &parsed->path,
493 &parsed->query, &parsed->ref);
494 }
495
DoParseMailtoURL(const char * spec,int spec_len,Parsed * parsed)496 void DoParseMailtoURL(const char* spec, int spec_len, Parsed* parsed) {
497 assert(spec_len >= 0);
498
499 // Get the non-path and non-scheme parts of the URL out of the way, we never
500 // use them.
501 parsed->username.reset();
502 parsed->password.reset();
503 parsed->host.reset();
504 parsed->port.reset();
505 parsed->ref.reset();
506 parsed->query.reset(); // May use this; reset for convenience.
507
508 // Strip leading & trailing spaces and control characters.
509 int begin = 0;
510 TrimURL(spec, &begin, &spec_len);
511
512 // Handle empty specs or ones that contain only whitespace or control chars.
513 if (begin == spec_len) {
514 parsed->scheme.reset();
515 parsed->path.reset();
516 return;
517 }
518
519 int path_begin = -1;
520 int path_end = -1;
521
522 // Extract the scheme, with the path being everything following. We also
523 // handle the case where there is no scheme.
524 if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
525 // Offset the results since we gave ExtractScheme a substring.
526 parsed->scheme.begin += begin;
527
528 if (parsed->scheme.end() != spec_len - 1) {
529 path_begin = parsed->scheme.end() + 1;
530 path_end = spec_len;
531 }
532 } else {
533 // No scheme found, just path.
534 parsed->scheme.reset();
535 path_begin = begin;
536 path_end = spec_len;
537 }
538
539 // Split [path_begin, path_end) into a path + query.
540 for (int i = path_begin; i < path_end; ++i) {
541 if (spec[i] == '?') {
542 parsed->query = MakeRange(i + 1, path_end);
543 path_end = i;
544 break;
545 }
546 }
547
548 // For compatability with the standard URL parser, treat no path as
549 // -1, rather than having a length of 0
550 if (path_begin == path_end) {
551 parsed->path.reset();
552 } else {
553 parsed->path = MakeRange(path_begin, path_end);
554 }
555 }
556
557 // Converts a port number in a string to an integer. We'd like to just call
558 // sscanf but our input is not NULL-terminated, which sscanf requires. Instead,
559 // we copy the digits to a small stack buffer (since we know the maximum number
560 // of digits in a valid port number) that we can NULL terminate.
DoParsePort(const char * spec,const Component & component)561 int DoParsePort(const char* spec, const Component& component) {
562 // Easy success case when there is no port.
563 const int kMaxDigits = 5;
564 if (!component.is_nonempty())
565 return PORT_UNSPECIFIED;
566
567 // Skip over any leading 0s.
568 Component digits_comp(component.end(), 0);
569 for (int i = 0; i < component.len; i++) {
570 if (spec[component.begin + i] != '0') {
571 digits_comp = MakeRange(component.begin + i, component.end());
572 break;
573 }
574 }
575 if (digits_comp.len == 0)
576 return 0; // All digits were 0.
577
578 // Verify we don't have too many digits (we'll be copying to our buffer so
579 // we need to double-check).
580 if (digits_comp.len > kMaxDigits)
581 return PORT_INVALID;
582
583 // Copy valid digits to the buffer.
584 char digits[kMaxDigits + 1]; // +1 for null terminator
585 for (int i = 0; i < digits_comp.len; i++) {
586 char ch = spec[digits_comp.begin + i];
587 if (!IsPortDigit(ch)) {
588 // Invalid port digit, fail.
589 return PORT_INVALID;
590 }
591 digits[i] = static_cast<char>(ch);
592 }
593
594 // Null-terminate the string and convert to integer. Since we guarantee
595 // only digits, atoi's lack of error handling is OK.
596 digits[digits_comp.len] = 0;
597 int port = atoi(digits);
598 if (port > 65535)
599 return PORT_INVALID; // Out of range.
600 return port;
601 }
602
DoExtractFileName(const char * spec,const Component & path,Component * file_name)603 void DoExtractFileName(const char* spec,
604 const Component& path,
605 Component* file_name) {
606 // Handle empty paths: they have no file names.
607 if (!path.is_nonempty()) {
608 file_name->reset();
609 return;
610 }
611
612 // Extract the filename range from the path which is between
613 // the last slash and the following semicolon.
614 int file_end = path.end();
615 for (int i = path.end() - 1; i >= path.begin; i--) {
616 if (spec[i] == ';') {
617 file_end = i;
618 } else if (IsURLSlash(spec[i])) {
619 // File name is everything following this character to the end
620 *file_name = MakeRange(i + 1, file_end);
621 return;
622 }
623 }
624
625 // No slash found, this means the input was degenerate (generally paths
626 // will start with a slash). Let's call everything the file name.
627 *file_name = MakeRange(path.begin, file_end);
628 return;
629 }
630
DoExtractQueryKeyValue(const char * spec,Component * query,Component * key,Component * value)631 bool DoExtractQueryKeyValue(const char* spec,
632 Component* query,
633 Component* key,
634 Component* value) {
635 if (!query->is_nonempty())
636 return false;
637
638 int start = query->begin;
639 int cur = start;
640 int end = query->end();
641
642 // We assume the beginning of the input is the beginning of the "key" and we
643 // skip to the end of it.
644 key->begin = cur;
645 while (cur < end && spec[cur] != '&' && spec[cur] != '=')
646 cur++;
647 key->len = cur - key->begin;
648
649 // Skip the separator after the key (if any).
650 if (cur < end && spec[cur] == '=')
651 cur++;
652
653 // Find the value part.
654 value->begin = cur;
655 while (cur < end && spec[cur] != '&')
656 cur++;
657 value->len = cur - value->begin;
658
659 // Finally skip the next separator if any
660 if (cur < end && spec[cur] == '&')
661 cur++;
662
663 // Save the new query
664 *query = MakeRange(cur, end);
665 return true;
666 }
667
668 } // namespace
669
Parsed()670 Parsed::Parsed() : potentially_dangling_markup(false), inner_parsed_(NULL) {}
671
// Copies every component and the dangling-markup flag from |other|. The inner
// parsed structure is not shared: when |other| has one, it is handed to
// set_inner_parsed() so this object gets its own.
Parsed::Parsed(const Parsed& other)
    : scheme(other.scheme),
      username(other.username),
      password(other.password),
      host(other.host),
      port(other.port),
      path(other.path),
      query(other.query),
      ref(other.ref),
      potentially_dangling_markup(other.potentially_dangling_markup),
      inner_parsed_(nullptr) {
  if (other.inner_parsed_ != nullptr) {
    set_inner_parsed(*other.inner_parsed_);
  }
}
686
// Assigns every component and the dangling-markup flag from |other|, then
// mirrors its inner parsed structure: set from |other|'s when it has one,
// cleared otherwise. Self-assignment is a no-op.
Parsed& Parsed::operator=(const Parsed& other) {
  if (this == &other)
    return *this;

  scheme = other.scheme;
  username = other.username;
  password = other.password;
  host = other.host;
  port = other.port;
  path = other.path;
  query = other.query;
  ref = other.ref;
  potentially_dangling_markup = other.potentially_dangling_markup;

  if (!other.inner_parsed_)
    clear_inner_parsed();
  else
    set_inner_parsed(*other.inner_parsed_);
  return *this;
}
705
// Frees the inner parsed structure held in |inner_parsed_|. Deleting a null
// pointer is a no-op, so no check is needed when there is none.
Parsed::~Parsed() {
  delete inner_parsed_;
}
709
Length() const710 int Parsed::Length() const {
711 if (ref.is_valid())
712 return ref.end();
713 return CountCharactersBefore(REF, false);
714 }
715
CountCharactersBefore(ComponentType type,bool include_delimiter) const716 int Parsed::CountCharactersBefore(ComponentType type,
717 bool include_delimiter) const {
718 if (type == SCHEME)
719 return scheme.begin;
720
721 // There will be some characters after the scheme like "://" and we don't
722 // know how many. Search forwards for the next thing until we find one.
723 int cur = 0;
724 if (scheme.is_valid())
725 cur = scheme.end() + 1; // Advance over the ':' at the end of the scheme.
726
727 if (username.is_valid()) {
728 if (type <= USERNAME)
729 return username.begin;
730 cur = username.end() + 1; // Advance over the '@' or ':' at the end.
731 }
732
733 if (password.is_valid()) {
734 if (type <= PASSWORD)
735 return password.begin;
736 cur = password.end() + 1; // Advance over the '@' at the end.
737 }
738
739 if (host.is_valid()) {
740 if (type <= HOST)
741 return host.begin;
742 cur = host.end();
743 }
744
745 if (port.is_valid()) {
746 if (type < PORT || (type == PORT && include_delimiter))
747 return port.begin - 1; // Back over delimiter.
748 if (type == PORT)
749 return port.begin; // Don't want delimiter counted.
750 cur = port.end();
751 }
752
753 if (path.is_valid()) {
754 if (type <= PATH)
755 return path.begin;
756 cur = path.end();
757 }
758
759 if (query.is_valid()) {
760 if (type < QUERY || (type == QUERY && include_delimiter))
761 return query.begin - 1; // Back over delimiter.
762 if (type == QUERY)
763 return query.begin; // Don't want delimiter counted.
764 cur = query.end();
765 }
766
767 if (ref.is_valid()) {
768 if (type == REF && !include_delimiter)
769 return ref.begin; // Back over delimiter.
770
771 // When there is a ref and we get here, the component we wanted was before
772 // this and not found, so we always know the beginning of the ref is right.
773 return ref.begin - 1; // Don't want delimiter counted.
774 }
775
776 return cur;
777 }
778
GetContent() const779 Component Parsed::GetContent() const {
780 const int begin = CountCharactersBefore(USERNAME, false);
781 const int len = Length() - begin;
782 // For compatability with the standard URL parser, we treat no content as
783 // -1, rather than having a length of 0 (we normally wouldn't care so
784 // much for these non-standard URLs).
785 return len ? Component(begin, len) : Component();
786 }
787
// Externally visible wrapper that forwards to the file-local
// DoExtractScheme(). Returns true and fills |*scheme| when |url| contains a
// colon after any leading trimmable characters; returns false otherwise.
bool ExtractScheme(const char* url, int url_len, Component* scheme) {
  return DoExtractScheme(url, url_len, scheme);
}
791
792 // This handles everything that may be an authority terminator, including
793 // backslash. For special backslash handling see DoParseAfterScheme.
IsAuthorityTerminator(char ch)794 bool IsAuthorityTerminator(char ch) {
795 return IsURLSlash(ch) || ch == '?' || ch == '#';
796 }
797
// Externally visible wrapper that forwards to the file-local
// DoExtractFileName(), which stores in |*file_name| the part of |path| after
// its last slash and before the first ';' that follows.
void ExtractFileName(const char* url,
                     const Component& path,
                     Component* file_name) {
  DoExtractFileName(url, path, file_name);
}
803
// Externally visible wrapper that forwards to the file-local
// DoExtractQueryKeyValue(). Returns false when |*query| is empty; otherwise
// fills |*key| and |*value| with the first pair, advances |*query| past it and
// returns true.
bool ExtractQueryKeyValue(const char* url,
                          Component* query,
                          Component* key,
                          Component* value) {
  return DoExtractQueryKeyValue(url, query, key, value);
}
810
// Externally visible wrapper that forwards to the file-local
// DoParseAuthority(), which splits the authority range |auth| of |spec| into
// username, password, hostname and port components.
void ParseAuthority(const char* spec,
                    const Component& auth,
                    Component* username,
                    Component* password,
                    Component* hostname,
                    Component* port_num) {
  DoParseAuthority(spec, auth, username, password, hostname, port_num);
}
819
// Externally visible wrapper that forwards to the file-local DoParsePort().
// Returns the numeric value of the port text in |port|, PORT_UNSPECIFIED when
// the component is empty, or PORT_INVALID when the text is not a valid port.
int ParsePort(const char* url, const Component& port) {
  return DoParsePort(url, port);
}
823
// Externally visible wrapper that forwards to the file-local
// DoParseStandardURL(), the parser for URLs with a scheme, host, path, etc.
void ParseStandardURL(const char* url, int url_len, Parsed* parsed) {
  DoParseStandardURL(url, url_len, parsed);
}
827
// Externally visible wrapper that forwards to the file-local DoParsePathURL(),
// the parser for URLs that are merely a scheme followed by a path.
// |trim_path_end| is passed through unchanged.
void ParsePathURL(const char* url,
                  int url_len,
                  bool trim_path_end,
                  Parsed* parsed) {
  DoParsePathURL(url, url_len, trim_path_end, parsed);
}
834
// Externally visible wrapper that forwards to the file-local
// DoParseFileSystemURL(), the parser for filesystem URLs that wrap an inner
// URL.
void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) {
  DoParseFileSystemURL(url, url_len, parsed);
}
838
// Externally visible wrapper that forwards to the file-local
// DoParseMailtoURL(), which splits a mailto URL into scheme, path and query.
void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) {
  DoParseMailtoURL(url, url_len, parsed);
}
842
// Externally visible wrapper that forwards to the file-local ParsePath(),
// which splits the range |path| of |spec| into file path, query and ref.
void ParsePathInternal(const char* spec,
                       const Component& path,
                       Component* filepath,
                       Component* query,
                       Component* ref) {
  ParsePath(spec, path, filepath, query, ref);
}
850
// Externally visible wrapper that forwards to the file-local
// DoParseAfterScheme(), which fills in every member of |parsed| except the
// scheme, starting at index |after_scheme| of |spec|.
void ParseAfterScheme(const char* spec,
                      int spec_len,
                      int after_scheme,
                      Parsed* parsed) {
  DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
}
857
858 } // namespace openscreen
859