• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 * Copyright 2006 Sony Computer Entertainment Inc.
3 *
4 * Licensed under the MIT Open Source License, for details please see license.txt or the website
5 * http://www.opensource.org/licenses/mit-license.php
6 *
7 */
8 
9 #include <algorithm>
10 #include <dae.h>
11 #include <dae/daeURI.h>
12 #include <ctype.h>
13 #include <dae/daeDocument.h>
14 #include <dae/daeErrorHandler.h>
15 #include <dae/daeUtils.h>
16 #include <pcrecpp.h>
17 
18 using namespace std;
19 using namespace cdom;
20 
initialize()21 void daeURI::initialize() {
22 	reset();
23 	container = NULL;
24 }
25 
~daeURI()26 daeURI::~daeURI() { }
27 
daeURI(DAE & dae)28 daeURI::daeURI(DAE& dae) : dae(&dae) {
29 	initialize();
30 }
31 
daeURI(DAE & dae,const string & uriStr,daeBool nofrag)32 daeURI::daeURI(DAE& dae, const string& uriStr, daeBool nofrag) : dae(&dae) {
33 	initialize();
34 
35 	if (nofrag) {
36 		size_t pos = uriStr.find_last_of('#');
37 		if (pos != string::npos) {
38 			set(uriStr.substr(0, pos));
39 			return;
40 		}
41 	}
42 
43 	set(uriStr);
44 }
45 
// Builds a URI from uriStr, resolved against baseURI. The DAE is taken from
// baseURI.
daeURI::daeURI(const daeURI& baseURI, const string& uriStr)
	: dae(baseURI.getDAE())
{
	initialize();
	set(uriStr, &baseURI);
}
51 
// Copy constructor: starts from an empty URI sharing the source's DAE, then
// lets copyFrom() take over the container and the original URI string.
daeURI::daeURI(const daeURI& copyFrom_)
	: dae(copyFrom_.getDAE()),
	  container(NULL)
{
	initialize();
	copyFrom(copyFrom_);
}
57 
daeURI(daeElement & container_,const std::string & uriStr)58 daeURI::daeURI(daeElement& container_, const std::string& uriStr)
59 	: dae(container_.getDAE())
60 {
61 	initialize();
62 	container = &container_;
63 	set(uriStr);
64 }
65 
daeURI(DAE & dae,daeElement & container_,const string & uriStr)66 daeURI::daeURI(DAE& dae, daeElement& container_, const string& uriStr)
67 	: dae(&dae)
68 {
69 	initialize();
70 	container = &container_;
71 	set(uriStr);
72 }
73 
74 void
copyFrom(const daeURI & copyFrom)75 daeURI::copyFrom(const daeURI& copyFrom)
76 {
77 	if (!container)
78 		container = copyFrom.container;
79 	set(copyFrom.originalStr());
80 }
81 
// Assignment from another URI; delegates to copyFrom().
daeURI& daeURI::operator=(const daeURI& other)
{
	copyFrom(other);
	return *this;
}

// Assignment from a URI string; delegates to set().
daeURI& daeURI::operator=(const string& uriStr)
{
	set(uriStr);
	return *this;
}
91 
reset()92 void daeURI::reset() {
93 	// Clear everything except the container, which doesn't change for the lifetime of the daeURI
94 	uriString	         = "";
95 	originalURIString	 = "";
96 	_scheme            = "";
97 	_authority	       = "";
98 	_path              = "";
99 	_query             = "";
100 	_fragment          = "";
101 }
102 
getDAE() const103 DAE* daeURI::getDAE() const {
104 	return dae;
105 }
106 
107 
str() const108 const string& daeURI::str() const {
109 	return uriString;
110 }
111 
originalStr() const112 const string& daeURI::originalStr() const {
113 	return originalURIString;
114 }
115 
getURI() const116 daeString daeURI::getURI() const {
117 	return str().c_str();
118 }
119 
getOriginalURI() const120 daeString daeURI::getOriginalURI() const {
121 	return originalStr().c_str();
122 }
123 
124 
125 namespace {
parsePath(const string & path,string & dir,string & baseName,string & extension)126 	void parsePath(const string& path,
127 	               /* out */ string& dir,
128 	               /* out */ string& baseName,
129 	               /* out */ string& extension) {
130 		// !!!steveT Currently, if we have a file name that begins with a '.', as in
131 		// ".emacs", that will be treated as having no base name with an extension
132 		// of ".emacs". We might want to change this behavior, so that the base name
133 		// is considered ".emacs" and the extension is empty. I think this is more
134 		// in line with what path parsers in other libraries/languages do, and it
135 		// more accurately reflects the intended structure of the file name.
136 
137         // The following implementation cannot handle paths like this:
138         // /tmp/se.3/file
139         //static pcrecpp::RE re("(.*/)?([^.]*)?(\\..*)?");
140 		//dir = baseName = extension = "";
141 		//re.FullMatch(path, &dir, &baseName, &extension);
142 
143         static pcrecpp::RE findDir("(.*/)?(.*)?");
144         static pcrecpp::RE findExt("([^.]*)?(\\..*)?");
145         string tmpFile;
146         dir = baseName = extension = tmpFile = "";
147         findDir.PartialMatch(path, &dir, &tmpFile);
148         findExt.PartialMatch(tmpFile, &baseName, &extension);
149 	}
150 }
151 
set(const string & uriStr_,const daeURI * baseURI)152 void daeURI::set(const string& uriStr_, const daeURI* baseURI) {
153 	// We make a copy of the uriStr so that set(originalURIString, ...) works properly.
154 	string uriStr = uriStr_;
155 	reset();
156 	originalURIString = uriStr;
157 
158 	if (!parseUriRef(uriStr, _scheme, _authority, _path, _query, _fragment)) {
159 		reset();
160 		return;
161 	}
162 
163 	validate(baseURI);
164 }
165 
set(const string & scheme_,const string & authority_,const string & path_,const string & query_,const string & fragment_,const daeURI * baseURI)166 void daeURI::set(const string& scheme_,
167                  const string& authority_,
168                  const string& path_,
169                  const string& query_,
170                  const string& fragment_,
171                  const daeURI* baseURI)
172 {
173 	set(assembleUri(scheme_, authority_, path_, query_, fragment_), baseURI);
174 }
175 
setURI(daeString _URIString,const daeURI * baseURI)176 void daeURI::setURI(daeString _URIString, const daeURI* baseURI) {
177 	string uriStr = _URIString ? _URIString : "";
178 	set(uriStr, baseURI);
179 }
180 
181 
scheme() const182 const string& daeURI::scheme() const { return _scheme; }
authority() const183 const string& daeURI::authority() const { return _authority; }
path() const184 const string& daeURI::path() const { return _path; }
query() const185 const string& daeURI::query() const { return _query; }
fragment() const186 const string& daeURI::fragment() const { return _fragment; }
id() const187 const string& daeURI::id() const { return fragment(); }
188 
189 
namespace {
	// Returns s with a '/' appended, unless s is empty or already ends in '/'.
	std::string addSlashToEnd(const std::string& s) {
		if (s.empty() || s[s.length() - 1] == '/')
			return s;
		return s + '/';
	}
}
195 
pathComponents(string & dir,string & baseName,string & ext) const196 void daeURI::pathComponents(string& dir, string& baseName, string& ext) const {
197 	parsePath(_path, dir, baseName, ext);
198 }
199 
pathDir() const200 string daeURI::pathDir() const {
201 	string dir, base, ext;
202 	parsePath(_path, dir, base, ext);
203 	return dir;
204 }
205 
pathFileBase() const206 string daeURI::pathFileBase() const {
207 	string dir, base, ext;
208 	parsePath(_path, dir, base, ext);
209 	return base;
210 }
211 
pathExt() const212 string daeURI::pathExt() const {
213 	string dir, base, ext;
214 	parsePath(_path, dir, base, ext);
215 	return ext;
216 }
217 
pathFile() const218 string daeURI::pathFile() const {
219 	string dir, base, ext;
220 	parsePath(_path, dir, base, ext);
221 	return base + ext;
222 }
223 
path(const string & dir,const string & baseName,const string & ext)224 void daeURI::path(const string& dir, const string& baseName, const string& ext) {
225 	path(addSlashToEnd(dir) + baseName + ext);
226 }
227 
pathDir(const string & dir)228 void daeURI::pathDir(const string& dir) {
229 	string tmp, base, ext;
230 	parsePath(_path, tmp, base, ext);
231 	path(addSlashToEnd(dir), base, ext);
232 }
233 
pathFileBase(const string & baseName)234 void daeURI::pathFileBase(const string& baseName) {
235 	string dir, tmp, ext;
236 	parsePath(_path, dir, tmp, ext);
237 	path(dir, baseName, ext);
238 }
239 
pathExt(const string & ext)240 void daeURI::pathExt(const string& ext) {
241 	string dir, base, tmp;
242 	parsePath(_path, dir, base, tmp);
243 	path(dir, base, ext);
244 }
245 
pathFile(const string & file)246 void daeURI::pathFile(const string& file) {
247 	string dir, base, ext;
248 	parsePath(_path, dir, base, ext);
249 	path(dir, file, "");
250 }
251 
252 
getScheme() const253 daeString daeURI::getScheme() const { return _scheme.c_str(); }
getProtocol() const254 daeString daeURI::getProtocol() const {	return getScheme(); }
getAuthority() const255 daeString daeURI::getAuthority() const { return _authority.c_str(); }
getPath() const256 daeString daeURI::getPath() const { return _path.c_str(); }
getQuery() const257 daeString daeURI::getQuery() const { return _query.c_str(); }
getFragment() const258 daeString daeURI::getFragment() const { return _fragment.c_str(); }
getID() const259 daeString daeURI::getID() const { return getFragment(); }
getPath(daeChar * dest,daeInt size) const260 daeBool daeURI::getPath(daeChar *dest, daeInt size) const {
261 	if (int(_path.length()) < size) {
262 		strcpy(dest, _path.c_str());
263 		return true;
264 	}
265 	return false;
266 }
267 
268 
scheme(const string & scheme_)269 void daeURI::scheme(const string& scheme_) { set(scheme_, _authority, _path, _query, _fragment); };
authority(const string & authority_)270 void daeURI::authority(const string& authority_) { set(_scheme, authority_, _path, _query, _fragment); }
path(const string & path_)271 void daeURI::path(const string& path_) { set(_scheme, _authority, path_, _query, _fragment); }
query(const string & query_)272 void daeURI::query(const string& query_) { set(_scheme, _authority, _path, query_, _fragment); }
fragment(const string & fragment_)273 void daeURI::fragment(const string& fragment_) { set(_scheme, _authority, _path, _query, fragment_); }
id(const string & id)274 void daeURI::id(const string& id) { fragment(id); }
275 
276 void
print()277 daeURI::print()
278 {
279 	fprintf(stderr,"URI(%s)\n",uriString.c_str());
280 	fprintf(stderr,"scheme = %s\n",_scheme.c_str());
281 	fprintf(stderr,"authority = %s\n",_authority.c_str());
282 	fprintf(stderr,"path = %s\n",_path.c_str());
283 	fprintf(stderr,"query = %s\n",_query.c_str());
284 	fprintf(stderr,"fragment = %s\n",_fragment.c_str());
285 	fprintf(stderr,"URI without base = %s\n",originalURIString.c_str());
286 	fflush(stderr);
287 }
288 
289 namespace {
normalize(string & path)290 	void normalize(string& path) {
291 		daeURI::normalizeURIPath(const_cast<char*>(path.c_str()));
292 		path = path.substr(0, strlen(path.c_str()));
293 	}
294 }
295 
296 void
validate(const daeURI * baseURI)297 daeURI::validate(const daeURI* baseURI)
298 {
299 	// If no base URI was supplied, use the container's document URI. If there's
300 	// no container or the container doesn't have a doc URI, use the application
301 	// base URI.
302 	if (!baseURI) {
303 		if (container)
304         {
305             if (container->getDocument())
306             {
307                 if (container->getDocument()->isZAERootDocument())
308                     baseURI = &container->getDocument()->getExtractedFileURI();
309                 else
310                     baseURI = container->getDocumentURI();
311             }
312         }
313         if (!baseURI)
314             baseURI = &dae->getBaseURI();
315 		if (this == baseURI)
316 			return;
317 	}
318 
319 	// This is rewritten according to the updated rfc 3986
320 	if (!_scheme.empty()) // if defined(R.scheme) then
321 	{
322 		// Everything stays the same except path which we normalize
323 		// T.scheme    = R.scheme;
324 		// T.authority = R.authority;
325 		// T.path      = remove_dot_segments(R.path);
326 		// T.query     = R.query;
327 		normalize(_path);
328 	}
329 	else
330 	{
331 		if (!_authority.empty()) // if defined(R.authority) then
332 		{
333 			// Authority and query stay the same, path is normalized
334 			// T.authority = R.authority;
335 			// T.path      = remove_dot_segments(R.path);
336 			// T.query     = R.query;
337 			normalize(_path);
338 		}
339 		else
340 		{
341 			if (_path.empty())  // if (R.path == "") then
342 			{
343 				// T.path = Base.path;
344 				_path = baseURI->_path;
345 
346 				//if defined(R.query) then
347 				//   T.query = R.query;
348 				//else
349 				//   T.query = Base.query;
350 				//endif;
351 				if (_query.empty())
352 					_query = baseURI->_query;
353 			}
354 			else
355 			{
356 				if (_path[0] == '/')  // if (R.path starts-with "/") then
357 				{
358 					// T.path = remove_dot_segments(R.path);
359 					normalize(_path);
360 				}
361 				else
362 				{
363 					// T.path = merge(Base.path, R.path);
364 					if (!baseURI->_authority.empty() && baseURI->_path.empty()) // authority defined, path empty
365 						_path.insert(0, "/");
366 					else {
367 						string dir, baseName, ext;
368 						parsePath(baseURI->_path, dir, baseName, ext);
369 						_path = dir + _path;
370 					}
371 					// T.path = remove_dot_segments(T.path);
372 					normalize(_path);
373 				}
374 				// T.query = R.query;
375 			}
376 			// T.authority = Base.authority;
377 			_authority = baseURI->_authority;
378 		}
379 		// T.scheme = Base.scheme;
380 		_scheme = baseURI->_scheme;
381 	}
382 	// T.fragment = R.fragment;
383 
384 	// Reassemble all this into a string version of the URI
385 	uriString = assembleUri(_scheme, _authority, _path, _query, _fragment);
386 }
387 
getElement() const388 daeElementRef daeURI::getElement() const {
389 	return internalResolveElement();
390 }
391 
internalResolveElement() const392 daeElement* daeURI::internalResolveElement() const {
393 	if (uriString.empty())
394 		return NULL;
395 
396 	return dae->getURIResolvers().resolveElement(*this);
397 }
398 
resolveElement()399 void daeURI::resolveElement() { }
400 
setContainer(daeElement * cont)401 void daeURI::setContainer(daeElement* cont) {
402 	container = cont;
403 	// Since we have a new container element, the base URI may have changed. Re-resolve.
404 	set(originalURIString);
405 }
406 
isExternalReference() const407 daeBool daeURI::isExternalReference() const {
408 	if (uriString.empty())
409 		return false;
410 
411 	if (container && container->getDocumentURI()) {
412 		daeURI* docURI = container->getDocumentURI();
413 		if (_path != docURI->_path ||
414 		    _scheme != docURI->_scheme ||
415 		    _authority != docURI->_authority) {
416 			return true;
417 		}
418 	}
419 
420 	return false;
421 }
422 
423 
getReferencedDocument() const424 daeDocument* daeURI::getReferencedDocument() const {
425 	string doc = assembleUri(_scheme, _authority, _path, "", "");
426 	return dae->getDatabase()->getDocument(doc.c_str(), true);
427 }
428 
getState() const429 daeURI::ResolveState daeURI::getState() const {
430 	return uriString.empty() ? uri_empty : uri_loaded;
431 }
432 
setState(ResolveState newState)433 void daeURI::setState(ResolveState newState) { }
434 
435 
436 // This code is loosely based on the RFC 2396 normalization code from
437 // libXML. Specifically it does the RFC steps 6.c->6.g from section 5.2
438 // The path is modified in place, there is no error return.
normalizeURIPath(char * path)439 void daeURI::normalizeURIPath(char* path)
440 {
441 	char *cur, // location we are currently processing
442 	     *out; // Everything from this back we are done with
443 
444 	// Return if the path pointer is null
445 
446 	if (path == NULL) return;
447 
448 	// Skip any initial / characters to get us to the start of the first segment
449 
450 	for(cur=path; *cur == '/'; cur++);
451 
452 	// Return if we hit the end of the string
453 
454 	if (*cur == 0) return;
455 
456 	// Keep everything we've seen so far.
457 
458 	out = cur;
459 
460 	// Analyze each segment in sequence for cases (c) and (d).
461 
462 	while (*cur != 0)
463 	{
464 		// (c) All occurrences of "./", where "." is a complete path segment, are removed from the buffer string.
465 
466 		if ((*cur == '.') && (*(cur+1) == '/'))
467 		{
468 			cur += 2;
469 			// If there were multiple slashes, skip them too
470 			while (*cur == '/') cur++;
471 			continue;
472 		}
473 
474 		// (d) If the buffer string ends with "." as a complete path segment, that "." is removed.
475 
476 		if ((*cur == '.') && (*(cur+1) == 0))
477 			break;
478 
479 		// If we passed the above tests copy the segment to the output side
480 
481 		while (*cur != '/' && *cur != 0)
482 		{
483 			*(out++) = *(cur++);
484 		}
485 
486 		if(*cur != 0)
487 		{
488 			// Skip any occurrances of // at the end of the segment
489 
490 			while ((*cur == '/') && (*(cur+1) == '/')) cur++;
491 
492 			// Bring the last character in the segment (/ or a null terminator) into the output
493 
494 			*(out++) = *(cur++);
495 		}
496 	}
497 
498 	*out = 0;
499 
500     // Restart at the beginning of the first segment for the next part
501 
502 	for(cur=path; *cur == '/'; cur++);
503 	if (*cur == 0) return;
504 
505 	// Analyze each segment in sequence for cases (e) and (f).
506 	//
507 	// e) All occurrences of "<segment>/../", where <segment> is a
508 	//    complete path segment not equal to "..", are removed from the
509 	//    buffer string.  Removal of these path segments is performed
510 	//    iteratively, removing the leftmost matching pattern on each
511 	//    iteration, until no matching pattern remains.
512 	//
513 	// f) If the buffer string ends with "<segment>/..", where <segment>
514 	//    is a complete path segment not equal to "..", that
515 	//    "<segment>/.." is removed.
516 	//
517 	// To satisfy the "iterative" clause in (e), we need to collapse the
518 	// string every time we find something that needs to be removed.  Thus,
519 	// we don't need to keep two pointers into the string: we only need a
520 	// "current position" pointer.
521 	//
522 	while (true)
523 	{
524 		char *segp, *tmp;
525 
526 		// At the beginning of each iteration of this loop, "cur" points to
527 		// the first character of the segment we want to examine.
528 
529 		// Find the end of the current segment.
530 
531 		for(segp = cur;(*segp != '/') && (*segp != 0); ++segp);
532 
533 		// If this is the last segment, we're done (we need at least two
534 		// segments to meet the criteria for the (e) and (f) cases).
535 
536 		if (*segp == 0)
537 			break;
538 
539 		// If the first segment is "..", or if the next segment _isn't_ "..",
540 		// keep this segment and try the next one.
541 
542 		++segp;
543 		if (((*cur == '.') && (cur[1] == '.') && (segp == cur+3))
544             || ((*segp != '.') || (segp[1] != '.')
545             || ((segp[2] != '/') && (segp[2] != 0))))
546 		{
547 			cur = segp;
548 			continue;
549 		}
550 
551 		// If we get here, remove this segment and the next one and back up
552 		// to the previous segment (if there is one), to implement the
553 		// "iteratively" clause.  It's pretty much impossible to back up
554 		// while maintaining two pointers into the buffer, so just compact
555 		// the whole buffer now.
556 
557 		// If this is the end of the buffer, we're done.
558 
559 		if (segp[2] == 0)
560 		{
561 			*cur = 0;
562 			break;
563 		}
564 
565 		// Strings overlap during this copy, but not in a bad way, just avoid using strcpy
566 
567 		tmp = cur;
568 		segp += 3;
569 		while ((*(tmp++) = *(segp++)) != 0);
570 
571 		// If there are no previous segments, then keep going from here.
572 
573 		segp = cur;
574 		while ((segp > path) && (*(--segp) == '/'));
575 
576 		if (segp == path)
577 			continue;
578 
579 		// "segp" is pointing to the end of a previous segment; find it's
580 		// start.  We need to back up to the previous segment and start
581 		// over with that to handle things like "foo/bar/../..".  If we
582 		// don't do this, then on the first pass we'll remove the "bar/..",
583 		// but be pointing at the second ".." so we won't realize we can also
584 		// remove the "foo/..".
585 
586 		for(cur = segp;(cur > path) && (*(cur-1) != '/'); cur--);
587 	}
588 
589 	*out = 0;
590 
591 	// g) If the resulting buffer string still begins with one or more
592 	//    complete path segments of "..", then the reference is
593 	//    considered to be in error. Implementations may handle this
594 	//    error by retaining these components in the resolved path (i.e.,
595 	//    treating them as part of the final URI), by removing them from
596 	//    the resolved path (i.e., discarding relative levels above the
597 	//    root), or by avoiding traversal of the reference.
598 	//
599 	// We discard them from the final path.
600 
601 	if (*path == '/')
602 	{
603 		for(cur=path; (*cur == '/') && (cur[1] == '.') && (cur[2] == '.') && ((cur[3] == '/') || (cur[3] == 0)); cur += 3);
604 
605 		if (cur != path)
606 		{
607 			for(out=path; *cur != 0; *(out++) = *(cur++));
608 
609 			*out = 0;
610 		}
611 	}
612 	return;
613 }
614 
615 // This function will take a resolved URI and create a version of it that is relative to
616 // another existing URI.  The new URI is stored in the "originalURI"
makeRelativeTo(const daeURI * relativeToURI)617 int daeURI::makeRelativeTo(const daeURI* relativeToURI)
618 {
619 	// Can only do this function if both URIs have the same scheme and authority
620 	if (_scheme != relativeToURI->_scheme  ||  _authority != relativeToURI->_authority)
621 		return DAE_ERR_INVALID_CALL;
622 
623 	// advance till we find a segment that doesn't match
624 	const char *this_path        = getPath();
625 	const char *relativeTo_path  = relativeToURI->getPath();
626 	const char *this_slash       = this_path;
627 	const char *relativeTo_slash = relativeTo_path;
628 
629 	while((*this_path == *relativeTo_path) && *this_path)
630 	{
631 		if(*this_path == '/')
632 		{
633 			this_slash = this_path;
634 			relativeTo_slash = relativeTo_path;
635 		}
636 		this_path++;
637 		relativeTo_path++;
638 	}
639 
640 	// Decide how many ../ segments are needed (Filepath should always end in a /)
641 	int segment_count = 0;
642 	relativeTo_slash++;
643 	while(*relativeTo_slash != 0)
644 	{
645 		if(*relativeTo_slash == '/')
646 			segment_count ++;
647 		relativeTo_slash++;
648 	}
649 	this_slash++;
650 
651 	string newPath;
652 	for (int i = 0; i < segment_count; i++)
653 		newPath += "../";
654 	newPath += this_slash;
655 
656 	set("", "", newPath, _query, _fragment, relativeToURI);
657 	return(DAE_OK);
658 }
659 
660 
661 daeBool daeURIResolver::_loadExternalDocuments = true;
662 
daeURIResolver(DAE & dae)663 daeURIResolver::daeURIResolver(DAE& dae) : dae(&dae) { }
664 
~daeURIResolver()665 daeURIResolver::~daeURIResolver() { }
666 
setAutoLoadExternalDocuments(daeBool load)667 void daeURIResolver::setAutoLoadExternalDocuments( daeBool load )
668 {
669 	_loadExternalDocuments = load;
670 }
671 
getAutoLoadExternalDocuments()672 daeBool daeURIResolver::getAutoLoadExternalDocuments()
673 {
674 	return _loadExternalDocuments;
675 }
676 
677 
daeURIResolverList()678 daeURIResolverList::daeURIResolverList() { }
679 
~daeURIResolverList()680 daeURIResolverList::~daeURIResolverList() {
681 	for (size_t i = 0; i < resolvers.getCount(); i++)
682 		delete resolvers[i];
683 }
684 
list()685 daeTArray<daeURIResolver*>& daeURIResolverList::list() {
686 	return resolvers;
687 }
688 
resolveElement(const daeURI & uri)689 daeElement* daeURIResolverList::resolveElement(const daeURI& uri) {
690 	for (size_t i = 0; i < resolvers.getCount(); i++)
691 		if (daeElement* elt = resolvers[i]->resolveElement(uri))
692 			return elt;
693 	return NULL;
694 }
695 
696 
697 // Returns true if parsing succeeded, false otherwise. Parsing can fail if the uri
698 // reference isn't properly formed.
parseUriRef(const string & uriRef,string & scheme,string & authority,string & path,string & query,string & fragment)699 bool cdom::parseUriRef(const string& uriRef,
700                        string& scheme,
701                        string& authority,
702                        string& path,
703                        string& query,
704                        string& fragment) {
705 	// This regular expression for parsing URI references comes from the URI spec:
706 	//   http://tools.ietf.org/html/rfc3986#appendix-B
707 	static pcrecpp::RE re("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?");
708 	string s1, s3, s6, s8;
709 	if (re.FullMatch(uriRef, &s1, &scheme, &s3, &authority, &path, &s6, &query, &s8, &fragment))
710 		return true;
711 
712 	return false;
713 }
714 
namespace {
	// Returns a string of exactly `length` characters: the characters of `s`
	// starting at `offset`, padded with '\0' where `s` is too short. Unlike
	// std::string::substr this never throws; an offset at or beyond the end
	// of `s` simply yields `length` NUL characters.
	std::string safeSubstr(const std::string& s, size_t offset, size_t length) {
		std::string result;
		if (offset < s.length())
			result = s.substr(offset, std::min(length, s.length() - offset));

		// Pad so callers can index [0, length) unconditionally.
		result.resize(length, '\0');
		return result;
	}
}
722 
// Joins URI components back into a single string. Empty scheme, query and
// fragment are left out together with their delimiters.
//
// forceLibxmlCompatible: when set and the scheme is "file", the result is
// written in the form libxml expects (see the comments below).
string cdom::assembleUri(const string& scheme,
                         const string& authority,
                         const string& path,
                         const string& query,
                         const string& fragment,
                         bool forceLibxmlCompatible) {
	// First three path characters, NUL-padded so they can be indexed freely.
	const string head = safeSubstr(path, 0, 3);

	const bool libxmlHack         = forceLibxmlCompatible && scheme == "file";
	const bool hasAuthority       = !authority.empty();
	const bool pathHasDoubleSlash = (head[0] == '/' && head[1] == '/');
	bool uncPath = false;

	string uri;

	if (!scheme.empty())
		uri += scheme + ":";

	if (hasAuthority || libxmlHack || pathHasDoubleSlash)
		uri += "//";

	if (hasAuthority) {
		if (!libxmlHack) {
			uri += authority;
		}
		else {
			// A UNC path URI of the form file://otherMachine/file.dae is
			// turned into file://///otherMachine/file.dae, which is how
			// libxml writes UNC paths.
			uri += "///" + authority;
			uncPath = true;
		}
	}

	if (!uncPath && libxmlHack && getSystemType() == Windows) {
		// Absolute path URIs need care when handed to libxml on Windows: an
		// absolute path without a drive letter gets one extra slash.
		const bool absoluteNoDrive = (head[0] == '/' && head[1] != '/' && head[2] != ':');
		if (absoluteNoDrive)
			uri += "/";
	}

	uri += path;

	if (!query.empty())
		uri += "?" + query;

	if (!fragment.empty())
		uri += "#" + fragment;

	return uri;
}
769 
// Parses uriRef and reassembles it with the libxml compatibility flag set.
// The result of the parse step is not checked.
string cdom::fixUriForLibxml(const string& uriRef)
{
	string scheme;
	string authority;
	string path;
	string query;
	string fragment;

	cdom::parseUriRef(uriRef, scheme, authority, path, query, fragment);

	const bool forceLibxmlCompatible = true;
	return assembleUri(scheme, authority, path, query, fragment, forceLibxmlCompatible);
}
775 
776 
// Converts a native file system path into a URI reference.
//
// nativePath: the path to convert.
// type:       the path convention to assume. For Windows, a leading drive
//             letter ("c:\") becomes "/c:/" and every backslash becomes a
//             forward slash.
//
// In all cases spaces are written as "%20". No other characters are escaped.
string cdom::nativePathToUri(const string& nativePath, systemType type) {
	string uri = nativePath;

	if (type == Windows) {
		// Convert "c:\" to "/c:/". The cast matters: isalpha is only defined
		// for values representable as unsigned char (or EOF), and a plain
		// char can be negative.
		if (uri.length() >= 2
		    &&  isalpha(static_cast<unsigned char>(uri[0]))
		    &&  uri[1] == ':')
			uri.insert(0, "/");

		// Convert backslashes to forward slashes
		uri = replace(uri, "\\", "/");
	}

	// Convert spaces to %20
	uri = replace(uri, " ", "%20");

	return uri;
}
793 
// Forwards to nativePathToUri() with its default system type.
string cdom::filePathToUri(const string& filePath)
{
	const string uri = nativePathToUri(filePath);
	return uri;
}
797 
// Converts a URI reference into a native file system path. Returns an empty
// string when the URI has a scheme other than "file". Only "%20" escapes are
// decoded.
string cdom::uriToNativePath(const string& uriRef, systemType type)
{
	string scheme, authority, path, query, fragment;
	parseUriRef(uriRef, scheme, authority, path, query, fragment);

	// Only file URIs, or URIs without any scheme, can be mapped to a path.
	const bool schemeOk = scheme.empty() || scheme == "file";
	if (!schemeOk)
		return "";

	string nativePath;

	if (type == Windows) {
		// An authority becomes the machine name of a UNC path.
		if (!authority.empty()) {
			nativePath += "\\\\";
			nativePath += authority;
		}

		// Drop one of two leading slashes, so that
		// ///otherComputer/file.dae becomes //otherComputer/file.dae and
		// //folder/file.dae becomes /folder/file.dae
		const bool doubleSlash = path.length() >= 2  &&  path[0] == '/'  &&  path[1] == '/';
		if (doubleSlash)
			path.erase(0, 1);

		// "/C:/" becomes "C:/"
		const bool slashBeforeDrive = path.length() >= 3  &&  path[0] == '/'  &&  path[2] == ':';
		if (slashBeforeDrive)
			path.erase(0, 1);

		// Forward slashes become backslashes
		path = replace(path, "/", "\\");
	}

	nativePath += path;

	// %20 becomes a space
	nativePath = replace(nativePath, "%20", " ");

	return nativePath;
}
833 
// Forwards to uriToNativePath() with its default system type.
string cdom::uriToFilePath(const string& uriRef)
{
	const string nativePath = uriToNativePath(uriRef);
	return nativePath;
}
837