/* * Copyright 2006 Sony Computer Entertainment Inc. * * Licensed under the MIT Open Source License, for details please see license.txt or the website * http://www.opensource.org/licenses/mit-license.php * */ #include #include #include #include #include #include #include #include using namespace std; using namespace cdom; void daeURI::initialize() { reset(); container = NULL; } daeURI::~daeURI() { } daeURI::daeURI(DAE& dae) : dae(&dae) { initialize(); } daeURI::daeURI(DAE& dae, const string& uriStr, daeBool nofrag) : dae(&dae) { initialize(); if (nofrag) { size_t pos = uriStr.find_last_of('#'); if (pos != string::npos) { set(uriStr.substr(0, pos)); return; } } set(uriStr); } daeURI::daeURI(const daeURI& baseURI, const string& uriStr) : dae(baseURI.getDAE()) { initialize(); set(uriStr, &baseURI); } daeURI::daeURI(const daeURI& copyFrom_) : dae(copyFrom_.getDAE()), container(NULL) { initialize(); copyFrom(copyFrom_); } daeURI::daeURI(daeElement& container_, const std::string& uriStr) : dae(container_.getDAE()) { initialize(); container = &container_; set(uriStr); } daeURI::daeURI(DAE& dae, daeElement& container_, const string& uriStr) : dae(&dae) { initialize(); container = &container_; set(uriStr); } void daeURI::copyFrom(const daeURI& copyFrom) { if (!container) container = copyFrom.container; set(copyFrom.originalStr()); } daeURI& daeURI::operator=(const daeURI& other) { copyFrom(other); return *this; } daeURI& daeURI::operator=(const string& uriStr) { set(uriStr); return *this; } void daeURI::reset() { // Clear everything except the container, which doesn't change for the lifetime of the daeURI uriString = ""; originalURIString = ""; _scheme = ""; _authority = ""; _path = ""; _query = ""; _fragment = ""; } DAE* daeURI::getDAE() const { return dae; } const string& daeURI::str() const { return uriString; } const string& daeURI::originalStr() const { return originalURIString; } daeString daeURI::getURI() const { return str().c_str(); } daeString daeURI::getOriginalURI() const { return originalStr().c_str(); } namespace { void parsePath(const string& path, /* out */ string& dir, /* out */ string& baseName, /* out */ string& extension) { // !!!steveT Currently, if we have a file name that begins with a '.', as in // ".emacs", that will be treated as having no base name with an extension // of ".emacs". We might want to change this behavior, so that the base name // is considered ".emacs" and the extension is empty. I think this is more // in line with what path parsers in other libraries/languages do, and it // more accurately reflects the intended structure of the file name. // The following implementation cannot handle paths like this: // /tmp/se.3/file //static pcrecpp::RE re("(.*/)?([^.]*)?(\\..*)?"); //dir = baseName = extension = ""; //re.FullMatch(path, &dir, &baseName, &extension); static pcrecpp::RE findDir("(.*/)?(.*)?"); static pcrecpp::RE findExt("([^.]*)?(\\..*)?"); string tmpFile; dir = baseName = extension = tmpFile = ""; findDir.PartialMatch(path, &dir, &tmpFile); findExt.PartialMatch(tmpFile, &baseName, &extension); } } void daeURI::set(const string& uriStr_, const daeURI* baseURI) { // We make a copy of the uriStr so that set(originalURIString, ...) works properly. string uriStr = uriStr_; reset(); originalURIString = uriStr; if (!parseUriRef(uriStr, _scheme, _authority, _path, _query, _fragment)) { reset(); return; } validate(baseURI); } void daeURI::set(const string& scheme_, const string& authority_, const string& path_, const string& query_, const string& fragment_, const daeURI* baseURI) { set(assembleUri(scheme_, authority_, path_, query_, fragment_), baseURI); } void daeURI::setURI(daeString _URIString, const daeURI* baseURI) { string uriStr = _URIString ? _URIString : ""; set(uriStr, baseURI); } const string& daeURI::scheme() const { return _scheme; } const string& daeURI::authority() const { return _authority; } const string& daeURI::path() const { return _path; } const string& daeURI::query() const { return _query; } const string& daeURI::fragment() const { return _fragment; } const string& daeURI::id() const { return fragment(); } namespace { string addSlashToEnd(const string& s) { return (!s.empty() && s[s.length()-1] != '/') ? s + '/' : s; } } void daeURI::pathComponents(string& dir, string& baseName, string& ext) const { parsePath(_path, dir, baseName, ext); } string daeURI::pathDir() const { string dir, base, ext; parsePath(_path, dir, base, ext); return dir; } string daeURI::pathFileBase() const { string dir, base, ext; parsePath(_path, dir, base, ext); return base; } string daeURI::pathExt() const { string dir, base, ext; parsePath(_path, dir, base, ext); return ext; } string daeURI::pathFile() const { string dir, base, ext; parsePath(_path, dir, base, ext); return base + ext; } void daeURI::path(const string& dir, const string& baseName, const string& ext) { path(addSlashToEnd(dir) + baseName + ext); } void daeURI::pathDir(const string& dir) { string tmp, base, ext; parsePath(_path, tmp, base, ext); path(addSlashToEnd(dir), base, ext); } void daeURI::pathFileBase(const string& baseName) { string dir, tmp, ext; parsePath(_path, dir, tmp, ext); path(dir, baseName, ext); } void daeURI::pathExt(const string& ext) { string dir, base, tmp; parsePath(_path, dir, base, tmp); path(dir, base, ext); } void daeURI::pathFile(const string& file) { string dir, base, ext; parsePath(_path, dir, base, ext); path(dir, file, ""); } daeString daeURI::getScheme() const { return _scheme.c_str(); } daeString daeURI::getProtocol() const { return getScheme(); } daeString daeURI::getAuthority() const { return _authority.c_str(); } daeString daeURI::getPath() const { return _path.c_str(); } daeString daeURI::getQuery() const { return _query.c_str(); } daeString daeURI::getFragment() const { return _fragment.c_str(); } daeString daeURI::getID() const { return getFragment(); } daeBool daeURI::getPath(daeChar *dest, daeInt size) const { if (int(_path.length()) < size) { strcpy(dest, _path.c_str()); return true; } return false; } void daeURI::scheme(const string& scheme_) { set(scheme_, _authority, _path, _query, _fragment); }; void daeURI::authority(const string& authority_) { set(_scheme, authority_, _path, _query, _fragment); } void daeURI::path(const string& path_) { set(_scheme, _authority, path_, _query, _fragment); } void daeURI::query(const string& query_) { set(_scheme, _authority, _path, query_, _fragment); } void daeURI::fragment(const string& fragment_) { set(_scheme, _authority, _path, _query, fragment_); } void daeURI::id(const string& id) { fragment(id); } void daeURI::print() { fprintf(stderr,"URI(%s)\n",uriString.c_str()); fprintf(stderr,"scheme = %s\n",_scheme.c_str()); fprintf(stderr,"authority = %s\n",_authority.c_str()); fprintf(stderr,"path = %s\n",_path.c_str()); fprintf(stderr,"query = %s\n",_query.c_str()); fprintf(stderr,"fragment = %s\n",_fragment.c_str()); fprintf(stderr,"URI without base = %s\n",originalURIString.c_str()); fflush(stderr); } namespace { void normalize(string& path) { daeURI::normalizeURIPath(const_cast(path.c_str())); path = path.substr(0, strlen(path.c_str())); } } void daeURI::validate(const daeURI* baseURI) { // If no base URI was supplied, use the container's document URI. If there's // no container or the container doesn't have a doc URI, use the application // base URI. if (!baseURI) { if (container) { if (container->getDocument()) { if (container->getDocument()->isZAERootDocument()) baseURI = &container->getDocument()->getExtractedFileURI(); else baseURI = container->getDocumentURI(); } } if (!baseURI) baseURI = &dae->getBaseURI(); if (this == baseURI) return; } // This is rewritten according to the updated rfc 3986 if (!_scheme.empty()) // if defined(R.scheme) then { // Everything stays the same except path which we normalize // T.scheme = R.scheme; // T.authority = R.authority; // T.path = remove_dot_segments(R.path); // T.query = R.query; normalize(_path); } else { if (!_authority.empty()) // if defined(R.authority) then { // Authority and query stay the same, path is normalized // T.authority = R.authority; // T.path = remove_dot_segments(R.path); // T.query = R.query; normalize(_path); } else { if (_path.empty()) // if (R.path == "") then { // T.path = Base.path; _path = baseURI->_path; //if defined(R.query) then // T.query = R.query; //else // T.query = Base.query; //endif; if (_query.empty()) _query = baseURI->_query; } else { if (_path[0] == '/') // if (R.path starts-with "/") then { // T.path = remove_dot_segments(R.path); normalize(_path); } else { // T.path = merge(Base.path, R.path); if (!baseURI->_authority.empty() && baseURI->_path.empty()) // authority defined, path empty _path.insert(0, "/"); else { string dir, baseName, ext; parsePath(baseURI->_path, dir, baseName, ext); _path = dir + _path; } // T.path = remove_dot_segments(T.path); normalize(_path); } // T.query = R.query; } // T.authority = Base.authority; _authority = baseURI->_authority; } // T.scheme = Base.scheme; _scheme = baseURI->_scheme; } // T.fragment = R.fragment; // Reassemble all this into a string version of the URI uriString = assembleUri(_scheme, _authority, _path, _query, _fragment); } daeElementRef daeURI::getElement() const { return internalResolveElement(); } daeElement* daeURI::internalResolveElement() const { if (uriString.empty()) return NULL; return dae->getURIResolvers().resolveElement(*this); } void daeURI::resolveElement() { } void daeURI::setContainer(daeElement* cont) { container = cont; // Since we have a new container element, the base URI may have changed. Re-resolve. set(originalURIString); } daeBool daeURI::isExternalReference() const { if (uriString.empty()) return false; if (container && container->getDocumentURI()) { daeURI* docURI = container->getDocumentURI(); if (_path != docURI->_path || _scheme != docURI->_scheme || _authority != docURI->_authority) { return true; } } return false; } daeDocument* daeURI::getReferencedDocument() const { string doc = assembleUri(_scheme, _authority, _path, "", ""); return dae->getDatabase()->getDocument(doc.c_str(), true); } daeURI::ResolveState daeURI::getState() const { return uriString.empty() ? uri_empty : uri_loaded; } void daeURI::setState(ResolveState newState) { } // This code is loosely based on the RFC 2396 normalization code from // libXML. Specifically it does the RFC steps 6.c->6.g from section 5.2 // The path is modified in place, there is no error return. void daeURI::normalizeURIPath(char* path) { char *cur, // location we are currently processing *out; // Everything from this back we are done with // Return if the path pointer is null if (path == NULL) return; // Skip any initial / characters to get us to the start of the first segment for(cur=path; *cur == '/'; cur++); // Return if we hit the end of the string if (*cur == 0) return; // Keep everything we've seen so far. out = cur; // Analyze each segment in sequence for cases (c) and (d). while (*cur != 0) { // (c) All occurrences of "./", where "." is a complete path segment, are removed from the buffer string. if ((*cur == '.') && (*(cur+1) == '/')) { cur += 2; // If there were multiple slashes, skip them too while (*cur == '/') cur++; continue; } // (d) If the buffer string ends with "." as a complete path segment, that "." is removed. if ((*cur == '.') && (*(cur+1) == 0)) break; // If we passed the above tests copy the segment to the output side while (*cur != '/' && *cur != 0) { *(out++) = *(cur++); } if(*cur != 0) { // Skip any occurrances of // at the end of the segment while ((*cur == '/') && (*(cur+1) == '/')) cur++; // Bring the last character in the segment (/ or a null terminator) into the output *(out++) = *(cur++); } } *out = 0; // Restart at the beginning of the first segment for the next part for(cur=path; *cur == '/'; cur++); if (*cur == 0) return; // Analyze each segment in sequence for cases (e) and (f). // // e) All occurrences of "/../", where is a // complete path segment not equal to "..", are removed from the // buffer string. Removal of these path segments is performed // iteratively, removing the leftmost matching pattern on each // iteration, until no matching pattern remains. // // f) If the buffer string ends with "/..", where // is a complete path segment not equal to "..", that // "/.." is removed. // // To satisfy the "iterative" clause in (e), we need to collapse the // string every time we find something that needs to be removed. Thus, // we don't need to keep two pointers into the string: we only need a // "current position" pointer. // while (true) { char *segp, *tmp; // At the beginning of each iteration of this loop, "cur" points to // the first character of the segment we want to examine. // Find the end of the current segment. for(segp = cur;(*segp != '/') && (*segp != 0); ++segp); // If this is the last segment, we're done (we need at least two // segments to meet the criteria for the (e) and (f) cases). if (*segp == 0) break; // If the first segment is "..", or if the next segment _isn't_ "..", // keep this segment and try the next one. ++segp; if (((*cur == '.') && (cur[1] == '.') && (segp == cur+3)) || ((*segp != '.') || (segp[1] != '.') || ((segp[2] != '/') && (segp[2] != 0)))) { cur = segp; continue; } // If we get here, remove this segment and the next one and back up // to the previous segment (if there is one), to implement the // "iteratively" clause. It's pretty much impossible to back up // while maintaining two pointers into the buffer, so just compact // the whole buffer now. // If this is the end of the buffer, we're done. if (segp[2] == 0) { *cur = 0; break; } // Strings overlap during this copy, but not in a bad way, just avoid using strcpy tmp = cur; segp += 3; while ((*(tmp++) = *(segp++)) != 0); // If there are no previous segments, then keep going from here. segp = cur; while ((segp > path) && (*(--segp) == '/')); if (segp == path) continue; // "segp" is pointing to the end of a previous segment; find it's // start. We need to back up to the previous segment and start // over with that to handle things like "foo/bar/../..". If we // don't do this, then on the first pass we'll remove the "bar/..", // but be pointing at the second ".." so we won't realize we can also // remove the "foo/..". for(cur = segp;(cur > path) && (*(cur-1) != '/'); cur--); } *out = 0; // g) If the resulting buffer string still begins with one or more // complete path segments of "..", then the reference is // considered to be in error. Implementations may handle this // error by retaining these components in the resolved path (i.e., // treating them as part of the final URI), by removing them from // the resolved path (i.e., discarding relative levels above the // root), or by avoiding traversal of the reference. // // We discard them from the final path. if (*path == '/') { for(cur=path; (*cur == '/') && (cur[1] == '.') && (cur[2] == '.') && ((cur[3] == '/') || (cur[3] == 0)); cur += 3); if (cur != path) { for(out=path; *cur != 0; *(out++) = *(cur++)); *out = 0; } } return; } // This function will take a resolved URI and create a version of it that is relative to // another existing URI. The new URI is stored in the "originalURI" int daeURI::makeRelativeTo(const daeURI* relativeToURI) { // Can only do this function if both URIs have the same scheme and authority if (_scheme != relativeToURI->_scheme || _authority != relativeToURI->_authority) return DAE_ERR_INVALID_CALL; // advance till we find a segment that doesn't match const char *this_path = getPath(); const char *relativeTo_path = relativeToURI->getPath(); const char *this_slash = this_path; const char *relativeTo_slash = relativeTo_path; while((*this_path == *relativeTo_path) && *this_path) { if(*this_path == '/') { this_slash = this_path; relativeTo_slash = relativeTo_path; } this_path++; relativeTo_path++; } // Decide how many ../ segments are needed (Filepath should always end in a /) int segment_count = 0; relativeTo_slash++; while(*relativeTo_slash != 0) { if(*relativeTo_slash == '/') segment_count ++; relativeTo_slash++; } this_slash++; string newPath; for (int i = 0; i < segment_count; i++) newPath += "../"; newPath += this_slash; set("", "", newPath, _query, _fragment, relativeToURI); return(DAE_OK); } daeBool daeURIResolver::_loadExternalDocuments = true; daeURIResolver::daeURIResolver(DAE& dae) : dae(&dae) { } daeURIResolver::~daeURIResolver() { } void daeURIResolver::setAutoLoadExternalDocuments( daeBool load ) { _loadExternalDocuments = load; } daeBool daeURIResolver::getAutoLoadExternalDocuments() { return _loadExternalDocuments; } daeURIResolverList::daeURIResolverList() { } daeURIResolverList::~daeURIResolverList() { for (size_t i = 0; i < resolvers.getCount(); i++) delete resolvers[i]; } daeTArray& daeURIResolverList::list() { return resolvers; } daeElement* daeURIResolverList::resolveElement(const daeURI& uri) { for (size_t i = 0; i < resolvers.getCount(); i++) if (daeElement* elt = resolvers[i]->resolveElement(uri)) return elt; return NULL; } // Returns true if parsing succeeded, false otherwise. Parsing can fail if the uri // reference isn't properly formed. bool cdom::parseUriRef(const string& uriRef, string& scheme, string& authority, string& path, string& query, string& fragment) { // This regular expression for parsing URI references comes from the URI spec: // http://tools.ietf.org/html/rfc3986#appendix-B static pcrecpp::RE re("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"); string s1, s3, s6, s8; if (re.FullMatch(uriRef, &s1, &scheme, &s3, &authority, &path, &s6, &query, &s8, &fragment)) return true; return false; } namespace { string safeSubstr(const string& s, size_t offset, size_t length) { string result = s.substr(offset, min(length, s.length() - offset)); result.resize(length, '\0'); return result; } } string cdom::assembleUri(const string& scheme, const string& authority, const string& path, const string& query, const string& fragment, bool forceLibxmlCompatible) { string p = safeSubstr(path, 0, 3); bool libxmlHack = forceLibxmlCompatible && scheme == "file"; bool uncPath = false; string uri; if (!scheme.empty()) uri += scheme + ":"; if (!authority.empty() || libxmlHack || (p[0] == '/' && p[1] == '/')) uri += "//"; if (!authority.empty()) { if (libxmlHack) { // We have a UNC path URI of the form file://otherMachine/file.dae. // Convert it to file://///otherMachine/file.dae, which is how libxml // does UNC paths. uri += "///" + authority; uncPath = true; } else { uri += authority; } } if (!uncPath && libxmlHack && getSystemType() == Windows) { // We have to be delicate in how we pass absolute path URIs to libxml on Windows. // If the path is an absolute path with no drive letter, add an extra slash to // appease libxml. if (p[0] == '/' && p[1] != '/' && p[2] != ':') { uri += "/"; } } uri += path; if (!query.empty()) uri += "?" + query; if (!fragment.empty()) uri += "#" + fragment; return uri; } string cdom::fixUriForLibxml(const string& uriRef) { string scheme, authority, path, query, fragment; cdom::parseUriRef(uriRef, scheme, authority, path, query, fragment); return assembleUri(scheme, authority, path, query, fragment, true); } string cdom::nativePathToUri(const string& nativePath, systemType type) { string uri = nativePath; if (type == Windows) { // Convert "c:\" to "/c:/" if (uri.length() >= 2 && isalpha(uri[0]) && uri[1] == ':') uri.insert(0, "/"); // Convert backslashes to forward slashes uri = replace(uri, "\\", "/"); } // Convert spaces to %20 uri = replace(uri, " ", "%20"); return uri; } string cdom::filePathToUri(const string& filePath) { return nativePathToUri(filePath); } string cdom::uriToNativePath(const string& uriRef, systemType type) { string scheme, authority, path, query, fragment; parseUriRef(uriRef, scheme, authority, path, query, fragment); // Make sure we have a file scheme URI, or that it doesn't have a scheme if (!scheme.empty() && scheme != "file") return ""; string filePath; if (type == Windows) { if (!authority.empty()) filePath += string("\\\\") + authority; // UNC path // Replace two leading slashes with one leading slash, so that // ///otherComputer/file.dae becomes //otherComputer/file.dae and // //folder/file.dae becomes /folder/file.dae if (path.length() >= 2 && path[0] == '/' && path[1] == '/') path.erase(0, 1); // Convert "/C:/" to "C:/" if (path.length() >= 3 && path[0] == '/' && path[2] == ':') path.erase(0, 1); // Convert forward slashes to back slashes path = replace(path, "/", "\\"); } filePath += path; // Replace %20 with space filePath = replace(filePath, "%20", " "); return filePath; } string cdom::uriToFilePath(const string& uriRef) { return uriToNativePath(uriRef); }