• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
4  *           (C) 2001 Dirk Mueller (mueller@kde.org)
5  *           (C) 2006 Alexey Proskuryakov (ap@webkit.org)
6  * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Library General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Library General Public License for more details.
17  *
18  * You should have received a copy of the GNU Library General Public License
19  * along with this library; see the file COPYING.LIB.  If not, write to
20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  * Boston, MA 02110-1301, USA.
22  */
23 
24 #include "config.h"
25 #include "AtomicString.h"
26 #include "KURL.h"
27 #include "LinkHash.h"
28 #include "PlatformString.h"
29 #include "StringHash.h"
30 #include "StringImpl.h"
31 
32 namespace WebCore {
33 
findSlashDotDotSlash(const UChar * characters,size_t length)34 static inline int findSlashDotDotSlash(const UChar* characters, size_t length)
35 {
36     if (length < 4)
37         return -1;
38     unsigned loopLimit = length - 3;
39     for (unsigned i = 0; i < loopLimit; ++i) {
40         if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/')
41             return i;
42     }
43     return -1;
44 }
45 
// Scans for the sequence "//", starting the search at index |position|.
// Returns the index of the first '/' of the first occurrence at or after
// |position|, or -1 if there is none.
static inline int findSlashSlash(const UChar* characters, size_t length, int position)
{
    // Two characters are needed for a match.
    if (length < 2)
        return -1;

    // One past the last index at which a two-character match can start.
    unsigned end = length - 1;
    for (unsigned index = position; index < end; ++index) {
        if (characters[index] != '/')
            continue;
        if (characters[index + 1] == '/')
            return index;
    }
    return -1;
}
57 
findSlashDotSlash(const UChar * characters,size_t length)58 static inline int findSlashDotSlash(const UChar* characters, size_t length)
59 {
60     if (length < 3)
61         return -1;
62     unsigned loopLimit = length - 2;
63     for (unsigned i = 0; i < loopLimit; ++i) {
64         if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/')
65             return i;
66     }
67     return -1;
68 }
69 
// Reports whether the sequence "://" occurs anywhere in the first |length|
// characters.
static inline bool containsColonSlashSlash(const UChar* characters, unsigned length)
{
    // The pattern is three characters long; anything shorter cannot match.
    if (length < 3)
        return false;

    // One past the last index at which a three-character match can start.
    unsigned end = length - 2;
    for (unsigned index = 0; index < end; ++index) {
        if (characters[index] != ':')
            continue;
        if (characters[index + 1] == '/' && characters[index + 2] == '/')
            return true;
    }
    return false;
}
81 
cleanPath(Vector<UChar,512> & path)82 static inline void cleanPath(Vector<UChar, 512>& path)
83 {
84     // FIXME: Shold not do this in the query or anchor part.
85     int pos;
86     while ((pos = findSlashDotDotSlash(path.data(), path.size())) != -1) {
87         int prev = reverseFind(path.data(), path.size(), '/', pos - 1);
88         // don't remove the host, i.e. http://foo.org/../foo.html
89         if (prev < 0 || (prev > 3 && path[prev - 2] == ':' && path[prev - 1] == '/'))
90             path.remove(pos, 3);
91         else
92             path.remove(prev, pos - prev + 3);
93     }
94 
95     // FIXME: Shold not do this in the query part.
96     // Set refPos to -2 to mean "I haven't looked for the anchor yet".
97     // We don't want to waste a function call on the search for the the anchor
98     // in the vast majority of cases where there is no "//" in the path.
99     pos = 0;
100     int refPos = -2;
101     while ((pos = findSlashSlash(path.data(), path.size(), pos)) != -1) {
102         if (refPos == -2)
103             refPos = find(path.data(), path.size(), '#');
104         if (refPos > 0 && pos >= refPos)
105             break;
106 
107         if (pos == 0 || path[pos - 1] != ':')
108             path.remove(pos);
109         else
110             pos += 2;
111     }
112 
113     // FIXME: Shold not do this in the query or anchor part.
114     while ((pos = findSlashDotSlash(path.data(), path.size())) != -1)
115         path.remove(pos, 2);
116 }
117 
118 
matchLetter(UChar c,UChar lowercaseLetter)119 static inline bool matchLetter(UChar c, UChar lowercaseLetter)
120 {
121     return (c | 0x20) == lowercaseLetter;
122 }
123 
needsTrailingSlash(const UChar * characters,unsigned length)124 static inline bool needsTrailingSlash(const UChar* characters, unsigned length)
125 {
126     if (length < 6)
127         return false;
128     if (!matchLetter(characters[0], 'h')
129             || !matchLetter(characters[1], 't')
130             || !matchLetter(characters[2], 't')
131             || !matchLetter(characters[3], 'p'))
132         return false;
133     if (!(characters[4] == ':'
134             || (matchLetter(characters[4], 's') && characters[5] == ':')))
135         return false;
136 
137     unsigned pos = characters[4] == ':' ? 5 : 6;
138 
139     // Skip initial two slashes if present.
140     if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/')
141         pos += 2;
142 
143     // Find next slash.
144     while (pos < length && characters[pos] != '/')
145         ++pos;
146 
147     return pos == length;
148 }
149 
visitedLinkHash(const UChar * url,unsigned length)150 LinkHash visitedLinkHash(const UChar* url, unsigned length)
151 {
152   return AlreadyHashed::avoidDeletedValue(StringImpl::computeHash(url, length));
153 }
154 
visitedLinkHash(const KURL & base,const AtomicString & attributeURL)155 LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL)
156 {
157     const UChar* characters = attributeURL.characters();
158     unsigned length = attributeURL.length();
159     if (!length)
160         return 0;
161 
162     // This is a poor man's completeURL. Faster with less memory allocation.
163     // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does.
164     // For example, it does not handle international domain names properly.
165 
166     // FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
167     //    1) The "://" could be in the query or anchor.
168     //    2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.
169 
170     // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
171     // have a query or anchor.
172 
173     bool hasColonSlashSlash = containsColonSlashSlash(characters, length);
174 
175     if (hasColonSlashSlash && !needsTrailingSlash(characters, length))
176         return visitedLinkHash(attributeURL.characters(), attributeURL.length());
177 
178     Vector<UChar, 512> buffer;
179 
180     if (hasColonSlashSlash) {
181         // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
182         // end of the path, *before* the query or anchor.
183         buffer.append(characters, length);
184         buffer.append('/');
185         return visitedLinkHash(buffer.data(), buffer.size());
186     }
187 
188     switch (characters[0]) {
189         case '/':
190             buffer.append(base.string().characters(), base.pathStart());
191             break;
192         case '#':
193             buffer.append(base.string().characters(), base.pathEnd());
194             break;
195         default:
196             buffer.append(base.string().characters(), base.pathAfterLastSlash());
197             break;
198     }
199     buffer.append(characters, length);
200     cleanPath(buffer);
201     if (needsTrailingSlash(buffer.data(), buffer.size())) {
202         // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
203         // end of the path, *before* the query or anchor.
204         buffer.append('/');
205     }
206 
207     return visitedLinkHash(buffer.data(), buffer.size());
208 }
209 
210 }  // namespace WebCore
211