1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller (mueller@kde.org)
5 * (C) 2006 Alexey Proskuryakov (ap@webkit.org)
6 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 */
23
24 #include "config.h"
25 #include "AtomicString.h"
26 #include "KURL.h"
27 #include "LinkHash.h"
28 #include "PlatformString.h"
29 #include "StringHash.h"
30 #include "StringImpl.h"
31
32 namespace WebCore {
33
findSlashDotDotSlash(const UChar * characters,size_t length)34 static inline int findSlashDotDotSlash(const UChar* characters, size_t length)
35 {
36 if (length < 4)
37 return -1;
38 unsigned loopLimit = length - 3;
39 for (unsigned i = 0; i < loopLimit; ++i) {
40 if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/')
41 return i;
42 }
43 return -1;
44 }
45
// Returns the index of the first "//" sequence found at or after |position|
// within the first |length| characters of |characters|, or -1 if there is none.
static inline int findSlashSlash(const UChar* characters, size_t length, int position)
{
    // Two characters are required for a match.
    if (length < 2)
        return -1;

    // One past the last index at which a two-character match can begin.
    const unsigned end = length - 1;
    for (unsigned start = position; start < end; ++start) {
        if (characters[start] != '/')
            continue;
        if (characters[start + 1] == '/')
            return start;
    }
    return -1;
}
57
findSlashDotSlash(const UChar * characters,size_t length)58 static inline int findSlashDotSlash(const UChar* characters, size_t length)
59 {
60 if (length < 3)
61 return -1;
62 unsigned loopLimit = length - 2;
63 for (unsigned i = 0; i < loopLimit; ++i) {
64 if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/')
65 return i;
66 }
67 return -1;
68 }
69
containsColonSlashSlash(const UChar * characters,unsigned length)70 static inline bool containsColonSlashSlash(const UChar* characters, unsigned length)
71 {
72 if (length < 3)
73 return false;
74 unsigned loopLimit = length - 2;
75 for (unsigned i = 0; i < loopLimit; ++i) {
76 if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/')
77 return true;
78 }
79 return false;
80 }
81
cleanPath(Vector<UChar,512> & path)82 static inline void cleanPath(Vector<UChar, 512>& path)
83 {
84 // FIXME: Shold not do this in the query or anchor part.
85 int pos;
86 while ((pos = findSlashDotDotSlash(path.data(), path.size())) != -1) {
87 int prev = reverseFind(path.data(), path.size(), '/', pos - 1);
88 // don't remove the host, i.e. http://foo.org/../foo.html
89 if (prev < 0 || (prev > 3 && path[prev - 2] == ':' && path[prev - 1] == '/'))
90 path.remove(pos, 3);
91 else
92 path.remove(prev, pos - prev + 3);
93 }
94
95 // FIXME: Shold not do this in the query part.
96 // Set refPos to -2 to mean "I haven't looked for the anchor yet".
97 // We don't want to waste a function call on the search for the the anchor
98 // in the vast majority of cases where there is no "//" in the path.
99 pos = 0;
100 int refPos = -2;
101 while ((pos = findSlashSlash(path.data(), path.size(), pos)) != -1) {
102 if (refPos == -2)
103 refPos = find(path.data(), path.size(), '#');
104 if (refPos > 0 && pos >= refPos)
105 break;
106
107 if (pos == 0 || path[pos - 1] != ':')
108 path.remove(pos);
109 else
110 pos += 2;
111 }
112
113 // FIXME: Shold not do this in the query or anchor part.
114 while ((pos = findSlashDotSlash(path.data(), path.size())) != -1)
115 path.remove(pos, 2);
116 }
117
118
matchLetter(UChar c,UChar lowercaseLetter)119 static inline bool matchLetter(UChar c, UChar lowercaseLetter)
120 {
121 return (c | 0x20) == lowercaseLetter;
122 }
123
needsTrailingSlash(const UChar * characters,unsigned length)124 static inline bool needsTrailingSlash(const UChar* characters, unsigned length)
125 {
126 if (length < 6)
127 return false;
128 if (!matchLetter(characters[0], 'h')
129 || !matchLetter(characters[1], 't')
130 || !matchLetter(characters[2], 't')
131 || !matchLetter(characters[3], 'p'))
132 return false;
133 if (!(characters[4] == ':'
134 || (matchLetter(characters[4], 's') && characters[5] == ':')))
135 return false;
136
137 unsigned pos = characters[4] == ':' ? 5 : 6;
138
139 // Skip initial two slashes if present.
140 if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/')
141 pos += 2;
142
143 // Find next slash.
144 while (pos < length && characters[pos] != '/')
145 ++pos;
146
147 return pos == length;
148 }
149
visitedLinkHash(const UChar * url,unsigned length)150 LinkHash visitedLinkHash(const UChar* url, unsigned length)
151 {
152 return AlreadyHashed::avoidDeletedValue(StringImpl::computeHash(url, length));
153 }
154
visitedLinkHash(const KURL & base,const AtomicString & attributeURL)155 LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL)
156 {
157 const UChar* characters = attributeURL.characters();
158 unsigned length = attributeURL.length();
159 if (!length)
160 return 0;
161
162 // This is a poor man's completeURL. Faster with less memory allocation.
163 // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does.
164 // For example, it does not handle international domain names properly.
165
166 // FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
167 // 1) The "://" could be in the query or anchor.
168 // 2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.
169
170 // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
171 // have a query or anchor.
172
173 bool hasColonSlashSlash = containsColonSlashSlash(characters, length);
174
175 if (hasColonSlashSlash && !needsTrailingSlash(characters, length))
176 return visitedLinkHash(attributeURL.characters(), attributeURL.length());
177
178 Vector<UChar, 512> buffer;
179
180 if (hasColonSlashSlash) {
181 // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
182 // end of the path, *before* the query or anchor.
183 buffer.append(characters, length);
184 buffer.append('/');
185 return visitedLinkHash(buffer.data(), buffer.size());
186 }
187
188 switch (characters[0]) {
189 case '/':
190 buffer.append(base.string().characters(), base.pathStart());
191 break;
192 case '#':
193 buffer.append(base.string().characters(), base.pathEnd());
194 break;
195 default:
196 buffer.append(base.string().characters(), base.pathAfterLastSlash());
197 break;
198 }
199 buffer.append(characters, length);
200 cleanPath(buffer);
201 if (needsTrailingSlash(buffer.data(), buffer.size())) {
202 // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
203 // end of the path, *before* the query or anchor.
204 buffer.append('/');
205 }
206
207 return visitedLinkHash(buffer.data(), buffer.size());
208 }
209
210 } // namespace WebCore
211