1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_URI_H_
6 #define V8_URI_H_
7
8 #include "src/v8.h"
9
10 #include "src/conversions.h"
11 #include "src/string-search.h"
12 #include "src/utils.h"
13
14 namespace v8 {
15 namespace internal {
16
17
18 template <typename Char>
19 static INLINE(Vector<const Char> GetCharVector(Handle<String> string));
20
21
22 template <>
GetCharVector(Handle<String> string)23 Vector<const uint8_t> GetCharVector(Handle<String> string) {
24 String::FlatContent flat = string->GetFlatContent();
25 DCHECK(flat.IsOneByte());
26 return flat.ToOneByteVector();
27 }
28
29
30 template <>
GetCharVector(Handle<String> string)31 Vector<const uc16> GetCharVector(Handle<String> string) {
32 String::FlatContent flat = string->GetFlatContent();
33 DCHECK(flat.IsTwoByte());
34 return flat.ToUC16Vector();
35 }
36
37
38 class URIUnescape : public AllStatic {
39 public:
40 template<typename Char>
41 MUST_USE_RESULT static MaybeHandle<String> Unescape(Isolate* isolate,
42 Handle<String> source);
43
44 private:
45 static const signed char kHexValue['g'];
46
47 template<typename Char>
48 MUST_USE_RESULT static MaybeHandle<String> UnescapeSlow(
49 Isolate* isolate, Handle<String> string, int start_index);
50
51 static INLINE(int TwoDigitHex(uint16_t character1, uint16_t character2));
52
53 template <typename Char>
54 static INLINE(int UnescapeChar(Vector<const Char> vector,
55 int i,
56 int length,
57 int* step));
58 };
59
60
61 const signed char URIUnescape::kHexValue[] = {
62 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
63 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
64 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
65 -0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
66 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
67 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
68 -1, 10, 11, 12, 13, 14, 15 };
69
70
71 template<typename Char>
Unescape(Isolate * isolate,Handle<String> source)72 MaybeHandle<String> URIUnescape::Unescape(Isolate* isolate,
73 Handle<String> source) {
74 int index;
75 { DisallowHeapAllocation no_allocation;
76 StringSearch<uint8_t, Char> search(isolate, STATIC_CHAR_VECTOR("%"));
77 index = search.Search(GetCharVector<Char>(source), 0);
78 if (index < 0) return source;
79 }
80 return UnescapeSlow<Char>(isolate, source, index);
81 }
82
83
84 template <typename Char>
UnescapeSlow(Isolate * isolate,Handle<String> string,int start_index)85 MaybeHandle<String> URIUnescape::UnescapeSlow(
86 Isolate* isolate, Handle<String> string, int start_index) {
87 bool one_byte = true;
88 int length = string->length();
89
90 int unescaped_length = 0;
91 { DisallowHeapAllocation no_allocation;
92 Vector<const Char> vector = GetCharVector<Char>(string);
93 for (int i = start_index; i < length; unescaped_length++) {
94 int step;
95 if (UnescapeChar(vector, i, length, &step) >
96 String::kMaxOneByteCharCode) {
97 one_byte = false;
98 }
99 i += step;
100 }
101 }
102
103 DCHECK(start_index < length);
104 Handle<String> first_part =
105 isolate->factory()->NewProperSubString(string, 0, start_index);
106
107 int dest_position = 0;
108 Handle<String> second_part;
109 DCHECK(unescaped_length <= String::kMaxLength);
110 if (one_byte) {
111 Handle<SeqOneByteString> dest = isolate->factory()->NewRawOneByteString(
112 unescaped_length).ToHandleChecked();
113 DisallowHeapAllocation no_allocation;
114 Vector<const Char> vector = GetCharVector<Char>(string);
115 for (int i = start_index; i < length; dest_position++) {
116 int step;
117 dest->SeqOneByteStringSet(dest_position,
118 UnescapeChar(vector, i, length, &step));
119 i += step;
120 }
121 second_part = dest;
122 } else {
123 Handle<SeqTwoByteString> dest = isolate->factory()->NewRawTwoByteString(
124 unescaped_length).ToHandleChecked();
125 DisallowHeapAllocation no_allocation;
126 Vector<const Char> vector = GetCharVector<Char>(string);
127 for (int i = start_index; i < length; dest_position++) {
128 int step;
129 dest->SeqTwoByteStringSet(dest_position,
130 UnescapeChar(vector, i, length, &step));
131 i += step;
132 }
133 second_part = dest;
134 }
135 return isolate->factory()->NewConsString(first_part, second_part);
136 }
137
138
TwoDigitHex(uint16_t character1,uint16_t character2)139 int URIUnescape::TwoDigitHex(uint16_t character1, uint16_t character2) {
140 if (character1 > 'f') return -1;
141 int hi = kHexValue[character1];
142 if (hi == -1) return -1;
143 if (character2 > 'f') return -1;
144 int lo = kHexValue[character2];
145 if (lo == -1) return -1;
146 return (hi << 4) + lo;
147 }
148
149
150 template <typename Char>
UnescapeChar(Vector<const Char> vector,int i,int length,int * step)151 int URIUnescape::UnescapeChar(Vector<const Char> vector,
152 int i,
153 int length,
154 int* step) {
155 uint16_t character = vector[i];
156 int32_t hi = 0;
157 int32_t lo = 0;
158 if (character == '%' &&
159 i <= length - 6 &&
160 vector[i + 1] == 'u' &&
161 (hi = TwoDigitHex(vector[i + 2],
162 vector[i + 3])) != -1 &&
163 (lo = TwoDigitHex(vector[i + 4],
164 vector[i + 5])) != -1) {
165 *step = 6;
166 return (hi << 8) + lo;
167 } else if (character == '%' &&
168 i <= length - 3 &&
169 (lo = TwoDigitHex(vector[i + 1],
170 vector[i + 2])) != -1) {
171 *step = 3;
172 return lo;
173 } else {
174 *step = 1;
175 return character;
176 }
177 }
178
179
180 class URIEscape : public AllStatic {
181 public:
182 template<typename Char>
183 MUST_USE_RESULT static MaybeHandle<String> Escape(Isolate* isolate,
184 Handle<String> string);
185
186 private:
187 static const char kHexChars[17];
188 static const char kNotEscaped[256];
189
IsNotEscaped(uint16_t c)190 static bool IsNotEscaped(uint16_t c) { return kNotEscaped[c] != 0; }
191 };
192
193
194 const char URIEscape::kHexChars[] = "0123456789ABCDEF";
195
196
197 // kNotEscaped is generated by the following:
198 //
199 // #!/bin/perl
200 // for (my $i = 0; $i < 256; $i++) {
201 // print "\n" if $i % 16 == 0;
202 // my $c = chr($i);
203 // my $escaped = 1;
204 // $escaped = 0 if $c =~ m#[A-Za-z0-9@*_+./-]#;
205 // print $escaped ? "0, " : "1, ";
206 // }
207
208 const char URIEscape::kNotEscaped[] = {
209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
212 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
213 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
215 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
216 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
218 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
221 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
222 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
225
226
227 template<typename Char>
Escape(Isolate * isolate,Handle<String> string)228 MaybeHandle<String> URIEscape::Escape(Isolate* isolate, Handle<String> string) {
229 DCHECK(string->IsFlat());
230 int escaped_length = 0;
231 int length = string->length();
232
233 { DisallowHeapAllocation no_allocation;
234 Vector<const Char> vector = GetCharVector<Char>(string);
235 for (int i = 0; i < length; i++) {
236 uint16_t c = vector[i];
237 if (c >= 256) {
238 escaped_length += 6;
239 } else if (IsNotEscaped(c)) {
240 escaped_length++;
241 } else {
242 escaped_length += 3;
243 }
244
245 // We don't allow strings that are longer than a maximal length.
246 DCHECK(String::kMaxLength < 0x7fffffff - 6); // Cannot overflow.
247 if (escaped_length > String::kMaxLength) break; // Provoke exception.
248 }
249 }
250
251 // No length change implies no change. Return original string if no change.
252 if (escaped_length == length) return string;
253
254 Handle<SeqOneByteString> dest;
255 ASSIGN_RETURN_ON_EXCEPTION(
256 isolate, dest,
257 isolate->factory()->NewRawOneByteString(escaped_length),
258 String);
259 int dest_position = 0;
260
261 { DisallowHeapAllocation no_allocation;
262 Vector<const Char> vector = GetCharVector<Char>(string);
263 for (int i = 0; i < length; i++) {
264 uint16_t c = vector[i];
265 if (c >= 256) {
266 dest->SeqOneByteStringSet(dest_position, '%');
267 dest->SeqOneByteStringSet(dest_position+1, 'u');
268 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c >> 12]);
269 dest->SeqOneByteStringSet(dest_position+3, kHexChars[(c >> 8) & 0xf]);
270 dest->SeqOneByteStringSet(dest_position+4, kHexChars[(c >> 4) & 0xf]);
271 dest->SeqOneByteStringSet(dest_position+5, kHexChars[c & 0xf]);
272 dest_position += 6;
273 } else if (IsNotEscaped(c)) {
274 dest->SeqOneByteStringSet(dest_position, c);
275 dest_position++;
276 } else {
277 dest->SeqOneByteStringSet(dest_position, '%');
278 dest->SeqOneByteStringSet(dest_position+1, kHexChars[c >> 4]);
279 dest->SeqOneByteStringSet(dest_position+2, kHexChars[c & 0xf]);
280 dest_position += 3;
281 }
282 }
283 }
284
285 return dest;
286 }
287
288 } } // namespace v8::internal
289
290 #endif // V8_URI_H_
291