• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright Joyent, Inc. and other Node contributors.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a
4 // copy of this software and associated documentation files (the
5 // "Software"), to deal in the Software without restriction, including
6 // without limitation the rights to use, copy, modify, merge, publish,
7 // distribute, sublicense, and/or sell copies of the Software, and to permit
8 // persons to whom the Software is furnished to do so, subject to the
9 // following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included
12 // in all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17 // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18 // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 // USE OR OTHER DEALINGS IN THE SOFTWARE.
21 
22 #include "string_bytes.h"
23 
24 #include "base64-inl.h"
25 #include "env-inl.h"
26 #include "node_buffer.h"
27 #include "node_errors.h"
28 #include "util.h"
29 
30 #include <climits>
31 #include <cstring>  // memcpy
32 
33 #include <algorithm>
34 
// Length threshold, counted in characters, that selects how strings are
// created in this file: shorter inputs are copied into regular V8 strings,
// inputs of at least this length are backed by external string resources.
// When creating strings >= this length v8's gc spins up and consumes
// most of the execution time. For these cases it's more performant to
// use external string resources.
#define EXTERN_APEX 0xFBEE9
39 
40 namespace node {
41 
42 using v8::HandleScope;
43 using v8::Isolate;
44 using v8::Just;
45 using v8::Local;
46 using v8::Maybe;
47 using v8::MaybeLocal;
48 using v8::Nothing;
49 using v8::String;
50 using v8::Value;
51 
52 namespace {
53 
54 template <typename ResourceType, typename TypeName>
55 class ExternString: public ResourceType {
56  public:
~ExternString()57   ~ExternString() override {
58     free(const_cast<TypeName*>(data_));
59     isolate()->AdjustAmountOfExternalAllocatedMemory(-byte_length());
60   }
61 
data() const62   const TypeName* data() const override {
63     return data_;
64   }
65 
length() const66   size_t length() const override {
67     return length_;
68   }
69 
byte_length() const70   int64_t byte_length() const {
71     return length() * sizeof(*data());
72   }
73 
NewFromCopy(Isolate * isolate,const TypeName * data,size_t length,Local<Value> * error)74   static MaybeLocal<Value> NewFromCopy(Isolate* isolate,
75                                        const TypeName* data,
76                                        size_t length,
77                                        Local<Value>* error) {
78     if (length == 0)
79       return String::Empty(isolate);
80 
81     if (length < EXTERN_APEX)
82       return NewSimpleFromCopy(isolate, data, length, error);
83 
84     TypeName* new_data = node::UncheckedMalloc<TypeName>(length);
85     if (new_data == nullptr) {
86       *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
87       return MaybeLocal<Value>();
88     }
89     memcpy(new_data, data, length * sizeof(*new_data));
90 
91     return ExternString<ResourceType, TypeName>::New(isolate,
92                                                      new_data,
93                                                      length,
94                                                      error);
95   }
96 
97   // uses "data" for external resource, and will be free'd on gc
New(Isolate * isolate,TypeName * data,size_t length,Local<Value> * error)98   static MaybeLocal<Value> New(Isolate* isolate,
99                                TypeName* data,
100                                size_t length,
101                                Local<Value>* error) {
102     if (length == 0)
103       return String::Empty(isolate);
104 
105     if (length < EXTERN_APEX) {
106       MaybeLocal<Value> str = NewSimpleFromCopy(isolate, data, length, error);
107       free(data);
108       return str;
109     }
110 
111     ExternString* h_str = new ExternString<ResourceType, TypeName>(isolate,
112                                                                    data,
113                                                                    length);
114     MaybeLocal<Value> str = NewExternal(isolate, h_str);
115     isolate->AdjustAmountOfExternalAllocatedMemory(h_str->byte_length());
116 
117     if (str.IsEmpty()) {
118       delete h_str;
119       *error = node::ERR_STRING_TOO_LONG(isolate);
120       return MaybeLocal<Value>();
121     }
122 
123     return str.ToLocalChecked();
124   }
125 
isolate() const126   inline Isolate* isolate() const { return isolate_; }
127 
128  private:
ExternString(Isolate * isolate,const TypeName * data,size_t length)129   ExternString(Isolate* isolate, const TypeName* data, size_t length)
130     : isolate_(isolate), data_(data), length_(length) { }
131   static MaybeLocal<Value> NewExternal(Isolate* isolate,
132                                        ExternString* h_str);
133 
134   // This method does not actually create ExternString instances.
135   static MaybeLocal<Value> NewSimpleFromCopy(Isolate* isolate,
136                                              const TypeName* data,
137                                              size_t length,
138                                              Local<Value>* error);
139 
140   Isolate* isolate_;
141   const TypeName* data_;
142   size_t length_;
143 };
144 
145 
146 typedef ExternString<String::ExternalOneByteStringResource,
147                      char> ExternOneByteString;
148 typedef ExternString<String::ExternalStringResource,
149                      uint16_t> ExternTwoByteString;
150 
151 
152 template <>
NewExternal(Isolate * isolate,ExternOneByteString * h_str)153 MaybeLocal<Value> ExternOneByteString::NewExternal(
154     Isolate* isolate, ExternOneByteString* h_str) {
155   return String::NewExternalOneByte(isolate, h_str).FromMaybe(Local<Value>());
156 }
157 
158 
159 template <>
NewExternal(Isolate * isolate,ExternTwoByteString * h_str)160 MaybeLocal<Value> ExternTwoByteString::NewExternal(
161     Isolate* isolate, ExternTwoByteString* h_str) {
162   return String::NewExternalTwoByte(isolate, h_str).FromMaybe(Local<Value>());
163 }
164 
165 template <>
NewSimpleFromCopy(Isolate * isolate,const char * data,size_t length,Local<Value> * error)166 MaybeLocal<Value> ExternOneByteString::NewSimpleFromCopy(Isolate* isolate,
167                                                          const char* data,
168                                                          size_t length,
169                                                          Local<Value>* error) {
170   MaybeLocal<String> str =
171       String::NewFromOneByte(isolate,
172                              reinterpret_cast<const uint8_t*>(data),
173                              v8::NewStringType::kNormal,
174                              length);
175   if (str.IsEmpty()) {
176     *error = node::ERR_STRING_TOO_LONG(isolate);
177     return MaybeLocal<Value>();
178   }
179   return str.ToLocalChecked();
180 }
181 
182 
183 template <>
NewSimpleFromCopy(Isolate * isolate,const uint16_t * data,size_t length,Local<Value> * error)184 MaybeLocal<Value> ExternTwoByteString::NewSimpleFromCopy(Isolate* isolate,
185                                                          const uint16_t* data,
186                                                          size_t length,
187                                                          Local<Value>* error) {
188   MaybeLocal<String> str =
189       String::NewFromTwoByte(isolate,
190                              data,
191                              v8::NewStringType::kNormal,
192                              length);
193   if (str.IsEmpty()) {
194     *error = node::ERR_STRING_TOO_LONG(isolate);
195     return MaybeLocal<Value>();
196   }
197   return str.ToLocalChecked();
198 }
199 
200 }  // anonymous namespace
201 
// Base64 decoding table, indexed by input byte.
// Supports regular and URL-safe base64: '+' and '-' both map to 62,
// '/' and '_' both map to 63. '\n', '\r' and ' ' hold -2; every other byte
// outside the alphabet holds -1.
const int8_t unbase64_table[256] = {
    // 0x00 - 0x0F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, -2, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F
    -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
    // 0x30 - 0x3F
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x4F
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    // 0x50 - 0x5F
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
    // 0x60 - 0x6F
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    // 0x70 - 0x7F
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
    // 0x80 - 0xFF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
221 
222 
// Hex digit lookup table, indexed by input byte: '0'-'9' hold 0-9, 'A'-'F'
// and 'a'-'f' hold 10-15, every other byte holds -1.
static const int8_t unhex_table[256] = {
    // 0x00 - 0x2F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x30 - 0x3F
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x4F
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50 - 0x5F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x60 - 0x6F
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70 - 0xFF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
241 
unhex(uint8_t x)242 static inline unsigned unhex(uint8_t x) {
243   return unhex_table[x];
244 }
245 
246 template <typename TypeName>
hex_decode(char * buf,size_t len,const TypeName * src,const size_t srcLen)247 static size_t hex_decode(char* buf,
248                          size_t len,
249                          const TypeName* src,
250                          const size_t srcLen) {
251   size_t i;
252   for (i = 0; i < len && i * 2 + 1 < srcLen; ++i) {
253     unsigned a = unhex(static_cast<uint8_t>(src[i * 2 + 0]));
254     unsigned b = unhex(static_cast<uint8_t>(src[i * 2 + 1]));
255     if (!~a || !~b)
256       return i;
257     buf[i] = (a << 4) | b;
258   }
259 
260   return i;
261 }
262 
WriteUCS2(Isolate * isolate,char * buf,size_t buflen,Local<String> str,int flags,size_t * chars_written)263 size_t StringBytes::WriteUCS2(Isolate* isolate,
264                               char* buf,
265                               size_t buflen,
266                               Local<String> str,
267                               int flags,
268                               size_t* chars_written) {
269   uint16_t* const dst = reinterpret_cast<uint16_t*>(buf);
270 
271   size_t max_chars = buflen / sizeof(*dst);
272   if (max_chars == 0) {
273     return 0;
274   }
275 
276   uint16_t* const aligned_dst = AlignUp(dst, sizeof(*dst));
277   size_t nchars;
278   if (aligned_dst == dst) {
279     nchars = str->Write(isolate, dst, 0, max_chars, flags);
280     *chars_written = nchars;
281     return nchars * sizeof(*dst);
282   }
283 
284   CHECK_EQ(reinterpret_cast<uintptr_t>(aligned_dst) % sizeof(*dst), 0);
285 
286   // Write all but the last char
287   max_chars = std::min(max_chars, static_cast<size_t>(str->Length()));
288   if (max_chars == 0) return 0;
289   nchars = str->Write(isolate, aligned_dst, 0, max_chars - 1, flags);
290   CHECK_EQ(nchars, max_chars - 1);
291 
292   // Shift everything to unaligned-left
293   memmove(dst, aligned_dst, nchars * sizeof(*dst));
294 
295   // One more char to be written
296   uint16_t last;
297   CHECK_EQ(str->Write(isolate, &last, nchars, 1, flags), 1);
298   memcpy(buf + nchars * sizeof(*dst), &last, sizeof(last));
299   nchars++;
300 
301   *chars_written = nchars;
302   return nchars * sizeof(*dst);
303 }
304 
305 
Write(Isolate * isolate,char * buf,size_t buflen,Local<Value> val,enum encoding encoding,int * chars_written)306 size_t StringBytes::Write(Isolate* isolate,
307                           char* buf,
308                           size_t buflen,
309                           Local<Value> val,
310                           enum encoding encoding,
311                           int* chars_written) {
312   HandleScope scope(isolate);
313   size_t nbytes;
314   int nchars;
315 
316   if (chars_written == nullptr)
317     chars_written = &nchars;
318 
319   CHECK(val->IsString() == true);
320   Local<String> str = val.As<String>();
321 
322   int flags = String::HINT_MANY_WRITES_EXPECTED |
323               String::NO_NULL_TERMINATION |
324               String::REPLACE_INVALID_UTF8;
325 
326   switch (encoding) {
327     case ASCII:
328     case LATIN1:
329       if (str->IsExternalOneByte()) {
330         auto ext = str->GetExternalOneByteStringResource();
331         nbytes = std::min(buflen, ext->length());
332         memcpy(buf, ext->data(), nbytes);
333       } else {
334         uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
335         nbytes = str->WriteOneByte(isolate, dst, 0, buflen, flags);
336       }
337       *chars_written = nbytes;
338       break;
339 
340     case BUFFER:
341     case UTF8:
342       nbytes = str->WriteUtf8(isolate, buf, buflen, chars_written, flags);
343       break;
344 
345     case UCS2: {
346       size_t nchars;
347 
348       nbytes = WriteUCS2(isolate, buf, buflen, str, flags, &nchars);
349       *chars_written = static_cast<int>(nchars);
350 
351       // Node's "ucs2" encoding wants LE character data stored in
352       // the Buffer, so we need to reorder on BE platforms.  See
353       // https://nodejs.org/api/buffer.html regarding Node's "ucs2"
354       // encoding specification
355       if (IsBigEndian())
356         SwapBytes16(buf, nbytes);
357 
358       break;
359     }
360 
361     case BASE64URL:
362       // Fall through
363     case BASE64:
364       if (str->IsExternalOneByte()) {
365         auto ext = str->GetExternalOneByteStringResource();
366         nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
367       } else {
368         String::Value value(isolate, str);
369         nbytes = base64_decode(buf, buflen, *value, value.length());
370       }
371       *chars_written = nbytes;
372       break;
373 
374     case HEX:
375       if (str->IsExternalOneByte()) {
376         auto ext = str->GetExternalOneByteStringResource();
377         nbytes = hex_decode(buf, buflen, ext->data(), ext->length());
378       } else {
379         String::Value value(isolate, str);
380         nbytes = hex_decode(buf, buflen, *value, value.length());
381       }
382       *chars_written = nbytes;
383       break;
384 
385     default:
386       CHECK(0 && "unknown encoding");
387       break;
388   }
389 
390   return nbytes;
391 }
392 
393 
394 // Quick and dirty size calculation
395 // Will always be at least big enough, but may have some extra
396 // UTF8 can be as much as 3x the size, Base64 can have 1-2 extra bytes
StorageSize(Isolate * isolate,Local<Value> val,enum encoding encoding)397 Maybe<size_t> StringBytes::StorageSize(Isolate* isolate,
398                                        Local<Value> val,
399                                        enum encoding encoding) {
400   HandleScope scope(isolate);
401   size_t data_size = 0;
402   bool is_buffer = Buffer::HasInstance(val);
403 
404   if (is_buffer && (encoding == BUFFER || encoding == LATIN1)) {
405     return Just(Buffer::Length(val));
406   }
407 
408   Local<String> str;
409   if (!val->ToString(isolate->GetCurrentContext()).ToLocal(&str))
410     return Nothing<size_t>();
411 
412   switch (encoding) {
413     case ASCII:
414     case LATIN1:
415       data_size = str->Length();
416       break;
417 
418     case BUFFER:
419     case UTF8:
420       // A single UCS2 codepoint never takes up more than 3 utf8 bytes.
421       // It is an exercise for the caller to decide when a string is
422       // long enough to justify calling Size() instead of StorageSize()
423       data_size = 3 * str->Length();
424       break;
425 
426     case UCS2:
427       data_size = str->Length() * sizeof(uint16_t);
428       break;
429 
430     case BASE64URL:
431       // Fall through
432     case BASE64:
433       data_size = base64_decoded_size_fast(str->Length());
434       break;
435 
436     case HEX:
437       CHECK(str->Length() % 2 == 0 && "invalid hex string length");
438       data_size = str->Length() / 2;
439       break;
440 
441     default:
442       CHECK(0 && "unknown encoding");
443       break;
444   }
445 
446   return Just(data_size);
447 }
448 
Size(Isolate * isolate,Local<Value> val,enum encoding encoding)449 Maybe<size_t> StringBytes::Size(Isolate* isolate,
450                                 Local<Value> val,
451                                 enum encoding encoding) {
452   HandleScope scope(isolate);
453 
454   if (Buffer::HasInstance(val) && (encoding == BUFFER || encoding == LATIN1))
455     return Just(Buffer::Length(val));
456 
457   Local<String> str;
458   if (!val->ToString(isolate->GetCurrentContext()).ToLocal(&str))
459     return Nothing<size_t>();
460 
461   switch (encoding) {
462     case ASCII:
463     case LATIN1:
464       return Just<size_t>(str->Length());
465 
466     case BUFFER:
467     case UTF8:
468       return Just<size_t>(str->Utf8Length(isolate));
469 
470     case UCS2:
471       return Just(str->Length() * sizeof(uint16_t));
472 
473     case BASE64URL:
474       // Fall through
475     case BASE64: {
476       String::Value value(isolate, str);
477       return Just(base64_decoded_size(*value, value.length()));
478     }
479 
480     case HEX:
481       return Just<size_t>(str->Length() / 2);
482   }
483 
484   UNREACHABLE();
485 }
486 
487 
488 
489 
// Byte-at-a-time scan: true when any of the first len bytes of buf has its
// high bit set.
static bool contains_non_ascii_slow(const char* buf, size_t len) {
  return std::any_of(buf, buf + len, [](char c) {
    return (c & 0x80) != 0;
  });
}
497 
498 
contains_non_ascii(const char * src,size_t len)499 static bool contains_non_ascii(const char* src, size_t len) {
500   if (len < 16) {
501     return contains_non_ascii_slow(src, len);
502   }
503 
504   const unsigned bytes_per_word = sizeof(uintptr_t);
505   const unsigned align_mask = bytes_per_word - 1;
506   const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;
507 
508   if (unaligned > 0) {
509     const unsigned n = bytes_per_word - unaligned;
510     if (contains_non_ascii_slow(src, n))
511       return true;
512     src += n;
513     len -= n;
514   }
515 
516 
517 #if defined(_WIN64) || defined(_LP64)
518   const uintptr_t mask = 0x8080808080808080ll;
519 #else
520   const uintptr_t mask = 0x80808080l;
521 #endif
522 
523   const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
524 
525   for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
526     if (srcw[i] & mask)
527       return true;
528   }
529 
530   const unsigned remainder = len & align_mask;
531   if (remainder > 0) {
532     const size_t offset = len - remainder;
533     if (contains_non_ascii_slow(src + offset, remainder))
534       return true;
535   }
536 
537   return false;
538 }
539 
540 
// Byte-at-a-time copy of len bytes from src to dst with the high bit of
// every byte cleared.
static void force_ascii_slow(const char* src, char* dst, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    dst[i] = src[i] & 0x7f;
  }
}


// Copies len bytes from src to dst with the high bit of every byte cleared.
// Inputs of 16 bytes or more are processed one machine word at a time when
// src and dst share the same misalignment; otherwise the byte-wise copy is
// used for the whole range.
static void force_ascii(const char* src, char* dst, size_t len) {
  if (len < 16) {
    force_ascii_slow(src, dst, len);
    return;
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned src_unalign = reinterpret_cast<uintptr_t>(src) & align_mask;
  const unsigned dst_unalign = reinterpret_cast<uintptr_t>(dst) & align_mask;

  if (src_unalign > 0) {
    if (src_unalign == dst_unalign) {
      // Handle the bytes up to the next word boundary one at a time.
      const unsigned unalign = bytes_per_word - src_unalign;
      force_ascii_slow(src, dst, unalign);
      src += unalign;
      dst += unalign;
      // Exactly "unalign" bytes were consumed above. Subtracting anything
      // else makes the remaining length disagree with the advanced pointers,
      // which either overruns both buffers or leaves the tail unwritten.
      len -= unalign;
    } else {
      // Both pointers can never be word-aligned at once.
      force_ascii_slow(src, dst, len);
      return;
    }
  }

  // Clears the high bit of every byte in a word.
#if defined(_WIN64) || defined(_LP64)
  const uintptr_t mask = ~0x8080808080808080ll;
#else
  const uintptr_t mask = ~0x80808080l;
#endif

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
  uintptr_t* dstw = reinterpret_cast<uintptr_t*>(dst);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    dstw[i] = srcw[i] & mask;
  }

  // Bytes left over after the last whole word.
  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    force_ascii_slow(src + offset, dst + offset, remainder);
  }
}
591 
592 
hex_encode(const char * src,size_t slen,char * dst,size_t dlen)593 size_t StringBytes::hex_encode(
594     const char* src,
595     size_t slen,
596     char* dst,
597     size_t dlen) {
598   // We know how much we'll write, just make sure that there's space.
599   CHECK(dlen >= slen * 2 &&
600       "not enough space provided for hex encode");
601 
602   dlen = slen * 2;
603   for (uint32_t i = 0, k = 0; k < dlen; i += 1, k += 2) {
604     static const char hex[] = "0123456789abcdef";
605     uint8_t val = static_cast<uint8_t>(src[i]);
606     dst[k + 0] = hex[val >> 4];
607     dst[k + 1] = hex[val & 15];
608   }
609 
610   return dlen;
611 }
612 
hex_encode(const char * src,size_t slen)613 std::string StringBytes::hex_encode(const char* src, size_t slen) {
614   size_t dlen = slen * 2;
615   std::string dst(dlen, '\0');
616   hex_encode(src, slen, &dst[0], dlen);
617   return dst;
618 }
619 
// Early-exit guard for the Encode() overloads: when len exceeds
// Buffer::kMaxLength it stores ERR_BUFFER_TOO_LARGE in *error and returns an
// empty MaybeLocal<Value> from the enclosing function. It expects "isolate"
// and "error" to be in scope at the point of use.
#define CHECK_BUFLEN_IN_RANGE(len)                                    \
  do {                                                                \
    if ((len) > Buffer::kMaxLength) {                                 \
      *error = node::ERR_BUFFER_TOO_LARGE(isolate);                   \
      return MaybeLocal<Value>();                                     \
    }                                                                 \
  } while (0)
627 
628 
Encode(Isolate * isolate,const char * buf,size_t buflen,enum encoding encoding,Local<Value> * error)629 MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
630                                       const char* buf,
631                                       size_t buflen,
632                                       enum encoding encoding,
633                                       Local<Value>* error) {
634   CHECK_BUFLEN_IN_RANGE(buflen);
635 
636   if (!buflen && encoding != BUFFER) {
637     return String::Empty(isolate);
638   }
639 
640   MaybeLocal<String> val;
641 
642   switch (encoding) {
643     case BUFFER:
644       {
645         if (buflen > node::Buffer::kMaxLength) {
646           *error = node::ERR_BUFFER_TOO_LARGE(isolate);
647           return MaybeLocal<Value>();
648         }
649         auto maybe_buf = Buffer::Copy(isolate, buf, buflen);
650         Local<v8::Object> buf;
651         if (!maybe_buf.ToLocal(&buf)) {
652           *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
653         }
654         return buf;
655       }
656 
657     case ASCII:
658       if (contains_non_ascii(buf, buflen)) {
659         char* out = node::UncheckedMalloc(buflen);
660         if (out == nullptr) {
661           *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
662           return MaybeLocal<Value>();
663         }
664         force_ascii(buf, out, buflen);
665         return ExternOneByteString::New(isolate, out, buflen, error);
666       } else {
667         return ExternOneByteString::NewFromCopy(isolate, buf, buflen, error);
668       }
669 
670     case UTF8:
671       {
672         val = String::NewFromUtf8(isolate,
673                                   buf,
674                                   v8::NewStringType::kNormal,
675                                   buflen);
676         Local<String> str;
677         if (!val.ToLocal(&str)) {
678           *error = node::ERR_STRING_TOO_LONG(isolate);
679         }
680         return str;
681       }
682 
683     case LATIN1:
684       return ExternOneByteString::NewFromCopy(isolate, buf, buflen, error);
685 
686     case BASE64: {
687       size_t dlen = base64_encoded_size(buflen);
688       char* dst = node::UncheckedMalloc(dlen);
689       if (dst == nullptr) {
690         *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
691         return MaybeLocal<Value>();
692       }
693 
694       size_t written = base64_encode(buf, buflen, dst, dlen);
695       CHECK_EQ(written, dlen);
696 
697       return ExternOneByteString::New(isolate, dst, dlen, error);
698     }
699 
700     case BASE64URL: {
701       size_t dlen = base64_encoded_size(buflen, Base64Mode::URL);
702       char* dst = node::UncheckedMalloc(dlen);
703       if (dst == nullptr) {
704         *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
705         return MaybeLocal<Value>();
706       }
707 
708       size_t written = base64_encode(buf, buflen, dst, dlen, Base64Mode::URL);
709       CHECK_EQ(written, dlen);
710 
711       return ExternOneByteString::New(isolate, dst, dlen, error);
712     }
713 
714     case HEX: {
715       size_t dlen = buflen * 2;
716       char* dst = node::UncheckedMalloc(dlen);
717       if (dst == nullptr) {
718         *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
719         return MaybeLocal<Value>();
720       }
721       size_t written = hex_encode(buf, buflen, dst, dlen);
722       CHECK_EQ(written, dlen);
723 
724       return ExternOneByteString::New(isolate, dst, dlen, error);
725     }
726 
727     case UCS2: {
728       if (IsBigEndian()) {
729         uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen / 2);
730         if (dst == nullptr) {
731           *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
732           return MaybeLocal<Value>();
733         }
734         for (size_t i = 0, k = 0; k < buflen / 2; i += 2, k += 1) {
735           // The input is in *little endian*, because that's what Node.js
736           // expects, so the high byte comes after the low byte.
737           const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);
738           const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);
739           dst[k] = static_cast<uint16_t>(hi) << 8 | lo;
740         }
741         return ExternTwoByteString::New(isolate, dst, buflen / 2, error);
742       }
743       if (reinterpret_cast<uintptr_t>(buf) % 2 != 0) {
744         // Unaligned data still means we can't directly pass it to V8.
745         char* dst = node::UncheckedMalloc(buflen);
746         if (dst == nullptr) {
747           *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
748           return MaybeLocal<Value>();
749         }
750         memcpy(dst, buf, buflen);
751         return ExternTwoByteString::New(
752             isolate, reinterpret_cast<uint16_t*>(dst), buflen / 2, error);
753       }
754       return ExternTwoByteString::NewFromCopy(
755           isolate, reinterpret_cast<const uint16_t*>(buf), buflen / 2, error);
756     }
757 
758     default:
759       CHECK(0 && "unknown encoding");
760       break;
761   }
762 
763   UNREACHABLE();
764 }
765 
766 
Encode(Isolate * isolate,const uint16_t * buf,size_t buflen,Local<Value> * error)767 MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
768                                       const uint16_t* buf,
769                                       size_t buflen,
770                                       Local<Value>* error) {
771   CHECK_BUFLEN_IN_RANGE(buflen);
772 
773   // Node's "ucs2" encoding expects LE character data inside a
774   // Buffer, so we need to reorder on BE platforms.  See
775   // https://nodejs.org/api/buffer.html regarding Node's "ucs2"
776   // encoding specification
777   if (IsBigEndian()) {
778     uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen);
779     if (dst == nullptr) {
780       *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
781       return MaybeLocal<Value>();
782     }
783     size_t nbytes = buflen * sizeof(uint16_t);
784     memcpy(dst, buf, nbytes);
785     SwapBytes16(reinterpret_cast<char*>(dst), nbytes);
786     return ExternTwoByteString::New(isolate, dst, buflen, error);
787   } else {
788     return ExternTwoByteString::NewFromCopy(isolate, buf, buflen, error);
789   }
790 }
791 
Encode(Isolate * isolate,const char * buf,enum encoding encoding,Local<Value> * error)792 MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
793                                       const char* buf,
794                                       enum encoding encoding,
795                                       Local<Value>* error) {
796   const size_t len = strlen(buf);
797   return Encode(isolate, buf, len, encoding, error);
798 }
799 
800 }  // namespace node
801