• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright Joyent, Inc. and other Node contributors.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a
4 // copy of this software and associated documentation files (the
5 // "Software"), to deal in the Software without restriction, including
6 // without limitation the rights to use, copy, modify, merge, publish,
7 // distribute, sublicense, and/or sell copies of the Software, and to permit
8 // persons to whom the Software is furnished to do so, subject to the
9 // following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included
12 // in all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17 // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18 // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 // USE OR OTHER DEALINGS IN THE SOFTWARE.
21 
22 #include "string_bytes.h"
23 
24 #include "base64.h"
25 #include "env-inl.h"
26 #include "node_buffer.h"
27 #include "node_errors.h"
28 #include "util.h"
29 
30 #include <climits>
31 #include <cstring>  // memcpy
32 
33 #include <algorithm>
34 
// Length threshold (in characters; 0xFBEE9 == 1031913) at which strings are
// handed to V8 as external string resources rather than copied into the V8
// heap. At or above this length V8's garbage collector ends up consuming most
// of the execution time, so externalizing the data performs better.
#define EXTERN_APEX 0xFBEE9
39 
40 namespace node {
41 
42 using v8::HandleScope;
43 using v8::Isolate;
44 using v8::Just;
45 using v8::Local;
46 using v8::Maybe;
47 using v8::MaybeLocal;
48 using v8::Nothing;
49 using v8::String;
50 using v8::Value;
51 
52 namespace {
53 
54 template <typename ResourceType, typename TypeName>
55 class ExternString: public ResourceType {
56  public:
~ExternString()57   ~ExternString() override {
58     free(const_cast<TypeName*>(data_));
59     isolate()->AdjustAmountOfExternalAllocatedMemory(-byte_length());
60   }
61 
data() const62   const TypeName* data() const override {
63     return data_;
64   }
65 
length() const66   size_t length() const override {
67     return length_;
68   }
69 
byte_length() const70   int64_t byte_length() const {
71     return length() * sizeof(*data());
72   }
73 
NewFromCopy(Isolate * isolate,const TypeName * data,size_t length,Local<Value> * error)74   static MaybeLocal<Value> NewFromCopy(Isolate* isolate,
75                                        const TypeName* data,
76                                        size_t length,
77                                        Local<Value>* error) {
78     if (length == 0)
79       return String::Empty(isolate);
80 
81     if (length < EXTERN_APEX)
82       return NewSimpleFromCopy(isolate, data, length, error);
83 
84     TypeName* new_data = node::UncheckedMalloc<TypeName>(length);
85     if (new_data == nullptr) {
86       *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
87       return MaybeLocal<Value>();
88     }
89     memcpy(new_data, data, length * sizeof(*new_data));
90 
91     return ExternString<ResourceType, TypeName>::New(isolate,
92                                                      new_data,
93                                                      length,
94                                                      error);
95   }
96 
97   // uses "data" for external resource, and will be free'd on gc
New(Isolate * isolate,TypeName * data,size_t length,Local<Value> * error)98   static MaybeLocal<Value> New(Isolate* isolate,
99                                TypeName* data,
100                                size_t length,
101                                Local<Value>* error) {
102     if (length == 0)
103       return String::Empty(isolate);
104 
105     if (length < EXTERN_APEX) {
106       MaybeLocal<Value> str = NewSimpleFromCopy(isolate, data, length, error);
107       free(data);
108       return str;
109     }
110 
111     ExternString* h_str = new ExternString<ResourceType, TypeName>(isolate,
112                                                                    data,
113                                                                    length);
114     MaybeLocal<Value> str = NewExternal(isolate, h_str);
115     isolate->AdjustAmountOfExternalAllocatedMemory(h_str->byte_length());
116 
117     if (str.IsEmpty()) {
118       delete h_str;
119       *error = node::ERR_STRING_TOO_LONG(isolate);
120       return MaybeLocal<Value>();
121     }
122 
123     return str.ToLocalChecked();
124   }
125 
isolate() const126   inline Isolate* isolate() const { return isolate_; }
127 
128  private:
ExternString(Isolate * isolate,const TypeName * data,size_t length)129   ExternString(Isolate* isolate, const TypeName* data, size_t length)
130     : isolate_(isolate), data_(data), length_(length) { }
131   static MaybeLocal<Value> NewExternal(Isolate* isolate,
132                                        ExternString* h_str);
133 
134   // This method does not actually create ExternString instances.
135   static MaybeLocal<Value> NewSimpleFromCopy(Isolate* isolate,
136                                              const TypeName* data,
137                                              size_t length,
138                                              Local<Value>* error);
139 
140   Isolate* isolate_;
141   const TypeName* data_;
142   size_t length_;
143 };
144 
145 
146 typedef ExternString<String::ExternalOneByteStringResource,
147                      char> ExternOneByteString;
148 typedef ExternString<String::ExternalStringResource,
149                      uint16_t> ExternTwoByteString;
150 
151 
152 template <>
NewExternal(Isolate * isolate,ExternOneByteString * h_str)153 MaybeLocal<Value> ExternOneByteString::NewExternal(
154     Isolate* isolate, ExternOneByteString* h_str) {
155   return String::NewExternalOneByte(isolate, h_str).FromMaybe(Local<Value>());
156 }
157 
158 
159 template <>
NewExternal(Isolate * isolate,ExternTwoByteString * h_str)160 MaybeLocal<Value> ExternTwoByteString::NewExternal(
161     Isolate* isolate, ExternTwoByteString* h_str) {
162   return String::NewExternalTwoByte(isolate, h_str).FromMaybe(Local<Value>());
163 }
164 
165 template <>
NewSimpleFromCopy(Isolate * isolate,const char * data,size_t length,Local<Value> * error)166 MaybeLocal<Value> ExternOneByteString::NewSimpleFromCopy(Isolate* isolate,
167                                                          const char* data,
168                                                          size_t length,
169                                                          Local<Value>* error) {
170   MaybeLocal<String> str =
171       String::NewFromOneByte(isolate,
172                              reinterpret_cast<const uint8_t*>(data),
173                              v8::NewStringType::kNormal,
174                              length);
175   if (str.IsEmpty()) {
176     *error = node::ERR_STRING_TOO_LONG(isolate);
177     return MaybeLocal<Value>();
178   }
179   return str.ToLocalChecked();
180 }
181 
182 
183 template <>
NewSimpleFromCopy(Isolate * isolate,const uint16_t * data,size_t length,Local<Value> * error)184 MaybeLocal<Value> ExternTwoByteString::NewSimpleFromCopy(Isolate* isolate,
185                                                          const uint16_t* data,
186                                                          size_t length,
187                                                          Local<Value>* error) {
188   MaybeLocal<String> str =
189       String::NewFromTwoByte(isolate,
190                              data,
191                              v8::NewStringType::kNormal,
192                              length);
193   if (str.IsEmpty()) {
194     *error = node::ERR_STRING_TOO_LONG(isolate);
195     return MaybeLocal<Value>();
196   }
197   return str.ToLocalChecked();
198 }
199 
200 }  // anonymous namespace
201 
// Lookup table mapping a byte to its 6-bit base64 value. Both the regular
// alphabet ('+' and '/') and the URL-safe alphabet ('-' and '_') are
// supported. Line feed, carriage return and space map to -2; every other
// byte outside the alphabets maps to -1.
const int8_t unbase64_table[256] =
  { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, -2, -1, -1,  // 0x00
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10
    -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,  // 0x20
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,  // 0x30
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,  // 0x40
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,  // 0x50
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,  // 0x60
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,  // 0x70
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x80
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x90
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xA0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xB0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xC0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xD0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0xE0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1   // 0xF0
  };
221 
222 
// Lookup table mapping a byte to the value of the hexadecimal digit it
// represents ('0'-'9', 'A'-'F', 'a'-'f'), or -1 when it is not a hex digit.
static const int8_t unhex_table[256] =
  { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
  };

// Returns the value (0-15) of hex digit `x`. For a non-digit the table's -1
// is converted to unsigned, i.e. all bits set.
static inline unsigned unhex(uint8_t x) {
  return unhex_table[x];
}

// Decodes pairs of hex digits from `src` (`srcLen` characters) into `buf`
// (capacity `len` bytes) and returns the number of bytes written.
//
// Decoding stops at the first pair containing a non-hex character, when
// `buf` is full, or when fewer than two source characters remain (a
// trailing odd character is ignored).
template <typename TypeName>
static size_t hex_decode(char* buf,
                         size_t len,
                         const TypeName* src,
                         const size_t srcLen) {
  size_t i;
  for (i = 0; i < len && i * 2 + 1 < srcLen; ++i) {
    const TypeName hi = src[i * 2 + 0];
    const TypeName lo = src[i * 2 + 1];

    // unhex() takes a uint8_t, so a wider code unit such as U+0130 would be
    // silently truncated to '0' and accepted. Treat anything outside the
    // one-byte range as invalid instead. The sizeof() guard keeps plain
    // (possibly negative) chars out of this comparison.
    if (sizeof(TypeName) > 1 &&
        (static_cast<uint32_t>(hi) > 0xFF ||
         static_cast<uint32_t>(lo) > 0xFF)) {
      return i;
    }

    const unsigned a = unhex(static_cast<uint8_t>(hi));
    const unsigned b = unhex(static_cast<uint8_t>(lo));
    // ~x is zero only when x has all bits set, i.e. unhex() reported -1.
    if (!~a || !~b)
      return i;
    buf[i] = static_cast<char>((a << 4) | b);
  }

  return i;
}
262 
WriteUCS2(Isolate * isolate,char * buf,size_t buflen,Local<String> str,int flags,size_t * chars_written)263 size_t StringBytes::WriteUCS2(Isolate* isolate,
264                               char* buf,
265                               size_t buflen,
266                               Local<String> str,
267                               int flags,
268                               size_t* chars_written) {
269   uint16_t* const dst = reinterpret_cast<uint16_t*>(buf);
270 
271   size_t max_chars = buflen / sizeof(*dst);
272   if (max_chars == 0) {
273     return 0;
274   }
275 
276   size_t nchars;
277   size_t alignment = reinterpret_cast<uintptr_t>(dst) % sizeof(*dst);
278   if (alignment == 0) {
279     nchars = str->Write(isolate, dst, 0, max_chars, flags);
280     *chars_written = nchars;
281     return nchars * sizeof(*dst);
282   }
283 
284   uint16_t* aligned_dst =
285       reinterpret_cast<uint16_t*>(buf + sizeof(*dst) - alignment);
286   CHECK_EQ(reinterpret_cast<uintptr_t>(aligned_dst) % sizeof(*dst), 0);
287 
288   // Write all but the last char
289   max_chars = std::min(max_chars, static_cast<size_t>(str->Length()));
290   if (max_chars == 0) return 0;
291   nchars = str->Write(isolate, aligned_dst, 0, max_chars - 1, flags);
292   CHECK_EQ(nchars, max_chars - 1);
293 
294   // Shift everything to unaligned-left
295   memmove(dst, aligned_dst, nchars * sizeof(*dst));
296 
297   // One more char to be written
298   uint16_t last;
299   CHECK_EQ(str->Write(isolate, &last, nchars, 1, flags), 1);
300   memcpy(buf + nchars * sizeof(*dst), &last, sizeof(last));
301   nchars++;
302 
303   *chars_written = nchars;
304   return nchars * sizeof(*dst);
305 }
306 
307 
Write(Isolate * isolate,char * buf,size_t buflen,Local<Value> val,enum encoding encoding,int * chars_written)308 size_t StringBytes::Write(Isolate* isolate,
309                           char* buf,
310                           size_t buflen,
311                           Local<Value> val,
312                           enum encoding encoding,
313                           int* chars_written) {
314   HandleScope scope(isolate);
315   size_t nbytes;
316   int nchars;
317 
318   if (chars_written == nullptr)
319     chars_written = &nchars;
320 
321   CHECK(val->IsString() == true);
322   Local<String> str = val.As<String>();
323 
324   int flags = String::HINT_MANY_WRITES_EXPECTED |
325               String::NO_NULL_TERMINATION |
326               String::REPLACE_INVALID_UTF8;
327 
328   switch (encoding) {
329     case ASCII:
330     case LATIN1:
331       if (str->IsExternalOneByte()) {
332         auto ext = str->GetExternalOneByteStringResource();
333         nbytes = std::min(buflen, ext->length());
334         memcpy(buf, ext->data(), nbytes);
335       } else {
336         uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
337         nbytes = str->WriteOneByte(isolate, dst, 0, buflen, flags);
338       }
339       *chars_written = nbytes;
340       break;
341 
342     case BUFFER:
343     case UTF8:
344       nbytes = str->WriteUtf8(isolate, buf, buflen, chars_written, flags);
345       break;
346 
347     case UCS2: {
348       size_t nchars;
349 
350       nbytes = WriteUCS2(isolate, buf, buflen, str, flags, &nchars);
351       *chars_written = static_cast<int>(nchars);
352 
353       // Node's "ucs2" encoding wants LE character data stored in
354       // the Buffer, so we need to reorder on BE platforms.  See
355       // https://nodejs.org/api/buffer.html regarding Node's "ucs2"
356       // encoding specification
357       if (IsBigEndian())
358         SwapBytes16(buf, nbytes);
359 
360       break;
361     }
362 
363     case BASE64:
364       if (str->IsExternalOneByte()) {
365         auto ext = str->GetExternalOneByteStringResource();
366         nbytes = base64_decode(buf, buflen, ext->data(), ext->length());
367       } else {
368         String::Value value(isolate, str);
369         nbytes = base64_decode(buf, buflen, *value, value.length());
370       }
371       *chars_written = nbytes;
372       break;
373 
374     case HEX:
375       if (str->IsExternalOneByte()) {
376         auto ext = str->GetExternalOneByteStringResource();
377         nbytes = hex_decode(buf, buflen, ext->data(), ext->length());
378       } else {
379         String::Value value(isolate, str);
380         nbytes = hex_decode(buf, buflen, *value, value.length());
381       }
382       *chars_written = nbytes;
383       break;
384 
385     default:
386       CHECK(0 && "unknown encoding");
387       break;
388   }
389 
390   return nbytes;
391 }
392 
393 
IsValidString(Local<String> string,enum encoding enc)394 bool StringBytes::IsValidString(Local<String> string,
395                                 enum encoding enc) {
396   if (enc == HEX && string->Length() % 2 != 0)
397     return false;
398   // TODO(bnoordhuis) Add BASE64 check?
399   return true;
400 }
401 
402 
403 // Quick and dirty size calculation
404 // Will always be at least big enough, but may have some extra
405 // UTF8 can be as much as 3x the size, Base64 can have 1-2 extra bytes
StorageSize(Isolate * isolate,Local<Value> val,enum encoding encoding)406 Maybe<size_t> StringBytes::StorageSize(Isolate* isolate,
407                                        Local<Value> val,
408                                        enum encoding encoding) {
409   HandleScope scope(isolate);
410   size_t data_size = 0;
411   bool is_buffer = Buffer::HasInstance(val);
412 
413   if (is_buffer && (encoding == BUFFER || encoding == LATIN1)) {
414     return Just(Buffer::Length(val));
415   }
416 
417   Local<String> str;
418   if (!val->ToString(isolate->GetCurrentContext()).ToLocal(&str))
419     return Nothing<size_t>();
420 
421   switch (encoding) {
422     case ASCII:
423     case LATIN1:
424       data_size = str->Length();
425       break;
426 
427     case BUFFER:
428     case UTF8:
429       // A single UCS2 codepoint never takes up more than 3 utf8 bytes.
430       // It is an exercise for the caller to decide when a string is
431       // long enough to justify calling Size() instead of StorageSize()
432       data_size = 3 * str->Length();
433       break;
434 
435     case UCS2:
436       data_size = str->Length() * sizeof(uint16_t);
437       break;
438 
439     case BASE64:
440       data_size = base64_decoded_size_fast(str->Length());
441       break;
442 
443     case HEX:
444       CHECK(str->Length() % 2 == 0 && "invalid hex string length");
445       data_size = str->Length() / 2;
446       break;
447 
448     default:
449       CHECK(0 && "unknown encoding");
450       break;
451   }
452 
453   return Just(data_size);
454 }
455 
Size(Isolate * isolate,Local<Value> val,enum encoding encoding)456 Maybe<size_t> StringBytes::Size(Isolate* isolate,
457                                 Local<Value> val,
458                                 enum encoding encoding) {
459   HandleScope scope(isolate);
460 
461   if (Buffer::HasInstance(val) && (encoding == BUFFER || encoding == LATIN1))
462     return Just(Buffer::Length(val));
463 
464   Local<String> str;
465   if (!val->ToString(isolate->GetCurrentContext()).ToLocal(&str))
466     return Nothing<size_t>();
467 
468   switch (encoding) {
469     case ASCII:
470     case LATIN1:
471       return Just<size_t>(str->Length());
472 
473     case BUFFER:
474     case UTF8:
475       return Just<size_t>(str->Utf8Length(isolate));
476 
477     case UCS2:
478       return Just(str->Length() * sizeof(uint16_t));
479 
480     case BASE64: {
481       String::Value value(isolate, str);
482       return Just(base64_decoded_size(*value, value.length()));
483     }
484 
485     case HEX:
486       return Just<size_t>(str->Length() / 2);
487   }
488 
489   UNREACHABLE();
490 }
491 
492 
493 
494 
// Byte-at-a-time scan: returns true if any of the first `len` bytes of
// `buf` has its high bit (0x80) set.
static bool contains_non_ascii_slow(const char* buf, size_t len) {
  const char* const end = buf + len;
  for (const char* p = buf; p != end; ++p) {
    if ((*p & 0x80) != 0)
      return true;
  }
  return false;
}
502 
503 
contains_non_ascii(const char * src,size_t len)504 static bool contains_non_ascii(const char* src, size_t len) {
505   if (len < 16) {
506     return contains_non_ascii_slow(src, len);
507   }
508 
509   const unsigned bytes_per_word = sizeof(uintptr_t);
510   const unsigned align_mask = bytes_per_word - 1;
511   const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;
512 
513   if (unaligned > 0) {
514     const unsigned n = bytes_per_word - unaligned;
515     if (contains_non_ascii_slow(src, n))
516       return true;
517     src += n;
518     len -= n;
519   }
520 
521 
522 #if defined(_WIN64) || defined(_LP64)
523   const uintptr_t mask = 0x8080808080808080ll;
524 #else
525   const uintptr_t mask = 0x80808080l;
526 #endif
527 
528   const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
529 
530   for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
531     if (srcw[i] & mask)
532       return true;
533   }
534 
535   const unsigned remainder = len & align_mask;
536   if (remainder > 0) {
537     const size_t offset = len - remainder;
538     if (contains_non_ascii_slow(src + offset, remainder))
539       return true;
540   }
541 
542   return false;
543 }
544 
545 
// Byte-at-a-time copy of `len` bytes from `src` to `dst` with the high bit
// of every byte cleared.
static void force_ascii_slow(const char* src, char* dst, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    dst[i] = src[i] & 0x7f;
  }
}


// Copies `len` bytes from `src` to `dst` with the high bit of every byte
// cleared.
//
// Inputs shorter than 16 bytes, and inputs whose source and destination are
// misaligned differently, are processed bytewise. Otherwise the unaligned
// head is processed bytewise, the bulk one machine word at a time, and the
// leftover tail bytewise again.
static void force_ascii(const char* src, char* dst, size_t len) {
  if (len < 16) {
    force_ascii_slow(src, dst, len);
    return;
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned src_unalign = reinterpret_cast<uintptr_t>(src) & align_mask;
  const unsigned dst_unalign = reinterpret_cast<uintptr_t>(dst) & align_mask;

  if (src_unalign > 0) {
    if (src_unalign == dst_unalign) {
      // Bytes needed to reach the next word boundary.
      const unsigned unalign = bytes_per_word - src_unalign;
      force_ascii_slow(src, dst, unalign);
      src += unalign;
      dst += unalign;
      // Subtract exactly what was consumed. Subtracting the misalignment
      // (src_unalign) instead would leave `len` too large and make the code
      // below run past the end of both buffers.
      len -= unalign;
    } else {
      // The two pointers can never be word-aligned at the same time.
      force_ascii_slow(src, dst, len);
      return;
    }
  }

  // Clears the high bit of every byte of a machine word.
#if defined(_WIN64) || defined(_LP64)
  const uintptr_t mask = ~0x8080808080808080ll;
#else
  const uintptr_t mask = ~0x80808080l;
#endif

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
  uintptr_t* dstw = reinterpret_cast<uintptr_t*>(dst);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    dstw[i] = srcw[i] & mask;
  }

  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    force_ascii_slow(src + offset, dst + offset, remainder);
  }
}
596 
597 
hex_encode(const char * src,size_t slen,char * dst,size_t dlen)598 size_t StringBytes::hex_encode(
599     const char* src,
600     size_t slen,
601     char* dst,
602     size_t dlen) {
603   // We know how much we'll write, just make sure that there's space.
604   CHECK(dlen >= slen * 2 &&
605       "not enough space provided for hex encode");
606 
607   dlen = slen * 2;
608   for (uint32_t i = 0, k = 0; k < dlen; i += 1, k += 2) {
609     static const char hex[] = "0123456789abcdef";
610     uint8_t val = static_cast<uint8_t>(src[i]);
611     dst[k + 0] = hex[val >> 4];
612     dst[k + 1] = hex[val & 15];
613   }
614 
615   return dlen;
616 }
617 
hex_encode(const char * src,size_t slen)618 std::string StringBytes::hex_encode(const char* src, size_t slen) {
619   size_t dlen = slen * 2;
620   std::string dst(dlen, '\0');
621   hex_encode(src, slen, &dst[0], dlen);
622   return dst;
623 }
624 
// Makes the enclosing function return an empty MaybeLocal<Value>, after
// storing ERR_BUFFER_TOO_LARGE in `*error`, when `len` exceeds
// Buffer::kMaxLength. Expects `isolate` and `error` to be in scope at the
// point of use.
#define CHECK_BUFLEN_IN_RANGE(len)                                    \
  do {                                                                \
    if (Buffer::kMaxLength < (len)) {                                 \
      *error = node::ERR_BUFFER_TOO_LARGE(isolate);                   \
      return MaybeLocal<Value>();                                     \
    }                                                                 \
  } while (0)
632 
633 
Encode(Isolate * isolate,const char * buf,size_t buflen,enum encoding encoding,Local<Value> * error)634 MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
635                                       const char* buf,
636                                       size_t buflen,
637                                       enum encoding encoding,
638                                       Local<Value>* error) {
639   CHECK_BUFLEN_IN_RANGE(buflen);
640 
641   if (!buflen && encoding != BUFFER) {
642     return String::Empty(isolate);
643   }
644 
645   MaybeLocal<String> val;
646 
647   switch (encoding) {
648     case BUFFER:
649       {
650         if (buflen > node::Buffer::kMaxLength) {
651           *error = node::ERR_BUFFER_TOO_LARGE(isolate);
652           return MaybeLocal<Value>();
653         }
654         auto maybe_buf = Buffer::Copy(isolate, buf, buflen);
655         Local<v8::Object> buf;
656         if (!maybe_buf.ToLocal(&buf)) {
657           *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
658         }
659         return buf;
660       }
661 
662     case ASCII:
663       if (contains_non_ascii(buf, buflen)) {
664         char* out = node::UncheckedMalloc(buflen);
665         if (out == nullptr) {
666           *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
667           return MaybeLocal<Value>();
668         }
669         force_ascii(buf, out, buflen);
670         return ExternOneByteString::New(isolate, out, buflen, error);
671       } else {
672         return ExternOneByteString::NewFromCopy(isolate, buf, buflen, error);
673       }
674 
675     case UTF8:
676       {
677         val = String::NewFromUtf8(isolate,
678                                   buf,
679                                   v8::NewStringType::kNormal,
680                                   buflen);
681         Local<String> str;
682         if (!val.ToLocal(&str)) {
683           *error = node::ERR_STRING_TOO_LONG(isolate);
684         }
685         return str;
686       }
687 
688     case LATIN1:
689       return ExternOneByteString::NewFromCopy(isolate, buf, buflen, error);
690 
691     case BASE64: {
692       size_t dlen = base64_encoded_size(buflen);
693       char* dst = node::UncheckedMalloc(dlen);
694       if (dst == nullptr) {
695         *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
696         return MaybeLocal<Value>();
697       }
698 
699       size_t written = base64_encode(buf, buflen, dst, dlen);
700       CHECK_EQ(written, dlen);
701 
702       return ExternOneByteString::New(isolate, dst, dlen, error);
703     }
704 
705     case HEX: {
706       size_t dlen = buflen * 2;
707       char* dst = node::UncheckedMalloc(dlen);
708       if (dst == nullptr) {
709         *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
710         return MaybeLocal<Value>();
711       }
712       size_t written = hex_encode(buf, buflen, dst, dlen);
713       CHECK_EQ(written, dlen);
714 
715       return ExternOneByteString::New(isolate, dst, dlen, error);
716     }
717 
718     case UCS2: {
719       if (IsBigEndian()) {
720         uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen / 2);
721         if (dst == nullptr) {
722           *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
723           return MaybeLocal<Value>();
724         }
725         for (size_t i = 0, k = 0; k < buflen / 2; i += 2, k += 1) {
726           // The input is in *little endian*, because that's what Node.js
727           // expects, so the high byte comes after the low byte.
728           const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);
729           const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);
730           dst[k] = static_cast<uint16_t>(hi) << 8 | lo;
731         }
732         return ExternTwoByteString::New(isolate, dst, buflen / 2, error);
733       }
734       if (reinterpret_cast<uintptr_t>(buf) % 2 != 0) {
735         // Unaligned data still means we can't directly pass it to V8.
736         char* dst = node::UncheckedMalloc(buflen);
737         if (dst == nullptr) {
738           *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
739           return MaybeLocal<Value>();
740         }
741         memcpy(dst, buf, buflen);
742         return ExternTwoByteString::New(
743             isolate, reinterpret_cast<uint16_t*>(dst), buflen / 2, error);
744       }
745       return ExternTwoByteString::NewFromCopy(
746           isolate, reinterpret_cast<const uint16_t*>(buf), buflen / 2, error);
747     }
748 
749     default:
750       CHECK(0 && "unknown encoding");
751       break;
752   }
753 
754   UNREACHABLE();
755 }
756 
757 
Encode(Isolate * isolate,const uint16_t * buf,size_t buflen,Local<Value> * error)758 MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
759                                       const uint16_t* buf,
760                                       size_t buflen,
761                                       Local<Value>* error) {
762   CHECK_BUFLEN_IN_RANGE(buflen);
763 
764   // Node's "ucs2" encoding expects LE character data inside a
765   // Buffer, so we need to reorder on BE platforms.  See
766   // https://nodejs.org/api/buffer.html regarding Node's "ucs2"
767   // encoding specification
768   if (IsBigEndian()) {
769     uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen);
770     if (dst == nullptr) {
771       *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
772       return MaybeLocal<Value>();
773     }
774     size_t nbytes = buflen * sizeof(uint16_t);
775     memcpy(dst, buf, nbytes);
776     SwapBytes16(reinterpret_cast<char*>(dst), nbytes);
777     return ExternTwoByteString::New(isolate, dst, buflen, error);
778   } else {
779     return ExternTwoByteString::NewFromCopy(isolate, buf, buflen, error);
780   }
781 }
782 
Encode(Isolate * isolate,const char * buf,enum encoding encoding,Local<Value> * error)783 MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
784                                       const char* buf,
785                                       enum encoding encoding,
786                                       Local<Value>* error) {
787   const size_t len = strlen(buf);
788   return Encode(isolate, buf, len, encoding, error);
789 }
790 
791 }  // namespace node
792