1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_CORE_PLATFORM_TSTRING_H_
17 #define TENSORFLOW_CORE_PLATFORM_TSTRING_H_
18
19 #include <assert.h>
20
21 #include <ostream>
22 #include <string>
23
24 #include "tensorflow/core/platform/cord.h"
25 #include "tensorflow/core/platform/ctstring.h"
26
27 // TODO(dero): This include is temporary, and will be superfluous once
28 // absl::string_view is aliased to std::string_view.
// Forward declaration of absl::AlphaNum. When the ABSL_NAMESPACE_BEGIN /
// ABSL_NAMESPACE_END macros are defined they are expanded around the
// declaration; otherwise the class is declared directly in namespace absl.
namespace absl {
#ifdef ABSL_NAMESPACE_BEGIN
ABSL_NAMESPACE_BEGIN
#endif  // ABSL_NAMESPACE_BEGIN
class AlphaNum;
#ifdef ABSL_NAMESPACE_END
ABSL_NAMESPACE_END
#endif  // ABSL_NAMESPACE_END
}  // namespace absl
38
39 namespace tensorflow {
40
41 // tensorflow::tstring is the scalar type for DT_STRING tensors.
42 //
43 // tstrings are meant to be used when interfacing with string tensors, and
44 // should not be considered as a general replacement for std::string in
45 // tensorflow. The primary purpose of tstring is to provide a unified and
46 // stable ABI for string tensors across TF Core/C-API/Lite/etc---mitigating
47 // unnecessary conversions across language boundaries, and allowing for compiler
48 // agnostic interoperability across dynamically loaded modules.
49 //
// In addition to ABI stability, tstring features two string subtypes, VIEW and
// OFFSET.
52 //
// VIEW tstrings are views into unowned character buffers; they can be used to
// pass around existing character strings without incurring a per-object heap
// allocation. Note that, like std::string_view, it is the user's
// responsibility to ensure that the underlying buffer of a VIEW tstring
// outlives the associated tstring object.
58 //
59 // TODO(dero): Methods for creating OFFSET tensors are not currently
60 // implemented.
61 //
// OFFSET tstrings are platform-independent, offset-defined strings which can
// be directly mmapped or copied into a tensor buffer without the need for
// deserialization or processing. For security reasons, it is imperative that
// OFFSET-based string tensors are validated before use, or are from a trusted
// source.
67 //
68 // Underlying VIEW and OFFSET buffers are considered immutable, so l-value
69 // assignment, mutation, or non-const access to data() of tstrings will result
70 // in the conversion to an owned SMALL/LARGE type.
71 //
72 // The interface for tstring largely overlaps with std::string. Except where
73 // noted, expect equivalent semantics with synonymous std::string methods.
// Wraps a single C-level TF_TString and exposes an interface modeled on
// std::string. All member functions are defined inline later in this header.
class tstring {
  // Sole data member: the C representation the methods below operate on.
  TF_TString tstr_;

 public:
  // Storage kind of a tstring, mirroring the TF_TSTR_* constants.
  enum Type {
    // See ctstring.h
    SMALL = TF_TSTR_SMALL,
    LARGE = TF_TSTR_LARGE,
    OFFSET = TF_TSTR_OFFSET,
    VIEW = TF_TSTR_VIEW,
  };

  // Assignment to a tstring object with a tstring::view type will create a VIEW
  // type tstring.
  //
  // A view is a non-owning (pointer, size) pair. It cannot be default
  // constructed, copied, or copy-assigned.
  class view {
    const char* data_;
    size_t size_;

   public:
    // Wraps the `size` bytes starting at `data`.
    explicit view(const char* data, size_t size) : data_(data), size_(size) {}
    // Wraps a NUL-terminated string; the length is computed with strlen, so
    // `data` must not be null.
    explicit view(const char* data) : data_(data), size_(::strlen(data)) {}

    // Pointer to the first byte of the viewed buffer.
    const char* data() const { return data_; }

    // Number of bytes in the viewed buffer.
    size_t size() const { return size_; }

    view() = delete;
    view(const view&) = delete;
    view& operator=(const view&) = delete;
  };

  // Iteration is read-only: the iterator type is a pointer to const char.
  typedef const char* const_iterator;

  // Ctor
  // The `const char*` overloads without a length measure the input with
  // strlen, so the pointer must be a non-null, NUL-terminated string.
  tstring();
  tstring(const std::string& str);  // NOLINT TODO(b/147740521): Make explicit.
  tstring(const char* str, size_t len);
  tstring(const char* str);  // NOLINT TODO(b/147740521): Make explicit.
  tstring(size_t n, char c);
  explicit tstring(const std::string_view str);
#ifdef PLATFORM_GOOGLE
  // NOTE(review): `std::Cord` is not a standard library type; presumably
  // absl::Cord (see platform/cord.h) is intended — confirm.
  explicit tstring(const std::Cord& cord);
#endif  // PLATFORM_GOOGLE

  // Copy
  tstring(const tstring& str);

  // Move
  tstring(tstring&& str) noexcept;

  // Dtor
  ~tstring();

  // Copy Assignment
  tstring& operator=(const tstring& str);
  tstring& operator=(const std::string& str);
  tstring& operator=(const char* str);
  tstring& operator=(char ch);
  tstring& operator=(const std::string_view str);
#ifdef PLATFORM_GOOGLE
  tstring& operator=(const std::Cord& cord);
#endif  // PLATFORM_GOOGLE

  // View Assignment
  // Makes this tstring refer to the buffer described by `tsv` (forwards to
  // assign_as_view).
  tstring& operator=(const view& tsv);

  // Move Assignment
  // NOTE(review): unlike the move constructor, this is not declared noexcept —
  // confirm whether that is intentional.
  tstring& operator=(tstring&& str);

  // Comparison
  // compare() returns -1, 0, or +1: bytes are compared with memcmp over the
  // common prefix, and on a tie the shorter string orders first.
  int compare(const char* str, size_t len) const;
  bool operator<(const tstring& o) const;
  bool operator>(const tstring& o) const;
  bool operator==(const char* str) const;
  bool operator==(const tstring& o) const;
  bool operator!=(const char* str) const;
  bool operator!=(const tstring& o) const;

  // Conversion Operators
  // TODO(b/147740521): Make explicit.
  operator std::string() const;  // NOLINT
  // TODO(b/147740521): Make explicit.
  operator std::string_view() const;  // NOLINT
#ifdef PLATFORM_GOOGLE
  // NOTE(review): `std::AlphaNum` is not a standard library type, while this
  // header forward-declares absl::AlphaNum — confirm the intended namespace.
  template <typename T,
            typename std::enable_if<std::is_same<T, std::AlphaNum>::value,
                                    T>::type* = nullptr>
  operator T() const;  // NOLINT TODO(b/147740521): Remove.
#endif  // PLATFORM_GOOGLE

  // Attributes
  // length() is a synonym for size(); empty() is true when size() == 0.
  size_t size() const;
  size_t length() const;
  size_t capacity() const;
  bool empty() const;
  Type type() const;

  // Allocation
  void resize(size_t new_size, char c = 0);
  // Similar to resize, but will leave the newly grown region uninitialized.
  void resize_uninitialized(size_t new_size);
  void clear() noexcept;
  void reserve(size_t n);

  // Iterators
  const_iterator begin() const;
  const_iterator end() const;

  // Const Element Access
  // c_str() returns the same pointer as data(). back() indexes size() - 1 and
  // therefore must not be called on an empty string.
  const char* c_str() const;
  const char* data() const;
  const char& operator[](size_t i) const;
  const char& back() const;

  // Mutable Element Access
  // NOTE: For VIEW/OFFSET types, calling these methods will result in the
  // conversion to a SMALL or heap allocated LARGE type. As a result,
  // previously obtained pointers, references, or iterators to the underlying
  // buffer will point to the original VIEW/OFFSET and not the new allocation.
  char* mdata();
  char* data();  // DEPRECATED: Use mdata().
  char& operator[](size_t i);

  // Assignment
  tstring& assign(const char* str, size_t len);
  tstring& assign(const char* str);

  // View Assignment
  // Each overload forwards a (pointer, length) pair to
  // assign_as_view(const char*, size_t).
  tstring& assign_as_view(const tstring& str);
  tstring& assign_as_view(const std::string& str);
  tstring& assign_as_view(const std::string_view str);
  tstring& assign_as_view(const char* str, size_t len);
  tstring& assign_as_view(const char* str);

  // Modifiers
  // NOTE: Invalid input will result in undefined behavior.
  tstring& append(const tstring& str);
  tstring& append(const char* str, size_t len);
  tstring& append(const char* str);
  tstring& append(size_t n, char c);

  // Removes `len` bytes starting at offset `pos`.
  tstring& erase(size_t pos, size_t len);

  // Inserts, at offset `pos`, either `sublen` bytes of `str` starting at
  // `subpos`, or `n` copies of `c`.
  tstring& insert(size_t pos, const tstring& str, size_t subpos, size_t sublen);
  tstring& insert(size_t pos, size_t n, char c);
  void swap(tstring& str);
  void push_back(char ch);

  // Friends
  friend bool operator==(const char* a, const tstring& b);
  friend bool operator==(const std::string& a, const tstring& b);
  friend tstring operator+(const tstring& a, const tstring& b);
  friend std::ostream& operator<<(std::ostream& o, const tstring& str);
  friend std::hash<tstring>;
};
229
230 // Non-member function overloads
231
// Namespace-scope declarations of the operators befriended by tstring; their
// inline definitions appear at the end of this header.
bool operator==(const char* a, const tstring& b);
bool operator==(const std::string& a, const tstring& b);
tstring operator+(const tstring& a, const tstring& b);
std::ostream& operator<<(std::ostream& o, const tstring& str);
236
237 // Implementations
238
239 // Ctor
240
tstring()241 inline tstring::tstring() { TF_TString_Init(&tstr_); }
242
tstring(const char * str,size_t len)243 inline tstring::tstring(const char* str, size_t len) {
244 TF_TString_Init(&tstr_);
245 TF_TString_Copy(&tstr_, str, len);
246 }
247
tstring(const char * str)248 inline tstring::tstring(const char* str) : tstring(str, ::strlen(str)) {}
249
tstring(size_t n,char c)250 inline tstring::tstring(size_t n, char c) {
251 TF_TString_Init(&tstr_);
252 TF_TString_Resize(&tstr_, n, c);
253 }
254
tstring(const std::string & str)255 inline tstring::tstring(const std::string& str)
256 : tstring(str.data(), str.size()) {}
257
tstring(const std::string_view str)258 inline tstring::tstring(const std::string_view str)
259 : tstring(str.data(), str.size()) {}
260
261 #ifdef PLATFORM_GOOGLE
tstring(const std::Cord & cord)262 inline tstring::tstring(const std::Cord& cord) {
263 TF_TString_Init(&tstr_);
264 TF_TString_ResizeUninitialized(&tstr_, cord.size());
265
266 cord.CopyToArray(data());
267 }
268 #endif // PLATFORM_GOOGLE
269
270 // Copy
271
// Copy constructor: initializes the wrapped TF_TString, then assigns from
// `str` with TF_TString_Assign.
inline tstring::tstring(const tstring& str) {
  const TF_TString* const source = &str.tstr_;
  TF_TString_Init(&tstr_);
  TF_TString_Assign(&tstr_, source);
}
276
277 // Move
278
tstring(tstring && str)279 inline tstring::tstring(tstring&& str) noexcept {
280 TF_TString_Init(&tstr_);
281 TF_TString_Move(&tstr_, &str.tstr_);
282 }
283
284 // Dtor
285
~tstring()286 inline tstring::~tstring() { TF_TString_Dealloc(&tstr_); }
287
288 // Copy Assignment
289
290 inline tstring& tstring::operator=(const tstring& str) {
291 TF_TString_Assign(&tstr_, &str.tstr_);
292
293 return *this;
294 }
295
296 inline tstring& tstring::operator=(const std::string& str) {
297 TF_TString_Copy(&tstr_, str.data(), str.size());
298 return *this;
299 }
300
301 inline tstring& tstring::operator=(const char* str) {
302 TF_TString_Copy(&tstr_, str, ::strlen(str));
303
304 return *this;
305 }
306
307 inline tstring& tstring::operator=(char c) {
308 resize_uninitialized(1);
309 (*this)[0] = c;
310
311 return *this;
312 }
313
314 inline tstring& tstring::operator=(const std::string_view str) {
315 TF_TString_Copy(&tstr_, str.data(), str.size());
316
317 return *this;
318 }
319
320 #ifdef PLATFORM_GOOGLE
321 inline tstring& tstring::operator=(const std::Cord& cord) {
322 TF_TString_ResizeUninitialized(&tstr_, cord.size());
323
324 cord.CopyToArray(data());
325
326 return *this;
327 }
328 #endif // PLATFORM_GOOGLE
329
330 // View Assignment
331
332 inline tstring& tstring::operator=(const tstring::view& tsv) {
333 assign_as_view(tsv.data(), tsv.size());
334
335 return *this;
336 }
337
338 // Move Assignment
339
340 inline tstring& tstring::operator=(tstring&& str) {
341 TF_TString_Move(&tstr_, &str.tstr_);
342
343 return *this;
344 }
345
346 // Comparison
347
compare(const char * str,size_t len)348 inline int tstring::compare(const char* str, size_t len) const {
349 int ret = ::memcmp(data(), str, std::min(len, size()));
350
351 if (ret < 0) return -1;
352 if (ret > 0) return +1;
353
354 if (size() < len) return -1;
355 if (size() > len) return +1;
356
357 return 0;
358 }
359
360 inline bool tstring::operator<(const tstring& o) const {
361 return compare(o.data(), o.size()) < 0;
362 }
363
364 inline bool tstring::operator>(const tstring& o) const {
365 return compare(o.data(), o.size()) > 0;
366 }
367
368 inline bool tstring::operator==(const char* str) const {
369 return ::strlen(str) == size() && ::memcmp(data(), str, size()) == 0;
370 }
371
372 inline bool tstring::operator==(const tstring& o) const {
373 return o.size() == size() && ::memcmp(data(), o.data(), size()) == 0;
374 }
375
376 inline bool tstring::operator!=(const char* str) const {
377 return !(*this == str);
378 }
379
380 inline bool tstring::operator!=(const tstring& o) const {
381 return !(*this == o);
382 }
383
384 // Conversion Operators
385
string()386 inline tstring::operator std::string() const {
387 return std::string(data(), size());
388 }
389
string_view()390 inline tstring::operator std::string_view() const {
391 return std::string_view(data(), size());
392 }
393
394 #ifdef PLATFORM_GOOGLE
// Out-of-class definition of the conversion template declared in tstring. The
// enable_if constraint restricts T to std::AlphaNum; the result is built from
// this string's std::string_view conversion.
// NOTE(review): std::AlphaNum is not a standard library type, while the top of
// this header forward-declares absl::AlphaNum — confirm the intended namespace.
template <typename T, typename std::enable_if<
                          std::is_same<T, std::AlphaNum>::value, T>::type*>
inline tstring::operator T() const {
  return T(std::string_view(*this));
}
400 #endif // PLATFORM_GOOGLE
401
402 // Attributes
403
size()404 inline size_t tstring::size() const { return TF_TString_GetSize(&tstr_); }
405
length()406 inline size_t tstring::length() const { return size(); }
407
capacity()408 inline size_t tstring::capacity() const {
409 return TF_TString_GetCapacity(&tstr_);
410 }
411
empty()412 inline bool tstring::empty() const { return size() == 0; }
413
type()414 inline tstring::Type tstring::type() const {
415 return static_cast<tstring::Type>(TF_TString_GetType(&tstr_));
416 }
417
418 // Allocation
419
resize(size_t new_size,char c)420 inline void tstring::resize(size_t new_size, char c) {
421 TF_TString_Resize(&tstr_, new_size, c);
422 }
423
resize_uninitialized(size_t new_size)424 inline void tstring::resize_uninitialized(size_t new_size) {
425 TF_TString_ResizeUninitialized(&tstr_, new_size);
426 }
427
clear()428 inline void tstring::clear() noexcept {
429 TF_TString_ResizeUninitialized(&tstr_, 0);
430 }
431
reserve(size_t n)432 inline void tstring::reserve(size_t n) { TF_TString_Reserve(&tstr_, n); }
433
434 // Iterators
435
begin()436 inline tstring::const_iterator tstring::begin() const { return &(*this)[0]; }
end()437 inline tstring::const_iterator tstring::end() const { return &(*this)[size()]; }
438
439 // Element Access
440
c_str()441 inline const char* tstring::c_str() const { return data(); }
442
data()443 inline const char* tstring::data() const {
444 return TF_TString_GetDataPointer(&tstr_);
445 }
446
447 inline const char& tstring::operator[](size_t i) const { return data()[i]; }
448
back()449 inline const char& tstring::back() const { return (*this)[size() - 1]; }
450
mdata()451 inline char* tstring::mdata() {
452 return TF_TString_GetMutableDataPointer(&tstr_);
453 }
454
data()455 inline char* tstring::data() {
456 // Deprecated
457 return mdata();
458 }
459
460 inline char& tstring::operator[](size_t i) { return mdata()[i]; }
461
462 // Assignment
463
assign(const char * str,size_t len)464 inline tstring& tstring::assign(const char* str, size_t len) {
465 TF_TString_Copy(&tstr_, str, len);
466
467 return *this;
468 }
469
assign(const char * str)470 inline tstring& tstring::assign(const char* str) {
471 assign(str, ::strlen(str));
472
473 return *this;
474 }
475
476 // View Assignment
477
// View-assigns the buffer str.data()[0 .. str.size()) by forwarding to the
// (pointer, length) overload, which returns *this.
inline tstring& tstring::assign_as_view(const tstring& str) {
  return assign_as_view(str.data(), str.size());
}
483
assign_as_view(const std::string & str)484 inline tstring& tstring::assign_as_view(const std::string& str) {
485 assign_as_view(str.data(), str.size());
486
487 return *this;
488 }
489
assign_as_view(const std::string_view str)490 inline tstring& tstring::assign_as_view(const std::string_view str) {
491 assign_as_view(str.data(), str.size());
492
493 return *this;
494 }
495
assign_as_view(const char * str,size_t len)496 inline tstring& tstring::assign_as_view(const char* str, size_t len) {
497 TF_TString_AssignView(&tstr_, str, len);
498
499 return *this;
500 }
501
assign_as_view(const char * str)502 inline tstring& tstring::assign_as_view(const char* str) {
503 assign_as_view(str, ::strlen(str));
504
505 return *this;
506 }
507
508 // Modifiers
509
// Appends `str` via TF_TString_Append. Returns *this.
inline tstring& tstring::append(const tstring& str) {
  const TF_TString* const source = &str.tstr_;
  TF_TString_Append(&tstr_, source);
  return *this;
}
515
append(const char * str,size_t len)516 inline tstring& tstring::append(const char* str, size_t len) {
517 TF_TString_AppendN(&tstr_, str, len);
518
519 return *this;
520 }
521
append(const char * str)522 inline tstring& tstring::append(const char* str) {
523 append(str, ::strlen(str));
524
525 return *this;
526 }
527
append(size_t n,char c)528 inline tstring& tstring::append(size_t n, char c) {
529 resize(size() + n, c);
530
531 return *this;
532 }
533
erase(size_t pos,size_t len)534 inline tstring& tstring::erase(size_t pos, size_t len) {
535 memmove(mdata() + pos, data() + pos + len, size() - len - pos);
536
537 resize(size() - len);
538
539 return *this;
540 }
541
// Inserts the `sublen` bytes of `str` starting at `subpos` into this string
// at offset `pos`, and returns *this.
//
// Offsets are not validated; out-of-range values are undefined behavior.
//
// Self-insertion (`str` is *this) is handled by snapshotting the source range
// first: otherwise the tail shift below would move or overwrite the source
// bytes before they are read, inserting the wrong data.
// NOTE(review): a distinct tstring that merely views this string's buffer is
// not detected here and presumably remains unsafe — confirm against callers.
inline tstring& tstring::insert(size_t pos, const tstring& str, size_t subpos,
                                size_t sublen) {
  if (&str == this) {
    const tstring snapshot(str.data() + subpos, sublen);
    return insert(pos, snapshot, 0, sublen);
  }

  const size_t orig_size = size();
  TF_TString_ResizeUninitialized(&tstr_, orig_size + sublen);

  char* const buf = mdata();
  // Open a gap of `sublen` bytes at `pos`, then fill it from `str`.
  memmove(buf + pos + sublen, buf + pos, orig_size - pos);
  memmove(buf + pos, str.data() + subpos, sublen);

  return *this;
}
552
insert(size_t pos,size_t n,char c)553 inline tstring& tstring::insert(size_t pos, size_t n, char c) {
554 size_t size_ = size();
555 TF_TString_ResizeUninitialized(&tstr_, size_ + n);
556
557 memmove(mdata() + pos + n, data() + pos, size_ - pos);
558 memset(mdata() + pos, c, n);
559
560 return *this;
561 }
562
swap(tstring & str)563 inline void tstring::swap(tstring& str) {
564 // TODO(dero): Invalid for OFFSET (unimplemented).
565 std::swap(tstr_, str.tstr_);
566 }
567
push_back(char ch)568 inline void tstring::push_back(char ch) { append(1, ch); }
569
570 // Friends
571
572 inline bool operator==(const char* a, const tstring& b) {
573 return ::strlen(a) == b.size() && ::memcmp(a, b.data(), b.size()) == 0;
574 }
575
576 inline bool operator==(const std::string& a, const tstring& b) {
577 return a.size() == b.size() && ::memcmp(a.data(), b.data(), b.size()) == 0;
578 }
579
580 inline tstring operator+(const tstring& a, const tstring& b) {
581 tstring r;
582 r.reserve(a.size() + b.size());
583 r.append(a);
584 r.append(b);
585
586 return r;
587 }
588
589 inline std::ostream& operator<<(std::ostream& o, const tstring& str) {
590 return o.write(str.data(), str.size());
591 }
592
593 } // namespace tensorflow
594
595 #endif // TENSORFLOW_CORE_PLATFORM_TSTRING_H_
596