• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PLATFORM_STR_UTIL_H_
17 #define TENSORFLOW_CORE_PLATFORM_STR_UTIL_H_
18 
19 #include <string>
20 #include <vector>
21 
22 #include "absl/strings/str_join.h"
23 #include "absl/strings/str_split.h"
24 #include "tensorflow/core/platform/macros.h"
25 #include "tensorflow/core/platform/stringpiece.h"
26 #include "tensorflow/core/platform/types.h"
27 
28 // Basic string utility routines
29 namespace tensorflow {
30 namespace str_util {
31 
32 // Returns a version of 'src' where unprintable characters have been
33 // escaped using C-style escape sequences.
34 string CEscape(StringPiece src);
35 
36 // Copies "source" to "dest", rewriting C-style escape sequences --
37 // '\n', '\r', '\\', '\ooo', etc -- to their ASCII equivalents.
38 //
39 // Errors: Sets the description of the first encountered error in
40 // 'error'. To disable error reporting, set 'error' to NULL.
41 //
42 // NOTE: Does not support \u or \U!
43 bool CUnescape(StringPiece source, string* dest, string* error);
44 
45 // Removes any trailing whitespace from "*s".
46 void StripTrailingWhitespace(string* s);
47 
48 // Removes leading ascii_isspace() characters.
49 // Returns number of characters removed.
50 size_t RemoveLeadingWhitespace(StringPiece* text);
51 
52 // Removes trailing ascii_isspace() characters.
53 // Returns number of characters removed.
54 size_t RemoveTrailingWhitespace(StringPiece* text);
55 
56 // Removes leading and trailing ascii_isspace() chars.
57 // Returns number of chars removed.
58 size_t RemoveWhitespaceContext(StringPiece* text);
59 
60 // Consume a leading positive integer value.  If any digits were
61 // found and the value did not overflow, store the value of the leading
62 // unsigned number in "*val", advance "*s" past the consumed number, and
63 // return true.  Otherwise (no leading digits, or overflow), return false.
64 bool ConsumeLeadingDigits(StringPiece* s, uint64* val);
65 
66 // Consume a leading token composed of non-whitespace characters only.
67 // If *s starts with a non-zero number of non-whitespace characters, store
68 // them in *val, advance *s past them, and return true.  Else return false.
69 bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val);
70 
71 // If "*s" starts with "expected", consume it and return true.
72 // Otherwise, return false.
73 bool ConsumePrefix(StringPiece* s, StringPiece expected);
74 
75 // If "*s" ends with "expected", remove it and return true.
76 // Otherwise, return false.
77 bool ConsumeSuffix(StringPiece* s, StringPiece expected);
78 
79 // If "s" starts with "expected", return a view into "s" after "expected" but
80 // keep "s" unchanged.
81 // Otherwise, return the original "s".
82 TF_MUST_USE_RESULT StringPiece StripPrefix(StringPiece s, StringPiece expected);
83 
84 // If "s" ends with "expected", return a view into "s" until "expected" but
85 // keep "s" unchanged.
86 // Otherwise, return the original "s".
87 TF_MUST_USE_RESULT StringPiece StripSuffix(StringPiece s, StringPiece expected);
88 
89 // Return lower-cased version of s.
90 string Lowercase(StringPiece s);
91 
92 // Return upper-cased version of s.
93 string Uppercase(StringPiece s);
94 
95 // Capitalize first character of each word in "*s".  "delimiters" is a
96 // set of characters that can be used as word boundaries.
97 void TitlecaseString(string* s, StringPiece delimiters);
98 
99 // Replaces the first occurrence (if replace_all is false) or all occurrences
100 // (if replace_all is true) of oldsub in s with newsub.
101 string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub,
102                      bool replace_all);
103 
104 // Join functionality
105 template <typename T>
Join(const T & s,const char * sep)106 string Join(const T& s, const char* sep) {
107   return absl::StrJoin(s, sep);
108 }
109 
110 // A variant of Join where for each element of "s", f(&dest_string, elem)
111 // is invoked (f is often constructed with a lambda of the form:
112 //   [](string* result, ElemType elem)
113 template <typename T, typename Formatter>
Join(const T & s,const char * sep,Formatter f)114 string Join(const T& s, const char* sep, Formatter f) {
115   return absl::StrJoin(s, sep, f);
116 }
117 
118 struct AllowEmpty {
operatorAllowEmpty119   bool operator()(StringPiece sp) const { return true; }
120 };
121 struct SkipEmpty {
operatorSkipEmpty122   bool operator()(StringPiece sp) const { return !sp.empty(); }
123 };
124 struct SkipWhitespace {
operatorSkipWhitespace125   bool operator()(StringPiece sp) const {
126     return !absl::StripTrailingAsciiWhitespace(sp).empty();
127   }
128 };
129 
130 // Split strings using any of the supplied delimiters. For example:
131 // Split("a,b.c,d", ".,") would return {"a", "b", "c", "d"}.
Split(StringPiece text,StringPiece delims)132 inline std::vector<string> Split(StringPiece text, StringPiece delims) {
133   return text.empty() ? std::vector<string>()
134                       : absl::StrSplit(text, absl::ByAnyChar(delims));
135 }
136 
137 template <typename Predicate>
Split(StringPiece text,StringPiece delims,Predicate p)138 std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p) {
139   return text.empty() ? std::vector<string>()
140                       : absl::StrSplit(text, absl::ByAnyChar(delims), p);
141 }
142 
Split(StringPiece text,char delim)143 inline std::vector<string> Split(StringPiece text, char delim) {
144   return text.empty() ? std::vector<string>() : absl::StrSplit(text, delim);
145 }
146 
147 template <typename Predicate>
Split(StringPiece text,char delim,Predicate p)148 std::vector<string> Split(StringPiece text, char delim, Predicate p) {
149   return text.empty() ? std::vector<string>() : absl::StrSplit(text, delim, p);
150 }
151 
152 // StartsWith()
153 //
154 // Returns whether a given string `text` begins with `prefix`.
155 bool StartsWith(StringPiece text, StringPiece prefix);
156 
157 // EndsWith()
158 //
159 // Returns whether a given string `text` ends with `suffix`.
160 bool EndsWith(StringPiece text, StringPiece suffix);
161 
162 // StrContains()
163 //
164 // Returns whether a given string `haystack` contains the substring `needle`.
165 bool StrContains(StringPiece haystack, StringPiece needle);
166 
167 // Returns the length of the given null-terminated byte string 'str'.
168 // Returns 'string_max_len' if the null character was not found in the first
169 // 'string_max_len' bytes of 'str'.
170 size_t Strnlen(const char* str, const size_t string_max_len);
171 
172 //   ----- NON STANDARD, TF SPECIFIC METHOD -----
173 // Converts "^2ILoveYou!" to "i_love_you_". More specifically:
174 // - converts all non-alphanumeric characters to underscores
175 // - replaces each occurrence of a capital letter (except the very
176 //   first character and if there is already an '_' before it) with '_'
177 //   followed by this letter in lower case
178 // - Skips leading non-alpha characters
179 // This method is useful for producing strings matching "[a-z][a-z0-9_]*"
180 // as required by OpDef.ArgDef.name. The resulting string is either empty or
181 // matches this regex.
182 string ArgDefCase(StringPiece s);
183 
184 }  // namespace str_util
185 }  // namespace tensorflow
186 
187 #endif  // TENSORFLOW_CORE_PLATFORM_STR_UTIL_H_
188