1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/platform/path.h"
17
18 #include <errno.h>
19 #include <fcntl.h>
20 #include <stdlib.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23 #if defined(PLATFORM_WINDOWS)
24 #include <windows.h>
25 #else
26 #include <unistd.h>
27 #endif
28
29 #include <vector>
30
31 #include "absl/algorithm/container.h"
32 #include "tensorflow/core/platform/logging.h"
33 #include "tensorflow/core/platform/mutex.h"
34 #include "tensorflow/core/platform/scanner.h"
35 #include "tensorflow/core/platform/strcat.h"
36
37 namespace tensorflow {
38 namespace io {
39 namespace internal {
40 namespace {
41
// Separator placed between components when paths are joined, and searched for
// when computing a common path prefix. Always "/" in this file.
const char kPathSep[] = "/";
43
// Copies the Bazel-provided environment value `path` into `*out`.
//
// Returns false when `path` is null, and true otherwise. `out` may be null,
// in which case nothing is copied and the return value only reports whether a
// path is available.
bool FixBazelEnvPath(const char* path, string* out) {
  if (path == nullptr) return false;
  if (out == nullptr) return true;

  *out = path;

#ifdef PLATFORM_WINDOWS
  // On Windows, paths generated by Bazel always use `/` as the path
  // separator. This prevents normal path management. In the event there are no
  // `\` in the path, we convert all `/` to `\`.
  if (out->find('\\') != string::npos) return true;

  // The replacement has to be a literal backslash: kPathSep is "/", so
  // writing kPathSep[0] here would replace every `/` with itself.
  for (char& c : *out) {
    if (c == '/') c = '\\';
  }
#endif

  return true;
}
64
65 } // namespace
66
JoinPathImpl(std::initializer_list<StringPiece> paths)67 string JoinPathImpl(std::initializer_list<StringPiece> paths) {
68 string result;
69
70 for (StringPiece path : paths) {
71 if (path.empty()) continue;
72
73 if (result.empty()) {
74 result = string(path);
75 continue;
76 }
77
78 if (IsAbsolutePath(path)) path = path.substr(1);
79
80 if (result[result.size() - 1] == kPathSep[0]) {
81 strings::StrAppend(&result, path);
82 } else {
83 strings::StrAppend(&result, kPathSep, path);
84 }
85 }
86
87 return result;
88 }
89
90 // Return the parts of the URI, split on the final "/" in the path. If there is
91 // no "/" in the path, the first part of the output is the scheme and host, and
92 // the second is the path. If the only "/" in the path is the first character,
93 // it is included in the first part of the output.
SplitPath(StringPiece uri)94 std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) {
95 StringPiece scheme, host, path;
96 ParseURI(uri, &scheme, &host, &path);
97
98 auto pos = path.rfind('/');
99 #ifdef PLATFORM_WINDOWS
100 if (pos == StringPiece::npos) pos = path.rfind('\\');
101 #endif
102 // Handle the case with no '/' in 'path'.
103 if (pos == StringPiece::npos)
104 return std::make_pair(StringPiece(uri.begin(), host.end() - uri.begin()),
105 path);
106
107 // Handle the case with a single leading '/' in 'path'.
108 if (pos == 0)
109 return std::make_pair(
110 StringPiece(uri.begin(), path.begin() + 1 - uri.begin()),
111 StringPiece(path.data() + 1, path.size() - 1));
112
113 return std::make_pair(
114 StringPiece(uri.begin(), path.begin() + pos - uri.begin()),
115 StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
116 }
117
118 // Return the parts of the basename of path, split on the final ".".
119 // If there is no "." in the basename or "." is the final character in the
120 // basename, the second value will be empty.
SplitBasename(StringPiece path)121 std::pair<StringPiece, StringPiece> SplitBasename(StringPiece path) {
122 path = Basename(path);
123
124 auto pos = path.rfind('.');
125 if (pos == StringPiece::npos)
126 return std::make_pair(path, StringPiece(path.data() + path.size(), 0));
127 return std::make_pair(
128 StringPiece(path.data(), pos),
129 StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
130 }
131
132 } // namespace internal
133
// Reports whether `path` starts with '/'. An empty path is not absolute.
bool IsAbsolutePath(StringPiece path) {
  if (path.empty()) return false;
  return path.front() == '/';
}
137
Dirname(StringPiece path)138 StringPiece Dirname(StringPiece path) {
139 return internal::SplitPath(path).first;
140 }
141
Basename(StringPiece path)142 StringPiece Basename(StringPiece path) {
143 return internal::SplitPath(path).second;
144 }
145
Extension(StringPiece path)146 StringPiece Extension(StringPiece path) {
147 return internal::SplitBasename(path).second;
148 }
149
// Lexically normalizes `unclean_path`; the file system is never consulted.
//
//   - Runs of '/' collapse to a single '/'.
//   - "." components are dropped.
//   - ".." removes the component before it. When there is nothing left to
//     remove it is discarded for an absolute path and kept for a relative one.
//   - A trailing '/' is removed unless the result is exactly "/".
//   - An empty result becomes ".".
//
// Only '/' is treated as a separator, and scanning stops at the first NUL
// character of the input.
string CleanPath(StringPiece unclean_path) {
  // The copy is rewritten in place: `reader` walks the original characters
  // while `writer` follows behind (never ahead of it) emitting the cleaned
  // ones.
  string cleaned(unclean_path);
  const char* reader = cleaned.c_str();
  string::iterator writer = cleaned.begin();

  // Keep one leading '/' of an absolute path and swallow the rest of the run.
  const bool rooted = (*reader == '/');
  if (rooted) {
    *writer++ = *reader++;
    while (*reader == '/') ++reader;
  }
  // ".." must never erase output located before this position.
  string::const_iterator backtrack_floor = writer;

  while (*reader != '\0') {
    // Classify the component that starts at `reader`. The short-circuiting
    // keeps every look-ahead within the NUL-terminated buffer.
    const bool at_dot = (reader[0] == '.');
    const bool is_current_dir =
        at_dot && (reader[1] == '/' || reader[1] == '\0');
    const bool is_parent_dir = at_dot && !is_current_dir && reader[1] == '.' &&
                               (reader[2] == '/' || reader[2] == '\0');

    if (is_current_dir) {
      // Skip "." and the separator after it, if there is one.
      ++reader;
      if (*reader != '\0') ++reader;
    } else if (is_parent_dir) {
      if (writer != backtrack_floor) {
        // Rewind over the previously emitted component.
        --writer;
        while (writer != backtrack_floor && writer[-1] != '/') --writer;
      } else if (!rooted) {
        // Nothing to rewind in a relative path: emit ".." (plus its
        // separator) and make sure it can never be rewound later.
        *writer++ = reader[0];
        *writer++ = reader[1];
        if (reader[2] != '\0') *writer++ = reader[2];
        backtrack_floor = writer;
      }
      // Step over ".." and the separator after it, if there is one.
      reader += 2;
      if (*reader != '\0') ++reader;
    } else {
      // Ordinary component: copy it together with its trailing separator.
      while (*reader != '\0' && *reader != '/') *writer++ = *reader++;
      if (*reader != '\0') *writer++ = *reader++;
    }

    // Collapse any further separators.
    while (*reader == '/') ++reader;
  }

  string::difference_type length = writer - cleaned.begin();
  if (length == 0) {
    // Nothing survived the cleaning; the result is ".".
    cleaned.assign(1, '.');
    return cleaned;
  }

  // Drop a trailing '/' unless the whole result is the root "/".
  if (length > 1 && cleaned[length - 1] == '/') --length;
  cleaned.resize(length);
  return cleaned;
}
230
ParseURI(StringPiece remaining,StringPiece * scheme,StringPiece * host,StringPiece * path)231 void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
232 StringPiece* path) {
233 // 0. Parse scheme
234 // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]*
235 // TODO(keveman): Allow "+" and "-" in the scheme.
236 // Keep URI pattern in tensorboard/backend/server.py updated accordingly
237 if (!strings::Scanner(remaining)
238 .One(strings::Scanner::LETTER)
239 .Many(strings::Scanner::LETTER_DIGIT_DOT)
240 .StopCapture()
241 .OneLiteral("://")
242 .GetResult(&remaining, scheme)) {
243 // If there's no scheme, assume the entire string is a path.
244 *scheme = StringPiece(remaining.begin(), 0);
245 *host = StringPiece(remaining.begin(), 0);
246 *path = remaining;
247 return;
248 }
249
250 // 1. Parse host
251 if (!strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host)) {
252 // No path, so the rest of the URI is the host.
253 *host = remaining;
254 *path = StringPiece(remaining.end(), 0);
255 return;
256 }
257
258 // 2. The rest is the path
259 *path = remaining;
260 }
261
CreateURI(StringPiece scheme,StringPiece host,StringPiece path)262 string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) {
263 if (scheme.empty()) {
264 return string(path);
265 }
266 return strings::StrCat(scheme, "://", host, path);
267 }
268
269 // Returns a unique number every time it is called.
UniqueId()270 int64 UniqueId() {
271 static mutex mu(LINKER_INITIALIZED);
272 static int64 id = 0;
273 mutex_lock l(mu);
274 return ++id;
275 }
276
CommonPathPrefix(absl::Span<const string> paths)277 string CommonPathPrefix(absl::Span<const string> paths) {
278 if (paths.empty()) return "";
279 size_t min_filename_size =
280 absl::c_min_element(paths, [](const string& a, const string& b) {
281 return a.size() < b.size();
282 })->size();
283 if (min_filename_size == 0) return "";
284
285 size_t common_prefix_size = [&] {
286 for (size_t prefix_size = 0; prefix_size < min_filename_size;
287 prefix_size++) {
288 char c = paths[0][prefix_size];
289 for (int f = 1; f < paths.size(); f++) {
290 if (paths[f][prefix_size] != c) {
291 return prefix_size;
292 }
293 }
294 }
295 return min_filename_size;
296 }();
297
298 size_t rpos = absl::string_view(paths[0])
299 .substr(0, common_prefix_size)
300 .rfind(internal::kPathSep);
301 return rpos == std::string::npos
302 ? ""
303 : std::string(absl::string_view(paths[0]).substr(0, rpos + 1));
304 }
305
// Returns the path of a temporary file whose name ends with `extension`.
//
// POSIX: the directories named by TEST_TMPDIR, TMPDIR and TMP, then "/tmp",
// are tried in that order. In the first one that exists and is a directory, a
// file is created with mkstemps()/mkstemp(), its descriptor is closed, and its
// path is returned. When `extension` is non-empty the name ends in
// ".<extension>".
//
// Windows: the name is obtained from GetTempPath()/GetTempFileName() and
// `extension` is appended to it as given.
// NOTE(review): with a non-zero unique id GetTempFileName presumably does not
// create the file; confirm against the Win32 documentation.
//
// Android: not implemented.
//
// Every failure is reported with LOG(FATAL).
string GetTempFilename(const string& extension) {
#if defined(__ANDROID__)
  LOG(FATAL) << "GetTempFilename is not implemented in this platform.";
#elif defined(PLATFORM_WINDOWS)
  char temp_dir[_MAX_PATH];
  DWORD retval;
  // GetTempPath returns the length of the path it wrote; a value larger than
  // the buffer size means the buffer was too small, and 0 means failure.
  retval = GetTempPath(_MAX_PATH, temp_dir);
  if (retval > _MAX_PATH || retval == 0) {
    LOG(FATAL) << "Cannot get the directory for temporary files.";
  }

  char temp_file_name[_MAX_PATH];
  // Unlike GetTempPath, GetTempFileName does not return a length: it returns
  // the unique number used in the file name (the value passed in when that is
  // non-zero), and 0 only on failure. Comparing it against _MAX_PATH would
  // abort as soon as UniqueId() exceeds _MAX_PATH.
  retval = GetTempFileName(temp_dir, "", UniqueId(), temp_file_name);
  if (retval == 0) {
    LOG(FATAL) << "Cannot get a temporary file in: " << temp_dir;
  }

  string full_tmp_file_name(temp_file_name);
  full_tmp_file_name.append(extension);
  return full_tmp_file_name;
#else
  for (const char* dir : std::vector<const char*>(
           {getenv("TEST_TMPDIR"), getenv("TMPDIR"), getenv("TMP"), "/tmp"})) {
    // Skip environment variables that are unset or empty.
    if (!dir || !dir[0]) {
      continue;
    }
    struct stat statbuf;
    if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) {
      // UniqueId is added here because mkstemps is not as thread safe as it
      // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows
      // the problem.
      string tmp_filepath;
      int fd;
      if (extension.length()) {
        tmp_filepath = io::JoinPath(
            dir, strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX.",
                                 extension));
        // The suffix after the XXXXXX template is "." plus the extension.
        fd = mkstemps(&tmp_filepath[0], extension.length() + 1);
      } else {
        tmp_filepath = io::JoinPath(
            dir,
            strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX"));
        fd = mkstemp(&tmp_filepath[0]);
      }
      if (fd < 0) {
        LOG(FATAL) << "Failed to create temp file.";
      } else {
        // Only the name is needed; a failed close is logged but not fatal.
        if (close(fd) < 0) {
          LOG(ERROR) << "close() failed: " << strerror(errno);
        }
        return tmp_filepath;
      }
    }
  }
  LOG(FATAL) << "No temp directory found.";
  std::abort();
#endif
}
364
GetTestUndeclaredOutputsDir(string * dir)365 bool GetTestUndeclaredOutputsDir(string* dir) {
366 return internal::FixBazelEnvPath(getenv("TEST_UNDECLARED_OUTPUTS_DIR"), dir);
367 }
368
369 } // namespace io
370 } // namespace tensorflow
371