1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/files/file_path.h"
6
7 #include <string.h>
8
9 #include <algorithm>
10 #include <iterator>
11 #include <string_view>
12
13 #include "base/logging.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "util/build_config.h"
17
18 #if defined(OS_MACOSX)
19 #include "base/mac/scoped_cftyperef.h"
20 #include "base/third_party/icu/icu_utf.h"
21 #endif
22
23 #if defined(OS_WIN)
24 #include <windows.h>
25 #elif defined(OS_MACOSX)
26 #include <CoreFoundation/CoreFoundation.h>
27 #endif
28
29 namespace base {
30
31 using StringType = FilePath::StringType;
32 using StringViewType = FilePath::StringViewType;
33
34 namespace {
35
36 const char* const kCommonDoubleExtensionSuffixes[] = {"gz", "z", "bz2", "bz"};
37 const char* const kCommonDoubleExtensions[] = {"user.js"};
38
39 const FilePath::CharType kStringTerminator = FILE_PATH_LITERAL('\0');
40
41 // If this FilePath contains a drive letter specification, returns the
42 // position of the last character of the drive letter specification,
43 // otherwise returns npos. This can only be true on Windows, when a pathname
44 // begins with a letter followed by a colon. On other platforms, this always
45 // returns npos.
FindDriveLetter(StringViewType path)46 StringViewType::size_type FindDriveLetter(StringViewType path) {
47 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
48 // This is dependent on an ASCII-based character set, but that's a
49 // reasonable assumption. iswalpha can be too inclusive here.
50 if (path.length() >= 2 && path[1] == L':' &&
51 ((path[0] >= L'A' && path[0] <= L'Z') ||
52 (path[0] >= L'a' && path[0] <= L'z'))) {
53 return 1;
54 }
55 #endif // FILE_PATH_USES_DRIVE_LETTERS
56 return StringType::npos;
57 }
58
#if defined(FILE_PATH_USES_DRIVE_LETTERS)
// Compares two path strings, ignoring ASCII case in the drive letter
// specification only. If either string lacks a drive letter the comparison is
// a plain exact match; otherwise the drive parts are compared
// case-insensitively and the remainders exactly.
bool EqualDriveLetterCaseInsensitive(StringViewType a, StringViewType b) {
  const size_t a_colon = FindDriveLetter(a);
  const size_t b_colon = FindDriveLetter(b);

  const bool both_have_drive =
      a_colon != StringType::npos && b_colon != StringType::npos;
  if (!both_have_drive)
    return a == b;

  // Index of the first character after the drive specification.
  const size_t a_split = a_colon + 1;
  const size_t b_split = b_colon + 1;

  const bool same_drive =
      StartsWith(a.substr(0, a_split), b.substr(0, b_split),
                 CompareCase::INSENSITIVE_ASCII);
  return same_drive && a.substr(a_split) == b.substr(b_split);
}
#endif  // defined(FILE_PATH_USES_DRIVE_LETTERS)
77
IsPathAbsolute(StringViewType path)78 bool IsPathAbsolute(StringViewType path) {
79 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
80 StringType::size_type letter = FindDriveLetter(path);
81 if (letter != StringType::npos) {
82 // Look for a separator right after the drive specification.
83 return path.length() > letter + 1 &&
84 FilePath::IsSeparator(path[letter + 1]);
85 }
86 // Look for a pair of leading separators.
87 return path.length() > 1 && FilePath::IsSeparator(path[0]) &&
88 FilePath::IsSeparator(path[1]);
89 #else // FILE_PATH_USES_DRIVE_LETTERS
90 // Look for a separator in the first position.
91 return path.length() > 0 && FilePath::IsSeparator(path[0]);
92 #endif // FILE_PATH_USES_DRIVE_LETTERS
93 }
94
AreAllSeparators(const StringType & input)95 bool AreAllSeparators(const StringType& input) {
96 for (StringType::const_iterator it = input.begin(); it != input.end(); ++it) {
97 if (!FilePath::IsSeparator(*it))
98 return false;
99 }
100
101 return true;
102 }
103
104 // Find the position of the '.' that separates the extension from the rest
105 // of the file name. The position is relative to BaseName(), not value().
106 // Returns npos if it can't find an extension.
FinalExtensionSeparatorPosition(const StringType & path)107 StringType::size_type FinalExtensionSeparatorPosition(const StringType& path) {
108 // Special case "." and ".."
109 if (path == FilePath::kCurrentDirectory || path == FilePath::kParentDirectory)
110 return StringType::npos;
111
112 return path.rfind(FilePath::kExtensionSeparator);
113 }
114
115 // Same as above, but allow a second extension component of up to 4
116 // characters when the rightmost extension component is a common double
117 // extension (gz, bz2, Z). For example, foo.tar.gz or foo.tar.Z would have
118 // extension components of '.tar.gz' and '.tar.Z' respectively.
ExtensionSeparatorPosition(const StringType & path)119 StringType::size_type ExtensionSeparatorPosition(const StringType& path) {
120 const StringType::size_type last_dot = FinalExtensionSeparatorPosition(path);
121
122 // No extension, or the extension is the whole filename.
123 if (last_dot == StringType::npos || last_dot == 0U)
124 return last_dot;
125
126 const StringType::size_type penultimate_dot =
127 path.rfind(FilePath::kExtensionSeparator, last_dot - 1);
128 const StringType::size_type last_separator = path.find_last_of(
129 FilePath::kSeparators, last_dot - 1, FilePath::kSeparatorsLength - 1);
130
131 if (penultimate_dot == StringType::npos ||
132 (last_separator != StringType::npos &&
133 penultimate_dot < last_separator)) {
134 return last_dot;
135 }
136
137 for (size_t i = 0; i < std::size(kCommonDoubleExtensions); ++i) {
138 StringType extension(path, penultimate_dot + 1);
139 if (LowerCaseEqualsASCII(extension, kCommonDoubleExtensions[i]))
140 return penultimate_dot;
141 }
142
143 StringType extension(path, last_dot + 1);
144 for (size_t i = 0; i < std::size(kCommonDoubleExtensionSuffixes); ++i) {
145 if (LowerCaseEqualsASCII(extension, kCommonDoubleExtensionSuffixes[i])) {
146 if ((last_dot - penultimate_dot) <= 5U &&
147 (last_dot - penultimate_dot) > 1U) {
148 return penultimate_dot;
149 }
150 }
151 }
152
153 return last_dot;
154 }
155
156 // Returns true if path is "", ".", or "..".
IsEmptyOrSpecialCase(const StringType & path)157 bool IsEmptyOrSpecialCase(const StringType& path) {
158 // Special cases "", ".", and ".."
159 if (path.empty() || path == FilePath::kCurrentDirectory ||
160 path == FilePath::kParentDirectory) {
161 return true;
162 }
163
164 return false;
165 }
166
167 } // namespace
168
169 FilePath::FilePath() = default;
170
171 FilePath::FilePath(const FilePath& that) = default;
172 FilePath::FilePath(FilePath&& that) noexcept = default;
173
FilePath(StringViewType path)174 FilePath::FilePath(StringViewType path) {
175 path_.assign(path);
176 StringType::size_type nul_pos = path_.find(kStringTerminator);
177 if (nul_pos != StringType::npos)
178 path_.erase(nul_pos, StringType::npos);
179 }
180
181 FilePath::~FilePath() = default;
182
183 FilePath& FilePath::operator=(const FilePath& that) = default;
184
185 FilePath& FilePath::operator=(FilePath&& that) = default;
186
operator ==(const FilePath & that) const187 bool FilePath::operator==(const FilePath& that) const {
188 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
189 return EqualDriveLetterCaseInsensitive(this->path_, that.path_);
190 #else // defined(FILE_PATH_USES_DRIVE_LETTERS)
191 return path_ == that.path_;
192 #endif // defined(FILE_PATH_USES_DRIVE_LETTERS)
193 }
194
operator !=(const FilePath & that) const195 bool FilePath::operator!=(const FilePath& that) const {
196 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
197 return !EqualDriveLetterCaseInsensitive(this->path_, that.path_);
198 #else // defined(FILE_PATH_USES_DRIVE_LETTERS)
199 return path_ != that.path_;
200 #endif // defined(FILE_PATH_USES_DRIVE_LETTERS)
201 }
202
203 // static
IsSeparator(CharType character)204 bool FilePath::IsSeparator(CharType character) {
205 for (size_t i = 0; i < kSeparatorsLength - 1; ++i) {
206 if (character == kSeparators[i]) {
207 return true;
208 }
209 }
210
211 return false;
212 }
213
GetComponents(std::vector<StringType> * components) const214 void FilePath::GetComponents(std::vector<StringType>* components) const {
215 DCHECK(components);
216 if (!components)
217 return;
218 components->clear();
219 if (value().empty())
220 return;
221
222 std::vector<StringType> ret_val;
223 FilePath current = *this;
224 FilePath base;
225
226 // Capture path components.
227 while (current != current.DirName()) {
228 base = current.BaseName();
229 if (!AreAllSeparators(base.value()))
230 ret_val.push_back(base.value());
231 current = current.DirName();
232 }
233
234 // Capture root, if any.
235 base = current.BaseName();
236 if (!base.value().empty() && base.value() != kCurrentDirectory)
237 ret_val.push_back(current.BaseName().value());
238
239 // Capture drive letter, if any.
240 FilePath dir = current.DirName();
241 StringType::size_type letter = FindDriveLetter(dir.value());
242 if (letter != StringType::npos) {
243 ret_val.push_back(StringType(dir.value(), 0, letter + 1));
244 }
245
246 *components = std::vector<StringType>(ret_val.rbegin(), ret_val.rend());
247 }
248
IsParent(const FilePath & child) const249 bool FilePath::IsParent(const FilePath& child) const {
250 return AppendRelativePath(child, nullptr);
251 }
252
AppendRelativePath(const FilePath & child,FilePath * path) const253 bool FilePath::AppendRelativePath(const FilePath& child, FilePath* path) const {
254 std::vector<StringType> parent_components;
255 std::vector<StringType> child_components;
256 GetComponents(&parent_components);
257 child.GetComponents(&child_components);
258
259 if (parent_components.empty() ||
260 parent_components.size() >= child_components.size())
261 return false;
262
263 std::vector<StringType>::const_iterator parent_comp =
264 parent_components.begin();
265 std::vector<StringType>::const_iterator child_comp = child_components.begin();
266
267 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
268 // Windows can access case sensitive filesystems, so component
269 // comparisons must be case sensitive, but drive letters are
270 // never case sensitive.
271 if ((FindDriveLetter(*parent_comp) != StringType::npos) &&
272 (FindDriveLetter(*child_comp) != StringType::npos)) {
273 if (!StartsWith(*parent_comp, *child_comp, CompareCase::INSENSITIVE_ASCII))
274 return false;
275 ++parent_comp;
276 ++child_comp;
277 }
278 #endif // defined(FILE_PATH_USES_DRIVE_LETTERS)
279
280 while (parent_comp != parent_components.end()) {
281 if (*parent_comp != *child_comp)
282 return false;
283 ++parent_comp;
284 ++child_comp;
285 }
286
287 if (path != nullptr) {
288 for (; child_comp != child_components.end(); ++child_comp) {
289 *path = path->Append(*child_comp);
290 }
291 }
292 return true;
293 }
294
295 // libgen's dirname and basename aren't guaranteed to be thread-safe and aren't
296 // guaranteed to not modify their input strings, and in fact are implemented
297 // differently in this regard on different platforms. Don't use them, but
298 // adhere to their behavior.
DirName() const299 FilePath FilePath::DirName() const {
300 FilePath new_path(path_);
301 new_path.StripTrailingSeparatorsInternal();
302
303 // The drive letter, if any, always needs to remain in the output. If there
304 // is no drive letter, as will always be the case on platforms which do not
305 // support drive letters, letter will be npos, or -1, so the comparisons and
306 // resizes below using letter will still be valid.
307 StringType::size_type letter = FindDriveLetter(new_path.path_);
308
309 StringType::size_type last_separator = new_path.path_.find_last_of(
310 kSeparators, StringType::npos, kSeparatorsLength - 1);
311 if (last_separator == StringType::npos) {
312 // path_ is in the current directory.
313 new_path.path_.resize(letter + 1);
314 } else if (last_separator == letter + 1) {
315 // path_ is in the root directory.
316 new_path.path_.resize(letter + 2);
317 } else if (last_separator == letter + 2 &&
318 IsSeparator(new_path.path_[letter + 1])) {
319 // path_ is in "//" (possibly with a drive letter); leave the double
320 // separator intact indicating alternate root.
321 new_path.path_.resize(letter + 3);
322 } else if (last_separator != 0) {
323 // path_ is somewhere else, trim the basename.
324 new_path.path_.resize(last_separator);
325 }
326
327 new_path.StripTrailingSeparatorsInternal();
328 if (!new_path.path_.length())
329 new_path.path_ = kCurrentDirectory;
330
331 return new_path;
332 }
333
BaseName() const334 FilePath FilePath::BaseName() const {
335 FilePath new_path(path_);
336 new_path.StripTrailingSeparatorsInternal();
337
338 // The drive letter, if any, is always stripped.
339 StringType::size_type letter = FindDriveLetter(new_path.path_);
340 if (letter != StringType::npos) {
341 new_path.path_.erase(0, letter + 1);
342 }
343
344 // Keep everything after the final separator, but if the pathname is only
345 // one character and it's a separator, leave it alone.
346 StringType::size_type last_separator = new_path.path_.find_last_of(
347 kSeparators, StringType::npos, kSeparatorsLength - 1);
348 if (last_separator != StringType::npos &&
349 last_separator < new_path.path_.length() - 1) {
350 new_path.path_.erase(0, last_separator + 1);
351 }
352
353 return new_path;
354 }
355
Extension() const356 StringType FilePath::Extension() const {
357 FilePath base(BaseName());
358 const StringType::size_type dot = ExtensionSeparatorPosition(base.path_);
359 if (dot == StringType::npos)
360 return StringType();
361
362 return base.path_.substr(dot, StringType::npos);
363 }
364
FinalExtension() const365 StringType FilePath::FinalExtension() const {
366 FilePath base(BaseName());
367 const StringType::size_type dot = FinalExtensionSeparatorPosition(base.path_);
368 if (dot == StringType::npos)
369 return StringType();
370
371 return base.path_.substr(dot, StringType::npos);
372 }
373
RemoveExtension() const374 FilePath FilePath::RemoveExtension() const {
375 if (Extension().empty())
376 return *this;
377
378 const StringType::size_type dot = ExtensionSeparatorPosition(path_);
379 if (dot == StringType::npos)
380 return *this;
381
382 return FilePath(path_.substr(0, dot));
383 }
384
RemoveFinalExtension() const385 FilePath FilePath::RemoveFinalExtension() const {
386 if (FinalExtension().empty())
387 return *this;
388
389 const StringType::size_type dot = FinalExtensionSeparatorPosition(path_);
390 if (dot == StringType::npos)
391 return *this;
392
393 return FilePath(path_.substr(0, dot));
394 }
395
InsertBeforeExtension(StringViewType suffix) const396 FilePath FilePath::InsertBeforeExtension(StringViewType suffix) const {
397 if (suffix.empty())
398 return FilePath(path_);
399
400 if (IsEmptyOrSpecialCase(BaseName().value()))
401 return FilePath();
402
403 StringType ext = Extension();
404 StringType ret = RemoveExtension().value();
405 ret.append(suffix);
406 ret.append(ext);
407 return FilePath(ret);
408 }
409
InsertBeforeExtensionASCII(std::string_view suffix) const410 FilePath FilePath::InsertBeforeExtensionASCII(std::string_view suffix) const {
411 DCHECK(IsStringASCII(suffix));
412 #if defined(OS_WIN)
413 return InsertBeforeExtension(ASCIIToUTF16(suffix));
414 #elif defined(OS_POSIX) || defined(OS_FUCHSIA)
415 return InsertBeforeExtension(suffix);
416 #endif
417 }
418
AddExtension(StringViewType extension) const419 FilePath FilePath::AddExtension(StringViewType extension) const {
420 if (IsEmptyOrSpecialCase(BaseName().value()))
421 return FilePath();
422
423 // If the new extension is "" or ".", then just return the current FilePath.
424 if (extension.empty() ||
425 (extension.size() == 1 && extension[0] == kExtensionSeparator))
426 return *this;
427
428 StringType str = path_;
429 if (extension[0] != kExtensionSeparator &&
430 *(str.end() - 1) != kExtensionSeparator) {
431 str.append(1, kExtensionSeparator);
432 }
433 str.append(extension);
434 return FilePath(str);
435 }
436
ReplaceExtension(StringViewType extension) const437 FilePath FilePath::ReplaceExtension(StringViewType extension) const {
438 if (IsEmptyOrSpecialCase(BaseName().value()))
439 return FilePath();
440
441 FilePath no_ext = RemoveExtension();
442 // If the new extension is "" or ".", then just remove the current extension.
443 if (extension.empty() ||
444 (extension.size() == 1 && extension[0] == kExtensionSeparator))
445 return no_ext;
446
447 StringType str = no_ext.value();
448 if (extension[0] != kExtensionSeparator)
449 str.append(1, kExtensionSeparator);
450 str.append(extension);
451 return FilePath(str);
452 }
453
Append(StringViewType component) const454 FilePath FilePath::Append(StringViewType component) const {
455 StringViewType appended = component;
456 StringType without_nuls;
457
458 StringType::size_type nul_pos = component.find(kStringTerminator);
459 if (nul_pos != StringViewType::npos) {
460 without_nuls.assign(component.substr(0, nul_pos));
461 appended = StringViewType(without_nuls);
462 }
463
464 DCHECK(!IsPathAbsolute(appended));
465
466 if (path_.compare(kCurrentDirectory) == 0 && !appended.empty()) {
467 // Append normally doesn't do any normalization, but as a special case,
468 // when appending to kCurrentDirectory, just return a new path for the
469 // component argument. Appending component to kCurrentDirectory would
470 // serve no purpose other than needlessly lengthening the path, and
471 // it's likely in practice to wind up with FilePath objects containing
472 // only kCurrentDirectory when calling DirName on a single relative path
473 // component.
474 return FilePath(appended);
475 }
476
477 FilePath new_path(path_);
478 new_path.StripTrailingSeparatorsInternal();
479
480 // Don't append a separator if the path is empty (indicating the current
481 // directory) or if the path component is empty (indicating nothing to
482 // append).
483 if (!appended.empty() && !new_path.path_.empty()) {
484 // Don't append a separator if the path still ends with a trailing
485 // separator after stripping (indicating the root directory).
486 if (!IsSeparator(new_path.path_.back())) {
487 // Don't append a separator if the path is just a drive letter.
488 if (FindDriveLetter(new_path.path_) + 1 != new_path.path_.length()) {
489 new_path.path_.append(1, kSeparators[0]);
490 }
491 }
492 }
493
494 new_path.path_.append(appended);
495 return new_path;
496 }
497
// Overload of Append() that takes the component as a FilePath; forwards the
// component's string value to the string-view overload.
FilePath FilePath::Append(const FilePath& component) const {
  const StringType& component_value = component.value();
  return Append(component_value);
}
501
AppendASCII(std::string_view component) const502 FilePath FilePath::AppendASCII(std::string_view component) const {
503 DCHECK(base::IsStringASCII(component));
504 #if defined(OS_WIN)
505 return Append(ASCIIToUTF16(component));
506 #elif defined(OS_POSIX) || defined(OS_FUCHSIA)
507 return Append(component);
508 #endif
509 }
510
IsAbsolute() const511 bool FilePath::IsAbsolute() const {
512 return IsPathAbsolute(path_);
513 }
514
EndsWithSeparator() const515 bool FilePath::EndsWithSeparator() const {
516 if (empty())
517 return false;
518 return IsSeparator(path_.back());
519 }
520
AsEndingWithSeparator() const521 FilePath FilePath::AsEndingWithSeparator() const {
522 if (EndsWithSeparator() || path_.empty())
523 return *this;
524
525 StringType path_str;
526 path_str.reserve(path_.length() + 1); // Only allocate string once.
527
528 path_str = path_;
529 path_str.append(&kSeparators[0], 1);
530 return FilePath(path_str);
531 }
532
StripTrailingSeparators() const533 FilePath FilePath::StripTrailingSeparators() const {
534 FilePath new_path(path_);
535 new_path.StripTrailingSeparatorsInternal();
536
537 return new_path;
538 }
539
ReferencesParent() const540 bool FilePath::ReferencesParent() const {
541 if (path_.find(kParentDirectory) == StringType::npos) {
542 // GetComponents is quite expensive, so avoid calling it in the majority
543 // of cases where there isn't a kParentDirectory anywhere in the path.
544 return false;
545 }
546
547 std::vector<StringType> components;
548 GetComponents(&components);
549
550 std::vector<StringType>::const_iterator it = components.begin();
551 for (; it != components.end(); ++it) {
552 const StringType& component = *it;
553 // Windows has odd, undocumented behavior with path components containing
554 // only whitespace and . characters. So, if all we see is . and
555 // whitespace, then we treat any .. sequence as referencing parent.
556 // For simplicity we enforce this on all platforms.
557 if (component.find_first_not_of(FILE_PATH_LITERAL(". \n\r\t")) ==
558 std::string::npos &&
559 component.find(kParentDirectory) != std::string::npos) {
560 return true;
561 }
562 }
563 return false;
564 }
565
566 #if defined(OS_WIN)
567
LossyDisplayName() const568 std::u16string FilePath::LossyDisplayName() const {
569 return path_;
570 }
571
MaybeAsASCII() const572 std::string FilePath::MaybeAsASCII() const {
573 if (base::IsStringASCII(path_))
574 return UTF16ToASCII(path_);
575 return std::string();
576 }
577
As8Bit() const578 std::string FilePath::As8Bit() const {
579 return UTF16ToUTF8(value());
580 }
581
582 #elif defined(OS_POSIX) || defined(OS_FUCHSIA)
583
584 // See file_path.h for a discussion of the encoding of paths on POSIX
585 // platforms. These encoding conversion functions are not quite correct.
586
MaybeAsASCII() const587 std::string FilePath::MaybeAsASCII() const {
588 if (base::IsStringASCII(path_))
589 return path_;
590 return std::string();
591 }
592
As8Bit() const593 std::string FilePath::As8Bit() const {
594 return value();
595 }
596
597 #endif // defined(OS_WIN)
598
StripTrailingSeparatorsInternal()599 void FilePath::StripTrailingSeparatorsInternal() {
600 // If there is no drive letter, start will be 1, which will prevent stripping
601 // the leading separator if there is only one separator. If there is a drive
602 // letter, start will be set appropriately to prevent stripping the first
603 // separator following the drive letter, if a separator immediately follows
604 // the drive letter.
605 StringType::size_type start = FindDriveLetter(path_) + 2;
606
607 StringType::size_type last_stripped = StringType::npos;
608 for (StringType::size_type pos = path_.length();
609 pos > start && IsSeparator(path_[pos - 1]); --pos) {
610 // If the string only has two separators and they're at the beginning,
611 // don't strip them, unless the string began with more than two separators.
612 if (pos != start + 1 || last_stripped == start + 2 ||
613 !IsSeparator(path_[start - 1])) {
614 path_.resize(pos - 1);
615 last_stripped = pos;
616 }
617 }
618 }
619
NormalizePathSeparators() const620 FilePath FilePath::NormalizePathSeparators() const {
621 return NormalizePathSeparatorsTo(kSeparators[0]);
622 }
623
NormalizePathSeparatorsTo(CharType separator) const624 FilePath FilePath::NormalizePathSeparatorsTo(CharType separator) const {
625 #if defined(FILE_PATH_USES_WIN_SEPARATORS)
626 DCHECK_NE(kSeparators + kSeparatorsLength,
627 std::find(kSeparators, kSeparators + kSeparatorsLength, separator));
628 StringType copy = path_;
629 for (size_t i = 0; i < kSeparatorsLength; ++i) {
630 std::replace(copy.begin(), copy.end(), kSeparators[i], separator);
631 }
632 return FilePath(copy);
633 #else
634 return *this;
635 #endif
636 }
637
638 } // namespace base
639