1 /*
2 * Copyright (c) 2022-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "b_filesystem/b_dir.h"
17
18 #include <algorithm>
19 #include <dirent.h>
20 #include <fnmatch.h>
21 #include <functional>
22 #include <filesystem>
23 #include <glob.h>
24 #include <memory>
25 #include <set>
26 #include <string>
27 #include <tuple>
28 #include <vector>
29
30 #include "b_anony/b_anony.h"
31 #include "b_error/b_error.h"
32 #include "b_filesystem/b_file_hash.h"
33 #include "b_resources/b_constants.h"
34 #include "directory_ex.h"
35 #include "errors.h"
36 #include "filemgmt_libhilog.h"
37
38 namespace OHOS::FileManagement::Backup {
39 using namespace std;
40 const int32_t PATH_MAX_LEN = 4096;
41 const size_t TOP_ELE = 0;
42 const std::string APP_DATA_DIR = BConstants::PATH_PUBLIC_HOME +
43 BConstants::PATH_APP_DATA + BConstants::FILE_SEPARATOR_CHAR;
44
IsEmptyDirectory(const string & path)45 static bool IsEmptyDirectory(const string &path)
46 {
47 DIR *dir = opendir(path.c_str());
48 if (dir == nullptr) {
49 HILOGE("Opendir failed, errno:%{public}d", errno);
50 return false;
51 }
52 bool isEmpty = true;
53 struct dirent *entry = nullptr;
54 while ((entry = readdir(dir)) != nullptr) {
55 if (entry->d_type != DT_DIR || (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0)) {
56 isEmpty = false;
57 break;
58 }
59 }
60 closedir(dir);
61 return isEmpty;
62 }
63
GetFile(const string & path,off_t size=-1)64 static tuple<ErrCode, map<string, struct stat>, map<string, size_t>> GetFile(const string &path, off_t size = -1)
65 {
66 map<string, struct stat> files;
67 map<string, size_t> smallFiles;
68 struct stat sta = {};
69 if (stat(path.data(), &sta) == -1) {
70 HILOGE("File not exist, errno:%{public}d, fileName:%{private}s.", errno, path.c_str());
71 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
72 }
73 if (path == "/") {
74 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
75 }
76 if (sta.st_size <= size) {
77 smallFiles.emplace(make_pair(path, sta.st_size));
78 } else {
79 files.try_emplace(path, sta);
80 }
81 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
82 }
83
CheckOverLongPath(const string & path)84 static uint32_t CheckOverLongPath(const string &path)
85 {
86 uint32_t len = path.length();
87 if (len >= PATH_MAX_LEN) {
88 size_t found = path.find_last_of(BConstants::FILE_SEPARATOR_CHAR);
89 string sub = path.substr(found + 1);
90 HILOGE("Path over long, length:%{public}d, fileName:%{private}s.", len, sub.c_str());
91 }
92 return len;
93 }
94
GetDirFilesDetail(const string & path,bool recursion,off_t size=-1)95 static tuple<ErrCode, map<string, struct stat>, map<string, size_t>> GetDirFilesDetail(const string &path,
96 bool recursion,
97 off_t size = -1)
98 {
99 map<string, struct stat> files;
100 map<string, size_t> smallFiles;
101
102 if (IsEmptyDirectory(path)) {
103 string newPath = path;
104 if (path.at(path.size()-1) != BConstants::FILE_SEPARATOR_CHAR) {
105 newPath += BConstants::FILE_SEPARATOR_CHAR;
106 }
107 smallFiles.emplace(make_pair(newPath, 0));
108 return {ERR_OK, files, smallFiles};
109 }
110
111 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
112 if (!dir) {
113 HILOGE("Invalid directory path: %{private}s", path.c_str());
114 return GetFile(path, size);
115 }
116 struct dirent *ptr = nullptr;
117 while (!!(ptr = readdir(dir.get()))) {
118 // current dir OR parent dir
119 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
120 continue;
121 } else if (ptr->d_type == DT_REG) {
122 struct stat sta = {};
123 string fileName = IncludeTrailingPathDelimiter(path) + string(ptr->d_name);
124 if (CheckOverLongPath(fileName) >= PATH_MAX_LEN || stat(fileName.data(), &sta) == -1) {
125 continue;
126 }
127 if (sta.st_size <= size) {
128 smallFiles.emplace(make_pair(fileName, sta.st_size));
129 continue;
130 }
131
132 files.try_emplace(fileName, sta);
133 continue;
134 } else if (ptr->d_type != DT_DIR) {
135 HILOGE("Not support file type");
136 continue;
137 }
138 // DT_DIR type
139 if (!recursion) {
140 continue;
141 }
142 auto [errCode, subFiles, subSmallFiles] =
143 GetDirFilesDetail(IncludeTrailingPathDelimiter(path) + string(ptr->d_name), recursion, size);
144 if (errCode != 0) {
145 return {errCode, files, smallFiles};
146 }
147 files.merge(subFiles);
148 smallFiles.insert(subSmallFiles.begin(), subSmallFiles.end());
149 }
150 return {ERR_OK, files, smallFiles};
151 }
152
PreDealExcludes(std::vector<std::string> & excludes)153 static void PreDealExcludes(std::vector<std::string> &excludes)
154 {
155 size_t lenEx = excludes.size();
156 int j = 0;
157 for (size_t i = 0; i < lenEx; ++i) {
158 if (!excludes[i].empty()) {
159 if (excludes[i].at(excludes[i].size() - 1) == BConstants::FILE_SEPARATOR_CHAR) {
160 excludes[i] += "*";
161 }
162 if (excludes[i].find(BConstants::FILE_SEPARATOR_CHAR) != string::npos &&
163 excludes[i].at(TOP_ELE) != BConstants::FILE_SEPARATOR_CHAR) {
164 excludes[i] = BConstants::FILE_SEPARATOR_CHAR + excludes[i];
165 }
166 excludes[j++] = excludes[i];
167 }
168 }
169 excludes.resize(j);
170 }
171
GetDirFiles(const string & path)172 tuple<ErrCode, vector<string>> BDir::GetDirFiles(const string &path)
173 {
174 vector<string> files;
175 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
176 if (!dir) {
177 HILOGE("Invalid directory path: %{private}s", path.c_str());
178 return {BError(errno).GetCode(), files};
179 }
180
181 struct dirent *ptr = nullptr;
182 while (!!(ptr = readdir(dir.get()))) {
183 // current dir OR parent dir
184 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
185 continue;
186 } else if (ptr->d_type == DT_DIR) {
187 continue;
188 } else {
189 files.emplace_back(IncludeTrailingPathDelimiter(path) + string(ptr->d_name));
190 }
191 }
192
193 return {ERR_OK, files};
194 }
195
GetSubDir(const std::string & path)196 static std::set<std::string> GetSubDir(const std::string &path)
197 {
198 if (path.empty()) {
199 return {};
200 }
201 std::set<std::string> result;
202 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
203 if (!dir) {
204 HILOGE("Invalid directory path: %{private}s", path.c_str());
205 return {};
206 }
207
208 struct dirent *ptr = nullptr;
209 while (!!(ptr = readdir(dir.get()))) {
210 // current dir OR parent dir
211 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
212 continue;
213 } else if (ptr->d_type == DT_DIR) {
214 std::string tmpPath = IncludeTrailingPathDelimiter(path) +
215 string(ptr->d_name) + BConstants::FILE_SEPARATOR_CHAR;
216 if (tmpPath == APP_DATA_DIR) {
217 HILOGI("Filter appdata successfully");
218 continue;
219 }
220 result.emplace(tmpPath);
221 } else {
222 result.emplace(IncludeTrailingPathDelimiter(path) + string(ptr->d_name));
223 }
224 }
225 return result;
226 }
227
RmForceExcludePath(set<string> & expandPath)228 static void RmForceExcludePath(set<string> &expandPath)
229 {
230 set<string> addPaths;
231 for (auto it = expandPath.begin(); it != expandPath.end();) {
232 if (*it == BConstants::PATH_PUBLIC_HOME) {
233 addPaths = GetSubDir(*it);
234 }
235 if ((*it).find(APP_DATA_DIR) == 0) {
236 it = expandPath.erase(it);
237 continue;
238 }
239 ++it;
240 }
241 if (!addPaths.empty()) {
242 expandPath.erase(BConstants::PATH_PUBLIC_HOME);
243 expandPath.merge(addPaths);
244 }
245 }
246
ExpandPathWildcard(const vector<string> & vec,bool onlyPath)247 static set<string> ExpandPathWildcard(const vector<string> &vec, bool onlyPath)
248 {
249 unique_ptr<glob_t, function<void(glob_t *)>> gl {new glob_t, [](glob_t *ptr) { globfree(ptr); }};
250 *gl = {};
251
252 unsigned int flags = GLOB_DOOFFS | GLOB_MARK;
253 for (const string &pattern : vec) {
254 if (!pattern.empty()) {
255 glob(pattern.data(), static_cast<int>(flags), NULL, gl.get());
256 flags |= GLOB_APPEND;
257 }
258 }
259
260 set<string> expandPath, filteredPath;
261 for (size_t i = 0; i < gl->gl_pathc; ++i) {
262 std::string tmpPath = gl->gl_pathv[i];
263 auto pos = tmpPath.find(BConstants::FILE_SEPARATOR_CHAR);
264 if (pos != 0 && pos != std::string::npos) {
265 tmpPath = BConstants::FILE_SEPARATOR_CHAR + tmpPath;
266 }
267 expandPath.emplace(tmpPath);
268 }
269 RmForceExcludePath(expandPath);
270 for (auto it = expandPath.begin(); it != expandPath.end(); ++it) {
271 filteredPath.emplace(*it);
272 if (onlyPath && *it->rbegin() != BConstants::FILE_SEPARATOR_CHAR) {
273 continue;
274 }
275 auto jt = it;
276 for (++jt; jt != expandPath.end() && (jt->find(*it) == 0); ++jt) {
277 }
278
279 it = --jt;
280 }
281
282 return filteredPath;
283 }
284
GetBigFiles(const vector<string> & includes,const vector<string> & excludes)285 tuple<ErrCode, map<string, struct stat>, map<string, size_t>> BDir::GetBigFiles(const vector<string> &includes,
286 const vector<string> &excludes)
287 {
288 set<string> inc = ExpandPathWildcard(includes, true);
289
290 map<string, struct stat> incFiles;
291 map<string, size_t> incSmallFiles;
292 for (const auto &item : inc) {
293 HILOGW("GetBigFiles, path = %{public}s", item.c_str());
294 auto [errCode, files, smallFiles] = GetDirFilesDetail(item, true, BConstants::BIG_FILE_BOUNDARY);
295 if (errCode == 0) {
296 incFiles.merge(move(files));
297 HILOGW("big files: %{public}zu; small files: %{public}zu", files.size(), smallFiles.size());
298 incSmallFiles.merge(move(smallFiles));
299 }
300 }
301 vector<string> endExcludes = excludes;
302 PreDealExcludes(endExcludes);
303 if (excludes.empty()) {
304 return {ERR_OK, move(incFiles), move(incSmallFiles)};
305 }
306 auto isMatch = [](const vector<string> &s, const string &str) -> bool {
307 if (str.empty()) {
308 return false;
309 }
310 for (const string &item : s) {
311 if (fnmatch(item.data(), str.data(), FNM_LEADING_DIR) == 0) {
312 return true;
313 }
314 }
315 return false;
316 };
317
318 map<string, size_t> resSmallFiles;
319 for (const auto &item : incSmallFiles) {
320 if (!isMatch(endExcludes, item.first)) {
321 resSmallFiles.emplace(item);
322 }
323 }
324
325 map<string, struct stat> bigFiles;
326 for (const auto &item : incFiles) {
327 if (!isMatch(endExcludes, item.first)) {
328 bigFiles.emplace(item);
329 }
330 }
331 HILOGW("total number of big files is %{public}zu", bigFiles.size());
332 HILOGW("total number of small files is %{public}zu", resSmallFiles.size());
333 return {ERR_OK, move(bigFiles), move(resSmallFiles)};
334 }
335
GetUser0FileStat(vector<string> bigFile,vector<string> smallFile,vector<struct ReportFileInfo> & allFiles,vector<struct ReportFileInfo> & smallFiles,vector<struct ReportFileInfo> & bigFiles)336 void BDir::GetUser0FileStat(vector<string> bigFile,
337 vector<string> smallFile,
338 vector<struct ReportFileInfo> &allFiles,
339 vector<struct ReportFileInfo> &smallFiles,
340 vector<struct ReportFileInfo> &bigFiles)
341 {
342 for (const auto &item : smallFile) {
343 struct ReportFileInfo storageFiles;
344 storageFiles.filePath = item;
345 if (filesystem::is_directory(item)) {
346 storageFiles.isDir = 1;
347 storageFiles.userTar = 0;
348 } else {
349 storageFiles.isDir = 0;
350 auto [res, fileHash] = BackupFileHash::HashWithSHA256(item);
351 if (fileHash.empty()) {
352 continue;
353 }
354 storageFiles.hash = fileHash;
355 storageFiles.userTar = 1;
356 }
357 struct stat sta = {};
358 if (stat(item.c_str(), &sta) != 0) {
359 throw BError(BError::Codes::EXT_INVAL_ARG, "Get file stat failed");
360 }
361 storageFiles.size = sta.st_size;
362 storageFiles.mode = to_string(static_cast<int32_t>(sta.st_mode));
363 int64_t lastUpdateTime = static_cast<int64_t>(sta.st_mtime);
364 storageFiles.mtime = lastUpdateTime;
365 allFiles.emplace_back(storageFiles);
366 smallFiles.emplace_back(storageFiles);
367 }
368 for (const auto &item : bigFile) {
369 struct ReportFileInfo storageFiles;
370 storageFiles.filePath = item;
371 auto [res, fileHash] = BackupFileHash::HashWithSHA256(item);
372 if (fileHash.empty()) {
373 continue;
374 }
375 storageFiles.hash = fileHash;
376 struct stat sta = {};
377 if (stat(item.c_str(), &sta) != 0) {
378 throw BError(BError::Codes::EXT_INVAL_ARG, "Get file stat failed");
379 }
380 storageFiles.size = sta.st_size;
381 storageFiles.mode = to_string(static_cast<int32_t>(sta.st_mode));
382 int64_t lastUpdateTime = static_cast<int64_t>(sta.st_mtime);
383 storageFiles.mtime = lastUpdateTime;
384 storageFiles.userTar = 1;
385 allFiles.emplace_back(storageFiles);
386 bigFiles.emplace_back(storageFiles);
387 }
388 HILOGI("get FileStat end, bigfiles = %{public}zu, smallFiles = %{public}zu, allFiles = %{public}zu,",
389 bigFiles.size(), smallFiles.size(), allFiles.size());
390 }
391
IsNotPath(const string & path,vector<string> & bigFiles,vector<string> & smallFiles,off_t size)392 static tuple<vector<string>, vector<string>> IsNotPath(const string &path, vector<string> &bigFiles,
393 vector<string> &smallFiles, off_t size)
394 {
395 struct stat sta = {};
396 if (CheckOverLongPath(path) >= PATH_MAX_LEN || stat(path.data(), &sta) == -1) {
397 return {};
398 }
399 if (sta.st_size <= size) {
400 smallFiles.emplace_back(path);
401 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
402 return {bigFiles, smallFiles};
403 }
404 bigFiles.emplace_back(path);
405 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
406 return {bigFiles, smallFiles};
407 }
408
GetUser0DirFilesDetail(const string & path,off_t size=-1)409 static tuple<vector<string>, vector<string>> GetUser0DirFilesDetail(const string &path, off_t size = -1)
410 {
411 vector<string> bigFiles;
412 vector<string> smallFiles;
413 if (IsEmptyDirectory(path)) {
414 string newPath = path;
415 if (path.at(path.size()-1) != BConstants::FILE_SEPARATOR_CHAR) {
416 newPath += BConstants::FILE_SEPARATOR_CHAR;
417 }
418 smallFiles.emplace_back(newPath);
419 return {bigFiles, smallFiles};
420 }
421 if (filesystem::is_regular_file(path)) {
422 return IsNotPath(path, bigFiles, smallFiles, size);
423 }
424 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
425 if (!dir) {
426 HILOGE("Invalid directory path: %{private}s", path.c_str());
427 return {};
428 }
429 struct dirent *ptr = nullptr;
430 while (!!(ptr = readdir(dir.get()))) {
431 // current dir OR parent dir
432 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
433 continue;
434 } else if (ptr->d_type == DT_REG) {
435 struct stat sta = {};
436 string fileName = IncludeTrailingPathDelimiter(path) + string(ptr->d_name);
437 if (CheckOverLongPath(fileName) >= PATH_MAX_LEN || stat(fileName.data(), &sta) == -1) {
438 continue;
439 }
440 if (sta.st_size <= size) {
441 smallFiles.emplace_back(fileName);
442 continue;
443 }
444
445 bigFiles.emplace_back(fileName);
446 continue;
447 } else if (ptr->d_type != DT_DIR) {
448 HILOGE("Not support file type");
449 continue;
450 }
451 // DT_DIR type
452 auto [subBigFiles, subSmallFiles] =
453 GetUser0DirFilesDetail(IncludeTrailingPathDelimiter(path) + string(ptr->d_name), size);
454 bigFiles.insert(bigFiles.end(), subBigFiles.begin(), subBigFiles.end());
455 smallFiles.insert(smallFiles.end(), subSmallFiles.begin(), subSmallFiles.end());
456 }
457 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
458 return {bigFiles, smallFiles};
459 }
460
GetBackupList(const vector<string> & includes,const vector<string> & excludes)461 tuple<vector<string>, vector<string>> BDir::GetBackupList(const vector<string> &includes,
462 const vector<string> &excludes)
463 {
464 HILOGI("start get bigfiles and smallfiles");
465 set<string> inc = ExpandPathWildcard(includes, true);
466 vector<string> bigFiles;
467 vector<string> smallFiles;
468 for (const auto &item : inc) {
469 auto [bigFile, smallFile] = GetUser0DirFilesDetail(item, BConstants::BIG_FILE_BOUNDARY);
470 bigFiles.insert(bigFiles.end(), bigFile.begin(), bigFile.end());
471 smallFiles.insert(smallFiles.end(), smallFile.begin(), smallFile.end());
472 }
473 HILOGI("end bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
474 vector<string> endExcludes = excludes;
475 PreDealExcludes(endExcludes);
476 auto isMatch = [](const vector<string> &s, const string &str) -> bool {
477 if (str.empty()) {
478 return false;
479 }
480 for (const string &item : s) {
481 if (fnmatch(item.data(), str.data(), FNM_LEADING_DIR) == 0) {
482 return true;
483 }
484 }
485 return false;
486 };
487
488 for (auto item = bigFiles.begin(); item != bigFiles.end();) {
489 if (isMatch(endExcludes, *item)) {
490 item = bigFiles.erase(item);
491 } else {
492 ++item;
493 }
494 }
495 for (auto item = smallFiles.begin(); item != smallFiles.end();) {
496 if (isMatch(endExcludes, *item)) {
497 item = smallFiles.erase(item);
498 } else {
499 ++item;
500 }
501 }
502 HILOGI("End compare bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
503 return {bigFiles, smallFiles};
504 }
505
/**
 * Expands the given wildcard paths and returns the resulting paths as a list.
 *
 * @param paths glob patterns to expand.
 * @return the paths produced by ExpandPathWildcard(paths, true), in the set's sorted order.
 */
vector<string> BDir::GetDirs(const vector<string_view> &paths)
{
    vector<string> patterns;
    patterns.reserve(paths.size());
    for (const string_view &one : paths) {
        patterns.emplace_back(one);
    }
    const set<string> expanded = ExpandPathWildcard(patterns, true);
    return vector<string>(expanded.begin(), expanded.end());
}
513
CheckFilePathInvalid(const std::string & filePath)514 bool BDir::CheckFilePathInvalid(const std::string &filePath)
515 {
516 size_t pos = filePath.find(BConstants::PATH_ABSOLUTE);
517 while (pos != string::npos) {
518 if (pos == 0 || filePath[pos - 1] == BConstants::FILE_SEPARATOR_CHAR) {
519 HILOGE("Relative path is not allowed, path = %{public}s", GetAnonyPath(filePath).c_str());
520 return true;
521 }
522 pos = filePath.find(BConstants::PATH_ABSOLUTE, pos + BConstants::PATH_ABSOLUTE.size());
523 }
524 return false;
525 }
526
CheckAndRmSoftLink(const std::string & filePath)527 bool BDir::CheckAndRmSoftLink(const std::string &filePath)
528 {
529 if (std::filesystem::is_symlink(filePath)) {
530 HILOGE("Soft link is not allowed, path = %{public}s", GetAnonyPath(filePath).c_str());
531 if (unlink(filePath.c_str()) < 0) {
532 HILOGE("Failed to unlink the backup file : %{public}s", GetAnonyPath(filePath).c_str());
533 }
534 return true;
535 }
536 return false;
537 }
538
CheckAndRmSoftLink(const EndFileInfo & filePaths)539 bool BDir::CheckAndRmSoftLink(const EndFileInfo &filePaths)
540 {
541 bool isSoftLink = false;
542 for (const auto &it : filePaths) {
543 if (std::filesystem::is_symlink(it.first)) {
544 HILOGE("Soft link is not allowed, path = %{public}s", GetAnonyPath(it.first).c_str());
545 isSoftLink = true;
546 if (unlink(it.first.c_str()) < 0) {
547 HILOGE("Failed to unlink the backup file : %{public}s", GetAnonyPath(it.first).c_str());
548 }
549 }
550 }
551 return isSoftLink;
552 }
553 } // namespace OHOS::FileManagement::Backup