1 /*
2 * Copyright (c) 2022-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "b_filesystem/b_dir.h"
17
18 #include <algorithm>
19 #include <dirent.h>
20 #include <fnmatch.h>
21 #include <functional>
22 #include <filesystem>
23 #include <glob.h>
24 #include <memory>
25 #include <set>
26 #include <string>
27 #include <tuple>
28 #include <vector>
29
30 #include "b_anony/b_anony.h"
31 #include "b_error/b_error.h"
32 #include "b_filesystem/b_file_hash.h"
33 #include "b_resources/b_constants.h"
34 #include "directory_ex.h"
35 #include "errors.h"
36 #include "filemgmt_libhilog.h"
37 #include "sandbox_helper.h"
38 #include "b_utils/scan_file_singleton.h"
39
40 namespace OHOS::FileManagement::Backup {
41 using namespace std;
42 const int32_t PATH_MAX_LEN = 4096;
43 const size_t TOP_ELE = 0;
44 const std::string APP_DATA_DIR = BConstants::PATH_PUBLIC_HOME +
45 BConstants::PATH_APP_DATA + BConstants::FILE_SEPARATOR_CHAR;
46
IsEmptyDirectory(const string & path)47 static bool IsEmptyDirectory(const string &path)
48 {
49 DIR *dir = opendir(path.c_str());
50 if (dir == nullptr) {
51 HILOGE("Opendir failed, errno:%{public}d", errno);
52 return false;
53 }
54 bool isEmpty = true;
55 struct dirent *entry = nullptr;
56 while ((entry = readdir(dir)) != nullptr) {
57 if (entry->d_type != DT_DIR || (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0)) {
58 isEmpty = false;
59 break;
60 }
61 }
62 closedir(dir);
63 return isEmpty;
64 }
65
InsertSmallFiles(std::map<string,size_t> & smallFiles,std::string fileName,size_t size)66 static void InsertSmallFiles(std::map<string, size_t> &smallFiles, std::string fileName, size_t size)
67 {
68 ScanFileSingleton::GetInstance().AddSmallFile(fileName, size);
69 smallFiles.emplace(make_pair(fileName, size));
70 }
71
GetFile(const string & path,off_t size=-1)72 static tuple<ErrCode, map<string, struct stat>, map<string, size_t>> GetFile(const string &path, off_t size = -1)
73 {
74 map<string, struct stat> files;
75 map<string, size_t> smallFiles;
76 struct stat sta = {};
77 if (stat(path.data(), &sta) == -1) {
78 HILOGE("File not exist, errno:%{public}d, fileName:%{private}s.", errno, path.c_str());
79 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
80 }
81 if (path == "/") {
82 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
83 }
84 if (sta.st_size <= size) {
85 smallFiles.emplace(make_pair(path, sta.st_size));
86 InsertSmallFiles(smallFiles, path, sta.st_size);
87 } else {
88 ScanFileSingleton::GetInstance().AddBigFile(path, sta);
89 files.try_emplace(path, sta);
90 }
91 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
92 }
93
CheckOverLongPath(const string & path)94 static uint32_t CheckOverLongPath(const string &path)
95 {
96 uint32_t len = path.length();
97 if (len >= PATH_MAX_LEN) {
98 size_t found = path.find_last_of(BConstants::FILE_SEPARATOR_CHAR);
99 string sub = path.substr(found + 1);
100 HILOGE("Path over long, length:%{public}d, fileName:%{private}s.", len, sub.c_str());
101 }
102 return len;
103 }
104
GetDirFilesDetail(const string & path,bool recursion,off_t size=-1)105 static tuple<ErrCode, map<string, struct stat>, map<string, size_t>> GetDirFilesDetail(const string &path,
106 bool recursion,
107 off_t size = -1)
108 {
109 map<string, struct stat> files;
110 map<string, size_t> smallFiles;
111
112 if (IsEmptyDirectory(path)) {
113 string newPath = path;
114 if (path.at(path.size()-1) != BConstants::FILE_SEPARATOR_CHAR) {
115 newPath += BConstants::FILE_SEPARATOR_CHAR;
116 }
117 InsertSmallFiles(smallFiles, newPath, 0);
118 return {ERR_OK, files, smallFiles};
119 }
120
121 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
122 if (!dir) {
123 HILOGE("Invalid directory path: %{private}s", path.c_str());
124 return GetFile(path, size);
125 }
126 struct dirent *ptr = nullptr;
127 while (!!(ptr = readdir(dir.get()))) {
128 // current dir OR parent dir
129 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
130 continue;
131 } else if (ptr->d_type == DT_REG) {
132 struct stat sta = {};
133 string fileName = IncludeTrailingPathDelimiter(path) + string(ptr->d_name);
134 if (CheckOverLongPath(fileName) >= PATH_MAX_LEN || stat(fileName.data(), &sta) == -1) {
135 continue;
136 }
137 if (sta.st_size <= size) {
138 InsertSmallFiles(smallFiles, fileName, sta.st_size);
139 continue;
140 }
141 ScanFileSingleton::GetInstance().AddBigFile(fileName, sta);
142 files.try_emplace(fileName, sta);
143 continue;
144 } else if (ptr->d_type != DT_DIR) {
145 HILOGE("Not support file type");
146 continue;
147 }
148 // DT_DIR type
149 if (!recursion) {
150 continue;
151 }
152 auto [errCode, subFiles, subSmallFiles] =
153 GetDirFilesDetail(IncludeTrailingPathDelimiter(path) + string(ptr->d_name), recursion, size);
154 if (errCode != 0) {
155 return {errCode, files, smallFiles};
156 }
157 files.merge(subFiles);
158 smallFiles.insert(subSmallFiles.begin(), subSmallFiles.end());
159 }
160 return {ERR_OK, files, smallFiles};
161 }
162
PreDealExcludes(std::vector<std::string> & excludes)163 static void PreDealExcludes(std::vector<std::string> &excludes)
164 {
165 size_t lenEx = excludes.size();
166 int j = 0;
167 for (size_t i = 0; i < lenEx; ++i) {
168 if (!excludes[i].empty()) {
169 if (excludes[i].at(excludes[i].size() - 1) == BConstants::FILE_SEPARATOR_CHAR) {
170 excludes[i] += "*";
171 }
172 if (excludes[i].find(BConstants::FILE_SEPARATOR_CHAR) != string::npos &&
173 excludes[i].at(TOP_ELE) != BConstants::FILE_SEPARATOR_CHAR) {
174 excludes[i] = BConstants::FILE_SEPARATOR_CHAR + excludes[i];
175 }
176 excludes[j++] = excludes[i];
177 }
178 }
179 excludes.resize(j);
180 }
181
GetDirFiles(const string & path)182 tuple<ErrCode, vector<string>> BDir::GetDirFiles(const string &path)
183 {
184 vector<string> files;
185 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
186 if (!dir) {
187 HILOGE("Invalid directory path: %{private}s", path.c_str());
188 return {BError(errno).GetCode(), files};
189 }
190
191 struct dirent *ptr = nullptr;
192 while (!!(ptr = readdir(dir.get()))) {
193 // current dir OR parent dir
194 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
195 continue;
196 } else if (ptr->d_type == DT_DIR) {
197 continue;
198 } else {
199 files.emplace_back(IncludeTrailingPathDelimiter(path) + string(ptr->d_name));
200 }
201 }
202
203 return {ERR_OK, files};
204 }
205
GetSubDir(const std::string & path)206 static std::set<std::string> GetSubDir(const std::string &path)
207 {
208 if (path.empty()) {
209 return {};
210 }
211 std::set<std::string> result;
212 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
213 if (!dir) {
214 HILOGE("Invalid directory path: %{private}s", path.c_str());
215 return {};
216 }
217
218 struct dirent *ptr = nullptr;
219 while (!!(ptr = readdir(dir.get()))) {
220 // current dir OR parent dir
221 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
222 continue;
223 } else if (ptr->d_type == DT_DIR) {
224 std::string tmpPath = IncludeTrailingPathDelimiter(path) +
225 string(ptr->d_name) + BConstants::FILE_SEPARATOR_CHAR;
226 if (tmpPath == APP_DATA_DIR) {
227 HILOGI("Filter appdata successfully");
228 continue;
229 }
230 result.emplace(tmpPath);
231 } else {
232 result.emplace(IncludeTrailingPathDelimiter(path) + string(ptr->d_name));
233 }
234 }
235 return result;
236 }
237
RmForceExcludePath(set<string> & expandPath)238 static void RmForceExcludePath(set<string> &expandPath)
239 {
240 set<string> addPaths;
241 for (auto it = expandPath.begin(); it != expandPath.end();) {
242 if (*it == BConstants::PATH_PUBLIC_HOME) {
243 addPaths = GetSubDir(*it);
244 }
245 if ((*it).find(APP_DATA_DIR) == 0) {
246 it = expandPath.erase(it);
247 continue;
248 }
249 ++it;
250 }
251 if (!addPaths.empty()) {
252 expandPath.erase(BConstants::PATH_PUBLIC_HOME);
253 expandPath.merge(addPaths);
254 }
255 }
256
ExpandPathWildcard(const vector<string> & vec,bool onlyPath)257 static set<string> ExpandPathWildcard(const vector<string> &vec, bool onlyPath)
258 {
259 unique_ptr<glob_t, function<void(glob_t *)>> gl {new glob_t, [](glob_t *ptr) { globfree(ptr); }};
260 *gl = {};
261
262 unsigned int flags = GLOB_DOOFFS | GLOB_MARK;
263 for (const string &pattern : vec) {
264 if (!pattern.empty()) {
265 glob(pattern.data(), static_cast<int>(flags), NULL, gl.get());
266 flags |= GLOB_APPEND;
267 }
268 }
269
270 set<string> expandPath;
271 set<string> filteredPath;
272 for (size_t i = 0; i < gl->gl_pathc; ++i) {
273 std::string tmpPath = gl->gl_pathv[i];
274 auto pos = tmpPath.find(BConstants::FILE_SEPARATOR_CHAR);
275 if (pos != 0 && pos != std::string::npos) {
276 tmpPath = BConstants::FILE_SEPARATOR_CHAR + tmpPath;
277 }
278 expandPath.emplace(tmpPath);
279 }
280 RmForceExcludePath(expandPath);
281 for (auto it = expandPath.begin(); it != expandPath.end(); ++it) {
282 filteredPath.emplace(*it);
283 if (onlyPath && *it->rbegin() != BConstants::FILE_SEPARATOR_CHAR) {
284 continue;
285 }
286 auto jt = it;
287 for (++jt; jt != expandPath.end() && (jt->find(*it) == 0); ++jt) {
288 }
289
290 it = --jt;
291 }
292
293 return filteredPath;
294 }
295
CheckAndCreateDirectory(const string & filePath)296 bool BDir::CheckAndCreateDirectory(const string &filePath)
297 {
298 size_t pos = filePath.rfind('/');
299 if (pos == string::npos) {
300 return true;
301 }
302
303 string folderPath = "/" + filePath.substr(0, pos);
304 if (access(folderPath.c_str(), F_OK) != 0) {
305 if (!ForceCreateDirectory(folderPath.data())) {
306 return false;
307 }
308 }
309 return true;
310 }
311
GetBigFiles(const vector<string> & includes,const vector<string> & excludes)312 tuple<ErrCode, map<string, struct stat>, map<string, size_t>> BDir::GetBigFiles(const vector<string> &includes,
313 const vector<string> &excludes)
314 {
315 set<string> inc = ExpandPathWildcard(includes, true);
316 map<string, struct stat> incFiles;
317 map<string, size_t> incSmallFiles;
318 for (const auto &item : inc) {
319 HILOGW("GetBigFiles, path = %{public}s", item.c_str());
320 auto [errCode, files, smallFiles] = GetDirFilesDetail(item, true, BConstants::BIG_FILE_BOUNDARY);
321 if (errCode == 0) {
322 HILOGW("big files: %{public}zu; small files: %{public}zu", files.size(), smallFiles.size());
323 incFiles.merge(move(files));
324 incSmallFiles.merge(move(smallFiles));
325 }
326 }
327 if (excludes.empty()) {
328 return {ERR_OK, move(incFiles), move(incSmallFiles)};
329 }
330
331 vector<string> endExcludes = excludes;
332 PreDealExcludes(endExcludes);
333
334 map<string, size_t> resSmallFiles;
335 for (const auto &item : incSmallFiles) {
336 if (!IsDirsMatch(endExcludes, item.first)) {
337 resSmallFiles.emplace(item);
338 }
339 }
340
341 map<string, struct stat> bigFiles;
342 for (const auto &item : incFiles) {
343 if (!IsDirsMatch(endExcludes, item.first)) {
344 bigFiles.emplace(item);
345 }
346 }
347 HILOGW("total number of big files is %{public}zu", bigFiles.size());
348 HILOGW("total number of small files is %{public}zu", resSmallFiles.size());
349 return {ERR_OK, move(bigFiles), move(resSmallFiles)};
350 }
351
GetUser0FileStat(vector<string> bigFile,vector<string> smallFile,vector<struct ReportFileInfo> & allFiles,vector<struct ReportFileInfo> & smallFiles,vector<struct ReportFileInfo> & bigFiles)352 void BDir::GetUser0FileStat(vector<string> bigFile,
353 vector<string> smallFile,
354 vector<struct ReportFileInfo> &allFiles,
355 vector<struct ReportFileInfo> &smallFiles,
356 vector<struct ReportFileInfo> &bigFiles)
357 {
358 for (const auto &item : smallFile) {
359 struct ReportFileInfo storageFiles;
360 storageFiles.filePath = item;
361 if (filesystem::is_directory(item)) {
362 storageFiles.isDir = 1;
363 storageFiles.userTar = 0;
364 } else {
365 storageFiles.isDir = 0;
366 auto [res, fileHash] = BackupFileHash::HashWithSHA256(item);
367 if (fileHash.empty()) {
368 continue;
369 }
370 storageFiles.hash = fileHash;
371 storageFiles.userTar = 1;
372 }
373 struct stat sta = {};
374 if (stat(item.c_str(), &sta) != 0) {
375 throw BError(BError::Codes::EXT_INVAL_ARG, "Get file stat failed");
376 }
377 storageFiles.size = sta.st_size;
378 storageFiles.mode = to_string(static_cast<int32_t>(sta.st_mode));
379 int64_t lastUpdateTime = static_cast<int64_t>(sta.st_mtime);
380 storageFiles.mtime = lastUpdateTime;
381 allFiles.emplace_back(storageFiles);
382 smallFiles.emplace_back(storageFiles);
383 }
384 for (const auto &item : bigFile) {
385 struct ReportFileInfo storageFiles;
386 storageFiles.filePath = item;
387 auto [res, fileHash] = BackupFileHash::HashWithSHA256(item);
388 if (fileHash.empty()) {
389 continue;
390 }
391 storageFiles.hash = fileHash;
392 struct stat sta = {};
393 if (stat(item.c_str(), &sta) != 0) {
394 throw BError(BError::Codes::EXT_INVAL_ARG, "Get file stat failed");
395 }
396 storageFiles.size = sta.st_size;
397 storageFiles.mode = to_string(static_cast<int32_t>(sta.st_mode));
398 int64_t lastUpdateTime = static_cast<int64_t>(sta.st_mtime);
399 storageFiles.mtime = lastUpdateTime;
400 storageFiles.userTar = 1;
401 allFiles.emplace_back(storageFiles);
402 bigFiles.emplace_back(storageFiles);
403 }
404 HILOGI("get FileStat end, bigfiles = %{public}zu, smallFiles = %{public}zu, allFiles = %{public}zu,",
405 bigFiles.size(), smallFiles.size(), allFiles.size());
406 }
407
IsNotPath(const string & path,vector<string> & bigFiles,vector<string> & smallFiles,off_t size)408 static tuple<vector<string>, vector<string>> IsNotPath(const string &path, vector<string> &bigFiles,
409 vector<string> &smallFiles, off_t size)
410 {
411 struct stat sta = {};
412 if (CheckOverLongPath(path) >= PATH_MAX_LEN || stat(path.data(), &sta) == -1) {
413 return {};
414 }
415 if (sta.st_size <= size) {
416 smallFiles.emplace_back(path);
417 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
418 return {bigFiles, smallFiles};
419 }
420 bigFiles.emplace_back(path);
421 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
422 return {bigFiles, smallFiles};
423 }
424
GetUser0DirFilesDetail(const string & path,off_t size=-1)425 static tuple<vector<string>, vector<string>> GetUser0DirFilesDetail(const string &path, off_t size = -1)
426 {
427 vector<string> bigFiles;
428 vector<string> smallFiles;
429 if (IsEmptyDirectory(path)) {
430 string newPath = path;
431 if (path.at(path.size()-1) != BConstants::FILE_SEPARATOR_CHAR) {
432 newPath += BConstants::FILE_SEPARATOR_CHAR;
433 }
434 smallFiles.emplace_back(newPath);
435 return {bigFiles, smallFiles};
436 }
437 if (filesystem::is_regular_file(path)) {
438 return IsNotPath(path, bigFiles, smallFiles, size);
439 }
440 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
441 if (!dir) {
442 HILOGE("Invalid directory path: %{private}s", path.c_str());
443 return {};
444 }
445 struct dirent *ptr = nullptr;
446 while (!!(ptr = readdir(dir.get()))) {
447 // current dir OR parent dir
448 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
449 continue;
450 } else if (ptr->d_type == DT_REG) {
451 struct stat sta = {};
452 string fileName = IncludeTrailingPathDelimiter(path) + string(ptr->d_name);
453 if (CheckOverLongPath(fileName) >= PATH_MAX_LEN || stat(fileName.data(), &sta) == -1) {
454 continue;
455 }
456 if (sta.st_size <= size) {
457 smallFiles.emplace_back(fileName);
458 continue;
459 }
460
461 bigFiles.emplace_back(fileName);
462 continue;
463 } else if (ptr->d_type != DT_DIR) {
464 HILOGE("Not support file type");
465 continue;
466 }
467 // DT_DIR type
468 auto [subBigFiles, subSmallFiles] =
469 GetUser0DirFilesDetail(IncludeTrailingPathDelimiter(path) + string(ptr->d_name), size);
470 bigFiles.insert(bigFiles.end(), subBigFiles.begin(), subBigFiles.end());
471 smallFiles.insert(smallFiles.end(), subSmallFiles.begin(), subSmallFiles.end());
472 }
473 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
474 return {bigFiles, smallFiles};
475 }
476
GetBackupList(const vector<string> & includes,const vector<string> & excludes)477 tuple<vector<string>, vector<string>> BDir::GetBackupList(const vector<string> &includes,
478 const vector<string> &excludes)
479 {
480 HILOGI("start get bigfiles and smallfiles");
481 set<string> inc = ExpandPathWildcard(includes, true);
482 vector<string> bigFiles;
483 vector<string> smallFiles;
484 for (const auto &item : inc) {
485 auto [bigFile, smallFile] = GetUser0DirFilesDetail(item, BConstants::BIG_FILE_BOUNDARY);
486 bigFiles.insert(bigFiles.end(), bigFile.begin(), bigFile.end());
487 smallFiles.insert(smallFiles.end(), smallFile.begin(), smallFile.end());
488 }
489 HILOGI("end bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
490 vector<string> endExcludes = excludes;
491 PreDealExcludes(endExcludes);
492 auto isMatch = [](const vector<string> &s, const string &str) -> bool {
493 if (str.empty()) {
494 return false;
495 }
496 for (const string &item : s) {
497 if (fnmatch(item.data(), str.data(), FNM_LEADING_DIR) == 0) {
498 return true;
499 }
500 }
501 return false;
502 };
503
504 for (auto item = bigFiles.begin(); item != bigFiles.end();) {
505 if (isMatch(endExcludes, *item)) {
506 item = bigFiles.erase(item);
507 } else {
508 ++item;
509 }
510 }
511 for (auto item = smallFiles.begin(); item != smallFiles.end();) {
512 if (isMatch(endExcludes, *item)) {
513 item = smallFiles.erase(item);
514 } else {
515 ++item;
516 }
517 }
518 HILOGI("End compare bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
519 return {bigFiles, smallFiles};
520 }
521
/**
 * @brief Expand wildcard paths and return the result as a vector.
 *
 * @param paths Glob patterns to expand.
 * @return The paths produced by ExpandPathWildcard(patterns, true), in the set's sorted order.
 */
vector<string> BDir::GetDirs(const vector<string_view> &paths)
{
    vector<string> patterns;
    patterns.reserve(paths.size());
    for (const string_view &view : paths) {
        patterns.emplace_back(view);
    }
    const set<string> expanded = ExpandPathWildcard(patterns, true);
    return vector<string>(expanded.begin(), expanded.end());
}
529
IsFilePathValid(const std::string & filePath)530 bool BDir::IsFilePathValid(const std::string &filePath)
531 {
532 return AppFileService::SandboxHelper::IsValidPath(filePath);
533 }
534
IsDirsMatch(const vector<string> & excludePaths,const string & path)535 bool BDir::IsDirsMatch(const vector<string> &excludePaths, const string &path)
536 {
537 if (path.empty()) {
538 return false;
539 }
540 for (const string &item : excludePaths) {
541 if (fnmatch(item.data(), path.data(), FNM_LEADING_DIR) == 0) {
542 return true;
543 }
544 }
545 return false;
546 }
547 } // namespace OHOS::FileManagement::Backup