1 /*
2 * Copyright (c) 2022-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "b_filesystem/b_dir.h"
17
18 #include <algorithm>
19 #include <dirent.h>
20 #include <fnmatch.h>
21 #include <functional>
22 #include <filesystem>
23 #include <glob.h>
24 #include <memory>
25 #include <set>
26 #include <string>
27 #include <tuple>
28 #include <vector>
29
30 #include "b_error/b_error.h"
31 #include "b_filesystem/b_file_hash.h"
32 #include "b_resources/b_constants.h"
33 #include "directory_ex.h"
34 #include "errors.h"
35 #include "filemgmt_libhilog.h"
36
37 namespace OHOS::FileManagement::Backup {
38 using namespace std;
// Path length limit used by CheckOverLongPath(): a path whose length reaches this value
// is logged there, and the callers in this file skip such entries.
const int32_t PATH_MAX_LEN = 4096;
// Application data directory (public home + app data + separator). GetSubDir() omits the
// sub directory equal to this path and RmForceExcludePath() removes every path starting with it.
const std::string APP_DATA_DIR = BConstants::PATH_PUBLIC_HOME +
    BConstants::PATH_APP_DATA + BConstants::FILE_SEPARATOR_CHAR;
42
IsEmptyDirectory(const string & path)43 static bool IsEmptyDirectory(const string &path)
44 {
45 DIR *dir = opendir(path.c_str());
46 if (dir == nullptr) {
47 HILOGE("Opendir failed, errno:%{public}d", errno);
48 return false;
49 }
50 bool isEmpty = true;
51 struct dirent *entry = nullptr;
52 while ((entry = readdir(dir)) != nullptr) {
53 if (entry->d_type != DT_DIR || (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0)) {
54 isEmpty = false;
55 break;
56 }
57 }
58 closedir(dir);
59 return isEmpty;
60 }
61
GetFile(const string & path,off_t size=-1)62 static tuple<ErrCode, map<string, struct stat>, map<string, size_t>> GetFile(const string &path, off_t size = -1)
63 {
64 map<string, struct stat> files;
65 map<string, size_t> smallFiles;
66 struct stat sta = {};
67 if (stat(path.data(), &sta) == -1) {
68 HILOGE("File not exist, errno:%{public}d, fileName:%{private}s.", errno, path.c_str());
69 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
70 }
71 if (path == "/") {
72 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
73 }
74 if (sta.st_size <= size) {
75 smallFiles.insert(make_pair(path, sta.st_size));
76 } else {
77 files.try_emplace(path, sta);
78 }
79 return {BError(BError::Codes::OK).GetCode(), files, smallFiles};
80 }
81
CheckOverLongPath(const string & path)82 static uint32_t CheckOverLongPath(const string &path)
83 {
84 uint32_t len = path.length();
85 if (len >= PATH_MAX_LEN) {
86 size_t found = path.find_last_of(BConstants::FILE_SEPARATOR_CHAR);
87 string sub = path.substr(found + 1);
88 HILOGE("Path over long, length:%{public}d, fileName:%{public}s.", len, sub.c_str());
89 }
90 return len;
91 }
92
GetDirFilesDetail(const string & path,bool recursion,off_t size=-1)93 static tuple<ErrCode, map<string, struct stat>, map<string, size_t>> GetDirFilesDetail(const string &path,
94 bool recursion,
95 off_t size = -1)
96 {
97 map<string, struct stat> files;
98 map<string, size_t> smallFiles;
99
100 if (IsEmptyDirectory(path)) {
101 string newPath = path;
102 if (path.at(path.size()-1) != BConstants::FILE_SEPARATOR_CHAR) {
103 newPath += BConstants::FILE_SEPARATOR_CHAR;
104 }
105 smallFiles.insert(make_pair(newPath, 0));
106 return {ERR_OK, files, smallFiles};
107 }
108
109 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
110 if (!dir) {
111 HILOGE("Invalid directory path: %{private}s", path.c_str());
112 return GetFile(path, size);
113 }
114 struct dirent *ptr = nullptr;
115 while (!!(ptr = readdir(dir.get()))) {
116 // current dir OR parent dir
117 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
118 continue;
119 } else if (ptr->d_type == DT_REG) {
120 struct stat sta = {};
121 string fileName = IncludeTrailingPathDelimiter(path) + string(ptr->d_name);
122 if (CheckOverLongPath(fileName) >= PATH_MAX_LEN || stat(fileName.data(), &sta) == -1) {
123 continue;
124 }
125 if (sta.st_size <= size) {
126 smallFiles.insert(make_pair(fileName, sta.st_size));
127 continue;
128 }
129
130 files.try_emplace(fileName, sta);
131 continue;
132 } else if (ptr->d_type != DT_DIR) {
133 HILOGE("Not support file type");
134 continue;
135 }
136 // DT_DIR type
137 if (!recursion) {
138 continue;
139 }
140 auto [errCode, subFiles, subSmallFiles] =
141 GetDirFilesDetail(IncludeTrailingPathDelimiter(path) + string(ptr->d_name), recursion, size);
142 if (errCode != 0) {
143 return {errCode, files, smallFiles};
144 }
145 files.merge(subFiles);
146 smallFiles.insert(subSmallFiles.begin(), subSmallFiles.end());
147 }
148 return {ERR_OK, files, smallFiles};
149 }
150
GetDirFiles(const string & path)151 tuple<ErrCode, vector<string>> BDir::GetDirFiles(const string &path)
152 {
153 vector<string> files;
154 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
155 if (!dir) {
156 HILOGE("Invalid directory path: %{private}s", path.c_str());
157 return {BError(errno).GetCode(), files};
158 }
159
160 struct dirent *ptr = nullptr;
161 while (!!(ptr = readdir(dir.get()))) {
162 // current dir OR parent dir
163 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
164 continue;
165 } else if (ptr->d_type == DT_DIR) {
166 continue;
167 } else {
168 files.push_back(IncludeTrailingPathDelimiter(path) + string(ptr->d_name));
169 }
170 }
171
172 return {ERR_OK, files};
173 }
174
GetSubDir(const std::string & path)175 static std::set<std::string> GetSubDir(const std::string &path)
176 {
177 if (path.empty()) {
178 return {};
179 }
180 std::set<std::string> result;
181 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
182 if (!dir) {
183 HILOGE("Invalid directory path: %{private}s", path.c_str());
184 return {};
185 }
186
187 struct dirent *ptr = nullptr;
188 while (!!(ptr = readdir(dir.get()))) {
189 // current dir OR parent dir
190 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
191 continue;
192 } else if (ptr->d_type == DT_DIR) {
193 std::string tmpPath = IncludeTrailingPathDelimiter(path) +
194 string(ptr->d_name) + BConstants::FILE_SEPARATOR_CHAR;
195 if (tmpPath == APP_DATA_DIR) {
196 HILOGI("Filter appdata successfully");
197 continue;
198 }
199 result.emplace(tmpPath);
200 } else {
201 result.emplace(IncludeTrailingPathDelimiter(path) + string(ptr->d_name));
202 }
203 }
204 return result;
205 }
206
RmForceExcludePath(set<string> & expandPath)207 static void RmForceExcludePath(set<string> &expandPath)
208 {
209 set<string> addPaths;
210 for (auto it = expandPath.begin(); it != expandPath.end();) {
211 if (*it == BConstants::PATH_PUBLIC_HOME) {
212 addPaths = GetSubDir(*it);
213 }
214 if ((*it).find(APP_DATA_DIR) == 0) {
215 it = expandPath.erase(it);
216 continue;
217 }
218 ++it;
219 }
220 if (!addPaths.empty()) {
221 expandPath.erase(BConstants::PATH_PUBLIC_HOME);
222 expandPath.merge(addPaths);
223 }
224 }
225
ExpandPathWildcard(const vector<string> & vec,bool onlyPath)226 static set<string> ExpandPathWildcard(const vector<string> &vec, bool onlyPath)
227 {
228 unique_ptr<glob_t, function<void(glob_t *)>> gl {new glob_t, [](glob_t *ptr) { globfree(ptr); }};
229 *gl = {};
230
231 unsigned int flags = GLOB_DOOFFS | GLOB_MARK;
232 for (const string &pattern : vec) {
233 if (!pattern.empty()) {
234 glob(pattern.data(), static_cast<int>(flags), NULL, gl.get());
235 flags |= GLOB_APPEND;
236 }
237 }
238
239 set<string> expandPath, filteredPath;
240 for (size_t i = 0; i < gl->gl_pathc; ++i) {
241 std::string tmpPath = gl->gl_pathv[i];
242 auto pos = tmpPath.find(BConstants::FILE_SEPARATOR_CHAR);
243 if (pos != 0 && pos != std::string::npos) {
244 tmpPath = BConstants::FILE_SEPARATOR_CHAR + tmpPath;
245 }
246 expandPath.emplace(tmpPath);
247 }
248 RmForceExcludePath(expandPath);
249 for (auto it = expandPath.begin(); it != expandPath.end(); ++it) {
250 filteredPath.insert(*it);
251 if (onlyPath && *it->rbegin() != BConstants::FILE_SEPARATOR_CHAR) {
252 continue;
253 }
254 auto jt = it;
255 for (++jt; jt != expandPath.end() && (jt->find(*it) == 0); ++jt) {
256 }
257
258 it = --jt;
259 }
260
261 return filteredPath;
262 }
263
GetBigFiles(const vector<string> & includes,const vector<string> & excludes)264 tuple<ErrCode, map<string, struct stat>, map<string, size_t>> BDir::GetBigFiles(const vector<string> &includes,
265 const vector<string> &excludes)
266 {
267 set<string> inc = ExpandPathWildcard(includes, true);
268
269 map<string, struct stat> incFiles;
270 map<string, size_t> incSmallFiles;
271 for (const auto &item : inc) {
272 HILOGW("GetBigFiles, path = %{public}s", item.c_str());
273 auto [errCode, files, smallFiles] = GetDirFilesDetail(item, true, BConstants::BIG_FILE_BOUNDARY);
274 if (errCode == 0) {
275 incFiles.merge(move(files));
276 HILOGW("big files: %{public}zu; small files: %{public}zu", files.size(), smallFiles.size());
277 incSmallFiles.insert(smallFiles.begin(), smallFiles.end());
278 }
279 }
280
281 auto isMatch = [](const vector<string> &s, const string &str) -> bool {
282 if (str.empty()) {
283 return false;
284 }
285 for (const string &item : s) {
286 if (item.empty()) {
287 continue;
288 }
289 string excludeItem = item;
290 if (excludeItem.at(item.size() - 1) == BConstants::FILE_SEPARATOR_CHAR) {
291 excludeItem += "*";
292 }
293 if (fnmatch(excludeItem.data(), str.data(), FNM_LEADING_DIR) == 0) {
294 return true;
295 }
296 }
297 return false;
298 };
299
300 map<string, size_t> resSmallFiles;
301 for (const auto &item : incSmallFiles) {
302 if (!isMatch(excludes, item.first)) {
303 resSmallFiles.insert(make_pair(item.first, item.second));
304 }
305 }
306
307 map<string, struct stat> bigFiles;
308 for (const auto &item : incFiles) {
309 if (!isMatch(excludes, item.first)) {
310 bigFiles[item.first] = item.second;
311 }
312 }
313 HILOGW("total number of big files is %{public}zu", bigFiles.size());
314 HILOGW("total number of small files is %{public}zu", resSmallFiles.size());
315 return {ERR_OK, move(bigFiles), move(resSmallFiles)};
316 }
317
GetUser0FileStat(vector<string> bigFile,vector<string> smallFile,vector<struct ReportFileInfo> & allFiles,vector<struct ReportFileInfo> & smallFiles,vector<struct ReportFileInfo> & bigFiles)318 void BDir::GetUser0FileStat(vector<string> bigFile,
319 vector<string> smallFile,
320 vector<struct ReportFileInfo> &allFiles,
321 vector<struct ReportFileInfo> &smallFiles,
322 vector<struct ReportFileInfo> &bigFiles)
323 {
324 for (const auto &item : smallFile) {
325 struct ReportFileInfo storageFiles;
326 storageFiles.filePath = item;
327 if (filesystem::is_directory(item)) {
328 storageFiles.isDir = 1;
329 storageFiles.userTar = 0;
330 } else {
331 storageFiles.isDir = 0;
332 auto [res, fileHash] = BackupFileHash::HashWithSHA256(item);
333 if (fileHash.empty()) {
334 continue;
335 }
336 storageFiles.hash = fileHash;
337 storageFiles.userTar = 1;
338 }
339 struct stat sta = {};
340 if (stat(item.c_str(), &sta) != 0) {
341 throw BError(BError::Codes::EXT_INVAL_ARG, "Get file stat failed");
342 }
343 storageFiles.size = sta.st_size;
344 storageFiles.mode = to_string(static_cast<int32_t>(sta.st_mode));
345 int64_t lastUpdateTime = static_cast<int64_t>(sta.st_mtime);
346 storageFiles.mtime = lastUpdateTime;
347 allFiles.push_back(storageFiles);
348 smallFiles.push_back(storageFiles);
349 }
350 for (const auto &item : bigFile) {
351 struct ReportFileInfo storageFiles;
352 storageFiles.filePath = item;
353 auto [res, fileHash] = BackupFileHash::HashWithSHA256(item);
354 if (fileHash.empty()) {
355 continue;
356 }
357 storageFiles.hash = fileHash;
358 struct stat sta = {};
359 if (stat(item.c_str(), &sta) != 0) {
360 throw BError(BError::Codes::EXT_INVAL_ARG, "Get file stat failed");
361 }
362 storageFiles.size = sta.st_size;
363 storageFiles.mode = to_string(static_cast<int32_t>(sta.st_mode));
364 int64_t lastUpdateTime = static_cast<int64_t>(sta.st_mtime);
365 storageFiles.mtime = lastUpdateTime;
366 storageFiles.userTar = 1;
367 allFiles.push_back(storageFiles);
368 bigFiles.push_back(storageFiles);
369 }
370 HILOGI("get FileStat end, bigfiles = %{public}zu, smallFiles = %{public}zu, allFiles = %{public}zu,",
371 bigFiles.size(), smallFiles.size(), allFiles.size());
372 }
373
IsNotPath(const string & path,vector<string> & bigFiles,vector<string> & smallFiles,off_t size)374 static tuple<vector<string>, vector<string>> IsNotPath(const string &path, vector<string> &bigFiles,
375 vector<string> &smallFiles, off_t size)
376 {
377 struct stat sta = {};
378 if (CheckOverLongPath(path) >= PATH_MAX_LEN || stat(path.data(), &sta) == -1) {
379 return {};
380 }
381 if (sta.st_size <= size) {
382 smallFiles.push_back(path);
383 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
384 return {bigFiles, smallFiles};
385 }
386 bigFiles.push_back(path);
387 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
388 return {bigFiles, smallFiles};
389 }
390
GetUser0DirFilesDetail(const string & path,off_t size=-1)391 static tuple<vector<string>, vector<string>> GetUser0DirFilesDetail(const string &path, off_t size = -1)
392 {
393 vector<string> bigFiles;
394 vector<string> smallFiles;
395 if (IsEmptyDirectory(path)) {
396 string newPath = path;
397 if (path.at(path.size()-1) != BConstants::FILE_SEPARATOR_CHAR) {
398 newPath += BConstants::FILE_SEPARATOR_CHAR;
399 }
400 smallFiles.push_back(newPath);
401 return {bigFiles, smallFiles};
402 }
403 if (filesystem::is_regular_file(path)) {
404 return IsNotPath(path, bigFiles, smallFiles, size);
405 }
406 unique_ptr<DIR, function<void(DIR *)>> dir = {opendir(path.c_str()), closedir};
407 if (!dir) {
408 HILOGE("Invalid directory path: %{private}s", path.c_str());
409 return {};
410 }
411 struct dirent *ptr = nullptr;
412 while (!!(ptr = readdir(dir.get()))) {
413 // current dir OR parent dir
414 if ((strcmp(ptr->d_name, ".") == 0) || (strcmp(ptr->d_name, "..") == 0)) {
415 continue;
416 } else if (ptr->d_type == DT_REG) {
417 struct stat sta = {};
418 string fileName = IncludeTrailingPathDelimiter(path) + string(ptr->d_name);
419 if (CheckOverLongPath(fileName) >= PATH_MAX_LEN || stat(fileName.data(), &sta) == -1) {
420 continue;
421 }
422 if (sta.st_size <= size) {
423 smallFiles.push_back(fileName);
424 continue;
425 }
426
427 bigFiles.push_back(fileName);
428 continue;
429 } else if (ptr->d_type != DT_DIR) {
430 HILOGE("Not support file type");
431 continue;
432 }
433 // DT_DIR type
434 auto [subBigFiles, subSmallFiles] =
435 GetUser0DirFilesDetail(IncludeTrailingPathDelimiter(path) + string(ptr->d_name), size);
436 bigFiles.insert(bigFiles.end(), subBigFiles.begin(), subBigFiles.end());
437 smallFiles.insert(smallFiles.end(), subSmallFiles.begin(), subSmallFiles.end());
438 }
439 HILOGI("bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
440 return {bigFiles, smallFiles};
441 }
442
GetBackupList(const vector<string> & includes,const vector<string> & excludes)443 tuple<vector<string>, vector<string>> BDir::GetBackupList(const vector<string> &includes,
444 const vector<string> &excludes)
445 {
446 HILOGI("start get bigfiles and smallfiles");
447 set<string> inc = ExpandPathWildcard(includes, true);
448 vector<string> bigFiles;
449 vector<string> smallFiles;
450 for (const auto &item : inc) {
451 auto [bigFile, smallFile] = GetUser0DirFilesDetail(item, BConstants::BIG_FILE_BOUNDARY);
452 bigFiles.insert(bigFiles.end(), bigFile.begin(), bigFile.end());
453 smallFiles.insert(smallFiles.end(), smallFile.begin(), smallFile.end());
454 }
455 HILOGI("end bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
456 auto isMatch = [](const vector<string> &s, const string &str) -> bool {
457 if (str.empty()) {
458 return false;
459 }
460 for (const string &item : s) {
461 if (item.empty()) {
462 continue;
463 }
464 string excludeItem = item;
465 if (excludeItem.at(item.size() - 1) == BConstants::FILE_SEPARATOR_CHAR) {
466 excludeItem += "*";
467 }
468 if (fnmatch(excludeItem.data(), str.data(), FNM_LEADING_DIR) == 0) {
469 return true;
470 }
471 }
472 return false;
473 };
474
475 for (auto item = bigFiles.begin(); item != bigFiles.end();) {
476 if (isMatch(excludes, *item)) {
477 item = bigFiles.erase(item);
478 } else {
479 ++item;
480 }
481 }
482 for (auto item = smallFiles.begin(); item != smallFiles.end();) {
483 if (isMatch(excludes, *item)) {
484 item = smallFiles.erase(item);
485 } else {
486 ++item;
487 }
488 }
489 HILOGI("End compare bigfiles = %{public}zu, smallfiles = %{public}zu", bigFiles.size(), smallFiles.size());
490 return {bigFiles, smallFiles};
491 }
492
/**
 * @brief Expand glob patterns into a list of paths.
 *
 * The patterns are passed to ExpandPathWildcard() with onlyPath set to true.
 *
 * @param paths Glob patterns.
 * @return The expanded paths, in the order of the set returned by ExpandPathWildcard().
 */
vector<string> BDir::GetDirs(const vector<string_view> &paths)
{
    vector<string> patterns;
    patterns.reserve(paths.size());
    for (const string_view &pattern : paths) {
        patterns.emplace_back(pattern);
    }

    const set<string> expanded = ExpandPathWildcard(patterns, true);
    return vector<string>(expanded.begin(), expanded.end());
}
500
CheckFilePathInvalid(const std::string & filePath)501 bool BDir::CheckFilePathInvalid(const std::string &filePath)
502 {
503 if (filePath.find("../") != std::string::npos) {
504 return true;
505 }
506 return false;
507 }
508 } // namespace OHOS::FileManagement::Backup