1 /**
2 * Copyright 2021-2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "utils/file_utils.h"
18
19 #include <climits>
20 #include <cstring>
21 #include <string>
22 #include <optional>
23 #include <memory>
24 #include "utils/system/file_system.h"
25 #include "utils/system/env.h"
26
27 #if defined(_WIN32) || defined(_WIN64)
28 #include <windows.h>
29 #include <wchar.h>
30
31 #undef ERROR // which is in wingdi.h and conflict with log_adaptor.h
32 #endif
33
34 namespace mindspore {
35 #if defined(_WIN32) || defined(_WIN64)
IncludeChinese(const char * str)36 int IncludeChinese(const char *str) {
37 if (str == nullptr) {
38 MS_LOG(ERROR) << "Input str is nullptr";
39 return 0;
40 }
41
42 char *tmp_str = const_cast<char *>(str);
43 while (1) {
44 char c = *tmp_str++;
45
46 // end the input str
47 if (c == 0) {
48 break;
49 }
50
51 // The highest bit of chinese character is 1
52 if (c & 0x80) {
53 if (*tmp_str & 0x80) {
54 return 1;
55 }
56 }
57 }
58 return 0;
59 }
60
IsStrUTF_8(const char * str)61 bool IsStrUTF_8(const char *str) {
62 MS_EXCEPTION_IF_NULL(str);
63 uint32_t n_bytes = 0;
64 bool b_all_ascii = true;
65 for (uint32_t i = 0; str[i] != '\0'; ++i) {
66 unsigned char chr = *(str + i);
67 if (n_bytes == 0 && (chr & 0x80) != 0) {
68 b_all_ascii = false;
69 }
70 if (n_bytes == 0) {
71 if (chr >= 0x80) {
72 if (chr >= 0xFC && chr <= 0xFD) {
73 n_bytes = 6;
74 } else if (chr >= 0xF8) {
75 n_bytes = 5;
76 } else if (chr >= 0xF0) {
77 n_bytes = 4;
78 } else if (chr >= 0xE0) {
79 n_bytes = 3;
80 } else if (chr >= 0xC0) {
81 n_bytes = 2;
82 } else {
83 return false;
84 }
85 n_bytes--;
86 }
87 } else {
88 if ((chr & 0xC0) != 0x80) {
89 return false;
90 }
91 n_bytes--;
92 }
93 }
94
95 if (n_bytes != 0) {
96 return false;
97 }
98 if (b_all_ascii) {
99 return true;
100 }
101 return true;
102 }
103
IsStrGBK(const char * str)104 bool IsStrGBK(const char *str) {
105 MS_EXCEPTION_IF_NULL(str);
106 uint32_t n_bytes = 0;
107 bool b_all_ascii = true;
108 for (uint32_t i = 0; str[i] != '\0'; ++i) {
109 unsigned char chr = *(str + i);
110 if ((chr & 0x80) != 0 && n_bytes == 0) {
111 b_all_ascii = false;
112 }
113 if (n_bytes == 0) {
114 if (chr >= 0x80) {
115 if (chr >= 0x81 && chr <= 0xFE) {
116 n_bytes = +2;
117 } else {
118 return false;
119 }
120 n_bytes--;
121 }
122 } else {
123 if (chr < 0x40 || chr > 0xFE) {
124 return false;
125 }
126 n_bytes--;
127 }
128 }
129 if (n_bytes != 0) {
130 return false;
131 }
132 if (b_all_ascii) {
133 return true;
134 }
135 return true;
136 }
137
UTF_8ToUnicode(WCHAR * p_out,char * p_text)138 void UTF_8ToUnicode(WCHAR *p_out, char *p_text) {
139 MS_EXCEPTION_IF_NULL(p_out);
140 MS_EXCEPTION_IF_NULL(p_text);
141 char *uchar = reinterpret_cast<char *>(p_out);
142 uchar[1] = ((p_text[0] & 0x0F) << 4) + ((p_text[1] >> 2) & 0x0F);
143 uchar[0] = ((p_text[1] & 0x03) << 6) + (p_text[2] & 0x3F);
144 return;
145 }
146
UnicodeToGB2312(char * p_out,WCHAR u_data)147 void UnicodeToGB2312(char *p_out, WCHAR u_data) {
148 MS_EXCEPTION_IF_NULL(p_out);
149 WideCharToMultiByte(CP_ACP, 0, &u_data, 1, p_out, sizeof(WCHAR), nullptr, nullptr);
150 return;
151 }
152
UTF_8ToGB2312(const char * text)153 std::string FileUtils::UTF_8ToGB2312(const char *text) {
154 if (text == nullptr) {
155 MS_LOG(ERROR) << "Input text is nullptr";
156 return "";
157 }
158
159 std::string out;
160 if (!IncludeChinese(text) && IsStrUTF_8(text)) {
161 out = text;
162 return out;
163 }
164
165 if (IsStrGBK(text) && !IsStrUTF_8(text)) {
166 out = text;
167 return out;
168 }
169 char buf[4] = {0};
170 int len = strlen(text);
171 char *new_text = const_cast<char *>(text);
172 auto rst = std::make_unique<char[]>(len + (len >> 2) + 2);
173 errno_t ret = memset_s(rst.get(), len + (len >> 2) + 2, 0, len + (len >> 2) + 2);
174 if (ret != EOK) {
175 MS_LOG(ERROR) << "memset_s error, error code: " << ret;
176 return "";
177 }
178
179 int i = 0;
180 int j = 0;
181
182 while (i < len) {
183 if (*(new_text + i) >= 0) {
184 rst[j++] = new_text[i++];
185 } else {
186 WCHAR w_temp;
187 UTF_8ToUnicode(&w_temp, new_text + i);
188 UnicodeToGB2312(buf, w_temp);
189
190 rst[j] = buf[0];
191 rst[j + 1] = buf[1];
192 rst[j + 2] = buf[2];
193
194 i += 3;
195 j += 2;
196 }
197 }
198
199 rst[j] = '\0';
200 out = rst.get();
201 return out;
202 }
203
204 // gb2312 to utf8
GB2312ToUTF_8(const char * gb2312)205 std::string FileUtils::GB2312ToUTF_8(const char *gb2312) {
206 if (gb2312 == nullptr) {
207 MS_LOG(ERROR) << "Input string gb2312 is nullptr";
208 return "";
209 }
210
211 if (IsStrUTF_8(gb2312)) {
212 return std::string(gb2312);
213 }
214
215 int len = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, nullptr, 0);
216 auto wstr = std::make_unique<wchar_t[]>(len + 1);
217 errno_t ret = memset_s(wstr.get(), len + 1, 0, len + 1);
218 if (ret != EOK) {
219 MS_LOG(ERROR) << "memset_s error, error code: " << ret;
220 return "";
221 }
222 MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr.get(), len);
223 len = WideCharToMultiByte(CP_UTF8, 0, wstr.get(), -1, nullptr, 0, nullptr, nullptr);
224
225 auto str = std::make_unique<char[]>(len + 1);
226 errno_t ret2 = memset_s(str.get(), len + 1, 0, len + 1);
227 if (ret2 != EOK) {
228 MS_LOG(ERROR) << "memset_s error, error code: " << ret2;
229 return "";
230 }
231 WideCharToMultiByte(CP_UTF8, 0, wstr.get(), -1, str.get(), len, nullptr, nullptr);
232 std::string str_temp(str.get());
233
234 return str_temp;
235 }
236 #endif
237
GetRealPath(const char * path)238 std::optional<std::string> FileUtils::GetRealPath(const char *path) {
239 if (path == nullptr) {
240 MS_LOG(ERROR) << "Input path is nullptr";
241 return std::nullopt;
242 }
243
244 char real_path[PATH_MAX] = {0};
245 #if defined(_WIN32) || defined(_WIN64)
246 std::string new_path = FileUtils::UTF_8ToGB2312(path);
247 if (new_path.length() >= PATH_MAX || _fullpath(real_path, new_path.data(), PATH_MAX) == nullptr) {
248 MS_LOG(ERROR) << "Get realpath failed, path[" << path << "]";
249 return std::nullopt;
250 }
251 #else
252 if (strlen(path) >= PATH_MAX || realpath(path, real_path) == nullptr) {
253 MS_LOG(ERROR) << "Get realpath failed, path[" << path << "]";
254 return std::nullopt;
255 }
256 #endif
257 return std::string(real_path);
258 }
259
// Do not call the RealPath function in OpenFile, because OpenFile may open a nonexistent file.
OpenFile(const std::string & file_path,std::ios_base::openmode open_mode)261 std::fstream *FileUtils::OpenFile(const std::string &file_path, std::ios_base::openmode open_mode) {
262 auto fs = new (std::nothrow) std::fstream();
263 if (fs == nullptr) {
264 MS_LOG(DEBUG) << "Create file stream failed";
265 return nullptr;
266 }
267 fs->open(file_path, open_mode);
268 if (!fs->good()) {
269 MS_LOG(DEBUG) << "File is not exist: " << file_path;
270 delete fs;
271 return nullptr;
272 }
273 if (!fs->is_open()) {
274 MS_LOG(DEBUG) << "Can not open file: " << file_path;
275 delete fs;
276 return nullptr;
277 }
278 return fs;
279 }
280
ParserPathAndModelName(const std::string & output_path,std::string * save_path,std::string * model_name)281 bool FileUtils::ParserPathAndModelName(const std::string &output_path, std::string *save_path,
282 std::string *model_name) {
283 auto pos = output_path.find_last_of('/');
284 if (pos == std::string::npos) {
285 pos = output_path.find_last_of('\\');
286 }
287 std::string tmp_model_name;
288 if (pos == std::string::npos) {
289 #ifdef _WIN32
290 *save_path = ".\\";
291 #else
292 *save_path = "./";
293 #endif
294 tmp_model_name = output_path;
295 } else {
296 *save_path = output_path.substr(0, pos + 1);
297 tmp_model_name = output_path.substr(pos + 1);
298 }
299 *save_path = FileUtils::GetRealPath(save_path->c_str()).value();
300 if (save_path->empty()) {
301 MS_LOG(DEBUG) << "File path not regular: " << save_path;
302 return false;
303 }
304 auto suffix_pos = tmp_model_name.find_last_of('.');
305 if (suffix_pos == std::string::npos) {
306 *model_name = tmp_model_name;
307 } else {
308 if (tmp_model_name.substr(suffix_pos + 1) == "ms") {
309 *model_name = tmp_model_name.substr(0, suffix_pos);
310 } else {
311 *model_name = tmp_model_name;
312 }
313 }
314 return true;
315 }
316
SplitDirAndFileName(const std::string & path,std::optional<std::string> * prefix_path,std::optional<std::string> * file_name)317 void FileUtils::SplitDirAndFileName(const std::string &path, std::optional<std::string> *prefix_path,
318 std::optional<std::string> *file_name) {
319 auto path_split_pos = path.find_last_of('/');
320 auto path_split_pos_backslash = path.find_last_of('\\');
321 if (path_split_pos != std::string::npos) {
322 if (path_split_pos_backslash != std::string::npos && path_split_pos < path_split_pos_backslash) {
323 path_split_pos = path_split_pos_backslash;
324 }
325 } else {
326 path_split_pos = path_split_pos_backslash;
327 }
328
329 MS_EXCEPTION_IF_NULL(prefix_path);
330 MS_EXCEPTION_IF_NULL(file_name);
331
332 if (path_split_pos != std::string::npos) {
333 *prefix_path = path.substr(0, path_split_pos);
334 *file_name = path.substr(path_split_pos + 1);
335 } else {
336 *prefix_path = std::nullopt;
337 *file_name = path;
338 }
339 }
340
ConcatDirAndFileName(const std::optional<std::string> * dir,const std::optional<std::string> * file_name,std::optional<std::string> * path)341 void FileUtils::ConcatDirAndFileName(const std::optional<std::string> *dir, const std::optional<std::string> *file_name,
342 std::optional<std::string> *path) {
343 MS_EXCEPTION_IF_NULL(dir);
344 MS_EXCEPTION_IF_NULL(file_name);
345 MS_EXCEPTION_IF_NULL(path);
346 #if defined(_WIN32) || defined(_WIN64)
347 *path = dir->value() + "\\" + file_name->value();
348 #else
349 *path = dir->value() + "/" + file_name->value();
350 #endif
351 }
352
CreateNotExistDirs(const std::string & path,const bool support_relative_path)353 std::optional<std::string> FileUtils::CreateNotExistDirs(const std::string &path, const bool support_relative_path) {
354 if (path.size() >= PATH_MAX) {
355 MS_LOG(ERROR) << "The length of the path is greater than or equal to:" << PATH_MAX;
356 return std::nullopt;
357 }
358 if (!support_relative_path) {
359 auto dot_pos = path.find("..");
360 if (dot_pos != std::string::npos) {
361 MS_LOG(ERROR) << "Do not support relative path";
362 return std::nullopt;
363 }
364 }
365
366 std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
367 MS_EXCEPTION_IF_NULL(fs);
368 char temp_path[PATH_MAX] = {0};
369 for (uint32_t i = 0; i < path.length(); i++) {
370 temp_path[i] = path[i];
371 if (temp_path[i] == '\\' || temp_path[i] == '/') {
372 if (i != 0) {
373 char tmp_char = temp_path[i];
374 temp_path[i] = '\0';
375 std::string path_handle(temp_path);
376 if (!fs->FileExist(path_handle)) {
377 if (!fs->CreateDir(path_handle)) {
378 MS_LOG(ERROR) << "Create " << path_handle << " dir error";
379 return std::nullopt;
380 }
381 }
382 temp_path[i] = tmp_char;
383 }
384 }
385 }
386
387 if (!fs->FileExist(path)) {
388 if (!fs->CreateDir(path)) {
389 MS_LOG(ERROR) << "Create " << path << " dir error";
390 return std::nullopt;
391 }
392 }
393 return GetRealPath(path.c_str());
394 }
395 } // namespace mindspore
396