• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "utils/file_utils.h"
18 
19 #include <climits>
20 #include <cstring>
21 #include <string>
22 #include <optional>
23 #include <memory>
24 #include "utils/system/file_system.h"
25 #include "utils/system/env.h"
26 
27 #if defined(_WIN32) || defined(_WIN64)
28 #include <windows.h>
29 #include <wchar.h>
30 
31 #undef ERROR  // which is in wingdi.h and conflict with log_adaptor.h
32 #endif
33 
34 namespace mindspore {
35 #if defined(_WIN32) || defined(_WIN64)
IncludeChinese(const char * str)36 int IncludeChinese(const char *str) {
37   if (str == nullptr) {
38     MS_LOG(ERROR) << "Input str is nullptr";
39     return 0;
40   }
41 
42   char *tmp_str = const_cast<char *>(str);
43   while (1) {
44     char c = *tmp_str++;
45 
46     // end the input str
47     if (c == 0) {
48       break;
49     }
50 
51     // The highest bit of chinese character is 1
52     if (c & 0x80) {
53       if (*tmp_str & 0x80) {
54         return 1;
55       }
56     }
57   }
58   return 0;
59 }
60 
IsStrUTF_8(const char * str)61 bool IsStrUTF_8(const char *str) {
62   MS_EXCEPTION_IF_NULL(str);
63   uint32_t n_bytes = 0;
64   bool b_all_ascii = true;
65   for (uint32_t i = 0; str[i] != '\0'; ++i) {
66     unsigned char chr = *(str + i);
67     if (n_bytes == 0 && (chr & 0x80) != 0) {
68       b_all_ascii = false;
69     }
70     if (n_bytes == 0) {
71       if (chr >= 0x80) {
72         if (chr >= 0xFC && chr <= 0xFD) {
73           n_bytes = 6;
74         } else if (chr >= 0xF8) {
75           n_bytes = 5;
76         } else if (chr >= 0xF0) {
77           n_bytes = 4;
78         } else if (chr >= 0xE0) {
79           n_bytes = 3;
80         } else if (chr >= 0xC0) {
81           n_bytes = 2;
82         } else {
83           return false;
84         }
85         n_bytes--;
86       }
87     } else {
88       if ((chr & 0xC0) != 0x80) {
89         return false;
90       }
91       n_bytes--;
92     }
93   }
94 
95   if (n_bytes != 0) {
96     return false;
97   }
98   if (b_all_ascii) {
99     return true;
100   }
101   return true;
102 }
103 
IsStrGBK(const char * str)104 bool IsStrGBK(const char *str) {
105   MS_EXCEPTION_IF_NULL(str);
106   uint32_t n_bytes = 0;
107   bool b_all_ascii = true;
108   for (uint32_t i = 0; str[i] != '\0'; ++i) {
109     unsigned char chr = *(str + i);
110     if ((chr & 0x80) != 0 && n_bytes == 0) {
111       b_all_ascii = false;
112     }
113     if (n_bytes == 0) {
114       if (chr >= 0x80) {
115         if (chr >= 0x81 && chr <= 0xFE) {
116           n_bytes = +2;
117         } else {
118           return false;
119         }
120         n_bytes--;
121       }
122     } else {
123       if (chr < 0x40 || chr > 0xFE) {
124         return false;
125       }
126       n_bytes--;
127     }
128   }
129   if (n_bytes != 0) {
130     return false;
131   }
132   if (b_all_ascii) {
133     return true;
134   }
135   return true;
136 }
137 
UTF_8ToUnicode(WCHAR * p_out,char * p_text)138 void UTF_8ToUnicode(WCHAR *p_out, char *p_text) {
139   MS_EXCEPTION_IF_NULL(p_out);
140   MS_EXCEPTION_IF_NULL(p_text);
141   char *uchar = reinterpret_cast<char *>(p_out);
142   uchar[1] = ((p_text[0] & 0x0F) << 4) + ((p_text[1] >> 2) & 0x0F);
143   uchar[0] = ((p_text[1] & 0x03) << 6) + (p_text[2] & 0x3F);
144   return;
145 }
146 
UnicodeToGB2312(char * p_out,WCHAR u_data)147 void UnicodeToGB2312(char *p_out, WCHAR u_data) {
148   MS_EXCEPTION_IF_NULL(p_out);
149   WideCharToMultiByte(CP_ACP, 0, &u_data, 1, p_out, sizeof(WCHAR), nullptr, nullptr);
150   return;
151 }
152 
UTF_8ToGB2312(const char * text)153 std::string FileUtils::UTF_8ToGB2312(const char *text) {
154   if (text == nullptr) {
155     MS_LOG(ERROR) << "Input text is nullptr";
156     return "";
157   }
158 
159   std::string out;
160   if (!IncludeChinese(text) && IsStrUTF_8(text)) {
161     out = text;
162     return out;
163   }
164 
165   if (IsStrGBK(text) && !IsStrUTF_8(text)) {
166     out = text;
167     return out;
168   }
169   char buf[4] = {0};
170   int len = strlen(text);
171   char *new_text = const_cast<char *>(text);
172   auto rst = std::make_unique<char[]>(len + (len >> 2) + 2);
173   errno_t ret = memset_s(rst.get(), len + (len >> 2) + 2, 0, len + (len >> 2) + 2);
174   if (ret != EOK) {
175     MS_LOG(ERROR) << "memset_s error, error code: " << ret;
176     return "";
177   }
178 
179   int i = 0;
180   int j = 0;
181 
182   while (i < len) {
183     if (*(new_text + i) >= 0) {
184       rst[j++] = new_text[i++];
185     } else {
186       WCHAR w_temp;
187       UTF_8ToUnicode(&w_temp, new_text + i);
188       UnicodeToGB2312(buf, w_temp);
189 
190       rst[j] = buf[0];
191       rst[j + 1] = buf[1];
192       rst[j + 2] = buf[2];
193 
194       i += 3;
195       j += 2;
196     }
197   }
198 
199   rst[j] = '\0';
200   out = rst.get();
201   return out;
202 }
203 
204 // gb2312 to utf8
GB2312ToUTF_8(const char * gb2312)205 std::string FileUtils::GB2312ToUTF_8(const char *gb2312) {
206   if (gb2312 == nullptr) {
207     MS_LOG(ERROR) << "Input string gb2312 is nullptr";
208     return "";
209   }
210 
211   if (IsStrUTF_8(gb2312)) {
212     return std::string(gb2312);
213   }
214 
215   int len = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, nullptr, 0);
216   auto wstr = std::make_unique<wchar_t[]>(len + 1);
217   errno_t ret = memset_s(wstr.get(), len + 1, 0, len + 1);
218   if (ret != EOK) {
219     MS_LOG(ERROR) << "memset_s error, error code: " << ret;
220     return "";
221   }
222   MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr.get(), len);
223   len = WideCharToMultiByte(CP_UTF8, 0, wstr.get(), -1, nullptr, 0, nullptr, nullptr);
224 
225   auto str = std::make_unique<char[]>(len + 1);
226   errno_t ret2 = memset_s(str.get(), len + 1, 0, len + 1);
227   if (ret2 != EOK) {
228     MS_LOG(ERROR) << "memset_s error, error code: " << ret2;
229     return "";
230   }
231   WideCharToMultiByte(CP_UTF8, 0, wstr.get(), -1, str.get(), len, nullptr, nullptr);
232   std::string str_temp(str.get());
233 
234   return str_temp;
235 }
236 #endif
237 
GetRealPath(const char * path)238 std::optional<std::string> FileUtils::GetRealPath(const char *path) {
239   if (path == nullptr) {
240     MS_LOG(ERROR) << "Input path is nullptr";
241     return std::nullopt;
242   }
243 
244   char real_path[PATH_MAX] = {0};
245 #if defined(_WIN32) || defined(_WIN64)
246   std::string new_path = FileUtils::UTF_8ToGB2312(path);
247   if (new_path.length() >= PATH_MAX || _fullpath(real_path, new_path.data(), PATH_MAX) == nullptr) {
248     MS_LOG(ERROR) << "Get realpath failed, path[" << path << "]";
249     return std::nullopt;
250   }
251 #else
252   if (strlen(path) >= PATH_MAX || realpath(path, real_path) == nullptr) {
253     MS_LOG(ERROR) << "Get realpath failed, path[" << path << "]";
254     return std::nullopt;
255   }
256 #endif
257   return std::string(real_path);
258 }
259 
260 // do not call RealPath function in OpenFile, because OpenFile may open a non-exist file
OpenFile(const std::string & file_path,std::ios_base::openmode open_mode)261 std::fstream *FileUtils::OpenFile(const std::string &file_path, std::ios_base::openmode open_mode) {
262   auto fs = new (std::nothrow) std::fstream();
263   if (fs == nullptr) {
264     MS_LOG(DEBUG) << "Create file stream failed";
265     return nullptr;
266   }
267   fs->open(file_path, open_mode);
268   if (!fs->good()) {
269     MS_LOG(DEBUG) << "File is not exist: " << file_path;
270     delete fs;
271     return nullptr;
272   }
273   if (!fs->is_open()) {
274     MS_LOG(DEBUG) << "Can not open file: " << file_path;
275     delete fs;
276     return nullptr;
277   }
278   return fs;
279 }
280 
ParserPathAndModelName(const std::string & output_path,std::string * save_path,std::string * model_name)281 bool FileUtils::ParserPathAndModelName(const std::string &output_path, std::string *save_path,
282                                        std::string *model_name) {
283   auto pos = output_path.find_last_of('/');
284   if (pos == std::string::npos) {
285     pos = output_path.find_last_of('\\');
286   }
287   std::string tmp_model_name;
288   if (pos == std::string::npos) {
289 #ifdef _WIN32
290     *save_path = ".\\";
291 #else
292     *save_path = "./";
293 #endif
294     tmp_model_name = output_path;
295   } else {
296     *save_path = output_path.substr(0, pos + 1);
297     tmp_model_name = output_path.substr(pos + 1);
298   }
299   *save_path = FileUtils::GetRealPath(save_path->c_str()).value();
300   if (save_path->empty()) {
301     MS_LOG(DEBUG) << "File path not regular: " << save_path;
302     return false;
303   }
304   auto suffix_pos = tmp_model_name.find_last_of('.');
305   if (suffix_pos == std::string::npos) {
306     *model_name = tmp_model_name;
307   } else {
308     if (tmp_model_name.substr(suffix_pos + 1) == "ms") {
309       *model_name = tmp_model_name.substr(0, suffix_pos);
310     } else {
311       *model_name = tmp_model_name;
312     }
313   }
314   return true;
315 }
316 
SplitDirAndFileName(const std::string & path,std::optional<std::string> * prefix_path,std::optional<std::string> * file_name)317 void FileUtils::SplitDirAndFileName(const std::string &path, std::optional<std::string> *prefix_path,
318                                     std::optional<std::string> *file_name) {
319   auto path_split_pos = path.find_last_of('/');
320   auto path_split_pos_backslash = path.find_last_of('\\');
321   if (path_split_pos != std::string::npos) {
322     if (path_split_pos_backslash != std::string::npos && path_split_pos < path_split_pos_backslash) {
323       path_split_pos = path_split_pos_backslash;
324     }
325   } else {
326     path_split_pos = path_split_pos_backslash;
327   }
328 
329   MS_EXCEPTION_IF_NULL(prefix_path);
330   MS_EXCEPTION_IF_NULL(file_name);
331 
332   if (path_split_pos != std::string::npos) {
333     *prefix_path = path.substr(0, path_split_pos);
334     *file_name = path.substr(path_split_pos + 1);
335   } else {
336     *prefix_path = std::nullopt;
337     *file_name = path;
338   }
339 }
340 
ConcatDirAndFileName(const std::optional<std::string> * dir,const std::optional<std::string> * file_name,std::optional<std::string> * path)341 void FileUtils::ConcatDirAndFileName(const std::optional<std::string> *dir, const std::optional<std::string> *file_name,
342                                      std::optional<std::string> *path) {
343   MS_EXCEPTION_IF_NULL(dir);
344   MS_EXCEPTION_IF_NULL(file_name);
345   MS_EXCEPTION_IF_NULL(path);
346 #if defined(_WIN32) || defined(_WIN64)
347   *path = dir->value() + "\\" + file_name->value();
348 #else
349   *path = dir->value() + "/" + file_name->value();
350 #endif
351 }
352 
CreateNotExistDirs(const std::string & path,const bool support_relative_path)353 std::optional<std::string> FileUtils::CreateNotExistDirs(const std::string &path, const bool support_relative_path) {
354   if (path.size() >= PATH_MAX) {
355     MS_LOG(ERROR) << "The length of the path is greater than or equal to:" << PATH_MAX;
356     return std::nullopt;
357   }
358   if (!support_relative_path) {
359     auto dot_pos = path.find("..");
360     if (dot_pos != std::string::npos) {
361       MS_LOG(ERROR) << "Do not support relative path";
362       return std::nullopt;
363     }
364   }
365 
366   std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
367   MS_EXCEPTION_IF_NULL(fs);
368   char temp_path[PATH_MAX] = {0};
369   for (uint32_t i = 0; i < path.length(); i++) {
370     temp_path[i] = path[i];
371     if (temp_path[i] == '\\' || temp_path[i] == '/') {
372       if (i != 0) {
373         char tmp_char = temp_path[i];
374         temp_path[i] = '\0';
375         std::string path_handle(temp_path);
376         if (!fs->FileExist(path_handle)) {
377           if (!fs->CreateDir(path_handle)) {
378             MS_LOG(ERROR) << "Create " << path_handle << " dir error";
379             return std::nullopt;
380           }
381         }
382         temp_path[i] = tmp_char;
383       }
384     }
385   }
386 
387   if (!fs->FileExist(path)) {
388     if (!fs->CreateDir(path)) {
389       MS_LOG(ERROR) << "Create " << path << " dir error";
390       return std::nullopt;
391     }
392   }
393   return GetRealPath(path.c_str());
394 }
395 }  // namespace mindspore
396