1 /**
2 * Copyright 2019 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "minddata/mindrecord/include/common/shard_utils.h"
18 #include "utils/ms_utils.h"
19 #include "./securec.h"
20
21 using mindspore::LogStream;
22 using mindspore::ExceptionType::NoExceptionType;
23 using mindspore::MsLogLevel::DEBUG;
24 using mindspore::MsLogLevel::ERROR;
25
26 namespace mindspore {
27 namespace mindrecord {
// Split `field` into the pieces that lie between occurrences of `separator`.
// - An empty input yields an empty vector.
// - Adjacent separators (and a leading separator) produce empty pieces.
// - A trailing separator does NOT produce a trailing empty piece, because scanning stops
//   as soon as the cursor reaches the end of the input.
std::vector<std::string> StringSplit(const std::string &field, char separator) {
  std::vector<std::string> pieces;
  const size_t total = field.length();
  size_t cursor = 0;
  while (cursor < total) {
    const size_t hit = field.find(separator, cursor);
    if (hit == std::string::npos) {
      // No further separator: everything that is left forms the last piece.
      pieces.emplace_back(field.substr(cursor));
      break;
    }
    pieces.emplace_back(field.substr(cursor, hit - cursor));
    cursor = hit + 1;
  }
  return pieces;
}
44
// Check whether `str` is usable as a field name.
// Returns true only when `str` is non-empty and every character is an ASCII letter,
// an ASCII digit or an underscore; returns false otherwise.
bool ValidateFieldName(const std::string &str) {
  if (str.empty()) {
    return false;
  }
  for (const char ch : str) {
    const bool is_digit = (ch >= '0') && (ch <= '9');
    const bool is_upper = (ch >= 'A') && (ch <= 'Z');
    const bool is_lower = (ch >= 'a') && (ch <= 'z');
    const bool allowed = (ch == '_') || is_digit || is_upper || is_lower;
    if (!allowed) {
      return false;
    }
  }
  return true;
}
59
GetFileName(const std::string & path,std::shared_ptr<std::string> * fn_ptr)60 Status GetFileName(const std::string &path, std::shared_ptr<std::string> *fn_ptr) {
61 RETURN_UNEXPECTED_IF_NULL(fn_ptr);
62 char real_path[PATH_MAX] = {0};
63 char buf[PATH_MAX] = {0};
64 if (strncpy_s(buf, PATH_MAX, common::SafeCStr(path), path.length()) != EOK) {
65 RETURN_STATUS_UNEXPECTED("Failed to call securec func [strncpy_s], path: " + path);
66 }
67 char tmp[PATH_MAX] = {0};
68 #if defined(_WIN32) || defined(_WIN64)
69 if (_fullpath(tmp, dirname(&(buf[0])), PATH_MAX) == nullptr) {
70 RETURN_STATUS_UNEXPECTED("Invalid file, path: " + std::string(buf));
71 }
72 if (_fullpath(real_path, common::SafeCStr(path), PATH_MAX) == nullptr) {
73 MS_LOG(DEBUG) << "Path: " << common::SafeCStr(path) << "check success.";
74 }
75 #else
76 if (realpath(dirname(&(buf[0])), tmp) == nullptr) {
77 RETURN_STATUS_UNEXPECTED(std::string("Invalid file, path: ") + buf);
78 }
79 if (realpath(common::SafeCStr(path), real_path) == nullptr) {
80 MS_LOG(DEBUG) << "Path: " << path << "check success.";
81 }
82 #endif
83 std::string s = real_path;
84 size_t i = s.rfind(kPathSeparator, s.length());
85 if (i != std::string::npos) {
86 if (i + 1 < s.size()) {
87 *fn_ptr = std::make_shared<std::string>(s.substr(i + 1));
88 return Status::OK();
89 }
90 }
91 *fn_ptr = std::make_shared<std::string>(s);
92 return Status::OK();
93 }
94
GetParentDir(const std::string & path,std::shared_ptr<std::string> * pd_ptr)95 Status GetParentDir(const std::string &path, std::shared_ptr<std::string> *pd_ptr) {
96 RETURN_UNEXPECTED_IF_NULL(pd_ptr);
97 char real_path[PATH_MAX] = {0};
98 char buf[PATH_MAX] = {0};
99 if (strncpy_s(buf, PATH_MAX, common::SafeCStr(path), path.length()) != EOK) {
100 RETURN_STATUS_UNEXPECTED("Securec func [strncpy_s] failed, path: " + path);
101 }
102 char tmp[PATH_MAX] = {0};
103 #if defined(_WIN32) || defined(_WIN64)
104 if (_fullpath(tmp, dirname(&(buf[0])), PATH_MAX) == nullptr) {
105 RETURN_STATUS_UNEXPECTED("Invalid file, path: " + std::string(buf));
106 }
107 if (_fullpath(real_path, common::SafeCStr(path), PATH_MAX) == nullptr) {
108 MS_LOG(DEBUG) << "Path: " << common::SafeCStr(path) << "check success.";
109 }
110 #else
111 if (realpath(dirname(&(buf[0])), tmp) == nullptr) {
112 RETURN_STATUS_UNEXPECTED(std::string("Invalid file, path: ") + buf);
113 }
114 if (realpath(common::SafeCStr(path), real_path) == nullptr) {
115 MS_LOG(DEBUG) << "Path: " << path << "check success.";
116 }
117 #endif
118 std::string s = real_path;
119 if (s.rfind(kPathSeparator) + 1 <= s.size()) {
120 *pd_ptr = std::make_shared<std::string>(s.substr(0, s.rfind(kPathSeparator) + 1));
121 return Status::OK();
122 }
123 std::string ss;
124 ss.push_back(kPathSeparator);
125 *pd_ptr = std::make_shared<std::string>(ss);
126 return Status::OK();
127 }
128
// Return true when `str` consists solely of well-formed UTF-8 byte sequences.
//
// The accepted sequences are exactly those of the Unicode "well-formed UTF-8 byte sequences"
// table (equivalently RFC 3629):
//   00..7F
//   C2..DF 80..BF
//   E0     A0..BF 80..BF
//   E1..EC 80..BF 80..BF
//   ED     80..9F 80..BF          (excludes the surrogate range U+D800..U+DFFF)
//   EE..EF 80..BF 80..BF
//   F0     90..BF 80..BF 80..BF
//   F1..F3 80..BF 80..BF 80..BF
//   F4     80..8F 80..BF 80..BF   (excludes code points above U+10FFFF)
// Consequently overlong encodings (lead bytes C0/C1, E0 80..9F, F0 80..8F), surrogates,
// values beyond U+10FFFF, stray continuation bytes and truncated sequences are all rejected.
// The empty string is valid.
bool CheckIsValidUtf8(const std::string &str) {
  const size_t len = str.length();
  size_t i = 0;
  while (i < len) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    if (lead <= 0x7F) {
      ++i;
      continue;
    }

    size_t trail = 0;            // number of continuation bytes that must follow `lead`
    uint8_t second_min = 0x80;   // allowed range of the first continuation byte
    uint8_t second_max = 0xBF;
    if (lead >= 0xC2 && lead <= 0xDF) {
      trail = 1;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
      trail = 2;
      if (lead == 0xE0) {
        second_min = 0xA0;  // anything lower would be an overlong 3-byte form
      } else if (lead == 0xED) {
        second_max = 0x9F;  // anything higher would encode a UTF-16 surrogate
      }
    } else if (lead >= 0xF0 && lead <= 0xF4) {
      trail = 3;
      if (lead == 0xF0) {
        second_min = 0x90;  // anything lower would be an overlong 4-byte form
      } else if (lead == 0xF4) {
        second_max = 0x8F;  // anything higher would exceed U+10FFFF
      }
    } else {
      // 80..BF (stray continuation), C0/C1 (overlong) and F5..FF can never start a sequence.
      return false;
    }

    if (len - i <= trail) {
      return false;  // sequence is cut off by the end of the string
    }
    const uint8_t second = static_cast<uint8_t>(str[i + 1]);
    if (second < second_min || second > second_max) {
      return false;
    }
    for (size_t k = 2; k <= trail; ++k) {
      if ((static_cast<uint8_t>(str[i + k]) & 0xC0) != 0x80) {
        return false;
      }
    }
    i += trail + 1;
  }
  return true;
}
155
IsLegalFile(const std::string & path)156 bool IsLegalFile(const std::string &path) {
157 struct stat s;
158 if (stat(common::SafeCStr(path), &s) == 0) {
159 if (S_ISDIR(s.st_mode)) {
160 return false;
161 }
162 return true;
163 }
164 return false;
165 }
166
GetDiskSize(const std::string & str_dir,const DiskSizeType & disk_type,std::shared_ptr<uint64_t> * size_ptr)167 Status GetDiskSize(const std::string &str_dir, const DiskSizeType &disk_type, std::shared_ptr<uint64_t> *size_ptr) {
168 RETURN_UNEXPECTED_IF_NULL(size_ptr);
169 #if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__)
170 *size_ptr = std::make_shared<uint64_t>(100);
171 return Status::OK();
172 #else
173 uint64_t ll_count = 0;
174 struct statfs disk_info;
175 if (statfs(common::SafeCStr(str_dir), &disk_info) == -1) {
176 RETURN_STATUS_UNEXPECTED("Failed to get disk size.");
177 }
178
179 switch (disk_type) {
180 case kTotalSize:
181 ll_count = disk_info.f_bsize * disk_info.f_blocks;
182 ll_count = ll_count >> 20;
183 break;
184 case kFreeSize:
185 ll_count = disk_info.f_bsize * disk_info.f_bavail;
186 ll_count = ll_count >> 20;
187 break;
188 default:
189 ll_count = 0;
190 break;
191 }
192 *size_ptr = std::make_shared<uint64_t>(ll_count);
193 return Status::OK();
194 #endif
195 }
196
GetMaxThreadNum()197 uint32_t GetMaxThreadNum() {
198 // define the number of thread
199 uint32_t thread_num = std::thread::hardware_concurrency();
200 if (thread_num == 0) {
201 thread_num = kMaxConsumerCount;
202 }
203 return thread_num;
204 }
205
GetDatasetFiles(const std::string & path,const json & addresses,std::shared_ptr<std::vector<std::string>> * ds)206 Status GetDatasetFiles(const std::string &path, const json &addresses, std::shared_ptr<std::vector<std::string>> *ds) {
207 RETURN_UNEXPECTED_IF_NULL(ds);
208 std::shared_ptr<std::string> parent_dir;
209 RETURN_IF_NOT_OK(GetParentDir(path, &parent_dir));
210 for (const auto &p : addresses) {
211 std::string abs_path = *parent_dir + std::string(p);
212 (*ds)->emplace_back(abs_path);
213 }
214 return Status::OK();
215 }
216 } // namespace mindrecord
217 } // namespace mindspore
218