• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "minddata/mindrecord/include/common/shard_utils.h"
18 #include "utils/ms_utils.h"
19 #include "./securec.h"
20 
21 using mindspore::LogStream;
22 using mindspore::ExceptionType::NoExceptionType;
23 using mindspore::MsLogLevel::DEBUG;
24 using mindspore::MsLogLevel::ERROR;
25 
26 namespace mindspore {
27 namespace mindrecord {
28 // split a string using a character
// Splits `field` into the pieces delimited by `separator`.
//
// Empty pieces at the start or in the middle are kept ("a,,b" -> {"a", "", "b"}),
// but nothing is produced for the empty remainder after a trailing separator
// ("a,b," -> {"a", "b"}). An empty input yields an empty vector.
std::vector<std::string> StringSplit(const std::string &field, char separator) {
  std::vector<std::string> tokens;
  const size_t total = field.length();
  size_t begin = 0;
  while (begin < total) {
    const size_t end = field.find_first_of(separator, begin);
    if (end == std::string::npos) {
      // No further separator: the rest of the string is the last token.
      tokens.push_back(field.substr(begin));
      break;
    }
    tokens.push_back(field.substr(begin, end - begin));
    begin = end + 1;
  }
  return tokens;
}
44 
// Returns true when `str` is non-empty and consists solely of ASCII letters,
// ASCII digits and underscores; returns false otherwise.
bool ValidateFieldName(const std::string &str) {
  if (str.empty()) {
    return false;
  }
  for (const char ch : str) {
    const bool is_digit = (ch >= '0') && (ch <= '9');
    const bool is_upper = (ch >= 'A') && (ch <= 'Z');
    const bool is_lower = (ch >= 'a') && (ch <= 'z');
    if (!(ch == '_' || is_digit || is_upper || is_lower)) {
      return false;
    }
  }
  return true;
}
59 
GetFileName(const std::string & path,std::shared_ptr<std::string> * fn_ptr)60 Status GetFileName(const std::string &path, std::shared_ptr<std::string> *fn_ptr) {
61   RETURN_UNEXPECTED_IF_NULL(fn_ptr);
62   char real_path[PATH_MAX] = {0};
63   char buf[PATH_MAX] = {0};
64   if (strncpy_s(buf, PATH_MAX, common::SafeCStr(path), path.length()) != EOK) {
65     RETURN_STATUS_UNEXPECTED("Failed to call securec func [strncpy_s], path: " + path);
66   }
67   char tmp[PATH_MAX] = {0};
68 #if defined(_WIN32) || defined(_WIN64)
69   if (_fullpath(tmp, dirname(&(buf[0])), PATH_MAX) == nullptr) {
70     RETURN_STATUS_UNEXPECTED("Invalid file, path: " + std::string(buf));
71   }
72   if (_fullpath(real_path, common::SafeCStr(path), PATH_MAX) == nullptr) {
73     MS_LOG(DEBUG) << "Path: " << common::SafeCStr(path) << "check success.";
74   }
75 #else
76   if (realpath(dirname(&(buf[0])), tmp) == nullptr) {
77     RETURN_STATUS_UNEXPECTED(std::string("Invalid file, path: ") + buf);
78   }
79   if (realpath(common::SafeCStr(path), real_path) == nullptr) {
80     MS_LOG(DEBUG) << "Path: " << path << "check success.";
81   }
82 #endif
83   std::string s = real_path;
84   size_t i = s.rfind(kPathSeparator, s.length());
85   if (i != std::string::npos) {
86     if (i + 1 < s.size()) {
87       *fn_ptr = std::make_shared<std::string>(s.substr(i + 1));
88       return Status::OK();
89     }
90   }
91   *fn_ptr = std::make_shared<std::string>(s);
92   return Status::OK();
93 }
94 
GetParentDir(const std::string & path,std::shared_ptr<std::string> * pd_ptr)95 Status GetParentDir(const std::string &path, std::shared_ptr<std::string> *pd_ptr) {
96   RETURN_UNEXPECTED_IF_NULL(pd_ptr);
97   char real_path[PATH_MAX] = {0};
98   char buf[PATH_MAX] = {0};
99   if (strncpy_s(buf, PATH_MAX, common::SafeCStr(path), path.length()) != EOK) {
100     RETURN_STATUS_UNEXPECTED("Securec func [strncpy_s] failed, path: " + path);
101   }
102   char tmp[PATH_MAX] = {0};
103 #if defined(_WIN32) || defined(_WIN64)
104   if (_fullpath(tmp, dirname(&(buf[0])), PATH_MAX) == nullptr) {
105     RETURN_STATUS_UNEXPECTED("Invalid file, path: " + std::string(buf));
106   }
107   if (_fullpath(real_path, common::SafeCStr(path), PATH_MAX) == nullptr) {
108     MS_LOG(DEBUG) << "Path: " << common::SafeCStr(path) << "check success.";
109   }
110 #else
111   if (realpath(dirname(&(buf[0])), tmp) == nullptr) {
112     RETURN_STATUS_UNEXPECTED(std::string("Invalid file, path: ") + buf);
113   }
114   if (realpath(common::SafeCStr(path), real_path) == nullptr) {
115     MS_LOG(DEBUG) << "Path: " << path << "check success.";
116   }
117 #endif
118   std::string s = real_path;
119   if (s.rfind(kPathSeparator) + 1 <= s.size()) {
120     *pd_ptr = std::make_shared<std::string>(s.substr(0, s.rfind(kPathSeparator) + 1));
121     return Status::OK();
122   }
123   std::string ss;
124   ss.push_back(kPathSeparator);
125   *pd_ptr = std::make_shared<std::string>(ss);
126   return Status::OK();
127 }
128 
// Returns true when `str` is a well-formed UTF-8 byte sequence (RFC 3629).
//
// Rejected inputs:
//   - stray continuation bytes and truncated multi-byte sequences;
//   - overlong encodings (lead bytes 0xC0/0xC1, 0xE0 followed by 0x80..0x9F,
//     0xF0 followed by 0x80..0x8F);
//   - UTF-16 surrogates U+D800..U+DFFF (0xED followed by 0xA0..0xBF);
//   - code points above U+10FFFF (0xF4 followed by 0x90..0xBF, lead bytes 0xF5..0xFF).
// The empty string is valid.
bool CheckIsValidUtf8(const std::string &str) {
  const size_t len = str.length();
  size_t i = 0;
  while (i < len) {
    const uint8_t lead = static_cast<uint8_t>(str[i]);
    if (lead <= 0x7F) {
      // Single-byte (ASCII) character.
      ++i;
      continue;
    }

    size_t trail = 0;         // number of continuation bytes that must follow
    uint8_t first_lo = 0x80;  // permitted range of the FIRST continuation byte;
    uint8_t first_hi = 0xBF;  // narrowed below to exclude overlongs/surrogates/out-of-range
    if (lead >= 0xC2 && lead <= 0xDF) {
      trail = 1;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
      trail = 2;
      if (lead == 0xE0) {
        first_lo = 0xA0;  // below this the code point would fit in two bytes
      } else if (lead == 0xED) {
        first_hi = 0x9F;  // above this lies the surrogate range
      }
    } else if (lead >= 0xF0 && lead <= 0xF4) {
      trail = 3;
      if (lead == 0xF0) {
        first_lo = 0x90;  // below this the code point would fit in three bytes
      } else if (lead == 0xF4) {
        first_hi = 0x8F;  // above this the code point exceeds U+10FFFF
      }
    } else {
      // 0x80..0xBF (stray continuation), 0xC0/0xC1 (overlong), 0xF5..0xFF (out of range).
      return false;
    }

    // `i < len` holds here, so `len - i` cannot underflow.
    if (len - i <= trail) {
      return false;  // sequence is cut off by the end of the string
    }
    const uint8_t first = static_cast<uint8_t>(str[i + 1]);
    if (first < first_lo || first > first_hi) {
      return false;
    }
    for (size_t k = 2; k <= trail; ++k) {
      if ((static_cast<uint8_t>(str[i + k]) & 0xC0) != 0x80) {
        return false;
      }
    }
    i += trail + 1;
  }
  return true;
}
155 
IsLegalFile(const std::string & path)156 bool IsLegalFile(const std::string &path) {
157   struct stat s;
158   if (stat(common::SafeCStr(path), &s) == 0) {
159     if (S_ISDIR(s.st_mode)) {
160       return false;
161     }
162     return true;
163   }
164   return false;
165 }
166 
GetDiskSize(const std::string & str_dir,const DiskSizeType & disk_type,std::shared_ptr<uint64_t> * size_ptr)167 Status GetDiskSize(const std::string &str_dir, const DiskSizeType &disk_type, std::shared_ptr<uint64_t> *size_ptr) {
168   RETURN_UNEXPECTED_IF_NULL(size_ptr);
169 #if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__)
170   *size_ptr = std::make_shared<uint64_t>(100);
171   return Status::OK();
172 #else
173   uint64_t ll_count = 0;
174   struct statfs disk_info;
175   if (statfs(common::SafeCStr(str_dir), &disk_info) == -1) {
176     RETURN_STATUS_UNEXPECTED("Failed to get disk size.");
177   }
178 
179   switch (disk_type) {
180     case kTotalSize:
181       ll_count = disk_info.f_bsize * disk_info.f_blocks;
182       ll_count = ll_count >> 20;
183       break;
184     case kFreeSize:
185       ll_count = disk_info.f_bsize * disk_info.f_bavail;
186       ll_count = ll_count >> 20;
187       break;
188     default:
189       ll_count = 0;
190       break;
191   }
192   *size_ptr = std::make_shared<uint64_t>(ll_count);
193   return Status::OK();
194 #endif
195 }
196 
GetMaxThreadNum()197 uint32_t GetMaxThreadNum() {
198   // define the number of thread
199   uint32_t thread_num = std::thread::hardware_concurrency();
200   if (thread_num == 0) {
201     thread_num = kMaxConsumerCount;
202   }
203   return thread_num;
204 }
205 
GetDatasetFiles(const std::string & path,const json & addresses,std::shared_ptr<std::vector<std::string>> * ds)206 Status GetDatasetFiles(const std::string &path, const json &addresses, std::shared_ptr<std::vector<std::string>> *ds) {
207   RETURN_UNEXPECTED_IF_NULL(ds);
208   std::shared_ptr<std::string> parent_dir;
209   RETURN_IF_NOT_OK(GetParentDir(path, &parent_dir));
210   for (const auto &p : addresses) {
211     std::string abs_path = *parent_dir + std::string(p);
212     (*ds)->emplace_back(abs_path);
213   }
214   return Status::OK();
215 }
216 }  // namespace mindrecord
217 }  // namespace mindspore
218