1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "minddata/mindrecord/include/shard_column.h"
18
19 #include "utils/ms_utils.h"
20 #include "minddata/mindrecord/include/common/shard_utils.h"
21 #include "minddata/mindrecord/include/shard_error.h"
22
23 namespace mindspore {
24 namespace mindrecord {
ShardColumn(const std::shared_ptr<ShardHeader> & shard_header,bool compress_integer)25 ShardColumn::ShardColumn(const std::shared_ptr<ShardHeader> &shard_header, bool compress_integer) {
26 auto first_schema = shard_header->GetSchemas()[0];
27 json schema_json = first_schema->GetSchema();
28 Init(schema_json, compress_integer);
29 }
30
ShardColumn(const json & schema_json,bool compress_integer)31 ShardColumn::ShardColumn(const json &schema_json, bool compress_integer) { Init(schema_json, compress_integer); }
32
Init(const json & schema_json,bool compress_integer)33 void ShardColumn::Init(const json &schema_json, bool compress_integer) {
34 auto schema = schema_json["schema"];
35 auto blob_fields = schema_json["blob_fields"];
36
37 bool has_integer_array = false;
38 for (json::iterator it = schema.begin(); it != schema.end(); ++it) {
39 const std::string &column_name = it.key();
40 column_name_.push_back(column_name);
41
42 json it_value = it.value();
43
44 std::string str_type = it_value["type"];
45 column_data_type_.push_back(ColumnDataTypeMap.at(str_type));
46 if (it_value.find("shape") != it_value.end()) {
47 std::vector<int64_t> vec(it_value["shape"].size());
48 (void)std::copy(it_value["shape"].begin(), it_value["shape"].end(), vec.begin());
49 column_shape_.push_back(vec);
50 if (str_type == "int32" || str_type == "int64") {
51 has_integer_array = true;
52 }
53 } else {
54 std::vector<int64_t> vec = {};
55 column_shape_.push_back(vec);
56 }
57 }
58
59 for (uint64_t i = 0; i < column_name_.size(); i++) {
60 column_name_id_[column_name_[i]] = i;
61 }
62
63 for (const auto &field : blob_fields) {
64 blob_column_.push_back(field);
65 }
66
67 for (uint64_t i = 0; i < blob_column_.size(); i++) {
68 blob_column_id_[blob_column_[i]] = i;
69 }
70
71 has_compress_blob_ = (compress_integer && has_integer_array);
72 num_blob_column_ = blob_column_.size();
73 }
74
GetColumnTypeByName(const std::string & column_name,ColumnDataType * column_data_type,uint64_t * column_data_type_size,std::vector<int64_t> * column_shape,ColumnCategory * column_category)75 Status ShardColumn::GetColumnTypeByName(const std::string &column_name, ColumnDataType *column_data_type,
76 uint64_t *column_data_type_size, std::vector<int64_t> *column_shape,
77 ColumnCategory *column_category) {
78 RETURN_UNEXPECTED_IF_NULL_MR(column_data_type);
79 RETURN_UNEXPECTED_IF_NULL_MR(column_data_type_size);
80 RETURN_UNEXPECTED_IF_NULL_MR(column_shape);
81 RETURN_UNEXPECTED_IF_NULL_MR(column_category);
82 // Skip if column not found
83 *column_category = CheckColumnName(column_name);
84 CHECK_FAIL_RETURN_UNEXPECTED_MR(*column_category != ColumnNotFound,
85 "[Internal ERROR] the type of column: " + column_name + " can not found.");
86
87 // Get data type and size
88 auto column_id = column_name_id_[column_name];
89 *column_data_type = column_data_type_[column_id];
90 *column_data_type_size = ColumnDataTypeSize[*column_data_type];
91 *column_shape = column_shape_[column_id];
92 return Status::OK();
93 }
94
GetColumnValueByName(const std::string & column_name,const std::vector<uint8_t> & columns_blob,const json & columns_json,const unsigned char ** data,std::unique_ptr<unsigned char[]> * data_ptr,uint64_t * const n_bytes,ColumnDataType * column_data_type,uint64_t * column_data_type_size,std::vector<int64_t> * column_shape)95 Status ShardColumn::GetColumnValueByName(const std::string &column_name, const std::vector<uint8_t> &columns_blob,
96 const json &columns_json, const unsigned char **data,
97 std::unique_ptr<unsigned char[]> *data_ptr, uint64_t *const n_bytes,
98 ColumnDataType *column_data_type, uint64_t *column_data_type_size,
99 std::vector<int64_t> *column_shape) {
100 RETURN_UNEXPECTED_IF_NULL_MR(column_data_type);
101 RETURN_UNEXPECTED_IF_NULL_MR(column_data_type_size);
102 RETURN_UNEXPECTED_IF_NULL_MR(column_shape);
103 // Skip if column not found
104 auto column_category = CheckColumnName(column_name);
105 CHECK_FAIL_RETURN_UNEXPECTED_MR(column_category != ColumnNotFound,
106 "[Internal ERROR] the type of column: " + column_name + " can not found.");
107 // Get data type and size
108 auto column_id = column_name_id_[column_name];
109 *column_data_type = column_data_type_[column_id];
110 *column_data_type_size = ColumnDataTypeSize[*column_data_type];
111 *column_shape = column_shape_[column_id];
112
113 // Retrieve value from json
114 if (column_category == ColumnInRaw) {
115 RETURN_IF_NOT_OK_MR(GetColumnFromJson(column_name, columns_json, data_ptr, n_bytes));
116 *data = reinterpret_cast<const unsigned char *>(data_ptr->get());
117 return Status::OK();
118 }
119
120 // Retrieve value from blob
121 RETURN_IF_NOT_OK_MR(GetColumnFromBlob(column_name, columns_blob, data, data_ptr, n_bytes));
122 if (*data == nullptr) {
123 *data = reinterpret_cast<const unsigned char *>(data_ptr->get());
124 }
125 return Status::OK();
126 }
127
GetColumnFromJson(const std::string & column_name,const json & columns_json,std::unique_ptr<unsigned char[]> * data_ptr,uint64_t * n_bytes)128 Status ShardColumn::GetColumnFromJson(const std::string &column_name, const json &columns_json,
129 std::unique_ptr<unsigned char[]> *data_ptr, uint64_t *n_bytes) {
130 RETURN_UNEXPECTED_IF_NULL_MR(n_bytes);
131 RETURN_UNEXPECTED_IF_NULL_MR(data_ptr);
132 auto column_id = column_name_id_[column_name];
133 auto column_data_type = column_data_type_[column_id];
134
135 // Initialize num bytes
136 *n_bytes = ColumnDataTypeSize[column_data_type];
137 auto json_column_value = columns_json[column_name];
138 CHECK_FAIL_RETURN_UNEXPECTED_MR(json_column_value.is_string() || json_column_value.is_number(),
139 "[Internal ERROR] the value of column: " + column_name +
140 " should be string or number but got: " + json_column_value.dump());
141 switch (column_data_type) {
142 case ColumnFloat32: {
143 return GetFloat<float>(data_ptr, json_column_value, false);
144 }
145 case ColumnFloat64: {
146 return GetFloat<double>(data_ptr, json_column_value, true);
147 }
148 case ColumnInt32: {
149 return GetInt<int32_t>(data_ptr, json_column_value);
150 }
151 case ColumnInt64: {
152 return GetInt<int64_t>(data_ptr, json_column_value);
153 }
154 default: {
155 // Convert string to c_str
156 std::string tmp_string;
157 if (json_column_value.is_string()) {
158 tmp_string = json_column_value.get<string>();
159 } else {
160 tmp_string = json_column_value.dump();
161 }
162 *n_bytes = tmp_string.size();
163 auto data = reinterpret_cast<const unsigned char *>(common::SafeCStr(tmp_string));
164 *data_ptr = std::make_unique<unsigned char[]>(*n_bytes);
165 for (uint32_t i = 0; i < *n_bytes; i++) {
166 (*data_ptr)[i] = *(data + i);
167 }
168 break;
169 }
170 }
171 return Status::OK();
172 }
173
174 template <typename T>
GetFloat(std::unique_ptr<unsigned char[]> * data_ptr,const json & json_column_value,bool use_double)175 Status ShardColumn::GetFloat(std::unique_ptr<unsigned char[]> *data_ptr, const json &json_column_value,
176 bool use_double) {
177 RETURN_UNEXPECTED_IF_NULL_MR(data_ptr);
178 std::unique_ptr<T[]> array_data = std::make_unique<T[]>(1);
179 if (json_column_value.is_number()) {
180 array_data[0] = json_column_value;
181 } else {
182 // Convert string to float
183 try {
184 if (use_double) {
185 array_data[0] = json_column_value.get<double>();
186 } else {
187 array_data[0] = json_column_value.get<float>();
188 }
189 } catch (json::exception &e) {
190 RETURN_STATUS_UNEXPECTED_MR("[Internal ERROR] Failed to convert column value:" + json_column_value.dump() +
191 " to type float, " + std::string(e.what()));
192 }
193 }
194
195 auto data = reinterpret_cast<const unsigned char *>(array_data.get());
196 *data_ptr = std::make_unique<unsigned char[]>(sizeof(T));
197 for (uint32_t i = 0; i < sizeof(T); i++) {
198 (*data_ptr)[i] = *(data + i);
199 }
200 return Status::OK();
201 }
202
203 template <typename T>
GetInt(std::unique_ptr<unsigned char[]> * data_ptr,const json & json_column_value)204 Status ShardColumn::GetInt(std::unique_ptr<unsigned char[]> *data_ptr, const json &json_column_value) {
205 RETURN_UNEXPECTED_IF_NULL_MR(data_ptr);
206 std::unique_ptr<T[]> array_data = std::make_unique<T[]>(1);
207 int64_t temp_value;
208 bool less_than_zero = false;
209
210 if (json_column_value.is_number_integer()) {
211 const json json_zero = 0;
212 if (json_column_value < json_zero) {
213 less_than_zero = true;
214 }
215 temp_value = json_column_value;
216 } else if (json_column_value.is_string()) {
217 std::string string_value = json_column_value;
218 try {
219 if (!string_value.empty() && string_value[0] == '-') {
220 temp_value = std::stoll(string_value);
221 less_than_zero = true;
222 } else {
223 temp_value = static_cast<int64_t>(std::stoull(string_value));
224 }
225 } catch (std::invalid_argument &e) {
226 RETURN_STATUS_UNEXPECTED_MR("[Internal ERROR] Failed to convert column value:" + string_value + " to type int, " +
227 std::string(e.what()));
228 } catch (std::out_of_range &e) {
229 RETURN_STATUS_UNEXPECTED_MR("[Internal ERROR] Failed to convert column value:" + string_value + " to type int, " +
230 std::string(e.what()));
231 }
232 } else {
233 RETURN_STATUS_UNEXPECTED_MR("[Internal ERROR] column value should be type string or number but got: " +
234 json_column_value.dump());
235 }
236
237 if ((less_than_zero && temp_value < static_cast<int64_t>(std::numeric_limits<T>::min())) ||
238 (!less_than_zero && static_cast<uint64_t>(temp_value) > static_cast<uint64_t>(std::numeric_limits<T>::max()))) {
239 RETURN_STATUS_UNEXPECTED_MR("[Internal ERROR] column value: " + std::to_string(temp_value) + " is out of range.");
240 }
241 array_data[0] = static_cast<T>(temp_value);
242
243 auto data = reinterpret_cast<const unsigned char *>(array_data.get());
244 *data_ptr = std::make_unique<unsigned char[]>(sizeof(T));
245 for (uint32_t i = 0; i < sizeof(T); i++) {
246 (*data_ptr)[i] = *(data + i);
247 }
248 return Status::OK();
249 }
250
GetColumnFromBlob(const std::string & column_name,const std::vector<uint8_t> & columns_blob,const unsigned char ** data,std::unique_ptr<unsigned char[]> * data_ptr,uint64_t * const n_bytes)251 Status ShardColumn::GetColumnFromBlob(const std::string &column_name, const std::vector<uint8_t> &columns_blob,
252 const unsigned char **data, std::unique_ptr<unsigned char[]> *data_ptr,
253 uint64_t *const n_bytes) {
254 RETURN_UNEXPECTED_IF_NULL_MR(data);
255 uint64_t offset_address = 0;
256 auto column_id = column_name_id_[column_name];
257 RETURN_IF_NOT_OK_MR(GetColumnAddressInBlock(column_id, columns_blob, n_bytes, &offset_address));
258 auto column_data_type = column_data_type_[column_id];
259 if (has_compress_blob_ && column_data_type == ColumnInt32) {
260 RETURN_IF_NOT_OK_MR(UncompressInt<int32_t>(column_id, data_ptr, columns_blob, n_bytes, offset_address));
261 } else if (has_compress_blob_ && column_data_type == ColumnInt64) {
262 RETURN_IF_NOT_OK_MR(UncompressInt<int64_t>(column_id, data_ptr, columns_blob, n_bytes, offset_address));
263 } else {
264 *data = reinterpret_cast<const unsigned char *>(&(columns_blob[offset_address]));
265 }
266
267 return Status::OK();
268 }
269
CheckColumnName(const std::string & column_name)270 ColumnCategory ShardColumn::CheckColumnName(const std::string &column_name) {
271 auto it_column = column_name_id_.find(column_name);
272 if (it_column == column_name_id_.end()) {
273 return ColumnNotFound;
274 }
275 auto it_blob = blob_column_id_.find(column_name);
276 return it_blob == blob_column_id_.end() ? ColumnInRaw : ColumnInBlob;
277 }
278
CompressBlob(const std::vector<uint8_t> & blob,int64_t * compression_size)279 std::vector<uint8_t> ShardColumn::CompressBlob(const std::vector<uint8_t> &blob, int64_t *compression_size) {
280 // Skip if no compress columns
281 *compression_size = 0;
282 if (!CheckCompressBlob()) {
283 return blob;
284 }
285
286 std::vector<uint8_t> dst_blob;
287 uint64_t i_src = 0;
288 for (int64_t i = 0; i < num_blob_column_; i++) {
289 // Get column data type
290 auto src_data_type = column_data_type_[column_name_id_[blob_column_[i]]];
291 auto int_type = src_data_type == ColumnInt32 ? kInt32Type : kInt64Type;
292
293 // Compress and return is blob has 1 column only
294 if (num_blob_column_ == 1) {
295 dst_blob = CompressInt(blob, int_type);
296 *compression_size = static_cast<int64_t>(blob.size()) - static_cast<int64_t>(dst_blob.size());
297 return dst_blob;
298 }
299
300 // Just copy and continue if column dat type is not int32/int64
301 uint64_t num_bytes = BytesBigToUInt64(blob, i_src, kInt64Type);
302 if (src_data_type != ColumnInt32 && src_data_type != ColumnInt64) {
303 dst_blob.insert(dst_blob.end(), blob.begin() + i_src, blob.begin() + i_src + kInt64Len + num_bytes);
304 i_src += kInt64Len + num_bytes;
305 continue;
306 }
307
308 // Get column slice in source blob
309 std::vector<uint8_t> blob_slice(blob.begin() + i_src + kInt64Len, blob.begin() + i_src + kInt64Len + num_bytes);
310 // Compress column
311 auto dst_blob_slice = CompressInt(blob_slice, int_type);
312 // Get new column size
313 auto new_blob_size = UIntToBytesBig(dst_blob_slice.size(), kInt64Type);
314 // Append new column size
315 dst_blob.insert(dst_blob.end(), new_blob_size.begin(), new_blob_size.end());
316 // Append new column data
317 dst_blob.insert(dst_blob.end(), dst_blob_slice.begin(), dst_blob_slice.end());
318 i_src += kInt64Len + num_bytes;
319 }
320 MS_LOG(DEBUG) << "Compress blob data from " << blob.size() << " to " << dst_blob.size() << ".";
321 *compression_size = static_cast<int64_t>(blob.size()) - static_cast<int64_t>(dst_blob.size());
322 return dst_blob;
323 }
324
// Re-encodes a packed array of integers using the narrowest width per element.
//
// Output layout as produced below: the element count (big-endian, written with
// kInt32Type, kBytesOfColumnLen bytes), a bitmap holding one kDataTypeBits-wide
// type code per element (kNumDataOfByte codes per byte, first element in the
// highest bits), then every element in little-endian order at the width chosen
// by BytesLittleToMinIntType().
//
// @param src_bytes Source integers, each (1 << int_type) bytes wide.
// @param int_type  Width code of the source integers.
// @return The re-encoded byte sequence.
vector<uint8_t> ShardColumn::CompressInt(const vector<uint8_t> &src_bytes, const IntegerType &int_type) {
  const uint64_t src_width = kUnsignedOne << static_cast<uint8_t>(int_type);
  const uint64_t element_count = src_bytes.size() / src_width;
  const uint64_t bitmap_bytes = (element_count + kNumDataOfByte - 1) / kNumDataOfByte;

  // Worst case: no element shrinks. The buffer is trimmed at the end.
  vector<uint8_t> packed(kBytesOfColumnLen + bitmap_bytes + src_bytes.size(), 0);

  // Header: number of elements.
  const vector<uint8_t> count_bytes = UIntToBytesBig(element_count, kInt32Type);
  (void)std::copy(count_bytes.begin(), count_bytes.begin() + kBytesOfColumnLen, packed.begin());

  uint64_t write_pos = kBytesOfColumnLen + bitmap_bytes;
  for (uint64_t idx = 0; idx < element_count; ++idx) {
    // Decode the source element and learn the narrowest type that holds it.
    IntegerType narrow_type = kInt8Type;
    const int64_t value = BytesLittleToMinIntType(src_bytes, idx * src_width, int_type, &narrow_type);

    // Append the element at its reduced width.
    const auto encoded = UIntToBytesLittle(static_cast<uint64_t>(value), narrow_type);
    (void)std::copy(encoded.begin(), encoded.end(), packed.begin() + write_pos);
    write_pos += encoded.size();

    // Record the chosen width in the bitmap slot of this element.
    const auto slot = kNumDataOfByte - kUnsignedOne - (idx % kNumDataOfByte);
    const auto shift = kDataTypeBits * slot;
    packed[idx / kNumDataOfByte + kBytesOfColumnLen] |= (static_cast<uint8_t>(narrow_type) << shift);
  }

  packed.resize(write_pos);
  MS_LOG(DEBUG) << "Compress blob field from " << src_bytes.size() << " to " << packed.size() << ".";
  return packed;
}
367
GetColumnAddressInBlock(const uint64_t & column_id,const std::vector<uint8_t> & columns_blob,uint64_t * num_bytes,uint64_t * shift_idx)368 Status ShardColumn::GetColumnAddressInBlock(const uint64_t &column_id, const std::vector<uint8_t> &columns_blob,
369 uint64_t *num_bytes, uint64_t *shift_idx) {
370 RETURN_UNEXPECTED_IF_NULL_MR(num_bytes);
371 RETURN_UNEXPECTED_IF_NULL_MR(shift_idx);
372 if (num_blob_column_ == 1) {
373 *num_bytes = columns_blob.size();
374 *shift_idx = 0;
375 return Status::OK();
376 }
377 auto blob_id = blob_column_id_[column_name_[column_id]];
378
379 for (int32_t i = 0; i < blob_id; i++) {
380 *shift_idx += kInt64Len + BytesBigToUInt64(columns_blob, *shift_idx, kInt64Type);
381 }
382 *num_bytes = BytesBigToUInt64(columns_blob, *shift_idx, kInt64Type);
383
384 (*shift_idx) += kInt64Len;
385
386 return Status::OK();
387 }
388
389 template <typename T>
UncompressInt(const uint64_t & column_id,std::unique_ptr<unsigned char[]> * const data_ptr,const std::vector<uint8_t> & columns_blob,uint64_t * num_bytes,uint64_t shift_idx)390 Status ShardColumn::UncompressInt(const uint64_t &column_id, std::unique_ptr<unsigned char[]> *const data_ptr,
391 const std::vector<uint8_t> &columns_blob, uint64_t *num_bytes, uint64_t shift_idx) {
392 RETURN_UNEXPECTED_IF_NULL_MR(data_ptr);
393 RETURN_UNEXPECTED_IF_NULL_MR(num_bytes);
394 auto num_elements = BytesBigToUInt64(columns_blob, shift_idx, kInt32Type);
395 *num_bytes = sizeof(T) * num_elements;
396
397 // Parse integer array
398 uint64_t i_source = shift_idx + kBytesOfColumnLen + (num_elements + kNumDataOfByte - 1) / kNumDataOfByte;
399 auto array_data = std::make_unique<T[]>(num_elements);
400
401 for (uint64_t i = 0; i < num_elements; i++) {
402 uint8_t iBitMap = columns_blob[shift_idx + kBytesOfColumnLen + i / kNumDataOfByte];
403 uint64_t i_type = (iBitMap >> ((kNumDataOfByte - 1 - (i % kNumDataOfByte)) * kDataTypeBits)) & kDataTypeBitMask;
404 auto mr_int_type = static_cast<IntegerType>(i_type);
405 int64_t i64 = BytesLittleToMinIntType(columns_blob, i_source, mr_int_type);
406 i_source += (kUnsignedOne << i_type);
407 array_data[i] = static_cast<T>(i64);
408 }
409
410 auto data = reinterpret_cast<const unsigned char *>(array_data.get());
411 // field is none. for example: numpy is null
412 if (*num_bytes == 0) {
413 return Status::OK();
414 }
415 *data_ptr = std::make_unique<unsigned char[]>(*num_bytes);
416 CHECK_FAIL_RETURN_UNEXPECTED_MR(memcpy_s(data_ptr->get(), *num_bytes, data, *num_bytes) == 0,
417 "[Internal ERROR] Failed to call securec func [memcpy_s]");
418 return Status::OK();
419 }
420
BytesBigToUInt64(const std::vector<uint8_t> & bytes_array,const uint64_t & pos,const IntegerType & i_type)421 uint64_t ShardColumn::BytesBigToUInt64(const std::vector<uint8_t> &bytes_array, const uint64_t &pos,
422 const IntegerType &i_type) {
423 uint64_t result = 0;
424 for (uint64_t i = 0; i < (kUnsignedOne << static_cast<uint8_t>(i_type)); i++) {
425 result = (result << kBitsOfByte) + bytes_array[pos + i];
426 }
427 return result;
428 }
429
UIntToBytesBig(uint64_t value,const IntegerType & i_type)430 std::vector<uint8_t> ShardColumn::UIntToBytesBig(uint64_t value, const IntegerType &i_type) {
431 uint64_t n_bytes = kUnsignedOne << static_cast<uint8_t>(i_type);
432 std::vector<uint8_t> result(n_bytes, 0);
433 for (uint64_t i = 0; i < n_bytes; i++) {
434 result[n_bytes - 1 - i] = value & std::numeric_limits<uint8_t>::max();
435 value >>= kBitsOfByte;
436 }
437 return result;
438 }
439
UIntToBytesLittle(uint64_t value,const IntegerType & i_type)440 std::vector<uint8_t> ShardColumn::UIntToBytesLittle(uint64_t value, const IntegerType &i_type) {
441 uint64_t n_bytes = kUnsignedOne << static_cast<uint8_t>(i_type);
442 std::vector<uint8_t> result(n_bytes, 0);
443 for (uint64_t i = 0; i < n_bytes; i++) {
444 result[i] = value & std::numeric_limits<uint8_t>::max();
445 value >>= kBitsOfByte;
446 }
447 return result;
448 }
449
BytesLittleToMinIntType(const std::vector<uint8_t> & bytes_array,const uint64_t & pos,const IntegerType & src_i_type,IntegerType * dst_i_type)450 int64_t ShardColumn::BytesLittleToMinIntType(const std::vector<uint8_t> &bytes_array, const uint64_t &pos,
451 const IntegerType &src_i_type, IntegerType *dst_i_type) {
452 uint64_t u_temp = 0;
453 for (uint64_t i = 0; i < (kUnsignedOne << static_cast<uint8_t>(src_i_type)); i++) {
454 u_temp = (u_temp << kBitsOfByte) +
455 bytes_array[pos + (kUnsignedOne << static_cast<uint8_t>(src_i_type)) - kUnsignedOne - i];
456 }
457
458 int64_t i_out;
459 switch (src_i_type) {
460 case kInt8Type: {
461 i_out = static_cast<int8_t>(u_temp & std::numeric_limits<uint8_t>::max());
462 break;
463 }
464 case kInt16Type: {
465 i_out = static_cast<int16_t>(u_temp & std::numeric_limits<uint16_t>::max());
466 break;
467 }
468 case kInt32Type: {
469 i_out = static_cast<int32_t>(u_temp & std::numeric_limits<uint32_t>::max());
470 break;
471 }
472 case kInt64Type: {
473 i_out = static_cast<int64_t>(u_temp & std::numeric_limits<uint64_t>::max());
474 break;
475 }
476 default: {
477 i_out = 0;
478 }
479 }
480
481 if (!dst_i_type) {
482 return i_out;
483 }
484
485 if (i_out >= static_cast<int64_t>(std::numeric_limits<int8_t>::min()) &&
486 i_out <= static_cast<int64_t>(std::numeric_limits<int8_t>::max())) {
487 *dst_i_type = kInt8Type;
488 } else if (i_out >= static_cast<int64_t>(std::numeric_limits<int16_t>::min()) &&
489 i_out <= static_cast<int64_t>(std::numeric_limits<int16_t>::max())) {
490 *dst_i_type = kInt16Type;
491 } else if (i_out >= static_cast<int64_t>(std::numeric_limits<int32_t>::min()) &&
492 i_out <= static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
493 *dst_i_type = kInt32Type;
494 } else {
495 *dst_i_type = kInt64Type;
496 }
497 return i_out;
498 }
499 } // namespace mindrecord
500 } // namespace mindspore
501