• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "minddata/mindrecord/include/shard_column.h"
18 
19 #include "utils/ms_utils.h"
20 #include "minddata/mindrecord/include/common/shard_utils.h"
21 #include "minddata/mindrecord/include/shard_error.h"
22 
23 namespace mindspore {
24 namespace mindrecord {
ShardColumn(const std::shared_ptr<ShardHeader> & shard_header,bool compress_integer)25 ShardColumn::ShardColumn(const std::shared_ptr<ShardHeader> &shard_header, bool compress_integer) {
26   auto first_schema = shard_header->GetSchemas()[0];
27   json schema_json = first_schema->GetSchema();
28   Init(schema_json, compress_integer);
29 }
30 
ShardColumn(const json & schema_json,bool compress_integer)31 ShardColumn::ShardColumn(const json &schema_json, bool compress_integer) { Init(schema_json, compress_integer); }
32 
Init(const json & schema_json,bool compress_integer)33 void ShardColumn::Init(const json &schema_json, bool compress_integer) {
34   auto schema = schema_json["schema"];
35   auto blob_fields = schema_json["blob_fields"];
36 
37   bool has_integer_array = false;
38   for (json::iterator it = schema.begin(); it != schema.end(); ++it) {
39     const std::string &column_name = it.key();
40     column_name_.push_back(column_name);
41 
42     json it_value = it.value();
43 
44     std::string str_type = it_value["type"];
45     column_data_type_.push_back(ColumnDataTypeMap.at(str_type));
46     if (it_value.find("shape") != it_value.end()) {
47       std::vector<int64_t> vec(it_value["shape"].size());
48       (void)std::copy(it_value["shape"].begin(), it_value["shape"].end(), vec.begin());
49       column_shape_.push_back(vec);
50       if (str_type == "int32" || str_type == "int64") {
51         has_integer_array = true;
52       }
53     } else {
54       std::vector<int64_t> vec = {};
55       column_shape_.push_back(vec);
56     }
57   }
58 
59   for (uint64_t i = 0; i < column_name_.size(); i++) {
60     column_name_id_[column_name_[i]] = i;
61   }
62 
63   for (const auto &field : blob_fields) {
64     blob_column_.push_back(field);
65   }
66 
67   for (uint64_t i = 0; i < blob_column_.size(); i++) {
68     blob_column_id_[blob_column_[i]] = i;
69   }
70 
71   has_compress_blob_ = (compress_integer && has_integer_array);
72   num_blob_column_ = blob_column_.size();
73 }
74 
GetColumnTypeByName(const std::string & column_name,ColumnDataType * column_data_type,uint64_t * column_data_type_size,std::vector<int64_t> * column_shape,ColumnCategory * column_category)75 Status ShardColumn::GetColumnTypeByName(const std::string &column_name, ColumnDataType *column_data_type,
76                                         uint64_t *column_data_type_size, std::vector<int64_t> *column_shape,
77                                         ColumnCategory *column_category) {
78   RETURN_UNEXPECTED_IF_NULL_MR(column_data_type);
79   RETURN_UNEXPECTED_IF_NULL_MR(column_data_type_size);
80   RETURN_UNEXPECTED_IF_NULL_MR(column_shape);
81   RETURN_UNEXPECTED_IF_NULL_MR(column_category);
82   // Skip if column not found
83   *column_category = CheckColumnName(column_name);
84   CHECK_FAIL_RETURN_UNEXPECTED_MR(*column_category != ColumnNotFound,
85                                   "[Internal ERROR] the type of column: " + column_name + " can not found.");
86 
87   // Get data type and size
88   auto column_id = column_name_id_[column_name];
89   *column_data_type = column_data_type_[column_id];
90   *column_data_type_size = ColumnDataTypeSize[*column_data_type];
91   *column_shape = column_shape_[column_id];
92   return Status::OK();
93 }
94 
GetColumnValueByName(const std::string & column_name,const std::vector<uint8_t> & columns_blob,const json & columns_json,const unsigned char ** data,std::unique_ptr<unsigned char[]> * data_ptr,uint64_t * const n_bytes,ColumnDataType * column_data_type,uint64_t * column_data_type_size,std::vector<int64_t> * column_shape)95 Status ShardColumn::GetColumnValueByName(const std::string &column_name, const std::vector<uint8_t> &columns_blob,
96                                          const json &columns_json, const unsigned char **data,
97                                          std::unique_ptr<unsigned char[]> *data_ptr, uint64_t *const n_bytes,
98                                          ColumnDataType *column_data_type, uint64_t *column_data_type_size,
99                                          std::vector<int64_t> *column_shape) {
100   RETURN_UNEXPECTED_IF_NULL_MR(column_data_type);
101   RETURN_UNEXPECTED_IF_NULL_MR(column_data_type_size);
102   RETURN_UNEXPECTED_IF_NULL_MR(column_shape);
103   // Skip if column not found
104   auto column_category = CheckColumnName(column_name);
105   CHECK_FAIL_RETURN_UNEXPECTED_MR(column_category != ColumnNotFound,
106                                   "[Internal ERROR] the type of column: " + column_name + " can not found.");
107   // Get data type and size
108   auto column_id = column_name_id_[column_name];
109   *column_data_type = column_data_type_[column_id];
110   *column_data_type_size = ColumnDataTypeSize[*column_data_type];
111   *column_shape = column_shape_[column_id];
112 
113   // Retrieve value from json
114   if (column_category == ColumnInRaw) {
115     RETURN_IF_NOT_OK_MR(GetColumnFromJson(column_name, columns_json, data_ptr, n_bytes));
116     *data = reinterpret_cast<const unsigned char *>(data_ptr->get());
117     return Status::OK();
118   }
119 
120   // Retrieve value from blob
121   RETURN_IF_NOT_OK_MR(GetColumnFromBlob(column_name, columns_blob, data, data_ptr, n_bytes));
122   if (*data == nullptr) {
123     *data = reinterpret_cast<const unsigned char *>(data_ptr->get());
124   }
125   return Status::OK();
126 }
127 
GetColumnFromJson(const std::string & column_name,const json & columns_json,std::unique_ptr<unsigned char[]> * data_ptr,uint64_t * n_bytes)128 Status ShardColumn::GetColumnFromJson(const std::string &column_name, const json &columns_json,
129                                       std::unique_ptr<unsigned char[]> *data_ptr, uint64_t *n_bytes) {
130   RETURN_UNEXPECTED_IF_NULL_MR(n_bytes);
131   RETURN_UNEXPECTED_IF_NULL_MR(data_ptr);
132   auto column_id = column_name_id_[column_name];
133   auto column_data_type = column_data_type_[column_id];
134 
135   // Initialize num bytes
136   *n_bytes = ColumnDataTypeSize[column_data_type];
137   auto json_column_value = columns_json[column_name];
138   CHECK_FAIL_RETURN_UNEXPECTED_MR(json_column_value.is_string() || json_column_value.is_number(),
139                                   "[Internal ERROR] the value of column: " + column_name +
140                                     " should be string or number but got: " + json_column_value.dump());
141   switch (column_data_type) {
142     case ColumnFloat32: {
143       return GetFloat<float>(data_ptr, json_column_value, false);
144     }
145     case ColumnFloat64: {
146       return GetFloat<double>(data_ptr, json_column_value, true);
147     }
148     case ColumnInt32: {
149       return GetInt<int32_t>(data_ptr, json_column_value);
150     }
151     case ColumnInt64: {
152       return GetInt<int64_t>(data_ptr, json_column_value);
153     }
154     default: {
155       // Convert string to c_str
156       std::string tmp_string;
157       if (json_column_value.is_string()) {
158         tmp_string = json_column_value.get<string>();
159       } else {
160         tmp_string = json_column_value.dump();
161       }
162       *n_bytes = tmp_string.size();
163       auto data = reinterpret_cast<const unsigned char *>(common::SafeCStr(tmp_string));
164       *data_ptr = std::make_unique<unsigned char[]>(*n_bytes);
165       for (uint32_t i = 0; i < *n_bytes; i++) {
166         (*data_ptr)[i] = *(data + i);
167       }
168       break;
169     }
170   }
171   return Status::OK();
172 }
173 
174 template <typename T>
GetFloat(std::unique_ptr<unsigned char[]> * data_ptr,const json & json_column_value,bool use_double)175 Status ShardColumn::GetFloat(std::unique_ptr<unsigned char[]> *data_ptr, const json &json_column_value,
176                              bool use_double) {
177   RETURN_UNEXPECTED_IF_NULL_MR(data_ptr);
178   std::unique_ptr<T[]> array_data = std::make_unique<T[]>(1);
179   if (json_column_value.is_number()) {
180     array_data[0] = json_column_value;
181   } else {
182     // Convert string to float
183     try {
184       if (use_double) {
185         array_data[0] = json_column_value.get<double>();
186       } else {
187         array_data[0] = json_column_value.get<float>();
188       }
189     } catch (json::exception &e) {
190       RETURN_STATUS_UNEXPECTED_MR("[Internal ERROR] Failed to convert column value:" + json_column_value.dump() +
191                                   " to type float, " + std::string(e.what()));
192     }
193   }
194 
195   auto data = reinterpret_cast<const unsigned char *>(array_data.get());
196   *data_ptr = std::make_unique<unsigned char[]>(sizeof(T));
197   for (uint32_t i = 0; i < sizeof(T); i++) {
198     (*data_ptr)[i] = *(data + i);
199   }
200   return Status::OK();
201 }
202 
203 template <typename T>
GetInt(std::unique_ptr<unsigned char[]> * data_ptr,const json & json_column_value)204 Status ShardColumn::GetInt(std::unique_ptr<unsigned char[]> *data_ptr, const json &json_column_value) {
205   RETURN_UNEXPECTED_IF_NULL_MR(data_ptr);
206   std::unique_ptr<T[]> array_data = std::make_unique<T[]>(1);
207   int64_t temp_value;
208   bool less_than_zero = false;
209 
210   if (json_column_value.is_number_integer()) {
211     const json json_zero = 0;
212     if (json_column_value < json_zero) {
213       less_than_zero = true;
214     }
215     temp_value = json_column_value;
216   } else if (json_column_value.is_string()) {
217     std::string string_value = json_column_value;
218     try {
219       if (!string_value.empty() && string_value[0] == '-') {
220         temp_value = std::stoll(string_value);
221         less_than_zero = true;
222       } else {
223         temp_value = static_cast<int64_t>(std::stoull(string_value));
224       }
225     } catch (std::invalid_argument &e) {
226       RETURN_STATUS_UNEXPECTED_MR("[Internal ERROR] Failed to convert column value:" + string_value + " to type int, " +
227                                   std::string(e.what()));
228     } catch (std::out_of_range &e) {
229       RETURN_STATUS_UNEXPECTED_MR("[Internal ERROR] Failed to convert column value:" + string_value + " to type int, " +
230                                   std::string(e.what()));
231     }
232   } else {
233     RETURN_STATUS_UNEXPECTED_MR("[Internal ERROR] column value should be type string or number but got: " +
234                                 json_column_value.dump());
235   }
236 
237   if ((less_than_zero && temp_value < static_cast<int64_t>(std::numeric_limits<T>::min())) ||
238       (!less_than_zero && static_cast<uint64_t>(temp_value) > static_cast<uint64_t>(std::numeric_limits<T>::max()))) {
239     RETURN_STATUS_UNEXPECTED_MR("[Internal ERROR] column value: " + std::to_string(temp_value) + " is out of range.");
240   }
241   array_data[0] = static_cast<T>(temp_value);
242 
243   auto data = reinterpret_cast<const unsigned char *>(array_data.get());
244   *data_ptr = std::make_unique<unsigned char[]>(sizeof(T));
245   for (uint32_t i = 0; i < sizeof(T); i++) {
246     (*data_ptr)[i] = *(data + i);
247   }
248   return Status::OK();
249 }
250 
GetColumnFromBlob(const std::string & column_name,const std::vector<uint8_t> & columns_blob,const unsigned char ** data,std::unique_ptr<unsigned char[]> * data_ptr,uint64_t * const n_bytes)251 Status ShardColumn::GetColumnFromBlob(const std::string &column_name, const std::vector<uint8_t> &columns_blob,
252                                       const unsigned char **data, std::unique_ptr<unsigned char[]> *data_ptr,
253                                       uint64_t *const n_bytes) {
254   RETURN_UNEXPECTED_IF_NULL_MR(data);
255   uint64_t offset_address = 0;
256   auto column_id = column_name_id_[column_name];
257   RETURN_IF_NOT_OK_MR(GetColumnAddressInBlock(column_id, columns_blob, n_bytes, &offset_address));
258   auto column_data_type = column_data_type_[column_id];
259   if (has_compress_blob_ && column_data_type == ColumnInt32) {
260     RETURN_IF_NOT_OK_MR(UncompressInt<int32_t>(column_id, data_ptr, columns_blob, n_bytes, offset_address));
261   } else if (has_compress_blob_ && column_data_type == ColumnInt64) {
262     RETURN_IF_NOT_OK_MR(UncompressInt<int64_t>(column_id, data_ptr, columns_blob, n_bytes, offset_address));
263   } else {
264     *data = reinterpret_cast<const unsigned char *>(&(columns_blob[offset_address]));
265   }
266 
267   return Status::OK();
268 }
269 
CheckColumnName(const std::string & column_name)270 ColumnCategory ShardColumn::CheckColumnName(const std::string &column_name) {
271   auto it_column = column_name_id_.find(column_name);
272   if (it_column == column_name_id_.end()) {
273     return ColumnNotFound;
274   }
275   auto it_blob = blob_column_id_.find(column_name);
276   return it_blob == blob_column_id_.end() ? ColumnInRaw : ColumnInBlob;
277 }
278 
CompressBlob(const std::vector<uint8_t> & blob,int64_t * compression_size)279 std::vector<uint8_t> ShardColumn::CompressBlob(const std::vector<uint8_t> &blob, int64_t *compression_size) {
280   // Skip if no compress columns
281   *compression_size = 0;
282   if (!CheckCompressBlob()) {
283     return blob;
284   }
285 
286   std::vector<uint8_t> dst_blob;
287   uint64_t i_src = 0;
288   for (int64_t i = 0; i < num_blob_column_; i++) {
289     // Get column data type
290     auto src_data_type = column_data_type_[column_name_id_[blob_column_[i]]];
291     auto int_type = src_data_type == ColumnInt32 ? kInt32Type : kInt64Type;
292 
293     // Compress and return is blob has 1 column only
294     if (num_blob_column_ == 1) {
295       dst_blob = CompressInt(blob, int_type);
296       *compression_size = static_cast<int64_t>(blob.size()) - static_cast<int64_t>(dst_blob.size());
297       return dst_blob;
298     }
299 
300     // Just copy and continue if column dat type is not int32/int64
301     uint64_t num_bytes = BytesBigToUInt64(blob, i_src, kInt64Type);
302     if (src_data_type != ColumnInt32 && src_data_type != ColumnInt64) {
303       dst_blob.insert(dst_blob.end(), blob.begin() + i_src, blob.begin() + i_src + kInt64Len + num_bytes);
304       i_src += kInt64Len + num_bytes;
305       continue;
306     }
307 
308     // Get column slice in source blob
309     std::vector<uint8_t> blob_slice(blob.begin() + i_src + kInt64Len, blob.begin() + i_src + kInt64Len + num_bytes);
310     // Compress column
311     auto dst_blob_slice = CompressInt(blob_slice, int_type);
312     // Get new column size
313     auto new_blob_size = UIntToBytesBig(dst_blob_slice.size(), kInt64Type);
314     // Append new column size
315     dst_blob.insert(dst_blob.end(), new_blob_size.begin(), new_blob_size.end());
316     // Append new column data
317     dst_blob.insert(dst_blob.end(), dst_blob_slice.begin(), dst_blob_slice.end());
318     i_src += kInt64Len + num_bytes;
319   }
320   MS_LOG(DEBUG) << "Compress blob data from " << blob.size() << " to " << dst_blob.size() << ".";
321   *compression_size = static_cast<int64_t>(blob.size()) - static_cast<int64_t>(dst_blob.size());
322   return dst_blob;
323 }
324 
// Compress an array of integers of width `int_type` stored in `src_bytes`.
//
// Output layout: kBytesOfColumnLen bytes holding the element count (written by UIntToBytesBig),
// then a bitmap with kDataTypeBits bits per element recording the width chosen for it, then the
// elements themselves, each written by UIntToBytesLittle in the narrowest type reported by
// BytesLittleToMinIntType().
vector<uint8_t> ShardColumn::CompressInt(const vector<uint8_t> &src_bytes, const IntegerType &int_type) {
  const uint64_t src_width = kUnsignedOne << static_cast<uint8_t>(int_type);
  const uint64_t element_count = src_bytes.size() / src_width;
  const uint64_t bitmap_bytes = (element_count + kNumDataOfByte - 1) / kNumDataOfByte;
  const uint64_t payload_begin = kBytesOfColumnLen + bitmap_bytes;

  // Worst case: no element shrinks. The buffer is trimmed to the real size at the end.
  vector<uint8_t> packed(payload_begin + src_bytes.size(), 0);

  // Header: number of elements.
  const vector<uint8_t> count_bytes = UIntToBytesBig(element_count, kInt32Type);
  for (uint64_t n = 0; n < kBytesOfColumnLen; ++n) {
    packed[n] = count_bytes[n];
  }

  uint64_t write_pos = payload_begin;
  for (uint64_t idx = 0; idx < element_count; ++idx) {
    // Decode the element and find the narrowest type able to hold it.
    IntegerType narrow_type = kInt8Type;
    const int64_t value = BytesLittleToMinIntType(src_bytes, idx * src_width, int_type, &narrow_type);

    // Append the element using that narrow width.
    for (const uint8_t byte : UIntToBytesLittle(static_cast<uint64_t>(value), narrow_type)) {
      packed[write_pos++] = byte;
    }

    // Record the chosen width in the bitmap; earlier elements occupy the higher bits of a byte.
    const auto slot = kNumDataOfByte - kUnsignedOne - (idx % kNumDataOfByte);
    const auto shift = kDataTypeBits * slot;
    packed[idx / kNumDataOfByte + kBytesOfColumnLen] |= (static_cast<uint8_t>(narrow_type) << shift);
  }

  packed.resize(write_pos);
  MS_LOG(DEBUG) << "Compress blob field from " << src_bytes.size() << " to " << packed.size() << ".";
  return packed;
}
367 
GetColumnAddressInBlock(const uint64_t & column_id,const std::vector<uint8_t> & columns_blob,uint64_t * num_bytes,uint64_t * shift_idx)368 Status ShardColumn::GetColumnAddressInBlock(const uint64_t &column_id, const std::vector<uint8_t> &columns_blob,
369                                             uint64_t *num_bytes, uint64_t *shift_idx) {
370   RETURN_UNEXPECTED_IF_NULL_MR(num_bytes);
371   RETURN_UNEXPECTED_IF_NULL_MR(shift_idx);
372   if (num_blob_column_ == 1) {
373     *num_bytes = columns_blob.size();
374     *shift_idx = 0;
375     return Status::OK();
376   }
377   auto blob_id = blob_column_id_[column_name_[column_id]];
378 
379   for (int32_t i = 0; i < blob_id; i++) {
380     *shift_idx += kInt64Len + BytesBigToUInt64(columns_blob, *shift_idx, kInt64Type);
381   }
382   *num_bytes = BytesBigToUInt64(columns_blob, *shift_idx, kInt64Type);
383 
384   (*shift_idx) += kInt64Len;
385 
386   return Status::OK();
387 }
388 
389 template <typename T>
UncompressInt(const uint64_t & column_id,std::unique_ptr<unsigned char[]> * const data_ptr,const std::vector<uint8_t> & columns_blob,uint64_t * num_bytes,uint64_t shift_idx)390 Status ShardColumn::UncompressInt(const uint64_t &column_id, std::unique_ptr<unsigned char[]> *const data_ptr,
391                                   const std::vector<uint8_t> &columns_blob, uint64_t *num_bytes, uint64_t shift_idx) {
392   RETURN_UNEXPECTED_IF_NULL_MR(data_ptr);
393   RETURN_UNEXPECTED_IF_NULL_MR(num_bytes);
394   auto num_elements = BytesBigToUInt64(columns_blob, shift_idx, kInt32Type);
395   *num_bytes = sizeof(T) * num_elements;
396 
397   // Parse integer array
398   uint64_t i_source = shift_idx + kBytesOfColumnLen + (num_elements + kNumDataOfByte - 1) / kNumDataOfByte;
399   auto array_data = std::make_unique<T[]>(num_elements);
400 
401   for (uint64_t i = 0; i < num_elements; i++) {
402     uint8_t iBitMap = columns_blob[shift_idx + kBytesOfColumnLen + i / kNumDataOfByte];
403     uint64_t i_type = (iBitMap >> ((kNumDataOfByte - 1 - (i % kNumDataOfByte)) * kDataTypeBits)) & kDataTypeBitMask;
404     auto mr_int_type = static_cast<IntegerType>(i_type);
405     int64_t i64 = BytesLittleToMinIntType(columns_blob, i_source, mr_int_type);
406     i_source += (kUnsignedOne << i_type);
407     array_data[i] = static_cast<T>(i64);
408   }
409 
410   auto data = reinterpret_cast<const unsigned char *>(array_data.get());
411   // field is none. for example: numpy is null
412   if (*num_bytes == 0) {
413     return Status::OK();
414   }
415   *data_ptr = std::make_unique<unsigned char[]>(*num_bytes);
416   CHECK_FAIL_RETURN_UNEXPECTED_MR(memcpy_s(data_ptr->get(), *num_bytes, data, *num_bytes) == 0,
417                                   "[Internal ERROR] Failed to call securec func [memcpy_s]");
418   return Status::OK();
419 }
420 
BytesBigToUInt64(const std::vector<uint8_t> & bytes_array,const uint64_t & pos,const IntegerType & i_type)421 uint64_t ShardColumn::BytesBigToUInt64(const std::vector<uint8_t> &bytes_array, const uint64_t &pos,
422                                        const IntegerType &i_type) {
423   uint64_t result = 0;
424   for (uint64_t i = 0; i < (kUnsignedOne << static_cast<uint8_t>(i_type)); i++) {
425     result = (result << kBitsOfByte) + bytes_array[pos + i];
426   }
427   return result;
428 }
429 
UIntToBytesBig(uint64_t value,const IntegerType & i_type)430 std::vector<uint8_t> ShardColumn::UIntToBytesBig(uint64_t value, const IntegerType &i_type) {
431   uint64_t n_bytes = kUnsignedOne << static_cast<uint8_t>(i_type);
432   std::vector<uint8_t> result(n_bytes, 0);
433   for (uint64_t i = 0; i < n_bytes; i++) {
434     result[n_bytes - 1 - i] = value & std::numeric_limits<uint8_t>::max();
435     value >>= kBitsOfByte;
436   }
437   return result;
438 }
439 
UIntToBytesLittle(uint64_t value,const IntegerType & i_type)440 std::vector<uint8_t> ShardColumn::UIntToBytesLittle(uint64_t value, const IntegerType &i_type) {
441   uint64_t n_bytes = kUnsignedOne << static_cast<uint8_t>(i_type);
442   std::vector<uint8_t> result(n_bytes, 0);
443   for (uint64_t i = 0; i < n_bytes; i++) {
444     result[i] = value & std::numeric_limits<uint8_t>::max();
445     value >>= kBitsOfByte;
446   }
447   return result;
448 }
449 
BytesLittleToMinIntType(const std::vector<uint8_t> & bytes_array,const uint64_t & pos,const IntegerType & src_i_type,IntegerType * dst_i_type)450 int64_t ShardColumn::BytesLittleToMinIntType(const std::vector<uint8_t> &bytes_array, const uint64_t &pos,
451                                              const IntegerType &src_i_type, IntegerType *dst_i_type) {
452   uint64_t u_temp = 0;
453   for (uint64_t i = 0; i < (kUnsignedOne << static_cast<uint8_t>(src_i_type)); i++) {
454     u_temp = (u_temp << kBitsOfByte) +
455              bytes_array[pos + (kUnsignedOne << static_cast<uint8_t>(src_i_type)) - kUnsignedOne - i];
456   }
457 
458   int64_t i_out;
459   switch (src_i_type) {
460     case kInt8Type: {
461       i_out = static_cast<int8_t>(u_temp & std::numeric_limits<uint8_t>::max());
462       break;
463     }
464     case kInt16Type: {
465       i_out = static_cast<int16_t>(u_temp & std::numeric_limits<uint16_t>::max());
466       break;
467     }
468     case kInt32Type: {
469       i_out = static_cast<int32_t>(u_temp & std::numeric_limits<uint32_t>::max());
470       break;
471     }
472     case kInt64Type: {
473       i_out = static_cast<int64_t>(u_temp & std::numeric_limits<uint64_t>::max());
474       break;
475     }
476     default: {
477       i_out = 0;
478     }
479   }
480 
481   if (!dst_i_type) {
482     return i_out;
483   }
484 
485   if (i_out >= static_cast<int64_t>(std::numeric_limits<int8_t>::min()) &&
486       i_out <= static_cast<int64_t>(std::numeric_limits<int8_t>::max())) {
487     *dst_i_type = kInt8Type;
488   } else if (i_out >= static_cast<int64_t>(std::numeric_limits<int16_t>::min()) &&
489              i_out <= static_cast<int64_t>(std::numeric_limits<int16_t>::max())) {
490     *dst_i_type = kInt16Type;
491   } else if (i_out >= static_cast<int64_t>(std::numeric_limits<int32_t>::min()) &&
492              i_out <= static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
493     *dst_i_type = kInt32Type;
494   } else {
495     *dst_i_type = kInt64Type;
496   }
497   return i_out;
498 }
499 }  // namespace mindrecord
500 }  // namespace mindspore
501