• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "debug/data_dump/npy_header.h"
18 
19 #include <unordered_map>
20 #include <utility>
21 #include <sstream>
22 
23 #include "mindspore/core/ir/dtype.h"
24 #include "mindspore/core/utils/log_adapter.h"
25 #include "mindspore/core/utils/convert_utils_base.h"
26 
27 namespace mindspore {
28 namespace {
// Magic string that opens every npy file.
constexpr char kMagicPrefix[] = "\x93NUMPY";
// Number of bytes in kMagicPrefix (terminating '\0' excluded). The two version bytes that follow the
// prefix in the file are NOT part of this length; they are accounted for separately by the writer.
constexpr size_t kMagicLen = 6;
// The header (magic, version, length field, description, padding and newline) is padded so that its
// total size is a multiple of this many bytes.
constexpr size_t kArrayAlign = 64;

// npy format version, stored as {major, minor}.
// The major version selects the width of the header-length field: 1 uses 2 bytes; 2 and 3 use 4 bytes.
// It also selects the header encoding: 1 and 2 use 'latin1'; 3 uses 'utf8'.
using version_type = std::pair<int, int>;
39 
// Element type description as it appears in the 'descr' field of the npy header.
// Kept as a plain aggregate: it is brace-initialised as {byteorder, type, length}, so member order matters.
struct DtypeDescr {
  char byteorder;  // '<' little endian; '>' big endian; '|' byte order not applicable
  char type;       // 'b' bool; 'u' unsigned int; 'i' signed int; 'f' float
  size_t length;   // width value appended after the type character, e.g. 4 in '<f4'

  // Renders the description as a single-quoted token such as '<f4'.
  std::string str() const;
};
50 
51 // npy file header description, includes data type description, fortran_order and array shape
52 // fortran_order: true represents the array data Fortran-contiguous; false represents the array data C-contiguity
53 struct NpyHeader {
54  public:
55   DtypeDescr dtype_descr;
56   bool fortran_order;
57   ShapeVector shape;
58 
59   std::string str() const;
60 
61  private:
62   std::string fortran_order_to_str() const;
63   std::string shape_to_str() const;
64 };
65 
str() const66 std::string DtypeDescr::str() const {
67   std::ostringstream buffer;
68   buffer << "\'" << byteorder << type << length << "\'";
69   return buffer.str();
70 }
71 
str() const72 std::string NpyHeader::str() const {
73   const std::string first_field = "'descr': ";
74   const std::string second_field = "'fortran_order': ";
75   const std::string third_field = "'shape': ";
76   std::ostringstream buffer;
77   buffer << "{" << first_field << dtype_descr.str() << ", " << second_field << fortran_order_to_str() << ", "
78          << third_field << shape_to_str() << ", }";
79   return buffer.str();
80 }
81 
fortran_order_to_str() const82 std::string NpyHeader::fortran_order_to_str() const { return fortran_order ? "True" : "False"; }
83 
shape_to_str() const84 std::string NpyHeader::shape_to_str() const {
85   std::ostringstream buffer;
86   buffer << "(";
87   for (const auto i : shape) {
88     buffer << std::to_string(i) << ",";
89   }
90   buffer << ")";
91   return buffer.str();
92 }
93 
94 // dtype description corresponding to tensor type
95 const std::unordered_map<TypeId, DtypeDescr> type_desc_map = {
96   {kNumberTypeBool, DtypeDescr{'|', 'b', 1}},    {kNumberTypeInt8, DtypeDescr{'|', 'i', 1}},
97   {kNumberTypeInt16, DtypeDescr{'<', 'i', 2}},   {kNumberTypeInt32, DtypeDescr{'<', 'i', 4}},
98   {kNumberTypeInt64, DtypeDescr{'<', 'i', 8}},   {kNumberTypeUInt8, DtypeDescr{'|', 'u', 1}},
99   {kNumberTypeUInt16, DtypeDescr{'<', 'u', 2}},  {kNumberTypeUInt32, DtypeDescr{'<', 'u', 4}},
100   {kNumberTypeUInt64, DtypeDescr{'<', 'u', 8}},  {kNumberTypeFloat16, DtypeDescr{'<', 'f', 2}},
101   {kNumberTypeFloat32, DtypeDescr{'<', 'f', 4}}, {kNumberTypeFloat64, DtypeDescr{'<', 'f', 8}},
102 };
103 }  // namespace
104 
// Writes `number` into the first `length` bytes of `byte`, least-significant byte first.
//   number: value to serialise.
//   byte:   destination buffer holding at least `length` bytes; a null pointer makes the call a no-op.
//   length: number of bytes to write. Positions beyond the width of size_t are filled with zero, and
//           higher-order bytes of `number` that do not fit into `length` bytes are dropped.
void int_to_byte(size_t number, char *byte, size_t length) {
  if (byte == nullptr) {
    return;
  }
  const size_t byte_len = 8;
  const size_t mask = 0xff;
  for (size_t i = 0; i < length; i++) {
    // Shifting by the full bit width of size_t (or more) is undefined behaviour, so positions past the
    // last byte of `number` are zero-extended explicitly instead of being computed by a shift.
    const size_t value = (i < sizeof(number)) ? ((number >> (i * byte_len)) & mask) : 0;
    byte[i] = static_cast<char>(value);
  }
}
112 
GenerateNpyHeader(const ShapeVector & shape,TypeId type_id,bool fortran_order)113 std::string GenerateNpyHeader(const ShapeVector &shape, TypeId type_id, bool fortran_order) {
114   auto type_desc = type_desc_map.find(type_id);
115   if (type_desc == type_desc_map.end()) {
116     MS_LOG(INFO) << "Not support dump the " << TypeIdToType(type_id)->ToString() << " data to npy file.";
117     return std::string();
118   }
119 
120   NpyHeader npy_header{type_desc->second, fortran_order, shape};
121   std::string header_str = npy_header.str();
122   version_type version{1, 0};
123   const size_t header_len = header_str.length();
124   const size_t version_len = 2;
125   const size_t max_len = 65535;
126   size_t length_len = 2;
127   size_t total_len = kMagicLen + version_len + length_len + header_len + 1;
128   if (total_len > max_len) {
129     version = {2, 0};
130     length_len = 4;
131     total_len = kMagicLen + version_len + length_len + header_len + 1;
132   }
133 
134   const size_t pad_len = kArrayAlign - total_len % kArrayAlign;
135   const size_t padding_header_len = header_len + pad_len + 1;
136   const std::string padding(pad_len, ' ');
137   const std::string end_line = "\n";
138   char *length_byte = new char[length_len];
139   int_to_byte(padding_header_len, length_byte, length_len);
140 
141   std::ostringstream out;
142   (void)out.write(kMagicPrefix, SizeToLong(kMagicLen));
143   (void)out.put(version.first);
144   (void)out.put(version.second);
145   (void)out.write(length_byte, SizeToLong(length_len));
146   out << header_str << padding << end_line;
147   delete[] length_byte;
148   return out.str();
149 }
150 }  // namespace mindspore
151