• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/dataset.h"
18 #include <dirent.h>
19 #include <arpa/inet.h>
20 #include <map>
21 #include <iostream>
22 #include <fstream>
23 #include <memory>
24 #include "src/utils.h"
25 
#pragma pack(push, 1)

// On-disk layout of the start of a BMP file: the 14-byte file header followed
// by the 40-byte info header. The struct is read straight from the file, so it
// must be packed to exactly 54 bytes with no compiler-inserted padding.
struct bmp_header {           // Total: 54 bytes
  uint16_t type;              // Magic identifier: 0x4d42 ("BM" when read little-endian)
  uint32_t size;              // Size field of the file header
  uint16_t reserved1;
  uint16_t reserved2;
  uint32_t offset;            // Byte offset from the start of the file to the pixel data
  uint32_t dib_header_size;   // Size field of the info header
  int32_t width;              // Image width in pixels
  int32_t height;             // Image height in pixels
  uint16_t channels;          // NOTE: this slot is the "planes" field of the BMP info header
  uint16_t bits_per_pixel;
  uint32_t compression;
  uint32_t image_size_bytes;  // Size of the pixel data as recorded in the file
  int32_t x_resolution_ppm;
  int32_t y_resolution_ppm;
  uint32_t num_colors;
  uint32_t important_colors;
};

#pragma pack(pop)

// Fail the build instead of silently mis-parsing files if the packing pragma is not honoured.
static_assert(sizeof(bmp_header) == 54, "bmp_header must be packed to the 54-byte BMP header size");
48 
// Maps a label name to its integer id.
using LabelId = std::map<std::string, int>;

// Number of class sub-directories ("0" .. "9") scanned under the dataset root.
constexpr int kClassNum = 10;
// Index of the last channel of a pixel; used to mirror B,G,R into R,G,B.
constexpr int kBGRDim = 2;
// Largest value of an 8-bit colour sample; divides samples into [0, 1].
constexpr float kRGBMAX = 255.0f;
// Number of colour channels per pixel.
constexpr int kRGBDims = 3;

// Per-channel mean and standard deviation applied as (x / 255 - mean) / std,
// indexed in R, G, B order.
float CH_MEAN[kRGBDims] = {0.485, 0.456, 0.406};
float CH_STD[kRGBDims] = {0.229, 0.224, 0.225};
57 
ReadBitmapFile(const std::string & filename,size_t * size)58 static char *ReadBitmapFile(const std::string &filename, size_t *size) {
59   MS_ASSERT(size != nullptr);
60   *size = 0;
61   bmp_header bitmap_header;
62   std::ifstream ifs(filename);
63   if (!ifs.good() || !ifs.is_open()) {
64     std::cerr << "file: " << filename << " does not exist or failed to open";
65     return nullptr;
66   }
67 
68   ifs.read(reinterpret_cast<char *>(&bitmap_header), sizeof(bmp_header));
69   if (bitmap_header.type != 0x4D42) {
70     std::cerr << "file: " << filename << " magic number does not match BMP";
71     ifs.close();
72     return nullptr;
73   }
74 
75   ifs.seekg(bitmap_header.offset, std::ios::beg);
76 
77   unsigned char *bmp_image = reinterpret_cast<unsigned char *>(malloc(bitmap_header.image_size_bytes));
78   if (bmp_image == nullptr) {
79     ifs.close();
80     return nullptr;
81   }
82 
83   ifs.read(reinterpret_cast<char *>(bmp_image), bitmap_header.image_size_bytes);
84 
85   size_t buffer_size = bitmap_header.width * bitmap_header.height * kRGBDims;
86   float *hwc_bin_image = new (std::nothrow) float[buffer_size];
87   if (hwc_bin_image == nullptr) {
88     free(bmp_image);
89     ifs.close();
90     return nullptr;
91   }
92 
93   // swap the R and B values to get RGB (bitmap is BGR)
94   // swap columns (in BMP, first pixel is lower left one...)
95   const size_t channels = 3;
96   const size_t hStride = channels * bitmap_header.width;
97   const size_t height = bitmap_header.height;
98 
99   for (int h = 0; h < bitmap_header.height; h++) {
100     for (int w = 0; w < bitmap_header.width; w++) {
101       hwc_bin_image[h * hStride + w * channels + 0] =
102         (((static_cast<float>(bmp_image[(height - h - 1) * hStride + w * channels + kBGRDim])) / kRGBMAX) -
103          CH_MEAN[0]) /
104         CH_STD[0];
105       hwc_bin_image[h * hStride + w * channels + 1] =
106         (((static_cast<float>(bmp_image[(height - h - 1) * hStride + w * channels + 1])) / kRGBMAX) - CH_MEAN[1]) /
107         CH_STD[1];
108       hwc_bin_image[h * hStride + w * channels + kBGRDim] =
109         (((static_cast<float>(bmp_image[(height - h - 1) * hStride + w * channels + 0])) / kRGBMAX) -
110          CH_MEAN[kBGRDim]) /
111         CH_STD[kBGRDim];
112     }
113   }
114 
115   *size = buffer_size * sizeof(float);
116   free(bmp_image);
117   ifs.close();
118   char *ret_buf = reinterpret_cast<char *>(hwc_bin_image);
119   return ret_buf;
120 }
121 
ReadFile(const std::string & file,size_t * size)122 char *ReadFile(const std::string &file, size_t *size) {
123   MS_ASSERT(size != nullptr);
124   std::string realPath(file);
125   std::ifstream ifs(realPath);
126   if (!ifs.good()) {
127     std::cerr << "file: " << realPath << " does not exist";
128     return nullptr;
129   }
130 
131   if (!ifs.is_open()) {
132     std::cerr << "file: " << realPath << " open failed";
133     return nullptr;
134   }
135 
136   ifs.seekg(0, std::ios::end);
137   *size = ifs.tellg();
138   std::unique_ptr<char[]> buf(new (std::nothrow) char[*size]);
139   if (buf == nullptr) {
140     std::cerr << "malloc buf failed, file: " << realPath;
141     ifs.close();
142     return nullptr;
143   }
144 
145   ifs.seekg(0, std::ios::beg);
146   ifs.read(buf.get(), *size);
147   ifs.close();
148 
149   return buf.release();
150 }
151 
~DataSet()152 DataSet::~DataSet() {
153   for (auto itr = train_data_.begin(); itr != train_data_.end(); ++itr) {
154     auto ptr = std::get<0>(*itr);
155     delete[] ptr;
156   }
157   for (auto itr = test_data_.begin(); itr != test_data_.end(); ++itr) {
158     auto ptr = std::get<0>(*itr);
159     delete[] ptr;
160   }
161   for (auto itr = val_data_.begin(); itr != val_data_.end(); ++itr) {
162     auto ptr = std::get<0>(*itr);
163     delete[] ptr;
164   }
165 }
166 
Init(const std::string & data_base_directory,database_type type)167 int DataSet::Init(const std::string &data_base_directory, database_type type) {
168   InitializeBMPFoldersDatabase(data_base_directory);
169   return 0;
170 }
171 
InitializeBMPFoldersDatabase(std::string dpath)172 void DataSet::InitializeBMPFoldersDatabase(std::string dpath) {
173   size_t file_size = 0;
174   const int ratio = 5;
175   auto vec = ReadDir(dpath);
176   int running_index = 1;
177   for (const auto ft : vec) {
178     int label;
179     std::string file_name;
180     std::tie(label, file_name) = ft;
181     char *data = ReadBitmapFile(file_name, &file_size);
182     DataLabelTuple data_entry = std::make_tuple(data, label);
183     if ((expected_data_size_ == 0) || (file_size == expected_data_size_)) {
184       if (running_index % ratio == 0) {
185         val_data_.push_back(data_entry);
186       } else if (running_index % ratio == 1) {
187         test_data_.push_back(data_entry);
188       } else {
189         train_data_.push_back(data_entry);
190       }
191       running_index++;
192     }
193   }
194 }
195 
ReadDir(const std::string dpath)196 std::vector<FileTuple> DataSet::ReadDir(const std::string dpath) {
197   std::vector<FileTuple> vec;
198   struct dirent *entry = nullptr;
199   num_of_classes_ = kClassNum;
200   for (int class_id = 0; class_id < num_of_classes_; class_id++) {
201     std::string dirname = dpath + "/" + std::to_string(class_id);
202     DIR *dp = opendir(dirname.c_str());
203     if (dp != nullptr) {
204       while ((entry = readdir(dp))) {
205         std::string filename = dirname + "/" + entry->d_name;
206         if (filename.find(".bmp") != std::string::npos) {
207           FileTuple ft = make_tuple(class_id, filename);
208           vec.push_back(ft);
209         }
210       }
211       closedir(dp);
212     } else {
213       std::cerr << "open directory: " << dirname << " failed." << std::endl;
214     }
215   }
216   return vec;
217 }
218