1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/dataset.h"
18 #include <dirent.h>
19 #include <arpa/inet.h>
20 #include <map>
21 #include <iostream>
22 #include <fstream>
23 #include <memory>
24 #include "src/utils.h"
25
#pragma pack(push, 1)

// On-disk header of a BMP file as consumed by ReadBitmapFile(): the 14-byte file
// header immediately followed by the 40-byte info header. The struct is read
// straight from the file, so it must stay packed and exactly 54 bytes long.
struct bmp_header {
  uint16_t type;  // Magic identifier: 0x4d42
  uint32_t size;
  uint16_t reserved1;
  uint16_t reserved2;
  uint32_t offset;  // Position of the pixel data, counted from the start of the file
  uint32_t dib_header_size;
  int32_t width;
  int32_t height;
  uint16_t channels;
  uint16_t bits_per_pixel;
  uint32_t compression;
  uint32_t image_size_bytes;  // Size of the pixel data as declared by the file
  int32_t x_resolution_ppm;
  int32_t y_resolution_ppm;
  uint32_t num_colors;
  uint32_t important_colors;
};

#pragma pack(pop)

// Fails the build if packing is lost, instead of silently mis-parsing every file.
static_assert(sizeof(bmp_header) == 54, "bmp_header must match the 54-byte on-disk BMP header");
48
// Mapping from a label name to its numeric id.
using LabelId = std::map<std::string, int>;

// Number of class sub-directories ("0" .. "kClassNum - 1") scanned by DataSet::ReadDir().
constexpr int kClassNum = 10;
// Number of color channels per pixel in the decoded image.
constexpr int kRGBDims = 3;
// Index of the last channel; used to swap the first and last channel when turning BGR into RGB.
constexpr int kBGRDim = 2;
// Largest 8-bit channel value; pixel bytes are divided by it before normalization.
constexpr float kRGBMAX = 255.0f;

// Per-channel normalization applied in ReadBitmapFile(): (value / kRGBMAX - CH_MEAN[c]) / CH_STD[c].
float CH_MEAN[3] = {0.485, 0.456, 0.406};
float CH_STD[3] = {0.229, 0.224, 0.225};
57
ReadBitmapFile(const std::string & filename,size_t * size)58 static char *ReadBitmapFile(const std::string &filename, size_t *size) {
59 MS_ASSERT(size != nullptr);
60 *size = 0;
61 bmp_header bitmap_header;
62 std::ifstream ifs(filename);
63 if (!ifs.good() || !ifs.is_open()) {
64 std::cerr << "file: " << filename << " does not exist or failed to open";
65 return nullptr;
66 }
67
68 ifs.read(reinterpret_cast<char *>(&bitmap_header), sizeof(bmp_header));
69 if (bitmap_header.type != 0x4D42) {
70 std::cerr << "file: " << filename << " magic number does not match BMP";
71 ifs.close();
72 return nullptr;
73 }
74
75 ifs.seekg(bitmap_header.offset, std::ios::beg);
76
77 unsigned char *bmp_image = reinterpret_cast<unsigned char *>(malloc(bitmap_header.image_size_bytes));
78 if (bmp_image == nullptr) {
79 ifs.close();
80 return nullptr;
81 }
82
83 ifs.read(reinterpret_cast<char *>(bmp_image), bitmap_header.image_size_bytes);
84
85 size_t buffer_size = bitmap_header.width * bitmap_header.height * kRGBDims;
86 float *hwc_bin_image = new (std::nothrow) float[buffer_size];
87 if (hwc_bin_image == nullptr) {
88 free(bmp_image);
89 ifs.close();
90 return nullptr;
91 }
92
93 // swap the R and B values to get RGB (bitmap is BGR)
94 // swap columns (in BMP, first pixel is lower left one...)
95 const size_t channels = 3;
96 const size_t hStride = channels * bitmap_header.width;
97 const size_t height = bitmap_header.height;
98
99 for (int h = 0; h < bitmap_header.height; h++) {
100 for (int w = 0; w < bitmap_header.width; w++) {
101 hwc_bin_image[h * hStride + w * channels + 0] =
102 (((static_cast<float>(bmp_image[(height - h - 1) * hStride + w * channels + kBGRDim])) / kRGBMAX) -
103 CH_MEAN[0]) /
104 CH_STD[0];
105 hwc_bin_image[h * hStride + w * channels + 1] =
106 (((static_cast<float>(bmp_image[(height - h - 1) * hStride + w * channels + 1])) / kRGBMAX) - CH_MEAN[1]) /
107 CH_STD[1];
108 hwc_bin_image[h * hStride + w * channels + kBGRDim] =
109 (((static_cast<float>(bmp_image[(height - h - 1) * hStride + w * channels + 0])) / kRGBMAX) -
110 CH_MEAN[kBGRDim]) /
111 CH_STD[kBGRDim];
112 }
113 }
114
115 *size = buffer_size * sizeof(float);
116 free(bmp_image);
117 ifs.close();
118 char *ret_buf = reinterpret_cast<char *>(hwc_bin_image);
119 return ret_buf;
120 }
121
ReadFile(const std::string & file,size_t * size)122 char *ReadFile(const std::string &file, size_t *size) {
123 MS_ASSERT(size != nullptr);
124 std::string realPath(file);
125 std::ifstream ifs(realPath);
126 if (!ifs.good()) {
127 std::cerr << "file: " << realPath << " does not exist";
128 return nullptr;
129 }
130
131 if (!ifs.is_open()) {
132 std::cerr << "file: " << realPath << " open failed";
133 return nullptr;
134 }
135
136 ifs.seekg(0, std::ios::end);
137 *size = ifs.tellg();
138 std::unique_ptr<char[]> buf(new (std::nothrow) char[*size]);
139 if (buf == nullptr) {
140 std::cerr << "malloc buf failed, file: " << realPath;
141 ifs.close();
142 return nullptr;
143 }
144
145 ifs.seekg(0, std::ios::beg);
146 ifs.read(buf.get(), *size);
147 ifs.close();
148
149 return buf.release();
150 }
151
~DataSet()152 DataSet::~DataSet() {
153 for (auto itr = train_data_.begin(); itr != train_data_.end(); ++itr) {
154 auto ptr = std::get<0>(*itr);
155 delete[] ptr;
156 }
157 for (auto itr = test_data_.begin(); itr != test_data_.end(); ++itr) {
158 auto ptr = std::get<0>(*itr);
159 delete[] ptr;
160 }
161 for (auto itr = val_data_.begin(); itr != val_data_.end(); ++itr) {
162 auto ptr = std::get<0>(*itr);
163 delete[] ptr;
164 }
165 }
166
Init(const std::string & data_base_directory,database_type type)167 int DataSet::Init(const std::string &data_base_directory, database_type type) {
168 InitializeBMPFoldersDatabase(data_base_directory);
169 return 0;
170 }
171
InitializeBMPFoldersDatabase(std::string dpath)172 void DataSet::InitializeBMPFoldersDatabase(std::string dpath) {
173 size_t file_size = 0;
174 const int ratio = 5;
175 auto vec = ReadDir(dpath);
176 int running_index = 1;
177 for (const auto ft : vec) {
178 int label;
179 std::string file_name;
180 std::tie(label, file_name) = ft;
181 char *data = ReadBitmapFile(file_name, &file_size);
182 DataLabelTuple data_entry = std::make_tuple(data, label);
183 if ((expected_data_size_ == 0) || (file_size == expected_data_size_)) {
184 if (running_index % ratio == 0) {
185 val_data_.push_back(data_entry);
186 } else if (running_index % ratio == 1) {
187 test_data_.push_back(data_entry);
188 } else {
189 train_data_.push_back(data_entry);
190 }
191 running_index++;
192 }
193 }
194 }
195
ReadDir(const std::string dpath)196 std::vector<FileTuple> DataSet::ReadDir(const std::string dpath) {
197 std::vector<FileTuple> vec;
198 struct dirent *entry = nullptr;
199 num_of_classes_ = kClassNum;
200 for (int class_id = 0; class_id < num_of_classes_; class_id++) {
201 std::string dirname = dpath + "/" + std::to_string(class_id);
202 DIR *dp = opendir(dirname.c_str());
203 if (dp != nullptr) {
204 while ((entry = readdir(dp))) {
205 std::string filename = dirname + "/" + entry->d_name;
206 if (filename.find(".bmp") != std::string::npos) {
207 FileTuple ft = make_tuple(class_id, filename);
208 vec.push_back(ft);
209 }
210 }
211 closedir(dp);
212 } else {
213 std::cerr << "open directory: " << dirname << " failed." << std::endl;
214 }
215 }
216 return vec;
217 }
218