# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Testing Decode op in DE
"""
import cv2
import numpy as np

import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as vision
import mindspore.dataset.vision.py_transforms as py_vision
from mindspore import log as logger
from util import diff_mse

DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"


def _assert_decoded_matches_opencv(decoded_dataset, raw_dataset):
    """
    Assert that every decoded image equals the OpenCV decoding of the raw image.

    Both datasets are iterated in lockstep with numpy output. The "image"
    column of `raw_dataset` is decoded with cv2.imdecode and converted from
    BGR to RGB, then compared (shape and MSE) against the "image" column of
    `decoded_dataset`.

    Args:
        decoded_dataset: Dataset whose "image" column is already decoded.
        raw_dataset: Dataset whose "image" column holds the encoded image.
    """
    decoded_iter = decoded_dataset.create_dict_iterator(num_epochs=1, output_numpy=True)
    raw_iter = raw_dataset.create_dict_iterator(num_epochs=1, output_numpy=True)
    for decoded_item, raw_item in zip(decoded_iter, raw_iter):
        actual = decoded_item["image"]
        # cv2.imdecode returns BGR; convert so the channel order matches `actual`.
        expected = cv2.imdecode(raw_item["image"], cv2.IMREAD_COLOR)
        expected = cv2.cvtColor(expected, cv2.COLOR_BGR2RGB)
        assert actual.shape == expected.shape
        mse = diff_mse(actual, expected)
        assert mse == 0


def test_decode_op():
    """
    Test Decode op
    """
    logger.info("test_decode_op")

    # Decode with rgb format set to True
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

    # Serialize and Load dataset requires using vision.Decode instead of vision.Decode().
    data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"])

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

    _assert_decoded_matches_opencv(data1, data2)


def test_decode_op_tf_file_dataset():
    """
    Test Decode op with tf_file dataset
    """
    logger.info("test_decode_op_tf_file_dataset")

    # Decode with rgb format set to True
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.FILES)
    data1 = data1.map(operations=vision.Decode(True), input_columns=["image"])

    for item in data1.create_dict_iterator(num_epochs=1):
        logger.info('decode == {}'.format(item['image']))

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

    _assert_decoded_matches_opencv(data1, data2)


class ImageDataset:
    """
    Single-sample source for GeneratorDataset that reads one image file.

    Each item is a tuple of the file content and a randomly generated label.
    The file content is returned either as a numpy uint8 array
    (data_type="numpy") or as the raw bytes object (any other data_type).
    """

    def __init__(self, data_path, data_type="numpy"):
        """
        Args:
            data_path: Path of the image file to serve.
            data_type: "numpy" to return the content as a uint8 array;
                any other value returns the bytes object unchanged.
        """
        self.data = [data_path]
        self.label = np.random.sample((1, 1))
        self.data_type = data_type

    def __getitem__(self, index):
        """Return (file content, label) for the sample at `index`."""
        # Context manager guarantees the handle is closed even if read() raises.
        with open(self.data[index], 'rb') as f:
            img_bytes = f.read()
        if self.data_type == "numpy":
            img_bytes = np.frombuffer(img_bytes, dtype=np.uint8)

        # return bytes directly
        return (img_bytes, self.label[index])

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)


def test_read_image_decode_op():
    """
    Test py_vision.Decode on the same file fed as a numpy array and as bytes
    """
    data_path = "../data/dataset/testPK/data/class1/0.jpg"
    dataset1 = ds.GeneratorDataset(ImageDataset(data_path, data_type="numpy"), ["data", "label"])
    dataset2 = ds.GeneratorDataset(ImageDataset(data_path, data_type="bytes"), ["data", "label"])
    decode_op = py_vision.Decode()
    to_tensor = py_vision.ToTensor(output_type=np.int32)
    dataset1 = dataset1.map(operations=[decode_op, to_tensor], input_columns=["data"])
    dataset2 = dataset2.map(operations=[decode_op, to_tensor], input_columns=["data"])

    # Both input representations must decode to identical tensors.
    for item1, item2 in zip(dataset1, dataset2):
        assert np.count_nonzero(item1[0].asnumpy() - item2[0].asnumpy()) == 0


if __name__ == "__main__":
    test_decode_op()
    test_decode_op_tf_file_dataset()
    test_read_image_decode_op()