# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import mindspore.dataset as ds
import mindspore.dataset.text as text
import mindspore.dataset.vision.c_transforms as vision

DATA_DIR = "../data/dataset/testVOC2012"
IMAGE_ID = ["32", "33", "39", "42", "61", "63", "68", "121", "123", "129"]
IMAGE_SHAPE = [2268, 2268, 2268, 2268, 642, 607, 561, 596, 612, 2268]
TARGET_SHAPE = [680, 680, 680, 680, 642, 607, 561, 596, 612, 680]


def test_voc_segmentation():
    """Read the Segmentation split in order and verify filename, image and target shapes."""
    data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True, extra_metadata=True)
    data1 = data1.rename("_meta-filename", "filename")
    num = 0
    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        assert text.to_str(item["filename"]) == IMAGE_ID[num]
        assert item["image"].shape[0] == IMAGE_SHAPE[num]
        assert item["target"].shape[0] == TARGET_SHAPE[num]
        num += 1
    assert num == 10


def test_voc_detection():
    """Read the Detection split in order and verify filenames, image shapes and per-class label counts."""
    data1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True, extra_metadata=True)
    data1 = data1.rename("_meta-filename", "filename")
    num = 0
    count = [0, 0, 0, 0, 0, 0]
    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        assert text.to_str(item["filename"]) == IMAGE_ID[num]
        assert item["image"].shape[0] == IMAGE_SHAPE[num]
        for label in item["label"]:
            count[label[0]] += 1
        num += 1
    assert num == 9
    assert count == [3, 2, 1, 2, 4, 3]


def test_voc_class_index():
    """Verify that a user-supplied class_indexing is honored and survives a shuffle op."""
    class_index = {'car': 0, 'cat': 1, 'train': 5}
    data1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", class_indexing=class_index, decode=True)
    class_index1 = data1.get_class_indexing()
    assert class_index1 == {'car': 0, 'cat': 1, 'train': 5}
    data1 = data1.shuffle(4)
    class_index2 = data1.get_class_indexing()
    assert class_index2 == {'car': 0, 'cat': 1, 'train': 5}
    num = 0
    count = [0, 0, 0, 0, 0, 0]
    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        for label in item["label"]:
            count[label[0]] += 1
            assert label[0] in (0, 1, 5)
        num += 1
    assert num == 6
    assert count == [3, 2, 0, 0, 0, 3]


def test_voc_get_class_indexing():
    """Verify the default class_indexing derived from annotations and that it survives a shuffle op."""
    data1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", decode=True)
    class_index1 = data1.get_class_indexing()
    assert class_index1 == {'car': 0, 'cat': 1, 'chair': 2, 'dog': 3, 'person': 4, 'train': 5}
    data1 = data1.shuffle(4)
    class_index2 = data1.get_class_indexing()
    assert class_index2 == {'car': 0, 'cat': 1, 'chair': 2, 'dog': 3, 'person': 4, 'train': 5}
    num = 0
    count = [0, 0, 0, 0, 0, 0]
    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        for label in item["label"]:
            count[label[0]] += 1
            assert label[0] in (0, 1, 2, 3, 4, 5)
        num += 1
    assert num == 9
    assert count == [3, 2, 1, 2, 4, 3]
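

# A minimal usage sketch (illustrative only: the name does not start with
# "test_", so pytest skips it, and it is not called from __main__). It shows
# the pattern the next test relies on: with extra_metadata=True, the
# "_meta-filename" column stays hidden until it is renamed into a visible
# output column.
def _voc_meta_rename_sketch():
    data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True,
                         extra_metadata=True)
    # Before the rename, iterators yield only "image" and "target".
    data = data.rename("_meta-filename", "filename")
    # After the rename, "filename" is a regular output column.
    for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        print(text.to_str(item["filename"]))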


def test_voc_meta_column():
    """Exercise the hidden "_meta-filename" column across rename and map scenarios."""
    # scenario one: without renaming the meta column, only 2 columns are output
    data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
    num = 0
    for item in data1.create_tuple_iterator():
        assert len(item) == 2
        num += 1

    # scenario two: map with input_columns == output_columns
    def pyfunc1(img, label):
        return img, label

    data2 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
    data2 = data2.map(operations=pyfunc1, input_columns=["image", "target"])
    data2 = data2.rename("_meta-filename", "filename")
    num = 0
    for item in data2.create_tuple_iterator(output_numpy=True):
        assert text.to_str(item[2]) == IMAGE_ID[num]
        num += 1

    # scenario three: map with input_columns != output_columns (one column added)
    def pyfunc2(img, label):
        return img, img, label

    data3 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
    data3 = data3.map(operations=pyfunc2, input_columns=["image", "target"], output_columns=["img1", "img2", "label"],
                      column_order=["_meta-filename", "img1", "img2", "label"])
    data3 = data3.rename("_meta-filename", "filename")
    num = 0
    for item in data3.create_tuple_iterator(output_numpy=True):
        assert text.to_str(item[0]) == IMAGE_ID[num]
        num += 1

    # scenario four: map with input_columns != output_columns (one column dropped)
    def pyfunc3(img, label):  # "target" is consumed and dropped
        return img

    data4 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True, shuffle=False, extra_metadata=True)
    data4 = data4.map(operations=pyfunc3, input_columns=["image", "target"], output_columns=["img1"],
                      column_order=["_meta-filename", "img1"])
    data4 = data4.rename("_meta-filename", "filename")
    num = 0
    for item in data4.create_tuple_iterator(output_numpy=True):
        assert text.to_str(item[0]) == IMAGE_ID[num]
        num += 1


def test_case_0():
    """Segmentation pipeline: resize both columns, repeat 4x, batch by 2."""
    data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True)

    resize_op = vision.Resize((224, 224))

    data1 = data1.map(operations=resize_op, input_columns=["image"])
    data1 = data1.map(operations=resize_op, input_columns=["target"])
    repeat_num = 4
    data1 = data1.repeat(repeat_num)
    batch_size = 2
    data1 = data1.batch(batch_size, drop_remainder=True)

    num = 0
    for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        num += 1
    assert num == 20


def test_case_1():
    """Detection pipeline: resize images, repeat 4x, padded batch by 2."""
    data1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", decode=True)

    resize_op = vision.Resize((224, 224))

    data1 = data1.map(operations=resize_op, input_columns=["image"])
    repeat_num = 4
    data1 = data1.repeat(repeat_num)
    batch_size = 2
    data1 = data1.batch(batch_size, drop_remainder=True, pad_info={})

    num = 0
    for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        num += 1
    assert num == 18


def test_case_2():
    """Split the Segmentation set 50/50 without randomization and count each half."""
    data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True)
    sizes = [0.5, 0.5]
    randomize = False
    dataset1, dataset2 = data1.split(sizes=sizes, randomize=randomize)

    num_iter = 0
    for _ in dataset1.create_dict_iterator(num_epochs=1, output_numpy=True):
        num_iter += 1
    assert num_iter == 5

    num_iter = 0
    for _ in dataset2.create_dict_iterator(num_epochs=1, output_numpy=True):
        num_iter += 1
    assert num_iter == 5
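

# A hedged sketch of explicit batch padding (illustrative only; pytest skips it
# and __main__ does not call it). test_case_1 above passes pad_info={}, which
# derives pad shapes for the variable-length per-object columns automatically;
# explicit entries pin the padded shape and fill value instead. The pad shapes
# and fill values below are assumptions for illustration, not properties of the
# test data.
def _voc_explicit_pad_sketch():
    data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True)
    data = data.map(operations=vision.Resize((224, 224)), input_columns=["image"])
    # Every variable-length column needs an entry, or samples with differing
    # object counts cannot be stacked into one batch.
    data = data.batch(2, drop_remainder=True,
                      pad_info={"bbox": ([4, 4], 0), "label": ([4, 1], -1),
                                "difficult": ([4, 1], 0), "truncate": ([4, 1], 0)})
    return data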


def test_voc_exception():
    """Invalid task/usage arguments, broken annotations, and failing pyfuncs should raise ValueError or RuntimeError."""
    try:
        data1 = ds.VOCDataset(DATA_DIR, task="InvalidTask", usage="train", decode=True)
        for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
        assert False
    except ValueError:
        pass

    try:
        data2 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", class_indexing={"cat": 0}, decode=True)
        for _ in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
        assert False
    except ValueError:
        pass

    try:
        data3 = ds.VOCDataset(DATA_DIR, task="Detection", usage="notexist", decode=True)
        for _ in data3.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
        assert False
    except ValueError:
        pass

    try:
        data4 = ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlnotexist", decode=True)
        for _ in data4.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
        assert False
    except RuntimeError:
        pass

    try:
        data5 = ds.VOCDataset(DATA_DIR, task="Detection", usage="invalidxml", decode=True)
        for _ in data5.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
        assert False
    except RuntimeError:
        pass

    try:
        data6 = ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlnoobject", decode=True)
        for _ in data6.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
        assert False
    except RuntimeError:
        pass

    try:
        data7 = ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlinvalidbbox")
        for _ in data7.create_dict_iterator(num_epochs=1):
            pass
        assert False
    except RuntimeError as e:
        assert "Invalid bndbox: {321, 121, 421, 120}" in str(e)

    def exception_func(item):  # signature must accept the mapped column
        raise Exception("Error occurred!")

    try:
        data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False)
        data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data.create_dict_iterator(output_numpy=True):
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False)
        data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1)
        data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data.create_dict_iterator(output_numpy=True):
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False)
        data = data.map(operations=exception_func, input_columns=["bbox"], num_parallel_workers=1)
        for _ in data.create_dict_iterator(output_numpy=True):
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False)
        data = data.map(operations=exception_func, input_columns=["difficult"], num_parallel_workers=1)
        for _ in data.create_dict_iterator(output_numpy=True):
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False)
        data = data.map(operations=exception_func, input_columns=["truncate"], num_parallel_workers=1)
        for _ in data.create_dict_iterator(output_numpy=True):
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False)
        data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data.create_dict_iterator(output_numpy=True):
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False)
        data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1)
        data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data.create_dict_iterator(output_numpy=True):
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False)
        data = data.map(operations=exception_func, input_columns=["target"], num_parallel_workers=1)
        for _ in data.create_dict_iterator(output_numpy=True):
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False)
        data = data.map(operations=vision.Decode(), input_columns=["target"], num_parallel_workers=1)
        data = data.map(operations=exception_func, input_columns=["target"], num_parallel_workers=1)
        for _ in data.create_dict_iterator(output_numpy=True):
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)


def test_voc_num_classes():
    """num_classes() should return None for VOCDataset, with or without class_indexing."""
    data1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True)
    assert data1.num_classes() is None

    class_index = {'car': 0, 'cat': 1, 'train': 5}
    data2 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", class_indexing=class_index, decode=True)
    assert data2.num_classes() is None


if __name__ == '__main__':
    test_voc_segmentation()
    test_voc_detection()
    test_voc_class_index()
    test_voc_get_class_indexing()
    test_voc_meta_column()
    test_case_0()
    test_case_1()
    test_case_2()
    test_voc_exception()
    test_voc_num_classes()