• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2019 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15import mindspore.dataset as ds
16import mindspore.dataset.text as text
17import mindspore.dataset.vision.c_transforms as vision
18
# Root directory of the small VOC2012-style test dataset used by these cases.
DATA_DIR = "../data/dataset/testVOC2012"
# Basenames (file ids) of the sample images, in the dataset's default unshuffled order.
IMAGE_ID = ["32", "33", "39", "42", "61", "63", "68", "121", "123", "129"]
# Expected first dimension (shape[0]) of each decoded "image" column, per sample.
IMAGE_SHAPE = [2268, 2268, 2268, 2268, 642, 607, 561, 596, 612, 2268]
# Expected first dimension (shape[0]) of each decoded "target" column, per sample.
TARGET_SHAPE = [680, 680, 680, 680, 642, 607, 561, 596, 612, 680]
23
24
def test_voc_segmentation():
    """Iterate the Segmentation split in order and validate filename and shapes per row."""
    dataset = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train",
                            shuffle=False, decode=True, extra_metadata=True)
    # Expose the hidden meta column so the filename can be checked.
    dataset = dataset.rename("_meta-filename", "filename")
    row_index = 0
    for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        assert text.to_str(row["filename"]) == IMAGE_ID[row_index]
        assert row["image"].shape[0] == IMAGE_SHAPE[row_index]
        assert row["target"].shape[0] == TARGET_SHAPE[row_index]
        row_index += 1
    assert row_index == 10
35
36
def test_voc_detection():
    """Iterate the Detection split; check filenames, image shapes and per-class label counts."""
    dataset = ds.VOCDataset(DATA_DIR, task="Detection", usage="train",
                            shuffle=False, decode=True, extra_metadata=True)
    # Expose the hidden meta column so the filename can be checked.
    dataset = dataset.rename("_meta-filename", "filename")
    row_index = 0
    label_count = [0] * 6
    for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        assert text.to_str(row["filename"]) == IMAGE_ID[row_index]
        assert row["image"].shape[0] == IMAGE_SHAPE[row_index]
        for label in row["label"]:
            label_count[label[0]] += 1
        row_index += 1
    assert row_index == 9
    assert label_count == [3, 2, 1, 2, 4, 3]
50
51
def test_voc_class_index():
    """A custom class_indexing must survive pipeline ops and restrict emitted labels."""
    expected_index = {'car': 0, 'cat': 1, 'train': 5}
    dataset = ds.VOCDataset(DATA_DIR, task="Detection", usage="train",
                            class_indexing={'car': 0, 'cat': 1, 'train': 5}, decode=True)
    assert dataset.get_class_indexing() == expected_index
    # The mapping must be preserved through downstream ops such as shuffle.
    dataset = dataset.shuffle(4)
    assert dataset.get_class_indexing() == expected_index
    row_count = 0
    label_count = [0, 0, 0, 0, 0, 0]
    for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        for label in row["label"]:
            label_count[label[0]] += 1
            # Only the three indexed classes may appear.
            assert label[0] in (0, 1, 5)
        row_count += 1
    assert row_count == 6
    assert label_count == [3, 2, 0, 0, 0, 3]
69
70
def test_voc_get_class_indexing():
    """The default class indexing covers all six classes and survives pipeline ops."""
    default_index = {'car': 0, 'cat': 1, 'chair': 2, 'dog': 3, 'person': 4, 'train': 5}
    dataset = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", decode=True)
    assert dataset.get_class_indexing() == default_index
    # The mapping must be preserved through downstream ops such as shuffle.
    dataset = dataset.shuffle(4)
    assert dataset.get_class_indexing() == default_index
    row_count = 0
    label_count = [0, 0, 0, 0, 0, 0]
    for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
        for label in row["label"]:
            label_count[label[0]] += 1
            assert label[0] in (0, 1, 2, 3, 4, 5)
        row_count += 1
    assert row_count == 9
    assert label_count == [3, 2, 1, 2, 4, 3]
87
88
def test_voc_meta_column():
    """Exercise the hidden "_meta-filename" column under several map/rename layouts."""
    # Scenario 1: without renaming, the meta column stays hidden and
    # tuple rows expose only the two regular columns.
    data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train",
                         decode=True, shuffle=False, extra_metadata=True)
    row_count = 0
    for row in data.create_tuple_iterator():
        assert len(row) == 2
        row_count += 1

    # Scenario 2: a map whose input_columns equal its output_columns keeps
    # the meta column, which becomes visible after rename (3rd tuple slot).
    def identity(img, label):
        return img, label

    data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train",
                         decode=True, shuffle=False, extra_metadata=True)
    data = data.map(operations=identity, input_columns=["image", "target"])
    data = data.rename("_meta-filename", "filename")
    row_count = 0
    for row in data.create_tuple_iterator(output_numpy=True):
        assert text.to_str(row[2]) == IMAGE_ID[row_count]
        row_count += 1

    # Scenario 3: map growing the column set; column_order puts the meta
    # column first (1st tuple slot after rename).
    def duplicate_image(img, label):
        return img, img, label

    data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train",
                         decode=True, shuffle=False, extra_metadata=True)
    data = data.map(operations=duplicate_image, input_columns=["image", "target"],
                    output_columns=["img1", "img2", "label"],
                    column_order=["_meta-filename", "img1", "img2", "label"])
    data = data.rename("_meta-filename", "filename")
    row_count = 0
    for row in data.create_tuple_iterator(output_numpy=True):
        assert text.to_str(row[0]) == IMAGE_ID[row_count]
        row_count += 1

    # Scenario 4: map shrinking the column set; meta column still usable.
    def keep_image(img, label):
        return img

    data = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train",
                         decode=True, shuffle=False, extra_metadata=True)
    data = data.map(operations=keep_image, input_columns=["image", "target"],
                    output_columns=["img1"],
                    column_order=["_meta-filename", "img1"])
    data = data.rename("_meta-filename", "filename")
    row_count = 0
    for row in data.create_tuple_iterator(output_numpy=True):
        assert text.to_str(row[0]) == IMAGE_ID[row_count]
        row_count += 1
134
135
def test_case_0():
    """Resize both columns, repeat x4, batch by 2: 10 samples -> 20 full batches."""
    pipeline = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True)

    resize = vision.Resize((224, 224))
    pipeline = pipeline.map(operations=resize, input_columns=["image"])
    pipeline = pipeline.map(operations=resize, input_columns=["target"])
    pipeline = pipeline.repeat(4)
    pipeline = pipeline.batch(2, drop_remainder=True)

    batch_count = sum(1 for _ in pipeline.create_dict_iterator(num_epochs=1, output_numpy=True))
    assert batch_count == 20
152
153
def test_case_1():
    """Resize, repeat x4, padded batch by 2: 9 samples -> 18 full batches."""
    pipeline = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", decode=True)

    resize = vision.Resize((224, 224))
    pipeline = pipeline.map(operations=resize, input_columns=["image"])
    pipeline = pipeline.repeat(4)
    # pad_info={} pads variable-length columns (e.g. bboxes) to a common shape.
    pipeline = pipeline.batch(2, drop_remainder=True, pad_info={})

    batch_count = sum(1 for _ in pipeline.create_dict_iterator(num_epochs=1, output_numpy=True))
    assert batch_count == 18
169
170
def test_case_2():
    """A deterministic 50/50 split of 10 samples yields two 5-sample subsets."""
    source = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", decode=True)
    first_half, second_half = source.split(sizes=[0.5, 0.5], randomize=False)

    for subset in (first_half, second_half):
        sample_count = sum(1 for _ in subset.create_dict_iterator(num_epochs=1, output_numpy=True))
        assert sample_count == 5
186
187
# Error message emitted by the pipeline when a python map function fails.
_MAP_FAIL_MSG = "map operation: [PyFunc] failed. The corresponding data files"


def _pyfunc_raise(item):
    """Map function that always fails, to test error propagation through map()."""
    raise Exception("Error occur!")


def _expect_failure(make_dataset, expected_error, message=None):
    """Build a dataset via `make_dataset` and drain it; assert that construction
    or iteration raises `expected_error` (optionally containing `message`).

    A zero-arg factory is taken instead of a dataset because some errors
    (e.g. invalid task/usage) are raised by the constructor itself.
    """
    try:
        data = make_dataset()
        for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
        assert False
    except expected_error as e:
        if message is not None:
            assert message in str(e)


def _expect_map_failure(task, column, decode_first=False):
    """Map a failing pyfunc onto `column` of the train split for `task` and
    assert the pipeline surfaces a RuntimeError naming the failing data file.

    With decode_first=True, a Decode op is applied to the column beforehand,
    so the failure happens on already-decoded data.
    """
    data = ds.VOCDataset(DATA_DIR, task=task, usage="train", shuffle=False)
    if decode_first:
        data = data.map(operations=vision.Decode(), input_columns=[column], num_parallel_workers=1)
    data = data.map(operations=_pyfunc_raise, input_columns=[column], num_parallel_workers=1)
    _expect_failure(lambda: data, RuntimeError, _MAP_FAIL_MSG)


def test_voc_exception():
    """Verify VOCDataset raises the expected errors for invalid arguments,
    malformed annotation files, and failing map() python functions."""
    # Unsupported task string -> ValueError.
    _expect_failure(lambda: ds.VOCDataset(DATA_DIR, task="InvalidTask", usage="train", decode=True),
                    ValueError)
    # class_indexing is only valid for the Detection task -> ValueError.
    _expect_failure(lambda: ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train",
                                          class_indexing={"cat": 0}, decode=True),
                    ValueError)
    # Nonexistent usage (image-set file) -> ValueError.
    _expect_failure(lambda: ds.VOCDataset(DATA_DIR, task="Detection", usage="notexist", decode=True),
                    ValueError)
    # Missing annotation xml -> RuntimeError during iteration.
    _expect_failure(lambda: ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlnotexist", decode=True),
                    RuntimeError)
    # Malformed annotation xml -> RuntimeError.
    _expect_failure(lambda: ds.VOCDataset(DATA_DIR, task="Detection", usage="invalidxml", decode=True),
                    RuntimeError)
    # Annotation xml without any <object> -> RuntimeError.
    _expect_failure(lambda: ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlnoobject", decode=True),
                    RuntimeError)
    # Degenerate bounding box -> RuntimeError naming the offending bndbox.
    _expect_failure(lambda: ds.VOCDataset(DATA_DIR, task="Detection", usage="xmlinvalidbbox"),
                    RuntimeError, "Invalid bndbox: {321, 121, 421, 120}")

    # A raising python map function must fail the pipeline with a message
    # pointing at the data file — for every column, and on decoded input too.
    _expect_map_failure("Detection", "image")
    _expect_map_failure("Detection", "image", decode_first=True)
    _expect_map_failure("Detection", "bbox")
    _expect_map_failure("Detection", "difficult")
    _expect_map_failure("Detection", "truncate")
    _expect_map_failure("Segmentation", "image")
    _expect_map_failure("Segmentation", "image", decode_first=True)
    _expect_map_failure("Segmentation", "target")
    _expect_map_failure("Segmentation", "target", decode_first=True)
331
332
def test_voc_num_classes():
    """num_classes() is not derivable for VOC and must report None,
    with or without an explicit class_indexing."""
    plain = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True)
    assert plain.num_classes() is None

    indexed = ds.VOCDataset(DATA_DIR, task="Detection", usage="train",
                            class_indexing={'car': 0, 'cat': 1, 'train': 5}, decode=True)
    assert indexed.num_classes() is None
340
341
if __name__ == '__main__':
    # Run every test case in order when executed as a script.
    for case in (test_voc_segmentation,
                 test_voc_detection,
                 test_voc_class_index,
                 test_voc_get_class_indexing,
                 test_voc_meta_column,
                 test_case_0,
                 test_case_1,
                 test_case_2,
                 test_voc_exception,
                 test_voc_num_classes):
        case()
353