• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2019 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15import copy
16import numpy as np
17import pytest
18
19import mindspore.common.dtype as mstype
20import mindspore.dataset as ds
21import mindspore.dataset.engine.iterators as it
22from mindspore import log as logger
23from mindspore import Tensor
24
25
26# Generate 1d int numpy array from 0 - 63
27def generator_1d():
28    for i in range(64):
29        yield (np.array([i]),)
30
31
32class DatasetGenerator:
33    def __init__(self):
34        pass
35
36    def __getitem__(self, item):
37        return (np.array([item]),)
38
39    def __len__(self):
40        return 10
41
42
43class DatasetGeneratorLarge:
44    def __init__(self):
45        self.data = np.array(range(4000))
46
47    def __getitem__(self, item):
48        return (self.data + item, self.data *10)
49
50    def __len__(self):
51        return 10
52
53
54def test_generator_0():
55    """
56    Test 1D Generator
57    """
58    logger.info("Test 1D Generator : 0 - 63")
59
60    # apply dataset operations
61    data1 = ds.GeneratorDataset(generator_1d, ["data"])
62
63    i = 0
64    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
65        golden = np.array([i])
66        np.testing.assert_array_equal(item["data"], golden)
67        i = i + 1
68
69
70# Generate md int numpy array from [[0, 1], [2, 3]] to [[63, 64], [65, 66]]
71def generator_md():
72    for i in range(64):
73        yield (np.array([[i, i + 1], [i + 2, i + 3]]),)
74
75
76def test_generator_1():
77    """
78    Test MD Generator
79    """
80    logger.info("Test MD Generator : 0 - 63, with shape [2, 2]")
81
82    # apply dataset operations
83    data1 = ds.GeneratorDataset(generator_md, ["data"])
84
85    i = 0
86    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
87        golden = np.array([[i, i + 1], [i + 2, i + 3]])
88        np.testing.assert_array_equal(item["data"], golden)
89        i = i + 1
90
91
92# Generate two columns, the first column is from Generator1D, the second column is from GeneratorMD
93def generator_mc(maxid=64):
94    for i in range(maxid):
95        yield (np.array([i]), np.array([[i, i + 1], [i + 2, i + 3]]))
96
97
98def test_generator_2():
99    """
100    Test multi column generator
101    """
102    logger.info("Test multi column generator")
103
104    # apply dataset operations
105    data1 = ds.GeneratorDataset(generator_mc, ["col0", "col1"])
106
107    i = 0
108    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
109        golden = np.array([i])
110        np.testing.assert_array_equal(item["col0"], golden)
111        golden = np.array([[i, i + 1], [i + 2, i + 3]])
112        np.testing.assert_array_equal(item["col1"], golden)
113        i = i + 1
114
115
116def test_generator_3():
117    """
118    Test 1D Generator + repeat(4)
119    """
120    logger.info("Test 1D Generator : 0 - 63 + Repeat(4)")
121
122    # apply dataset operations
123    data1 = ds.GeneratorDataset(generator_1d, ["data"])
124
125    data1 = data1.repeat(4)
126
127    i = 0
128    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
129        golden = np.array([i])
130        np.testing.assert_array_equal(item["data"], golden)
131        i = i + 1
132        if i == 64:
133            i = 0
134
135
136def test_generator_4():
137    """
138    Test fixed size 1D Generator + batch
139    """
140    logger.info("Test 1D Generator : 0 - 63 + batch(4)")
141
142    # apply dataset operations
143    data1 = ds.GeneratorDataset(generator_1d, ["data"])
144
145    data1 = data1.batch(4)
146
147    i = 0
148    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
149        golden = np.array([[i], [i + 1], [i + 2], [i + 3]])
150        np.testing.assert_array_equal(item["data"], golden)
151        i = i + 4
152
153
154def generator_with_type(t):
155    for i in range(64):
156        yield (np.array([i], dtype=t),)
157
158
159def type_tester(t):
160    logger.info("Test with Type {}".format(t.__name__))
161
162    # apply dataset operations
163    data1 = ds.GeneratorDataset((lambda: generator_with_type(t)), ["data"])
164
165    data1 = data1.batch(4)
166
167    i = 0
168    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
169        golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t)
170        np.testing.assert_array_equal(item["data"], golden)
171        i = i + 4
172
173
174def test_generator_5():
175    """
176    Test 1D Generator on different data type
177    """
178    logger.info("Test 1D Generator on all data types")
179
180    types = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, np.float32, np.float64]
181
182    for t in types:
183        type_tester(t)
184
185
186def type_tester_with_type_check(t, c):
187    logger.info("Test with Type {}".format(t.__name__))
188
189    # apply dataset operations
190    data1 = ds.GeneratorDataset((lambda: generator_with_type(t)), ["data"], column_types=[c])
191
192    data1 = data1.batch(4)
193
194    i = 0
195    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
196        golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t)
197        np.testing.assert_array_equal(item["data"], golden)
198        i = i + 4
199
200
201def test_generator_6():
202    """
203    Test 1D Generator on different data type with type check
204    """
205    logger.info("Test 1D Generator on all data types with type check")
206
207    np_types = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, np.float32,
208                np.float64]
209    de_types = [mstype.int8, mstype.int16, mstype.int32, mstype.int64, mstype.uint8, mstype.uint16, mstype.uint32,
210                mstype.uint64, mstype.float32, mstype.float64]
211
212    for i, _ in enumerate(np_types):
213        type_tester_with_type_check(np_types[i], de_types[i])
214
215
216def generator_with_type_2c(t):
217    for i in range(64):
218        yield (np.array([i], dtype=t), np.array([i], dtype=t))
219
220
221def type_tester_with_type_check_2c(t, c):
222    logger.info("Test with Type {}".format(t.__name__))
223
224    # apply dataset operations
225    data1 = ds.GeneratorDataset((lambda: generator_with_type_2c(t)), ["data0", "data1"], column_types=c)
226
227    data1 = data1.batch(4)
228
229    i = 0
230    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
231        golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t)
232        np.testing.assert_array_equal(item["data0"], golden)
233        i = i + 4
234
235
236def test_generator_7():
237    """
238    Test 2 column Generator on different data type with type check
239    """
240    logger.info("Test 2 column Generator on all data types with type check")
241
242    np_types = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, np.float32,
243                np.float64]
244    de_types = [mstype.int8, mstype.int16, mstype.int32, mstype.int64, mstype.uint8, mstype.uint16, mstype.uint32,
245                mstype.uint64, mstype.float32, mstype.float64]
246
247    for i, _ in enumerate(np_types):
248        type_tester_with_type_check_2c(np_types[i], [None, de_types[i]])
249
250
251def test_generator_8():
252    """
253    Test multi column generator with few mapops
254    """
255    logger.info("Test multi column generator with mapops to check the order too")
256
257    # apply dataset operations
258    data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
259    data1 = data1.map(operations=(lambda x: x * 3), input_columns="col0", output_columns="out0",
260                      num_parallel_workers=2)
261    data1 = data1.map(operations=(lambda x: (x * 7, x)), input_columns="col1", output_columns=["out1", "out2"],
262                      num_parallel_workers=2, column_order=["out0", "out1", "out2"])
263    data1 = data1.map(operations=(lambda x: x + 1), input_columns="out2", output_columns="out2",
264                      num_parallel_workers=2)
265
266    i = 0
267    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
268        golden = np.array([i * 3])
269        np.testing.assert_array_equal(item["out0"], golden)
270        golden = np.array([[i * 7, (i + 1) * 7], [(i + 2) * 7, (i + 3) * 7]])
271        np.testing.assert_array_equal(item["out1"], golden)
272        golden = np.array([[i + 1, i + 2], [i + 3, i + 4]])
273        np.testing.assert_array_equal(item["out2"], golden)
274        i = i + 1
275
276
277def test_generator_9():
278    """
279    Test map column order when len(input_columns) == len(output_columns).
280    """
281    logger.info("Test map column order when len(input_columns) == len(output_columns).")
282
283    # apply dataset operations
284    data1 = ds.GeneratorDataset(generator_mc(2048), ["image", "label"])
285    data2 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
286    data1 = data1.map(operations=(lambda x: x * 3), input_columns="label",
287                      num_parallel_workers=4)
288    data2 = data2.map(operations=(lambda x: x * 3), input_columns="label",
289                      num_parallel_workers=4)
290
291    # Expected column order is not changed.
292    # data1 = data[0] is "image" and data[1] is "label"
293    # data2 = data[0] is "label" and data[1] is "image"
294    i = 0
295    for data1, data2 in zip(data1, data2):  # each data is a dictionary
296        golden = np.array([i])
297        np.testing.assert_array_equal(data1[0].asnumpy(), golden)
298        golden = np.array([[i * 3, (i + 1) * 3], [(i + 2) * 3, (i + 3) * 3]])
299        np.testing.assert_array_equal(data1[1].asnumpy(), golden)
300
301        golden = np.array([i * 3])
302        np.testing.assert_array_equal(data2[0].asnumpy(), golden)
303        golden = np.array([[i, i + 1], [i + 2, i + 3]])
304        np.testing.assert_array_equal(data2[1].asnumpy(), golden)
305        i = i + 1
306
307
308def test_generator_10():
309    """
310    Test map column order when len(input_columns) != len(output_columns).
311    """
312    logger.info("Test map column order when len(input_columns) != len(output_columns).")
313
314    # apply dataset operations
315    data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
316    data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
317                      column_order=['col0', 'out1', 'out2'], num_parallel_workers=2)
318
319    # Expected column order is |col0|out1|out2|
320    i = 0
321    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
322        golden = np.array([i])
323        np.testing.assert_array_equal(item[0], golden)
324        golden = np.array([[i, i + 1], [i + 2, i + 3]])
325        np.testing.assert_array_equal(item[1], golden)
326        golden = np.array([[i * 5, (i + 1) * 5], [(i + 2) * 5, (i + 3) * 5]])
327        np.testing.assert_array_equal(item[2], golden)
328        i = i + 1
329
330
331def test_generator_11():
332    """
333    Test map column order when len(input_columns) != len(output_columns).
334    """
335    logger.info("Test map column order when len(input_columns) != len(output_columns), "
336                "and column_order drops some columns.")
337
338    # apply dataset operations
339    data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
340    data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"],
341                      column_order=['out1', 'out2'], num_parallel_workers=2)
342
343    # Expected column order is |out1|out2|
344    i = 0
345    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
346        # len should be 2 because col0 is dropped (not included in column_order)
347        assert len(item) == 2
348        golden = np.array([[i, i + 1], [i + 2, i + 3]])
349        np.testing.assert_array_equal(item[0], golden)
350        golden = np.array([[i * 5, (i + 1) * 5], [(i + 2) * 5, (i + 3) * 5]])
351        np.testing.assert_array_equal(item[1], golden)
352        i = i + 1
353
354
355def test_generator_12():
356    """
357    Test map column order when input_columns and output_columns are None.
358    """
359    logger.info("Test map column order when input_columns and output_columns are None.")
360
361    # apply dataset operations
362    data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
363    data1 = data1.map(operations=(lambda x: (x * 5)), num_parallel_workers=2)
364
365    # Expected column order is |col0|col1|
366    i = 0
367    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
368        assert len(item) == 2
369        golden = np.array([i * 5])
370        np.testing.assert_array_equal(item[0], golden)
371        golden = np.array([[i, i + 1], [i + 2, i + 3]])
372        np.testing.assert_array_equal(item[1], golden)
373        i = i + 1
374
375    data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
376    data1 = data1.map(operations=(lambda x: (x * 5)), column_order=["col1", "col0"], num_parallel_workers=2)
377
378    # Expected column order is |col0|col1|
379    i = 0
380    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
381        assert len(item) == 2
382        golden = np.array([i * 5])
383        np.testing.assert_array_equal(item[1], golden)
384        golden = np.array([[i, i + 1], [i + 2, i + 3]])
385        np.testing.assert_array_equal(item[0], golden)
386        i = i + 1
387
388
389def test_generator_13():
390    """
391    Test map column order when input_columns is None.
392    """
393    logger.info("Test map column order when input_columns is None.")
394
395    # apply dataset operations
396    data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"])
397    data1 = data1.map(operations=(lambda x: (x * 5)), output_columns=["out0"], num_parallel_workers=2)
398
399    # Expected column order is |out0|col1|
400    i = 0
401    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
402        assert len(item) == 2
403        golden = np.array([i * 5])
404        np.testing.assert_array_equal(item[0], golden)
405        golden = np.array([[i, i + 1], [i + 2, i + 3]])
406        np.testing.assert_array_equal(item[1], golden)
407        i = i + 1
408
409    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
410        # len should be 2 because col0 is dropped (not included in column_order)
411        assert len(item) == 2
412        golden = np.array([i * 5])
413        np.testing.assert_array_equal(item["out0"], golden)
414        golden = np.array([[i, i + 1], [i + 2, i + 3]])
415        np.testing.assert_array_equal(item["col1"], golden)
416        i = i + 1
417
418
419def test_generator_14():
420    """
421    Test 1D Generator MP + CPP sampler
422    """
423    logger.info("Test 1D Generator MP : 0 - 63")
424    # Sometimes there are some ITERATORS left in ITERATORS_LIST when run all UTs together,
425    # and cause core dump and blocking in this UT. Add cleanup() here to fix it.
426    it._cleanup()  # pylint: disable=W0212
427
428    ##reduce memory needed by reducing queue size
429    prefetch_original = ds.config.get_prefetch_size()
430    ds.config.set_prefetch_size(1)
431
432    source = [(np.array([x]),) for x in range(256)]
433    ds1 = ds.GeneratorDataset(source, ["data"], sampler=ds.SequentialSampler(),
434                              num_parallel_workers=4, max_rowsize=1).repeat(2)
435    i = 0
436    for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
437        golden = np.array([i])
438        np.testing.assert_array_equal(data["data"], golden)
439        i = i + 1
440        if i == 256:
441            i = 0
442
443    ds.config.set_prefetch_size(prefetch_original)
444
445def test_generator_15():
446    """
447    Test 1D Generator MP + Python sampler
448    """
449    logger.info("Test 1D Generator MP : 0 - 63")
450
451    ##reduce memory needed by reducing queue size
452    prefetch_original = ds.config.get_prefetch_size()
453    ds.config.set_prefetch_size(1)
454
455    sampler = [x for x in range(256)]
456    source = [(np.array([x]),) for x in range(256)]
457    ds1 = ds.GeneratorDataset(source, ["data"], sampler=sampler,
458                              num_parallel_workers=4, max_rowsize=1).repeat(1)
459    i = 0
460    for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
461        golden = np.array([i])
462        np.testing.assert_array_equal(data["data"], golden)
463        i = i + 1
464        if i == 256:
465            i = 0
466
467    ds.config.set_prefetch_size(prefetch_original)
468
469def test_generator_16():
470    """
471    Test multi column generator Mp + CPP sampler
472    """
473    logger.info("Test multi column generator")
474
475    source = [(np.array([x]), np.array([x + 1])) for x in range(256)]
476    # apply dataset operations
477    data1 = ds.GeneratorDataset(source, ["col0", "col1"], sampler=ds.SequentialSampler())
478
479    i = 0
480    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
481        golden = np.array([i])
482        np.testing.assert_array_equal(item["col0"], golden)
483        golden = np.array([i + 1])
484        np.testing.assert_array_equal(item["col1"], golden)
485        i = i + 1
486
487
488def test_generator_17():
489    """
490    Test multi column generator Mp + Python sampler
491    """
492    logger.info("Test multi column generator")
493
494    sampler = [x for x in range(256)]
495    source = [(np.array([x]), np.array([x + 1])) for x in range(256)]
496    # apply dataset operations
497    data1 = ds.GeneratorDataset(source, ["col0", "col1"], sampler=sampler)
498
499    i = 0
500    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
501        golden = np.array([i])
502        np.testing.assert_array_equal(item["col0"], golden)
503        golden = np.array([i + 1])
504        np.testing.assert_array_equal(item["col1"], golden)
505        i = i + 1
506
507
508def test_generator_18():
509    """
510    Test multiprocessing flag (same as test 13 with python_multiprocessing=True flag)
511    """
512    logger.info("Test map column order when input_columns is None.")
513
514    #reduce shm usage by disabling this optimization
515    mem_original = ds.config.get_enable_shared_mem()
516    ds.config.set_enable_shared_mem(False)
517
518    # apply dataset operations
519    data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"], python_multiprocessing=True)
520    data1 = data1.map(operations=(lambda x: (x * 5)), output_columns=["out0"], num_parallel_workers=2,
521                      python_multiprocessing=True)
522
523    # Expected column order is |out0|col1|
524    i = 0
525    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
526        assert len(item) == 2
527        golden = np.array([i * 5])
528        np.testing.assert_array_equal(item[0], golden)
529        golden = np.array([[i, i + 1], [i + 2, i + 3]])
530        np.testing.assert_array_equal(item[1], golden)
531        i = i + 1
532
533    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
534        # len should be 2 because col0 is dropped (not included in column_order)
535        assert len(item) == 2
536        golden = np.array([i * 5])
537        np.testing.assert_array_equal(item["out0"], golden)
538
539    ds.config.set_enable_shared_mem(mem_original)
540
541def test_generator_19():
542    """
543    Test multiprocessing flag with 2 different large columns
544    """
545    logger.info("Test map column order when input_columns is None.")
546
547    # apply dataset operations
548    data1 = ds.GeneratorDataset(DatasetGeneratorLarge(), ["col0", "col1"], python_multiprocessing=True, shuffle=False)
549
550    # Expected column order is |out0|col1|
551    i = 0
552    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
553        assert len(item) == 2
554        golden = np.array(range(4000)) + i
555        np.testing.assert_array_equal(item[0], golden)
556        golden = np.array(range(4000)) * 10
557        np.testing.assert_array_equal(item[1], golden)
558        i = i + 1
559
560
561class RandomAccessDataset:
562    def __init__(self):
563        self.__data = np.random.sample((5, 1))
564
565    def __getitem__(self, item):
566        return self.__data[item]
567
568    def __len__(self):
569        return 5
570
571
572class RandomAccessDatasetWithoutLen:
573    def __init__(self):
574        self.__data = np.random.sample((5, 1))
575
576    def __getitem__(self, item):
577        return self.__data[item]
578
579
580class IterableDataset:
581    def __init__(self):
582        self.count = 0
583        self.max = 10
584
585    def __iter__(self):
586        return self
587
588    def __next__(self):
589        if self.count >= self.max:
590            raise StopIteration
591        self.count += 1
592        return (np.array(self.count),)
593
594
595def test_generator_20():
596    """
597    Test mappable and unmappable dataset as source for GeneratorDataset.
598    """
599    logger.info("Test mappable and unmappable dataset as source for GeneratorDataset.")
600
601    # Mappable dataset
602    data1 = ds.GeneratorDataset(RandomAccessDataset(), ["col0"])
603    dataset_size1 = data1.get_dataset_size()
604    assert dataset_size1 == 5
605
606    # Mappable dataset without __len__
607    data2 = ds.GeneratorDataset(RandomAccessDatasetWithoutLen(), ["col0"])
608    try:
609        data2.get_dataset_size()
610    except RuntimeError as e:
611        assert "'__len__' method is required" in str(e)
612
613    # Unmappable dataset
614    data3 = ds.GeneratorDataset(IterableDataset(), ["col0"])
615    dataset_size3 = data3.get_dataset_size()
616    assert dataset_size3 == 10
617
618
619def test_generator_error_1():
620    def generator_np():
621        for i in range(64):
622            yield (np.array([{i}]),)
623
624    with pytest.raises(RuntimeError) as info:
625        data1 = ds.GeneratorDataset(generator_np, ["data"])
626        for _ in data1:
627            pass
628    assert "Invalid data type" in str(info.value)
629
630
631def test_generator_error_2():
632    def generator_np():
633        for i in range(64):
634            yield ({i},)
635
636    with pytest.raises(RuntimeError) as info:
637        data1 = ds.GeneratorDataset(generator_np, ["data"])
638        for _ in data1:
639            pass
640    print("========", str(info.value))
641    assert "Generator should return a tuple of NumPy arrays" in str(info.value)
642
643
644def test_generator_error_3():
645    with pytest.raises(ValueError) as info:
646        # apply dataset operations
647        data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
648        data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"], output_columns=["out1", "out2"],
649                          num_parallel_workers=2)
650
651        for _ in data1:
652            pass
653    assert "When length of input_columns and output_columns are not equal, column_order must be specified." in \
654           str(info.value)
655
656
657def test_generator_error_4():
658    with pytest.raises(RuntimeError) as info:
659        # apply dataset operations
660        data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"])
661        data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"],
662                          num_parallel_workers=2)
663
664        for _ in data1:
665            pass
666    assert "Unexpected error. Result of a tensorOp doesn't match output column names" in str(info.value)
667
668
669def test_generator_sequential_sampler():
670    source = [(np.array([x]),) for x in range(64)]
671    ds1 = ds.GeneratorDataset(source, ["data"], sampler=ds.SequentialSampler())
672    i = 0
673    for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
674        golden = np.array([i])
675        np.testing.assert_array_equal(data["data"], golden)
676        i = i + 1
677
678
679def test_generator_random_sampler():
680    source = [(np.array([x]),) for x in range(64)]
681    ds1 = ds.GeneratorDataset(source, ["data"], shuffle=True)
682    for _ in ds1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
683        pass
684
685
686def test_generator_distributed_sampler():
687    source = [(np.array([x]),) for x in range(64)]
688    for sid in range(8):
689        ds1 = ds.GeneratorDataset(source, ["data"], shuffle=False, num_shards=8, shard_id=sid)
690        i = sid
691        for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
692            golden = np.array([i])
693            np.testing.assert_array_equal(data["data"], golden)
694            i = i + 8
695
696
697def test_generator_num_samples():
698    source = [(np.array([x]),) for x in range(64)]
699    num_samples = 32
700    ds1 = ds.GeneratorDataset(source, ["data"], sampler=ds.SequentialSampler(num_samples=num_samples))
701    ds2 = ds.GeneratorDataset(source, ["data"], sampler=[i for i in range(32)], num_samples=num_samples)
702    ds3 = ds.GeneratorDataset(generator_1d, ["data"], num_samples=num_samples)
703
704    count = 0
705    for _ in ds1.create_dict_iterator(num_epochs=1):
706        count = count + 1
707    assert count == num_samples
708
709    count = 0
710    for _ in ds2.create_dict_iterator(num_epochs=1):
711        count = count + 1
712    assert count == num_samples
713
714    count = 0
715    for _ in ds3.create_dict_iterator(num_epochs=1):
716        count = count + 1
717    assert count == num_samples
718
719
720def test_generator_num_samples_underflow():
721    source = [(np.array([x]),) for x in range(64)]
722    num_samples = 256
723    ds2 = ds.GeneratorDataset(source, ["data"], sampler=[i for i in range(64)], num_samples=num_samples)
724    ds3 = ds.GeneratorDataset(generator_1d, ["data"], num_samples=num_samples)
725
726    count = 0
727    for _ in ds2.create_dict_iterator(num_epochs=1):
728        count = count + 1
729    assert count == 64
730
731    count = 0
732    for _ in ds3.create_dict_iterator(num_epochs=1):
733        count = count + 1
734    assert count == 64
735
736
737def type_tester_with_type_check_2c_schema(t, c):
738    logger.info("Test with Type {}".format(t.__name__))
739
740    schema = ds.Schema()
741    schema.add_column("data0", c[0])
742    schema.add_column("data1", c[1])
743
744    # apply dataset operations
745    data1 = ds.GeneratorDataset((lambda: generator_with_type_2c(t)), schema=schema)
746
747    data1 = data1.batch(4)
748
749    i = 0
750    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
751        golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t)
752        np.testing.assert_array_equal(item["data0"], golden)
753        i = i + 4
754
755
756def test_generator_schema():
757    """
758    Test 2 column Generator on different data type with type check with schema input
759    """
760    logger.info("Test 2 column Generator on all data types with type check")
761
762    np_types = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, np.float32,
763                np.float64]
764    de_types = [mstype.int8, mstype.int16, mstype.int32, mstype.int64, mstype.uint8, mstype.uint16, mstype.uint32,
765                mstype.uint64, mstype.float32, mstype.float64]
766
767    for i, _ in enumerate(np_types):
768        type_tester_with_type_check_2c_schema(np_types[i], [de_types[i], de_types[i]])
769
770
771def test_generator_dataset_size_0():
772    """
773    Test GeneratorDataset get_dataset_size by iterator method.
774    """
775    logger.info("Test 1D Generator : 0 - 63 get_dataset_size")
776
777    data1 = ds.GeneratorDataset(generator_1d, ["data"])
778    data_size = data1.get_dataset_size()
779
780    num_rows = 0
781    for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):  # each data is a dictionary
782        num_rows = num_rows + 1
783    assert data_size == num_rows
784
785
786def test_generator_dataset_size_1():
787    """
788    Test GeneratorDataset get_dataset_size by __len__ method.
789    """
790    logger.info("Test DatasetGenerator get_dataset_size")
791
792    dataset_generator = DatasetGenerator()
793    data1 = ds.GeneratorDataset(dataset_generator, ["data"])
794
795    data_size = data1.get_dataset_size()
796
797    num_rows = 0
798    for _ in data1.create_dict_iterator(num_epochs=1):
799        num_rows = num_rows + 1
800    assert data_size == num_rows
801
802
803def test_generator_dataset_size_2():
804    """
805    Test GeneratorDataset + repeat get_dataset_size
806    """
807    logger.info("Test 1D Generator + repeat get_dataset_size")
808
809    data1 = ds.GeneratorDataset(generator_1d, ["data"])
810    data1 = data1.repeat(2)
811
812    data_size = data1.get_dataset_size()
813
814    num_rows = 0
815    for _ in data1.create_dict_iterator(num_epochs=1):
816        num_rows = num_rows + 1
817    assert data_size == num_rows
818
819
820def test_generator_dataset_size_3():
821    """
822    Test GeneratorDataset + batch get_dataset_size
823    """
824    logger.info("Test 1D Generator + batch get_dataset_size")
825
826    data1 = ds.GeneratorDataset(generator_1d, ["data"])
827    data1 = data1.batch(4)
828
829    data_size = data1.get_dataset_size()
830
831    num_rows = 0
832    for _ in data1.create_dict_iterator(num_epochs=1):
833        num_rows += 1
834    assert data_size == num_rows
835
836
837def test_generator_dataset_size_4():
838    """
839    Test GeneratorDataset + num_shards
840    """
841    logger.info("Test 1D Generator : 0 - 63 + num_shards get_dataset_size")
842
843    dataset_generator = DatasetGenerator()
844    data1 = ds.GeneratorDataset(dataset_generator, ["data"], num_shards=3, shard_id=0)
845    data_size = data1.get_dataset_size()
846
847    num_rows = 0
848    for _ in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
849        num_rows = num_rows + 1
850    assert data_size == num_rows
851
852
853def test_generator_dataset_size_5():
854    """
855    Test get_dataset_size after create_dict_iterator
856    """
857    logger.info("Test get_dataset_size after create_dict_iterator")
858
859    dataset_generator = DatasetGenerator()
860    data1 = ds.GeneratorDataset(dataset_generator, ["data"], num_shards=3, shard_id=0)
861
862    num_rows = 0
863    for _ in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
864        num_rows = num_rows + 1
865    data_size = data1.get_dataset_size()
866    assert data_size == num_rows
867
868
869def manual_test_generator_keyboard_interrupt():
870    """
871    Test keyboard_interrupt
872    """
873    logger.info("Test 1D Generator MP : 0 - 63")
874
875    class MyDS():
876        def __getitem__(self, item):
877            while True:
878                pass
879
880        def __len__(self):
881            return 1024
882
883    ds1 = ds.GeneratorDataset(MyDS(), ["data"], num_parallel_workers=4).repeat(2)
884    for _ in ds1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
885        pass
886
887
888def test_explicit_deepcopy():
889    """
890    Test explicit_deepcopy
891    """
892    logger.info("Test explicit_deepcopy")
893
894    ds1 = ds.NumpySlicesDataset([1, 2], shuffle=False)
895    ds2 = copy.deepcopy(ds1)
896    for d1, d2 in zip(ds1, ds2):
897        assert d1 == d2
898
899def test_func_generator_dataset_005():
900    """
901    generator: class __getitem__
902    """
903    result = [np.random.randn(242, 242, 242), np.random.randn(42, 24, 442)]
904
905    class MyData():
906        def __init__(self, input_para):
907            self.data = input_para
908
909        def __getitem__(self, item):
910            return (Tensor(self.data[0]), Tensor(self.data[1]))
911
912        def __len__(self):
913            return 2
914
915    column_names = ["col1", "col2"]
916    dataset = ds.GeneratorDataset(MyData(result), column_names)
917    i = 0
918    for data in dataset.create_dict_iterator(output_numpy=True):
919        assert "col1" in str(data.keys())
920        assert (data["col1"] == result[0]).all()
921        assert (data["col2"] == result[1]).all()
922        i += 1
923    assert i == 2
924
925
926if __name__ == "__main__":
927    test_generator_0()
928    test_generator_1()
929    test_generator_2()
930    test_generator_3()
931    test_generator_4()
932    test_generator_5()
933    test_generator_6()
934    test_generator_7()
935    test_generator_8()
936    test_generator_9()
937    test_generator_10()
938    test_generator_11()
939    test_generator_12()
940    test_generator_13()
941    test_generator_14()
942    test_generator_15()
943    test_generator_16()
944    test_generator_17()
945    test_generator_18()
946    test_generator_19()
947    test_generator_error_1()
948    test_generator_error_2()
949    test_generator_error_3()
950    test_generator_error_4()
951    test_generator_sequential_sampler()
952    test_generator_distributed_sampler()
953    test_generator_random_sampler()
954    test_generator_num_samples()
955    test_generator_num_samples_underflow()
956    test_generator_schema()
957    test_generator_dataset_size_0()
958    test_generator_dataset_size_1()
959    test_generator_dataset_size_2()
960    test_generator_dataset_size_3()
961    test_generator_dataset_size_4()
962    test_generator_dataset_size_5()
963    test_explicit_deepcopy()
964    test_func_generator_dataset_005()
965