# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np

import mindspore.dataset as ds


# Tests the construction of multiple ops from a single dataset.
# A map dataset with a column_order argument should produce a ProjectOp over MapOp.
# This test does not utilize the compiling passes at this time.
def test_map_reorder0():
    def generator_mc(maxid=1):
        for _ in range(maxid):
            yield (np.array([0]), np.array([1]))

    # Generator -> Map
    data0 = ds.GeneratorDataset(generator_mc, ["col0", "col1"])

    data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out",
                      column_order=["col1", "out"])

    for item in data0.create_tuple_iterator(num_epochs=1, output_numpy=True):  # each item is a list of column values
        assert item == [np.array(1), np.array(0)]


# Tests the construction of multiple ops from a single dataset.
# A map dataset with a column_order argument should produce a ProjectOp over MapOp.
# This test does not utilize the compiling passes at this time.
def test_map_reorder1():
    def generator_mc(maxid=1):
        for _ in range(maxid):
            yield (np.array([0]), np.array([1]), np.array([2]))

    # Three maps and a zip
    data0 = ds.GeneratorDataset(generator_mc, ["a0", "a1", "a2"])
    data0 = data0.map(operations=(lambda x: x), input_columns="a0", column_order=["a2", "a1", "a0"])
    data1 = ds.GeneratorDataset(generator_mc, ["b0", "b1", "b2"])
    data1 = data1.map(operations=(lambda x: x), input_columns="b0", column_order=["b1", "b2", "b0"])
    data2 = ds.zip((data0, data1))
    data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"])

    for item in data2.create_tuple_iterator(num_epochs=1, output_numpy=True):
        assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)]


# Tests the construction of multiple ops from a single dataset.
# A TFRecordDataset with global shuffle should produce a ShuffleOp over TFReaderOp.
# This test does not utilize the compiling passes at this time.
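# The same comparison (Shuffle.GLOBAL versus Shuffle.FILES followed by a dataset-level
# shuffle, under the same seed) is repeated below for TextFileDataset and CLUEDataset.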
def test_shuffle():
    FILES = ["../data/dataset/testTFTestAllTypes/test.data"]
    SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json"

    ds.config.set_seed(1)
    data1 = ds.TFRecordDataset(FILES, schema=SCHEMA_FILE, shuffle=ds.Shuffle.GLOBAL)
    data2 = ds.TFRecordDataset(FILES, schema=SCHEMA_FILE, shuffle=ds.Shuffle.FILES)
    data2 = data2.shuffle(10000)

    for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)):
        for t1, t2 in zip(d1, d2):
            np.testing.assert_array_equal(t1, t2)

    ds.config.set_seed(1)
    DATA_ALL_FILE = "../data/dataset/testTextFileDataset/*"
    data1 = ds.TextFileDataset(DATA_ALL_FILE, shuffle=ds.Shuffle.GLOBAL)
    data2 = ds.TextFileDataset(DATA_ALL_FILE, shuffle=ds.Shuffle.FILES)
    data2 = data2.shuffle(10000)

    for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)):
        for t1, t2 in zip(d1, d2):
            np.testing.assert_array_equal(t1, t2)

    ds.config.set_seed(1)
    TRAIN_FILE = '../data/dataset/testCLUE/afqmc/train.json'
    data1 = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=ds.Shuffle.GLOBAL)
    data2 = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=ds.Shuffle.FILES)
    data2 = data2.shuffle(10000)

    for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)):
        for t1, t2 in zip(d1, d2):
            np.testing.assert_array_equal(t1, t2)


if __name__ == "__main__":
    test_map_reorder0()
    test_map_reorder1()
    test_shuffle()