# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

import mindspore as ms
from mindspore import context, Tensor, Parameter
from mindspore.common.api import _cell_graph_executor
from mindspore.nn import Cell, TrainOneStepCell
from mindspore.nn.optim.adafactor import AdaFactor
from mindspore.ops import operations as P


class Net(Cell):
    """MatMul followed by BiasAdd, each with an optional shard strategy.

    Args:
        matmul_weight: Initial value of the MatMul weight parameter ("w1").
        add_weight: Initial value of the BiasAdd bias parameter ("bias").
        strategy1: Strategy passed to ``P.MatMul().shard``.
        strategy2: Strategy passed to ``P.BiasAdd().shard``.
    """

    def __init__(self, matmul_weight, add_weight, strategy1=None, strategy2=None):
        super().__init__()
        self.matmul = P.MatMul().shard(strategy1)
        self.add = P.BiasAdd().shard(strategy2)
        self.mul_weight = Parameter(matmul_weight, "w1")
        self.bias = Parameter(add_weight, "bias")

    def construct(self, x, b):
        """Return ``BiasAdd(MatMul(x, w1), bias)``.

        ``b`` is accepted as a second input but is not used in the computation.
        """
        out = self.matmul(x, self.mul_weight)
        out = self.add(out, self.bias)
        return out


# Shared inputs and initial weights used by every test in this module.
_x = Tensor(np.ones([64, 32]), dtype=ms.float32)
_w1 = Tensor(np.ones([32, 32]), dtype=ms.float32)
_w2 = Tensor(np.ones([32]), dtype=ms.float32)
_b = Tensor(np.ones([64, 32]), dtype=ms.float32)


def compile_net(net):
    """Wrap ``net`` in a one-step training cell with AdaFactor and compile it.

    The caller is expected to have configured the auto-parallel context
    beforehand. The context is always reset on exit, including when optimizer
    construction or compilation raises, so a failure here cannot leak parallel
    settings into subsequent tests.

    Args:
        net: The network cell to train and compile.
    """
    try:
        optimizer = AdaFactor(net.trainable_params(), learning_rate=None, weight_decay=0.9,
                              scale_parameter=False, relative_step=True,
                              warmup_init=True, compression=True)
        train_net = TrainOneStepCell(net, optimizer)
        train_net.set_auto_parallel()
        train_net.set_train()
        _cell_graph_executor.compile(train_net, _x, _b)
    finally:
        # Restore the global context even on failure.
        context.reset_auto_parallel_context()


def test_opt_data_parallel():
    """
    Feature: AdaFactor optimizer under semi-auto parallel.
    Description: 16 devices; strategies ((16, 1), (1, 1)) for MatMul and
        ((16, 1), (1,)) for BiasAdd.
    Expectation: compilation completes without raising.
    """
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
    strategy1 = ((16, 1), (1, 1))
    strategy2 = ((16, 1), (1,))
    net = Net(_w1, _w2, strategy1, strategy2)
    compile_net(net)


def test_opt_model_parallel():
    """
    Feature: AdaFactor optimizer under semi-auto parallel.
    Description: 16 devices; strategies ((4, 2), (2, 2)) for MatMul and
        ((4, 2), (2,)) for BiasAdd.
    Expectation: compilation completes without raising.
    """
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
    strategy1 = ((4, 2), (2, 2))
    strategy2 = ((4, 2), (2,))
    net = Net(_w1, _w2, strategy1, strategy2)
    compile_net(net)