# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np

import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import context
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.ops import composite as C
from mindspore.common.parameter import Parameter, ParameterTuple

grad_all = C.GradOperation(get_all=True)
grad_by_list = C.GradOperation(get_by_list=True)


class CropAndResizeNet(nn.Cell):
    def __init__(self, crop_size):
        super(CropAndResizeNet, self).__init__()
        self.crop_and_resize = P.CropAndResize()
        self.crop_size = crop_size

    def construct(self, x, boxes, box_indices):
        return self.crop_and_resize(x, boxes, box_indices, self.crop_size)

    def bprop(self, x, boxes, box_indices, out, dout):
        return x, boxes, box_indices


class TestUserDefinedBpropNet(nn.Cell):
    def __init__(self, in_channel, out_channel):
        super(TestUserDefinedBpropNet, self).__init__()
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=2, stride=1, has_bias=False,
                              weight_init='ones', pad_mode='same')
        self.crop = CropAndResizeNet((10, 10))
        self.boxes = Tensor(np.ones((128, 4)).astype(np.float32))
        self.box_indices = Tensor(np.ones((128,)).astype(np.int32))

    def construct(self, x):
        x = self.relu(x)
        x = self.conv(x)
        x = self.crop(x, self.boxes, self.box_indices)
        return x


class TestUserDefinedBpropGradNet(nn.Cell):
    def __init__(self, net):
        super(TestUserDefinedBpropGradNet, self).__init__()
        self.net = net

    def construct(self, x):
        return grad_all(self.net)(x)


def test_user_defined_bprop():
    context.set_context(mode=context.GRAPH_MODE)
    net = TestUserDefinedBpropNet(3, 10)
    grad_net = TestUserDefinedBpropGradNet(net)
    x = Tensor(np.ones((128, 3, 12, 12)).astype(np.float32))
    grad_net(x)


class TwoInputBPropOperator(nn.Cell):
    def __init__(self):
        super().__init__()
        self.op = P.Mul()
        self.add = P.Add()

    def construct(self, x, y):
        return self.op(x, y)

    def bprop(self, x, y, out, dout):
        return self.add(5, x), self.add(y, 9)


class BPropOperatorNet(nn.Cell):
    def __init__(self, mul_size):
        super().__init__()
        mul_np = np.full(mul_size, 0.1, dtype=np.float32)
        floordiv_np = np.full(mul_size, 0.1, dtype=np.float32)
        self.mul_weight = Parameter(Tensor(mul_np), name="mul_weight")
        self.floordiv_weight = Parameter(Tensor(floordiv_np), name="floordiv_weight")
        self.mul = TwoInputBPropOperator()
        self.floor_div = P.FloorDiv()
        self.bn = nn.BatchNorm1d(num_features=96)

    def construct(self, inputs):
        x = self.mul(inputs, self.mul_weight)
        x = self.floor_div(x, self.floordiv_weight)
        x = self.bn(x)
        return x
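

# Illustrative sketch (not exercised by the tests in this file): a Cell that
# defines `bprop` bypasses automatic differentiation of `construct` entirely,
# so the gradient reported for this toy Cell is the all-ones tensor returned
# by `bprop`, not the derivative of x * x.
class ConstantGradCell(nn.Cell):
    def __init__(self):
        super(ConstantGradCell, self).__init__()
        self.ones_like = P.OnesLike()

    def construct(self, x):
        return x * x

    def bprop(self, x, out, dout):
        # The returned tuple must match the forward inputs one-to-one.
        return (self.ones_like(x),)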


def test_user_defined_bprop_with_u():
    net = BPropOperatorNet(mul_size=(128, 96))
    grad_net = TestUserDefinedBpropGradNet(net)
    x = Tensor(np.random.randn(128, 96).astype(np.float32))
    grad_net(x)


class SinNet(nn.Cell):
    def __init__(self):
        super(SinNet, self).__init__()
        self.sin = ops.Sin()

    def construct(self, x):
        out = self.sin(x)
        return out


class SinGrad(nn.Cell):
    def __init__(self, network):
        super(SinGrad, self).__init__()
        self.grad = ops.GradOperation()
        self.network = network

    def construct(self, x):
        gout = self.grad(self.network)(x)
        return gout


class SinGradSec(nn.Cell):
    def __init__(self, network):
        super(SinGradSec, self).__init__()
        self.grad = ops.GradOperation()
        self.network = network

    def construct(self, x):
        gout = self.grad(self.network)(x)
        return gout


def test_second_grad_with_j_primitive():
    context.set_context(mode=context.GRAPH_MODE)
    net = SinNet()
    first_grad = SinGrad(net)
    second_grad = SinGradSec(first_grad)
    x = Tensor(np.array([1.0], dtype=np.float32))
    second_grad(x)
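

# Illustrative numeric check (a sketch, not part of the original test set):
# analytically d/dx sin(x) = cos(x) and d^2/dx^2 sin(x) = -sin(x), so the
# second-order graph built above can be validated against -sin(x).
def check_sin_second_grad():
    second_grad = SinGradSec(SinGrad(SinNet()))
    x = Tensor(np.array([1.0], dtype=np.float32))
    assert np.allclose(second_grad(x).asnumpy(), -np.sin(1.0), atol=1e-5)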


# A CNode used as a free variable (FV) goes through MapMorphism after the
# MapMorphism of its call-site CNode.
def test_ad_fv_cnode_order():
    context.set_context(mode=context.GRAPH_MODE)
    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()

        # CNode xay has not yet gone through MapMorphism when CNode second_level()
        # does, because BackPropagateFv runs MapMorphism starting from the output
        # node and proceeding from left to right.
        def construct(self, x, y):
            def first_level():
                xay = x + y

                def second_level():
                    return xay

                return second_level() + xay
            return first_level()

    input_x = Tensor(np.array([1.0], dtype=np.float32))
    input_y = Tensor(np.array([2.0], dtype=np.float32))

    net = Net()
    net.add_flags_recursive(defer_inline=True)
    grad_net = grad_all(net)
    grad_net(input_x, input_y)


# The true and false branches of the switch have different numbers of parameters.
def test_if_branch_with_different_params():
    context.set_context(mode=context.GRAPH_MODE)
    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.weight1 = Parameter(Tensor(np.array([1.0], dtype=np.float32)), name="weight1")
            self.weight2 = Parameter(Tensor(np.array([2.0], dtype=np.float32)), name="weight2")

        def construct(self, idx, end, x):
            out = x
            if idx < end:
                out = out + self.weight1 * self.weight2
            else:
                out = out + self.weight1
            return out

    class GradNet(nn.Cell):
        def __init__(self, net):
            super(GradNet, self).__init__()
            self.net = net
            self.weights = ParameterTuple(net.trainable_params())

        def construct(self, idx, end, x):
            return grad_by_list(self.net, self.weights)(idx, end, x)

    idx = Tensor(np.array((0), dtype=np.int32))
    end = Tensor(np.array((3), dtype=np.int32))
    x = Tensor(np.array([2.0], dtype=np.float32))

    net = Net()
    grad_net = GradNet(net)
    grad_net(idx, end, x)


# Only lift free variables in the scope of lift_top_func_graph rather than in all
# func_graphs inside the manager. Otherwise, "Illegal AnfNode for evaluating" may
# be reported, because weight1 in Net may use the old parameter rather than the
# replicated one.
def test_limit_lift_fv_scope():
    context.set_context(mode=context.GRAPH_MODE)
    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.weight1 = Parameter(Tensor(np.array([1.0], dtype=np.float32)), name="weight1")

        def construct(self, x, y):
            def inner_add(a, b):
                return a + b

            out = inner_add(x, y) + self.weight1
            return out

    class GradNet(nn.Cell):
        def __init__(self, net):
            super(GradNet, self).__init__()
            self.net = net
            self.weights = ParameterTuple(net.trainable_params())

        def construct(self, x, y):
            def inner_grad_add(a, b):
                return a + b

            d_weight = grad_by_list(self.net, self.weights)(x, y)[0]
            d_out = inner_grad_add(d_weight, y)
            return d_out

    x = Tensor(np.array([2.0], dtype=np.float32))
    y = Tensor(np.array([2.0], dtype=np.float32))

    net = Net()
    net.add_flags_recursive(defer_inline=True)
    grad_net = GradNet(net)
    grad_net.add_flags_recursive(defer_inline=True)
    grad_net(x, y)


def test_same_primal_used_by_multi_j():
    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()

        def construct(self, x):
            return x

    class GradNet(nn.Cell):
        def __init__(self, net):
            super(GradNet, self).__init__()
            self.net = net
            self.grad = ops.GradOperation()

        def construct(self, x):
            out = self.net(x)
            gout = self.grad(self.net)(x)
            gout1 = self.grad(self.net)(x)
            return out, gout, gout1

    x = Tensor(np.array([1.0], dtype=np.float32))
    net = Net()
    grad = GradNet(net)
    grad(x)
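

# Illustrative sketch for the sens_param flag used in the next two tests:
# with sens_param=True the caller passes dout explicitly, so for an identity
# network the returned gradient equals the supplied sensitivity.
def check_sens_param_identity():
    class Identity(nn.Cell):
        def construct(self, x):
            return x

    sens = Tensor(np.full((3,), 2.0, dtype=np.float32))
    grad_fn = ops.GradOperation(sens_param=True)(Identity())
    dx = grad_fn(Tensor(np.ones((3,), np.float32)), sens)
    assert np.allclose(dx.asnumpy(), 2.0)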


def test_same_primal_used_by_multi_j_with_monad1():
    class AdamNet(nn.Cell):
        def __init__(self, var, m, v):
            super(AdamNet, self).__init__()
            self.apply_adam = P.Adam()
            self.var = Parameter(var, name="var")
            self.m = Parameter(m, name="m")
            self.v = Parameter(v, name="v")

        def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
            self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
            return self.var

    class AdamGradNet(nn.Cell):
        def __init__(self, network):
            super(AdamGradNet, self).__init__()
            self.grad_fn = ops.GradOperation(sens_param=True)
            self.sens = [Tensor(np.ones([3, 3, 3]).astype(np.float32)), Tensor(np.ones([3, 3, 3]).astype(np.float32))]
            self.network = network

        def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
            out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
            # The same primal network is handed to GradOperation twice.
            gout1 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[0])
            gout2 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[1])
            return out, gout1, gout2

    var = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    m = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    v = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    beta1_power = Tensor(np.array([0.9], dtype=np.float32))
    beta2_power = Tensor(np.array([0.999], dtype=np.float32))
    lr = Tensor(np.array([0.001], dtype=np.float32))
    beta1 = Tensor(np.array([0.9], dtype=np.float32))
    beta2 = Tensor(np.array([0.999], dtype=np.float32))
    epsilon = Tensor(np.array([1e-8], dtype=np.float32))
    grad = Tensor(np.random.rand(3, 3, 3).astype(np.float32))
    net = AdamNet(var, m, v)
    grad_net = AdamGradNet(net)
    grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)


def test_same_primal_used_by_multi_j_with_monad2():
    class AdamNet(nn.Cell):
        def __init__(self, var, m, v):
            super(AdamNet, self).__init__()
            self.apply_adam = P.Adam()
            self.var = Parameter(var, name="var")
            self.m = Parameter(m, name="m")
            self.v = Parameter(v, name="v")

        def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
            self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
            return self.var

    class AdamGradNet(nn.Cell):
        def __init__(self, network):
            super(AdamGradNet, self).__init__()
            self.grad = ops.GradOperation(sens_param=True)
            self.sens = [Tensor(np.ones([3, 3, 3]).astype(np.float32)), Tensor(np.ones([3, 3, 3]).astype(np.float32))]
            self.network = network

        def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
            out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
            # Unlike monad1 above, the gradient function is built once and reused.
            grad_fn = self.grad(self.network)
            gout1 = grad_fn(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[0])
            gout2 = grad_fn(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[1])
            return out, gout1, gout2

    var = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    m = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    v = Tensor(np.ones([3, 3, 3]).astype(np.float32))
    beta1_power = Tensor(np.array([0.9], dtype=np.float32))
    beta2_power = Tensor(np.array([0.999], dtype=np.float32))
    lr = Tensor(np.array([0.001], dtype=np.float32))
    beta1 = Tensor(np.array([0.9], dtype=np.float32))
    beta2 = Tensor(np.array([0.999], dtype=np.float32))
    epsilon = Tensor(np.array([1e-8], dtype=np.float32))
    grad = Tensor(np.random.rand(3, 3, 3).astype(np.float32))
    net = AdamNet(var, m, v)
    grad_net = AdamGradNet(net)
    grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
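

# Contrast sketch for the flag-type error cases below: GradOperation flags
# must be plain Python bools; with get_all=True the gradient function returns
# one gradient per network input (here (1.0, 1.0)).
def check_get_all_returns_per_input_grads():
    class AddNet(nn.Cell):
        def construct(self, x, y):
            return x + y

    x = Tensor(np.array([1.0], dtype=np.float32))
    y = Tensor(np.array([2.0], dtype=np.float32))
    grads = ops.GradOperation(get_all=True)(AddNet())(x, y)
    assert len(grads) == 2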


def test_grad_args_type_error1():
    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.matmul = P.MatMul()
        def construct(self, x, y):
            out = self.matmul(x, y)
            return out

    class GradNetWrtX(nn.Cell):
        def __init__(self, net):
            super(GradNetWrtX, self).__init__()
            self.net = net
            self.grad_op = ops.GradOperation(get_all=2)
        def construct(self, x, y):
            gradient_function = self.grad_op(self.net)
            return gradient_function(x, y)

    x = Tensor(np.array([2.0], dtype=np.float32))
    y = Tensor(np.array([2.0], dtype=np.float32))
    try:
        GradNetWrtX(Net())(x, y)
    except TypeError as e:
        assert "For 'GradOperation', the 'get_all' should be bool, but got" in str(e)


def test_grad_args_type_error2():
    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.matmul = P.MatMul()
        def construct(self, x, y):
            out = self.matmul(x, y)
            return out

    class GradNetWrtX(nn.Cell):
        def __init__(self, net):
            super(GradNetWrtX, self).__init__()
            self.net = net
            self.grad_op = ops.GradOperation(get_by_list=2)
        def construct(self, x, y):
            gradient_function = self.grad_op(self.net)
            return gradient_function(x, y)

    x = Tensor(np.array([2.0], dtype=np.float32))
    y = Tensor(np.array([2.0], dtype=np.float32))
    try:
        GradNetWrtX(Net())(x, y)
    except TypeError as e:
        assert "For 'GradOperation', the 'get_by_list' should be bool, but got" in str(e)


def test_grad_args_type_error3():
    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.matmul = P.MatMul()
        def construct(self, x, y):
            out = self.matmul(x, y)
            return out

    class GradNetWrtX(nn.Cell):
        def __init__(self, net):
            super(GradNetWrtX, self).__init__()
            self.net = net
            self.grad_op = ops.GradOperation(sens_param=2)
        def construct(self, x, y):
            gradient_function = self.grad_op(self.net)
            return gradient_function(x, y)

    x = Tensor(np.array([2.0], dtype=np.float32))
    y = Tensor(np.array([2.0], dtype=np.float32))
    try:
        GradNetWrtX(Net())(x, y)
    except TypeError as e:
        assert "For 'GradOperation', the 'sens_param' should be bool, but got" in str(e)
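

# Contrast sketch for the next two tests: the forward network itself must be
# handed to GradOperation before the resulting gradient function is called.
def grad_with_network_sketch(net, x, y):
    gradient_function = ops.GradOperation()(net)
    return gradient_function(x, y)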


def test_grad_net_is_none():
    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.add = P.Add()
        def construct(self, x, y):
            out = self.add(x, y)
            return out

    class GradNetWrtX(nn.Cell):
        def __init__(self, net):
            super(GradNetWrtX, self).__init__()
            self.net = P.Add()
            self.grad_op = ops.GradOperation()
        def construct(self, x, y):
            gradient_function = self.grad_op(None)
            return gradient_function(x, y)

    x = Tensor(np.array([2.0], dtype=np.float32))
    y = Tensor(np.array([2.0], dtype=np.float32))
    try:
        GradNetWrtX(Net())(x, y)
    except Exception as e:
        assert "'GradOperation' arg0 must be a 'Function' or 'Cell', but got" in str(e)


def test_grad_missing_net():
    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.add = P.Add()
        def construct(self, x, y):
            out = self.add(x, y)
            return out

    class GradNetWrtX(nn.Cell):
        def __init__(self, net):
            super(GradNetWrtX, self).__init__()
            self.net = net
            self.grad_op = ops.GradOperation()
        def construct(self, x, y):
            gradient_function = self.grad_op()
            return gradient_function(x, y)

    x = Tensor(np.array([2.0], dtype=np.float32))
    y = Tensor(np.array([2.0], dtype=np.float32))
    try:
        GradNetWrtX(Net())(x, y)
    except Exception as e:
        assert "'GradOperation' requires a forward network or function as an input, while the input is empty." \
               in str(e)


def test_user_defined_bprop_inputs_size_error():
    class BpropUserDefinedNet(nn.Cell):
        def __init__(self):
            super(BpropUserDefinedNet, self).__init__()
            self.zeros_like = P.ZerosLike()

        def construct(self, x, y):
            return x + y

        def bprop(self, out):
            return self.zeros_like(out), self.zeros_like(out)

    class BpropUserDefinedGradNet(nn.Cell):
        def __init__(self, net):
            super(BpropUserDefinedGradNet, self).__init__()
            self.net = net

        def construct(self, x, y):
            return grad_all(self.net)(x, y)

    net = BpropUserDefinedNet()
    grad_net = BpropUserDefinedGradNet(net)
    x = Tensor(np.array([2.0], dtype=np.float32))
    y = Tensor(np.array([2.0], dtype=np.float32))
    try:
        grad_net(x, y)
    except Exception as e:
        assert "The function 'bprop' of Primitive or Cell requires at least 2 params 'out' and 'dout', but got only" \
               in str(e)


def test_user_defined_bprop_net_has_parameter():
    class BpropUserDefinedNet(nn.Cell):
        def __init__(self):
            super(BpropUserDefinedNet, self).__init__()
            self.zeros_like = P.ZerosLike()
            self.x = Parameter(Tensor(np.array([2.0], dtype=np.float32)), name="x")

        def construct(self, y):
            return self.x + y

        def bprop(self, y, out, dout):
            return (self.zeros_like(out),)

    class BpropUserDefinedGradNet(nn.Cell):
        def __init__(self, net):
            super(BpropUserDefinedGradNet, self).__init__()
            self.net = net

        def construct(self, y):
            return grad_all(self.net)(y)

    net = BpropUserDefinedNet()
    grad_net = BpropUserDefinedGradNet(net)
    y = Tensor(np.array([2.0], dtype=np.float32))
    try:
        grad_net(y)
    except Exception as e:
        assert "The Cell with user defined 'bprop' function in scope" in str(e)
        assert "does not support Parameter data type." in str(e)


def test_user_defined_bprop_inputs_size_error1():
    class BpropUserDefinedNet(nn.Cell):
        def __init__(self):
            super(BpropUserDefinedNet, self).__init__()
            self.zeros_like = P.ZerosLike()

        def construct(self, x, y):
            return x + y

        def bprop(self, x, y, out):
            return self.zeros_like(out), self.zeros_like(out)

    class BpropUserDefinedGradNet(nn.Cell):
        def __init__(self, net):
            super(BpropUserDefinedGradNet, self).__init__()
            self.net = net

        def construct(self, x, y):
            return grad_all(self.net)(x, y)

    net = BpropUserDefinedNet()
    grad_net = BpropUserDefinedGradNet(net)
    x = Tensor(np.array([2.0], dtype=np.float32))
    y = Tensor(np.array([2.0], dtype=np.float32))
    try:
        grad_net(x, y)
    except TypeError as e:
        assert "The params of function 'bprop' of Primitive or Cell requires the forward inputs as well as the 'out' " \
               "and 'dout'." in str(e)
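

# Contrast sketch for the bprop-signature errors above: an accepted `bprop`
# takes every forward input, then `out`, then `dout`, and returns one
# gradient per forward input.
class WellFormedBpropNet(nn.Cell):
    def __init__(self):
        super(WellFormedBpropNet, self).__init__()
        self.zeros_like = P.ZerosLike()

    def construct(self, x, y):
        return x + y

    def bprop(self, x, y, out, dout):
        return self.zeros_like(x), self.zeros_like(y)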


def test_grad_hook():
    def var_hook_function(grad_out):
        assert grad_out[0].asnumpy().shape == (32, 120)

    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.add = P.Add()
            self.hook = P.HookBackward(var_hook_function)
        def construct(self, x, y):
            x = self.hook(x)
            out = self.add(x, y)
            return out

    class GradNetWrtX(nn.Cell):
        def __init__(self, net):
            super(GradNetWrtX, self).__init__()
            self.net = net
            self.grad_op = ops.GradOperation()
        def construct(self, x, y):
            gradient_function = self.grad_op(self.net)
            return gradient_function(x, y)

    x = Tensor(np.array([2.0], dtype=np.float32))
    y = Tensor(np.array([2.0], dtype=np.float32))
    try:
        GradNetWrtX(Net())(x, y)
    except Exception as e:
        assert "The Primitive 'HookBackward' is not supported in graph mode, which is only supported in pynative " \
               "mode." in str(e)
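

# Sketch of the PyNative counterpart suggested by the error message above
# (assumed usage; the hook fires during backpropagation instead of raising).
def hook_in_pynative_sketch():
    context.set_context(mode=context.PYNATIVE_MODE)

    def print_grad(grad):
        print("grad:", grad)

    hook = P.HookBackward(print_grad)

    def net(a, b):
        return P.Add()(hook(a), b)

    x = Tensor(np.array([2.0], dtype=np.float32))
    y = Tensor(np.array([2.0], dtype=np.float32))
    ops.GradOperation(get_all=True)(net)(x, y)  # triggers print_grad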