• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2020 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ============================================================================
15"""CusMatrixCombine"""
16from __future__ import absolute_import
17
18from te import tik
19from tbe.tvm.topi.cce import util
20from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
21
# Registration record for the "CusMatrixCombine" TBE operator: one required
# input ("x1") and one required output ("y"), both registered only for
# float32 in the default format. The record is handed to op_info_register
# when the kernel builder below is decorated.
cus_matrix_combine_op_info = (
    TBERegOp("CusMatrixCombine")
    .fusion_type("OPAQUE")
    .async_flag(False)
    .binfile_name("matrixcombine.so")
    .compute_cost(10)
    .kernel_name("cus_matrix_combine")
    .partial_flag(True)
    .input(0, "x1", False, "required", "all")
    .output(0, "y", False, "required", "all")
    .dtype_format(DataType.F32_Default, DataType.F32_Default)
    .get_op_info()
)
33
34
@op_info_register(cus_matrix_combine_op_info)
def cus_matrix_combine(input_x, output, kernel_name="cus_matrix_combine"):
    """Build the TIK kernel for CusMatrixCombine.

    The kernel copies rows of the input tensor into the output tensor through
    a unified-buffer staging tensor, spreading the work over a
    ``for_range(..., block_num=32)`` loop.

    * When ``shape[0] == 1`` and ``shape[1] == 64``, every iteration copies two
      rows straight from the input to the same rows of the output.
    * Otherwise every iteration handles four rows of each matrix ``i``: the
      staging buffer is zero-filled, the rows are placed at column offset
      ``128 * i`` and the whole buffer is stored starting at output row
      ``i * 128 + 4 * block``.
      NOTE(review): the constants 128 and 16 suggest this path expects
      128x128 matrices laid out along the output diagonal - confirm with
      callers.

    Args:
        input_x (dict): input description; only its ``"shape"`` entry is read.
        output (dict): output description; only its ``"shape"`` entry is read.
        kernel_name (str): kernel name forwarded to ``BuildCCE``.

    Returns:
        The ``tik.Tik`` instance after ``BuildCCE`` has been called on it.
    """
    x_shape = input_x.get("shape")
    y_shape = output.get("shape")

    # Pick the device profile from the product version reported by util.
    soc = "mini" if util.get_product_version() == util.VERSION_MINI else "cloud"
    tik_instance = tik.Tik(tik.Dprofile("v100", soc))

    # Global-memory tensors for the operator's input and output.
    x_gm = tik_instance.Tensor("float32", x_shape, name="input_x", scope=tik.scope_gm)
    res_gm = tik_instance.Tensor("float32", y_shape, name="res", scope=tik.scope_gm)

    core_num = 32
    matrix_dim = x_shape[0] * x_shape[1]

    if x_shape[0] == 1 and x_shape[1] == 64:
        with tik_instance.for_range(0, core_num, block_num=core_num) as core_idx:
            staging_ub = tik_instance.Tensor("float32", (2, matrix_dim), name="input_x_ub",
                                             scope=tik.scope_ubuf)
            # Two rows in, the same two rows out. The burst length of 16 is
            # presumably in 32-byte units (128 float32) - see TIK data_move docs.
            tik_instance.data_move(staging_ub, x_gm[0, core_idx * 2, 0], 0, 1, 16, 0, 0)
            tik_instance.data_move(res_gm[core_idx * 2, 0], staging_ub, 0, 1, 16, 0, 0)
    else:
        rows_per_core = 4
        staging_ub = tik_instance.Tensor("float32", (rows_per_core, matrix_dim), name="input_x_ub",
                                         scope=tik.scope_ubuf)
        zero = tik_instance.Scalar("float32")
        zero.set_as(0.0)

        # Build-time (plain Python) quantities that do not depend on the loops.
        repeat_total = rows_per_core * matrix_dim // 64
        # Number of leading vector_dup passes that run the full 255 repeats;
        # capped at three, after which one final pass takes whatever is left.
        full_passes = sum(repeat_total > limit for limit in (255, 510, 765))
        store_burst = rows_per_core * matrix_dim * 4 // 32

        with tik_instance.for_range(0, core_num, block_num=core_num) as core_idx, \
                tik_instance.for_range(0, x_shape[0]) as mat_idx:
            # Zero the staging buffer, 64 elements per repeat, in passes of
            # at most 255 repeats each (except the final remainder pass).
            for pass_idx in range(full_passes + 1):
                first_repeat = pass_idx * 255
                target = staging_ub[first_repeat * 64] if pass_idx else staging_ub
                count = 255 if pass_idx < full_passes else repeat_total - first_repeat
                tik_instance.vector_dup(64, target, zero, count, 1, 8)

            # Drop this core's rows of matrix `mat_idx` at column 128 * mat_idx.
            with tik_instance.for_range(0, rows_per_core) as row_idx:
                tik_instance.data_move(staging_ub[row_idx, 128 * mat_idx],
                                       x_gm[mat_idx, core_idx * rows_per_core + row_idx, 0],
                                       0, 1, 16, 0, 0)
            # Store the whole staging buffer to the matching output rows.
            tik_instance.data_move(res_gm[mat_idx * 128 + core_idx * rows_per_core, 0], staging_ub, 0, 1,
                                   store_burst, 0, 0)

    tik_instance.BuildCCE(kernel_name=kernel_name, inputs=[x_gm], outputs=[res_gm])
    return tik_instance
88