# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Tests that run small networks with a dump configuration and check the dumped files."""
import os
import sys
import tempfile
import time
import shutil
import glob
import contextlib
from importlib import import_module
from pathlib import Path
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.nn import Cell
from mindspore.nn import Dense
from mindspore.nn import SoftmaxCrossEntropyWithLogits
from mindspore.nn import Momentum
from mindspore.nn import TrainOneStepCell
from mindspore.nn import WithLossCell
from tests.st.dump.dump_test_utils import generate_dump_json
from tests.security_utils import security_off_wrap


@contextlib.contextmanager
def _temporary_environ(**overrides):
    """
    Set environment variables for the duration of a ``with`` block.

    On exit (including when the body raises, e.g. a failed assertion) every
    overridden variable is put back to its previous value, or removed if it
    was not set before. This keeps one test's settings from leaking into the
    tests that run after it in the same process.

    Args:
        **overrides: Variable names mapped to the string values to set.
    """
    previous = {name: os.environ.get(name) for name in overrides}
    os.environ.update(overrides)
    try:
        yield
    finally:
        for name, old_value in previous.items():
            if old_value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = old_value


class Net(nn.Cell):
    """Network made of a single ``Add`` operator."""

    def __init__(self):
        super().__init__()
        self.add = P.Add()

    def construct(self, x_, y_):
        """Return the result of applying ``Add`` to ``x_`` and ``y_``."""
        return self.add(x_, y_)


# Shared inputs for the ``Net`` based tests; their sum is [[8, 10, 12], [14, 16, 18]].
x = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)
y = np.array([[7, 8, 9], [10, 11, 12]]).astype(np.float32)


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump():
    """
    Run ``Net`` on Ascend with the 'test_async_dump' configuration.

    Expects exactly one entry in the ``rank_0/Net/0/0`` dump directory.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'async_dump')
        dump_config_path = os.path.join(tmp_dir, 'async_dump.json')
        generate_dump_json(dump_path, dump_config_path, 'test_async_dump')
        dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
        with _temporary_environ(MINDSPORE_DUMP_CONFIG=dump_config_path):
            if os.path.isdir(dump_path):
                shutil.rmtree(dump_path)
            add = Net()
            add(Tensor(x), Tensor(y))
            # Wait before inspecting the dump directory.
            time.sleep(5)
            assert len(os.listdir(dump_file_path)) == 1


def run_e2e_dump():
    """
    Run ``Net`` with the 'test_e2e_dump' configuration and verify the dump.

    Checks the number of entries in ``rank_0/Net/0/0`` (5 on Ascend and CPU,
    3 on any other device target) and that the dumped ``Add`` output equals
    ``x + y`` as float32. Does nothing on non-Linux platforms.
    """
    if sys.platform != 'linux':
        return
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'e2e_dump')
        dump_config_path = os.path.join(tmp_dir, 'e2e_dump.json')
        generate_dump_json(dump_path, dump_config_path, 'test_e2e_dump')
        dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
        with _temporary_environ(MINDSPORE_DUMP_CONFIG=dump_config_path):
            if os.path.isdir(dump_path):
                shutil.rmtree(dump_path)
            add = Net()
            add(Tensor(x), Tensor(y))

            device_target = context.get_context("device_target")
            expected_file_count = 5 if device_target in ("Ascend", "CPU") else 3
            assert len(os.listdir(dump_file_path)) == expected_file_count

            output_name = "Add.Add-op*.0.0.*.output.0.DefaultFormat.npy"
            output_files = glob.glob(os.path.join(dump_file_path, output_name))
            assert output_files, "no dumped output file of the Add operator was found"
            real_path = os.path.realpath(output_files[0])
            output = np.load(real_path)
            expect = np.array([[8, 10, 12], [14, 16, 18]], np.float32)
            assert output.dtype == expect.dtype
            assert np.array_equal(output, expect)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_e2e_dump():
    """Run the e2e dump check in graph mode on Ascend."""
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    run_e2e_dump()


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_e2e_dump_with_hccl_env():
    """Run the e2e dump check on Ascend with RANK_TABLE_FILE and RANK_ID set."""
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    with _temporary_environ(RANK_TABLE_FILE="invalid_file.json", RANK_ID="4"):
        run_e2e_dump()


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@security_off_wrap
def test_cpu_e2e_dump():
    """Run the e2e dump check in graph mode on CPU."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    run_e2e_dump()


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@security_off_wrap
def test_cpu_e2e_dump_with_hccl_set():
    """Run the e2e dump check on CPU with RANK_TABLE_FILE and RANK_ID set."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    with _temporary_environ(RANK_TABLE_FILE="invalid_file.json", RANK_ID="4"):
        run_e2e_dump()


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_e2e_dump():
    """Run the e2e dump check in graph mode on GPU."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    run_e2e_dump()


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_e2e_dump_with_hccl_set():
    """Run the e2e dump check on GPU with RANK_TABLE_FILE and RANK_ID set."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    with _temporary_environ(RANK_TABLE_FILE="invalid_file.json", RANK_ID="4"):
        run_e2e_dump()


class ReluReduceMeanDenseRelu(Cell):
    """Network applying ReLU, ReduceMean over axes (2, 3), Dense and ReLU in sequence."""

    def __init__(self, kernel, bias, in_channel, num_class):
        super().__init__()
        self.relu = P.ReLU()
        self.mean = P.ReduceMean(keep_dims=False)
        self.dense = Dense(in_channel, num_class, kernel, bias)

    def construct(self, x_):
        """Return ``relu(dense(mean(relu(x_), (2, 3))))``."""
        x_ = self.relu(x_)
        x_ = self.mean(x_, (2, 3))
        x_ = self.dense(x_)
        x_ = self.relu(x_)
        return x_


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump_net_multi_layer_mode1():
    """
    Run one training step of a multi-layer network on Ascend with dump enabled.

    If an ``msaccucmp.py*`` tool is found under ``/usr/local/Ascend``, the
    dumped SoftmaxCrossEntropyWithLogits file is converted to ``.npy`` and its
    first output is compared element by element with the value returned by
    the training step. Otherwise a message is printed and nothing is compared.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'async_dump_net_multi_layer_mode1')
        json_file_path = os.path.join(tmp_dir, "test_async_dump_net_multi_layer_mode1.json")
        generate_dump_json(dump_path, json_file_path, 'test_async_dump_net_multi_layer_mode1')
        with _temporary_environ(MINDSPORE_DUMP_CONFIG=json_file_path):
            weight = Tensor(np.ones((1000, 2048)).astype(np.float32))
            bias = Tensor(np.ones((1000,)).astype(np.float32))
            net = ReluReduceMeanDenseRelu(weight, bias, 2048, 1000)
            criterion = SoftmaxCrossEntropyWithLogits(sparse=False)
            optimizer = Momentum(learning_rate=0.1, momentum=0.1,
                                 params=filter(lambda param: param.requires_grad, net.get_parameters()))
            net_with_criterion = WithLossCell(net, criterion)
            train_network = TrainOneStepCell(net_with_criterion, optimizer)
            train_network.set_train()
            inputs = Tensor(np.random.randn(32, 2048, 7, 7).astype(np.float32))
            label = Tensor(np.zeros(shape=(32, 1000)).astype(np.float32))
            net_dict = train_network(inputs, label)

            dump_file_path = os.path.join(dump_path, 'rank_0', 'test', '0', '0')
            dump_file_name = list(Path(dump_file_path).rglob("*SoftmaxCrossEntropyWithLogits*"))[0]
            dump_file_full_path = os.path.join(dump_file_path, dump_file_name)
            npy_path = os.path.join(dump_path, "npy_files")
            if os.path.exists(npy_path):
                shutil.rmtree(npy_path)
            os.mkdir(npy_path)

            tool_path_search_list = list(Path('/usr/local/Ascend').rglob('msaccucmp.py*'))
            if tool_path_search_list:
                converter = import_module("mindspore.offline_debug.convert_async")
                converter.AsyncDumpConverter([dump_file_full_path], npy_path).convert_files()
                npy_result_file = list(Path(npy_path).rglob("*output.0.*.npy"))[0]
                dump_result = np.load(os.path.join(npy_path, npy_result_file))
                for index, value in enumerate(net_dict):
                    assert value.asnumpy() == dump_result[index]
            else:
                print('Failed to find hisi convert tools: msaccucmp.py or msaccucmp.pyc.')


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_dump_with_diagnostic_path():
    """
    Test e2e dump when path is not set (set to empty) in dump json file and MS_DIAGNOSTIC_DATA_PATH is set.
    Data is expected to be dumped into MS_DIAGNOSTIC_DATA_PATH/debug_dump.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_config_path = os.path.join(tmp_dir, 'e2e_dump.json')
        generate_dump_json('', dump_config_path, 'test_e2e_dump')
        diagnose_path = os.path.join(tmp_dir, 'e2e_dump')
        dump_file_path = os.path.join(diagnose_path, 'debug_dump', 'rank_0', 'Net', '0', '0')
        with _temporary_environ(MINDSPORE_DUMP_CONFIG=dump_config_path,
                                MS_DIAGNOSTIC_DATA_PATH=diagnose_path):
            if os.path.isdir(diagnose_path):
                shutil.rmtree(diagnose_path)
            add = Net()
            add(Tensor(x), Tensor(y))
            assert len(os.listdir(dump_file_path)) == 5


def run_e2e_dump_execution_graph():
    """Run e2e dump and check execution order."""
    if sys.platform != 'linux':
        return
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'e2e_dump_exe_graph')
        dump_config_path = os.path.join(tmp_dir, 'e2e_dump.json')
        generate_dump_json(dump_path, dump_config_path, 'test_e2e_dump')
        with _temporary_environ(MINDSPORE_DUMP_CONFIG=dump_config_path):
            if os.path.isdir(dump_path):
                shutil.rmtree(dump_path)
            add = Net()
            add(Tensor(x), Tensor(y))
            exe_graph_path = os.path.join(dump_path, 'rank_0', 'execution_order')
            assert len(os.listdir(exe_graph_path)) == 1


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_dump_with_execution_graph():
    """Test dump with execution graph on GPU."""
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
    run_e2e_dump_execution_graph()