# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Watchpoints test script for offline debugger APIs.
"""

import os
import json
import time
import tempfile
import numpy as np
import pytest
import mindspore.offline_debug.dbg_services as d
from tests.security_utils import security_off_wrap
from dump_test_utils import build_dump_structure, write_watchpoint_to_json

# When True, the tests write "<test_name>_expected.json" into the current
# working directory instead of comparing against the files under ./golden.
GENERATE_GOLDEN = False
# Accumulates hits between print_watchpoint_hits() calls until they are dumped.
watchpoint_hits_json = []


def run_watchpoints(is_sync):
    """
    Build a small dump directory and run four watchpoint scenarios against it.

    Args:
        is_sync (bool): Passed to DbgServices.initialize as ``is_sync_mode``; also
            selects the golden file name ("sync_watchpoints" or "async_watchpoints").
    """
    test_name = "sync_watchpoints" if is_sync else "async_watchpoints"

    conv_node = "Default/network-WithLossCell/_backbone-AlexNet/conv1-Conv2d/Conv2D-op369"
    bias_node = "Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc2.bias"

    name1 = "Conv2D.Conv2D-op369.0.0.1"
    tensor1 = np.array([[[-1.2808e-03, 7.7629e-03, 1.9241e-02],
                         [-1.3931e-02, 8.9359e-04, -1.1520e-02],
                         [-6.3248e-03, 1.8749e-03, 1.0132e-02]],
                        [[-2.5520e-03, -6.0005e-03, -5.1918e-03],
                         [-2.7866e-03, 2.5487e-04, 8.4782e-04],
                         [-4.6310e-03, -8.9111e-03, -8.1778e-05]],
                        [[1.3914e-03, 6.0844e-04, 1.0643e-03],
                         [-2.0966e-02, -1.2865e-03, -1.8692e-03],
                         [-1.6647e-02, 1.0233e-03, -4.1313e-03]]], np.float32)
    info1 = d.TensorInfo(node_name=conv_node,
                         slot=1, iteration=2, rank_id=0, root_graph_id=0, is_output=False)

    name2 = "Parameter.fc2.bias.0.0.2"
    tensor2 = np.array([-5.0167350e-06, 1.2509107e-05, -4.3148934e-06, 8.1415592e-06,
                        2.1177532e-07, 2.9952851e-06], np.float32)
    info2 = d.TensorInfo(node_name=bias_node,
                         slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)

    # Same parameter as tensor2 but dumped for iteration 3, so the
    # change-based watchpoint in test 4 has two iterations to work with.
    tensor3 = np.array([2.9060817e-07, -5.1009415e-06, -2.8662325e-06, 2.6036503e-06,
                        -5.1546101e-07, 6.0798648e-06], np.float32)
    info3 = d.TensorInfo(node_name=bias_node,
                         slot=0, iteration=3, rank_id=0, root_graph_id=0, is_output=True)

    tensor_info = [info1, info2, info3]
    tensor_name = [name1, name2, name2]
    tensor_list = [tensor1, tensor2, tensor3]

    pwd = os.getcwd()
    with tempfile.TemporaryDirectory(dir=pwd) as tmp_dir:
        temp_dir = build_dump_structure(tmp_dir, tensor_name, tensor_list, "Test", tensor_info)

        debugger_backend = d.DbgServices(dump_file_path=temp_dir)
        debugger_backend.initialize(net_name="Test", is_sync_mode=is_sync)

        # NOTES:
        # -> watch_condition=6 is MIN_LT
        # -> watch_condition=18 is CHANGE_TOO_LARGE

        # test 1: watchpoint set and hit (watch_condition=6)
        param1 = d.Parameter(name="param", disabled=False, value=0.0)
        debugger_backend.add_watchpoint(
            watchpoint_id=1, watch_condition=6,
            check_node_list={conv_node: {"rank_id": [0], "root_graph_id": [0], "is_output": False}},
            parameter_list=[param1])

        watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
        assert len(watchpoint_hits_test_1) == 1
        if GENERATE_GOLDEN:
            # is_print=False: only accumulate; the file is written after test 4.
            print_watchpoint_hits(watchpoint_hits_test_1, 0, False, test_name)
        else:
            compare_expect_actual_result(watchpoint_hits_test_1, 0, test_name)

        # test 2: watchpoint remove and ensure it's not hit
        debugger_backend.remove_watchpoint(watchpoint_id=1)
        watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
        assert not watchpoint_hits_test_2

        # test 3: watchpoint set and not hit, then remove
        param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
        debugger_backend.add_watchpoint(
            watchpoint_id=2, watch_condition=6,
            check_node_list={conv_node: {"rank_id": [0], "root_graph_id": [0], "is_output": False}},
            parameter_list=[param2])

        watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
        assert not watchpoint_hits_test_3
        debugger_backend.remove_watchpoint(watchpoint_id=2)

        # test 4: weight change watchpoint set and hit
        param_abs_mean_update_ratio_gt = d.Parameter(
            name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
        param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
        debugger_backend.add_watchpoint(
            watchpoint_id=3, watch_condition=18,
            check_node_list={bias_node: {"rank_id": [0], "root_graph_id": [0], "is_output": True}},
            parameter_list=[param_abs_mean_update_ratio_gt, param_epsilon])

        watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
        assert len(watchpoint_hits_test_4) == 1

        if GENERATE_GOLDEN:
            print_watchpoint_hits(watchpoint_hits_test_4, 1, True, test_name)
        else:
            compare_expect_actual_result(watchpoint_hits_test_4, 1, test_name)


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_sync_watchpoints():
    """
    Feature: Offline Debugger CheckWatchpoint
    Description: Run the watchpoint scenarios with is_sync_mode=True
    Expectation: Hits match the "sync_watchpoints" golden file
    """
    run_watchpoints(True)


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_watchpoints():
    """
    Feature: Offline Debugger CheckWatchpoint
    Description: Run the watchpoint scenarios with is_sync_mode=False
    Expectation: Hits match the "async_watchpoints" golden file
    """
    run_watchpoints(False)


def run_overflow_watchpoint(is_overflow):
    """
    Write a hand-built Add dump file plus an Opdebug file, then check watch_condition=2.

    Args:
        is_overflow (bool): If True the Opdebug record carries the same task_id as
            the Add dump file and exactly one hit is expected; if False it carries
            ``task_id + 1`` and no hit is expected.
    """
    test_name = "overflow_watchpoint"
    tensor = np.array([65504, 65504], np.float16)
    task_id = 2
    stream_id = 7
    pwd = os.getcwd()
    with tempfile.TemporaryDirectory(dir=pwd) as tmp_dir:
        path = os.path.join(tmp_dir, "rank_0", "Add", "0", "0")
        os.makedirs(path, exist_ok=True)
        add_file = os.path.join(path, "Add.Default_Add-op0." + str(task_id) + "." + str(stream_id) + "."
                                + str(int(round(time.time() * 1000000))))
        with open(add_file, 'wb') as add_f:
            # Pre-serialized header bytes followed by the node name and the raw
            # tensor buffer. NOTE(review): presumably the async dump header
            # layout expected by the debugger backend - confirm before editing.
            add_f.write(b'1')
            add_f.seek(8)
            add_f.write(b'\n\x032.0\x10\x83\xf7\xef\x9f\x99\xc8\xf3\x02\x1a\x10\x08\x02\x10\x02\x1a\x03')
            add_f.write(b'\n\x01\x020\x04:\x03\n\x01\x022\x0f')
            add_f.write(b'Default/Add-op0')
            add_f.write(tensor)
        overflow_file = os.path.join(path, "Opdebug.Node_OpDebug." + str(task_id) + "." + str(stream_id) +
                                     "." + str(int(round(time.time() * 1000000))))
        with open(overflow_file, 'wb') as f:
            f.seek(321, 0)
            # 256-byte zero-filled record with stream_id at offset 16 and the
            # task id at offset 24.
            overflow_record = bytearray(256)
            overflow_record[16] = stream_id
            if is_overflow:
                overflow_record[24] = task_id
            else:
                # wrong task_id, should not generate overflow watchpoint hit
                overflow_record[24] = task_id + 1
            f.write(bytes(overflow_record))
        debugger_backend = d.DbgServices(dump_file_path=tmp_dir)
        debugger_backend.initialize(net_name="Add", is_sync_mode=False)
        # NOTE(review): watch_condition=2 is presumably the overflow condition
        # (going by the test name) - confirm against dbg_services.
        debugger_backend.add_watchpoint(
            watchpoint_id=1, watch_condition=2,
            check_node_list={"Default/Add-op0": {"rank_id": [0], "root_graph_id": [0], "is_output": True}},
            parameter_list=[])

        watchpoint_hits_test = debugger_backend.check_watchpoints(iteration=0)

        if is_overflow:
            assert len(watchpoint_hits_test) == 1
            if GENERATE_GOLDEN:
                print_watchpoint_hits(watchpoint_hits_test, 0, True, test_name)
            else:
                compare_expect_actual_result(watchpoint_hits_test, 0, test_name)
        else:
            assert not watchpoint_hits_test


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_overflow_watchpoints_hit():
    """
    Feature: Offline Debugger CheckWatchpoint
    Description: Test check overflow watchpoint hit
    Expectation: Overflow watchpoint is hit
    """
    run_overflow_watchpoint(True)


def compare_expect_actual_result(watchpoint_hits_list, test_index, test_name):
    """
    Compare actual result with golden file.

    Args:
        watchpoint_hits_list (list): Hits returned by check_watchpoints.
        test_index (int): Zero-based position of the first hit inside the golden
            list; hit ``x`` is compared with entry ``test_index + x`` under the
            key ``"watchpoint_hit<test_index + x + 1>"``.
        test_name (str): Golden file stem; the file read is
            ``<cwd>/golden/<test_name>_expected.json``.
    """
    pwd = os.getcwd()
    golden_file = os.path.realpath(os.path.join(pwd, "golden", test_name + "_expected.json"))
    with open(golden_file, encoding="utf-8") as f:
        expected_list = json.load(f)
    for x, watchpoint_hits in enumerate(watchpoint_hits_list):
        test_id = "watchpoint_hit" + str(test_index + x + 1)
        expect_wp = expected_list[x + test_index][test_id]
        actual_wp = write_watchpoint_to_json(watchpoint_hits)
        assert actual_wp == expect_wp


def print_watchpoint_hits(watchpoint_hits_list, test_index, is_print, test_name):
    """
    Print watchpoint hits.

    Each hit is appended to the module-level ``watchpoint_hits_json`` list. When
    ``is_print`` is True the accumulated list is dumped to
    ``<test_name>_expected.json`` in the current working directory and then
    cleared, so hits collected for one test never leak into the golden file of
    the next test run in the same process.

    Args:
        watchpoint_hits_list (list): Hits returned by check_watchpoints.
        test_index (int): Offset used to number the ``"watchpoint_hit<N>"`` keys.
        is_print (bool): Whether to write (and then reset) the accumulated hits.
        test_name (str): Stem of the output file name.
    """
    for x, watchpoint_hits in enumerate(watchpoint_hits_list):
        watchpoint_hit = "watchpoint_hit" + str(test_index + x + 1)
        wp = write_watchpoint_to_json(watchpoint_hits)
        watchpoint_hits_json.append({watchpoint_hit: wp})
    if is_print:
        with open(test_name + "_expected.json", "w", encoding="utf-8") as dump_f:
            json.dump(watchpoint_hits_json, dump_f, indent=4, separators=(',', ': '))
        # Reset the shared accumulator so the next golden file starts empty.
        watchpoint_hits_json.clear()