• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Watchpoints test script for offline debugger APIs.
"""
18
19import os
20import json
21import time
22import tempfile
23import numpy as np
24import pytest
25import mindspore.offline_debug.dbg_services as d
26from tests.security_utils import security_off_wrap
27from dump_test_utils import build_dump_structure, write_watchpoint_to_json
28
# When True, the tests call print_watchpoint_hits to write fresh
# "<test_name>_expected.json" files instead of comparing against golden files.
GENERATE_GOLDEN = False
# Accumulates {"watchpoint_hitN": <hit>} entries collected by
# print_watchpoint_hits until they are dumped to a golden file.
watchpoint_hits_json = []
31
32
def run_watchpoints(is_sync):
    """Run the set / hit / remove watchpoint scenarios on a synthetic dump.

    A temporary dump structure is built with one Conv2D input tensor
    (iteration 2) and the fc2.bias parameter at iterations 2 and 3. Four
    scenarios are then checked against the offline debugger backend:

    1. a MIN_LT watchpoint on the Conv2D node is hit once;
    2. after removing it, no hit is reported;
    3. a MIN_LT watchpoint with threshold -1000.0 is not hit, then removed;
    4. a CHANGE_TOO_LARGE watchpoint on fc2.bias is hit once at iteration 3.

    Hits are either recorded as golden data or compared with the golden file,
    depending on GENERATE_GOLDEN.

    Args:
        is_sync (bool): Forwarded to ``DbgServices.initialize`` as
            ``is_sync_mode``; also selects the golden file name.
    """
    test_name = "sync_watchpoints" if is_sync else "async_watchpoints"

    # Watch condition identifiers used below.
    min_lt = 6
    change_too_large = 18

    conv_node = ("Default/network-WithLossCell/_backbone-AlexNet/"
                 "conv1-Conv2d/Conv2D-op369")
    bias_node = ("Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
                 "Parameter[6]_11/fc2.bias")

    def _node_filter(node, is_output):
        # Build a fresh check_node_list mapping for every add_watchpoint call.
        return {node: {"rank_id": [0], "root_graph_id": [0], "is_output": is_output}}

    def _record_or_compare(hits, index, flush):
        # Either accumulate golden data or verify against the stored golden file.
        if GENERATE_GOLDEN:
            print_watchpoint_hits(hits, index, flush, test_name)
        else:
            compare_expect_actual_result(hits, index, test_name)

    conv_name = "Conv2D.Conv2D-op369.0.0.1"
    conv_tensor = np.array(
        [[[-1.2808e-03, 7.7629e-03, 1.9241e-02],
          [-1.3931e-02, 8.9359e-04, -1.1520e-02],
          [-6.3248e-03, 1.8749e-03, 1.0132e-02]],
         [[-2.5520e-03, -6.0005e-03, -5.1918e-03],
          [-2.7866e-03, 2.5487e-04, 8.4782e-04],
          [-4.6310e-03, -8.9111e-03, -8.1778e-05]],
         [[1.3914e-03, 6.0844e-04, 1.0643e-03],
          [-2.0966e-02, -1.2865e-03, -1.8692e-03],
          [-1.6647e-02, 1.0233e-03, -4.1313e-03]]],
        np.float32)
    conv_info = d.TensorInfo(node_name=conv_node,
                             slot=1, iteration=2, rank_id=0, root_graph_id=0, is_output=False)

    bias_name = "Parameter.fc2.bias.0.0.2"
    bias_iter2 = np.array([-5.0167350e-06, 1.2509107e-05, -4.3148934e-06, 8.1415592e-06,
                           2.1177532e-07, 2.9952851e-06], np.float32)
    bias_info_iter2 = d.TensorInfo(node_name=bias_node,
                                   slot=0, iteration=2, rank_id=0, root_graph_id=0, is_output=True)

    bias_iter3 = np.array([2.9060817e-07, -5.1009415e-06, -2.8662325e-06, 2.6036503e-06,
                           -5.1546101e-07, 6.0798648e-06], np.float32)
    bias_info_iter3 = d.TensorInfo(node_name=bias_node,
                                   slot=0, iteration=3, rank_id=0, root_graph_id=0, is_output=True)

    infos = [conv_info, bias_info_iter2, bias_info_iter3]
    names = [conv_name, bias_name, bias_name]
    tensors = [conv_tensor, bias_iter2, bias_iter3]

    with tempfile.TemporaryDirectory(dir=os.getcwd()) as scratch_dir:
        dump_root = build_dump_structure(scratch_dir, names, tensors, "Test", infos)

        backend = d.DbgServices(dump_file_path=dump_root)
        backend.initialize(net_name="Test", is_sync_mode=is_sync)

        # test 1: watchpoint set and hit (MIN_LT with threshold 0.0)
        hit_threshold = d.Parameter(name="param", disabled=False, value=0.0)
        backend.add_watchpoint(watchpoint_id=1, watch_condition=min_lt,
                               check_node_list=_node_filter(conv_node, False),
                               parameter_list=[hit_threshold])
        hits_after_set = backend.check_watchpoints(iteration=2)
        assert len(hits_after_set) == 1
        _record_or_compare(hits_after_set, 0, False)

        # test 2: watchpoint removed, so it must no longer be hit
        backend.remove_watchpoint(watchpoint_id=1)
        hits_after_remove = backend.check_watchpoints(iteration=2)
        assert not hits_after_remove

        # test 3: watchpoint set with a threshold that is never reached, then removed
        miss_threshold = d.Parameter(name="param", disabled=False, value=-1000.0)
        backend.add_watchpoint(watchpoint_id=2, watch_condition=min_lt,
                               check_node_list=_node_filter(conv_node, False),
                               parameter_list=[miss_threshold])
        hits_not_expected = backend.check_watchpoints(iteration=2)
        assert not hits_not_expected
        backend.remove_watchpoint(watchpoint_id=2)

        # test 4: weight change watchpoint set and hit (CHANGE_TOO_LARGE)
        update_ratio = d.Parameter(name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
        epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
        backend.add_watchpoint(watchpoint_id=3, watch_condition=change_too_large,
                               check_node_list=_node_filter(bias_node, True),
                               parameter_list=[update_ratio, epsilon])
        hits_weight_change = backend.check_watchpoints(iteration=3)
        assert len(hits_weight_change) == 1
        _record_or_compare(hits_weight_change, 1, True)
131
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_sync_watchpoints():
    """
    Feature: Offline Debugger CheckWatchpoint
    Description: Run the watchpoint set, hit and remove scenarios in sync mode
    Expectation: Every assertion inside run_watchpoints passes
    """
    run_watchpoints(is_sync=True)
139
140
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_watchpoints():
    """
    Feature: Offline Debugger CheckWatchpoint
    Description: Run the watchpoint set, hit and remove scenarios in async mode
    Expectation: Every assertion inside run_watchpoints passes
    """
    run_watchpoints(is_sync=False)
148
149
def run_overflow_watchpoint(is_overflow):
    """Build a minimal async dump for an Add node and check watchpoint 2 on it.

    Two files are written under ``<tmp>/rank_0/Add/0/0``: a tensor dump for
    ``Default/Add-op0`` and an ``Opdebug.Node_OpDebug`` file holding a 256-byte
    record at offset 321, where byte 16 is the stream id and byte 24 is the
    task id. A watchpoint with ``watch_condition=2`` is then checked at
    iteration 0.

    Args:
        is_overflow (bool): When truthy, the record carries the task id used in
            the tensor dump file name and exactly one hit is expected. Otherwise
            a different task id is stored and no hit is expected.
    """
    test_name = "overflow_watchpoint"
    task_id, stream_id = 2, 7
    tensor = np.array([65504, 65504], np.float16)

    with tempfile.TemporaryDirectory(dir=os.getcwd()) as tmp_dir:
        dump_dir = os.path.join(tmp_dir, "rank_0", "Add", "0", "0")
        os.makedirs(dump_dir, exist_ok=True)

        # Tensor dump file: <op>.<task_id>.<stream_id>.<timestamp in microseconds>
        add_stamp = str(int(round(time.time() * 1000000)))
        add_file = os.path.join(
            dump_dir, ".".join(("Add.Default_Add-op0", str(task_id), str(stream_id), add_stamp)))
        with open(add_file, 'wb') as add_f:
            add_f.write(b'1')
            add_f.seek(8)
            for chunk in (b'\n\x032.0\x10\x83\xf7\xef\x9f\x99\xc8\xf3\x02\x1a\x10\x08\x02\x10\x02\x1a\x03',
                          b'\n\x01\x020\x04:\x03\n\x01\x022\x0f',
                          b'Default/Add-op0',
                          tensor):
                add_f.write(chunk)

        # Op-debug file carrying the (stream_id, task_id) pair of the overflowing op.
        debug_stamp = str(int(round(time.time() * 1000000)))
        overflow_file = os.path.join(
            dump_dir, ".".join(("Opdebug.Node_OpDebug", str(task_id), str(stream_id), debug_stamp)))
        record = bytearray(256)
        record[16] = stream_id
        # A wrong task_id should not generate an overflow watchpoint hit.
        record[24] = task_id if is_overflow else task_id + 1
        with open(overflow_file, 'wb') as overflow_f:
            overflow_f.seek(321, 0)
            overflow_f.write(bytes(record))

        backend = d.DbgServices(dump_file_path=tmp_dir)
        backend.initialize(net_name="Add", is_sync_mode=False)
        backend.add_watchpoint(watchpoint_id=1, watch_condition=2,
                               check_node_list={"Default/Add-op0":
                                                    {"rank_id": [0], "root_graph_id": [0], "is_output": True}},
                               parameter_list=[])

        hits = backend.check_watchpoints(iteration=0)

        if not is_overflow:
            assert not hits
            return

        assert len(hits) == 1
        if GENERATE_GOLDEN:
            print_watchpoint_hits(hits, 0, True, test_name)
        else:
            compare_expect_actual_result(hits, 0, test_name)
203
204
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_overflow_watchpoints_hit():
    """
    Feature: Offline Debugger CheckWatchpoint
    Description: Check an overflow watchpoint against a dump whose op-debug record matches the Add node
    Expectation: The overflow watchpoint is hit exactly once
    """
    run_overflow_watchpoint(is_overflow=True)
217
218
def compare_expect_actual_result(watchpoint_hits_list, test_index, test_name):
    """Assert that each watchpoint hit matches its entry in the golden file.

    The golden file is ``golden/<test_name>_expected.json`` relative to the
    current working directory. Hit number ``x`` of ``watchpoint_hits_list`` is
    compared with entry ``test_index + x`` of the golden list, under the key
    ``"watchpoint_hit<test_index + x + 1>"``.

    Args:
        watchpoint_hits_list: Watchpoint hits to verify.
        test_index (int): Offset of the first hit inside the golden list.
        test_name (str): Prefix of the golden file name.

    Raises:
        AssertionError: If a serialized hit differs from the golden entry.
    """
    golden_path = os.path.realpath(
        os.path.join(os.getcwd(), "golden", test_name + "_expected.json"))
    with open(golden_path) as golden_f:
        golden_entries = json.load(golden_f)

    for position, hit in enumerate(watchpoint_hits_list, start=test_index):
        hit_key = "watchpoint_hit" + str(position + 1)
        expected = golden_entries[position][hit_key]
        actual = write_watchpoint_to_json(hit)
        assert actual == expected
230
231
def print_watchpoint_hits(watchpoint_hits_list, test_index, is_print, test_name):
    """Collect watchpoint hits and optionally dump them as a golden file.

    Each hit is serialized with ``write_watchpoint_to_json`` and appended to
    the module-level ``watchpoint_hits_json`` list under the key
    ``"watchpoint_hit<test_index + x + 1>"``. When ``is_print`` is true, the
    accumulated list is written to ``<test_name>_expected.json`` and the
    accumulator is then emptied, so hits from one test do not end up in the
    golden file of the next.

    NOTE(review): the file is written to the current directory, whereas
    compare_expect_actual_result reads from ./golden/ -- presumably the
    generated file is moved there by hand; confirm.

    Args:
        watchpoint_hits_list: Watchpoint hits to record.
        test_index (int): Offset used to number the hits.
        is_print (bool): Whether to write the accumulated hits to disk now.
        test_name (str): Prefix of the output file name.
    """
    for x, watchpoint_hits in enumerate(watchpoint_hits_list):
        watchpoint_hit = "watchpoint_hit" + str(test_index + x + 1)
        wp = write_watchpoint_to_json(watchpoint_hits)
        watchpoint_hits_json.append({watchpoint_hit: wp})
    if is_print:
        with open(test_name + "_expected.json", "w") as dump_f:
            json.dump(watchpoint_hits_json, dump_f, indent=4, separators=(',', ': '))
        # Reset the shared accumulator in place; otherwise every later golden
        # file would also contain the hits of all previously dumped tests.
        watchpoint_hits_json.clear()
241