# Copyright 2020-2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Train LeNet5 on MNIST under the MindSpore Profiler and check the files it writes."""
import contextlib
import os
import shutil
import tempfile
from collections import defaultdict
import json
import sys
import csv

from tests.security_utils import security_off_wrap
import pytest

import mindspore as ms
from mindspore import dataset as ds
from mindspore import nn, Tensor, context
from mindspore.nn.optim import Momentum
from mindspore.dataset.transforms import transforms as C
from mindspore.dataset.vision import transforms as CV
from mindspore.dataset.vision import Inter
from mindspore.common import dtype as mstype
from mindspore.common.initializer import TruncatedNormal
from mindspore.train import Model, Accuracy
from mindspore import Profiler


mnist_path = '/home/workspace/mindspore_dataset/mnist'


def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):
    """Build a bias-free, "valid"-padded Conv2d layer with truncated-normal weights."""
    weight = weight_variable()
    return nn.Conv2d(in_channels, out_channels,
                     kernel_size=kernel_size, stride=stride, padding=padding,
                     weight_init=weight, has_bias=False, pad_mode="valid")


def fc_with_initialize(input_channels, out_channels):
    """Build a Dense layer whose weight and bias both use the truncated-normal initializer."""
    weight = weight_variable()
    bias = weight_variable()
    return nn.Dense(input_channels, out_channels, weight, bias)


def weight_variable():
    """Return the initializer shared by all layers: TruncatedNormal(0.02)."""
    return TruncatedNormal(0.02)


class LeNet5(nn.Cell):
    """Define LeNet5 network."""

    def __init__(self, num_class=10, channel=1):
        super(LeNet5, self).__init__()
        self.num_class = num_class
        self.conv1 = conv(channel, 6, 5)
        # Tag the first convolution's primitive with a CPU target attribute.
        self.conv1.conv2d.add_prim_attr("primitive_target", "CPU")
        self.conv2 = conv(6, 16, 5)
        self.fc1 = fc_with_initialize(16 * 5 * 5, 120)
        self.fc2 = fc_with_initialize(120, 84)
        self.fc3 = fc_with_initialize(84, self.num_class)
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.channel = Tensor(channel)

    def construct(self, data):
        """Run conv -> relu -> pool twice, flatten, then three dense layers; return the fc3 output."""
        output = self.conv1(data)
        output = self.relu(output)
        output = self.max_pool2d(output)
        output = self.conv2(output)
        output = self.relu(output)
        output = self.max_pool2d(output)
        output = self.flatten(output)
        output = self.fc1(output)
        output = self.relu(output)
        output = self.fc2(output)
        output = self.relu(output)
        output = self.fc3(output)
        return output


def create_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers=1):
    """
    Create the MNIST training pipeline.

    Reads `batch_size * 10` samples from `data_path`, casts labels to int32, resizes images to
    32x32, rescales and normalizes them, converts HWC to CHW, then batches (dropping the
    remainder) and repeats `repeat_size` times.
    """
    # define dataset
    mnist_ds = ds.MnistDataset(data_path, num_samples=batch_size * 10)

    resize_height, resize_width = 32, 32
    rescale = 1.0 / 255.0
    rescale_nml = 1 / 0.3081
    shift_nml = -1 * 0.1307 / 0.3081

    # define map operations
    resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)  # Bilinear mode
    rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
    rescale_op = CV.Rescale(rescale, shift=0.0)
    hwc2chw_op = CV.HWC2CHW()
    type_cast_op = C.TypeCast(mstype.int32)

    # apply map operations on images
    mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)

    # apply DatasetOps
    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
    mnist_ds = mnist_ds.repeat(repeat_size)

    return mnist_ds


def _get_env_int(name, default=0):
    """Return environment variable `name` as an int, or `default` when it is unset or empty."""
    value = os.getenv(name)
    return int(value) if value else default


@contextlib.contextmanager
def _profiler_workspace():
    """
    Yield `(data_path, profiler_path)` for one profiling run and remove `data_path` afterwards.

    `data_path` is a fresh temporary directory under /tmp; `profiler_path` is its 'profiler/'
    sub-path, which the file checks in this module read from. The directory is removed even
    when the body raises.
    """
    data_path = tempfile.mkdtemp(prefix='profiler_data', dir='/tmp')
    try:
        yield data_path, os.path.join(data_path, 'profiler/')
    finally:
        if os.path.exists(data_path):
            shutil.rmtree(data_path)


@pytest.mark.level3
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@security_off_wrap
def test_cpu_profiler():
    """
    Feature: profiler support cpu mode.
    Description: profiling op time and timeline.
    Expectation: No exception.
    """
    if sys.platform != 'linux':
        return
    device_id = 0
    with _profiler_workspace() as (data_path, profiler_path):
        _train_with_profiler(data_path=data_path, device_target="CPU", profile_memory=False)
        _check_cpu_profiling_file(profiler_path, device_id)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_profiler():
    """
    Feature: profiler support GPU mode.
    Description: profiling op time and timeline.
    Expectation: No exception.
    """
    device_id = _get_env_int('DEVICE_ID')
    rank_id = _get_env_int('RANK_ID')
    with _profiler_workspace() as (data_path, profiler_path):
        _train_with_profiler(data_path=data_path, device_target="GPU", profile_memory=False,
                             context_mode=context.GRAPH_MODE)
        _check_gpu_profiling_file(profiler_path, device_id)
        _check_host_profiling_file(profiler_path, rank_id)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_profiler_pynative():
    """
    Feature: profiler support GPU pynative mode.
    Description: profiling l2 GPU pynative mode data, analyze performance issues.
    Expectation: No exception.
    """
    device_id = _get_env_int('DEVICE_ID')
    rank_id = _get_env_int('RANK_ID')
    with _profiler_workspace() as (data_path, profiler_path):
        _train_with_profiler(data_path=data_path, device_target="GPU", profile_memory=False,
                             context_mode=context.PYNATIVE_MODE)
        _check_gpu_profiling_file(profiler_path, device_id)
        _check_host_profiling_file(profiler_path, rank_id)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_profiler():
    """
    Feature: profiler support ascend mode.
    Description: profiling op time, timeline, step trace and host data.
    Expectation: No exception.
    """
    ms.set_context(jit_level="O2")
    rank_id = _get_env_int('RANK_ID')
    with _profiler_workspace() as (data_path, profiler_path):
        _train_with_profiler(data_path=data_path, device_target="Ascend", profile_memory=True)
        _check_d_profiling_file(profiler_path, rank_id)
        _check_d_profiling_step_trace_on_multisubgraph(profiler_path, rank_id)
        _check_host_profiling_file(profiler_path, rank_id)


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
@pytest.mark.parametrize("profile_framework", ['all', 'time', 'memory', None])
def test_host_profiler(profile_framework):
    """
    Feature: profiling support ascend kbyk mode.
    Description: profiling kbyk host data.
    Expectation: No exception.
    """
    rank_id = _get_env_int('RANK_ID')
    with _profiler_workspace() as (data_path, profiler_path):
        _train_with_profiler(data_path=data_path, device_target="Ascend", profile_memory=False, only_profile_host=True,
                             profile_framework=profile_framework)
        _check_host_profiling_file(profiler_path, rank_id, profile_framework=profile_framework)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_kbyk_profiler():
    """
    Feature: profiling ascend kbyk host data.
    Description: profiling ascend and host data.
    Expectation: No exception.
    """
    rank_id = _get_env_int('RANK_ID')
    with _profiler_workspace() as (data_path, profiler_path):
        _train_with_profiler(data_path=data_path, device_target="Ascend", profile_memory=False, host_stack=True)
        _check_d_profiling_file(profiler_path, rank_id)
        _check_host_profiling_file(profiler_path, rank_id)
        _check_kbyk_profiling_file(profiler_path, rank_id)


def _check_kbyk_profiling_file(profiler_path, rank_id):
    """Assert that FRAMEWORK/op_range_<rank_id> exists under `profiler_path`."""
    op_range_file = os.path.join(profiler_path, "FRAMEWORK/op_range_" + str(rank_id))
    assert os.path.isfile(op_range_file)


def _train_with_profiler(device_target, profile_memory, data_path, context_mode=context.GRAPH_MODE,
                         only_profile_host=False, profile_framework='all', host_stack=True):
    """
    Train LeNet5 for one epoch on MNIST with a Profiler writing to `data_path`, then analyse.

    When `only_profile_host` is set, the Profiler is created with op_time, parallel_strategy and
    data_process disabled and aicore_metrics=-1; otherwise it is created with `profile_memory`.
    `profiler.op_analyse` is additionally called for every target other than 'Ascend'.

    Raises:
        ValueError: If the created dataset is empty.
    """
    context.set_context(mode=context_mode, device_target=device_target)
    ds_train = create_dataset(os.path.join(mnist_path, "train"))
    if ds_train.get_dataset_size() == 0:
        raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")
    if only_profile_host:
        profiler = Profiler(output_path=data_path, op_time=False,
                            parallel_strategy=False, aicore_metrics=-1, data_process=False,
                            profile_framework=profile_framework, host_stack=host_stack, data_simplification=False)
    else:
        profiler = Profiler(profile_memory=profile_memory, output_path=data_path,
                            profile_framework=profile_framework, host_stack=host_stack, data_simplification=False)
    lenet = LeNet5()
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    optim = Momentum(lenet.trainable_params(), learning_rate=0.1, momentum=0.9)
    model = Model(lenet, loss_fn=loss, optimizer=optim, metrics={'acc': Accuracy()})

    model.train(1, ds_train, dataset_sink_mode=True)
    profiler.analyse()
    if device_target != 'Ascend':
        profiler.op_analyse(op_name="Conv2D")


def _check_gpu_profiling_file(profiler_path, device_id):
    """Assert that every expected GPU profiling output for `device_id` exists in `profiler_path`."""
    gpu_profiler_files = (
        f'gpu_op_detail_info_{device_id}.csv',
        f'gpu_op_type_info_{device_id}.csv',
        f'gpu_activity_data_{device_id}.csv',
        f'gpu_timeline_display_{device_id}.json',
        f'minddata_getnext_profiling_{device_id}.txt',
        f'minddata_pipeline_raw_{device_id}.csv',
        f'gpu_framework_{device_id}.txt',
    )
    for file_name in gpu_profiler_files:
        # os.path.join works whether or not profiler_path carries a trailing separator.
        assert os.path.isfile(os.path.join(profiler_path, file_name))


def _check_d_profiling_step_trace_on_multisubgraph(profiler_path, rank_id):
    """Assert that the step-trace detail CSV for `rank_id` exists and holds exactly 11 data rows."""
    step_trace_file = os.path.join(profiler_path, f'step_trace_raw_{rank_id}_detail_time.csv')
    assert os.path.isfile(step_trace_file)
    with open(step_trace_file, 'r') as fr:
        reader = csv.DictReader(fr)
        row_count = sum(1 for _ in reader)
    assert row_count == 11


def _check_d_profiling_file(profiler_path, rank_id):
    """Assert that every expected Ascend profiling output for `rank_id` exists in `profiler_path`."""
    d_profiler_files = (
        f'aicore_intermediate_{rank_id}_detail.csv',
        f'ascend_timeline_display_{rank_id}.json',
        f'aicpu_intermediate_{rank_id}.csv',
        f'minddata_pipeline_raw_{rank_id}.csv',
        f'device_queue_profiling_{rank_id}.txt',
    )
    for file_name in d_profiler_files:
        assert os.path.isfile(os.path.join(profiler_path, file_name))


def _check_cpu_profiling_file(profiler_path, device_id):
    """Assert that every expected CPU profiling output for `device_id` exists in `profiler_path`."""
    cpu_profiler_files = (
        f'cpu_op_detail_info_{device_id}.csv',
        f'cpu_op_type_info_{device_id}.csv',
        f'cpu_op_execute_timestamp_{device_id}.txt',
    )
    for file_name in cpu_profiler_files:
        assert os.path.isfile(os.path.join(profiler_path, file_name))


def _check_host_profiling_file(profiler_path, rank_id, profile_framework='all'):
    """
    Check the host_info outputs under `profiler_path` for the given `profile_framework` mode.

    With `profile_framework=None` the host_info directory must not exist. For 'all' and 'time'
    the timeline JSON must exist. The host_info CSV must exist with the expected 11-column
    header, and every data row must have 11 fields.
    """
    host_dir = os.path.join(profiler_path, 'host_info')
    if profile_framework is None:
        assert not os.path.exists(host_dir)
        return
    if profile_framework in ['all', 'time']:
        timeline_file = os.path.join(host_dir, f'timeline_{rank_id}.json')
        assert os.path.isfile(timeline_file)
    # NOTE(review): the CSV check is assumed to apply to every non-None mode ('memory' included) - confirm.
    csv_file = os.path.join(host_dir, f'host_info_{rank_id}.csv')
    assert os.path.exists(csv_file)
    with open(csv_file, 'r') as f:
        f_reader = csv.reader(f)
        header = next(f_reader)
        assert header == ['tid', 'pid', 'parent_pid', 'module_name', 'event', 'stage', 'level', 'start_end',
                          'custom_info', 'memory_usage(kB)', 'time_stamp(us)']
        for row in f_reader:
            assert len(row) == 11


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_pynative_profiler():
    """
    Feature: profiling ascend pynative host data.
    Description: profiling pynative host data.
    Expectation: No exception.
    """
    rank_id = _get_env_int('RANK_ID')
    with _profiler_workspace() as (data_path, profiler_path):
        _train_with_profiler(data_path=data_path, device_target='Ascend', profile_memory=False,
                             context_mode=context.PYNATIVE_MODE, host_stack=True)
        _check_pynative_timeline_host_data(profiler_path, rank_id)


def _check_pynative_timeline_host_data(profiler_path, rank_id):
    """
    Check the Ascend timeline JSON for `rank_id` produced by a PyNative run.

    Flow events (ph 's'/'f') in the 'async_mindspore', 'async_npu' and 'HostToDevice' categories
    must occur exactly twice per id. Complete events (ph 'X') must include at least one name
    containing each of RunOp, FrontendTask, DeviceTask, LaunchTask and KernelLaunch, and every
    RunOp event must carry a non-empty 'Call stack' argument.
    """
    timeline_display_file = os.path.join(profiler_path, f'ascend_timeline_display_{rank_id}.json')
    assert os.path.isfile(timeline_display_file)
    with open(timeline_display_file, 'r') as fr:
        data = json.load(fr)
    async_ms_dict, async_npu_dict, host_to_device_dict = defaultdict(int), defaultdict(int), defaultdict(int)
    run_op_set, frontend_task_set, device_task_set, launch_task_set, kernel_launch_set \
        = set(), set(), set(), set(), set()

    for d in data:
        ph = d.get('ph')
        cat = d.get('cat')
        # Default to '' so an event without a name is skipped instead of raising TypeError on `in`.
        name = d.get('name') or ''
        if ph in ('s', 'f'):
            if cat == 'async_mindspore':
                async_ms_dict[d.get('id')] += 1
            elif cat == 'async_npu':
                async_npu_dict[d.get('id')] += 1
            elif cat == 'HostToDevice':
                host_to_device_dict[d.get('id')] += 1
        elif ph == 'X':
            if 'RunOp' in name:
                assert d.get('args', {}).get('Call stack')
                run_op_set.add(name)
            elif 'FrontendTask' in name:
                frontend_task_set.add(name)
            elif 'DeviceTask' in name:
                device_task_set.add(name)
            elif 'LaunchTask' in name:
                launch_task_set.add(name)
            elif 'KernelLaunch' in name:
                kernel_launch_set.add(name)

    assert run_op_set
    assert frontend_task_set
    assert device_task_set
    assert launch_task_set
    assert kernel_launch_set
    # Each flow id must have exactly one start and one finish event.
    for v in async_ms_dict.values():
        assert v == 2
    for v in async_npu_dict.values():
        assert v == 2
    for v in host_to_device_dict.values():
        assert v == 2