# Copyright 2020-2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import shutil
import tempfile
from collections import defaultdict
import json
import sys
import csv

from tests.security_utils import security_off_wrap
import pytest

import mindspore as ms
from mindspore import dataset as ds
from mindspore import nn, Tensor, context
from mindspore.nn.optim import Momentum
from mindspore.dataset.transforms import transforms as C
from mindspore.dataset.vision import transforms as CV
from mindspore.dataset.vision import Inter
from mindspore.common import dtype as mstype
from mindspore.common.initializer import TruncatedNormal
from mindspore.train import Model, Accuracy
from mindspore import Profiler


# Environment-specific location of the MNIST dataset used by every test below.
mnist_path = '/home/workspace/mindspore_dataset/mnist'


def conv(in_channels, out_channels, kernel_size, stride=1, padding=0):
    """Build a conv layer with TruncatedNormal weight initialization."""
    weight = weight_variable()
    return nn.Conv2d(in_channels, out_channels,
                     kernel_size=kernel_size, stride=stride, padding=padding,
                     weight_init=weight, has_bias=False, pad_mode="valid")


def fc_with_initialize(input_channels, out_channels):
    """Build a fully connected layer with TruncatedNormal weight and bias initialization."""
    weight = weight_variable()
    bias = weight_variable()
    return nn.Dense(input_channels, out_channels, weight, bias)


def weight_variable():
    """Return the TruncatedNormal initializer shared by all layers."""
    return TruncatedNormal(0.02)


class LeNet5(nn.Cell):
    """Define the LeNet5 network."""

    def __init__(self, num_class=10, channel=1):
        super(LeNet5, self).__init__()
        self.num_class = num_class
        self.conv1 = conv(channel, 6, 5)
        # Force the first convolution's Conv2d primitive to execute on the CPU.
        self.conv1.conv2d.add_prim_attr("primitive_target", "CPU")
        self.conv2 = conv(6, 16, 5)
        self.fc1 = fc_with_initialize(16 * 5 * 5, 120)
        self.fc2 = fc_with_initialize(120, 84)
        self.fc3 = fc_with_initialize(84, self.num_class)
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.channel = Tensor(channel)

    def construct(self, data):
        """Define the forward pass."""
        output = self.conv1(data)
        output = self.relu(output)
        output = self.max_pool2d(output)
        output = self.conv2(output)
        output = self.relu(output)
        output = self.max_pool2d(output)
        output = self.flatten(output)
        output = self.fc1(output)
        output = self.relu(output)
        output = self.fc2(output)
        output = self.relu(output)
        output = self.fc3(output)
        return output

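# Shape sketch (illustrative only, not executed by these tests): with the 32x32
# MNIST inputs produced by create_dataset below, LeNet5 maps a [N, 1, 32, 32]
# batch to [N, 10] logits, e.g.
#   net = LeNet5()
#   logits = net(Tensor(np.zeros((1, 1, 32, 32), np.float32)))  # assumes `import numpy as np`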

def create_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers=1):
    """create dataset for train"""
    # define dataset
    mnist_ds = ds.MnistDataset(data_path, num_samples=batch_size * 10)

    resize_height, resize_width = 32, 32
    rescale = 1.0 / 255.0
    rescale_nml = 1 / 0.3081
    shift_nml = -1 * 0.1307 / 0.3081

    # define map operations
    resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)  # Bilinear mode
    rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
    rescale_op = CV.Rescale(rescale, shift=0.0)
    hwc2chw_op = CV.HWC2CHW()
    type_cast_op = C.TypeCast(mstype.int32)

    # apply map operations on images
    mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers)

    # apply DatasetOps
    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
    mnist_ds = mnist_ds.repeat(repeat_size)

    return mnist_ds

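# Pipeline sketch (derived from create_dataset above): each mapped element is a
# normalized CHW image plus an int32 label, so one batch is
# ([batch_size, 1, 32, 32], [batch_size]); with num_samples=batch_size * 10 and
# drop_remainder=True the pipeline yields 10 batches per repeat, keeping the
# profiled training run short.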

@pytest.mark.level3
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@security_off_wrap
def test_cpu_profiler():
    """
    Feature: profiler supports CPU mode.
    Description: profiling op time and timeline.
    Expectation: No exception.
    """
    if sys.platform != 'linux':
        return
    device_id = 0
    data_path = tempfile.mkdtemp(prefix='profiler_data', dir='/tmp')
    profiler_path = os.path.join(data_path, 'profiler/')
    try:
        _train_with_profiler(data_path=data_path, device_target="CPU", profile_memory=False)
        _check_cpu_profiling_file(profiler_path, device_id)
    finally:
        if os.path.exists(data_path):
            shutil.rmtree(data_path)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_profiler():
    """
    Feature: profiler supports GPU mode.
    Description: profiling op time and timeline.
    Expectation: No exception.
    """
    device_id = int(os.getenv('DEVICE_ID')) if os.getenv('DEVICE_ID') else 0
    rank_id = int(os.getenv('RANK_ID')) if os.getenv('RANK_ID') else 0
    data_path = tempfile.mkdtemp(prefix='profiler_data', dir='/tmp')
    profiler_path = os.path.join(data_path, 'profiler/')
    try:
        _train_with_profiler(data_path=data_path, device_target="GPU", profile_memory=False,
                             context_mode=context.GRAPH_MODE)
        _check_gpu_profiling_file(profiler_path, device_id)
        _check_host_profiling_file(profiler_path, rank_id)
    finally:
        if os.path.exists(data_path):
            shutil.rmtree(data_path)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@security_off_wrap
def test_gpu_profiler_pynative():
    """
    Feature: profiler supports GPU PyNative mode.
    Description: profile GPU PyNative mode data and analyze performance issues.
    Expectation: No exception.
    """
    device_id = int(os.getenv('DEVICE_ID')) if os.getenv('DEVICE_ID') else 0
    rank_id = int(os.getenv('RANK_ID')) if os.getenv('RANK_ID') else 0
    data_path = tempfile.mkdtemp(prefix='profiler_data', dir='/tmp')
    profiler_path = os.path.join(data_path, 'profiler/')
    try:
        _train_with_profiler(data_path=data_path, device_target="GPU", profile_memory=False,
                             context_mode=context.PYNATIVE_MODE)
        _check_gpu_profiling_file(profiler_path, device_id)
        _check_host_profiling_file(profiler_path, rank_id)
    finally:
        if os.path.exists(data_path):
            shutil.rmtree(data_path)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_profiler():
    """
    Feature: profiler supports Ascend mode.
    Description: profiling op time, timeline, step trace and host data.
    Expectation: No exception.
    """
    ms.set_context(jit_level="O2")
    rank_id = int(os.getenv('RANK_ID')) if os.getenv('RANK_ID') else 0
    data_path = tempfile.mkdtemp(prefix='profiler_data', dir='/tmp')
    profiler_path = os.path.join(data_path, 'profiler/')
    try:
        _train_with_profiler(data_path=data_path, device_target="Ascend", profile_memory=True)
        _check_d_profiling_file(profiler_path, rank_id)
        _check_d_profiling_step_trace_on_multisubgraph(profiler_path, rank_id)
        _check_host_profiling_file(profiler_path, rank_id)
    finally:
        if os.path.exists(data_path):
            shutil.rmtree(data_path)


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
@pytest.mark.parametrize("profile_framework", ['all', 'time', 'memory', None])
def test_host_profiler(profile_framework):
    """
    Feature: profiler supports Ascend kbyk (kernel-by-kernel) mode.
    Description: profiling kbyk host data.
    Expectation: No exception.
    """
    rank_id = int(os.getenv('RANK_ID')) if os.getenv('RANK_ID') else 0
    data_path = tempfile.mkdtemp(prefix='profiler_data', dir='/tmp')
    profiler_path = os.path.join(data_path, 'profiler/')
    try:
        _train_with_profiler(data_path=data_path, device_target="Ascend", profile_memory=False, only_profile_host=True,
                             profile_framework=profile_framework)
        _check_host_profiling_file(profiler_path, rank_id, profile_framework=profile_framework)
    finally:
        if os.path.exists(data_path):
            shutil.rmtree(data_path)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_kbyk_profiler():
    """
    Feature: profiling Ascend kbyk host data.
    Description: profiling Ascend device and host data.
    Expectation: No exception.
    """
    rank_id = int(os.getenv('RANK_ID')) if os.getenv('RANK_ID') else 0
    data_path = tempfile.mkdtemp(prefix='profiler_data', dir='/tmp')
    profiler_path = os.path.join(data_path, 'profiler/')
    try:
        _train_with_profiler(data_path=data_path, device_target="Ascend", profile_memory=False, host_stack=True)
        _check_d_profiling_file(profiler_path, rank_id)
        _check_host_profiling_file(profiler_path, rank_id)
        _check_kbyk_profiling_file(profiler_path, rank_id)
    finally:
        if os.path.exists(data_path):
            shutil.rmtree(data_path)


def _check_kbyk_profiling_file(profiler_path, rank_id):
    """Check that the framework op_range file exists for the given rank."""
    op_range_file = os.path.join(profiler_path, "FRAMEWORK/op_range_" + str(rank_id))
    assert os.path.isfile(op_range_file)


def _train_with_profiler(device_target, profile_memory, data_path, context_mode=context.GRAPH_MODE,
                         only_profile_host=False, profile_framework='all', host_stack=True):
    """Train LeNet5 on MNIST for one epoch with the Profiler enabled, then analyse the results."""
    context.set_context(mode=context_mode, device_target=device_target)
    ds_train = create_dataset(os.path.join(mnist_path, "train"))
    if ds_train.get_dataset_size() == 0:
        raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")
    if only_profile_host:
        profiler = Profiler(output_path=data_path, op_time=False,
                            parallel_strategy=False, aicore_metrics=-1, data_process=False,
                            profile_framework=profile_framework, host_stack=host_stack, data_simplification=False)
    else:
        profiler = Profiler(profile_memory=profile_memory, output_path=data_path,
                            profile_framework=profile_framework, host_stack=host_stack, data_simplification=False)
    lenet = LeNet5()
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    optim = Momentum(lenet.trainable_params(), learning_rate=0.1, momentum=0.9)
    model = Model(lenet, loss_fn=loss, optimizer=optim, metrics={'acc': Accuracy()})

    model.train(1, ds_train, dataset_sink_mode=True)
    profiler.analyse()
    if device_target != 'Ascend':
        profiler.op_analyse(op_name="Conv2D")

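# Profiler usage pattern exercised by _train_with_profiler above (minimal sketch of
# the same API, assuming `net`, `loss` and `ds_train` are already constructed):
#   profiler = Profiler(output_path='./profiler_data')
#   Model(net, loss_fn=loss).train(1, ds_train, dataset_sink_mode=True)
#   profiler.analyse()
# The Profiler is created before training starts, analyse() runs after training
# finishes, and op_analyse() is additionally queried for non-Ascend targets.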

def _check_gpu_profiling_file(profiler_path, device_id):
    """Check that all expected GPU profiling output files exist."""
    op_detail_file = profiler_path + f'gpu_op_detail_info_{device_id}.csv'
    op_type_file = profiler_path + f'gpu_op_type_info_{device_id}.csv'
    activity_file = profiler_path + f'gpu_activity_data_{device_id}.csv'
    timeline_file = profiler_path + f'gpu_timeline_display_{device_id}.json'
    getnext_file = profiler_path + f'minddata_getnext_profiling_{device_id}.txt'
    pipeline_file = profiler_path + f'minddata_pipeline_raw_{device_id}.csv'
    framework_file = profiler_path + f'gpu_framework_{device_id}.txt'

    gpu_profiler_files = (op_detail_file, op_type_file, activity_file,
                          timeline_file, getnext_file, pipeline_file, framework_file)
    for file in gpu_profiler_files:
        assert os.path.isfile(file)


def _check_d_profiling_step_trace_on_multisubgraph(profiler_path, rank_id):
    """Check that the step trace file exists and has the expected row count."""
    step_trace_file = profiler_path + f'step_trace_raw_{rank_id}_detail_time.csv'
    assert os.path.isfile(step_trace_file)
    with open(step_trace_file, 'r') as fr:
        reader = csv.DictReader(fr)
        row_count = sum(1 for _ in reader)
    assert row_count == 11


def _check_d_profiling_file(profiler_path, rank_id):
    """Check that all expected Ascend profiling output files exist."""
    aicore_file = profiler_path + f'aicore_intermediate_{rank_id}_detail.csv'
    timeline_file = profiler_path + f'ascend_timeline_display_{rank_id}.json'
    aicpu_file = profiler_path + f'aicpu_intermediate_{rank_id}.csv'
    minddata_pipeline_file = profiler_path + f'minddata_pipeline_raw_{rank_id}.csv'
    queue_profiling_file = profiler_path + f'device_queue_profiling_{rank_id}.txt'

    d_profiler_files = (aicore_file, timeline_file, aicpu_file,
                        minddata_pipeline_file, queue_profiling_file)
    for file in d_profiler_files:
        assert os.path.isfile(file)


def _check_cpu_profiling_file(profiler_path, device_id):
    """Check that all expected CPU profiling output files exist."""
    op_detail_file = profiler_path + f'cpu_op_detail_info_{device_id}.csv'
    op_type_file = profiler_path + f'cpu_op_type_info_{device_id}.csv'
    timeline_file = profiler_path + f'cpu_op_execute_timestamp_{device_id}.txt'

    cpu_profiler_files = (op_detail_file, op_type_file, timeline_file)
    for file in cpu_profiler_files:
        assert os.path.isfile(file)


def _check_host_profiling_file(profiler_path, rank_id, profile_framework='all'):
    """Check the host_info outputs expected for the given profile_framework setting."""
    host_dir = os.path.join(profiler_path, 'host_info')
    if profile_framework is None:
        assert not os.path.exists(host_dir)
        return
    if profile_framework in ['all', 'time']:
        timeline_file = os.path.join(host_dir, f'timeline_{rank_id}.json')
        assert os.path.isfile(timeline_file)
    csv_file = os.path.join(host_dir, f'host_info_{rank_id}.csv')
    assert os.path.exists(csv_file)
    with open(csv_file, 'r') as f:
        f_reader = csv.reader(f)
        header = next(f_reader)
        assert header == ['tid', 'pid', 'parent_pid', 'module_name', 'event', 'stage', 'level', 'start_end',
                          'custom_info', 'memory_usage(kB)', 'time_stamp(us)']
        for row in f_reader:
            assert len(row) == 11


@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_pynative_profiler():
    """
    Feature: profiling Ascend PyNative host data.
    Description: profiling PyNative host data.
    Expectation: No exception.
    """
    rank_id = int(os.getenv('RANK_ID')) if os.getenv('RANK_ID') else 0
    data_path = tempfile.mkdtemp(prefix='profiler_data', dir='/tmp')
    profiler_path = os.path.join(data_path, 'profiler/')
    try:
        _train_with_profiler(data_path=data_path, device_target='Ascend', profile_memory=False,
                             context_mode=context.PYNATIVE_MODE, host_stack=True)
        _check_pynative_timeline_host_data(profiler_path, rank_id)
    finally:
        if os.path.exists(data_path):
            shutil.rmtree(data_path)


def _check_pynative_timeline_host_data(profiler_path, rank_id):
    """Check host-side events in the PyNative Ascend timeline (Chrome trace event format)."""
    timeline_display_file = os.path.join(profiler_path, f'ascend_timeline_display_{rank_id}.json')
    assert os.path.isfile(timeline_display_file)
    with open(timeline_display_file, 'r') as fr:
        data = json.load(fr)
    async_ms_dict, async_npu_dict, host_to_device_dict = defaultdict(int), defaultdict(int), defaultdict(int)
    run_op_set, frontend_task_set, device_task_set, launch_task_set, kernel_launch_set \
        = set(), set(), set(), set(), set()

    for d in data:
        ph = d.get('ph')
        cat = d.get('cat')
        name = d.get('name')
        if ph in ('s', 'f'):
            # Flow events: each flow id should appear exactly twice (start and finish).
            if cat == 'async_mindspore':
                async_ms_dict[d.get('id')] += 1
            elif cat == 'async_npu':
                async_npu_dict[d.get('id')] += 1
            elif cat == 'HostToDevice':
                host_to_device_dict[d.get('id')] += 1
        elif ph == 'X':
            # Complete events: collect the host-side stages of each operator launch.
            if 'RunOp' in name:
                assert d.get('args', {}).get('Call stack')
                run_op_set.add(name)
            elif 'FrontendTask' in name:
                frontend_task_set.add(name)
            elif 'DeviceTask' in name:
                device_task_set.add(name)
            elif 'LaunchTask' in name:
                launch_task_set.add(name)
            elif 'KernelLaunch' in name:
                kernel_launch_set.add(name)

    assert run_op_set
    assert frontend_task_set
    assert device_task_set
    assert launch_task_set
    assert kernel_launch_set
    for v in async_ms_dict.values():
        assert v == 2
    for v in async_npu_dict.values():
        assert v == 2
    for v in host_to_device_dict.values():
        assert v == 2