# model_zoo_tests/centernet/test_centernet.py

# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import pytest

from tests.st.model_zoo_tests import utils


@pytest.mark.level1
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_single
def test_center_net():
    """Run distributed CenterNet training and check per-step time and loss.

    Steps performed:
      1. Copy the ``centernet`` model directory next to this test file.
      2. Patch ``train.py`` and the distributed launch-command generator
         in place through ``utils.exec_sed_command``.
      3. Launch ``scripts/run_distributed_train_ascend.sh`` from the copied
         model directory and wait on ``utils.process_check``.
      4. For each of the 8 ``LOG<i>/training_log.txt`` files, assert the
         value returned by ``utils.get_perf_data`` is below 435, then assert
         the mean of the last parsed loss of every log is below 58.8.
    """
    # Must stay in sync with the "rank_size = 8" substitution below.
    device_num = 8
    # NOTE(review): unit of the per-step figure is defined by
    # utils.get_perf_data (presumably milliseconds) -- confirm there.
    max_per_step_time = 435
    max_mean_loss = 58.8

    cur_path = os.path.dirname(os.path.abspath(__file__))
    model_path = "{}/../../../../tests/models/research/cv".format(cur_path)
    model_name = "centernet"
    utils.copy_files(model_path, cur_path, model_name)
    cur_model_path = os.path.join(cur_path, model_name)

    # Replace the repeat count and the data sink steps in train.py with
    # fixed literals (5 and 20).
    old_list = ['new_repeat_count, dataset', 'args_opt.data_sink_steps']
    new_list = ['5, dataset', '20']
    utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py"))

    # Rewrite the launcher generator: the inserted ''' markers pair up so the
    # code between them becomes a string literal, rank_size is forced to 8 and
    # the rank-table lookups are switched to the
    # group_list / instance_list / devices keys.
    old_list = ["device_ips = {}", "device_ip.strip()",
                "rank_size = 0", "this_server = server",
                "this_server\\[\\\"device\\\"\\]",
                "instance\\[\\\"device_id\\\"\\]"]
    new_list = ["device_ips = {}\\n    '''", "device_ip.strip()\\n    '''",
                "rank_size = 8\\n    this_server = hccl_config[\\\"group_list\\\"][0]\\n    '''",
                "this_server = server\\n    '''",
                "this_server[\\\"instance_list\\\"]",
                "instance[\\\"devices\\\"][0][\\\"device_id\\\"]"]
    generator_cmd_file = "scripts/ascend_distributed_launcher/get_distribute_train_cmd.py"
    utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, generator_cmd_file))

    dataset_path = os.path.join(utils.data_root, "coco/coco2017/mindrecord_train/centernet_mindrecord")
    # cd to the absolute copy of the model (not a CWD-relative "centernet") so
    # the launch works regardless of where pytest was started, and use "&&" so
    # the script is never run from a wrong directory if the cd fails.
    exec_network_shell = "cd {0} && bash scripts/run_distributed_train_ascend.sh {1} {2}"\
        .format(cur_model_path, dataset_path, utils.rank_table_path)
    os.system(exec_network_shell)

    # NOTE(review): process_check presumably polls this ps pipeline until the
    # training processes are gone or the limit (120) is hit -- confirm in utils.
    cmd = "ps -ef |grep train.py | grep coco | grep -v grep"
    ret = utils.process_check(120, cmd)
    assert ret

    # One training log per device: <model>/LOG<i>/training_log.txt
    log_files = [os.path.join(cur_model_path, "LOG{}".format(i), "training_log.txt")
                 for i in range(device_num)]

    for log_file in log_files:
        per_step_time = utils.get_perf_data(log_file)
        assert per_step_time < max_per_step_time, \
            "per step time {} in {} is not below {}".format(per_step_time, log_file, max_per_step_time)

    loss_list = []
    for log_file in log_files:
        # Take the 14th whitespace-separated field of every "outputs are"
        # line and keep what precedes the first ")".
        loss_cmd = "grep -nr \"outputs are\" {} | awk '{{print $14}}' | awk -F\")\" '{{print $1}}'"\
            .format(log_file)
        loss = utils.get_loss_data_list(log_file, cmd=loss_cmd)
        # Fail with a readable message instead of an IndexError on loss[-1].
        assert loss, "no loss values parsed from {}".format(log_file)
        loss_list.append(loss[-1])
    assert sum(loss_list) / len(loss_list) < max_mean_loss