• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4""" File Description
5Details
6"""
7
8import os
9import shutil
10import subprocess
11import time
12import re
13import numpy as np
14from mindspore import log as logger
15
# Hard-coded absolute paths: HCCL rank table, dataset root and checkpoint root.
rank_table_path = "/home/workspace/mindspore_config/hccl/rank_table_8p.json"
data_root = "/home/workspace/mindspore_dataset/"
ckpt_root = "/home/workspace/mindspore_dataset/checkpoint"

# Paths derived from the directory that contains this file (symlinks resolved).
cur_path = os.path.dirname(os.path.realpath(__file__))
geir_root = os.path.join(cur_path, "mindspore_geir")
arm_main_path = os.path.join(cur_path, "mindir_310infer_exe")
model_zoo_path = os.path.join(cur_path, "../../../tests/models")
23
24
def copy_files(from_, to_, model_name):
    """Copy ``from_/model_name`` into ``to_``, replacing any previous copy.

    Args:
        from_: Directory that contains the entry to copy.
        to_: Destination passed to ``cp -r`` as the target.
        model_name: Name of the file or directory inside ``from_``.

    Returns:
        Exit status of the ``cp`` command; 0 means success.

    Raises:
        ValueError: If ``from_/model_name`` does not exist.
    """
    src = os.path.join(from_, model_name)
    dst = os.path.join(to_, model_name)
    if not os.path.exists(src):
        raise ValueError("There is no file or path", src)
    # Remove a stale destination first. rmtree only works on real directories,
    # so plain files and symlinks are unlinked instead.
    if os.path.isdir(dst) and not os.path.islink(dst):
        shutil.rmtree(dst)
    elif os.path.exists(dst):
        os.remove(dst)
    # Pass an argument list (no shell) so paths containing spaces or shell
    # metacharacters are copied verbatim.
    return subprocess.call(["cp", "-r", src, to_])
31
32
def exec_sed_command(old_list, new_list, file):
    """Apply ``sed -i "s#old#new#g"`` to ``file`` for each (old, new) pair.

    Args:
        old_list: Text (or list of texts) to search for.
        new_list: Replacement text (or list of texts), paired by position.
        file: Path of the file edited in place.

    Returns:
        Status of the last ``sed`` invocation, or 0 when there were no pairs.

    Raises:
        ValueError: If the two lists differ in length or a ``sed`` call fails.
    """
    if isinstance(old_list, str):
        old_list = [old_list]
    if isinstance(new_list, str):
        # Wrap the replacement into new_list so both sides stay paired.
        new_list = [new_list]
    if len(old_list) != len(new_list):
        raise ValueError("len(old_list) should be equal to len(new_list)")
    ret = 0  # defined up front so empty input returns success
    for old, new in zip(old_list, new_list):
        sed_cmd = 'sed -i "s#{0}#{1}#g" {2}'.format(old, new, file)
        ret = os.system(sed_cmd)
        if ret != 0:
            raise ValueError('exec `{}` failed.'.format(sed_cmd))
    return ret
45
46
def process_check(cycle_time, cmd, wait_time=5):
    """Poll a shell command until it prints nothing on stdout.

    Each round sleeps ``wait_time`` seconds, runs ``cmd`` through the shell and
    treats empty stdout as completion of the watched process.

    Args:
        cycle_time: Maximum number of polling rounds.
        cmd: Shell command whose stdout is inspected.
        wait_time: Seconds to sleep before each round.

    Returns:
        True as soon as a round yields empty stdout, False if every round
        produced output.
    """
    for attempt in range(cycle_time):
        # The sleep precedes the check, so the first probe is also delayed.
        time.sleep(wait_time)
        probe = subprocess.Popen(args="{}".format(cmd), shell=True, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                 universal_newlines=True)
        stdout_data, _ = probe.communicate()
        if not stdout_data:
            logger.info("process execute success.")
            return True
        logger.warning("process is running, please wait {}".format(attempt))
    logger.error("process execute timeout.")
    return False
59
60
def get_perf_data(log_path, search_str="per step time", cmd=None):
    """Return the average step time extracted from a log file.

    Args:
        log_path: Log file to search; it is dumped with ``cat`` when no value
            is found.
        search_str: Text that marks the lines holding step times.
        cmd: Optional shell command that replaces the default grep/awk
            pipeline; it must print one number per line.

    Returns:
        Mean of the extracted values, excluding the first one.

    Raises:
        RuntimeError: If the command exits non-zero or no values remain after
            the first one is dropped.
    """
    if cmd is None:
        get_step_times_cmd = r"""grep -a "{0}" {1}|egrep -v "loss|\]|\["|awk '{{print $(NF-1)}}'""" \
            .format(search_str, log_path)
    else:
        get_step_times_cmd = cmd
    sub = subprocess.Popen(args="{}".format(get_step_times_cmd), shell=True,
                           stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE, universal_newlines=True)
    stdout, _ = sub.communicate()
    # Report the command that actually ran; ``cmd`` is None for the default pipeline.
    if sub.returncode != 0:
        raise RuntimeError("exec {} failed".format(get_step_times_cmd))
    logger.info("execute {} success".format(get_step_times_cmd))
    # The first sample is skipped (presumably a warm-up step -- TODO confirm).
    step_time_list = [float(item) for item in stdout.strip().split("\n")[1:]]
    if not step_time_list:
        # Dump the log to stdout to help diagnose the missing data.
        os.system("cat {}".format(log_path))
        raise RuntimeError("step_time_list is empty")
    return sum(step_time_list) / len(step_time_list)
82
83
def get_loss_data_list(log_path, search_str="loss is", cmd=None):
    """Return the loss values found in a log file as a list of floats.

    Args:
        log_path: Log file to search; it is dumped with ``cat`` when no value
            is found.
        search_str: Text that marks the lines holding a loss value.
        cmd: Optional shell command that replaces the default grep/awk
            pipeline; it must print one number per line.

    Returns:
        List of floats, one per output line of the command.

    Raises:
        RuntimeError: If the command exits non-zero or prints no values.
    """
    if cmd is None:
        loss_value_cmd = """ grep -a '{}' {}| awk '{{print $NF}}' """.format(search_str, log_path)
    else:
        loss_value_cmd = cmd
    sub = subprocess.Popen(args="{}".format(loss_value_cmd), shell=True, stdin=subprocess.PIPE,
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    stdout, _ = sub.communicate()
    if sub.returncode != 0:
        raise RuntimeError("get loss from {} failed".format(log_path))
    # Log the command that actually ran; ``cmd`` is None for the default pipeline.
    logger.info("execute {} success".format(loss_value_cmd))
    # Skip blank lines so empty output reaches the explicit check below
    # instead of failing inside float("").
    loss_list = [float(item) for item in stdout.split("\n") if item.strip()]
    if not loss_list:
        # Dump the log to stdout to help diagnose the missing data.
        os.system("cat {}".format(log_path))
        raise RuntimeError("loss_list is empty")
    return loss_list
102
103
def parse_log_file(pattern, log_path):
    """Collect the first capture group of ``pattern`` from every matching line.

    Args:
        pattern: Regular expression with at least one group that captures a
            number.
        log_path: Text file scanned line by line.

    Returns:
        List of floats in file order. When nothing matches, the pattern is
        printed, the file is dumped with ``cat`` and an empty list is returned.
    """
    matcher = re.compile(pattern)
    with open(log_path, "r") as handle:
        hits = (matcher.search(row) for row in handle)
        value_list = [float(hit.group(1)) for hit in hits if hit is not None]
    if value_list:
        return value_list
    # Nothing matched: show the pattern and the whole log for debugging.
    print("pattern is", pattern)
    os.system("cat {}".format(log_path))
    return value_list
116
def replace_check_param(head_path):
    """Rewrite validator imports and ``Rel`` references in the BERT ``adam.py``.

    The file ``tests/models/official/nlp/bert/src/adam.py`` under ``head_path``
    is edited in place with three sed substitutions.

    Args:
        head_path: Root directory that contains the ``tests/models`` tree.
    """
    file_path = "{}/tests/models/official/nlp/bert/src/adam.py".format(head_path)
    # Applied in order: the blanket "Rel" rewrite runs last and replaces every
    # remaining occurrence of that text in the file.
    substitutions = (
        ("from mindspore._checkparam import Validator as validator",
         "from mindspore import _checkparam as validator"),
        ("from mindspore._checkparam import Rel", ""),
        ("Rel", "validator"),
    )
    for old_text, new_text in substitutions:
        exec_sed_command([old_text], [new_text], file_path)
131
132
def get_num_from_log(log_path, search_str, cmd=None, is_loss=False):
    """Return a number or a list of numbers extracted from a log file.

    Args:
        log_path: Log file to search.
        search_str: Text that marks the lines of interest.
        cmd: Optional shell command that replaces the default grep/awk
            pipeline.
        is_loss: When True, each output line is read as a comma-separated list
            of numbers and the list from the first line is returned.

    Returns:
        The list of floats from the first output line when ``is_loss`` is
        True, otherwise the mean of one float per output line.

    Raises:
        RuntimeError: If the command exits non-zero.
    """

    def string_list_to_num(num_list, is_loss=False):
        """Convert each raw string to a float, or to a list of floats for loss."""
        converted = []
        for res_str in num_list:
            # Keep only digits, dots and commas (drops brackets and other text).
            clean_res_str = "".join(re.findall(r"[\d\.,]", res_str))
            if is_loss:
                converted.append([float(part) for part in clean_res_str.split(",")])
            else:
                converted.append(float(clean_res_str))
        return converted

    if cmd is None:
        loss_value_cmd = """ grep -a '{}' {}| awk '{{print $NF}}' """.format(search_str, log_path)
    else:
        loss_value_cmd = cmd
    sub = subprocess.Popen(args="{}".format(loss_value_cmd), shell=True, stdin=subprocess.PIPE,
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    stdout, _ = sub.communicate()
    if sub.returncode != 0:
        raise RuntimeError("get loss from {} failed".format(log_path))
    # Log the command that actually ran; ``cmd`` is None for the default pipeline.
    logger.info("execute {} success".format(loss_value_cmd))
    res = string_list_to_num(stdout.strip().split("\n"), is_loss)
    if is_loss:
        return res[0]
    return np.mean(res)
161