#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Shared helpers for test scripts.

Defines the common config/dataset/checkpoint paths and small utilities to
copy model directories, patch files with ``sed``, poll a shell command until
it reports nothing, and extract loss / step-time numbers from log files.
"""

import os
import shutil
import subprocess
import time
import re
import numpy as np

try:
    from mindspore import log as logger
except ImportError:
    # Fall back to the standard logger so the log-parsing helpers remain
    # importable and usable where MindSpore itself is not installed.
    import logging
    logger = logging.getLogger(__name__)

rank_table_path = "/home/workspace/mindspore_config/hccl/rank_table_8p.json"
data_root = "/home/workspace/mindspore_dataset/"
ckpt_root = "/home/workspace/mindspore_dataset/checkpoint"
cur_path = os.path.split(os.path.realpath(__file__))[0]
geir_root = os.path.join(cur_path, "mindspore_geir")
arm_main_path = os.path.join(cur_path, "mindir_310infer_exe")
model_zoo_path = os.path.join(cur_path, "../../../tests/models")


def _run_shell(cmd):
    """Run `cmd` through the shell and return ``(returncode, stdout_text)``.

    For a pipeline the return code is the one reported by the shell, i.e.
    normally that of the last command in the pipe.
    """
    sub = subprocess.Popen(args="{}".format(cmd), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE, universal_newlines=True)
    stdout, _ = sub.communicate()
    return sub.returncode, stdout


def _dump_log(log_path):
    """Print the log to stdout so a failed extraction can be diagnosed."""
    os.system("cat {}".format(log_path))


def _non_blank_lines(text):
    """Return the lines of `text` that contain something other than whitespace."""
    return [line for line in text.splitlines() if line.strip()]


def copy_files(from_, to_, model_name):
    """Copy ``from_/model_name`` into the directory `to_`, replacing any old copy.

    Args:
        from_: Directory that contains `model_name`.
        to_: Destination directory.
        model_name: Name of the file or directory to copy.

    Returns:
        The exit status of the ``cp -r`` command as reported by ``os.system``.

    Raises:
        ValueError: If ``from_/model_name`` does not exist.
    """
    src = os.path.join(from_, model_name)
    dst = os.path.join(to_, model_name)
    if not os.path.exists(src):
        raise ValueError("There is no file or path", src)
    if os.path.exists(dst):
        # rmtree only works on real directories; a stale plain file or
        # symlink has to be unlinked instead.
        if os.path.isdir(dst) and not os.path.islink(dst):
            shutil.rmtree(dst)
        else:
            os.remove(dst)
    return os.system("cp -r {0} {1}".format(src, to_))


def exec_sed_command(old_list, new_list, file):
    """Replace every `old` with the matching `new` in `file` using ``sed -i``.

    Args:
        old_list: A sed pattern or a list of patterns to search for.
        new_list: A replacement or a list of replacements, same length as
            `old_list`.
        file: Path of the file to edit in place.

    Returns:
        The exit status of the last ``sed`` invocation (0 when nothing had
        to be replaced).

    Raises:
        ValueError: If the lists differ in length or a ``sed`` call fails.
    """
    if isinstance(old_list, str):
        old_list = [old_list]
    if isinstance(new_list, str):
        # Wrap the replacement itself; wrapping it into old_list would
        # overwrite the search pattern.
        new_list = [new_list]
    if len(old_list) != len(new_list):
        raise ValueError("len(old_list) should be equal to len(new_list)")
    ret = 0
    for old, new in zip(old_list, new_list):
        ret = os.system('sed -i "s#{0}#{1}#g" {2}'.format(old, new, file))
        if ret != 0:
            raise ValueError('exec `sed -i "s#{0}#{1}#g" {2}` failed.'.format(old, new, file))
    return ret


def process_check(cycle_time, cmd, wait_time=5):
    """Poll `cmd` until it prints nothing to stdout.

    Args:
        cycle_time: Maximum number of polls.
        cmd: Shell command to run on every poll.
        wait_time: Seconds to sleep before each poll.

    Returns:
        True as soon as `cmd` produces empty stdout, False if it still
        produced output after `cycle_time` polls.
    """
    for i in range(cycle_time):
        time.sleep(wait_time)
        _, stdout_data = _run_shell(cmd)
        if not stdout_data:
            logger.info("process execute success.")
            return True
        logger.warning("process is running, please wait {}".format(i))
    logger.error("process execute execute timeout.")
    return False


def get_perf_data(log_path, search_str="per step time", cmd=None):
    """Return the mean of the step times extracted from a log.

    Args:
        log_path: Log file to search.
        search_str: Text that marks a step-time line (default command only).
        cmd: Optional shell command that prints one number per line; when
            None a grep/awk pipeline over `log_path` is used.

    Returns:
        The mean of all extracted values except the first one.

    Raises:
        RuntimeError: If the command fails or fewer than two values are found.
    """
    if cmd is None:
        get_step_times_cmd = r"""grep -a "{0}" {1}|egrep -v "loss|\]|\["|awk '{{print $(NF-1)}}'""" \
            .format(search_str, log_path)
    else:
        get_step_times_cmd = cmd
    returncode, stdout = _run_shell(get_step_times_cmd)
    if returncode != 0:
        raise RuntimeError("exec {} failed".format(get_step_times_cmd))
    logger.info("execute {} success".format(get_step_times_cmd))
    stdout = stdout.strip().split("\n")
    # The first extracted value is always left out of the average.
    step_time_list = list(map(float, stdout[1:]))
    if not step_time_list:
        _dump_log(log_path)
        raise RuntimeError("step_time_list is empty")
    per_step_time = sum(step_time_list) / len(step_time_list)
    return per_step_time


def get_loss_data_list(log_path, search_str="loss is", cmd=None):
    """Return the loss values found in a log as a list of floats.

    Args:
        log_path: Log file to search.
        search_str: Text that marks a loss line (default command only).
        cmd: Optional shell command that prints one number per line; when
            None ``grep`` + ``awk`` print the last field of matching lines.

    Returns:
        The extracted values in log order.

    Raises:
        RuntimeError: If the command fails or no value is found.
    """
    if cmd is None:
        loss_value_cmd = """ grep -a '{}' {}| awk '{{print $NF}}' """.format(search_str, log_path)
    else:
        loss_value_cmd = cmd
    returncode, stdout = _run_shell(loss_value_cmd)
    if returncode != 0:
        raise RuntimeError("get loss from {} failed".format(log_path))
    logger.info("execute {} success".format(loss_value_cmd))
    # Drop blank lines first: empty output would otherwise reach float("")
    # and hide the diagnostic below behind a ValueError.
    loss_list = list(map(float, _non_blank_lines(stdout)))
    if not loss_list:
        _dump_log(log_path)
        raise RuntimeError("loss_list is empty")
    return loss_list


def parse_log_file(pattern, log_path):
    """Collect ``float(group(1))`` for every line of the log matching `pattern`.

    Args:
        pattern: Regular expression with at least one capturing group.
        log_path: Log file to scan.

    Returns:
        The list of captured values; empty if nothing matched, in which case
        the pattern and the log content are printed for diagnosis.
    """
    value_list = []
    with open(log_path, "r") as log_file:
        for line in log_file:
            match_result = re.search(pattern, line)
            if match_result is not None:
                value_list.append(float(match_result.group(1)))
    if not value_list:
        print("pattern is", pattern)
        _dump_log(log_path)
    return value_list


def replace_check_param(head_path):
    """Patch the bert ``adam.py`` under `head_path` for the changed validator import.

    Rewrites the ``Validator`` import to ``from mindspore import _checkparam
    as validator``, removes the ``Rel`` import and replaces every remaining
    ``Rel`` with ``validator``.
    """
    file_path = "{}/tests/models/official/nlp/bert/src/adam.py".format(head_path)
    old_list = ["from mindspore._checkparam import Validator as validator"]
    new_list = ["from mindspore import _checkparam as validator"]
    exec_sed_command(old_list, new_list, file_path)

    old_list = ["from mindspore._checkparam import Rel"]
    new_list = [""]
    exec_sed_command(old_list, new_list, file_path)

    old_list = ["Rel"]
    new_list = ["validator"]
    exec_sed_command(old_list, new_list, file_path)


def _strings_to_nums(str_list, is_loss=False):
    """Convert extracted strings to floats (or lists of floats when `is_loss`).

    Everything except digits, dots and commas is stripped from each string
    before conversion; with `is_loss` the remainder is split on commas.
    """
    converted = []
    for res_str in str_list:
        clean_res_str = "".join(re.findall(r"[\d\.,]", res_str))
        if is_loss:
            converted.append(list(map(float, clean_res_str.split(","))))
        else:
            converted.append(float(clean_res_str))
    return converted


def get_num_from_log(log_path, search_str, cmd=None, is_loss=False):
    """Return a number or a list of numbers extracted from a log.

    Args:
        log_path: Log file to search.
        search_str: Text that marks the lines of interest (default command only).
        cmd: Optional shell command that prints one result per line; when
            None ``grep`` + ``awk`` print the last field of matching lines.
        is_loss: When True each result is a comma-separated list and the list
            parsed from the first result line is returned.

    Returns:
        With `is_loss`, the list of floats from the first result line;
        otherwise the mean of all extracted values.

    Raises:
        RuntimeError: If the command fails or produces no result line.
    """
    if cmd is None:
        loss_value_cmd = """ grep -a '{}' {}| awk '{{print $NF}}' """.format(search_str, log_path)
    else:
        loss_value_cmd = cmd
    returncode, stdout = _run_shell(loss_value_cmd)
    if returncode != 0:
        raise RuntimeError("get loss from {} failed".format(log_path))
    logger.info("execute {} success".format(loss_value_cmd))
    result_lines = _non_blank_lines(stdout)
    if not result_lines:
        # Same diagnostic as get_loss_data_list instead of failing in float("").
        _dump_log(log_path)
        raise RuntimeError("get num from {} failed, no value found".format(log_path))
    res = _strings_to_nums(result_lines, is_loss)
    if is_loss:
        return res[0]
    return np.mean(res)