#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Shell and log-parsing helpers.

The functions in this module copy model directories, patch files in place
with ``sed``, poll a shell command until it produces no output, and extract
numeric values (step time, loss) from log files.
"""

import os
import shutil
import subprocess
import time
import re

try:
    from mindspore import log as logger
except ImportError:
    # Only a logger object is needed here; fall back to the standard library
    # so the helpers stay importable where MindSpore is not installed.
    import logging
    logger = logging.getLogger(__name__)

rank_table_path = "/home/workspace/mindspore_config/hccl/rank_table_8p.json"
data_root = "/home/workspace/mindspore_dataset/"
ckpt_root = "/home/workspace/mindspore_dataset/checkpoint"
cur_path = os.path.split(os.path.realpath(__file__))[0]
geir_root = os.path.join(cur_path, "mindspore_geir")
arm_main_path = os.path.join(cur_path, "mindir_310infer_exe")
model_zoo_path = os.path.join(cur_path, "../../../tests/models")


def copy_files(from_, to_, model_name):
    """Copy ``from_/model_name`` into ``to_``, replacing any previous copy.

    Args:
        from_: Directory that contains ``model_name``.
        to_: Destination directory.
        model_name: Name of the file or directory to copy.

    Returns:
        Exit code of ``cp -r`` (0 on success, non-zero on failure).

    Raises:
        ValueError: If ``from_/model_name`` does not exist.
    """
    src = os.path.join(from_, model_name)
    dst = os.path.join(to_, model_name)
    if not os.path.exists(src):
        raise ValueError("There is no file or path", src)
    # rmtree only works on real directories; a stale regular file or symlink
    # at the destination has to be unlinked instead.
    if os.path.isdir(dst) and not os.path.islink(dst):
        shutil.rmtree(dst)
    elif os.path.lexists(dst):
        os.remove(dst)
    # Argument list instead of a shell string so paths containing spaces or
    # shell metacharacters are passed to cp unchanged.
    return subprocess.call(["cp", "-r", src, to_])


def exec_sed_command(old_list, new_list, file):
    """Replace each ``old`` pattern with the matching ``new`` text in ``file``.

    Runs ``sed -i "s#old#new#g" file`` once per pair, in order.

    Args:
        old_list: A pattern, or a list of patterns, to search for.
        new_list: A replacement, or a list of replacements, of equal length.
        file: Path of the file that is edited in place.

    Returns:
        Status of the last ``sed`` invocation; 0 when there was nothing to do.

    Raises:
        ValueError: If the two lists differ in length or a ``sed`` call fails.
    """
    if isinstance(old_list, str):
        old_list = [old_list]
    if isinstance(new_list, str):
        new_list = [new_list]
    if len(old_list) != len(new_list):
        raise ValueError("len(old_list) should be equal to len(new_list)")
    # Defined up front so empty input returns 0 instead of raising.
    ret = 0
    for old, new in zip(old_list, new_list):
        sed_cmd = 'sed -i "s#{0}#{1}#g" {2}'.format(old, new, file)
        ret = os.system(sed_cmd)
        if ret != 0:
            raise ValueError('exec `{}` failed.'.format(sed_cmd))
    return ret


def process_check(cycle_time, cmd, wait_time=5):
    """Poll ``cmd`` until it prints nothing to stdout.

    Args:
        cycle_time: Maximum number of polling attempts.
        cmd: Shell command to run on every attempt.
        wait_time: Seconds to sleep before each attempt.

    Returns:
        True as soon as ``cmd`` produces empty stdout, False if every attempt
        still produced output.
    """
    for i in range(cycle_time):
        time.sleep(wait_time)
        sub = subprocess.Popen(args="{}".format(cmd), shell=True,
                               stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE, universal_newlines=True)
        stdout_data, _ = sub.communicate()
        if not stdout_data:
            logger.info("process execute success.")
            return True
        logger.warning("process is running, please wait {}".format(i))
    logger.error("process execute timeout.")
    return False


def get_perf_data(log_path, search_str="per step time", cmd=None):
    """Return the mean step time extracted from a log.

    Args:
        log_path: Log file to search (also dumped with ``cat`` on failure).
        search_str: Text that marks step-time lines for the default command.
        cmd: Optional shell command that prints one value per line; when
            given it replaces the default grep/awk pipeline.

    Returns:
        Average of the extracted values, excluding the first one.

    Raises:
        RuntimeError: If the command fails or fewer than two values are found.
    """
    if cmd is None:
        get_step_times_cmd = r"""grep -a "{0}" {1}|egrep -v "loss|\]|\["|awk '{{print $(NF-1)}}'""" \
            .format(search_str, log_path)
    else:
        get_step_times_cmd = cmd
    sub = subprocess.Popen(args="{}".format(get_step_times_cmd), shell=True,
                           stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE, universal_newlines=True)
    stdout, _ = sub.communicate()
    # Report the command that actually ran; `cmd` is None for the default.
    if sub.returncode != 0:
        raise RuntimeError("exec {} failed".format(get_step_times_cmd))
    logger.info("execute {} success".format(get_step_times_cmd))
    values = stdout.strip().split("\n")
    # The first value is skipped (presumably a warm-up step -- confirm).
    step_time_list = [float(value) for value in values[1:]]
    if not step_time_list:
        os.system("cat {}".format(log_path))
        raise RuntimeError("step_time_list is empty")
    return sum(step_time_list) / len(step_time_list)


def get_loss_data_list(log_path, search_str="loss is", cmd=None):
    """Return every loss value extracted from a log.

    Args:
        log_path: Log file to search (also dumped with ``cat`` on failure).
        search_str: Text that marks loss lines for the default command.
        cmd: Optional shell command that prints one value per line; when
            given it replaces the default grep/awk pipeline.

    Returns:
        List of floats in the order they were printed.

    Raises:
        RuntimeError: If the command fails or no value is found.
    """
    if cmd is None:
        loss_value_cmd = """ grep -a '{}' {}| awk '{{print $NF}}' """.format(search_str, log_path)
    else:
        loss_value_cmd = cmd
    sub = subprocess.Popen(args="{}".format(loss_value_cmd), shell=True,
                           stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE, universal_newlines=True)
    stdout, _ = sub.communicate()
    if sub.returncode != 0:
        raise RuntimeError("get loss from {} failed".format(log_path))
    logger.info("execute {} success".format(loss_value_cmd))
    # Drop blank lines first: "".split("\n") yields [""], and float("") would
    # raise ValueError before the empty-result check below could run.
    values = [value for value in stdout.strip().split("\n") if value.strip()]
    loss_list = [float(value) for value in values]
    if not loss_list:
        os.system("cat {}".format(log_path))
        raise RuntimeError("loss_list is empty")
    return loss_list


def parse_log_file(pattern, log_path):
    """Collect the first capture group of ``pattern`` from each matching line.

    Args:
        pattern: Regular expression with at least one group that captures a
            number.
        log_path: Log file to scan.

    Returns:
        List of floats, one per matching line; empty if nothing matched, in
        which case the pattern and the log content are printed.
    """
    value_list = []
    with open(log_path, "r") as file:
        for line in file:
            match_result = re.search(pattern, line)
            if match_result is not None:
                value_list.append(float(match_result.group(1)))
    if not value_list:
        print("pattern is", pattern)
        os.system("cat {}".format(log_path))
    return value_list