#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# Launches onehot_model_parallel.py under pytest once per device, in parallel.
#
# For every index i in [0, DEVICE_NUM) the script creates a fresh working
# directory ${BASE_PATH}/onehot_model_parallel<i>, copies the test file into
# it, exports RANK_ID and DEVICE_ID as i, and starts pytest in the background
# with its output redirected to onehot_model_parallel_log<i>.log. It then
# waits for the workers in index order.
#
# Exit status: 0 when every worker exits 0; 1 as soon as a waited-for worker
# reports a non-zero status.
set -e

BASE_PATH=$(cd "$(dirname "$0")"; pwd)
CONFIG_PATH=/home/workspace/mindspore_config
export DEVICE_NUM=8
export RANK_SIZE=$DEVICE_NUM
ulimit -n 65535
source "${BASE_PATH}/env.sh"
unset SLOG_PRINT_TO_STDOUT
export MINDSPORE_HCCL_CONFIG_PATH="${CONFIG_PATH}/hccl/rank_table_${DEVICE_NUM}p.json"

process_pid=()
for ((i = 0; i < DEVICE_NUM; i++)); do
    work_dir="${BASE_PATH:?}/onehot_model_parallel${i}"
    # Start each worker from a clean directory so stale logs never leak in.
    rm -rf "${work_dir}"
    mkdir "${work_dir}"
    cp -r "${BASE_PATH}/onehot_model_parallel.py" "${work_dir}/"
    cd "${work_dir}"
    export RANK_ID=${i}
    export DEVICE_ID=${i}
    echo "start training for device $i"
    # Snapshot the environment the worker is started with, for debugging.
    env > "env${i}.log"
    pytest -s -v onehot_model_parallel.py > "onehot_model_parallel_log${i}.log" 2>&1 &
    process_pid[i]=$!
done

for ((i = 0; i < DEVICE_NUM; i++)); do
    # Capture the status through `||`: with `set -e` active, a bare failing
    # `wait` would terminate the script before the error could be reported.
    status=0
    wait "${process_pid[i]}" || status=$?
    if [ "${status}" != "0" ]; then
        echo "[ERROR] test_onehot_model_parallel failed. status: ${status}"
        exit 1
    else
        echo "[INFO] test_onehot_model_parallel success."
    fi
done

exit 0