#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Prepare environment for mindspore gpu compilation on Ubuntu 18.04.
#
# This file will:
#   - change deb source to huaweicloud mirror
#   - install compile dependencies via apt like cmake, gcc
#   - install python3 & pip3 via apt and set it to default
#   - install CUDA by run file and cudnn via apt.
#   - compile and install Open MPI if OPENMPI is set to on.
#   - install LLVM if LLVM is set to on.
#
# Arguments:
#   - PYTHON_VERSION: python version to install. [3.7(default), 3.8, 3.9]
#   - CUDA_VERSION: CUDA version to install. [10.1, 11.1, 11.6(default)]
#   - OPENMPI: whether to install optional package Open MPI for distributed training. [on, off(default)]
#   - LLVM: whether to install optional dependency LLVM for graph kernel fusion. [on, off(default)]
#
# Usage:
#   Run script like `bash -i ./ubuntu-gpu-source.sh`.
#   To set arguments, run it as `PYTHON_VERSION=3.9 CUDA_VERSION=10.1 OPENMPI=on bash -i ./ubuntu-gpu-source.sh`.
# ----------------------------------------------------------------------------
# Configuration and pre-flight checks
# ----------------------------------------------------------------------------

# Abort on the first failing command.
set -e

# User-tunable settings; each may be overridden from the environment.
PYTHON_VERSION=${PYTHON_VERSION:-3.7}
CUDA_VERSION=${CUDA_VERSION:-11.6}
OPENMPI=${OPENMPI:-off}
LLVM=${LLVM:-off}

# Ubuntu release with every non-digit removed (e.g. "18.04" -> "1804"); this
# is the form used in the NVIDIA repository URLs.
# lsb_release is only installed further down by this script, so fall back to
# /etc/os-release when it is not available yet.
if command -v lsb_release > /dev/null 2>&1; then
  release_info=$(lsb_release -rs)
else
  release_info=$(. /etc/os-release && printf '%s' "$VERSION_ID")
fi
UBUNTU_VERSION=${release_info//[!0-9]/}

if [[ "$UBUNTU_VERSION" == "2004" && "$CUDA_VERSION" == "10.1" ]]; then
  echo "CUDA 10.1 is not supported on Ubuntu 20.04" >&2
  exit 1
fi

available_py_version=(3.7 3.8 3.9)
if [[ " ${available_py_version[*]} " != *" $PYTHON_VERSION "* ]]; then
  echo "PYTHON_VERSION is '$PYTHON_VERSION', but available versions are [${available_py_version[*]}]." >&2
  exit 1
fi

# 11.6 is the default CUDA_VERSION, so it must be listed here; otherwise a run
# with default settings is rejected.
available_cuda_version=(10.1 11.1 11.6)
if [[ " ${available_cuda_version[*]} " != *" $CUDA_VERSION "* ]]; then
  echo "CUDA_VERSION is '$CUDA_VERSION', but available versions are [${available_cuda_version[*]}]." >&2
  exit 1
fi

#######################################
# Tell whether one dotted version is strictly lower than another.
# The comparison is a version comparison (GNU `sort -V`), not a string
# comparison: "96.43" is lower than "418.39" even though "9" sorts after "4".
# Arguments: $1 - version to test
#            $2 - version to compare against
# Returns:   0 if $1 < $2, non-zero otherwise
#######################################
version_lt() {
  [[ "$1" != "$2" ]] && printf '%s\n' "$1" "$2" | sort -C -V
}

# Minimum NVIDIA driver version for each supported CUDA version.
declare -A minimum_driver_version_map=()
minimum_driver_version_map["10.1"]="418.39"
minimum_driver_version_map["11.1"]="450.80.02"
minimum_driver_version_map["11.6"]="510.39.01"
minimum_driver_version=${minimum_driver_version_map[$CUDA_VERSION]}
driver_version=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader --id=0)
if version_lt "$driver_version" "$minimum_driver_version"; then
  echo "CUDA $CUDA_VERSION minimum required driver version is $minimum_driver_version," \
    "but current nvidia driver version is $driver_version, please upgrade your driver manually." >&2
  exit 1
fi

#######################################
# Prepend a directory to a colon-separated environment variable by appending
# an `export` line to ~/.bashrc. Nothing is written when the directory is
# already part of the variable's current value.
# Arguments: $1 - name of the environment variable (e.g. PATH)
#            $2 - directory to add
# Outputs:   appends one line to ~/.bashrc
#######################################
add_env() {
  local name=$1
  local value=$2
  if [[ ":${!name}:" != *":${value}:"* ]]; then
    # ${NAME:+:${NAME}} avoids a trailing ':' when the variable is empty; an
    # empty entry in PATH/LD_LIBRARY_PATH means "current directory".
    printf 'export %s=%s${%s:+:${%s}}\n' "$name" "$value" "$name" "$name" >> ~/.bashrc
  fi
}

# ----------------------------------------------------------------------------
# System packages
# ----------------------------------------------------------------------------

# use huaweicloud mirror in China
sudo sed -i "s@http://.*archive.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list
sudo sed -i "s@http://.*security.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list

# base packages (wget is used below and later in this script, so install it
# explicitly instead of assuming the image ships it)
sudo apt-get update
sudo apt-get install software-properties-common lsb-release -y
sudo apt-get install curl wget tcl automake autoconf libtool gcc-7 git libgmp-dev patch libnuma-dev flex -y

# Distribution codename (e.g. "bionic"); lsb-release is installed by now.
ubuntu_codename=$(lsb_release -cs)

# cmake from the Kitware apt repository
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | sudo apt-key add -
sudo apt-add-repository -y "deb https://apt.kitware.com/ubuntu/ ${ubuntu_codename} main"
sudo apt-get install cmake -y

# optional dependency LLVM for graph-computation fusion
if [[ "$LLVM" == "on" ]]; then
  wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
  # Use the repository that matches the running release rather than a
  # hard-coded "bionic".
  sudo add-apt-repository -y \
    "deb http://apt.llvm.org/${ubuntu_codename}/ llvm-toolchain-${ubuntu_codename}-12 main"
  sudo apt-get update
  sudo apt-get install llvm-12-dev -y
fi

# optional openmpi for distributed training
if [[ "$OPENMPI" == "on" ]]; then
  echo "installing openmpi"
  openmpi_version=4.1.4
  openmpi_prefix=/usr/local/openmpi-${openmpi_version}
  # Build inside a subshell so the caller's working directory is untouched.
  (
    cd /tmp
    curl -fLO "https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${openmpi_version}.tar.gz"
    tar xzf "openmpi-${openmpi_version}.tar.gz"
    cd "openmpi-${openmpi_version}"
    ./configure --prefix="${openmpi_prefix}"
    make
    sudo make install
  )
  add_env PATH "${openmpi_prefix}/bin"
  add_env LD_LIBRARY_PATH "${openmpi_prefix}/lib"
fi

# python: install the requested interpreter and make `python` point at it
sudo add-apt-repository -y ppa:deadsnakes/ppa
sudo apt-get install \
  "python${PYTHON_VERSION}" \
  "python${PYTHON_VERSION}-dev" \
  "python${PYTHON_VERSION}-distutils" \
  python3-pip -y
sudo update-alternatives --install /usr/bin/python python "/usr/bin/python${PYTHON_VERSION}" 100
# ----------------------------------------------------------------------------
# pip
# ----------------------------------------------------------------------------

# Upgrade pip through the Tsinghua mirror and make that mirror the default.
python -m pip install -U pip -i https://pypi.tuna.tsinghua.edu.cn/simple
# Add the alias only once so that re-running the script does not pile up
# duplicate lines in ~/.bashrc.
pip_alias="alias pip='python -m pip'"
if ! grep -qxF -- "$pip_alias" ~/.bashrc 2> /dev/null; then
  printf '%s\n' "$pip_alias" >> ~/.bashrc
fi
python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple

# ----------------------------------------------------------------------------
# CUDA toolkit (run file) and cuDNN (apt)
# ----------------------------------------------------------------------------
echo "installing CUDA and cuDNN"
declare -A cuda_url_map=()
cuda_url_map["10.1"]=https://developer.download.nvidia.cn/compute/cuda/10.1/Prod/local_installers/cuda_10.1.243_418.87.00_linux.run
cuda_url_map["11.1"]=https://developer.download.nvidia.cn/compute/cuda/11.1.1/local_installers/cuda_11.1.1_455.32.00_linux.run
cuda_url_map["11.6"]=https://developer.download.nvidia.cn/compute/cuda/11.6.0/local_installers/cuda_11.6.0_510.39.01_linux.run
cuda_url=${cuda_url_map[$CUDA_VERSION]}
if [[ -z "$cuda_url" ]]; then
  echo "No CUDA installer is known for CUDA_VERSION '$CUDA_VERSION'." >&2
  exit 1
fi
cuda_installer=${cuda_url##*/}
# Download and run from /tmp inside a subshell so the caller's working
# directory is left alone.
(
  cd /tmp
  # -O forces the output name: a plain `wget URL` would save to "<name>.1"
  # when a stale or partial file from an earlier run exists, and the old file
  # would then be executed.
  wget -O "$cuda_installer" "$cuda_url"
  sudo sh "$cuda_installer" --silent --toolkit
)

nvidia_repo_root=https://developer.download.nvidia.cn/compute
# NOTE(review): NVIDIA rotated the CUDA repository signing key in 2022 (new
# key file 3bf863cc.pub) - confirm that 7fa2af80.pub is still accepted here.
sudo apt-key adv --fetch-keys "${nvidia_repo_root}/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/7fa2af80.pub"
sudo add-apt-repository -y "deb ${nvidia_repo_root}/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/ /"
sudo add-apt-repository -y "deb ${nvidia_repo_root}/machine-learning/repos/ubuntu${UBUNTU_VERSION}/x86_64/ /"
sudo apt-get update

# cuDNN runtime and development packages pinned per CUDA version.
declare -A cudnn_name_map=()
cudnn_name_map["10.1"]="libcudnn7=7.6.5.32-1+cuda10.1 libcudnn7-dev=7.6.5.32-1+cuda10.1"
cudnn_name_map["11.1"]="libcudnn8=8.0.5.39-1+cuda11.1 libcudnn8-dev=8.0.5.39-1+cuda11.1"
cudnn_name_map["11.6"]="libcudnn8=8.5.0.96-1+cuda11.6 libcudnn8-dev=8.5.0.96-1+cuda11.6"
# Split the space-separated package list into an array explicitly instead of
# relying on an unquoted expansion.
read -r -a cudnn_packages <<< "${cudnn_name_map[$CUDA_VERSION]}"
sudo apt-get install --no-install-recommends "${cudnn_packages[@]}" -y

# ----------------------------------------------------------------------------
# add cuda to path
# ----------------------------------------------------------------------------
# ~/.bashrc is sourced with errexit switched off, because a failing command
# inside it would otherwise abort this script. Sourcing before add_env lets
# its duplicate check see entries already present in ~/.bashrc.
set +e
source ~/.bashrc
set -e
add_env PATH /usr/local/cuda/bin
add_env LD_LIBRARY_PATH /usr/local/cuda/lib64
add_env LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu
set +e
source ~/.bashrc
set -e

# wheel
python -m pip install wheel
# python 3.9 needs setuptools>44.0
python -m pip install -U setuptools

echo "The environment is ready to clone and compile mindspore."