#!/usr/bin/env bash
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Install OpenMPI, OpenSSH and Horovod during Intel(R) MKL container build
#
# Usage: [VAR=<value> ...] install_openmpi_horovod.sh
#
# The script takes no positional arguments; every setting is read from the
# environment and is optional:
#   OPENMPI_VERSION              Open MPI release name; also used as the tarball
#                                and source directory stem
#                                (default: openmpi-2.1.1)
#   OPENMPI_DOWNLOAD_URL         URL of the Open MPI tarball (default: derived
#                                from OPENMPI_VERSION)
#   INSTALL_HOROVOD_FROM_COMMIT  "yes" installs Horovod from the GitHub repo at
#                                the git ref HOROVOD_VERSION instead of the
#                                released package (default: no)
#   BUILD_SSH                    "yes" builds zlib, OpenSSL and OpenSSH from
#                                source instead of using distro packages
#                                (default: no)
#   HOROVOD_VERSION              Horovod release, or git ref when installing
#                                from a commit (default: 0.19.1)

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Set default
OPENMPI_VERSION=${OPENMPI_VERSION:-openmpi-2.1.1}
if [[ -z "${OPENMPI_DOWNLOAD_URL:-}" ]]; then
  # Keep the URL in step with OPENMPI_VERSION: the tarball is later unpacked
  # as "${OPENMPI_VERSION}.tar.gz", so a mismatched download cannot work.
  # "openmpi-X.Y.Z" lives under the "vX.Y" release series directory.
  if [[ "${OPENMPI_VERSION}" =~ ^openmpi-([0-9]+\.[0-9]+)\. ]]; then
    OPENMPI_DOWNLOAD_URL="https://www.open-mpi.org/software/ompi/v${BASH_REMATCH[1]}/downloads/${OPENMPI_VERSION}.tar.gz"
  else
    # Unrecognised version format: fall back to the historical default URL.
    OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v2.1/downloads/openmpi-2.1.1.tar.gz
  fi
fi
INSTALL_HOROVOD_FROM_COMMIT=${INSTALL_HOROVOD_FROM_COMMIT:-no}
BUILD_SSH=${BUILD_SSH:-no}
HOROVOD_VERSION=${HOROVOD_VERSION:-0.19.1}
# Directory holding ssh_config for distro-packaged OpenSSH.
SSH_CONFIG_PATH=/etc/ssh

# Install Open MPI
echo "Installing OpenMPI version ${OPENMPI_VERSION} ..."
echo "OpenMPI Download url ${OPENMPI_DOWNLOAD_URL} ..."
# Download the Open MPI tarball into a scratch directory, build it, install
# it, and remove the build tree afterwards.
mkdir -p /tmp/openmpi
cd /tmp/openmpi
curl -fSsL -O "${OPENMPI_DOWNLOAD_URL}"
tar zxf "${OPENMPI_VERSION}.tar.gz"
cd "${OPENMPI_VERSION}"
./configure --enable-mpirun-prefix-by-default
make -j "$(nproc)" all
make install
ldconfig
cd /
rm -rf /tmp/openmpi

# Create a wrapper for OpenMPI to allow running as root by default
mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real
# Quoted delimiter: "$@" must reach the wrapper literally, not be expanded here.
cat > /usr/local/bin/mpirun <<'EOF'
#!/bin/bash
mpirun.real --allow-run-as-root "$@"
EOF
chmod a+x /usr/local/bin/mpirun

# Configure OpenMPI to run good defaults:
echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf

# Check mpi version
echo 'OpenMPI version:'
mpirun --version

# Install OpenSSH for MPI to communicate between containers
if [[ "${BUILD_SSH}" == "yes" ]]; then
  # Build zlib, OpenSSL and OpenSSH from source. Every step is its own
  # statement so that a failure aborts the script; inside an `a && b && c`
  # list, `set -e` ignores a failure of anything but the last command.
  mkdir -p /tmp/buildssh

  cd /tmp/buildssh
  curl -fSsL -O https://www.zlib.net/zlib-1.2.11.tar.gz
  tar -xzvf zlib-1.2.11.tar.gz
  cd /tmp/buildssh/zlib-1.2.11
  ./configure
  make
  make install

  cd /tmp/buildssh
  curl -fSsL -O https://www.openssl.org/source/openssl-1.1.1.tar.gz
  tar -xzvf openssl-1.1.1.tar.gz
  cd /tmp/buildssh/openssl-1.1.1
  ./config
  make
  make test
  make install

  cd /tmp/buildssh
  curl -fSsL -O https://mirrors.sonic.net/pub/OpenBSD/OpenSSH/portable/openssh-8.4p1.tar.gz
  tar -xzvf openssh-8.4p1.tar.gz
  cd /tmp/buildssh/openssh-8.4p1
  ./configure --with-md5-passwords
  make
  # Create the sshd group/user before `make install`; skip creation when they
  # already exist so a pre-provisioned image does not abort the build.
  getent group sshd > /dev/null || groupadd sshd
  getent passwd sshd > /dev/null \
    || useradd -M -g sshd -c 'sshd privsep' -d /var/empty -s /sbin/nologin sshd
  passwd -l sshd
  make install

  # Drop the build trees, as is done for the Open MPI build.
  cd /
  rm -rf /tmp/buildssh

  # Remaining build dependencies come from the distro: apt-get first, then yum.
  if apt-get clean && apt-get update \
    && apt-get install -y --no-install-recommends --fix-missing \
      libnuma-dev cmake; then
    rm -rf /var/lib/apt/lists/*
  elif yum -y update && yum -y install numactl-devel cmake; then
    yum clean all
  else
    echo "Unsupported Linux distribution. Aborting!" >&2
    exit 1
  fi

  # The source-built OpenSSH's ssh_config is edited under /usr/local/etc.
  SSH_CONFIG_PATH=/usr/local/etc
else
  # Use distro OpenSSH packages: apt-get first, then yum.
  if apt-get clean && apt-get update \
    && apt-get install -y --no-install-recommends --fix-missing \
      openssh-client openssh-server libnuma-dev cmake \
    && rm -rf /var/lib/apt/lists/*; then
    echo "PASS: OpenSSH installation"
  elif yum -y update \
    && yum -y install numactl-devel openssh-server openssh-clients cmake \
    && yum clean all; then
    echo "PASS: OpenSSH installation"
  else
    echo "Unsupported Linux distribution. Aborting!" >&2
    exit 1
  fi
fi

mkdir -p /var/run/sshd

# Rewrite ssh_config without any existing StrictHostKeyChecking lines, then
# append the setting below. grep exits 1 when it selects no lines; that is not
# an error here, while any other non-zero status still aborts the script.
ssh_config_file="${SSH_CONFIG_PATH}/ssh_config"
grep -v StrictHostKeyChecking "${ssh_config_file}" > "${ssh_config_file}.new" \
  || [[ $? -eq 1 ]]
# Allow OpenSSH to talk to containers without asking for confirmation
echo " StrictHostKeyChecking no" >> "${ssh_config_file}.new"
mv "${ssh_config_file}.new" "${ssh_config_file}"

# Install Horovod
if [[ "${INSTALL_HOROVOD_FROM_COMMIT}" == "yes" ]]; then
  horovod_requirement="git+https://github.com/horovod/horovod.git@${HOROVOD_VERSION}"
else
  horovod_requirement="horovod==${HOROVOD_VERSION}"
fi
# HOROVOD_WITH_TENSORFLOW must be in pip's environment to reach the Horovod
# build; a bare assignment on its own line is never exported to it.
HOROVOD_WITH_TENSORFLOW=1 \
  python3 -m pip install --no-cache-dir "${horovod_requirement}"