# Build configuration for the TensorRT plugin library (`tensorrt_plugin`) and
# the collective-communication object library (`gpu_distribution_collective`).
#
# Configure-time inputs:
#   ENV{CUDA_HOME}                   - CUDA root; headers are taken from include/, libraries from lib64/.
#   ENV{TENSORRT_PATH}               - TensorRT root; headers are taken from include/, libraries from lib/.
#   ENV{MS_ENABLE_CUDA_DISTRIBUTION} - "on" builds the NCCL/MPI sources, anything else builds the stub sources.
#   MSLITE_DEPS_AKG_TENSORRT         - when true, the libcuda stub is linked as well.
# CCSRC_DIR and TOP_DIR are expected to be provided by the enclosing scope.

# $ENV{...} of an unset variable expands to nothing, which would silently turn
# the paths derived below into root-relative ones (/lib64, /include, /lib).
# Surface that at configure time instead of as an obscure compile/link failure.
if("$ENV{CUDA_HOME}" STREQUAL "")
    message(WARNING "Environment variable CUDA_HOME is not set; "
            "CUDA headers and libraries will be looked up relative to '/'.")
endif()
if("$ENV{TENSORRT_PATH}" STREQUAL "")
    message(WARNING "Environment variable TENSORRT_PATH is not set; "
            "TensorRT headers and libraries will be looked up relative to '/'.")
endif()

# ---- CUDA toolkit ----------------------------------------------------------
set(CUDA_PATH $ENV{CUDA_HOME})
include_directories(${CCSRC_DIR}/plugin/device/gpu/kernel)
set(CUDA_VERSION 11.1)
set(CUDA_LIB_PATH ${CUDA_PATH}/lib64)
include_directories(${CUDA_PATH})
include_directories(${CUDA_PATH}/include)
find_package(CUDA)

# Add an $ORIGIN rpath entry so the dynamic loader also searches the directory
# containing the built binary.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-rpath,$ORIGIN/")

# ---- TensorRT --------------------------------------------------------------
add_compile_definitions(GPU_TENSORRT)
set(TENSORRT_PATH $ENV{TENSORRT_PATH})
set(TENSORRT_LIB_PATH ${TENSORRT_PATH}/lib)
include_directories(${TENSORRT_PATH}/include)

include_directories(${CCSRC_DIR}/plugin/device/cpu/kernel)
include_directories(${CCSRC_DIR}/../)
include_directories(${CCSRC_DIR}/plugin/device/gpu/kernel/cuda_impl/cuda_ops)

# ---- Collective communication (NCCL + MPI) ---------------------------------
# The switch is taken from the environment and defaults to "off".
if(DEFINED ENV{MS_ENABLE_CUDA_DISTRIBUTION})
    set(MS_ENABLE_CUDA_DISTRIBUTION $ENV{MS_ENABLE_CUDA_DISTRIBUTION})
else()
    set(MS_ENABLE_CUDA_DISTRIBUTION "off")
endif()

# Sources compiled when distribution is disabled; they are removed from the
# globbed list when it is enabled.
set(NCCL_MPI_SRC_STUB
    ${CMAKE_CURRENT_SOURCE_DIR}/distribution/distribution_collective.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/distribution/distribution_base.cc
    )

# nccl mpi
if(MS_ENABLE_CUDA_DISTRIBUTION STREQUAL "on")
    message("enable cuda gpu distribution collective")
    file(GLOB NCCL_MPI_SRC LIST_DIRECTORIES false
        ${CMAKE_CURRENT_SOURCE_DIR}/distribution/*.cc
        ${CCSRC_DIR}/plugin/device/gpu/hal/device/distribution/collective_wrapper.cc
        ${CCSRC_DIR}/plugin/device/gpu/hal/device/distribution/mpi_wrapper.cc
        ${CCSRC_DIR}/plugin/device/gpu/hal/device/distribution/nccl_wrapper.cc
        )
    # The glob above also matches the stub files; drop them.
    list(REMOVE_ITEM NCCL_MPI_SRC ${NCCL_MPI_SRC_STUB})

    add_compile_definitions(LITE_CUDA_DISTRIBUTION)
    include(${TOP_DIR}/cmake/external_libs/ompi.cmake)
    include(${TOP_DIR}/cmake/external_libs/nccl.cmake)

    add_library(gpu_distribution_collective OBJECT ${NCCL_MPI_SRC})
    add_library(mindspore::nccl ALIAS nccl::nccl)
    add_library(mindspore::ompi ALIAS ompi::mpi)
    target_link_libraries(gpu_distribution_collective PRIVATE mindspore::ompi mindspore::nccl)
else()
    add_library(gpu_distribution_collective OBJECT ${NCCL_MPI_SRC_STUB})
endif()
add_dependencies(gpu_distribution_collective fbs_src)

# ---- TensorRT plugin sources -----------------------------------------------
file(GLOB TENSORRT_RUNTIME_SRC LIST_DIRECTORIES false
    ${CMAKE_CURRENT_SOURCE_DIR}/*.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/op/*.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/optimizer/*.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/cuda_impl/*.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../extendrt/delegate/delegate_utils.cc
    ${CCSRC_DIR}/plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.cc
    ${CCSRC_DIR}/plugin/device/cpu/kernel/nnacl/nnacl_common.c
    ${TOP_DIR}/mindspore/lite/src/common/file_utils.cc
    )

# NOTE(review): the parameter_cache include path and sources below are
# currently disabled; kept as found.
# include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache)

#set(TENSORRT_RUNTIME_SRC
#    ${TENSORRT_RUNTIME_SRC}
#    ${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/embedding_cache_manager.cc
#    ${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/load_host_cache_model.cc
#    ${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/lfu_cache.cc
#    ${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/embedding_cache.cc
#    ${CMAKE_CURRENT_SOURCE_DIR}/../parameter_cache/gpu/gpu_cache_mem.cc
#    )

# link_libraries() is directory-scoped: it applies to targets created after
# this point (tensorrt_plugin and whatever the cuda_impl subdirectory adds),
# so it must stay ahead of those add_library()/add_subdirectory() calls.
link_libraries("${CUDA_LIB_PATH}/libcudnn.so")
link_libraries("${CUDA_LIB_PATH}/libcublasLt.so")

# Imported targets for the prebuilt CUDA / TensorRT shared libraries.
# Locations are quoted so that a path containing ';' or whitespace stays a
# single property value.
add_library(libcudart SHARED IMPORTED)
set_target_properties(libcudart PROPERTIES IMPORTED_LOCATION "${CUDA_LIB_PATH}/libcudart.so")

add_library(libnvinfer SHARED IMPORTED)
set_target_properties(libnvinfer PROPERTIES IMPORTED_LOCATION "${TENSORRT_LIB_PATH}/libnvinfer.so")

add_library(libcublas SHARED IMPORTED)
set_target_properties(libcublas PROPERTIES IMPORTED_LOCATION "${CUDA_LIB_PATH}/libcublas.so")

# ---- The plugin library ----------------------------------------------------
add_library(tensorrt_plugin SHARED ${TENSORRT_RUNTIME_SRC})

add_dependencies(tensorrt_plugin fbs_src)

# The keyword-less target_link_libraries() signature is kept on purpose:
# CMake does not allow mixing it with the PRIVATE/PUBLIC form on one target.
target_link_libraries(
    tensorrt_plugin
    libcudart
    libcublas
    libnvinfer
)

if(MSLITE_DEPS_AKG_TENSORRT)
    # Link against the libcuda found under the toolkit's stubs/ directory.
    add_library(libcuda SHARED IMPORTED)
    set_target_properties(libcuda PROPERTIES IMPORTED_LOCATION "${CUDA_LIB_PATH}/stubs/libcuda.so")
    target_link_libraries(
        tensorrt_plugin
        libcuda
    )
endif()

add_subdirectory(cuda_impl)

target_link_libraries(tensorrt_plugin cuda_kernel_mid gpu_distribution_collective)
target_link_libraries(tensorrt_plugin mindspore-extendrt mindspore_core mindspore::fast_transformers)