• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#.rst:
2# FindCUDA
3# --------
4#
5# Tools for building CUDA C files: libraries and build dependencies.
6#
7# This script locates the NVIDIA CUDA C tools.  It should work on linux,
8# windows, and mac and should be reasonably up to date with CUDA C
9# releases.
10#
11# This script makes use of the standard find_package arguments of
12# <VERSION>, REQUIRED and QUIET.  CUDA_FOUND will report if an
13# acceptable version of CUDA was found.
14#
15# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if
16# the prefix cannot be determined by the location of nvcc in the system
17# path and REQUIRED is specified to find_package().  To use a different
18# installed version of the toolkit set the environment variable
19# CUDA_BIN_PATH before running cmake (e.g.
20# CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default
21# /usr/local/cuda) or set CUDA_TOOLKIT_ROOT_DIR after configuring.  If
22# you change the value of CUDA_TOOLKIT_ROOT_DIR, various components that
23# depend on the path will be relocated.
24#
25# It might be necessary to set CUDA_TOOLKIT_ROOT_DIR manually on certain
26# platforms, or to use a cuda runtime not installed in the default
27# location.  In newer versions of the toolkit the cuda library is
28# included with the graphics driver- be sure that the driver version
29# matches what is needed by the cuda runtime version.
30#
31# The following variables affect the behavior of the macros in the
32# script (in alphabetical order).  Note that any of these flags can be
33# changed multiple times in the same directory before calling
34# CUDA_ADD_EXECUTABLE, CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX,
35# CUDA_COMPILE_FATBIN, CUDA_COMPILE_CUBIN or CUDA_WRAP_SRCS::
36#
37#   CUDA_64_BIT_DEVICE_CODE (Default matches host bit size)
38#   -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code.
39#      Note that making this different from the host code when generating object
40#      or C files from CUDA code just won't work, because size_t gets defined by
41#      nvcc in the generated source.  If you compile to PTX and then load the
42#      file yourself, you can mix bit sizes between device and host.
43#
44#   CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON)
45#   -- Set to ON if you want the custom build rule to be attached to the source
46#      file in Visual Studio.  Turn OFF if you add the same cuda file to multiple
47#      targets.
48#
49#      This allows the user to build the target from the CUDA file; however, bad
50#      things can happen if the CUDA source file is added to multiple targets.
51#      When performing parallel builds it is possible for the custom build
52#      command to be run more than once and in parallel causing cryptic build
53#      errors.  VS runs the rules for every source file in the target, and a
54#      source can have only one rule no matter how many projects it is added to.
55#      When the rule is run from multiple targets race conditions can occur on
56#      the generated file.  Eventually everything will get built, but if the user
57#      is unaware of this behavior, there may be confusion.  It would be nice if
58#      this script could detect the reuse of source files across multiple targets
59#      and turn the option off for the user, but no good solution could be found.
60#
61#   CUDA_BUILD_CUBIN (Default OFF)
62#   -- Set to ON to enable an extra compilation pass with the -cubin option in
63#      Device mode. The output is parsed and register, shared memory usage is
64#      printed during build.
65#
66#   CUDA_BUILD_EMULATION (Default OFF for device mode)
67#   -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files
68#      when CUDA_BUILD_EMULATION is TRUE.
69#
70#   CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR)
71#   -- Set to the path you wish to have the generated files placed.  If it is
72#      blank output files will be placed in CMAKE_CURRENT_BINARY_DIR.
73#      Intermediate files will always be placed in
74#      CMAKE_CURRENT_BINARY_DIR/CMakeFiles.
75#
76#   CUDA_HOST_COMPILATION_CPP (Default ON)
77#   -- Set to OFF for C compilation of host code.
78#
79#   CUDA_HOST_COMPILER (Default CMAKE_C_COMPILER, $(VCInstallDir)/bin for VS)
80#   -- Set the host compiler to be used by nvcc.  Ignored if -ccbin or
81#      --compiler-bindir is already present in the CUDA_NVCC_FLAGS or
82#      CUDA_NVCC_FLAGS_<CONFIG> variables.  For Visual Studio targets
83#      $(VCInstallDir)/bin is a special value that expands out to the path when
84#      the command is run from within VS.
85#
86#   CUDA_NVCC_FLAGS
87#   CUDA_NVCC_FLAGS_<CONFIG>
88#   -- Additional NVCC command line arguments.  NOTE: multiple arguments must be
89#      semi-colon delimited (e.g. --compiler-options;-Wall)
90#
91#   CUDA_PROPAGATE_HOST_FLAGS (Default ON)
92#   -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration
93#      dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the
94#      host compiler through nvcc's -Xcompiler flag.  This helps make the
95#      generated host code match the rest of the system better.  Sometimes
96#      certain flags give nvcc problems, and this will help you turn the flag
97#      propagation off.  This does not affect the flags supplied directly to nvcc
98#      via CUDA_NVCC_FLAGS or through the OPTION flags specified through
99#      CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS.  Flags used for
100#      shared library compilation are not affected by this flag.
101#
102#   CUDA_SEPARABLE_COMPILATION (Default OFF)
103#   -- If set this will enable separable compilation for all CUDA runtime object
104#      files.  If used outside of CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY
105#      (e.g. calling CUDA_WRAP_SRCS directly),
106#      CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME and
107#      CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS should be called.
108#
109#   CUDA_VERBOSE_BUILD (Default OFF)
110#   -- Set to ON to see all the commands used when building the CUDA file.  When
111#      using a Makefile generator the value defaults to VERBOSE (run make
112#      VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will
113#      always print the output.
114#
115# The script creates the following macros (in alphabetical order)::
116#
117#   CUDA_ADD_CUFFT_TO_TARGET( cuda_target )
118#   -- Adds the cufft library to the target (can be any target).  Handles whether
119#      you are in emulation mode or not.
120#
121#   CUDA_ADD_CUBLAS_TO_TARGET( cuda_target )
122#   -- Adds the cublas library to the target (can be any target).  Handles
123#      whether you are in emulation mode or not.
124#
125#   CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ...
126#                        [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
127#   -- Creates an executable "cuda_target" which is made up of the files
128#      specified.  All of the non CUDA C files are compiled using the standard
129#      build rules specified by CMAKE and the cuda files are compiled to object
130#      files using nvcc and the host compiler.  In addition CUDA_INCLUDE_DIRS is
131#      added automatically to include_directories().  Some standard CMake target
132#      calls can be used on the target after calling this macro
133#      (e.g. set_target_properties and target_link_libraries), but setting
134#      properties that adjust compilation flags will not affect code compiled by
135#      nvcc.  Such flags should be modified before calling CUDA_ADD_EXECUTABLE,
136#      CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS.
137#
138#   CUDA_ADD_LIBRARY( cuda_target file0 file1 ...
139#                     [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
140#   -- Same as CUDA_ADD_EXECUTABLE except that a library is created.
141#
142#   CUDA_BUILD_CLEAN_TARGET()
143#   -- Creates a convenience target that deletes all the dependency files
144#      generated.  You should make clean after running this target to ensure the
145#      dependency files get regenerated.
146#
147#   CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE]
148#                 [OPTIONS ...] )
149#   -- Returns a list of generated files from the input source files to be used
150#      with ADD_LIBRARY or ADD_EXECUTABLE.
151#
152#   CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] )
153#   -- Returns a list of PTX files generated from the input source files.
154#
155#   CUDA_COMPILE_FATBIN( generated_files file0 file1 ... [OPTIONS ...] )
156#   -- Returns a list of FATBIN files generated from the input source files.
157#
158#   CUDA_COMPILE_CUBIN( generated_files file0 file1 ... [OPTIONS ...] )
159#   -- Returns a list of CUBIN files generated from the input source files.
160#
161#   CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var
162#                                                        cuda_target
163#                                                        object_files )
164#   -- Compute the name of the intermediate link file used for separable
165#      compilation.  This file name is typically passed into
166#      CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS.  output_file_var is produced
167#      based on cuda_target and the list of object files that need separable
168#      compilation as specified by object_files.  If the object_files list is
169#      empty, then output_file_var will be empty.  This function is called
170#      automatically for CUDA_ADD_LIBRARY and CUDA_ADD_EXECUTABLE.  Note that
171#      this is a function and not a macro.
172#
173#   CUDA_INCLUDE_DIRECTORIES( path0 path1 ... )
174#   -- Sets the directories that should be passed to nvcc
175#      (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu
176#      files.
177#
178#
179#
180#   CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS( output_file_var cuda_target
181#                                            nvcc_flags object_files)
182#
183#   -- Generates the link object required by separable compilation from the given
184#      object files.  This is called automatically for CUDA_ADD_EXECUTABLE and
185#      CUDA_ADD_LIBRARY, but can be called manually when using CUDA_WRAP_SRCS
186#      directly.  When called from CUDA_ADD_LIBRARY or CUDA_ADD_EXECUTABLE the
187#      nvcc_flags passed in are the same as the flags passed in via the OPTIONS
188#      argument.  The only nvcc flag added automatically is the bitness flag as
189#      specified by CUDA_64_BIT_DEVICE_CODE.  Note that this is a function
190#      instead of a macro.
191#
192#   CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ...
193#                    [STATIC | SHARED | MODULE] [OPTIONS ...] )
194#   -- This is where all the magic happens.  CUDA_ADD_EXECUTABLE,
195#      CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this
196#      function under the hood.
197#
198#      Given the list of files (file0 file1 ... fileN) this macro generates
199#      custom commands that generate either PTX or linkable objects (use "PTX" or
200#      "OBJ" for the format argument to switch).  Files that don't end with .cu
201#      or have the HEADER_FILE_ONLY property are ignored.
202#
203#      The arguments passed in after OPTIONS are extra command line options to
204#      give to nvcc.  You can also specify per configuration options by
205#      specifying the name of the configuration followed by the options.  General
206#      options must precede configuration specific options.  Not all
207#      configurations need to be specified, only the ones provided will be used.
208#
209#         OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag"
210#         DEBUG -g
211#         RELEASE --use_fast_math
212#         RELWITHDEBINFO --use_fast_math;-g
213#         MINSIZEREL --use_fast_math
214#
215#      For certain configurations (namely VS generating object files with
216#      CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will
217#      be produced for the given cuda file.  This is because when you add the
218#      cuda file to Visual Studio it knows that this file produces an object file
219#      and will link in the resulting object file automatically.
220#
221#      This script will also generate a separate cmake script that is used at
222#      build time to invoke nvcc.  This is for several reasons.
223#
224#        1. nvcc can return negative numbers as return values which confuses
225#        Visual Studio into thinking that the command succeeded.  The script now
226#        checks the error codes and produces errors when there was a problem.
227#
228#        2. nvcc has been known to not delete incomplete results when it
229#        encounters problems.  This confuses build systems into thinking the
230#        target was generated when in fact an unusable file exists.  The script
231#        now deletes the output files if there was an error.
232#
233#        3. By putting all the options that affect the build into a file and then
234#        make the build rule dependent on the file, the output files will be
235#        regenerated when the options change.
236#
237#      This script also looks at optional arguments STATIC, SHARED, or MODULE to
238#      determine when to target the object compilation for a shared library.
239#      BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in
240#      CUDA_ADD_LIBRARY.  On some systems special flags are added for building
241#      objects intended for shared libraries.  A preprocessor macro,
242#      <target_name>_EXPORTS is defined when a shared library compilation is
243#      detected.
244#
245#      Flags passed into add_definitions with -D or /D are passed along to nvcc.
246#
247#
248#
249# The script defines the following variables::
250#
251#   CUDA_VERSION_MAJOR    -- The major version of cuda as reported by nvcc.
252#   CUDA_VERSION_MINOR    -- The minor version.
253#   CUDA_VERSION
254#   CUDA_VERSION_STRING   -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR
255#
256#   CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set).
257#   CUDA_SDK_ROOT_DIR     -- Path to the CUDA SDK.  Use this to find files in the
258#                            SDK.  This script will not directly support finding
259#                            specific libraries or headers, as that isn't
260#                            supported by NVIDIA.  If you want to change
261#                            libraries when the path changes see the
262#                            FindCUDA.cmake script for an example of how to clear
263#                            these variables.  There are also examples of how to
264#                            use the CUDA_SDK_ROOT_DIR to locate headers or
265#                            libraries, if you so choose (at your own risk).
266#   CUDA_INCLUDE_DIRS     -- Include directory for cuda headers.  Added automatically
267#                            for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY.
268#   CUDA_LIBRARIES        -- Cuda RT library.
269#   CUDA_CUFFT_LIBRARIES  -- Device or emulation library for the Cuda FFT
270#                            implementation (alternative to:
271#                            CUDA_ADD_CUFFT_TO_TARGET macro)
272#   CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS
273#                            implementation (alternative to:
274#                            CUDA_ADD_CUBLAS_TO_TARGET macro).
275#   CUDA_cupti_LIBRARY    -- CUDA Profiling Tools Interface library.
276#                            Only available for CUDA version 4.0+.
277#   CUDA_curand_LIBRARY   -- CUDA Random Number Generation library.
278#                            Only available for CUDA version 3.2+.
279#   CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library.
280#                            Only available for CUDA version 3.2+.
281#   CUDA_npp_LIBRARY      -- NVIDIA Performance Primitives lib.
282#                            Only available for CUDA version 4.0+.
283#   CUDA_nppc_LIBRARY     -- NVIDIA Performance Primitives lib (core).
284#                            Only available for CUDA version 5.5+.
285#   CUDA_nppi_LIBRARY     -- NVIDIA Performance Primitives lib (image processing).
286#                            Only available for CUDA version 5.5+.
287#   CUDA_npps_LIBRARY     -- NVIDIA Performance Primitives lib (signal processing).
288#                            Only available for CUDA version 5.5+.
289#   CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library.
290#                            Only available for CUDA version 3.2+.
291#                            Windows only.
292#   CUDA_nvcuvid_LIBRARY  -- CUDA Video Decoder library.
293#                            Only available for CUDA version 3.2+.
294#                            Windows only.
295#
296
297#   James Bigler, NVIDIA Corp (nvidia.com - jbigler)
298#   Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
299#
300#   Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
301#
302#   Copyright (c) 2007-2009
303#   Scientific Computing and Imaging Institute, University of Utah
304#
305#   This code is licensed under the MIT License.  See the FindCUDA.cmake script
306#   for the text of the license.
307
308# The MIT License
309#
310# License for the specific language governing rights and limitations under
311# Permission is hereby granted, free of charge, to any person obtaining a
312# copy of this software and associated documentation files (the "Software"),
313# to deal in the Software without restriction, including without limitation
314# the rights to use, copy, modify, merge, publish, distribute, sublicense,
315# and/or sell copies of the Software, and to permit persons to whom the
316# Software is furnished to do so, subject to the following conditions:
317#
318# The above copyright notice and this permission notice shall be included
319# in all copies or substantial portions of the Software.
320#
321# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
322# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
323# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
324# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
325# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
326# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
327# DEALINGS IN THE SOFTWARE.
328#
329###############################################################################
330
331# FindCUDA.cmake
332
# CUDA_FIND_HELPER_FILE(<name> <extension>)
#
# Computes the full path of the helper file FindCUDA/<name>.<extension> located
# next to the file currently being processed and stores it in the internal
# cache entry CUDA_<name>.  When the file does not exist the macro raises a
# FATAL_ERROR if CUDA_FIND_REQUIRED is set; otherwise it prints a STATUS
# message unless CUDA_FIND_QUIETLY is set.
macro(CUDA_FIND_HELPER_FILE _name _extension)
  set(_full_name "${_name}.${_extension}")
  # CMAKE_CURRENT_LIST_FILE holds the full path of the file being processed;
  # its directory part is the anchor for the helper files shipped alongside.
  get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
  set(CUDA_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindCUDA/${_full_name}")
  if(NOT EXISTS "${CUDA_${_name}}")
    set(error_message "${_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindCUDA")
    if(CUDA_FIND_REQUIRED)
      message(FATAL_ERROR "${error_message}")
    elseif(NOT CUDA_FIND_QUIETLY)
      message(STATUS "${error_message}")
    endif()
  endif()
  # Store the result as an internal cache entry so the user isn't bothered
  # with it.
  set(CUDA_${_name} ${CUDA_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE)
endmacro()
354
#####################################################################
## CUDA_INCLUDE_NVCC_DEPENDENCIES
##

# CUDA_INCLUDE_NVCC_DEPENDENCIES(<dependency_file>)
#
# Includes <dependency_file> (creating a stub first when it does not exist)
# and then validates the CUDA_NVCC_DEPEND list that the included file may set.
#
# Results, set in the caller's scope:
#   CUDA_NVCC_DEPEND            -- the dependency list read from the file, or
#                                  <dependency_file> itself when the list has
#                                  to be regenerated.
#   CUDA_NVCC_DEPEND_REGENERATE -- TRUE when the list was empty or names a file
#                                  that no longer exists (e.g. a header that
#                                  has disappeared or moved); FALSE otherwise.
#
# When regeneration is needed the dependency file is reset to the stub so that
# it can be regenerated later.
macro(CUDA_INCLUDE_NVCC_DEPENDENCIES dependency_file)
  set(CUDA_NVCC_DEPEND)
  set(CUDA_NVCC_DEPEND_REGENERATE FALSE)

  # Include the dependency file, creating it first if it doesn't exist.  The
  # include() puts a dependency on the file that forces CMake to rerun and
  # bring in the new info when it changes.  DO NOT REMOVE THIS (the original
  # author did, and spent a few hours figuring out why it didn't work).
  if(NOT EXISTS "${dependency_file}")
    file(WRITE "${dependency_file}" "#FindCUDA.cmake generated file.  Do not edit.\n")
  endif()
  # Always include this file to force CMake to run again next invocation and
  # rebuild the dependencies.
  include("${dependency_file}")

  # Verify that every listed dependency still exists.  One missing file is
  # enough to invalidate the whole list, so stop at the first one.
  if(CUDA_NVCC_DEPEND)
    foreach(f ${CUDA_NVCC_DEPEND})
      if(NOT EXISTS "${f}")
        set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
        break()
      endif()
    endforeach()
  else()
    # No dependencies, so regenerate the file.
    set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
  endif()

  # No usable incoming dependencies, so they need to be generated.  Make the
  # output depend on the dependency file itself, which should cause the rule
  # to re-run.
  if(CUDA_NVCC_DEPEND_REGENERATE)
    set(CUDA_NVCC_DEPEND "${dependency_file}")
    file(WRITE "${dependency_file}" "#FindCUDA.cmake generated file.  Do not edit.\n")
  endif()

endmacro()
419
420###############################################################################
421###############################################################################
422# Setup variables' defaults
423###############################################################################
424###############################################################################
425
# Device code bitness.  The default follows the host: ON when pointers are
# 8 bytes wide, OFF otherwise.  The user can override it through the option.
set(CUDA_64_BIT_DEVICE_CODE_DEFAULT OFF)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(CUDA_64_BIT_DEVICE_CODE_DEFAULT ON)
endif()
option(CUDA_64_BIT_DEVICE_CODE "Compile device code in 64 bit mode" ${CUDA_64_BIT_DEVICE_CODE_DEFAULT})

# Whether the custom build rule is attached to the CUDA source file in VS.
# This option should stay ON only when the file belongs to at most one target.
option(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE "Attach the build rule to the CUDA source file.  Enable only when the CUDA source file is added to at most one target." ON)

# Print extra information about the cuda file during compilation.
option(CUDA_BUILD_CUBIN "Generate and parse .cubin files in Device mode." OFF)

# Select emulation mode (ON) or device mode (OFF).
option(CUDA_BUILD_EMULATION "Build in Emulation mode" OFF)

# Destination of the generated output.
set(CUDA_GENERATED_OUTPUT_DIR "" CACHE PATH "Directory to put all the output files.  If blank it will default to the CMAKE_CURRENT_BINARY_DIR")

# Host compilation mode: C++ (ON) or C (OFF).
option(CUDA_HOST_COMPILATION_CPP "Generated file extension" ON)

# Extra flags the user can hand to nvcc.
set(CUDA_NVCC_FLAGS "" CACHE STRING "Semi-colon delimit multiple arguments.")
451
# Default for the host compiler used by nvcc.
if(NOT CMAKE_GENERATOR MATCHES "Visual Studio")
  # Use the real path of the C compiler rather than the name it was invoked
  # by: a "cc" that is a symlink to clang may let nvcc think it is GCC and
  # issue the unhandled -dumpspecs option to clang.  When CMAKE_C_COMPILER is
  # not defined (the project does not use the C language), or
  # CUDA_HOST_COMPILER is already defined, the default stays empty; an empty
  # value is meant to skip -ccbin so that nvcc uses its own default C compiler.
  set(c_compiler_realpath "")
  if(DEFINED CMAKE_C_COMPILER AND NOT DEFINED CUDA_HOST_COMPILER)
    get_filename_component(c_compiler_realpath "${CMAKE_C_COMPILER}" REALPATH)
  endif()
  set(CUDA_HOST_COMPILER "${c_compiler_realpath}" CACHE FILEPATH "Host side compiler used by NVCC")
else()
  # Visual Studio generators get the VS macro as the default value.
  set(CUDA_HOST_COMPILER "$(VCInstallDir)bin" CACHE FILEPATH "Host side compiler used by NVCC")
endif()
467
# Propagate the host flags to the host compiler via nvcc's -Xcompiler flag.
# (The help text previously read "Propage ... via -Xcompile".)
option(CUDA_PROPAGATE_HOST_FLAGS "Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompiler" ON)

# Enable CUDA_SEPARABLE_COMPILATION
option(CUDA_SEPARABLE_COMPILATION "Compile CUDA objects with separable compilation enabled.  Requires CUDA 5.0+" OFF)

# Specifies whether the commands used when compiling the .cu file will be printed out.
option(CUDA_VERBOSE_BUILD "Print out the commands run while compiling the CUDA source file.  With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF)

# Hide the less frequently changed settings from the default cache view.
mark_as_advanced(
  CUDA_64_BIT_DEVICE_CODE
  CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE
  CUDA_GENERATED_OUTPUT_DIR
  CUDA_HOST_COMPILATION_CPP
  CUDA_NVCC_FLAGS
  CUDA_PROPAGATE_HOST_FLAGS
  )
485
# Create one CUDA_NVCC_FLAGS_<CONFIG> cache entry per build configuration.
# Makefile and similar generators don't define CMAKE_CONFIGURATION_TYPES, so
# CMAKE_BUILD_TYPE is added as well, followed by the standard set of four
# build types (Debug, MinSizeRel, Release and RelWithDebInfo) for
# completeness.  Running the loop over the combined list accommodates any
# extra configuration types; duplicates are dropped by REMOVE_DUPLICATES.
set(CUDA_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE})
list(APPEND CUDA_configuration_types Debug MinSizeRel Release RelWithDebInfo)
list(REMOVE_DUPLICATES CUDA_configuration_types)
foreach(config ${CUDA_configuration_types})
  string(TOUPPER "${config}" config_upper)
  set(CUDA_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semi-colon delimit multiple arguments.")
  mark_as_advanced(CUDA_NVCC_FLAGS_${config_upper})
endforeach()
499
500###############################################################################
501###############################################################################
502# Locate CUDA, Set Build Type, etc.
503###############################################################################
504###############################################################################
505
# cuda_unset_include_and_libraries()
#
# Removes the cache entries holding the toolkit include directory and the
# individual CUDA library locations.
macro(cuda_unset_include_and_libraries)
  # Make sure you run this before you unset CUDA_VERSION: the cudartemu
  # library only existed in the 3.0 version of the CUDA toolkit.
  if(CUDA_VERSION VERSION_EQUAL "3.0")
    unset(CUDA_CUDARTEMU_LIBRARY CACHE)
  endif()
  foreach(_cuda_cache_entry
      CUDA_TOOLKIT_INCLUDE
      CUDA_CUDART_LIBRARY
      CUDA_CUDA_LIBRARY
      CUDA_cupti_LIBRARY
      CUDA_cublas_LIBRARY
      CUDA_cublasemu_LIBRARY
      CUDA_cufft_LIBRARY
      CUDA_cufftemu_LIBRARY
      CUDA_curand_LIBRARY
      CUDA_cusparse_LIBRARY
      CUDA_npp_LIBRARY
      CUDA_nppc_LIBRARY
      CUDA_nppi_LIBRARY
      CUDA_npps_LIBRARY
      CUDA_nvcuvenc_LIBRARY
      CUDA_nvcuvid_LIBRARY
      )
    unset(${_cuda_cache_entry} CACHE)
  endforeach()
  # A macro has no scope of its own; don't leave the loop variable behind.
  unset(_cuda_cache_entry)
endmacro()
529
# Detect changes of CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR by comparing
# them with their *_INTERNAL copies.  On a change the dependent cache
# variables are cleared so that they will be detected again.
if(DEFINED CUDA_TOOLKIT_ROOT_DIR_INTERNAL)
  if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}")
    unset(CUDA_TARGET_TRIPLET CACHE)
    unset(CUDA_TOOLKIT_TARGET_DIR CACHE)
    unset(CUDA_NVCC_EXECUTABLE CACHE)
    unset(CUDA_VERSION CACHE)
    cuda_unset_include_and_libraries()
  endif()
endif()

# A changed target triplet or toolkit target directory only invalidates the
# include directory and the libraries.
if(DEFINED CUDA_TARGET_TRIPLET_INTERNAL AND NOT "${CUDA_TARGET_TRIPLET}" STREQUAL "${CUDA_TARGET_TRIPLET_INTERNAL}")
  cuda_unset_include_and_libraries()
elseif(DEFINED CUDA_TOOLKIT_TARGET_DIR AND DEFINED CUDA_TOOLKIT_TARGET_DIR_INTERNAL AND NOT "${CUDA_TOOLKIT_TARGET_DIR}" STREQUAL "${CUDA_TOOLKIT_TARGET_DIR_INTERNAL}")
  cuda_unset_include_and_libraries()
endif()

if(NOT "${CUDA_SDK_ROOT_DIR}" STREQUAL "${CUDA_SDK_ROOT_DIR_INTERNAL}")
  # Intentionally empty: this script has no SDK specific variables to clear.
  # Use this kind of code before calling find_package(CUDA) to clean up any
  # variables of your own that depend on the SDK path, for example:

  #   unset(MY_SPECIAL_CUDA_SDK_INCLUDE_DIR CACHE)
  #   unset(MY_SPECIAL_CUDA_SDK_LIBRARY CACHE)
endif()
552
# Search for the cuda distribution.  Skipped entirely when
# CUDA_TOOLKIT_ROOT_DIR already holds a true value.
if(NOT CUDA_TOOLKIT_ROOT_DIR)

  # Search the locations named by the CUDA_PATH and CUDA_BIN_PATH environment
  # variables first, without consulting any default path.
  find_path(CUDA_TOOLKIT_ROOT_DIR
    NAMES nvcc nvcc.exe
    PATHS
      ENV CUDA_PATH
      ENV CUDA_BIN_PATH
    PATH_SUFFIXES bin bin64
    DOC "Toolkit location."
    NO_DEFAULT_PATH
    )
  # Now search default paths
  find_path(CUDA_TOOLKIT_ROOT_DIR
    NAMES nvcc nvcc.exe
    PATHS /usr/local/bin
          /usr/local/cuda/bin
    DOC "Toolkit location."
    )

  if(CUDA_TOOLKIT_ROOT_DIR)
    # find_path() returns the directory that contains nvcc; strip a trailing
    # "bin" or "bin64" path component to get the toolkit root.  The separator
    # in front is required so that a directory whose name merely ends in "bin"
    # is left alone, and "(64)?" matches exactly the optional "64" suffix
    # (the former "[64]*" was a character class matching any run of 6s and 4s).
    string(REGEX REPLACE "[/\\\\]bin(64)?[/\\\\]?$" "" CUDA_TOOLKIT_ROOT_DIR "${CUDA_TOOLKIT_ROOT_DIR}")
    # We need to force this back into the cache.
    set(CUDA_TOOLKIT_ROOT_DIR "${CUDA_TOOLKIT_ROOT_DIR}" CACHE PATH "Toolkit location." FORCE)
  endif()
  # Quoted so that an empty value is still a well-formed EXISTS test.
  if(NOT EXISTS "${CUDA_TOOLKIT_ROOT_DIR}")
    if(CUDA_FIND_REQUIRED)
      message(FATAL_ERROR "Specify CUDA_TOOLKIT_ROOT_DIR")
    elseif(NOT CUDA_FIND_QUIETLY)
      message("CUDA_TOOLKIT_ROOT_DIR not found or specified")
    endif()
  endif()
endif()
587
# CUDA_NVCC_EXECUTABLE
# Look under the toolkit root and the CUDA_PATH / CUDA_BIN_PATH environment
# locations first, without consulting any default path.
find_program(CUDA_NVCC_EXECUTABLE
  NAMES nvcc
  PATHS "${CUDA_TOOLKIT_ROOT_DIR}"
  ENV CUDA_PATH
  ENV CUDA_BIN_PATH
  PATH_SUFFIXES bin bin64
  NO_DEFAULT_PATH
  )
# Search default search paths, after we search our own set of paths.
find_program(CUDA_NVCC_EXECUTABLE nvcc)
mark_as_advanced(CUDA_NVCC_EXECUTABLE)

if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION)
  # Compute the version from the "release <major>.<minor>" text printed by
  # "nvcc --version".
  execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "--version" OUTPUT_VARIABLE NVCC_OUT)
  # Only accept output that really contains a version.  Empty or unexpected
  # output (e.g. nvcc failed to run) must neither abort the configure step
  # with a malformed string() call nor be cached as a bogus CUDA_VERSION;
  # leaving CUDA_VERSION unset lets the next configure run try again.
  if("${NVCC_OUT}" MATCHES ".*release ([0-9]+)\\.([0-9]+)")
    set(CUDA_VERSION_MAJOR "${CMAKE_MATCH_1}")
    set(CUDA_VERSION_MINOR "${CMAKE_MATCH_2}")
    set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
    mark_as_advanced(CUDA_VERSION)
  else()
    set(CUDA_VERSION_MAJOR "")
    set(CUDA_VERSION_MINOR "")
    if(NOT CUDA_FIND_QUIETLY)
      message(STATUS "Could not determine the CUDA version from ${CUDA_NVCC_EXECUTABLE} --version")
    endif()
  endif()
else()
  # Need to set these based off of the cached value
  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${CUDA_VERSION}")
  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${CUDA_VERSION}")
endif()

# Always set this convenience variable
set(CUDA_VERSION_STRING "${CUDA_VERSION}")
616
# Target CPU architecture
# Pick the initial value of the CUDA_TARGET_CPU_ARCH cache entry: a value that
# is already defined is kept; otherwise ARM or AARCH64 is chosen when
# cross-compiling for such a processor with a new enough toolkit; otherwise
# the entry starts out empty.
set(_cuda_target_cpu_arch_initial "")
if(DEFINED CUDA_TARGET_CPU_ARCH)
  set(_cuda_target_cpu_arch_initial "${CUDA_TARGET_CPU_ARCH}")
elseif(CMAKE_CROSSCOMPILING AND CUDA_VERSION VERSION_GREATER "5.0" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|ARM)")
  set(_cuda_target_cpu_arch_initial "ARM")
elseif(CMAKE_CROSSCOMPILING AND CUDA_VERSION VERSION_GREATER "6.5" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64)")
  set(_cuda_target_cpu_arch_initial "AARCH64")
endif()
set(CUDA_TARGET_CPU_ARCH
  "${_cuda_target_cpu_arch_initial}"
  CACHE STRING
  "Specify the name of the class of CPU architecture for which the input files must be compiled."
)
mark_as_advanced(CUDA_TARGET_CPU_ARCH)
629
# Target OS variant
# The cache entry is seeded with any value that is already defined and is
# empty otherwise.
set(_cuda_target_os_variant_initial "")
if(DEFINED CUDA_TARGET_OS_VARIANT)
  set(_cuda_target_os_variant_initial "${CUDA_TARGET_OS_VARIANT}")
endif()
set(CUDA_TARGET_OS_VARIANT
  "${_cuda_target_os_variant_initial}"
  CACHE STRING
  "Specify the name of the class of OS for which the input files must be compiled."
)
mark_as_advanced(CUDA_TARGET_OS_VARIANT)
638
# Target triplet
# Choose the initial value of the CUDA_TARGET_TRIPLET cache entry.  A value
# that is already defined wins.  Otherwise, when cross-compiling for ARM or
# AARCH64, the first matching directory that exists under
# <toolkit root>/targets is used.  The initial value is reset up front so a
# leftover from an earlier run of this module can never seed the cache when
# none of the branches below assigns it.
set(_cuda_target_triplet_initial "")
if(DEFINED CUDA_TARGET_TRIPLET)
  set(_cuda_target_triplet_initial "${CUDA_TARGET_TRIPLET}")
elseif(CUDA_VERSION VERSION_GREATER "5.0" AND CMAKE_CROSSCOMPILING AND "${CUDA_TARGET_CPU_ARCH}" STREQUAL "ARM")
  if("${CUDA_TARGET_OS_VARIANT}" STREQUAL "Android" AND EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-androideabi")
    set(_cuda_target_triplet_initial "armv7-linux-androideabi")
  elseif(EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-gnueabihf")
    set(_cuda_target_triplet_initial "armv7-linux-gnueabihf")
  endif()
elseif(CUDA_VERSION VERSION_GREATER "6.5" AND CMAKE_CROSSCOMPILING AND "${CUDA_TARGET_CPU_ARCH}" STREQUAL "AARCH64")
  if("${CUDA_TARGET_OS_VARIANT}" STREQUAL "Android" AND EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/targets/aarch64-linux-androideabi")
    set(_cuda_target_triplet_initial "aarch64-linux-androideabi")
  elseif(EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/targets/aarch64-linux-gnueabihf")
    set(_cuda_target_triplet_initial "aarch64-linux-gnueabihf")
  elseif(EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/targets/aarch64-linux")
    # Last fallback: toolkits that name the 64-bit ARM Linux target directory
    # plain "aarch64-linux".
    set(_cuda_target_triplet_initial "aarch64-linux")
  endif()
endif()
set(CUDA_TARGET_TRIPLET "${_cuda_target_triplet_initial}" CACHE STRING "Specify the target triplet for which the input files must be compiled.")
# Offer every directory found under <toolkit root>/targets as a selectable
# value of the cache entry.
file(GLOB __cuda_available_target_tiplets RELATIVE "${CUDA_TOOLKIT_ROOT_DIR}/targets" "${CUDA_TOOLKIT_ROOT_DIR}/targets/*" )
set_property(CACHE CUDA_TARGET_TRIPLET PROPERTY STRINGS ${__cuda_available_target_tiplets})
mark_as_advanced(CUDA_TARGET_TRIPLET)
659
# Target directory
# Unless the caller already defined CUDA_TOOLKIT_TARGET_DIR, point it at the
# per-triplet directory inside the toolkit, provided a triplet is set and that
# directory exists.
if(NOT DEFINED CUDA_TOOLKIT_TARGET_DIR)
  if(CUDA_TARGET_TRIPLET AND EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/targets/${CUDA_TARGET_TRIPLET}")
    set(CUDA_TOOLKIT_TARGET_DIR "${CUDA_TOOLKIT_ROOT_DIR}/targets/${CUDA_TARGET_TRIPLET}")
  endif()
endif()
664
# CUDA_TOOLKIT_INCLUDE
# Find the directory holding device_functions.h, a header that ships with the
# toolkit.  The target directory and toolkit root (plus the CUDA_PATH and
# CUDA_INC_PATH environment variables) are searched first, looking in their
# include subdirectory.
find_path(CUDA_TOOLKIT_INCLUDE
  NAMES
    device_functions.h
  PATHS
    "${CUDA_TOOLKIT_TARGET_DIR}"
    "${CUDA_TOOLKIT_ROOT_DIR}"
    ENV CUDA_PATH
    ENV CUDA_INC_PATH
  PATH_SUFFIXES
    include
  NO_DEFAULT_PATH
)
# Fall back to the default search locations if the restricted search above
# came up empty.
find_path(CUDA_TOOLKIT_INCLUDE NAMES device_functions.h)
mark_as_advanced(CUDA_TOOLKIT_INCLUDE)

# Start with an empty list of user supplied nvcc include arguments, and expose
# the toolkit include directory as CUDA_INCLUDE_DIRS.
set(CUDA_NVCC_INCLUDE_ARGS_USER "")
set(CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})
681
# Find a CUDA library, preferring the selected toolkit over system locations.
#
#   _var      - name of the cache variable that receives the result
#   _names    - library name(s) to look for
#   _doc      - documentation string stored with the cache variable
#   _path_ext - prefix prepended to every library sub-directory that is
#               searched (e.g. "extras/CUPTI/"); may be empty
#
# For toolkits newer than 6.0 the "<name>_static" variants are accepted as
# well.  Because this is a macro, its helper variables live in the caller's
# scope; they are reset on entry so a stale value is never picked up when the
# conditions below do not assign them.
macro(cuda_find_library_local_first_with_path_ext _var _names _doc _path_ext )
  set(_cuda_64bit_lib_dir "")
  set(_cuda_static_lib_names "")
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    # CUDA 3.2+ on Windows moved the library directories, so we need the new
    # and old paths.
    set(_cuda_64bit_lib_dir "${_path_ext}lib/x64" "${_path_ext}lib64" "${_path_ext}libx64" )
  endif()
  if(CUDA_VERSION VERSION_GREATER "6.0")
    foreach(name ${_names})
      list(APPEND _cuda_static_lib_names "${name}_static")
    endforeach()
  endif()
  # CUDA 3.2+ on Windows moved the library directories, so we need the new
  # (lib/Win32) and the old path (lib).
  find_library(${_var}
    NAMES ${_names} ${_cuda_static_lib_names}
    PATHS "${CUDA_TOOLKIT_TARGET_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}"
    ENV CUDA_PATH
    ENV CUDA_LIB_PATH
    PATH_SUFFIXES ${_cuda_64bit_lib_dir} "${_path_ext}lib/Win32" "${_path_ext}lib" "${_path_ext}libWin32"
    DOC "${_doc}"
    NO_DEFAULT_PATH
    )
  # Search default search paths, after we search our own set of paths.
  find_library(${_var}
    NAMES ${_names} ${_cuda_static_lib_names}
    PATHS "/usr/lib/nvidia-current"
    DOC "${_doc}"
    )
endmacro()
712
# Convenience wrapper around cuda_find_library_local_first_with_path_ext that
# searches the library directories without any extra path prefix.
macro(cuda_find_library_local_first _var _names _doc)
  cuda_find_library_local_first_with_path_ext(
    "${_var}"
    "${_names}"
    "${_doc}"
    ""
  )
endmacro()
716
# Unprefixed alias of cuda_find_library_local_first; it forwards its three
# arguments unchanged.  (The wrapped macro takes exactly three parameters, so
# no trailing empty argument is passed.)
macro(find_library_local_first _var _names _doc )
  cuda_find_library_local_first( "${_var}" "${_names}" "${_doc}" )
endmacro()
720
721
# CUDA_LIBRARIES
# The runtime library is always looked up.
cuda_find_library_local_first(CUDA_CUDART_LIBRARY cudart "\"cudart\" library")
# The cudartemu library only existed for the 3.0 version of CUDA, so it is
# searched for that version only.
if(CUDA_VERSION VERSION_EQUAL "3.0")
  cuda_find_library_local_first(CUDA_CUDARTEMU_LIBRARY cudartemu "\"cudartemu\" library")
  mark_as_advanced(CUDA_CUDARTEMU_LIBRARY)
endif()
731
# The CUPTI library first shipped with toolkit 4.0; it is searched below the
# extras/CUPTI/ prefix of the toolkit.
if(NOT CUDA_VERSION VERSION_LESS "4.0")
  cuda_find_library_local_first_with_path_ext(
    CUDA_cupti_LIBRARY
    cupti
    "\"cupti\" library"
    "extras/CUPTI/"
  )
  mark_as_advanced(CUDA_cupti_LIBRARY)
endif()
737
# CUDA_LIBRARIES defaults to the cudart library; the cudartemu library takes
# its place when emulation mode is requested and that library was found.
set(CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY})
if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY)
  set(CUDA_LIBRARIES ${CUDA_CUDARTEMU_LIBRARY})
endif()
745
# Look for the "cuda" library.  The 1.1 toolkit on linux doesn't appear to
# have it as a separate library on some platforms.
cuda_find_library_local_first(
  CUDA_CUDA_LIBRARY
  cuda
  "\"cuda\" library (older versions only)."
)

mark_as_advanced(CUDA_CUDA_LIBRARY CUDA_CUDART_LIBRARY)
754
#######################
# Look up one of the toolkit helper libraries by name.  The result is stored
# in the advanced cache variable CUDA_<_name>_LIBRARY.
macro(FIND_CUDA_HELPER_LIBS _name)
  cuda_find_library_local_first_with_path_ext(
    "CUDA_${_name}_LIBRARY"
    "${_name}"
    "\"${_name}\" library"
    ""
  )
  mark_as_advanced(CUDA_${_name}_LIBRARY)
endmacro()
761
#######################
# Emulation mode is rejected for every toolkit newer than 3.0: abort the
# configuration when it is still switched on.
if(CUDA_BUILD_EMULATION AND CUDA_VERSION VERSION_GREATER "3.0")
  message(FATAL_ERROR "CUDA_BUILD_EMULATION is not supported in version 3.1 and onwards.  You must disable it to proceed.  You have version ${CUDA_VERSION}.")
endif()
769
# Locate the remaining toolkit libraries; which ones are searched depends on
# the toolkit version.
if(CUDA_VERSION VERSION_LESS "3.1")
  # The emulation variants are not available from version 3.1 onward.
  foreach(_cuda_helper_lib_name cufftemu cublasemu)
    find_cuda_helper_libs(${_cuda_helper_lib_name})
  endforeach()
endif()
foreach(_cuda_helper_lib_name cufft cublas)
  find_cuda_helper_libs(${_cuda_helper_lib_name})
endforeach()
if(NOT CUDA_VERSION VERSION_LESS "3.2")
  # cusparse and curand are searched from version 3.2 on; the video
  # encode/decode libraries only on Windows.
  foreach(_cuda_helper_lib_name cusparse curand)
    find_cuda_helper_libs(${_cuda_helper_lib_name})
  endforeach()
  if(WIN32)
    foreach(_cuda_helper_lib_name nvcuvenc nvcuvid)
      find_cuda_helper_libs(${_cuda_helper_lib_name})
    endforeach()
  endif()
endif()
if(CUDA_VERSION VERSION_GREATER "5.0")
  # Since CUDA 5.5 NPP is split into three separate libraries;
  # CUDA_npp_LIBRARY is assembled as the list of all three.
  foreach(_cuda_helper_lib_name nppc nppi npps)
    find_cuda_helper_libs(${_cuda_helper_lib_name})
  endforeach()
  set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppi_LIBRARY};${CUDA_npps_LIBRARY}")
elseif(NOT CUDA_VERSION VERSION_LESS "4.0")
  find_cuda_helper_libs(npp)
endif()
# Do not leave the loop variable behind in this scope.
unset(_cuda_helper_lib_name)
796
# CUDA_CUFFT_LIBRARIES / CUDA_CUBLAS_LIBRARIES refer to the regular libraries
# unless emulation mode is on, in which case the emulation variants are used.
set(CUDA_CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY})
set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY})
if(CUDA_BUILD_EMULATION)
  set(CUDA_CUFFT_LIBRARIES ${CUDA_cufftemu_LIBRARY})
  set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublasemu_LIBRARY})
endif()
804
########################
# Locate the SDK by searching for common/inc/cutil.h.  As of CUDA 3.0
# NVSDKCUDA_ROOT has been replaced with NVSDKCOMPUTE_ROOT, with the old CUDA C
# contents moved into its C subdirectory, so both environment variables are
# used as hints, followed by a Windows registry entry.
find_path(CUDA_SDK_ROOT_DIR
  NAMES
    common/inc/cutil.h
  HINTS
    "$ENV{NVSDKCOMPUTE_ROOT}/C"
    ENV NVSDKCUDA_ROOT
    "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]"
  PATHS
    "/Developer/GPU\ Computing/C"
)
816
# Candidate SDK locations, in search order.  CUDA_SDK_ROOT_DIR has to stay at
# the front of the list so that it can override the remaining entries.
set(CUDA_SDK_SEARCH_PATH
  "${CUDA_SDK_ROOT_DIR}"
  "${CUDA_TOOLKIT_ROOT_DIR}/local/NVSDK0.2" "${CUDA_TOOLKIT_ROOT_DIR}/NVSDK0.2"
  "${CUDA_TOOLKIT_ROOT_DIR}/NV_CUDA_SDK"
  "$ENV{HOME}/NVIDIA_CUDA_SDK" "$ENV{HOME}/NVIDIA_CUDA_SDK_MACOSX"
  "/Developer/CUDA"
)
828
829# Example of how to find an include file from the CUDA_SDK_ROOT_DIR
830
831# find_path(CUDA_CUT_INCLUDE_DIR
832#   cutil.h
833#   PATHS ${CUDA_SDK_SEARCH_PATH}
834#   PATH_SUFFIXES "common/inc"
835#   DOC "Location of cutil.h"
836#   NO_DEFAULT_PATH
837#   )
838# # Now search system paths
839# find_path(CUDA_CUT_INCLUDE_DIR cutil.h DOC "Location of cutil.h")
840
841# mark_as_advanced(CUDA_CUT_INCLUDE_DIR)
842
843
844# Example of how to find a library in the CUDA_SDK_ROOT_DIR
845
846# # cutil library is called cutil64 for 64 bit builds on windows.  We don't want
847# # to get these confused, so we are setting the name based on the word size of
848# # the build.
849
850# if(CMAKE_SIZEOF_VOID_P EQUAL 8)
851#   set(cuda_cutil_name cutil64)
852# else()
853#   set(cuda_cutil_name cutil32)
854# endif()
855
856# find_library(CUDA_CUT_LIBRARY
857#   NAMES cutil ${cuda_cutil_name}
858#   PATHS ${CUDA_SDK_SEARCH_PATH}
859#   # The new version of the sdk shows up in common/lib, but the old one is in lib
860#   PATH_SUFFIXES "common/lib" "lib"
861#   DOC "Location of cutil library"
862#   NO_DEFAULT_PATH
863#   )
864# # Now search system paths
865# find_library(CUDA_CUT_LIBRARY NAMES cutil ${cuda_cutil_name} DOC "Location of cutil library")
866# mark_as_advanced(CUDA_CUT_LIBRARY)
867# set(CUDA_CUT_LIBRARIES ${CUDA_CUT_LIBRARY})
868
869
870
#############################
# Report the result of the search.
set(CUDA_FOUND TRUE)

# Remember the values the search ran with in internal cache entries named
# CUDA_<name>_INTERNAL.
foreach(_cuda_tracked_name TOOLKIT_ROOT_DIR TARGET_TRIPLET TOOLKIT_TARGET_DIR SDK_ROOT_DIR)
  set(CUDA_${_cuda_tracked_name}_INTERNAL "${CUDA_${_cuda_tracked_name}}" CACHE INTERNAL
    "This is the value of the last time CUDA_${_cuda_tracked_name} was set successfully." FORCE)
endforeach()
# Do not leave the loop variable behind in this scope.
unset(_cuda_tracked_name)

# Let the standard helper check the required variables and the requested
# version.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(CUDA
  REQUIRED_VARS CUDA_TOOLKIT_ROOT_DIR CUDA_NVCC_EXECUTABLE CUDA_INCLUDE_DIRS CUDA_CUDART_LIBRARY
  VERSION_VAR CUDA_VERSION
)
894
895
896
897###############################################################################
898###############################################################################
899# Macros
900###############################################################################
901###############################################################################
902
###############################################################################
# Register extra include directories for nvcc.  Every argument is appended to
# CUDA_NVCC_INCLUDE_ARGS_USER in the form -I<directory>.
macro(CUDA_INCLUDE_DIRECTORIES)
  foreach(dir ${ARGN})
    list(APPEND CUDA_NVCC_INCLUDE_ARGS_USER "-I${dir}")
  endforeach()
endmacro()
910
911
##############################################################################
# Locate the helper scripts used by this module: parse_cubin, make2cmake and
# run_nvcc, each with the "cmake" extension.
foreach(_cuda_helper_script_name parse_cubin make2cmake run_nvcc)
  cuda_find_helper_file(${_cuda_helper_script_name} cmake)
endforeach()
# Do not leave the loop variable behind in this scope.
unset(_cuda_helper_script_name)
916
##############################################################################
# Split ARGN into three lists:
#   _sources       - everything in front of the OPTIONS keyword that is not a
#                    recognised target keyword
#   _cmake_options - the target keywords WIN32, MACOSX_BUNDLE,
#                    EXCLUDE_FROM_ALL, STATIC, SHARED and MODULE, wherever
#                    they appear
#   _options       - everything else that follows the OPTIONS keyword
#
macro(CUDA_GET_SOURCES_AND_OPTIONS _sources _cmake_options _options)
  # Start from empty results.
  set( ${_sources} )
  set( ${_cmake_options} )
  set( ${_options} )
  set( _found_options FALSE )
  foreach(arg ${ARGN})
    if("x${arg}" STREQUAL "xOPTIONS")
      # Anything that is not a target keyword is an nvcc option from here on.
      set( _found_options TRUE )
    elseif("x${arg}" STREQUAL "xWIN32"
        OR "x${arg}" STREQUAL "xMACOSX_BUNDLE"
        OR "x${arg}" STREQUAL "xEXCLUDE_FROM_ALL"
        OR "x${arg}" STREQUAL "xSTATIC"
        OR "x${arg}" STREQUAL "xSHARED"
        OR "x${arg}" STREQUAL "xMODULE")
      list(APPEND ${_cmake_options} ${arg})
    elseif( _found_options )
      list(APPEND ${_options} ${arg})
    else()
      # Neither a keyword nor an option: assume this is a file.
      list(APPEND ${_sources} ${arg})
    endif()
  endforeach()
endmacro()
947
##############################################################################
# Distribute the nvcc options in ARGN over variables prefixed by
# _option_prefix.
#
# Options are appended to ${_option_prefix} until an argument equals the upper
# case name of one of CUDA_configuration_types (e.g. DEBUG); from then on they
# are appended to ${_option_prefix}_<CONFIG> until the next configuration
# name.  The result variables are appended to, not reset.
#
macro(CUDA_PARSE_NVCC_OPTIONS _option_prefix)
  set( _found_config )
  foreach(arg ${ARGN})
    # Determine if we are dealing with a per-configuration flag
    foreach(config ${CUDA_configuration_types})
      string(TOUPPER "${config}" config_upper)
      if ("x${arg}" STREQUAL "x${config_upper}")
        set( _found_config _${arg})
        # Set arg to nothing to keep it from being processed further
        set( arg )
      endif()
    endforeach()

    # Compare against the empty string instead of testing the truth value of
    # arg: option text such as 0, OFF or NO is a CMake false constant and
    # would otherwise be dropped silently.
    if (NOT "x${arg}" STREQUAL "x")
      list(APPEND ${_option_prefix}${_found_config} "${arg}")
    endif()
  endforeach()
endmacro()
969
##############################################################################
# Add CUDA_INCLUDE_DIRS to the include directories of the current directory,
# unless one of the existing entries is already equal to it.
function(CUDA_ADD_CUDA_INCLUDE_ONCE)
  get_directory_property(_cuda_current_include_dirs INCLUDE_DIRECTORIES)
  # An index of -1 means that no existing entry equals CUDA_INCLUDE_DIRS.
  list(FIND _cuda_current_include_dirs "${CUDA_INCLUDE_DIRS}" _cuda_include_index)
  if(_cuda_include_index EQUAL -1)
    include_directories(${CUDA_INCLUDE_DIRS})
  endif()
endfunction()
986
# Work out which library type keyword has to be added to a library's argument
# list.  When ARGN already contains SHARED, MODULE or STATIC, the variable
# named by shared_flag ends up empty in the caller's scope; otherwise it
# receives SHARED or STATIC depending on BUILD_SHARED_LIBS.
function(CUDA_BUILD_SHARED_LIBRARY shared_flag)
  set(_cuda_given_args ${ARGN})
  set(_cuda_type_is_explicit FALSE)
  foreach(_cuda_type_keyword SHARED MODULE STATIC)
    list(FIND _cuda_given_args ${_cuda_type_keyword} _cuda_keyword_index)
    if(_cuda_keyword_index GREATER -1)
      set(_cuda_type_is_explicit TRUE)
    endif()
  endforeach()

  if(_cuda_type_is_explicit)
    # The caller chose the type already: hand back nothing.
    set(${shared_flag} PARENT_SCOPE)
  elseif(BUILD_SHARED_LIBS)
    set(${shared_flag} SHARED PARENT_SCOPE)
  else()
    set(${shared_flag} STATIC PARENT_SCOPE)
  endif()
endfunction()
1007
##############################################################################
# Compute a sub-directory for the files generated from a source file, so that
# sources sharing a basename but living in different directories do not clash.
# CMake itself only adds such a path once a conflict is seen; by then it is
# too late to fix the first file, so here the relative path is added for every
# file.
#
#   path       - source file, absolute or relative
#   build_path - name of the variable in the caller's scope that receives the
#                sanitised directory part of the path
function(CUDA_COMPUTE_BUILD_PATH path build_path)
  # Work on CMake style paths only.
  file(TO_CMAKE_PATH "${path}" _cuda_rel_path)

  # Absolute paths are generally unnecessary, especially if something like
  # file(GLOB_RECURSE) is used to pick up the files.  Make them relative to
  # the current binary directory when they start with it, and to the current
  # source directory otherwise.
  if(IS_ABSOLUTE "${_cuda_rel_path}")
    string(FIND "${_cuda_rel_path}" "${CMAKE_CURRENT_BINARY_DIR}" _cuda_prefix_pos)
    if(_cuda_prefix_pos EQUAL 0)
      set(_cuda_base_dir "${CMAKE_CURRENT_BINARY_DIR}")
    else()
      set(_cuda_base_dir "${CMAKE_CURRENT_SOURCE_DIR}")
    endif()
    file(RELATIVE_PATH _cuda_rel_path "${_cuda_base_dir}" "${_cuda_rel_path}")
  endif()

  # The clean-up below follows the recipe of
  # cmLocalGenerator::CreateSafeUniqueObjectFileName in the CMake source:
  # drop leading slashes, ...
  string(REGEX REPLACE "^[/]+" "" _cuda_rel_path "${_cuda_rel_path}")
  # ... avoid absolute paths by replacing ':', ...
  string(REPLACE ":" "_" _cuda_rel_path "${_cuda_rel_path}")
  # ... avoid relative paths that go up the tree, ...
  string(REPLACE "../" "__/" _cuda_rel_path "${_cuda_rel_path}")
  # ... and avoid spaces.
  string(REPLACE " " "_" _cuda_rel_path "${_cuda_rel_path}")

  # The file name is stripped last on purpose: removing the basename earlier
  # can turn a path like path/../basename into path/.., which the "../"
  # replacement above would no longer catch.
  get_filename_component(_cuda_rel_path "${_cuda_rel_path}" PATH)

  set(${build_path} "${_cuda_rel_path}" PARENT_SCOPE)
endfunction()
1050
1051##############################################################################
# This helper macro populates the following variables and sets up custom
# commands and targets to invoke the nvcc compiler to generate C or PTX source,
# depending upon the format parameter.  The compiler is invoked once with -M
# to generate a dependency file and a second time with -cuda or -ptx to generate
# a .cpp or .ptx file.
1057# INPUT:
1058#   cuda_target         - Target name
1059#   format              - PTX, CUBIN, FATBIN or OBJ
1060#   FILE1 .. FILEN      - The remaining arguments are the sources to be wrapped.
1061#   OPTIONS             - Extra options to NVCC
1062# OUTPUT:
1063#   generated_files     - List of generated files
1064##############################################################################
1065##############################################################################
1066
1067macro(CUDA_WRAP_SRCS cuda_target format generated_files)
1068
1069  # If CMake doesn't support separable compilation, complain
1070  if(CUDA_SEPARABLE_COMPILATION AND CMAKE_VERSION VERSION_LESS "2.8.10.1")
1071    message(SEND_ERROR "CUDA_SEPARABLE_COMPILATION isn't supported for CMake versions less than 2.8.10.1")
1072  endif()
1073
1074  # Set up all the command line flags here, so that they can be overridden on a per target basis.
1075
1076  set(nvcc_flags "")
1077
1078  # Emulation if the card isn't present.
1079  if (CUDA_BUILD_EMULATION)
1080    # Emulation.
1081    set(nvcc_flags ${nvcc_flags} --device-emulation -D_DEVICEEMU -g)
1082  else()
1083    # Device mode.  No flags necessary.
1084  endif()
1085
1086  if(CUDA_HOST_COMPILATION_CPP)
1087    set(CUDA_C_OR_CXX CXX)
1088  else()
1089    if(CUDA_VERSION VERSION_LESS "3.0")
1090      set(nvcc_flags ${nvcc_flags} --host-compilation C)
1091    else()
1092      message(WARNING "--host-compilation flag is deprecated in CUDA version >= 3.0.  Removing --host-compilation C flag" )
1093    endif()
1094    set(CUDA_C_OR_CXX C)
1095  endif()
1096
1097  set(generated_extension ${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION})
1098
1099  if(CUDA_64_BIT_DEVICE_CODE)
1100    set(nvcc_flags ${nvcc_flags} -m64)
1101  else()
1102    set(nvcc_flags ${nvcc_flags} -m32)
1103  endif()
1104
1105  if(CUDA_TARGET_CPU_ARCH AND CUDA_VERSION VERSION_LESS "7.0")
1106    # CPU architecture is either ARM or X86. Patch AARCH64 to be ARM
1107    string(REPLACE "AARCH64" "ARM" CUDA_TARGET_CPU_ARCH_patched ${CUDA_TARGET_CPU_ARCH})
1108    set(nvcc_flags ${nvcc_flags} "--target-cpu-architecture=${CUDA_TARGET_CPU_ARCH_patched}")
1109  endif()
1110
1111  if(CUDA_TARGET_OS_VARIANT AND CUDA_VERSION VERSION_LESS "7.0")
1112    set(nvcc_flags ${nvcc_flags} "-target-os-variant=${CUDA_TARGET_OS_VARIANT}")
1113  endif()
1114
1115  # This needs to be passed in at this stage, because VS needs to fill out the
1116  # value of VCInstallDir from within VS.  Note that CCBIN is only used if
1117  # -ccbin or --compiler-bindir isn't used and CUDA_HOST_COMPILER matches
1118  # $(VCInstallDir)/bin.
1119  if(CMAKE_GENERATOR MATCHES "Visual Studio")
1120    set(ccbin_flags -D "\"CCBIN:PATH=$(VCInstallDir)bin\"" )
1121  else()
1122    set(ccbin_flags)
1123  endif()
1124
1125  # Figure out which configure we will use and pass that in as an argument to
1126  # the script.  We need to defer the decision until compilation time, because
1127  # for VS projects we won't know if we are making a debug or release build
1128  # until build time.
1129  if(CMAKE_GENERATOR MATCHES "Visual Studio")
1130    set( CUDA_build_configuration "$(ConfigurationName)" )
1131  else()
1132    set( CUDA_build_configuration "${CMAKE_BUILD_TYPE}")
1133  endif()
1134
1135  # Initialize our list of includes with the user ones followed by the CUDA system ones.
1136  set(CUDA_NVCC_INCLUDE_ARGS ${CUDA_NVCC_INCLUDE_ARGS_USER} "-I${CUDA_INCLUDE_DIRS}")
1137  # Get the include directories for this directory and use them for our nvcc command.
1138  # Remove duplicate entries which may be present since include_directories
1139  # in CMake >= 2.8.8 does not remove them.
1140  get_directory_property(CUDA_NVCC_INCLUDE_DIRECTORIES INCLUDE_DIRECTORIES)
1141  list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRECTORIES)
1142  if(CUDA_NVCC_INCLUDE_DIRECTORIES)
1143    foreach(dir ${CUDA_NVCC_INCLUDE_DIRECTORIES})
1144      list(APPEND CUDA_NVCC_INCLUDE_ARGS -I${dir})
1145    endforeach()
1146  endif()
1147
1148  # Reset these variables
1149  set(CUDA_WRAP_OPTION_NVCC_FLAGS)
1150  foreach(config ${CUDA_configuration_types})
1151    string(TOUPPER ${config} config_upper)
1152    set(CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper})
1153  endforeach()
1154
1155  CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_cmake_options _cuda_wrap_options ${ARGN})
1156  CUDA_PARSE_NVCC_OPTIONS(CUDA_WRAP_OPTION_NVCC_FLAGS ${_cuda_wrap_options})
1157
1158  # Figure out if we are building a shared library.  BUILD_SHARED_LIBS is
1159  # respected in CUDA_ADD_LIBRARY.
1160  set(_cuda_build_shared_libs FALSE)
1161  # SHARED, MODULE
1162  list(FIND _cuda_wrap_cmake_options SHARED _cuda_found_SHARED)
1163  list(FIND _cuda_wrap_cmake_options MODULE _cuda_found_MODULE)
1164  if(_cuda_found_SHARED GREATER -1 OR _cuda_found_MODULE GREATER -1)
1165    set(_cuda_build_shared_libs TRUE)
1166  endif()
1167  # STATIC
1168  list(FIND _cuda_wrap_cmake_options STATIC _cuda_found_STATIC)
1169  if(_cuda_found_STATIC GREATER -1)
1170    set(_cuda_build_shared_libs FALSE)
1171  endif()
1172
1173  # CUDA_HOST_FLAGS
1174  if(_cuda_build_shared_libs)
1175    # If we are setting up code for a shared library, then we need to add extra flags for
1176    # compiling objects for shared libraries.
1177    set(CUDA_HOST_SHARED_FLAGS ${CMAKE_SHARED_LIBRARY_${CUDA_C_OR_CXX}_FLAGS})
1178  else()
1179    set(CUDA_HOST_SHARED_FLAGS)
1180  endif()
1181  # Only add the CMAKE_{C,CXX}_FLAGS if we are propagating host flags.  We
1182  # always need to set the SHARED_FLAGS, though.
1183  if(CUDA_PROPAGATE_HOST_FLAGS)
1184    set(_cuda_host_flags "set(CMAKE_HOST_FLAGS ${CMAKE_${CUDA_C_OR_CXX}_FLAGS} ${CUDA_HOST_SHARED_FLAGS})")
1185  else()
1186    set(_cuda_host_flags "set(CMAKE_HOST_FLAGS ${CUDA_HOST_SHARED_FLAGS})")
1187  endif()
1188
1189  set(_cuda_nvcc_flags_config "# Build specific configuration flags")
1190  # Loop over all the configuration types to generate appropriate flags for run_nvcc.cmake
1191  foreach(config ${CUDA_configuration_types})
1192    string(TOUPPER ${config} config_upper)
1193    # CMAKE_FLAGS are strings and not lists.  By not putting quotes around CMAKE_FLAGS
1194    # we convert the strings to lists (like we want).
1195
1196    if(CUDA_PROPAGATE_HOST_FLAGS)
1197      # nvcc chokes on -g3 in versions previous to 3.0, so replace it with -g
1198      set(_cuda_fix_g3 FALSE)
1199
1200      if(CMAKE_COMPILER_IS_GNUCC)
1201        if (CUDA_VERSION VERSION_LESS  "3.0" OR
1202            CUDA_VERSION VERSION_EQUAL "4.1" OR
1203            CUDA_VERSION VERSION_EQUAL "4.2"
1204            )
1205          set(_cuda_fix_g3 TRUE)
1206        endif()
1207      endif()
1208      if(_cuda_fix_g3)
1209        string(REPLACE "-g3" "-g" _cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
1210      else()
1211        set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
1212      endif()
1213
1214      set(_cuda_host_flags "${_cuda_host_flags}\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})")
1215    endif()
1216
1217    # Note that if we ever want CUDA_NVCC_FLAGS_<CONFIG> to be string (instead of a list
1218    # like it is currently), we can remove the quotes around the
1219    # ${CUDA_NVCC_FLAGS_${config_upper}} variable like the CMAKE_HOST_FLAGS_<CONFIG> variable.
1220    set(_cuda_nvcc_flags_config "${_cuda_nvcc_flags_config}\nset(CUDA_NVCC_FLAGS_${config_upper} ${CUDA_NVCC_FLAGS_${config_upper}} ;; ${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}})")
1221  endforeach()
1222
1223  # Get the list of definitions from the directory property
1224  get_directory_property(CUDA_NVCC_DEFINITIONS COMPILE_DEFINITIONS)
1225  if(CUDA_NVCC_DEFINITIONS)
1226    foreach(_definition ${CUDA_NVCC_DEFINITIONS})
1227      list(APPEND nvcc_flags "-D${_definition}")
1228    endforeach()
1229  endif()
1230
1231  if(_cuda_build_shared_libs)
1232    list(APPEND nvcc_flags "-D${cuda_target}_EXPORTS")
1233  endif()
1234
1235  # Reset the output variable
1236  set(_cuda_wrap_generated_files "")
1237
1238  # Iterate over the macro arguments and create custom
1239  # commands for all the .cu files.
1240  foreach(file ${ARGN})
1241    # Ignore any file marked as a HEADER_FILE_ONLY
1242    get_source_file_property(_is_header ${file} HEADER_FILE_ONLY)
1243    if(${file} MATCHES "\\.cu$" AND NOT _is_header)
1244
1245      # Allow per source file overrides of the format.
1246      get_source_file_property(_cuda_source_format ${file} CUDA_SOURCE_PROPERTY_FORMAT)
1247      if(NOT _cuda_source_format)
1248        set(_cuda_source_format ${format})
1249      endif()
1250
1251      if( ${_cuda_source_format} MATCHES "OBJ")
1252        set( cuda_compile_to_external_module OFF )
1253      else()
1254        set( cuda_compile_to_external_module ON )
1255        if( ${_cuda_source_format} MATCHES "PTX" )
1256          set( cuda_compile_to_external_module_type "ptx" )
1257        elseif( ${_cuda_source_format} MATCHES "CUBIN")
1258          set( cuda_compile_to_external_module_type "cubin" )
1259        elseif( ${_cuda_source_format} MATCHES "FATBIN")
1260          set( cuda_compile_to_external_module_type "fatbin" )
1261        else()
1262          message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS for file '${file}': '${_cuda_source_format}'.  Use OBJ, PTX, CUBIN or FATBIN.")
1263        endif()
1264      endif()
1265
1266      if(cuda_compile_to_external_module)
1267        # Don't use any of the host compilation flags for PTX targets.
1268        set(CUDA_HOST_FLAGS)
1269        set(CUDA_NVCC_FLAGS_CONFIG)
1270      else()
1271        set(CUDA_HOST_FLAGS ${_cuda_host_flags})
1272        set(CUDA_NVCC_FLAGS_CONFIG ${_cuda_nvcc_flags_config})
1273      endif()
1274
1275      # Determine output directory
1276      cuda_compute_build_path("${file}" cuda_build_path)
1277      set(cuda_compile_intermediate_directory "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${cuda_target}.dir/${cuda_build_path}")
1278      if(CUDA_GENERATED_OUTPUT_DIR)
1279        set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}")
1280      else()
1281        if ( cuda_compile_to_external_module )
1282          set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}")
1283        else()
1284          set(cuda_compile_output_dir "${cuda_compile_intermediate_directory}")
1285        endif()
1286      endif()
1287
1288      # Add a custom target to generate a c or ptx file. ######################
1289
1290      get_filename_component( basename ${file} NAME )
1291      if( cuda_compile_to_external_module )
1292        set(generated_file_path "${cuda_compile_output_dir}")
1293        set(generated_file_basename "${cuda_target}_generated_${basename}.${cuda_compile_to_external_module_type}")
1294        set(format_flag "-${cuda_compile_to_external_module_type}")
1295        file(MAKE_DIRECTORY "${cuda_compile_output_dir}")
1296      else()
1297        set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}")
1298        set(generated_file_basename "${cuda_target}_generated_${basename}${generated_extension}")
1299        if(CUDA_SEPARABLE_COMPILATION)
1300          set(format_flag "-dc")
1301        else()
1302          set(format_flag "-c")
1303        endif()
1304      endif()
1305
1306      # Set all of our file names.  Make sure that whatever filenames that have
1307      # generated_file_path in them get passed in through as a command line
1308      # argument, so that the ${CMAKE_CFG_INTDIR} gets expanded at run time
1309      # instead of configure time.
1310      set(generated_file "${generated_file_path}/${generated_file_basename}")
1311      set(cmake_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.depend")
1312      set(NVCC_generated_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.NVCC-depend")
1313      set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt")
1314      set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake")
1315
1316      # Setup properties for obj files:
1317      if( NOT cuda_compile_to_external_module )
1318        set_source_files_properties("${generated_file}"
1319          PROPERTIES
1320          EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked.
1321          )
1322      endif()
1323
1324      # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path.
1325      get_filename_component(file_path "${file}" PATH)
1326      if(IS_ABSOLUTE "${file_path}")
1327        set(source_file "${file}")
1328      else()
1329        set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}")
1330      endif()
1331
1332      if( NOT cuda_compile_to_external_module AND CUDA_SEPARABLE_COMPILATION)
1333        list(APPEND ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS "${generated_file}")
1334      endif()
1335
1336      # Bring in the dependencies.  Creates a variable CUDA_NVCC_DEPEND #######
1337      cuda_include_nvcc_dependencies(${cmake_dependency_file})
1338
1339      # Convience string for output ###########################################
1340      if(CUDA_BUILD_EMULATION)
1341        set(cuda_build_type "Emulation")
1342      else()
1343        set(cuda_build_type "Device")
1344      endif()
1345
1346      # Build the NVCC made dependency file ###################################
1347      set(build_cubin OFF)
1348      if ( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN )
1349         if ( NOT cuda_compile_to_external_module )
1350           set ( build_cubin ON )
1351         endif()
1352      endif()
1353
1354      # Configure the build script
1355      configure_file("${CUDA_run_nvcc}" "${custom_target_script}" @ONLY)
1356
1357      # So if a user specifies the same cuda file as input more than once, you
1358      # can have bad things happen with dependencies.  Here we check an option
1359      # to see if this is the behavior they want.
1360      if(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE)
1361        set(main_dep MAIN_DEPENDENCY ${source_file})
1362      else()
1363        set(main_dep DEPENDS ${source_file})
1364      endif()
1365
1366      if(CUDA_VERBOSE_BUILD)
1367        set(verbose_output ON)
1368      elseif(CMAKE_GENERATOR MATCHES "Makefiles")
1369        set(verbose_output "$(VERBOSE)")
1370      else()
1371        set(verbose_output OFF)
1372      endif()
1373
1374      # Create up the comment string
1375      file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}")
1376      if(cuda_compile_to_external_module)
1377        set(cuda_build_comment_string "Building NVCC ${cuda_compile_to_external_module_type} file ${generated_file_relative_path}")
1378      else()
1379        set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}")
1380      endif()
1381
1382      # Build the generated file and dependency file ##########################
1383      add_custom_command(
1384        OUTPUT ${generated_file}
1385        # These output files depend on the source_file and the contents of cmake_dependency_file
1386        ${main_dep}
1387        DEPENDS ${CUDA_NVCC_DEPEND}
1388        DEPENDS ${custom_target_script}
1389        # Make sure the output directory exists before trying to write to it.
1390        COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}"
1391        COMMAND ${CMAKE_COMMAND} ARGS
1392          -D verbose:BOOL=${verbose_output}
1393          ${ccbin_flags}
1394          -D build_configuration:STRING=${CUDA_build_configuration}
1395          -D "generated_file:STRING=${generated_file}"
1396          -D "generated_cubin_file:STRING=${generated_cubin_file}"
1397          -P "${custom_target_script}"
1398        WORKING_DIRECTORY "${cuda_compile_intermediate_directory}"
1399        COMMENT "${cuda_build_comment_string}"
1400        )
1401
1402      # Make sure the build system knows the file is generated.
1403      set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE)
1404
1405      list(APPEND _cuda_wrap_generated_files ${generated_file})
1406
1407      # Add the other files that we want cmake to clean on a cleanup ##########
1408      list(APPEND CUDA_ADDITIONAL_CLEAN_FILES "${cmake_dependency_file}")
1409      list(REMOVE_DUPLICATES CUDA_ADDITIONAL_CLEAN_FILES)
1410      set(CUDA_ADDITIONAL_CLEAN_FILES ${CUDA_ADDITIONAL_CLEAN_FILES} CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.")
1411
1412    endif()
1413  endforeach()
1414
1415  # Set the return parameter
1416  set(${generated_files} ${_cuda_wrap_generated_files})
1417endmacro()
1418
# _cuda_get_important_host_flags(<important_flags> <flag_string>)
#
# Scans <flag_string> for host compiler flags and appends every match to the
# list variable named by <important_flags>; the updated list is written back
# to the caller's scope.
#
#   important_flags - name of the list variable to append the matches to.
#   flag_string     - host compiler flags to scan; may be empty.
#
# When CMAKE_GENERATOR matches "Visual Studio" the flags /MD, /MDd, /MT and
# /MTd are extracted; otherwise -fPIC is.
function(_cuda_get_important_host_flags important_flags flag_string)
  if(CMAKE_GENERATOR MATCHES "Visual Studio")
    set(_pattern "/M[DT][d]?")
  else()
    set(_pattern "-fPIC")
  endif()
  # The input is quoted on purpose: string(REGEX MATCHALL) requires an input
  # argument, and an unquoted empty flag string would expand to nothing and
  # abort configuration with an argument-count error.
  string(REGEX MATCHALL "${_pattern}" flags "${flag_string}")
  list(APPEND ${important_flags} ${flags})
  set(${important_flags} ${${important_flags}} PARENT_SCOPE)
endfunction()
1429
1430###############################################################################
1431###############################################################################
1432# Separable Compilation Link
1433###############################################################################
1434###############################################################################
1435
# Compute the name of the intermediate link object that is handed to
# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS.
#
#   output_file_var - name of the variable, set in the caller's scope, that
#                     receives the computed file name.
#   cuda_target     - target name; it forms part of the directory and of the
#                     file name.
#   object_files    - separable compilation objects recorded for the target.
#
# The result is an empty string when object_files evaluates to false.
function(CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME output_file_var cuda_target object_files)
  # No objects means no intermediate link file.
  set(_intermediate_link_file "")
  if(object_files)
    set(_object_dir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${cuda_target}.dir/${CMAKE_CFG_INTDIR}")
    set(_object_name "${cuda_target}_intermediate_link${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION}")
    set(_intermediate_link_file "${_object_dir}/${_object_name}")
  endif()

  set(${output_file_var} "${_intermediate_link_file}" PARENT_SCOPE)
endfunction()
1447
# Setup the build rule for the separable compilation intermediate link file.
#
#   output_file  - path of the intermediate link object to generate.
#   cuda_target  - target the intermediate link object belongs to.
#   options      - nvcc options; they are handed to CUDA_PARSE_NVCC_OPTIONS.
#   object_files - objects passed to "nvcc -dlink".  Nothing is done when this
#                  evaluates to false.
#
# The rule is normally an add_custom_command(OUTPUT ...) producing
# output_file.  When MSVC_VERSION is greater than 1599 the same nvcc invocation
# is attached to cuda_target as a PRE_LINK command instead.
function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file cuda_target options object_files)
  if (object_files)

    set_source_files_properties("${output_file}"
      PROPERTIES
      EXTERNAL_OBJECT TRUE # This is an object file not to be compiled, but only
                           # be linked.
      GENERATED TRUE       # This file is generated during the build
      )

    # For now we are ignoring all the configuration specific flags.
    set(nvcc_flags)
    CUDA_PARSE_NVCC_OPTIONS(nvcc_flags ${options})
    if(CUDA_64_BIT_DEVICE_CODE)
      list(APPEND nvcc_flags -m64)
    else()
      list(APPEND nvcc_flags -m32)
    endif()
    # If -ccbin, --compiler-bindir has been specified, don't do anything.  Otherwise add it here.
    list( FIND nvcc_flags "-ccbin" ccbin_found0 )
    list( FIND nvcc_flags "--compiler-bindir" ccbin_found1 )
    if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
      list(APPEND nvcc_flags -ccbin "\"${CUDA_HOST_COMPILER}\"")
    endif()
    # Create a list of flags specified by CUDA_NVCC_FLAGS_${CONFIG}
    set(config_specific_flags)
    set(flags)
    foreach(config ${CUDA_configuration_types})
      string(TOUPPER ${config} config_upper)
      # Add config specific flags
      foreach(f ${CUDA_NVCC_FLAGS_${config_upper}})
        list(APPEND config_specific_flags $<$<CONFIG:${config}>:${f}>)
      endforeach()
      set(important_host_flags)
      # The host flags are passed quoted: the per-configuration flags variable
      # may be empty, and an unquoted empty expansion would invoke the helper
      # with too few arguments and abort configuration.
      _cuda_get_important_host_flags(important_host_flags "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
      # Forward each important host flag to the host compiler via -Xcompiler,
      # restricted to the configuration it came from.
      foreach(f ${important_host_flags})
        list(APPEND flags $<$<CONFIG:${config}>:-Xcompiler> $<$<CONFIG:${config}>:${f}>)
      endforeach()
    endforeach()
    # Add our general CUDA_NVCC_FLAGS with the configuration specific flags
    set(nvcc_flags ${CUDA_NVCC_FLAGS} ${config_specific_flags} ${nvcc_flags})

    # Path relative to the top of the build tree; only used in build messages.
    file(RELATIVE_PATH output_file_relative_path "${CMAKE_BINARY_DIR}" "${output_file}")

    # Some generators don't handle the multiple levels of custom command
    # dependencies correctly (obj1 depends on file1, obj2 depends on obj1), so
    # we work around that issue by compiling the intermediate link object as a
    # pre-link custom command in that situation.
    set(do_obj_build_rule TRUE)
    if (MSVC_VERSION GREATER 1599)
      # VS 2010 and 2012 have this problem.  If future versions fix this issue,
      # it should still work, it just won't be as nice as the other method.
      set(do_obj_build_rule FALSE)
    endif()

    if (do_obj_build_rule)
      add_custom_command(
        OUTPUT ${output_file}
        DEPENDS ${object_files}
        COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} -dlink ${object_files} -o ${output_file}
        ${flags}
        COMMENT "Building NVCC intermediate link file ${output_file_relative_path}"
        )
    else()
      add_custom_command(
        TARGET ${cuda_target}
        PRE_LINK
        COMMAND ${CMAKE_COMMAND} -E echo "Building NVCC intermediate link file ${output_file_relative_path}"
        COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} ${flags} -dlink ${object_files} -o "${output_file}"
        )
    endif()
  endif()
endfunction()
1522
1523###############################################################################
1524###############################################################################
1525# ADD LIBRARY
1526###############################################################################
1527###############################################################################
# CUDA_ADD_LIBRARY(<cuda_target> <args>...)
#
# Creates the library <cuda_target> with add_library().  The arguments are
# first run through CUDA_WRAP_SRCS, and the files it generates are added to
# the library together with the sources and, if one was computed, the
# separable compilation intermediate link file.  The library is linked against
# ${CUDA_LIBRARIES} and its LINKER_LANGUAGE is set to ${CUDA_C_OR_CXX}.
#
# This is a macro, so the helper variables it sets (_sources, _cmake_options,
# _options, _cuda_shared_flag, _generated_files, link_file) end up in the
# calling scope.
macro(CUDA_ADD_LIBRARY cuda_target)

  cuda_add_cuda_include_once()

  # Separate the sources from the options; the results land in _sources,
  # _cmake_options and _options.
  cuda_get_sources_and_options(_sources _cmake_options _options ${ARGN})
  cuda_build_shared_library(_cuda_shared_flag ${ARGN})

  # Set up the custom commands and targets for each file.
  cuda_wrap_srcs(${cuda_target} OBJ _generated_files
    ${_sources}
    ${_cmake_options}
    ${_cuda_shared_flag}
    OPTIONS ${_options})

  # Work out the name of the intermediate link file used for separable
  # compilation.
  cuda_compute_separable_compilation_object_file_name(
    link_file
    ${cuda_target}
    "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  # Create the library itself.
  add_library(${cuda_target} ${_cmake_options} ${_generated_files} ${_sources} ${link_file})

  # Add the link phase for separable compilation.  When it is enabled the
  # ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS variable has been defined.
  cuda_link_separable_compilation_objects(
    "${link_file}"
    ${cuda_target}
    "${_options}"
    "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  target_link_libraries(${cuda_target} ${CUDA_LIBRARIES})

  # The linker language has to follow what the generated files are expected to
  # be.  CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
  set_target_properties(${cuda_target} PROPERTIES LINKER_LANGUAGE ${CUDA_C_OR_CXX})

endmacro()
1568
1569
1570###############################################################################
1571###############################################################################
1572# ADD EXECUTABLE
1573###############################################################################
1574###############################################################################
# CUDA_ADD_EXECUTABLE(<cuda_target> <args>...)
#
# Creates the executable <cuda_target> with add_executable().  The arguments
# are first run through CUDA_WRAP_SRCS, and the files it generates are added
# to the executable together with the sources and, if one was computed, the
# separable compilation intermediate link file.  The executable is linked
# against ${CUDA_LIBRARIES} and its LINKER_LANGUAGE is set to
# ${CUDA_C_OR_CXX}.
#
# This is a macro, so the helper variables it sets (_sources, _cmake_options,
# _options, _generated_files, link_file) end up in the calling scope.
macro(CUDA_ADD_EXECUTABLE cuda_target)

  cuda_add_cuda_include_once()

  # Separate the sources from the options; the results land in _sources,
  # _cmake_options and _options.
  cuda_get_sources_and_options(_sources _cmake_options _options ${ARGN})

  # Set up the custom commands and targets for each file.
  cuda_wrap_srcs(${cuda_target} OBJ _generated_files
    ${_sources}
    OPTIONS ${_options})

  # Work out the name of the intermediate link file used for separable
  # compilation.
  cuda_compute_separable_compilation_object_file_name(
    link_file
    ${cuda_target}
    "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  # Create the executable itself.
  add_executable(${cuda_target} ${_cmake_options} ${_generated_files} ${_sources} ${link_file})

  # Add the link phase for separable compilation.  When it is enabled the
  # ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS variable has been defined.
  cuda_link_separable_compilation_objects(
    "${link_file}"
    ${cuda_target}
    "${_options}"
    "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  target_link_libraries(${cuda_target} ${CUDA_LIBRARIES})

  # The linker language has to follow what the generated files are expected to
  # be.  CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
  set_target_properties(${cuda_target} PROPERTIES LINKER_LANGUAGE ${CUDA_C_OR_CXX})

endmacro()
1612
1613
1614###############################################################################
1615###############################################################################
1616# (Internal) helper for manually added cuda source files with specific targets
1617###############################################################################
1618###############################################################################
# cuda_compile_base(<cuda_target> <format> <generated_files> <args>...)
#
# Shared implementation of the CUDA_COMPILE* macros: runs <args> through
# CUDA_WRAP_SRCS for the given target name and output format and stores the
# list of generated files in the variable named by <generated_files>.
#
# This is a macro, so _sources, _cmake_options, _options and _generated_files
# are also left behind in the calling scope.
macro(cuda_compile_base cuda_target format generated_files)

  # Separate the sources from the options.
  cuda_get_sources_and_options(_sources _cmake_options _options ${ARGN})

  # Set up the custom commands and targets for each file.
  cuda_wrap_srcs(${cuda_target} ${format} _generated_files
    ${_sources}
    ${_cmake_options}
    OPTIONS ${_options})

  # Hand the generated file list back under the caller's variable name.
  set(${generated_files} ${_generated_files})

endmacro()
1630
1631###############################################################################
1632###############################################################################
1633# CUDA COMPILE
1634###############################################################################
1635###############################################################################
# CUDA_COMPILE(<generated_files> <args>...)
#
# Forwards <args> to cuda_compile_base using the target name "cuda_compile"
# and the OBJ format; <generated_files> names the variable that receives the
# generated files.
macro(CUDA_COMPILE generated_files)
  cuda_compile_base(
    cuda_compile
    OBJ
    ${generated_files}
    ${ARGN})
endmacro()
1639
1640###############################################################################
1641###############################################################################
1642# CUDA COMPILE PTX
1643###############################################################################
1644###############################################################################
# CUDA_COMPILE_PTX(<generated_files> <args>...)
#
# Forwards <args> to cuda_compile_base using the target name
# "cuda_compile_ptx" and the PTX format; <generated_files> names the variable
# that receives the generated files.
macro(CUDA_COMPILE_PTX generated_files)
  cuda_compile_base(
    cuda_compile_ptx
    PTX
    ${generated_files}
    ${ARGN})
endmacro()
1648
1649###############################################################################
1650###############################################################################
1651# CUDA COMPILE FATBIN
1652###############################################################################
1653###############################################################################
# CUDA_COMPILE_FATBIN(<generated_files> <args>...)
#
# Forwards <args> to cuda_compile_base using the target name
# "cuda_compile_fatbin" and the FATBIN format; <generated_files> names the
# variable that receives the generated files.
macro(CUDA_COMPILE_FATBIN generated_files)
  cuda_compile_base(
    cuda_compile_fatbin
    FATBIN
    ${generated_files}
    ${ARGN})
endmacro()
1657
1658###############################################################################
1659###############################################################################
1660# CUDA COMPILE CUBIN
1661###############################################################################
1662###############################################################################
# CUDA_COMPILE_CUBIN(<generated_files> <args>...)
#
# Forwards <args> to cuda_compile_base using the target name
# "cuda_compile_cubin" and the CUBIN format; <generated_files> names the
# variable that receives the generated files.
macro(CUDA_COMPILE_CUBIN generated_files)
  cuda_compile_base(
    cuda_compile_cubin
    CUBIN
    ${generated_files}
    ${ARGN})
endmacro()
1666
1667
1668###############################################################################
1669###############################################################################
1670# CUDA ADD CUFFT TO TARGET
1671###############################################################################
1672###############################################################################
# CUDA_ADD_CUFFT_TO_TARGET(<target>)
#
# Links <target> against ${CUDA_cufft_LIBRARY}, or against
# ${CUDA_cufftemu_LIBRARY} when CUDA_BUILD_EMULATION is enabled.
macro(CUDA_ADD_CUFFT_TO_TARGET target)
  if(NOT CUDA_BUILD_EMULATION)
    target_link_libraries(${target} ${CUDA_cufft_LIBRARY})
  else()
    target_link_libraries(${target} ${CUDA_cufftemu_LIBRARY})
  endif()
endmacro()
1680
1681###############################################################################
1682###############################################################################
1683# CUDA ADD CUBLAS TO TARGET
1684###############################################################################
1685###############################################################################
# CUDA_ADD_CUBLAS_TO_TARGET(<target>)
#
# Links <target> against ${CUDA_cublas_LIBRARY}, or against
# ${CUDA_cublasemu_LIBRARY} when CUDA_BUILD_EMULATION is enabled.
macro(CUDA_ADD_CUBLAS_TO_TARGET target)
  if(NOT CUDA_BUILD_EMULATION)
    target_link_libraries(${target} ${CUDA_cublas_LIBRARY})
  else()
    target_link_libraries(${target} ${CUDA_cublasemu_LIBRARY})
  endif()
endmacro()
1693
1694###############################################################################
1695###############################################################################
1696# CUDA BUILD CLEAN TARGET
1697###############################################################################
1698###############################################################################
# CUDA_BUILD_CLEAN_TARGET()
#
# Call this after you have added all your CUDA targets to get a convenience
# target that removes the files listed in CUDA_ADDITIONAL_CLEAN_FILES.  You
# should also make clean after running that target so the build system
# generates all the code again.
#
# The target is named clean_cuda_depends, or CLEAN_CUDA_DEPENDS when
# CMAKE_GENERATOR matches "Visual Studio".
macro(CUDA_BUILD_CLEAN_TARGET)

  # Pick the target name; it is spelled in upper case for Visual Studio.
  if(CMAKE_GENERATOR MATCHES "Visual Studio")
    set(cuda_clean_target_name CLEAN_CUDA_DEPENDS)
  else()
    set(cuda_clean_target_name clean_cuda_depends)
  endif()

  add_custom_target(
    ${cuda_clean_target_name}
    COMMAND ${CMAKE_COMMAND} -E remove ${CUDA_ADDITIONAL_CLEAN_FILES})

  # Reset the cached list so it starts out empty on the next configure.  That
  # keeps files from lingering in the list after their targets have been
  # removed.
  set(CUDA_ADDITIONAL_CLEAN_FILES "" CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.")
endmacro()
1716