• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Tests that ptxas and fatbinary are invoked correctly during CUDA compilation.
2 //
3 // REQUIRES: clang-driver
4 // REQUIRES: x86-registered-target
5 // REQUIRES: nvptx-registered-target
6 
7 // Regular compiles with -O{0,1,2,3,4,fast}.  -O4 and -Ofast map to ptxas -O3.
8 // RUN: %clang -### -target x86_64-linux-gnu -O0 -c %s 2>&1 \
9 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
10 // RUN: %clang -### -target x86_64-linux-gnu -O1 -c %s 2>&1 \
11 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT1 %s
12 // RUN: %clang -### -target x86_64-linux-gnu -O2 -c %s 2>&1 \
13 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
14 // RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
15 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
16 // RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
17 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
18 // RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
19 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT3 %s
20 
21 // With debugging enabled, ptxas should be run with no ptxas optimizations.
22 // RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -c %s 2>&1 \
23 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix DBG %s
24 
25 // --no-cuda-noopt-device-debug overrides --cuda-noopt-device-debug.
26 // RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug \
27 // RUN:   --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \
28 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
29 
30 // Regular compile without -O.  This should result in us passing -O0 to ptxas.
31 // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
32 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
33 
34 // Regular compiles with -Os and -Oz.  For lack of a better option, we map
35 // these to ptxas -O2.
36 // RUN: %clang -### -target x86_64-linux-gnu -Os -c %s 2>&1 \
37 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
38 // RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
39 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT2 %s
40 
41 // Regular compile targeting sm_35.
42 // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
43 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM35 %s
44 
45 // 32-bit compile.
46 // RUN: %clang -### -target i386-linux-gnu -c %s 2>&1 \
47 // RUN: | FileCheck -check-prefix ARCH32 -check-prefix SM20 %s
48 
49 // Compile with -fintegrated-as.  This should still cause us to invoke ptxas.
50 // RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
51 // RUN: | FileCheck -check-prefix ARCH64 -check-prefix SM20 -check-prefix OPT0 %s
52 
53 // Check -Xcuda-ptxas and -Xcuda-fatbinary
54 // RUN: %clang -### -target x86_64-linux-gnu -c -Xcuda-ptxas -foo1 \
55 // RUN:   -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \
56 // RUN: | FileCheck -check-prefix SM20 -check-prefix PTXAS-EXTRA \
57 // RUN:   -check-prefix FATBINARY-EXTRA %s
58 
59 // Match clang job that produces PTX assembly.
60 // CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda"
61 // SM20: "-target-cpu" "sm_20"
62 // SM35: "-target-cpu" "sm_35"
63 // SM20: "-o" "[[PTXFILE:[^"]*]]"
64 // SM35: "-o" "[[PTXFILE:[^"]*]]"
65 
66 // Match the call to ptxas (which assembles PTX to SASS).
67 // CHECK:ptxas
68 // ARCH64: "-m64"
69 // ARCH32: "-m32"
70 // OPT0: "-O0"
71 // OPT0-NOT: "-g"
72 // OPT1: "-O1"
73 // OPT1-NOT: "-g"
74 // OPT2: "-O2"
75 // OPT2-NOT: "-g"
76 // OPT3: "-O3"
77 // OPT3-NOT: "-g"
78 // DBG: "-g" "--dont-merge-basicblocks" "--return-at-end"
79 // SM20: "--gpu-name" "sm_20"
80 // SM35: "--gpu-name" "sm_35"
81 // SM20: "--output-file" "[[CUBINFILE:[^"]*]]"
82 // SM35: "--output-file" "[[CUBINFILE:[^"]*]]"
83 // PTXAS-EXTRA: "-foo1"
84 // PTXAS-EXTRA-SAME: "-foo2"
85 // CHECK-SAME: "[[PTXFILE]]"
86 
87 // Match the call to fatbinary (which combines all our PTX and SASS into one
88 // blob).
89 // CHECK:fatbinary
90 // CHECK-DAG: "--cuda"
91 // ARCH64-DAG: "-64"
92 // ARCH32-DAG: "-32"
93 // CHECK-DAG: "--create" "[[FATBINARY:[^"]*]]"
94 // SM20-DAG: "--image=profile=compute_20,file=[[PTXFILE]]"
95 // SM35-DAG: "--image=profile=compute_35,file=[[PTXFILE]]"
96 // SM20-DAG: "--image=profile=sm_20,file=[[CUBINFILE]]"
97 // SM35-DAG: "--image=profile=sm_35,file=[[CUBINFILE]]"
98 // FATBINARY-EXTRA: "-bar1"
99 // FATBINARY-EXTRA-SAME: "-bar2"
100 
101 // Match the clang job for host compilation.
102 // CHECK: "-cc1" "-triple" "x86_64--linux-gnu"
103 // CHECK-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
104