/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Creates some GPU activity to test functionalities of gpuperfcounter/gputrace.
#include "tensorflow/core/profiler/backends/gpu/cuda_test.h"

#if GOOGLE_CUDA
#include <stdio.h>

#include "third_party/gpus/cuda/include/cuda_runtime_api.h"
#include "third_party/gpus/cuda/include/driver_types.h"
#endif

#include "tensorflow/core/platform/test.h"

namespace tensorflow {
namespace profiler {
namespace test {

#if GOOGLE_CUDA
namespace {

// Kernel that emits a single line of device-side printf output.
__global__ void simple_print() {
  printf("hello, world!\n");
}

// Kernel with an empty body; generates launch activity only.
__global__ void empty() {}

// Kernel that doubles the integer stored at `addr`.
__global__ void access(int *addr) {
  *addr = *addr * 2;
}

// Scratch device pointer shared by the H2D/D2H memcpy helpers below.
unsigned *g_device_copy;

// One-word device buffers on GPU 0 and GPU 1 for the P2P memcpy helpers.
unsigned *gpu0_buf;
unsigned *gpu1_buf;

}  // namespace
#endif  // GOOGLE_CUDA

// Launches the printf kernel `iters` times (1 block x 1 thread each) to
// generate device-side printf activity for the profiler to observe.
// Fails the enclosing test when built without CUDA support.
void PrintfKernel(int iters) {
#if GOOGLE_CUDA
  int launched = 0;
  while (launched < iters) {
    simple_print<<<1, 1>>>();
    ++launched;
  }
#else
  GTEST_FAIL() << "Build with --config=cuda";
#endif
}

// Launches the no-op kernel `iters` times, producing kernel-launch
// activity with no device-side work. Fails the test without CUDA.
void EmptyKernel(int iters) {
#if GOOGLE_CUDA
  for (int launch = 0; launch < iters; ++launch) {
    empty<<<1, 1>>>();
  }
#else
  GTEST_FAIL() << "Build with --config=cuda";
#endif
}

// Launches a single-thread kernel that doubles the int pointed to by
// `addr`. `addr` must be device-accessible (e.g. managed memory).
void AccessKernel(int *addr) {
#if GOOGLE_CUDA
  access<<<1, 1>>>(addr);
#else
  GTEST_FAIL() << "Build with --config=cuda";
#endif
}

// Blocks the host until all previously issued GPU work has drained.
void Synchronize() {
#if GOOGLE_CUDA
  cudaDeviceSynchronize();
#else
  GTEST_FAIL() << "Build with --config=cuda";
#endif
}

// Forces one HtoD and one DtoH unified-memory page migration: a host
// write places the page on the host, a kernel access pulls it to the
// device, and the final host read pulls it back.
void UnifiedMemoryHtoDAndDtoH() {
#if GOOGLE_CUDA
  int *managed = nullptr;
  cudaMallocManaged(reinterpret_cast<void **>(&managed), sizeof(int));
  // Host write: the page now resides in host memory.
  *managed = 1;
  // Kernel access triggers the HtoD migration.
  AccessKernel(managed);
  Synchronize();
  // Host read after the kernel triggers the DtoH migration; the kernel
  // doubled the value.
  EXPECT_EQ(*managed, 2);
#else
  GTEST_FAIL() << "Build with --config=cuda";
#endif
}

// Generates one synchronous host-to-device memcpy into a freshly
// allocated device word.
void MemCopyH2D() {
#if GOOGLE_CUDA
  unsigned host_val = 0x12345678;
  cudaMalloc(reinterpret_cast<void **>(&g_device_copy), sizeof(unsigned));
  cudaMemcpy(g_device_copy, &host_val, sizeof(host_val),
             cudaMemcpyHostToDevice);
#else
  GTEST_FAIL() << "Build with --config=cuda";
#endif
}

// Generates one asynchronous host-to-device memcpy on the default stream.
// NOTE(review): the source is pageable stack memory and there is no
// subsequent sync; presumably acceptable because the test only needs the
// profiler to see the copy — confirm callers do not depend on the data.
void MemCopyH2D_Async() {
#if GOOGLE_CUDA
  unsigned host_val = 0x12345678;
  cudaMalloc(reinterpret_cast<void **>(&g_device_copy), sizeof(unsigned));
  cudaMemcpyAsync(g_device_copy, &host_val, sizeof(host_val),
                  cudaMemcpyHostToDevice);
#else
  GTEST_FAIL() << "Build with --config=cuda";
#endif
}

// Generates one synchronous device-to-host memcpy from a freshly
// allocated device word.
void MemCopyD2H() {
#if GOOGLE_CUDA
  unsigned host_val = 0;
  cudaMalloc(reinterpret_cast<void **>(&g_device_copy), sizeof(unsigned));
  cudaMemcpy(&host_val, g_device_copy, sizeof(host_val),
             cudaMemcpyDeviceToHost);
#else
  GTEST_FAIL() << "Build with --config=cuda";
#endif
}

namespace {

// Allocates one word on each of GPU 0 and GPU 1 (into gpu0_buf/gpu1_buf)
// and enables mutual peer access so direct P2P copies between the two
// buffers are possible.
void P2PMemcpyHelper() {
#if GOOGLE_CUDA
  unsigned **bufs[] = {&gpu0_buf, &gpu1_buf};
  for (int dev = 0; dev < 2; ++dev) {
    cudaSetDevice(dev);
    cudaMalloc(reinterpret_cast<void **>(bufs[dev]), sizeof(unsigned));
    cudaDeviceEnablePeerAccess(/*peerDevice=*/1 - dev, /*flags=*/0);
  }
#else
  GTEST_FAIL() << "Build with --config=cuda";
#endif
}

}  // namespace

// Returns true iff GPU 0 and GPU 1 can directly access each other's
// memory, i.e. the peer-to-peer memcpy tests can run. Always false when
// built without CUDA.
bool MemCopyP2PAvailable() {
#if GOOGLE_CUDA
  int can_access_01 = 0;
  cudaDeviceCanAccessPeer(&can_access_01, /*device=*/0, /*peerDevice=*/1);
  int can_access_10 = 0;
  // Fix: this query previously wrote into &can_access_01, leaving
  // can_access_10 permanently 0 so the function always returned false.
  cudaDeviceCanAccessPeer(&can_access_10, /*device=*/1, /*peerDevice=*/0);
  return can_access_01 && can_access_10;
#else
  return false;
#endif
}

// Copies one word from GPU 0 to GPU 1 letting the runtime infer the
// direction (cudaMemcpyDefault), exercising the implicit P2P copy path.
void MemCopyP2PImplicit() {
#if GOOGLE_CUDA
  P2PMemcpyHelper();
  cudaMemcpy(gpu1_buf, gpu0_buf, sizeof(unsigned), cudaMemcpyDefault);
#else
  GTEST_FAIL() << "Build with --config=cuda";
#endif
}

// Copies one word from GPU 0 to GPU 1 via the explicit peer-to-peer API.
void MemCopyP2PExplicit() {
#if GOOGLE_CUDA
  P2PMemcpyHelper();
  cudaMemcpyPeer(gpu1_buf, /*dstDevice=*/1, gpu0_buf, /*srcDevice=*/0,
                 sizeof(unsigned));
#else
  GTEST_FAIL() << "Build with --config=cuda";
#endif
}

}  // namespace test
}  // namespace profiler
}  // namespace tensorflow