• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1<html>
2<head>
3<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
4<title>CUDA</title>
5<link rel="stylesheet" href="../../../../../../doc/src/boostbook.css" type="text/css">
6<meta name="generator" content="DocBook XSL Stylesheets V1.79.1">
7<link rel="home" href="../../index.html" title="Chapter 1. Fiber">
8<link rel="up" href="../gpu_computing.html" title="GPU computing">
9<link rel="prev" href="../gpu_computing.html" title="GPU computing">
10<link rel="next" href="hip.html" title="ROCm/HIP">
11</head>
12<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
13<table cellpadding="2" width="100%"><tr>
14<td valign="top"><img alt="Boost C++ Libraries" width="277" height="86" src="../../../../../../boost.png"></td>
15<td align="center"><a href="../../../../../../index.html">Home</a></td>
16<td align="center"><a href="../../../../../../libs/libraries.htm">Libraries</a></td>
17<td align="center"><a href="http://www.boost.org/users/people.html">People</a></td>
18<td align="center"><a href="http://www.boost.org/users/faq.html">FAQ</a></td>
19<td align="center"><a href="../../../../../../more/index.htm">More</a></td>
20</tr></table>
21<hr>
22<div class="spirit-nav">
23<a accesskey="p" href="../gpu_computing.html"><img src="../../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../gpu_computing.html"><img src="../../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../../index.html"><img src="../../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="hip.html"><img src="../../../../../../doc/src/images/next.png" alt="Next"></a>
24</div>
25<div class="section">
26<div class="titlepage"><div><div><h3 class="title">
27<a name="fiber.gpu_computing.cuda"></a><a name="cuda"></a><a class="link" href="cuda.html" title="CUDA">CUDA</a>
28</h3></div></div></div>
29<p>
30        <a href="http://developer.nvidia.com/cuda-zone/" target="_top">CUDA (Compute Unified
31        Device Architecture)</a> is a platform for parallel computing on NVIDIA
32        GPUs. The application programming interface of CUDA gives access to the GPU's
33        instruction set and computation resources (execution of compute kernels).
34      </p>
35<h5>
36<a name="fiber.gpu_computing.cuda.h0"></a>
37        <span class="phrase"><a name="fiber.gpu_computing.cuda.synchronization_with_cuda_streams"></a></span><a class="link" href="cuda.html#fiber.gpu_computing.cuda.synchronization_with_cuda_streams">Synchronization
38        with CUDA streams</a>
39      </h5>
40<p>
41        CUDA operations such as compute kernels or memory transfers (between host and
42        device) can be grouped/queued by CUDA streams and are executed on the GPUs.
43        Boost.Fiber enables a fiber to sleep (suspend) until a CUDA stream has completed
44        its operations. This enables applications to run other fibers on the CPU
45        without the need to spawn additional OS threads, and to resume the fiber
46        when the CUDA stream has finished.
47      </p>
48<pre class="programlisting"><span class="identifier">__global__</span>
49<span class="keyword">void</span> <span class="identifier">kernel</span><span class="special">(</span> <span class="keyword">int</span> <span class="identifier">size</span><span class="special">,</span> <span class="keyword">int</span> <span class="special">*</span> <span class="identifier">a</span><span class="special">,</span> <span class="keyword">int</span> <span class="special">*</span> <span class="identifier">b</span><span class="special">,</span> <span class="keyword">int</span> <span class="special">*</span> <span class="identifier">c</span><span class="special">)</span> <span class="special">{</span>
50    <span class="keyword">int</span> <span class="identifier">idx</span> <span class="special">=</span> <span class="identifier">threadIdx</span><span class="special">.</span><span class="identifier">x</span> <span class="special">+</span> <span class="identifier">blockIdx</span><span class="special">.</span><span class="identifier">x</span> <span class="special">*</span> <span class="identifier">blockDim</span><span class="special">.</span><span class="identifier">x</span><span class="special">;</span>
51    <span class="keyword">if</span> <span class="special">(</span> <span class="identifier">idx</span> <span class="special">&lt;</span> <span class="identifier">size</span><span class="special">)</span> <span class="special">{</span>
52        <span class="keyword">int</span> <span class="identifier">idx1</span> <span class="special">=</span> <span class="special">(</span><span class="identifier">idx</span> <span class="special">+</span> <span class="number">1</span><span class="special">)</span> <span class="special">%</span> <span class="number">256</span><span class="special">;</span>
53        <span class="keyword">int</span> <span class="identifier">idx2</span> <span class="special">=</span> <span class="special">(</span><span class="identifier">idx</span> <span class="special">+</span> <span class="number">2</span><span class="special">)</span> <span class="special">%</span> <span class="number">256</span><span class="special">;</span>
54        <span class="keyword">float</span> <span class="identifier">as</span> <span class="special">=</span> <span class="special">(</span><span class="identifier">a</span><span class="special">[</span><span class="identifier">idx</span><span class="special">]</span> <span class="special">+</span> <span class="identifier">a</span><span class="special">[</span><span class="identifier">idx1</span><span class="special">]</span> <span class="special">+</span> <span class="identifier">a</span><span class="special">[</span><span class="identifier">idx2</span><span class="special">])</span> <span class="special">/</span> <span class="number">3.0f</span><span class="special">;</span>
55        <span class="keyword">float</span> <span class="identifier">bs</span> <span class="special">=</span> <span class="special">(</span><span class="identifier">b</span><span class="special">[</span><span class="identifier">idx</span><span class="special">]</span> <span class="special">+</span> <span class="identifier">b</span><span class="special">[</span><span class="identifier">idx1</span><span class="special">]</span> <span class="special">+</span> <span class="identifier">b</span><span class="special">[</span><span class="identifier">idx2</span><span class="special">])</span> <span class="special">/</span> <span class="number">3.0f</span><span class="special">;</span>
56        <span class="identifier">c</span><span class="special">[</span><span class="identifier">idx</span><span class="special">]</span> <span class="special">=</span> <span class="special">(</span><span class="identifier">as</span> <span class="special">+</span> <span class="identifier">bs</span><span class="special">)</span> <span class="special">/</span> <span class="number">2</span><span class="special">;</span>
57    <span class="special">}</span>
58<span class="special">}</span>
59
60<span class="identifier">boost</span><span class="special">::</span><span class="identifier">fibers</span><span class="special">::</span><span class="identifier">fiber</span> <span class="identifier">f</span><span class="special">([&amp;</span><span class="identifier">done</span><span class="special">]{</span>
61    <span class="identifier">cudaStream_t</span> <span class="identifier">stream</span><span class="special">;</span>
62    <span class="identifier">cudaStreamCreate</span><span class="special">(</span> <span class="special">&amp;</span> <span class="identifier">stream</span><span class="special">);</span>
63    <span class="keyword">int</span> <span class="identifier">size</span> <span class="special">=</span> <span class="number">1024</span> <span class="special">*</span> <span class="number">1024</span><span class="special">;</span>
64    <span class="keyword">int</span> <span class="identifier">full_size</span> <span class="special">=</span> <span class="number">20</span> <span class="special">*</span> <span class="identifier">size</span><span class="special">;</span>
65    <span class="keyword">int</span> <span class="special">*</span> <span class="identifier">host_a</span><span class="special">,</span> <span class="special">*</span> <span class="identifier">host_b</span><span class="special">,</span> <span class="special">*</span> <span class="identifier">host_c</span><span class="special">;</span>
66    <span class="identifier">cudaHostAlloc</span><span class="special">(</span> <span class="special">&amp;</span> <span class="identifier">host_a</span><span class="special">,</span> <span class="identifier">full_size</span> <span class="special">*</span> <span class="keyword">sizeof</span><span class="special">(</span> <span class="keyword">int</span><span class="special">),</span> <span class="identifier">cudaHostAllocDefault</span><span class="special">);</span>
67    <span class="identifier">cudaHostAlloc</span><span class="special">(</span> <span class="special">&amp;</span> <span class="identifier">host_b</span><span class="special">,</span> <span class="identifier">full_size</span> <span class="special">*</span> <span class="keyword">sizeof</span><span class="special">(</span> <span class="keyword">int</span><span class="special">),</span> <span class="identifier">cudaHostAllocDefault</span><span class="special">);</span>
68    <span class="identifier">cudaHostAlloc</span><span class="special">(</span> <span class="special">&amp;</span> <span class="identifier">host_c</span><span class="special">,</span> <span class="identifier">full_size</span> <span class="special">*</span> <span class="keyword">sizeof</span><span class="special">(</span> <span class="keyword">int</span><span class="special">),</span> <span class="identifier">cudaHostAllocDefault</span><span class="special">);</span>
69    <span class="keyword">int</span> <span class="special">*</span> <span class="identifier">dev_a</span><span class="special">,</span> <span class="special">*</span> <span class="identifier">dev_b</span><span class="special">,</span> <span class="special">*</span> <span class="identifier">dev_c</span><span class="special">;</span>
70    <span class="identifier">cudaMalloc</span><span class="special">(</span> <span class="special">&amp;</span> <span class="identifier">dev_a</span><span class="special">,</span> <span class="identifier">size</span> <span class="special">*</span> <span class="keyword">sizeof</span><span class="special">(</span> <span class="keyword">int</span><span class="special">)</span> <span class="special">);</span>
71    <span class="identifier">cudaMalloc</span><span class="special">(</span> <span class="special">&amp;</span> <span class="identifier">dev_b</span><span class="special">,</span> <span class="identifier">size</span> <span class="special">*</span> <span class="keyword">sizeof</span><span class="special">(</span> <span class="keyword">int</span><span class="special">)</span> <span class="special">);</span>
72    <span class="identifier">cudaMalloc</span><span class="special">(</span> <span class="special">&amp;</span> <span class="identifier">dev_c</span><span class="special">,</span> <span class="identifier">size</span> <span class="special">*</span> <span class="keyword">sizeof</span><span class="special">(</span> <span class="keyword">int</span><span class="special">)</span> <span class="special">);</span>
73    <span class="identifier">std</span><span class="special">::</span><span class="identifier">minstd_rand</span> <span class="identifier">generator</span><span class="special">;</span>
74    <span class="identifier">std</span><span class="special">::</span><span class="identifier">uniform_int_distribution</span><span class="special">&lt;&gt;</span> <span class="identifier">distribution</span><span class="special">(</span><span class="number">1</span><span class="special">,</span> <span class="number">6</span><span class="special">);</span>
75    <span class="keyword">for</span> <span class="special">(</span> <span class="keyword">int</span> <span class="identifier">i</span> <span class="special">=</span> <span class="number">0</span><span class="special">;</span> <span class="identifier">i</span> <span class="special">&lt;</span> <span class="identifier">full_size</span><span class="special">;</span> <span class="special">++</span><span class="identifier">i</span><span class="special">)</span> <span class="special">{</span>
76        <span class="identifier">host_a</span><span class="special">[</span><span class="identifier">i</span><span class="special">]</span> <span class="special">=</span> <span class="identifier">distribution</span><span class="special">(</span> <span class="identifier">generator</span><span class="special">);</span>
77        <span class="identifier">host_b</span><span class="special">[</span><span class="identifier">i</span><span class="special">]</span> <span class="special">=</span> <span class="identifier">distribution</span><span class="special">(</span> <span class="identifier">generator</span><span class="special">);</span>
78    <span class="special">}</span>
79    <span class="keyword">for</span> <span class="special">(</span> <span class="keyword">int</span> <span class="identifier">i</span> <span class="special">=</span> <span class="number">0</span><span class="special">;</span> <span class="identifier">i</span> <span class="special">&lt;</span> <span class="identifier">full_size</span><span class="special">;</span> <span class="identifier">i</span> <span class="special">+=</span> <span class="identifier">size</span><span class="special">)</span> <span class="special">{</span>
80        <span class="identifier">cudaMemcpyAsync</span><span class="special">(</span> <span class="identifier">dev_a</span><span class="special">,</span> <span class="identifier">host_a</span> <span class="special">+</span> <span class="identifier">i</span><span class="special">,</span> <span class="identifier">size</span> <span class="special">*</span> <span class="keyword">sizeof</span><span class="special">(</span> <span class="keyword">int</span><span class="special">),</span> <span class="identifier">cudaMemcpyHostToDevice</span><span class="special">,</span> <span class="identifier">stream</span><span class="special">);</span>
81        <span class="identifier">cudaMemcpyAsync</span><span class="special">(</span> <span class="identifier">dev_b</span><span class="special">,</span> <span class="identifier">host_b</span> <span class="special">+</span> <span class="identifier">i</span><span class="special">,</span> <span class="identifier">size</span> <span class="special">*</span> <span class="keyword">sizeof</span><span class="special">(</span> <span class="keyword">int</span><span class="special">),</span> <span class="identifier">cudaMemcpyHostToDevice</span><span class="special">,</span> <span class="identifier">stream</span><span class="special">);</span>
82        <span class="identifier">kernel</span><span class="special">&lt;&lt;&lt;</span> <span class="identifier">size</span> <span class="special">/</span> <span class="number">256</span><span class="special">,</span> <span class="number">256</span><span class="special">,</span> <span class="number">0</span><span class="special">,</span> <span class="identifier">stream</span> <span class="special">&gt;&gt;&gt;(</span> <span class="identifier">size</span><span class="special">,</span> <span class="identifier">dev_a</span><span class="special">,</span> <span class="identifier">dev_b</span><span class="special">,</span> <span class="identifier">dev_c</span><span class="special">);</span>
83        <span class="identifier">cudaMemcpyAsync</span><span class="special">(</span> <span class="identifier">host_c</span> <span class="special">+</span> <span class="identifier">i</span><span class="special">,</span> <span class="identifier">dev_c</span><span class="special">,</span> <span class="identifier">size</span> <span class="special">*</span> <span class="keyword">sizeof</span><span class="special">(</span> <span class="keyword">int</span><span class="special">),</span> <span class="identifier">cudaMemcpyDeviceToHost</span><span class="special">,</span> <span class="identifier">stream</span><span class="special">);</span>
84    <span class="special">}</span>
85    <span class="keyword">auto</span> <span class="identifier">result</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">fibers</span><span class="special">::</span><span class="identifier">cuda</span><span class="special">::</span><span class="identifier">waitfor_all</span><span class="special">(</span> <span class="identifier">stream</span><span class="special">);</span> <span class="comment">// suspend fiber till CUDA stream has finished</span>
86    <span class="identifier">BOOST_ASSERT</span><span class="special">(</span> <span class="identifier">stream</span> <span class="special">==</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">get</span><span class="special">&lt;</span> <span class="number">0</span> <span class="special">&gt;(</span> <span class="identifier">result</span><span class="special">)</span> <span class="special">);</span>
87    <span class="identifier">BOOST_ASSERT</span><span class="special">(</span> <span class="identifier">cudaSuccess</span> <span class="special">==</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">get</span><span class="special">&lt;</span> <span class="number">1</span> <span class="special">&gt;(</span> <span class="identifier">result</span><span class="special">)</span> <span class="special">);</span>
88    <span class="identifier">std</span><span class="special">::</span><span class="identifier">cout</span> <span class="special">&lt;&lt;</span> <span class="string">"f1: GPU computation finished"</span> <span class="special">&lt;&lt;</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">endl</span><span class="special">;</span>
89    <span class="identifier">cudaFreeHost</span><span class="special">(</span> <span class="identifier">host_a</span><span class="special">);</span>
90    <span class="identifier">cudaFreeHost</span><span class="special">(</span> <span class="identifier">host_b</span><span class="special">);</span>
91    <span class="identifier">cudaFreeHost</span><span class="special">(</span> <span class="identifier">host_c</span><span class="special">);</span>
92    <span class="identifier">cudaFree</span><span class="special">(</span> <span class="identifier">dev_a</span><span class="special">);</span>
93    <span class="identifier">cudaFree</span><span class="special">(</span> <span class="identifier">dev_b</span><span class="special">);</span>
94    <span class="identifier">cudaFree</span><span class="special">(</span> <span class="identifier">dev_c</span><span class="special">);</span>
95    <span class="identifier">cudaStreamDestroy</span><span class="special">(</span> <span class="identifier">stream</span><span class="special">);</span>
96<span class="special">});</span>
97<span class="identifier">f</span><span class="special">.</span><span class="identifier">join</span><span class="special">();</span>
98</pre>
99<h5>
100<a name="fiber.gpu_computing.cuda.h1"></a>
101        <span class="phrase"><a name="fiber.gpu_computing.cuda.synopsis"></a></span><a class="link" href="cuda.html#fiber.gpu_computing.cuda.synopsis">Synopsis</a>
102      </h5>
103<pre class="programlisting"><span class="preprocessor">#include</span> <span class="special">&lt;</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">fiber</span><span class="special">/</span><span class="identifier">cuda</span><span class="special">/</span><span class="identifier">waitfor</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;</span>
104
105<span class="keyword">namespace</span> <span class="identifier">boost</span> <span class="special">{</span>
106<span class="keyword">namespace</span> <span class="identifier">fibers</span> <span class="special">{</span>
107<span class="keyword">namespace</span> <span class="identifier">cuda</span> <span class="special">{</span>
108
109<span class="identifier">std</span><span class="special">::</span><span class="identifier">tuple</span><span class="special">&lt;</span> <span class="identifier">cudaStream_t</span><span class="special">,</span> <span class="identifier">cudaError_t</span> <span class="special">&gt;</span> <span class="identifier">waitfor_all</span><span class="special">(</span> <span class="identifier">cudaStream_t</span> <span class="identifier">st</span><span class="special">);</span>
110<span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">tuple</span><span class="special">&lt;</span> <span class="identifier">cudaStream_t</span><span class="special">,</span> <span class="identifier">cudaError_t</span> <span class="special">&gt;</span> <span class="special">&gt;</span> <span class="identifier">waitfor_all</span><span class="special">(</span> <span class="identifier">cudaStream_t</span> <span class="special">...</span> <span class="identifier">st</span><span class="special">);</span>
111
112<span class="special">}}}</span>
113</pre>
114<p>
115        </p>
116<h5>
117<a name="cuda_waitfor_bridgehead"></a>
118  <span class="phrase"><a name="cuda_waitfor"></a></span>
119  <a class="link" href="cuda.html#cuda_waitfor">Non-member function <code class="computeroutput">cuda::waitfor_all()</code></a>
120</h5>
121<p>
122      </p>
123<pre class="programlisting"><span class="preprocessor">#include</span> <span class="special">&lt;</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">fiber</span><span class="special">/</span><span class="identifier">cuda</span><span class="special">/</span><span class="identifier">waitfor</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;</span>
124
125<span class="keyword">namespace</span> <span class="identifier">boost</span> <span class="special">{</span>
126<span class="keyword">namespace</span> <span class="identifier">fibers</span> <span class="special">{</span>
127<span class="keyword">namespace</span> <span class="identifier">cuda</span> <span class="special">{</span>
128
129<span class="identifier">std</span><span class="special">::</span><span class="identifier">tuple</span><span class="special">&lt;</span> <span class="identifier">cudaStream_t</span><span class="special">,</span> <span class="identifier">cudaError_t</span> <span class="special">&gt;</span> <span class="identifier">waitfor_all</span><span class="special">(</span> <span class="identifier">cudaStream_t</span> <span class="identifier">st</span><span class="special">);</span>
130<span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">tuple</span><span class="special">&lt;</span> <span class="identifier">cudaStream_t</span><span class="special">,</span> <span class="identifier">cudaError_t</span> <span class="special">&gt;</span> <span class="special">&gt;</span> <span class="identifier">waitfor_all</span><span class="special">(</span> <span class="identifier">cudaStream_t</span> <span class="special">...</span> <span class="identifier">st</span><span class="special">);</span>
131
132<span class="special">}}}</span>
133</pre>
134<div class="variablelist">
135<p class="title"><b></b></p>
136<dl class="variablelist">
137<dt><span class="term">Effects:</span></dt>
138<dd><p>
139              Suspends the active fiber until the given CUDA stream(s) have finished their operations.
140            </p></dd>
141<dt><span class="term">Returns:</span></dt>
142<dd><p>
143              tuple of the stream and its CUDA status (cudaError_t); the variadic overload returns a vector of such tuples
144            </p></dd>
145</dl>
146</div>
147</div>
148<table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr>
149<td align="left"></td>
150<td align="right"><div class="copyright-footer">Copyright © 2013 Oliver Kowalke<p>
151        Distributed under the Boost Software License, Version 1.0. (See accompanying
152        file LICENSE_1_0.txt or copy at <a href="http://www.boost.org/LICENSE_1_0.txt" target="_top">http://www.boost.org/LICENSE_1_0.txt</a>)
153      </p>
154</div></td>
155</tr></table>
156<hr>
157<div class="spirit-nav">
158<a accesskey="p" href="../gpu_computing.html"><img src="../../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../gpu_computing.html"><img src="../../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../../index.html"><img src="../../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="hip.html"><img src="../../../../../../doc/src/images/next.png" alt="Next"></a>
159</div>
160</body>
161</html>
162