1 //---------------------------------------------------------------------------// 2 // Copyright (c) 2015 Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com> 3 // 4 // Distributed under the Boost Software License, Version 1.0 5 // See accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt 7 // 8 // See http://boostorg.github.com/compute for more information. 9 //---------------------------------------------------------------------------// 10 11 #ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP 12 #define BOOST_COMPUTE_RANDOM_THREEFRY_HPP 13 14 #include <algorithm> 15 16 #include <boost/compute/types.hpp> 17 #include <boost/compute/buffer.hpp> 18 #include <boost/compute/kernel.hpp> 19 #include <boost/compute/context.hpp> 20 #include <boost/compute/program.hpp> 21 #include <boost/compute/command_queue.hpp> 22 #include <boost/compute/algorithm/transform.hpp> 23 #include <boost/compute/detail/iterator_range_size.hpp> 24 #include <boost/compute/utility/program_cache.hpp> 25 #include <boost/compute/container/vector.hpp> 26 #include <boost/compute/iterator/discard_iterator.hpp> 27 28 namespace boost { 29 namespace compute { 30 31 /// \class threefry_engine 32 /// \brief Threefry pseudorandom number generator. 33 template<class T = uint_> 34 class threefry_engine 35 { 36 public: 37 typedef T result_type; 38 static const ulong_ default_seed = 0UL; 39 40 /// Creates a new threefry_engine and seeds it with \p value. threefry_engine(command_queue & queue,ulong_ value=default_seed)41 explicit threefry_engine(command_queue &queue, 42 ulong_ value = default_seed) 43 : m_key(value), 44 m_counter(0), 45 m_context(queue.get_context()) 46 { 47 // Load program 48 load_program(); 49 } 50 51 /// Creates a new threefry_engine object as a copy of \p other. threefry_engine(const threefry_engine<T> & other)52 threefry_engine(const threefry_engine<T> &other) 53 : m_key(other.m_key), 54 m_counter(other.m_counter), 55 m_context(other.m_context), 56 m_program(other.m_program) 57 { 58 } 59 60 /// Copies \p other to \c *this. operator =(const threefry_engine<T> & other)61 threefry_engine<T>& operator=(const threefry_engine<T> &other) 62 { 63 if(this != &other){ 64 m_key = other.m_key; 65 m_counter = other.m_counter; 66 m_context = other.m_context; 67 m_program = other.m_program; 68 } 69 70 return *this; 71 } 72 73 /// Destroys the threefry_engine object. ~threefry_engine()74 ~threefry_engine() 75 { 76 } 77 78 /// Seeds the random number generator with \p value. 79 /// 80 /// \param value seed value for the random-number generator 81 /// \param queue command queue to perform the operation 82 /// 83 /// If no seed value is provided, \c default_seed is used. seed(ulong_ value,command_queue & queue)84 void seed(ulong_ value, command_queue &queue) 85 { 86 (void) queue; 87 m_key = value; 88 // Reset counter 89 m_counter = 0; 90 } 91 92 /// \overload seed(command_queue & queue)93 void seed(command_queue &queue) 94 { 95 seed(default_seed, queue); 96 } 97 98 /// Generates random numbers and stores them to the range [\p first, \p last). 99 template<class OutputIterator> generate(OutputIterator first,OutputIterator last,command_queue & queue)100 void generate(OutputIterator first, OutputIterator last, command_queue &queue) 101 { 102 const size_t size = detail::iterator_range_size(first, last); 103 104 kernel fill_kernel(m_program, "fill"); 105 fill_kernel.set_arg(0, first.get_buffer()); 106 fill_kernel.set_arg(1, static_cast<const uint_>(size)); 107 fill_kernel.set_arg(2, m_key); 108 fill_kernel.set_arg(3, m_counter); 109 110 queue.enqueue_1d_range_kernel(fill_kernel, 0, (size + 1)/2, 0); 111 112 discard(size, queue); 113 } 114 115 /// \internal_ generate(discard_iterator first,discard_iterator last,command_queue & queue)116 void generate(discard_iterator first, discard_iterator last, command_queue &queue) 117 { 118 (void) queue; 119 ulong_ offset = std::distance(first, last); 120 m_counter += offset; 121 } 122 123 /// Generates random numbers, transforms them with \p op, and then stores 124 /// them to the range [\p first, \p last). 125 template<class OutputIterator, class Function> generate(OutputIterator first,OutputIterator last,Function op,command_queue & queue)126 void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue) 127 { 128 vector<T> tmp(std::distance(first, last), queue.get_context()); 129 generate(tmp.begin(), tmp.end(), queue); 130 ::boost::compute::transform(tmp.begin(), tmp.end(), first, op, queue); 131 } 132 133 /// Generates \p z random numbers and discards them. discard(size_t z,command_queue & queue)134 void discard(size_t z, command_queue &queue) 135 { 136 generate(discard_iterator(0), discard_iterator(z), queue); 137 } 138 139 private: load_program()140 void load_program() 141 { 142 boost::shared_ptr<program_cache> cache = 143 program_cache::get_global_cache(m_context); 144 std::string cache_key = 145 std::string("__boost_threefry_engine_32x2"); 146 147 // Copyright 2010-2012, D. E. Shaw Research. 148 // All rights reserved. 149 150 // Redistribution and use in source and binary forms, with or without 151 // modification, are permitted provided that the following conditions are 152 // met: 153 154 // * Redistributions of source code must retain the above copyright 155 // notice, this list of conditions, and the following disclaimer. 156 157 // * Redistributions in binary form must reproduce the above copyright 158 // notice, this list of conditions, and the following disclaimer in the 159 // documentation and/or other materials provided with the distribution. 160 161 // * Neither the name of D. E. Shaw Research nor the names of its 162 // contributors may be used to endorse or promote products derived from 163 // this software without specific prior written permission. 164 165 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 166 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 167 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 168 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 169 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 170 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 171 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 172 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 173 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 174 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 175 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 176 const char source[] = 177 "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n" 178 "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n" 179 180 "enum r123_enum_threefry32x2 {\n" 181 " R_32x2_0_0=13,\n" 182 " R_32x2_1_0=15,\n" 183 " R_32x2_2_0=26,\n" 184 " R_32x2_3_0= 6,\n" 185 " R_32x2_4_0=17,\n" 186 " R_32x2_5_0=29,\n" 187 " R_32x2_6_0=16,\n" 188 " R_32x2_7_0=24\n" 189 "};\n" 190 191 "static uint RotL_32(uint x, uint N)\n" 192 "{\n" 193 " return (x << (N & 31)) | (x >> ((32-N) & 31));\n" 194 "}\n" 195 196 "struct r123array2x32 {\n" 197 " uint v[2];\n" 198 "};\n" 199 "typedef struct r123array2x32 threefry2x32_ctr_t;\n" 200 "typedef struct r123array2x32 threefry2x32_key_t;\n" 201 202 "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n" 203 "{\n" 204 " threefry2x32_ctr_t X;\n" 205 " uint ks[3];\n" 206 " uint i; \n" 207 " ks[2] = SKEIN_KS_PARITY_32;\n" 208 " for (i=0;i < 2; i++) {\n" 209 " ks[i] = k.v[i];\n" 210 " X.v[i] = in.v[i];\n" 211 " ks[2] ^= k.v[i];\n" 212 " }\n" 213 " X.v[0] += ks[0]; X.v[1] += ks[1];\n" 214 " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" 215 " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" 216 " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" 217 " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" 218 " if(Nrounds>3){\n" 219 " X.v[0] += ks[1]; X.v[1] += ks[2];\n" 220 " X.v[1] += 1;\n" 221 " }\n" 222 " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" 223 " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" 224 " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" 225 " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" 226 " if(Nrounds>7){\n" 227 " X.v[0] += ks[2]; X.v[1] += ks[0];\n" 228 " X.v[1] += 2;\n" 229 " }\n" 230 " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" 231 " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" 232 " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" 233 " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" 234 " if(Nrounds>11){\n" 235 " X.v[0] += ks[0]; X.v[1] += ks[1];\n" 236 " X.v[1] += 3;\n" 237 " }\n" 238 " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" 239 " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" 240 " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" 241 " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" 242 " if(Nrounds>15){\n" 243 " X.v[0] += ks[1]; X.v[1] += ks[2];\n" 244 " X.v[1] += 4;\n" 245 " }\n" 246 " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" 247 " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" 248 " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" 249 " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" 250 " if(Nrounds>19){\n" 251 " X.v[0] += ks[2]; X.v[1] += ks[0];\n" 252 " X.v[1] += 5;\n" 253 " }\n" 254 " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" 255 " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" 256 " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" 257 " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" 258 " if(Nrounds>23){\n" 259 " X.v[0] += ks[0]; X.v[1] += ks[1];\n" 260 " X.v[1] += 6;\n" 261 " }\n" 262 " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" 263 " if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" 264 " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" 265 " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" 266 " if(Nrounds>27){\n" 267 " X.v[0] += ks[1]; X.v[1] += ks[2];\n" 268 " X.v[1] += 7;\n" 269 " }\n" 270 " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" 271 " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" 272 " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" 273 " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" 274 " if(Nrounds>31){\n" 275 " X.v[0] += ks[2]; X.v[1] += ks[0];\n" 276 " X.v[1] += 8;\n" 277 " }\n" 278 " return X;\n" 279 "}\n" 280 "__kernel void fill(__global uint * output,\n" 281 " const uint output_size,\n" 282 " const uint2 key,\n" 283 " const uint2 counter)\n" 284 "{\n" 285 " uint gid = get_global_id(0);\n" 286 " threefry2x32_ctr_t c;\n" 287 " c.v[0] = counter.x + gid;\n" 288 " c.v[1] = counter.y + (c.v[0] < counter.x ? 1 : 0);\n" 289 "\n" 290 " threefry2x32_key_t k = { {key.x, key.y} };\n" 291 "\n" 292 " threefry2x32_ctr_t result;\n" 293 " result = threefry2x32_R(THREEFRY2x32_DEFAULT_ROUNDS, c, k);\n" 294 "\n" 295 " if(gid < output_size/2)\n" 296 " {\n" 297 " output[2 * gid] = result.v[0];\n" 298 " output[2 * gid + 1] = result.v[1];\n" 299 " }\n" 300 " else if(gid < (output_size+1)/2)\n" 301 " output[2 * gid] = result.v[0];\n" 302 "}\n"; 303 304 m_program = cache->get_or_build(cache_key, std::string(), source, m_context); 305 } 306 307 // Engine state 308 ulong_ m_key; // 2 x 32bit 309 ulong_ m_counter; 310 // OpenCL 311 context m_context; 312 program m_program; 313 }; 314 315 } // end compute namespace 316 } // end boost namespace 317 318 #endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP 319