1 // Copyright 2004 The Trustees of Indiana University. 2 3 // Use, modification and distribution is subject to the Boost Software 4 // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at 5 // http://www.boost.org/LICENSE_1_0.txt) 6 7 // Authors: Douglas Gregor 8 // Peter Gottschling 9 // Andrew Lumsdaine 10 #ifndef BOOST_PARALLEL_DISTRIBUTION_HPP 11 #define BOOST_PARALLEL_DISTRIBUTION_HPP 12 13 #ifndef BOOST_GRAPH_USE_MPI 14 #error "Parallel BGL files should not be included unless <boost/graph/use_mpi.hpp> has been included" 15 #endif 16 17 #include <cstddef> 18 #include <vector> 19 #include <algorithm> 20 #include <numeric> 21 #include <boost/assert.hpp> 22 #include <boost/iterator/counting_iterator.hpp> 23 #include <boost/random/uniform_int.hpp> 24 #include <boost/shared_ptr.hpp> 25 #include <boost/config.hpp> 26 #include <typeinfo> 27 28 namespace boost { namespace parallel { 29 30 template<typename ProcessGroup, typename SizeType = std::size_t> 31 class variant_distribution 32 { 33 public: 34 typedef typename ProcessGroup::process_id_type process_id_type; 35 typedef typename ProcessGroup::process_size_type process_size_type; 36 typedef SizeType size_type; 37 38 private: 39 struct basic_distribution 40 { ~basic_distributionboost::parallel::variant_distribution::basic_distribution41 virtual ~basic_distribution() {} 42 virtual size_type block_size(process_id_type, size_type) const = 0; 43 virtual process_id_type in_process(size_type) const = 0; 44 virtual size_type local(size_type) const = 0; 45 virtual size_type global(size_type) const = 0; 46 virtual size_type global(process_id_type, size_type) const = 0; 47 virtual void* address() = 0; 48 virtual const void* address() const = 0; 49 virtual const std::type_info& type() const = 0; 50 }; 51 52 template<typename Distribution> 53 struct poly_distribution : public basic_distribution 54 { poly_distributionboost::parallel::variant_distribution::poly_distribution55 explicit poly_distribution(const Distribution& distribution) 56 : distribution_(distribution) { } 57 block_sizeboost::parallel::variant_distribution::poly_distribution58 virtual size_type block_size(process_id_type id, size_type n) const 59 { return distribution_.block_size(id, n); } 60 in_processboost::parallel::variant_distribution::poly_distribution61 virtual process_id_type in_process(size_type i) const 62 { return distribution_(i); } 63 localboost::parallel::variant_distribution::poly_distribution64 virtual size_type local(size_type i) const 65 { return distribution_.local(i); } 66 globalboost::parallel::variant_distribution::poly_distribution67 virtual size_type global(size_type n) const 68 { return distribution_.global(n); } 69 globalboost::parallel::variant_distribution::poly_distribution70 virtual size_type global(process_id_type id, size_type n) const 71 { return distribution_.global(id, n); } 72 addressboost::parallel::variant_distribution::poly_distribution73 virtual void* address() { return &distribution_; } addressboost::parallel::variant_distribution::poly_distribution74 virtual const void* address() const { return &distribution_; } typeboost::parallel::variant_distribution::poly_distribution75 virtual const std::type_info& type() const { return typeid(Distribution); } 76 77 private: 78 Distribution distribution_; 79 }; 80 81 public: variant_distribution()82 variant_distribution() { } 83 84 template<typename Distribution> variant_distribution(const Distribution & distribution)85 variant_distribution(const Distribution& distribution) 86 : distribution_(new poly_distribution<Distribution>(distribution)) { } 87 block_size(process_id_type id,size_type n) const88 size_type block_size(process_id_type id, size_type n) const 89 { return distribution_->block_size(id, n); } 90 operator ()(size_type i) const91 process_id_type operator()(size_type i) const 92 { return distribution_->in_process(i); } 93 local(size_type i) const94 size_type local(size_type i) const 95 { return distribution_->local(i); } 96 global(size_type n) const97 size_type global(size_type n) const 98 { return distribution_->global(n); } 99 global(process_id_type id,size_type n) const100 size_type global(process_id_type id, size_type n) const 101 { return distribution_->global(id, n); } 102 operator bool() const103 operator bool() const { return distribution_; } 104 clear()105 void clear() { distribution_.reset(); } 106 107 template<typename T> as()108 T* as() 109 { 110 if (distribution_->type() == typeid(T)) 111 return static_cast<T*>(distribution_->address()); 112 else 113 return 0; 114 } 115 116 template<typename T> as() const117 const T* as() const 118 { 119 if (distribution_->type() == typeid(T)) 120 return static_cast<T*>(distribution_->address()); 121 else 122 return 0; 123 } 124 125 private: 126 shared_ptr<basic_distribution> distribution_; 127 }; 128 129 struct block 130 { 131 template<typename LinearProcessGroup> blockboost::parallel::block132 explicit block(const LinearProcessGroup& pg, std::size_t n) 133 : id(process_id(pg)), p(num_processes(pg)), n(n) { } 134 135 // If there are n elements in the distributed data structure, returns the number of elements stored locally. 136 template<typename SizeType> block_sizeboost::parallel::block137 SizeType block_size(SizeType n) const 138 { return (n / p) + ((std::size_t)(n % p) > id? 1 : 0); } 139 140 // If there are n elements in the distributed data structure, returns the number of elements stored on processor ID 141 template<typename SizeType, typename ProcessID> block_sizeboost::parallel::block142 SizeType block_size(ProcessID id, SizeType n) const 143 { return (n / p) + ((ProcessID)(n % p) > id? 1 : 0); } 144 145 // Returns the processor on which element with global index i is stored 146 template<typename SizeType> operator ()boost::parallel::block147 SizeType operator()(SizeType i) const 148 { 149 SizeType cutoff_processor = n % p; 150 SizeType cutoff = cutoff_processor * (n / p + 1); 151 152 if (i < cutoff) return i / (n / p + 1); 153 else return cutoff_processor + (i - cutoff) / (n / p); 154 } 155 156 // Find the starting index for processor with the given id 157 template<typename ID> startboost::parallel::block158 std::size_t start(ID id) const 159 { 160 std::size_t estimate = id * (n / p + 1); 161 ID cutoff_processor = n % p; 162 if (id < cutoff_processor) return estimate; 163 else return estimate - (id - cutoff_processor); 164 } 165 166 // Find the local index for the ith global element 167 template<typename SizeType> localboost::parallel::block168 SizeType local(SizeType i) const 169 { 170 SizeType owner = (*this)(i); 171 return i - start(owner); 172 } 173 174 // Returns the global index of local element i 175 template<typename SizeType> globalboost::parallel::block176 SizeType global(SizeType i) const 177 { return global(id, i); } 178 179 // Returns the global index of the ith local element on processor id 180 template<typename ProcessID, typename SizeType> globalboost::parallel::block181 SizeType global(ProcessID id, SizeType i) const 182 { return i + start(id); } 183 184 private: 185 std::size_t id; //< The ID number of this processor 186 std::size_t p; //< The number of processors 187 std::size_t n; //< The size of the problem space 188 }; 189 190 // Block distribution with arbitrary block sizes 191 struct uneven_block 192 { 193 typedef std::vector<std::size_t> size_vector; 194 195 template<typename LinearProcessGroup> uneven_blockboost::parallel::uneven_block196 explicit uneven_block(const LinearProcessGroup& pg, const std::vector<std::size_t>& local_sizes) 197 : id(process_id(pg)), p(num_processes(pg)), local_sizes(local_sizes) 198 { 199 BOOST_ASSERT(local_sizes.size() == p); 200 local_starts.resize(p + 1); 201 local_starts[0] = 0; 202 std::partial_sum(local_sizes.begin(), local_sizes.end(), &local_starts[1]); 203 n = local_starts[p]; 204 } 205 206 // To do maybe: enter local size in each process and gather in constructor (much handier) 207 // template<typename LinearProcessGroup> 208 // explicit uneven_block(const LinearProcessGroup& pg, std::size_t my_local_size) 209 210 // If there are n elements in the distributed data structure, returns the number of elements stored locally. 211 template<typename SizeType> block_sizeboost::parallel::uneven_block212 SizeType block_size(SizeType) const 213 { return local_sizes[id]; } 214 215 // If there are n elements in the distributed data structure, returns the number of elements stored on processor ID 216 template<typename SizeType, typename ProcessID> block_sizeboost::parallel::uneven_block217 SizeType block_size(ProcessID id, SizeType) const 218 { return local_sizes[id]; } 219 220 // Returns the processor on which element with global index i is stored 221 template<typename SizeType> operator ()boost::parallel::uneven_block222 SizeType operator()(SizeType i) const 223 { 224 BOOST_ASSERT (i >= (SizeType) 0 && i < (SizeType) n); // check for valid range 225 size_vector::const_iterator lb = std::lower_bound(local_starts.begin(), local_starts.end(), (std::size_t) i); 226 return ((SizeType)(*lb) == i ? lb : --lb) - local_starts.begin(); 227 } 228 229 // Find the starting index for processor with the given id 230 template<typename ID> startboost::parallel::uneven_block231 std::size_t start(ID id) const 232 { 233 return local_starts[id]; 234 } 235 236 // Find the local index for the ith global element 237 template<typename SizeType> localboost::parallel::uneven_block238 SizeType local(SizeType i) const 239 { 240 SizeType owner = (*this)(i); 241 return i - start(owner); 242 } 243 244 // Returns the global index of local element i 245 template<typename SizeType> globalboost::parallel::uneven_block246 SizeType global(SizeType i) const 247 { return global(id, i); } 248 249 // Returns the global index of the ith local element on processor id 250 template<typename ProcessID, typename SizeType> globalboost::parallel::uneven_block251 SizeType global(ProcessID id, SizeType i) const 252 { return i + start(id); } 253 254 private: 255 std::size_t id; //< The ID number of this processor 256 std::size_t p; //< The number of processors 257 std::size_t n; //< The size of the problem space 258 std::vector<std::size_t> local_sizes; //< The sizes of all blocks 259 std::vector<std::size_t> local_starts; //< Lowest global index of each block 260 }; 261 262 263 struct oned_block_cyclic 264 { 265 template<typename LinearProcessGroup> oned_block_cyclicboost::parallel::oned_block_cyclic266 explicit oned_block_cyclic(const LinearProcessGroup& pg, std::size_t size) 267 : id(process_id(pg)), p(num_processes(pg)), size(size) { } 268 269 template<typename SizeType> block_sizeboost::parallel::oned_block_cyclic270 SizeType block_size(SizeType n) const 271 { 272 return block_size(id, n); 273 } 274 275 template<typename SizeType, typename ProcessID> block_sizeboost::parallel::oned_block_cyclic276 SizeType block_size(ProcessID id, SizeType n) const 277 { 278 SizeType all_blocks = n / size; 279 SizeType extra_elements = n % size; 280 SizeType everyone_gets = all_blocks / p; 281 SizeType extra_blocks = all_blocks % p; 282 SizeType my_blocks = everyone_gets + (p < extra_blocks? 1 : 0); 283 SizeType my_elements = my_blocks * size 284 + (p == extra_blocks? extra_elements : 0); 285 return my_elements; 286 } 287 288 template<typename SizeType> operator ()boost::parallel::oned_block_cyclic289 SizeType operator()(SizeType i) const 290 { 291 return (i / size) % p; 292 } 293 294 template<typename SizeType> localboost::parallel::oned_block_cyclic295 SizeType local(SizeType i) const 296 { 297 return ((i / size) / p) * size + i % size; 298 } 299 300 template<typename SizeType> globalboost::parallel::oned_block_cyclic301 SizeType global(SizeType i) const 302 { return global(id, i); } 303 304 template<typename ProcessID, typename SizeType> globalboost::parallel::oned_block_cyclic305 SizeType global(ProcessID id, SizeType i) const 306 { 307 return ((i / size) * p + id) * size + i % size; 308 } 309 310 private: 311 std::size_t id; //< The ID number of this processor 312 std::size_t p; //< The number of processors 313 std::size_t size; //< Block size 314 }; 315 316 struct twod_block_cyclic 317 { 318 template<typename LinearProcessGroup> twod_block_cyclicboost::parallel::twod_block_cyclic319 explicit twod_block_cyclic(const LinearProcessGroup& pg, 320 std::size_t block_rows, std::size_t block_columns, 321 std::size_t data_columns_per_row) 322 : id(process_id(pg)), p(num_processes(pg)), 323 block_rows(block_rows), block_columns(block_columns), 324 data_columns_per_row(data_columns_per_row) 325 { } 326 327 template<typename SizeType> block_sizeboost::parallel::twod_block_cyclic328 SizeType block_size(SizeType n) const 329 { 330 return block_size(id, n); 331 } 332 333 template<typename SizeType, typename ProcessID> block_sizeboost::parallel::twod_block_cyclic334 SizeType block_size(ProcessID id, SizeType n) const 335 { 336 // TBD: This is really lame :) 337 int result = -1; 338 while (n > 0) { 339 --n; 340 if ((*this)(n) == id && (int)local(n) > result) result = local(n); 341 } 342 ++result; 343 344 // std::cerr << "Block size of id " << id << " is " << result << std::endl; 345 return result; 346 } 347 348 template<typename SizeType> operator ()boost::parallel::twod_block_cyclic349 SizeType operator()(SizeType i) const 350 { 351 SizeType result = get_block_num(i) % p; 352 // std::cerr << "Item " << i << " goes on processor " << result << std::endl; 353 return result; 354 } 355 356 template<typename SizeType> localboost::parallel::twod_block_cyclic357 SizeType local(SizeType i) const 358 { 359 // Compute the start of the block 360 std::size_t block_num = get_block_num(i); 361 // std::cerr << "Item " << i << " is in block #" << block_num << std::endl; 362 363 std::size_t local_block_num = block_num / p; 364 std::size_t block_start = local_block_num * block_rows * block_columns; 365 366 // Compute the offset into the block 367 std::size_t data_row = i / data_columns_per_row; 368 std::size_t data_col = i % data_columns_per_row; 369 std::size_t block_offset = (data_row % block_rows) * block_columns 370 + (data_col % block_columns); 371 372 // std::cerr << "Item " << i << " maps to local index " << block_start+block_offset << std::endl; 373 return block_start + block_offset; 374 } 375 376 template<typename SizeType> globalboost::parallel::twod_block_cyclic377 SizeType global(SizeType i) const 378 { 379 // Compute the (global) block in which this element resides 380 SizeType local_block_num = i / (block_rows * block_columns); 381 SizeType block_offset = i % (block_rows * block_columns); 382 SizeType block_num = local_block_num * p + id; 383 384 // Compute the position of the start of the block (globally) 385 SizeType block_start = block_num * block_rows * block_columns; 386 387 std::cerr << "Block " << block_num << " starts at index " << block_start 388 << std::endl; 389 390 // Compute the row and column of this block 391 SizeType block_row = block_num / (data_columns_per_row / block_columns); 392 SizeType block_col = block_num % (data_columns_per_row / block_columns); 393 394 SizeType row_in_block = block_offset / block_columns; 395 SizeType col_in_block = block_offset % block_columns; 396 397 std::cerr << "Local index " << i << " is in block at row " << block_row 398 << ", column " << block_col << ", in-block row " << row_in_block 399 << ", in-block col " << col_in_block << std::endl; 400 401 SizeType result = block_row * block_rows + block_col * block_columns 402 + row_in_block * block_rows + col_in_block; 403 404 405 std::cerr << "global(" << i << "@" << id << ") = " << result 406 << " =? " << local(result) << std::endl; 407 BOOST_ASSERT(i == local(result)); 408 return result; 409 } 410 411 private: 412 template<typename SizeType> get_block_numboost::parallel::twod_block_cyclic413 std::size_t get_block_num(SizeType i) const 414 { 415 std::size_t data_row = i / data_columns_per_row; 416 std::size_t data_col = i % data_columns_per_row; 417 std::size_t block_row = data_row / block_rows; 418 std::size_t block_col = data_col / block_columns; 419 std::size_t blocks_in_row = data_columns_per_row / block_columns; 420 std::size_t block_num = block_col * blocks_in_row + block_row; 421 return block_num; 422 } 423 424 std::size_t id; //< The ID number of this processor 425 std::size_t p; //< The number of processors 426 std::size_t block_rows; //< The # of rows in each block 427 std::size_t block_columns; //< The # of columns in each block 428 std::size_t data_columns_per_row; //< The # of columns per row of data 429 }; 430 431 class twod_random 432 { 433 template<typename RandomNumberGen> 434 struct random_int 435 { random_intboost::parallel::twod_random::random_int436 explicit random_int(RandomNumberGen& gen) : gen(gen) { } 437 438 template<typename T> operator ()boost::parallel::twod_random::random_int439 T operator()(T n) const 440 { 441 uniform_int<T> distrib(0, n-1); 442 return distrib(gen); 443 } 444 445 private: 446 RandomNumberGen& gen; 447 }; 448 449 public: 450 template<typename LinearProcessGroup, typename RandomNumberGen> twod_random(const LinearProcessGroup & pg,std::size_t block_rows,std::size_t block_columns,std::size_t data_columns_per_row,std::size_t n,RandomNumberGen & gen)451 explicit twod_random(const LinearProcessGroup& pg, 452 std::size_t block_rows, std::size_t block_columns, 453 std::size_t data_columns_per_row, 454 std::size_t n, 455 RandomNumberGen& gen) 456 : id(process_id(pg)), p(num_processes(pg)), 457 block_rows(block_rows), block_columns(block_columns), 458 data_columns_per_row(data_columns_per_row), 459 global_to_local(n / (block_rows * block_columns)) 460 { 461 std::copy(make_counting_iterator(std::size_t(0)), 462 make_counting_iterator(global_to_local.size()), 463 global_to_local.begin()); 464 465 #if defined(BOOST_NO_CXX98_RANDOM_SHUFFLE) 466 std::shuffle(global_to_local.begin(), global_to_local.end(), gen); 467 #else 468 random_int<RandomNumberGen> rand(gen); 469 std::random_shuffle(global_to_local.begin(), global_to_local.end(), rand); 470 #endif 471 } 472 473 template<typename SizeType> block_size(SizeType n) const474 SizeType block_size(SizeType n) const 475 { 476 return block_size(id, n); 477 } 478 479 template<typename SizeType, typename ProcessID> block_size(ProcessID id,SizeType n) const480 SizeType block_size(ProcessID id, SizeType n) const 481 { 482 // TBD: This is really lame :) 483 int result = -1; 484 while (n > 0) { 485 --n; 486 if ((*this)(n) == id && (int)local(n) > result) result = local(n); 487 } 488 ++result; 489 490 // std::cerr << "Block size of id " << id << " is " << result << std::endl; 491 return result; 492 } 493 494 template<typename SizeType> operator ()(SizeType i) const495 SizeType operator()(SizeType i) const 496 { 497 SizeType result = get_block_num(i) % p; 498 // std::cerr << "Item " << i << " goes on processor " << result << std::endl; 499 return result; 500 } 501 502 template<typename SizeType> local(SizeType i) const503 SizeType local(SizeType i) const 504 { 505 // Compute the start of the block 506 std::size_t block_num = get_block_num(i); 507 // std::cerr << "Item " << i << " is in block #" << block_num << std::endl; 508 509 std::size_t local_block_num = block_num / p; 510 std::size_t block_start = local_block_num * block_rows * block_columns; 511 512 // Compute the offset into the block 513 std::size_t data_row = i / data_columns_per_row; 514 std::size_t data_col = i % data_columns_per_row; 515 std::size_t block_offset = (data_row % block_rows) * block_columns 516 + (data_col % block_columns); 517 518 // std::cerr << "Item " << i << " maps to local index " << block_start+block_offset << std::endl; 519 return block_start + block_offset; 520 } 521 522 private: 523 template<typename SizeType> get_block_num(SizeType i) const524 std::size_t get_block_num(SizeType i) const 525 { 526 std::size_t data_row = i / data_columns_per_row; 527 std::size_t data_col = i % data_columns_per_row; 528 std::size_t block_row = data_row / block_rows; 529 std::size_t block_col = data_col / block_columns; 530 std::size_t blocks_in_row = data_columns_per_row / block_columns; 531 std::size_t block_num = block_col * blocks_in_row + block_row; 532 return global_to_local[block_num]; 533 } 534 535 std::size_t id; //< The ID number of this processor 536 std::size_t p; //< The number of processors 537 std::size_t block_rows; //< The # of rows in each block 538 std::size_t block_columns; //< The # of columns in each block 539 std::size_t data_columns_per_row; //< The # of columns per row of data 540 std::vector<std::size_t> global_to_local; 541 }; 542 543 class random_distribution 544 { 545 template<typename RandomNumberGen> 546 struct random_int 547 { random_intboost::parallel::random_distribution::random_int548 explicit random_int(RandomNumberGen& gen) : gen(gen) { } 549 550 template<typename T> operator ()boost::parallel::random_distribution::random_int551 T operator()(T n) const 552 { 553 uniform_int<T> distrib(0, n-1); 554 return distrib(gen); 555 } 556 557 private: 558 RandomNumberGen& gen; 559 }; 560 561 public: 562 template<typename LinearProcessGroup, typename RandomNumberGen> random_distribution(const LinearProcessGroup & pg,RandomNumberGen & gen,std::size_t n)563 random_distribution(const LinearProcessGroup& pg, RandomNumberGen& gen, 564 std::size_t n) 565 : base(pg, n), local_to_global(n), global_to_local(n) 566 { 567 std::copy(make_counting_iterator(std::size_t(0)), 568 make_counting_iterator(n), 569 local_to_global.begin()); 570 571 #if defined(BOOST_NO_CXX98_RANDOM_SHUFFLE) 572 std::shuffle(local_to_global.begin(), local_to_global.end(), gen); 573 #else 574 random_int<RandomNumberGen> rand(gen); 575 std::random_shuffle(local_to_global.begin(), local_to_global.end(), rand); 576 #endif 577 578 for (std::vector<std::size_t>::size_type i = 0; i < n; ++i) 579 global_to_local[local_to_global[i]] = i; 580 } 581 582 template<typename SizeType> block_size(SizeType n) const583 SizeType block_size(SizeType n) const 584 { return base.block_size(n); } 585 586 template<typename SizeType, typename ProcessID> block_size(ProcessID id,SizeType n) const587 SizeType block_size(ProcessID id, SizeType n) const 588 { return base.block_size(id, n); } 589 590 template<typename SizeType> operator ()(SizeType i) const591 SizeType operator()(SizeType i) const 592 { 593 return base(global_to_local[i]); 594 } 595 596 template<typename SizeType> local(SizeType i) const597 SizeType local(SizeType i) const 598 { 599 return base.local(global_to_local[i]); 600 } 601 602 template<typename ProcessID, typename SizeType> global(ProcessID p,SizeType i) const603 SizeType global(ProcessID p, SizeType i) const 604 { 605 return local_to_global[base.global(p, i)]; 606 } 607 608 template<typename SizeType> global(SizeType i) const609 SizeType global(SizeType i) const 610 { 611 return local_to_global[base.global(i)]; 612 } 613 614 private: 615 block base; 616 std::vector<std::size_t> local_to_global; 617 std::vector<std::size_t> global_to_local; 618 }; 619 620 } } // end namespace boost::parallel 621 622 #endif // BOOST_PARALLEL_DISTRIBUTION_HPP 623 624