1 //---------------------------------------------------------------------------// 2 // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> 3 // 4 // Distributed under the Boost Software License, Version 1.0 5 // See accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt 7 // 8 // See http://boostorg.github.com/compute for more information. 9 //---------------------------------------------------------------------------// 10 11 #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP 12 #define BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP 13 14 #include <iterator> 15 16 #include <boost/compute/algorithm/find_if.hpp> 17 #include <boost/compute/container/vector.hpp> 18 #include <boost/compute/detail/iterator_range_size.hpp> 19 #include <boost/compute/detail/meta_kernel.hpp> 20 #include <boost/compute/lambda.hpp> 21 #include <boost/compute/system.hpp> 22 23 namespace boost { 24 namespace compute { 25 namespace detail { 26 27 /// 28 /// \brief Balanced Path kernel class 29 /// 30 /// Subclass of meta_kernel to break two sets into tiles according 31 /// to their balanced path. 32 /// 33 class balanced_path_kernel : public meta_kernel 34 { 35 public: 36 unsigned int tile_size; 37 balanced_path_kernel()38 balanced_path_kernel() : meta_kernel("balanced_path") 39 { 40 tile_size = 4; 41 } 42 43 template<class InputIterator1, class InputIterator2, 44 class OutputIterator1, class OutputIterator2, 45 class Compare> set_range(InputIterator1 first1,InputIterator1 last1,InputIterator2 first2,InputIterator2 last2,OutputIterator1 result_a,OutputIterator2 result_b,Compare comp)46 void set_range(InputIterator1 first1, 47 InputIterator1 last1, 48 InputIterator2 first2, 49 InputIterator2 last2, 50 OutputIterator1 result_a, 51 OutputIterator2 result_b, 52 Compare comp) 53 { 54 typedef typename std::iterator_traits<InputIterator1>::value_type value_type; 55 56 m_a_count = iterator_range_size(first1, last1); 57 m_a_count_arg = add_arg<uint_>("a_count"); 58 59 m_b_count = iterator_range_size(first2, last2); 60 m_b_count_arg = add_arg<uint_>("b_count"); 61 62 *this << 63 "uint i = get_global_id(0);\n" << 64 "uint target = (i+1)*" << tile_size << ";\n" << 65 "uint start = max(convert_int(0),convert_int(target)-convert_int(b_count));\n" << 66 "uint end = min(target,a_count);\n" << 67 "uint a_index, b_index;\n" << 68 "while(start<end)\n" << 69 "{\n" << 70 " a_index = (start + end)/2;\n" << 71 " b_index = target - a_index - 1;\n" << 72 " if(!(" << comp(first2[expr<uint_>("b_index")], 73 first1[expr<uint_>("a_index")]) << "))\n" << 74 " start = a_index + 1;\n" << 75 " else end = a_index;\n" << 76 "}\n" << 77 "a_index = start;\n" << 78 "b_index = target - start;\n" << 79 "if(b_index < b_count)\n" << 80 "{\n" << 81 " " << decl<const value_type>("x") << " = " << 82 first2[expr<uint_>("b_index")] << ";\n" << 83 " uint a_start = 0, a_end = a_index, a_mid;\n" << 84 " uint b_start = 0, b_end = b_index, b_mid;\n" << 85 " while(a_start<a_end)\n" << 86 " {\n" << 87 " a_mid = (a_start + a_end)/2;\n" << 88 " if(" << comp(first1[expr<uint_>("a_mid")], expr<value_type>("x")) << ")\n" << 89 " a_start = a_mid+1;\n" << 90 " else a_end = a_mid;\n" << 91 " }\n" << 92 " while(b_start<b_end)\n" << 93 " {\n" << 94 " b_mid = (b_start + b_end)/2;\n" << 95 " if(" << comp(first2[expr<uint_>("b_mid")], expr<value_type>("x")) << ")\n" << 96 " b_start = b_mid+1;\n" << 97 " else b_end = b_mid;\n" << 98 " }\n" << 99 " uint a_run = a_index - a_start;\n" << 100 " uint b_run = b_index - b_start;\n" << 101 " uint x_count = a_run + b_run;\n" << 102 " uint b_advance = max(x_count / 2, x_count - a_run);\n" << 103 " b_end = min(b_count, b_start + b_advance + 1);\n" << 104 " uint temp_start = b_index, temp_end = b_end, temp_mid;" << 105 " while(temp_start < temp_end)\n" << 106 " {\n" << 107 " temp_mid = (temp_start + temp_end + 1)/2;\n" << 108 " if(" << comp(expr<value_type>("x"), first2[expr<uint_>("temp_mid")]) << ")\n" << 109 " temp_end = temp_mid-1;\n" << 110 " else temp_start = temp_mid;\n" << 111 " }\n" << 112 " b_run = temp_start - b_start + 1;\n" << 113 " b_advance = min(b_advance, b_run);\n" << 114 " uint a_advance = x_count - b_advance;\n" << 115 " uint star = convert_uint((a_advance == b_advance + 1) " << 116 "&& (b_advance < b_run));\n" << 117 " a_index = a_start + a_advance;\n" << 118 " b_index = target - a_index + star;\n" << 119 "}\n" << 120 result_a[expr<uint_>("i")] << " = a_index;\n" << 121 result_b[expr<uint_>("i")] << " = b_index;\n"; 122 123 } 124 125 template<class InputIterator1, class InputIterator2, 126 class OutputIterator1, class OutputIterator2> set_range(InputIterator1 first1,InputIterator1 last1,InputIterator2 first2,InputIterator2 last2,OutputIterator1 result_a,OutputIterator2 result_b)127 void set_range(InputIterator1 first1, 128 InputIterator1 last1, 129 InputIterator2 first2, 130 InputIterator2 last2, 131 OutputIterator1 result_a, 132 OutputIterator2 result_b) 133 { 134 typedef typename std::iterator_traits<InputIterator1>::value_type value_type; 135 ::boost::compute::less<value_type> less_than; 136 set_range(first1, last1, first2, last2, result_a, result_b, less_than); 137 } 138 exec(command_queue & queue)139 event exec(command_queue &queue) 140 { 141 if((m_a_count + m_b_count)/tile_size == 0) { 142 return event(); 143 } 144 145 set_arg(m_a_count_arg, uint_(m_a_count)); 146 set_arg(m_b_count_arg, uint_(m_b_count)); 147 148 return exec_1d(queue, 0, (m_a_count + m_b_count)/tile_size); 149 } 150 151 private: 152 size_t m_a_count; 153 size_t m_a_count_arg; 154 size_t m_b_count; 155 size_t m_b_count_arg; 156 }; 157 158 } //end detail namespace 159 } //end compute namespace 160 } //end boost namespace 161 162 #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP 163