#pragma once #include #include #include #include #include #include #ifndef AT_PER_OPERATOR_HEADERS #include #include #else #include #include #include #include #endif #ifdef GPUCC #define NAME "sparse_binary_op_intersection_cuda" #else #define NAME "sparse_binary_op_intersection_cpu" #endif namespace at::native { namespace { using at::sparse::get_sparse_impl; // ForwardIt: only legacy random access iterator is supported. template static FUNCAPI INLINE ForwardIt find_bound(ForwardIt first, ForwardIt last, const T& value) { ForwardIt RESTRICT it; typename std::iterator_traits::difference_type count, step; // NOTE: std::distance(first, last) compiles but produces wrong results on CUDA, // so only legacy random access iterators are safe in this code. count = last - first; while (count > 0) { it = first; step = count / 2; // avoiding std::advance(it, step), // although it does work unlike std::distance on CUDA. it += step; // The decision which separates finding a lower bound vs an upper bound. // Note that a lower bound is a value at *it with the smallest index // such that *it >= value if such value exists, or last if does not. // Similarly, an upper bound is a value at *it with the smallest index // such that *it > value if such value exists, or last if does not. // Let is_lower = true and *it < value, then we know that *it and values // preceeding *it cannot contain a lower bound, so we adjust initial iterator range // from [first, first + count] to [first + step + 1, first + count - (step + 1)], // where +1 skips the element at which we have just evaluated *it < value. // Samilar logic holds when is_lower = false. if (is_lower ? *it < value : value >= *it) { first = ++it; count -= step + 1; } else { count = step; } } return first; } template