/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <functional>
#include <unordered_map>
#include <utility>

#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/hash/hash.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;

// CPU kernel backing Unique, UniqueV2, UniqueWithCounts and
// UniqueWithCountsV2.
//
// T is the element dtype; TIndex (int32 or int64, selected by the "out_idx"
// attr) is the dtype of the index and count outputs.
template <typename T, typename TIndex>
class UniqueOp : public OpKernel {
 public:
  explicit UniqueOp(OpKernelConstruction* context) : OpKernel(context) {}

  // Outputs:
  //   0: the unique values/slices, in order of first occurrence (the input
  //      shape with the `axis` dimension shrunk to the number of uniques);
  //   1 ("idx"): for each slice along `axis`, the position of its value in
  //      output 0;
  //   2 (only when num_outputs() > 2, i.e. UniqueWithCounts*): the number of
  //      occurrences of each unique value.
  void Compute(OpKernelContext* context) override {
    const Tensor& input = context->input(0);
    // TODO(dga): Make unique polymorphic for returning int32 and int64
    // vectors to support large tensors.
    OP_REQUIRES(context,
                input.NumElements() <= std::numeric_limits<int32>::max(),
                errors::InvalidArgument(
                    "unique does not support input tensors larger than ",
                    std::numeric_limits<int32>::max(), " elements"));

    // The input is viewed as a rank-3 tensor
    //   [prod(dims before axis), dim(axis), prod(dims after axis)]
    // so uniqueness is computed over slices along `axis`. The default
    // {1, N, 1} corresponds to the plain 1-D (no axis) case.
    int64 axis = 0;
    std::vector<int64> new_sizes{1, input.NumElements(), 1};
    if (context->num_inputs() == 1) {
      // Unique / UniqueWithCounts: no axis input; input must be a vector.
      OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
                  errors::InvalidArgument("unique expects a 1D vector."));
    } else {
      // In case of UniqueV2, the axis is a 1D vector. The purpose is
      // to allow specifying either "no axis" or "axis". The `[]` means
      // "no axis", while `[x]` means `axis = x`.
      const Tensor& axis_tensor = context->input(1);
      OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()),
                  errors::InvalidArgument("axis expects a 1D vector."));
      OP_REQUIRES(
          context, axis_tensor.NumElements() <= 1,
          errors::InvalidArgument(
              "axis does not support input tensors larger than 1 elements"));
      if (axis_tensor.NumElements() == 0) {
        // Empty axis vector: behave exactly like Unique on a 1-D input.
        OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
                    errors::InvalidArgument("unique expects a 1D vector."));
      } else {
        OP_REQUIRES(context,
                    (axis_tensor.dtype() == DT_INT32 ||
                     axis_tensor.dtype() == DT_INT64),
                    errors::InvalidArgument(
                        "axis tensor should be int32 or int64, but got ",
                        DataTypeString(axis_tensor.dtype())));
        if (axis_tensor.dtype() == DT_INT32) {
          axis = internal::SubtleMustCopy(axis_tensor.scalar<int32>()());
        } else {
          axis = internal::SubtleMustCopy(axis_tensor.scalar<int64>()());
        }
        // Negative axis counts from the last dimension, Python-style.
        axis = axis < 0 ? axis + input.dims() : axis;
        OP_REQUIRES(context, 0 <= axis && axis < input.dims(),
                    errors::InvalidArgument("axis has to be between [0, ",
                                            input.dims(), ")"));
        // Collapse the dimensions before and after `axis` into the outer
        // and inner dimensions of the rank-3 view.
        if (axis > 0) {
          for (int64 i = 0; i < axis; i++) {
            new_sizes[0] *= input.dim_size(i);
          }
        }
        new_sizes[1] = input.dim_size(axis);
        if (axis + 1 < input.dims()) {
          for (int64 i = axis + 1; i < input.dims(); i++) {
            new_sizes[2] *= input.dim_size(i);
          }
        }
      }
    }

    // Output 1 ("idx") always has one entry per slice along `axis`.
    Tensor* idx = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(
                                1, TensorShape({new_sizes[1]}), &idx));
    auto idx_vec = idx->template vec<TIndex>();

    int64 uniq_size;
    if (new_sizes[0] == 1 && new_sizes[2] == 1) {
      // Specialized and faster implementation when unique is run over single
      // elements. Here we put T directly into the map rather than ints pointing
      // to them as in the general case.
      auto Tin = input.flat<T>();
      const int64 N = static_cast<int64>(Tin.size());

      // Maps each distinct value to its output index; indices are assigned
      // in order of first occurrence.
      std::unordered_map<T, TIndex> uniq;
      uniq.reserve(2 * N);  // over-reserve to avoid rehashing
      for (Eigen::Index i = 0, j = 0; i < N; ++i) {
        // insert() leaves the map untouched if the value already exists;
        // either way `it.first->second` is that value's output index.
        auto it = uniq.insert(std::make_pair(Tin(i), j));
        idx_vec(i) = it.first->second;
        if (it.second) {
          ++j;
        }
      }

      uniq_size = static_cast<int64>(uniq.size());
      TensorShape output_shape(input.shape());
      output_shape.set_dim(axis, uniq_size);
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));
      auto Tout = output->flat<T>();

      // Scatter each unique value to its assigned output slot.
      for (auto it : uniq) {
        Tout(it.second) = it.first;
      }
    } else {
      // General implementation when unique is run over multiple elements.
      auto Tin = input.shaped<T, 3>(new_sizes);

      // Hash a position along the middle (axis) dimension by combining the
      // hashes of every element of its [dim0 x dim2] slice.
      auto hash_fn = [&Tin](const Eigen::Index& key) {
        size_t h = 0;
        for (Eigen::Index i = 0; i < Tin.dimension(0); i++) {
          for (Eigen::Index j = 0; j < Tin.dimension(2); j++) {
            h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
          }
        }
        return h;
      };

      // Two positions are equal iff their slices match element-wise.
      auto equal_to_fn = [&Tin](const Eigen::Index& lhs,
                                const Eigen::Index& rhs) {
        for (Eigen::Index i = 0; i < Tin.dimension(0); i++) {
          for (Eigen::Index j = 0; j < Tin.dimension(2); j++) {
            if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
              return false;
            }
          }
        }
        return true;
      };

      // Keys are slice indices along `axis`; the custom hash/equality above
      // make the map deduplicate whole slices. Mapped values are output
      // indices assigned in order of first occurrence.
      std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
          uniq(0, hash_fn, equal_to_fn);

      uniq.reserve(2 * Tin.dimension(1));  // over-reserve to avoid rehashing

      for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
        auto it = uniq.insert(std::make_pair(i, j));
        idx_vec(i) = it.first->second;
        if (it.second) {
          ++j;
        }
      }

      uniq_size = static_cast<int64>(uniq.size());
      new_sizes[1] = uniq_size;
      TensorShape output_shape(input.shape());
      output_shape.set_dim(axis, uniq_size);
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));
      auto Tout = output->shaped<T, 3>(new_sizes);

      // Copy each first-occurrence slice into its output slot.
      for (auto it : uniq) {
        Tout.chip(it.second, 1) = Tin.chip(it.first, 1);
      }
    }

    // UniqueWithCounts / UniqueWithCountsV2 additionally emit per-unique
    // occurrence counts as output 2, tallied from the idx output.
    if (num_outputs() > 2) {
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context, context->allocate_output(
                                  2, TensorShape({uniq_size}), &output));
      auto count_output_vec = output->template vec<TIndex>();
      count_output_vec.setZero();
      const int N = idx_vec.size();
      for (int64 i = 0; i < N; ++i) {
        count_output_vec(idx_vec(i))++;
      }
    }
  }
};

// Registers Unique, UniqueV2, UniqueWithCounts and UniqueWithCountsV2 CPU
// kernels for `type`, each with both int32 and int64 "out_idx" variants.
#define REGISTER_UNIQUE(type)                                    \
  REGISTER_KERNEL_BUILDER(Name("Unique")                         \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int32>("out_idx"), \
                          UniqueOp<type, int32>);                \
  REGISTER_KERNEL_BUILDER(Name("Unique")                         \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int64>("out_idx"), \
                          UniqueOp<type, int64>);                \
  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int32>("out_idx"), \
                          UniqueOp<type, int32>);                \
  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int64>("out_idx"), \
                          UniqueOp<type, int64>);                \
  REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts")               \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int32>("out_idx"), \
                          UniqueOp<type, int32>)                 \
  REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts")               \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int64>("out_idx"), \
                          UniqueOp<type, int64>);                \
  REGISTER_KERNEL_BUILDER(Name("UniqueWithCountsV2")             \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int32>("out_idx"), \
                          UniqueOp<type, int32>)                 \
  REGISTER_KERNEL_BUILDER(Name("UniqueWithCountsV2")             \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int64>("out_idx"), \
                          UniqueOp<type, int64>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_UNIQUE);
REGISTER_UNIQUE(string)
#undef REGISTER_UNIQUE

// Fake integer GPU kernels so that the use of Unique in optimizers (to
// de-duplicate sparse gradient indices) does not conflict with gradients being
// located on a GPU. These kernels run on the CPU, their inputs and outputs
// residing in host (not GPU) memory.
// Integer Unique kernels registered for DEVICE_GPU but pinned to host memory:
// all inputs and outputs (x, y, idx) are marked HostMemory, so the op can be
// placed on a GPU device while the computation runs entirely on the CPU.
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int64>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int64>);

#ifdef TENSORFLOW_USE_SYCL
// Same host-memory arrangement for SYCL devices: the kernels below execute
// on the CPU with host-resident tensors.
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int64>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int64>);
#endif  // TENSORFLOW_USE_SYCL
}  // namespace tensorflow