/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <functional>
#include <unordered_map>
#include <utility>

#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/hash/hash.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;

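// CPU kernel for the Unique family of ops. For a 1-D input `x`, Unique
// returns `y` (the distinct values of `x` in order of first occurrence) and
// `idx` (for each element of `x`, its index into `y`), so y[idx[i]] == x[i].
// For example:
//   x   = [1, 1, 2, 4, 4, 4, 7, 8, 8]
//   y   = [1, 2, 4, 7, 8]
//   idx = [0, 0, 1, 2, 2, 2, 3, 4, 4]
// UniqueWithCounts additionally returns the number of occurrences of each
// y[i], and the V2 variants take an optional `axis` input for deduplicating
// slices of a higher-rank tensor along that axis.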
template <typename T, typename TIndex>
class UniqueOp : public OpKernel {
 public:
  explicit UniqueOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& input = context->input(0);
    // TODO(dga):  Make unique polymorphic for returning int32 and int64
    // vectors to support large tensors.
    OP_REQUIRES(context,
                input.NumElements() <= std::numeric_limits<int32>::max(),
                errors::InvalidArgument(
                    "unique does not support input tensors larger than ",
                    std::numeric_limits<int32>::max(), " elements"));

    int64 axis = 0;
    std::vector<int64> new_sizes{1, input.NumElements(), 1};
    if (context->num_inputs() == 1) {
      OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
                  errors::InvalidArgument("unique expects a 1D vector."));
    } else {
      // In the case of UniqueV2, the axis is a 1D vector. The purpose is
      // to allow specifying either "no axis" or "axis". The `[]` means
      // "no axis", while `[x]` means `axis = x`.
      const Tensor& axis_tensor = context->input(1);
      OP_REQUIRES(context, TensorShapeUtils::IsVector(axis_tensor.shape()),
                  errors::InvalidArgument("axis expects a 1D vector."));
      OP_REQUIRES(
          context, axis_tensor.NumElements() <= 1,
          errors::InvalidArgument(
              "axis does not support input tensors larger than 1 element"));
      if (axis_tensor.NumElements() == 0) {
        OP_REQUIRES(context, TensorShapeUtils::IsVector(input.shape()),
                    errors::InvalidArgument("unique expects a 1D vector."));
      } else {
        OP_REQUIRES(context,
                    (axis_tensor.dtype() == DT_INT32 ||
                     axis_tensor.dtype() == DT_INT64),
                    errors::InvalidArgument(
                        "axis tensor should be int32 or int64, but got ",
                        DataTypeString(axis_tensor.dtype())));
        if (axis_tensor.dtype() == DT_INT32) {
          axis = internal::SubtleMustCopy(axis_tensor.scalar<int32>()());
        } else {
          axis = internal::SubtleMustCopy(axis_tensor.scalar<int64>()());
        }
        axis = axis < 0 ? axis + input.dims() : axis;
        OP_REQUIRES(context, 0 <= axis && axis < input.dims(),
                    errors::InvalidArgument("axis has to be between [0, ",
                                            input.dims(), ")"));
        if (axis > 0) {
          for (int64 i = 0; i < axis; i++) {
            new_sizes[0] *= input.dim_size(i);
          }
        }
        new_sizes[1] = input.dim_size(axis);
        if (axis + 1 < input.dims()) {
          for (int64 i = axis + 1; i < input.dims(); i++) {
            new_sizes[2] *= input.dim_size(i);
          }
        }
      }
    }
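    // At this point new_sizes describes the input viewed as a rank-3 tensor:
    // {prod(dims before axis), dim(axis), prod(dims after axis)}. E.g. an
    // input of shape [2, 3, 4, 5] with axis = 1 collapses to
    // new_sizes = {2, 3, 20}, and uniqueness is then decided per slice along
    // the middle dimension.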

    Tensor* idx = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(
                                1, TensorShape({new_sizes[1]}), &idx));
    auto idx_vec = idx->template vec<TIndex>();

    int64 uniq_size;
    if (new_sizes[0] == 1 && new_sizes[2] == 1) {
      // Specialized and faster implementation when unique is run over single
      // elements. Here we put T directly into the map rather than ints pointing
      // to them as in the general case.
      auto Tin = input.flat<T>();
      const int64 N = static_cast<int64>(Tin.size());

      std::unordered_map<T, TIndex> uniq;
      uniq.reserve(2 * N);
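      // The single pass below assigns each newly seen value the next dense
      // id `j`; e.g. for Tin = [7, 7, 3] it leaves uniq = {7 -> 0, 3 -> 1}
      // and idx_vec = [0, 0, 1].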
      for (Eigen::Index i = 0, j = 0; i < N; ++i) {
        auto it = uniq.insert(std::make_pair(Tin(i), j));
        idx_vec(i) = it.first->second;
        if (it.second) {
          ++j;
        }
      }

      uniq_size = static_cast<int64>(uniq.size());
      TensorShape output_shape(input.shape());
      output_shape.set_dim(axis, uniq_size);
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));
      auto Tout = output->flat<T>();

      for (auto it : uniq) {
        Tout(it.second) = it.first;
      }
    } else {
      // General implementation when unique is run over multiple elements.
      auto Tin = input.shaped<T, 3>(new_sizes);

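      // The map below keys on indices along the middle (axis) dimension.
      // hash_fn hashes the whole slice Tin(:, key, :) and equal_to_fn
      // compares two slices element-wise, so duplicate slices land in the
      // same bucket without copying any data.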
      auto hash_fn = [&Tin](const Eigen::Index& key) {
        size_t h = 0;
        for (Eigen::Index i = 0; i < Tin.dimension(0); i++) {
          for (Eigen::Index j = 0; j < Tin.dimension(2); j++) {
            h = Hash64Combine(h, hash<T>{}(Tin(i, key, j)));
          }
        }
        return h;
      };

      auto equal_to_fn = [&Tin](const Eigen::Index& lhs,
                                const Eigen::Index& rhs) {
        for (Eigen::Index i = 0; i < Tin.dimension(0); i++) {
          for (Eigen::Index j = 0; j < Tin.dimension(2); j++) {
            if (Tin(i, lhs, j) != Tin(i, rhs, j)) {
              return false;
            }
          }
        }
        return true;
      };

      std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
          uniq(0, hash_fn, equal_to_fn);

      uniq.reserve(2 * Tin.dimension(1));

      for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
        auto it = uniq.insert(std::make_pair(i, j));
        idx_vec(i) = it.first->second;
        if (it.second) {
          ++j;
        }
      }

      uniq_size = static_cast<int64>(uniq.size());
      new_sizes[1] = uniq_size;
      TensorShape output_shape(input.shape());
      output_shape.set_dim(axis, uniq_size);
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));
      auto Tout = output->shaped<T, 3>(new_sizes);

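      // Copy each representative slice into its output position: chip(k, 1)
      // is the rank-2 slice at index k along the middle (axis) dimension, so
      // this writes Tin's slice `it.first` into output slot `it.second`.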
      for (auto it : uniq) {
        Tout.chip(it.second, 1) = Tin.chip(it.first, 1);
      }
    }

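    // For UniqueWithCounts, the third output holds how many times each y[i]
    // occurs in the input; e.g. x = [1, 1, 2, 4, 4, 4, 7, 8, 8] gives
    // count = [2, 1, 3, 1, 2].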
    if (num_outputs() > 2) {
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context, context->allocate_output(
                                  2, TensorShape({uniq_size}), &output));
      auto count_output_vec = output->template vec<TIndex>();
      count_output_vec.setZero();
      const int N = idx_vec.size();
      for (int64 i = 0; i < N; ++i) {
        count_output_vec(idx_vec(i))++;
      }
    }
  }
};

#define REGISTER_UNIQUE(type)                                    \
  REGISTER_KERNEL_BUILDER(Name("Unique")                         \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int32>("out_idx"), \
                          UniqueOp<type, int32>);                \
  REGISTER_KERNEL_BUILDER(Name("Unique")                         \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int64>("out_idx"), \
                          UniqueOp<type, int64>);                \
  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int32>("out_idx"), \
                          UniqueOp<type, int32>);                \
  REGISTER_KERNEL_BUILDER(Name("UniqueV2")                       \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int64>("out_idx"), \
                          UniqueOp<type, int64>);                \
  REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts")               \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int32>("out_idx"), \
                          UniqueOp<type, int32>)                 \
  REGISTER_KERNEL_BUILDER(Name("UniqueWithCounts")               \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int64>("out_idx"), \
                          UniqueOp<type, int64>);                \
  REGISTER_KERNEL_BUILDER(Name("UniqueWithCountsV2")             \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int32>("out_idx"), \
                          UniqueOp<type, int32>)                 \
  REGISTER_KERNEL_BUILDER(Name("UniqueWithCountsV2")             \
                              .Device(DEVICE_CPU)                \
                              .TypeConstraint<type>("T")         \
                              .TypeConstraint<int64>("out_idx"), \
                          UniqueOp<type, int64>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_UNIQUE);
REGISTER_UNIQUE(string)
#undef REGISTER_UNIQUE

// Fake integer GPU kernels so that the use of Unique in optimizers (to
// de-duplicate sparse gradient indices) does not conflict with gradients being
// located on a GPU. These kernels run on the CPU, their inputs and outputs
// residing in host (not GPU) memory.
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int64>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int64>);

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int32>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int32>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int32, int64>);
REGISTER_KERNEL_BUILDER(Name("Unique")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int64>("T")
                            .TypeConstraint<int64>("out_idx")
                            .HostMemory("x")
                            .HostMemory("y")
                            .HostMemory("idx"),
                        UniqueOp<int64, int64>);
#endif  // TENSORFLOW_USE_SYCL
}  // namespace tensorflow