/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_
#define TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_

// Connected component analysis. The op is described in ../ops/image_ops.cc. A
// description of the algorithm appears below.

#define EIGEN_USE_THREADS

#include <algorithm>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/work_sharder.h"

namespace tensorflow {

namespace functor {
template <typename T>
bool is_nonzero(T value) {
  return value != T(0);
}

// A full specialization is an ordinary function for ODR purposes, so it must
// be marked `inline` to be defined safely in a header.
template <>
inline bool is_nonzero(string value) {
  return !value.empty();
}

// Processes each pixel of an image for union-find, in parallel blocks. This is
// loosely based on the algorithm in "GPU Computing Gems" by Ondrej Stava and
// Bedrich Benes, available here:
// http://hpcg.purdue.edu/bbenes/papers/Stava2011CCL.pdf
// The bulk of the process works on blocks of the image, each of which has
// already been processed separately. As long as the image contains more than
// one block, we double the height and width of the blocks, so that each new
// block consists of 2x2 previous sub-blocks. Within each new block, we
// serially process the pairs of pixels that were adjacent across the seams of
// the previous sub-blocks. The new blocks do not touch each other, however,
// so the blocks themselves can be processed in parallel (see the illustrative
// usage sketch after this class).
// The GPU algorithm first processes blocks of a fixed size in GPU shared
// memory, with one image block per CUDA thread block. On the CPU, we just
// start with a block size of a single pixel and borrow the rest of the
// algorithm unchanged.
template <typename T>
class BlockedImageUnionFindFunctor {
 public:
  using OutputType = int64;

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlockedImageUnionFindFunctor(
      const T* images, const int64 num_rows, const int64 num_cols,
      OutputType* forest, OutputType* rank)
      : images_(images),
        num_rows_(num_rows),
        num_cols_(num_cols),
        block_height_(1),
        block_width_(1),
        forest_(forest),
        rank_(rank) {}

  // Returns the root of the tree that the pixel at the given index belongs to.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType
  find(OutputType index) const {
    while (forest_[index] != index) {
      index = forest_[index];
    }
    return index;
  }

  // Returns the number of blocks along the y axis.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks_vertically() const {
    return (num_rows_ + block_height_ - 1) / block_height_;
  }

  // Returns the number of blocks along the x axis.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks_horizontally() const {
    return (num_cols_ + block_width_ - 1) / block_width_;
  }

  // Returns the total number of blocks in each image.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 num_blocks() const {
    return num_blocks_vertically() * num_blocks_horizontally();
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 block_height() const {
    return block_height_;
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int64 block_width() const {
    return block_width_;
  }

  // Returns whether we may merge again (the image contains more than one
  // block).
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool can_merge() const {
    return block_height_ < num_rows_ || block_width_ < num_cols_;
  }

  // Doubles the block size. After this method, you must call
  // `merge_internal_block_edges` for each image and each *new* block's xy
  // coordinates (typically in parallel).
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void merge_blocks() {
    block_height_ *= 2;
    block_width_ *= 2;
  }

  // Processes the pairs of pixels that are adjacent across the internal seams
  // between this block's four sub-blocks. This must be done at each stage so
  // that the connected components in each block are joined correctly.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void merge_internal_block_edges(
      int64 image_index, int64 block_vertical_index,
      int64 block_horizontal_index) const {
    int64 block_start_y = block_vertical_index * block_height_;
    int64 block_start_x = block_horizontal_index * block_width_;
    // Merge the sub-blocks horizontally (fixing the vertical seam).
    int64 block_center_x = block_start_x + block_width_ / 2 - 1;
    if (0 <= block_center_x && block_center_x + 1 < num_cols_) {
      int64 merge_blocks_limit_y =
          std::min(num_rows_, block_start_y + block_height_);
      for (int64 y = block_start_y; y < merge_blocks_limit_y; y++) {
        union_right(image_index, y, block_center_x);
      }
    }
    // Merge the sub-blocks vertically (fixing the horizontal seam).
    int64 block_center_y = block_start_y + block_height_ / 2 - 1;
    if (0 <= block_center_y && block_center_y + 1 < num_rows_) {
      int64 merge_blocks_limit_x =
          std::min(num_cols_, block_start_x + block_width_);
      for (int64 x = block_start_x; x < merge_blocks_limit_x; x++) {
        union_down(image_index, block_center_y, x);
      }
    }
  }

 private:
  // The input image(s).
  const T* const images_;
  const int64 num_rows_;
  const int64 num_cols_;
  // Current height of each sub-block of the image.
  int64 block_height_;
  // Current width of each sub-block of the image.
  int64 block_width_;
  // Union-find forest. This has the same size as `images_`, and each entry
  // holds the index of its parent in `images_` (roots hold their own index).
  // Cycles should not occur.
  OutputType* const forest_;
  // Union-find rank of each pixel.
  OutputType* const rank_;

  // Unions the pixel with the pixel below it, if applicable (the pixel is
  // nonzero, it is not in the last row, and the pixel below has the same
  // value).
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void union_down(OutputType batch,
                                                        OutputType row,
                                                        OutputType col) const {
    T pixel = read_pixel(batch, row, col);
    if (is_nonzero<T>(pixel)) {
      const int64 index_a = col + num_cols_ * (row + num_rows_ * batch);
      if (row + 1 < num_rows_ && read_pixel(batch, row + 1, col) == pixel) {
        const int64 index_b = col + num_cols_ * (row + 1 + num_rows_ * batch);
        do_union(index_a, index_b);
      }
    }
  }

  // Unions the pixel with the pixel to the right of it, if applicable.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void union_right(OutputType batch,
                                                         OutputType row,
                                                         OutputType col) const {
    T pixel = read_pixel(batch, row, col);
    if (is_nonzero<T>(pixel)) {
      const int64 index_a = col + num_cols_ * (row + num_rows_ * batch);
      if (col + 1 < num_cols_ && read_pixel(batch, row, col + 1) == pixel) {
        const int64 index_b = col + 1 + num_cols_ * (row + num_rows_ * batch);
        do_union(index_a, index_b);
      }
    }
  }

  // Reads a pixel value in the images.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T
  read_pixel(const OutputType batch, const OutputType row,
             const OutputType col) const {
    return images_[col + num_cols_ * (row + num_rows_ * batch)];
  }

  // Unions the trees that the two pixels belong to, using their index in the
  // `images_` array.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void do_union(
      OutputType index_a, OutputType index_b) const {
    // Find the roots of index_a and index_b in the forest, and make one the
    // child of the other.
    index_a = find(index_a);
    index_b = find(index_b);
    if (index_a == index_b) {
      return;
    }
    // Union by rank: attach the root of lower rank beneath the root of higher
    // rank, which keeps the trees shallow. A rank only grows when two trees
    // of equal rank are merged.
    const OutputType rank_a = rank_[index_a];
    const OutputType rank_b = rank_[index_b];
    OutputType parent, child;
    if (rank_a < rank_b) {
      parent = index_b;
      child = index_a;
    } else {
      parent = index_a;
      child = index_b;
      if (rank_a == rank_b) {
        rank_[parent]++;
      }
    }
    forest_[child] = parent;
  }
};
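
// For illustration, a minimal single-threaded driver for the class above (a
// sketch only, not part of this header's API; `num_images` and the
// preallocated `images`, `forest`, and `rank` buffers are assumed to exist,
// with `forest` initialized to the identity mapping and `rank` to zeros):
//
//   BlockedImageUnionFindFunctor<uint8> union_find(images, num_rows, num_cols,
//                                                  forest, rank);
//   while (union_find.can_merge()) {
//     union_find.merge_blocks();
//     for (int64 image = 0; image < num_images; image++) {
//       // Blocks never touch each other's pixels, so this double loop may
//       // safely be distributed across threads.
//       for (int64 by = 0; by < union_find.num_blocks_vertically(); by++) {
//         for (int64 bx = 0; bx < union_find.num_blocks_horizontally(); bx++) {
//           union_find.merge_internal_block_edges(image, by, bx);
//         }
//       }
//     }
//   }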

// Runs the BlockedImageUnionFindFunctor on all pixels. Will require different
// CPU and GPU implementations.
template <typename Device, typename T>
class ImageConnectedComponentsFunctor {
 public:
  using OutputType = typename BlockedImageUnionFindFunctor<T>::OutputType;

  void operator()(OpKernelContext* ctx,
                  typename TTypes<T, 3>::ConstTensor images,
                  typename TTypes<OutputType, 3>::Tensor forest,
                  typename TTypes<OutputType, 3>::Tensor rank);
};
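
// One plausible shape for a CPU specialization of the functor above (an
// illustrative sketch under assumed names -- `CPUDevice` standing for
// Eigen::ThreadPoolDevice -- not the actual definition, which lives in the
// corresponding .cc file). Each merge stage shards the independent blocks
// across the worker threads available from the context:
//
//   template <typename T>
//   struct ImageConnectedComponentsFunctor<CPUDevice, T> {
//     using OutputType = typename BlockedImageUnionFindFunctor<T>::OutputType;
//
//     void operator()(OpKernelContext* ctx,
//                     typename TTypes<T, 3>::ConstTensor images,
//                     typename TTypes<OutputType, 3>::Tensor forest,
//                     typename TTypes<OutputType, 3>::Tensor rank) {
//       BlockedImageUnionFindFunctor<T> union_find(
//           images.data(), images.dimension(1), images.dimension(2),
//           forest.data(), rank.data());
//       const int64 num_images = images.dimension(0);
//       auto* threads = ctx->device()->tensorflow_cpu_worker_threads();
//       while (union_find.can_merge()) {
//         union_find.merge_blocks();
//         const int64 total = num_images * union_find.num_blocks();
//         const int64 cost =
//             union_find.block_height() + union_find.block_width();
//         Shard(threads->num_threads, threads->workers, total, cost,
//               [&union_find](int64 start, int64 limit) {
//                 const int64 nb = union_find.num_blocks();
//                 const int64 nbh = union_find.num_blocks_horizontally();
//                 for (int64 i = start; i < limit; i++) {
//                   union_find.merge_internal_block_edges(
//                       i / nb, (i % nb) / nbh, (i % nb) % nbh);
//                 }
//               });
//       }
//     }
//   };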

// Fills a flat Tensor with indices from 0 to n - 1.
template <typename Device>
class TensorRangeFunctor {
 public:
  using OutputType = typename BlockedImageUnionFindFunctor<bool>::OutputType;

  void operator()(const Device& device,
                  typename TTypes<OutputType>::Flat tensor) {
    tensor.device(device) = tensor.generate(TensorRangeGenerator());
  }

 private:
  class TensorRangeGenerator {
   public:
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType
    operator()(const Eigen::array<Eigen::DenseIndex, 1>& coords) const {
      return coords[0];
    }
  };
};
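
// Illustrative call (an assumed call site, with a hypothetical output
// allocation): running TensorRangeFunctor over the flattened forest makes
// every pixel its own union-find root, which is the starting state the
// blocked merge passes above expect:
//
//   Tensor* forest_tensor = ...;  // hypothetical preallocated output
//   TensorRangeFunctor<Device>()(ctx->eigen_device<Device>(),
//                                forest_tensor->flat<int64>());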

// Given the union-find forest, generates the root index for each node. This
// gives us arbitrary, usually non-consecutive ids for each connected
// component (e.g. three components might get ids 1, 6, and 14 rather than
// 1, 2, and 3). The ids are massaged in Python to get deterministic,
// consecutive ids.
template <typename Device, typename T>
class FindRootFunctor {
 public:
  using OutputType = typename BlockedImageUnionFindFunctor<T>::OutputType;

  void operator()(const Device& device,
                  typename TTypes<OutputType>::Flat component_ids,
                  const T* images,
                  const BlockedImageUnionFindFunctor<T>& union_find) {
    component_ids.device(device) =
        component_ids.generate(FindRootGenerator(images, union_find));
  }

 private:
  class FindRootGenerator {
    const T* const images_;
    const BlockedImageUnionFindFunctor<T> union_find_;

   public:
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE FindRootGenerator(
        const T* images, BlockedImageUnionFindFunctor<T> union_find)
        : images_(images), union_find_(union_find) {}

    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE OutputType
    operator()(const Eigen::array<Eigen::DenseIndex, 1>& coords) const {
      if (is_nonzero<T>(images_[coords[0]])) {
        // Nonzero pixels get an arbitrary segment id > 0. The segment ids
        // will be made contiguous later.
        return union_find_.find(coords[0]) + 1;
      } else {
        // Zero pixels get a segment id of 0.
        return 0;
      }
    }
  };
};
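
// Putting the pieces together, the op implementation (in the corresponding
// .cc file) is expected to run the three functors above in sequence. A rough,
// illustrative sketch with assumed tensor names:
//
//   TensorRangeFunctor<Device>()(device, forest_flat);  // forest[i] = i
//   rank_flat.device(device) = rank_flat.constant(0);   // all ranks zero
//   ImageConnectedComponentsFunctor<Device, T>()(ctx, images, forest, rank);
//   FindRootFunctor<Device, T>()(device, output_flat, images.data(),
//                                BlockedImageUnionFindFunctor<T>(
//                                    images.data(), num_rows, num_cols,
//                                    forest.data(), rank.data()));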

}  // end namespace functor

}  // end namespace tensorflow

#endif  // TENSORFLOW_CONTRIB_IMAGE_KERNELS_SEGMENTATION_OPS_H_