1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_
17 #define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_
18
#include <stdint.h>

#include <algorithm>

#include "tensorflow/examples/android/jni/object_tracking/geom.h"
#include "tensorflow/examples/android/jni/object_tracking/utils.h"
23
// TODO(andrewharp): Make this a cast to uint32_t if/when we go unsigned for
// operations.
// Lower bound passed to Clip() when clamping pixel coordinates.
#define ZERO 0

// Pixel-access assertions. When SANITY_CHECKS is defined they forward to
// SCHECK and report the offending coordinates together with the image
// dimensions; otherwise they expand to empty blocks.
//
// IMAGE must be a pointer-like expression to an image exposing ValidPixel(),
// ValidInterpPixel(), GetWidth() and GetHeight().
#ifdef SANITY_CHECKS
// Asserts that the integer pixel (X, Y) satisfies (IMAGE)->ValidPixel().
#define CHECK_PIXEL(IMAGE, X, Y) {\
  SCHECK((IMAGE)->ValidPixel((X), (Y)), \
        "CHECK_PIXEL(%d,%d) in %dx%d image.", \
        static_cast<int>(X), static_cast<int>(Y), \
        (IMAGE)->GetWidth(), (IMAGE)->GetHeight());\
}

// Asserts that the subpixel position (X, Y) satisfies
// (IMAGE)->ValidInterpPixel(). The method name must match the declaration in
// Image (capital 'V'); the previous lowercase spelling did not name any member.
#define CHECK_PIXEL_INTERP(IMAGE, X, Y) {\
  SCHECK((IMAGE)->ValidInterpPixel((X), (Y)), \
        "CHECK_PIXEL_INTERP(%.2f, %.2f) in %dx%d image.", \
        static_cast<float>(X), static_cast<float>(Y), \
        (IMAGE)->GetWidth(), (IMAGE)->GetHeight());\
}
#else
#define CHECK_PIXEL(image, x, y) {}
#define CHECK_PIXEL_INTERP(IMAGE, X, Y) {}
#endif
46
47 namespace tf_tracking {
48
#ifdef SANITY_CHECKS
// Thin wrapper around a pointer to one row of image data. It exists only so
// that array-style column access can be range-checked when SANITY_CHECKS is
// enabled.
template <typename T>
class RowData {
 public:
  // row_data: pointer to the first element of the row.
  // max_col:  upper bound handed to InRange() when validating a column index.
  RowData(T* const row_data, const int max_col)
      : row_data_(row_data), max_col_(max_col) {}

  // Allows the wrapper to be used wherever a raw row pointer is expected.
  operator T*() const { return row_data_; }

  // Validates the column index through SCHECK, then returns a reference to the
  // element at that column.
  T& operator[](const int col) const {
    SCHECK(InRange(col, 0, max_col_),
           "Column out of range: %d (%d max)", col, max_col_);
    return *(row_data_ + col);
  }

 private:
  T* const row_data_;
  const int max_col_;
};
#endif
73
// qsort-compatible three-way comparison for values of type T.
// Both arguments must point at objects of type T. Returns 0 when the values
// compare equal, -1 when the first is less than the second, and 1 otherwise.
template <typename T>
int Comp(const void* a, const void* b) {
  const T& lhs = *static_cast<const T*>(a);
  const T& rhs = *static_cast<const T*>(b);

  // Equality is tested first, then ordering; anything else falls through to 1.
  return (lhs == rhs) ? 0 : ((lhs < rhs) ? -1 : 1);
}
88
89 // TODO(andrewharp): Make explicit which operations support negative numbers or
90 // struct/class types in image data (possibly create fast multi-dim array class
91 // for data where pixel arithmetic does not make sense).
92
93 // Image class optimized for working on numeric arrays as grayscale image data.
94 // Supports other data types as a 2D array class, so long as no pixel math
95 // operations are called (convolution, downsampling, etc).
96 template <typename T>
97 class Image {
98 public:
99 Image(const int width, const int height);
100 explicit Image(const Size& size);
101
102 // Constructor that creates an image from preallocated data.
103 // Note: The image takes ownership of the data lifecycle, unless own_data is
104 // set to false.
105 Image(const int width, const int height, T* const image_data,
106 const bool own_data = true);
107
108 ~Image();
109
110 // Extract a pixel patch from this image, starting at a subpixel location.
111 // Uses 16:16 fixed point format for representing real values and doing the
112 // bilinear interpolation.
113 //
114 // Arguments fp_x and fp_y tell the subpixel position in fixed point format,
115 // patchwidth/patchheight give the size of the patch in pixels and
116 // to_data must be a valid pointer to a *contiguous* destination data array.
117 template<class DstType>
118 bool ExtractPatchAtSubpixelFixed1616(const int fp_x,
119 const int fp_y,
120 const int patchwidth,
121 const int patchheight,
122 DstType* to_data) const;
123
124 Image<T>* Crop(
125 const int left, const int top, const int right, const int bottom) const;
126
GetWidth()127 inline int GetWidth() const { return width_; }
GetHeight()128 inline int GetHeight() const { return height_; }
129
130 // Bilinearly sample a value between pixels. Values must be within the image.
131 inline float GetPixelInterp(const float x, const float y) const;
132
133 // Bilinearly sample a pixels at a subpixel position using fixed point
134 // arithmetic.
135 // Avoids float<->int conversions.
136 // Values must be within the image.
137 // Arguments fp_x and fp_y tell the subpixel position in
138 // 16:16 fixed point format.
139 //
140 // Important: This function only makes sense for integer-valued images, such
141 // as Image<uint8_t> or Image<int> etc.
142 inline T GetPixelInterpFixed1616(const int fp_x_whole,
143 const int fp_y_whole) const;
144
145 // Returns true iff the pixel is in the image's boundaries.
146 inline bool ValidPixel(const int x, const int y) const;
147
148 inline BoundingBox GetContainingBox() const;
149
150 inline bool Contains(const BoundingBox& bounding_box) const;
151
GetMedianValue()152 inline T GetMedianValue() {
153 qsort(image_data_, data_size_, sizeof(image_data_[0]), Comp<T>);
154 return image_data_[data_size_ >> 1];
155 }
156
157 // Returns true iff the pixel is in the image's boundaries for interpolation
158 // purposes.
159 // TODO(andrewharp): check in interpolation follow-up change.
160 inline bool ValidInterpPixel(const float x, const float y) const;
161
162 // Safe lookup with boundary enforcement.
GetPixelClipped(const int x,const int y)163 inline T GetPixelClipped(const int x, const int y) const {
164 return (*this)[Clip(y, ZERO, height_less_one_)]
165 [Clip(x, ZERO, width_less_one_)];
166 }
167
168 #ifdef SANITY_CHECKS
169 inline RowData<T> operator[](const int row) {
170 SCHECK(InRange(row, 0, height_less_one_),
171 "Row out of range: %d (%d max)", row, height_less_one_);
172 return RowData<T>(image_data_ + row * stride_, width_less_one_);
173 }
174
175 inline const RowData<T> operator[](const int row) const {
176 SCHECK(InRange(row, 0, height_less_one_),
177 "Row out of range: %d (%d max)", row, height_less_one_);
178 return RowData<T>(image_data_ + row * stride_, width_less_one_);
179 }
180 #else
181 inline T* operator[](const int row) {
182 return image_data_ + row * stride_;
183 }
184
185 inline const T* operator[](const int row) const {
186 return image_data_ + row * stride_;
187 }
188 #endif
189
data()190 const T* data() const { return image_data_; }
191
stride()192 inline int stride() const { return stride_; }
193
194 // Clears image to a single value.
Clear(const T & val)195 inline void Clear(const T& val) {
196 memset(image_data_, val, sizeof(*image_data_) * data_size_);
197 }
198
199 #ifdef __ARM_NEON
200 void Downsample2x32ColumnsNeon(const uint8_t* const original,
201 const int stride, const int orig_x);
202
203 void Downsample4x32ColumnsNeon(const uint8_t* const original,
204 const int stride, const int orig_x);
205
206 void DownsampleAveragedNeon(const uint8_t* const original, const int stride,
207 const int factor);
208 #endif
209
210 // Naive downsampler that reduces image size by factor by averaging pixels in
211 // blocks of size factor x factor.
212 void DownsampleAveraged(const T* const original, const int stride,
213 const int factor);
214
215 // Naive downsampler that reduces image size by factor by averaging pixels in
216 // blocks of size factor x factor.
DownsampleAveraged(const Image<T> & original,const int factor)217 inline void DownsampleAveraged(const Image<T>& original, const int factor) {
218 DownsampleAveraged(original.data(), original.GetWidth(), factor);
219 }
220
221 // Native downsampler that reduces image size using nearest interpolation
222 void DownsampleInterpolateNearest(const Image<T>& original);
223
224 // Native downsampler that reduces image size using fixed-point bilinear
225 // interpolation
226 void DownsampleInterpolateLinear(const Image<T>& original);
227
228 // Relatively efficient downsampling of an image by a factor of two with a
229 // low-pass 3x3 smoothing operation thrown in.
230 void DownsampleSmoothed3x3(const Image<T>& original);
231
232 // Relatively efficient downsampling of an image by a factor of two with a
233 // low-pass 5x5 smoothing operation thrown in.
234 void DownsampleSmoothed5x5(const Image<T>& original);
235
236 // Optimized Scharr filter on a single pixel in the X direction.
237 // Scharr filters are like central-difference operators, but have more
238 // rotational symmetry in their response because they also consider the
239 // diagonal neighbors.
240 template <typename U>
241 inline T ScharrPixelX(const Image<U>& original,
242 const int center_x, const int center_y) const;
243
244 // Optimized Scharr filter on a single pixel in the X direction.
245 // Scharr filters are like central-difference operators, but have more
246 // rotational symmetry in their response because they also consider the
247 // diagonal neighbors.
248 template <typename U>
249 inline T ScharrPixelY(const Image<U>& original,
250 const int center_x, const int center_y) const;
251
252 // Convolve the image with a Scharr filter in the X direction.
253 // Much faster than an equivalent generic convolution.
254 template <typename U>
255 inline void ScharrX(const Image<U>& original);
256
257 // Convolve the image with a Scharr filter in the Y direction.
258 // Much faster than an equivalent generic convolution.
259 template <typename U>
260 inline void ScharrY(const Image<U>& original);
261
HalfDiff(int32_t first,int32_t second)262 static inline T HalfDiff(int32_t first, int32_t second) {
263 return (second - first) / 2;
264 }
265
266 template <typename U>
267 void DerivativeX(const Image<U>& original);
268
269 template <typename U>
270 void DerivativeY(const Image<U>& original);
271
272 // Generic function for convolving pixel with 3x3 filter.
273 // Filter pixels should be in row major order.
274 template <typename U>
275 inline T ConvolvePixel3x3(const Image<U>& original,
276 const int* const filter,
277 const int center_x, const int center_y,
278 const int total) const;
279
280 // Generic function for convolving an image with a 3x3 filter.
281 // TODO(andrewharp): Generalize this for any size filter.
282 template <typename U>
283 inline void Convolve3x3(const Image<U>& original,
284 const int32_t* const filter);
285
286 // Load this image's data from a data array. The data at pixels is assumed to
287 // have dimensions equivalent to this image's dimensions * factor.
288 inline void FromArray(const T* const pixels, const int stride,
289 const int factor = 1);
290
291 // Copy the image back out to an appropriately sized data array.
ToArray(T * const pixels)292 inline void ToArray(T* const pixels) const {
293 // If not subsampling, memcpy should be faster.
294 memcpy(pixels, this->image_data_, data_size_ * sizeof(T));
295 }
296
297 // Precompute these for efficiency's sake as they're used by a lot of
298 // clipping code and loop code.
299 // TODO(andrewharp): make these only accessible by other Images.
300 const int width_less_one_;
301 const int height_less_one_;
302
303 // The raw size of the allocated data.
304 const int data_size_;
305
306 private:
Allocate()307 inline void Allocate() {
308 image_data_ = new T[data_size_];
309 if (image_data_ == NULL) {
310 LOGE("Couldn't allocate image data!");
311 }
312 }
313
314 T* image_data_;
315
316 bool own_data_;
317
318 const int width_;
319 const int height_;
320
321 // The image stride (offset to next row).
322 // TODO(andrewharp): Make sure that stride is honored in all code.
323 const int stride_;
324
325 TF_DISALLOW_COPY_AND_ASSIGN(Image);
326 };
327
328 template <typename t>
329 inline std::ostream& operator<<(std::ostream& stream, const Image<t>& image) {
330 for (int y = 0; y < image.GetHeight(); ++y) {
331 for (int x = 0; x < image.GetWidth(); ++x) {
332 stream << image[y][x] << " ";
333 }
334 stream << std::endl;
335 }
336 return stream;
337 }
338
339 } // namespace tf_tracking
340
341 #endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_IMAGE_H_
342