1 ///////////////////////////////////////////////////////////////////////
2 // File: thresholder.cpp
// Description: Base API for thresholding images in tesseract.
4 // Author: Ray Smith
5 // Created: Mon May 12 11:28:15 PDT 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24
25 #ifdef HAVE_LIBLEPT
26 // Include leptonica library only if autoconf (or makefile etc) tell us to.
27 #include "allheaders.h"
28 #endif
29
30 #include "thresholder.h"
31
32 #include <string.h>
33
34 #include "img.h"
35 #include "otsuthr.h"
36
37 namespace tesseract {
38
ImageThresholder()39 ImageThresholder::ImageThresholder()
40 :
41 #ifdef HAVE_LIBLEPT
42 pix_(NULL),
43 #endif
44 image_data_(NULL),
45 image_width_(0), image_height_(0),
46 image_bytespp_(0), image_bytespl_(0) {
47 SetRectangle(0, 0, 0, 0);
48 }
49
~ImageThresholder()50 ImageThresholder::~ImageThresholder() {
51 Clear();
52 }
53
54 // Destroy the Pix if there is one, freeing memory.
Clear()55 void ImageThresholder::Clear() {
56 #ifdef HAVE_LIBLEPT
57 if (pix_ != NULL) {
58 pixDestroy(&pix_);
59 pix_ = NULL;
60 }
61 #endif
62 image_data_ = NULL;
63 }
64
65 // Return true if no image has been set.
IsEmpty() const66 bool ImageThresholder::IsEmpty() const {
67 #ifdef HAVE_LIBLEPT
68 if (pix_ != NULL)
69 return false;
70 #endif
71 return image_data_ == NULL;
72 }
73
74 // SetImage makes a copy of only the metadata, not the underlying
75 // image buffer. It promises to treat the source as read-only in either case,
76 // but in return assumes that the Pix or image buffer remain valid
77 // throughout the life of the ImageThresholder.
78 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
79 // Palette color images will not work properly and must be converted to
80 // 24 bit.
81 // Binary images of 1 bit per pixel may also be given but they must be
82 // byte packed with the MSB of the first byte being the first pixel, and a
83 // one pixel is WHITE. For binary images set bytes_per_pixel=0.
SetImage(const unsigned char * imagedata,int width,int height,int bytes_per_pixel,int bytes_per_line)84 void ImageThresholder::SetImage(const unsigned char* imagedata,
85 int width, int height,
86 int bytes_per_pixel, int bytes_per_line) {
87 #ifdef HAVE_LIBLEPT
88 if (pix_ != NULL)
89 pixDestroy(&pix_);
90 pix_ = NULL;
91 #endif
92 image_data_ = imagedata;
93 image_width_ = width;
94 image_height_ = height;
95 image_bytespp_ = bytes_per_pixel;
96 image_bytespl_ = bytes_per_line;
97 Init();
98 }
99
100 // Store the coordinates of the rectangle to process for later use.
101 // Doesn't actually do any thresholding.
SetRectangle(int left,int top,int width,int height)102 void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
103 rect_left_ = left;
104 rect_top_ = top;
105 rect_width_ = width;
106 rect_height_ = height;
107 }
108
109 // Get enough parameters to be able to rebuild bounding boxes in the
110 // original image (not just within the rectangle).
111 // Left and top are enough with top-down coordinates, but
112 // the height of the rectangle and the image are needed for bottom-up.
GetImageSizes(int * left,int * top,int * width,int * height,int * imagewidth,int * imageheight)113 void ImageThresholder::GetImageSizes(int* left, int* top,
114 int* width, int* height,
115 int* imagewidth, int* imageheight) {
116 *left = rect_left_;
117 *top = rect_top_;
118 *width = rect_width_;
119 *height = rect_height_;
120 *imagewidth = image_width_;
121 *imageheight = image_height_;
122 }
123
124 // Return true if HAVE_LIBLEPT and this thresholder implements the Pix
125 // interface.
HasThresholdToPix() const126 bool ImageThresholder::HasThresholdToPix() const {
127 #ifdef HAVE_LIBLEPT
128 return true;
129 #else
130 return false;
131 #endif
132 }
133
134 // Threshold the source image as efficiently as possible to the output
135 // tesseract IMAGE class.
ThresholdToIMAGE(IMAGE * image)136 void ImageThresholder::ThresholdToIMAGE(IMAGE* image) {
137 #ifdef HAVE_LIBLEPT
138 if (pix_ != NULL) {
139 if (image_bytespp_ == 0) {
140 // We have a binary image, so it just has to be converted.
141 CopyBinaryRectPixToIMAGE(image);
142 } else {
143 if (image_bytespp_ == 4) {
144 // Color data can just be passed direct.
145 const uinT32* data = pixGetData(pix_);
146 OtsuThresholdRectToIMAGE(reinterpret_cast<const uinT8*>(data),
147 image_bytespp_, image_bytespl_, image);
148 } else {
149 // Convert 8-bit to IMAGE and then pass its
150 // buffer to the raw interface to complete the conversion.
151 IMAGE temp_image;
152 temp_image.FromPix(pix_);
153 OtsuThresholdRectToIMAGE(temp_image.get_buffer(),
154 image_bytespp_,
155 COMPUTE_IMAGE_XDIM(temp_image.get_xsize(),
156 temp_image.get_bpp()),
157 image);
158 }
159 }
160 return;
161 }
162 #endif
163 if (image_bytespp_ > 0) {
164 // Threshold grey or color.
165 OtsuThresholdRectToIMAGE(image_data_, image_bytespp_, image_bytespl_,
166 image);
167 } else {
168 CopyBinaryRectRawToIMAGE(image);
169 }
170 }
171
172 #ifdef HAVE_LIBLEPT
173 // NOTE: Opposite to SetImage for raw images, SetImage for Pix clones its
174 // input, so the source pix may be pixDestroyed immediately after.
SetImage(const Pix * pix)175 void ImageThresholder::SetImage(const Pix* pix) {
176 image_data_ = NULL;
177 if (pix_ != NULL)
178 pixDestroy(&pix_);
179 Pix* src = const_cast<Pix*>(pix);
180 int depth;
181 pixGetDimensions(src, &image_width_, &image_height_, &depth);
182 // Convert the image as necessary so it is one of binary, plain RGB, or
183 // 8 bit with no colormap.
184 if (depth > 1 && depth < 8) {
185 pix_ = pixConvertTo8(src, false);
186 } else if (pixGetColormap(src)) {
187 pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
188 } else {
189 pix_ = pixClone(src);
190 }
191 depth = pixGetDepth(pix_);
192 image_bytespp_ = depth / 8;
193 image_bytespl_ = pixGetWpl(pix_) * sizeof(l_uint32);
194 Init();
195 }
196
197 // Threshold the source image as efficiently as possible to the output Pix.
198 // Creates a Pix and sets pix to point to the resulting pointer.
199 // Caller must use pixDestroy to free the created Pix.
ThresholdToPix(Pix ** pix)200 void ImageThresholder::ThresholdToPix(Pix** pix) {
201 if (pix_ != NULL) {
202 if (image_bytespp_ == 0) {
203 // We have a binary image, so it just has to be cloned.
204 *pix = GetPixRect();
205 } else {
206 if (image_bytespp_ == 4) {
207 // Color data can just be passed direct.
208 const uinT32* data = pixGetData(pix_);
209 OtsuThresholdRectToPix(reinterpret_cast<const uinT8*>(data),
210 image_bytespp_, image_bytespl_, pix);
211 } else {
212 // Convert 8-bit to IMAGE and then pass its
213 // buffer to the raw interface to complete the conversion.
214 IMAGE temp_image;
215 temp_image.FromPix(pix_);
216 OtsuThresholdRectToPix(temp_image.get_buffer(),
217 image_bytespp_,
218 COMPUTE_IMAGE_XDIM(temp_image.get_xsize(),
219 temp_image.get_bpp()),
220 pix);
221 }
222 }
223 return;
224 }
225 if (image_bytespp_ > 0) {
226 // Threshold grey or color.
227 OtsuThresholdRectToPix(image_data_, image_bytespp_, image_bytespl_, pix);
228 } else {
229 RawRectToPix(pix);
230 }
231 }
232
233 // Get a clone/copy of the source image rectangle.
234 // The returned Pix must be pixDestroyed.
235 // This function will be used in the future by the page layout analysis, and
236 // the layout analysis that uses it will only be available with Leptonica,
237 // so there is no raw equivalent.
GetPixRect()238 Pix* ImageThresholder::GetPixRect() {
239 if (pix_ != NULL) {
240 if (IsFullImage()) {
241 // Just clone the whole thing.
242 return pixClone(pix_);
243 } else {
244 // Crop to the given rectangle.
245 Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
246 Pix* cropped = pixClipRectangle(pix_, box, NULL);
247 boxDestroy(&box);
248 return cropped;
249 }
250 }
251 // The input is raw, so we have to make a copy of it.
252 Pix* raw_pix;
253 RawRectToPix(&raw_pix);
254 return raw_pix;
255 }
256 #endif
257
258 // Common initialization shared between SetImage methods.
Init()259 void ImageThresholder::Init() {
260 SetRectangle(0, 0, image_width_, image_height_);
261 }
262
263 // Otsu threshold the rectangle, taking everything except the image buffer
264 // pointer from the class, to the output IMAGE.
OtsuThresholdRectToIMAGE(const unsigned char * imagedata,int bytes_per_pixel,int bytes_per_line,IMAGE * image) const265 void ImageThresholder::OtsuThresholdRectToIMAGE(const unsigned char* imagedata,
266 int bytes_per_pixel,
267 int bytes_per_line,
268 IMAGE* image) const {
269 int* thresholds;
270 int* hi_values;
271 OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
272 rect_left_, rect_top_, rect_width_, rect_height_,
273 &thresholds, &hi_values);
274
275 // Threshold the image to the given IMAGE.
276 ThresholdRectToIMAGE(imagedata, bytes_per_pixel, bytes_per_line,
277 thresholds, hi_values, image);
278 delete [] thresholds;
279 delete [] hi_values;
280 }
281
282 // Threshold the given grey or color image into the tesseract global
283 // image ready for recognition. Requires thresholds and hi_value
284 // produced by OtsuThreshold in otsuthr.cpp.
ThresholdRectToIMAGE(const unsigned char * imagedata,int bytes_per_pixel,int bytes_per_line,const int * thresholds,const int * hi_values,IMAGE * image) const285 void ImageThresholder::ThresholdRectToIMAGE(const unsigned char* imagedata,
286 int bytes_per_pixel,
287 int bytes_per_line,
288 const int* thresholds,
289 const int* hi_values,
290 IMAGE* image) const {
291 IMAGELINE line;
292 image->create(rect_width_, rect_height_, 1);
293 line.init(rect_width_);
294 // For each line in the image, fill the IMAGELINE class and put it into the
295 // output IMAGE. Note that Tesseract stores images with the
296 // bottom at y=0 and 0 is black, so we need 2 kinds of inversion.
297 const unsigned char* data = imagedata + rect_top_* bytes_per_line +
298 rect_left_ * bytes_per_pixel;
299 for (int y = rect_height_ - 1 ; y >= 0; --y) {
300 const unsigned char* pix = data;
301 for (int x = 0; x < rect_width_; ++x, pix += bytes_per_pixel) {
302 line.pixels[x] = 1;
303 for (int ch = 0; ch < bytes_per_pixel; ++ch) {
304 if (hi_values[ch] >= 0 &&
305 (pix[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
306 line.pixels[x] = 0;
307 break;
308 }
309 }
310 }
311 image->put_line(0, y, rect_width_, &line, 0);
312 data += bytes_per_line;
313 }
314 }
315
316 // Cut out the requested rectangle of the binary image to the output IMAGE.
CopyBinaryRectRawToIMAGE(IMAGE * image) const317 void ImageThresholder::CopyBinaryRectRawToIMAGE(IMAGE* image) const {
318 IMAGE rect_image;
319 rect_image.capture(const_cast<unsigned char*>(image_data_),
320 image_width_, rect_top_ + rect_height_, 1);
321 image->create(rect_width_, rect_height_, 1);
322 // copy_sub_image uses coords starting at the bottom, so the y coord of the
323 // copy is the bottom of the rect_image.
324 copy_sub_image(&rect_image, rect_left_, 0, rect_width_, rect_height_,
325 image, 0, 0, false);
326 }
327
328 #ifdef HAVE_LIBLEPT
329 // Otsu threshold the rectangle, taking everything except the image buffer
330 // pointer from the class, to the output Pix.
OtsuThresholdRectToPix(const unsigned char * imagedata,int bytes_per_pixel,int bytes_per_line,Pix ** pix) const331 void ImageThresholder::OtsuThresholdRectToPix(const unsigned char* imagedata,
332 int bytes_per_pixel,
333 int bytes_per_line,
334 Pix** pix) const {
335 int* thresholds;
336 int* hi_values;
337 OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
338 rect_left_, rect_top_, rect_width_, rect_height_,
339 &thresholds, &hi_values);
340
341 // Threshold the image to the given IMAGE.
342 ThresholdRectToPix(imagedata, bytes_per_pixel, bytes_per_line,
343 thresholds, hi_values, pix);
344 delete [] thresholds;
345 delete [] hi_values;
346 }
347
348 // Threshold the rectangle, taking everything except the image buffer pointer
349 // from the class, using thresholds/hi_values to the output IMAGE.
ThresholdRectToPix(const unsigned char * imagedata,int bytes_per_pixel,int bytes_per_line,const int * thresholds,const int * hi_values,Pix ** pix) const350 void ImageThresholder::ThresholdRectToPix(const unsigned char* imagedata,
351 int bytes_per_pixel,
352 int bytes_per_line,
353 const int* thresholds,
354 const int* hi_values,
355 Pix** pix) const {
356 *pix = pixCreate(rect_width_, rect_height_, 1);
357 uinT32* pixdata = pixGetData(*pix);
358 int wpl = pixGetWpl(*pix);
359 const unsigned char* srcdata = imagedata + rect_top_* bytes_per_line +
360 rect_left_ * bytes_per_pixel;
361 for (int y = 0; y < rect_height_; ++y) {
362 const uinT8* linedata = srcdata;
363 uinT32* pixline = pixdata + y * wpl;
364 for (int x = 0; x < rect_width_; ++x, linedata += bytes_per_pixel) {
365 bool white_result = true;
366 for (int ch = 0; ch < bytes_per_pixel; ++ch) {
367 if (hi_values[ch] >= 0 &&
368 (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
369 white_result = false;
370 break;
371 }
372 }
373 if (white_result)
374 CLEAR_DATA_BIT(pixline, x);
375 else
376 SET_DATA_BIT(pixline, x);
377 }
378 srcdata += bytes_per_line;
379 }
380 }
381
382 // Copy the raw image rectangle, taking all data from the class, to the Pix.
RawRectToPix(Pix ** pix) const383 void ImageThresholder::RawRectToPix(Pix** pix) const {
384 if (image_bytespp_ < 4) {
385 // Go via a tesseract image structure (doesn't copy the data)
386 // and use ToPix.
387 IMAGE image;
388 int bits_per_pixel = image_bytespp_ * 8;
389 if (image_bytespp_ == 0)
390 bits_per_pixel = 1;
391 image.capture(const_cast<uinT8*>(image_data_),
392 image_width_, rect_top_ + rect_height_, bits_per_pixel);
393 if (IsFullImage()) {
394 *pix = image.ToPix();
395 } else {
396 IMAGE rect;
397 rect.create(rect_width_, rect_height_, bits_per_pixel);
398 // The capture chopped the image off at top+height, so copy
399 // the rectangle with y = 0 to get a rectangle of height
400 // starting at the bottom, since copy_sub_image uses bottom-up coords.
401 copy_sub_image(&image, rect_left_, 0, rect_width_, rect_height_,
402 &rect, 0, 0, true);
403 *pix = rect.ToPix();
404 }
405 } else {
406 *pix = pixCreate(rect_width_, rect_height_, 32);
407 uinT32* data = pixGetData(*pix);
408 int wpl = pixGetWpl(*pix);
409 const uinT8* imagedata = image_data_ + rect_top_ * image_bytespl_ +
410 rect_left_ * image_bytespp_;
411 for (int y = 0; y < rect_height_; ++y) {
412 const uinT8* linedata = imagedata;
413 uinT32* line = data + y * wpl;
414 for (int x = 0; x < rect_width_; ++x) {
415 line[x] = (linedata[0] << 24) | (linedata[1] << 16) |
416 (linedata[2] << 8) | linedata[3];
417 linedata += 4;
418 }
419 imagedata += image_bytespl_;
420 }
421 }
422 }
423
424 // Cut out the requested rectangle of the binary image to the output IMAGE.
CopyBinaryRectPixToIMAGE(IMAGE * image) const425 void ImageThresholder::CopyBinaryRectPixToIMAGE(IMAGE* image) const {
426 if (IsFullImage()) {
427 // Just poke it directly into the tess image.
428 image->FromPix(pix_);
429 } else {
430 // Crop to the given rectangle.
431 Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
432 Pix* cropped = pixClipRectangle(pix_, box, NULL);
433 image->FromPix(cropped);
434 pixDestroy(&cropped);
435 boxDestroy(&box);
436 }
437 }
438 #endif
439
440 } // namespace tesseract.
441
442