• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////
2 // File:        thresholder.cpp
3 // Description: Base API for thresholding images in tesseract.
4 // Author:      Ray Smith
5 // Created:     Mon May 12 11:28:15 PDT 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #ifdef HAVE_LIBLEPT
26 // Include leptonica library only if autoconf (or makefile etc) tell us to.
27 #include "allheaders.h"
28 #endif
29 
30 #include "thresholder.h"
31 
32 #include <string.h>
33 
34 #include "img.h"
35 #include "otsuthr.h"
36 
37 namespace tesseract {
38 
ImageThresholder()39 ImageThresholder::ImageThresholder()
40   :
41 #ifdef HAVE_LIBLEPT
42     pix_(NULL),
43 #endif
44     image_data_(NULL),
45     image_width_(0), image_height_(0),
46     image_bytespp_(0), image_bytespl_(0) {
47   SetRectangle(0, 0, 0, 0);
48 }
49 
~ImageThresholder()50 ImageThresholder::~ImageThresholder() {
51   Clear();
52 }
53 
54 // Destroy the Pix if there is one, freeing memory.
Clear()55 void ImageThresholder::Clear() {
56 #ifdef HAVE_LIBLEPT
57   if (pix_ != NULL) {
58     pixDestroy(&pix_);
59     pix_ = NULL;
60   }
61 #endif
62   image_data_ = NULL;
63 }
64 
65 // Return true if no image has been set.
IsEmpty() const66 bool ImageThresholder::IsEmpty() const {
67 #ifdef HAVE_LIBLEPT
68   if (pix_ != NULL)
69     return false;
70 #endif
71   return image_data_ == NULL;
72 }
73 
74 // SetImage makes a copy of only the metadata, not the underlying
75 // image buffer. It promises to treat the source as read-only in either case,
76 // but in return assumes that the Pix or image buffer remain valid
77 // throughout the life of the ImageThresholder.
78 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
79 // Palette color images will not work properly and must be converted to
80 // 24 bit.
81 // Binary images of 1 bit per pixel may also be given but they must be
82 // byte packed with the MSB of the first byte being the first pixel, and a
83 // one pixel is WHITE. For binary images set bytes_per_pixel=0.
SetImage(const unsigned char * imagedata,int width,int height,int bytes_per_pixel,int bytes_per_line)84 void ImageThresholder::SetImage(const unsigned char* imagedata,
85                                 int width, int height,
86                                 int bytes_per_pixel, int bytes_per_line) {
87 #ifdef HAVE_LIBLEPT
88   if (pix_ != NULL)
89     pixDestroy(&pix_);
90   pix_ = NULL;
91 #endif
92   image_data_ = imagedata;
93   image_width_ = width;
94   image_height_ = height;
95   image_bytespp_ = bytes_per_pixel;
96   image_bytespl_ = bytes_per_line;
97   Init();
98 }
99 
100 // Store the coordinates of the rectangle to process for later use.
101 // Doesn't actually do any thresholding.
SetRectangle(int left,int top,int width,int height)102 void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
103   rect_left_ = left;
104   rect_top_ = top;
105   rect_width_ = width;
106   rect_height_ = height;
107 }
108 
109 // Get enough parameters to be able to rebuild bounding boxes in the
110 // original image (not just within the rectangle).
111 // Left and top are enough with top-down coordinates, but
112 // the height of the rectangle and the image are needed for bottom-up.
GetImageSizes(int * left,int * top,int * width,int * height,int * imagewidth,int * imageheight)113 void ImageThresholder::GetImageSizes(int* left, int* top,
114                                      int* width, int* height,
115                                      int* imagewidth, int* imageheight) {
116   *left = rect_left_;
117   *top = rect_top_;
118   *width = rect_width_;
119   *height = rect_height_;
120   *imagewidth = image_width_;
121   *imageheight = image_height_;
122 }
123 
124 // Return true if HAVE_LIBLEPT and this thresholder implements the Pix
125 // interface.
HasThresholdToPix() const126 bool ImageThresholder::HasThresholdToPix() const {
127 #ifdef HAVE_LIBLEPT
128   return true;
129 #else
130   return false;
131 #endif
132 }
133 
134 // Threshold the source image as efficiently as possible to the output
135 // tesseract IMAGE class.
ThresholdToIMAGE(IMAGE * image)136 void ImageThresholder::ThresholdToIMAGE(IMAGE* image) {
137 #ifdef HAVE_LIBLEPT
138   if (pix_ != NULL) {
139     if (image_bytespp_ == 0) {
140       // We have a binary image, so it just has to be converted.
141       CopyBinaryRectPixToIMAGE(image);
142     } else {
143       if (image_bytespp_ == 4) {
144         // Color data can just be passed direct.
145         const uinT32* data = pixGetData(pix_);
146         OtsuThresholdRectToIMAGE(reinterpret_cast<const uinT8*>(data),
147                                  image_bytespp_, image_bytespl_, image);
148       } else {
149         // Convert 8-bit to IMAGE and then pass its
150         // buffer to the raw interface to complete the conversion.
151         IMAGE temp_image;
152         temp_image.FromPix(pix_);
153         OtsuThresholdRectToIMAGE(temp_image.get_buffer(),
154                                  image_bytespp_,
155                                  COMPUTE_IMAGE_XDIM(temp_image.get_xsize(),
156                                                     temp_image.get_bpp()),
157                                  image);
158       }
159     }
160     return;
161   }
162 #endif
163   if (image_bytespp_ > 0) {
164     // Threshold grey or color.
165     OtsuThresholdRectToIMAGE(image_data_, image_bytespp_, image_bytespl_,
166                              image);
167   } else {
168     CopyBinaryRectRawToIMAGE(image);
169   }
170 }
171 
172 #ifdef HAVE_LIBLEPT
173 // NOTE: Opposite to SetImage for raw images, SetImage for Pix clones its
174 // input, so the source pix may be pixDestroyed immediately after.
SetImage(const Pix * pix)175 void ImageThresholder::SetImage(const Pix* pix) {
176   image_data_ = NULL;
177   if (pix_ != NULL)
178     pixDestroy(&pix_);
179   Pix* src = const_cast<Pix*>(pix);
180   int depth;
181   pixGetDimensions(src, &image_width_, &image_height_, &depth);
182   // Convert the image as necessary so it is one of binary, plain RGB, or
183   // 8 bit with no colormap.
184   if (depth > 1 && depth < 8) {
185     pix_ = pixConvertTo8(src, false);
186   } else if (pixGetColormap(src)) {
187     pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
188   } else {
189     pix_ = pixClone(src);
190   }
191   depth = pixGetDepth(pix_);
192   image_bytespp_ = depth / 8;
193   image_bytespl_ = pixGetWpl(pix_) * sizeof(l_uint32);
194   Init();
195 }
196 
197 // Threshold the source image as efficiently as possible to the output Pix.
198 // Creates a Pix and sets pix to point to the resulting pointer.
199 // Caller must use pixDestroy to free the created Pix.
ThresholdToPix(Pix ** pix)200 void ImageThresholder::ThresholdToPix(Pix** pix) {
201   if (pix_ != NULL) {
202     if (image_bytespp_ == 0) {
203       // We have a binary image, so it just has to be cloned.
204       *pix = GetPixRect();
205     } else {
206       if (image_bytespp_ == 4) {
207         // Color data can just be passed direct.
208         const uinT32* data = pixGetData(pix_);
209         OtsuThresholdRectToPix(reinterpret_cast<const uinT8*>(data),
210                                image_bytespp_, image_bytespl_, pix);
211       } else {
212         // Convert 8-bit to IMAGE and then pass its
213         // buffer to the raw interface to complete the conversion.
214         IMAGE temp_image;
215         temp_image.FromPix(pix_);
216         OtsuThresholdRectToPix(temp_image.get_buffer(),
217                                image_bytespp_,
218                                COMPUTE_IMAGE_XDIM(temp_image.get_xsize(),
219                                                   temp_image.get_bpp()),
220                                pix);
221       }
222     }
223     return;
224   }
225   if (image_bytespp_ > 0) {
226     // Threshold grey or color.
227     OtsuThresholdRectToPix(image_data_, image_bytespp_, image_bytespl_, pix);
228   } else {
229     RawRectToPix(pix);
230   }
231 }
232 
233 // Get a clone/copy of the source image rectangle.
234 // The returned Pix must be pixDestroyed.
235 // This function will be used in the future by the page layout analysis, and
236 // the layout analysis that uses it will only be available with Leptonica,
237 // so there is no raw equivalent.
GetPixRect()238 Pix* ImageThresholder::GetPixRect() {
239   if (pix_ != NULL) {
240     if (IsFullImage()) {
241       // Just clone the whole thing.
242       return pixClone(pix_);
243     } else {
244       // Crop to the given rectangle.
245       Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
246       Pix* cropped = pixClipRectangle(pix_, box, NULL);
247       boxDestroy(&box);
248       return cropped;
249     }
250   }
251   // The input is raw, so we have to make a copy of it.
252   Pix* raw_pix;
253   RawRectToPix(&raw_pix);
254   return raw_pix;
255 }
256 #endif
257 
258 // Common initialization shared between SetImage methods.
Init()259 void ImageThresholder::Init() {
260   SetRectangle(0, 0, image_width_, image_height_);
261 }
262 
263 // Otsu threshold the rectangle, taking everything except the image buffer
264 // pointer from the class, to the output IMAGE.
OtsuThresholdRectToIMAGE(const unsigned char * imagedata,int bytes_per_pixel,int bytes_per_line,IMAGE * image) const265 void ImageThresholder::OtsuThresholdRectToIMAGE(const unsigned char* imagedata,
266                                                 int bytes_per_pixel,
267                                                 int bytes_per_line,
268                                                 IMAGE* image) const {
269   int* thresholds;
270   int* hi_values;
271   OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
272                 rect_left_, rect_top_, rect_width_, rect_height_,
273                 &thresholds, &hi_values);
274 
275   // Threshold the image to the given IMAGE.
276   ThresholdRectToIMAGE(imagedata, bytes_per_pixel, bytes_per_line,
277                        thresholds, hi_values, image);
278   delete [] thresholds;
279   delete [] hi_values;
280 }
281 
282 // Threshold the given grey or color image into the tesseract global
283 // image ready for recognition. Requires thresholds and hi_value
284 // produced by OtsuThreshold in otsuthr.cpp.
ThresholdRectToIMAGE(const unsigned char * imagedata,int bytes_per_pixel,int bytes_per_line,const int * thresholds,const int * hi_values,IMAGE * image) const285 void ImageThresholder::ThresholdRectToIMAGE(const unsigned char* imagedata,
286                                             int bytes_per_pixel,
287                                             int bytes_per_line,
288                                             const int* thresholds,
289                                             const int* hi_values,
290                                             IMAGE* image) const {
291   IMAGELINE line;
292   image->create(rect_width_, rect_height_, 1);
293   line.init(rect_width_);
294   // For each line in the image, fill the IMAGELINE class and put it into the
295   // output IMAGE. Note that Tesseract stores images with the
296   // bottom at y=0 and 0 is black, so we need 2 kinds of inversion.
297   const unsigned char* data = imagedata + rect_top_* bytes_per_line +
298                               rect_left_ * bytes_per_pixel;
299   for (int y = rect_height_ - 1 ; y >= 0; --y) {
300     const unsigned char* pix = data;
301     for (int x = 0; x < rect_width_; ++x, pix += bytes_per_pixel) {
302       line.pixels[x] = 1;
303       for (int ch = 0; ch < bytes_per_pixel; ++ch) {
304         if (hi_values[ch] >= 0 &&
305             (pix[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
306           line.pixels[x] = 0;
307           break;
308         }
309       }
310     }
311     image->put_line(0, y, rect_width_, &line, 0);
312     data += bytes_per_line;
313   }
314 }
315 
316 // Cut out the requested rectangle of the binary image to the output IMAGE.
CopyBinaryRectRawToIMAGE(IMAGE * image) const317 void ImageThresholder::CopyBinaryRectRawToIMAGE(IMAGE* image) const {
318   IMAGE rect_image;
319   rect_image.capture(const_cast<unsigned char*>(image_data_),
320                      image_width_, rect_top_ + rect_height_, 1);
321   image->create(rect_width_, rect_height_, 1);
322   // copy_sub_image uses coords starting at the bottom, so the y coord of the
323   // copy is the bottom of the rect_image.
324   copy_sub_image(&rect_image, rect_left_, 0, rect_width_, rect_height_,
325                  image, 0, 0, false);
326 }
327 
328 #ifdef HAVE_LIBLEPT
329 // Otsu threshold the rectangle, taking everything except the image buffer
330 // pointer from the class, to the output Pix.
OtsuThresholdRectToPix(const unsigned char * imagedata,int bytes_per_pixel,int bytes_per_line,Pix ** pix) const331 void ImageThresholder::OtsuThresholdRectToPix(const unsigned char* imagedata,
332                                               int bytes_per_pixel,
333                                               int bytes_per_line,
334                                               Pix** pix) const {
335   int* thresholds;
336   int* hi_values;
337   OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
338                 rect_left_, rect_top_, rect_width_, rect_height_,
339                 &thresholds, &hi_values);
340 
341   // Threshold the image to the given IMAGE.
342   ThresholdRectToPix(imagedata, bytes_per_pixel, bytes_per_line,
343                      thresholds, hi_values, pix);
344   delete [] thresholds;
345   delete [] hi_values;
346 }
347 
348 // Threshold the rectangle, taking everything except the image buffer pointer
349 // from the class, using thresholds/hi_values to the output IMAGE.
ThresholdRectToPix(const unsigned char * imagedata,int bytes_per_pixel,int bytes_per_line,const int * thresholds,const int * hi_values,Pix ** pix) const350 void ImageThresholder::ThresholdRectToPix(const unsigned char* imagedata,
351                                           int bytes_per_pixel,
352                                           int bytes_per_line,
353                                           const int* thresholds,
354                                           const int* hi_values,
355                                           Pix** pix) const {
356   *pix = pixCreate(rect_width_, rect_height_, 1);
357   uinT32* pixdata = pixGetData(*pix);
358   int wpl = pixGetWpl(*pix);
359   const unsigned char* srcdata = imagedata + rect_top_* bytes_per_line +
360                                  rect_left_ * bytes_per_pixel;
361   for (int y = 0; y < rect_height_; ++y) {
362     const uinT8* linedata = srcdata;
363     uinT32* pixline = pixdata + y * wpl;
364     for (int x = 0; x < rect_width_; ++x, linedata += bytes_per_pixel) {
365       bool white_result = true;
366       for (int ch = 0; ch < bytes_per_pixel; ++ch) {
367         if (hi_values[ch] >= 0 &&
368             (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
369           white_result = false;
370           break;
371         }
372       }
373       if (white_result)
374         CLEAR_DATA_BIT(pixline, x);
375       else
376         SET_DATA_BIT(pixline, x);
377     }
378     srcdata += bytes_per_line;
379   }
380 }
381 
382 // Copy the raw image rectangle, taking all data from the class, to the Pix.
RawRectToPix(Pix ** pix) const383 void ImageThresholder::RawRectToPix(Pix** pix) const {
384   if (image_bytespp_ < 4) {
385     // Go via a tesseract image structure (doesn't copy the data)
386     // and use ToPix.
387     IMAGE image;
388     int bits_per_pixel = image_bytespp_ * 8;
389     if (image_bytespp_ == 0)
390       bits_per_pixel = 1;
391     image.capture(const_cast<uinT8*>(image_data_),
392                   image_width_, rect_top_ + rect_height_, bits_per_pixel);
393     if (IsFullImage()) {
394       *pix = image.ToPix();
395     } else {
396       IMAGE rect;
397       rect.create(rect_width_, rect_height_, bits_per_pixel);
398       // The capture chopped the image off at top+height, so copy
399       // the rectangle with y = 0 to get a rectangle of height
400       // starting at the bottom, since copy_sub_image uses bottom-up coords.
401       copy_sub_image(&image, rect_left_, 0, rect_width_, rect_height_,
402                      &rect, 0, 0, true);
403       *pix = rect.ToPix();
404     }
405   } else {
406     *pix = pixCreate(rect_width_, rect_height_, 32);
407     uinT32* data = pixGetData(*pix);
408     int wpl = pixGetWpl(*pix);
409     const uinT8* imagedata = image_data_ + rect_top_ * image_bytespl_ +
410                              rect_left_ * image_bytespp_;
411     for (int y = 0; y < rect_height_; ++y) {
412       const uinT8* linedata = imagedata;
413       uinT32* line = data + y * wpl;
414       for (int x = 0; x < rect_width_; ++x) {
415         line[x] = (linedata[0] << 24) | (linedata[1] << 16) |
416                   (linedata[2] << 8) | linedata[3];
417         linedata += 4;
418       }
419       imagedata += image_bytespl_;
420     }
421   }
422 }
423 
424 // Cut out the requested rectangle of the binary image to the output IMAGE.
CopyBinaryRectPixToIMAGE(IMAGE * image) const425 void ImageThresholder::CopyBinaryRectPixToIMAGE(IMAGE* image) const {
426   if (IsFullImage()) {
427     // Just poke it directly into the tess image.
428     image->FromPix(pix_);
429   } else {
430     // Crop to the given rectangle.
431     Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
432     Pix* cropped = pixClipRectangle(pix_, box, NULL);
433     image->FromPix(cropped);
434     pixDestroy(&cropped);
435     boxDestroy(&box);
436   }
437 }
438 #endif
439 
440 }  // namespace tesseract.
441 
442