///////////////////////////////////////////////////////////////////////
// File:        thresholder.h
// Description: Base API for thresholding images in tesseract.
// Author:      Ray Smith
// Created:     Mon May 12 11:00:15 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CCMAIN_THRESHOLDER_H__
#define TESSERACT_CCMAIN_THRESHOLDER_H__

class IMAGE;
struct Pix;

namespace tesseract {

// Common base for every image thresholding class in tesseract.
// A derived class supplies a new thresholding method by overriding
// ThresholdToIMAGE and/or ThresholdToPix.
// One instance handles one image; the design nevertheless allows
// SetRectangle and ThresholdTo* to be called repeatedly on that image.
class ImageThresholder {
 public:
  ImageThresholder();
  virtual ~ImageThresholder();

  // Frees memory by destroying the Pix, if one is held.
  virtual void Clear();

  // True when no image has been set.
  bool IsEmpty() const;

  // Records the image metadata only; the pixel buffer itself is NOT copied.
  // The source is treated as read-only, and in exchange the caller must keep
  // the Pix or image buffer valid for the whole life of the ImageThresholder.
  // Accepted inputs:
  //  - greyscale at 8 bits per pixel;
  //  - color at 24 or 32 bits per pixel. Palette color images will not work
  //    properly and must be converted to 24 bit first;
  //  - binary at 1 bit per pixel, byte packed, with the MSB of the first byte
  //    being the first pixel and a one pixel meaning WHITE. Pass
  //    bytes_per_pixel=0 for binary images.
  void SetImage(const unsigned char* imagedata, int width, int height,
                int bytes_per_pixel, int bytes_per_line);

  // Remembers the rectangle that later calls should process.
  // No thresholding happens here.
  void SetRectangle(int left, int top, int width, int height);

  // Reports enough geometry to rebuild bounding boxes in the coordinates of
  // the original image rather than of the rectangle alone.
  // With top-down coordinates left and top suffice; bottom-up coordinates
  // additionally need the heights of the rectangle and of the image.
  virtual void GetImageSizes(int* left, int* top, int* width, int* height,
                             int* imagewidth, int* imageheight);

  // True if HAVE_LIBLEPT and this thresholder implements the Pix interface.
  virtual bool HasThresholdToPix() const;

  // True if the source image is color.
  bool IsColor() const {
    const bool has_color_channels = image_bytespp_ >= 3;
    return has_color_channels;
  }

  // Thresholds the source image, as efficiently as possible, into the
  // output tesseract IMAGE class.
  virtual void ThresholdToIMAGE(IMAGE* image);

#ifdef HAVE_LIBLEPT
  // Pix vs raw, which to use?
  // Implementations should be able to both source and target Pix wherever
  // possible. A future version of Tesseract may adopt Pix as its internal
  // representation and drop IMAGE altogether, in which case an
  // implementation that sources and targets Pix may end up making fewer
  // copies than one that does not.
  // NOTE: Unlike SetImage for raw images, SetImage for Pix clones its input,
  // so the source pix may be pixDestroyed immediately afterwards.
  void SetImage(const Pix* pix);

  // Thresholds the source image, as efficiently as possible, into a newly
  // created Pix and makes *pix point at it.
  // The caller must release the created Pix with pixDestroy.
  virtual void ThresholdToPix(Pix** pix);

  // Returns a clone/copy of the source image rectangle.
  // The returned Pix must be pixDestroyed.
  // Intended for future use by the page layout analysis; that analysis will
  // only be available with Leptonica, hence there is no raw equivalent.
  Pix* GetPixRect();
#endif

 protected:
  // ----------------------------------------------------------------------
  // Utility functions that may be useful building blocks for other
  // thresholders.

  // Initialization common to the SetImage methods.
  virtual void Init();

  // True if the rectangle to process is the full image.
  bool IsFullImage() const {
    if (rect_left_ != 0 || rect_top_ != 0) {
      return false;
    }
    return rect_width_ == image_width_ && rect_height_ == image_height_;
  }

  // Otsu thresholds the rectangle into the output IMAGE. Everything except
  // the image buffer pointer is taken from the class.
  void OtsuThresholdRectToIMAGE(const unsigned char* imagedata,
                                int bytes_per_pixel, int bytes_per_line,
                                IMAGE* image) const;

  // Thresholds the rectangle into the output IMAGE using
  // thresholds/hi_values. Everything except the image buffer pointer is
  // taken from the class.
  void ThresholdRectToIMAGE(const unsigned char* imagedata,
                            int bytes_per_pixel, int bytes_per_line,
                            const int* thresholds, const int* hi_values,
                            IMAGE* image) const;

  // Cuts the requested rectangle out of the source raw binary image into
  // the output IMAGE.
  void CopyBinaryRectRawToIMAGE(IMAGE* image) const;

#ifdef HAVE_LIBLEPT
  // Otsu thresholds the rectangle into the output Pix. Everything except
  // the image buffer pointer is taken from the class.
  void OtsuThresholdRectToPix(const unsigned char* imagedata,
                              int bytes_per_pixel, int bytes_per_line,
                              Pix** pix) const;

  // Thresholds the rectangle into the output Pix using
  // thresholds/hi_values. Everything except the image buffer pointer is
  // taken from the class.
  void ThresholdRectToPix(const unsigned char* imagedata,
                          int bytes_per_pixel, int bytes_per_line,
                          const int* thresholds, const int* hi_values,
                          Pix** pix) const;

  // Copies the raw image rectangle to the Pix, taking all data from the
  // class.
  void RawRectToPix(Pix** pix) const;

  // Cuts the requested rectangle out of the binary image into the output
  // IMAGE.
  void CopyBinaryRectPixToIMAGE(IMAGE* image) const;
#endif

 protected:
#ifdef HAVE_LIBLEPT
  // Clone or other copy of the source Pix.
  // Always PixDestroy()ed when the class is destroyed.
  Pix* pix_;
#endif
  // Exactly one of pix_ and image_data_ is not NULL.
  const unsigned char* image_data_;  // Raw source image.

  int image_width_;    // Width of source image/pix.
  int image_height_;   // Height of source image/pix.
  int image_bytespp_;  // Bytes per pixel of source image/pix.
  int image_bytespl_;  // Bytes per line of source image/pix.
  // Limits of the image rectangle to be processed.
  int rect_left_;
  int rect_top_;
  int rect_width_;
  int rect_height_;
};

}  // namespace tesseract.

#endif  // TESSERACT_CCMAIN_THRESHOLDER_H__