///////////////////////////////////////////////////////////////////////
// File:        thresholder.h
// Description: Base API for thresholding images in tesseract.
// Author:      Ray Smith
// Created:     Mon May 12 11:00:15 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

// The guard name ends in a single underscore on purpose: identifiers that
// contain a double underscore are reserved for the C++ implementation.
#ifndef TESSERACT_CCMAIN_THRESHOLDER_H_
#define TESSERACT_CCMAIN_THRESHOLDER_H_

class IMAGE;
struct Pix;

namespace tesseract {

// Base class for all tesseract image thresholding classes.
// Specific classes can add new thresholding methods by
// overriding ThresholdToIMAGE and/or ThresholdToPix.
// Each instance deals with a single image, but the design is intended to
// be useful for multiple calls to SetRectangle and ThresholdTo* if
// desired.
//
// NOTE(review): with HAVE_LIBLEPT the class holds a raw Pix* and declares a
// destructor, but copy construction/assignment are not restricted in this
// header. Confirm that instances are never copied, or that copying is safe.
class ImageThresholder {
 public:
  ImageThresholder();
  virtual ~ImageThresholder();

  // Destroy the Pix if there is one, freeing memory.
  virtual void Clear();

  // Return true if no image has been set.
  bool IsEmpty() const;

  // SetImage makes a copy of only the metadata, not the underlying
  // image buffer. It promises to treat the source as read-only in either case,
  // but in return assumes that the Pix or image buffer remain valid
  // throughout the life of the ImageThresholder.
  // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
  // Palette color images will not work properly and must be converted to
  // 24 bit.
  // Binary images of 1 bit per pixel may also be given but they must be
  // byte packed with the MSB of the first byte being the first pixel, and a
  // one pixel is WHITE. For binary images set bytes_per_pixel=0.
  void SetImage(const unsigned char* imagedata, int width, int height,
                int bytes_per_pixel, int bytes_per_line);

  // Store the coordinates of the rectangle to process for later use.
  // Doesn't actually do any thresholding.
  void SetRectangle(int left, int top, int width, int height);

  // Get enough parameters to be able to rebuild bounding boxes in the
  // original image (not just within the rectangle).
  // Left and top are enough with top-down coordinates, but
  // the height of the rectangle and the image are needed for bottom-up.
  virtual void GetImageSizes(int* left, int* top, int* width, int* height,
                             int* imagewidth, int* imageheight);

  // Return true if HAVE_LIBLEPT and this thresholder implements the Pix
  // interface.
  virtual bool HasThresholdToPix() const;

  // Return true if the source image is color, i.e. it has at least
  // 3 bytes per pixel.
  bool IsColor() const {
    return image_bytespp_ >= 3;
  }

  // Threshold the source image as efficiently as possible to the output
  // tesseract IMAGE class.
  virtual void ThresholdToIMAGE(IMAGE* image);

#ifdef HAVE_LIBLEPT
  // Pix vs raw, which to use?
  // Implementations should provide the ability to source and target Pix
  // where possible. A future version of Tesseract may choose to use Pix
  // as its internal representation and discard IMAGE altogether.
  // Because of that, an implementation that sources and targets Pix may end up
  // with fewer copies than an implementation that does not.
  // NOTE: Opposite to SetImage for raw images, SetImage for Pix clones its
  // input, so the source pix may be pixDestroyed immediately after.
  void SetImage(const Pix* pix);

  // Threshold the source image as efficiently as possible to the output Pix.
  // Creates a Pix and sets pix to point to the resulting pointer.
  // Caller must use pixDestroy to free the created Pix.
  virtual void ThresholdToPix(Pix** pix);

  // Get a clone/copy of the source image rectangle.
  // The returned Pix must be pixDestroyed.
  // This function will be used in the future by the page layout analysis, and
  // the layout analysis that uses it will only be available with Leptonica,
  // so there is no raw equivalent.
  Pix* GetPixRect();
#endif

 protected:
  // ----------------------------------------------------------------------
  // Utility functions that may be useful components for other thresholders.

  // Common initialization shared between SetImage methods.
  virtual void Init();

  // Return true if we are processing the full image, i.e. the rectangle
  // starts at the origin and has the same size as the source image.
  bool IsFullImage() const {
    return rect_left_ == 0 && rect_top_ == 0 &&
           rect_width_ == image_width_ && rect_height_ == image_height_;
  }

  // Otsu threshold the rectangle, taking everything except the image buffer
  // pointer from the class, to the output IMAGE.
  void OtsuThresholdRectToIMAGE(const unsigned char* imagedata,
                                int bytes_per_pixel, int bytes_per_line,
                                IMAGE* image) const;

  // Threshold the rectangle, taking everything except the image buffer pointer
  // from the class, using thresholds/hi_values to the output IMAGE.
  void ThresholdRectToIMAGE(const unsigned char* imagedata,
                            int bytes_per_pixel, int bytes_per_line,
                            const int* thresholds, const int* hi_values,
                            IMAGE* image) const;

  // Cut out the requested rectangle of the source raw binary image to the
  // output IMAGE.
  void CopyBinaryRectRawToIMAGE(IMAGE* image) const;

#ifdef HAVE_LIBLEPT
  // Otsu threshold the rectangle, taking everything except the image buffer
  // pointer from the class, to the output Pix.
  void OtsuThresholdRectToPix(const unsigned char* imagedata,
                              int bytes_per_pixel, int bytes_per_line,
                              Pix** pix) const;

  // Threshold the rectangle, taking everything except the image buffer pointer
  // from the class, using thresholds/hi_values to the output Pix.
  void ThresholdRectToPix(const unsigned char* imagedata,
                          int bytes_per_pixel, int bytes_per_line,
                          const int* thresholds, const int* hi_values,
                          Pix** pix) const;

  // Copy the raw image rectangle, taking all data from the class, to the Pix.
  void RawRectToPix(Pix** pix) const;

  // Cut out the requested rectangle of the binary image to the output IMAGE.
  void CopyBinaryRectPixToIMAGE(IMAGE* image) const;
#endif

  // ----------------------------------------------------------------------
  // Data members.

#ifdef HAVE_LIBLEPT
  // Clone or other copy of the source Pix.
  // The pix will always be pixDestroy()ed on destruction of the class.
  Pix* pix_;
#endif

  // Exactly one of pix_ and image_data_ is not NULL.
  // NOTE(review): presumably this only holds once an image has been set
  // (see IsEmpty()); confirm against the implementation.
  const unsigned char* image_data_;  // Raw source image.

  int image_width_;    // Width of source image/pix.
  int image_height_;   // Height of source image/pix.
  int image_bytespp_;  // Bytes per pixel of source image/pix.
  int image_bytespl_;  // Bytes per line of source image/pix.

  // Limits of image rectangle to be processed.
  int rect_left_;
  int rect_top_;
  int rect_width_;
  int rect_height_;
};

}  // namespace tesseract.

#endif  // TESSERACT_CCMAIN_THRESHOLDER_H_