• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**********************************************************************
2  * File:        pagesegmain.cpp
3  * Description: Top-level page segmenter for Tesseract.
4  * Author:      Ray Smith
5  * Created:     Thu Sep 25 17:12:01 PDT 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifdef WIN32
21 #include <windows.h>
22 #else
23 #include <unistd.h>
24 #endif
25 
26 // Include automatically generated configuration file if running autoconf.
27 #ifdef HAVE_CONFIG_H
28 #include "config_auto.h"
29 #endif
30 
31 #ifdef HAVE_LIBLEPT
32 // Include leptonica library only if autoconf (or makefile etc) tell us to.
33 #include "allheaders.h"
34 #endif
35 
36 #include "tesseractclass.h"
37 #include "img.h"
38 #include "blobbox.h"
39 #include "linefind.h"
40 #include "imagefind.h"
41 #include "colfind.h"
42 #include "tabvector.h"
43 #include "blread.h"
44 #include "wordseg.h"
45 #include "makerow.h"
46 #include "baseapi.h"
47 #include "tordmain.h"
48 #include "tessvars.h"
49 
50 namespace tesseract {
51 
52 // Minimum believable resolution. SegmentPage replaces any lower value with kDefaultResolution.
53 const int kMinCredibleResolution = 70;
54 // Default resolution used if the input resolution is not believable.
55 const int kDefaultResolution = 300;
56 
57 // Segment the page according to the current value of tessedit_pageseg_mode.
58 // If the pix_binary_ member is not NULL, it is used as the source image,
59 // and copied to image, otherwise it just uses image as the input.
60 // On return the blocks list owns all the constructed page layout.
SegmentPage(const STRING * input_file,IMAGE * image,BLOCK_LIST * blocks)61 int Tesseract::SegmentPage(const STRING* input_file,
62                            IMAGE* image, BLOCK_LIST* blocks) {
63   int width = image->get_xsize();
64   int height = image->get_ysize();
65   int resolution = image->get_res();
66 #ifdef HAVE_LIBLEPT
67   if (pix_binary_ != NULL) {
68     width = pixGetWidth(pix_binary_);
69     height = pixGetHeight(pix_binary_);
70     resolution = pixGetXRes(pix_binary_);
71   }
72 #endif
73   // Zero resolution messes up the algorithms, so make sure it is credible.
74   if (resolution < kMinCredibleResolution)
75     resolution = kDefaultResolution;
76   // Get page segmentation mode.
77   PageSegMode pageseg_mode = static_cast<PageSegMode>(
78       static_cast<int>(tessedit_pageseg_mode));
79   // If a UNLV zone file can be found, use that instead of segmentation.
80   if (pageseg_mode != tesseract::PSM_AUTO &&
81       input_file != NULL && input_file->length() > 0) {
82     STRING name = *input_file;
83     const char* lastdot = strrchr(name.string(), '.');
84     if (lastdot != NULL)
85       name[lastdot - name.string()] = '\0';
86     read_unlv_file(name, width, height, blocks);
87   }
88   bool single_column = pageseg_mode > PSM_AUTO;
89   if (blocks->empty()) {
90     // No UNLV file present. Work according to the PageSegMode.
91     // First make a single block covering the whole image.
92     BLOCK_IT block_it(blocks);
93     BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
94     block_it.add_to_end(block);
95   } else {
96     // UNLV file present. Use PSM_SINGLE_COLUMN.
97     pageseg_mode = PSM_SINGLE_COLUMN;
98   }
99 
100   TO_BLOCK_LIST land_blocks, port_blocks;
101   TBOX page_box;
102   if (pageseg_mode <= PSM_SINGLE_COLUMN) {
103     if (AutoPageSeg(width, height, resolution, single_column,
104                     image, blocks, &port_blocks) < 0) {
105       return -1;
106     }
107     // To create blobs from the image region bounds uncomment this line:
108     //  port_blocks.clear();  // Uncomment to go back to the old mode.
109   } else {
110 #if HAVE_LIBLEPT
111     image->FromPix(pix_binary_);
112 #endif
113     deskew_ = FCOORD(1.0f, 0.0f);
114     reskew_ = FCOORD(1.0f, 0.0f);
115   }
116   if (blocks->empty()) {
117     tprintf("Empty page\n");
118     return 0;  // AutoPageSeg found an empty page.
119   }
120 
121   if (port_blocks.empty()) {
122     // AutoPageSeg was not used, so we need to find_components first.
123   find_components(blocks, &land_blocks, &port_blocks, &page_box);
124   } else {
125     // AutoPageSeg does not need to find_components as it did that already.
126     page_box.set_left(0);
127     page_box.set_bottom(0);
128     page_box.set_right(width);
129     page_box.set_top(height);
130     // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
131     filter_blobs(page_box.topright(), &port_blocks, true);
132   }
133 
134   TO_BLOCK_IT to_block_it(&port_blocks);
135   ASSERT_HOST(!port_blocks.empty());
136   TO_BLOCK* to_block = to_block_it.data();
137   if (pageseg_mode <= PSM_SINGLE_BLOCK ||
138       to_block->line_size < 2) {
139     // For now, AUTO, SINGLE_COLUMN and SINGLE_BLOCK all map to the old
140     // textord. The difference is the number of blocks and how the are made.
141     textord_page(page_box.topright(), blocks, &land_blocks, &port_blocks,
142                  this);
143   } else {
144     // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
145     float gradient = make_single_row(page_box.topright(),
146                                      to_block, &port_blocks, this);
147     if (pageseg_mode == PSM_SINGLE_LINE) {
148       // SINGLE_LINE uses the old word maker on the single line.
149       make_words(page_box.topright(), gradient, blocks,
150                  &land_blocks, &port_blocks, this);
151     } else {
152       // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
153       // single word, and in SINGLE_CHAR mode, all the outlines
154       // go in a single blob.
155       make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
156                        to_block->get_rows(), to_block->block->row_list());
157     }
158   }
159   return 0;
160 }
161 
162 // Auto page segmentation. Divide the page image into blocks of uniform
163 // text linespacing and images.
164 // Width, height and resolution are derived from the input image.
165 // If the pix is non-NULL, then it is assumed to be the input, and it is
166 // copied to the image, otherwise the image is used directly.
167 // The output goes in the blocks list with corresponding TO_BLOCKs in the
168 // to_blocks list.
169 // If single_column is true, then no attempt is made to divide the image
170 // into columns, but multiple blocks are still made if the text is of
171 // non-uniform linespacing.
AutoPageSeg(int width,int height,int resolution,bool single_column,IMAGE * image,BLOCK_LIST * blocks,TO_BLOCK_LIST * to_blocks)172 int Tesseract::AutoPageSeg(int width, int height, int resolution,
173                            bool single_column, IMAGE* image,
174                            BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
175   int vertical_x = 0;
176   int vertical_y = 1;
177   TabVector_LIST v_lines;
178   TabVector_LIST h_lines;
179   ICOORD bleft(0, 0);
180   Boxa* boxa = NULL;
181   Pixa* pixa = NULL;
182   // The blocks made by the ColumnFinder. Moved to blocks before return.
183   BLOCK_LIST found_blocks;
184 
185 #ifdef HAVE_LIBLEPT
186   if (pix_binary_ != NULL) {
187     if (textord_debug_images) {
188       Pix* grey_pix = pixCreate(width, height, 8);
189       // Printable images are light grey on white, but for screen display
190       // they are black on dark grey so the other colors show up well.
191       if (textord_debug_printable) {
192         pixSetAll(grey_pix);
193         pixSetMasked(grey_pix, pix_binary_, 192);
194       } else {
195         pixSetAllArbitrary(grey_pix, 64);
196         pixSetMasked(grey_pix, pix_binary_, 0);
197       }
198       AlignedBlob::IncrementDebugPix();
199       pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG);
200       pixDestroy(&grey_pix);
201     }
202     if (tessedit_dump_pageseg_images)
203       pixWrite("tessinput.png", pix_binary_, IFF_PNG);
204     // Leptonica is used to find the lines and image regions in the input.
205     LineFinder::FindVerticalLines(resolution, pix_binary_,
206                                   &vertical_x, &vertical_y, &v_lines);
207     LineFinder::FindHorizontalLines(resolution, pix_binary_, &h_lines);
208     if (tessedit_dump_pageseg_images)
209       pixWrite("tessnolines.png", pix_binary_, IFF_PNG);
210     ImageFinder::FindImages(pix_binary_, &boxa, &pixa);
211     if (tessedit_dump_pageseg_images)
212       pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
213     // Copy the Pix to the IMAGE.
214     image->FromPix(pix_binary_);
215     if (single_column)
216       v_lines.clear();
217   }
218 #endif
219   TO_BLOCK_LIST land_blocks, port_blocks;
220   TBOX page_box;
221   // The rest of the algorithm uses the usual connected components.
222   find_components(blocks, &land_blocks, &port_blocks, &page_box);
223 
224   TO_BLOCK_IT to_block_it(&port_blocks);
225   ASSERT_HOST(!to_block_it.empty());
226   for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list();
227        to_block_it.forward()) {
228     TO_BLOCK* to_block = to_block_it.data();
229     TBOX blkbox = to_block->block->bounding_box();
230     if (to_block->line_size >= 2) {
231       // Note: if there are multiple blocks, then v_lines, boxa, and pixa
232       // are empty on the next iteration, but in this case, we assume
233       // that there aren't any interesting line separators or images, since
234       // it means that we have a pre-defined unlv zone file.
235       ColumnFinder finder(static_cast<int>(to_block->line_size),
236                           blkbox.botleft(), blkbox.topright(),
237                           &v_lines, &h_lines, vertical_x, vertical_y);
238       if (finder.FindBlocks(height, resolution, single_column,
239                             to_block, boxa, pixa, &found_blocks, to_blocks) < 0)
240         return -1;
241       finder.ComputeDeskewVectors(&deskew_, &reskew_);
242       boxa = NULL;
243       pixa = NULL;
244     }
245   }
246 #ifdef HAVE_LIBLEPT
247   boxaDestroy(&boxa);
248   pixaDestroy(&pixa);
249 #endif
250   blocks->clear();
251   BLOCK_IT block_it(blocks);
252   // Move the found blocks to the input/output blocks.
253   block_it.add_list_after(&found_blocks);
254 
255   if (textord_debug_images) {
256     // The debug image is no longer needed so delete it.
257     unlink(AlignedBlob::textord_debug_pix().string());
258   }
259   return 0;
260 }
261 
262 }  // namespace tesseract.
263