/**********************************************************************
 * File:        makerow.h  (Formerly makerows.h)
 * Description: Code to arrange blobs into rows of text.
 * Author:		Ray Smith
 * Created:		Mon Sep 21 14:34:48 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifndef           MAKEROW_H
#define           MAKEROW_H

#include          <math.h>

#include          "varable.h"
#include          "ocrblock.h"
#include          "tessclas.h"
#include          "blobbox.h"
#include          "statistc.h"
#include          "notdll.h"
#include          "tesseractclass.h"

// Outcome codes for matching a blob against candidate rows; this is the
// return type of most_overlapping_row() declared later in this header.
enum OVERLAP_STATE
{
  ASSIGN,   // assign it to row
  REJECT,   // reject it - dual overlap
  NEW_ROW   // NOTE(review): undocumented here; presumably "start a new row"
};

// Classification of a row by which height measurements it carries.
// get_row_category() in this header maps a TO_ROW onto these values.
enum ROW_CATEGORY {
  ROW_ASCENDERS_FOUND,   // xheight > 0 and ascrise > 0
  ROW_DESCENDERS_FOUND,  // xheight > 0, ascrise <= 0, descdrop != 0
  ROW_UNKNOWN,           // xheight > 0 but neither of the above
  ROW_INVALID,           // xheight <= 0
};

45 extern BOOL_VAR_H (textord_show_initial_rows, FALSE,
46 "Display row accumulation");
47 extern BOOL_VAR_H (textord_show_parallel_rows, FALSE,
48 "Display page correlated rows");
49 extern BOOL_VAR_H (textord_show_expanded_rows, FALSE,
50 "Display rows after expanding");
51 extern BOOL_VAR_H (textord_show_final_rows, FALSE,
52 "Display rows after final fitting");
53 extern BOOL_VAR_H (textord_show_final_blobs, FALSE,
54 "Display blob bounds after pre-ass");
55 extern BOOL_VAR_H (textord_test_landscape, FALSE, "Tests refer to land/port");
56 extern BOOL_VAR_H (textord_parallel_baselines, TRUE,
57 "Force parallel baselines");
58 extern BOOL_VAR_H (textord_straight_baselines, FALSE,
59 "Force straight baselines");
60 extern BOOL_VAR_H (textord_quadratic_baselines, FALSE,
61 "Use quadratic splines");
62 extern BOOL_VAR_H (textord_old_baselines, TRUE, "Use old baseline algorithm");
63 extern BOOL_VAR_H (textord_old_xheight, TRUE, "Use old xheight algorithm");
64 extern BOOL_VAR_H (textord_fix_xheight_bug, TRUE, "Use spline baseline");
65 extern BOOL_VAR_H (textord_fix_makerow_bug, TRUE,
66 "Prevent multiple baselines");
67 extern BOOL_VAR_H (textord_cblob_blockocc, TRUE,
68 "Use new projection for underlines");
69 extern BOOL_VAR_H (textord_debug_xheights, FALSE, "Test xheight algorithms");
70 extern INT_VAR_H (textord_test_x, 0, "coord of test pt");
71 extern INT_VAR_H (textord_test_y, 0, "coord of test pt");
72 extern INT_VAR_H (textord_min_blobs_in_row, 4,
73 "Min blobs before gradient counted");
74 extern INT_VAR_H (textord_spline_minblobs, 8,
75 "Min blobs in each spline segment");
76 extern INT_VAR_H (textord_spline_medianwin, 6,
77 "Size of window for spline segmentation");
78 extern INT_VAR_H (textord_min_xheight, 10, "Min credible pixel xheight");
79 extern double_VAR_H (textord_spline_shift_fraction, 0.02,
80 "Fraction of line spacing for quad");
81 extern double_VAR_H (textord_spline_outlier_fraction, 0.1,
82 "Fraction of line spacing for outlier");
83 extern double_VAR_H (textord_skew_ile, 0.5, "Ile of gradients for page skew");
84 extern double_VAR_H (textord_skew_lag, 0.75,
85 "Lag for skew on row accumulation");
86 extern double_VAR_H (textord_linespace_iqrlimit, 0.2,
87 "Max iqr/median for linespace");
88 extern double_VAR_H (textord_width_limit, 8,
89 "Max width of blobs to make rows");
90 extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping");
91 extern double_VAR_H (textord_merge_desc, 0.25,
92 "Fraction of linespace for desc drop");
93 extern double_VAR_H (textord_merge_x, 0.5,
94 "Fraction of linespace for x height");
95 extern double_VAR_H (textord_merge_asc, 0.25,
96 "Fraction of linespace for asc height");
97 extern double_VAR_H (textord_minxh, 0.25,
98 "fraction of linesize for min xheight");
99 extern double_VAR_H (textord_min_linesize, 1.25,
100 "* blob height for initial linesize");
101 extern double_VAR_H (textord_excess_blobsize, 1.3,
102 "New row made if blob makes row this big");
103 extern double_VAR_H (textord_occupancy_threshold, 0.4,
104 "Fraction of neighbourhood");
105 extern double_VAR_H (textord_underline_width, 2.0,
106 "Multiple of line_size for underline");
107 extern double_VAR_H(textord_min_blob_height_fraction, 0.75,
108 "Min blob height/top to include blob top into xheight stats");
109 extern double_VAR_H (textord_xheight_mode_fraction, 0.4,
110 "Min pile height to make xheight");
111 extern double_VAR_H (textord_ascheight_mode_fraction, 0.15,
112 "Min pile height to make ascheight");
113 extern double_VAR_H (textord_ascx_ratio_min, 1.2, "Min cap/xheight");
114 extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight");
115 extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight");
116 extern double_VAR_H (textord_descx_ratio_max, 0.6, "Max desc/xheight");
117 extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation");
118 
get_min_max_xheight(double block_linesize,int * min_height,int * max_height)119 inline void get_min_max_xheight(double block_linesize,
120                                 int *min_height, int *max_height) {
121   *min_height = static_cast<inT32>(floor(block_linesize * textord_minxh));
122   if (*min_height < textord_min_xheight) *min_height = textord_min_xheight;
123   *max_height = static_cast<inT32>(ceil(block_linesize * 3));
124 }
125 
get_row_category(const TO_ROW * row)126 inline ROW_CATEGORY get_row_category(const TO_ROW *row) {
127   if (row->xheight <= 0) return ROW_INVALID;
128   return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND :
129     (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN;
130 }
131 
// Returns true when test lies in the closed interval
// [num * (1 - margin), num * (1 + margin)].
//
// The interval is only non-empty for num >= 0 (or margin == 0): with a
// negative num and a positive margin the lower bound exceeds the upper bound,
// so the result is always false.
inline bool within_error_margin(float test, float num, float margin) {
  return (test >= num * (1 - margin) && test <= num * (1 + margin));
}

136 void fill_heights(TO_ROW *row, float gradient, int min_height,
137                   int max_height, STATS *heights, STATS *floating_heights);
138 
139 float make_single_row(ICOORD page_tr, TO_BLOCK* block,
140                       TO_BLOCK_LIST* blocks, tesseract::Tesseract* tess);
141 float make_rows(                             //make rows
142                 ICOORD page_tr,              //top right
143                 BLOCK_LIST *blocks,          //block list
144                 TO_BLOCK_LIST *land_blocks,  //rotated for landscape
145                 TO_BLOCK_LIST *port_blocks,  //output list
146                 tesseract::Tesseract* tess
147                );
148 void make_initial_textrows(                  //find lines
149                            ICOORD page_tr,
150                            TO_BLOCK *block,  //block to do
151                            FCOORD rotation,  //for drawing
152                            BOOL8 testing_on  //correct orientation
153                           );
154 void fit_lms_line(             //sort function
155                   TO_ROW *row  //row to fit
156                  );
157 void compute_page_skew(                        //get average gradient
158                        TO_BLOCK_LIST *blocks,  //list of blocks
159                        float &page_m,          //average gradient
160                        float &page_err         //average error
161                       );
162 void cleanup_rows(                   //find lines
163                   ICOORD page_tr,    //top right
164                   TO_BLOCK *block,   //block to do
165                   float gradient,    //gradient to fit
166                   FCOORD rotation,   //for drawing
167                   inT32 block_edge,  //edge of block
168                   BOOL8 testing_on,  //correct orientation
169                   tesseract::Tesseract* tess
170                  );
171 void delete_non_dropout_rows(                   //find lines
172                              TO_BLOCK *block,   //block to do
173                              float gradient,    //global skew
174                              FCOORD rotation,   //deskew vector
175                              inT32 block_edge,  //left edge
176                              BOOL8 testing_on   //correct orientation
177                             );
178 BOOL8 find_best_dropout_row(                    //find neighbours
179                             TO_ROW *row,        //row to test
180                             inT32 distance,     //dropout dist
181                             float dist_limit,   //threshold distance
182                             inT32 line_index,   //index of row
183                             TO_ROW_IT *row_it,  //current position
184                             BOOL8 testing_on    //correct orientation
185                            );
186 TBOX deskew_block_coords(                  //block box
187                         TO_BLOCK *block,  //block to do
188                         float gradient    //global skew
189                        );
190 void compute_line_occupation(                    //project blobs
191                              TO_BLOCK *block,    //block to do
192                              float gradient,     //global skew
193                              inT32 min_y,        //min coord in block
194                              inT32 max_y,        //in block
195                              inT32 *occupation,  //output projection
196                              inT32 *deltas       //derivative
197                             );
198 void compute_occupation_threshold(                    //project blobs
199                                   inT32 low_window,   //below result point
200                                   inT32 high_window,  //above result point
201                                   inT32 line_count,   //array sizes
202                                   inT32 *occupation,  //input projection
203                                   inT32 *thresholds   //output thresholds
204                                  );
205 void compute_dropout_distances(                    //project blobs
206                                inT32 *occupation,  //input projection
207                                inT32 *thresholds,  //output thresholds
208                                inT32 line_count    //array sizes
209                               );
210 void expand_rows(                   //find lines
211                  ICOORD page_tr,    //top right
212                  TO_BLOCK *block,   //block to do
213                  float gradient,    //gradient to fit
214                  FCOORD rotation,   //for drawing
215                  inT32 block_edge,  //edge of block
216                  BOOL8 testing_on   //correct orientation
217                 );
218 void adjust_row_limits(                 //tidy limits
219                        TO_BLOCK *block  //block to do
220                       );
221 void compute_row_stats(                  //find lines
222                        TO_BLOCK *block,  //block to do
223                        BOOL8 testing_on  //correct orientation
224                       );
225 void compute_block_xheight(                  //find lines
226                            TO_BLOCK *block,  //block to do
227                            float gradient,   //global skew
228                            tesseract::Tesseract* tess
229                           );
230 float median_block_xheight(                  //find lines
231                            TO_BLOCK *block,  //block to do
232                            float gradient    //global skew
233                           );
234 void compute_row_xheight(                   //find lines
235                           TO_ROW *row,       //row to do
236                          float gradient,    //global skew
237                          int block_height,  //a guess of block xheight
238                          tesseract::Tesseract* tess
239                          );
240 
241 int compute_xheight_from_modes(
242     STATS *heights, STATS *floating_heights, int min_height,
243     int max_height, float *xheight, float *ascrise);
244 
245 inT32 compute_row_descdrop(                //find lines
246                            TO_ROW *row,    //row to do
247                            float gradient,  // global skew
248                            int xheight_blob_count,
249                            STATS *heights
250                           );
251 inT32 compute_height_modes(                   //find lines
252                            STATS *heights,    //stats to search
253                            inT32 min_height,  //bottom of range
254                            inT32 max_height,  //top of range
255                            inT32 *modes,      //output array
256                            inT32 maxmodes     //size of modes
257                           );
258 void correct_row_xheight(                //fix bad values
259                          TO_ROW *row,    //row to fix
260                          float xheight,  //average values
261                          float ascrise,
262                          float descdrop);
263 void separate_underlines(                  //make rough chars
264                          TO_BLOCK *block,  //block to do
265                          float gradient,   //skew angle
266                          FCOORD rotation,  //inverse landscape
267                          BOOL8 testing_on  //correct orientation
268                         );
269 void pre_associate_blobs(                  //make rough chars
270                          ICOORD page_tr,   //top right
271                          TO_BLOCK *block,  //block to do
272                          FCOORD rotation,  //inverse landscape
273                          BOOL8 testing_on  //correct orientation
274                         );
275 void fit_parallel_rows(                   //find lines
276                        TO_BLOCK *block,   //block to do
277                        float gradient,    //gradient to fit
278                        FCOORD rotation,   //for drawing
279                        inT32 block_edge,  //edge of block
280                        BOOL8 testing_on   //correct orientation
281                       );
282 void fit_parallel_lms(                 //sort function
283                       float gradient,  //forced gradient
284                       TO_ROW *row      //row to fit
285                      );
286 void make_spline_rows(                   //find lines
287                       TO_BLOCK *block,   //block to do
288                       float gradient,    //gradient to fit
289                       FCOORD rotation,   //for drawing
290                       inT32 block_edge,  //edge of block
291                       BOOL8 testing_on,  //correct orientation
292                       tesseract::Tesseract* tess
293                      );
294 void make_baseline_spline(                 //sort function
295                           TO_ROW *row,     //row to fit
296                           TO_BLOCK *block  //block it came from
297                          );
298 BOOL8 segment_baseline (         //split baseline
299 TO_ROW * row,                    //row to fit
300 TO_BLOCK * block,                //block it came from
301 inT32 & segments,                //no fo segments
302 inT32 xstarts[]                  //coords of segments
303 );
304 double *linear_spline_baseline ( //split baseline
305 TO_ROW * row,                    //row to fit
306 TO_BLOCK * block,                //block it came from
307 inT32 & segments,                //no fo segments
308 inT32 xstarts[]                  //coords of segments
309 );
310 void assign_blobs_to_rows(                      //find lines
311                           TO_BLOCK *block,      //block to do
312                           float *gradient,      //block skew
313                           int pass,             //identification
314                           BOOL8 reject_misses,  //chuck big ones out
315                           BOOL8 make_new_rows,  //add rows for unmatched
316                           BOOL8 drawing_skew    //draw smoothed skew
317                          );
318                                  //find best row
319 OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it,  //iterator
320                                    TO_ROW *&best_row,  //output row
321                                    float top,          //top of blob
322                                    float bottom,       //bottom of blob
323                                    float rowsize,      //max row size
324                                    BOOL8 testing_blob  //test stuff
325                                   );
326 int blob_x_order(                    //sort function
327                  const void *item1,  //items to compare
328                  const void *item2);
329 int row_y_order(                    //sort function
330                 const void *item1,  //items to compare
331                 const void *item2);
332 int row_spacing_order(                    //sort function
333                       const void *item1,  //items to compare
334                       const void *item2);
335 
336 void mark_repeated_chars(TO_ROW *row, float block_xheight,
337                          tesseract::Tesseract *tess);
338 #endif
339