1 /********************************************************************** 2 * File: topitch.h (Formerly to_pitch.h) 3 * Description: Code to determine fixed pitchness and the pitch if fixed. 4 * Author: Ray Smith 5 * Created: Tue Aug 24 16:57:29 BST 1993 6 * 7 * (C) Copyright 1993, Hewlett-Packard Ltd. 8 ** Licensed under the Apache License, Version 2.0 (the "License"); 9 ** you may not use this file except in compliance with the License. 10 ** You may obtain a copy of the License at 11 ** http://www.apache.org/licenses/LICENSE-2.0 12 ** Unless required by applicable law or agreed to in writing, software 13 ** distributed under the License is distributed on an "AS IS" BASIS, 14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 ** See the License for the specific language governing permissions and 16 ** limitations under the License. 17 * 18 **********************************************************************/ 19 20 #ifndef TOPITCH_H 21 #define TOPITCH_H 22 23 #include "blobbox.h" 24 #include "notdll.h" 25 26 namespace tesseract { 27 class Tesseract; 28 } 29 extern BOOL_VAR_H (textord_debug_pitch_test, FALSE, 30 "Debug on fixed pitch test"); 31 extern BOOL_VAR_H (textord_debug_pitch_metric, FALSE, 32 "Write full metric stuff"); 33 extern BOOL_VAR_H (textord_show_row_cuts, FALSE, "Draw row-level cuts"); 34 extern BOOL_VAR_H (textord_show_page_cuts, FALSE, "Draw page-level cuts"); 35 extern BOOL_VAR_H (textord_pitch_cheat, FALSE, 36 "Use correct answer for fixed/prop"); 37 extern BOOL_VAR_H (textord_blockndoc_fixed, TRUE, 38 "Attempt whole doc/block fixed pitch"); 39 extern BOOL_VAR_H (textord_fast_pitch_test, FALSE, 40 "Do even faster pitch algorithm"); 41 extern double_VAR_H (textord_projection_scale, 0.125, 42 "Ding rate for mid-cuts"); 43 extern double_VAR_H (textord_balance_factor, 2.0, 44 "Ding rate for unbalanced char cells"); 45 46 void compute_fixed_pitch( //determine pitch 47 ICOORD page_tr, //top right 48 TO_BLOCK_LIST *port_blocks, //input list 49 float gradient, //page skew 50 FCOORD rotation, //for drawing 51 BOOL8 testing_on, //correct orientation 52 tesseract::Tesseract* tess 53 ); 54 void fix_row_pitch( //get some value 55 TO_ROW *bad_row, //row to fix 56 TO_BLOCK *bad_block, //block of bad_row 57 TO_BLOCK_LIST *blocks, //blocks to scan 58 inT32 row_target, //number of row 59 inT32 block_target //number of block 60 ); 61 void compute_block_pitch( //process each block 62 TO_BLOCK *block, //input list 63 FCOORD rotation, //for drawing 64 inT32 block_index, //block number 65 BOOL8 testing_on, //correct orientation 66 tesseract::Tesseract* tess 67 ); 68 BOOL8 compute_rows_pitch( //find line stats 69 TO_BLOCK *block, //block to do 70 inT32 block_index, //block number 71 BOOL8 testing_on //correct orientation 72 ); 73 BOOL8 try_doc_fixed( //determine pitch 74 ICOORD page_tr, //top right 75 TO_BLOCK_LIST *port_blocks, //input list 76 float gradient //page skew 77 ); 78 BOOL8 try_block_fixed( //find line stats 79 TO_BLOCK *block, //block to do 80 inT32 block_index //block number 81 ); 82 BOOL8 try_rows_fixed( //find line stats 83 TO_BLOCK *block, //block to do 84 inT32 block_index, //block number 85 BOOL8 testing_on //correct orientation 86 ); 87 void print_block_counts( //find line stats 88 TO_BLOCK *block, //block to do 89 inT32 block_index //block number 90 ); 91 void count_block_votes( //find line stats 92 TO_BLOCK *block, //block to do 93 inT32 &def_fixed, //add to counts 94 inT32 &def_prop, 95 inT32 &maybe_fixed, 96 inT32 &maybe_prop, 97 inT32 &corr_fixed, 98 inT32 &corr_prop, 99 inT32 &dunno); 100 BOOL8 row_pitch_stats( //find line stats 101 TO_ROW *row, //current row 102 inT32 maxwidth, //of spaces 103 BOOL8 testing_on //correct orientation 104 ); 105 BOOL8 find_row_pitch( //find lines 106 TO_ROW *row, //row to do 107 inT32 maxwidth, //max permitted space 108 inT32 dm_gap, //ignorable gaps 109 TO_BLOCK *block, //block of row 110 inT32 block_index, //block_number 111 inT32 row_index, //number of row 112 BOOL8 testing_on //correct orientation 113 ); 114 BOOL8 fixed_pitch_row( //find lines 115 TO_ROW *row, //row to do 116 inT32 block_index //block_number 117 ); 118 BOOL8 count_pitch_stats( //find lines 119 TO_ROW *row, //row to do 120 STATS *gap_stats, //blob gaps 121 STATS *pitch_stats, //centre-centre stats 122 float initial_pitch, //guess at pitch 123 float min_space, //estimate space size 124 BOOL8 ignore_outsize, //discard big objects 125 BOOL8 split_outsize, //split big objects 126 inT32 dm_gap //ignorable gaps 127 ); 128 float tune_row_pitch( //find fp cells 129 TO_ROW *row, //row to do 130 STATS *projection, //vertical projection 131 inT16 projection_left, //edge of projection 132 inT16 projection_right, //edge of projection 133 float space_size, //size of blank 134 float &initial_pitch, //guess at pitch 135 float &best_sp_sd, //space sd 136 inT16 &best_mid_cuts, //no of cheap cuts 137 ICOORDELT_LIST *best_cells, //row cells 138 BOOL8 testing_on //inidividual words 139 ); 140 float tune_row_pitch2( //find fp cells 141 TO_ROW *row, //row to do 142 STATS *projection, //vertical projection 143 inT16 projection_left, //edge of projection 144 inT16 projection_right, //edge of projection 145 float space_size, //size of blank 146 float &initial_pitch, //guess at pitch 147 float &best_sp_sd, //space sd 148 inT16 &best_mid_cuts, //no of cheap cuts 149 ICOORDELT_LIST *best_cells, //row cells 150 BOOL8 testing_on //inidividual words 151 ); 152 float compute_pitch_sd ( //find fp cells 153 TO_ROW * row, //row to do 154 STATS * projection, //vertical projection 155 inT16 projection_left, //edge 156 inT16 projection_right, //edge 157 float space_size, //size of blank 158 float initial_pitch, //guess at pitch 159 float &sp_sd, //space sd 160 inT16 & mid_cuts, //no of free cuts 161 ICOORDELT_LIST * row_cells, //list of chop pts 162 BOOL8 testing_on, //inidividual words 163 inT16 start = 0, //start of good range 164 inT16 end = 0 //end of good range 165 ); 166 float compute_pitch_sd2 ( //find fp cells 167 TO_ROW * row, //row to do 168 STATS * projection, //vertical projection 169 inT16 projection_left, //edge 170 inT16 projection_right, //edge 171 float initial_pitch, //guess at pitch 172 inT16 & occupation, //no of occupied cells 173 inT16 & mid_cuts, //no of free cuts 174 ICOORDELT_LIST * row_cells, //list of chop pts 175 BOOL8 testing_on, //inidividual words 176 inT16 start = 0, //start of good range 177 inT16 end = 0 //end of good range 178 ); 179 void print_pitch_sd( //find fp cells 180 TO_ROW *row, //row to do 181 STATS *projection, //vertical projection 182 inT16 projection_left, //edges //size of blank 183 inT16 projection_right, 184 float space_size, 185 float initial_pitch //guess at pitch 186 ); 187 int sort_floats2( //qsort function 188 const void *arg1, //ptrs to floats 189 const void *arg2); 190 void find_repeated_chars( //search for equal chars 191 TO_BLOCK *block, //block to search 192 BOOL8 testing_on, //dbug mode 193 tesseract::Tesseract* tess 194 ); 195 void plot_fp_word( //draw block of words 196 TO_BLOCK *block, //block to draw 197 float pitch, //pitch to draw with 198 float nonspace //for space threshold 199 ); 200 #endif 201