1 /**********************************************************************
2 * File: pageres.h (Formerly page_res.h)
3 * Description: Results classes used by control.c
4 * Author: Phil Cheatle
5 * Created: Tue Sep 22 08:42:49 BST 1992
6 *
7 * (C) Copyright 1992, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19 #ifndef PAGERES_H
20 #define PAGERES_H
21
22 #include "elst.h"
23 #include "ocrblock.h"
24 #include "ocrrow.h"
25 #include "werd.h"
26 #include "ratngs.h"
27 #include "rejctmap.h"
28 #include "notdll.h"
29 #include "notdll.h"
30
31 /* Forward declarations */
32
33 class BLOCK_RES;
34
35 ELISTIZEH (BLOCK_RES) CLISTIZEH (BLOCK_RES)
36 class
37 ROW_RES;
38
ELISTIZEH(ROW_RES)39 ELISTIZEH (ROW_RES)
40 class WERD_RES;
41
42 ELISTIZEH (WERD_RES)
43 /*************************************************************************
44 * PAGE_RES - Page results
45 *************************************************************************/
46 class PAGE_RES //page result
47 {
48 public:
49 inT32 char_count;
50 inT32 rej_count;
51 BLOCK_RES_LIST block_res_list;
52 BOOL8 rejected;
53
54 PAGE_RES() {
55 } //empty constructor
56
57 PAGE_RES( //simple constructor
58 BLOCK_LIST *block_list); //real blocks
59
60 ~PAGE_RES () { //destructor
61 }
62 };
63
64 /*************************************************************************
65 * BLOCK_RES - Block results
66 *************************************************************************/
67
68 class BLOCK_RES:public ELIST_LINK
69 //page block result
70 {
71 public:
72 BLOCK * block; //real block
73 inT32 char_count; //chars in block
74 inT32 rej_count; //rejected chars
75 inT16 font_class; //
76 inT16 row_count;
77 float x_height;
78 BOOL8 font_assigned; // block already
79 // processed
80 BOOL8 bold; // all bold
81 BOOL8 italic; // all italic
82
83 ROW_RES_LIST row_res_list;
84
BLOCK_RES()85 BLOCK_RES() {
86 } //empty constructor
87
88 BLOCK_RES( //simple constructor
89 BLOCK *the_block); //real block
90
~BLOCK_RES()91 ~BLOCK_RES () { //destructor
92 }
93 };
94
95 /*************************************************************************
96 * ROW_RES - Row results
97 *************************************************************************/
98
99 class ROW_RES:public ELIST_LINK //row result
100 {
101 public:
102 ROW * row; //real row
103 inT32 char_count; //chars in block
104 inT32 rej_count; //rejected chars
105 inT32 whole_word_rej_count; //rejs in total rej wds
106 WERD_RES_LIST word_res_list;
107 float font_class_score;
108 inT16 font_class; //
109 inT32 italic;
110 inT32 bold;
111 inT8 font1; //primary font
112 inT8 font1_count; //no of voters
113 inT8 font2; //secondary font
114 inT8 font2_count; //no of voters
115
ROW_RES()116 ROW_RES() {
117 } //empty constructor
118
119 ROW_RES( //simple constructor
120 ROW *the_row); //real row
121
~ROW_RES()122 ~ROW_RES () { //destructor
123 }
124 };
125
126 /*************************************************************************
127 * WERD_RES - Word results
128 *************************************************************************/
// Crunch setting stored per word in WERD_RES::unlv_crunch_mode.
// NOTE(review): the meaning of each mode is not visible in this header;
// the per-value notes below are read off the names only -- confirm.
enum CRUNCH_MODE {
  CR_NONE,         // value assigned by WERD_RES(WERD *)
  CR_KEEP_SPACE,   // presumably: crunch, keeping the space
  CR_LOOSE_SPACE,  // presumably: crunch, dropping the space
  CR_DELETE        // presumably: remove the word
};
136
137 class WERD_RES:public ELIST_LINK //word result
138 {
139 public:
140 WERD * word; //non-bln real word
141 WERD *outword; //bln best choice
142 //segmentation
143 DENORM denorm; //for use on outword
144 WERD_CHOICE *best_choice; //tess output
145 WERD_CHOICE *raw_choice; //top choice permuter
146 WERD_CHOICE *ep_choice; //ep text
147 REJMAP reject_map; //best_choice rejects
148 BOOL8 tess_failed;
149 /*
150 If tess_failed is TRUE, one of the following tests failed when Tess
151 returned:
152 - The outword blob list was not the same length as the best_choice string;
153 - The best_choice string contained ALL blanks;
154 - The best_choice string was zero length
155 */
156 BOOL8 tess_accepted; //Tess thinks its ok?
157 BOOL8 tess_would_adapt; //Tess would adapt?
158 BOOL8 done; //ready for output?
159 inT8 italic;
160 inT8 bold;
161 inT8 font1; //primary font
162 inT8 font1_count; //no of voters
163 inT8 font2; //secondary font
164 inT8 font2_count; //no of voters
165 CRUNCH_MODE unlv_crunch_mode;
166 float x_height; //Post match estimate
167 float caps_height; //Post match estimate
168 BOOL8 guessed_x_ht;
169 BOOL8 guessed_caps_ht;
170 /*
171 To deal with fuzzy spaces we need to be able to combine "words" to form
172 combinations when we suspect that the gap is a non-space. The (new) text
173 ord code generates separate words for EVERY fuzzy gap - flags in the word
174 indicate whether the gap is below the threshold (fuzzy kern) and is thus
175 NOT a real word break by default, or above the threshold (fuzzy space) and
176 this is a real word break by default.
177
178 The WERD_RES list contains all these words PLUS "combination" words built
179 out of (copies of) the words split by fuzzy kerns. The separate parts have
180 their "part_of_combo" flag set true and should be IGNORED on a default
181 reading of the list.
182
183 Combination words are FOLLOWED by the sequence of part_of_combo words
184 which they combine.
185 */
186 BOOL8 combination; //of two fuzzy gap wds
187 BOOL8 part_of_combo; //part of a combo
188 BOOL8 reject_spaces; //Reject spacing?
189
WERD_RES()190 WERD_RES() {
191 } //empty constructor
192
WERD_RES(WERD * the_word)193 WERD_RES( //simple constructor
194 WERD *the_word) { //real word
195 word = the_word;
196 outword = NULL;
197 best_choice = NULL;
198 raw_choice = NULL;
199 ep_choice = NULL;
200 tess_failed = FALSE;
201 tess_accepted = FALSE;
202 tess_would_adapt = FALSE;
203 done = FALSE;
204 unlv_crunch_mode = CR_NONE;
205 italic = FALSE;
206 bold = FALSE;
207 font1 = -1;
208 font1_count = 0;
209 font2 = -1;
210 font2_count = 0;
211 x_height = 0.0;
212 caps_height = 0.0;
213 guessed_x_ht = TRUE;
214 guessed_caps_ht = TRUE;
215 combination = FALSE;
216 part_of_combo = FALSE;
217 reject_spaces = FALSE;
218 }
WERD_RES(const WERD_RES & source)219 WERD_RES(const WERD_RES &source) {
220 *this = source; //see operator=
221 }
222
223 ~WERD_RES (); //destructor
224
225 WERD_RES& operator=(const WERD_RES& source); //from this
226
deep_copy(const WERD_RES * src)227 static WERD_RES* deep_copy(const WERD_RES* src) {
228 return new WERD_RES(*src);
229 }
230
copy_on(WERD_RES * word_res)231 void copy_on( //copy blobs onto word
232 WERD_RES *word_res) { //from this word
233 word->set_flag (W_EOL, word_res->word->flag (W_EOL));
234 word->copy_on (word_res->word);
235 }
236 };
237
238 /*************************************************************************
239 * PAGE_RES_IT - Page results iterator
240 *************************************************************************/
241
242 class PAGE_RES_IT
243 {
244 public:
245 PAGE_RES * page_res; //page being iterated
246
PAGE_RES_IT()247 PAGE_RES_IT() {
248 } //empty contructor
249
PAGE_RES_IT(PAGE_RES * the_page_res)250 PAGE_RES_IT( //empty contructor
251 PAGE_RES *the_page_res) { //page result
252 page_res = the_page_res;
253 restart_page(); //ready to scan
254 }
255
256 WERD_RES *restart_page(); //get ready
257
258 WERD_RES *internal_forward( //get next word
259 BOOL8 new_block);
260
forward()261 WERD_RES *forward() { //get next word
262 return internal_forward (FALSE);
263 }
264
265 WERD_RES *forward_block(); //get first word in
266 //next non-empty block
prev_word()267 WERD_RES *prev_word() { //previous word
268 return prev_word_res;
269 }
prev_row()270 ROW_RES *prev_row() { //row of prev word
271 return prev_row_res;
272 }
prev_block()273 BLOCK_RES *prev_block() { //block of prev word
274 return prev_block_res;
275 }
word()276 WERD_RES *word() { //current word
277 return word_res;
278 }
row()279 ROW_RES *row() { //row of current word
280 return row_res;
281 }
block()282 BLOCK_RES *block() { //block of cur. word
283 return block_res;
284 }
next_word()285 WERD_RES *next_word() { //next word
286 return next_word_res;
287 }
next_row()288 ROW_RES *next_row() { //row of next word
289 return next_row_res;
290 }
next_block()291 BLOCK_RES *next_block() { //block of next word
292 return next_block_res;
293 }
294 void rej_stat_word(); //for page/block/row
295
296 private:
297 WERD_RES * prev_word_res; //previous word
298 ROW_RES *prev_row_res; //row of prev word
299 BLOCK_RES *prev_block_res; //block of prev word
300
301 WERD_RES *word_res; //current word
302 ROW_RES *row_res; //row of current word
303 BLOCK_RES *block_res; //block of cur. word
304
305 WERD_RES *next_word_res; //next word
306 ROW_RES *next_row_res; //row of next word
307 BLOCK_RES *next_block_res; //block of next word
308
309 BLOCK_RES_IT block_res_it; //iterators
310 ROW_RES_IT row_res_it;
311 WERD_RES_IT word_res_it;
312 };
313 #endif
314