• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**********************************************************************
2  * File:        pageres.h  (Formerly page_res.h)
3  * Description: Results classes used by control.c
4  * Author:		Phil Cheatle
5  * Created:     Tue Sep 22 08:42:49 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 #ifndef           PAGERES_H
20 #define           PAGERES_H
21 
22 #include          "elst.h"
23 #include          "ocrblock.h"
24 #include          "ocrrow.h"
25 #include          "werd.h"
26 #include          "ratngs.h"
27 #include          "rejctmap.h"
28 #include          "notdll.h"
29 #include          "notdll.h"
30 
31 /* Forward declarations */
32 
33 class BLOCK_RES;
34 
35 ELISTIZEH (BLOCK_RES) CLISTIZEH (BLOCK_RES)
36 class
37 ROW_RES;
38 
ELISTIZEH(ROW_RES)39 ELISTIZEH (ROW_RES)
40 class WERD_RES;
41 
42 ELISTIZEH (WERD_RES)
43 /*************************************************************************
44  * PAGE_RES - Page results
45  *************************************************************************/
46 class PAGE_RES                   //page result
47 {
48   public:
49     inT32 char_count;
50     inT32 rej_count;
51     BLOCK_RES_LIST block_res_list;
52     BOOL8 rejected;
53 
54     PAGE_RES() {
55     }                            //empty constructor
56 
57     PAGE_RES(                          //simple constructor
58              BLOCK_LIST *block_list);  //real blocks
59 
60     ~PAGE_RES () {               //destructor
61     }
62 };
63 
64 /*************************************************************************
65  * BLOCK_RES - Block results
66  *************************************************************************/
67 
68 class BLOCK_RES:public ELIST_LINK
69                                  //page block result
70 {
71   public:
72     BLOCK * block;               //real block
73     inT32 char_count;            //chars in block
74     inT32 rej_count;             //rejected chars
75     inT16 font_class;            //
76     inT16 row_count;
77     float x_height;
78     BOOL8 font_assigned;         // block already
79     //      processed
80     BOOL8 bold;                  // all bold
81     BOOL8 italic;                // all italic
82 
83     ROW_RES_LIST row_res_list;
84 
BLOCK_RES()85     BLOCK_RES() {
86     }                            //empty constructor
87 
88     BLOCK_RES(                    //simple constructor
89               BLOCK *the_block);  //real block
90 
~BLOCK_RES()91     ~BLOCK_RES () {              //destructor
92     }
93 };
94 
95 /*************************************************************************
96  * ROW_RES - Row results
97  *************************************************************************/
98 
99 class ROW_RES:public ELIST_LINK  //row result
100 {
101   public:
102     ROW * row;                   //real row
103     inT32 char_count;            //chars in block
104     inT32 rej_count;             //rejected chars
105     inT32 whole_word_rej_count;  //rejs in total rej wds
106     WERD_RES_LIST word_res_list;
107     float font_class_score;
108     inT16 font_class;            //
109     inT32 italic;
110     inT32 bold;
111     inT8 font1;                  //primary font
112     inT8 font1_count;            //no of voters
113     inT8 font2;                  //secondary font
114     inT8 font2_count;            //no of voters
115 
ROW_RES()116     ROW_RES() {
117     }                            //empty constructor
118 
119     ROW_RES(                //simple constructor
120             ROW *the_row);  //real row
121 
~ROW_RES()122     ~ROW_RES () {                //destructor
123     }
124 };
125 
126 /*************************************************************************
127  * WERD_RES - Word results
128  *************************************************************************/
// Crunch setting stored per word in WERD_RES::unlv_crunch_mode.
// The numeric values are spelled out so they cannot shift if an
// enumerator is ever inserted; they equal the implicit values.
// NOTE(review): the precise meaning of each mode is defined by the code
// that reads unlv_crunch_mode, which is not part of this header.
enum CRUNCH_MODE
{
  CR_NONE = 0,                   //set by the WERD_RES(WERD*) constructor
  CR_KEEP_SPACE = 1,
  CR_LOOSE_SPACE = 2,
  CR_DELETE = 3
};
136 
137 class WERD_RES:public ELIST_LINK //word result
138 {
139   public:
140     WERD * word;                 //non-bln real word
141     WERD *outword;               //bln best choice
142     //segmentation
143     DENORM denorm;               //for use on outword
144     WERD_CHOICE *best_choice;    //tess output
145     WERD_CHOICE *raw_choice;     //top choice permuter
146     WERD_CHOICE *ep_choice;      //ep text
147     REJMAP reject_map;           //best_choice rejects
148     BOOL8 tess_failed;
149     /*
150       If tess_failed is TRUE, one of the following tests failed when Tess
151       returned:
152       - The outword blob list was not the same length as the best_choice string;
153       - The best_choice string contained ALL blanks;
154       - The best_choice string was zero length
155     */
156     BOOL8 tess_accepted;         //Tess thinks its ok?
157     BOOL8 tess_would_adapt;      //Tess would adapt?
158     BOOL8 done;                  //ready for output?
159     inT8 italic;
160     inT8 bold;
161     inT8 font1;                  //primary font
162     inT8 font1_count;            //no of voters
163     inT8 font2;                  //secondary font
164     inT8 font2_count;            //no of voters
165     CRUNCH_MODE unlv_crunch_mode;
166     float x_height;              //Post match estimate
167     float caps_height;           //Post match estimate
168     BOOL8 guessed_x_ht;
169     BOOL8 guessed_caps_ht;
170     /*
171       To deal with fuzzy spaces we need to be able to combine "words" to form
172       combinations when we suspect that the gap is a non-space. The (new) text
173       ord code generates separate words for EVERY fuzzy gap - flags in the word
174       indicate whether the gap is below the threshold (fuzzy kern) and is thus
175       NOT a real word break by default, or above the threshold (fuzzy space) and
176       this is a real word break by default.
177 
178       The WERD_RES list contains all these words PLUS "combination" words built
179       out of (copies of) the words split by fuzzy kerns. The separate parts have
180       their "part_of_combo" flag set true and should be IGNORED on a default
181       reading of the list.
182 
183       Combination words are FOLLOWED by the sequence of part_of_combo words
184       which they combine.
185     */
186     BOOL8 combination;           //of two fuzzy gap wds
187     BOOL8 part_of_combo;         //part of a combo
188     BOOL8 reject_spaces;         //Reject spacing?
189 
WERD_RES()190     WERD_RES() {
191     }                            //empty constructor
192 
WERD_RES(WERD * the_word)193     WERD_RES(                   //simple constructor
194              WERD *the_word) {  //real word
195       word = the_word;
196       outword = NULL;
197       best_choice = NULL;
198       raw_choice = NULL;
199       ep_choice = NULL;
200       tess_failed = FALSE;
201       tess_accepted = FALSE;
202       tess_would_adapt = FALSE;
203       done = FALSE;
204       unlv_crunch_mode = CR_NONE;
205       italic = FALSE;
206       bold = FALSE;
207       font1 = -1;
208       font1_count = 0;
209       font2 = -1;
210       font2_count = 0;
211       x_height = 0.0;
212       caps_height = 0.0;
213       guessed_x_ht = TRUE;
214       guessed_caps_ht = TRUE;
215       combination = FALSE;
216       part_of_combo = FALSE;
217       reject_spaces = FALSE;
218     }
WERD_RES(const WERD_RES & source)219     WERD_RES(const WERD_RES &source) {
220       *this = source;            //see operator=
221     }
222 
223     ~WERD_RES ();                //destructor
224 
225     WERD_RES& operator=(const WERD_RES& source);  //from this
226 
deep_copy(const WERD_RES * src)227     static WERD_RES* deep_copy(const WERD_RES* src) {
228       return new WERD_RES(*src);
229     }
230 
copy_on(WERD_RES * word_res)231     void copy_on(                       //copy blobs onto word
232                  WERD_RES *word_res) {  //from this word
233       word->set_flag (W_EOL, word_res->word->flag (W_EOL));
234       word->copy_on (word_res->word);
235     }
236 };
237 
238 /*************************************************************************
239  * PAGE_RES_IT - Page results iterator
240  *************************************************************************/
241 
242 class PAGE_RES_IT
243 {
244   public:
245     PAGE_RES * page_res;         //page being iterated
246 
PAGE_RES_IT()247     PAGE_RES_IT() {
248     }                            //empty contructor
249 
PAGE_RES_IT(PAGE_RES * the_page_res)250     PAGE_RES_IT(                           //empty contructor
251                 PAGE_RES *the_page_res) {  //page result
252       page_res = the_page_res;
253       restart_page();  //ready to scan
254     }
255 
256     WERD_RES *restart_page();  //get ready
257 
258     WERD_RES *internal_forward(  //get next word
259                                BOOL8 new_block);
260 
forward()261     WERD_RES *forward() {  //get next word
262       return internal_forward (FALSE);
263     }
264 
265     WERD_RES *forward_block();  //get first word in
266     //next non-empty block
prev_word()267     WERD_RES *prev_word() {  //previous word
268       return prev_word_res;
269     }
prev_row()270     ROW_RES *prev_row() {  //row of prev word
271       return prev_row_res;
272     }
prev_block()273     BLOCK_RES *prev_block() {  //block of prev word
274       return prev_block_res;
275     }
word()276     WERD_RES *word() {  //current word
277       return word_res;
278     }
row()279     ROW_RES *row() {  //row of current word
280       return row_res;
281     }
block()282     BLOCK_RES *block() {  //block of cur. word
283       return block_res;
284     }
next_word()285     WERD_RES *next_word() {  //next word
286       return next_word_res;
287     }
next_row()288     ROW_RES *next_row() {  //row of next word
289       return next_row_res;
290     }
next_block()291     BLOCK_RES *next_block() {  //block of next word
292       return next_block_res;
293     }
294     void rej_stat_word();  //for page/block/row
295 
296   private:
297     WERD_RES * prev_word_res;    //previous word
298     ROW_RES *prev_row_res;       //row of prev word
299     BLOCK_RES *prev_block_res;   //block of prev word
300 
301     WERD_RES *word_res;          //current word
302     ROW_RES *row_res;            //row of current word
303     BLOCK_RES *block_res;        //block of cur. word
304 
305     WERD_RES *next_word_res;     //next word
306     ROW_RES *next_row_res;       //row of next word
307     BLOCK_RES *next_block_res;   //block of next word
308 
309     BLOCK_RES_IT block_res_it;   //iterators
310     ROW_RES_IT row_res_it;
311     WERD_RES_IT word_res_it;
312 };
313 #endif
314