• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**********************************************************************
2  * File:        rejctmap.cpp  (Formerly rejmap.c)
3  * Description: REJ and REJMAP class functions.
4  * Author:		Phil Cheatle
5  * Created:		Thu Jun  9 13:46:38 BST 1994
6  *
7  * (C) Copyright 1994, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "mfcpch.h"
21 #include          "hosthplb.h"
22 //#include                                      "basefile.h"
23 #include          "rejctmap.h"
24 #include          "secname.h"
25 
26 #define EXTERN
27 
28 EXTERN BOOL_VAR (rejword_only_set_if_accepted, TRUE, "Mimic old reject_word");
29 EXTERN BOOL_VAR (rejmap_allow_more_good_qual, FALSE,
30 "Use initial good qual setting");
31 EXTERN BOOL_VAR (rej_use_1Il_rej, TRUE, "1Il rejection enabled");
32 
perm_rejected()33 BOOL8 REJ::perm_rejected() {  //Is char perm reject?
34   return (flag (R_TESS_FAILURE) ||
35     flag (R_SMALL_XHT) ||
36     flag (R_EDGE_CHAR) ||
37     flag (R_1IL_CONFLICT) ||
38     flag (R_POSTNN_1IL) ||
39     flag (R_REJ_CBLOB) ||
40     flag (R_BAD_REPETITION) || flag (R_MM_REJECT));
41 }
42 
43 
rej_before_nn_accept()44 BOOL8 REJ::rej_before_nn_accept() {
45   return flag (R_POOR_MATCH) ||
46     flag (R_NOT_TESS_ACCEPTED) ||
47     flag (R_CONTAINS_BLANKS) || flag (R_BAD_PERMUTER);
48 }
49 
50 
rej_between_nn_and_mm()51 BOOL8 REJ::rej_between_nn_and_mm() {
52   return flag (R_HYPHEN) ||
53     flag (R_DUBIOUS) ||
54     flag (R_NO_ALPHANUMS) || flag (R_MOSTLY_REJ) || flag (R_XHT_FIXUP);
55 }
56 
57 
rej_between_mm_and_quality_accept()58 BOOL8 REJ::rej_between_mm_and_quality_accept() {
59   return flag (R_BAD_QUALITY);
60 }
61 
62 
rej_between_quality_and_minimal_rej_accept()63 BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() {
64   return flag (R_DOC_REJ) ||
65     flag (R_BLOCK_REJ) || flag (R_ROW_REJ) || flag (R_UNLV_REJ);
66 }
67 
68 
rej_before_mm_accept()69 BOOL8 REJ::rej_before_mm_accept() {
70   return rej_between_nn_and_mm () ||
71     (rej_before_nn_accept () &&
72     !flag (R_NN_ACCEPT) && !flag (R_HYPHEN_ACCEPT));
73 }
74 
75 
rej_before_quality_accept()76 BOOL8 REJ::rej_before_quality_accept() {
77   return rej_between_mm_and_quality_accept () ||
78     (!flag (R_MM_ACCEPT) && rej_before_mm_accept ());
79 }
80 
81 
rejected()82 BOOL8 REJ::rejected() {  //Is char rejected?
83   if (flag (R_MINIMAL_REJ_ACCEPT))
84     return FALSE;
85   else
86     return (perm_rejected () ||
87       rej_between_quality_and_minimal_rej_accept () ||
88       (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ()));
89 }
90 
91 
accept_if_good_quality()92 BOOL8 REJ::accept_if_good_quality() {  //potential rej?
93   return (rejected () &&
94     !perm_rejected () &&
95     flag (R_BAD_PERMUTER) &&
96     !flag (R_POOR_MATCH) &&
97     !flag (R_NOT_TESS_ACCEPTED) &&
98     !flag (R_CONTAINS_BLANKS) &&
99     (rejmap_allow_more_good_qual ||
100     (!rej_between_nn_and_mm () &&
101     !rej_between_mm_and_quality_accept () &&
102     !rej_between_quality_and_minimal_rej_accept ())));
103 }
104 
105 
setrej_tess_failure()106 void REJ::setrej_tess_failure() {  //Tess generated blank
107   set_flag(R_TESS_FAILURE);
108 }
109 
110 
setrej_small_xht()111 void REJ::setrej_small_xht() {  //Small xht char/wd
112   set_flag(R_SMALL_XHT);
113 }
114 
115 
setrej_edge_char()116 void REJ::setrej_edge_char() {  //Close to image edge
117   set_flag(R_EDGE_CHAR);
118 }
119 
120 
setrej_1Il_conflict()121 void REJ::setrej_1Il_conflict() {  //Initial reject map
122   if (rej_use_1Il_rej)
123     set_flag(R_1IL_CONFLICT);
124 }
125 
126 
setrej_postNN_1Il()127 void REJ::setrej_postNN_1Il() {  //1Il after NN
128   set_flag(R_POSTNN_1IL);
129 }
130 
131 
setrej_rej_cblob()132 void REJ::setrej_rej_cblob() {  //Insert duff blob
133   set_flag(R_REJ_CBLOB);
134 }
135 
136 
setrej_mm_reject()137 void REJ::setrej_mm_reject() {  //Matrix matcher
138   set_flag(R_MM_REJECT);
139 }
140 
141 
setrej_bad_repetition()142 void REJ::setrej_bad_repetition() {  //Odd repeated char
143   set_flag(R_BAD_REPETITION);
144 }
145 
146 
setrej_poor_match()147 void REJ::setrej_poor_match() {  //Failed Rays heuristic
148   set_flag(R_POOR_MATCH);
149 }
150 
151 
setrej_not_tess_accepted()152 void REJ::setrej_not_tess_accepted() {
153                                  //TEMP reject_word
154   set_flag(R_NOT_TESS_ACCEPTED);
155 }
156 
157 
setrej_contains_blanks()158 void REJ::setrej_contains_blanks() {
159                                  //TEMP reject_word
160   set_flag(R_CONTAINS_BLANKS);
161 }
162 
163 
setrej_bad_permuter()164 void REJ::setrej_bad_permuter() {  //POTENTIAL reject_word
165   set_flag(R_BAD_PERMUTER);
166 }
167 
168 
setrej_hyphen()169 void REJ::setrej_hyphen() {  //PostNN dubious hyphen or .
170   set_flag(R_HYPHEN);
171 }
172 
173 
setrej_dubious()174 void REJ::setrej_dubious() {  //PostNN dubious limit
175   set_flag(R_DUBIOUS);
176 }
177 
178 
setrej_no_alphanums()179 void REJ::setrej_no_alphanums() {  //TEMP reject_word
180   set_flag(R_NO_ALPHANUMS);
181 }
182 
183 
setrej_mostly_rej()184 void REJ::setrej_mostly_rej() {  //TEMP reject_word
185   set_flag(R_MOSTLY_REJ);
186 }
187 
188 
setrej_xht_fixup()189 void REJ::setrej_xht_fixup() {  //xht fixup
190   set_flag(R_XHT_FIXUP);
191 }
192 
193 
setrej_bad_quality()194 void REJ::setrej_bad_quality() {  //TEMP reject_word
195   set_flag(R_BAD_QUALITY);
196 }
197 
198 
setrej_doc_rej()199 void REJ::setrej_doc_rej() {  //TEMP reject_word
200   set_flag(R_DOC_REJ);
201 }
202 
203 
setrej_block_rej()204 void REJ::setrej_block_rej() {  //TEMP reject_word
205   set_flag(R_BLOCK_REJ);
206 }
207 
208 
setrej_row_rej()209 void REJ::setrej_row_rej() {  //TEMP reject_word
210   set_flag(R_ROW_REJ);
211 }
212 
213 
setrej_unlv_rej()214 void REJ::setrej_unlv_rej() {  //TEMP reject_word
215   set_flag(R_UNLV_REJ);
216 }
217 
218 
setrej_hyphen_accept()219 void REJ::setrej_hyphen_accept() {  //NN Flipped a char
220   set_flag(R_HYPHEN_ACCEPT);
221 }
222 
223 
setrej_nn_accept()224 void REJ::setrej_nn_accept() {  //NN Flipped a char
225   set_flag(R_NN_ACCEPT);
226 }
227 
228 
setrej_mm_accept()229 void REJ::setrej_mm_accept() {  //Matrix matcher
230   set_flag(R_MM_ACCEPT);
231 }
232 
233 
setrej_quality_accept()234 void REJ::setrej_quality_accept() {  //Quality flip a char
235   set_flag(R_QUALITY_ACCEPT);
236 }
237 
238 
setrej_minimal_rej_accept()239 void REJ::setrej_minimal_rej_accept() {
240                                  //Accept all except blank
241   set_flag(R_MINIMAL_REJ_ACCEPT);
242 }
243 
244 
full_print(FILE * fp)245 void REJ::full_print(FILE *fp) {
246   #ifndef SECURE_NAMES
247 
248   fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F");
249   fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F");
250   fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F");
251   fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F");
252   fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F");
253   fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F");
254   fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F");
255   fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F");
256   fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F");
257   fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n",
258     flag (R_NOT_TESS_ACCEPTED) ? "T" : "F");
259   fprintf (fp, "R_CONTAINS_BLANKS: %s\n",
260     flag (R_CONTAINS_BLANKS) ? "T" : "F");
261   fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F");
262   fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F");
263   fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F");
264   fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F");
265   fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F");
266   fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F");
267   fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F");
268   fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F");
269   fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F");
270   fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F");
271   fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F");
272   fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F");
273   fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F");
274   fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F");
275   fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F");
276   fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
277     flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
278   #endif
279 }
280 
281 
// The REJMAP class has been hacked to use alloc_struct instead of new[].
// This is done only to reduce memory fragmentation, and it is rather kludgy.
// alloc_struct bypasses the call to the constructor of REJ on each
// array element. Although the constructor is empty, the BITS16 members
// do have a constructor which sets all the flags to 0. The memset
// replaces this functionality.
288 
// Copy constructor: builds an element-by-element copy of source.
// An empty (length <= 0) source yields a map with a NULL element array.
// No memset is done here, unlike initialise(), because every allocated
// element is overwritten by assignment straight away.
REJMAP::REJMAP(const REJMAP &source) {
  len = source.length();

  if (len <= 0) {
    ptr = NULL;
    return;
  }

  ptr = (REJ *) alloc_struct(len * sizeof(REJ), "REJ");
  for (int idx = 0; idx < len; idx++)
    ptr[idx] = source.ptr[idx];
}
309 
310 
// Assignment: resizes this map to source's length and copies every element.
// Returns *this.
//
// Self-assignment is handled explicitly. initialise() frees the current
// array and allocates a fresh zeroed one, so without the guard "map = map"
// would read from storage that has just been released and would leave the
// map with its contents wiped.
REJMAP &REJMAP::operator=(const REJMAP &source) {
  if (this != &source) {
    initialise(source.len);      // discard old elements, allocate new ones
    for (int i = 0; i < len; i++)
      ptr[i] = source.ptr[i];
  }
  return *this;
}
330 
331 
initialise(inT16 length)332 void REJMAP::initialise(  //Redefine map
333                         inT16 length) {
334   if (ptr != NULL)
335     free_struct (ptr, len * sizeof (REJ), "REJ");
336   len = length;
337   if (len > 0)
338     ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
339       0, len * sizeof (REJ));
340   else
341     ptr = NULL;
342 }
343 
344 
accept_count()345 inT16 REJMAP::accept_count() {  //How many accepted?
346   int i;
347   inT16 count = 0;
348 
349   for (i = 0; i < len; i++) {
350     if (ptr[i].accepted ())
351       count++;
352   }
353   return count;
354 }
355 
356 
recoverable_rejects()357 BOOL8 REJMAP::recoverable_rejects() {  //Any non perm rejs?
358   int i;
359 
360   for (i = 0; i < len; i++) {
361     if (ptr[i].recoverable ())
362       return TRUE;
363   }
364   return FALSE;
365 }
366 
367 
quality_recoverable_rejects()368 BOOL8 REJMAP::quality_recoverable_rejects() {  //Any potential rejs?
369   int i;
370 
371   for (i = 0; i < len; i++) {
372     if (ptr[i].accept_if_good_quality ())
373       return TRUE;
374   }
375   return FALSE;
376 }
377 
378 
remove_pos(inT16 pos)379 void REJMAP::remove_pos(           //Cut out an element
380                         inT16 pos  //element to remove
381                        ) {
382   REJ *new_ptr;                  //new, smaller map
383   int i;
384 
385   ASSERT_HOST (pos >= 0);
386   ASSERT_HOST (pos < len);
387   ASSERT_HOST (len > 0);
388 
389   len--;
390   if (len > 0)
391     new_ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
392       0, len * sizeof (REJ));
393   else
394     new_ptr = NULL;
395 
396   for (i = 0; i < pos; i++)
397     new_ptr[i] = ptr[i];         //copy pre pos
398 
399   for (; pos < len; pos++)
400     new_ptr[pos] = ptr[pos + 1]; //copy post pos
401 
402                                  //delete old map
403   free_struct (ptr, (len + 1) * sizeof (REJ), "REJ");
404   ptr = new_ptr;
405 }
406 
407 
print(FILE * fp)408 void REJMAP::print(FILE *fp) {
409   int i;
410   char buff[512];
411 
412   for (i = 0; i < len; i++) {
413     buff[i] = ptr[i].display_char ();
414   }
415   buff[i] = '\0';
416   fprintf (fp, "\"%s\"", buff);
417 }
418 
419 
full_print(FILE * fp)420 void REJMAP::full_print(FILE *fp) {
421   int i;
422 
423   for (i = 0; i < len; i++) {
424     ptr[i].full_print (fp);
425     fprintf (fp, "\n");
426   }
427 }
428 
429 
rej_word_small_xht()430 void REJMAP::rej_word_small_xht() {  //Reject whole word
431   int i;
432 
433   for (i = 0; i < len; i++) {
434     ptr[i].setrej_small_xht ();
435   }
436 }
437 
438 
rej_word_tess_failure()439 void REJMAP::rej_word_tess_failure() {  //Reject whole word
440   int i;
441 
442   for (i = 0; i < len; i++) {
443     ptr[i].setrej_tess_failure ();
444   }
445 }
446 
447 
rej_word_not_tess_accepted()448 void REJMAP::rej_word_not_tess_accepted() {  //Reject whole word
449   int i;
450 
451   for (i = 0; i < len; i++) {
452     if (!rejword_only_set_if_accepted || ptr[i].accepted ())
453       ptr[i].setrej_not_tess_accepted ();
454   }
455 }
456 
457 
rej_word_contains_blanks()458 void REJMAP::rej_word_contains_blanks() {  //Reject whole word
459   int i;
460 
461   for (i = 0; i < len; i++) {
462     if (!rejword_only_set_if_accepted || ptr[i].accepted ())
463       ptr[i].setrej_contains_blanks ();
464   }
465 }
466 
467 
rej_word_bad_permuter()468 void REJMAP::rej_word_bad_permuter() {  //Reject whole word
469   int i;
470 
471   for (i = 0; i < len; i++) {
472     if (!rejword_only_set_if_accepted || ptr[i].accepted ())
473       ptr[i].setrej_bad_permuter ();
474   }
475 }
476 
477 
rej_word_xht_fixup()478 void REJMAP::rej_word_xht_fixup() {  //Reject whole word
479   int i;
480 
481   for (i = 0; i < len; i++) {
482     if (!rejword_only_set_if_accepted || ptr[i].accepted ())
483       ptr[i].setrej_xht_fixup ();
484   }
485 }
486 
487 
rej_word_no_alphanums()488 void REJMAP::rej_word_no_alphanums() {  //Reject whole word
489   int i;
490 
491   for (i = 0; i < len; i++) {
492     if (!rejword_only_set_if_accepted || ptr[i].accepted ())
493       ptr[i].setrej_no_alphanums ();
494   }
495 }
496 
497 
rej_word_mostly_rej()498 void REJMAP::rej_word_mostly_rej() {  //Reject whole word
499   int i;
500 
501   for (i = 0; i < len; i++) {
502     if (!rejword_only_set_if_accepted || ptr[i].accepted ())
503       ptr[i].setrej_mostly_rej ();
504   }
505 }
506 
507 
rej_word_bad_quality()508 void REJMAP::rej_word_bad_quality() {  //Reject whole word
509   int i;
510 
511   for (i = 0; i < len; i++) {
512     if (!rejword_only_set_if_accepted || ptr[i].accepted ())
513       ptr[i].setrej_bad_quality ();
514   }
515 }
516 
517 
rej_word_doc_rej()518 void REJMAP::rej_word_doc_rej() {  //Reject whole word
519   int i;
520 
521   for (i = 0; i < len; i++) {
522     if (!rejword_only_set_if_accepted || ptr[i].accepted ())
523       ptr[i].setrej_doc_rej ();
524   }
525 }
526 
527 
rej_word_block_rej()528 void REJMAP::rej_word_block_rej() {  //Reject whole word
529   int i;
530 
531   for (i = 0; i < len; i++) {
532     if (!rejword_only_set_if_accepted || ptr[i].accepted ())
533       ptr[i].setrej_block_rej ();
534   }
535 }
536 
537 
rej_word_row_rej()538 void REJMAP::rej_word_row_rej() {  //Reject whole word
539   int i;
540 
541   for (i = 0; i < len; i++) {
542     if (!rejword_only_set_if_accepted || ptr[i].accepted ())
543       ptr[i].setrej_row_rej ();
544   }
545 }
546