1 /**********************************************************************
2 * File: rejctmap.cpp (Formerly rejmap.c)
3 * Description: REJ and REJMAP class functions.
4 * Author: Phil Cheatle
5 * Created: Thu Jun 9 13:46:38 BST 1994
6 *
7 * (C) Copyright 1994, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19
20 #include "mfcpch.h"
21 #include "hosthplb.h"
22 //#include "basefile.h"
23 #include "rejctmap.h"
24 #include "secname.h"
25
26 #define EXTERN
27
28 EXTERN BOOL_VAR (rejword_only_set_if_accepted, TRUE, "Mimic old reject_word");
29 EXTERN BOOL_VAR (rejmap_allow_more_good_qual, FALSE,
30 "Use initial good qual setting");
31 EXTERN BOOL_VAR (rej_use_1Il_rej, TRUE, "1Il rejection enabled");
32
perm_rejected()33 BOOL8 REJ::perm_rejected() { //Is char perm reject?
34 return (flag (R_TESS_FAILURE) ||
35 flag (R_SMALL_XHT) ||
36 flag (R_EDGE_CHAR) ||
37 flag (R_1IL_CONFLICT) ||
38 flag (R_POSTNN_1IL) ||
39 flag (R_REJ_CBLOB) ||
40 flag (R_BAD_REPETITION) || flag (R_MM_REJECT));
41 }
42
43
rej_before_nn_accept()44 BOOL8 REJ::rej_before_nn_accept() {
45 return flag (R_POOR_MATCH) ||
46 flag (R_NOT_TESS_ACCEPTED) ||
47 flag (R_CONTAINS_BLANKS) || flag (R_BAD_PERMUTER);
48 }
49
50
rej_between_nn_and_mm()51 BOOL8 REJ::rej_between_nn_and_mm() {
52 return flag (R_HYPHEN) ||
53 flag (R_DUBIOUS) ||
54 flag (R_NO_ALPHANUMS) || flag (R_MOSTLY_REJ) || flag (R_XHT_FIXUP);
55 }
56
57
rej_between_mm_and_quality_accept()58 BOOL8 REJ::rej_between_mm_and_quality_accept() {
59 return flag (R_BAD_QUALITY);
60 }
61
62
rej_between_quality_and_minimal_rej_accept()63 BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() {
64 return flag (R_DOC_REJ) ||
65 flag (R_BLOCK_REJ) || flag (R_ROW_REJ) || flag (R_UNLV_REJ);
66 }
67
68
rej_before_mm_accept()69 BOOL8 REJ::rej_before_mm_accept() {
70 return rej_between_nn_and_mm () ||
71 (rej_before_nn_accept () &&
72 !flag (R_NN_ACCEPT) && !flag (R_HYPHEN_ACCEPT));
73 }
74
75
rej_before_quality_accept()76 BOOL8 REJ::rej_before_quality_accept() {
77 return rej_between_mm_and_quality_accept () ||
78 (!flag (R_MM_ACCEPT) && rej_before_mm_accept ());
79 }
80
81
rejected()82 BOOL8 REJ::rejected() { //Is char rejected?
83 if (flag (R_MINIMAL_REJ_ACCEPT))
84 return FALSE;
85 else
86 return (perm_rejected () ||
87 rej_between_quality_and_minimal_rej_accept () ||
88 (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ()));
89 }
90
91
accept_if_good_quality()92 BOOL8 REJ::accept_if_good_quality() { //potential rej?
93 return (rejected () &&
94 !perm_rejected () &&
95 flag (R_BAD_PERMUTER) &&
96 !flag (R_POOR_MATCH) &&
97 !flag (R_NOT_TESS_ACCEPTED) &&
98 !flag (R_CONTAINS_BLANKS) &&
99 (rejmap_allow_more_good_qual ||
100 (!rej_between_nn_and_mm () &&
101 !rej_between_mm_and_quality_accept () &&
102 !rej_between_quality_and_minimal_rej_accept ())));
103 }
104
105
setrej_tess_failure()106 void REJ::setrej_tess_failure() { //Tess generated blank
107 set_flag(R_TESS_FAILURE);
108 }
109
110
setrej_small_xht()111 void REJ::setrej_small_xht() { //Small xht char/wd
112 set_flag(R_SMALL_XHT);
113 }
114
115
setrej_edge_char()116 void REJ::setrej_edge_char() { //Close to image edge
117 set_flag(R_EDGE_CHAR);
118 }
119
120
setrej_1Il_conflict()121 void REJ::setrej_1Il_conflict() { //Initial reject map
122 if (rej_use_1Il_rej)
123 set_flag(R_1IL_CONFLICT);
124 }
125
126
setrej_postNN_1Il()127 void REJ::setrej_postNN_1Il() { //1Il after NN
128 set_flag(R_POSTNN_1IL);
129 }
130
131
setrej_rej_cblob()132 void REJ::setrej_rej_cblob() { //Insert duff blob
133 set_flag(R_REJ_CBLOB);
134 }
135
136
setrej_mm_reject()137 void REJ::setrej_mm_reject() { //Matrix matcher
138 set_flag(R_MM_REJECT);
139 }
140
141
setrej_bad_repetition()142 void REJ::setrej_bad_repetition() { //Odd repeated char
143 set_flag(R_BAD_REPETITION);
144 }
145
146
setrej_poor_match()147 void REJ::setrej_poor_match() { //Failed Rays heuristic
148 set_flag(R_POOR_MATCH);
149 }
150
151
setrej_not_tess_accepted()152 void REJ::setrej_not_tess_accepted() {
153 //TEMP reject_word
154 set_flag(R_NOT_TESS_ACCEPTED);
155 }
156
157
setrej_contains_blanks()158 void REJ::setrej_contains_blanks() {
159 //TEMP reject_word
160 set_flag(R_CONTAINS_BLANKS);
161 }
162
163
setrej_bad_permuter()164 void REJ::setrej_bad_permuter() { //POTENTIAL reject_word
165 set_flag(R_BAD_PERMUTER);
166 }
167
168
setrej_hyphen()169 void REJ::setrej_hyphen() { //PostNN dubious hyphen or .
170 set_flag(R_HYPHEN);
171 }
172
173
setrej_dubious()174 void REJ::setrej_dubious() { //PostNN dubious limit
175 set_flag(R_DUBIOUS);
176 }
177
178
setrej_no_alphanums()179 void REJ::setrej_no_alphanums() { //TEMP reject_word
180 set_flag(R_NO_ALPHANUMS);
181 }
182
183
setrej_mostly_rej()184 void REJ::setrej_mostly_rej() { //TEMP reject_word
185 set_flag(R_MOSTLY_REJ);
186 }
187
188
setrej_xht_fixup()189 void REJ::setrej_xht_fixup() { //xht fixup
190 set_flag(R_XHT_FIXUP);
191 }
192
193
setrej_bad_quality()194 void REJ::setrej_bad_quality() { //TEMP reject_word
195 set_flag(R_BAD_QUALITY);
196 }
197
198
setrej_doc_rej()199 void REJ::setrej_doc_rej() { //TEMP reject_word
200 set_flag(R_DOC_REJ);
201 }
202
203
setrej_block_rej()204 void REJ::setrej_block_rej() { //TEMP reject_word
205 set_flag(R_BLOCK_REJ);
206 }
207
208
setrej_row_rej()209 void REJ::setrej_row_rej() { //TEMP reject_word
210 set_flag(R_ROW_REJ);
211 }
212
213
setrej_unlv_rej()214 void REJ::setrej_unlv_rej() { //TEMP reject_word
215 set_flag(R_UNLV_REJ);
216 }
217
218
setrej_hyphen_accept()219 void REJ::setrej_hyphen_accept() { //NN Flipped a char
220 set_flag(R_HYPHEN_ACCEPT);
221 }
222
223
setrej_nn_accept()224 void REJ::setrej_nn_accept() { //NN Flipped a char
225 set_flag(R_NN_ACCEPT);
226 }
227
228
setrej_mm_accept()229 void REJ::setrej_mm_accept() { //Matrix matcher
230 set_flag(R_MM_ACCEPT);
231 }
232
233
setrej_quality_accept()234 void REJ::setrej_quality_accept() { //Quality flip a char
235 set_flag(R_QUALITY_ACCEPT);
236 }
237
238
setrej_minimal_rej_accept()239 void REJ::setrej_minimal_rej_accept() {
240 //Accept all except blank
241 set_flag(R_MINIMAL_REJ_ACCEPT);
242 }
243
244
full_print(FILE * fp)245 void REJ::full_print(FILE *fp) {
246 #ifndef SECURE_NAMES
247
248 fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F");
249 fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F");
250 fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F");
251 fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F");
252 fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F");
253 fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F");
254 fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F");
255 fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F");
256 fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F");
257 fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n",
258 flag (R_NOT_TESS_ACCEPTED) ? "T" : "F");
259 fprintf (fp, "R_CONTAINS_BLANKS: %s\n",
260 flag (R_CONTAINS_BLANKS) ? "T" : "F");
261 fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F");
262 fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F");
263 fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F");
264 fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F");
265 fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F");
266 fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F");
267 fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F");
268 fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F");
269 fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F");
270 fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F");
271 fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F");
272 fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F");
273 fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F");
274 fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F");
275 fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F");
276 fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
277 flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
278 #endif
279 }
280
281
// The REJMAP class has been hacked to use alloc_struct instead of new [].
// This is only to reduce memory fragmentation, and it is rather kludgy.
// alloc_struct bypasses the call to the constructor of REJ on each
// array element. Although the constructor is empty, the BITS16 members
// do have a constructor which sets all the flags to 0. The memset
// replaces this functionality.
288
// Copy constructor: builds an element-by-element copy of source.
// A source with no elements yields a map with a NULL ptr.
REJMAP::REJMAP(const REJMAP &source) {
  len = source.length();

  if (len <= 0) {
    ptr = NULL;
    return;
  }

  // Every element is overwritten by the copy loop, so the freshly
  // allocated storage is not zeroed first.
  ptr = (REJ *) alloc_struct(len * sizeof(REJ), "REJ");
  for (int idx = 0; idx < len; idx++)
    ptr[idx] = source.ptr[idx];
}
309
310
// Assignment: makes this map an element-by-element copy of source.
// Any storage currently held is released by initialise() and replaced with
// storage sized for source.len. Returns *this.
REJMAP & REJMAP::operator= (const REJMAP & source) {
  // Self-assignment must be a no-op: initialise() frees ptr and hands back
  // a zeroed array, so copying from source.ptr afterwards would read freed
  // memory and the original contents would be lost.
  if (this == &source)
    return *this;

  initialise(source.len);
  for (int i = 0; i < len; i++)
    ptr[i] = source.ptr[i];
  return *this;
}
330
331
initialise(inT16 length)332 void REJMAP::initialise( //Redefine map
333 inT16 length) {
334 if (ptr != NULL)
335 free_struct (ptr, len * sizeof (REJ), "REJ");
336 len = length;
337 if (len > 0)
338 ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
339 0, len * sizeof (REJ));
340 else
341 ptr = NULL;
342 }
343
344
accept_count()345 inT16 REJMAP::accept_count() { //How many accepted?
346 int i;
347 inT16 count = 0;
348
349 for (i = 0; i < len; i++) {
350 if (ptr[i].accepted ())
351 count++;
352 }
353 return count;
354 }
355
356
recoverable_rejects()357 BOOL8 REJMAP::recoverable_rejects() { //Any non perm rejs?
358 int i;
359
360 for (i = 0; i < len; i++) {
361 if (ptr[i].recoverable ())
362 return TRUE;
363 }
364 return FALSE;
365 }
366
367
quality_recoverable_rejects()368 BOOL8 REJMAP::quality_recoverable_rejects() { //Any potential rejs?
369 int i;
370
371 for (i = 0; i < len; i++) {
372 if (ptr[i].accept_if_good_quality ())
373 return TRUE;
374 }
375 return FALSE;
376 }
377
378
remove_pos(inT16 pos)379 void REJMAP::remove_pos( //Cut out an element
380 inT16 pos //element to remove
381 ) {
382 REJ *new_ptr; //new, smaller map
383 int i;
384
385 ASSERT_HOST (pos >= 0);
386 ASSERT_HOST (pos < len);
387 ASSERT_HOST (len > 0);
388
389 len--;
390 if (len > 0)
391 new_ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
392 0, len * sizeof (REJ));
393 else
394 new_ptr = NULL;
395
396 for (i = 0; i < pos; i++)
397 new_ptr[i] = ptr[i]; //copy pre pos
398
399 for (; pos < len; pos++)
400 new_ptr[pos] = ptr[pos + 1]; //copy post pos
401
402 //delete old map
403 free_struct (ptr, (len + 1) * sizeof (REJ), "REJ");
404 ptr = new_ptr;
405 }
406
407
print(FILE * fp)408 void REJMAP::print(FILE *fp) {
409 int i;
410 char buff[512];
411
412 for (i = 0; i < len; i++) {
413 buff[i] = ptr[i].display_char ();
414 }
415 buff[i] = '\0';
416 fprintf (fp, "\"%s\"", buff);
417 }
418
419
full_print(FILE * fp)420 void REJMAP::full_print(FILE *fp) {
421 int i;
422
423 for (i = 0; i < len; i++) {
424 ptr[i].full_print (fp);
425 fprintf (fp, "\n");
426 }
427 }
428
429
rej_word_small_xht()430 void REJMAP::rej_word_small_xht() { //Reject whole word
431 int i;
432
433 for (i = 0; i < len; i++) {
434 ptr[i].setrej_small_xht ();
435 }
436 }
437
438
rej_word_tess_failure()439 void REJMAP::rej_word_tess_failure() { //Reject whole word
440 int i;
441
442 for (i = 0; i < len; i++) {
443 ptr[i].setrej_tess_failure ();
444 }
445 }
446
447
rej_word_not_tess_accepted()448 void REJMAP::rej_word_not_tess_accepted() { //Reject whole word
449 int i;
450
451 for (i = 0; i < len; i++) {
452 if (!rejword_only_set_if_accepted || ptr[i].accepted ())
453 ptr[i].setrej_not_tess_accepted ();
454 }
455 }
456
457
rej_word_contains_blanks()458 void REJMAP::rej_word_contains_blanks() { //Reject whole word
459 int i;
460
461 for (i = 0; i < len; i++) {
462 if (!rejword_only_set_if_accepted || ptr[i].accepted ())
463 ptr[i].setrej_contains_blanks ();
464 }
465 }
466
467
rej_word_bad_permuter()468 void REJMAP::rej_word_bad_permuter() { //Reject whole word
469 int i;
470
471 for (i = 0; i < len; i++) {
472 if (!rejword_only_set_if_accepted || ptr[i].accepted ())
473 ptr[i].setrej_bad_permuter ();
474 }
475 }
476
477
rej_word_xht_fixup()478 void REJMAP::rej_word_xht_fixup() { //Reject whole word
479 int i;
480
481 for (i = 0; i < len; i++) {
482 if (!rejword_only_set_if_accepted || ptr[i].accepted ())
483 ptr[i].setrej_xht_fixup ();
484 }
485 }
486
487
rej_word_no_alphanums()488 void REJMAP::rej_word_no_alphanums() { //Reject whole word
489 int i;
490
491 for (i = 0; i < len; i++) {
492 if (!rejword_only_set_if_accepted || ptr[i].accepted ())
493 ptr[i].setrej_no_alphanums ();
494 }
495 }
496
497
rej_word_mostly_rej()498 void REJMAP::rej_word_mostly_rej() { //Reject whole word
499 int i;
500
501 for (i = 0; i < len; i++) {
502 if (!rejword_only_set_if_accepted || ptr[i].accepted ())
503 ptr[i].setrej_mostly_rej ();
504 }
505 }
506
507
rej_word_bad_quality()508 void REJMAP::rej_word_bad_quality() { //Reject whole word
509 int i;
510
511 for (i = 0; i < len; i++) {
512 if (!rejword_only_set_if_accepted || ptr[i].accepted ())
513 ptr[i].setrej_bad_quality ();
514 }
515 }
516
517
rej_word_doc_rej()518 void REJMAP::rej_word_doc_rej() { //Reject whole word
519 int i;
520
521 for (i = 0; i < len; i++) {
522 if (!rejword_only_set_if_accepted || ptr[i].accepted ())
523 ptr[i].setrej_doc_rej ();
524 }
525 }
526
527
rej_word_block_rej()528 void REJMAP::rej_word_block_rej() { //Reject whole word
529 int i;
530
531 for (i = 0; i < len; i++) {
532 if (!rejword_only_set_if_accepted || ptr[i].accepted ())
533 ptr[i].setrej_block_rej ();
534 }
535 }
536
537
rej_word_row_rej()538 void REJMAP::rej_word_row_rej() { //Reject whole word
539 int i;
540
541 for (i = 0; i < len; i++) {
542 if (!rejword_only_set_if_accepted || ptr[i].accepted ())
543 ptr[i].setrej_row_rej ();
544 }
545 }
546