1 /* -*-C-*-
2 ********************************************************************************
3 *
4 * File: context.c (Formerly context.c)
5 * Description: Context checking functions
6 * Author: Mark Seaman, OCR Technology
7 * Created: Thu Feb 15 11:18:24 1990
8 * Modified: Tue Jul 9 17:38:16 1991 (Mark Seaman) marks@hpgrlt
9 * Language: C
10 * Package: N/A
11 * Status: Experimental (Do Not Distribute)
12 *
13 * (c) Copyright 1990, Hewlett-Packard Company.
14 ** Licensed under the Apache License, Version 2.0 (the "License");
15 ** you may not use this file except in compliance with the License.
16 ** You may obtain a copy of the License at
17 ** http://www.apache.org/licenses/LICENSE-2.0
18 ** Unless required by applicable law or agreed to in writing, software
19 ** distributed under the License is distributed on an "AS IS" BASIS,
20 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21 ** See the License for the specific language governing permissions and
22 ** limitations under the License.
23 *
24 *********************************************************************************/
25 #include "context.h"
26
27 #include "callcpp.h"
28 #include "ccutil.h"
29 #include "dict.h"
30 #include "globals.h"
31 #include "image.h"
32 #include "ratngs.h"
33 #include "tordvars.h"
34 #include "unicharset.h"
35
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <string.h>
39 #include <math.h>
40
41 // Initialize probability_in_context to point to a default implementation (a
42 // main program can override this).
43 PROBABILITY_IN_CONTEXT_FUNCTION probability_in_context = &def_probability_in_context;
44
// Default implementation installed in probability_in_context.
//
// All four arguments are ignored; the function unconditionally returns 0.0.
double def_probability_in_context(const char* context,
                                  int context_bytes,
                                  const char* character,
                                  int character_bytes) {
  // The parameters are deliberately unused; discard them explicitly so the
  // compiler does not warn about it.
  static_cast<void>(context);
  static_cast<void>(context_bytes);
  static_cast<void>(character);
  static_cast<void>(character_bytes);

  const double default_probability = 0.0;
  return default_probability;
}
55
/*----------------------------------------------------------------------
              V a r i a b l e s
----------------------------------------------------------------------*/
/* File to save choices; NULL whenever no choices file is open. */
static FILE *choice_file = NULL;

/*----------------------------------------------------------------------
              F u n c t i o n s
----------------------------------------------------------------------*/
/**********************************************************************
 * close_choices
 *
 * Close the choices file if one is open.
 *
 * The handle is reset to NULL afterwards so that a repeated call, or a
 * later write_choice_line(), sees "no file" instead of operating on a
 * stream that has already been closed (which would be undefined
 * behaviour).  Calling this with no file open is a no-op.
 **********************************************************************/
void close_choices() {
  if (choice_file) {
    fclose(choice_file);
    choice_file = NULL;  /* never keep a pointer to a closed stream */
  }
}
73
74 namespace tesseract {
75
76 /**********************************************************************
77 * case_ok
78 *
79 * Check a string to see if it matches a set of lexical rules.
80 **********************************************************************/
case_ok(const WERD_CHOICE & word,const UNICHARSET & unicharset)81 int Context::case_ok(const WERD_CHOICE &word,
82 const UNICHARSET &unicharset) {
83 static int case_state_table[6][4] = { {
84 /* 0. Begining of word */
85 /* P U L D */
86 /* -1. Error on case */
87 0, 1, 5, 4
88 },
89 { /* 1. After initial capital */
90 0, 3, 2, 4
91 },
92 { /* 2. After lower case */
93 0, -1, 2, -1
94 },
95 { /* 3. After upper case */
96 0, 3, -1, 4
97 },
98 { /* 4. After a digit */
99 0, -1, -1, 4
100 },
101 { /* 5. After initial lower case */
102 5, -1, 2, -1
103 },
104 };
105
106 register int last_state = 0;
107 register int state = 0;
108 register int x;
109
110 for (x = 0; x < word.length(); ++x) {
111 UNICHAR_ID ch_id = word.unichar_id(x);
112 if (unicharset.get_isupper(ch_id))
113 state = case_state_table[state][1];
114 else if (unicharset.get_islower(ch_id))
115 state = case_state_table[state][2];
116 else if (unicharset.get_isdigit(ch_id))
117 state = case_state_table[state][3];
118 else
119 state = case_state_table[state][0];
120
121 if (tord_debug_3)
122 tprintf("Case state = %d, char = %s\n", state,
123 unicharset.id_to_unichar(ch_id));
124 if (state == -1) {
125 /* Handle ACCRONYMs */
126 #if 0
127 if (word[x] == 's' &&
128 !isalpha (word[x + 1]) && !isdigit (word[x + 1]))
129 state = last_state;
130 else
131 #endif
132 return (FALSE);
133 }
134
135 last_state = state;
136 }
137 return state != 5; /*single lower is bad */
138 }
139 } // namespace tesseract
140
141
142 /**********************************************************************
143 * write_choice_line
144 *
145 * Write a blank line to the choices file. This will indicate that
146 * there is a new word that is following.
147 **********************************************************************/
write_choice_line()148 void write_choice_line() {
149 if (choice_file) {
150 fprintf (choice_file, "\n");
151 fflush(choice_file);
152 }
153 }
154