/* -*-C-*-
 ********************************************************************************
 *
 * File:        context.c  (Formerly context.c)
 * Description:  Context checking functions
 * Author:       Mark Seaman, OCR Technology
 * Created:      Thu Feb 15 11:18:24 1990
 * Modified:     Tue Jul  9 17:38:16 1991 (Mark Seaman) marks@hpgrlt
 * Language:     C
 * Package:      N/A
 * Status:       Experimental (Do Not Distribute)
 *
 * (c) Copyright 1990, Hewlett-Packard Company.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 *********************************************************************************/
25 #include "context.h"
26 
27 #include "callcpp.h"
28 #include "ccutil.h"
29 #include "dict.h"
30 #include "globals.h"
31 #include "image.h"
32 #include "ratngs.h"
33 #include "tordvars.h"
34 #include "unicharset.h"
35 
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <string.h>
39 #include <math.h>
40 
41 // Initialize probability_in_context to point to a default implementation (a
42 // main program can override this).
43 PROBABILITY_IN_CONTEXT_FUNCTION probability_in_context = &def_probability_in_context;
44 
/**********************************************************************
 * def_probability_in_context
 *
 * Default implementation behind the probability_in_context hook.
 * It ignores all four arguments and always returns 0.0.
 **********************************************************************/
double def_probability_in_context(const char* context,
                                  int context_bytes,
                                  const char* character,
                                  int character_bytes) {
  /* The arguments are intentionally unused; the casts silence
     unused-parameter warnings. */
  (void) context;
  (void) context_bytes;
  (void) character;
  (void) character_bytes;
  return 0.0;
}
55 
/*----------------------------------------------------------------------
              V a r i a b l e s
----------------------------------------------------------------------*/
/* File to save choices; NULL while no choices file is open. */
static FILE *choice_file = NULL;
60 
61 /*----------------------------------------------------------------------
62               F u n c t i o n s
63 ----------------------------------------------------------------------*/
64 /**********************************************************************
65  * close_choices
66  *
67  * Close the choices file.
68  **********************************************************************/
close_choices()69 void close_choices() {
70   if (choice_file)
71     fclose(choice_file);
72 }
73 
74 namespace tesseract {
75 
76 /**********************************************************************
77  * case_ok
78  *
79  * Check a string to see if it matches a set of lexical rules.
80  **********************************************************************/
case_ok(const WERD_CHOICE & word,const UNICHARSET & unicharset)81 int Context::case_ok(const WERD_CHOICE &word,
82                      const UNICHARSET &unicharset) {
83   static int case_state_table[6][4] = { {
84                                  /*  0. Begining of word         */
85     /*    P   U   L   D                                     */
86     /* -1. Error on case            */
87       0, 1, 5, 4
88     },
89     {                            /*  1. After initial capital    */
90       0, 3, 2, 4
91     },
92     {                            /*  2. After lower case         */
93       0, -1, 2, -1
94     },
95     {                            /*  3. After upper case         */
96       0, 3, -1, 4
97     },
98     {                            /*  4. After a digit            */
99       0, -1, -1, 4
100     },
101     {                            /*  5. After initial lower case */
102       5, -1, 2, -1
103     },
104   };
105 
106   register int last_state = 0;
107   register int state = 0;
108   register int x;
109 
110   for (x = 0; x < word.length(); ++x) {
111     UNICHAR_ID ch_id = word.unichar_id(x);
112     if (unicharset.get_isupper(ch_id))
113       state = case_state_table[state][1];
114     else if (unicharset.get_islower(ch_id))
115       state = case_state_table[state][2];
116     else if (unicharset.get_isdigit(ch_id))
117       state = case_state_table[state][3];
118     else
119       state = case_state_table[state][0];
120 
121     if (tord_debug_3)
122       tprintf("Case state = %d, char = %s\n", state,
123               unicharset.id_to_unichar(ch_id));
124     if (state == -1) {
125                                  /* Handle ACCRONYMs */
126 #if 0
127       if (word[x] == 's' &&
128         !isalpha (word[x + 1]) && !isdigit (word[x + 1]))
129         state = last_state;
130       else
131 #endif
132         return (FALSE);
133     }
134 
135     last_state = state;
136   }
137   return state != 5;             /*single lower is bad */
138 }
139 }  // namespace tesseract
140 
141 
142 /**********************************************************************
143  * write_choice_line
144  *
145  * Write a blank line to the choices file.  This will indicate that
146  * there is a new word that is following.
147  **********************************************************************/
write_choice_line()148 void write_choice_line() {
149   if (choice_file) {
150     fprintf (choice_file, "\n");
151     fflush(choice_file);
152   }
153 }
154