1 /* -*-C-*-
2 ********************************************************************************
3 *
4 * File: protos.c (Formerly protos.c)
5 * Description:
6 * Author: Mark Seaman, OCR Technology
7 * Created: Fri Oct 16 14:37:00 1987
8 * Modified: Mon Mar 4 14:51:24 1991 (Dan Johnson) danj@hpgrlj
9 * Language: C
10 * Package: N/A
11 * Status: Reusable Software Component
12 *
13 * (c) Copyright 1987, Hewlett-Packard Company.
14 ** Licensed under the Apache License, Version 2.0 (the "License");
15 ** you may not use this file except in compliance with the License.
16 ** You may obtain a copy of the License at
17 ** http://www.apache.org/licenses/LICENSE-2.0
18 ** Unless required by applicable law or agreed to in writing, software
19 ** distributed under the License is distributed on an "AS IS" BASIS,
20 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21 ** See the License for the specific language governing permissions and
22 ** limitations under the License.
23 *
24 *********************************************************************************/
25 /*----------------------------------------------------------------------
26 I n c l u d e s
27 ----------------------------------------------------------------------*/
28 #include "protos.h"
29 #include "const.h"
30 #include "emalloc.h"
31 #include "freelist.h"
32 #include "callcpp.h"
33 #include "tprintf.h"
34 #include "adaptmatch.h"
35 #include "scanutils.h"
36 #include "globals.h"
37 #include "classify.h"
38 #include "varable.h"
39
40 #include <stdio.h>
41 #include <math.h>
42
43 #define PROTO_INCREMENT 32
44 #define CONFIG_INCREMENT 16
45
46 /*----------------------------------------------------------------------
47 V a r i a b l e s
48 ----------------------------------------------------------------------*/
49 CLASS_STRUCT TrainingData[NUMBER_OF_CLASSES];
50
51 STRING_VAR(classify_training_file, "MicroFeatures", "Training file");
52
53 /*----------------------------------------------------------------------
54 F u n c t i o n s
55 ----------------------------------------------------------------------*/
56 /**********************************************************************
57 * AddConfigToClass
58 *
59 * Add a new config to this class. Malloc new space and copy the
60 * old configs if necessary. Return the config id for the new config.
61 **********************************************************************/
AddConfigToClass(CLASS_TYPE Class)62 int AddConfigToClass(CLASS_TYPE Class) {
63 int NewNumConfigs;
64 int NewConfig;
65 int MaxNumProtos;
66 BIT_VECTOR Config;
67
68 MaxNumProtos = Class->MaxNumProtos;
69
70 if (Class->NumConfigs >= Class->MaxNumConfigs) {
71 /* add configs in CONFIG_INCREMENT chunks at a time */
72 NewNumConfigs = (((Class->MaxNumConfigs + CONFIG_INCREMENT) /
73 CONFIG_INCREMENT) * CONFIG_INCREMENT);
74
75 Class->Configurations =
76 (CONFIGS) Erealloc (Class->Configurations,
77 sizeof (BIT_VECTOR) * NewNumConfigs);
78
79 Class->MaxNumConfigs = NewNumConfigs;
80 }
81 NewConfig = Class->NumConfigs++;
82 Config = NewBitVector (MaxNumProtos);
83 Class->Configurations[NewConfig] = Config;
84 zero_all_bits (Config, WordsInVectorOfSize (MaxNumProtos));
85
86 return (NewConfig);
87 }
88
89
90 /**********************************************************************
91 * AddProtoToClass
92 *
93 * Add a new proto to this class. Malloc new space and copy the
94 * old protos if necessary. Return the proto id for the new proto.
95 **********************************************************************/
AddProtoToClass(CLASS_TYPE Class)96 int AddProtoToClass(CLASS_TYPE Class) {
97 int i;
98 int Bit;
99 int NewNumProtos;
100 int NewProto;
101 BIT_VECTOR Config;
102
103 if (Class->NumProtos >= Class->MaxNumProtos) {
104 /* add protos in PROTO_INCREMENT chunks at a time */
105 NewNumProtos = (((Class->MaxNumProtos + PROTO_INCREMENT) /
106 PROTO_INCREMENT) * PROTO_INCREMENT);
107
108 Class->Prototypes = (PROTO) Erealloc (Class->Prototypes,
109 sizeof (PROTO_STRUCT) *
110 NewNumProtos);
111
112 Class->MaxNumProtos = NewNumProtos;
113
114 for (i = 0; i < Class->NumConfigs; i++) {
115 Config = Class->Configurations[i];
116 Class->Configurations[i] = ExpandBitVector (Config, NewNumProtos);
117
118 for (Bit = Class->NumProtos; Bit < NewNumProtos; Bit++)
119 reset_bit(Config, Bit);
120 }
121 }
122 NewProto = Class->NumProtos++;
123 if (Class->NumProtos > MAX_NUM_PROTOS) {
124 tprintf("Ouch! number of protos = %d, vs max of %d!",
125 Class->NumProtos, MAX_NUM_PROTOS);
126 }
127 return (NewProto);
128 }
129
130
131 /**********************************************************************
132 * ClassConfigLength
133 *
134 * Return the length of all the protos in this class.
135 **********************************************************************/
ClassConfigLength(CLASS_TYPE Class,BIT_VECTOR Config)136 FLOAT32 ClassConfigLength(CLASS_TYPE Class, BIT_VECTOR Config) {
137 inT16 Pid;
138 FLOAT32 TotalLength = 0;
139
140 for (Pid = 0; Pid < Class->NumProtos; Pid++) {
141 if (test_bit (Config, Pid)) {
142
143 TotalLength += (ProtoIn (Class, Pid))->Length;
144 }
145 }
146 return (TotalLength);
147 }
148
149
150 /**********************************************************************
151 * ClassProtoLength
152 *
153 * Return the length of all the protos in this class.
154 **********************************************************************/
ClassProtoLength(CLASS_TYPE Class)155 FLOAT32 ClassProtoLength(CLASS_TYPE Class) {
156 inT16 Pid;
157 FLOAT32 TotalLength = 0;
158
159 for (Pid = 0; Pid < Class->NumProtos; Pid++) {
160 TotalLength += (ProtoIn (Class, Pid))->Length;
161 }
162 return (TotalLength);
163 }
164
165
166 /**********************************************************************
167 * CopyProto
168 *
169 * Copy the first proto into the second.
170 **********************************************************************/
CopyProto(PROTO Src,PROTO Dest)171 void CopyProto(PROTO Src, PROTO Dest) {
172 Dest->X = Src->X;
173 Dest->Y = Src->Y;
174 Dest->Length = Src->Length;
175 Dest->Angle = Src->Angle;
176 Dest->A = Src->A;
177 Dest->B = Src->B;
178 Dest->C = Src->C;
179 }
180
181
182 /**********************************************************************
183 * FillABC
184 *
185 * Fill in Protos A, B, C fields based on the X, Y, Angle fields.
186 **********************************************************************/
FillABC(PROTO Proto)187 void FillABC(PROTO Proto) {
188 FLOAT32 Slope, Intercept, Normalizer;
189
190 Slope = tan (Proto->Angle * 2.0 * PI);
191 Intercept = Proto->Y - Slope * Proto->X;
192 Normalizer = 1.0 / sqrt (Slope * Slope + 1.0);
193 Proto->A = Slope * Normalizer;
194 Proto->B = -Normalizer;
195 Proto->C = Intercept * Normalizer;
196 }
197
198
199 /**********************************************************************
200 * FreeClass
201 *
202 * Deallocate the memory consumed by the specified class.
203 **********************************************************************/
FreeClass(CLASS_TYPE Class)204 void FreeClass(CLASS_TYPE Class) {
205 if (Class) {
206 FreeClassFields(Class);
207 delete Class;
208 }
209 }
210
211
212 /**********************************************************************
213 * FreeClassFields
214 *
215 * Deallocate the memory consumed by subfields of the specified class.
216 **********************************************************************/
FreeClassFields(CLASS_TYPE Class)217 void FreeClassFields(CLASS_TYPE Class) {
218 int i;
219
220 if (Class) {
221 if (Class->MaxNumProtos > 0)
222 memfree (Class->Prototypes);
223 if (Class->MaxNumConfigs > 0) {
224 for (i = 0; i < Class->NumConfigs; i++)
225 FreeBitVector (Class->Configurations[i]);
226 memfree (Class->Configurations);
227 }
228 }
229 }
230
231 /**********************************************************************
232 * NewClass
233 *
234 * Allocate a new class with enough memory to hold the specified number
235 * of prototypes and configurations.
236 **********************************************************************/
NewClass(int NumProtos,int NumConfigs)237 CLASS_TYPE NewClass(int NumProtos, int NumConfigs) {
238 CLASS_TYPE Class;
239
240 Class = new CLASS_STRUCT;
241
242 if (NumProtos > 0)
243 Class->Prototypes = (PROTO) Emalloc (NumProtos * sizeof (PROTO_STRUCT));
244
245 if (NumConfigs > 0)
246 Class->Configurations = (CONFIGS) Emalloc (NumConfigs *
247 sizeof (BIT_VECTOR));
248 Class->MaxNumProtos = NumProtos;
249 Class->MaxNumConfigs = NumConfigs;
250 Class->NumProtos = 0;
251 Class->NumConfigs = 0;
252 return (Class);
253
254 }
255
256
257 /**********************************************************************
258 * PrintProtos
259 *
260 * Print the list of prototypes in this class type.
261 **********************************************************************/
PrintProtos(CLASS_TYPE Class)262 void PrintProtos(CLASS_TYPE Class) {
263 inT16 Pid;
264
265 for (Pid = 0; Pid < Class->NumProtos; Pid++) {
266 cprintf ("Proto %d:\t", Pid);
267 PrintProto (ProtoIn (Class, Pid));
268 cprintf ("\t");
269 PrintProtoLine (ProtoIn (Class, Pid));
270 new_line();
271 }
272 }
273
274
275 /**********************************************************************
276 * ReadClassFile
277 *
278 * Read in the training data from a file. All of the classes are read
279 * in. The results are stored in the global variable, 'TrainingData'.
280 **********************************************************************/
281 namespace tesseract {
ReadClassFile()282 void Classify::ReadClassFile() {
283 FILE *File;
284 char TextLine[CHARS_PER_LINE];
285 char unichar[CHARS_PER_LINE];
286
287 cprintf ("Reading training data from '%s' ...",
288 static_cast<STRING>(classify_training_file).string());
289 fflush(stdout);
290
291 File = open_file(static_cast<STRING>(classify_training_file).string(), "r");
292 while (fgets (TextLine, CHARS_PER_LINE, File) != NULL) {
293
294 sscanf(TextLine, "%s", unichar);
295 ReadClassFromFile (File, unicharset.unichar_to_id(unichar));
296 fgets(TextLine, CHARS_PER_LINE, File);
297 fgets(TextLine, CHARS_PER_LINE, File);
298 }
299 fclose(File);
300 new_line();
301 }
302 } // namespace tesseract
303
304 /**********************************************************************
305 * ReadClassFromFile
306 *
307 * Read in a class description (protos and configs) from a file. Update
308 * the class structure record.
309 **********************************************************************/
ReadClassFromFile(FILE * File,UNICHAR_ID unichar_id)310 void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id) {
311 CLASS_TYPE Class;
312
313 Class = &TrainingData[unichar_id];
314
315 ReadProtos(File, Class);
316
317 ReadConfigs(File, Class);
318 }
319
320 /**********************************************************************
321 * ReadConfigs
322 *
323 * Read the prototype configurations for this class from a file. Read
324 * the requested number of lines.
325 **********************************************************************/
ReadConfigs(register FILE * File,CLASS_TYPE Class)326 void ReadConfigs(register FILE *File, CLASS_TYPE Class) {
327 inT16 Cid;
328 register inT16 Wid;
329 register BIT_VECTOR ThisConfig;
330 int NumWords;
331 int NumConfigs;
332
333 fscanf (File, "%d %d\n", &NumConfigs, &NumWords);
334 Class->NumConfigs = NumConfigs;
335 Class->MaxNumConfigs = NumConfigs;
336 Class->Configurations =
337 (CONFIGS) Emalloc (sizeof (BIT_VECTOR) * NumConfigs);
338 NumWords = WordsInVectorOfSize (Class->NumProtos);
339
340 for (Cid = 0; Cid < NumConfigs; Cid++) {
341
342 ThisConfig = NewBitVector (Class->NumProtos);
343 for (Wid = 0; Wid < NumWords; Wid++)
344 fscanf (File, "%x", &ThisConfig[Wid]);
345 Class->Configurations[Cid] = ThisConfig;
346 }
347 }
348
349
350 /**********************************************************************
351 * ReadProtos
352 *
353 * Read in all the prototype information from a file. Read the number
354 * of lines requested.
355 **********************************************************************/
ReadProtos(register FILE * File,CLASS_TYPE Class)356 void ReadProtos(register FILE *File, CLASS_TYPE Class) {
357 register inT16 Pid;
358 register PROTO Proto;
359 int NumProtos;
360
361 fscanf (File, "%d\n", &NumProtos);
362 Class->NumProtos = NumProtos;
363 Class->MaxNumProtos = NumProtos;
364 Class->Prototypes = (PROTO) Emalloc (sizeof (PROTO_STRUCT) * NumProtos);
365
366 for (Pid = 0; Pid < NumProtos; Pid++) {
367 Proto = ProtoIn (Class, Pid);
368 fscanf (File, "%f %f %f %f %f %f %f\n",
369 &Proto->X,
370 &Proto->Y,
371 &Proto->Length,
372 &Proto->Angle,
373 &Proto->A,
374 &Proto->B, &Proto->C);
375 }
376 }
377
378
379 /**********************************************************************
380 * SplitProto
381 *
382 * Add a new proto to this class. Malloc new space and copy the
383 * old protos if necessary. Return the proto id for the new proto.
384 * Update all configurations so that each config which contained the
385 * specified old proto will also contain the new proto. The caller
386 * is responsible for actually filling in the appropriate proto params.
387 **********************************************************************/
SplitProto(CLASS_TYPE Class,int OldPid)388 int SplitProto(CLASS_TYPE Class, int OldPid) {
389 int i;
390 int NewPid;
391 BIT_VECTOR Config;
392
393 NewPid = AddProtoToClass (Class);
394
395 for (i = 0; i < Class->NumConfigs; i++) {
396 Config = Class->Configurations[i];
397 if (test_bit (Config, OldPid))
398 SET_BIT(Config, NewPid);
399 }
400 return (NewPid);
401 }
402
403
404 /**********************************************************************
405 * WriteOldConfigFile
406 *
407 * Write the configs in the given class to the specified file in the
408 * old config format.
409 **********************************************************************/
WriteOldConfigFile(FILE * File,CLASS_TYPE Class)410 void WriteOldConfigFile(FILE *File, CLASS_TYPE Class) {
411 int Cid, Pid;
412 BIT_VECTOR Config;
413
414 fprintf (File, "%d %d\n", Class->NumConfigs, Class->NumProtos);
415
416 for (Cid = 0; Cid < Class->NumConfigs; Cid++) {
417 fprintf (File, "1 ");
418
419 Config = Class->Configurations[Cid];
420
421 for (Pid = 0; Pid < Class->NumProtos; Pid++) {
422 if (test_bit (Config, Pid))
423 fprintf (File, "1");
424 else
425 fprintf (File, "0");
426 }
427 fprintf (File, "\n");
428 }
429 }
430
431
432 /**********************************************************************
433 * WriteOldProtoFile
434 *
435 * Write the protos in the given class to the specified file in the
436 * old proto format.
437 **********************************************************************/
WriteOldProtoFile(FILE * File,CLASS_TYPE Class)438 void WriteOldProtoFile(FILE *File, CLASS_TYPE Class) {
439 int Pid;
440 PROTO Proto;
441
442 /* print old header */
443 fprintf (File, "6\n");
444 fprintf (File, "linear essential -0.500000 0.500000\n");
445 fprintf (File, "linear essential -0.250000 0.750000\n");
446 fprintf (File, "linear essential 0.000000 1.000000\n");
447 fprintf (File, "circular essential 0.000000 1.000000\n");
448 fprintf (File, "linear non-essential -0.500000 0.500000\n");
449 fprintf (File, "linear non-essential -0.500000 0.500000\n");
450
451 for (Pid = 0; Pid < Class->NumProtos; Pid++) {
452 Proto = ProtoIn (Class, Pid);
453
454 fprintf (File, "significant elliptical 1\n");
455 fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
456 Proto->X, Proto->Y,
457 Proto->Length, Proto->Angle, 0.0, 0.0);
458 fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
459 0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001);
460 }
461 }
462