1 /******************************************************************************
2 ** Filename: adaptive.c
3 ** Purpose: Adaptive matcher.
4 ** Author: Dan Johnson
5 ** History: Fri Mar 8 10:00:21 1991, DSJ, Created.
6 **
7 ** (c) Copyright Hewlett-Packard Company, 1988.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 ******************************************************************************/
18
19 /**----------------------------------------------------------------------------
20 Include Files and Type Defines
21 ----------------------------------------------------------------------------**/
22 #include "adaptive.h"
23 #include "emalloc.h"
24 #include "freelist.h"
25 #include "globals.h"
26 #include "classify.h"
27
28 #ifdef __UNIX__
29 #include <assert.h>
30 #endif
31 #include <stdio.h>
32
33 /**----------------------------------------------------------------------------
34 Public Code
35 ----------------------------------------------------------------------------**/
36 /*---------------------------------------------------------------------------*/
AddAdaptedClass(ADAPT_TEMPLATES Templates,ADAPT_CLASS Class,CLASS_ID ClassId)37 void AddAdaptedClass(ADAPT_TEMPLATES Templates,
38 ADAPT_CLASS Class,
39 CLASS_ID ClassId) {
40 /*
41 ** Parameters:
42 ** Templates set of templates to add new class to
43 ** Class new class to add to templates
44 ** ClassId class id to associate with new class
45 ** Globals: none
46 ** Operation: This routine adds a new adapted class to an existing
47 ** set of adapted templates.
48 ** Return: none
49 ** Exceptions: none
50 ** History: Thu Mar 14 13:06:09 1991, DSJ, Created.
51 */
52 INT_CLASS IntClass;
53
54 assert (Templates != NULL);
55 assert (Class != NULL);
56 assert (LegalClassId (ClassId));
57 assert (UnusedClassIdIn (Templates->Templates, ClassId));
58 assert (Class->NumPermConfigs == 0);
59
60 IntClass = NewIntClass (1, 1);
61 AddIntClass (Templates->Templates, ClassId, IntClass);
62
63 assert (Templates->Class[ClassId] == NULL);
64 Templates->Class[ClassId] = Class;
65
66 } /* AddAdaptedClass */
67
68
69 /*---------------------------------------------------------------------------*/
FreeTempConfig(TEMP_CONFIG Config)70 void FreeTempConfig(TEMP_CONFIG Config) {
71 /*
72 ** Parameters:
73 ** Config config to be freed
74 ** Globals: none
75 ** Operation: This routine frees all memory consumed by a temporary
76 ** configuration.
77 ** Return: none
78 ** Exceptions: none
79 ** History: Thu Mar 14 13:34:23 1991, DSJ, Created.
80 */
81 assert (Config != NULL);
82
83 destroy_nodes (Config->ContextsSeen, memfree);
84 FreeBitVector (Config->Protos);
85 free_struct (Config, sizeof (TEMP_CONFIG_STRUCT), "TEMP_CONFIG_STRUCT");
86
87 } /* FreeTempConfig */
88
89
90 /*---------------------------------------------------------------------------*/
FreeTempProto(void * arg)91 void FreeTempProto(void *arg) {
92 PROTO proto = (PROTO) arg;
93
94 free_struct (proto, sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT");
95 }
96
97
98 /*---------------------------------------------------------------------------*/
NewAdaptedClass()99 ADAPT_CLASS NewAdaptedClass() {
100 /*
101 ** Parameters: none
102 ** Globals: none
103 ** Operation: This operation allocates and initializes a new adapted
104 ** class data structure and returns a ptr to it.
105 ** Return: Ptr to new class data structure.
106 ** Exceptions: none
107 ** History: Thu Mar 14 12:58:13 1991, DSJ, Created.
108 */
109 ADAPT_CLASS Class;
110 int i;
111
112 Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
113 Class->NumPermConfigs = 0;
114 Class->TempProtos = NIL;
115
116 Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
117 Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
118 zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS));
119 zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS));
120
121 for (i = 0; i < MAX_NUM_CONFIGS; i++)
122 TempConfigFor (Class, i) = NULL;
123
124 return (Class);
125
126 } /* NewAdaptedClass */
127
128
129 /*-------------------------------------------------------------------------*/
free_adapted_class(ADAPT_CLASS adapt_class)130 void free_adapted_class(ADAPT_CLASS adapt_class) {
131 int i;
132
133 for (i = 0; i < MAX_NUM_CONFIGS; i++) {
134 if (ConfigIsPermanent (adapt_class, i)
135 && PermConfigFor (adapt_class, i) != NULL)
136 Efree (PermConfigFor (adapt_class, i));
137 else if (!ConfigIsPermanent (adapt_class, i)
138 && TempConfigFor (adapt_class, i) != NULL)
139 FreeTempConfig (TempConfigFor (adapt_class, i));
140 }
141 FreeBitVector (adapt_class->PermProtos);
142 FreeBitVector (adapt_class->PermConfigs);
143 destroy_nodes (adapt_class->TempProtos, FreeTempProto);
144 Efree(adapt_class);
145 }
146
147
148 /*---------------------------------------------------------------------------*/
149 namespace tesseract {
NewAdaptedTemplates(bool InitFromUnicharset)150 ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) {
151 /*
152 ** Parameters:
153 ** PopulateFromUnicharset if true, add an empty class for
154 ** each char in unicharset to the
155 ** newly created templates
156 ** Globals: none
157 ** Operation: Allocates memory for adapted tempates.
158 ** Return: Ptr to new adapted templates.
159 ** Exceptions: none
160 ** History: Fri Mar 8 10:15:28 1991, DSJ, Created.
161 */
162 ADAPT_TEMPLATES Templates;
163 int i;
164
165 Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
166
167 Templates->Templates = NewIntTemplates ();
168 Templates->NumPermClasses = 0;
169 Templates->NumNonEmptyClasses = 0;
170
171 /* Insert an empty class for each unichar id in unicharset */
172 for (i = 0; i < MAX_NUM_CLASSES; i++) {
173 Templates->Class[i] = NULL;
174 if (InitFromUnicharset && i < unicharset.size()) {
175 AddAdaptedClass(Templates, NewAdaptedClass(), i);
176 }
177 }
178
179 return (Templates);
180
181 } /* NewAdaptedTemplates */
182 } // namespace tesseract
183
184 /*----------------------------------------------------------------------------*/
free_adapted_templates(ADAPT_TEMPLATES templates)185 void free_adapted_templates(ADAPT_TEMPLATES templates) {
186
187 if (templates != NULL) {
188 int i;
189 for (i = 0; i < (templates->Templates)->NumClasses; i++)
190 free_adapted_class (templates->Class[i]);
191 free_int_templates (templates->Templates);
192 Efree(templates);
193 }
194 }
195
196
197 /*---------------------------------------------------------------------------*/
NewTempConfig(int MaxProtoId)198 TEMP_CONFIG NewTempConfig(int MaxProtoId) {
199 /*
200 ** Parameters:
201 ** MaxProtoId max id of any proto in new config
202 ** Globals: none
203 ** Operation: This routine allocates and returns a new temporary
204 ** config.
205 ** Return: Ptr to new temp config.
206 ** Exceptions: none
207 ** History: Thu Mar 14 13:28:21 1991, DSJ, Created.
208 */
209 TEMP_CONFIG Config;
210 int NumProtos = MaxProtoId + 1;
211
212 Config =
213 (TEMP_CONFIG) alloc_struct (sizeof (TEMP_CONFIG_STRUCT),
214 "TEMP_CONFIG_STRUCT");
215 Config->Protos = NewBitVector (NumProtos);
216
217 Config->NumTimesSeen = 1;
218 Config->MaxProtoId = MaxProtoId;
219 Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
220 Config->ContextsSeen = NIL;
221 zero_all_bits (Config->Protos, Config->ProtoVectorSize);
222
223 return (Config);
224
225 } /* NewTempConfig */
226
227
228 /*---------------------------------------------------------------------------*/
NewTempProto()229 TEMP_PROTO NewTempProto() {
230 /*
231 ** Parameters: none
232 ** Globals: none
233 ** Operation: This routine allocates and returns a new temporary proto.
234 ** Return: Ptr to new temporary proto.
235 ** Exceptions: none
236 ** History: Thu Mar 14 13:31:31 1991, DSJ, Created.
237 */
238 return ((TEMP_PROTO)
239 alloc_struct (sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT"));
240 } /* NewTempProto */
241
242
243 /*---------------------------------------------------------------------------*/
244 namespace tesseract {
PrintAdaptedTemplates(FILE * File,ADAPT_TEMPLATES Templates)245 void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
246 /*
247 ** Parameters:
248 ** File open text file to print Templates to
249 ** Templates adapted templates to print to File
250 ** Globals: none
251 ** Operation: This routine prints a summary of the adapted templates
252 ** in Templates to File.
253 ** Return: none
254 ** Exceptions: none
255 ** History: Wed Mar 20 13:35:29 1991, DSJ, Created.
256 */
257 int i;
258 INT_CLASS IClass;
259 ADAPT_CLASS AClass;
260
261 #ifndef SECURE_NAMES
262 fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
263 fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
264 Templates->NumNonEmptyClasses, Templates->NumPermClasses);
265 fprintf (File, " Id NC NPC NP NPP\n");
266 fprintf (File, "------------------------\n");
267
268 for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
269 IClass = Templates->Templates->Class[i];
270 AClass = Templates->Class[i];
271 if (!IsEmptyAdaptedClass (AClass)) {
272 fprintf (File, "%5d %s %3d %3d %3d %3d\n",
273 i, unicharset.id_to_unichar(i),
274 IClass->NumConfigs, AClass->NumPermConfigs,
275 IClass->NumProtos,
276 IClass->NumProtos - count (AClass->TempProtos));
277 }
278 }
279 #endif
280 fprintf (File, "\n");
281
282 } /* PrintAdaptedTemplates */
283 } // namespace tesseract
284
285
286 /*---------------------------------------------------------------------------*/
ReadAdaptedClass(FILE * File)287 ADAPT_CLASS ReadAdaptedClass(FILE *File) {
288 /*
289 ** Parameters:
290 ** File open file to read adapted class from
291 ** Globals: none
292 ** Operation: Read an adapted class description from File and return
293 ** a ptr to the adapted class.
294 ** Return: Ptr to new adapted class.
295 ** Exceptions: none
296 ** History: Tue Mar 19 14:11:01 1991, DSJ, Created.
297 */
298 int NumTempProtos;
299 int NumConfigs;
300 int i;
301 ADAPT_CLASS Class;
302 TEMP_PROTO TempProto;
303
304 /* first read high level adapted class structure */
305 Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
306 fread ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
307
308 /* then read in the definitions of the permanent protos and configs */
309 Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
310 Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
311 fread ((char *) Class->PermProtos, sizeof (uinT32),
312 WordsInVectorOfSize (MAX_NUM_PROTOS), File);
313 fread ((char *) Class->PermConfigs, sizeof (uinT32),
314 WordsInVectorOfSize (MAX_NUM_CONFIGS), File);
315
316 /* then read in the list of temporary protos */
317 fread ((char *) &NumTempProtos, sizeof (int), 1, File);
318 Class->TempProtos = NIL;
319 for (i = 0; i < NumTempProtos; i++) {
320 TempProto =
321 (TEMP_PROTO) alloc_struct (sizeof (TEMP_PROTO_STRUCT),
322 "TEMP_PROTO_STRUCT");
323 fread ((char *) TempProto, sizeof (TEMP_PROTO_STRUCT), 1, File);
324 Class->TempProtos = push_last (Class->TempProtos, TempProto);
325 }
326
327 /* then read in the adapted configs */
328 fread ((char *) &NumConfigs, sizeof (int), 1, File);
329 for (i = 0; i < NumConfigs; i++)
330 if (test_bit (Class->PermConfigs, i))
331 Class->Config[i].Perm = ReadPermConfig (File);
332 else
333 Class->Config[i].Temp = ReadTempConfig (File);
334
335 return (Class);
336
337 } /* ReadAdaptedClass */
338
339
340 /*---------------------------------------------------------------------------*/
341 namespace tesseract {
ReadAdaptedTemplates(FILE * File)342 ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(FILE *File) {
343 /*
344 ** Parameters:
345 ** File open text file to read adapted templates from
346 ** Globals: none
347 ** Operation: Read a set of adapted templates from File and return
348 ** a ptr to the templates.
349 ** Return: Ptr to adapted templates read from File.
350 ** Exceptions: none
351 ** History: Mon Mar 18 15:18:10 1991, DSJ, Created.
352 */
353 int i;
354 ADAPT_TEMPLATES Templates;
355
356 /* first read the high level adaptive template struct */
357 Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
358 fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
359
360 /* then read in the basic integer templates */
361 Templates->Templates = ReadIntTemplates (File);
362
363 /* then read in the adaptive info for each class */
364 for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
365 Templates->Class[i] = ReadAdaptedClass (File);
366 }
367 return (Templates);
368
369 } /* ReadAdaptedTemplates */
370 } // namespace tesseract
371
372
373 /*---------------------------------------------------------------------------*/
ReadPermConfig(FILE * File)374 PERM_CONFIG ReadPermConfig(FILE *File) {
375 /*
376 ** Parameters:
377 ** File open file to read permanent config from
378 ** Globals: none
379 ** Operation: Read a permanent configuration description from File
380 ** and return a ptr to it.
381 ** Return: Ptr to new permanent configuration description.
382 ** Exceptions: none
383 ** History: Tue Mar 19 14:25:26 1991, DSJ, Created.
384 */
385 PERM_CONFIG Config;
386 uinT8 NumAmbigs;
387
388 fread ((char *) &NumAmbigs, sizeof (uinT8), 1, File);
389 Config = (PERM_CONFIG) Emalloc (sizeof (UNICHAR_ID) * (NumAmbigs + 1));
390 fread (Config, sizeof (UNICHAR_ID), NumAmbigs, File);
391 Config[NumAmbigs] = -1;
392
393 return (Config);
394
395 } /* ReadPermConfig */
396
397
398 /*---------------------------------------------------------------------------*/
ReadTempConfig(FILE * File)399 TEMP_CONFIG ReadTempConfig(FILE *File) {
400 /*
401 ** Parameters:
402 ** File open file to read temporary config from
403 ** Globals: none
404 ** Operation: Read a temporary configuration description from File
405 ** and return a ptr to it.
406 ** Return: Ptr to new temporary configuration description.
407 ** Exceptions: none
408 ** History: Tue Mar 19 14:29:59 1991, DSJ, Created.
409 */
410 TEMP_CONFIG Config;
411
412 Config =
413 (TEMP_CONFIG) alloc_struct (sizeof (TEMP_CONFIG_STRUCT),
414 "TEMP_CONFIG_STRUCT");
415 fread ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
416
417 Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
418 fread ((char *) Config->Protos, sizeof (uinT32),
419 Config->ProtoVectorSize, File);
420
421 return (Config);
422
423 } /* ReadTempConfig */
424
425
426 /*---------------------------------------------------------------------------*/
WriteAdaptedClass(FILE * File,ADAPT_CLASS Class,int NumConfigs)427 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
428 /*
429 ** Parameters:
430 ** File open file to write Class to
431 ** Class adapted class to write to File
432 ** NumConfigs number of configs in Class
433 ** Globals: none
434 ** Operation: This routine writes a binary representation of Class
435 ** to File.
436 ** Return: none
437 ** Exceptions: none
438 ** History: Tue Mar 19 13:33:51 1991, DSJ, Created.
439 */
440 int NumTempProtos;
441 LIST TempProtos;
442 int i;
443
444 /* first write high level adapted class structure */
445 fwrite ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
446
447 /* then write out the definitions of the permanent protos and configs */
448 fwrite ((char *) Class->PermProtos, sizeof (uinT32),
449 WordsInVectorOfSize (MAX_NUM_PROTOS), File);
450 fwrite ((char *) Class->PermConfigs, sizeof (uinT32),
451 WordsInVectorOfSize (MAX_NUM_CONFIGS), File);
452
453 /* then write out the list of temporary protos */
454 NumTempProtos = count (Class->TempProtos);
455 fwrite ((char *) &NumTempProtos, sizeof (int), 1, File);
456 TempProtos = Class->TempProtos;
457 iterate (TempProtos) {
458 void* proto = first_node(TempProtos);
459 fwrite ((char *) proto, sizeof (TEMP_PROTO_STRUCT), 1, File);
460 }
461
462 /* then write out the adapted configs */
463 fwrite ((char *) &NumConfigs, sizeof (int), 1, File);
464 for (i = 0; i < NumConfigs; i++)
465 if (test_bit (Class->PermConfigs, i))
466 WritePermConfig (File, Class->Config[i].Perm);
467 else
468 WriteTempConfig (File, Class->Config[i].Temp);
469
470 } /* WriteAdaptedClass */
471
472
473 /*---------------------------------------------------------------------------*/
474 namespace tesseract {
WriteAdaptedTemplates(FILE * File,ADAPT_TEMPLATES Templates)475 void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
476 /*
477 ** Parameters:
478 ** File open text file to write Templates to
479 ** Templates set of adapted templates to write to File
480 ** Globals: none
481 ** Operation: This routine saves Templates to File in a binary format.
482 ** Return: none
483 ** Exceptions: none
484 ** History: Mon Mar 18 15:07:32 1991, DSJ, Created.
485 */
486 int i;
487
488 /* first write the high level adaptive template struct */
489 fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
490
491 /* then write out the basic integer templates */
492 WriteIntTemplates (File, Templates->Templates, unicharset);
493
494 /* then write out the adaptive info for each class */
495 for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
496 WriteAdaptedClass (File, Templates->Class[i],
497 Templates->Templates->Class[i]->NumConfigs);
498 }
499 } /* WriteAdaptedTemplates */
500 } // namespace tesseract
501
502
503 /*---------------------------------------------------------------------------*/
WritePermConfig(FILE * File,PERM_CONFIG Config)504 void WritePermConfig(FILE *File, PERM_CONFIG Config) {
505 /*
506 ** Parameters:
507 ** File open file to write Config to
508 ** Config permanent config to write to File
509 ** Globals: none
510 ** Operation: This routine writes a binary representation of a
511 ** permanent configuration to File.
512 ** Return: none
513 ** Exceptions: none
514 ** History: Tue Mar 19 13:55:44 1991, DSJ, Created.
515 */
516 uinT8 NumAmbigs = 0;
517
518 assert (Config != NULL);
519 while (Config[NumAmbigs] > 0)
520 ++NumAmbigs;
521
522 fwrite ((char *) &NumAmbigs, sizeof (uinT8), 1, File);
523 fwrite (Config, sizeof (UNICHAR_ID), NumAmbigs, File);
524
525 } /* WritePermConfig */
526
527
528 /*---------------------------------------------------------------------------*/
WriteTempConfig(FILE * File,TEMP_CONFIG Config)529 void WriteTempConfig(FILE *File, TEMP_CONFIG Config) {
530 /*
531 ** Parameters:
532 ** File open file to write Config to
533 ** Config temporary config to write to File
534 ** Globals: none
535 ** Operation: This routine writes a binary representation of a
536 ** temporary configuration to File.
537 ** Return: none
538 ** Exceptions: none
539 ** History: Tue Mar 19 14:00:28 1991, DSJ, Created.
540 */
541 assert (Config != NULL);
542 /* contexts not yet implemented */
543 assert (Config->ContextsSeen == NULL);
544
545 fwrite ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
546 fwrite ((char *) Config->Protos, sizeof (uinT32),
547 Config->ProtoVectorSize, File);
548
549 } /* WriteTempConfig */
550