• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/******************************************************************************
 **	Filename:    adaptive.c
 **	Purpose:     Adaptive matcher.
 **	Author:      Dan Johnson
 **	History:     Fri Mar  8 10:00:21 1991, DSJ, Created.
 **
 **	(c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
18 
19 /**----------------------------------------------------------------------------
20           Include Files and Type Defines
21 ----------------------------------------------------------------------------**/
22 #include "adaptive.h"
23 #include "emalloc.h"
24 #include "freelist.h"
25 #include "globals.h"
26 #include "classify.h"
27 
28 #ifdef __UNIX__
29 #include <assert.h>
30 #endif
31 #include <stdio.h>
32 
33 /**----------------------------------------------------------------------------
34               Public Code
35 ----------------------------------------------------------------------------**/
36 /*---------------------------------------------------------------------------*/
AddAdaptedClass(ADAPT_TEMPLATES Templates,ADAPT_CLASS Class,CLASS_ID ClassId)37 void AddAdaptedClass(ADAPT_TEMPLATES Templates,
38                      ADAPT_CLASS Class,
39                      CLASS_ID ClassId) {
40 /*
41  **	Parameters:
42  **		Templates	set of templates to add new class to
43  **		Class		new class to add to templates
44  **		ClassId		class id to associate with new class
45  **	Globals: none
46  **	Operation: This routine adds a new adapted class to an existing
47  **		set of adapted templates.
48  **	Return: none
49  **	Exceptions: none
50  **	History: Thu Mar 14 13:06:09 1991, DSJ, Created.
51  */
52   INT_CLASS IntClass;
53 
54   assert (Templates != NULL);
55   assert (Class != NULL);
56   assert (LegalClassId (ClassId));
57   assert (UnusedClassIdIn (Templates->Templates, ClassId));
58   assert (Class->NumPermConfigs == 0);
59 
60   IntClass = NewIntClass (1, 1);
61   AddIntClass (Templates->Templates, ClassId, IntClass);
62 
63   assert (Templates->Class[ClassId] == NULL);
64   Templates->Class[ClassId] = Class;
65 
66 }                                /* AddAdaptedClass */
67 
68 
69 /*---------------------------------------------------------------------------*/
FreeTempConfig(TEMP_CONFIG Config)70 void FreeTempConfig(TEMP_CONFIG Config) {
71 /*
72  **	Parameters:
73  **		Config	config to be freed
74  **	Globals: none
75  **	Operation: This routine frees all memory consumed by a temporary
76  **		configuration.
77  **	Return: none
78  **	Exceptions: none
79  **	History: Thu Mar 14 13:34:23 1991, DSJ, Created.
80  */
81   assert (Config != NULL);
82 
83   destroy_nodes (Config->ContextsSeen, memfree);
84   FreeBitVector (Config->Protos);
85   free_struct (Config, sizeof (TEMP_CONFIG_STRUCT), "TEMP_CONFIG_STRUCT");
86 
87 }                                /* FreeTempConfig */
88 
89 
90 /*---------------------------------------------------------------------------*/
FreeTempProto(void * arg)91 void FreeTempProto(void *arg) {
92   PROTO proto = (PROTO) arg;
93 
94   free_struct (proto, sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT");
95 }
96 
97 
98 /*---------------------------------------------------------------------------*/
NewAdaptedClass()99 ADAPT_CLASS NewAdaptedClass() {
100 /*
101  **	Parameters: none
102  **	Globals: none
103  **	Operation: This operation allocates and initializes a new adapted
104  **		class data structure and returns a ptr to it.
105  **	Return: Ptr to new class data structure.
106  **	Exceptions: none
107  **	History: Thu Mar 14 12:58:13 1991, DSJ, Created.
108  */
109   ADAPT_CLASS Class;
110   int i;
111 
112   Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
113   Class->NumPermConfigs = 0;
114   Class->TempProtos = NIL;
115 
116   Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
117   Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
118   zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS));
119   zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS));
120 
121   for (i = 0; i < MAX_NUM_CONFIGS; i++)
122     TempConfigFor (Class, i) = NULL;
123 
124   return (Class);
125 
126 }                                /* NewAdaptedClass */
127 
128 
129 /*-------------------------------------------------------------------------*/
free_adapted_class(ADAPT_CLASS adapt_class)130 void free_adapted_class(ADAPT_CLASS adapt_class) {
131   int i;
132 
133   for (i = 0; i < MAX_NUM_CONFIGS; i++) {
134     if (ConfigIsPermanent (adapt_class, i)
135       && PermConfigFor (adapt_class, i) != NULL)
136       Efree (PermConfigFor (adapt_class, i));
137     else if (!ConfigIsPermanent (adapt_class, i)
138       && TempConfigFor (adapt_class, i) != NULL)
139       FreeTempConfig (TempConfigFor (adapt_class, i));
140   }
141   FreeBitVector (adapt_class->PermProtos);
142   FreeBitVector (adapt_class->PermConfigs);
143   destroy_nodes (adapt_class->TempProtos, FreeTempProto);
144   Efree(adapt_class);
145 }
146 
147 
148 /*---------------------------------------------------------------------------*/
149 namespace tesseract {
NewAdaptedTemplates(bool InitFromUnicharset)150 ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) {
151 /*
152  **	Parameters:
153  **		PopulateFromUnicharset      if true, add an empty class for
154  **                                         each char in unicharset to the
155  **                                         newly created templates
156  **	Globals: none
157  **	Operation: Allocates memory for adapted tempates.
158  **	Return: Ptr to new adapted templates.
159  **	Exceptions: none
160  **	History: Fri Mar  8 10:15:28 1991, DSJ, Created.
161  */
162   ADAPT_TEMPLATES Templates;
163   int i;
164 
165   Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
166 
167   Templates->Templates = NewIntTemplates ();
168   Templates->NumPermClasses = 0;
169   Templates->NumNonEmptyClasses = 0;
170 
171   /* Insert an empty class for each unichar id in unicharset */
172   for (i = 0; i < MAX_NUM_CLASSES; i++) {
173     Templates->Class[i] = NULL;
174     if (InitFromUnicharset && i < unicharset.size()) {
175       AddAdaptedClass(Templates, NewAdaptedClass(), i);
176     }
177   }
178 
179   return (Templates);
180 
181 }                                /* NewAdaptedTemplates */
182 }  // namespace tesseract
183 
184 /*----------------------------------------------------------------------------*/
free_adapted_templates(ADAPT_TEMPLATES templates)185 void free_adapted_templates(ADAPT_TEMPLATES templates) {
186 
187   if (templates != NULL) {
188     int i;
189     for (i = 0; i < (templates->Templates)->NumClasses; i++)
190       free_adapted_class (templates->Class[i]);
191     free_int_templates (templates->Templates);
192     Efree(templates);
193   }
194 }
195 
196 
197 /*---------------------------------------------------------------------------*/
NewTempConfig(int MaxProtoId)198 TEMP_CONFIG NewTempConfig(int MaxProtoId) {
199 /*
200  **	Parameters:
201  **		MaxProtoId	max id of any proto in new config
202  **	Globals: none
203  **	Operation: This routine allocates and returns a new temporary
204  **		config.
205  **	Return: Ptr to new temp config.
206  **	Exceptions: none
207  **	History: Thu Mar 14 13:28:21 1991, DSJ, Created.
208  */
209   TEMP_CONFIG Config;
210   int NumProtos = MaxProtoId + 1;
211 
212   Config =
213     (TEMP_CONFIG) alloc_struct (sizeof (TEMP_CONFIG_STRUCT),
214     "TEMP_CONFIG_STRUCT");
215   Config->Protos = NewBitVector (NumProtos);
216 
217   Config->NumTimesSeen = 1;
218   Config->MaxProtoId = MaxProtoId;
219   Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
220   Config->ContextsSeen = NIL;
221   zero_all_bits (Config->Protos, Config->ProtoVectorSize);
222 
223   return (Config);
224 
225 }                                /* NewTempConfig */
226 
227 
228 /*---------------------------------------------------------------------------*/
NewTempProto()229 TEMP_PROTO NewTempProto() {
230 /*
231  **	Parameters: none
232  **	Globals: none
233  **	Operation: This routine allocates and returns a new temporary proto.
234  **	Return: Ptr to new temporary proto.
235  **	Exceptions: none
236  **	History: Thu Mar 14 13:31:31 1991, DSJ, Created.
237  */
238   return ((TEMP_PROTO)
239     alloc_struct (sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT"));
240 }                                /* NewTempProto */
241 
242 
243 /*---------------------------------------------------------------------------*/
244 namespace tesseract {
PrintAdaptedTemplates(FILE * File,ADAPT_TEMPLATES Templates)245 void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
246 /*
247  **	Parameters:
248  **		File		open text file to print Templates to
249  **		Templates	adapted templates to print to File
250  **	Globals: none
251  **	Operation: This routine prints a summary of the adapted templates
252  **		in Templates to File.
253  **	Return: none
254  **	Exceptions: none
255  **	History: Wed Mar 20 13:35:29 1991, DSJ, Created.
256  */
257   int i;
258   INT_CLASS IClass;
259   ADAPT_CLASS AClass;
260 
261   #ifndef SECURE_NAMES
262   fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
263   fprintf (File, "Num classes = %d;  Num permanent classes = %d\n\n",
264            Templates->NumNonEmptyClasses, Templates->NumPermClasses);
265   fprintf (File, "   Id  NC NPC  NP NPP\n");
266   fprintf (File, "------------------------\n");
267 
268   for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
269     IClass = Templates->Templates->Class[i];
270     AClass = Templates->Class[i];
271     if (!IsEmptyAdaptedClass (AClass)) {
272       fprintf (File, "%5d  %s %3d %3d %3d %3d\n",
273         i, unicharset.id_to_unichar(i),
274       IClass->NumConfigs, AClass->NumPermConfigs,
275       IClass->NumProtos,
276       IClass->NumProtos - count (AClass->TempProtos));
277     }
278   }
279   #endif
280   fprintf (File, "\n");
281 
282 }                                /* PrintAdaptedTemplates */
283 }  // namespace tesseract
284 
285 
286 /*---------------------------------------------------------------------------*/
ReadAdaptedClass(FILE * File)287 ADAPT_CLASS ReadAdaptedClass(FILE *File) {
288 /*
289  **	Parameters:
290  **		File	open file to read adapted class from
291  **	Globals: none
292  **	Operation: Read an adapted class description from File and return
293  **		a ptr to the adapted class.
294  **	Return: Ptr to new adapted class.
295  **	Exceptions: none
296  **	History: Tue Mar 19 14:11:01 1991, DSJ, Created.
297  */
298   int NumTempProtos;
299   int NumConfigs;
300   int i;
301   ADAPT_CLASS Class;
302   TEMP_PROTO TempProto;
303 
304   /* first read high level adapted class structure */
305   Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
306   fread ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
307 
308   /* then read in the definitions of the permanent protos and configs */
309   Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
310   Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
311   fread ((char *) Class->PermProtos, sizeof (uinT32),
312     WordsInVectorOfSize (MAX_NUM_PROTOS), File);
313   fread ((char *) Class->PermConfigs, sizeof (uinT32),
314     WordsInVectorOfSize (MAX_NUM_CONFIGS), File);
315 
316   /* then read in the list of temporary protos */
317   fread ((char *) &NumTempProtos, sizeof (int), 1, File);
318   Class->TempProtos = NIL;
319   for (i = 0; i < NumTempProtos; i++) {
320     TempProto =
321       (TEMP_PROTO) alloc_struct (sizeof (TEMP_PROTO_STRUCT),
322       "TEMP_PROTO_STRUCT");
323     fread ((char *) TempProto, sizeof (TEMP_PROTO_STRUCT), 1, File);
324     Class->TempProtos = push_last (Class->TempProtos, TempProto);
325   }
326 
327   /* then read in the adapted configs */
328   fread ((char *) &NumConfigs, sizeof (int), 1, File);
329   for (i = 0; i < NumConfigs; i++)
330     if (test_bit (Class->PermConfigs, i))
331       Class->Config[i].Perm = ReadPermConfig (File);
332     else
333       Class->Config[i].Temp = ReadTempConfig (File);
334 
335   return (Class);
336 
337 }                                /* ReadAdaptedClass */
338 
339 
340 /*---------------------------------------------------------------------------*/
341 namespace tesseract {
ReadAdaptedTemplates(FILE * File)342 ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(FILE *File) {
343 /*
344  **	Parameters:
345  **		File	open text file to read adapted templates from
346  **	Globals: none
347  **	Operation: Read a set of adapted templates from File and return
348  **		a ptr to the templates.
349  **	Return: Ptr to adapted templates read from File.
350  **	Exceptions: none
351  **	History: Mon Mar 18 15:18:10 1991, DSJ, Created.
352  */
353   int i;
354   ADAPT_TEMPLATES Templates;
355 
356   /* first read the high level adaptive template struct */
357   Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
358   fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
359 
360   /* then read in the basic integer templates */
361   Templates->Templates = ReadIntTemplates (File);
362 
363   /* then read in the adaptive info for each class */
364   for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
365     Templates->Class[i] = ReadAdaptedClass (File);
366   }
367   return (Templates);
368 
369 }                                /* ReadAdaptedTemplates */
370 }  // namespace tesseract
371 
372 
373 /*---------------------------------------------------------------------------*/
ReadPermConfig(FILE * File)374 PERM_CONFIG ReadPermConfig(FILE *File) {
375 /*
376  **	Parameters:
377  **		File	open file to read permanent config from
378  **	Globals: none
379  **	Operation: Read a permanent configuration description from File
380  **		and return a ptr to it.
381  **	Return: Ptr to new permanent configuration description.
382  **	Exceptions: none
383  **	History: Tue Mar 19 14:25:26 1991, DSJ, Created.
384  */
385   PERM_CONFIG Config;
386   uinT8 NumAmbigs;
387 
388   fread ((char *) &NumAmbigs, sizeof (uinT8), 1, File);
389   Config = (PERM_CONFIG) Emalloc (sizeof (UNICHAR_ID) * (NumAmbigs + 1));
390   fread (Config, sizeof (UNICHAR_ID), NumAmbigs, File);
391   Config[NumAmbigs] = -1;
392 
393   return (Config);
394 
395 }                                /* ReadPermConfig */
396 
397 
398 /*---------------------------------------------------------------------------*/
ReadTempConfig(FILE * File)399 TEMP_CONFIG ReadTempConfig(FILE *File) {
400 /*
401  **	Parameters:
402  **		File	open file to read temporary config from
403  **	Globals: none
404  **	Operation:  Read a temporary configuration description from File
405  **		and return a ptr to it.
406  **	Return: Ptr to new temporary configuration description.
407  **	Exceptions: none
408  **	History: Tue Mar 19 14:29:59 1991, DSJ, Created.
409  */
410   TEMP_CONFIG Config;
411 
412   Config =
413     (TEMP_CONFIG) alloc_struct (sizeof (TEMP_CONFIG_STRUCT),
414     "TEMP_CONFIG_STRUCT");
415   fread ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
416 
417   Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
418   fread ((char *) Config->Protos, sizeof (uinT32),
419     Config->ProtoVectorSize, File);
420 
421   return (Config);
422 
423 }                                /* ReadTempConfig */
424 
425 
426 /*---------------------------------------------------------------------------*/
WriteAdaptedClass(FILE * File,ADAPT_CLASS Class,int NumConfigs)427 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
428 /*
429  **	Parameters:
430  **		File		open file to write Class to
431  **		Class		adapted class to write to File
432  **		NumConfigs	number of configs in Class
433  **	Globals: none
434  **	Operation: This routine writes a binary representation of Class
435  **		to File.
436  **	Return: none
437  **	Exceptions: none
438  **	History: Tue Mar 19 13:33:51 1991, DSJ, Created.
439  */
440   int NumTempProtos;
441   LIST TempProtos;
442   int i;
443 
444   /* first write high level adapted class structure */
445   fwrite ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
446 
447   /* then write out the definitions of the permanent protos and configs */
448   fwrite ((char *) Class->PermProtos, sizeof (uinT32),
449     WordsInVectorOfSize (MAX_NUM_PROTOS), File);
450   fwrite ((char *) Class->PermConfigs, sizeof (uinT32),
451     WordsInVectorOfSize (MAX_NUM_CONFIGS), File);
452 
453   /* then write out the list of temporary protos */
454   NumTempProtos = count (Class->TempProtos);
455   fwrite ((char *) &NumTempProtos, sizeof (int), 1, File);
456   TempProtos = Class->TempProtos;
457   iterate (TempProtos) {
458     void* proto = first_node(TempProtos);
459     fwrite ((char *) proto, sizeof (TEMP_PROTO_STRUCT), 1, File);
460   }
461 
462   /* then write out the adapted configs */
463   fwrite ((char *) &NumConfigs, sizeof (int), 1, File);
464   for (i = 0; i < NumConfigs; i++)
465     if (test_bit (Class->PermConfigs, i))
466       WritePermConfig (File, Class->Config[i].Perm);
467     else
468       WriteTempConfig (File, Class->Config[i].Temp);
469 
470 }                                /* WriteAdaptedClass */
471 
472 
473 /*---------------------------------------------------------------------------*/
474 namespace tesseract {
WriteAdaptedTemplates(FILE * File,ADAPT_TEMPLATES Templates)475 void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
476 /*
477  **	Parameters:
478  **		File		open text file to write Templates to
479  **		Templates	set of adapted templates to write to File
480  **	Globals: none
481  **	Operation: This routine saves Templates to File in a binary format.
482  **	Return: none
483  **	Exceptions: none
484  **	History: Mon Mar 18 15:07:32 1991, DSJ, Created.
485  */
486   int i;
487 
488   /* first write the high level adaptive template struct */
489   fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
490 
491   /* then write out the basic integer templates */
492   WriteIntTemplates (File, Templates->Templates, unicharset);
493 
494   /* then write out the adaptive info for each class */
495   for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
496     WriteAdaptedClass (File, Templates->Class[i],
497       Templates->Templates->Class[i]->NumConfigs);
498   }
499 }                                /* WriteAdaptedTemplates */
500 }  // namespace tesseract
501 
502 
503 /*---------------------------------------------------------------------------*/
WritePermConfig(FILE * File,PERM_CONFIG Config)504 void WritePermConfig(FILE *File, PERM_CONFIG Config) {
505 /*
506  **	Parameters:
507  **		File	open file to write Config to
508  **		Config	permanent config to write to File
509  **	Globals: none
510  **	Operation: This routine writes a binary representation of a
511  **		permanent configuration to File.
512  **	Return: none
513  **	Exceptions: none
514  **	History: Tue Mar 19 13:55:44 1991, DSJ, Created.
515  */
516   uinT8 NumAmbigs = 0;
517 
518   assert (Config != NULL);
519   while (Config[NumAmbigs] > 0)
520     ++NumAmbigs;
521 
522   fwrite ((char *) &NumAmbigs, sizeof (uinT8), 1, File);
523   fwrite (Config, sizeof (UNICHAR_ID), NumAmbigs, File);
524 
525 }                                /* WritePermConfig */
526 
527 
528 /*---------------------------------------------------------------------------*/
WriteTempConfig(FILE * File,TEMP_CONFIG Config)529 void WriteTempConfig(FILE *File, TEMP_CONFIG Config) {
530 /*
531  **	Parameters:
532  **		File	open file to write Config to
533  **		Config	temporary config to write to File
534  **	Globals: none
535  **	Operation: This routine writes a binary representation of a
536  **		temporary configuration to File.
537  **	Return: none
538  **	Exceptions: none
539  **	History: Tue Mar 19 14:00:28 1991, DSJ, Created.
540  */
541   assert (Config != NULL);
542                                  /* contexts not yet implemented */
543   assert (Config->ContextsSeen == NULL);
544 
545   fwrite ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
546   fwrite ((char *) Config->Protos, sizeof (uinT32),
547     Config->ProtoVectorSize, File);
548 
549 }                                /* WriteTempConfig */
550