• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File:        protos.c  (Formerly protos.c)
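5  * Description: Routines that allocate, grow, free, read, and write classes together with their prototypes and configurations.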
6  * Author:       Mark Seaman, OCR Technology
7  * Created:      Fri Oct 16 14:37:00 1987
8  * Modified:     Mon Mar  4 14:51:24 1991 (Dan Johnson) danj@hpgrlj
9  * Language:     C
10  * Package:      N/A
11  * Status:       Reusable Software Component
12  *
13  * (c) Copyright 1987, Hewlett-Packard Company.
14  ** Licensed under the Apache License, Version 2.0 (the "License");
15  ** you may not use this file except in compliance with the License.
16  ** You may obtain a copy of the License at
17  ** http://www.apache.org/licenses/LICENSE-2.0
18  ** Unless required by applicable law or agreed to in writing, software
19  ** distributed under the License is distributed on an "AS IS" BASIS,
20  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  ** See the License for the specific language governing permissions and
22  ** limitations under the License.
23  *
24  *********************************************************************************/
25 /*----------------------------------------------------------------------
26               I n c l u d e s
27 ----------------------------------------------------------------------*/
28 #include "protos.h"
29 #include "const.h"
30 #include "emalloc.h"
31 #include "freelist.h"
32 #include "callcpp.h"
33 #include "tprintf.h"
34 #include "adaptmatch.h"
35 #include "scanutils.h"
36 #include "globals.h"
37 #include "classify.h"
38 #include "varable.h"
39 
40 #include <stdio.h>
41 #include <math.h>
42 
/* Growth granularity of the per-class tables: AddProtoToClass() and
   AddConfigToClass() enlarge their table to the next multiple of the
   matching increment whenever it is full. */
#define PROTO_INCREMENT   32
#define CONFIG_INCREMENT  16
45 
46 /*----------------------------------------------------------------------
47               V a r i a b l e s
48 ----------------------------------------------------------------------*/
49 CLASS_STRUCT TrainingData[NUMBER_OF_CLASSES];
50 
51 STRING_VAR(classify_training_file, "MicroFeatures", "Training file");
52 
53 /*----------------------------------------------------------------------
54               F u n c t i o n s
55 ----------------------------------------------------------------------*/
56 /**********************************************************************
57  * AddConfigToClass
58  *
59  * Add a new config to this class.  Malloc new space and copy the
60  * old configs if necessary.  Return the config id for the new config.
61  **********************************************************************/
AddConfigToClass(CLASS_TYPE Class)62 int AddConfigToClass(CLASS_TYPE Class) {
63   int NewNumConfigs;
64   int NewConfig;
65   int MaxNumProtos;
66   BIT_VECTOR Config;
67 
68   MaxNumProtos = Class->MaxNumProtos;
69 
70   if (Class->NumConfigs >= Class->MaxNumConfigs) {
71     /* add configs in CONFIG_INCREMENT chunks at a time */
72     NewNumConfigs = (((Class->MaxNumConfigs + CONFIG_INCREMENT) /
73       CONFIG_INCREMENT) * CONFIG_INCREMENT);
74 
75     Class->Configurations =
76       (CONFIGS) Erealloc (Class->Configurations,
77       sizeof (BIT_VECTOR) * NewNumConfigs);
78 
79     Class->MaxNumConfigs = NewNumConfigs;
80   }
81   NewConfig = Class->NumConfigs++;
82   Config = NewBitVector (MaxNumProtos);
83   Class->Configurations[NewConfig] = Config;
84   zero_all_bits (Config, WordsInVectorOfSize (MaxNumProtos));
85 
86   return (NewConfig);
87 }
88 
89 
90 /**********************************************************************
91  * AddProtoToClass
92  *
93  * Add a new proto to this class.  Malloc new space and copy the
94  * old protos if necessary.  Return the proto id for the new proto.
95  **********************************************************************/
AddProtoToClass(CLASS_TYPE Class)96 int AddProtoToClass(CLASS_TYPE Class) {
97   int i;
98   int Bit;
99   int NewNumProtos;
100   int NewProto;
101   BIT_VECTOR Config;
102 
103   if (Class->NumProtos >= Class->MaxNumProtos) {
104     /* add protos in PROTO_INCREMENT chunks at a time */
105     NewNumProtos = (((Class->MaxNumProtos + PROTO_INCREMENT) /
106       PROTO_INCREMENT) * PROTO_INCREMENT);
107 
108     Class->Prototypes = (PROTO) Erealloc (Class->Prototypes,
109       sizeof (PROTO_STRUCT) *
110       NewNumProtos);
111 
112     Class->MaxNumProtos = NewNumProtos;
113 
114     for (i = 0; i < Class->NumConfigs; i++) {
115       Config = Class->Configurations[i];
116       Class->Configurations[i] = ExpandBitVector (Config, NewNumProtos);
117 
118       for (Bit = Class->NumProtos; Bit < NewNumProtos; Bit++)
119         reset_bit(Config, Bit);
120     }
121   }
122   NewProto = Class->NumProtos++;
123   if (Class->NumProtos > MAX_NUM_PROTOS) {
124     tprintf("Ouch! number of protos = %d, vs max of %d!",
125             Class->NumProtos, MAX_NUM_PROTOS);
126   }
127   return (NewProto);
128 }
129 
130 
131 /**********************************************************************
132  * ClassConfigLength
133  *
134  * Return the length of all the protos in this class.
135  **********************************************************************/
ClassConfigLength(CLASS_TYPE Class,BIT_VECTOR Config)136 FLOAT32 ClassConfigLength(CLASS_TYPE Class, BIT_VECTOR Config) {
137   inT16 Pid;
138   FLOAT32 TotalLength = 0;
139 
140   for (Pid = 0; Pid < Class->NumProtos; Pid++) {
141     if (test_bit (Config, Pid)) {
142 
143       TotalLength += (ProtoIn (Class, Pid))->Length;
144     }
145   }
146   return (TotalLength);
147 }
148 
149 
150 /**********************************************************************
151  * ClassProtoLength
152  *
153  * Return the length of all the protos in this class.
154  **********************************************************************/
ClassProtoLength(CLASS_TYPE Class)155 FLOAT32 ClassProtoLength(CLASS_TYPE Class) {
156   inT16 Pid;
157   FLOAT32 TotalLength = 0;
158 
159   for (Pid = 0; Pid < Class->NumProtos; Pid++) {
160     TotalLength += (ProtoIn (Class, Pid))->Length;
161   }
162   return (TotalLength);
163 }
164 
165 
166 /**********************************************************************
167  * CopyProto
168  *
169  * Copy the first proto into the second.
170  **********************************************************************/
CopyProto(PROTO Src,PROTO Dest)171 void CopyProto(PROTO Src, PROTO Dest) {
172   Dest->X = Src->X;
173   Dest->Y = Src->Y;
174   Dest->Length = Src->Length;
175   Dest->Angle = Src->Angle;
176   Dest->A = Src->A;
177   Dest->B = Src->B;
178   Dest->C = Src->C;
179 }
180 
181 
182 /**********************************************************************
183  * FillABC
184  *
185  * Fill in Protos A, B, C fields based on the X, Y, Angle fields.
186  **********************************************************************/
FillABC(PROTO Proto)187 void FillABC(PROTO Proto) {
188   FLOAT32 Slope, Intercept, Normalizer;
189 
190   Slope = tan (Proto->Angle * 2.0 * PI);
191   Intercept = Proto->Y - Slope * Proto->X;
192   Normalizer = 1.0 / sqrt (Slope * Slope + 1.0);
193   Proto->A = Slope * Normalizer;
194   Proto->B = -Normalizer;
195   Proto->C = Intercept * Normalizer;
196 }
197 
198 
199 /**********************************************************************
200  * FreeClass
201  *
202  * Deallocate the memory consumed by the specified class.
203  **********************************************************************/
FreeClass(CLASS_TYPE Class)204 void FreeClass(CLASS_TYPE Class) {
205   if (Class) {
206     FreeClassFields(Class);
207     delete Class;
208   }
209 }
210 
211 
212 /**********************************************************************
213  * FreeClassFields
214  *
215  * Deallocate the memory consumed by subfields of the specified class.
216  **********************************************************************/
FreeClassFields(CLASS_TYPE Class)217 void FreeClassFields(CLASS_TYPE Class) {
218   int i;
219 
220   if (Class) {
221     if (Class->MaxNumProtos > 0)
222       memfree (Class->Prototypes);
223     if (Class->MaxNumConfigs > 0) {
224       for (i = 0; i < Class->NumConfigs; i++)
225         FreeBitVector (Class->Configurations[i]);
226       memfree (Class->Configurations);
227     }
228   }
229 }
230 
231 /**********************************************************************
232  * NewClass
233  *
234  * Allocate a new class with enough memory to hold the specified number
235  * of prototypes and configurations.
236  **********************************************************************/
NewClass(int NumProtos,int NumConfigs)237 CLASS_TYPE NewClass(int NumProtos, int NumConfigs) {
238   CLASS_TYPE Class;
239 
240   Class = new CLASS_STRUCT;
241 
242   if (NumProtos > 0)
243     Class->Prototypes = (PROTO) Emalloc (NumProtos * sizeof (PROTO_STRUCT));
244 
245   if (NumConfigs > 0)
246     Class->Configurations = (CONFIGS) Emalloc (NumConfigs *
247       sizeof (BIT_VECTOR));
248   Class->MaxNumProtos = NumProtos;
249   Class->MaxNumConfigs = NumConfigs;
250   Class->NumProtos = 0;
251   Class->NumConfigs = 0;
252   return (Class);
253 
254 }
255 
256 
257 /**********************************************************************
258  * PrintProtos
259  *
260  * Print the list of prototypes in this class type.
261  **********************************************************************/
PrintProtos(CLASS_TYPE Class)262 void PrintProtos(CLASS_TYPE Class) {
263   inT16 Pid;
264 
265   for (Pid = 0; Pid < Class->NumProtos; Pid++) {
266     cprintf ("Proto %d:\t", Pid);
267     PrintProto (ProtoIn (Class, Pid));
268     cprintf ("\t");
269     PrintProtoLine (ProtoIn (Class, Pid));
270     new_line();
271   }
272 }
273 
274 
275 /**********************************************************************
276  * ReadClassFile
277  *
278  * Read in the training data from a file.  All of the classes are read
279  * in.  The results are stored in the global variable, 'TrainingData'.
280  **********************************************************************/
281 namespace tesseract {
ReadClassFile()282 void Classify::ReadClassFile() {
283  FILE *File;
284  char TextLine[CHARS_PER_LINE];
285  char unichar[CHARS_PER_LINE];
286 
287  cprintf ("Reading training data from '%s' ...",
288           static_cast<STRING>(classify_training_file).string());
289  fflush(stdout);
290 
291  File = open_file(static_cast<STRING>(classify_training_file).string(), "r");
292  while (fgets (TextLine, CHARS_PER_LINE, File) != NULL) {
293 
294    sscanf(TextLine, "%s", unichar);
295    ReadClassFromFile (File, unicharset.unichar_to_id(unichar));
296    fgets(TextLine, CHARS_PER_LINE, File);
297    fgets(TextLine, CHARS_PER_LINE, File);
298  }
299  fclose(File);
300  new_line();
301 }
302 }  // namespace tesseract
303 
304 /**********************************************************************
305  * ReadClassFromFile
306  *
307  * Read in a class description (protos and configs) from a file.  Update
308  * the class structure record.
309  **********************************************************************/
ReadClassFromFile(FILE * File,UNICHAR_ID unichar_id)310 void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id) {
311   CLASS_TYPE Class;
312 
313   Class = &TrainingData[unichar_id];
314 
315   ReadProtos(File, Class);
316 
317   ReadConfigs(File, Class);
318 }
319 
320 /**********************************************************************
321  * ReadConfigs
322  *
323  * Read the prototype configurations for this class from a file.  Read
324  * the requested number of lines.
325  **********************************************************************/
ReadConfigs(register FILE * File,CLASS_TYPE Class)326 void ReadConfigs(register FILE *File, CLASS_TYPE Class) {
327   inT16 Cid;
328   register inT16 Wid;
329   register BIT_VECTOR ThisConfig;
330   int NumWords;
331   int NumConfigs;
332 
333   fscanf (File, "%d %d\n", &NumConfigs, &NumWords);
334   Class->NumConfigs = NumConfigs;
335   Class->MaxNumConfigs = NumConfigs;
336   Class->Configurations =
337     (CONFIGS) Emalloc (sizeof (BIT_VECTOR) * NumConfigs);
338   NumWords = WordsInVectorOfSize (Class->NumProtos);
339 
340   for (Cid = 0; Cid < NumConfigs; Cid++) {
341 
342     ThisConfig = NewBitVector (Class->NumProtos);
343     for (Wid = 0; Wid < NumWords; Wid++)
344       fscanf (File, "%x", &ThisConfig[Wid]);
345     Class->Configurations[Cid] = ThisConfig;
346   }
347 }
348 
349 
350 /**********************************************************************
351  * ReadProtos
352  *
353  * Read in all the prototype information from a file.  Read the number
354  * of lines requested.
355  **********************************************************************/
ReadProtos(register FILE * File,CLASS_TYPE Class)356 void ReadProtos(register FILE *File, CLASS_TYPE Class) {
357   register inT16 Pid;
358   register PROTO Proto;
359   int NumProtos;
360 
361   fscanf (File, "%d\n", &NumProtos);
362   Class->NumProtos = NumProtos;
363   Class->MaxNumProtos = NumProtos;
364   Class->Prototypes = (PROTO) Emalloc (sizeof (PROTO_STRUCT) * NumProtos);
365 
366   for (Pid = 0; Pid < NumProtos; Pid++) {
367     Proto = ProtoIn (Class, Pid);
368     fscanf (File, "%f %f %f %f %f %f %f\n",
369       &Proto->X,
370       &Proto->Y,
371       &Proto->Length,
372       &Proto->Angle,
373       &Proto->A,
374       &Proto->B, &Proto->C);
375   }
376 }
377 
378 
379 /**********************************************************************
380  * SplitProto
381  *
382  * Add a new proto to this class.  Malloc new space and copy the
383  * old protos if necessary.  Return the proto id for the new proto.
384  * Update all configurations so that each config which contained the
385  * specified old proto will also contain the new proto.  The caller
386  * is responsible for actually filling in the appropriate proto params.
387  **********************************************************************/
SplitProto(CLASS_TYPE Class,int OldPid)388 int SplitProto(CLASS_TYPE Class, int OldPid) {
389   int i;
390   int NewPid;
391   BIT_VECTOR Config;
392 
393   NewPid = AddProtoToClass (Class);
394 
395   for (i = 0; i < Class->NumConfigs; i++) {
396     Config = Class->Configurations[i];
397     if (test_bit (Config, OldPid))
398       SET_BIT(Config, NewPid);
399   }
400   return (NewPid);
401 }
402 
403 
404 /**********************************************************************
405  * WriteOldConfigFile
406  *
407  * Write the configs in the given class to the specified file in the
408  * old config format.
409  **********************************************************************/
WriteOldConfigFile(FILE * File,CLASS_TYPE Class)410 void WriteOldConfigFile(FILE *File, CLASS_TYPE Class) {
411   int Cid, Pid;
412   BIT_VECTOR Config;
413 
414   fprintf (File, "%d %d\n", Class->NumConfigs, Class->NumProtos);
415 
416   for (Cid = 0; Cid < Class->NumConfigs; Cid++) {
417     fprintf (File, "1 ");
418 
419     Config = Class->Configurations[Cid];
420 
421     for (Pid = 0; Pid < Class->NumProtos; Pid++) {
422       if (test_bit (Config, Pid))
423         fprintf (File, "1");
424       else
425         fprintf (File, "0");
426     }
427     fprintf (File, "\n");
428   }
429 }
430 
431 
432 /**********************************************************************
433  * WriteOldProtoFile
434  *
435  * Write the protos in the given class to the specified file in the
436  * old proto format.
437  **********************************************************************/
WriteOldProtoFile(FILE * File,CLASS_TYPE Class)438 void WriteOldProtoFile(FILE *File, CLASS_TYPE Class) {
439   int Pid;
440   PROTO Proto;
441 
442   /* print old header */
443   fprintf (File, "6\n");
444   fprintf (File, "linear   essential      -0.500000   0.500000\n");
445   fprintf (File, "linear   essential      -0.250000   0.750000\n");
446   fprintf (File, "linear   essential       0.000000   1.000000\n");
447   fprintf (File, "circular essential       0.000000   1.000000\n");
448   fprintf (File, "linear   non-essential  -0.500000   0.500000\n");
449   fprintf (File, "linear   non-essential  -0.500000   0.500000\n");
450 
451   for (Pid = 0; Pid < Class->NumProtos; Pid++) {
452     Proto = ProtoIn (Class, Pid);
453 
454     fprintf (File, "significant   elliptical   1\n");
455     fprintf (File, "     %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
456       Proto->X, Proto->Y,
457       Proto->Length, Proto->Angle, 0.0, 0.0);
458     fprintf (File, "     %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
459       0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001);
460   }
461 }
462