• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File:        metrics.c  (Formerly metrics.c)
5  * Description:
6  * Author:       Mark Seaman, OCR Technology
7  * Created:      Fri Oct 16 14:37:00 1987
8  * Modified:     Tue Jul 30 17:02:07 1991 (Mark Seaman) marks@hpgrlt
9  * Language:     C
10  * Package:      N/A
11  * Status:       Reusable Software Component
12  *
13  * (c) Copyright 1987, Hewlett-Packard Company.
14  ** Licensed under the Apache License, Version 2.0 (the "License");
15  ** you may not use this file except in compliance with the License.
16  ** You may obtain a copy of the License at
17  ** http://www.apache.org/licenses/LICENSE-2.0
18  ** Unless required by applicable law or agreed to in writing, software
19  ** distributed under the License is distributed on an "AS IS" BASIS,
20  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  ** See the License for the specific language governing permissions and
22  ** limitations under the License.
23  *
24  *********************************************************************************/
25 /*----------------------------------------------------------------------
26               I n c l u d e s
27 ----------------------------------------------------------------------*/
28 #include "metrics.h"
29 #include "bestfirst.h"
30 #include "associate.h"
31 #include "tally.h"
32 #include "plotseg.h"
33 #include "globals.h"
34 #include "wordclass.h"
35 #include "intmatcher.h"
36 #include "freelist.h"
37 #include "callcpp.h"
38 #include "ndminx.h"
39 #include "wordrec.h"
40 
41 /*----------------------------------------------------------------------
42               V a r i a b l e s
43 ----------------------------------------------------------------------*/
44 static int states_timed_out1;    /* Counters */
45 static int states_timed_out2;
46 static int words_segmented1;
47 static int words_segmented2;
48 static int segmentation_states1;
49 static int segmentation_states2;
50 static int save_priorities;
51 
52 int words_chopped1;
53 int words_chopped2;
54 int chops_attempted1;
55 int chops_performed1;
56 int chops_attempted2;
57 int chops_performed2;
58 
59 int character_count;
60 int word_count;
61 int chars_classified;
62 
63 MEASUREMENT num_pieces;
64 MEASUREMENT width_measure;
65 
66 MEASUREMENT width_priority_range;/* Help to normalize */
67 MEASUREMENT match_priority_range;
68 
69 TALLY states_before_best;
70 TALLY best_certainties[2];
71 TALLY character_widths;          /* Width histogram */
72 
73 FILE *priority_file_1;           /* Output to cluster */
74 FILE *priority_file_2;
75 FILE *priority_file_3;
76 
77 STATE *known_best_state = NULL;  /* The right answer */
78 
79 /*----------------------------------------------------------------------
80               M a c r o s
81 ----------------------------------------------------------------------*/
/* Width of one certainty bucket.  The value is negative, so it is
 * parenthesized to keep the sign bound to the constant wherever the
 * macro is expanded inside a larger expression. */
#define   CERTAINTY_BUCKET_SIZE (-0.5)
/* Number of buckets in each best-certainty tally. */
#define   CERTAINTY_BUCKETS     40
84 
85 /*----------------------------------------------------------------------
86               F u n c t i o n s
87 ----------------------------------------------------------------------*/
88 /**********************************************************************
89  * init_metrics
90  *
91  * Set up the appropriate variables to record information about the
92  * OCR process. Later calls will log the data and save a summary.
93  **********************************************************************/
init_metrics()94 void init_metrics() {
95   words_chopped1 = 0;
96   words_chopped2 = 0;
97   chops_performed1 = 0;
98   chops_performed2 = 0;
99   chops_attempted1 = 0;
100   chops_attempted2 = 0;
101 
102   words_segmented1 = 0;
103   words_segmented2 = 0;
104   states_timed_out1 = 0;
105   states_timed_out2 = 0;
106   segmentation_states1 = 0;
107   segmentation_states2 = 0;
108 
109   save_priorities = 0;
110 
111   character_count = 0;
112   word_count = 0;
113   chars_classified = 0;
114   permutation_count = 0;
115 
116   end_metrics();
117 
118   states_before_best = new_tally (MIN (100, wordrec_num_seg_states));
119 
120   best_certainties[0] = new_tally (CERTAINTY_BUCKETS);
121   best_certainties[1] = new_tally (CERTAINTY_BUCKETS);
122   reset_width_tally();
123 }
124 
end_metrics()125 void end_metrics() {
126   if (states_before_best != NULL) {
127     memfree(states_before_best);
128     memfree(best_certainties[0]);
129     memfree(best_certainties[1]);
130     memfree(character_widths);
131     states_before_best = NULL;
132     best_certainties[0] = NULL;
133     best_certainties[1] = NULL;
134     character_widths = NULL;
135   }
136 }
137 
138 
139 /**********************************************************************
140  * record_certainty
141  *
142  * Maintain a record of the best certainty values achieved on each
143  * word recognition.
144  **********************************************************************/
record_certainty(float certainty,int pass)145 void record_certainty(float certainty, int pass) {
146   int bucket;
147 
148   if (certainty / CERTAINTY_BUCKET_SIZE < MAX_INT32)
149     bucket = (int) (certainty / CERTAINTY_BUCKET_SIZE);
150   else
151     bucket = MAX_INT32;
152 
153   inc_tally_bucket (best_certainties[pass - 1], bucket);
154 }
155 
156 
157 /**********************************************************************
158  * record_search_status
159  *
160  * Record information about each iteration of the search.  This  data
161  * is kept in global memory and accumulated over multiple segmenter
162  * searches.
163  **********************************************************************/
record_search_status(int num_states,int before_best,float closeness)164 void record_search_status(int num_states, int before_best, float closeness) {
165   inc_tally_bucket(states_before_best, before_best);
166 
167   if (first_pass) {
168     if (num_states == wordrec_num_seg_states + 1)
169       states_timed_out1++;
170     segmentation_states1 += num_states;
171     words_segmented1++;
172   }
173   else {
174     if (num_states == wordrec_num_seg_states + 1)
175       states_timed_out2++;
176     segmentation_states2 += num_states;
177     words_segmented2++;
178   }
179 }
180 
181 
182 /**********************************************************************
183  * save_summary
184  *
185  * Save the summary information into the file "file.sta".
186  **********************************************************************/
187 namespace tesseract {
save_summary(inT32 elapsed_time)188 void Wordrec::save_summary(inT32 elapsed_time) {
189   #ifndef SECURE_NAMES
190   STRING outfilename;
191   FILE *f;
192   int x;
193   int total;
194 
195   outfilename = imagefile + ".sta";
196   f = open_file (outfilename.string(), "w");
197 
198   fprintf (f, INT32FORMAT " seconds elapsed\n", elapsed_time);
199   fprintf (f, "\n");
200 
201   fprintf (f, "%d characters\n", character_count);
202   fprintf (f, "%d words\n", word_count);
203   fprintf (f, "\n");
204 
205   fprintf (f, "%d permutations performed\n", permutation_count);
206   fprintf (f, "%d characters classified\n", chars_classified);
207   fprintf (f, "%4.0f%% classification overhead\n",
208     (float) chars_classified / character_count * 100.0 - 100.0);
209   fprintf (f, "\n");
210 
211   fprintf (f, "%d words chopped (pass 1) ", words_chopped1);
212   fprintf (f, " (%0.0f%%)\n", (float) words_chopped1 / word_count * 100);
213   fprintf (f, "%d chops performed\n", chops_performed1);
214   fprintf (f, "%d chops attempted\n", chops_attempted1);
215   fprintf (f, "\n");
216 
217   fprintf (f, "%d words joined (pass 1)", words_segmented1);
218   fprintf (f, " (%0.0f%%)\n", (float) words_segmented1 / word_count * 100);
219   fprintf (f, "%d segmentation states\n", segmentation_states1);
220   fprintf (f, "%d segmentations timed out\n", states_timed_out1);
221   fprintf (f, "\n");
222 
223   fprintf (f, "%d words chopped (pass 2) ", words_chopped2);
224   fprintf (f, " (%0.0f%%)\n", (float) words_chopped2 / word_count * 100);
225   fprintf (f, "%d chops performed\n", chops_performed2);
226   fprintf (f, "%d chops attempted\n", chops_attempted2);
227   fprintf (f, "\n");
228 
229   fprintf (f, "%d words joined (pass 2)", words_segmented2);
230   fprintf (f, " (%0.0f%%)\n", (float) words_segmented2 / word_count * 100);
231   fprintf (f, "%d segmentation states\n", segmentation_states2);
232   fprintf (f, "%d segmentations timed out\n", states_timed_out2);
233   fprintf (f, "\n");
234 
235   total = 0;
236   iterate_tally (states_before_best, x)
237     total += (tally_entry (states_before_best, x) * x);
238   fprintf (f, "segmentations (before best) = %d\n", total);
239   if (total != 0.0)
240     fprintf (f, "%4.0f%% segmentation overhead\n",
241       (float) (segmentation_states1 + segmentation_states2) /
242       total * 100.0 - 100.0);
243   fprintf (f, "\n");
244 
245   print_tally (f, "segmentations (before best)", states_before_best);
246 
247   iterate_tally (best_certainties[0], x)
248     cprintf ("best certainty of %8.4f = %4d %4d\n",
249     x * CERTAINTY_BUCKET_SIZE,
250     tally_entry (best_certainties[0], x),
251     tally_entry (best_certainties[1], x));
252 
253   PrintIntMatcherStats(f);
254   dj_statistics(f);
255   fclose(f);
256   #endif
257 }
258 }  // namespace tesseract
259 
260 
261 /**********************************************************************
262  * record_priorities
263  *
264  * If the record mode is set then record the priorities returned by
265  * each of the priority voters.  Save them in a file that is set up for
266  * doing clustering.
267  **********************************************************************/
record_priorities(SEARCH_RECORD * the_search,FLOAT32 priority_1,FLOAT32 priority_2)268 void record_priorities(SEARCH_RECORD *the_search,
269                        FLOAT32 priority_1,
270                        FLOAT32 priority_2) {
271   record_samples(priority_1, priority_2);
272 }
273 
274 
275 /**********************************************************************
276  * record_samples
277  *
278  * Remember the priority samples to summarize them later.
279  **********************************************************************/
record_samples(FLOAT32 match_pri,FLOAT32 width_pri)280 void record_samples(FLOAT32 match_pri, FLOAT32 width_pri) {
281   ADD_SAMPLE(match_priority_range, match_pri);
282   ADD_SAMPLE(width_priority_range, width_pri);
283 }
284 
285 
286 /**********************************************************************
287  * reset_width_tally
288  *
289  * Create a tally record and initialize it.
290  **********************************************************************/
reset_width_tally()291 void reset_width_tally() {
292   character_widths = new_tally (20);
293   new_measurement(width_measure);
294   width_measure.num_samples = 158;
295   width_measure.sum_of_samples = 125.0;
296   width_measure.sum_of_squares = 118.0;
297 }
298 
299 
300 #ifndef GRAPHICS_DISABLED
301 /**********************************************************************
302  * save_best_state
303  *
304  * Save this state away to be compared later.
305  **********************************************************************/
save_best_state(CHUNKS_RECORD * chunks_record)306 void save_best_state(CHUNKS_RECORD *chunks_record) {
307   STATE state;
308   SEARCH_STATE chunk_groups;
309   int num_joints;
310 
311   if (save_priorities) {
312     num_joints = chunks_record->ratings->dimension() - 1;
313 
314     state.part1 = 0xffffffff;
315     state.part2 = 0xffffffff;
316 
317     chunk_groups = bin_to_chunks (&state, num_joints);
318     display_segmentation (chunks_record->chunks, chunk_groups);
319     memfree(chunk_groups);
320 
321     cprintf ("Enter the correct segmentation > ");
322     fflush(stdout);
323     state.part1 = 0;
324     scanf ("%x", &state.part2);
325 
326     chunk_groups = bin_to_chunks (&state, num_joints);
327     display_segmentation (chunks_record->chunks, chunk_groups);
328     memfree(chunk_groups);
329     window_wait(segm_window);  /* == 'n') */
330 
331     if (known_best_state)
332       free_state(known_best_state);
333     known_best_state = new_state (&state);
334   }
335 }
336 #endif
337 
338 
339 /**********************************************************************
340  * start_record
341  *
342  * Set up everything needed to record the priority voters.
343  **********************************************************************/
start_recording()344 void start_recording() {
345   if (save_priorities) {
346     priority_file_1 = open_file ("Priorities1", "w");
347     priority_file_2 = open_file ("Priorities2", "w");
348     priority_file_3 = open_file ("Priorities3", "w");
349   }
350 }
351 
352 
353 /**********************************************************************
354  * stop_recording
355  *
356  * Put an end to the priority recording mechanism.
357  **********************************************************************/
stop_recording()358 void stop_recording() {
359   if (save_priorities) {
360     fclose(priority_file_1);
361     fclose(priority_file_2);
362     fclose(priority_file_3);
363   }
364 }
365