1 /* -*-C-*-
2 ********************************************************************************
3 *
4 * File: metrics.c (Formerly metrics.c)
5 * Description:
6 * Author: Mark Seaman, OCR Technology
7 * Created: Fri Oct 16 14:37:00 1987
8 * Modified: Tue Jul 30 17:02:07 1991 (Mark Seaman) marks@hpgrlt
9 * Language: C
10 * Package: N/A
11 * Status: Reusable Software Component
12 *
13 * (c) Copyright 1987, Hewlett-Packard Company.
14 ** Licensed under the Apache License, Version 2.0 (the "License");
15 ** you may not use this file except in compliance with the License.
16 ** You may obtain a copy of the License at
17 ** http://www.apache.org/licenses/LICENSE-2.0
18 ** Unless required by applicable law or agreed to in writing, software
19 ** distributed under the License is distributed on an "AS IS" BASIS,
20 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21 ** See the License for the specific language governing permissions and
22 ** limitations under the License.
23 *
24 *********************************************************************************/
25 /*----------------------------------------------------------------------
26 I n c l u d e s
27 ----------------------------------------------------------------------*/
28 #include "metrics.h"
29 #include "bestfirst.h"
30 #include "associate.h"
31 #include "tally.h"
32 #include "plotseg.h"
33 #include "globals.h"
34 #include "wordclass.h"
35 #include "intmatcher.h"
36 #include "freelist.h"
37 #include "callcpp.h"
38 #include "ndminx.h"
39 #include "wordrec.h"
40
41 /*----------------------------------------------------------------------
42 V a r i a b l e s
43 ----------------------------------------------------------------------*/
44 static int states_timed_out1; /* Counters */
45 static int states_timed_out2;
46 static int words_segmented1;
47 static int words_segmented2;
48 static int segmentation_states1;
49 static int segmentation_states2;
50 static int save_priorities;
51
52 int words_chopped1;
53 int words_chopped2;
54 int chops_attempted1;
55 int chops_performed1;
56 int chops_attempted2;
57 int chops_performed2;
58
59 int character_count;
60 int word_count;
61 int chars_classified;
62
63 MEASUREMENT num_pieces;
64 MEASUREMENT width_measure;
65
66 MEASUREMENT width_priority_range;/* Help to normalize */
67 MEASUREMENT match_priority_range;
68
69 TALLY states_before_best;
70 TALLY best_certainties[2];
71 TALLY character_widths; /* Width histogram */
72
73 FILE *priority_file_1; /* Output to cluster */
74 FILE *priority_file_2;
75 FILE *priority_file_3;
76
77 STATE *known_best_state = NULL; /* The right answer */
78
79 /*----------------------------------------------------------------------
80 M a c r o s
81 ----------------------------------------------------------------------*/
/* Width of one certainty histogram bucket.  The value is negative, so it is
 * parenthesized to stay a single operand wherever the macro is expanded. */
#define CERTAINTY_BUCKET_SIZE (-0.5)
/* Number of buckets in each best-certainty histogram. */
#define CERTAINTY_BUCKETS 40
84
85 /*----------------------------------------------------------------------
86 F u n c t i o n s
87 ----------------------------------------------------------------------*/
88 /**********************************************************************
89 * init_metrics
90 *
91 * Set up the appropriate variables to record information about the
92 * OCR process. Later calls will log the data and save a summary.
93 **********************************************************************/
init_metrics()94 void init_metrics() {
95 words_chopped1 = 0;
96 words_chopped2 = 0;
97 chops_performed1 = 0;
98 chops_performed2 = 0;
99 chops_attempted1 = 0;
100 chops_attempted2 = 0;
101
102 words_segmented1 = 0;
103 words_segmented2 = 0;
104 states_timed_out1 = 0;
105 states_timed_out2 = 0;
106 segmentation_states1 = 0;
107 segmentation_states2 = 0;
108
109 save_priorities = 0;
110
111 character_count = 0;
112 word_count = 0;
113 chars_classified = 0;
114 permutation_count = 0;
115
116 end_metrics();
117
118 states_before_best = new_tally (MIN (100, wordrec_num_seg_states));
119
120 best_certainties[0] = new_tally (CERTAINTY_BUCKETS);
121 best_certainties[1] = new_tally (CERTAINTY_BUCKETS);
122 reset_width_tally();
123 }
124
end_metrics()125 void end_metrics() {
126 if (states_before_best != NULL) {
127 memfree(states_before_best);
128 memfree(best_certainties[0]);
129 memfree(best_certainties[1]);
130 memfree(character_widths);
131 states_before_best = NULL;
132 best_certainties[0] = NULL;
133 best_certainties[1] = NULL;
134 character_widths = NULL;
135 }
136 }
137
138
139 /**********************************************************************
140 * record_certainty
141 *
142 * Maintain a record of the best certainty values achieved on each
143 * word recognition.
144 **********************************************************************/
record_certainty(float certainty,int pass)145 void record_certainty(float certainty, int pass) {
146 int bucket;
147
148 if (certainty / CERTAINTY_BUCKET_SIZE < MAX_INT32)
149 bucket = (int) (certainty / CERTAINTY_BUCKET_SIZE);
150 else
151 bucket = MAX_INT32;
152
153 inc_tally_bucket (best_certainties[pass - 1], bucket);
154 }
155
156
157 /**********************************************************************
158 * record_search_status
159 *
160 * Record information about each iteration of the search. This data
161 * is kept in global memory and accumulated over multiple segmenter
162 * searches.
163 **********************************************************************/
record_search_status(int num_states,int before_best,float closeness)164 void record_search_status(int num_states, int before_best, float closeness) {
165 inc_tally_bucket(states_before_best, before_best);
166
167 if (first_pass) {
168 if (num_states == wordrec_num_seg_states + 1)
169 states_timed_out1++;
170 segmentation_states1 += num_states;
171 words_segmented1++;
172 }
173 else {
174 if (num_states == wordrec_num_seg_states + 1)
175 states_timed_out2++;
176 segmentation_states2 += num_states;
177 words_segmented2++;
178 }
179 }
180
181
182 /**********************************************************************
183 * save_summary
184 *
185 * Save the summary information into the file "file.sta".
186 **********************************************************************/
187 namespace tesseract {
save_summary(inT32 elapsed_time)188 void Wordrec::save_summary(inT32 elapsed_time) {
189 #ifndef SECURE_NAMES
190 STRING outfilename;
191 FILE *f;
192 int x;
193 int total;
194
195 outfilename = imagefile + ".sta";
196 f = open_file (outfilename.string(), "w");
197
198 fprintf (f, INT32FORMAT " seconds elapsed\n", elapsed_time);
199 fprintf (f, "\n");
200
201 fprintf (f, "%d characters\n", character_count);
202 fprintf (f, "%d words\n", word_count);
203 fprintf (f, "\n");
204
205 fprintf (f, "%d permutations performed\n", permutation_count);
206 fprintf (f, "%d characters classified\n", chars_classified);
207 fprintf (f, "%4.0f%% classification overhead\n",
208 (float) chars_classified / character_count * 100.0 - 100.0);
209 fprintf (f, "\n");
210
211 fprintf (f, "%d words chopped (pass 1) ", words_chopped1);
212 fprintf (f, " (%0.0f%%)\n", (float) words_chopped1 / word_count * 100);
213 fprintf (f, "%d chops performed\n", chops_performed1);
214 fprintf (f, "%d chops attempted\n", chops_attempted1);
215 fprintf (f, "\n");
216
217 fprintf (f, "%d words joined (pass 1)", words_segmented1);
218 fprintf (f, " (%0.0f%%)\n", (float) words_segmented1 / word_count * 100);
219 fprintf (f, "%d segmentation states\n", segmentation_states1);
220 fprintf (f, "%d segmentations timed out\n", states_timed_out1);
221 fprintf (f, "\n");
222
223 fprintf (f, "%d words chopped (pass 2) ", words_chopped2);
224 fprintf (f, " (%0.0f%%)\n", (float) words_chopped2 / word_count * 100);
225 fprintf (f, "%d chops performed\n", chops_performed2);
226 fprintf (f, "%d chops attempted\n", chops_attempted2);
227 fprintf (f, "\n");
228
229 fprintf (f, "%d words joined (pass 2)", words_segmented2);
230 fprintf (f, " (%0.0f%%)\n", (float) words_segmented2 / word_count * 100);
231 fprintf (f, "%d segmentation states\n", segmentation_states2);
232 fprintf (f, "%d segmentations timed out\n", states_timed_out2);
233 fprintf (f, "\n");
234
235 total = 0;
236 iterate_tally (states_before_best, x)
237 total += (tally_entry (states_before_best, x) * x);
238 fprintf (f, "segmentations (before best) = %d\n", total);
239 if (total != 0.0)
240 fprintf (f, "%4.0f%% segmentation overhead\n",
241 (float) (segmentation_states1 + segmentation_states2) /
242 total * 100.0 - 100.0);
243 fprintf (f, "\n");
244
245 print_tally (f, "segmentations (before best)", states_before_best);
246
247 iterate_tally (best_certainties[0], x)
248 cprintf ("best certainty of %8.4f = %4d %4d\n",
249 x * CERTAINTY_BUCKET_SIZE,
250 tally_entry (best_certainties[0], x),
251 tally_entry (best_certainties[1], x));
252
253 PrintIntMatcherStats(f);
254 dj_statistics(f);
255 fclose(f);
256 #endif
257 }
258 } // namespace tesseract
259
260
261 /**********************************************************************
262 * record_priorities
263 *
264 * If the record mode is set then record the priorities returned by
265 * each of the priority voters. Save them in a file that is set up for
266 * doing clustering.
267 **********************************************************************/
record_priorities(SEARCH_RECORD * the_search,FLOAT32 priority_1,FLOAT32 priority_2)268 void record_priorities(SEARCH_RECORD *the_search,
269 FLOAT32 priority_1,
270 FLOAT32 priority_2) {
271 record_samples(priority_1, priority_2);
272 }
273
274
275 /**********************************************************************
276 * record_samples
277 *
278 * Remember the priority samples to summarize them later.
279 **********************************************************************/
record_samples(FLOAT32 match_pri,FLOAT32 width_pri)280 void record_samples(FLOAT32 match_pri, FLOAT32 width_pri) {
281 ADD_SAMPLE(match_priority_range, match_pri);
282 ADD_SAMPLE(width_priority_range, width_pri);
283 }
284
285
286 /**********************************************************************
287 * reset_width_tally
288 *
289 * Create a tally record and initialize it.
290 **********************************************************************/
reset_width_tally()291 void reset_width_tally() {
292 character_widths = new_tally (20);
293 new_measurement(width_measure);
294 width_measure.num_samples = 158;
295 width_measure.sum_of_samples = 125.0;
296 width_measure.sum_of_squares = 118.0;
297 }
298
299
300 #ifndef GRAPHICS_DISABLED
301 /**********************************************************************
302 * save_best_state
303 *
304 * Save this state away to be compared later.
305 **********************************************************************/
save_best_state(CHUNKS_RECORD * chunks_record)306 void save_best_state(CHUNKS_RECORD *chunks_record) {
307 STATE state;
308 SEARCH_STATE chunk_groups;
309 int num_joints;
310
311 if (save_priorities) {
312 num_joints = chunks_record->ratings->dimension() - 1;
313
314 state.part1 = 0xffffffff;
315 state.part2 = 0xffffffff;
316
317 chunk_groups = bin_to_chunks (&state, num_joints);
318 display_segmentation (chunks_record->chunks, chunk_groups);
319 memfree(chunk_groups);
320
321 cprintf ("Enter the correct segmentation > ");
322 fflush(stdout);
323 state.part1 = 0;
324 scanf ("%x", &state.part2);
325
326 chunk_groups = bin_to_chunks (&state, num_joints);
327 display_segmentation (chunks_record->chunks, chunk_groups);
328 memfree(chunk_groups);
329 window_wait(segm_window); /* == 'n') */
330
331 if (known_best_state)
332 free_state(known_best_state);
333 known_best_state = new_state (&state);
334 }
335 }
336 #endif
337
338
339 /**********************************************************************
340 * start_record
341 *
342 * Set up everything needed to record the priority voters.
343 **********************************************************************/
start_recording()344 void start_recording() {
345 if (save_priorities) {
346 priority_file_1 = open_file ("Priorities1", "w");
347 priority_file_2 = open_file ("Priorities2", "w");
348 priority_file_3 = open_file ("Priorities3", "w");
349 }
350 }
351
352
353 /**********************************************************************
354 * stop_recording
355 *
356 * Put an end to the priority recording mechanism.
357 **********************************************************************/
stop_recording()358 void stop_recording() {
359 if (save_priorities) {
360 fclose(priority_file_1);
361 fclose(priority_file_2);
362 fclose(priority_file_3);
363 }
364 }
365