1 /* -*-C-*-
2 ********************************************************************************
3 *
4 * File: bestfirst.c (Formerly bestfirst.c)
5 * Description: Best first search functions
6 * Author: Mark Seaman, OCR Technology
7 * Created: Mon May 14 11:23:29 1990
8 * Modified: Tue Jul 30 16:08:47 1991 (Mark Seaman) marks@hpgrlt
9 * Language: C
10 * Package: N/A
11 * Status: Experimental (Do Not Distribute)
12 *
13 * (c) Copyright 1990, Hewlett-Packard Company.
14 ** Licensed under the Apache License, Version 2.0 (the "License");
15 ** you may not use this file except in compliance with the License.
16 ** You may obtain a copy of the License at
17 ** http://www.apache.org/licenses/LICENSE-2.0
18 ** Unless required by applicable law or agreed to in writing, software
19 ** distributed under the License is distributed on an "AS IS" BASIS,
20 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21 ** See the License for the specific language governing permissions and
22 ** limitations under the License.
23 *
24 ***************************************************************************/
25
26 /*----------------------------------------------------------------------
27 I n c l u d e s
28 ---------------------------------------------------------------------*/
29 #include <assert.h>
30
31 #include "bestfirst.h"
32 #include "baseline.h"
33 #include "bitvec.h"
34 #include "callback.h"
35 #include "dict.h"
36 #include "freelist.h"
37 #include "globals.h"
38 #include "heuristic.h"
39 #include "metrics.h"
40 #include "permute.h"
41 #include "pieces.h"
42 #include "plotseg.h"
43 #include "ratngs.h"
44 #include "states.h"
45 #include "stopper.h"
46 #include "structures.h"
47 #include "tordvars.h"
48 #include "unicharset.h"
49 #include "wordclass.h"
50 #include "wordrec.h"
51
void call_caller();              /* Forward declaration (defined elsewhere) */

/*----------------------------------------------------------------------
              V a r i a b l e s
---------------------------------------------------------------------*/
int num_joints;                  /* Number of chunks - 1 */
int num_pushed = 0;              /* Running count of states pushed onto the open queue */
int num_popped = 0;              /* Running count of states popped off the open queue */

/* Upper bound on the number of segmentation states evaluated per word. */
INT_VAR(wordrec_num_seg_states, 30, "Segmentation states");

/* Floor for the priority threshold used when admitting new search paths. */
double_VAR(wordrec_worst_state, 1, "Worst segmentation state");
64
65 /**/
66 /*----------------------------------------------------------------------
67 F u n c t i o n s
68 ----------------------------------------------------------------------*/
69
70 /**********************************************************************
71 * best_first_search
72 *
73 * Find the best segmentation by doing a best first search of the
74 * solution space.
75 **********************************************************************/
76 namespace tesseract {
best_first_search(CHUNKS_RECORD * chunks_record,WERD_CHOICE * best_choice,WERD_CHOICE * raw_choice,STATE * state,DANGERR * fixpt,STATE * best_state)77 void Wordrec::best_first_search(CHUNKS_RECORD *chunks_record,
78 WERD_CHOICE *best_choice,
79 WERD_CHOICE *raw_choice,
80 STATE *state,
81 DANGERR *fixpt,
82 STATE *best_state) {
83 SEARCH_RECORD *the_search;
84 inT16 keep_going;
85 STATE guided_state; // not used
86
87 num_joints = chunks_record->ratings->dimension() - 1;
88 the_search = new_search (chunks_record, num_joints,
89 best_choice, raw_choice, state);
90
91 // The default state is initialized as the best choice. In order to apply
92 // segmentation adjustment, or any other contextual processing in permute,
93 // we give the best choice a poor rating to force the processed raw choice
94 // to be promoted to best choice.
95 the_search->best_choice->set_rating(100000.0);
96 evaluate_state(chunks_record, the_search, fixpt);
97 if (permute_debug) {
98 tprintf("\n\n\n =========== BestFirstSearch ==============\n");
99 best_choice->print("**Initial BestChoice**");
100 }
101
102 #ifndef GRAPHICS_DISABLED
103 save_best_state(chunks_record);
104 #endif
105 start_recording();
106 FLOAT32 worst_priority = 2.0f * prioritize_state(chunks_record, the_search);
107 if (worst_priority < wordrec_worst_state)
108 worst_priority = wordrec_worst_state;
109 if (segment_debug) {
110 print_state("BestFirstSearch", best_state, num_joints);
111 }
112
113 guided_state = *state;
114 do {
115 /* Look for answer */
116 if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
117
118 if (tord_blob_skip) {
119 free_state (the_search->this_state);
120 break;
121 }
122
123 guided_state = *(the_search->this_state);
124 keep_going = evaluate_state(chunks_record, the_search, fixpt);
125 hash_add (the_search->closed_states, the_search->this_state);
126
127 if (!keep_going ||
128 (the_search->num_states > wordrec_num_seg_states) ||
129 (tord_blob_skip)) {
130 if (segment_debug)
131 tprintf("Breaking best_first_search on keep_going %s numstates %d\n",
132 ((keep_going) ? "T" :"F"), the_search->num_states);
133 free_state (the_search->this_state);
134 break;
135 }
136
137 FLOAT32 new_worst_priority = 2.0f * prioritize_state(chunks_record,
138 the_search);
139 if (new_worst_priority < worst_priority) {
140 if (segment_debug)
141 tprintf("Lowering WorstPriority %f --> %f\n",
142 worst_priority, new_worst_priority);
143 // Tighten the threshold for admitting new paths as better search
144 // candidates are found. After lowering this threshold, we can safely
145 // popout everything that is worse than this score also.
146 worst_priority = new_worst_priority;
147 }
148 expand_node(worst_priority, chunks_record, the_search);
149 }
150
151 free_state (the_search->this_state);
152 num_popped++;
153 the_search->this_state = pop_queue (the_search->open_states);
154 if (segment_debug && !the_search->this_state)
155 tprintf("No more states to evalaute after %d evals", num_popped);
156 }
157 while (the_search->this_state);
158
159 state->part1 = the_search->best_state->part1;
160 state->part2 = the_search->best_state->part2;
161 stop_recording();
162 if (permute_debug) {
163 tprintf("\n\n\n =========== BestFirstSearch ==============\n");
164 // best_choice->debug_string(getDict().getUnicharset()).string());
165 best_choice->print("**Final BestChoice**");
166 }
167 // save the best_state stats
168 delete_search(the_search);
169 }
170 } // namespace tesseract
171
172
/**********************************************************************
 * chunks_width
 *
 * Return the total width of a chunk that is composed of the blobs
 * blobs[start_blob..last_blob] inclusive,
 * whose individual widths and gaps are recorded in width_record in the form
 *   width_record->num_chars = n
 *   width_record->widths[2*n-1] = w0,g0,w1,g1..w(n-1),g(n-1)
 **********************************************************************/
chunks_width(WIDTH_RECORD * width_record,int start_blob,int last_blob)182 int chunks_width(WIDTH_RECORD *width_record, int start_blob, int last_blob) {
183 int result = 0;
184 for (int x = start_blob * 2; x <= last_blob * 2; x++)
185 result += width_record->widths[x];
186 return (result);
187 }
188
/**********************************************************************
 * chunks_gap
 *
 * Return the width of the gap between the specified chunk and the next one.
 **********************************************************************/
chunks_gap(WIDTH_RECORD * width_record,int last_chunk)194 int chunks_gap(WIDTH_RECORD *width_record, int last_chunk) {
195 return (last_chunk < width_record->num_chars - 1) ?
196 width_record->widths[last_chunk * 2 + 1] : 0;
197 }
198
199
200 /**********************************************************************
201 * delete_search
202 *
203 * Terminate the current search and free all the memory involved.
204 **********************************************************************/
delete_search(SEARCH_RECORD * the_search)205 void delete_search(SEARCH_RECORD *the_search) {
206 float closeness;
207
208 closeness = (the_search->num_joints ?
209 (hamming_distance(reinterpret_cast<uinT32*>(the_search->first_state),
210 reinterpret_cast<uinT32*>(the_search->best_state), 2) /
211 (float) the_search->num_joints) : 0.0f);
212
213 record_search_status (the_search->num_states,
214 the_search->before_best, closeness);
215
216 free_state (the_search->first_state);
217 free_state (the_search->best_state);
218
219 free_hash_table (the_search->closed_states);
220 FreeHeapData (the_search->open_states, (void_dest) free_state);
221
222 memfree(the_search);
223 }
224
225
/**********************************************************************
 * evaluate_chunks
 *
 * A particular word level segmentation has been chosen. Evaluate
 * it to find the word list that corresponds to it.
 **********************************************************************/
232 namespace tesseract {
evaluate_chunks(CHUNKS_RECORD * chunks_record,SEARCH_STATE search_state)233 BLOB_CHOICE_LIST_VECTOR *Wordrec::evaluate_chunks(CHUNKS_RECORD *chunks_record,
234 SEARCH_STATE search_state) {
235 BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
236 BLOB_CHOICE_LIST *blob_choices;
237 BLOB_CHOICE_IT blob_choice_it;
238 int i;
239 int x = 0;
240 int y;
241
242 /* Iterate sub-paths */
243 for (i = 1; i <= search_state[0] + 1; i++) {
244 if (i > search_state[0])
245 y = count_blobs (chunks_record->chunks) - 1;
246 else
247 y = x + search_state[i];
248
249 if (tord_blob_skip) {
250 delete char_choices;
251 return (NULL);
252 } /* Process one square */
253 /* Classify if needed */
254 blob_choices = get_piece_rating(chunks_record->ratings,
255 chunks_record->chunks,
256 chunks_record->splits,
257 x, y);
258
259 if (blob_choices == NULL) {
260 delete char_choices;
261 return (NULL);
262 }
263
264 /* Add permuted ratings */
265 blob_choice_it.set_to_list(blob_choices);
266 last_segmentation[i - 1].certainty = blob_choice_it.data()->certainty();
267 last_segmentation[i - 1].match = blob_choice_it.data()->rating();
268
269 last_segmentation[i - 1].width =
270 chunks_width (chunks_record->chunk_widths, x, y);
271 last_segmentation[i - 1].gap =
272 chunks_gap (chunks_record->chunk_widths, y);
273
274 *char_choices += blob_choices;
275 x = y + 1;
276 }
277 return (char_choices);
278 }
279
280 /**********************************************************************
281 * evaluate_state
282 *
283 * Evaluate the segmentation that is represented by this state in the
284 * best first search. Add this state to the "states_seen" list.
285 **********************************************************************/
// Evaluate the segmentation represented by the_search->this_state: build
// per-chunk classifications, permute them into word choices, and update
// the search's best state/choice when the rating improves.  Returns TRUE
// if the search should keep going, FALSE once an acceptable choice is
// found.  Also adds this state to the "states seen" bookkeeping.
inT16 Wordrec::evaluate_state(CHUNKS_RECORD *chunks_record,
                              SEARCH_RECORD *the_search,
                              DANGERR *fixpt) {
  BLOB_CHOICE_LIST_VECTOR *char_choices;
  SEARCH_STATE chunk_groups;
  float rating_limit = the_search->best_choice->rating();
  inT16 keep_going = TRUE;
  PIECES_STATE widths;

  the_search->num_states++;
  chunk_groups = bin_to_chunks(the_search->this_state,
                               the_search->num_joints);
  bin_to_pieces (the_search->this_state, the_search->num_joints, widths);
  getDict().LogNewSegmentation(widths);

  char_choices = evaluate_chunks(chunks_record, chunk_groups);
  // wordseg_rating_adjust_factor is a global read downstream; -1 disables
  // the segmentation-cost adjustment until a bias is computed below.
  wordseg_rating_adjust_factor = -1.0f;
  if (char_choices != NULL && char_choices->length() > 0) {
    // Compute the segmentation cost and include the cost in word rating.
    // TODO(dsl): We should change the SEARCH_RECORD to store this cost
    // from state evaluation and avoid recomputing it here.
    prioritize_state(chunks_record, the_search);
    wordseg_rating_adjust_factor = the_search->segcost_bias;
    getDict().permute_characters(*char_choices, rating_limit,
                                 the_search->best_choice,
                                 the_search->raw_choice);
    bool replaced = false;
    if (getDict().AcceptableChoice(char_choices, the_search->best_choice,
                                   *(the_search->raw_choice), fixpt,
                                   ASSOCIATOR_CALLER, &replaced)) {
      keep_going = FALSE;
    }
  }
  // Restore the disabled value so later callers are unaffected.
  wordseg_rating_adjust_factor = -1.0f;

#ifndef GRAPHICS_DISABLED
  if (wordrec_display_segmentations) {
    display_segmentation (chunks_record->chunks, chunk_groups);
    if (wordrec_display_segmentations > 1)
      window_wait(segm_window);
  }
#endif

  // A changed best rating means this state produced a new best choice:
  // record the state and refresh the char widths used by later evaluations.
  if (rating_limit != the_search->best_choice->rating()) {
    the_search->before_best = the_search->num_states;
    the_search->best_state->part1 = the_search->this_state->part1;
    the_search->best_state->part2 = the_search->this_state->part2;
    replace_char_widths(chunks_record, chunk_groups);
  }
  else if (char_choices != NULL)
    fixpt->index = -1;

  if (char_choices != NULL) delete char_choices;
  memfree(chunk_groups);

  return (keep_going);
}
343
344
/**********************************************************************
 * rebuild_current_state
 *
 * Rebuild the blob choices for the segmentation represented by this
 * state, combining blobs that belong to character fragments and
 * re-classifying the merged blobs.
 **********************************************************************/
// Rebuild the blob choices for the segmentation in *state, combining
// blobs that belong to character fragments and re-classifying the merged
// blobs.  Consumes old_choices and returns a fresh vector — unless no
// rebuild is needed (force_rebuild false and no fragments), in which
// case old_choices is returned unchanged.
BLOB_CHOICE_LIST_VECTOR *Wordrec::rebuild_current_state(
    TBLOB *blobs,
    SEAMS seam_list,
    STATE *state,
    BLOB_CHOICE_LIST_VECTOR *old_choices,
    int fx,
    bool force_rebuild,
    const WERD_CHOICE &best_choice,
    const MATRIX *ratings) {
  // Initialize search_state, num_joints, x, y.
  int num_joints = array_count(seam_list);
#ifndef GRAPHICS_DISABLED
  if (wordrec_display_segmentations) {
    print_state("Rebuiling state", state, num_joints);  // NOTE: typo "Rebuiling" in debug label
  }
#endif
  SEARCH_STATE search_state = bin_to_chunks(state, num_joints);
  // Walk the chunk groups so x ends at the start of the last group;
  // y is then the index of the last blob overall.
  int x = 0;
  int y;
  int i;
  for (i = 1; i <= search_state[0]; i++) {
    y = x + search_state[i];
    x = y + 1;
  }
  y = count_blobs (blobs) - 1;

  // Initialize char_choices, expanded_fragment_lengths:
  // e.g. if fragment_lengths = {1 1 2 3 1},
  // expanded_fragment_lengths_str = {1 1 2 2 3 3 3 1}.
  BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
  STRING expanded_fragment_lengths_str = "";
  bool state_has_fragments = false;
  const char *fragment_lengths = NULL;

  if (best_choice.length() > 0) {
    fragment_lengths = best_choice.fragment_lengths();
  }
  if (fragment_lengths) {
    for (int i = 0; i < best_choice.length(); ++i) {
      *char_choices += NULL;
      if (fragment_lengths[i] > 1) {
        state_has_fragments = true;
      }
      for (int j = 0; j < fragment_lengths[i]; ++j) {
        expanded_fragment_lengths_str += fragment_lengths[i];
      }
    }
  } else {
    // No fragment info: every character is a single piece.
    for (i = 0; i <= search_state[0]; ++i) {
      expanded_fragment_lengths_str += (char)1;
      *char_choices += NULL;
    }
  }

  // Finish early if force_rebuild is false and there are no fragments
  // to merge.
  if (!force_rebuild && !state_has_fragments) {
    delete char_choices;
    memfree(search_state);
    return old_choices;
  }

  // Set up variables for concatenating fragments.
  const char *word_lengths_ptr = NULL;
  const char *word_ptr = NULL;
  if (state_has_fragments) {
    // Make word_lengths_ptr point to the last element in
    // best_choice->unichar_lengths().
    word_lengths_ptr = best_choice.unichar_lengths().string();
    word_lengths_ptr += (strlen(word_lengths_ptr)-1);
    // Make word_str point to the beginning of the last
    // unichar in best_choice->unichar_string().
    word_ptr = best_choice.unichar_string().string();
    word_ptr += (strlen(word_ptr)-*word_lengths_ptr);
  }
  const char *expanded_fragment_lengths =
    expanded_fragment_lengths_str.string();
  bool merging_fragment = false;
  int true_y = -1;
  char unichar[UNICHAR_LEN + 1];
  int fragment_pieces = -1;
  float rating = 0.0;
  float certainty = -MAX_FLOAT32;

  // Populate char_choices list such that it corresponds to search_state.
  //
  // If we are rebuilding a state that contains character fragments:
  // -- combine blobs that belong to character fragments
  // -- re-classify the blobs to obtain choices list for the merged blob
  // -- ensure that correct classification appears in the new choices list
  //    NOTE: a choice composed from original fragment choices will always
  //    be added to the new choices list for each character composed from
  //    fragments (even if the choice for the corresponding character appears
  //    in the re-classified choices list of for the newly merged blob).
  BLOB_CHOICE_IT temp_it;
  int char_choices_index = char_choices->length() - 1;
  // Iterate characters from last to first, shrinking [x..y] as we go.
  for (i = search_state[0]; i >= 0; i--) {
    BLOB_CHOICE_LIST *current_choices = join_blobs_and_classify(
        blobs, seam_list, x, y, fx, ratings, old_choices);
    // Combine character fragments.
    if (expanded_fragment_lengths[i] > 1) {
      // Start merging character fragments.
      if (!merging_fragment) {
        merging_fragment = true;
        true_y = y;
        fragment_pieces = expanded_fragment_lengths[i];
        rating = 0.0;
        certainty = -MAX_FLOAT32;
        strncpy(unichar, word_ptr, *word_lengths_ptr);
        unichar[*word_lengths_ptr] = '\0';
      }
      // Take into account the fact that we could have joined pieces
      // since we first recorded the ending point of a fragment (true_y).
      true_y -= y - x;
      // Populate fragment with updated values and look for the
      // fragment with the same values in current_choices.
      // Update rating and certainty of the character being composed.
      fragment_pieces--;
      CHAR_FRAGMENT fragment;
      fragment.set_all(unichar, fragment_pieces,
                       expanded_fragment_lengths[i]);
      temp_it.set_to_list(current_choices);
      for (temp_it.mark_cycle_pt(); !temp_it.cycled_list();
           temp_it.forward()) {
        const CHAR_FRAGMENT *current_fragment =
          getDict().getUnicharset().get_fragment(temp_it.data()->unichar_id());
        if (current_fragment && fragment.equals(current_fragment)) {
          rating += temp_it.data()->rating();
          if (temp_it.data()->certainty() > certainty) {
            certainty = temp_it.data()->certainty();
          }
          break;
        }
      }
      assert(!temp_it.cycled_list());  // make sure we found the fragment
      // Free current_choices for the fragmented character.
      delete current_choices;

      // Finish composing character from fragments.
      if (fragment_pieces == 0) {
        // Populate current_choices with the classification of
        // the blob merged from blobs of each character fragment.
        current_choices = join_blobs_and_classify(blobs, seam_list, x,
                                                  true_y, fx, ratings, NULL);
        BLOB_CHOICE *merged_choice =
          new BLOB_CHOICE(getDict().getUnicharset().unichar_to_id(unichar),
                          rating, certainty, 0, NO_PERM);

        // Insert merged_blob into current_choices, such that current_choices
        // are still sorted in non-descending order by rating.
        ASSERT_HOST(!current_choices->empty());
        temp_it.set_to_list(current_choices);
        for (temp_it.mark_cycle_pt();
             !temp_it.cycled_list() &&
             merged_choice->rating() > temp_it.data()->rating();
             temp_it.forward());
        temp_it.add_before_stay_put(merged_choice);

        // Done merging this fragmented character.
        merging_fragment = false;
      }
    }
    if (!merging_fragment) {
      // Get rid of fragments in current_choices.
      temp_it.set_to_list(current_choices);
      for (temp_it.mark_cycle_pt(); !temp_it.cycled_list();
           temp_it.forward()) {
        if (getDict().getUnicharset().get_fragment(
            temp_it.data()->unichar_id())) {
          delete temp_it.extract();
        }
      }
      char_choices->set(current_choices, char_choices_index);
      char_choices_index--;

      // Update word_ptr and word_lengths_ptr.
      if (word_lengths_ptr != NULL && word_ptr != NULL) {
        word_lengths_ptr--;
        word_ptr -= (*word_lengths_ptr);
      }
    }
    // Step [x..y] back to the previous character's chunk range.
    y = x - 1;
    x = y - search_state[i];
  }
  old_choices->delete_data_pointers();
  delete old_choices;
  memfree(search_state);

  return (char_choices);
}
540 } // namespace tesseract
541
542
543 /**********************************************************************
544 * expand_node
545 *
546 * Create the states that are attached to this one. Check to see that
547 * each one has not already been visited. If not add it to the priority
548 * queue.
549 **********************************************************************/
550 namespace tesseract {
expand_node(FLOAT32 worst_priority,CHUNKS_RECORD * chunks_record,SEARCH_RECORD * the_search)551 void Wordrec::expand_node(FLOAT32 worst_priority,
552 CHUNKS_RECORD *chunks_record,
553 SEARCH_RECORD *the_search) {
554 STATE old_state;
555 int nodes_added = 0;
556 int x;
557 uinT32 mask = 1 << (the_search->num_joints - 1 - 32);
558
559 old_state.part1 = the_search->this_state->part1;
560 old_state.part2 = the_search->this_state->part2;
561
562 // We need to expand the search more intelligently, or we get stuck
563 // with a bad starting segmentation in a long word sequence as in CJK.
564 // Expand a child node only if it is within the global bound, and no
565 // worse than 2x of its parent.
566 // TODO(dsl): There is some redudency here in recomputing the priority,
567 // and in filtering of old_merit and worst_priority.
568 the_search->this_state->part2 = old_state.part2;
569 for (x = the_search->num_joints; x > 32; x--) {
570 the_search->this_state->part1 = mask ^ old_state.part1;
571 if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
572 FLOAT32 new_merit = prioritize_state(chunks_record, the_search);
573 if (segment_debug && permute_debug) {
574 cprintf ("....checking state: %8.3f ", new_merit);
575 print_state ("", the_search->this_state, num_joints);
576 }
577 if (new_merit < worst_priority) {
578 push_queue (the_search->open_states, the_search->this_state,
579 worst_priority, new_merit);
580 nodes_added++;
581 }
582 }
583 mask >>= 1;
584 }
585
586 if (the_search->num_joints > 32) {
587 mask = 1 << 31;
588 }
589 else {
590 mask = 1 << (the_search->num_joints - 1);
591 }
592
593 the_search->this_state->part1 = old_state.part1;
594 while (x--) {
595 the_search->this_state->part2 = mask ^ old_state.part2;
596 if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
597 FLOAT32 new_merit = prioritize_state(chunks_record, the_search);
598 if (segment_debug && permute_debug) {
599 cprintf ("....checking state: %8.3f ", new_merit);
600 print_state ("", the_search->this_state, num_joints);
601 }
602 if (new_merit < worst_priority) {
603 push_queue (the_search->open_states, the_search->this_state,
604 worst_priority, new_merit);
605 nodes_added++;
606 }
607 }
608 mask >>= 1;
609 }
610 }
611 } // namespace tesseract
612
613
614 /**********************************************************************
615 * new_search
616 *
617 * Create and initialize a new search record.
618 **********************************************************************/
new_search(CHUNKS_RECORD * chunks_record,int num_joints,WERD_CHOICE * best_choice,WERD_CHOICE * raw_choice,STATE * state)619 SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record,
620 int num_joints,
621 WERD_CHOICE *best_choice,
622 WERD_CHOICE *raw_choice,
623 STATE *state) {
624 SEARCH_RECORD *this_search;
625
626 this_search = (SEARCH_RECORD *) memalloc (sizeof (SEARCH_RECORD));
627
628 this_search->open_states = MakeHeap (wordrec_num_seg_states * 20);
629 this_search->closed_states = new_hash_table ();
630
631 if (state)
632 this_search->this_state = new_state (state);
633 else
634 cprintf ("error: bad initial state in new_search\n");
635
636 this_search->first_state = new_state (this_search->this_state);
637 this_search->best_state = new_state (this_search->this_state);
638
639 this_search->best_choice = best_choice;
640 this_search->raw_choice = raw_choice;
641
642 this_search->num_joints = num_joints;
643 this_search->num_states = 0;
644 this_search->before_best = 0;
645 this_search->segcost_bias = 0;
646
647 return (this_search);
648 }
649
650
651 /**********************************************************************
652 * pop_queue
653 *
654 * Get this state from the priority queue. It should be the state that
655 * has the greatest urgency to be evaluated.
656 **********************************************************************/
pop_queue(HEAP * queue)657 STATE *pop_queue(HEAP *queue) {
658 HEAPENTRY entry;
659
660 if (GetTopOfHeap (queue, &entry) == OK) {
661 #ifndef GRAPHICS_DISABLED
662 if (wordrec_display_segmentations) {
663 cprintf ("eval state: %8.3f ", entry.Key);
664 print_state ("", (STATE *) entry.Data, num_joints);
665 }
666 #endif
667 return ((STATE *) entry.Data);
668 }
669 else {
670 return (NULL);
671 }
672 }
673
674
675 /**********************************************************************
676 * push_queue
677 *
678 * Add this state into the priority queue.
679 **********************************************************************/
push_queue(HEAP * queue,STATE * state,FLOAT32 worst_priority,FLOAT32 priority)680 void push_queue(HEAP *queue, STATE *state, FLOAT32 worst_priority,
681 FLOAT32 priority) {
682 HEAPENTRY entry;
683
684 if (priority < worst_priority) {
685 if (SizeOfHeap (queue) >= MaxSizeOfHeap(queue)) {
686 if (segment_debug) tprintf("Heap is Full\n");
687 return;
688 }
689 if (segment_debug)
690 tprintf("\tpushing %d node %f\n", num_pushed, priority);
691 entry.Data = (char *) new_state (state);
692 num_pushed++;
693 entry.Key = priority;
694 HeapStore(queue, &entry);
695 }
696 }
697
698
699 /**********************************************************************
700 * replace_char_widths
701 *
702 * Replace the value of the char_width field in the chunks_record with
703 * the updated width measurements from the last_segmentation.
704 **********************************************************************/
replace_char_widths(CHUNKS_RECORD * chunks_record,SEARCH_STATE state)705 void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state) {
706 WIDTH_RECORD *width_record;
707 int num_blobs;
708 int i;
709
710 free_widths (chunks_record->char_widths);
711
712 num_blobs = state[0] + 1;
713 width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
714 width_record->num_chars = num_blobs;
715
716 for (i = 0; i < num_blobs; i++) {
717
718 width_record->widths[2 * i] = last_segmentation[i].width;
719
720 if (i + 1 < num_blobs)
721 width_record->widths[2 * i + 1] = last_segmentation[i].gap;
722 }
723 chunks_record->char_widths = width_record;
724 }
725
namespace tesseract {
// Return the choices list for blobs [x..y], joining the blobs into one
// when x != y.  Lookup order: old_choices (single-blob case only), then
// the ratings matrix, then classify_blob() on the joined blob.  When
// blobs are joined, the dead blobs (x+1..y) are unlinked and discarded.
BLOB_CHOICE_LIST *Wordrec::join_blobs_and_classify(
    TBLOB *blobs, SEAMS seam_list,
    int x, int y, int fx, const MATRIX *ratings,
    BLOB_CHOICE_LIST_VECTOR *old_choices) {
  BLOB_CHOICE_LIST *choices = NULL;
  // First check to see if we can look up the classification
  // in old_choices (if there is no need to merge blobs).
  if (x == y && old_choices != NULL && ratings == NULL) {
    choices = old_choices->get(x);
    old_choices->set(NULL, x);   // transfer ownership to the caller
    return choices;
  }
  // The ratings matrix filled in by the associator will contain the most
  // up-to-date classification info. Thus we look up the classification there
  // first, and only call classify_blob() if the classification is not found.
  if (ratings != NULL) {
    BLOB_CHOICE_LIST *choices_ptr = ratings->get(x, y);
    if (choices_ptr != NOT_CLASSIFIED) {
      choices = new BLOB_CHOICE_LIST();
      choices->deep_copy(choices_ptr, &BLOB_CHOICE::deep_copy);
    }
  }
  if (x != y) {
    join_pieces(blobs, seam_list, x, y);

    int blobindex;               // current blob
    TBLOB *p_blob;
    TBLOB *blob;
    TBLOB *next_blob;
    // Advance to blob x, remembering its predecessor p_blob.
    for (blob = blobs, blobindex = 0, p_blob = NULL;
         blobindex < x; blobindex++) {
      p_blob = blob;
      blob = blob->next;
    }
    // Unlink the now-merged blobs x+1..y from the list.
    while (blobindex < y) {
      next_blob = blob->next;
      blob->next = next_blob->next;
      oldblob(next_blob);        // junk dead blobs
      blobindex++;
    }
    if (choices == NULL) {
      choices = classify_blob(p_blob, blob, blob->next,
                              NULL, "rebuild", Orange);
    }
  }
  return choices;
}
}  // namespace tesseract
775