/* -*-C-*-
 ********************************************************************************
 *
 * File:        bestfirst.c  (Formerly bestfirst.c)
 * Description:  Best first search functions
 * Author:       Mark Seaman, OCR Technology
 * Created:      Mon May 14 11:23:29 1990
 * Modified:     Tue Jul 30 16:08:47 1991 (Mark Seaman) marks@hpgrlt
 * Language:     C
 * Package:      N/A
 * Status:       Experimental (Do Not Distribute)
 *
 * (c) Copyright 1990, Hewlett-Packard Company.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 ***************************************************************************/

/*----------------------------------------------------------------------
          I n c l u d e s
---------------------------------------------------------------------*/
#include <assert.h>

#include "bestfirst.h"
#include "baseline.h"
#include "bitvec.h"
#include "callback.h"
#include "dict.h"
#include "freelist.h"
#include "globals.h"
#include "heuristic.h"
#include "metrics.h"
#include "permute.h"
#include "pieces.h"
#include "plotseg.h"
#include "ratngs.h"
#include "states.h"
#include "stopper.h"
#include "structures.h"
#include "tordvars.h"
#include "unicharset.h"
#include "wordclass.h"
#include "wordrec.h"

void call_caller();

/*----------------------------------------------------------------------
          V a r i a b l e s
---------------------------------------------------------------------*/
int num_joints;                  /* Number of chunks - 1 */
int num_pushed = 0;
int num_popped = 0;

INT_VAR(wordrec_num_seg_states, 30, "Segmentation states");

double_VAR(wordrec_worst_state, 1, "Worst segmentation state");

/**/
/*----------------------------------------------------------------------
          F u n c t i o n s
----------------------------------------------------------------------*/

/**********************************************************************
 * best_first_search
 *
 * Find the best segmentation by doing a best first search of the
 * solution space.
 **********************************************************************/
namespace tesseract {
void Wordrec::best_first_search(CHUNKS_RECORD *chunks_record,
                                WERD_CHOICE *best_choice,
                                WERD_CHOICE *raw_choice,
                                STATE *state,
                                DANGERR *fixpt,
                                STATE *best_state) {
  SEARCH_RECORD *the_search;
  inT16 keep_going;
  STATE guided_state;   // not used

  num_joints = chunks_record->ratings->dimension() - 1;
  the_search = new_search (chunks_record, num_joints,
    best_choice, raw_choice, state);

  // The default state is initialized as the best choice.  In order to apply
  // segmentation adjustment, or any other contextual processing in permute,
  // we give the best choice a poor rating to force the processed raw choice
  // to be promoted to best choice.
  the_search->best_choice->set_rating(100000.0);
  evaluate_state(chunks_record, the_search, fixpt);
  if (permute_debug) {
    tprintf("\n\n\n =========== BestFirstSearch ==============\n");
    best_choice->print("**Initial BestChoice**");
  }

#ifndef GRAPHICS_DISABLED
  save_best_state(chunks_record);
#endif
  start_recording();
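  // Admission threshold for the open queue: twice the priority of the initial
  // state, but never below wordrec_worst_state.  States whose priority is not
  // below this threshold are never pushed (see expand_node / push_queue).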
  FLOAT32 worst_priority = 2.0f * prioritize_state(chunks_record, the_search);
  if (worst_priority < wordrec_worst_state)
    worst_priority = wordrec_worst_state;
  if (segment_debug) {
    print_state("BestFirstSearch", best_state, num_joints);
  }

  guided_state = *state;
  do {
                                 /* Look for answer */
    if (!hash_lookup (the_search->closed_states, the_search->this_state)) {

      if (tord_blob_skip) {
        free_state (the_search->this_state);
        break;
      }

      guided_state = *(the_search->this_state);
      keep_going = evaluate_state(chunks_record, the_search, fixpt);
      hash_add (the_search->closed_states, the_search->this_state);

      if (!keep_going ||
          (the_search->num_states > wordrec_num_seg_states) ||
          (tord_blob_skip)) {
        if (segment_debug)
          tprintf("Breaking best_first_search on keep_going %s numstates %d\n",
                  ((keep_going) ? "T" :"F"), the_search->num_states);
        free_state (the_search->this_state);
        break;
      }

      FLOAT32 new_worst_priority = 2.0f * prioritize_state(chunks_record,
                                                           the_search);
      if (new_worst_priority < worst_priority) {
        if (segment_debug)
          tprintf("Lowering WorstPriority %f --> %f\n",
                  worst_priority, new_worst_priority);
        // Tighten the threshold for admitting new paths as better search
        // candidates are found.  After lowering this threshold, we can safely
        // pop out everything that is worse than this score as well.
        worst_priority = new_worst_priority;
      }
      expand_node(worst_priority, chunks_record, the_search);
    }

    free_state (the_search->this_state);
    num_popped++;
    the_search->this_state = pop_queue (the_search->open_states);
    if (segment_debug && !the_search->this_state)
      tprintf("No more states to evaluate after %d evals", num_popped);
  }
  while (the_search->this_state);

  state->part1 = the_search->best_state->part1;
  state->part2 = the_search->best_state->part2;
  stop_recording();
  if (permute_debug) {
    tprintf("\n\n\n =========== BestFirstSearch ==============\n");
            // best_choice->debug_string(getDict().getUnicharset()).string());
    best_choice->print("**Final BestChoice**");
  }
  // save the best_state stats
  delete_search(the_search);
}
}  // namespace tesseract


/**********************************************************************
 * chunks_width
 *
 * Return the width of a chunk which is composed of several blobs,
 * blobs[start_blob..last_blob] inclusive, whose individual widths and
 * gaps are recorded in width_record in the form
 * width_record->num_chars = n
 * width_record->widths[2*n-1] = w0,g0,w1,g1..w(n-1),g(n-1)
 **********************************************************************/
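/* Example (using the layout above with n = 3): widths holds w0,g0,w1,g1,w2,
 * so chunks_width(width_record, 0, 1) == w0 + g0 + w1 and
 * chunks_width(width_record, 1, 2) == w1 + g1 + w2. */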
int chunks_width(WIDTH_RECORD *width_record, int start_blob, int last_blob) {
  int result = 0;
  for (int x = start_blob * 2; x <= last_blob * 2; x++)
    result += width_record->widths[x];
  return (result);
}

/**********************************************************************
 * chunks_gap
 *
 * Return the width of the gap between the specified chunk and the next one.
 **********************************************************************/
int chunks_gap(WIDTH_RECORD *width_record, int last_chunk) {
  return (last_chunk < width_record->num_chars - 1) ?
      width_record->widths[last_chunk * 2 + 1] : 0;
}


/**********************************************************************
 * delete_search
 *
 * Terminate the current search and free all the memory involved.
 **********************************************************************/
void delete_search(SEARCH_RECORD *the_search) {
  float closeness;

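  // Closeness: fraction of the num_joints segmentation points on which the
  // initial state and the best state disagree (hamming distance over the two
  // 32-bit words of a STATE).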
  closeness = (the_search->num_joints ?
    (hamming_distance(reinterpret_cast<uinT32*>(the_search->first_state),
                      reinterpret_cast<uinT32*>(the_search->best_state), 2) /
      (float) the_search->num_joints) : 0.0f);

  record_search_status (the_search->num_states,
    the_search->before_best, closeness);

  free_state (the_search->first_state);
  free_state (the_search->best_state);

  free_hash_table (the_search->closed_states);
  FreeHeapData (the_search->open_states, (void_dest) free_state);

  memfree(the_search);
}


/**********************************************************************
 * evaluate_chunks
 *
 * A particular word level segmentation has been chosen.  Evaluate it
 * to find the word list that corresponds to it.
 **********************************************************************/
namespace tesseract {
BLOB_CHOICE_LIST_VECTOR *Wordrec::evaluate_chunks(CHUNKS_RECORD *chunks_record,
                                                  SEARCH_STATE search_state) {
  BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
  BLOB_CHOICE_LIST *blob_choices;
  BLOB_CHOICE_IT blob_choice_it;
  int i;
  int x = 0;
  int y;

  /* Iterate sub-paths */
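  /* The loop below decodes search_state[0] + 1 pieces: for i <= search_state[0]
     piece i spans chunks x .. x + search_state[i]; the final piece runs to the
     last chunk of the word. */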
  for (i = 1; i <= search_state[0] + 1; i++) {
    if (i > search_state[0])
      y = count_blobs (chunks_record->chunks) - 1;
    else
      y = x + search_state[i];

    if (tord_blob_skip) {
      delete char_choices;
      return (NULL);
    }                            /* Process one square */
    /* Classify if needed */
    blob_choices = get_piece_rating(chunks_record->ratings,
                                    chunks_record->chunks,
                                    chunks_record->splits,
                                    x, y);

    if (blob_choices == NULL) {
      delete char_choices;
      return (NULL);
    }

    /* Add permuted ratings */
    blob_choice_it.set_to_list(blob_choices);
    last_segmentation[i - 1].certainty = blob_choice_it.data()->certainty();
    last_segmentation[i - 1].match = blob_choice_it.data()->rating();

    last_segmentation[i - 1].width =
      chunks_width (chunks_record->chunk_widths, x, y);
    last_segmentation[i - 1].gap =
      chunks_gap (chunks_record->chunk_widths, y);

    *char_choices += blob_choices;
    x = y + 1;
  }
  return (char_choices);
}

/**********************************************************************
 * evaluate_state
 *
 * Evaluate the segmentation that is represented by this state in the
 * best first search.  Add this state to the "states_seen" list.
 **********************************************************************/
inT16 Wordrec::evaluate_state(CHUNKS_RECORD *chunks_record,
                              SEARCH_RECORD *the_search,
                              DANGERR *fixpt) {
  BLOB_CHOICE_LIST_VECTOR *char_choices;
  SEARCH_STATE chunk_groups;
  float rating_limit = the_search->best_choice->rating();
  inT16 keep_going = TRUE;
  PIECES_STATE widths;

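  // keep_going stays TRUE unless AcceptableChoice() decides that the best
  // choice produced for this segmentation is good enough to stop the search.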
  the_search->num_states++;
  chunk_groups = bin_to_chunks(the_search->this_state,
                               the_search->num_joints);
  bin_to_pieces (the_search->this_state, the_search->num_joints, widths);
  getDict().LogNewSegmentation(widths);

  char_choices = evaluate_chunks(chunks_record, chunk_groups);
  wordseg_rating_adjust_factor = -1.0f;
  if (char_choices != NULL && char_choices->length() > 0) {
    // Compute the segmentation cost and include the cost in word rating.
    // TODO(dsl): We should change the SEARCH_RECORD to store this cost
    // from state evaluation and avoid recomputing it here.
    prioritize_state(chunks_record, the_search);
    wordseg_rating_adjust_factor = the_search->segcost_bias;
    getDict().permute_characters(*char_choices, rating_limit,
                                 the_search->best_choice,
                                 the_search->raw_choice);
    bool replaced = false;
    if (getDict().AcceptableChoice(char_choices, the_search->best_choice,
                                   *(the_search->raw_choice), fixpt,
                                   ASSOCIATOR_CALLER, &replaced)) {
      keep_going = FALSE;
    }
  }
  wordseg_rating_adjust_factor = -1.0f;

#ifndef GRAPHICS_DISABLED
  if (wordrec_display_segmentations) {
    display_segmentation (chunks_record->chunks, chunk_groups);
    if (wordrec_display_segmentations > 1)
      window_wait(segm_window);
  }
#endif

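  // A change in the best choice's rating means permute_characters() accepted a
  // new best choice for this segmentation, so remember the state and the
  // character widths that produced it.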
  if (rating_limit != the_search->best_choice->rating()) {
    the_search->before_best = the_search->num_states;
    the_search->best_state->part1 = the_search->this_state->part1;
    the_search->best_state->part2 = the_search->this_state->part2;
    replace_char_widths(chunks_record, chunk_groups);
  }
  else if (char_choices != NULL)
    fixpt->index = -1;

  if (char_choices != NULL) delete char_choices;
  memfree(chunk_groups);

  return (keep_going);
}


/**********************************************************************
 * rebuild_current_state
 *
 * Rebuild the list of blob choices (char_choices) for the segmentation
 * represented by this state.  Blobs that belong to character fragments
 * are joined and re-classified so that each entry corresponds to one
 * character of the best choice.
 **********************************************************************/
BLOB_CHOICE_LIST_VECTOR *Wordrec::rebuild_current_state(
    TBLOB *blobs,
    SEAMS seam_list,
    STATE *state,
    BLOB_CHOICE_LIST_VECTOR *old_choices,
    int fx,
    bool force_rebuild,
    const WERD_CHOICE &best_choice,
    const MATRIX *ratings) {
  // Initialize search_state, num_joints, x, y.
  int num_joints = array_count(seam_list);
#ifndef GRAPHICS_DISABLED
  if (wordrec_display_segmentations) {
    print_state("Rebuilding state", state, num_joints);
  }
#endif
  SEARCH_STATE search_state = bin_to_chunks(state, num_joints);
  int x = 0;
  int y;
  int i;
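  // Walk the recorded pieces to find where the last piece starts (x); the
  // last piece always ends at the final blob (y).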
  for (i = 1; i <= search_state[0]; i++) {
    y = x + search_state[i];
    x = y + 1;
  }
  y = count_blobs (blobs) - 1;

  // Initialize char_choices, expanded_fragment_lengths:
  // e.g. if fragment_lengths = {1 1 2 3 1},
  // expanded_fragment_lengths_str = {1 1 2 2 3 3 3 1}.
  BLOB_CHOICE_LIST_VECTOR *char_choices = new BLOB_CHOICE_LIST_VECTOR();
  STRING expanded_fragment_lengths_str = "";
  bool state_has_fragments = false;
  const char *fragment_lengths = NULL;

  if (best_choice.length() > 0) {
    fragment_lengths = best_choice.fragment_lengths();
  }
  if (fragment_lengths) {
    for (int i = 0; i < best_choice.length(); ++i) {
      *char_choices += NULL;
      if (fragment_lengths[i] > 1) {
        state_has_fragments = true;
      }
      for (int j = 0; j < fragment_lengths[i]; ++j) {
        expanded_fragment_lengths_str += fragment_lengths[i];
      }
    }
  } else {
    for (i = 0; i <= search_state[0]; ++i) {
      expanded_fragment_lengths_str += (char)1;
      *char_choices += NULL;
    }
  }

  // Finish early if force_rebuild is false and there are no fragments to merge.
  if (!force_rebuild && !state_has_fragments) {
    delete char_choices;
    memfree(search_state);
    return old_choices;
  }

  // Set up variables for concatenating fragments.
  const char *word_lengths_ptr = NULL;
  const char *word_ptr = NULL;
  if (state_has_fragments) {
    // Make word_lengths_ptr point to the last element in
    // best_choice->unichar_lengths().
    word_lengths_ptr = best_choice.unichar_lengths().string();
    word_lengths_ptr += (strlen(word_lengths_ptr)-1);
    // Make word_ptr point to the beginning of the last
    // unichar in best_choice->unichar_string().
    word_ptr = best_choice.unichar_string().string();
    word_ptr += (strlen(word_ptr)-*word_lengths_ptr);
  }
  const char *expanded_fragment_lengths =
    expanded_fragment_lengths_str.string();
  bool merging_fragment = false;
  int true_y = -1;
  char unichar[UNICHAR_LEN + 1];
  int fragment_pieces = -1;
  float rating = 0.0;
  float certainty = -MAX_FLOAT32;

  // Populate char_choices list such that it corresponds to search_state.
  //
  // If we are rebuilding a state that contains character fragments:
  // -- combine blobs that belong to character fragments
  // -- re-classify the blobs to obtain a choices list for the merged blob
  // -- ensure that the correct classification appears in the new choices list
  //    NOTE: a choice composed from the original fragment choices will always
  //    be added to the new choices list for each character composed from
  //    fragments (even if a choice for the corresponding character already
  //    appears in the re-classified choices list for the newly merged blob).
  BLOB_CHOICE_IT temp_it;
  int char_choices_index = char_choices->length() - 1;
  for (i = search_state[0]; i >= 0; i--) {
    BLOB_CHOICE_LIST *current_choices = join_blobs_and_classify(
        blobs, seam_list, x, y, fx, ratings, old_choices);
    // Combine character fragments.
    if (expanded_fragment_lengths[i] > 1) {
      // Start merging character fragments.
      if (!merging_fragment) {
        merging_fragment = true;
        true_y = y;
        fragment_pieces = expanded_fragment_lengths[i];
        rating = 0.0;
        certainty = -MAX_FLOAT32;
        strncpy(unichar, word_ptr, *word_lengths_ptr);
        unichar[*word_lengths_ptr] = '\0';
      }
      // Take into account the fact that we could have joined pieces
      // since we first recorded the ending point of a fragment (true_y).
      true_y -= y - x;
      // Populate fragment with updated values and look for the
      // fragment with the same values in current_choices.
      // Update rating and certainty of the character being composed.
      fragment_pieces--;
      CHAR_FRAGMENT fragment;
      fragment.set_all(unichar, fragment_pieces,
                       expanded_fragment_lengths[i]);
      temp_it.set_to_list(current_choices);
      for (temp_it.mark_cycle_pt(); !temp_it.cycled_list();
           temp_it.forward()) {
        const CHAR_FRAGMENT *current_fragment =
          getDict().getUnicharset().get_fragment(temp_it.data()->unichar_id());
        if (current_fragment && fragment.equals(current_fragment)) {
          rating += temp_it.data()->rating();
          if (temp_it.data()->certainty() > certainty) {
            certainty = temp_it.data()->certainty();
          }
          break;
        }
      }
      assert(!temp_it.cycled_list());  // make sure we found the fragment
      // Free current_choices for the fragmented character.
      delete current_choices;

      // Finish composing character from fragments.
      if (fragment_pieces == 0) {
        // Populate current_choices with the classification of
        // the blob merged from blobs of each character fragment.
        current_choices = join_blobs_and_classify(blobs, seam_list, x,
                                                  true_y, fx, ratings, NULL);
        BLOB_CHOICE *merged_choice =
          new BLOB_CHOICE(getDict().getUnicharset().unichar_to_id(unichar),
                          rating, certainty, 0, NO_PERM);

        // Insert merged_choice into current_choices, such that current_choices
        // is still sorted in non-descending order by rating.
        ASSERT_HOST(!current_choices->empty());
        temp_it.set_to_list(current_choices);
        for (temp_it.mark_cycle_pt();
             !temp_it.cycled_list() &&
             merged_choice->rating() > temp_it.data()->rating();
             temp_it.forward());
        temp_it.add_before_stay_put(merged_choice);

        // Done merging this fragmented character.
        merging_fragment = false;
      }
    }
    if (!merging_fragment) {
      // Get rid of fragments in current_choices.
      temp_it.set_to_list(current_choices);
      for (temp_it.mark_cycle_pt(); !temp_it.cycled_list();
           temp_it.forward()) {
        if (getDict().getUnicharset().get_fragment(
            temp_it.data()->unichar_id())) {
          delete temp_it.extract();
        }
      }
      char_choices->set(current_choices, char_choices_index);
      char_choices_index--;

      // Update word_ptr and word_lengths_ptr.
      if (word_lengths_ptr != NULL && word_ptr != NULL) {
        word_lengths_ptr--;
        word_ptr -= (*word_lengths_ptr);
      }
    }
    y = x - 1;
    x = y - search_state[i];
  }
  old_choices->delete_data_pointers();
  delete old_choices;
  memfree(search_state);

  return (char_choices);
}
}  // namespace tesseract


/**********************************************************************
 * expand_node
 *
 * Create the states that are attached to this one.  Check to see that
 * each one has not already been visited.  If not, add it to the priority
 * queue.
 **********************************************************************/
namespace tesseract {
void Wordrec::expand_node(FLOAT32 worst_priority,
                          CHUNKS_RECORD *chunks_record,
                          SEARCH_RECORD *the_search) {
  STATE old_state;
  int nodes_added = 0;
  int x;
  uinT32 mask = 1 << (the_search->num_joints - 1 - 32);

  old_state.part1 = the_search->this_state->part1;
  old_state.part2 = the_search->this_state->part2;

  // We need to expand the search more intelligently, or we get stuck
  // with a bad starting segmentation in a long word sequence as in CJK.
  // Expand a child node only if it is within the global bound, and no
  // worse than 2x of its parent.
  // TODO(dsl): There is some redundancy here in recomputing the priority,
  // and in filtering of old_merit and worst_priority.
  the_search->this_state->part2 = old_state.part2;
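  // The two loops below toggle one joint bit of the parent state at a time:
  // the first walks the bits of part1 (joints beyond the first 32), the second
  // the bits of part2 (the lower 32 joints).  Each untried child whose priority
  // is within the bound is pushed onto the open queue.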
  for (x = the_search->num_joints; x > 32; x--) {
    the_search->this_state->part1 = mask ^ old_state.part1;
    if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
      FLOAT32 new_merit = prioritize_state(chunks_record, the_search);
      if (segment_debug && permute_debug) {
        cprintf ("....checking state: %8.3f ", new_merit);
        print_state ("", the_search->this_state, num_joints);
      }
      if (new_merit < worst_priority) {
        push_queue (the_search->open_states, the_search->this_state,
                    worst_priority, new_merit);
        nodes_added++;
      }
    }
    mask >>= 1;
  }

  if (the_search->num_joints > 32) {
    mask = 1 << 31;
  }
  else {
    mask = 1 << (the_search->num_joints - 1);
  }

  the_search->this_state->part1 = old_state.part1;
  while (x--) {
    the_search->this_state->part2 = mask ^ old_state.part2;
    if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
      FLOAT32 new_merit = prioritize_state(chunks_record, the_search);
      if (segment_debug && permute_debug) {
        cprintf ("....checking state: %8.3f ", new_merit);
        print_state ("", the_search->this_state, num_joints);
      }
      if (new_merit < worst_priority) {
        push_queue (the_search->open_states, the_search->this_state,
                    worst_priority, new_merit);
        nodes_added++;
      }
    }
    mask >>= 1;
  }
}
}  // namespace tesseract


/**********************************************************************
 * new_search
 *
 * Create and initialize a new search record.
 **********************************************************************/
SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record,
                          int num_joints,
                          WERD_CHOICE *best_choice,
                          WERD_CHOICE *raw_choice,
                          STATE *state) {
  SEARCH_RECORD *this_search;

  this_search = (SEARCH_RECORD *) memalloc (sizeof (SEARCH_RECORD));

  this_search->open_states = MakeHeap (wordrec_num_seg_states * 20);
  this_search->closed_states = new_hash_table ();

  if (state)
    this_search->this_state = new_state (state);
  else
    cprintf ("error: bad initial state in new_search\n");

  this_search->first_state = new_state (this_search->this_state);
  this_search->best_state = new_state (this_search->this_state);

  this_search->best_choice = best_choice;
  this_search->raw_choice = raw_choice;

  this_search->num_joints = num_joints;
  this_search->num_states = 0;
  this_search->before_best = 0;
  this_search->segcost_bias = 0;

  return (this_search);
}


/**********************************************************************
 * pop_queue
 *
 * Get this state from the priority queue.  It should be the state that
 * has the greatest urgency to be evaluated.
 **********************************************************************/
STATE *pop_queue(HEAP *queue) {
  HEAPENTRY entry;

  if (GetTopOfHeap (queue, &entry) == OK) {
#ifndef GRAPHICS_DISABLED
    if (wordrec_display_segmentations) {
      cprintf ("eval state: %8.3f ", entry.Key);
      print_state ("", (STATE *) entry.Data, num_joints);
    }
#endif
    return ((STATE *) entry.Data);
  }
  else {
    return (NULL);
  }
}


/**********************************************************************
 * push_queue
 *
 * Add this state into the priority queue.
 **********************************************************************/
void push_queue(HEAP *queue, STATE *state, FLOAT32 worst_priority,
                FLOAT32 priority) {
  HEAPENTRY entry;

  if (priority < worst_priority) {
    if (SizeOfHeap (queue) >= MaxSizeOfHeap(queue)) {
      if (segment_debug) tprintf("Heap is Full\n");
      return;
    }
    if (segment_debug)
      tprintf("\tpushing %d node  %f\n", num_pushed, priority);
    entry.Data = (char *) new_state (state);
    num_pushed++;
    entry.Key = priority;
    HeapStore(queue, &entry);
  }
}


/**********************************************************************
 * replace_char_widths
 *
 * Replace the value of the char_width field in the chunks_record with
 * the updated width measurements from the last_segmentation.
 **********************************************************************/
void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state) {
  WIDTH_RECORD *width_record;
  int num_blobs;
  int i;

  free_widths (chunks_record->char_widths);

  num_blobs = state[0] + 1;
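  // Interleaved layout (see chunks_width): widths[2*i] is the width of
  // character i and widths[2*i + 1] is the gap between characters i and i+1.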
  width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
  width_record->num_chars = num_blobs;

  for (i = 0; i < num_blobs; i++) {
    width_record->widths[2 * i] = last_segmentation[i].width;

    if (i + 1 < num_blobs)
      width_record->widths[2 * i + 1] = last_segmentation[i].gap;
  }
  chunks_record->char_widths = width_record;
}

namespace tesseract {
BLOB_CHOICE_LIST *Wordrec::join_blobs_and_classify(
    TBLOB *blobs, SEAMS seam_list,
    int x, int y, int fx, const MATRIX *ratings,
    BLOB_CHOICE_LIST_VECTOR *old_choices) {
  BLOB_CHOICE_LIST *choices = NULL;
  // First check to see if we can look up the classification
  // in old_choices (if there is no need to merge blobs).
  if (x == y && old_choices != NULL && ratings == NULL) {
    choices = old_choices->get(x);
    old_choices->set(NULL, x);
    return choices;
  }
  // The ratings matrix filled in by the associator will contain the most
  // up-to-date classification info. Thus we look up the classification there
  // first, and only call classify_blob() if the classification is not found.
  if (ratings != NULL) {
    BLOB_CHOICE_LIST *choices_ptr = ratings->get(x, y);
    if (choices_ptr != NOT_CLASSIFIED) {
      choices = new BLOB_CHOICE_LIST();
      choices->deep_copy(choices_ptr, &BLOB_CHOICE::deep_copy);
    }
  }
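  // Merging is needed: join_pieces() welds chunks x..y into a single blob, and
  // the list walk below unlinks the now-dead blobs that followed chunk x so
  // the merged blob can be classified as one character.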
  if (x != y) {
    join_pieces(blobs, seam_list, x, y);

    int blobindex;  // current blob
    TBLOB *p_blob;
    TBLOB *blob;
    TBLOB *next_blob;
    for (blob = blobs, blobindex = 0, p_blob = NULL;
         blobindex < x; blobindex++) {
      p_blob = blob;
      blob = blob->next;
    }
    while (blobindex < y) {
      next_blob = blob->next;
      blob->next = next_blob->next;
      oldblob(next_blob);  // junk dead blobs
      blobindex++;
    }
    if (choices == NULL) {
      choices = classify_blob(p_blob, blob, blob->next,
                              NULL, "rebuild", Orange);
    }
  }
  return choices;
}
}  // namespace tesseract