• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////
2 // File:        conversion.cpp
3 // Description: Collection of utility functions for A_CHOICE conversions.
4 //              TODO(daria): delete this file when conversion to unichar_ids
5 //              is finished and all permuters are completely updated/replaced.
6 // Author:      Daria Antonova
7 // Created:     Mon Jun 23 11:26:43 PDT 2008
8 //
9 // (C) Copyright 2007, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
20 ///////////////////////////////////////////////////////////////////////
21 
22 #include "conversion.h"
23 
24 #include "callcpp.h"
25 #include "choicearr.h"
26 #include "choices.h"
27 #include "dict.h"
28 #include "ratngs.h"
29 #include "stopper.h"
30 #include "unicharset.h"
31 
32 namespace tesseract {
valid_word(const char * string)33 int Dict::valid_word(const char *string) {
34   WERD_CHOICE word(string, getUnicharset());
35   return valid_word(word);
36 }
37 
LogNewWordChoice(A_CHOICE * a_choice,FLOAT32 adjust_factor,const float certainties[],const UNICHARSET & unicharset)38 void Dict::LogNewWordChoice(A_CHOICE *a_choice,
39                             FLOAT32 adjust_factor,
40                             const float certainties[],
41                             const UNICHARSET &unicharset) {
42   WERD_CHOICE word_choice(strlen(a_choice->lengths));
43   convert_to_word_choice(a_choice, unicharset, &word_choice);
44   LogNewChoice(word_choice, adjust_factor, certainties, false);
45 }
46 }  // namespace tesseract
47 
48 // Fills in the given WERD_CHOICE with information from the given A_CHOICE.
49 // Assumes that word_choice pointer is not NULL.
convert_to_word_choice(const A_CHOICE * a_choice,const UNICHARSET & current_unicharset,WERD_CHOICE * word_choice)50 void convert_to_word_choice(const A_CHOICE *a_choice,
51                             const UNICHARSET &current_unicharset,
52                             WERD_CHOICE *word_choice) {
53   if (a_choice == NULL) return;
54   const char *string = a_choice->string;
55   const char *lengths = a_choice->lengths;
56   const char *fragment_lengths = a_choice->fragment_lengths;
57   int offset = 0;
58   for (int x = 0; x < strlen(a_choice->lengths); ++x) {
59     UNICHAR_ID unichar_id =
60       current_unicharset.unichar_to_id(string + offset, lengths[x]);
61     word_choice->append_unichar_id(unichar_id, fragment_lengths[x], 0.0, 0.0);
62     offset += lengths[x];
63   }
64   word_choice->set_rating(a_choice->rating);
65   word_choice->set_certainty(a_choice->certainty);
66   word_choice->set_permuter(a_choice->permuter);
67   word_choice->set_fragment_mark(a_choice->fragment_mark);
68 }
69 
70 // Returns the best of two choices and deletes the other (worse) choice.
71 // A choice is better if it has a non-empty string and has a lower
72 // rating than the other choice. If the ratings are the same,
73 // a_choice is preferred over choice.
74 // If the best choice is in the A_CHOICE form, copies it to a new
75 // WERD_CHOICE and deletes A_CHOICE.
get_best_delete_other(const UNICHARSET & current_unicharset,WERD_CHOICE * choice,A_CHOICE * a_choice)76 WERD_CHOICE *get_best_delete_other(const UNICHARSET &current_unicharset,
77                                    WERD_CHOICE *choice,
78                                    A_CHOICE *a_choice) {
79   if (!a_choice) return choice;
80   if (choice != NULL &&
81       (choice->rating() < a_choice->rating || a_choice->string == NULL)) {
82     free_choice(a_choice);
83     return choice;
84   } else {
85     delete choice;
86     WERD_CHOICE *word_choice = new WERD_CHOICE();
87     convert_to_word_choice(a_choice, current_unicharset, word_choice);
88     free_choice(a_choice);
89     return word_choice;
90   }
91 }
92 
93 // Convert BLOB_CHOICE_LIST_VECTOR to CHOICES_LIST.
94 // The caller is responsible for deleting the returned CHOICES_LIST.
convert_to_choices_list(const BLOB_CHOICE_LIST_VECTOR & char_choices,const UNICHARSET & current_unicharset)95 CHOICES_LIST convert_to_choices_list(
96     const BLOB_CHOICE_LIST_VECTOR &char_choices,
97     const UNICHARSET &current_unicharset) {
98   CHOICES_LIST old_char_choices = new_choice_list();
99   int x;
100   BLOB_CHOICE_IT it;
101   BLOB_CHOICE *blob_choice;
102   char choice_lengths[2] = {0, 0};
103   char unichar[UNICHAR_LEN + 1];
104   for (x = 0; x < char_choices.length(); ++x) {
105     it.set_to_list(char_choices.get(x));
106     LIST result = NIL;
107     for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
108       blob_choice = it.data();
109       strcpy(unichar,
110              current_unicharset.id_to_unichar(blob_choice->unichar_id()));
111       choice_lengths[0] = strlen(unichar);
112       result = append_char_choice(result, unichar, choice_lengths,
113                                   blob_choice->rating(),
114                                   blob_choice->certainty(),
115                                   blob_choice->config(), NULL);
116     }
117     old_char_choices = array_push(old_char_choices, result);
118   }
119   return old_char_choices;
120 }
121