1 ///////////////////////////////////////////////////////////////////////
2 // File: conversion.cpp
3 // Description: Collection of utility functions for A_CHOICE conversions.
4 // TODO(daria): delete this file when conversion to unichar_ids
5 // is finished and all permuters are completely updated/replaced.
6 // Author: Daria Antonova
7 // Created: Mon Jun 23 11:26:43 PDT 2008
8 //
9 // (C) Copyright 2007, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
20 ///////////////////////////////////////////////////////////////////////
21
22 #include "conversion.h"
23
24 #include "callcpp.h"
25 #include "choicearr.h"
26 #include "choices.h"
27 #include "dict.h"
28 #include "ratngs.h"
29 #include "stopper.h"
30 #include "unicharset.h"
31
32 namespace tesseract {
valid_word(const char * string)33 int Dict::valid_word(const char *string) {
34 WERD_CHOICE word(string, getUnicharset());
35 return valid_word(word);
36 }
37
LogNewWordChoice(A_CHOICE * a_choice,FLOAT32 adjust_factor,const float certainties[],const UNICHARSET & unicharset)38 void Dict::LogNewWordChoice(A_CHOICE *a_choice,
39 FLOAT32 adjust_factor,
40 const float certainties[],
41 const UNICHARSET &unicharset) {
42 WERD_CHOICE word_choice(strlen(a_choice->lengths));
43 convert_to_word_choice(a_choice, unicharset, &word_choice);
44 LogNewChoice(word_choice, adjust_factor, certainties, false);
45 }
46 } // namespace tesseract
47
48 // Fills in the given WERD_CHOICE with information from the given A_CHOICE.
49 // Assumes that word_choice pointer is not NULL.
convert_to_word_choice(const A_CHOICE * a_choice,const UNICHARSET & current_unicharset,WERD_CHOICE * word_choice)50 void convert_to_word_choice(const A_CHOICE *a_choice,
51 const UNICHARSET ¤t_unicharset,
52 WERD_CHOICE *word_choice) {
53 if (a_choice == NULL) return;
54 const char *string = a_choice->string;
55 const char *lengths = a_choice->lengths;
56 const char *fragment_lengths = a_choice->fragment_lengths;
57 int offset = 0;
58 for (int x = 0; x < strlen(a_choice->lengths); ++x) {
59 UNICHAR_ID unichar_id =
60 current_unicharset.unichar_to_id(string + offset, lengths[x]);
61 word_choice->append_unichar_id(unichar_id, fragment_lengths[x], 0.0, 0.0);
62 offset += lengths[x];
63 }
64 word_choice->set_rating(a_choice->rating);
65 word_choice->set_certainty(a_choice->certainty);
66 word_choice->set_permuter(a_choice->permuter);
67 word_choice->set_fragment_mark(a_choice->fragment_mark);
68 }
69
70 // Returns the best of two choices and deletes the other (worse) choice.
71 // A choice is better if it has a non-empty string and has a lower
72 // rating than the other choice. If the ratings are the same,
73 // a_choice is preferred over choice.
74 // If the best choice is in the A_CHOICE form, copies it to a new
75 // WERD_CHOICE and deletes A_CHOICE.
get_best_delete_other(const UNICHARSET & current_unicharset,WERD_CHOICE * choice,A_CHOICE * a_choice)76 WERD_CHOICE *get_best_delete_other(const UNICHARSET ¤t_unicharset,
77 WERD_CHOICE *choice,
78 A_CHOICE *a_choice) {
79 if (!a_choice) return choice;
80 if (choice != NULL &&
81 (choice->rating() < a_choice->rating || a_choice->string == NULL)) {
82 free_choice(a_choice);
83 return choice;
84 } else {
85 delete choice;
86 WERD_CHOICE *word_choice = new WERD_CHOICE();
87 convert_to_word_choice(a_choice, current_unicharset, word_choice);
88 free_choice(a_choice);
89 return word_choice;
90 }
91 }
92
93 // Convert BLOB_CHOICE_LIST_VECTOR to CHOICES_LIST.
94 // The caller is responsible for deleting the returned CHOICES_LIST.
convert_to_choices_list(const BLOB_CHOICE_LIST_VECTOR & char_choices,const UNICHARSET & current_unicharset)95 CHOICES_LIST convert_to_choices_list(
96 const BLOB_CHOICE_LIST_VECTOR &char_choices,
97 const UNICHARSET ¤t_unicharset) {
98 CHOICES_LIST old_char_choices = new_choice_list();
99 int x;
100 BLOB_CHOICE_IT it;
101 BLOB_CHOICE *blob_choice;
102 char choice_lengths[2] = {0, 0};
103 char unichar[UNICHAR_LEN + 1];
104 for (x = 0; x < char_choices.length(); ++x) {
105 it.set_to_list(char_choices.get(x));
106 LIST result = NIL;
107 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
108 blob_choice = it.data();
109 strcpy(unichar,
110 current_unicharset.id_to_unichar(blob_choice->unichar_id()));
111 choice_lengths[0] = strlen(unichar);
112 result = append_char_choice(result, unichar, choice_lengths,
113 blob_choice->rating(),
114 blob_choice->certainty(),
115 blob_choice->config(), NULL);
116 }
117 old_char_choices = array_push(old_char_choices, result);
118 }
119 return old_char_choices;
120 }
121