///////////////////////////////////////////////////////////////////////
// File:        tesseractclass.cpp
// Description: An instance of Tesseract. For thread safety, *every*
//              global variable goes in here, directly, or indirectly.
// Author:      Ray Smith
// Created:     Fri Mar 07 08:17:01 PST 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

21 #include "tesseractclass.h"
22 #include "globals.h"
23 
24 // Include automatically generated configuration file if running autoconf.
25 #ifdef HAVE_CONFIG_H
26 #include "config_auto.h"
27 #endif
28 
29 #ifdef HAVE_LIBLEPT
30 // Include leptonica library only if autoconf (or makefile etc) tell us to.
31 #include "allheaders.h"
32 #endif
33 
34 namespace tesseract {
35 
Tesseract()36 Tesseract::Tesseract()
37   : BOOL_MEMBER(tessedit_resegment_from_boxes, false,
38                 "Take segmentation and labeling from box file"),
39     BOOL_MEMBER(tessedit_train_from_boxes, false,
40                 "Generate training data from boxed chars"),
41     BOOL_MEMBER(tessedit_dump_pageseg_images, false,
42                "Dump itermediate images made during page segmentation"),
43     // The default for pageseg_mode is the old behaviour, so as not to
44     // upset anything that relies on that.
45     INT_MEMBER(tessedit_pageseg_mode, 2,
46                "Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char"
47                " (Values from PageSegMode enum in baseapi.h)"),
48     INT_MEMBER(tessedit_accuracyvspeed, 0,
49                "Accuracy V Speed tradeoff: 0 fastest, 100 most accurate"
50                " (Values from AccuracyVSpeed enum in baseapi.h)"),
51     BOOL_MEMBER(tessedit_train_from_boxes_word_level, false,
52                 "Generate training data from boxed chars at word level."),
53     STRING_MEMBER(tessedit_char_blacklist, "",
54                   "Blacklist of chars not to recognize"),
55     STRING_MEMBER(tessedit_char_whitelist, "",
56                   "Whitelist of chars to recognize"),
57     BOOL_MEMBER(global_tessedit_ambigs_training, false,
58                 "Perform training for ambiguities"),
59     pix_binary_(NULL),
60     deskew_(1.0f, 0.0f),
61     reskew_(1.0f, 0.0f),
62     hindi_image_(false) {
63 }
64 
~Tesseract()65 Tesseract::~Tesseract() {
66   Clear();
67 }
68 
Clear()69 void Tesseract::Clear() {
70 #ifdef HAVE_LIBLEPT
71   if (pix_binary_ != NULL)
72     pixDestroy(&pix_binary_);
73 #endif
74  deskew_ = FCOORD(1.0f, 0.0f);
75  reskew_ = FCOORD(1.0f, 0.0f);
76 }
77 
SetBlackAndWhitelist()78 void Tesseract::SetBlackAndWhitelist() {
79   // Set the white and blacklists (if any)
80   unicharset.set_black_and_whitelist(tessedit_char_blacklist.string(),
81                                      tessedit_char_whitelist.string());
82 }
83 
84 }  // namespace tesseract
85