1 /**********************************************************************
2 * File: tstruct.cpp (Formerly tstruct.c)
3 * Description: Code to manipulate the structures of the C++/C interface.
4 * Author: Ray Smith
5 * Created: Thu Apr 23 15:49:29 BST 1992
6 *
7 * (C) Copyright 1992, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19
20 #include "mfcpch.h"
21 #include "tfacep.h"
22 #include "tstruct.h"
23 #include "makerow.h"
24 #include "ocrblock.h"
25 //#include "structures.h"
26
27 static ERRCODE BADFRAGMENTS = "Couldn't find matching fragment ends";
28
ELISTIZE(FRAGMENT)29 ELISTIZE (FRAGMENT)
30 //extern /*"C"*/ oldoutline(TESSLINE*);
31 /**********************************************************************
32 * FRAGMENT::FRAGMENT
33 *
34 * Constructor for fragments.
35 **********************************************************************/
36 FRAGMENT::FRAGMENT ( //constructor
37 EDGEPT * head_pt, //start point
38 EDGEPT * tail_pt //end point
39 ):head (head_pt->pos.x, head_pt->pos.y), tail (tail_pt->pos.x,
40 tail_pt->pos.y) {
41 headpt = head_pt; // save ptrs
42 tailpt = tail_pt;
43 }
44
45 // Helper function to make a fake PBLOB formed from the bounding box
46 // of the given old-format outline.
MakeRectBlob(TESSLINE * ol)47 static PBLOB* MakeRectBlob(TESSLINE* ol) {
48 POLYPT_LIST poly_list;
49 POLYPT_IT poly_it = &poly_list;
50 FCOORD pos, vec;
51 POLYPT *polypt;
52
53 // Create points at each of the 4 corners of the rectangle in turn.
54 pos = FCOORD(ol->topleft.x, ol->topleft.y);
55 vec = FCOORD(0.0f, ol->botright.y - ol->topleft.y);
56 polypt = new POLYPT(pos, vec);
57 poly_it.add_after_then_move(polypt);
58 pos = FCOORD(ol->topleft.x, ol->botright.y);
59 vec = FCOORD(ol->botright.x - ol->topleft.x, 0.0f);
60 polypt = new POLYPT(pos, vec);
61 poly_it.add_after_then_move(polypt);
62 pos = FCOORD(ol->botright.x, ol->botright.y);
63 vec = FCOORD(0.0f, ol->topleft.y - ol->botright.y);
64 polypt = new POLYPT(pos, vec);
65 poly_it.add_after_then_move(polypt);
66 pos = FCOORD(ol->botright.x, ol->topleft.y);
67 vec = FCOORD(ol->topleft.x - ol->botright.x, 0.0f);
68 polypt = new POLYPT(pos, vec);
69 poly_it.add_after_then_move(polypt);
70
71 OUTLINE_LIST out_list;
72 OUTLINE_IT out_it = &out_list;
73 out_it.add_after_then_move(new OUTLINE(&poly_it));
74 return new PBLOB(&out_list);
75 }
76
77 /**********************************************************************
78 * make_ed_word
79 *
80 * Make an editor format word from the tess style word.
81 **********************************************************************/
82
make_ed_word(TWERD * tessword,WERD * clone)83 WERD *make_ed_word( //construct word
84 TWERD *tessword, //word to convert
85 WERD *clone //clone this one
86 ) {
87 WERD *word; //converted word
88 TBLOB *tblob; //current blob
89 PBLOB *blob; //new blob
90 PBLOB_LIST blobs; //list of blobs
91 PBLOB_IT blob_it = &blobs; //iterator
92
93 for (tblob = tessword->blobs; tblob != NULL; tblob = tblob->next) {
94 blob = make_ed_blob (tblob);
95 if (blob == NULL && tblob->outlines != NULL) {
96 // Make a fake blob using the bounding box rectangle of the 1st outline.
97 blob = MakeRectBlob(tblob->outlines);
98 }
99 if (blob != NULL) {
100 blob_it.add_after_then_move (blob);
101 }
102 }
103 if (!blobs.empty ())
104 word = new WERD (&blobs, clone);
105 else
106 word = NULL;
107 return word;
108 }
109
110
111 /**********************************************************************
112 * make_ed_blob
113 *
114 * Make an editor format blob from the tess style blob.
115 **********************************************************************/
116
make_ed_blob(TBLOB * tessblob)117 PBLOB *make_ed_blob( //construct blob
118 TBLOB *tessblob //blob to convert
119 ) {
120 TESSLINE *tessol; //tess outline
121 FRAGMENT_LIST fragments; //list of fragments
122 OUTLINE *outline; //current outline
123 OUTLINE_LIST out_list; //list of outlines
124 OUTLINE_IT out_it = &out_list; //iterator
125
126 for (tessol = tessblob->outlines; tessol != NULL; tessol = tessol->next) {
127 //stick in list
128 register_outline(tessol, &fragments);
129 }
130 while (!fragments.empty ()) {
131 outline = make_ed_outline (&fragments);
132 if (outline != NULL) {
133 out_it.add_after_then_move (outline);
134 }
135 }
136 if (out_it.empty())
137 return NULL; //couldn't do it
138 return new PBLOB (&out_list); //turn to blob
139 }
140
141
142 /**********************************************************************
143 * make_ed_outline
144 *
145 * Make an editor format outline from the list of fragments.
146 **********************************************************************/
147
make_ed_outline(FRAGMENT_LIST * list)148 OUTLINE *make_ed_outline( //constructoutline
149 FRAGMENT_LIST *list //list of fragments
150 ) {
151 FRAGMENT *fragment; //current fragment
152 EDGEPT *edgept; //current point
153 ICOORD headpos; //coords of head
154 ICOORD tailpos; //coords of tail
155 FCOORD pos; //coords of edgept
156 FCOORD vec; //empty
157 POLYPT *polypt; //current point
158 POLYPT_LIST poly_list; //list of point
159 POLYPT_IT poly_it = &poly_list;//iterator
160 FRAGMENT_IT fragment_it = list;//fragment
161
162 headpos = fragment_it.data ()->head;
163 do {
164 fragment = fragment_it.data ();
165 edgept = fragment->headpt; //start of segment
166 do {
167 pos = FCOORD (edgept->pos.x, edgept->pos.y);
168 vec = FCOORD (edgept->vec.x, edgept->vec.y);
169 polypt = new POLYPT (pos, vec);
170 //add to list
171 poly_it.add_after_then_move (polypt);
172 edgept = edgept->next;
173 }
174 while (edgept != fragment->tailpt);
175 tailpos = ICOORD (edgept->pos.x, edgept->pos.y);
176 //get rid of it
177 delete fragment_it.extract ();
178 if (tailpos != headpos) {
179 if (fragment_it.empty ()) {
180 return NULL;
181 }
182 fragment_it.forward ();
183 //find next segment
184 for (fragment_it.mark_cycle_pt (); !fragment_it.cycled_list () &&
185 fragment_it.data ()->head != tailpos;
186 fragment_it.forward ());
187 if (fragment_it.data ()->head != tailpos) {
188 // It is legitimate for the heads to not all match to tails,
189 // since not all combinations of seams always make sense.
190 for (fragment_it.mark_cycle_pt ();
191 !fragment_it.cycled_list (); fragment_it.forward ()) {
192 fragment = fragment_it.extract ();
193 delete fragment;
194 }
195 return NULL; //can't do it
196 }
197 }
198 }
199 while (tailpos != headpos);
200 return new OUTLINE (&poly_it); //turn to outline
201 }
202
203
204 /**********************************************************************
205 * register_outline
206 *
207 * Add the fragments in the given outline to the list
208 **********************************************************************/
209
register_outline(TESSLINE * outline,FRAGMENT_LIST * list)210 void register_outline( //add fragments
211 TESSLINE *outline, //tess format
212 FRAGMENT_LIST *list //list to add to
213 ) {
214 EDGEPT *startpt; //start of outline
215 EDGEPT *headpt; //start of fragment
216 EDGEPT *tailpt; //end of fragment
217 FRAGMENT *fragment; //new fragment
218 FRAGMENT_IT it = list; //iterator
219
220 startpt = outline->loop;
221 do {
222 startpt = startpt->next;
223 if (startpt == NULL)
224 return; //illegal!
225 }
226 while (startpt->flags[0] == 0 && startpt != outline->loop);
227 headpt = startpt;
228 do
229 startpt = startpt->next;
230 while (startpt->flags[0] != 0 && startpt != headpt);
231 if (startpt->flags[0] != 0)
232 return; //all hidden!
233
234 headpt = startpt;
235 do {
236 tailpt = headpt;
237 do
238 tailpt = tailpt->next;
239 while (tailpt->flags[0] == 0 && tailpt != startpt);
240 fragment = new FRAGMENT (headpt, tailpt);
241 it.add_after_then_move (fragment);
242 while (tailpt->flags[0] != 0)
243 tailpt = tailpt->next;
244 headpt = tailpt;
245 }
246 while (tailpt != startpt);
247 }
248
249
250 /**********************************************************************
251 * make_tess_row
252 *
253 * Make a fake row structure to pass to the tesseract matchers.
254 **********************************************************************/
255
make_tess_row(DENORM * denorm,TEXTROW * tessrow)256 void make_tess_row( //make fake row
257 DENORM *denorm, //row info
258 TEXTROW *tessrow //output row
259 ) {
260 tessrow->baseline.segments = 1;
261 tessrow->baseline.xstarts[0] = -32767;
262 tessrow->baseline.xstarts[1] = 32767;
263 tessrow->baseline.quads[0].a = 0;
264 tessrow->baseline.quads[0].b = 0;
265 tessrow->baseline.quads[0].c = bln_baseline_offset;
266 tessrow->xheight.segments = 1;
267 tessrow->xheight.xstarts[0] = -32767;
268 tessrow->xheight.xstarts[1] = 32767;
269 tessrow->xheight.quads[0].a = 0;
270 tessrow->xheight.quads[0].b = 0;
271 tessrow->xheight.quads[0].c = bln_x_height + bln_baseline_offset;
272 tessrow->lineheight = bln_x_height;
273 if (denorm != NULL) {
274 tessrow->ascrise = denorm->row ()->ascenders () * denorm->scale ();
275 tessrow->descdrop = denorm->row ()->descenders () * denorm->scale ();
276 } else {
277 tessrow->ascrise = bln_baseline_offset;
278 tessrow->descdrop = -bln_baseline_offset;
279 }
280 }
281
282
283 /**********************************************************************
284 * make_tess_word
285 *
286 * Convert the word to Tess format.
287 **********************************************************************/
288
make_tess_word(WERD * word,TEXTROW * row)289 TWERD *make_tess_word( //convert word
290 WERD *word, //word to do
291 TEXTROW *row //fake row
292 ) {
293 TWERD *tessword; //tess format
294
295 tessword = newword (); //use old allocator
296 tessword->row = row; //give them something
297 //copy string
298 tessword->correct = strsave (word->text ());
299 tessword->guess = NULL;
300 tessword->blobs = make_tess_blobs (word->blob_list ());
301 tessword->blanks = 1;
302 tessword->blobcount = word->blob_list ()->length ();
303 tessword->next = NULL;
304 return tessword;
305 }
306
307
308 /**********************************************************************
309 * make_tess_blobs
310 *
311 * Make Tess style blobs from a list of BLOBs.
312 **********************************************************************/
313
make_tess_blobs(PBLOB_LIST * bloblist)314 TBLOB *make_tess_blobs( //make tess blobs
315 PBLOB_LIST *bloblist //list to convert
316 ) {
317 PBLOB_IT it = bloblist; //iterator
318 PBLOB *blob; //current blob
319 TBLOB *head; //output list
320 TBLOB *tail; //end of list
321 TBLOB *tessblob;
322
323 head = NULL;
324 tail = NULL;
325 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
326 blob = it.data ();
327 tessblob = make_tess_blob (blob, TRUE);
328 if (head)
329 tail->next = tessblob;
330 else
331 head = tessblob;
332 tail = tessblob;
333 }
334 return head;
335 }
336
337 /**********************************************************************
338 * make_rotated_tess_blob
339 *
340 * Make a single Tess style blob, applying the given rotation and
341 * renormalizing.
342 **********************************************************************/
make_rotated_tess_blob(const DENORM * denorm,PBLOB * blob,BOOL8 flatten)343 TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob,
344 BOOL8 flatten) {
345 if (denorm != NULL && denorm->block() != NULL &&
346 denorm->block()->classify_rotation().y() != 0.0) {
347 TBOX box = blob->bounding_box();
348 int src_width = box.width();
349 int src_height = box.height();
350 src_width = static_cast<int>(src_width / denorm->scale() + 0.5);
351 src_height = static_cast<int>(src_height / denorm->scale() + 0.5);
352 int x_middle = (box.left() + box.right()) / 2;
353 int y_middle = (box.top() + box.bottom()) / 2;
354 PBLOB* rotated_blob = PBLOB::deep_copy(blob);
355 rotated_blob->move(FCOORD(-x_middle, -y_middle));
356 rotated_blob->rotate(denorm->block()->classify_rotation());
357 ICOORD median_size = denorm->block()->median_size();
358 int tolerance = median_size.x() / 8;
359 // TODO(dsl/rays) find a better normalization solution. In the mean time
360 // make it work for CJK by normalizing for Cap height in the same way
361 // as is applied in compute_block_xheight when the row is presumed to
362 // be ALLCAPS, i.e. the x-height is the fixed fraction
363 // blob height * textord_merge_x / (textord_merge_x + textord_merge_asc)
364 if (NearlyEqual(src_width, static_cast<int>(median_size.x()), tolerance) &&
365 NearlyEqual(src_height, static_cast<int>(median_size.y()), tolerance)) {
366 float target_height = bln_x_height * (textord_merge_x + textord_merge_asc)
367 / textord_merge_x;
368 rotated_blob->scale(target_height / box.width());
369 rotated_blob->move(FCOORD(0.0f,
370 bln_baseline_offset -
371 rotated_blob->bounding_box().bottom()));
372 }
373 TBLOB* result = make_tess_blob(rotated_blob, flatten);
374 delete rotated_blob;
375 return result;
376 } else {
377 return make_tess_blob(blob, flatten);
378 }
379 }
380
381 /**********************************************************************
382 * make_tess_blob
383 *
384 * Make a single Tess style blob
385 **********************************************************************/
386
make_tess_blob(PBLOB * blob,BOOL8 flatten)387 TBLOB *make_tess_blob( //make tess blob
388 PBLOB *blob, //blob to convert
389 BOOL8 flatten //flatten outline structure
390 ) {
391 inT32 index;
392 TBLOB *tessblob;
393
394 tessblob = newblob ();
395 tessblob->outlines = (struct olinestruct *)
396 make_tess_outlines (blob->out_list (), flatten);
397 for (index = 0; index < TBLOBFLAGS; index++)
398 tessblob->flags[index] = 0; //!!
399 tessblob->correct = 0;
400 tessblob->guess = 0;
401 for (index = 0; index < MAX_WO_CLASSES; index++) {
402 tessblob->classes[index] = 0;
403 tessblob->values[index] = 0;
404 }
405 tessblob->next = NULL;
406 return tessblob;
407 }
408
409
410 /**********************************************************************
411 * make_tess_outlines
412 *
413 * Make Tess style outlines from a list of OUTLINEs.
414 **********************************************************************/
415
make_tess_outlines(OUTLINE_LIST * outlinelist,BOOL8 flatten)416 TESSLINE *make_tess_outlines( //make tess outlines
417 OUTLINE_LIST *outlinelist, //list to convert
418 BOOL8 flatten //flatten outline structure
419 ) {
420 OUTLINE_IT it = outlinelist; //iterator
421 OUTLINE *outline; //current outline
422 TESSLINE *head; //output list
423 TESSLINE *tail; //end of list
424 TESSLINE *tessoutline;
425
426 head = NULL;
427 tail = NULL;
428 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
429 outline = it.data ();
430 tessoutline = newoutline ();
431 tessoutline->compactloop = NULL;
432 tessoutline->loop = make_tess_edgepts (outline->polypts (),
433 tessoutline->topleft,
434 tessoutline->botright);
435 if (tessoutline->loop == NULL) {
436 oldoutline(tessoutline);
437 continue;
438 }
439 tessoutline->start = tessoutline->loop->pos;
440 tessoutline->node = NULL;
441 tessoutline->next = NULL;
442 tessoutline->child = NULL;
443 if (!outline->child ()->empty ()) {
444 if (flatten)
445 tessoutline->next = (struct olinestruct *)
446 make_tess_outlines (outline->child (), flatten);
447 else {
448 tessoutline->next = NULL;
449 tessoutline->child = (struct olinestruct *)
450 make_tess_outlines (outline->child (), flatten);
451 }
452 }
453 else
454 tessoutline->next = NULL;
455 if (head)
456 tail->next = tessoutline;
457 else
458 head = tessoutline;
459 while (tessoutline->next != NULL)
460 tessoutline = tessoutline->next;
461 tail = tessoutline;
462 }
463 return head;
464 }
465
466
467 /**********************************************************************
468 * make_tess_edgepts
469 *
470 * Make Tess style edgepts from a list of POLYPTs.
471 **********************************************************************/
472
make_tess_edgepts(POLYPT_LIST * edgeptlist,TPOINT & tl,TPOINT & br)473 EDGEPT *make_tess_edgepts( //make tess edgepts
474 POLYPT_LIST *edgeptlist, //list to convert
475 TPOINT &tl, //bounding box
476 TPOINT &br) {
477 inT32 index;
478 POLYPT_IT it = edgeptlist; //iterator
479 POLYPT *edgept; //current edgept
480 EDGEPT *head; //output list
481 EDGEPT *tail; //end of list
482 EDGEPT *tessedgept;
483
484 head = NULL;
485 tail = NULL;
486 tl.x = MAX_INT16;
487 tl.y = -MAX_INT16;
488 br.x = -MAX_INT16;
489 br.y = MAX_INT16;
490 for (it.mark_cycle_pt (); !it.cycled_list ();) {
491 edgept = it.data ();
492 tessedgept = newedgept ();
493 tessedgept->pos.x = (inT16) edgept->pos.x ();
494 tessedgept->pos.y = (inT16) edgept->pos.y ();
495 if (tessedgept->pos.x < tl.x)
496 tl.x = tessedgept->pos.x;
497 if (tessedgept->pos.x > br.x)
498 br.x = tessedgept->pos.x;
499 if (tessedgept->pos.y > tl.y)
500 tl.y = tessedgept->pos.y;
501 if (tessedgept->pos.y < br.y)
502 br.y = tessedgept->pos.y;
503 if (head != NULL && tessedgept->pos.x == tail->pos.x
504 && tessedgept->pos.y == tail->pos.y) {
505 oldedgept(tessedgept);
506 }
507 else {
508 for (index = 0; index < EDGEPTFLAGS; index++)
509 tessedgept->flags[index] = 0;
510 if (head != NULL) {
511 tail->vec.x = tessedgept->pos.x - tail->pos.x;
512 tail->vec.y = tessedgept->pos.y - tail->pos.y;
513 tessedgept->prev = tail;
514 }
515 tessedgept->next = head;
516 if (head)
517 tail->next = tessedgept;
518 else
519 head = tessedgept;
520 tail = tessedgept;
521 }
522 it.forward ();
523 }
524 head->prev = tail;
525 tail->vec.x = head->pos.x - tail->pos.x;
526 tail->vec.y = head->pos.y - tail->pos.y;
527 if (head == tail) {
528 oldedgept(head);
529 return NULL; //empty
530 }
531 return head;
532 }
533