1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 - This software is distributed in the hope that it will be
4 - useful, but with NO WARRANTY OF ANY KIND.
5 - No author or distributor accepts responsibility to anyone for the
6 - consequences of using this software, or for whether it serves any
7 - particular purpose or works at all, unless he or she says so in
8 - writing. Everyone is granted permission to copy, modify and
9 - redistribute this source code, for commercial or non-commercial
10 - purposes, with the following restrictions: (1) the origin of this
11 - source code must not be misrepresented; (2) modified versions must
12 - be plainly marked as such; and (3) this notice may not be removed
13 - or altered from any source or modified source distribution.
14 *====================================================================*/
15
16 /*
17 * boxfunc2.c
18 *
19 * Boxa/Box transform (shift, scale) and orthogonal rotation
20 * BOXA *boxaTransform()
21 * BOX *boxTransform()
22 * BOXA *boxaTransformOrdered()
23 * BOX *boxTransformOrdered()
24 * BOXA *boxaRotateOrth()
25 * BOX *boxRotateOrth()
26 *
27 * Boxa sort
28 * BOXA *boxaSort()
29 * BOXA *boxaBinSort()
30 * BOXA *boxaSortByIndex()
31 * BOXAA *boxaSort2d()
32 * BOXAA *boxaSort2dByIndex()
33 *
34 * Other boxaa functions
35 * l_int32 boxaaGetExtent()
36 * BOXA *boxaaFlattenToBoxa()
37 * l_int32 boxaaAlignBox()
38 */
39
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <math.h>
43 #include "allheaders.h"
44
45 /* For more than this number of c.c. in a binarized image of
46 * semi-perimeter (w + h) about 5000 or less, the O(n) binsort
47 * is faster than the O(nlogn) shellsort. */
48 static const l_int32 MIN_COMPS_FOR_BIN_SORT = 500;
49
50
51 /*---------------------------------------------------------------------*
52 * Boxa/Box transform (shift, scale) and orthogonal rotation *
53 *---------------------------------------------------------------------*/
54 /*!
55 * boxaTransform()
56 *
57 * Input: boxa
58 * shiftx, shifty
59 * scalex, scaley
60 * Return: boxad, or null on error
61 *
62 * Notes:
63 * (1) This is a very simple function that first shifts, then scales.
64 */
65 BOXA *
boxaTransform(BOXA * boxas,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley)66 boxaTransform(BOXA *boxas,
67 l_int32 shiftx,
68 l_int32 shifty,
69 l_float32 scalex,
70 l_float32 scaley)
71 {
72 l_int32 i, n;
73 BOX *boxs, *boxd;
74 BOXA *boxad;
75
76 PROCNAME("boxaTransform");
77
78 if (!boxas)
79 return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
80 n = boxaGetCount(boxas);
81 if ((boxad = boxaCreate(n)) == NULL)
82 return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
83 for (i = 0; i < n; i++) {
84 if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL)
85 return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
86 boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley);
87 boxDestroy(&boxs);
88 boxaAddBox(boxad, boxd, L_INSERT);
89 }
90
91 return boxad;
92 }
93
94
95 /*!
96 * boxTransform()
97 *
98 * Input: box
99 * shiftx, shifty
100 * scalex, scaley
101 * Return: boxd, or null on error
102 *
103 * Notes:
104 * (1) This is a very simple function that first shifts, then scales.
105 */
106 BOX *
boxTransform(BOX * box,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley)107 boxTransform(BOX *box,
108 l_int32 shiftx,
109 l_int32 shifty,
110 l_float32 scalex,
111 l_float32 scaley)
112 {
113 PROCNAME("boxTransform");
114
115 if (!box)
116 return (BOX *)ERROR_PTR("box not defined", procName, NULL);
117 return boxCreate((l_int32)(scalex * (box->x + shiftx) + 0.5),
118 (l_int32)(scaley * (box->y + shifty) + 0.5),
119 (l_int32)(L_MAX(1.0, scalex * box->w + 0.5)),
120 (l_int32)(L_MAX(1.0, scaley * box->h + 0.5)));
121 }
122
123
124 /*!
125 * boxaTransformOrdered()
126 *
127 * Input: boxa
128 * shiftx, shifty
129 * scalex, scaley
130 * xcen, ycen (center of rotation)
131 * angle (in radians; clockwise is positive)
132 * order (one of 6 combinations: L_TR_SC_RO, ...)
133 * Return: boxd, or null on error
134 *
135 * Notes:
136 * (1) This allows a sequence of linear transforms on each box.
137 * the transforms are from the affine set, composed of
138 * shift, scaling and rotation, and the order of the
139 * transforms is specified.
140 * (2) Although these operations appear to be on an infinite
141 * 2D plane, in practice the region of interest is clipped
142 * to a finite image. The center of rotation is usually taken
143 * with respect to the image (either the UL corner or the
144 * center). A translation can have two very different effects:
145 * (a) Moves the boxes across the fixed image region.
146 * (b) Moves the image origin, causing a change in the image
147 * region and an opposite effective translation of the boxes.
148 * This function should only be used for (a), where the image
149 * region is fixed on translation. If the image region is
150 * changed by the translation, use instead the functions
151 * in affinecompose.c, where the image region and rotation
152 * center can be computed from the actual clipping due to
153 * translation of the image origin.
154 * (3) See boxTransformOrdered() for usage and implementation details.
155 */
156 BOXA *
boxaTransformOrdered(BOXA * boxas,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley,l_int32 xcen,l_int32 ycen,l_float32 angle,l_int32 order)157 boxaTransformOrdered(BOXA *boxas,
158 l_int32 shiftx,
159 l_int32 shifty,
160 l_float32 scalex,
161 l_float32 scaley,
162 l_int32 xcen,
163 l_int32 ycen,
164 l_float32 angle,
165 l_int32 order)
166 {
167 l_int32 i, n;
168 BOX *boxs, *boxd;
169 BOXA *boxad;
170
171 PROCNAME("boxaTransformOrdered");
172
173 if (!boxas)
174 return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
175 n = boxaGetCount(boxas);
176 if ((boxad = boxaCreate(n)) == NULL)
177 return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
178 for (i = 0; i < n; i++) {
179 if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL)
180 return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
181 boxd = boxTransformOrdered(boxs, shiftx, shifty, scalex, scaley,
182 xcen, ycen, angle, order);
183 boxDestroy(&boxs);
184 boxaAddBox(boxad, boxd, L_INSERT);
185 }
186
187 return boxad;
188 }
189
190
191 /*!
192 * boxTransformOrdered()
193 *
194 * Input: boxs
195 * shiftx, shifty
196 * scalex, scaley
197 * xcen, ycen (center of rotation)
198 * angle (in radians; clockwise is positive)
199 * order (one of 6 combinations: L_TR_SC_RO, ...)
200 * Return: boxd, or null on error
201 *
202 * Notes:
203 * (1) This allows a sequence of linear transforms, composed of
204 * shift, scaling and rotation, where the order of the
205 * transforms is specified.
206 * (2) The rotation is taken about a point specified by (xcen, ycen).
207 * Let the components of the vector from the center of rotation
208 * to the box center be (xdif, ydif):
209 * xdif = (bx + 0.5 * bw) - xcen
210 * ydif = (by + 0.5 * bh) - ycen
211 * Then the box center after rotation has new components:
212 * bxcen = xcen + xdif * cosa + ydif * sina
213 * bycen = ycen + ydif * cosa - xdif * sina
214 * where cosa and sina are the cos and sin of the angle,
215 * and the enclosing box for the rotated box has size:
216 * rw = |bw * cosa| + |bh * sina|
217 * rh = |bh * cosa| + |bw * sina|
218 * where bw and bh are the unrotated width and height.
219 * Then the box UL corner (rx, ry) is
220 * rx = bxcen - 0.5 * rw
221 * ry = bycen - 0.5 * rh
222 * (3) The center of rotation specified by args @xcen and @ycen
223 * is the point BEFORE any translation or scaling. If the
224 * rotation is not the first operation, this function finds
225 * the actual center at the time of rotation. It does this
226 * by making the following assumptions:
227 * (1) Any scaling is with respect to the UL corner, so
228 * that the center location scales accordingly.
229 * (2) A translation does not affect the center of
230 * the image; it just moves the boxes.
231 * We always use assumption (1). However, assumption (2)
232 * will be incorrect if the apparent translation is due
233 * to a clipping operation that, in effect, moves the
234 * origin of the image. In that case, you should NOT use
235 * these simple functions. Instead, use the functions
236 * in affinecompose.c, where the rotation center can be
237 * computed from the actual clipping due to translation
238 * of the image origin.
239 */
240 BOX *
boxTransformOrdered(BOX * boxs,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley,l_int32 xcen,l_int32 ycen,l_float32 angle,l_int32 order)241 boxTransformOrdered(BOX *boxs,
242 l_int32 shiftx,
243 l_int32 shifty,
244 l_float32 scalex,
245 l_float32 scaley,
246 l_int32 xcen,
247 l_int32 ycen,
248 l_float32 angle,
249 l_int32 order)
250 {
251 l_int32 bx, by, bw, bh, tx, ty, tw, th;
252 l_int32 xcent, ycent; /* transformed center of rotation due to scaling */
253 l_float32 sina, cosa, xdif, ydif, rx, ry, rw, rh;
254 BOX *boxd;
255
256 PROCNAME("boxTransformOrdered");
257
258 if (!boxs)
259 return (BOX *)ERROR_PTR("boxs not defined", procName, NULL);
260 if (order != L_TR_SC_RO && order != L_SC_RO_TR && order != L_RO_TR_SC &&
261 order != L_TR_RO_SC && order != L_RO_SC_TR && order != L_SC_TR_RO)
262 return (BOX *)ERROR_PTR("order invalid", procName, NULL);
263
264 boxGetGeometry(boxs, &bx, &by, &bw, &bh);
265 if (angle != 0.0) {
266 sina = sin(angle);
267 cosa = cos(angle);
268 }
269
270 if (order == L_TR_SC_RO) {
271 tx = (l_int32)(scalex * (bx + shiftx) + 0.5);
272 ty = (l_int32)(scaley * (by + shifty) + 0.5);
273 tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
274 th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
275 xcent = (l_int32)(scalex * xcen + 0.5);
276 ycent = (l_int32)(scaley * ycen + 0.5);
277 if (angle == 0.0)
278 boxd = boxCreate(tx, ty, tw, th);
279 else {
280 xdif = tx + 0.5 * tw - xcent;
281 ydif = ty + 0.5 * th - ycent;
282 rw = L_ABS(tw * cosa) + L_ABS(th * sina);
283 rh = L_ABS(th * cosa) + L_ABS(tw * sina);
284 rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
285 ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
286 boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
287 (l_int32)rh);
288 }
289 }
290 else if (order == L_SC_TR_RO) {
291 tx = (l_int32)(scalex * bx + shiftx + 0.5);
292 ty = (l_int32)(scaley * by + shifty + 0.5);
293 tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
294 th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
295 xcent = (l_int32)(scalex * xcen + 0.5);
296 ycent = (l_int32)(scaley * ycen + 0.5);
297 if (angle == 0.0)
298 boxd = boxCreate(tx, ty, tw, th);
299 else {
300 xdif = tx + 0.5 * tw - xcent;
301 ydif = ty + 0.5 * th - ycent;
302 rw = L_ABS(tw * cosa) + L_ABS(th * sina);
303 rh = L_ABS(th * cosa) + L_ABS(tw * sina);
304 rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
305 ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
306 boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
307 (l_int32)rh);
308 }
309 }
310 else if (order == L_RO_TR_SC) {
311 if (angle == 0.0) {
312 rx = bx;
313 ry = by;
314 rw = bw;
315 rh = bh;
316 }
317 else {
318 xdif = bx + 0.5 * bw - xcen;
319 ydif = by + 0.5 * bh - ycen;
320 rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
321 rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
322 rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
323 ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
324 }
325 tx = (l_int32)(scalex * (rx + shiftx) + 0.5);
326 ty = (l_int32)(scaley * (ry + shifty) + 0.5);
327 tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
328 th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
329 boxd = boxCreate(tx, ty, tw, th);
330 }
331 else if (order == L_RO_SC_TR) {
332 if (angle == 0.0) {
333 rx = bx;
334 ry = by;
335 rw = bw;
336 rh = bh;
337 }
338 else {
339 xdif = bx + 0.5 * bw - xcen;
340 ydif = by + 0.5 * bh - ycen;
341 rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
342 rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
343 rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
344 ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
345 }
346 tx = (l_int32)(scalex * rx + shiftx + 0.5);
347 ty = (l_int32)(scaley * ry + shifty + 0.5);
348 tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
349 th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
350 boxd = boxCreate(tx, ty, tw, th);
351 }
352 else if (order == L_TR_RO_SC) {
353 tx = bx + shiftx;
354 ty = by + shifty;
355 if (angle == 0.0) {
356 rx = tx;
357 ry = ty;
358 rw = bw;
359 rh = bh;
360 }
361 else {
362 xdif = tx + 0.5 * bw - xcen;
363 ydif = ty + 0.5 * bh - ycen;
364 rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
365 rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
366 rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
367 ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
368 }
369 tx = (l_int32)(scalex * rx + 0.5);
370 ty = (l_int32)(scaley * ry + 0.5);
371 tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
372 th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
373 boxd = boxCreate(tx, ty, tw, th);
374 }
375 else { /* order == L_SC_RO_TR) */
376 tx = (l_int32)(scalex * bx + 0.5);
377 ty = (l_int32)(scaley * by + 0.5);
378 tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
379 th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
380 xcent = (l_int32)(scalex * xcen + 0.5);
381 ycent = (l_int32)(scaley * ycen + 0.5);
382 if (angle == 0.0) {
383 rx = tx;
384 ry = ty;
385 rw = tw;
386 rh = th;
387 }
388 else {
389 xdif = tx + 0.5 * tw - xcent;
390 ydif = ty + 0.5 * th - ycent;
391 rw = L_ABS(tw * cosa) + L_ABS(th * sina);
392 rh = L_ABS(th * cosa) + L_ABS(tw * sina);
393 rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
394 ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
395 }
396 tx = (l_int32)(rx + shiftx + 0.5);
397 ty = (l_int32)(ry + shifty + 0.5);
398 tw = (l_int32)(rw + 0.5);
399 th = (l_int32)(rh + 0.5);
400 boxd = boxCreate(tx, ty, tw, th);
401 }
402
403 return boxd;
404 }
405
406
407 /*!
408 * boxaRotateOrth()
409 *
410 * Input: boxa
411 * w, h (of image in which the boxa is embedded)
412 * rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
413 * all rotations are clockwise)
414 * Return: boxad, or null on error
415 *
416 * Notes:
417 * (1) See boxRotateOrth() for details.
418 */
419 BOXA *
boxaRotateOrth(BOXA * boxas,l_int32 w,l_int32 h,l_int32 rotation)420 boxaRotateOrth(BOXA *boxas,
421 l_int32 w,
422 l_int32 h,
423 l_int32 rotation)
424 {
425 l_int32 i, n;
426 BOX *boxs, *boxd;
427 BOXA *boxad;
428
429 PROCNAME("boxaRotateOrth");
430
431 if (!boxas)
432 return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
433 if (rotation == 0)
434 return boxaCopy(boxas, L_COPY);
435 if (rotation < 1 || rotation > 3)
436 return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL);
437
438 n = boxaGetCount(boxas);
439 if ((boxad = boxaCreate(n)) == NULL)
440 return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
441 for (i = 0; i < n; i++) {
442 if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL)
443 return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
444 boxd = boxRotateOrth(boxs, w, h, rotation);
445 boxDestroy(&boxs);
446 boxaAddBox(boxad, boxd, L_INSERT);
447 }
448
449 return boxad;
450 }
451
452
453 /*!
454 * boxRotateOrth()
455 *
456 * Input: box
457 * w, h (of image in which the box is embedded)
458 * rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
459 * all rotations are clockwise)
460 * Return: boxd, or null on error
461 *
462 * Notes:
463 * (1) Rotate the image with the embedded box by the specified amount.
464 * (2) After rotation, the rotated box is always measured with
465 * respect to the UL corner of the image.
466 */
467 BOX *
boxRotateOrth(BOX * box,l_int32 w,l_int32 h,l_int32 rotation)468 boxRotateOrth(BOX *box,
469 l_int32 w,
470 l_int32 h,
471 l_int32 rotation)
472 {
473 l_int32 bx, by, bw, bh, xdist, ydist;
474
475 PROCNAME("boxRotateOrth");
476
477 if (!box)
478 return (BOX *)ERROR_PTR("box not defined", procName, NULL);
479 if (rotation == 0)
480 return boxCopy(box);
481 if (rotation < 1 || rotation > 3)
482 return (BOX *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL);
483 boxGetGeometry(box, &bx, &by, &bw, &bh);
484 ydist = h - by - bh; /* below box */
485 xdist = w - bx - bw; /* to right of box */
486 if (rotation == 1) /* 90 deg cw */
487 return boxCreate(ydist, bx, bh, bw);
488 else if (rotation == 2) /* 180 deg cw */
489 return boxCreate(xdist, ydist, bw, bh);
490 else /* rotation == 3, 270 deg cw */
491 return boxCreate(by, xdist, bh, bw);
492 }
493
494
495 /*---------------------------------------------------------------------*
496 * Boxa sort *
497 *---------------------------------------------------------------------*/
498 /*!
499 * boxaSort()
500 *
501 * Input: boxa
502 * sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
503 * L_SORT_BY_HEIGHT, L_SORT_BY_MIN_DIMENSION,
504 * L_SORT_BY_MAX_DIMENSION, L_SORT_BY_PERIMETER,
505 * L_SORT_BY_AREA, L_SORT_BY_ASPECT_RATIO)
506 * sortorder (L_SORT_INCREASING, L_SORT_DECREASING)
507 * &naindex (<optional return> index of sorted order into
508 * original array)
509 * Return: boxad (sorted version of boxas), or null on error
510 */
511 BOXA *
boxaSort(BOXA * boxas,l_int32 sorttype,l_int32 sortorder,NUMA ** pnaindex)512 boxaSort(BOXA *boxas,
513 l_int32 sorttype,
514 l_int32 sortorder,
515 NUMA **pnaindex)
516 {
517 l_int32 i, n, x, y, w, h, size;
518 BOXA *boxad;
519 NUMA *na, *naindex;
520
521 PROCNAME("boxaSort");
522
523 if (pnaindex) *pnaindex = NULL;
524 if (!boxas)
525 return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
526 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
527 sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
528 sorttype != L_SORT_BY_MIN_DIMENSION &&
529 sorttype != L_SORT_BY_MAX_DIMENSION &&
530 sorttype != L_SORT_BY_PERIMETER &&
531 sorttype != L_SORT_BY_AREA &&
532 sorttype != L_SORT_BY_ASPECT_RATIO)
533 return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL);
534 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
535 return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL);
536
537 /* Use O(n) binsort if possible */
538 n = boxaGetCount(boxas);
539 if (n > MIN_COMPS_FOR_BIN_SORT &&
540 ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) ||
541 (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) ||
542 (sorttype == L_SORT_BY_PERIMETER)))
543 return boxaBinSort(boxas, sorttype, sortorder, pnaindex);
544
545 /* Build up numa of specific data */
546 if ((na = numaCreate(n)) == NULL)
547 return (BOXA *)ERROR_PTR("na not made", procName, NULL);
548 for (i = 0; i < n; i++) {
549 boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
550 switch (sorttype)
551 {
552 case L_SORT_BY_X:
553 numaAddNumber(na, x);
554 break;
555 case L_SORT_BY_Y:
556 numaAddNumber(na, y);
557 break;
558 case L_SORT_BY_WIDTH:
559 numaAddNumber(na, w);
560 break;
561 case L_SORT_BY_HEIGHT:
562 numaAddNumber(na, h);
563 break;
564 case L_SORT_BY_MIN_DIMENSION:
565 size = L_MIN(w, h);
566 numaAddNumber(na, size);
567 break;
568 case L_SORT_BY_MAX_DIMENSION:
569 size = L_MAX(w, h);
570 numaAddNumber(na, size);
571 break;
572 case L_SORT_BY_PERIMETER:
573 size = w + h;
574 numaAddNumber(na, size);
575 break;
576 case L_SORT_BY_AREA:
577 size = w * h;
578 numaAddNumber(na, size);
579 break;
580 case L_SORT_BY_ASPECT_RATIO:
581 numaAddNumber(na, (l_float32)w / (l_float32)h);
582 break;
583 default:
584 L_WARNING("invalid sort type", procName);
585 }
586 }
587
588 /* Get the sort index for data array */
589 if ((naindex = numaGetSortIndex(na, sortorder)) == NULL)
590 return (BOXA *)ERROR_PTR("naindex not made", procName, NULL);
591
592 /* Build up sorted boxa using sort index */
593 boxad = boxaSortByIndex(boxas, naindex);
594
595 if (pnaindex)
596 *pnaindex = naindex;
597 else
598 numaDestroy(&naindex);
599 numaDestroy(&na);
600 return boxad;
601 }
602
603
604 /*!
605 * boxaBinSort()
606 *
607 * Input: boxa
608 * sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
609 * L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER)
610 * sortorder (L_SORT_INCREASING, L_SORT_DECREASING)
611 * &naindex (<optional return> index of sorted order into
612 * original array)
613 * Return: boxad (sorted version of boxas), or null on error
614 *
615 * Notes:
616 * (1) For a large number of boxes (say, greater than 1000), this
617 * O(n) binsort is much faster than the O(nlogn) shellsort.
618 * For 5000 components, this is over 20x faster than boxaSort().
619 * (2) Consequently, boxaSort() calls this function if it will
620 * likely go much faster.
621 */
622 BOXA *
boxaBinSort(BOXA * boxas,l_int32 sorttype,l_int32 sortorder,NUMA ** pnaindex)623 boxaBinSort(BOXA *boxas,
624 l_int32 sorttype,
625 l_int32 sortorder,
626 NUMA **pnaindex)
627 {
628 l_int32 i, n, x, y, w, h;
629 BOXA *boxad;
630 NUMA *na, *naindex;
631
632 PROCNAME("boxaBinSort");
633
634 if (pnaindex) *pnaindex = NULL;
635 if (!boxas)
636 return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
637 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
638 sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
639 sorttype != L_SORT_BY_PERIMETER)
640 return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL);
641 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
642 return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL);
643
644 /* Generate Numa of appropriate box dimensions */
645 n = boxaGetCount(boxas);
646 if ((na = numaCreate(n)) == NULL)
647 return (BOXA *)ERROR_PTR("na not made", procName, NULL);
648 for (i = 0; i < n; i++) {
649 boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
650 switch (sorttype)
651 {
652 case L_SORT_BY_X:
653 numaAddNumber(na, x);
654 break;
655 case L_SORT_BY_Y:
656 numaAddNumber(na, y);
657 break;
658 case L_SORT_BY_WIDTH:
659 numaAddNumber(na, w);
660 break;
661 case L_SORT_BY_HEIGHT:
662 numaAddNumber(na, h);
663 break;
664 case L_SORT_BY_PERIMETER:
665 numaAddNumber(na, w + h);
666 break;
667 default:
668 L_WARNING("invalid sort type", procName);
669 }
670 }
671
672 /* Get the sort index for data array */
673 if ((naindex = numaGetBinSortIndex(na, sortorder)) == NULL)
674 return (BOXA *)ERROR_PTR("naindex not made", procName, NULL);
675
676 /* Build up sorted boxa using the sort index */
677 boxad = boxaSortByIndex(boxas, naindex);
678
679 if (pnaindex)
680 *pnaindex = naindex;
681 else
682 numaDestroy(&naindex);
683 numaDestroy(&na);
684 return boxad;
685 }
686
687
688 /*!
689 * boxaSortByIndex()
690 *
691 * Input: boxas
692 * naindex (na that maps from the new boxa to the input boxa)
693 * Return: boxad (sorted), or null on error
694 */
695 BOXA *
boxaSortByIndex(BOXA * boxas,NUMA * naindex)696 boxaSortByIndex(BOXA *boxas,
697 NUMA *naindex)
698 {
699 l_int32 i, n, index;
700 BOX *box;
701 BOXA *boxad;
702
703 PROCNAME("boxaSortByIndex");
704
705 if (!boxas)
706 return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
707 if (!naindex)
708 return (BOXA *)ERROR_PTR("naindex not defined", procName, NULL);
709
710 n = boxaGetCount(boxas);
711 boxad = boxaCreate(n);
712 for (i = 0; i < n; i++) {
713 numaGetIValue(naindex, i, &index);
714 box = boxaGetBox(boxas, index, L_COPY);
715 boxaAddBox(boxad, box, L_INSERT);
716 }
717
718 return boxad;
719 }
720
721
722 /*!
723 * boxaSort2d()
724 *
725 * Input: boxas
726 * &naa (<optional return> numaa with sorted indices
727 * whose values are the indices of the input array)
728 * delta1 (min overlap that permits aggregation of a box
729 * onto a boxa of horizontally-aligned boxes; pass 1)
730 * delta2 (min overlap that permits aggregation of a box
731 * onto a boxa of horizontally-aligned boxes; pass 2)
732 * minh1 (components less than this height either join an
733 * existing boxa or are set aside for pass 2)
734 * Return: boxaa (2d sorted version of boxa), or null on error
735 *
736 * Notes:
737 * (1) The final result is a sort where the 'fast scan' direction is
738 * left to right, and the 'slow scan' direction is from top
739 * to bottom. Each boxa in the boxaa represents a sorted set
740 * of boxes from left to right.
741 * (2) Two passes are used to aggregate the boxas, which can corresond
742 * to characters or words in a line of text. In pass 1, only
743 * taller components, which correspond to xheight or larger,
744 * are permitted to start a new boxa, whereas in pass 2,
745 * the remaining vertically-challenged components are allowed
746 * to join an existing boxa or start a new one.
747 * (3) If delta1 < 0, the first pass allows aggregation when
748 * boxes in the same boxa do not overlap vertically.
749 * The distance by which they can miss and still be aggregated
750 * is the absolute value |delta1|. Similar for delta2 on
751 * the second pass.
752 * (4) On the first pass, any component of height less than minh1
753 * cannot start a new boxa; it's put aside for later insertion.
754 * (5) On the second pass, any small component that doesn't align
755 * with an existing boxa can start a new one.
756 * (6) This can be used to identify lines of text from
757 * character or word bounding boxes.
758 */
759 BOXAA *
boxaSort2d(BOXA * boxas,NUMAA ** pnaad,l_int32 delta1,l_int32 delta2,l_int32 minh1)760 boxaSort2d(BOXA *boxas,
761 NUMAA **pnaad,
762 l_int32 delta1,
763 l_int32 delta2,
764 l_int32 minh1)
765 {
766 l_int32 i, index, h, nt, ne, n, m, ival;
767 BOX *box;
768 BOXA *boxa, *boxae, *boxan, *boxat1, *boxat2, *boxav, *boxavs;
769 BOXAA *baa, *baad;
770 NUMA *naindex, *nae, *nan, *nah, *nav, *nat1, *nat2, *nad;
771 NUMAA *naa, *naad;
772
773 PROCNAME("boxaSort2d");
774
775 if (pnaad) *pnaad = NULL;
776 if (!boxas)
777 return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);
778
779 /* Sort from left to right */
780 if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex))
781 == NULL)
782 return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL);
783
784 /* First pass: assign taller boxes to boxa by row */
785 nt = boxaGetCount(boxa);
786 baa = boxaaCreate(0);
787 naa = numaaCreate(0);
788 boxae = boxaCreate(0); /* save small height boxes here */
789 nae = numaCreate(0); /* keep track of small height boxes */
790 for (i = 0; i < nt; i++) {
791 box = boxaGetBox(boxa, i, L_CLONE);
792 boxGetGeometry(box, NULL, NULL, NULL, &h);
793 if (h < minh1) { /* save for 2nd pass */
794 boxaAddBox(boxae, box, L_INSERT);
795 numaAddNumber(nae, i);
796 }
797 else {
798 n = boxaaGetCount(baa);
799 boxaaAlignBox(baa, box, delta1, &index);
800 if (index < n) { /* append to an existing boxa */
801 boxaaAddBox(baa, index, box, L_INSERT);
802 }
803 else { /* doesn't align, need new boxa */
804 boxan = boxaCreate(0);
805 boxaAddBox(boxan, box, L_INSERT);
806 boxaaAddBoxa(baa, boxan, L_INSERT);
807 nan = numaCreate(0);
808 numaaAddNuma(naa, nan, L_INSERT);
809 }
810 numaGetIValue(naindex, i, &ival);
811 numaaAddNumber(naa, index, ival);
812 }
813 }
814 boxaDestroy(&boxa);
815 numaDestroy(&naindex);
816
817 /* Second pass: feed in small height boxes;
818 * TODO: this correctly, using local y position! */
819 ne = boxaGetCount(boxae);
820 for (i = 0; i < ne; i++) {
821 box = boxaGetBox(boxae, i, L_CLONE);
822 n = boxaaGetCount(baa);
823 boxaaAlignBox(baa, box, delta2, &index);
824 if (index < n) { /* append to an existing boxa */
825 boxaaAddBox(baa, index, box, L_INSERT);
826 }
827 else { /* doesn't align, need new boxa */
828 boxan = boxaCreate(0);
829 boxaAddBox(boxan, box, L_INSERT);
830 boxaaAddBoxa(baa, boxan, L_INSERT);
831 nan = numaCreate(0);
832 numaaAddNuma(naa, nan, L_INSERT);
833 }
834 numaGetIValue(nae, i, &ival); /* location in original boxas */
835 numaaAddNumber(naa, index, ival);
836 }
837
838 /* Sort each boxa in the boxaa */
839 m = boxaaGetCount(baa);
840 for (i = 0; i < m; i++) {
841 boxat1 = boxaaGetBoxa(baa, i, L_CLONE);
842 boxat2 = boxaSort(boxat1, L_SORT_BY_X, L_SORT_INCREASING, &nah);
843 boxaaReplaceBoxa(baa, i, boxat2);
844 nat1 = numaaGetNuma(naa, i, L_CLONE);
845 nat2 = numaSortByIndex(nat1, nah);
846 numaaReplaceNuma(naa, i, nat2);
847 boxaDestroy(&boxat1);
848 numaDestroy(&nat1);
849 numaDestroy(&nah);
850 }
851
852 /* Sort boxa vertically within boxaa, using the first box
853 * in each boxa. */
854 m = boxaaGetCount(baa);
855 boxav = boxaCreate(m); /* holds first box in each boxa in baa */
856 naad = numaaCreate(m);
857 if (pnaad)
858 *pnaad = naad;
859 baad = boxaaCreate(m);
860 for (i = 0; i < m; i++) {
861 boxat1 = boxaaGetBoxa(baa, i, L_CLONE);
862 box = boxaGetBox(boxat1, 0, L_CLONE);
863 boxaAddBox(boxav, box, L_INSERT);
864 boxaDestroy(&boxat1);
865 }
866 boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav);
867 for (i = 0; i < m; i++) {
868 numaGetIValue(nav, i, &index);
869 boxa = boxaaGetBoxa(baa, index, L_CLONE);
870 boxaaAddBoxa(baad, boxa, L_INSERT);
871 nad = numaaGetNuma(naa, index, L_CLONE);
872 numaaAddNuma(naad, nad, L_INSERT);
873 }
874
875 /* fprintf(stderr, "box count = %d, numaa count = %d\n", nt,
876 numaaGetNumberCount(naad)); */
877
878 boxaaDestroy(&baa);
879 boxaDestroy(&boxav);
880 boxaDestroy(&boxavs);
881 boxaDestroy(&boxae);
882 numaDestroy(&nav);
883 numaDestroy(&nae);
884 numaaDestroy(&naa);
885 if (!pnaad)
886 numaaDestroy(&naad);
887
888 return baad;
889 }
890
891
892 /*!
893 * boxaSort2dByIndex()
894 *
895 * Input: boxas
896 * naa (numaa that maps from the new baa to the input boxa)
897 * Return: baa (sorted boxaa), or null on error
898 */
899 BOXAA *
boxaSort2dByIndex(BOXA * boxas,NUMAA * naa)900 boxaSort2dByIndex(BOXA *boxas,
901 NUMAA *naa)
902 {
903 l_int32 ntot, boxtot, i, j, n, nn, index;
904 BOX *box;
905 BOXA *boxa;
906 BOXAA *baa;
907 NUMA *na;
908
909 PROCNAME("boxaSort2dByIndex");
910
911 if (!boxas)
912 return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);
913 if (!naa)
914 return (BOXAA *)ERROR_PTR("naindex not defined", procName, NULL);
915
916 /* Check counts */
917 ntot = numaaGetNumberCount(naa);
918 boxtot = boxaGetCount(boxas);
919 if (ntot != boxtot)
920 return (BOXAA *)ERROR_PTR("element count mismatch", procName, NULL);
921
922 n = numaaGetCount(naa);
923 baa = boxaaCreate(n);
924 for (i = 0; i < n; i++) {
925 na = numaaGetNuma(naa, i, L_CLONE);
926 nn = numaGetCount(na);
927 boxa = boxaCreate(nn);
928 for (j = 0; j < nn; j++) {
929 numaGetIValue(na, i, &index);
930 box = boxaGetBox(boxas, index, L_COPY);
931 boxaAddBox(boxa, box, L_INSERT);
932 }
933 boxaaAddBoxa(baa, boxa, L_INSERT);
934 numaDestroy(&na);
935 }
936
937 return baa;
938 }
939
940
941 /*---------------------------------------------------------------------*
942 * Other Boxaa functions *
943 *---------------------------------------------------------------------*/
944 /*!
945 * boxaaGetExtent()
946 *
947 * Input: boxaa
948 * &w (<optional return> width)
949 * &h (<optional return> height)
950 * &box (<optional return>, minimum box containing all boxa
951 * in boxaa)
952 * Return: 0 if OK, 1 on error
953 *
954 * Notes:
955 * (1) The returned w and h are the minimum size image
956 * that would contain all boxes untranslated.
957 */
958 l_int32
boxaaGetExtent(BOXAA * boxaa,l_int32 * pw,l_int32 * ph,BOX ** pbox)959 boxaaGetExtent(BOXAA *boxaa,
960 l_int32 *pw,
961 l_int32 *ph,
962 BOX **pbox)
963 {
964 l_int32 i, j, n, m, x, y, w, h, xmax, ymax, xmin, ymin;
965 BOXA *boxa;
966
967 PROCNAME("boxaaGetExtent");
968
969 if (!pw && !ph && !pbox)
970 return ERROR_INT("no ptrs defined", procName, 1);
971 if (pbox) *pbox = NULL;
972 if (pw) *pw = 0;
973 if (ph) *ph = 0;
974 if (!boxaa)
975 return ERROR_INT("boxaa not defined", procName, 1);
976
977 n = boxaaGetCount(boxaa);
978 if (n == 0)
979 return ERROR_INT("no boxa in boxaa", procName, 1);
980
981 xmax = ymax = 0;
982 xmin = ymin = 100000000;
983 for (i = 0; i < n; i++) {
984 boxa = boxaaGetBoxa(boxaa, i, L_CLONE);
985 m = boxaGetCount(boxa);
986 for (j = 0; j < m; j++) {
987 boxaGetBoxGeometry(boxa, j, &x, &y, &w, &h);
988 xmin = L_MIN(xmin, x);
989 ymin = L_MIN(ymin, y);
990 xmax = L_MAX(xmax, x + w);
991 ymax = L_MAX(ymax, y + h);
992 }
993 }
994 if (pw) *pw = xmax;
995 if (ph) *ph = ymax;
996 if (pbox)
997 *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin);
998
999 return 0;
1000 }
1001
1002
1003 /*!
1004 * boxaaFlattenToBoxa()
1005 *
1006 * Input: boxaa
1007 * &naindex (<optional return> the boxa index in the boxaa)
1008 * copyflag (L_COPY or L_CLONE)
1009 * Return: boxa, or null on error
1010 *
1011 * Notes:
1012 * (1) This 'flattens' the boxaa to a boxa, taking the boxes in
1013 * order in the first boxa, then the second, etc.
1014 * (2) If &naindex is defined, we generate a Numa that gives, for
1015 * each box in the boxaa, the index of the boxa to which it belongs.
1016 */
1017 BOXA *
boxaaFlattenToBoxa(BOXAA * baa,NUMA ** pnaindex,l_int32 copyflag)1018 boxaaFlattenToBoxa(BOXAA *baa,
1019 NUMA **pnaindex,
1020 l_int32 copyflag)
1021 {
1022 l_int32 i, j, m, n;
1023 BOXA *boxa, *boxat;
1024 BOX *box;
1025 NUMA *naindex;
1026
1027 PROCNAME("boxaaFlattenToBoxa");
1028
1029 if (pnaindex) *pnaindex = NULL;
1030 if (!baa)
1031 return (BOXA *)ERROR_PTR("baa not defined", procName, NULL);
1032 if (copyflag != L_COPY && copyflag != L_CLONE)
1033 return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);
1034 if (pnaindex) {
1035 naindex = numaCreate(0);
1036 *pnaindex = naindex;
1037 }
1038
1039 n = boxaaGetCount(baa);
1040 boxa = boxaCreate(n);
1041 for (i = 0; i < n; i++) {
1042 boxat = boxaaGetBoxa(baa, i, L_CLONE);
1043 m = boxaGetCount(boxat);
1044 for (j = 0; j < m; j++) {
1045 box = boxaGetBox(boxat, j, copyflag);
1046 boxaAddBox(boxa, box, L_INSERT);
1047 if (pnaindex)
1048 numaAddNumber(naindex, i); /* save 'row' number */
1049 }
1050 boxaDestroy(&boxat);
1051 }
1052
1053 return boxa;
1054 }
1055
1056
1057 /*!
1058 * boxaaAlignBox()
1059 *
1060 * Input: boxaa
1061 * box (to be aligned with the last of one of the boxa
1062 * in boxaa, if possible)
1063 * delta (amount by which consecutive components can miss
1064 * in overlap and still be included in the array)
1065 * &index (of boxa with best overlap, or if none match,
1066 * this is the index of the next boxa to be generated)
1067 * Return: 0 if OK, 1 on error
1068 *
1069 * Notes:
1070 * (1) This is not greedy; it finds the boxa whose last box has
1071 * the biggest overlap with the input box.
1072 */
1073 l_int32
boxaaAlignBox(BOXAA * baa,BOX * box,l_int32 delta,l_int32 * pindex)1074 boxaaAlignBox(BOXAA *baa,
1075 BOX *box,
1076 l_int32 delta,
1077 l_int32 *pindex)
1078 {
1079 l_int32 i, n, m, y, yt, h, ht, ovlp, maxovlp, maxindex;
1080 BOXA *boxa;
1081
1082 PROCNAME("boxaaAlignBox");
1083
1084 if (!baa)
1085 return ERROR_INT("baa not defined", procName, 1);
1086 if (!box)
1087 return ERROR_INT("box not defined", procName, 1);
1088 if (!pindex)
1089 return ERROR_INT("&index not defined", procName, 1);
1090
1091 n = boxaaGetCount(baa);
1092 boxGetGeometry(box, NULL, &y, NULL, &h);
1093 maxovlp = -10000000;
1094 for (i = 0; i < n; i++) {
1095 boxa = boxaaGetBoxa(baa, i, L_CLONE);
1096 if ((m = boxaGetCount(boxa)) == 0) {
1097 L_WARNING("no boxes in boxa", procName);
1098 continue;
1099 }
1100 boxaGetBoxGeometry(boxa, m - 1, NULL, &yt, NULL, &ht); /* last one */
1101 boxaDestroy(&boxa);
1102
1103 /* Overlap < 0 means the components do not overlap vertically */
1104 if (yt >= y)
1105 ovlp = y + h - 1 - yt;
1106 else
1107 ovlp = yt + ht - 1 - y;
1108 if (ovlp > maxovlp) {
1109 maxovlp = ovlp;
1110 maxindex = i;
1111 }
1112 }
1113
1114 if (maxovlp + delta >= 0)
1115 *pindex = maxindex;
1116 else
1117 *pindex = n;
1118 return 0;
1119 }
1120
1121
1122