• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -  This software is distributed in the hope that it will be
4  -  useful, but with NO WARRANTY OF ANY KIND.
5  -  No author or distributor accepts responsibility to anyone for the
6  -  consequences of using this software, or for whether it serves any
7  -  particular purpose or works at all, unless he or she says so in
8  -  writing.  Everyone is granted permission to copy, modify and
9  -  redistribute this source code, for commercial or non-commercial
10  -  purposes, with the following restrictions: (1) the origin of this
11  -  source code must not be misrepresented; (2) modified versions must
12  -  be plainly marked as such; and (3) this notice may not be removed
13  -  or altered from any source or modified source distribution.
14  *====================================================================*/
15 
16 /*
17  *   boxfunc2.c
18  *
19  *      Boxa/Box transform (shift, scale) and orthogonal rotation
20  *           BOXA            *boxaTransform()
21  *           BOX             *boxTransform()
22  *           BOXA            *boxaTransformOrdered()
23  *           BOX             *boxTransformOrdered()
24  *           BOXA            *boxaRotateOrth()
25  *           BOX             *boxRotateOrth()
26  *
27  *      Boxa sort
28  *           BOXA            *boxaSort()
29  *           BOXA            *boxaBinSort()
30  *           BOXA            *boxaSortByIndex()
31  *           BOXAA           *boxaSort2d()
32  *           BOXAA           *boxaSort2dByIndex()
33  *
34  *      Other boxaa functions
35  *           l_int32          boxaaGetExtent()
36  *           BOXA            *boxaaFlattenToBoxa()
37  *           l_int32          boxaaAlignBox()
38  */
39 
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <math.h>
43 #include "allheaders.h"
44 
45     /* For more than this number of c.c. in a binarized image of
46      * semi-perimeter (w + h) about 5000 or less, the O(n) binsort
47      * is faster than the O(nlogn) shellsort.  */
48 static const l_int32   MIN_COMPS_FOR_BIN_SORT = 500;
49 
50 
51 /*---------------------------------------------------------------------*
52  *      Boxa/Box transform (shift, scale) and orthogonal rotation      *
53  *---------------------------------------------------------------------*/
54 /*!
55  *  boxaTransform()
56  *
57  *      Input:  boxa
58  *              shiftx, shifty
59  *              scalex, scaley
60  *      Return: boxad, or null on error
61  *
62  *  Notes:
63  *      (1) This is a very simple function that first shifts, then scales.
64  */
65 BOXA *
boxaTransform(BOXA * boxas,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley)66 boxaTransform(BOXA      *boxas,
67               l_int32    shiftx,
68               l_int32    shifty,
69               l_float32  scalex,
70               l_float32  scaley)
71 {
72 l_int32  i, n;
73 BOX     *boxs, *boxd;
74 BOXA    *boxad;
75 
76     PROCNAME("boxaTransform");
77 
78     if (!boxas)
79         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
80     n = boxaGetCount(boxas);
81     if ((boxad = boxaCreate(n)) == NULL)
82         return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
83     for (i = 0; i < n; i++) {
84         if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL)
85             return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
86         boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley);
87         boxDestroy(&boxs);
88         boxaAddBox(boxad, boxd, L_INSERT);
89     }
90 
91     return boxad;
92 }
93 
94 
95 /*!
96  *  boxTransform()
97  *
98  *      Input:  box
99  *              shiftx, shifty
100  *              scalex, scaley
101  *      Return: boxd, or null on error
102  *
103  *  Notes:
104  *      (1) This is a very simple function that first shifts, then scales.
105  */
106 BOX *
boxTransform(BOX * box,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley)107 boxTransform(BOX       *box,
108              l_int32    shiftx,
109              l_int32    shifty,
110              l_float32  scalex,
111              l_float32  scaley)
112 {
113     PROCNAME("boxTransform");
114 
115     if (!box)
116         return (BOX *)ERROR_PTR("box not defined", procName, NULL);
117     return boxCreate((l_int32)(scalex * (box->x + shiftx) + 0.5),
118                      (l_int32)(scaley * (box->y + shifty) + 0.5),
119                      (l_int32)(L_MAX(1.0, scalex * box->w + 0.5)),
120                      (l_int32)(L_MAX(1.0, scaley * box->h + 0.5)));
121 }
122 
123 
124 /*!
125  *  boxaTransformOrdered()
126  *
127  *      Input:  boxa
128  *              shiftx, shifty
129  *              scalex, scaley
130  *              xcen, ycen (center of rotation)
131  *              angle (in radians; clockwise is positive)
132  *              order (one of 6 combinations: L_TR_SC_RO, ...)
133  *      Return: boxd, or null on error
134  *
135  *  Notes:
136  *      (1) This allows a sequence of linear transforms on each box.
137  *          the transforms are from the affine set, composed of
138  *          shift, scaling and rotation, and the order of the
139  *          transforms is specified.
140  *      (2) Although these operations appear to be on an infinite
141  *          2D plane, in practice the region of interest is clipped
142  *          to a finite image.  The center of rotation is usually taken
143  *          with respect to the image (either the UL corner or the
144  *          center).  A translation can have two very different effects:
145  *            (a) Moves the boxes across the fixed image region.
146  *            (b) Moves the image origin, causing a change in the image
147  *                region and an opposite effective translation of the boxes.
148  *          This function should only be used for (a), where the image
149  *          region is fixed on translation.  If the image region is
150  *          changed by the translation, use instead the functions
151  *          in affinecompose.c, where the image region and rotation
152  *          center can be computed from the actual clipping due to
153  *          translation of the image origin.
154  *      (3) See boxTransformOrdered() for usage and implementation details.
155  */
156 BOXA *
boxaTransformOrdered(BOXA * boxas,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley,l_int32 xcen,l_int32 ycen,l_float32 angle,l_int32 order)157 boxaTransformOrdered(BOXA      *boxas,
158                      l_int32    shiftx,
159                      l_int32    shifty,
160                      l_float32  scalex,
161                      l_float32  scaley,
162                      l_int32    xcen,
163                      l_int32    ycen,
164                      l_float32  angle,
165                      l_int32    order)
166 {
167 l_int32  i, n;
168 BOX     *boxs, *boxd;
169 BOXA    *boxad;
170 
171     PROCNAME("boxaTransformOrdered");
172 
173     if (!boxas)
174         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
175     n = boxaGetCount(boxas);
176     if ((boxad = boxaCreate(n)) == NULL)
177         return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
178     for (i = 0; i < n; i++) {
179         if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL)
180             return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
181         boxd = boxTransformOrdered(boxs, shiftx, shifty, scalex, scaley,
182                                    xcen, ycen, angle, order);
183         boxDestroy(&boxs);
184         boxaAddBox(boxad, boxd, L_INSERT);
185     }
186 
187     return boxad;
188 }
189 
190 
191 /*!
192  *  boxTransformOrdered()
193  *
194  *      Input:  boxs
195  *              shiftx, shifty
196  *              scalex, scaley
197  *              xcen, ycen (center of rotation)
198  *              angle (in radians; clockwise is positive)
199  *              order (one of 6 combinations: L_TR_SC_RO, ...)
200  *      Return: boxd, or null on error
201  *
202  *  Notes:
203  *      (1) This allows a sequence of linear transforms, composed of
204  *          shift, scaling and rotation, where the order of the
205  *          transforms is specified.
206  *      (2) The rotation is taken about a point specified by (xcen, ycen).
207  *          Let the components of the vector from the center of rotation
208  *          to the box center be (xdif, ydif):
209  *            xdif = (bx + 0.5 * bw) - xcen
210  *            ydif = (by + 0.5 * bh) - ycen
211  *          Then the box center after rotation has new components:
212  *            bxcen = xcen + xdif * cosa + ydif * sina
213  *            bycen = ycen + ydif * cosa - xdif * sina
214  *          where cosa and sina are the cos and sin of the angle,
215  *          and the enclosing box for the rotated box has size:
216  *            rw = |bw * cosa| + |bh * sina|
217  *            rh = |bh * cosa| + |bw * sina|
218  *          where bw and bh are the unrotated width and height.
219  *          Then the box UL corner (rx, ry) is
220  *            rx = bxcen - 0.5 * rw
221  *            ry = bycen - 0.5 * rh
222  *      (3) The center of rotation specified by args @xcen and @ycen
223  *          is the point BEFORE any translation or scaling.  If the
224  *          rotation is not the first operation, this function finds
225  *          the actual center at the time of rotation.  It does this
226  *          by making the following assumptions:
227  *             (1) Any scaling is with respect to the UL corner, so
228  *                 that the center location scales accordingly.
229  *             (2) A translation does not affect the center of
230  *                 the image; it just moves the boxes.
231  *          We always use assumption (1).  However, assumption (2)
232  *          will be incorrect if the apparent translation is due
233  *          to a clipping operation that, in effect, moves the
234  *          origin of the image.  In that case, you should NOT use
235  *          these simple functions.  Instead, use the functions
236  *          in affinecompose.c, where the rotation center can be
237  *          computed from the actual clipping due to translation
238  *          of the image origin.
239  */
240 BOX *
boxTransformOrdered(BOX * boxs,l_int32 shiftx,l_int32 shifty,l_float32 scalex,l_float32 scaley,l_int32 xcen,l_int32 ycen,l_float32 angle,l_int32 order)241 boxTransformOrdered(BOX       *boxs,
242                     l_int32    shiftx,
243                     l_int32    shifty,
244                     l_float32  scalex,
245                     l_float32  scaley,
246                     l_int32    xcen,
247                     l_int32    ycen,
248                     l_float32  angle,
249                     l_int32    order)
250 {
251 l_int32    bx, by, bw, bh, tx, ty, tw, th;
252 l_int32    xcent, ycent;  /* transformed center of rotation due to scaling */
253 l_float32  sina, cosa, xdif, ydif, rx, ry, rw, rh;
254 BOX       *boxd;
255 
256     PROCNAME("boxTransformOrdered");
257 
258     if (!boxs)
259         return (BOX *)ERROR_PTR("boxs not defined", procName, NULL);
260     if (order != L_TR_SC_RO && order != L_SC_RO_TR && order != L_RO_TR_SC &&
261         order != L_TR_RO_SC && order != L_RO_SC_TR && order != L_SC_TR_RO)
262         return (BOX *)ERROR_PTR("order invalid", procName, NULL);
263 
264     boxGetGeometry(boxs, &bx, &by, &bw, &bh);
265     if (angle != 0.0) {
266         sina = sin(angle);
267         cosa = cos(angle);
268     }
269 
270     if (order == L_TR_SC_RO) {
271         tx = (l_int32)(scalex * (bx + shiftx) + 0.5);
272         ty = (l_int32)(scaley * (by + shifty) + 0.5);
273         tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
274         th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
275         xcent = (l_int32)(scalex * xcen + 0.5);
276         ycent = (l_int32)(scaley * ycen + 0.5);
277         if (angle == 0.0)
278             boxd = boxCreate(tx, ty, tw, th);
279         else {
280             xdif = tx + 0.5 * tw - xcent;
281             ydif = ty + 0.5 * th - ycent;
282             rw = L_ABS(tw * cosa) + L_ABS(th * sina);
283             rh = L_ABS(th * cosa) + L_ABS(tw * sina);
284             rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
285             ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
286             boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
287                              (l_int32)rh);
288         }
289     }
290     else if (order == L_SC_TR_RO) {
291         tx = (l_int32)(scalex * bx + shiftx + 0.5);
292         ty = (l_int32)(scaley * by + shifty + 0.5);
293         tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
294         th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
295         xcent = (l_int32)(scalex * xcen + 0.5);
296         ycent = (l_int32)(scaley * ycen + 0.5);
297         if (angle == 0.0)
298             boxd = boxCreate(tx, ty, tw, th);
299         else {
300             xdif = tx + 0.5 * tw - xcent;
301             ydif = ty + 0.5 * th - ycent;
302             rw = L_ABS(tw * cosa) + L_ABS(th * sina);
303             rh = L_ABS(th * cosa) + L_ABS(tw * sina);
304             rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
305             ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
306             boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
307                              (l_int32)rh);
308         }
309     }
310     else if (order == L_RO_TR_SC) {
311         if (angle == 0.0) {
312             rx = bx;
313             ry = by;
314             rw = bw;
315             rh = bh;
316         }
317         else {
318             xdif = bx + 0.5 * bw - xcen;
319             ydif = by + 0.5 * bh - ycen;
320             rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
321             rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
322             rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
323             ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
324         }
325         tx = (l_int32)(scalex * (rx + shiftx) + 0.5);
326         ty = (l_int32)(scaley * (ry + shifty) + 0.5);
327         tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
328         th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
329         boxd = boxCreate(tx, ty, tw, th);
330     }
331     else if (order == L_RO_SC_TR) {
332         if (angle == 0.0) {
333             rx = bx;
334             ry = by;
335             rw = bw;
336             rh = bh;
337         }
338         else {
339             xdif = bx + 0.5 * bw - xcen;
340             ydif = by + 0.5 * bh - ycen;
341             rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
342             rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
343             rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
344             ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
345         }
346         tx = (l_int32)(scalex * rx + shiftx + 0.5);
347         ty = (l_int32)(scaley * ry + shifty + 0.5);
348         tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
349         th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
350         boxd = boxCreate(tx, ty, tw, th);
351     }
352     else if (order == L_TR_RO_SC) {
353         tx = bx + shiftx;
354         ty = by + shifty;
355         if (angle == 0.0) {
356             rx = tx;
357             ry = ty;
358             rw = bw;
359             rh = bh;
360         }
361         else {
362             xdif = tx + 0.5 * bw - xcen;
363             ydif = ty + 0.5 * bh - ycen;
364             rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
365             rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
366             rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
367             ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
368         }
369         tx = (l_int32)(scalex * rx + 0.5);
370         ty = (l_int32)(scaley * ry + 0.5);
371         tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
372         th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
373         boxd = boxCreate(tx, ty, tw, th);
374     }
375     else {  /* order == L_SC_RO_TR) */
376         tx = (l_int32)(scalex * bx + 0.5);
377         ty = (l_int32)(scaley * by + 0.5);
378         tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
379         th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
380         xcent = (l_int32)(scalex * xcen + 0.5);
381         ycent = (l_int32)(scaley * ycen + 0.5);
382         if (angle == 0.0) {
383             rx = tx;
384             ry = ty;
385             rw = tw;
386             rh = th;
387         }
388         else {
389             xdif = tx + 0.5 * tw - xcent;
390             ydif = ty + 0.5 * th - ycent;
391             rw = L_ABS(tw * cosa) + L_ABS(th * sina);
392             rh = L_ABS(th * cosa) + L_ABS(tw * sina);
393             rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
394             ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
395         }
396         tx = (l_int32)(rx + shiftx + 0.5);
397         ty = (l_int32)(ry + shifty + 0.5);
398         tw = (l_int32)(rw + 0.5);
399         th = (l_int32)(rh + 0.5);
400         boxd = boxCreate(tx, ty, tw, th);
401     }
402 
403     return boxd;
404 }
405 
406 
407 /*!
408  *  boxaRotateOrth()
409  *
410  *      Input:  boxa
411  *              w, h (of image in which the boxa is embedded)
412  *              rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
413  *                        all rotations are clockwise)
414  *      Return: boxad, or null on error
415  *
416  *  Notes:
417  *      (1) See boxRotateOrth() for details.
418  */
419 BOXA *
boxaRotateOrth(BOXA * boxas,l_int32 w,l_int32 h,l_int32 rotation)420 boxaRotateOrth(BOXA    *boxas,
421                l_int32  w,
422                l_int32  h,
423                l_int32  rotation)
424 {
425 l_int32  i, n;
426 BOX     *boxs, *boxd;
427 BOXA    *boxad;
428 
429     PROCNAME("boxaRotateOrth");
430 
431     if (!boxas)
432         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
433     if (rotation == 0)
434         return boxaCopy(boxas, L_COPY);
435     if (rotation < 1 || rotation > 3)
436         return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL);
437 
438     n = boxaGetCount(boxas);
439     if ((boxad = boxaCreate(n)) == NULL)
440         return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
441     for (i = 0; i < n; i++) {
442         if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL)
443             return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
444         boxd = boxRotateOrth(boxs, w, h, rotation);
445         boxDestroy(&boxs);
446         boxaAddBox(boxad, boxd, L_INSERT);
447     }
448 
449     return boxad;
450 }
451 
452 
453 /*!
454  *  boxRotateOrth()
455  *
456  *      Input:  box
457  *              w, h (of image in which the box is embedded)
458  *              rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
459  *                        all rotations are clockwise)
460  *      Return: boxd, or null on error
461  *
462  *  Notes:
463  *      (1) Rotate the image with the embedded box by the specified amount.
464  *      (2) After rotation, the rotated box is always measured with
465  *          respect to the UL corner of the image.
466  */
467 BOX *
boxRotateOrth(BOX * box,l_int32 w,l_int32 h,l_int32 rotation)468 boxRotateOrth(BOX     *box,
469               l_int32  w,
470               l_int32  h,
471               l_int32  rotation)
472 {
473 l_int32  bx, by, bw, bh, xdist, ydist;
474 
475     PROCNAME("boxRotateOrth");
476 
477     if (!box)
478         return (BOX *)ERROR_PTR("box not defined", procName, NULL);
479     if (rotation == 0)
480         return boxCopy(box);
481     if (rotation < 1 || rotation > 3)
482         return (BOX *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL);
483     boxGetGeometry(box, &bx, &by, &bw, &bh);
484     ydist = h - by - bh;  /* below box */
485     xdist = w - bx - bw;  /* to right of box */
486     if (rotation == 1)   /* 90 deg cw */
487         return boxCreate(ydist, bx, bh, bw);
488     else if (rotation == 2)  /* 180 deg cw */
489         return boxCreate(xdist, ydist, bw, bh);
490     else  /*  rotation == 3, 270 deg cw */
491         return boxCreate(by, xdist, bh, bw);
492 }
493 
494 
495 /*---------------------------------------------------------------------*
496  *                              Boxa sort                              *
497  *---------------------------------------------------------------------*/
498 /*!
499  *  boxaSort()
500  *
501  *      Input:  boxa
502  *              sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
503  *                        L_SORT_BY_HEIGHT, L_SORT_BY_MIN_DIMENSION,
504  *                        L_SORT_BY_MAX_DIMENSION, L_SORT_BY_PERIMETER,
505  *                        L_SORT_BY_AREA, L_SORT_BY_ASPECT_RATIO)
506  *              sortorder  (L_SORT_INCREASING, L_SORT_DECREASING)
507  *              &naindex (<optional return> index of sorted order into
508  *                        original array)
509  *      Return: boxad (sorted version of boxas), or null on error
510  */
511 BOXA *
boxaSort(BOXA * boxas,l_int32 sorttype,l_int32 sortorder,NUMA ** pnaindex)512 boxaSort(BOXA    *boxas,
513          l_int32  sorttype,
514          l_int32  sortorder,
515          NUMA   **pnaindex)
516 {
517 l_int32    i, n, x, y, w, h, size;
518 BOXA      *boxad;
519 NUMA      *na, *naindex;
520 
521     PROCNAME("boxaSort");
522 
523     if (pnaindex) *pnaindex = NULL;
524     if (!boxas)
525         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
526     if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
527         sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
528         sorttype != L_SORT_BY_MIN_DIMENSION &&
529         sorttype != L_SORT_BY_MAX_DIMENSION &&
530         sorttype != L_SORT_BY_PERIMETER &&
531         sorttype != L_SORT_BY_AREA &&
532         sorttype != L_SORT_BY_ASPECT_RATIO)
533         return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL);
534     if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
535         return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL);
536 
537         /* Use O(n) binsort if possible */
538     n = boxaGetCount(boxas);
539     if (n > MIN_COMPS_FOR_BIN_SORT &&
540         ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) ||
541          (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) ||
542          (sorttype == L_SORT_BY_PERIMETER)))
543         return boxaBinSort(boxas, sorttype, sortorder, pnaindex);
544 
545         /* Build up numa of specific data */
546     if ((na = numaCreate(n)) == NULL)
547         return (BOXA *)ERROR_PTR("na not made", procName, NULL);
548     for (i = 0; i < n; i++) {
549         boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
550         switch (sorttype)
551         {
552         case L_SORT_BY_X:
553             numaAddNumber(na, x);
554             break;
555         case L_SORT_BY_Y:
556             numaAddNumber(na, y);
557             break;
558         case L_SORT_BY_WIDTH:
559             numaAddNumber(na, w);
560             break;
561         case L_SORT_BY_HEIGHT:
562             numaAddNumber(na, h);
563             break;
564         case L_SORT_BY_MIN_DIMENSION:
565             size = L_MIN(w, h);
566             numaAddNumber(na, size);
567             break;
568         case L_SORT_BY_MAX_DIMENSION:
569             size = L_MAX(w, h);
570             numaAddNumber(na, size);
571             break;
572         case L_SORT_BY_PERIMETER:
573             size = w + h;
574             numaAddNumber(na, size);
575             break;
576         case L_SORT_BY_AREA:
577             size = w * h;
578             numaAddNumber(na, size);
579             break;
580         case L_SORT_BY_ASPECT_RATIO:
581             numaAddNumber(na, (l_float32)w / (l_float32)h);
582             break;
583         default:
584             L_WARNING("invalid sort type", procName);
585         }
586     }
587 
588         /* Get the sort index for data array */
589     if ((naindex = numaGetSortIndex(na, sortorder)) == NULL)
590         return (BOXA *)ERROR_PTR("naindex not made", procName, NULL);
591 
592         /* Build up sorted boxa using sort index */
593     boxad = boxaSortByIndex(boxas, naindex);
594 
595     if (pnaindex)
596         *pnaindex = naindex;
597     else
598         numaDestroy(&naindex);
599     numaDestroy(&na);
600     return boxad;
601 }
602 
603 
604 /*!
605  *  boxaBinSort()
606  *
607  *      Input:  boxa
608  *              sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
609  *                        L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER)
610  *              sortorder  (L_SORT_INCREASING, L_SORT_DECREASING)
611  *              &naindex (<optional return> index of sorted order into
612  *                        original array)
613  *      Return: boxad (sorted version of boxas), or null on error
614  *
615  *  Notes:
616  *      (1) For a large number of boxes (say, greater than 1000), this
617  *          O(n) binsort is much faster than the O(nlogn) shellsort.
618  *          For 5000 components, this is over 20x faster than boxaSort().
619  *      (2) Consequently, boxaSort() calls this function if it will
620  *          likely go much faster.
621  */
622 BOXA *
boxaBinSort(BOXA * boxas,l_int32 sorttype,l_int32 sortorder,NUMA ** pnaindex)623 boxaBinSort(BOXA    *boxas,
624             l_int32  sorttype,
625             l_int32  sortorder,
626             NUMA   **pnaindex)
627 {
628 l_int32  i, n, x, y, w, h;
629 BOXA    *boxad;
630 NUMA    *na, *naindex;
631 
632     PROCNAME("boxaBinSort");
633 
634     if (pnaindex) *pnaindex = NULL;
635     if (!boxas)
636         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
637     if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
638         sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
639         sorttype != L_SORT_BY_PERIMETER)
640         return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL);
641     if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
642         return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL);
643 
644         /* Generate Numa of appropriate box dimensions */
645     n = boxaGetCount(boxas);
646     if ((na = numaCreate(n)) == NULL)
647         return (BOXA *)ERROR_PTR("na not made", procName, NULL);
648     for (i = 0; i < n; i++) {
649         boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
650         switch (sorttype)
651         {
652         case L_SORT_BY_X:
653             numaAddNumber(na, x);
654             break;
655         case L_SORT_BY_Y:
656             numaAddNumber(na, y);
657             break;
658         case L_SORT_BY_WIDTH:
659             numaAddNumber(na, w);
660             break;
661         case L_SORT_BY_HEIGHT:
662             numaAddNumber(na, h);
663             break;
664         case L_SORT_BY_PERIMETER:
665             numaAddNumber(na, w + h);
666             break;
667         default:
668             L_WARNING("invalid sort type", procName);
669         }
670     }
671 
672         /* Get the sort index for data array */
673     if ((naindex = numaGetBinSortIndex(na, sortorder)) == NULL)
674         return (BOXA *)ERROR_PTR("naindex not made", procName, NULL);
675 
676         /* Build up sorted boxa using the sort index */
677     boxad = boxaSortByIndex(boxas, naindex);
678 
679     if (pnaindex)
680         *pnaindex = naindex;
681     else
682         numaDestroy(&naindex);
683     numaDestroy(&na);
684     return boxad;
685 }
686 
687 
688 /*!
689  *  boxaSortByIndex()
690  *
691  *      Input:  boxas
692  *              naindex (na that maps from the new boxa to the input boxa)
693  *      Return: boxad (sorted), or null on error
694  */
695 BOXA *
boxaSortByIndex(BOXA * boxas,NUMA * naindex)696 boxaSortByIndex(BOXA  *boxas,
697                 NUMA  *naindex)
698 {
699 l_int32  i, n, index;
700 BOX     *box;
701 BOXA    *boxad;
702 
703     PROCNAME("boxaSortByIndex");
704 
705     if (!boxas)
706         return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
707     if (!naindex)
708         return (BOXA *)ERROR_PTR("naindex not defined", procName, NULL);
709 
710     n = boxaGetCount(boxas);
711     boxad = boxaCreate(n);
712     for (i = 0; i < n; i++) {
713         numaGetIValue(naindex, i, &index);
714         box = boxaGetBox(boxas, index, L_COPY);
715         boxaAddBox(boxad, box, L_INSERT);
716     }
717 
718     return boxad;
719 }
720 
721 
722 /*!
723  *  boxaSort2d()
724  *
725  *      Input:  boxas
726  *              &naa (<optional return> numaa with sorted indices
727  *                    whose values are the indices of the input array)
728  *              delta1 (min overlap that permits aggregation of a box
729  *                      onto a boxa of horizontally-aligned boxes; pass 1)
730  *              delta2 (min overlap that permits aggregation of a box
731  *                      onto a boxa of horizontally-aligned boxes; pass 2)
732  *              minh1 (components less than this height either join an
733  *                     existing boxa or are set aside for pass 2)
734  *      Return: boxaa (2d sorted version of boxa), or null on error
735  *
736  *  Notes:
737  *      (1) The final result is a sort where the 'fast scan' direction is
738  *          left to right, and the 'slow scan' direction is from top
739  *          to bottom.  Each boxa in the boxaa represents a sorted set
740  *          of boxes from left to right.
741  *      (2) Two passes are used to aggregate the boxas, which can corresond
742  *          to characters or words in a line of text.  In pass 1, only
743  *          taller components, which correspond to xheight or larger,
744  *          are permitted to start a new boxa, whereas in pass 2,
745  *          the remaining vertically-challenged components are allowed
746  *          to join an existing boxa or start a new one.
747  *      (3) If delta1 < 0, the first pass allows aggregation when
748  *          boxes in the same boxa do not overlap vertically.
749  *          The distance by which they can miss and still be aggregated
750  *          is the absolute value |delta1|.   Similar for delta2 on
751  *          the second pass.
752  *      (4) On the first pass, any component of height less than minh1
753  *          cannot start a new boxa; it's put aside for later insertion.
754  *      (5) On the second pass, any small component that doesn't align
755  *          with an existing boxa can start a new one.
756  *      (6) This can be used to identify lines of text from
757  *          character or word bounding boxes.
758  */
759 BOXAA *
boxaSort2d(BOXA * boxas,NUMAA ** pnaad,l_int32 delta1,l_int32 delta2,l_int32 minh1)760 boxaSort2d(BOXA    *boxas,
761            NUMAA  **pnaad,
762            l_int32  delta1,
763            l_int32  delta2,
764            l_int32  minh1)
765 {
766 l_int32  i, index, h, nt, ne, n, m, ival;
767 BOX     *box;
768 BOXA    *boxa, *boxae, *boxan, *boxat1, *boxat2, *boxav, *boxavs;
769 BOXAA   *baa, *baad;
770 NUMA    *naindex, *nae, *nan, *nah, *nav, *nat1, *nat2, *nad;
771 NUMAA   *naa, *naad;
772 
773     PROCNAME("boxaSort2d");
774 
775     if (pnaad) *pnaad = NULL;
776     if (!boxas)
777         return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);
778 
779         /* Sort from left to right */
780     if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex))
781                     == NULL)
782         return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL);
783 
784         /* First pass: assign taller boxes to boxa by row */
785     nt = boxaGetCount(boxa);
786     baa = boxaaCreate(0);
787     naa = numaaCreate(0);
788     boxae = boxaCreate(0);  /* save small height boxes here */
789     nae = numaCreate(0);  /* keep track of small height boxes */
790     for (i = 0; i < nt; i++) {
791         box = boxaGetBox(boxa, i, L_CLONE);
792         boxGetGeometry(box, NULL, NULL, NULL, &h);
793         if (h < minh1) {  /* save for 2nd pass */
794             boxaAddBox(boxae, box, L_INSERT);
795             numaAddNumber(nae, i);
796         }
797         else {
798             n = boxaaGetCount(baa);
799             boxaaAlignBox(baa, box, delta1, &index);
800             if (index < n) {  /* append to an existing boxa */
801                 boxaaAddBox(baa, index, box, L_INSERT);
802             }
803             else {  /* doesn't align, need new boxa */
804                 boxan = boxaCreate(0);
805                 boxaAddBox(boxan, box, L_INSERT);
806                 boxaaAddBoxa(baa, boxan, L_INSERT);
807                 nan = numaCreate(0);
808                 numaaAddNuma(naa, nan, L_INSERT);
809             }
810             numaGetIValue(naindex, i, &ival);
811             numaaAddNumber(naa, index, ival);
812         }
813     }
814     boxaDestroy(&boxa);
815     numaDestroy(&naindex);
816 
817         /* Second pass: feed in small height boxes;
818          * TODO: this correctly, using local y position! */
819     ne = boxaGetCount(boxae);
820     for (i = 0; i < ne; i++) {
821         box = boxaGetBox(boxae, i, L_CLONE);
822         n = boxaaGetCount(baa);
823         boxaaAlignBox(baa, box, delta2, &index);
824         if (index < n) {  /* append to an existing boxa */
825             boxaaAddBox(baa, index, box, L_INSERT);
826         }
827         else {  /* doesn't align, need new boxa */
828             boxan = boxaCreate(0);
829             boxaAddBox(boxan, box, L_INSERT);
830             boxaaAddBoxa(baa, boxan, L_INSERT);
831             nan = numaCreate(0);
832             numaaAddNuma(naa, nan, L_INSERT);
833         }
834         numaGetIValue(nae, i, &ival);  /* location in original boxas */
835         numaaAddNumber(naa, index, ival);
836     }
837 
838         /* Sort each boxa in the boxaa */
839     m = boxaaGetCount(baa);
840     for (i = 0; i < m; i++) {
841         boxat1 = boxaaGetBoxa(baa, i, L_CLONE);
842         boxat2 = boxaSort(boxat1, L_SORT_BY_X, L_SORT_INCREASING, &nah);
843         boxaaReplaceBoxa(baa, i, boxat2);
844         nat1 = numaaGetNuma(naa, i, L_CLONE);
845         nat2 = numaSortByIndex(nat1, nah);
846         numaaReplaceNuma(naa, i, nat2);
847         boxaDestroy(&boxat1);
848         numaDestroy(&nat1);
849         numaDestroy(&nah);
850     }
851 
852         /* Sort boxa vertically within boxaa, using the first box
853          * in each boxa. */
854     m = boxaaGetCount(baa);
855     boxav = boxaCreate(m);  /* holds first box in each boxa in baa */
856     naad = numaaCreate(m);
857     if (pnaad)
858         *pnaad = naad;
859     baad = boxaaCreate(m);
860     for (i = 0; i < m; i++) {
861         boxat1 = boxaaGetBoxa(baa, i, L_CLONE);
862         box = boxaGetBox(boxat1, 0, L_CLONE);
863         boxaAddBox(boxav, box, L_INSERT);
864         boxaDestroy(&boxat1);
865     }
866     boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav);
867     for (i = 0; i < m; i++) {
868         numaGetIValue(nav, i, &index);
869         boxa = boxaaGetBoxa(baa, index, L_CLONE);
870         boxaaAddBoxa(baad, boxa, L_INSERT);
871         nad = numaaGetNuma(naa, index, L_CLONE);
872         numaaAddNuma(naad, nad, L_INSERT);
873     }
874 
875 /*    fprintf(stderr, "box count = %d, numaa count = %d\n", nt,
876             numaaGetNumberCount(naad)); */
877 
878     boxaaDestroy(&baa);
879     boxaDestroy(&boxav);
880     boxaDestroy(&boxavs);
881     boxaDestroy(&boxae);
882     numaDestroy(&nav);
883     numaDestroy(&nae);
884     numaaDestroy(&naa);
885     if (!pnaad)
886         numaaDestroy(&naad);
887 
888     return baad;
889 }
890 
891 
892 /*!
893  *  boxaSort2dByIndex()
894  *
895  *      Input:  boxas
896  *              naa (numaa that maps from the new baa to the input boxa)
897  *      Return: baa (sorted boxaa), or null on error
898  */
899 BOXAA *
boxaSort2dByIndex(BOXA * boxas,NUMAA * naa)900 boxaSort2dByIndex(BOXA   *boxas,
901                   NUMAA  *naa)
902 {
903 l_int32  ntot, boxtot, i, j, n, nn, index;
904 BOX     *box;
905 BOXA    *boxa;
906 BOXAA   *baa;
907 NUMA    *na;
908 
909     PROCNAME("boxaSort2dByIndex");
910 
911     if (!boxas)
912         return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);
913     if (!naa)
914         return (BOXAA *)ERROR_PTR("naindex not defined", procName, NULL);
915 
916         /* Check counts */
917     ntot = numaaGetNumberCount(naa);
918     boxtot = boxaGetCount(boxas);
919     if (ntot != boxtot)
920         return (BOXAA *)ERROR_PTR("element count mismatch", procName, NULL);
921 
922     n = numaaGetCount(naa);
923     baa = boxaaCreate(n);
924     for (i = 0; i < n; i++) {
925         na = numaaGetNuma(naa, i, L_CLONE);
926         nn = numaGetCount(na);
927         boxa = boxaCreate(nn);
928         for (j = 0; j < nn; j++) {
929             numaGetIValue(na, i, &index);
930             box = boxaGetBox(boxas, index, L_COPY);
931             boxaAddBox(boxa, box, L_INSERT);
932         }
933         boxaaAddBoxa(baa, boxa, L_INSERT);
934         numaDestroy(&na);
935     }
936 
937     return baa;
938 }
939 
940 
941 /*---------------------------------------------------------------------*
942  *                        Other Boxaa functions                        *
943  *---------------------------------------------------------------------*/
944 /*!
945  *  boxaaGetExtent()
946  *
947  *      Input:  boxaa
948  *              &w  (<optional return> width)
949  *              &h  (<optional return> height)
950  *              &box (<optional return>, minimum box containing all boxa
951  *                    in boxaa)
952  *      Return: 0 if OK, 1 on error
953  *
954  *  Notes:
955  *      (1) The returned w and h are the minimum size image
956  *          that would contain all boxes untranslated.
957  */
958 l_int32
boxaaGetExtent(BOXAA * boxaa,l_int32 * pw,l_int32 * ph,BOX ** pbox)959 boxaaGetExtent(BOXAA    *boxaa,
960                l_int32  *pw,
961                l_int32  *ph,
962                BOX     **pbox)
963 {
964 l_int32  i, j, n, m, x, y, w, h, xmax, ymax, xmin, ymin;
965 BOXA    *boxa;
966 
967     PROCNAME("boxaaGetExtent");
968 
969     if (!pw && !ph && !pbox)
970         return ERROR_INT("no ptrs defined", procName, 1);
971     if (pbox) *pbox = NULL;
972     if (pw) *pw = 0;
973     if (ph) *ph = 0;
974     if (!boxaa)
975         return ERROR_INT("boxaa not defined", procName, 1);
976 
977     n = boxaaGetCount(boxaa);
978     if (n == 0)
979         return ERROR_INT("no boxa in boxaa", procName, 1);
980 
981     xmax = ymax = 0;
982     xmin = ymin = 100000000;
983     for (i = 0; i < n; i++) {
984         boxa = boxaaGetBoxa(boxaa, i, L_CLONE);
985         m = boxaGetCount(boxa);
986         for (j = 0; j < m; j++) {
987             boxaGetBoxGeometry(boxa, j, &x, &y, &w, &h);
988             xmin = L_MIN(xmin, x);
989             ymin = L_MIN(ymin, y);
990             xmax = L_MAX(xmax, x + w);
991             ymax = L_MAX(ymax, y + h);
992         }
993     }
994     if (pw) *pw = xmax;
995     if (ph) *ph = ymax;
996     if (pbox)
997       *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin);
998 
999     return 0;
1000 }
1001 
1002 
1003 /*!
1004  *  boxaaFlattenToBoxa()
1005  *
1006  *      Input:  boxaa
1007  *              &naindex  (<optional return> the boxa index in the boxaa)
1008  *              copyflag  (L_COPY or L_CLONE)
1009  *      Return: boxa, or null on error
1010  *
1011  *  Notes:
1012  *      (1) This 'flattens' the boxaa to a boxa, taking the boxes in
1013  *          order in the first boxa, then the second, etc.
1014  *      (2) If &naindex is defined, we generate a Numa that gives, for
1015  *          each box in the boxaa, the index of the boxa to which it belongs.
1016  */
1017 BOXA *
boxaaFlattenToBoxa(BOXAA * baa,NUMA ** pnaindex,l_int32 copyflag)1018 boxaaFlattenToBoxa(BOXAA   *baa,
1019                    NUMA   **pnaindex,
1020                    l_int32  copyflag)
1021 {
1022 l_int32  i, j, m, n;
1023 BOXA    *boxa, *boxat;
1024 BOX     *box;
1025 NUMA    *naindex;
1026 
1027     PROCNAME("boxaaFlattenToBoxa");
1028 
1029     if (pnaindex) *pnaindex = NULL;
1030     if (!baa)
1031         return (BOXA *)ERROR_PTR("baa not defined", procName, NULL);
1032     if (copyflag != L_COPY && copyflag != L_CLONE)
1033         return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);
1034     if (pnaindex) {
1035         naindex = numaCreate(0);
1036         *pnaindex = naindex;
1037     }
1038 
1039     n = boxaaGetCount(baa);
1040     boxa = boxaCreate(n);
1041     for (i = 0; i < n; i++) {
1042         boxat = boxaaGetBoxa(baa, i, L_CLONE);
1043         m = boxaGetCount(boxat);
1044         for (j = 0; j < m; j++) {
1045             box = boxaGetBox(boxat, j, copyflag);
1046             boxaAddBox(boxa, box, L_INSERT);
1047             if (pnaindex)
1048                 numaAddNumber(naindex, i);  /* save 'row' number */
1049         }
1050         boxaDestroy(&boxat);
1051     }
1052 
1053     return boxa;
1054 }
1055 
1056 
1057 /*!
1058  *  boxaaAlignBox()
1059  *
1060  *      Input:  boxaa
1061  *              box (to be aligned with the last of one of the boxa
1062  *                   in boxaa, if possible)
1063  *              delta (amount by which consecutive components can miss
1064  *                     in overlap and still be included in the array)
1065  *              &index (of boxa with best overlap, or if none match,
1066  *                      this is the index of the next boxa to be generated)
1067  *      Return: 0 if OK, 1 on error
1068  *
1069  *  Notes:
1070  *      (1) This is not greedy; it finds the boxa whose last box has
1071  *          the biggest overlap with the input box.
1072  */
1073 l_int32
boxaaAlignBox(BOXAA * baa,BOX * box,l_int32 delta,l_int32 * pindex)1074 boxaaAlignBox(BOXAA    *baa,
1075               BOX      *box,
1076               l_int32   delta,
1077               l_int32  *pindex)
1078 {
1079 l_int32  i, n, m, y, yt, h, ht, ovlp, maxovlp, maxindex;
1080 BOXA    *boxa;
1081 
1082     PROCNAME("boxaaAlignBox");
1083 
1084     if (!baa)
1085         return ERROR_INT("baa not defined", procName, 1);
1086     if (!box)
1087         return ERROR_INT("box not defined", procName, 1);
1088     if (!pindex)
1089         return ERROR_INT("&index not defined", procName, 1);
1090 
1091     n = boxaaGetCount(baa);
1092     boxGetGeometry(box, NULL, &y, NULL, &h);
1093     maxovlp = -10000000;
1094     for (i = 0; i < n; i++) {
1095         boxa = boxaaGetBoxa(baa, i, L_CLONE);
1096         if ((m = boxaGetCount(boxa)) == 0) {
1097             L_WARNING("no boxes in boxa", procName);
1098             continue;
1099         }
1100         boxaGetBoxGeometry(boxa, m - 1, NULL, &yt, NULL, &ht);  /* last one */
1101         boxaDestroy(&boxa);
1102 
1103             /* Overlap < 0 means the components do not overlap vertically */
1104         if (yt >= y)
1105             ovlp = y + h - 1 - yt;
1106         else
1107             ovlp = yt + ht - 1 - y;
1108         if (ovlp > maxovlp) {
1109             maxovlp = ovlp;
1110             maxindex = i;
1111         }
1112     }
1113 
1114     if (maxovlp + delta >= 0)
1115         *pindex = maxindex;
1116     else
1117         *pindex = n;
1118     return 0;
1119 }
1120 
1121 
1122