1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 - This software is distributed in the hope that it will be
4 - useful, but with NO WARRANTY OF ANY KIND.
5 - No author or distributor accepts responsibility to anyone for the
6 - consequences of using this software, or for whether it serves any
7 - particular purpose or works at all, unless he or she says so in
8 - writing. Everyone is granted permission to copy, modify and
9 - redistribute this source code, for commercial or non-commercial
10 - purposes, with the following restrictions: (1) the origin of this
11 - source code must not be misrepresented; (2) modified versions must
12 - be plainly marked as such; and (3) this notice may not be removed
13 - or altered from any source or modified source distribution.
14 *====================================================================*/
15
16 /*
17 * numafunc1.c
18 *
19 * Arithmetic and logic
20 * NUMA *numaArithOp()
21 * NUMA *numaLogicalOp()
22 * NUMA *numaInvert()
23 *
24 * Simple extractions
25 * l_int32 numaGetMin()
26 * l_int32 numaGetMax()
27 * l_int32 numaGetSum()
28 * NUMA *numaGetPartialSums()
29 * l_int32 numaGetSumOnInterval()
30 * l_int32 numaHasOnlyIntegers()
31 * NUMA *numaSubsample()
32 * NUMA *numaMakeSequence()
33 * NUMA *numaMakeConstant()
34 * l_int32 numaGetNonzeroRange()
35 * l_int32 numaGetCountRelativeToZero()
36 * NUMA *numaClipToInterval()
37 * NUMA *numaMakeThresholdIndicator()
38 *
39 * Interpolation
40 * l_int32 numaInterpolateEqxVal()
41 * l_int32 numaInterpolateEqxInterval()
42 * l_int32 numaInterpolateArbxVal()
43 * l_int32 numaInterpolateArbxInterval()
44 *
45 * Functions requiring interpolation
46 * l_int32 numaFitMax()
47 * l_int32 numaDifferentiateInterval()
48 * l_int32 numaIntegrateInterval()
49 *
50 * Sorting
51 * NUMA *numaSort()
52 * NUMA *numaGetSortIndex()
53 * NUMA *numaSortByIndex()
54 * l_int32 numaIsSorted()
55 * l_int32 numaSortPair()
56 * NUMA *numaPseudorandomSequence()
57 *
58 * Functions requiring sorting
59 * l_int32 numaGetMedian()
60 * l_int32 numaGetMode()
61 *
62 * Numa combination
63 * l_int32 numaJoin()
64 * NUMA *numaaFlattenToNuma()
65 *
66 *
67 * Things to remember when using the Numa:
68 *
69 * (1) The numa is a struct, not an array. Always use accessors
70 * (see numabasic.c), never the fields directly.
71 *
72 * (2) The number array holds l_float32 values. It can also
73 * be used to store l_int32 values. See numabasic.c for
74 * details on using the accessors.
75 *
76 * (3) If you use numaCreate(), no numbers are stored and the size is 0.
77 * You have to add numbers to increase the size.
78 * If you want to start with a numa of a fixed size, with each
79 * entry initialized to the same value, use numaMakeConstant().
80 *
81 * (4) Occasionally, in the comments we denote the i-th element of a
82 * numa by na[i]. This is conceptual only -- the numa is not an array!
83 */
84
85 #include <stdio.h>
86 #include <string.h>
87 #include <stdlib.h>
88 #include <math.h>
89 #include "allheaders.h"
90
91
92 /*----------------------------------------------------------------------*
93 * Arithmetic and logical ops on Numas *
94 *----------------------------------------------------------------------*/
95 /*!
96 * numaArithOp()
97 *
98 * Input: nad (<optional> can be null or equal to na1 (in-place)
99 * na1
100 * na2
101 * op (L_ARITH_ADD, L_ARITH_SUBTRACT,
102 * L_ARITH_MULTIPLY, L_ARITH_DIVIDE)
103 * Return: nad (always: operation applied to na1 and na2)
104 *
105 * Notes:
106 * (1) The sizes of na1 and na2 must be equal.
107 * (2) nad can only null or equal to na1.
108 * (3) To add a constant to a numa, or to multipy a numa by
109 * a constant, use numaTransform().
110 */
111 NUMA *
numaArithOp(NUMA * nad,NUMA * na1,NUMA * na2,l_int32 op)112 numaArithOp(NUMA *nad,
113 NUMA *na1,
114 NUMA *na2,
115 l_int32 op)
116 {
117 l_int32 i, n;
118 l_float32 val1, val2;
119
120 PROCNAME("numaArithOp");
121
122 if (!na1 || !na2)
123 return (NUMA *)ERROR_PTR("na1, na2 not both defined", procName, nad);
124 n = numaGetCount(na1);
125 if (n != numaGetCount(na2))
126 return (NUMA *)ERROR_PTR("na1, na2 sizes differ", procName, nad);
127 if (nad && nad != na1)
128 return (NUMA *)ERROR_PTR("nad defined but not in-place", procName, nad);
129 if (op != L_ARITH_ADD && op != L_ARITH_SUBTRACT &&
130 op != L_ARITH_MULTIPLY && op != L_ARITH_DIVIDE)
131 return (NUMA *)ERROR_PTR("invalid op", procName, nad);
132 if (op == L_ARITH_DIVIDE) {
133 for (i = 0; i < n; i++) {
134 numaGetFValue(na2, i, &val2);
135 if (val2 == 0.0)
136 return (NUMA *)ERROR_PTR("na2 has 0 element", procName, nad);
137 }
138 }
139
140 /* If nad is not identical to na1, make it an identical copy */
141 if (!nad)
142 nad = numaCopy(na1);
143
144 for (i = 0; i < n; i++) {
145 numaGetFValue(nad, i, &val1);
146 numaGetFValue(na2, i, &val2);
147 switch (op) {
148 case L_ARITH_ADD:
149 numaSetValue(nad, i, val1 + val2);
150 break;
151 case L_ARITH_SUBTRACT:
152 numaSetValue(nad, i, val1 - val2);
153 break;
154 case L_ARITH_MULTIPLY:
155 numaSetValue(nad, i, val1 * val2);
156 break;
157 case L_ARITH_DIVIDE:
158 numaSetValue(nad, i, val1 / val2);
159 break;
160 default:
161 fprintf(stderr, " Unknown arith op: %d\n", op);
162 return nad;
163 }
164 }
165
166 return nad;
167 }
168
169
170 /*!
171 * numaLogicalOp()
172 *
173 * Input: nad (<optional> can be null or equal to na1 (in-place)
174 * na1
175 * na2
176 * op (L_UNION, L_INTERSECTION, L_SUBTRACTION, L_EXCLUSIVE_OR)
177 * Return: nad (always: operation applied to na1 and na2)
178 *
179 * Notes:
180 * (1) The sizes of na1 and na2 must be equal.
181 * (2) nad can only null or equal to na1.
182 * (3) This is intended for use with indicator arrays (0s and 1s).
183 * Input data is extracted as integers (0 == false, anything
184 * else == true); output results are 0 and 1.
185 * (4) L_SUBTRACTION is subtraction of val2 from val1. For bit logical
186 * arithmetic this is (val1 & ~val2), but because these values
187 * are integers, we use (val1 && !val2).
188 */
189 NUMA *
numaLogicalOp(NUMA * nad,NUMA * na1,NUMA * na2,l_int32 op)190 numaLogicalOp(NUMA *nad,
191 NUMA *na1,
192 NUMA *na2,
193 l_int32 op)
194 {
195 l_int32 i, n, val1, val2, val;
196
197 PROCNAME("numaLogicalOp");
198
199 if (!na1 || !na2)
200 return (NUMA *)ERROR_PTR("na1, na2 not both defined", procName, nad);
201 n = numaGetCount(na1);
202 if (n != numaGetCount(na2))
203 return (NUMA *)ERROR_PTR("na1, na2 sizes differ", procName, nad);
204 if (nad && nad != na1)
205 return (NUMA *)ERROR_PTR("nad defined; not in-place", procName, nad);
206 if (op != L_UNION && op != L_INTERSECTION &&
207 op != L_SUBTRACTION && op != L_EXCLUSIVE_OR)
208 return (NUMA *)ERROR_PTR("invalid op", procName, nad);
209
210 /* If nad is not identical to na1, make it an identical copy */
211 if (!nad)
212 nad = numaCopy(na1);
213
214 for (i = 0; i < n; i++) {
215 numaGetIValue(nad, i, &val1);
216 numaGetIValue(na2, i, &val2);
217 switch (op) {
218 case L_UNION:
219 val = (val1 || val2) ? 1 : 0;
220 numaSetValue(nad, i, val);
221 break;
222 case L_INTERSECTION:
223 val = (val1 && val2) ? 1 : 0;
224 numaSetValue(nad, i, val);
225 break;
226 case L_SUBTRACTION:
227 val = (val1 && !val2) ? 1 : 0;
228 numaSetValue(nad, i, val);
229 break;
230 case L_EXCLUSIVE_OR:
231 val = ((val1 && !val2) || (!val1 && val2)) ? 1 : 0;
232 numaSetValue(nad, i, val);
233 break;
234 default:
235 fprintf(stderr, " Unknown logical op: %d\n", op);
236 return nad;
237 }
238 }
239
240 return nad;
241 }
242
243
244 /*!
245 * numaInvert()
246 *
247 * Input: nad (<optional> can be null or equal to nas (in-place)
248 * nas
249 * Return: nad (always: 'inverts' nas)
250 *
251 * Notes:
252 * (1) This is intended for use with indicator arrays (0s and 1s).
253 * It gives a boolean-type output, taking the input as
254 * an integer and inverting it:
255 * 0 --> 1
256 * anything else --> 0
257 */
258 NUMA *
numaInvert(NUMA * nad,NUMA * nas)259 numaInvert(NUMA *nad,
260 NUMA *nas)
261 {
262 l_int32 i, n, val;
263
264 PROCNAME("numaInvert");
265
266 if (!nas)
267 return (NUMA *)ERROR_PTR("nas not defined", procName, nad);
268 if (nad && nad != nas)
269 return (NUMA *)ERROR_PTR("nad defined; not in-place", procName, nad);
270
271 if (!nad)
272 nad = numaCopy(nas);
273 n = numaGetCount(nad);
274 for (i = 0; i < n; i++) {
275 numaGetIValue(nad, i, &val);
276 if (!val)
277 val = 1;
278 else
279 val = 0;
280 numaSetValue(nad, i, val);
281 }
282
283 return nad;
284 }
285
286
287 /*----------------------------------------------------------------------*
288 * Simple extractions *
289 *----------------------------------------------------------------------*/
290 /*!
291 * numaGetMin()
292 *
293 * Input: na
294 * &minval (<optional return> min value)
295 * &iminloc (<optional return> index of min location)
296 * Return: 0 if OK; 1 on error
297 */
298 l_int32
numaGetMin(NUMA * na,l_float32 * pminval,l_int32 * piminloc)299 numaGetMin(NUMA *na,
300 l_float32 *pminval,
301 l_int32 *piminloc)
302 {
303 l_int32 i, n, iminloc;
304 l_float32 val, minval;
305
306 PROCNAME("numaGetMin");
307
308 if (!pminval && !piminloc)
309 return ERROR_INT("nothing to do", procName, 1);
310 if (pminval) *pminval = 0.0;
311 if (piminloc) *piminloc = 0;
312 if (!na)
313 return ERROR_INT("na not defined", procName, 1);
314
315 minval = +1000000000.;
316 iminloc = 0;
317 n = numaGetCount(na);
318 for (i = 0; i < n; i++) {
319 numaGetFValue(na, i, &val);
320 if (val < minval) {
321 minval = val;
322 iminloc = i;
323 }
324 }
325
326 if (pminval) *pminval = minval;
327 if (piminloc) *piminloc = iminloc;
328 return 0;
329 }
330
331
332 /*!
333 * numaGetMax()
334 *
335 * Input: na
336 * &maxval (<optional return> max value)
337 * &imaxloc (<optional return> index of max location)
338 * Return: 0 if OK; 1 on error
339 */
340 l_int32
numaGetMax(NUMA * na,l_float32 * pmaxval,l_int32 * pimaxloc)341 numaGetMax(NUMA *na,
342 l_float32 *pmaxval,
343 l_int32 *pimaxloc)
344 {
345 l_int32 i, n, imaxloc;
346 l_float32 val, maxval;
347
348 PROCNAME("numaGetMax");
349
350 if (!pmaxval && !pimaxloc)
351 return ERROR_INT("nothing to do", procName, 1);
352 if (pmaxval) *pmaxval = 0.0;
353 if (pimaxloc) *pimaxloc = 0;
354 if (!na)
355 return ERROR_INT("na not defined", procName, 1);
356
357 maxval = -1000000000.;
358 imaxloc = 0;
359 n = numaGetCount(na);
360 for (i = 0; i < n; i++) {
361 numaGetFValue(na, i, &val);
362 if (val > maxval) {
363 maxval = val;
364 imaxloc = i;
365 }
366 }
367
368 if (pmaxval) *pmaxval = maxval;
369 if (pimaxloc) *pimaxloc = imaxloc;
370 return 0;
371 }
372
373
374 /*!
375 * numaGetSum()
376 *
377 * Input: na
378 * &sum (<return> sum of values)
379 * Return: 0 if OK, 1 on error
380 */
381 l_int32
numaGetSum(NUMA * na,l_float32 * psum)382 numaGetSum(NUMA *na,
383 l_float32 *psum)
384 {
385 l_int32 i, n;
386 l_float32 val, sum;
387
388 PROCNAME("numaGetSum");
389
390 if (!na)
391 return ERROR_INT("na not defined", procName, 1);
392 if (!psum)
393 return ERROR_INT("&sum not defined", procName, 1);
394
395 sum = 0.0;
396 n = numaGetCount(na);
397 for (i = 0; i < n; i++) {
398 numaGetFValue(na, i, &val);
399 sum += val;
400 }
401 *psum = sum;
402 return 0;
403 }
404
405
406 /*!
407 * numaGetPartialSums()
408 *
409 * Input: na
410 * Return: nasum, or null on error
411 *
412 * Notes:
413 * (1) nasum[i] is the sum for all j <= i of na[j].
414 * So nasum[0] = na[0].
415 * (2) If you want to generate a rank function, where rank[0] - 0.0,
416 * insert a 0.0 at the beginning of the nasum array.
417 */
418 NUMA *
numaGetPartialSums(NUMA * na)419 numaGetPartialSums(NUMA *na)
420 {
421 l_int32 i, n;
422 l_float32 val, sum;
423 NUMA *nasum;
424
425 PROCNAME("numaGetPartialSums");
426
427 if (!na)
428 return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
429
430 n = numaGetCount(na);
431 nasum = numaCreate(n);
432 sum = 0.0;
433 for (i = 0; i < n; i++) {
434 numaGetFValue(na, i, &val);
435 sum += val;
436 numaAddNumber(nasum, sum);
437 }
438 return nasum;
439 }
440
441
442 /*!
443 * numaGetSumOnInterval()
444 *
445 * Input: na
446 * first (beginning index)
447 * last (final index)
448 * &sum (<return> sum of values in the index interval range)
449 * Return: 0 if OK, 1 on error
450 */
451 l_int32
numaGetSumOnInterval(NUMA * na,l_int32 first,l_int32 last,l_float32 * psum)452 numaGetSumOnInterval(NUMA *na,
453 l_int32 first,
454 l_int32 last,
455 l_float32 *psum)
456 {
457 l_int32 i, n, truelast;
458 l_float32 val, sum;
459
460 PROCNAME("numaGetSumOnInterval");
461
462 if (!na)
463 return ERROR_INT("na not defined", procName, 1);
464 if (!psum)
465 return ERROR_INT("&sum not defined", procName, 1);
466 *psum = 0.0;
467
468 sum = 0.0;
469 n = numaGetCount(na);
470 if (first >= n) /* not an error */
471 return 0;
472 truelast = L_MIN(last, n - 1);
473
474 for (i = first; i <= truelast; i++) {
475 numaGetFValue(na, i, &val);
476 sum += val;
477 }
478 *psum = sum;
479 return 0;
480 }
481
482
483 /*!
484 * numaHasOnlyIntegers()
485 *
486 * Input: na
487 * maxsamples (maximum number of samples to check)
488 * &allints (<return> 1 if all sampled values are ints; else 0)
489 * Return: 0 if OK, 1 on error
490 *
491 * Notes:
492 * (1) Set @maxsamples == 0 to check every integer in na. Otherwise,
493 * this samples no more than @maxsamples.
494 */
495 l_int32
numaHasOnlyIntegers(NUMA * na,l_int32 maxsamples,l_int32 * pallints)496 numaHasOnlyIntegers(NUMA *na,
497 l_int32 maxsamples,
498 l_int32 *pallints)
499 {
500 l_int32 i, n, incr;
501 l_float32 val;
502
503 PROCNAME("numaHasOnlyIntegers");
504
505 if (!pallints)
506 return ERROR_INT("&allints not defined", procName, 1);
507 *pallints = TRUE;
508 if (!na)
509 return ERROR_INT("na not defined", procName, 1);
510
511 if ((n = numaGetCount(na)) == 0)
512 return ERROR_INT("na empty", procName, 1);
513 if (maxsamples <= 0)
514 incr = 1;
515 else
516 incr = (l_int32)((n + maxsamples - 1) / maxsamples);
517 for (i = 0; i < n; i += incr) {
518 numaGetFValue(na, i, &val);
519 if (val != (l_int32)val) {
520 *pallints = FALSE;
521 return 0;
522 }
523 }
524
525 return 0;
526 }
527
528
529 /*!
530 * numaSubsample()
531 *
532 * Input: nas
533 * subfactor (subsample factor, >= 1)
534 * Return: nad (evenly sampled values from nas), or null on error
535 */
536 NUMA *
numaSubsample(NUMA * nas,l_int32 subfactor)537 numaSubsample(NUMA *nas,
538 l_int32 subfactor)
539 {
540 l_int32 i, n;
541 l_float32 val;
542 NUMA *nad;
543
544 PROCNAME("numaSubsample");
545
546 if (!nas)
547 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
548 if (subfactor < 1)
549 return (NUMA *)ERROR_PTR("subfactor < 1", procName, NULL);
550
551 nad = numaCreate(0);
552 n = numaGetCount(nas);
553 for (i = 0; i < n; i++) {
554 if (i % subfactor != 0) continue;
555 numaGetFValue(nas, i, &val);
556 numaAddNumber(nad, val);
557 }
558
559 return nad;
560 }
561
562
563 /*!
564 * numaMakeSequence()
565 *
566 * Input: startval
567 * increment
568 * size (of sequence)
569 * Return: numa of sequence of evenly spaced values, or null on error
570 */
571 NUMA *
numaMakeSequence(l_float32 startval,l_float32 increment,l_int32 size)572 numaMakeSequence(l_float32 startval,
573 l_float32 increment,
574 l_int32 size)
575 {
576 l_int32 i;
577 l_float32 val;
578 NUMA *na;
579
580 PROCNAME("numaMakeSequence");
581
582 if ((na = numaCreate(size)) == NULL)
583 return (NUMA *)ERROR_PTR("na not made", procName, NULL);
584
585 for (i = 0; i < size; i++) {
586 val = startval + i * increment;
587 numaAddNumber(na, val);
588 }
589
590 return na;
591 }
592
593
594 /*!
595 * numaMakeConstant()
596 *
597 * Input: val
598 * size (of numa)
599 * Return: numa of given size with all entries equal to 'val',
600 * or null on error
601 */
602 NUMA *
numaMakeConstant(l_float32 val,l_int32 size)603 numaMakeConstant(l_float32 val,
604 l_int32 size)
605 {
606 return numaMakeSequence(val, 0.0, size);
607 }
608
609
610 /*!
611 * numaGetNonzeroRange()
612 *
613 * Input: numa
614 * eps (largest value considered to be zero)
615 * &first, &last (<return> interval of array indices
616 * where values are nonzero)
617 * Return: 0 if OK, 1 on error or if no nonzero range is found.
618 */
619 l_int32
numaGetNonzeroRange(NUMA * na,l_float32 eps,l_int32 * pfirst,l_int32 * plast)620 numaGetNonzeroRange(NUMA *na,
621 l_float32 eps,
622 l_int32 *pfirst,
623 l_int32 *plast)
624 {
625 l_int32 n, i, found;
626 l_float32 val;
627
628 PROCNAME("numaGetNonzeroRange");
629
630 if (!na)
631 return ERROR_INT("na not defined", procName, 1);
632 if (!pfirst || !plast)
633 return ERROR_INT("pfirst and plast not both defined", procName, 1);
634 n = numaGetCount(na);
635 found = FALSE;
636 for (i = 0; i < n; i++) {
637 numaGetFValue(na, i, &val);
638 if (val > eps) {
639 found = TRUE;
640 break;
641 }
642 }
643 if (!found) {
644 *pfirst = n - 1;
645 *plast = 0;
646 return 1;
647 }
648
649 *pfirst = i;
650 for (i = n - 1; i >= 0; i--) {
651 numaGetFValue(na, i, &val);
652 if (val > eps)
653 break;
654 }
655 *plast = i;
656 return 0;
657 }
658
659
660 /*!
661 * numaGetCountRelativeToZero()
662 *
663 * Input: numa
664 * type (L_LESS_THAN_ZERO, L_EQUAL_TO_ZERO, L_GREATER_THAN_ZERO)
665 * &count (<return> count of values of given type)
666 * Return: 0 if OK, 1 on error
667 */
668 l_int32
numaGetCountRelativeToZero(NUMA * na,l_int32 type,l_int32 * pcount)669 numaGetCountRelativeToZero(NUMA *na,
670 l_int32 type,
671 l_int32 *pcount)
672 {
673 l_int32 n, i, count;
674 l_float32 val;
675
676 PROCNAME("numaGetCountRelativeToZero");
677
678 if (!pcount)
679 return ERROR_INT("&count not defined", procName, 1);
680 *pcount = 0;
681 if (!na)
682 return ERROR_INT("na not defined", procName, 1);
683 n = numaGetCount(na);
684 for (i = 0, count = 0; i < n; i++) {
685 numaGetFValue(na, i, &val);
686 if (type == L_LESS_THAN_ZERO && val < 0.0)
687 count++;
688 else if (type == L_EQUAL_TO_ZERO && val == 0.0)
689 count++;
690 else if (type == L_GREATER_THAN_ZERO && val > 0.0)
691 count++;
692 }
693
694 *pcount = count;
695 return 0;
696 }
697
698
699 /*!
700 * numaClipToInterval()
701 *
702 * Input: numa
703 * first, last (clipping interval)
704 * Return: numa with the same values as the input, but clipped
705 * to the specified interval
706 *
707 * Note: If you want the indices of the array values to be unchanged,
708 * use first = 0.
709 * Usage: This is useful to clip a histogram that has a few nonzero
710 * values to its nonzero range.
711 */
712 NUMA *
numaClipToInterval(NUMA * nas,l_int32 first,l_int32 last)713 numaClipToInterval(NUMA *nas,
714 l_int32 first,
715 l_int32 last)
716 {
717 l_int32 n, i, truelast;
718 l_float32 val;
719 NUMA *nad;
720
721 PROCNAME("numaClipToInterval");
722
723 if (!nas)
724 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
725 if (first > last)
726 return (NUMA *)ERROR_PTR("range not valid", procName, NULL);
727
728 n = numaGetCount(nas);
729 if (first >= n)
730 return (NUMA *)ERROR_PTR("no elements in range", procName, NULL);
731 truelast = L_MIN(last, n - 1);
732 if ((nad = numaCreate(truelast - first + 1)) == NULL)
733 return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
734 for (i = first; i <= truelast; i++) {
735 numaGetFValue(nas, i, &val);
736 numaAddNumber(nad, val);
737 }
738
739 return nad;
740 }
741
742
743 /*!
744 * numaMakeThresholdIndicator()
745 *
746 * Input: nas (input numa)
747 * thresh (threshold value)
748 * type (L_SELECT_IF_LT, L_SELECT_IF_GT,
749 * L_SELECT_IF_LTE, L_SELECT_IF_GTE)
750 * Output: nad (indicator array: values are 0 and 1)
751 *
752 * Notes:
753 * (1) For each element in nas, if the constraint given by 'type'
754 * correctly specifies its relation to thresh, a value of 1
755 * is recorded in nad.
756 */
757 NUMA *
numaMakeThresholdIndicator(NUMA * nas,l_float32 thresh,l_int32 type)758 numaMakeThresholdIndicator(NUMA *nas,
759 l_float32 thresh,
760 l_int32 type)
761 {
762 l_int32 n, i, ival;
763 l_float32 fval;
764 NUMA *nai;
765
766 PROCNAME("numaMakeThresholdIndicator");
767
768 n = numaGetCount(nas);
769 nai = numaCreate(n);
770 for (i = 0; i < n; i++) {
771 numaGetFValue(nas, i, &fval);
772 ival = 0;
773 switch (type)
774 {
775 case L_SELECT_IF_LT:
776 if (fval < thresh) ival = 1;
777 break;
778 case L_SELECT_IF_GT:
779 if (fval > thresh) ival = 1;
780 break;
781 case L_SELECT_IF_LTE:
782 if (fval <= thresh) ival = 1;
783 break;
784 case L_SELECT_IF_GTE:
785 if (fval >= thresh) ival = 1;
786 break;
787 default:
788 numaDestroy(&nai);
789 return (NUMA *)ERROR_PTR("invalid type", procName, NULL);
790 }
791 numaAddNumber(nai, ival);
792 }
793
794 return nai;
795 }
796
797
798 /*----------------------------------------------------------------------*
799 * Interpolation *
800 *----------------------------------------------------------------------*/
801 /*!
802 * numaInterpolateEqxVal()
803 *
804 * Input: startx (xval corresponding to first element in array)
805 * deltax (x increment between array elements)
806 * nay (numa of ordinate values, assumed equally spaced)
807 * type (L_LINEAR_INTERP, L_QUADRATIC_INTERP)
808 * xval
809 * &yval (<return> interpolated value)
810 * Return: 0 if OK, 1 on error (e.g., if xval is outside range)
811 *
812 * Notes:
813 * (1) Considering nay as a function of x, the x values
814 * are equally spaced
815 * (2) Caller should check for valid return.
816 *
817 * For linear Lagrangian interpolation (through 2 data pts):
818 * y(x) = y1(x-x2)/(x1-x2) + y2(x-x1)/(x2-x1)
819 *
820 * For quadratic Lagrangian interpolation (through 3 data pts):
821 * y(x) = y1(x-x2)(x-x3)/((x1-x2)(x1-x3)) +
822 * y2(x-x1)(x-x3)/((x2-x1)(x2-x3)) +
823 * y3(x-x1)(x-x2)/((x3-x1)(x3-x2))
824 *
825 */
826 l_int32
numaInterpolateEqxVal(l_float32 startx,l_float32 deltax,NUMA * nay,l_int32 type,l_float32 xval,l_float32 * pyval)827 numaInterpolateEqxVal(l_float32 startx,
828 l_float32 deltax,
829 NUMA *nay,
830 l_int32 type,
831 l_float32 xval,
832 l_float32 *pyval)
833 {
834 l_int32 i, n, i1, i2, i3;
835 l_float32 x1, x2, x3, fy1, fy2, fy3, d1, d2, d3, del, fi, maxx;
836 l_float32 *fa;
837
838 PROCNAME("numaInterpolateEqxVal");
839
840 if (!pyval)
841 return ERROR_INT("&yval not defined", procName, 1);
842 *pyval = 0.0;
843 if (!nay)
844 return ERROR_INT("nay not defined", procName, 1);
845 if (deltax <= 0.0)
846 return ERROR_INT("deltax not > 0", procName, 1);
847 if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP)
848 return ERROR_INT("invalid interp type", procName, 1);
849 n = numaGetCount(nay);
850 if (n < 2)
851 return ERROR_INT("not enough points", procName, 1);
852 if (type == L_QUADRATIC_INTERP && n == 2) {
853 type = L_LINEAR_INTERP;
854 L_WARNING("only 2 points; using linear interp", procName);
855 }
856 maxx = startx + deltax * (n - 1);
857 if (xval < startx || xval > maxx)
858 return ERROR_INT("xval is out of bounds", procName, 1);
859
860 fa = numaGetFArray(nay, L_NOCOPY);
861 fi = (xval - startx) / deltax;
862 i = (l_int32)fi;
863 del = fi - i;
864 if (del == 0.0) { /* no interpolation required */
865 *pyval = fa[i];
866 return 0;
867 }
868
869 if (type == L_LINEAR_INTERP) {
870 *pyval = fa[i] + del * (fa[i + 1] - fa[i]);
871 return 0;
872 }
873
874 /* Quadratic interpolation */
875 d1 = d3 = 0.5 / (deltax * deltax);
876 d2 = -2. * d1;
877 if (i == 0) {
878 i1 = i;
879 i2 = i + 1;
880 i3 = i + 2;
881 }
882 else {
883 i1 = i - 1;
884 i2 = i;
885 i3 = i + 1;
886 }
887 x1 = startx + i1 * deltax;
888 x2 = startx + i2 * deltax;
889 x3 = startx + i3 * deltax;
890 fy1 = d1 * fa[i1];
891 fy2 = d2 * fa[i2];
892 fy3 = d3 * fa[i3];
893 *pyval = fy1 * (xval - x2) * (xval - x3) +
894 fy2 * (xval - x1) * (xval - x3) +
895 fy3 * (xval - x1) * (xval - x2);
896 return 0;
897 }
898
899
900 /*!
901 * numaInterpolateArbxVal()
902 *
903 * Input: nax (numa of abscissa values)
904 * nay (numa of ordinate values, corresponding to nax)
905 * type (L_LINEAR_INTERP, L_QUADRATIC_INTERP)
906 * xval
907 * &yval (<return> interpolated value)
908 * Return: 0 if OK, 1 on error (e.g., if xval is outside range)
909 *
910 * Notes:
911 * (1) The values in nax must be sorted in increasing order.
912 * If, additionally, they are equally spaced, you can use
913 * numaInterpolateEqxVal().
914 * (2) Caller should check for valid return.
915 * (3) Uses lagrangian interpolation. See numaInterpolateEqxVal()
916 * for formulas.
917 */
918 l_int32
numaInterpolateArbxVal(NUMA * nax,NUMA * nay,l_int32 type,l_float32 xval,l_float32 * pyval)919 numaInterpolateArbxVal(NUMA *nax,
920 NUMA *nay,
921 l_int32 type,
922 l_float32 xval,
923 l_float32 *pyval)
924 {
925 l_int32 i, im, nx, ny, i1, i2, i3;
926 l_float32 delu, dell, fract, d1, d2, d3;
927 l_float32 minx, maxx;
928 l_float32 *fax, *fay;
929
930 PROCNAME("numaInterpolateArbxVal");
931
932 if (!pyval)
933 return ERROR_INT("&yval not defined", procName, 1);
934 *pyval = 0.0;
935 if (!nax)
936 return ERROR_INT("nax not defined", procName, 1);
937 if (!nay)
938 return ERROR_INT("nay not defined", procName, 1);
939 if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP)
940 return ERROR_INT("invalid interp type", procName, 1);
941 ny = numaGetCount(nay);
942 nx = numaGetCount(nax);
943 if (nx != ny)
944 return ERROR_INT("nax and nay not same size arrays", procName, 1);
945 if (ny < 2)
946 return ERROR_INT("not enough points", procName, 1);
947 if (type == L_QUADRATIC_INTERP && ny == 2) {
948 type = L_LINEAR_INTERP;
949 L_WARNING("only 2 points; using linear interp", procName);
950 }
951 numaGetFValue(nax, 0, &minx);
952 numaGetFValue(nax, nx - 1, &maxx);
953 if (xval < minx || xval > maxx)
954 return ERROR_INT("xval is out of bounds", procName, 1);
955
956 fax = numaGetFArray(nax, L_NOCOPY);
957 fay = numaGetFArray(nay, L_NOCOPY);
958
959 /* Linear search for interval. We are guaranteed
960 * to either return or break out of the loop.
961 * In addition, we are assured that fax[i] - fax[im] > 0.0 */
962 if (xval == fax[0]) {
963 *pyval = fay[0];
964 return 0;
965 }
966 for (i = 1; i < nx; i++) {
967 delu = fax[i] - xval;
968 if (delu >= 0.0) { /* we've passed it */
969 if (delu == 0.0) {
970 *pyval = fay[i];
971 return 0;
972 }
973 im = i - 1;
974 dell = xval - fax[im]; /* >= 0 */
975 break;
976 }
977 }
978 fract = dell / (fax[i] - fax[im]);
979
980 if (type == L_LINEAR_INTERP) {
981 *pyval = fay[i] + fract * (fay[i + 1] - fay[i]);
982 return 0;
983 }
984
985 /* Quadratic interpolation */
986 if (im == 0) {
987 i1 = im;
988 i2 = im + 1;
989 i3 = im + 2;
990 }
991 else {
992 i1 = im - 1;
993 i2 = im;
994 i3 = im + 1;
995 }
996 d1 = (fax[i1] - fax[i2]) * (fax[i1] - fax[i3]);
997 d2 = (fax[i2] - fax[i1]) * (fax[i2] - fax[i3]);
998 d3 = (fax[i3] - fax[i1]) * (fax[i3] - fax[i2]);
999 *pyval = fay[i1] * (xval - fax[i2]) * (xval - fax[i3]) / d1 +
1000 fay[i2] * (xval - fax[i1]) * (xval - fax[i3]) / d2 +
1001 fay[i3] * (xval - fax[i1]) * (xval - fax[i2]) / d3;
1002 return 0;
1003 }
1004
1005
1006 /*!
1007 * numaInterpolateEqxInterval()
1008 *
1009 * Input: startx (xval corresponding to first element in nas)
1010 * deltax (x increment between array elements in nas)
1011 * nasy (numa of ordinate values, assumed equally spaced)
1012 * type (L_LINEAR_INTERP, L_QUADRATIC_INTERP)
1013 * x0 (start value of interval)
1014 * x1 (end value of interval)
1015 * npts (number of points to evaluate function in interval)
1016 * &nax (<optional return> array of x values in interval)
1017 * &nay (<return> array of y values in interval)
1018 * Return: 0 if OK, 1 on error
1019 *
1020 * Notes:
1021 * (1) Considering nasy as a function of x, the x values
1022 * are equally spaced.
1023 * (2) This creates nay (and optionally nax) of interpolated
1024 * values over the specified interval (x0, x1).
1025 * (3) If the interval (x0, x1) lies partially outside the array
1026 * nasy (as interpreted by startx and deltax), it is an
1027 * error and returns 1.
1028 * (4) Note that deltax is the intrinsic x-increment for the input
1029 * array nasy, whereas delx is the intrinsic x-increment for the
1030 * output interpolated array nay.
1031 */
1032 l_int32
numaInterpolateEqxInterval(l_float32 startx,l_float32 deltax,NUMA * nasy,l_int32 type,l_float32 x0,l_float32 x1,l_int32 npts,NUMA ** pnax,NUMA ** pnay)1033 numaInterpolateEqxInterval(l_float32 startx,
1034 l_float32 deltax,
1035 NUMA *nasy,
1036 l_int32 type,
1037 l_float32 x0,
1038 l_float32 x1,
1039 l_int32 npts,
1040 NUMA **pnax,
1041 NUMA **pnay)
1042 {
1043 l_int32 i, n;
1044 l_float32 x, yval, maxx, delx;
1045 NUMA *nax, *nay;
1046
1047 PROCNAME("numaInterpolateEqxInterval");
1048
1049 if (pnax) *pnax = NULL;
1050 if (!pnay)
1051 return ERROR_INT("&nay not defined", procName, 1);
1052 *pnay = NULL;
1053 if (!nasy)
1054 return ERROR_INT("nasy not defined", procName, 1);
1055 if (deltax <= 0.0)
1056 return ERROR_INT("deltax not > 0", procName, 1);
1057 if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP)
1058 return ERROR_INT("invalid interp type", procName, 1);
1059 n = numaGetCount(nasy);
1060 if (type == L_QUADRATIC_INTERP && n == 2) {
1061 type = L_LINEAR_INTERP;
1062 L_WARNING("only 2 points; using linear interp", procName);
1063 }
1064 maxx = startx + deltax * (n - 1);
1065 if (x0 < startx || x1 > maxx || x1 <= x0)
1066 return ERROR_INT("[x0 ... x1] is not valid", procName, 1);
1067 if (npts < 3)
1068 return ERROR_INT("npts < 3", procName, 1);
1069 delx = (x1 - x0) / (l_float32)(npts - 1); /* delx is for output nay */
1070
1071 if ((nay = numaCreate(npts)) == NULL)
1072 return ERROR_INT("nay not made", procName, 1);
1073 numaSetXParameters(nay, x0, delx);
1074 *pnay = nay;
1075 if (pnax) {
1076 nax = numaCreate(npts);
1077 *pnax = nax;
1078 }
1079
1080 for (i = 0; i < npts; i++) {
1081 x = x0 + i * delx;
1082 if (pnax)
1083 numaAddNumber(nax, x);
1084 numaInterpolateEqxVal(startx, deltax, nasy, type, x, &yval);
1085 numaAddNumber(nay, yval);
1086 }
1087
1088 return 0;
1089 }
1090
1091
1092 /*!
1093 * numaInterpolateArbxInterval()
1094 *
1095 * Input: nax (numa of abscissa values)
1096 * nay (numa of ordinate values, corresponding to nax)
1097 * type (L_LINEAR_INTERP, L_QUADRATIC_INTERP)
1098 * x0 (start value of interval)
1099 * x1 (end value of interval)
1100 * npts (number of points to evaluate function in interval)
1101 * &nadx (<optional return> array of x values in interval)
1102 * &nady (<return> array of y values in interval)
1103 * Return: 0 if OK, 1 on error (e.g., if x0 or x1 is outside range)
1104 *
1105 * Notes:
1106 * (1) The values in nax must be sorted in increasing order.
1107 * If they are not sorted, we do it here, and complain.
1108 * (2) If the values in nax are equally spaced, you can use
1109 * numaInterpolateEqxInterval().
1110 * (3) Caller should check for valid return.
1111 * (4) We don't call numaInterpolateArbxVal() for each output
1112 * point, because that requires an O(n) search for
1113 * each point. Instead, we do a single O(n) pass through
1114 * nax, saving the indices to be used for each output yval.
1115 * (5) Uses lagrangian interpolation. See numaInterpolateEqxVal()
1116 * for formulas.
1117 */
1118 l_int32
numaInterpolateArbxInterval(NUMA * nax,NUMA * nay,l_int32 type,l_float32 x0,l_float32 x1,l_int32 npts,NUMA ** pnadx,NUMA ** pnady)1119 numaInterpolateArbxInterval(NUMA *nax,
1120 NUMA *nay,
1121 l_int32 type,
1122 l_float32 x0,
1123 l_float32 x1,
1124 l_int32 npts,
1125 NUMA **pnadx,
1126 NUMA **pnady)
1127 {
1128 l_int32 i, im, j, nx, ny, i1, i2, i3, sorted;
1129 l_int32 *index;
1130 l_float32 del, xval, yval, excess, fract, minx, maxx, d1, d2, d3;
1131 l_float32 *fax, *fay;
1132 NUMA *nasx, *nasy, *nadx, *nady;
1133
1134 PROCNAME("numaInterpolateArbxInterval");
1135
1136 if (pnadx) *pnadx = NULL;
1137 if (!pnady)
1138 return ERROR_INT("&nady not defined", procName, 1);
1139 *pnady = NULL;
1140 if (!nay)
1141 return ERROR_INT("nay not defined", procName, 1);
1142 if (!nax)
1143 return ERROR_INT("nax not defined", procName, 1);
1144 if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP)
1145 return ERROR_INT("invalid interp type", procName, 1);
1146 if (x0 > x1)
1147 return ERROR_INT("x0 > x1", procName, 1);
1148 ny = numaGetCount(nay);
1149 nx = numaGetCount(nax);
1150 if (nx != ny)
1151 return ERROR_INT("nax and nay not same size arrays", procName, 1);
1152 if (ny < 2)
1153 return ERROR_INT("not enough points", procName, 1);
1154 if (type == L_QUADRATIC_INTERP && ny == 2) {
1155 type = L_LINEAR_INTERP;
1156 L_WARNING("only 2 points; using linear interp", procName);
1157 }
1158 numaGetMin(nax, &minx, NULL);
1159 numaGetMax(nax, &maxx, NULL);
1160 if (x0 < minx || x1 > maxx)
1161 return ERROR_INT("xval is out of bounds", procName, 1);
1162
1163 /* Make sure that nax is sorted in increasing order */
1164 numaIsSorted(nax, L_SORT_INCREASING, &sorted);
1165 if (!sorted) {
1166 L_WARNING("we are sorting nax in increasing order", procName);
1167 numaSortPair(nax, nay, L_SORT_INCREASING, &nasx, &nasy);
1168 }
1169 else {
1170 nasx = numaClone(nax);
1171 nasy = numaClone(nay);
1172 }
1173
1174 fax = numaGetFArray(nasx, L_NOCOPY);
1175 fay = numaGetFArray(nasy, L_NOCOPY);
1176
1177 /* Get array of indices into fax for interpolated locations */
1178 if ((index = (l_int32 *)CALLOC(npts, sizeof(l_int32))) == NULL)
1179 return ERROR_INT("ind not made", procName, 1);
1180 del = (x1 - x0) / (npts - 1.0);
1181 for (i = 0, j = 0; j < nx && i < npts; i++) {
1182 xval = x0 + i * del;
1183 while (j < nx - 1 && xval > fax[j])
1184 j++;
1185 if (xval == fax[j])
1186 index[i] = L_MIN(j, nx - 1);
1187 else /* the index of fax[] is just below xval */
1188 index[i] = L_MAX(j - 1, 0);
1189 }
1190
1191 /* For each point to be interpolated, get the y value */
1192 nady = numaCreate(npts);
1193 *pnady = nady;
1194 if (pnadx) {
1195 nadx = numaCreate(npts);
1196 *pnadx = nadx;
1197 }
1198 for (i = 0; i < npts; i++) {
1199 xval = x0 + i * del;
1200 if (pnadx)
1201 numaAddNumber(nadx, xval);
1202 im = index[i];
1203 excess = xval - fax[im];
1204 if (excess == 0.0) {
1205 numaAddNumber(nady, fay[im]);
1206 continue;
1207 }
1208 fract = excess / (fax[im + 1] - fax[im]);
1209
1210 if (type == L_LINEAR_INTERP) {
1211 yval = fay[im] + fract * (fay[im + 1] - fay[im]);
1212 numaAddNumber(nady, yval);
1213 continue;
1214 }
1215
1216 /* Quadratic interpolation */
1217 if (im == 0) {
1218 i1 = im;
1219 i2 = im + 1;
1220 i3 = im + 2;
1221 }
1222 else {
1223 i1 = im - 1;
1224 i2 = im;
1225 i3 = im + 1;
1226 }
1227 d1 = (fax[i1] - fax[i2]) * (fax[i1] - fax[i3]);
1228 d2 = (fax[i2] - fax[i1]) * (fax[i2] - fax[i3]);
1229 d3 = (fax[i3] - fax[i1]) * (fax[i3] - fax[i2]);
1230 yval = fay[i1] * (xval - fax[i2]) * (xval - fax[i3]) / d1 +
1231 fay[i2] * (xval - fax[i1]) * (xval - fax[i3]) / d2 +
1232 fay[i3] * (xval - fax[i1]) * (xval - fax[i2]) / d3;
1233 numaAddNumber(nady, yval);
1234 }
1235
1236 FREE(index);
1237 numaDestroy(&nasx);
1238 numaDestroy(&nasy);
1239 return 0;
1240 }
1241
1242
1243 /*----------------------------------------------------------------------*
1244 * Functions requiring interpolation *
1245 *----------------------------------------------------------------------*/
1246 /*!
1247 * numaFitMax()
1248 *
1249 * Input: na (numa of ordinate values, to fit a max to)
1250 * &maxval (<return> max value)
1251 * naloc (<optional> associated numa of abscissa values)
1252 * &maxloc (<return> abscissa value that gives max value in na;
1253 * if naloc == null, this is given as an interpolated
1254 * index value)
1255 * Return: 0 if OK; 1 on error
1256 *
1257 * Note: if naloc is given, there is no requirement that the
1258 * data points are evenly spaced. Lagrangian interpolation
1259 * handles that. The only requirement is that the
1260 * data points are ordered so that the values in naloc
1261 * are either increasing or decreasing. We test to make
1262 * sure that the sizes of na and naloc are equal, and it
1263 * is assumed that the correspondences na[i] as a function
1264 * of naloc[i] are properly arranged for all i.
1265 *
1266 * The formula for Lagrangian interpolation through 3 data pts is:
1267 * y(x) = y1(x-x2)(x-x3)/((x1-x2)(x1-x3)) +
1268 * y2(x-x1)(x-x3)/((x2-x1)(x2-x3)) +
1269 * y3(x-x1)(x-x2)/((x3-x1)(x3-x2))
1270 *
1271 * Then the derivative, using the constants (c1,c2,c3) defined below,
1272 * is set to 0:
1273 * y'(x) = 2x(c1+c2+c3) - c1(x2+x3) - c2(x1+x3) - c3(x1+x2) = 0
1274 */
1275 l_int32
numaFitMax(NUMA * na,l_float32 * pmaxval,NUMA * naloc,l_float32 * pmaxloc)1276 numaFitMax(NUMA *na,
1277 l_float32 *pmaxval,
1278 NUMA *naloc,
1279 l_float32 *pmaxloc)
1280 {
1281 l_float32 val;
1282 l_float32 smaxval; /* start value of maximum sample, before interpolating */
1283 l_int32 n, imaxloc;
1284 l_float32 x1, x2, x3, y1, y2, y3, c1, c2, c3, a, b, xmax, ymax;
1285
1286 PROCNAME("numaFitMax");
1287
1288 *pmaxval = *pmaxloc = 0.0; /* init */
1289
1290 if (!na)
1291 return ERROR_INT("na not defined", procName, 1);
1292 if (!pmaxval)
1293 return ERROR_INT("&maxval not defined", procName, 1);
1294 if (!pmaxloc)
1295 return ERROR_INT("&maxloc not defined", procName, 1);
1296
1297 n = numaGetCount(na);
1298 if (naloc) {
1299 if (n != numaGetCount(naloc))
1300 return ERROR_INT("na and naloc of unequal size", procName, 1);
1301 }
1302
1303 numaGetMax(na, &smaxval, &imaxloc);
1304
1305 /* Simple case: max is at end point */
1306 if (imaxloc == 0 || imaxloc == n - 1) {
1307 *pmaxval = smaxval;
1308 if (naloc) {
1309 numaGetFValue(naloc, imaxloc, &val);
1310 *pmaxloc = val;
1311 }
1312 else
1313 *pmaxloc = imaxloc;
1314 return 0;
1315 }
1316
1317 /* Interior point; use quadratic interpolation */
1318 y2 = smaxval;
1319 numaGetFValue(na, imaxloc - 1, &val);
1320 y1 = val;
1321 numaGetFValue(na, imaxloc + 1, &val);
1322 y3 = val;
1323 if (naloc) {
1324 numaGetFValue(naloc, imaxloc - 1, &val);
1325 x1 = val;
1326 numaGetFValue(naloc, imaxloc, &val);
1327 x2 = val;
1328 numaGetFValue(naloc, imaxloc + 1, &val);
1329 x3 = val;
1330 }
1331 else {
1332 x1 = imaxloc - 1;
1333 x2 = imaxloc;
1334 x3 = imaxloc + 1;
1335 }
1336
1337 /* Can't interpolate; just use the max val in na
1338 * and the corresponding one in naloc */
1339 if (x1 == x2 || x1 == x3 || x2 == x3) {
1340 *pmaxval = y2;
1341 *pmaxloc = x2;
1342 return 0;
1343 }
1344
1345 /* Use lagrangian interpolation; set dy/dx = 0 */
1346 c1 = y1 / ((x1 - x2) * (x1 - x3));
1347 c2 = y2 / ((x2 - x1) * (x2 - x3));
1348 c3 = y3 / ((x3 - x1) * (x3 - x2));
1349 a = c1 + c2 + c3;
1350 b = c1 * (x2 + x3) + c2 * (x1 + x3) + c3 * (x1 + x2);
1351 xmax = b / (2 * a);
1352 ymax = c1 * (xmax - x2) * (xmax - x3) +
1353 c2 * (xmax - x1) * (xmax - x3) +
1354 c3 * (xmax - x1) * (xmax - x2);
1355 *pmaxval = ymax;
1356 *pmaxloc = xmax;
1357
1358 return 0;
1359 }
1360
1361
1362 /*!
1363 * numaDifferentiateInterval()
1364 *
1365 * Input: nax (numa of abscissa values)
1366 * nay (numa of ordinate values, corresponding to nax)
1367 * x0 (start value of interval)
1368 * x1 (end value of interval)
1369 * npts (number of points to evaluate function in interval)
1370 * &nadx (<optional return> array of x values in interval)
1371 * &nady (<return> array of derivatives in interval)
1372 * Return: 0 if OK, 1 on error (e.g., if x0 or x1 is outside range)
1373 *
1374 * Notes:
1375 * (1) The values in nax must be sorted in increasing order.
1376 * If they are not sorted, it is done in the interpolation
1377 * step, and a warning is issued.
1378 * (2) Caller should check for valid return.
1379 */
1380 l_int32
numaDifferentiateInterval(NUMA * nax,NUMA * nay,l_float32 x0,l_float32 x1,l_int32 npts,NUMA ** pnadx,NUMA ** pnady)1381 numaDifferentiateInterval(NUMA *nax,
1382 NUMA *nay,
1383 l_float32 x0,
1384 l_float32 x1,
1385 l_int32 npts,
1386 NUMA **pnadx,
1387 NUMA **pnady)
1388 {
1389 l_int32 i, nx, ny;
1390 l_float32 minx, maxx, der, invdel;
1391 l_float32 *fay;
1392 NUMA *nady, *naiy;
1393
1394 PROCNAME("numaDifferentiateInterval");
1395
1396 if (pnadx) *pnadx = NULL;
1397 if (!pnady)
1398 return ERROR_INT("&nady not defined", procName, 1);
1399 *pnady = NULL;
1400 if (!nay)
1401 return ERROR_INT("nay not defined", procName, 1);
1402 if (!nax)
1403 return ERROR_INT("nax not defined", procName, 1);
1404 if (x0 > x1)
1405 return ERROR_INT("x0 > x1", procName, 1);
1406 ny = numaGetCount(nay);
1407 nx = numaGetCount(nax);
1408 if (nx != ny)
1409 return ERROR_INT("nax and nay not same size arrays", procName, 1);
1410 if (ny < 2)
1411 return ERROR_INT("not enough points", procName, 1);
1412 numaGetMin(nax, &minx, NULL);
1413 numaGetMax(nax, &maxx, NULL);
1414 if (x0 < minx || x1 > maxx)
1415 return ERROR_INT("xval is out of bounds", procName, 1);
1416 if (npts < 2)
1417 return ERROR_INT("npts < 2", procName, 1);
1418
1419 /* Generate interpolated array over specified interval */
1420 if (numaInterpolateArbxInterval(nax, nay, L_LINEAR_INTERP, x0, x1,
1421 npts, pnadx, &naiy))
1422 return ERROR_INT("interpolation failed", procName, 1);
1423
1424 nady = numaCreate(npts);
1425 *pnady = nady;
1426 invdel = 0.5 * ((l_float32)npts - 1.0) / (x1 - x0);
1427 fay = numaGetFArray(naiy, L_NOCOPY);
1428
1429 /* Compute and save derivatives */
1430 der = 0.5 * invdel * (fay[1] - fay[0]);
1431 numaAddNumber(nady, der);
1432 for (i = 1; i < npts - 1; i++) {
1433 der = invdel * (fay[i + 1] - fay[i - 1]);
1434 numaAddNumber(nady, der);
1435 }
1436 der = 0.5 * invdel * (fay[npts - 1] - fay[npts - 2]);
1437 numaAddNumber(nady, der);
1438
1439 numaDestroy(&naiy);
1440 return 0;
1441 }
1442
1443
1444 /*!
1445 * numaIntegrateInterval()
1446 *
1447 * Input: nax (numa of abscissa values)
1448 * nay (numa of ordinate values, corresponding to nax)
1449 * x0 (start value of interval)
1450 * x1 (end value of interval)
1451 * npts (number of points to evaluate function in interval)
1452 * &sum (<return> integral of function over interval)
1453 * Return: 0 if OK, 1 on error (e.g., if x0 or x1 is outside range)
1454 *
1455 * Notes:
1456 * (1) The values in nax must be sorted in increasing order.
1457 * If they are not sorted, it is done in the interpolation
1458 * step, and a warning is issued.
1459 * (2) Caller should check for valid return.
1460 */
1461 l_int32
numaIntegrateInterval(NUMA * nax,NUMA * nay,l_float32 x0,l_float32 x1,l_int32 npts,l_float32 * psum)1462 numaIntegrateInterval(NUMA *nax,
1463 NUMA *nay,
1464 l_float32 x0,
1465 l_float32 x1,
1466 l_int32 npts,
1467 l_float32 *psum)
1468 {
1469 l_int32 i, nx, ny;
1470 l_float32 minx, maxx, sum, del;
1471 l_float32 *fay;
1472 NUMA *naiy;
1473
1474 PROCNAME("numaIntegrateInterval");
1475
1476 if (!psum)
1477 return ERROR_INT("&sum not defined", procName, 1);
1478 *psum = 0.0;
1479 if (!nay)
1480 return ERROR_INT("nay not defined", procName, 1);
1481 if (!nax)
1482 return ERROR_INT("nax not defined", procName, 1);
1483 if (x0 > x1)
1484 return ERROR_INT("x0 > x1", procName, 1);
1485 if (npts < 2)
1486 return ERROR_INT("npts < 2", procName, 1);
1487 ny = numaGetCount(nay);
1488 nx = numaGetCount(nax);
1489 if (nx != ny)
1490 return ERROR_INT("nax and nay not same size arrays", procName, 1);
1491 if (ny < 2)
1492 return ERROR_INT("not enough points", procName, 1);
1493 numaGetMin(nax, &minx, NULL);
1494 numaGetMax(nax, &maxx, NULL);
1495 if (x0 < minx || x1 > maxx)
1496 return ERROR_INT("xval is out of bounds", procName, 1);
1497
1498 /* Generate interpolated array over specified interval */
1499 if (numaInterpolateArbxInterval(nax, nay, L_LINEAR_INTERP, x0, x1,
1500 npts, NULL, &naiy))
1501 return ERROR_INT("interpolation failed", procName, 1);
1502
1503 del = (x1 - x0) / ((l_float32)npts - 1.0);
1504 fay = numaGetFArray(naiy, L_NOCOPY);
1505
1506 /* Compute integral (simple trapezoid) */
1507 sum = 0.5 * (fay[0] + fay[npts - 1]);
1508 for (i = 1; i < npts - 1; i++)
1509 sum += fay[i];
1510 *psum = del * sum;
1511
1512 numaDestroy(&naiy);
1513 return 0;
1514 }
1515
1516
1517 /*----------------------------------------------------------------------*
1518 * Sorting *
1519 *----------------------------------------------------------------------*/
1520 /*!
1521 * numaSort()
1522 *
1523 * Input: naout (output numa; can be NULL or equal to nain)
1524 * nain (input numa)
1525 * sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
1526 * Return: naout (output sorted numa), or null on error
1527 *
1528 * Notes:
1529 * (1) Set naout = nain for in-place; otherwise, set naout = NULL.
1530 * (2) Source: Shell sort, modified from K&R, 2nd edition, p.62.
1531 * Slow but simple O(n logn) sort.
1532 */
1533 NUMA *
numaSort(NUMA * naout,NUMA * nain,l_int32 sortorder)1534 numaSort(NUMA *naout,
1535 NUMA *nain,
1536 l_int32 sortorder)
1537 {
1538 l_int32 i, n, gap, j;
1539 l_float32 tmp;
1540 l_float32 *array;
1541
1542 PROCNAME("numaSort");
1543
1544 if (!nain)
1545 return (NUMA *)ERROR_PTR("nain not defined", procName, NULL);
1546
1547 /* Make naout if necessary; otherwise do in-place */
1548 if (!naout)
1549 naout = numaCopy(nain);
1550 else if (nain != naout)
1551 return (NUMA *)ERROR_PTR("invalid: not in-place", procName, NULL);
1552 array = naout->array; /* operate directly on the array */
1553 n = numaGetCount(naout);
1554
1555 /* Shell sort */
1556 for (gap = n/2; gap > 0; gap = gap / 2) {
1557 for (i = gap; i < n; i++) {
1558 for (j = i - gap; j >= 0; j -= gap) {
1559 if ((sortorder == L_SORT_INCREASING &&
1560 array[j] > array[j + gap]) ||
1561 (sortorder == L_SORT_DECREASING &&
1562 array[j] < array[j + gap]))
1563 {
1564 tmp = array[j];
1565 array[j] = array[j + gap];
1566 array[j + gap] = tmp;
1567 }
1568 }
1569 }
1570 }
1571
1572 return naout;
1573 }
1574
1575
1576 /*!
1577 * numaGetSortIndex()
1578 *
1579 * Input: na
1580 * sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
1581 * Return: na giving an array of indices that would sort
1582 * the input array, or null on error
1583 */
1584 NUMA *
numaGetSortIndex(NUMA * na,l_int32 sortorder)1585 numaGetSortIndex(NUMA *na,
1586 l_int32 sortorder)
1587 {
1588 l_int32 i, n, gap, j;
1589 l_float32 tmp;
1590 l_float32 *array; /* copy of input array */
1591 l_float32 *iarray; /* array of indices */
1592 NUMA *naisort;
1593
1594 PROCNAME("numaGetSortIndex");
1595
1596 if (!na)
1597 return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
1598 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
1599 return (NUMA *)ERROR_PTR("invalid sortorder", procName, NULL);
1600
1601 n = numaGetCount(na);
1602 if ((array = numaGetFArray(na, L_COPY)) == NULL)
1603 return (NUMA *)ERROR_PTR("array not made", procName, NULL);
1604 if ((iarray = (l_float32 *)CALLOC(n, sizeof(l_float32))) == NULL)
1605 return (NUMA *)ERROR_PTR("iarray not made", procName, NULL);
1606 for (i = 0; i < n; i++)
1607 iarray[i] = i;
1608
1609 /* Shell sort */
1610 for (gap = n/2; gap > 0; gap = gap / 2) {
1611 for (i = gap; i < n; i++) {
1612 for (j = i - gap; j >= 0; j -= gap) {
1613 if ((sortorder == L_SORT_INCREASING &&
1614 array[j] > array[j + gap]) ||
1615 (sortorder == L_SORT_DECREASING &&
1616 array[j] < array[j + gap]))
1617 {
1618 tmp = array[j];
1619 array[j] = array[j + gap];
1620 array[j + gap] = tmp;
1621 tmp = iarray[j];
1622 iarray[j] = iarray[j + gap];
1623 iarray[j + gap] = tmp;
1624 }
1625 }
1626 }
1627 }
1628
1629 naisort = numaCreate(n);
1630 for (i = 0; i < n; i++)
1631 numaAddNumber(naisort, iarray[i]);
1632
1633 FREE(array);
1634 FREE(iarray);
1635 return naisort;
1636 }
1637
1638
1639 /*!
1640 * numaSortByIndex()
1641 *
1642 * Input: nas
1643 * naindex (na that maps from the new numa to the input numa)
1644 * Return: nad (sorted), or null on error
1645 */
1646 NUMA *
numaSortByIndex(NUMA * nas,NUMA * naindex)1647 numaSortByIndex(NUMA *nas,
1648 NUMA *naindex)
1649 {
1650 l_int32 i, n, index;
1651 l_float32 val;
1652 NUMA *nad;
1653
1654 PROCNAME("numaSortByIndex");
1655
1656 if (!nas)
1657 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
1658 if (!naindex)
1659 return (NUMA *)ERROR_PTR("naindex not defined", procName, NULL);
1660
1661 n = numaGetCount(nas);
1662 nad = numaCreate(n);
1663 for (i = 0; i < n; i++) {
1664 numaGetIValue(naindex, i, &index);
1665 numaGetFValue(nas, index, &val);
1666 numaAddNumber(nad, val);
1667 }
1668
1669 return nad;
1670 }
1671
1672
1673 /*!
1674 * numaIsSorted()
1675 *
1676 * Input: nas
1677 * sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
1678 * &sorted (<return> 1 if sorted; 0 if not)
1679 * Return: 1 if OK; 0 on error
1680 *
1681 * Notes:
1682 * (1) This is a quick O(n) test if nas is sorted. It is useful
1683 * in situations where the array is likely to be already
1684 * sorted, and a sort operation can be avoided.
1685 */
1686 l_int32
numaIsSorted(NUMA * nas,l_int32 sortorder,l_int32 * psorted)1687 numaIsSorted(NUMA *nas,
1688 l_int32 sortorder,
1689 l_int32 *psorted)
1690 {
1691 l_int32 i, n;
1692 l_float32 preval, val;
1693
1694 PROCNAME("numaIsSorted");
1695
1696 if (!psorted)
1697 return ERROR_INT("&sorted not defined", procName, 1);
1698 *psorted = FALSE;
1699 if (!nas)
1700 return ERROR_INT("nas not defined", procName, 1);
1701 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
1702 return ERROR_INT("invalid sortorder", procName, 1);
1703
1704 n = numaGetCount(nas);
1705 numaGetFValue(nas, 0, &preval);
1706 for (i = 1; i < n; i++) {
1707 numaGetFValue(nas, i, &val);
1708 if ((sortorder == L_SORT_INCREASING && val < preval) ||
1709 (sortorder == L_SORT_DECREASING && val > preval))
1710 return 0;
1711 }
1712
1713 *psorted = TRUE;
1714 return 0;
1715 }
1716
1717
1718 /*!
1719 * numaSortPair()
1720 *
1721 * Input: nax, nay (input arrays)
1722 * sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
1723 * &nasx (<return> sorted)
1724 * &naxy (<return> sorted exactly in order of nasx)
1725 * Return: 0 if OK, 1 on error
1726 *
1727 * Notes:
1728 * (1) This function sorts the two input arrays, nax and nay,
1729 * together, using nax as the key for sorting.
1730 */
1731 l_int32
numaSortPair(NUMA * nax,NUMA * nay,l_int32 sortorder,NUMA ** pnasx,NUMA ** pnasy)1732 numaSortPair(NUMA *nax,
1733 NUMA *nay,
1734 l_int32 sortorder,
1735 NUMA **pnasx,
1736 NUMA **pnasy)
1737 {
1738 l_int32 sorted;
1739 NUMA *naindex;
1740
1741 PROCNAME("numaSortPair");
1742
1743 if (!pnasx)
1744 return ERROR_INT("&nasx not defined", procName, 1);
1745 if (!pnasy)
1746 return ERROR_INT("&nasy not defined", procName, 1);
1747 *pnasx = *pnasy = NULL;
1748 if (!nax)
1749 return ERROR_INT("nax not defined", procName, 1);
1750 if (!nay)
1751 return ERROR_INT("nay not defined", procName, 1);
1752 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
1753 return ERROR_INT("invalid sortorder", procName, 1);
1754
1755 numaIsSorted(nax, sortorder, &sorted);
1756 if (sorted == TRUE) {
1757 *pnasx = numaCopy(nax);
1758 *pnasy = numaCopy(nay);
1759 }
1760 else {
1761 naindex = numaGetSortIndex(nax, sortorder);
1762 *pnasx = numaSortByIndex(nax, naindex);
1763 *pnasy = numaSortByIndex(nay, naindex);
1764 numaDestroy(&naindex);
1765 }
1766
1767 return 0;
1768 }
1769
1770
1771 /*!
1772 * numaPseudorandomSequence()
1773 *
1774 * Input: size (of sequence)
1775 * seed (prime number; use 0 for default)
1776 * Return: na (pseudorandom on {0,...,size - 1}), or null on error
1777 *
1778 * Notes:
1779 * (1) Result is a permutation of the sequence of integers
1780 * from 0 to size - 1, where (seed % size) is repeatedly
1781 * added to the previous result, and the result is taken mod size.
1782 * This is not particularly random!
1783 */
1784 NUMA *
numaPseudorandomSequence(l_int32 size,l_int32 seed)1785 numaPseudorandomSequence(l_int32 size,
1786 l_int32 seed)
1787 {
1788 l_int32 i, val;
1789 NUMA *na;
1790
1791 PROCNAME("numaPseudorandomSequence");
1792
1793 if (size <= 0)
1794 return (NUMA *)ERROR_PTR("size <= 0", procName, NULL);
1795 if (seed == 0)
1796 seed = 165653;
1797
1798 if ((na = numaCreate(size)) == NULL)
1799 return (NUMA *)ERROR_PTR("na not made", procName, NULL);
1800 val = seed / 7;
1801 for (i = 0; i < size; i++) {
1802 val = (val + seed) % size;
1803 numaAddNumber(na, val);
1804 }
1805
1806 return na;
1807 }
1808
1809
1810 /*----------------------------------------------------------------------*
1811 * Functions requiring sorting *
1812 *----------------------------------------------------------------------*/
1813 /*!
1814 * numaGetMedian()
1815 *
1816 * Input: na
1817 * &val (<return> median val)
1818 * Return: 0 if OK; 1 on error
1819 *
1820 * Notes:
1821 * (1) Computes the median value of the numbers in the numa, by
1822 * sorting and finding the middle value in the sorted array.
1823 */
1824 l_int32
numaGetMedian(NUMA * na,l_float32 * pval)1825 numaGetMedian(NUMA *na,
1826 l_float32 *pval)
1827 {
1828 l_int32 n;
1829 NUMA *nasort;
1830
1831 PROCNAME("numaGetMedian");
1832
1833 if (!na)
1834 return ERROR_INT("na not defined", procName, 1);
1835 if (!pval)
1836 return ERROR_INT("&val not defined", procName, 1);
1837 *pval = 0.0; /* init */
1838
1839 n = numaGetCount(na);
1840 if (n == 0)
1841 return 1;
1842 if ((nasort = numaSort(NULL, na, L_SORT_DECREASING)) == NULL)
1843 return ERROR_INT("nasort not made", procName, 1);
1844 numaGetFValue(nasort, n / 2, pval);
1845
1846 numaDestroy(&nasort);
1847 return 0;
1848 }
1849
1850
1851 /*!
1852 * numaGetMode()
1853 *
1854 * Input: na
1855 * &val (<return> mode val)
1856 * &count (<optional return> mode count)
1857 * Return: 0 if OK; 1 on error
1858 *
1859 * Notes:
1860 * (1) Computes the mode value of the numbers in the numa, by
1861 * sorting and finding the value of the number with the
1862 * largest count.
1863 * (2) Optionally, also returns that count.
1864 */
1865 l_int32
numaGetMode(NUMA * na,l_float32 * pval,l_int32 * pcount)1866 numaGetMode(NUMA *na,
1867 l_float32 *pval,
1868 l_int32 *pcount)
1869 {
1870 l_int32 i, n, maxcount, prevcount;
1871 l_float32 val, maxval, prevval;
1872 l_float32 *array;
1873 NUMA *nasort;
1874
1875 PROCNAME("numaGetMode");
1876
1877 if (!na)
1878 return ERROR_INT("na not defined", procName, 1);
1879 if (!pval)
1880 return ERROR_INT("&val not defined", procName, 1);
1881
1882 *pval = 0.0;
1883 if (pcount) *pcount = 0;
1884 if ((n = numaGetCount(na)) == 0)
1885 return 1;
1886
1887 if ((nasort = numaSort(NULL, na, L_SORT_DECREASING)) == NULL)
1888 return ERROR_INT("nas not made", procName, 1);
1889 array = numaGetFArray(nasort, L_NOCOPY);
1890
1891 /* Initialize with array[0] */
1892 prevval = array[0];
1893 prevcount = 1;
1894 maxval = prevval;
1895 maxcount = prevcount;
1896
1897 /* Scan the sorted array, aggregating duplicates */
1898 for (i = 1; i < n; i++) {
1899 val = array[i];
1900 if (val == prevval)
1901 prevcount++;
1902 else { /* new value */
1903 if (prevcount > maxcount) { /* new max */
1904 maxcount = prevcount;
1905 maxval = prevval;
1906 }
1907 prevval = val;
1908 prevcount = 1;
1909 }
1910 }
1911
1912 /* Was the mode the last run of elements? */
1913 if (prevcount > maxcount) {
1914 maxcount = prevcount;
1915 maxval = prevval;
1916 }
1917
1918 *pval = maxval;
1919 if (pcount)
1920 *pcount = maxcount;
1921
1922 numaDestroy(&nasort);
1923 return 0;
1924 }
1925
1926
1927 /*----------------------------------------------------------------------*
1928 * Numa combination *
1929 *----------------------------------------------------------------------*/
1930 /*!
1931 * numaJoin()
1932 *
1933 * Input: nad (dest numa; add to this one)
1934 * nas (<optional> source numa; add from this one)
1935 * istart (starting index in nas)
1936 * iend (ending index in nas; use 0 to cat all)
1937 * Return: 0 if OK, 1 on error
1938 *
1939 * Notes:
1940 * (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
1941 * (2) iend <= 0 means 'read to the end'
1942 * (3) if nas == NULL, this is a no-op
1943 */
1944 l_int32
numaJoin(NUMA * nad,NUMA * nas,l_int32 istart,l_int32 iend)1945 numaJoin(NUMA *nad,
1946 NUMA *nas,
1947 l_int32 istart,
1948 l_int32 iend)
1949 {
1950 l_int32 ns, i;
1951 l_float32 val;
1952
1953 PROCNAME("numaJoin");
1954
1955 if (!nad)
1956 return ERROR_INT("nad not defined", procName, 1);
1957 if (!nas)
1958 return 0;
1959 ns = numaGetCount(nas);
1960 if (istart < 0)
1961 istart = 0;
1962 if (istart >= ns)
1963 return ERROR_INT("istart out of bounds", procName, 1);
1964 if (iend <= 0)
1965 iend = ns - 1;
1966 if (iend >= ns)
1967 return ERROR_INT("iend out of bounds", procName, 1);
1968 if (istart > iend)
1969 return ERROR_INT("istart > iend; nothing to add", procName, 1);
1970
1971 for (i = istart; i <= iend; i++) {
1972 numaGetFValue(nas, i, &val);
1973 numaAddNumber(nad, val);
1974 }
1975
1976 return 0;
1977 }
1978
1979
1980 /*!
1981 * numaaFlattenToNuma()
1982 *
1983 * Input: numaa
1984 * Return: numa, or null on error
1985 *
1986 * Notes:
1987 * (1) This 'flattens' the Numaa to a Numa, by joining successively
1988 * each Numa in the Numaa.
1989 * (2) It doesn't make any assumptions about the location of the
1990 * Numas in the Numaa array, unlike most Numaa functions.
1991 * (3) It leaves the input Numaa unchanged.
1992 */
1993 NUMA *
numaaFlattenToNuma(NUMAA * naa)1994 numaaFlattenToNuma(NUMAA *naa)
1995 {
1996 l_int32 i, nalloc;
1997 NUMA *na, *nad;
1998 NUMA **array;
1999
2000 PROCNAME("numaaFlattenToNuma");
2001
2002 if (!naa)
2003 return (NUMA *)ERROR_PTR("naa not defined", procName, NULL);
2004
2005 nalloc = naa->nalloc;
2006 array = numaaGetPtrArray(naa);
2007 nad = numaCreate(0);
2008 for (i = 0; i < nalloc; i++) {
2009 na = array[i];
2010 if (!na) continue;
2011 numaJoin(nad, na, 0, 0);
2012 }
2013
2014 return nad;
2015 }
2016
2017
2018