1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 - This software is distributed in the hope that it will be
4 - useful, but with NO WARRANTY OF ANY KIND.
5 - No author or distributor accepts responsibility to anyone for the
6 - consequences of using this software, or for whether it serves any
7 - particular purpose or works at all, unless he or she says so in
8 - writing. Everyone is granted permission to copy, modify and
9 - redistribute this source code, for commercial or non-commercial
10 - purposes, with the following restrictions: (1) the origin of this
11 - source code must not be misrepresented; (2) modified versions must
12 - be plainly marked as such; and (3) this notice may not be removed
13 - or altered from any source or modified source distribution.
14 *====================================================================*/
15
16
17 /*
18 * sarray.c
19 *
20 * Create/Destroy/Copy
21 * SARRAY *sarrayCreate()
22 * SARRAY *sarrayCreateWordsFromString()
23 * SARRAY *sarrayCreateLinesFromString()
24 * void sarrayDestroy()
25 * SARRAY *sarrayCopy()
26 * SARRAY *sarrayClone()
27 *
28 * Add/Remove string
29 * l_int32 sarrayAddString()
30 * l_int32 sarrayExtendArray()
31 * char *sarrayRemoveString()
32 * l_int32 sarrayClear()
33 *
34 * Accessors
35 * l_int32 sarrayGetCount()
36 * char **sarrayGetArray()
37 * char *sarrayGetString()
38 * l_int32 sarrayGetRefcount()
39 * l_int32 sarrayChangeRefcount()
40 *
41 * Conversion back to string
42 * char *sarrayToString()
43 * char *sarrayToStringRange()
44 *
45 * Concatenate 2 sarrays
46 * l_int32 sarrayConcatenate()
47 * l_int32 sarrayAppendRange()
48 *
49 * Convert word sarray to (formatted) line sarray
50 * SARRAY *sarrayConvertWordsToLines()
51 *
52 * Split string on separator list
53 * SARRAY *sarraySplitString()
54 *
55 * Filter sarray
56 * SARRAY *sarraySelectBySubstring()
57 * l_int32 sarrayParseRange()
58 *
59 * Sort
60 * SARRAY *sarraySort()
61 * l_int32 stringCompareLexical()
62 *
63 * Serialize for I/O
64 * SARRAY *sarrayRead()
65 * SARRAY *sarrayReadStream()
66 * l_int32 sarrayWrite()
67 * l_int32 sarrayWriteStream()
68 * l_int32 sarrayAppend()
69 *
70 * Directory filenames
71 * SARRAY *getSortedPathnamesInDirectory()
72 * SARRAY *getFilenamesInDirectory()
73 *
74 *
75 * Comments on usage:
76 *
77 * These functions are important for efficient manipulation
78 * of string data. They have been used in leptonica for
79 * generating and parsing text files, and for generating
80 * code for compilation. The user is responsible for
81 * correctly disposing of strings that have been extracted
82 * from sarrays.
83 *
84 * - When you want a string from an Sarray to inspect it, or
85 * plan to make a copy of it later, use sarrayGetString()
86 * with copyflag = 0. In this case, you must neither free
87 * the string nor put it directly in another array.
88 * We provide the copyflag constant L_NOCOPY, which is 0,
89 * for this purpose:
90 * str-not-owned = sarrayGetString(sa, index, L_NOCOPY);
91 * To extract a copy of a string, use:
92 * str-owned = sarrayGetString(sa, index, L_COPY);
93 *
94 * - When you want to insert a string that is in one
95 * array (sa1) into another array (sa2), always leaving the
96 * first array intact, you have two options:
97 * (1) use copyflag = L_COPY to make an immediate copy,
98 * which you must then add to the second array
99 * by insertion; namely,
100 * str-owned = sarrayGetString(sa1, index, L_COPY);
101 * sarrayAddString(sa2, str-owned, L_INSERT);
102 * (2) use copyflag = L_NOCOPY to get another handle to
103 * the string, in which case you must add
104 * a copy of it to the second string array:
105 * str-not-owned = sarrayGetString(sa1, index, L_NOCOPY);
106 * sarrayAddString(sa2, str-not-owned, L_COPY);
107 *
108 * In all cases, when you use copyflag = L_COPY to extract
109 * a string from an array, you must either free it
110 * or insert it in an array that will be freed later.
111 */
112
113 #include <stdio.h>
114 #include <string.h>
115 #include <stdlib.h>
116 #ifndef COMPILER_MSVC
117 #include <dirent.h> /* unix only */
118 #endif /* !COMPILER_MSVC */
119 #include "allheaders.h"
120
121 static const l_int32 INITIAL_PTR_ARRAYSIZE = 50; /* arbitrary default: number of string ptr slots allocated by sarrayCreate() when n <= 0 */
122 static const l_int32 L_BUF_SIZE = 512; /* NOTE(review): not referenced in the visible part of this file; presumably a scratch buffer size used further down -- confirm */
123
124
125 /*--------------------------------------------------------------------------*
126 * String array create/destroy/copy/extend *
127 *--------------------------------------------------------------------------*/
128 /*!
129 * sarrayCreate()
130 *
131 * Input: size of string ptr array to be alloc'd
132 * (use 0 for default)
133 * Return: sarray, or null on error
134 */
135 SARRAY *
sarrayCreate(l_int32 n)136 sarrayCreate(l_int32 n)
137 {
138 SARRAY *sa;
139
140 PROCNAME("sarrayCreate");
141
142 if (n <= 0)
143 n = INITIAL_PTR_ARRAYSIZE;
144
145 if ((sa = (SARRAY *)CALLOC(1, sizeof(SARRAY))) == NULL)
146 return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
147 if ((sa->array = (char **)CALLOC(n, sizeof(char *))) == NULL)
148 return (SARRAY *)ERROR_PTR("ptr array not made", procName, NULL);
149
150 sa->nalloc = n;
151 sa->n = 0;
152 sa->refcount = 1;
153 return sa;
154 }
155
156
157 /*!
158 * sarrayCreateWordsFromString()
159 *
160 * Input: string
161 * Return: sarray, or null on error
162 *
163 * Notes:
164 * (1) This finds the number of word substrings, creates an sarray
165 * of this size, and puts copies of each substring into the sarray.
166 */
167 SARRAY *
sarrayCreateWordsFromString(const char * string)168 sarrayCreateWordsFromString(const char *string)
169 {
170 char separators[] = " \n\t";
171 l_int32 i, nsub, size, inword;
172 SARRAY *sa;
173
174 PROCNAME("sarrayCreateWordsFromString");
175
176 if (!string)
177 return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL);
178
179 /* Find the number of words */
180 size = strlen(string);
181 nsub = 0;
182 inword = FALSE;
183 for (i = 0; i < size; i++) {
184 if (inword == FALSE &&
185 (string[i] != ' ' && string[i] != '\t' && string[i] != '\n')) {
186 inword = TRUE;
187 nsub++;
188 }
189 else if (inword == TRUE &&
190 (string[i] == ' ' || string[i] == '\t' || string[i] == '\n')) {
191 inword = FALSE;
192 }
193 }
194
195 if ((sa = sarrayCreate(nsub)) == NULL)
196 return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
197 sarraySplitString(sa, string, separators);
198
199 return sa;
200 }
201
202
203 /*!
204 * sarrayCreateLinesFromString()
205 *
206 * Input: string
207 * blankflag (0 to exclude blank lines; 1 to include)
208 * Return: sarray, or null on error
209 *
210 * Notes:
211 * (1) This finds the number of line substrings, creates an sarray of
212 * this size, and puts copies of each substring into the sarray.
213 */
214 SARRAY *
sarrayCreateLinesFromString(char * string,l_int32 blankflag)215 sarrayCreateLinesFromString(char *string,
216 l_int32 blankflag)
217 {
218 l_int32 i, nsub, size, startptr;
219 char *cstring, *substring;
220 SARRAY *sa;
221
222 PROCNAME("sarrayCreateLinesFromString");
223
224 if (!string)
225 return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL);
226
227 /* find the number of lines */
228 size = strlen(string);
229 nsub = 0;
230 for (i = 0; i < size; i++) {
231 if (string[i] == '\n')
232 nsub++;
233 }
234
235 if ((sa = sarrayCreate(nsub)) == NULL)
236 return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
237
238 if (blankflag) { /* keep blank lines as null strings */
239 /* Make a copy for munging */
240 if ((cstring = stringNew(string)) == NULL)
241 return (SARRAY *)ERROR_PTR("cstring not made", procName, NULL);
242 /* We'll insert nulls like strtok */
243 startptr = 0;
244 for (i = 0; i < size; i++) {
245 if (cstring[i] == '\n') {
246 cstring[i] = '\0';
247 if ((substring = stringNew(cstring + startptr)) == NULL)
248 return (SARRAY *)ERROR_PTR("substring not made",
249 procName, NULL);
250 sarrayAddString(sa, substring, L_INSERT);
251 /* fprintf(stderr, "substring = %s\n", substring); */
252 startptr = i + 1;
253 }
254 }
255 if (startptr < size) { /* no newline at end of last line */
256 if ((substring = stringNew(cstring + startptr)) == NULL)
257 return (SARRAY *)ERROR_PTR("substring not made",
258 procName, NULL);
259 sarrayAddString(sa, substring, L_INSERT);
260 /* fprintf(stderr, "substring = %s\n", substring); */
261 }
262 FREE(cstring);
263 }
264 else { /* remove blank lines; use strtok */
265 sarraySplitString(sa, string, "\n");
266 }
267
268 return sa;
269 }
270
271
272 /*!
273 * sarrayDestroy()
274 *
275 * Input: &sarray <to be nulled>
276 * Return: void
277 *
278 * Notes:
279 * (1) Decrements the ref count and, if 0, destroys the sarray.
280 * (2) Always nulls the input ptr.
281 */
282 void
sarrayDestroy(SARRAY ** psa)283 sarrayDestroy(SARRAY **psa)
284 {
285 l_int32 i;
286 SARRAY *sa;
287
288 PROCNAME("sarrayDestroy");
289
290 if (psa == NULL) {
291 L_WARNING("ptr address is NULL!", procName);
292 return;
293 }
294 if ((sa = *psa) == NULL)
295 return;
296
297 sarrayChangeRefcount(sa, -1);
298 if (sarrayGetRefcount(sa) <= 0) {
299 if (sa->array) {
300 for (i = 0; i < sa->n; i++)
301 FREE(sa->array[i]);
302 FREE(sa->array);
303 }
304 FREE(sa);
305 }
306
307 *psa = NULL;
308 return;
309 }
310
311
312 /*!
313 * sarrayCopy()
314 *
315 * Input: sarray
316 * Return: copy of sarray, or null on error
317 */
318 SARRAY *
sarrayCopy(SARRAY * sa)319 sarrayCopy(SARRAY *sa)
320 {
321 l_int32 i;
322 SARRAY *csa;
323
324 PROCNAME("sarrayCopy");
325
326 if (!sa)
327 return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL);
328
329 if ((csa = sarrayCreate(sa->nalloc)) == NULL)
330 return (SARRAY *)ERROR_PTR("csa not made", procName, NULL);
331
332 for (i = 0; i < sa->n; i++)
333 sarrayAddString(csa, sa->array[i], L_COPY);
334
335 return csa;
336 }
337
338
339 /*!
340 * sarrayClone()
341 *
342 * Input: sarray
343 * Return: ptr to same sarray, or null on error
344 */
345 SARRAY *
sarrayClone(SARRAY * sa)346 sarrayClone(SARRAY *sa)
347 {
348 PROCNAME("sarrayClone");
349
350 if (!sa)
351 return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL);
352 sarrayChangeRefcount(sa, 1);
353 return sa;
354 }
355
356
357 /*!
358 * sarrayAddString()
359 *
360 * Input: sarray
361 * string (string to be added)
362 * copyflag (L_INSERT, L_COPY)
363 * Return: 0 if OK, 1 on error
364 *
365 * Notes:
366 * (1) Legacy usage decrees that we always use 0 to insert a string
367 * directly and 1 to insert a copy of the string. The
368 * enums for L_INSERT and L_COPY agree with this convention,
369 * and will not change in the future.
370 * (2) See usage comments at the top of this file.
371 */
372 l_int32
sarrayAddString(SARRAY * sa,char * string,l_int32 copyflag)373 sarrayAddString(SARRAY *sa,
374 char *string,
375 l_int32 copyflag)
376 {
377 l_int32 n;
378
379 PROCNAME("sarrayAddString");
380
381 if (!sa)
382 return ERROR_INT("sa not defined", procName, 1);
383 if (!string)
384 return ERROR_INT("string not defined", procName, 1);
385 if (copyflag != L_INSERT && copyflag != L_COPY)
386 return ERROR_INT("invalid copyflag", procName, 1);
387
388 n = sarrayGetCount(sa);
389 if (n >= sa->nalloc)
390 sarrayExtendArray(sa);
391
392 if (copyflag == L_INSERT)
393 sa->array[n] = string;
394 else /* L_COPY */
395 sa->array[n] = stringNew(string);
396 sa->n++;
397
398 return 0;
399 }
400
401
402 /*!
403 * sarrayExtendArray()
404 *
405 * Input: sarray
406 * Return: 0 if OK, 1 on error
407 */
408 l_int32
sarrayExtendArray(SARRAY * sa)409 sarrayExtendArray(SARRAY *sa)
410 {
411 PROCNAME("sarrayExtendArray");
412
413 if (!sa)
414 return ERROR_INT("sa not defined", procName, 1);
415
416 if ((sa->array = (char **)reallocNew((void **)&sa->array,
417 sizeof(char *) * sa->nalloc,
418 2 * sizeof(char *) * sa->nalloc)) == NULL)
419 return ERROR_INT("new ptr array not returned", procName, 1);
420
421 sa->nalloc *= 2;
422 return 0;
423 }
424
425
426 /*!
427 * sarrayRemoveString()
428 *
429 * Input: sarray
430 * index (of string within sarray)
431 * Return: removed string, or null on error
432 */
433 char *
sarrayRemoveString(SARRAY * sa,l_int32 index)434 sarrayRemoveString(SARRAY *sa,
435 l_int32 index)
436 {
437 char *string;
438 char **array;
439 l_int32 i, n, nalloc;
440
441 PROCNAME("sarrayRemoveString");
442
443 if (!sa)
444 return (char *)ERROR_PTR("sa not defined", procName, NULL);
445
446 if ((array = sarrayGetArray(sa, &nalloc, &n)) == NULL)
447 return (char *)ERROR_PTR("array not returned", procName, NULL);
448
449 if (index < 0 || index >= n)
450 return (char *)ERROR_PTR("array index out of bounds", procName, NULL);
451
452 string = array[index];
453
454 /* If removed string is not at end of array, shift
455 * to fill in, maintaining original ordering.
456 * Note: if we didn't care about the order, we could
457 * put the last string array[n - 1] directly into the hole. */
458 for (i = index; i < n - 1; i++)
459 array[i] = array[i + 1];
460
461 sa->n--;
462 return string;
463 }
464
465
466 /*!
467 * sarrayClear()
468 *
469 * Input: sarray
470 * Return: 0 if OK; 1 on error
471 */
472 l_int32
sarrayClear(SARRAY * sa)473 sarrayClear(SARRAY *sa)
474 {
475 l_int32 i;
476
477 PROCNAME("sarrayClear");
478
479 if (!sa)
480 return ERROR_INT("sa not defined", procName, 1);
481 for (i = 0; i < sa->n; i++) { /* free strings and null ptrs */
482 FREE(sa->array[i]);
483 sa->array[i] = NULL;
484 }
485 sa->n = 0;
486 return 0;
487 }
488
489
490 /*----------------------------------------------------------------------*
491 * Accessors *
492 *----------------------------------------------------------------------*/
493 /*!
494 * sarrayGetCount()
495 *
496 * Input: sarray
497 * Return: count, or 0 if no strings or on error
498 */
499 l_int32
sarrayGetCount(SARRAY * sa)500 sarrayGetCount(SARRAY *sa)
501 {
502 PROCNAME("sarrayGetCount");
503
504 if (!sa)
505 return ERROR_INT("sa not defined", procName, 0);
506 return sa->n;
507 }
508
509
510 /*!
511 * sarrayGetArray()
512 *
513 * Input: sarray
514 * &nalloc (<optional return> number allocated string ptrs)
515 * &n (<optional return> number allocated strings)
516 * Return: ptr to string array, or null on error
517 *
518 * Notes:
519 * (1) Caution: the returned array is not a copy, so caller
520 * must not destroy it!
521 */
522 char **
sarrayGetArray(SARRAY * sa,l_int32 * pnalloc,l_int32 * pn)523 sarrayGetArray(SARRAY *sa,
524 l_int32 *pnalloc,
525 l_int32 *pn)
526 {
527 char **array;
528
529 PROCNAME("sarrayGetArray");
530
531 if (!sa)
532 return (char **)ERROR_PTR("sa not defined", procName, NULL);
533
534 array = sa->array;
535 if (pnalloc) *pnalloc = sa->nalloc;
536 if (pn) *pn = sa->n;
537
538 return array;
539 }
540
541
542 /*!
543 * sarrayGetString()
544 *
545 * Input: sarray
546 * index (to the index-th string)
547 * copyflag (L_NOCOPY or L_COPY)
548 * Return: string, or null on error
549 *
550 * Notes:
551 * (1) Legacy usage decrees that we always use 0 to get the
552 * pointer to the string itself, and 1 to get a copy of
553 * the string.
554 * (2) See usage comments at the top of this file.
555 * (3) To get a pointer to the string itself, use for copyflag:
556 * L_NOCOPY or 0 or FALSE
557 * To get a copy of the string, use for copyflag:
558 * L_COPY or 1 or TRUE
559 * The const values of L_NOCOPY and L_COPY are guaranteed not
560 * to change.
561 */
562 char *
sarrayGetString(SARRAY * sa,l_int32 index,l_int32 copyflag)563 sarrayGetString(SARRAY *sa,
564 l_int32 index,
565 l_int32 copyflag)
566 {
567 PROCNAME("sarrayGetString");
568
569 if (!sa)
570 return (char *)ERROR_PTR("sa not defined", procName, NULL);
571 if (index < 0 || index >= sa->n)
572 return (char *)ERROR_PTR("index not valid", procName, NULL);
573 if (copyflag != L_NOCOPY && copyflag != L_COPY)
574 return (char *)ERROR_PTR("invalid copyflag", procName, NULL);
575
576 if (copyflag == L_NOCOPY)
577 return sa->array[index];
578 else /* L_COPY */
579 return stringNew(sa->array[index]);
580 }
581
582
583 /*!
584 * sarrayGetRefCount()
585 *
586 * Input: sarray
587 * Return: refcount, or UNDEF on error
588 */
589 l_int32
sarrayGetRefcount(SARRAY * sa)590 sarrayGetRefcount(SARRAY *sa)
591 {
592 PROCNAME("sarrayGetRefcount");
593
594 if (!sa)
595 return ERROR_INT("sa not defined", procName, UNDEF);
596 return sa->refcount;
597 }
598
599
600 /*!
601 * sarrayChangeRefCount()
602 *
603 * Input: sarray
604 * delta (change to be applied)
605 * Return: 0 if OK, 1 on error
606 */
607 l_int32
sarrayChangeRefcount(SARRAY * sa,l_int32 delta)608 sarrayChangeRefcount(SARRAY *sa,
609 l_int32 delta)
610 {
611 PROCNAME("sarrayChangeRefcount");
612
613 if (!sa)
614 return ERROR_INT("sa not defined", procName, UNDEF);
615 sa->refcount += delta;
616 return 0;
617 }
618
619
620 /*----------------------------------------------------------------------*
621 * Conversion to string *
622 *----------------------------------------------------------------------*/
623 /*!
624 * sarrayToString()
625 *
626 * Input: sarray
627 * addnlflag (flag: 0 adds nothing to each substring
628 * 1 adds '\n' to each substring
629 * 2 adds ' ' to each substring)
630 * Return: dest string, or null on error
631 *
632 * Notes:
633 * (1) Concatenates all the strings in the sarray, preserving
634 * all white space.
635 * (2) If addnlflag != 0, adds either a '\n' or a ' ' after
636 * each substring.
637 * (3) This function was NOT implemented as:
638 * for (i = 0; i < n; i++)
639 * strcat(dest, sarrayGetString(sa, i, L_NOCOPY));
640 * Do you see why?
641 */
642 char *
sarrayToString(SARRAY * sa,l_int32 addnlflag)643 sarrayToString(SARRAY *sa,
644 l_int32 addnlflag)
645 {
646 PROCNAME("sarrayToString");
647
648 if (!sa)
649 return (char *)ERROR_PTR("sa not defined", procName, NULL);
650
651 return sarrayToStringRange(sa, 0, 0, addnlflag);
652 }
653
654
655 /*!
656 * sarrayToStringRange()
657 *
658 * Input: sarray
659 * first (index of first string to use; starts with 0)
660 * nstrings (number of strings to append into the result; use
661 * 0 to append to the end of the sarray)
662 * addnlflag (flag: 0 adds nothing to each substring
663 * 1 adds '\n' to each substring
664 * 2 adds ' ' to each substring)
665 * Return: dest string, or null on error
666 *
667 * Notes:
668 * (1) Concatenates the specified strings inthe sarray, preserving
669 * all white space.
670 * (2) If addnlflag != 0, adds either a '\n' or a ' ' after
671 * each substring.
672 * (3) If the sarray is empty, this returns a string with just
673 * the character corresponding to @addnlflag.
674 */
675 char *
sarrayToStringRange(SARRAY * sa,l_int32 first,l_int32 nstrings,l_int32 addnlflag)676 sarrayToStringRange(SARRAY *sa,
677 l_int32 first,
678 l_int32 nstrings,
679 l_int32 addnlflag)
680 {
681 char *dest, *src;
682 l_int32 n, i, last, size, index, len;
683
684 PROCNAME("sarrayToStringRange");
685
686 if (!sa)
687 return (char *)ERROR_PTR("sa not defined", procName, NULL);
688 if (addnlflag != 0 && addnlflag != 1 && addnlflag != 2)
689 return (char *)ERROR_PTR("invalid addnlflag", procName, NULL);
690
691 n = sarrayGetCount(sa);
692
693 /* Empty sa; return char corresponding to addnlflag only */
694 if (n == 0) {
695 if (first == 0) {
696 if (addnlflag == 0)
697 return stringNew("");
698 if (addnlflag == 1)
699 return stringNew("\n");
700 else /* addnlflag == 2) */
701 return stringNew(" ");
702 }
703 else
704 return (char *)ERROR_PTR("first not valid", procName, NULL);
705 }
706
707 if (first < 0 || first >= n)
708 return (char *)ERROR_PTR("first not valid", procName, NULL);
709 if (nstrings == 0 || (nstrings > n - first))
710 nstrings = n - first; /* no overflow */
711 last = first + nstrings - 1;
712
713 size = 0;
714 for (i = first; i <= last; i++)
715 size += strlen(sarrayGetString(sa, i, L_NOCOPY)) + 2;
716
717 if ((dest = (char *)CALLOC(size + 1, sizeof(char))) == NULL)
718 return (char *)ERROR_PTR("dest not made", procName, NULL);
719
720 index = 0;
721 for (i = first; i <= last; i++) {
722 src = sa->array[i];
723 len = strlen(src);
724 memcpy(dest + index, src, len);
725 index += len;
726 if (addnlflag == 1) {
727 dest[index] = '\n';
728 index++;
729 }
730 else if (addnlflag == 2) {
731 dest[index] = ' ';
732 index++;
733 }
734 }
735
736 return dest;
737 }
738
739
740 /*----------------------------------------------------------------------*
741 * Concatenate 2 sarrays *
742 *----------------------------------------------------------------------*/
743 /*!
744 * sarrayConcatenate()
745 *
746 * Input: sa1 (to be added to)
747 * sa2 (append to sa1)
748 * Return: 0 if OK, 1 on error
749 *
750 * Notes:
751 * (1) Copies of the strings in sarray2 are added to sarray1.
752 */
753 l_int32
sarrayConcatenate(SARRAY * sa1,SARRAY * sa2)754 sarrayConcatenate(SARRAY *sa1,
755 SARRAY *sa2)
756 {
757 char *str;
758 l_int32 n, i;
759
760 PROCNAME("sarrayConcatenate");
761
762 if (!sa1)
763 return ERROR_INT("sa1 not defined", procName, 1);
764 if (!sa2)
765 return ERROR_INT("sa2 not defined", procName, 1);
766
767 n = sarrayGetCount(sa2);
768 for (i = 0; i < n; i++) {
769 str = sarrayGetString(sa2, i, L_NOCOPY);
770 sarrayAddString(sa1, str, L_COPY);
771 }
772
773 return 0;
774 }
775
776
777 /*!
778 * sarrayAppendRange()
779 *
780 * Input: sa1 (to be added to)
781 * sa2 (append specified range of strings in sa2 to sa1)
782 * start (index of first string of sa2 to append)
783 * end (index of last string of sa2 to append)
784 * Return: 0 if OK, 1 on error
785 *
786 * Notes:
787 * (1) Copies of the strings in sarray2 are added to sarray1.
788 * (2) The [start ... end] range is truncated if necessary.
789 */
790 l_int32
sarrayAppendRange(SARRAY * sa1,SARRAY * sa2,l_int32 start,l_int32 end)791 sarrayAppendRange(SARRAY *sa1,
792 SARRAY *sa2,
793 l_int32 start,
794 l_int32 end)
795 {
796 char *str;
797 l_int32 n, i;
798
799 PROCNAME("sarrayAppendRange");
800
801 if (!sa1)
802 return ERROR_INT("sa1 not defined", procName, 1);
803 if (!sa2)
804 return ERROR_INT("sa2 not defined", procName, 1);
805 if (start < 0)
806 start = 0;
807 n = sarrayGetCount(sa2);
808 if (end >= n)
809 end = n - 1;
810 if (start > end)
811 return ERROR_INT("start > end", procName, 1);
812
813 for (i = start; i <= end; i++) {
814 str = sarrayGetString(sa2, i, L_NOCOPY);
815 sarrayAddString(sa1, str, L_COPY);
816 }
817
818 return 0;
819 }
820
821
822 /*----------------------------------------------------------------------*
823 * Convert word sarray to line sarray *
824 *----------------------------------------------------------------------*/
825 /*!
826 * sarrayConvertWordsToLines()
827 *
828 * Input: sa (sa of individual words)
829 * linesize (max num of chars in each line)
830 * Return: saout (sa of formatted lines), or null on error
831 *
832 * This is useful for re-typesetting text to a specific maximum
833 * line length. The individual words in the input sarray
834 * are concatenated into textlines. An input word string of zero
835 * length is taken to be a paragraph separator. Each time
836 * such a string is found, the current line is ended and
837 * a new line is also produced that contains just the
838 * string of zero length (""). When the output sarray
839 * of lines is eventually converted to a string with newlines
840 * (typically) appended to each line string, the empty
841 * strings are just converted to newlines, producing the visible
842 * paragraph separation.
843 *
844 * What happens when a word is larger than linesize?
845 * We write it out as a single line anyway! Words preceding
846 * or following this long word are placed on lines preceding
847 * or following the line with the long word. Why this choice?
848 * Long "words" found in text documents are typically URLs, and
849 * it's often desirable not to put newlines in the middle of a URL.
850 * The text display program (e.g., text editor) will typically
851 * wrap the long "word" to fit in the window.
852 */
853 SARRAY *
sarrayConvertWordsToLines(SARRAY * sa,l_int32 linesize)854 sarrayConvertWordsToLines(SARRAY *sa,
855 l_int32 linesize)
856 {
857 char *wd, *strl;
858 char emptystring[] = "";
859 l_int32 n, i, len, totlen;
860 SARRAY *sal, *saout;
861
862 PROCNAME("sarrayConvertWordsToLines");
863
864 if (!sa)
865 return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL);
866
867 if ((saout = sarrayCreate(0)) == NULL)
868 return (SARRAY *)ERROR_PTR("saout not defined", procName, NULL);
869
870 n = sarrayGetCount(sa);
871 totlen = 0;
872 sal = NULL;
873 for (i = 0; i < n; i++) {
874 if (!sal) {
875 if ((sal = sarrayCreate(0)) == NULL)
876 return (SARRAY *)ERROR_PTR("sal not made", procName, NULL);
877 }
878 wd = sarrayGetString(sa, i, L_NOCOPY);
879 len = strlen(wd);
880 if (len == 0) { /* end of paragraph: end line & insert blank line */
881 if (totlen > 0) {
882 strl = sarrayToString(sal, 2);
883 sarrayAddString(saout, strl, L_INSERT);
884 }
885 sarrayAddString(saout, emptystring, L_COPY);
886 sarrayDestroy(&sal);
887 totlen = 0;
888 }
889 else if (totlen == 0 && len + 1 > linesize) { /* long word! */
890 sarrayAddString(saout, wd, L_COPY); /* copy to one line */
891 }
892 else if (totlen + len + 1 > linesize) { /* end line & start new one */
893 strl = sarrayToString(sal, 2);
894 sarrayAddString(saout, strl, L_INSERT);
895 sarrayDestroy(&sal);
896 if ((sal = sarrayCreate(0)) == NULL)
897 return (SARRAY *)ERROR_PTR("sal not made", procName, NULL);
898 sarrayAddString(sal, wd, L_COPY);
899 totlen = len + 1;
900 }
901 else { /* add to current line */
902 sarrayAddString(sal, wd, L_COPY);
903 totlen += len + 1;
904 }
905 }
906 if (totlen > 0) { /* didn't end with blank line; output last line */
907 strl = sarrayToString(sal, 2);
908 sarrayAddString(saout, strl, L_INSERT);
909 sarrayDestroy(&sal);
910 }
911
912 return saout;
913
914 }
915
916
917 /*----------------------------------------------------------------------*
918 * Split string on separator list *
919 *----------------------------------------------------------------------*/
920 /*
921 * sarraySplitString()
922 *
923 * Input: sa (to append to; typically empty initially)
924 * str (string to split; not changed)
925 * separators (characters that split input string)
926 * Return: 0 if OK, 1 on error.
927 *
928 * Notes:
929 * (1) This uses strtokSafe(). See the notes there in utils.c.
930 */
931 l_int32
sarraySplitString(SARRAY * sa,const char * str,const char * separators)932 sarraySplitString(SARRAY *sa,
933 const char *str,
934 const char *separators)
935 {
936 char *cstr, *substr, *saveptr;
937
938 PROCNAME("sarraySplitString");
939
940 if (!sa)
941 return ERROR_INT("sa not defined", procName, 1);
942 if (!str)
943 return ERROR_INT("str not defined", procName, 1);
944 if (!separators)
945 return ERROR_INT("separators not defined", procName, 1);
946
947 cstr = stringNew(str); /* preserves const-ness of input str */
948 substr = strtokSafe(cstr, separators, &saveptr);
949 if (substr)
950 sarrayAddString(sa, substr, L_INSERT);
951 while ((substr = strtokSafe(NULL, separators, &saveptr)))
952 sarrayAddString(sa, substr, L_INSERT);
953 FREE(cstr);
954
955 return 0;
956 }
957
958
959 /*----------------------------------------------------------------------*
960 * Filter sarray *
961 *----------------------------------------------------------------------*/
962 /*!
963 * sarraySelectBySubstring()
964 *
965 * Input: sain (input sarray)
966 * substr (<optional> substring for matching; can be NULL)
967 * Return: saout (output sarray, filtered with substring) or null on error
968 *
969 * Notes:
970 * (1) This selects all strings in sain that have substr as a substring.
971 * Note that we can't use strncmp() because we're looking for
972 * a match to the substring anywhere within each filename.
973 * (2) If substr == NULL, returns a copy of the sarray.
974 */
975 SARRAY *
sarraySelectBySubstring(SARRAY * sain,const char * substr)976 sarraySelectBySubstring(SARRAY *sain,
977 const char *substr)
978 {
979 char *str;
980 l_int32 n, i, offset, found;
981 SARRAY *saout;
982
983 PROCNAME("sarraySelectBySubstring");
984
985 if (!sain)
986 return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);
987
988 n = sarrayGetCount(sain);
989 if (!substr || n == 0)
990 return sarrayCopy(sain);
991
992 saout = sarrayCreate(n);
993 for (i = 0; i < n; i++) {
994 str = sarrayGetString(sain, i, L_NOCOPY);
995 arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
996 strlen(substr), &offset, &found);
997 if (found)
998 sarrayAddString(saout, str, L_COPY);
999 }
1000
1001 return saout;
1002 }
1003
1004
1005 /*!
1006 * sarrayParseRange()
1007 *
1008 * Input: sa (input sarray)
1009 * start (index to start range search)
1010 * &actualstart (<return> index of actual start; may be > 'start')
1011 * &end (<return> index of end)
1012 * &newstart (<return> index of start of next range)
1013 * substr (substring for matching at beginning of string)
1014 * loc (byte offset within the string for the pattern; use
1015 * -1 if the location does not matter);
1016 * Return: 0 if valid range found; 1 otherwise
1017 *
1018 * Notes:
1019 * (1) This finds the range of the next set of strings in SA,
1020 * beginning the search at 'start', that does NOT have
1021 * the substring 'substr' either at the indicated location
1022 * in the string or anywhere in the string. The input
1023 * variable 'loc' is the specified offset within the string;
1024 * use -1 to indicate 'anywhere in the string'.
1025 * (2) Always check the return value to verify that a valid range
1026 * was found.
1027 * (3) If a valid range is not found, the values of actstart,
1028 * end and newstart are all set to the size of sa.
1029 * (4) If this is the last valid range, newstart returns the value n.
1030 * In use, this should be tested before calling the function.
1031 * (5) Usage example. To find all the valid ranges in a file
1032 * where the invalid lines begin with two dashes, copy each
1033 * line in the file to a string in an sarray, and do:
1034 * start = 0;
1035 * while (!sarrayParseRange(sa, start, &actstart, &end, &start,
1036 * "--", 0))
1037 * fprintf(stderr, "start = %d, end = %d\n", actstart, end);
1038 */
1039 l_int32
sarrayParseRange(SARRAY * sa,l_int32 start,l_int32 * pactualstart,l_int32 * pend,l_int32 * pnewstart,const char * substr,l_int32 loc)1040 sarrayParseRange(SARRAY *sa,
1041 l_int32 start,
1042 l_int32 *pactualstart,
1043 l_int32 *pend,
1044 l_int32 *pnewstart,
1045 const char *substr,
1046 l_int32 loc)
1047 {
1048 char *str;
1049 l_int32 n, i, offset, found;
1050
1051 PROCNAME("sarrayParseRange");
1052
1053 if (!sa)
1054 return ERROR_INT("sa not defined", procName, 1);
1055 if (!pactualstart || !pend || !pnewstart)
1056 return ERROR_INT("not all range addresses defined", procName, 1);
1057 n = sarrayGetCount(sa);
1058 *pactualstart = *pend = *pnewstart = n;
1059 if (!substr)
1060 return ERROR_INT("substr not defined", procName, 1);
1061
1062 /* Look for the first string without the marker */
1063 if (start < 0 || start >= n)
1064 return 1;
1065 for (i = start; i < n; i++) {
1066 str = sarrayGetString(sa, i, L_NOCOPY);
1067 arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
1068 strlen(substr), &offset, &found);
1069 if (loc < 0) {
1070 if (!found) break;
1071 } else {
1072 if (!found || offset != loc) break;
1073 }
1074 }
1075 start = i;
1076 if (i == n) /* couldn't get started */
1077 return 1;
1078
1079 /* Look for the last string without the marker */
1080 *pactualstart = start;
1081 for (i = start + 1; i < n; i++) {
1082 str = sarrayGetString(sa, i, L_NOCOPY);
1083 arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
1084 strlen(substr), &offset, &found);
1085 if (loc < 0) {
1086 if (found) break;
1087 } else {
1088 if (found && offset == loc) break;
1089 }
1090 }
1091 *pend = i - 1;
1092 start = i;
1093 if (i == n) /* no further range */
1094 return 0;
1095
1096 /* Look for the first string after *pend without the marker.
1097 * This will start the next run of strings, if it exists. */
1098 for (i = start; i < n; i++) {
1099 str = sarrayGetString(sa, i, L_NOCOPY);
1100 arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
1101 strlen(substr), &offset, &found);
1102 if (loc < 0) {
1103 if (!found) break;
1104 } else {
1105 if (!found || offset != loc) break;
1106 }
1107 }
1108 if (i < n)
1109 *pnewstart = i;
1110
1111 return 0;
1112 }
1113
1114
1115 /*----------------------------------------------------------------------*
1116 * Sort *
1117 *----------------------------------------------------------------------*/
1118 /*!
1119 * sarraySort()
1120 *
1121 * Input: saout (output sarray; can be NULL or equal to sain)
1122 * sain (input sarray)
1123 * sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
1124 * Return: saout (output sarray, sorted by ascii value), or null on error
1125 *
1126 * Notes:
1127 * (1) Set saout = sain for in-place; otherwise, set naout = NULL.
1128 * (2) Shell sort, modified from K&R, 2nd edition, p.62.
1129 * Slow but simple O(n logn) sort.
1130 */
1131 SARRAY *
sarraySort(SARRAY * saout,SARRAY * sain,l_int32 sortorder)1132 sarraySort(SARRAY *saout,
1133 SARRAY *sain,
1134 l_int32 sortorder)
1135 {
1136 char **array;
1137 char *tmp;
1138 l_int32 n, i, j, gap;
1139
1140 PROCNAME("sarraySort");
1141
1142 if (!sain)
1143 return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);
1144
1145 /* Make saout if necessary; otherwise do in-place */
1146 if (!saout)
1147 saout = sarrayCopy(sain);
1148 else if (sain != saout)
1149 return (SARRAY *)ERROR_PTR("invalid: not in-place", procName, NULL);
1150 array = saout->array; /* operate directly on the array */
1151 n = sarrayGetCount(saout);
1152
1153 /* Shell sort */
1154 for (gap = n/2; gap > 0; gap = gap / 2) {
1155 for (i = gap; i < n; i++) {
1156 for (j = i - gap; j >= 0; j -= gap) {
1157 if ((sortorder == L_SORT_INCREASING &&
1158 stringCompareLexical(array[j], array[j + gap])) ||
1159 (sortorder == L_SORT_DECREASING &&
1160 stringCompareLexical(array[j + gap], array[j])))
1161 {
1162 tmp = array[j];
1163 array[j] = array[j + gap];
1164 array[j + gap] = tmp;
1165 }
1166 }
1167 }
1168 }
1169
1170 return saout;
1171 }
1172
1173
1174 /*!
1175 * stringCompareLexical()
1176 *
1177 * Input: str1
1178 * str2
1179 * Return: 1 if str1 > str2 (lexically); 0 otherwise
1180 *
1181 * Notes:
1182 * (1) If the lexical values are identical, return a 0, to
1183 * indicate that no swapping is required to sort the strings.
1184 */
1185 l_int32
stringCompareLexical(const char * str1,const char * str2)1186 stringCompareLexical(const char *str1,
1187 const char *str2)
1188 {
1189 l_int32 i, len1, len2, len;
1190
1191 PROCNAME("sarrayCompareLexical");
1192
1193 if (!str1)
1194 return ERROR_INT("str1 not defined", procName, 1);
1195 if (!str2)
1196 return ERROR_INT("str2 not defined", procName, 1);
1197
1198 len1 = strlen(str1);
1199 len2 = strlen(str2);
1200 len = L_MIN(len1, len2);
1201
1202 for (i = 0; i < len; i++) {
1203 if (str1[i] == str2[i])
1204 continue;
1205 if (str1[i] > str2[i])
1206 return 1;
1207 else
1208 return 0;
1209 }
1210
1211 if (len1 > len2)
1212 return 1;
1213 else
1214 return 0;
1215 }
1216
1217
1218 /*----------------------------------------------------------------------*
1219 * Serialize for I/O *
1220 *----------------------------------------------------------------------*/
1221 /*!
1222 * sarrayRead()
1223 *
1224 * Input: filename
1225 * Return: sarray, or null on error
1226 */
1227 SARRAY *
sarrayRead(const char * filename)1228 sarrayRead(const char *filename)
1229 {
1230 FILE *fp;
1231 SARRAY *sa;
1232
1233 PROCNAME("sarrayRead");
1234
1235 if (!filename)
1236 return (SARRAY *)ERROR_PTR("filename not defined", procName, NULL);
1237
1238 if ((fp = fopenReadStream(filename)) == NULL)
1239 return (SARRAY *)ERROR_PTR("stream not opened", procName, NULL);
1240
1241 if ((sa = sarrayReadStream(fp)) == NULL) {
1242 fclose(fp);
1243 return (SARRAY *)ERROR_PTR("sa not read", procName, NULL);
1244 }
1245
1246 fclose(fp);
1247 return sa;
1248 }
1249
1250
1251 /*!
1252 * sarrayReadStream()
1253 *
1254 * Input: stream
1255 * Return: sarray, or null on error
1256 *
1257 * Notes:
1258 * (1) We store the size of each string along with the string.
1259 * (2) This allows a string to have embedded newlines. By reading
1260 * the entire string, as determined by its size, we are
1261 * not affected by any number of embedded newlines.
1262 */
1263 SARRAY *
sarrayReadStream(FILE * fp)1264 sarrayReadStream(FILE *fp)
1265 {
1266 char *stringbuf;
1267 l_int32 i, n, size, index, bufsize, ret, version;
1268 SARRAY *sa;
1269
1270 PROCNAME("sarrayReadStream");
1271
1272 if (!fp)
1273 return (SARRAY *)ERROR_PTR("stream not defined", procName, NULL);
1274
1275 ret = fscanf(fp, "\nSarray Version %d\n", &version);
1276 if (ret != 1)
1277 return (SARRAY *)ERROR_PTR("not an sarray file", procName, NULL);
1278 if (version != SARRAY_VERSION_NUMBER)
1279 return (SARRAY *)ERROR_PTR("invalid sarray version", procName, NULL);
1280 fscanf(fp, "Number of strings = %d\n", &n);
1281
1282 if ((sa = sarrayCreate(n)) == NULL)
1283 return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
1284 bufsize = L_BUF_SIZE + 1;
1285 if ((stringbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL)
1286 return (SARRAY *)ERROR_PTR("stringbuf not made", procName, NULL);
1287
1288 for (i = 0; i < n; i++) {
1289 /* Get the size of the stored string */
1290 fscanf(fp, "%d[%d]:", &index, &size);
1291 /* Expand the string buffer if necessary */
1292 if (size > bufsize - 5) {
1293 FREE(stringbuf);
1294 bufsize = (l_int32)(1.5 * size);
1295 stringbuf = (char *)CALLOC(bufsize, sizeof(char));
1296 }
1297 /* Read the stored string, plus leading spaces and trailing \n */
1298 fread(stringbuf, 1, size + 3, fp);
1299 /* Remove the \n that was added by sarrayWriteStream() */
1300 stringbuf[size + 2] = '\0';
1301 /* Copy it in, skipping the 2 leading spaces */
1302 sarrayAddString(sa, stringbuf + 2, L_COPY);
1303 }
1304 fscanf(fp, "\n");
1305
1306 FREE(stringbuf);
1307 return sa;
1308 }
1309
1310
1311 /*!
1312 * sarrayWrite()
1313 *
1314 * Input: filename
1315 * sarray
1316 * Return: 0 if OK; 1 on error
1317 */
1318 l_int32
sarrayWrite(const char * filename,SARRAY * sa)1319 sarrayWrite(const char *filename,
1320 SARRAY *sa)
1321 {
1322 FILE *fp;
1323
1324 PROCNAME("sarrayWrite");
1325
1326 if (!filename)
1327 return ERROR_INT("filename not defined", procName, 1);
1328 if (!sa)
1329 return ERROR_INT("sa not defined", procName, 1);
1330
1331 if ((fp = fopen(filename, "w")) == NULL)
1332 return ERROR_INT("stream not opened", procName, 1);
1333
1334 if (sarrayWriteStream(fp, sa))
1335 return ERROR_INT("sa not written to stream", procName, 1);
1336
1337 fclose(fp);
1338 return 0;
1339 }
1340
1341
1342 /*!
1343 * sarrayWriteStream()
1344 *
1345 * Input: stream
1346 * sarray
1347 * Returns 0 if OK; 1 on error
1348 *
1349 * Notes:
1350 * (1) This appends a '\n' to each string, which is stripped
1351 * off by sarrayReadStream().
1352 */
1353 l_int32
sarrayWriteStream(FILE * fp,SARRAY * sa)1354 sarrayWriteStream(FILE *fp,
1355 SARRAY *sa)
1356 {
1357 l_int32 i, n, len;
1358
1359 PROCNAME("sarrayWriteStream");
1360
1361 if (!fp)
1362 return ERROR_INT("stream not defined", procName, 1);
1363 if (!sa)
1364 return ERROR_INT("sa not defined", procName, 1);
1365
1366 n = sarrayGetCount(sa);
1367 fprintf(fp, "\nSarray Version %d\n", SARRAY_VERSION_NUMBER);
1368 fprintf(fp, "Number of strings = %d\n", n);
1369 for (i = 0; i < n; i++) {
1370 len = strlen(sa->array[i]);
1371 fprintf(fp, " %d[%d]: %s\n", i, len, sa->array[i]);
1372 }
1373 fprintf(fp, "\n");
1374
1375 return 0;
1376 }
1377
1378
1379 /*!
1380 * sarrayAppend()
1381 *
1382 * Input: filename
1383 * sarray
1384 * Return: 0 if OK; 1 on error
1385 */
1386 l_int32
sarrayAppend(const char * filename,SARRAY * sa)1387 sarrayAppend(const char *filename,
1388 SARRAY *sa)
1389 {
1390 FILE *fp;
1391
1392 PROCNAME("sarrayAppend");
1393
1394 if (!filename)
1395 return ERROR_INT("filename not defined", procName, 1);
1396 if (!sa)
1397 return ERROR_INT("sa not defined", procName, 1);
1398
1399 if ((fp = fopen(filename, "a")) == NULL)
1400 return ERROR_INT("stream not opened", procName, 1);
1401
1402 if (sarrayWriteStream(fp, sa))
1403 return ERROR_INT("sa not appended to stream", procName, 1);
1404
1405 fclose(fp);
1406 return 0;
1407 }
1408
1409
1410 /*---------------------------------------------------------------------*
1411 * Directory filenames *
1412 *---------------------------------------------------------------------*/
1413 /*!
1414 * getSortedPathnamesInDirectory()
1415 *
1416 * Input: directory name
1417 * substr (<optional> substring filter on filenames; can be NULL)
1418 * firstpage (0-based)
1419 * npages (use 0 for all to the end)
1420 * Return: sarray of sorted pathnames, or NULL on error
1421 *
1422 * Notes:
1423 * (1) If 'substr' is not NULL, only filenames that contain
1424 * the substring can be returned. If 'substr' is NULL,
1425 * none of the filenames are filtered out.
1426 * (2) The files in the directory, after optional filtering by
1427 * the substring, are lexically sorted in increasing order.
1428 * The full pathnames are returned for the requested sequence.
1429 * If no files are found after filtering, returns an empty sarray.
1430 */
1431 SARRAY *
getSortedPathnamesInDirectory(const char * dirname,const char * substr,l_int32 firstpage,l_int32 npages)1432 getSortedPathnamesInDirectory(const char *dirname,
1433 const char *substr,
1434 l_int32 firstpage,
1435 l_int32 npages)
1436 {
1437 char *fname, *fullname;
1438 l_int32 i, nfiles, lastpage;
1439 SARRAY *sa, *safiles, *saout;
1440
1441 PROCNAME("getSortedPathnamesInDirectory");
1442
1443 if (!dirname)
1444 return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);
1445
1446 if ((sa = getFilenamesInDirectory(dirname)) == NULL)
1447 return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
1448 safiles = sarraySelectBySubstring(sa, substr);
1449 sarrayDestroy(&sa);
1450 nfiles = sarrayGetCount(safiles);
1451 if (nfiles == 0) {
1452 L_WARNING("no files found", procName);
1453 return safiles;
1454 }
1455
1456 sarraySort(safiles, safiles, L_SORT_INCREASING);
1457
1458 firstpage = L_MIN(L_MAX(firstpage, 0), nfiles - 1);
1459 if (npages == 0)
1460 npages = nfiles - firstpage;
1461 lastpage = L_MIN(firstpage + npages - 1, nfiles - 1);
1462
1463 saout = sarrayCreate(lastpage - firstpage + 1);
1464 for (i = firstpage; i <= lastpage; i++) {
1465 fname = sarrayGetString(safiles, i, L_NOCOPY);
1466 fullname = genPathname(dirname, fname);
1467 sarrayAddString(saout, fullname, L_INSERT);
1468 }
1469
1470 sarrayDestroy(&safiles);
1471 return saout;
1472 }
1473
1474
1475 /*!
1476 * getFilenamesInDirectory()
1477 *
1478 * Input: directory name
1479 * Return: sarray of file names, or NULL on error
1480 *
1481 * Notes:
1482 * (1) The versions compiled under unix and cygwin use the POSIX C
1483 * library commands for handling directories. For windows,
1484 * there is a separate implementation.
1485 * (2) It returns an array of filename tails; i.e., only the part of
1486 * the path after the last slash.
1487 * (3) Use of the d_type field of dirent is not portable:
1488 * "According to POSIX, the dirent structure contains a field
1489 * char d_name[] of unspecified size, with at most NAME_MAX
1490 * characters preceding the terminating null character. Use
1491 * of other fields will harm the portability of your programs."
1492 * (4) As a consequence of (3), we note several things:
1493 * - MINGW doesn't have a d_type member.
1494 * - Older versions of gcc (e.g., 2.95.3) return DT_UNKNOWN
1495 * for d_type from all files.
1496 * On these systems, this function will return directories
1497 * (except for '.' and '..', which are eliminated using
1498 * the d_name field).
1499 */
1500 #ifndef COMPILER_MSVC
1501
1502 SARRAY *
getFilenamesInDirectory(const char * dirname)1503 getFilenamesInDirectory(const char *dirname)
1504 {
1505 char *name;
1506 l_int32 len;
1507 SARRAY *safiles;
1508 DIR *pdir;
1509 struct dirent *pdirentry;
1510
1511 PROCNAME("getFilenamesInDirectory");
1512
1513 if (!dirname)
1514 return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);
1515
1516 if ((safiles = sarrayCreate(0)) == NULL)
1517 return (SARRAY *)ERROR_PTR("safiles not made", procName, NULL);
1518 if ((pdir = opendir(dirname)) == NULL)
1519 return (SARRAY *)ERROR_PTR("pdir not opened", procName, NULL);
1520 while ((pdirentry = readdir(pdir))) {
1521
1522 /* It's nice to ignore directories. For this it is necessary to
1523 * define _BSD_SOURCE in the CC command, because the DT_DIR
1524 * flag is non-standard. */
1525 #if !defined(__MINGW32__) && !defined(_CYGWIN_ENVIRON) && !defined(__SOLARIS__)
1526 if (pdirentry->d_type == DT_DIR)
1527 continue;
1528 #endif
1529
1530 /* Filter out "." and ".." if they're passed through */
1531 name = pdirentry->d_name;
1532 len = strlen(name);
1533 if (len == 1 && name[len - 1] == '.') continue;
1534 if (len == 2 && name[len - 1] == '.' && name[len - 2] == '.') continue;
1535 sarrayAddString(safiles, name, L_COPY);
1536 }
1537 closedir(pdir);
1538
1539 return safiles;
1540 }
1541
1542 #else /* COMPILER_MSVC */
1543
1544 /* http://msdn2.microsoft.com/en-us/library/aa365200(VS.85).aspx */
1545 #include <windows.h>
1546 #include <tchar.h>
1547 SARRAY *
getFilenamesInDirectory(const char * dirname)1548 getFilenamesInDirectory(const char *dirname)
1549 {
1550 SARRAY *safiles;
1551 WIN32_FIND_DATAA ffd;
1552 size_t length_of_path;
1553 CHAR szDir[MAX_PATH]; /* MAX_PATH is defined in stdlib.h */
1554 HANDLE hFind = INVALID_HANDLE_VALUE;
1555
1556 PROCNAME("getFilenamesInDirectory");
1557
1558 if (!dirname)
1559 return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);
1560
1561 length_of_path = strlen(dirname);
1562 if (length_of_path > (MAX_PATH - 2))
1563 return (SARRAY *)ERROR_PTR("dirname is to long", procName, NULL);
1564
1565 strncpy(szDir, dirname, MAX_PATH);
1566 szDir[MAX_PATH - 1] = '\0';
1567 strncat(szDir, TEXT("\\*"), MAX_PATH - strlen(szDir));
1568
1569 if ((safiles = sarrayCreate(0)) == NULL)
1570 return (SARRAY *)ERROR_PTR("safiles not made", procName, NULL);
1571 hFind = FindFirstFileA(szDir, &ffd);
1572 if (INVALID_HANDLE_VALUE == hFind) {
1573 sarrayDestroy(&safiles);
1574 return (SARRAY *)ERROR_PTR("hFind not opened", procName, NULL);
1575 }
1576
1577 while (FindNextFileA(hFind, &ffd) != 0) {
1578 if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) /* skip dirs */
1579 continue;
1580 sarrayAddString(safiles, ffd.cFileName, L_COPY);
1581 }
1582
1583 FindClose(hFind);
1584 return safiles;
1585 }
1586
1587 #endif /* COMPILER_MSVC */
1588
1589