• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2003-2014, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  sorttest.c
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2003aug04
14 *   created by: Markus W. Scherer
15 *
16 *   Test internal sorting functions.
17 */
18 
19 #include <stdio.h>
20 
21 #include "unicode/utypes.h"
22 #include "unicode/ucol.h"
23 #include "unicode/ustring.h"
24 #include "cmemory.h"
25 #include "cintltst.h"
26 #include "uarrsort.h"
27 
28 static void
SortTest()29 SortTest() {
30     uint16_t small[]={ 8, 1, 2, 5, 4, 3, 7, 6 };
31     int32_t medium[]={ 10, 8, 1, 2, 5, 5, -1, 6, 4, 3, 9, 7, 5 };
32     uint32_t large[]={ 21, 10, 20, 19, 11, 12, 13, 10, 10, 10, 10,
33                        8, 1, 2, 5, 10, 10, 4, 17, 18, 3, 9, 10, 7, 6, 14, 15, 16 };
34 
35     int32_t i;
36     UErrorCode errorCode;
37 
38     /* sort small array (stable) */
39     errorCode=U_ZERO_ERROR;
40     uprv_sortArray(small, UPRV_LENGTHOF(small), sizeof(small[0]), uprv_uint16Comparator, NULL, TRUE, &errorCode);
41     if(U_FAILURE(errorCode)) {
42         log_err("uprv_sortArray(small) failed - %s\n", u_errorName(errorCode));
43         return;
44     }
45     for(i=1; i<UPRV_LENGTHOF(small); ++i) {
46         if(small[i-1]>small[i]) {
47             log_err("uprv_sortArray(small) mis-sorted [%d]=%u > [%d]=%u\n", i-1, small[i-1], i, small[i]);
48             return;
49         }
50     }
51 
52     /* for medium, add bits that will not be compared, to test stability */
53     for(i=0; i<UPRV_LENGTHOF(medium); ++i) {
54         medium[i]=(medium[i]<<4)|i;
55     }
56 
57     /* sort medium array (stable) */
58     uprv_sortArray(medium, UPRV_LENGTHOF(medium), sizeof(medium[0]), uprv_int32Comparator, NULL, TRUE, &errorCode);
59     if(U_FAILURE(errorCode)) {
60         log_err("uprv_sortArray(medium) failed - %s\n", u_errorName(errorCode));
61         return;
62     }
63     for(i=1; i<UPRV_LENGTHOF(medium); ++i) {
64         if(medium[i-1]>=medium[i]) {
65             log_err("uprv_sortArray(medium) mis-sorted [%d]=%u > [%d]=%u\n", i-1, medium[i-1], i, medium[i]);
66             return;
67         }
68     }
69 
70     /* sort large array (not stable) */
71     errorCode=U_ZERO_ERROR;
72     uprv_sortArray(large, UPRV_LENGTHOF(large), sizeof(large[0]), uprv_uint32Comparator, NULL, FALSE, &errorCode);
73     if(U_FAILURE(errorCode)) {
74         log_err("uprv_sortArray(large) failed - %s\n", u_errorName(errorCode));
75         return;
76     }
77     for(i=1; i<UPRV_LENGTHOF(large); ++i) {
78         if(large[i-1]>large[i]) {
79             log_err("uprv_sortArray(large) mis-sorted [%d]=%u > [%d]=%u\n", i-1, large[i-1], i, large[i]);
80             return;
81         }
82     }
83 }
84 
85 #if !UCONFIG_NO_COLLATION
86 
87 /*
88  * Fill an array with semi-random short strings.
89  * Vary them enough to be interesting, but create duplicates.
90  * With CYCLE=10 characters per STR_LEN=3 string positions there are only 1000 unique strings.
91  * NUM_LINES should be larger than this.
92  */
93 #define NUM_LINES 10000
94 #define STR_LEN 3
95 #define CYCLE 10
96 
97 /*
98  * Use characters beyond the Latin Extended A block to avoid a collator fastpath.
99  * They should sort unique, so that we can later use a binary comparison for string equality.
100  */
101 #define BASE_CHAR 0x200
102 
103 typedef struct Line {
104     UChar s[STR_LEN];
105     int32_t recordNumber;
106 } Line;
107 
108 static void
printLines(const Line * lines)109 printLines(const Line *lines) {
110 #if 0
111     int32_t i, j;
112     for(i=0; i<NUM_LINES; ++i) {
113         const Line *line=lines+i;
114         for(j=0; j<STR_LEN; ++j) {
115             printf("%04x ", line->s[j]);
116         }
117         printf(" #%5d\n", line->recordNumber);
118     }
119 #endif
120 }
121 
122 /* Use a collator so that the comparisons are not essentially free, for simple benchmarking. */
123 static int32_t U_EXPORT2
linesComparator(const void * context,const void * left,const void * right)124 linesComparator(const void *context, const void *left, const void *right) {
125     const UCollator *coll=(const UCollator *)context;
126     const Line *leftLine=(const Line *)left;
127     const Line *rightLine=(const Line *)right;
128     /* compare the strings but not the record number */
129     return ucol_strcoll(coll, leftLine->s, STR_LEN, rightLine->s, STR_LEN);
130 }
131 
StableSortTest()132 static void StableSortTest() {
133     UErrorCode errorCode=U_ZERO_ERROR;
134     UCollator *coll;
135     Line *lines, *p;
136     UChar s[STR_LEN];
137     int32_t i, j;
138 
139     coll=ucol_open("root", &errorCode);
140     if(U_FAILURE(errorCode)) {
141         log_data_err("ucol_open(root) failed - %s\n", u_errorName(errorCode));
142         return;
143     }
144 
145     lines=p=(Line *)uprv_malloc(NUM_LINES*sizeof(Line));
146     uprv_memset(lines, 0, NUM_LINES*sizeof(Line));  /* avoid uninitialized memory */
147 
148     for(j=0; j<STR_LEN; ++j) { s[j]=BASE_CHAR; }
149     j=0;
150     for(i=0; i<NUM_LINES; ++i) {
151         UChar c;
152         u_memcpy(p->s, s, STR_LEN);
153         p->recordNumber=i;
154         /* Modify the string for the next line. */
155         c=s[j]+1;
156         if(c==BASE_CHAR+CYCLE) { c=BASE_CHAR; }
157         s[j]=c;
158         if(++j==STR_LEN) { j=0; }
159         ++p;
160     }
161     puts("\n* lines before sorting");
162     printLines(lines);
163 
164     uprv_sortArray(lines, NUM_LINES, (int32_t)sizeof(Line),
165                    linesComparator, coll, TRUE, &errorCode);
166     if(U_FAILURE(errorCode)) {
167         log_err("uprv_sortArray() failed - %s\n", u_errorName(errorCode));
168         return;
169     }
170     puts("* lines after sorting");
171     printLines(lines);
172 
173     /* Verify that the array is sorted correctly. */
174     p=lines;
175     for(i=1; i<NUM_LINES; ++i) {
176         Line *q=p+1;  /* =lines+i */
177         /* Binary comparison first, for speed. In this case, equal strings must be identical. */
178         int32_t diff=u_strCompare(p->s, STR_LEN, q->s, STR_LEN, FALSE);
179         if(diff==0) {
180             if(p->recordNumber>=q->recordNumber) {
181                 log_err("equal strings %d and %d out of order at sorted index %d\n",
182                         (int)p->recordNumber, (int)q->recordNumber, (int)i);
183                 break;
184             }
185         } else {
186             /* Compare unequal strings with the collator. */
187             diff=ucol_strcoll(coll, p->s, STR_LEN, q->s, STR_LEN);
188             if(diff>=0) {
189                 log_err("unequal strings %d and %d out of order at sorted index %d\n",
190                         (int)p->recordNumber, (int)q->recordNumber, (int)i);
191                 break;
192             }
193         }
194         p=q;
195     }
196 
197     uprv_free(lines);
198     ucol_close(coll);
199 }
200 
201 #endif  /* !UCONFIG_NO_COLLATION */
202 
203 void
204 addSortTest(TestNode** root);
205 
206 void
addSortTest(TestNode ** root)207 addSortTest(TestNode** root) {
208     addTest(root, &SortTest, "tsutil/sorttest/SortTest");
209 #if !UCONFIG_NO_COLLATION
210     addTest(root, &StableSortTest, "tsutil/sorttest/StableSortTest");
211 #endif
212 }
213