1 /*
2 * sssort.c for libdivsufsort
3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use,
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following
12 * conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "divsufsort_private.h"
28
29
30 /*- Private Functions -*/
31
32 static const saint_t lg_table[256]= {
33 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
34 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
35 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
36 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
37 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
38 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
39 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
40 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
41 };
42
43 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
44
45 static INLINE
46 saint_t
ss_ilg(saidx_t n)47 ss_ilg(saidx_t n) {
48 #if SS_BLOCKSIZE == 0
49 # if defined(BUILD_DIVSUFSORT64)
50 return (n >> 32) ?
51 ((n >> 48) ?
52 ((n >> 56) ?
53 56 + lg_table[(n >> 56) & 0xff] :
54 48 + lg_table[(n >> 48) & 0xff]) :
55 ((n >> 40) ?
56 40 + lg_table[(n >> 40) & 0xff] :
57 32 + lg_table[(n >> 32) & 0xff])) :
58 ((n & 0xffff0000) ?
59 ((n & 0xff000000) ?
60 24 + lg_table[(n >> 24) & 0xff] :
61 16 + lg_table[(n >> 16) & 0xff]) :
62 ((n & 0x0000ff00) ?
63 8 + lg_table[(n >> 8) & 0xff] :
64 0 + lg_table[(n >> 0) & 0xff]));
65 # else
66 return (n & 0xffff0000) ?
67 ((n & 0xff000000) ?
68 24 + lg_table[(n >> 24) & 0xff] :
69 16 + lg_table[(n >> 16) & 0xff]) :
70 ((n & 0x0000ff00) ?
71 8 + lg_table[(n >> 8) & 0xff] :
72 0 + lg_table[(n >> 0) & 0xff]);
73 # endif
74 #elif SS_BLOCKSIZE < 256
75 return lg_table[n];
76 #else
77 return (n & 0xff00) ?
78 8 + lg_table[(n >> 8) & 0xff] :
79 0 + lg_table[(n >> 0) & 0xff];
80 #endif
81 }
82
83 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
84
85 #if SS_BLOCKSIZE != 0
86
87 static const saint_t sqq_table[256] = {
88 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
89 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
90 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
91 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
92 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
93 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
94 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
95 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
96 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
97 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
98 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
99 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
100 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
101 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
102 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
103 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
104 };
105
106 static INLINE
107 saidx_t
ss_isqrt(saidx_t x)108 ss_isqrt(saidx_t x) {
109 saidx_t y, e;
110
111 if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
112 e = (x & 0xffff0000) ?
113 ((x & 0xff000000) ?
114 24 + lg_table[(x >> 24) & 0xff] :
115 16 + lg_table[(x >> 16) & 0xff]) :
116 ((x & 0x0000ff00) ?
117 8 + lg_table[(x >> 8) & 0xff] :
118 0 + lg_table[(x >> 0) & 0xff]);
119
120 if(e >= 16) {
121 y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
122 if(e >= 24) { y = (y + 1 + x / y) >> 1; }
123 y = (y + 1 + x / y) >> 1;
124 } else if(e >= 8) {
125 y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
126 } else {
127 return sqq_table[x] >> 4;
128 }
129
130 return (x < (y * y)) ? y - 1 : y;
131 }
132
133 #endif /* SS_BLOCKSIZE != 0 */
134
135
136 /*---------------------------------------------------------------------------*/
137
138 /* Compares two suffixes. */
139 static INLINE
140 saint_t
ss_compare(const sauchar_t * T,const saidx_t * p1,const saidx_t * p2,saidx_t depth)141 ss_compare(const sauchar_t *T,
142 const saidx_t *p1, const saidx_t *p2,
143 saidx_t depth) {
144 const sauchar_t *U1, *U2, *U1n, *U2n;
145
146 for(U1 = T + depth + *p1,
147 U2 = T + depth + *p2,
148 U1n = T + *(p1 + 1) + 2,
149 U2n = T + *(p2 + 1) + 2;
150 (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
151 ++U1, ++U2) {
152 }
153
154 return U1 < U1n ?
155 (U2 < U2n ? *U1 - *U2 : 1) :
156 (U2 < U2n ? -1 : 0);
157 }
158
159
160 /*---------------------------------------------------------------------------*/
161
162 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
163
164 /* Insertionsort for small size groups */
165 static
166 void
ss_insertionsort(const sauchar_t * T,const saidx_t * PA,saidx_t * first,saidx_t * last,saidx_t depth)167 ss_insertionsort(const sauchar_t *T, const saidx_t *PA,
168 saidx_t *first, saidx_t *last, saidx_t depth) {
169 saidx_t *i, *j;
170 saidx_t t;
171 saint_t r;
172
173 for(i = last - 2; first <= i; --i) {
174 for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
175 do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
176 if(last <= j) { break; }
177 }
178 if(r == 0) { *j = ~*j; }
179 *(j - 1) = t;
180 }
181 }
182
183 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
184
185
186 /*---------------------------------------------------------------------------*/
187
188 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
189
190 static INLINE
191 void
ss_fixdown(const sauchar_t * Td,const saidx_t * PA,saidx_t * SA,saidx_t i,saidx_t size)192 ss_fixdown(const sauchar_t *Td, const saidx_t *PA,
193 saidx_t *SA, saidx_t i, saidx_t size) {
194 saidx_t j, k;
195 saidx_t v;
196 saint_t c, d, e;
197
198 for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
199 d = Td[PA[SA[k = j++]]];
200 if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
201 if(d <= c) { break; }
202 }
203 SA[i] = v;
204 }
205
206 /* Simple top-down heapsort. */
207 static
208 void
ss_heapsort(const sauchar_t * Td,const saidx_t * PA,saidx_t * SA,saidx_t size)209 ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
210 saidx_t i, m;
211 saidx_t t;
212
213 m = size;
214 if((size % 2) == 0) {
215 m--;
216 if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
217 }
218
219 for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
220 if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
221 for(i = m - 1; 0 < i; --i) {
222 t = SA[0], SA[0] = SA[i];
223 ss_fixdown(Td, PA, SA, 0, i);
224 SA[i] = t;
225 }
226 }
227
228
229 /*---------------------------------------------------------------------------*/
230
231 /* Returns the median of three elements. */
232 static INLINE
233 saidx_t *
ss_median3(const sauchar_t * Td,const saidx_t * PA,saidx_t * v1,saidx_t * v2,saidx_t * v3)234 ss_median3(const sauchar_t *Td, const saidx_t *PA,
235 saidx_t *v1, saidx_t *v2, saidx_t *v3) {
236 saidx_t *t;
237 if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
238 if(Td[PA[*v2]] > Td[PA[*v3]]) {
239 if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
240 else { return v3; }
241 }
242 return v2;
243 }
244
245 /* Returns the median of five elements. */
246 static INLINE
247 saidx_t *
ss_median5(const sauchar_t * Td,const saidx_t * PA,saidx_t * v1,saidx_t * v2,saidx_t * v3,saidx_t * v4,saidx_t * v5)248 ss_median5(const sauchar_t *Td, const saidx_t *PA,
249 saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
250 saidx_t *t;
251 if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
252 if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
253 if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
254 if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
255 if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
256 if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
257 return v3;
258 }
259
260 /* Returns the pivot element. */
261 static INLINE
262 saidx_t *
ss_pivot(const sauchar_t * Td,const saidx_t * PA,saidx_t * first,saidx_t * last)263 ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
264 saidx_t *middle;
265 saidx_t t;
266
267 t = last - first;
268 middle = first + t / 2;
269
270 if(t <= 512) {
271 if(t <= 32) {
272 return ss_median3(Td, PA, first, middle, last - 1);
273 } else {
274 t >>= 2;
275 return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
276 }
277 }
278 t >>= 3;
279 first = ss_median3(Td, PA, first, first + t, first + (t << 1));
280 middle = ss_median3(Td, PA, middle - t, middle, middle + t);
281 last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
282 return ss_median3(Td, PA, first, middle, last);
283 }
284
285
286 /*---------------------------------------------------------------------------*/
287
288 /* Binary partition for substrings. */
289 static INLINE
290 saidx_t *
ss_partition(const saidx_t * PA,saidx_t * first,saidx_t * last,saidx_t depth)291 ss_partition(const saidx_t *PA,
292 saidx_t *first, saidx_t *last, saidx_t depth) {
293 saidx_t *a, *b;
294 saidx_t t;
295 for(a = first - 1, b = last;;) {
296 for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
297 for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
298 if(b <= a) { break; }
299 t = ~*b;
300 *b = *a;
301 *a = t;
302 }
303 if(first < a) { *first = ~*first; }
304 return a;
305 }
306
307 /* Multikey introsort for medium size groups. */
308 static
309 void
ss_mintrosort(const sauchar_t * T,const saidx_t * PA,saidx_t * first,saidx_t * last,saidx_t depth)310 ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
311 saidx_t *first, saidx_t *last,
312 saidx_t depth) {
313 #define STACK_SIZE SS_MISORT_STACKSIZE
314 struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];
315 const sauchar_t *Td;
316 saidx_t *a, *b, *c, *d, *e, *f;
317 saidx_t s, t;
318 saint_t ssize;
319 saint_t limit;
320 saint_t v, x = 0;
321
322 for(ssize = 0, limit = ss_ilg(last - first);;) {
323
324 if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
325 #if 1 < SS_INSERTIONSORT_THRESHOLD
326 if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
327 #endif
328 STACK_POP(first, last, depth, limit);
329 continue;
330 }
331
332 Td = T + depth;
333 if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
334 if(limit < 0) {
335 for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
336 if((x = Td[PA[*a]]) != v) {
337 if(1 < (a - first)) { break; }
338 v = x;
339 first = a;
340 }
341 }
342 if(Td[PA[*first] - 1] < v) {
343 first = ss_partition(PA, first, a, depth);
344 }
345 if((a - first) <= (last - a)) {
346 if(1 < (a - first)) {
347 STACK_PUSH(a, last, depth, -1);
348 last = a, depth += 1, limit = ss_ilg(a - first);
349 } else {
350 first = a, limit = -1;
351 }
352 } else {
353 if(1 < (last - a)) {
354 STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
355 first = a, limit = -1;
356 } else {
357 last = a, depth += 1, limit = ss_ilg(a - first);
358 }
359 }
360 continue;
361 }
362
363 /* choose pivot */
364 a = ss_pivot(Td, PA, first, last);
365 v = Td[PA[*a]];
366 SWAP(*first, *a);
367
368 /* partition */
369 for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
370 if(((a = b) < last) && (x < v)) {
371 for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
372 if(x == v) { SWAP(*b, *a); ++a; }
373 }
374 }
375 for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
376 if((b < (d = c)) && (x > v)) {
377 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
378 if(x == v) { SWAP(*c, *d); --d; }
379 }
380 }
381 for(; b < c;) {
382 SWAP(*b, *c);
383 for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
384 if(x == v) { SWAP(*b, *a); ++a; }
385 }
386 for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
387 if(x == v) { SWAP(*c, *d); --d; }
388 }
389 }
390
391 if(a <= d) {
392 c = b - 1;
393
394 if((s = a - first) > (t = b - a)) { s = t; }
395 for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
396 if((s = d - c) > (t = last - d - 1)) { s = t; }
397 for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
398
399 a = first + (b - a), c = last - (d - c);
400 b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
401
402 if((a - first) <= (last - c)) {
403 if((last - c) <= (c - b)) {
404 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
405 STACK_PUSH(c, last, depth, limit);
406 last = a;
407 } else if((a - first) <= (c - b)) {
408 STACK_PUSH(c, last, depth, limit);
409 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
410 last = a;
411 } else {
412 STACK_PUSH(c, last, depth, limit);
413 STACK_PUSH(first, a, depth, limit);
414 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
415 }
416 } else {
417 if((a - first) <= (c - b)) {
418 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
419 STACK_PUSH(first, a, depth, limit);
420 first = c;
421 } else if((last - c) <= (c - b)) {
422 STACK_PUSH(first, a, depth, limit);
423 STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
424 first = c;
425 } else {
426 STACK_PUSH(first, a, depth, limit);
427 STACK_PUSH(c, last, depth, limit);
428 first = b, last = c, depth += 1, limit = ss_ilg(c - b);
429 }
430 }
431 } else {
432 limit += 1;
433 if(Td[PA[*first] - 1] < v) {
434 first = ss_partition(PA, first, last, depth);
435 limit = ss_ilg(last - first);
436 }
437 depth += 1;
438 }
439 }
440 #undef STACK_SIZE
441 }
442
443 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
444
445
446 /*---------------------------------------------------------------------------*/
447
448 #if SS_BLOCKSIZE != 0
449
450 static INLINE
451 void
ss_blockswap(saidx_t * a,saidx_t * b,saidx_t n)452 ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
453 saidx_t t;
454 for(; 0 < n; --n, ++a, ++b) {
455 t = *a, *a = *b, *b = t;
456 }
457 }
458
459 static INLINE
460 void
ss_rotate(saidx_t * first,saidx_t * middle,saidx_t * last)461 ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {
462 saidx_t *a, *b, t;
463 saidx_t l, r;
464 l = middle - first, r = last - middle;
465 for(; (0 < l) && (0 < r);) {
466 if(l == r) { ss_blockswap(first, middle, l); break; }
467 if(l < r) {
468 a = last - 1, b = middle - 1;
469 t = *a;
470 do {
471 *a-- = *b, *b-- = *a;
472 if(b < first) {
473 *a = t;
474 last = a;
475 if((r -= l + 1) <= l) { break; }
476 a -= 1, b = middle - 1;
477 t = *a;
478 }
479 } while(1);
480 } else {
481 a = first, b = middle;
482 t = *a;
483 do {
484 *a++ = *b, *b++ = *a;
485 if(last <= b) {
486 *a = t;
487 first = a + 1;
488 if((l -= r + 1) <= r) { break; }
489 a += 1, b = middle;
490 t = *a;
491 }
492 } while(1);
493 }
494 }
495 }
496
497
498 /*---------------------------------------------------------------------------*/
499
500 static
501 void
ss_inplacemerge(const sauchar_t * T,const saidx_t * PA,saidx_t * first,saidx_t * middle,saidx_t * last,saidx_t depth)502 ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
503 saidx_t *first, saidx_t *middle, saidx_t *last,
504 saidx_t depth) {
505 const saidx_t *p;
506 saidx_t *a, *b;
507 saidx_t len, half;
508 saint_t q, r;
509 saint_t x;
510
511 for(;;) {
512 if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
513 else { x = 0; p = PA + *(last - 1); }
514 for(a = first, len = middle - first, half = len >> 1, r = -1;
515 0 < len;
516 len = half, half >>= 1) {
517 b = a + half;
518 q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
519 if(q < 0) {
520 a = b + 1;
521 half -= (len & 1) ^ 1;
522 } else {
523 r = q;
524 }
525 }
526 if(a < middle) {
527 if(r == 0) { *a = ~*a; }
528 ss_rotate(a, middle, last);
529 last -= middle - a;
530 middle = a;
531 if(first == middle) { break; }
532 }
533 --last;
534 if(x != 0) { while(*--last < 0) { } }
535 if(middle == last) { break; }
536 }
537 }
538
539
540 /*---------------------------------------------------------------------------*/
541
542 /* Merge-forward with internal buffer. */
543 static
544 void
ss_mergeforward(const sauchar_t * T,const saidx_t * PA,saidx_t * first,saidx_t * middle,saidx_t * last,saidx_t * buf,saidx_t depth)545 ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
546 saidx_t *first, saidx_t *middle, saidx_t *last,
547 saidx_t *buf, saidx_t depth) {
548 saidx_t *a, *b, *c, *bufend;
549 saidx_t t;
550 saint_t r;
551
552 bufend = buf + (middle - first) - 1;
553 ss_blockswap(buf, first, middle - first);
554
555 for(t = *(a = first), b = buf, c = middle;;) {
556 r = ss_compare(T, PA + *b, PA + *c, depth);
557 if(r < 0) {
558 do {
559 *a++ = *b;
560 if(bufend <= b) { *bufend = t; return; }
561 *b++ = *a;
562 } while(*b < 0);
563 } else if(r > 0) {
564 do {
565 *a++ = *c, *c++ = *a;
566 if(last <= c) {
567 while(b < bufend) { *a++ = *b, *b++ = *a; }
568 *a = *b, *b = t;
569 return;
570 }
571 } while(*c < 0);
572 } else {
573 *c = ~*c;
574 do {
575 *a++ = *b;
576 if(bufend <= b) { *bufend = t; return; }
577 *b++ = *a;
578 } while(*b < 0);
579
580 do {
581 *a++ = *c, *c++ = *a;
582 if(last <= c) {
583 while(b < bufend) { *a++ = *b, *b++ = *a; }
584 *a = *b, *b = t;
585 return;
586 }
587 } while(*c < 0);
588 }
589 }
590 }
591
592 /* Merge-backward with internal buffer. */
593 static
594 void
ss_mergebackward(const sauchar_t * T,const saidx_t * PA,saidx_t * first,saidx_t * middle,saidx_t * last,saidx_t * buf,saidx_t depth)595 ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
596 saidx_t *first, saidx_t *middle, saidx_t *last,
597 saidx_t *buf, saidx_t depth) {
598 const saidx_t *p1, *p2;
599 saidx_t *a, *b, *c, *bufend;
600 saidx_t t;
601 saint_t r;
602 saint_t x;
603
604 bufend = buf + (last - middle) - 1;
605 ss_blockswap(buf, middle, last - middle);
606
607 x = 0;
608 if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
609 else { p1 = PA + *bufend; }
610 if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
611 else { p2 = PA + *(middle - 1); }
612 for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
613 r = ss_compare(T, p1, p2, depth);
614 if(0 < r) {
615 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
616 *a-- = *b;
617 if(b <= buf) { *buf = t; break; }
618 *b-- = *a;
619 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
620 else { p1 = PA + *b; }
621 } else if(r < 0) {
622 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
623 *a-- = *c, *c-- = *a;
624 if(c < first) {
625 while(buf < b) { *a-- = *b, *b-- = *a; }
626 *a = *b, *b = t;
627 break;
628 }
629 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
630 else { p2 = PA + *c; }
631 } else {
632 if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
633 *a-- = ~*b;
634 if(b <= buf) { *buf = t; break; }
635 *b-- = *a;
636 if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
637 *a-- = *c, *c-- = *a;
638 if(c < first) {
639 while(buf < b) { *a-- = *b, *b-- = *a; }
640 *a = *b, *b = t;
641 break;
642 }
643 if(*b < 0) { p1 = PA + ~*b; x |= 1; }
644 else { p1 = PA + *b; }
645 if(*c < 0) { p2 = PA + ~*c; x |= 2; }
646 else { p2 = PA + *c; }
647 }
648 }
649 }
650
651 /* D&C based merge. */
652 static
653 void
ss_swapmerge(const sauchar_t * T,const saidx_t * PA,saidx_t * first,saidx_t * middle,saidx_t * last,saidx_t * buf,saidx_t bufsize,saidx_t depth)654 ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
655 saidx_t *first, saidx_t *middle, saidx_t *last,
656 saidx_t *buf, saidx_t bufsize, saidx_t depth) {
657 #define STACK_SIZE SS_SMERGE_STACKSIZE
658 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
659 #define MERGE_CHECK(a, b, c)\
660 do {\
661 if(((c) & 1) ||\
662 (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
663 *(a) = ~*(a);\
664 }\
665 if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
666 *(b) = ~*(b);\
667 }\
668 } while(0)
669 struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];
670 saidx_t *l, *r, *lm, *rm;
671 saidx_t m, len, half;
672 saint_t ssize;
673 saint_t check, next;
674
675 for(check = 0, ssize = 0;;) {
676 if((last - middle) <= bufsize) {
677 if((first < middle) && (middle < last)) {
678 ss_mergebackward(T, PA, first, middle, last, buf, depth);
679 }
680 MERGE_CHECK(first, last, check);
681 STACK_POP(first, middle, last, check);
682 continue;
683 }
684
685 if((middle - first) <= bufsize) {
686 if(first < middle) {
687 ss_mergeforward(T, PA, first, middle, last, buf, depth);
688 }
689 MERGE_CHECK(first, last, check);
690 STACK_POP(first, middle, last, check);
691 continue;
692 }
693
694 for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
695 0 < len;
696 len = half, half >>= 1) {
697 if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
698 PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
699 m += half + 1;
700 half -= (len & 1) ^ 1;
701 }
702 }
703
704 if(0 < m) {
705 lm = middle - m, rm = middle + m;
706 ss_blockswap(lm, middle, m);
707 l = r = middle, next = 0;
708 if(rm < last) {
709 if(*rm < 0) {
710 *rm = ~*rm;
711 if(first < lm) { for(; *--l < 0;) { } next |= 4; }
712 next |= 1;
713 } else if(first < lm) {
714 for(; *r < 0; ++r) { }
715 next |= 2;
716 }
717 }
718
719 if((l - first) <= (last - r)) {
720 STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
721 middle = lm, last = l, check = (check & 3) | (next & 4);
722 } else {
723 if((next & 2) && (r == middle)) { next ^= 6; }
724 STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
725 first = r, middle = rm, check = (next & 3) | (check & 4);
726 }
727 } else {
728 if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
729 *middle = ~*middle;
730 }
731 MERGE_CHECK(first, last, check);
732 STACK_POP(first, middle, last, check);
733 }
734 }
735 #undef STACK_SIZE
736 }
737
738 #endif /* SS_BLOCKSIZE != 0 */
739
740
741 /*---------------------------------------------------------------------------*/
742
743 /*- Function -*/
744
745 /* Substring sort */
746 void
sssort(const sauchar_t * T,const saidx_t * PA,saidx_t * first,saidx_t * last,saidx_t * buf,saidx_t bufsize,saidx_t depth,saidx_t n,saint_t lastsuffix)747 sssort(const sauchar_t *T, const saidx_t *PA,
748 saidx_t *first, saidx_t *last,
749 saidx_t *buf, saidx_t bufsize,
750 saidx_t depth, saidx_t n, saint_t lastsuffix) {
751 saidx_t *a;
752 #if SS_BLOCKSIZE != 0
753 saidx_t *b, *middle, *curbuf;
754 saidx_t j, k, curbufsize, limit;
755 #endif
756 saidx_t i;
757
758 if(lastsuffix != 0) { ++first; }
759
760 #if SS_BLOCKSIZE == 0
761 ss_mintrosort(T, PA, first, last, depth);
762 #else
763 if((bufsize < SS_BLOCKSIZE) &&
764 (bufsize < (last - first)) &&
765 (bufsize < (limit = ss_isqrt(last - first)))) {
766 if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
767 buf = middle = last - limit, bufsize = limit;
768 } else {
769 middle = last, limit = 0;
770 }
771 for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
772 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
773 ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
774 #elif 1 < SS_BLOCKSIZE
775 ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
776 #endif
777 curbufsize = last - (a + SS_BLOCKSIZE);
778 curbuf = a + SS_BLOCKSIZE;
779 if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
780 for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
781 ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
782 }
783 }
784 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
785 ss_mintrosort(T, PA, a, middle, depth);
786 #elif 1 < SS_BLOCKSIZE
787 ss_insertionsort(T, PA, a, middle, depth);
788 #endif
789 for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
790 if(i & 1) {
791 ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
792 a -= k;
793 }
794 }
795 if(limit != 0) {
796 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
797 ss_mintrosort(T, PA, middle, last, depth);
798 #elif 1 < SS_BLOCKSIZE
799 ss_insertionsort(T, PA, middle, last, depth);
800 #endif
801 ss_inplacemerge(T, PA, first, middle, last, depth);
802 }
803 #endif
804
805 if(lastsuffix != 0) {
806 /* Insert last type B* suffix. */
807 saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
808 for(a = first, i = *(first - 1);
809 (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
810 ++a) {
811 *(a - 1) = *a;
812 }
813 *(a - 1) = i;
814 }
815 }
816