1 /* libxml2 - Library for parsing XML documents
2 * Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 *
4 * This file is not part of the GNU gettext program, but is used with
5 * GNU gettext.
6 *
7 * The original copyright notice is as follows:
8 */
9
10 /*
11 * Copyright (C) 1998-2012 Daniel Veillard. All Rights Reserved.
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a copy
14 * of this software and associated documentation files (the "Software"), to deal
15 * in the Software without restriction, including without limitation the rights
16 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 * copies of the Software, and to permit persons to whom the Software is fur-
18 * nished to do so, subject to the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included in
21 * all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
25 * NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 * THE SOFTWARE.
30 */
31
32 /*
33 * The MIT License (MIT)
34 *
35 * Copyright (c) 2010-2017 Christopher Swenson.
36 * Copyright (c) 2012 Vojtech Fried.
37 * Copyright (c) 2012 Google Inc. All Rights Reserved.
38 *
39 * Permission is hereby granted, free of charge, to any person obtaining a
40 * copy of this software and associated documentation files (the "Software"),
41 * to deal in the Software without restriction, including without limitation
42 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
43 * and/or sell copies of the Software, and to permit persons to whom the
44 * Software is furnished to do so, subject to the following conditions:
45 *
46 * The above copyright notice and this permission notice shall be included in
47 * all copies or substantial portions of the Software.
48 *
49 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
52 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
54 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
55 * DEALINGS IN THE SOFTWARE.
56 */
57
58 /*
59 * Taken from https://github.com/swenson/sort
60 * Revision: 05fd77bfec049ce8b7c408c4d3dd2d51ee061a15
61 * Removed all code unrelated to Timsort and made minor adjustments for
62 * cross-platform compatibility.
63 */
64
65 #include <stdlib.h>
66 #include <stdio.h>
67 #include <string.h>
68 #ifdef HAVE_STDINT_H
69 #include <stdint.h>
70 #elif defined(_WIN32)
71 typedef unsigned __int64 uint64_t;
72 #endif
73
74 #ifndef SORT_NAME
75 #error "Must declare SORT_NAME"
76 #endif
77
78 #ifndef SORT_TYPE
79 #error "Must declare SORT_TYPE"
80 #endif
81
82 #ifndef SORT_CMP
83 #define SORT_CMP(x, y) ((x) < (y) ? -1 : ((x) == (y) ? 0 : 1))
84 #endif
85
86 #ifndef TIM_SORT_STACK_SIZE
87 #define TIM_SORT_STACK_SIZE 128
88 #endif
89
90 #define SORT_SWAP(x,y) {SORT_TYPE __SORT_SWAP_t = (x); (x) = (y); (y) = __SORT_SWAP_t;}
91
92
93 /* Common, type-agnosting functions and constants that we don't want to declare twice. */
94 #ifndef SORT_COMMON_H
95 #define SORT_COMMON_H
96
97 #ifndef MAX
98 #define MAX(x,y) (((x) > (y) ? (x) : (y)))
99 #endif
100
101 #ifndef MIN
102 #define MIN(x,y) (((x) < (y) ? (x) : (y)))
103 #endif
104
105 static int compute_minrun(const uint64_t);
106
107 #ifndef CLZ
108 #if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
109 #define CLZ __builtin_clzll
110 #else
111
112 static int clzll(uint64_t);
113
114 /* adapted from Hacker's Delight */
clzll(uint64_t x)115 static int clzll(uint64_t x) {
116 int n;
117
118 if (x == 0) {
119 return 64;
120 }
121
122 n = 0;
123
124 if (x <= 0x00000000FFFFFFFFL) {
125 n = n + 32;
126 x = x << 32;
127 }
128
129 if (x <= 0x0000FFFFFFFFFFFFL) {
130 n = n + 16;
131 x = x << 16;
132 }
133
134 if (x <= 0x00FFFFFFFFFFFFFFL) {
135 n = n + 8;
136 x = x << 8;
137 }
138
139 if (x <= 0x0FFFFFFFFFFFFFFFL) {
140 n = n + 4;
141 x = x << 4;
142 }
143
144 if (x <= 0x3FFFFFFFFFFFFFFFL) {
145 n = n + 2;
146 x = x << 2;
147 }
148
149 if (x <= 0x7FFFFFFFFFFFFFFFL) {
150 n = n + 1;
151 }
152
153 return n;
154 }
155
156 #define CLZ clzll
157 #endif
158 #endif
159
compute_minrun(const uint64_t size)160 static __inline int compute_minrun(const uint64_t size) {
161 const int top_bit = 64 - CLZ(size);
162 const int shift = MAX(top_bit, 6) - 6;
163 const int minrun = size >> shift;
164 const uint64_t mask = (1ULL << shift) - 1;
165
166 if (mask & size) {
167 return minrun + 1;
168 }
169
170 return minrun;
171 }
172
173 #endif /* SORT_COMMON_H */
174
175 #define SORT_CONCAT(x, y) x ## _ ## y
176 #define SORT_MAKE_STR1(x, y) SORT_CONCAT(x,y)
177 #define SORT_MAKE_STR(x) SORT_MAKE_STR1(SORT_NAME,x)
178
179 #define BINARY_INSERTION_FIND SORT_MAKE_STR(binary_insertion_find)
180 #define BINARY_INSERTION_SORT_START SORT_MAKE_STR(binary_insertion_sort_start)
181 #define BINARY_INSERTION_SORT SORT_MAKE_STR(binary_insertion_sort)
182 #define REVERSE_ELEMENTS SORT_MAKE_STR(reverse_elements)
183 #define COUNT_RUN SORT_MAKE_STR(count_run)
184 #define CHECK_INVARIANT SORT_MAKE_STR(check_invariant)
185 #define TIM_SORT SORT_MAKE_STR(tim_sort)
186 #define TIM_SORT_RESIZE SORT_MAKE_STR(tim_sort_resize)
187 #define TIM_SORT_MERGE SORT_MAKE_STR(tim_sort_merge)
188 #define TIM_SORT_COLLAPSE SORT_MAKE_STR(tim_sort_collapse)
189
190 #ifndef MAX
191 #define MAX(x,y) (((x) > (y) ? (x) : (y)))
192 #endif
193 #ifndef MIN
194 #define MIN(x,y) (((x) < (y) ? (x) : (y)))
195 #endif
196
197 typedef struct {
198 size_t start;
199 size_t length;
200 } TIM_SORT_RUN_T;
201
202
203 void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size);
204 void TIM_SORT(SORT_TYPE *dst, const size_t size);
205
206
207 /* Function used to do a binary search for binary insertion sort */
BINARY_INSERTION_FIND(SORT_TYPE * dst,const SORT_TYPE x,const size_t size)208 static __inline size_t BINARY_INSERTION_FIND(SORT_TYPE *dst, const SORT_TYPE x,
209 const size_t size) {
210 size_t l, c, r;
211 SORT_TYPE cx;
212 l = 0;
213 r = size - 1;
214 c = r >> 1;
215
216 /* check for out of bounds at the beginning. */
217 if (SORT_CMP(x, dst[0]) < 0) {
218 return 0;
219 } else if (SORT_CMP(x, dst[r]) > 0) {
220 return r;
221 }
222
223 cx = dst[c];
224
225 while (1) {
226 const int val = SORT_CMP(x, cx);
227
228 if (val < 0) {
229 if (c - l <= 1) {
230 return c;
231 }
232
233 r = c;
234 } else { /* allow = for stability. The binary search favors the right. */
235 if (r - c <= 1) {
236 return c + 1;
237 }
238
239 l = c;
240 }
241
242 c = l + ((r - l) >> 1);
243 cx = dst[c];
244 }
245 }
246
247 /* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */
BINARY_INSERTION_SORT_START(SORT_TYPE * dst,const size_t start,const size_t size)248 static void BINARY_INSERTION_SORT_START(SORT_TYPE *dst, const size_t start, const size_t size) {
249 size_t i;
250
251 for (i = start; i < size; i++) {
252 size_t j;
253 SORT_TYPE x;
254 size_t location;
255
256 /* If this entry is already correct, just move along */
257 if (SORT_CMP(dst[i - 1], dst[i]) <= 0) {
258 continue;
259 }
260
261 /* Else we need to find the right place, shift everything over, and squeeze in */
262 x = dst[i];
263 location = BINARY_INSERTION_FIND(dst, x, i);
264
265 for (j = i - 1; j >= location; j--) {
266 dst[j + 1] = dst[j];
267
268 if (j == 0) { /* check edge case because j is unsigned */
269 break;
270 }
271 }
272
273 dst[location] = x;
274 }
275 }
276
277 /* Binary insertion sort */
BINARY_INSERTION_SORT(SORT_TYPE * dst,const size_t size)278 void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size) {
279 /* don't bother sorting an array of size <= 1 */
280 if (size <= 1) {
281 return;
282 }
283
284 BINARY_INSERTION_SORT_START(dst, 1, size);
285 }
286
287 /* timsort implementation, based on timsort.txt */
288
REVERSE_ELEMENTS(SORT_TYPE * dst,size_t start,size_t end)289 static __inline void REVERSE_ELEMENTS(SORT_TYPE *dst, size_t start, size_t end) {
290 while (1) {
291 if (start >= end) {
292 return;
293 }
294
295 SORT_SWAP(dst[start], dst[end]);
296 start++;
297 end--;
298 }
299 }
300
COUNT_RUN(SORT_TYPE * dst,const size_t start,const size_t size)301 static size_t COUNT_RUN(SORT_TYPE *dst, const size_t start, const size_t size) {
302 size_t curr;
303
304 if (size - start == 1) {
305 return 1;
306 }
307
308 if (start >= size - 2) {
309 if (SORT_CMP(dst[size - 2], dst[size - 1]) > 0) {
310 SORT_SWAP(dst[size - 2], dst[size - 1]);
311 }
312
313 return 2;
314 }
315
316 curr = start + 2;
317
318 if (SORT_CMP(dst[start], dst[start + 1]) <= 0) {
319 /* increasing run */
320 while (1) {
321 if (curr == size - 1) {
322 break;
323 }
324
325 if (SORT_CMP(dst[curr - 1], dst[curr]) > 0) {
326 break;
327 }
328
329 curr++;
330 }
331
332 return curr - start;
333 } else {
334 /* decreasing run */
335 while (1) {
336 if (curr == size - 1) {
337 break;
338 }
339
340 if (SORT_CMP(dst[curr - 1], dst[curr]) <= 0) {
341 break;
342 }
343
344 curr++;
345 }
346
347 /* reverse in-place */
348 REVERSE_ELEMENTS(dst, start, curr - 1);
349 return curr - start;
350 }
351 }
352
CHECK_INVARIANT(TIM_SORT_RUN_T * stack,const int stack_curr)353 static int CHECK_INVARIANT(TIM_SORT_RUN_T *stack, const int stack_curr) {
354 size_t A, B, C;
355
356 if (stack_curr < 2) {
357 return 1;
358 }
359
360 if (stack_curr == 2) {
361 const size_t A1 = stack[stack_curr - 2].length;
362 const size_t B1 = stack[stack_curr - 1].length;
363
364 if (A1 <= B1) {
365 return 0;
366 }
367
368 return 1;
369 }
370
371 A = stack[stack_curr - 3].length;
372 B = stack[stack_curr - 2].length;
373 C = stack[stack_curr - 1].length;
374
375 if ((A <= B + C) || (B <= C)) {
376 return 0;
377 }
378
379 return 1;
380 }
381
382 typedef struct {
383 size_t alloc;
384 SORT_TYPE *storage;
385 } TEMP_STORAGE_T;
386
TIM_SORT_RESIZE(TEMP_STORAGE_T * store,const size_t new_size)387 static void TIM_SORT_RESIZE(TEMP_STORAGE_T *store, const size_t new_size) {
388 if (store->alloc < new_size) {
389 SORT_TYPE *tempstore = (SORT_TYPE *)realloc(store->storage, new_size * sizeof(SORT_TYPE));
390
391 if (tempstore == NULL) {
392 fprintf(stderr, "Error allocating temporary storage for tim sort: need %lu bytes",
393 (unsigned long)(sizeof(SORT_TYPE) * new_size));
394 exit(1);
395 }
396
397 store->storage = tempstore;
398 store->alloc = new_size;
399 }
400 }
401
TIM_SORT_MERGE(SORT_TYPE * dst,const TIM_SORT_RUN_T * stack,const int stack_curr,TEMP_STORAGE_T * store)402 static void TIM_SORT_MERGE(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const int stack_curr,
403 TEMP_STORAGE_T *store) {
404 const size_t A = stack[stack_curr - 2].length;
405 const size_t B = stack[stack_curr - 1].length;
406 const size_t curr = stack[stack_curr - 2].start;
407 SORT_TYPE *storage;
408 size_t i, j, k;
409 TIM_SORT_RESIZE(store, MIN(A, B));
410 storage = store->storage;
411
412 /* left merge */
413 if (A < B) {
414 memcpy(storage, &dst[curr], A * sizeof(SORT_TYPE));
415 i = 0;
416 j = curr + A;
417
418 for (k = curr; k < curr + A + B; k++) {
419 if ((i < A) && (j < curr + A + B)) {
420 if (SORT_CMP(storage[i], dst[j]) <= 0) {
421 dst[k] = storage[i++];
422 } else {
423 dst[k] = dst[j++];
424 }
425 } else if (i < A) {
426 dst[k] = storage[i++];
427 } else {
428 break;
429 }
430 }
431 } else {
432 /* right merge */
433 memcpy(storage, &dst[curr + A], B * sizeof(SORT_TYPE));
434 i = B;
435 j = curr + A;
436 k = curr + A + B;
437
438 while (k-- > curr) {
439 if ((i > 0) && (j > curr)) {
440 if (SORT_CMP(dst[j - 1], storage[i - 1]) > 0) {
441 dst[k] = dst[--j];
442 } else {
443 dst[k] = storage[--i];
444 }
445 } else if (i > 0) {
446 dst[k] = storage[--i];
447 } else {
448 break;
449 }
450 }
451 }
452 }
453
TIM_SORT_COLLAPSE(SORT_TYPE * dst,TIM_SORT_RUN_T * stack,int stack_curr,TEMP_STORAGE_T * store,const size_t size)454 static int TIM_SORT_COLLAPSE(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr,
455 TEMP_STORAGE_T *store, const size_t size) {
456 while (1) {
457 size_t A, B, C, D;
458 int ABC, BCD, CD;
459
460 /* if the stack only has one thing on it, we are done with the collapse */
461 if (stack_curr <= 1) {
462 break;
463 }
464
465 /* if this is the last merge, just do it */
466 if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
467 TIM_SORT_MERGE(dst, stack, stack_curr, store);
468 stack[0].length += stack[1].length;
469 stack_curr--;
470 break;
471 }
472 /* check if the invariant is off for a stack of 2 elements */
473 else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
474 TIM_SORT_MERGE(dst, stack, stack_curr, store);
475 stack[0].length += stack[1].length;
476 stack_curr--;
477 break;
478 } else if (stack_curr == 2) {
479 break;
480 }
481
482 B = stack[stack_curr - 3].length;
483 C = stack[stack_curr - 2].length;
484 D = stack[stack_curr - 1].length;
485
486 if (stack_curr >= 4) {
487 A = stack[stack_curr - 4].length;
488 ABC = (A <= B + C);
489 } else {
490 ABC = 0;
491 }
492
493 BCD = (B <= C + D) || ABC;
494 CD = (C <= D);
495
496 /* Both invariants are good */
497 if (!BCD && !CD) {
498 break;
499 }
500
501 /* left merge */
502 if (BCD && !CD) {
503 TIM_SORT_MERGE(dst, stack, stack_curr - 1, store);
504 stack[stack_curr - 3].length += stack[stack_curr - 2].length;
505 stack[stack_curr - 2] = stack[stack_curr - 1];
506 stack_curr--;
507 } else {
508 /* right merge */
509 TIM_SORT_MERGE(dst, stack, stack_curr, store);
510 stack[stack_curr - 2].length += stack[stack_curr - 1].length;
511 stack_curr--;
512 }
513 }
514
515 return stack_curr;
516 }
517
PUSH_NEXT(SORT_TYPE * dst,const size_t size,TEMP_STORAGE_T * store,const size_t minrun,TIM_SORT_RUN_T * run_stack,size_t * stack_curr,size_t * curr)518 static __inline int PUSH_NEXT(SORT_TYPE *dst,
519 const size_t size,
520 TEMP_STORAGE_T *store,
521 const size_t minrun,
522 TIM_SORT_RUN_T *run_stack,
523 size_t *stack_curr,
524 size_t *curr) {
525 size_t len = COUNT_RUN(dst, *curr, size);
526 size_t run = minrun;
527
528 if (run > size - *curr) {
529 run = size - *curr;
530 }
531
532 if (run > len) {
533 BINARY_INSERTION_SORT_START(&dst[*curr], len, run);
534 len = run;
535 }
536
537 run_stack[*stack_curr].start = *curr;
538 run_stack[*stack_curr].length = len;
539 (*stack_curr)++;
540 *curr += len;
541
542 if (*curr == size) {
543 /* finish up */
544 while (*stack_curr > 1) {
545 TIM_SORT_MERGE(dst, run_stack, *stack_curr, store);
546 run_stack[*stack_curr - 2].length += run_stack[*stack_curr - 1].length;
547 (*stack_curr)--;
548 }
549
550 if (store->storage != NULL) {
551 free(store->storage);
552 store->storage = NULL;
553 }
554
555 return 0;
556 }
557
558 return 1;
559 }
560
TIM_SORT(SORT_TYPE * dst,const size_t size)561 void TIM_SORT(SORT_TYPE *dst, const size_t size) {
562 size_t minrun;
563 TEMP_STORAGE_T _store, *store;
564 TIM_SORT_RUN_T run_stack[TIM_SORT_STACK_SIZE];
565 size_t stack_curr = 0;
566 size_t curr = 0;
567
568 /* don't bother sorting an array of size 1 */
569 if (size <= 1) {
570 return;
571 }
572
573 if (size < 64) {
574 BINARY_INSERTION_SORT(dst, size);
575 return;
576 }
577
578 /* compute the minimum run length */
579 minrun = compute_minrun(size);
580 /* temporary storage for merges */
581 store = &_store;
582 store->alloc = 0;
583 store->storage = NULL;
584
585 if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
586 return;
587 }
588
589 if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
590 return;
591 }
592
593 if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
594 return;
595 }
596
597 while (1) {
598 if (!CHECK_INVARIANT(run_stack, stack_curr)) {
599 stack_curr = TIM_SORT_COLLAPSE(dst, run_stack, stack_curr, store, size);
600 continue;
601 }
602
603 if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) {
604 return;
605 }
606 }
607 }
608
609 #undef SORT_CONCAT
610 #undef SORT_MAKE_STR1
611 #undef SORT_MAKE_STR
612 #undef SORT_NAME
613 #undef SORT_TYPE
614 #undef SORT_CMP
615 #undef TEMP_STORAGE_T
616 #undef TIM_SORT_RUN_T
617 #undef PUSH_NEXT
618 #undef SORT_SWAP
619 #undef SORT_CONCAT
620 #undef SORT_MAKE_STR1
621 #undef SORT_MAKE_STR
622 #undef BINARY_INSERTION_FIND
623 #undef BINARY_INSERTION_SORT_START
624 #undef BINARY_INSERTION_SORT
625 #undef REVERSE_ELEMENTS
626 #undef COUNT_RUN
627 #undef TIM_SORT
628 #undef TIM_SORT_RESIZE
629 #undef TIM_SORT_COLLAPSE
630 #undef TIM_SORT_RUN_T
631 #undef TEMP_STORAGE_T
632