1 /*
2 *******************************************************************************
3 * Copyright (C) 2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: denseranges.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2010sep25
12 * created by: Markus W. Scherer
13 *
14 * Helper code for finding a small number of dense ranges.
15 */
16
17 #include "unicode/utypes.h"
18 #include "denseranges.h"
19
20 // Definitions in the anonymous namespace are invisible outside this file.
21 namespace {
22
/**
 * Collects range gaps and retains only the largest ones (up to 15),
 * sorted by descending gap length: index 0 holds the largest retained gap.
 */
class LargestGaps {
public:
    /**
     * @param max Maximum number of gaps to retain.
     *            Clamped to 0..kCapacity; with 0 nothing is ever retained.
     */
    explicit LargestGaps(int32_t max)
            : maxLength(max<0 ? 0 : (max<=kCapacity ? max : kCapacity)), length(0) {}

    /**
     * Offers a gap to the collection.
     * The gap is stored only if it is among the maxLength largest seen so far;
     * when the collection is full, the smallest retained gap is dropped.
     * @param gapStart First value of the gap.
     * @param gapLength Number of values in the gap.
     */
    void add(int32_t gapStart, int64_t gapLength) {
        // Find the insertion index that keeps gapLengths[] in descending order.
        // A new gap is placed after already-stored gaps of equal length.
        int32_t i=length;
        while(i>0 && gapLength>gapLengths[i-1]) {
            --i;
        }
        if(i<maxLength) {
            // The new gap is now one of the maxLength largest.
            // Insert the new gap, moving up the smaller ones.
            // If the arrays are full, the last (smallest) entry is overwritten.
            int32_t j= length<maxLength ? length++ : maxLength-1;
            while(j>i) {
                gapStarts[j]=gapStarts[j-1];
                gapLengths[j]=gapLengths[j-1];
                --j;
            }
            gapStarts[i]=gapStart;
            gapLengths[i]=gapLength;
        }
    }

    /**
     * Keeps only the newLength largest gaps.
     * Does nothing if newLength>=count(); a negative newLength is treated as 0
     * so that count() never becomes negative.
     */
    void truncate(int32_t newLength) {
        if(newLength<0) {
            newLength=0;
        }
        if(newLength<length) {
            length=newLength;
        }
    }

    /** @return Number of gaps currently retained. */
    int32_t count() const { return length; }
    /** @return Start of the i-th largest gap; requires 0<=i<count(). */
    int32_t gapStart(int32_t i) const { return gapStarts[i]; }
    /** @return Length of the i-th largest gap; requires 0<=i<count(). */
    int64_t gapLength(int32_t i) const { return gapLengths[i]; }

    /**
     * Finds the retained gap with the smallest start that is greater than value.
     * @return Index of that gap, or -1 if no retained gap starts after value.
     */
    int32_t firstAfter(int32_t value) const {
        int32_t minValue=0;
        int32_t minIndex=-1;
        for(int32_t i=0; i<length; ++i) {
            if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
                minValue=gapStarts[i];
                minIndex=i;
            }
        }
        return minIndex;
    }

private:
    static const int32_t kCapacity=15;

    int32_t maxLength;  // Maximum number of gaps to retain, 0..kCapacity.
    int32_t length;     // Number of gaps currently stored, 0..maxLength.
    // Parallel arrays; only the entries at [0..length[ are valid.
    int32_t gapStarts[kCapacity];
    int64_t gapLengths[kCapacity];
};
83
84 } // namespace
85
86 /**
87 * Does it make sense to write 1..capacity ranges?
88 * Returns 0 if not, otherwise the number of ranges.
89 * @param values Sorted array of signed-integer values.
90 * @param length Number of values.
91 * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
92 * Should be 0x80..0x100, must be 1..0x100.
93 * @param ranges Output ranges array.
94 * @param capacity Maximum number of ranges.
95 * @return Minimum number of ranges (at most capacity) that have the desired density,
96 * or 0 if that density cannot be achieved.
97 */
98 U_CAPI int32_t U_EXPORT2
uprv_makeDenseRanges(const int32_t values[],int32_t length,int32_t density,int32_t ranges[][2],int32_t capacity)99 uprv_makeDenseRanges(const int32_t values[], int32_t length,
100 int32_t density,
101 int32_t ranges[][2], int32_t capacity) {
102 if(length<=2) {
103 return 0;
104 }
105 int32_t minValue=values[0];
106 int32_t maxValue=values[length-1]; // Assume minValue<=maxValue.
107 // Use int64_t variables for intermediate-value precision and to avoid
108 // signed-int32_t overflow of maxValue-minValue.
109 int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
110 if(length>=(density*maxLength)/0x100) {
111 // Use one range.
112 ranges[0][0]=minValue;
113 ranges[0][1]=maxValue;
114 return 1;
115 }
116 if(length<=4) {
117 return 0;
118 }
119 // See if we can split [minValue, maxValue] into 2..capacity ranges,
120 // divided by the 1..(capacity-1) largest gaps.
121 LargestGaps gaps(capacity-1);
122 int32_t i;
123 int32_t expectedValue=minValue;
124 for(i=1; i<length; ++i) {
125 ++expectedValue;
126 int32_t actualValue=values[i];
127 if(expectedValue!=actualValue) {
128 gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
129 expectedValue=actualValue;
130 }
131 }
132 // We know gaps.count()>=1 because we have fewer values (length) than
133 // the length of the [minValue..maxValue] range (maxLength).
134 // (Otherwise we would have returned with the one range above.)
135 int32_t num;
136 for(i=0, num=2;; ++i, ++num) {
137 if(i>=gaps.count()) {
138 // The values are too sparse for capacity or fewer ranges
139 // of the requested density.
140 return 0;
141 }
142 maxLength-=gaps.gapLength(i);
143 if(length>num*2 && length>=(density*maxLength)/0x100) {
144 break;
145 }
146 }
147 // Use the num ranges with the num-1 largest gaps.
148 gaps.truncate(num-1);
149 ranges[0][0]=minValue;
150 for(i=0; i<=num-2; ++i) {
151 int32_t gapIndex=gaps.firstAfter(minValue);
152 int32_t gapStart=gaps.gapStart(gapIndex);
153 ranges[i][1]=gapStart-1;
154 ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
155 }
156 ranges[num-1][1]=maxValue;
157 return num;
158 }
159