• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ** 2013-05-28
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains code to implement the percentile(Y,P) SQL function
14 ** as described below:
15 **
16 **   (1)  The percentile(Y,P) function is an aggregate function taking
17 **        exactly two arguments.
18 **
19 **   (2)  If the P argument to percentile(Y,P) is not the same for every
20 **        row in the aggregate then an error is thrown.  The word "same"
21 **        in the previous sentence means that the value differ by less
22 **        than 0.001.
23 **
24 **   (3)  If the P argument to percentile(Y,P) evaluates to anything other
25 **        than a number in the range of 0.0 to 100.0 inclusive then an
26 **        error is thrown.
27 **
28 **   (4)  If any Y argument to percentile(Y,P) evaluates to a value that
29 **        is not NULL and is not numeric then an error is thrown.
30 **
31 **   (5)  If any Y argument to percentile(Y,P) evaluates to plus or minus
32 **        infinity then an error is thrown.  (SQLite always interprets NaN
33 **        values as NULL.)
34 **
35 **   (6)  Both Y and P in percentile(Y,P) can be arbitrary expressions,
36 **        including CASE WHEN expressions.
37 **
38 **   (7)  The percentile(Y,P) aggregate is able to handle inputs of at least
39 **        one million (1,000,000) rows.
40 **
41 **   (8)  If there are no non-NULL values for Y, then percentile(Y,P)
42 **        returns NULL.
43 **
44 **   (9)  If there is exactly one non-NULL value for Y, the percentile(Y,P)
45 **        returns the one Y value.
46 **
47 **  (10)  If there N non-NULL values of Y where N is two or more and
48 **        the Y values are ordered from least to greatest and a graph is
49 **        drawn from 0 to N-1 such that the height of the graph at J is
50 **        the J-th Y value and such that straight lines are drawn between
51 **        adjacent Y values, then the percentile(Y,P) function returns
52 **        the height of the graph at P*(N-1)/100.
53 **
54 **  (11)  The percentile(Y,P) function always returns either a floating
55 **        point number or NULL.
56 **
57 **  (12)  The percentile(Y,P) is implemented as a single C99 source-code
58 **        file that compiles into a shared-library or DLL that can be loaded
59 **        into SQLite using the sqlite3_load_extension() interface.
60 */
61 #include "sqlite3ext.h"
62 SQLITE_EXTENSION_INIT1
63 #include <assert.h>
64 #include <string.h>
65 #include <stdlib.h>
66 
67 /* The following object is the session context for a single percentile()
68 ** function.  We have to remember all input Y values until the very end.
69 ** Those values are accumulated in the Percentile.a[] array.
70 */
71 typedef struct Percentile Percentile;
72 struct Percentile {
73   unsigned nAlloc;     /* Number of slots allocated for a[] */
74   unsigned nUsed;      /* Number of slots actually used in a[] */
75   double rPct;         /* 1.0 more than the value for P */
76   double *a;           /* Array of Y values */
77 };
78 
79 /*
80 ** Return TRUE if the input floating-point number is an infinity.
81 */
isInfinity(double r)82 static int isInfinity(double r){
83   sqlite3_uint64 u;
84   assert( sizeof(u)==sizeof(r) );
85   memcpy(&u, &r, sizeof(u));
86   return ((u>>52)&0x7ff)==0x7ff;
87 }
88 
89 /*
90 ** Return TRUE if two doubles differ by 0.001 or less
91 */
sameValue(double a,double b)92 static int sameValue(double a, double b){
93   a -= b;
94   return a>=-0.001 && a<=0.001;
95 }
96 
97 /*
98 ** The "step" function for percentile(Y,P) is called once for each
99 ** input row.
100 */
percentStep(sqlite3_context * pCtx,int argc,sqlite3_value ** argv)101 static void percentStep(sqlite3_context *pCtx, int argc, sqlite3_value **argv){
102   Percentile *p;
103   double rPct;
104   int eType;
105   double y;
106   assert( argc==2 );
107 
108   /* Requirement 3:  P must be a number between 0 and 100 */
109   eType = sqlite3_value_numeric_type(argv[1]);
110   rPct = sqlite3_value_double(argv[1]);
111   if( (eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT)
112    || rPct<0.0 || rPct>100.0 ){
113     sqlite3_result_error(pCtx, "2nd argument to percentile() is not "
114                          "a number between 0.0 and 100.0", -1);
115     return;
116   }
117 
118   /* Allocate the session context. */
119   p = (Percentile*)sqlite3_aggregate_context(pCtx, sizeof(*p));
120   if( p==0 ) return;
121 
122   /* Remember the P value.  Throw an error if the P value is different
123   ** from any prior row, per Requirement (2). */
124   if( p->rPct==0.0 ){
125     p->rPct = rPct+1.0;
126   }else if( !sameValue(p->rPct,rPct+1.0) ){
127     sqlite3_result_error(pCtx, "2nd argument to percentile() is not the "
128                                "same for all input rows", -1);
129     return;
130   }
131 
132   /* Ignore rows for which Y is NULL */
133   eType = sqlite3_value_type(argv[0]);
134   if( eType==SQLITE_NULL ) return;
135 
136   /* If not NULL, then Y must be numeric.  Otherwise throw an error.
137   ** Requirement 4 */
138   if( eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT ){
139     sqlite3_result_error(pCtx, "1st argument to percentile() is not "
140                                "numeric", -1);
141     return;
142   }
143 
144   /* Throw an error if the Y value is infinity or NaN */
145   y = sqlite3_value_double(argv[0]);
146   if( isInfinity(y) ){
147     sqlite3_result_error(pCtx, "Inf input to percentile()", -1);
148     return;
149   }
150 
151   /* Allocate and store the Y */
152   if( p->nUsed>=p->nAlloc ){
153     unsigned n = p->nAlloc*2 + 250;
154     double *a = sqlite3_realloc64(p->a, sizeof(double)*n);
155     if( a==0 ){
156       sqlite3_free(p->a);
157       memset(p, 0, sizeof(*p));
158       sqlite3_result_error_nomem(pCtx);
159       return;
160     }
161     p->nAlloc = n;
162     p->a = a;
163   }
164   p->a[p->nUsed++] = y;
165 }
166 
167 /*
168 ** Compare to doubles for sorting using qsort()
169 */
doubleCmp(const void * pA,const void * pB)170 static int SQLITE_CDECL doubleCmp(const void *pA, const void *pB){
171   double a = *(double*)pA;
172   double b = *(double*)pB;
173   if( a==b ) return 0;
174   if( a<b ) return -1;
175   return +1;
176 }
177 
178 /*
179 ** Called to compute the final output of percentile() and to clean
180 ** up all allocated memory.
181 */
percentFinal(sqlite3_context * pCtx)182 static void percentFinal(sqlite3_context *pCtx){
183   Percentile *p;
184   unsigned i1, i2;
185   double v1, v2;
186   double ix, vx;
187   p = (Percentile*)sqlite3_aggregate_context(pCtx, 0);
188   if( p==0 ) return;
189   if( p->a==0 ) return;
190   if( p->nUsed ){
191     qsort(p->a, p->nUsed, sizeof(double), doubleCmp);
192     ix = (p->rPct-1.0)*(p->nUsed-1)*0.01;
193     i1 = (unsigned)ix;
194     i2 = ix==(double)i1 || i1==p->nUsed-1 ? i1 : i1+1;
195     v1 = p->a[i1];
196     v2 = p->a[i2];
197     vx = v1 + (v2-v1)*(ix-i1);
198     sqlite3_result_double(pCtx, vx);
199   }
200   sqlite3_free(p->a);
201   memset(p, 0, sizeof(*p));
202 }
203 
204 
205 #ifdef _WIN32
206 __declspec(dllexport)
207 #endif
sqlite3_percentile_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)208 int sqlite3_percentile_init(
209   sqlite3 *db,
210   char **pzErrMsg,
211   const sqlite3_api_routines *pApi
212 ){
213   int rc = SQLITE_OK;
214   SQLITE_EXTENSION_INIT2(pApi);
215   (void)pzErrMsg;  /* Unused parameter */
216   rc = sqlite3_create_function(db, "percentile", 2,
217                                SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
218                                0, percentStep, percentFinal);
219   return rc;
220 }
221