1 #include "frequent.h"
2 #include <assert.h>
3 #include <stdlib.h>
4
5 // misra-gries
6 // http://www2.research.att.com/~marioh/papers/vldb08-2.pdf
7
8 struct _FREQUENT {
9 int size,czero;
10 char sorted;
11 struct { intptr_t key; int count,zero; } pair[];
12 };
13
14 // size is the precision/return size: in sequence with n _add(), it will find at most >size elements with occurence > n/(size+1) times
frequent_new(int size)15 FREQUENT *frequent_new(int size) // {{{ - just free() it
16 {
17 assert(size>0);
18 FREQUENT *ret=malloc(sizeof(ret[0])+sizeof(ret->pair[0])*size);
19 if (!ret) {
20 return NULL;
21 }
22 ret->size=size;
23 ret->czero=0;
24 ret->sorted=1;
25 int iA;
26 for (iA=0;iA<size;iA++) {
27 ret->pair[iA].key=INTPTR_MIN;
28 ret->pair[iA].count=0;
29 ret->pair[iA].zero=0;
30 }
31
32 return ret;
33 }
34 // }}}
35
frequent_add(FREQUENT * freq,intptr_t key)36 void frequent_add(FREQUENT *freq,intptr_t key) // {{{
37 {
38 assert(freq);
39 int iA,zero=-1;
40 for (iA=freq->size-1;iA>=0;iA--) {
41 if (freq->pair[iA].key==key) {
42 freq->pair[iA].count++;
43 freq->sorted=0;
44 return;
45 } else if (freq->pair[iA].count==freq->czero) {
46 zero=iA;
47 }
48 }
49 if (zero>=0) { // insert into set
50 freq->pair[zero].key=key;
51 freq->pair[zero].count++; // i.e. czero+1
52 freq->pair[zero].zero=freq->czero;
53 // if it was sorted, the free entries are at the end. zero points to the first free entry, because of the loop direction
54 } else { // out-of-set count
55 freq->czero++;
56 }
57 }
58 // }}}
59
frequent_cmp(const void * a,const void * b)60 static int frequent_cmp(const void *a,const void *b) // {{{
61 {
62 const typeof(((FREQUENT *)0)->pair[0]) *aa=a;
63 const typeof(((FREQUENT *)0)->pair[0]) *bb=b;
64 return (bb->count-bb->zero)-(aa->count-aa->zero);
65 }
66 // }}}
67
68 // true frequency is somewhere between (count-zero) and count
frequent_get(FREQUENT * freq,int pos)69 intptr_t frequent_get(FREQUENT *freq,int pos) // {{{
70 {
71 assert(freq);
72 if (!freq->sorted) {
73 // sort by (count-zero)
74 qsort(freq->pair,freq->size,sizeof(freq->pair[0]),frequent_cmp);
75 freq->sorted=1;
76 }
77 if ( (pos<0)||(pos>=freq->size) ) {
78 return INTPTR_MIN;
79 }
80 return freq->pair[pos].key;
81 }
82 // }}}
83
84