• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "frequent.h"
2 #include <assert.h>
3 #include <stdlib.h>
4 
5 // misra-gries
6 // http://www2.research.att.com/~marioh/papers/vldb08-2.pdf
7 
8 struct _FREQUENT {
9   int size,czero;
10   char sorted;
11   struct { intptr_t key; int count,zero; } pair[];
12 };
13 
14 // size is the precision/return size: in sequence with n _add(), it will find at most >size elements with occurence > n/(size+1) times
frequent_new(int size)15 FREQUENT *frequent_new(int size) // {{{ - just free() it
16 {
17   assert(size>0);
18   FREQUENT *ret=malloc(sizeof(ret[0])+sizeof(ret->pair[0])*size);
19   if (!ret) {
20     return NULL;
21   }
22   ret->size=size;
23   ret->czero=0;
24   ret->sorted=1;
25   int iA;
26   for (iA=0;iA<size;iA++) {
27     ret->pair[iA].key=INTPTR_MIN;
28     ret->pair[iA].count=0;
29     ret->pair[iA].zero=0;
30   }
31 
32   return ret;
33 }
34 // }}}
35 
frequent_add(FREQUENT * freq,intptr_t key)36 void frequent_add(FREQUENT *freq,intptr_t key) // {{{
37 {
38   assert(freq);
39   int iA,zero=-1;
40   for (iA=freq->size-1;iA>=0;iA--) {
41     if (freq->pair[iA].key==key) {
42       freq->pair[iA].count++;
43       freq->sorted=0;
44       return;
45     } else if (freq->pair[iA].count==freq->czero) {
46       zero=iA;
47     }
48   }
49   if (zero>=0) { // insert into set
50     freq->pair[zero].key=key;
51     freq->pair[zero].count++; // i.e. czero+1
52     freq->pair[zero].zero=freq->czero;
53     // if it was sorted, the free entries are at the end. zero points to the first free entry, because of the loop direction
54   } else { // out-of-set count
55     freq->czero++;
56   }
57 }
58 // }}}
59 
frequent_cmp(const void * a,const void * b)60 static int frequent_cmp(const void *a,const void *b) // {{{
61 {
62   const typeof(((FREQUENT *)0)->pair[0]) *aa=a;
63   const typeof(((FREQUENT *)0)->pair[0]) *bb=b;
64   return (bb->count-bb->zero)-(aa->count-aa->zero);
65 }
66 // }}}
67 
68 // true frequency is somewhere between (count-zero) and count
frequent_get(FREQUENT * freq,int pos)69 intptr_t frequent_get(FREQUENT *freq,int pos) // {{{
70 {
71   assert(freq);
72   if (!freq->sorted) {
73     // sort by (count-zero)
74     qsort(freq->pair,freq->size,sizeof(freq->pair[0]),frequent_cmp);
75     freq->sorted=1;
76   }
77   if ( (pos<0)||(pos>=freq->size) ) {
78     return INTPTR_MIN;
79   }
80   return freq->pair[pos].key;
81 }
82 // }}}
83 
84