• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  **	Filename:	cluster.h
3  **	Purpose:	Definition of feature space clustering routines
4  **	Author:		Dan Johnson
5  **	History:	5/29/89, DSJ, Created.
6  **
7  **	(c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 #ifndef   CLUSTER_H
19 #define   CLUSTER_H
20 
21 #include "kdtree.h"
22 #include "oldlist.h"
23 
24 /*----------------------------------------------------------------------
25           Types
26 ----------------------------------------------------------------------*/
27 typedef struct sample
28 {
29   unsigned Clustered:1;          // TRUE if included in a higher cluster
30   unsigned Prototype:1;          // TRUE if cluster represented by a proto
31   unsigned SampleCount:30;       // number of samples in this cluster
32   struct sample *Left;           // ptr to left sub-cluster
33   struct sample *Right;          // ptr to right sub-cluster
34   inT32 CharID;                  // identifier of char sample came from
35   FLOAT32 Mean[1];               // mean of cluster - SampleSize floats
36 }
37 
38 
39 CLUSTER;
40 
41 typedef CLUSTER SAMPLE;          // can refer to as either sample or cluster
42 
43 typedef enum {
44   spherical, elliptical, mixed, automatic
45 }
46 
47 
48 PROTOSTYLE;
49 
50 typedef struct                   // parameters to control clustering
51 {
52   PROTOSTYLE ProtoStyle;         // specifies types of protos to be made
53   FLOAT32 MinSamples;            // min # of samples per proto - % of total
54   FLOAT32 MaxIllegal;            // max percentage of samples in a cluster which have
55   // more than 1 feature in that cluster
56   FLOAT32 Independence;          // desired independence between dimensions
57   FLOAT64 Confidence;            // desired confidence in prototypes created
58   int MagicSamples;              // Ideal number of samples in a cluster.
59 }
60 
61 
62 CLUSTERCONFIG;
63 
64 typedef enum {
65   normal, uniform, D_random
66 }
67 
68 
69 DISTRIBUTION;
70 
71 typedef union
72 {
73   FLOAT32 Spherical;
74   FLOAT32 *Elliptical;
75 
76 }
77 
78 
79 FLOATUNION;
80 
81 typedef struct
82 {
83   unsigned Significant:1;        // TRUE if prototype is significant
84   unsigned Merged:1;             // Merged after clustering so do not output
85                                  // but kept for display purposes. If it has no
86                                  // samples then it was actually merged.
87                                  // Otherwise it matched an already significant
88                                  // cluster.
89   unsigned Style:2;              // spherical, elliptical, or mixed
90   unsigned NumSamples:28;        // number of samples in the cluster
91   CLUSTER *Cluster;              // ptr to cluster which made prototype
92   DISTRIBUTION *Distrib;         // different distribution for each dimension
93   FLOAT32 *Mean;                 // prototype mean
94   FLOAT32 TotalMagnitude;        // total magnitude over all dimensions
95   FLOAT32 LogMagnitude;          // log base e of TotalMagnitude
96   FLOATUNION Variance;           // prototype variance
97   FLOATUNION Magnitude;          // magnitude of density function
98   FLOATUNION Weight;             // weight of density function
99 }
100 
101 
102 PROTOTYPE;
103 
104 typedef struct
105 {
106   inT16 SampleSize;              // number of parameters per sample
107   PARAM_DESC *ParamDesc;         // description of each parameter
108   inT32 NumberOfSamples;         // total number of samples being clustered
109   KDTREE *KDTree;                // for optimal nearest neighbor searching
110   CLUSTER *Root;                 // ptr to root cluster of cluster tree
111   LIST ProtoList;                // list of prototypes
112   inT32 NumChar;                 // # of characters represented by samples
113 }
114 
115 
116 CLUSTERER;
117 
118 typedef struct
119 {
120   inT32 NumSamples;              // number of samples in list
121   inT32 MaxNumSamples;           // maximum size of list
122   SAMPLE *Sample[1];             // array of ptrs to sample data structures
123 }
124 
125 
126 SAMPLELIST;
127 
128 // low level cluster tree analysis routines.
129 #define InitSampleSearch(S,C) (((C)==NULL)?(S=NIL):(S=push(NIL,(C))))
130 
131 /*--------------------------------------------------------------------------
132         Public Function Prototypes
133 --------------------------------------------------------------------------*/
134 CLUSTERER *MakeClusterer (inT16 SampleSize, PARAM_DESC ParamDesc[]);
135 
136 SAMPLE *MakeSample (CLUSTERER * Clusterer, FLOAT32 Feature[], inT32 CharID);
137 
138 LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config);
139 
140 void FreeClusterer(CLUSTERER *Clusterer);
141 
142 void FreeProtoList(LIST *ProtoList);
143 
144 void FreePrototype(void *arg);  //PROTOTYPE     *Prototype);
145 
146 CLUSTER *NextSample(LIST *SearchState);
147 
148 FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension);
149 
150 FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension);
151 
152 inT32 MergeClusters(inT16 N, PARAM_DESC ParamDesc[], inT32 n1, inT32 n2,
153                     FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[]);
154 
155 //--------------Global Data Definitions and Declarations---------------------------
156 // define errors that can be trapped
157 #define ALREADYCLUSTERED  4000
158 #endif
159