1 /****************************************************************************** 2 ** Filename: cluster.h 3 ** Purpose: Definition of feature space clustering routines 4 ** Author: Dan Johnson 5 ** History: 5/29/89, DSJ, Created. 6 ** 7 ** (c) Copyright Hewlett-Packard Company, 1988. 8 ** Licensed under the Apache License, Version 2.0 (the "License"); 9 ** you may not use this file except in compliance with the License. 10 ** You may obtain a copy of the License at 11 ** http://www.apache.org/licenses/LICENSE-2.0 12 ** Unless required by applicable law or agreed to in writing, software 13 ** distributed under the License is distributed on an "AS IS" BASIS, 14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 ** See the License for the specific language governing permissions and 16 ** limitations under the License. 17 ******************************************************************************/ 18 #ifndef CLUSTER_H 19 #define CLUSTER_H 20 21 #include "kdtree.h" 22 #include "oldlist.h" 23 24 /*---------------------------------------------------------------------- 25 Types 26 ----------------------------------------------------------------------*/ 27 typedef struct sample 28 { 29 unsigned Clustered:1; // TRUE if included in a higher cluster 30 unsigned Prototype:1; // TRUE if cluster represented by a proto 31 unsigned SampleCount:30; // number of samples in this cluster 32 struct sample *Left; // ptr to left sub-cluster 33 struct sample *Right; // ptr to right sub-cluster 34 inT32 CharID; // identifier of char sample came from 35 FLOAT32 Mean[1]; // mean of cluster - SampleSize floats 36 } 37 38 39 CLUSTER; 40 41 typedef CLUSTER SAMPLE; // can refer to as either sample or cluster 42 43 typedef enum { 44 spherical, elliptical, mixed, automatic 45 } 46 47 48 PROTOSTYLE; 49 50 typedef struct // parameters to control clustering 51 { 52 PROTOSTYLE ProtoStyle; // specifies types of protos to be made 53 FLOAT32 MinSamples; // min # of samples per proto - % of total 54 FLOAT32 MaxIllegal; // max percentage of samples in a cluster which have 55 // more than 1 feature in that cluster 56 FLOAT32 Independence; // desired independence between dimensions 57 FLOAT64 Confidence; // desired confidence in prototypes created 58 int MagicSamples; // Ideal number of samples in a cluster. 59 } 60 61 62 CLUSTERCONFIG; 63 64 typedef enum { 65 normal, uniform, D_random 66 } 67 68 69 DISTRIBUTION; 70 71 typedef union 72 { 73 FLOAT32 Spherical; 74 FLOAT32 *Elliptical; 75 76 } 77 78 79 FLOATUNION; 80 81 typedef struct 82 { 83 unsigned Significant:1; // TRUE if prototype is significant 84 unsigned Merged:1; // Merged after clustering so do not output 85 // but kept for display purposes. If it has no 86 // samples then it was actually merged. 87 // Otherwise it matched an already significant 88 // cluster. 89 unsigned Style:2; // spherical, elliptical, or mixed 90 unsigned NumSamples:28; // number of samples in the cluster 91 CLUSTER *Cluster; // ptr to cluster which made prototype 92 DISTRIBUTION *Distrib; // different distribution for each dimension 93 FLOAT32 *Mean; // prototype mean 94 FLOAT32 TotalMagnitude; // total magnitude over all dimensions 95 FLOAT32 LogMagnitude; // log base e of TotalMagnitude 96 FLOATUNION Variance; // prototype variance 97 FLOATUNION Magnitude; // magnitude of density function 98 FLOATUNION Weight; // weight of density function 99 } 100 101 102 PROTOTYPE; 103 104 typedef struct 105 { 106 inT16 SampleSize; // number of parameters per sample 107 PARAM_DESC *ParamDesc; // description of each parameter 108 inT32 NumberOfSamples; // total number of samples being clustered 109 KDTREE *KDTree; // for optimal nearest neighbor searching 110 CLUSTER *Root; // ptr to root cluster of cluster tree 111 LIST ProtoList; // list of prototypes 112 inT32 NumChar; // # of characters represented by samples 113 } 114 115 116 CLUSTERER; 117 118 typedef struct 119 { 120 inT32 NumSamples; // number of samples in list 121 inT32 MaxNumSamples; // maximum size of list 122 SAMPLE *Sample[1]; // array of ptrs to sample data structures 123 } 124 125 126 SAMPLELIST; 127 128 // low level cluster tree analysis routines. 129 #define InitSampleSearch(S,C) (((C)==NULL)?(S=NIL):(S=push(NIL,(C)))) 130 131 /*-------------------------------------------------------------------------- 132 Public Function Prototypes 133 --------------------------------------------------------------------------*/ 134 CLUSTERER *MakeClusterer (inT16 SampleSize, PARAM_DESC ParamDesc[]); 135 136 SAMPLE *MakeSample (CLUSTERER * Clusterer, FLOAT32 Feature[], inT32 CharID); 137 138 LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config); 139 140 void FreeClusterer(CLUSTERER *Clusterer); 141 142 void FreeProtoList(LIST *ProtoList); 143 144 void FreePrototype(void *arg); //PROTOTYPE *Prototype); 145 146 CLUSTER *NextSample(LIST *SearchState); 147 148 FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension); 149 150 FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension); 151 152 inT32 MergeClusters(inT16 N, PARAM_DESC ParamDesc[], inT32 n1, inT32 n2, 153 FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[]); 154 155 //--------------Global Data Definitions and Declarations--------------------------- 156 // define errors that can be trapped 157 #define ALREADYCLUSTERED 4000 158 #endif 159