OSSIM - Open Source Software Image Map  Version 1.9.0 (20180803)
ossimKMeansClustering.cpp
Go to the documentation of this file.
1 //**************************************************************************************************
2 //
3 // OSSIM Open Source Geospatial Data Processing Library
4 // See top level LICENSE.txt file for license information
5 //
6 //**************************************************************************************************
7 
9 #include <iostream>
10 
11 using namespace std;
12 
14 : m_numEntries(0),
15  m_samples(0),
16  m_populations(0),
17  m_clustersValid(false),
18  m_verbose(false)
19 {
20 
21 }
22 
24 {
25  delete m_samples;
26  delete m_populations;
27  m_clusters.clear();
28 }
29 
31 {
32  m_clusters.clear();
33  m_clusters.resize(K);
34  m_clustersValid = false;
35 }
36 
38 {
39  if ((m_numEntries == 0) || (m_samples == 0))
40  return false;
41 
42  if (m_verbose)
43  std::cout.precision(6);
44 
45  // If populations aren't provided, assume only 1 of each sample:
46  if (m_populations == 0)
47  {
48  m_populations = new double [m_numEntries];
49  for (ossim_uint32 i=0; i<m_numEntries; ++i)
50  m_populations[i] = 1.0;
51  }
52 
53  // Scan for min and max:
54  double overall_min = OSSIM_DEFAULT_MAX_PIX_DOUBLE;
55  double overall_max = OSSIM_DEFAULT_MIN_PIX_DOUBLE;
56  for (ossim_uint32 i=0; i<m_numEntries; i++)
57  {
58  if (m_populations[i] == 0)
59  continue;
60 
61  if (m_samples[i] < overall_min)
62  overall_min = m_samples[i];
63  else if (m_samples[i] > overall_max)
64  overall_max = m_samples[i];
65  }
66 
67  double max_delta = overall_max - overall_min;
68  // double convergenceThreshold = 0.1*(max_delta)/m_numEntries;
69  ossim_uint32 numClusters = m_clusters.size();
70  double* variances = new double [numClusters];
71  ossim_uint32* priorCounts = new ossim_uint32 [numClusters];
72  double interm_samples = (overall_max - overall_min) / numClusters;
73  double mean_i = overall_min + interm_samples / 2.0; // initial mean for cluster 0;
74  double bound = overall_min;
75  for (ossim_uint32 gid=0; gid<numClusters; ++gid)
76  {
77  //Initialize cluster with even spread and initial mean:
78  m_clusters[gid].min = bound;
79  bound += interm_samples;
80  m_clusters[gid].max = bound;
81  m_clusters[gid].mean = mean_i;
82  m_clusters[gid].new_mean = 0;
83  mean_i += interm_samples;
84  m_clusters[gid].n = 0;
85  m_clusters[gid].sigma = 1.0;
86  variances[gid] = 0.0;
87  priorCounts[gid] = 0;
88  }
89  double delta, min_delta;
90  // ossim_uint32 best_gid, np, iters = 0, max_iters = 20;
91  ossim_uint32 best_gid, iters = 0, max_iters = 20;
92  bool converged = false;
93 
94  // Loop until converged on best solution:
95  while (!converged && (iters < max_iters))
96  {
97  converged = true; // prove otherwise
98  ++iters;
99 
100  for (ossim_uint32 gid=0; gid<numClusters; ++gid)
101  {
102  m_clusters[gid].min = overall_max;
103  m_clusters[gid].max = overall_min;
104  }
105 
106  for (ossim_uint32 i=0; i<m_numEntries; i++)
107  {
108  if ( m_populations[i] == 0)
109  continue;
110 
111  // Find the current cluster:
112  best_gid = 0;
113  min_delta = OSSIM_DEFAULT_MAX_PIX_DOUBLE;
114  for (ossim_uint32 gid=0; gid<numClusters; ++gid)
115  {
116  delta = fabs((m_samples[i] - m_clusters[gid].mean));
117  if (delta < min_delta)
118  {
119  min_delta = delta;
120  best_gid = gid;
121  }
122  }
123 
124  // Possible update of min/max for current cluster:
125  if (m_samples[i] < m_clusters[best_gid].min)
126  m_clusters[best_gid].min = m_samples[i];
127  else if (m_samples[i] > m_clusters[best_gid].max)
128  m_clusters[best_gid].max = m_samples[i];
129 
130  // Accumulate sample for the new mean for this cluster:
131  m_clusters[best_gid].new_mean += m_populations[i]*m_samples[i];
132  m_clusters[best_gid].n += m_populations[i];
133 
134  // Add this bin's contribution to the cluster stats. Use the sigma member as accumulator,
135  // normalize later:
136  delta = m_clusters[best_gid].mean - m_samples[i];
137  variances[best_gid] += m_populations[i]*delta*delta;
138 
139  } // End loop over bins
140 
141  // Finished processing all input pixels for this iteration. Update the means:
142  for (ossim_uint32 gid=0; gid<numClusters; ++gid)
143  {
144  // Compute new mean from accumulation:
145  if (m_clusters[gid].n)
146  {
147  m_clusters[gid].new_mean /= m_clusters[gid].n;
148  m_clusters[gid].sigma = sqrt(variances[gid] / m_clusters[gid].n);
149  variances[gid] = 0.0;
150  }
151 
152  //if (fabs(m_clusters[gid].mean - m_clusters[gid].new_mean) > convergenceThreshold )
153  if (m_clusters[gid].n != priorCounts[gid])
154  converged = false;
155 
156 // if (m_verbose)
157 // {
158 // cout<<"iteration: "<<iters<<endl;
159 // cout<<"cluster["<<gid<<"].mean = "<<m_clusters[gid].mean<<endl;
160 // cout<<"cluster["<<gid<<"].new_mean = "<<m_clusters[gid].new_mean<<endl;
161 // cout<<"cluster["<<gid<<"].sigma = "<<m_clusters[gid].sigma<<endl;
162 // cout<<"cluster["<<gid<<"].n = "<<(int)m_clusters[gid].n<<" prior: "<<priorCounts[gid]<<endl;
163 // cout<<"cluster["<<gid<<"].min = "<<m_clusters[gid].min<<endl;
164 // cout<<"cluster["<<gid<<"].max = "<<m_clusters[gid].max<<endl;
165 // cout << endl;
166 // }
167  if (m_clusters[gid].n)
168  m_clusters[gid].mean = m_clusters[gid].new_mean;
169 
170  if (!converged)
171  {
172  priorCounts[gid] = m_clusters[gid].n;
173  m_clusters[gid].n = 0;
174  m_clusters[gid].new_mean = 0;
175  }
176  }
177  } // End overall loop for convergence:
178 
179  if (m_verbose)
180  {
181  cout<<"\nossimKMeansClustering Summary ("<<iters<<" iterations):"<<endl;
182  for (ossim_uint32 gid=0; gid<numClusters; gid++)
183  {
184  cout<<"\n cluster["<<gid<<"] n = "<<(int)m_clusters[gid].n<<endl;
185  cout<<" mean = "<<m_clusters[gid].mean<<endl;
186  cout<<" sigma = "<<m_clusters[gid].sigma<<endl;
187  cout<<" min = "<<m_clusters[gid].min<<endl;
188  cout<<" max = "<<m_clusters[gid].max<<endl;
189  }
190  cout << endl;
191  }
192 
193  delete [] variances;
194  delete [] priorCounts;
195  m_clustersValid = true;
196  return true;
197 
198 }
199 
201 {
202  if (clusterId >= m_clusters.size())
203  return ossim::nan();
204 
205  return m_clusters[clusterId].mean;
206 }
207 
209 {
210  if (clusterId >= m_clusters.size())
211  return ossim::nan();
212 
213  return m_clusters[clusterId].sigma;
214 }
215 
217 {
218  if (clusterId >= m_clusters.size())
219  return ossim::nan();
220 
221  return m_clusters[clusterId].min;
222 }
223 
225 {
226  if (clusterId >= m_clusters.size())
227  return ossim::nan();
228 
229  return m_clusters[clusterId].max;
230 }
231 
234 {
235  if (i >= m_clusters.size())
236  return 0;
237  return &(m_clusters[i]);
238 }
239 
240 
double getMaxValue(ossim_uint32 groupId) const
#define OSSIM_DEFAULT_MAX_PIX_DOUBLE
double nan()
Method to return ieee floating point double precision NAN.
Definition: ossimCommon.h:135
const ossimKMeansClustering::Cluster * getCluster(ossim_uint32 i) const
#define OSSIM_DEFAULT_MIN_PIX_DOUBLE
os2<< "> n<< " > nendobj n
void setNumClusters(ossim_uint32 K)
unsigned int ossim_uint32
std::vector< Cluster > m_clusters
#define max(a, b)
Definition: auxiliary.h:76
double getSigma(ossim_uint32 groupId) const
double getMean(ossim_uint32 groupId) const
double getMinValue(ossim_uint32 groupId) const
#define min(a, b)
Definition: auxiliary.h:75