/*******************************************************************************
+
+	cluster_ech.cc
+
+   Copyright (C) 2000
+	Kevin Pulo, kev@hons.cs.usyd.edu.au.
+	Garrick Welsh, gaz@hons.cs.usyd.edu.au.
+
+	This program is free software; you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation; either version 2 of the License, or
+	(at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with this program; if not, write to the Free Software
+	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+	$Id: cluster_ech.cc,v 1.4 2000/05/28 12:29:30 kev Exp kev $
+
*******************************************************************************/

#include "cluster_ech.hh"

#include <LEDA/node_partition.h>


// RCS identification strings embedded in the object file.
static const char *rcsid = "$Id: cluster_ech.cc,v 1.4 2000/05/28 12:29:30 kev Exp kev $";
static const char *rcsrevision = "$Revision: 1.4 $";


// Scratch map used by findNearestClosestRepresentatives(): cleared on every
// call, then each candidate node maps to false until it has been processed
// and true afterwards, so duplicate candidates are handled only once.
static dictionary<node,bool> visited;
// For every node of T, the list returned by T.nearest_neighbors(node, u).
// Filled in by cluster_ech() and read by findNearestClosestRepresentatives().
static dictionary<node, list<node> > unng;	// u Nearest Neighbour "Graph"

// Number of nearest neighbours requested per point; assigned in cluster_ech().
static int u;
// Size threshold used in cluster_ech(): a merged set contributes a cluster
// representative only when it holds more than nu points.
static int nu = 10;


// Ordering function for sorting edges of T by the length of their segments.
// Returns the result of compare() on the two lengths, so shorter edges
// sort first.
static inline int cmpDelaunayEdges(const edge &e1, const edge &e2) {
	const double firstLength = T.seg(e1).length();
	const double secondLength = T.seg(e2).length();
	return compare(firstLength, secondLength);
}

static void debugPrintClusters() {
	int i;
	int clustnum = 0;
	point p;

	for (i = clusters.low(); i <= clusters.high(); i++) {
		Cluster &c = clusters[i];
		list<point> &pts = c.getList();
		(*debug) << c.rep << endl;
		forall(p, pts) {
			(*debug) << clustnum << " " << p << " " << annotation.access(p) << endl;
		}
		clustnum++;
		(*debug) << endl;
	}
}


// Choose a representative point for cluster c: the mean point of its members
// when it has any, otherwise the representative it already carries.
static point findClusterRepresentative(Cluster &c)
{
	if (c.getList().length() > 0) {
		return c.meanPoint();
	}

	// Empty cluster: the representative is left unchanged.
	return c.rep;
}


// Return (by reference) the cluster whose representative is closest to p,
// comparing squared distances.  On ties the cluster with the lowest index
// wins, because a later candidate must be strictly closer to replace it.
//
// Precondition: the clusters array is non-empty.  A reference must be
// returned, so there is nothing sensible to return otherwise.
static Cluster &findClosestRepresentative(point p)
{
	//assert(clusters.size() > 0);

	// Seed the search with the first valid index of the array rather than a
	// hard-coded 0, matching how the rest of the file walks clusters.
	int closest = clusters.low();
	double best = p.sqr_dist(clusters[closest].rep);

	// The seed has already been measured, so start at the following index.
	for (int i = closest + 1; i <= clusters.high(); i++) {
		double candidate = p.sqr_dist(clusters[i].rep);
		if (candidate < best) {
			best = candidate;
			closest = i;
		}
	}
	return clusters[closest];
}


static double findAllClosestRepresentatives() {
	point p;
	list<point> points = T.points();
	double MC = 0.0;
	double dist;

	forall(p, points) {
		Cluster &c = findClosestRepresentative(p);
		dist = sqrt(p.sqr_dist(c.rep));
		MC += dist;
		c.append(p);
	}
	return MC;
}


static double findNearestClosestRepresentatives() {
	point p;
	list<point> points = T.points();
	double MC = 0.0;
	list<node> nearestPoints;
	node n;
	int i;
	int numProcessed;
	double maxDist;

	// Get the uk nearest points of the representatives.
	for (i = clusters.low(); i <= clusters.high(); i++) {
		Cluster &c = clusters[i];
		n = T.lookup(c.rep);
		list<node> L = unng.access(n);
		nearestPoints.conc(L);
		//(*debug) << "nearestPoints.length() = " << nearestPoints.length() << endl;
	}

	// Initialise the visited structure so that we avoid doing any
	// duplicate members of nearestPoints.
	visited.clear();
	forall(n, nearestPoints) {
		if (visited.lookup(n) == nil) {
			visited.insert(n, false);
			//(*debug) << "Point is fine " << T.pos(n) << endl;
		//} else {
			//(*debug) << "Point is duplicate " << T.pos(n) << endl;
		}
	}

	maxDist = 0.0;
	numProcessed = 0;
	forall(n, nearestPoints) {
		p = T.pos(n);
		if (visited.lookup(n) == nil) {
			(*debug) << "ASSERTION VIOLATION" << endl;
		}
		if (!visited.access(n)) {
			//(*debug) << "Processing point " << p << endl;
			visited.insert(n, true);
			Cluster &c = findClosestRepresentative(p);
			double dist = sqrt(p.sqr_dist(c.rep));
			//(*debug) << "dist = " << dist << endl;
			MC += dist;
			numProcessed++;
			if (dist > maxDist) {
				maxDist = dist;
				//(*debug) << "maxDist = " << maxDist << endl;
			}
			c.append(p);
		} else {
			//(*debug) << "Skipping duplicate point " << p << endl;
		}
	}
	(*debug) << "numProcessed = " << numProcessed << endl;

	// At this point the contributions to MC of u*k - numMissing nearest points
	// have been taken into account.  Now take into account numMissing times
	// the furthest point, which is at a distance maxDist.
	int numMissing;
	if (u * clusters.size() < points.length()) {
		numMissing = u * clusters.size() - numProcessed;
	} else {
		numMissing = points.length() - numProcessed;
	}

	(*debug) << "numMissing = " << numMissing << endl;
	(*debug) << "maxDist = " << maxDist << endl;
	(*debug) << "MC = " << MC << endl;
	if (numMissing > 0) {
		MC += maxDist * numMissing;
		(*debug) << "Adjusting MC by " << maxDist*numMissing << endl;
	}

	return MC;
}


static void clearClusteringPoints() {
	int i;

	// Clear the contents of each cluster in preparation for the new clustering.
	for (i = clusters.low(); i <= clusters.high(); i++) {
		Cluster &c = clusters[i];
		c.getList().clear();
	}
}


void cluster_ech(int k) {
	int i;
	point p;
	list<point> points = T.points();
	array<point> oldReps;
	list_item l;
	list_item firstl;
	double MC = 0.0;
	double exactMC = 0.0;
	bool algorithmFinished = false;
	bool madeASwap = false;
	int n;


	n = points.length();

	// Create k clusters.
	//clusters.resize(k);

	// Get the initial k representatives.
	/*
	points.permute();
	l = points.first();
	for (i = clusters.low(); i <= clusters.high(); i++) {
		clusters[i].rep = points.contents(l);
		l = points.succ(l);
	}
	*/

	list<edge> edges = T.all_edges();
	edges.sort(&cmpDelaunayEdges);
	edge e;
	ofstream outfile("delaunay_edge_profile");
	forall(e, edges) {
		outfile << T.seg(e).length() << endl;
	}
	outfile.close();

	double maxEdgeLength = T.seg(edges[edges.last()]).length();

	/*
	node_partition uf(T);
	int numLargeSets = 0;
	//k = 0;
	forall(e, edges) {
		if (numLargeSets >= k) {
			break;
		}
		(*debug) << "Merging " << T.pos(source(e)) << " " << T.pos(target(e)) << endl;
		int prevSourceSize = uf.size(source(e));
		int prevTargetSize = uf.size(target(e));
		uf.union_blocks(source(e), target(e));
		(*debug) << "Merged size = " << uf.size(source(e)) << endl;
		//if (uf.size(source(e)) == nu) {
		if ( (uf.size(source(e)) >= nu) && (prevSourceSize < nu) && (prevTargetSize < nu) ) {
			(*debug) << "Setting " << numLargeSets << "-th rep to something from this set = " << T.pos(uf.find(source(e))) << endl;
			clusters[numLargeSets].rep = T.pos(uf.find(source(e)));
			numLargeSets++;
		}
	}
	*/

	node_partition uf(T);
	double maxMergeEdgeLength = 0.1*maxEdgeLength;
	forall(e, edges) {
		if (T.seg(e).length() > maxMergeEdgeLength) {
			break;
		}
		(*debug) << "Merging " << T.pos(source(e)) << " " << T.pos(target(e)) << endl;
		int prevSourceSize = uf.size(source(e));
		int prevTargetSize = uf.size(target(e));
		uf.union_blocks(source(e), target(e));
		(*debug) << "Merged size = " << uf.size(source(e)) << endl;
	}

	h_array<node, bool> reps;
	forall(p, points) {
		node n = T.lookup(p);
		if (uf.size(n) > nu) {
			(*debug) << "Point in large set " << p << endl;
			node rep = uf.find(n);
			(*debug) << "Representative " << rep << endl;
			if (!reps.defined(rep)) {
				(*debug) << "First time seen, adding" << endl;
				reps[rep] = true;
			} else {
				(*debug) << "Seen this rep before" << endl;
			}
		}
	}
	(*debug) << "Number of representatives found " << reps.size() << endl;
	k = reps.size();
	clusters.resize(k);

	node nd;
	i = 0;
	forall_defined(nd, reps) {
		p = T.pos(nd);
		(*debug) << "Representative " << p << endl;
		clusters[i].rep = p;
		i++;
	}

	// DEBUG
	//clearClusteringPoints();
	//findAllClosestRepresentatives();
	//return;


	// Since the whole point of doing everything is that uk < n,
	// setting u > n/k is pretty stupid.  u = n/k should give TB results.
	//u = log(n)/log(log(n));
	u = 3*log(n)/log(log(n));
	//u = (double)n/(double)k;    // Brutal (same as TB
	//u = n-1;    // Brutally stupid
	(*debug) << "u = " << u << endl;

	// Find the u nearest neighbours of all the points.
	forall(p, points) {
		node n = T.lookup(p);
		unng.insert(n, T.nearest_neighbors(n, u));
	}


	// Shuffle the points again.
	//points.permute();
	l = points.first();

	int j = 0;
	while (true) {
		//debugPrintClusters();

		(*debug) << endl;

		// Debug: unconditionally exit after the j iterations.
		if (j == -1) {
			clearClusteringPoints();
			findAllClosestRepresentatives();
			(*debug) << "Exiting (prematurely) after " << j << " iterations." << endl;
			break;
		}
		j++;


		// Recompute the clusterings based on these cluster representatives.
		clearClusteringPoints();
		(*debug) << "EXACT MC = " << findAllClosestRepresentatives() << endl;

		clearClusteringPoints();
		(*debug) << "MC before = " << MC << endl;
		MC = findNearestClosestRepresentatives();
		(*debug) << "MC after = " << MC << endl;


		// Okay, now scan through points (using the variable l), for each
		// non-representative point, consider swapping it with every
		// representative.  For each consideration, calculate MC', and if
		// the minimum of these is < MC, then do the corresponding swap.
		firstl = l;
		int innerLoops = 0;
		while (true) {
			madeASwap = false;
			(*debug) << endl;
			p = points.contents(l);
			if (findClosestRepresentative(p).rep != p) {
				// Weird way of finding out if p is not a representative.

				// Swap this point with each of the representatives.
				// For each swap, find MC and update things if it's the smallest.
				int minIndex = 0;
				double minMCdash = -1.0;
				for (i = clusters.low(); i <= clusters.high(); i++) {
					// Swap the points.
					point oldRep = clusters[i].rep;
					clusters[i].rep = p;

					// Find the clustering (and therefore MCdash).
					clearClusteringPoints();
					double MCdash = findNearestClosestRepresentatives();

					// Update the minimum records if required.
					(*debug) << i << ": " << MCdash << endl;
					if ( (minMCdash == -1.0) || (MCdash < minMCdash) ) {
						(*debug) << "new minimum." << endl;
						minMCdash = MCdash;
						minIndex = i;
					}

					// Swap the points back to their original state.
					// ie. Restore the old rep.
					clusters[i].rep = oldRep;
				}
				(*debug) << "minIndex = " << minIndex << " minMCdash = " << minMCdash << endl;
				(*debug) << "MC = " << MC << endl;

				if (minMCdash < MC) {
					(*debug) << "minMCdash < MC  !!!" << endl;
					// Swap p with the minIndex-th representative.
					clusters[minIndex].rep = p;
					// Break, we have a new and better solution.
					madeASwap = true;
				}

				// Recalculate the clustering, because all of our swapping
				// then testing has destroyed the current clustering.
				clearClusteringPoints();
				exactMC = findAllClosestRepresentatives();
				(*debug) << "Final EXACT MC = " << exactMC << endl;
			}

			l = points.cyclic_succ(l);
			innerLoops++;

			if (madeASwap) {
				// If we made a swap of representative and non-representative,
				// break the inner loop and go back to the outer loop to do
				// another iteration of the algorithm.
				break;
			}

			if (l == firstl) {
				// Break BOTH levels of while loops - this is the end of the
				// whole algorithm.
				(*debug) << "Exiting because we went all the way through the points with no improvement." << endl;
				(*debug) << "Inner loop ran " << innerLoops << " times." << endl;
				algorithmFinished = true;
				break;
			}
		}

		if (algorithmFinished) {
			break;
		}
	}

}


