// Implemented by Zoe Xi @zoexi for GSOC 2016 // https://github.com/cytoscape/cytoscape.js-k-means // References for k-means: https://harthur.github.io/clusterfck/ // References for k-medoids: http://www.math.le.ac.uk/people/ag153/homepage/KmeansKmedoids/Kmeans_Kmedoids.html // References for fuzzy c-means: Ross, Fuzzy Logic w/Engineering Applications (2010), pages 352-353 // http://yaikhom.com/2013/03/16/implementing-the-fuzzy-c-means-algorithm.html import clusteringDistance from './clustering-distances'; import * as util from '../../util'; let defaults = util.defaults({ k: 2, m: 2, sensitivityThreshold: 0.0001, distance: 'euclidean', maxIterations: 10, attributes: [], testMode: false, testCentroids: null }); var setOptions = ( options ) => defaults( options ); if( process.env.NODE_ENV !== 'production' ){ /* eslint-disable no-console, no-unused-vars */ var printMatrix = function( M ) { // used for debugging purposes only for ( let i = 0; i < M.length; i++ ) { let row = ''; for ( let j = 0; j < M[0].length; j++ ) { row += Number(M[i][j]).toFixed(3) + ' '; } console.log(row); } console.log(''); }; } /* eslint-enable */ let getDist = function(type, node, centroid, attributes, mode){ let noNodeP = mode !== 'kMedoids'; let getP = noNodeP ? ( i => centroid[i] ) : ( i => attributes[i](centroid) ); let getQ = i => attributes[i](node); let nodeP = centroid; let nodeQ = node; return clusteringDistance( type, attributes.length, getP, getQ, nodeP, nodeQ ); }; let randomCentroids = function( nodes, k, attributes ) { let ndim = attributes.length; let min = new Array(ndim); let max = new Array(ndim); let centroids = new Array(k); let centroid = null; // Find min, max values for each attribute dimension for ( let i = 0; i < ndim; i++ ) { min[i] = nodes.min( attributes[i] ).value; max[i] = nodes.max( attributes[i] ).value; } // Build k centroids, each represented as an n-dim feature vector for ( let c = 0; c < k; c++ ) { centroid = []; for ( let i = 0; i < ndim; i++ ) { centroid[i] = Math.random() * (max[i] - min[i]) + min[i]; // random initial value } centroids[c] = centroid; } return centroids; }; let classify = function( node, centroids, distance, attributes, type ) { let min = Infinity; let index = 0; for ( let i = 0; i < centroids.length; i++ ) { let dist = getDist( distance, node, centroids[i], attributes, type ); if (dist < min) { min = dist; index = i; } } return index; }; let buildCluster = function( centroid, nodes, assignment ) { let cluster = []; let node = null; for ( let n = 0; n < nodes.length; n++ ) { node = nodes[n]; if ( assignment[ node.id() ] === centroid ) { //console.log("Node " + node.id() + " is associated with medoid #: " + m); cluster.push( node ); } } return cluster; }; let haveValuesConverged = function( v1, v2, sensitivityThreshold ){ return Math.abs( v2 - v1 ) <= sensitivityThreshold; }; let haveMatricesConverged = function( v1, v2, sensitivityThreshold ) { for ( let i = 0; i < v1.length; i++ ) { for (let j = 0; j < v1[i].length; j++ ) { let diff = Math.abs( v1[i][j] - v2[i][j] ); if( diff > sensitivityThreshold ){ return false; } } } return true; }; let seenBefore = function ( node, medoids, n ) { for ( let i = 0; i < n; i++ ) { if ( node === medoids[i] ) return true; } return false; }; let randomMedoids = function( nodes, k ) { let medoids = new Array(k); // For small data sets, the probability of medoid conflict is greater, // so we need to check to see if we've already seen or chose this node before. if (nodes.length < 50) { // Randomly select k medoids from the n nodes for (let i = 0; i < k; i++) { let node = nodes[ Math.floor( Math.random() * nodes.length ) ]; // If we've already chosen this node to be a medoid, don't choose it again (for small data sets). // Instead choose a different random node. while ( seenBefore( node, medoids, i ) ) { node = nodes[ Math.floor( Math.random() * nodes.length ) ]; } medoids[i] = node; } } else { // Relatively large data set, so pretty safe to not check and just select random nodes for (let i = 0; i < k; i++) { medoids[i] = nodes[ Math.floor( Math.random() * nodes.length ) ]; } } return medoids; }; let findCost = function( potentialNewMedoid, cluster, attributes ) { let cost = 0; for ( let n = 0; n < cluster.length; n++ ) { cost += getDist( 'manhattan', cluster[n], potentialNewMedoid, attributes, 'kMedoids' ); } return cost; }; let kMeans = function( options ){ let cy = this.cy(); let nodes = this.nodes(); let node = null; // Set parameters of algorithm: # of clusters, distance metric, etc. let opts = setOptions( options ); // Begin k-means algorithm let clusters = new Array(opts.k); let assignment = {}; let centroids; // Step 1: Initialize centroid positions if ( opts.testMode ) { if( typeof opts.testCentroids === 'number') { // TODO: implement a seeded random number generator. let seed = opts.testCentroids; centroids = randomCentroids( nodes, opts.k, opts.attributes, seed ); } else if ( typeof opts.testCentroids === 'object') { centroids = opts.testCentroids; } else { centroids = randomCentroids( nodes, opts.k, opts.attributes ); } } else { centroids = randomCentroids( nodes, opts.k, opts.attributes ); } let isStillMoving = true; let iterations = 0; while ( isStillMoving && iterations < opts.maxIterations ) { // Step 2: Assign nodes to the nearest centroid for ( let n = 0; n < nodes.length; n++ ) { node = nodes[n]; // Determine which cluster this node belongs to: node id => cluster # assignment[ node.id() ] = classify( node, centroids, opts.distance, opts.attributes, 'kMeans' ); } // Step 3: For each of the k clusters, update its centroid isStillMoving = false; for ( let c = 0; c < opts.k; c++ ) { // Get all nodes that belong to this cluster let cluster = buildCluster( c, nodes, assignment ); if ( cluster.length === 0 ) { // If cluster is empty, break out early & move to next cluster continue; } // Update centroids by calculating avg of all nodes within the cluster. let ndim = opts.attributes.length; let centroid = centroids[c]; // [ dim_1, dim_2, dim_3, ... , dim_n ] let newCentroid = new Array(ndim); let sum = new Array(ndim); for ( let d = 0; d < ndim; d++ ) { sum[d] = 0.0; for ( let i = 0; i < cluster.length; i++ ) { node = cluster[i]; sum[d] += opts.attributes[d](node); } newCentroid[d] = sum[d] / cluster.length; // Check to see if algorithm has converged, i.e. when centroids no longer change if ( !haveValuesConverged(newCentroid[d], centroid[d], opts.sensitivityThreshold) ) { isStillMoving = true; } } centroids[c] = newCentroid; clusters[c] = cy.collection( cluster ); } iterations++; } return clusters; }; let kMedoids = function( options ) { let cy = this.cy(); let nodes = this.nodes(); let node = null; let opts = setOptions( options ); // Begin k-medoids algorithm let clusters = new Array(opts.k); let medoids; let assignment = {}; let curCost; let minCosts = new Array(opts.k); // minimum cost configuration for each cluster // Step 1: Initialize k medoids if ( opts.testMode ) { if( typeof opts.testCentroids === 'number') { // TODO: implement random generator so user can just input seed number } else if ( typeof opts.testCentroids === 'object') { medoids = opts.testCentroids; } else { medoids = randomMedoids(nodes, opts.k); } } else { medoids = randomMedoids(nodes, opts.k); } let isStillMoving = true; let iterations = 0; while ( isStillMoving && iterations < opts.maxIterations ) { // Step 2: Assign nodes to the nearest medoid for ( let n = 0; n < nodes.length; n++ ) { node = nodes[n]; // Determine which cluster this node belongs to: node id => cluster # assignment[ node.id() ] = classify( node, medoids, opts.distance, opts.attributes, 'kMedoids' ); } isStillMoving = false; // Step 3: For each medoid m, and for each node associated with mediod m, // select the node with the lowest configuration cost as new medoid. for ( let m = 0; m < medoids.length; m++ ) { // Get all nodes that belong to this medoid let cluster = buildCluster( m, nodes, assignment ); if ( cluster.length === 0 ) { // If cluster is empty, break out early & move to next cluster continue; } minCosts[m] = findCost( medoids[m], cluster, opts.attributes ); // original cost // Select different medoid if its configuration has the lowest cost for ( let n = 0; n < cluster.length; n++ ) { curCost = findCost( cluster[n], cluster, opts.attributes ); if ( curCost < minCosts[m] ) { minCosts[m] = curCost; medoids[m] = cluster[n]; isStillMoving = true; } } clusters[m] = cy.collection( cluster ); } iterations++; } return clusters; }; let updateCentroids = function( centroids, nodes, U, weight, opts ) { let numerator, denominator; for ( let n = 0; n < nodes.length; n++ ) { for ( let c = 0; c < centroids.length; c++ ) { weight[n][c] = Math.pow( U[n][c], opts.m ); } } for ( let c = 0; c < centroids.length; c++ ) { for ( let dim = 0; dim < opts.attributes.length; dim++ ) { numerator = 0; denominator = 0; for ( let n = 0; n < nodes.length; n++ ) { numerator += weight[n][c] * opts.attributes[dim](nodes[n]); denominator += weight[n][c]; } centroids[c][dim] = numerator / denominator; } } }; let updateMembership = function( U, _U, centroids, nodes, opts ) { // Save previous step for (let i = 0; i < U.length; i++) { _U[i] = U[i].slice(); } let sum, numerator, denominator; let pow = 2 / (opts.m - 1); for ( let c = 0; c < centroids.length; c++ ) { for ( let n = 0; n < nodes.length; n++ ) { sum = 0; for ( let k = 0; k < centroids.length; k++ ) { // against all other centroids numerator = getDist( opts.distance, nodes[n], centroids[c], opts.attributes, 'cmeans' ); denominator = getDist( opts.distance, nodes[n], centroids[k], opts.attributes, 'cmeans' ); sum += Math.pow( numerator / denominator, pow ); } U[n][c] = 1 / sum; } } }; let assign = function( nodes, U, opts, cy ) { let clusters = new Array(opts.k); for ( let c = 0; c < clusters.length; c++ ) { clusters[c] = []; } let max; let index; for ( let n = 0; n < U.length; n++ ) { // for each node (U is N x C matrix) max = -Infinity; index = -1; // Determine which cluster the node is most likely to belong in for ( let c = 0; c < U[0].length; c++ ) { if ( U[n][c] > max ) { max = U[n][c]; index = c; } } clusters[index].push( nodes[n] ); } // Turn every array into a collection of nodes for ( let c = 0; c < clusters.length; c++ ) { clusters[c] = cy.collection( clusters[c] ); } return clusters; }; let fuzzyCMeans = function( options ) { let cy = this.cy(); let nodes = this.nodes(); let opts = setOptions( options ); // Begin fuzzy c-means algorithm let clusters; let centroids; let U; let _U; let weight; // Step 1: Initialize letiables. _U = new Array(nodes.length); for ( let i = 0; i < nodes.length; i++ ) { // N x C matrix _U[i] = new Array(opts.k); } U = new Array(nodes.length); for ( let i = 0; i < nodes.length; i++ ) { // N x C matrix U[i] = new Array(opts.k); } for (let i = 0; i < nodes.length; i++) { let total = 0; for (let j = 0; j < opts.k; j++) { U[i][j] = Math.random(); total += U[i][j]; } for (let j = 0; j < opts.k; j++) { U[i][j] = U[i][j] / total; } } centroids = new Array(opts.k); for ( let i = 0; i < opts.k; i++ ) { centroids[i] = new Array(opts.attributes.length); } weight = new Array(nodes.length); for ( let i = 0; i < nodes.length; i++ ) { // N x C matrix weight[i] = new Array(opts.k); } // end init FCM let isStillMoving = true; let iterations = 0; while ( isStillMoving && iterations < opts.maxIterations ) { isStillMoving = false; // Step 2: Calculate the centroids for each step. updateCentroids( centroids, nodes, U, weight, opts ); // Step 3: Update the partition matrix U. updateMembership( U, _U, centroids, nodes, opts ); // Step 4: Check for convergence. if ( !haveMatricesConverged( U, _U, opts.sensitivityThreshold ) ) { isStillMoving = true; } iterations++; } // Assign nodes to clusters with highest probability. clusters = assign( nodes, U, opts, cy ); return { clusters: clusters, degreeOfMembership: U }; }; export default { kMeans, kMedoids, fuzzyCMeans, fcm: fuzzyCMeans };