CliqueClustering.java |
/* Copyright (C) 2009 CSE,IIT Bombay http://www.cse.iitb.ac.in This file is part of the ConStore open source storage facility for concept-nets. ConStore is free software and distributed under the Creative Commons Attribution-Noncommercial-No Derivative Works 3.0 Unported License; you can copy, distribute and transmit the work with the work attribution in the manner specified by the author or licensor. You may not use this work for commercial purposes and may not alter, transform, or build upon this work. Please refer the legal code of the license, available at http://creativecommons.org/licenses/by-nc-nd/3.0/legalcode ConStore is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ package iitb.con.clustering; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * Clusters the existing instances based on its relationships. * Concept networks can be viewed as a directed graph with unit * distance between connected vertices. Clustering in ConStore * is based on the criterion that if two vertices (entities) * are tagged with the same cluster id, the distance between * them is less or equal to a given cluster diameter k. * One exception is made to this rule in the case of vertices * which get singled out in clusters of their own. * Such vertices are allowed to merge with a neigbouring cluster. * Thus, any two nodes within cluster can be reached in at most k + 1 hops. * * * @author Prathab K * */ public class CliqueClustering { /** Entity instances are grouped as cluster nodes */ private Map<Integer,ClusterNode> clusterNodes = new HashMap<Integer,ClusterNode>(); /** To track the diameter of the cluster, * a counter is used for each cluster */ private Map<Short, Short> clusterCountMap = new HashMap<Short,Short>(); /** Maintains flags for visited nodes */ private Map<Integer, Boolean> visited = new HashMap<Integer,Boolean>(); /** BFS queue */ private List<ClusterNode> queue = new ArrayList<ClusterNode>(); /** Initial cluster count */ private short clusterCount = 1; /** Default cluster diameter */ private short K = 2; //default value /** * Initializes the concept-net graph and the node to * start along with cluster diameter * @param graph concept-net graph as hash map structure * @param startNode start entity node * @param clusterDiameter cluster diameter */ public void cluster(Map<Integer,List<ClusterNode>> graph, ClusterNode startNode, short clusterDiameter) { clear(); K = clusterDiameter; startNode.clusterId = 1; startNode.distance = 0; addToQueue(startNode); while (!queue.isEmpty()) { ClusterNode currentNode = queue.remove(0); List<ClusterNode> successors = graph.get(currentNode.key); if(successors!= null && successors.size() > 0) { Short id = getNextClusterId(currentNode.clusterId); Short distance = new Short(currentNode.distance); //System.out.println(currentNode.key + "---" + currentNode.clusterId + "---" + id); if(id.equals(currentNode.clusterId)) { if(currentNode.distance == 0 && (getUnVisitedChildren(graph, currentNode) > 1)) { distance += distance + 2; } else { distance++; } }else { if(getUnVisitedChildren(graph, currentNode) > 1) distance = new Short((short)2); else distance = new Short((short)0); } clusterCountMap.put(id, distance); for(ClusterNode successor : successors) { if(!visited.containsKey(successor.key)) { if(getUnVisitedChildren(graph, successor) > 0) { successor.clusterId = id; successor.distance = distance; }else { successor.clusterId = currentNode.clusterId; successor.distance = -1; } addToQueue(successor); } } } } } /** * Returns the next cluster id given the current cluster id. * The next cluster id is given based on the current cluster * diameter. The cluster id is incremented if diameter is * exceeded, else same id is returned * @param id current cluster id * @return next cluster id */ private Short getNextClusterId(Short id) { Short val = clusterCountMap.get(id); if(val == null) { clusterCountMap.put(id, (short)0); return id; }else { if(val.equals(K)) { clusterCount++; clusterCountMap.put(new Short(clusterCount), (short)0); return clusterCount; } return id; } } /** * Returns the number of unvisited children for the given node * @param graph concept-net graph * @param cnode node * @return number of unvisited children */ private int getUnVisitedChildren(Map<Integer,List<ClusterNode>> graph, ClusterNode cnode) { int n = 0; List<ClusterNode> successors = graph.get(cnode.key); if(successors != null && successors.size() > 0) { for(ClusterNode s : successors) { if(s == null) System.out.println();; if(!visited.containsKey(s.key)) n++; } } return n; } /** * Returns <tt>true</tt> if given node has children * @param graph concept-net graph * @param cnode node * @return <tt>true</tt> if given node has children */ private boolean hasChildren(Map<Integer,List<ClusterNode>> graph, ClusterNode cnode) { List<ClusterNode> successors = graph.get(cnode.key); if(successors != null && successors.size() > 0) return true; return false; } /** * Adds to the Breadth First Search queue * @param n node */ private void addToQueue(ClusterNode n) { visited.put(n.key, true); queue.add(n); } /** * Clears the data structures */ private void clear() { clusterNodes.clear(); clusterCountMap.clear(); visited.clear(); queue.clear(); clusterCount = 1; } }