# MCFCutAlgorithm.txt # # This is an AMPL implementation of the The MCF cut community structure algorithm described in # "The Use of Sparsest Cuts to Reveal the Hierarchical Community Structure of Social Networks" # by Mann, Matula, and Olinick. ################################## User-Controlled Parameters ################################## # output_level controls the amount of output. # If output_level = 1 then only the best partition into communities is shown. # If output_level = 2 then the program shows the partition into communities at each iteration. # If output_level = 3 then the program also shows the MCF results at each iteration. # # nodes_per_line controls the number of nodes printer per line when output_level > 1 # # If the Q value does not increase after Qlimit consecutive iterations the # procedure will stop. # # if selection_criterion = 0 then the largest component is selected next. # if selection_criterion = 1 then the sparsest component is selected next. # param output_level default 3; param nodes_per_line default 15; param Qlimit default 5; param selection_criterion default 1; # Settings to minimize AMPL/CPLEX output; option solver_msg 0; option show_stats 0; ############################################################################################### ################################# Sets and Parameters for BFS ################################# # The following sets and parameters are used to find the connected components (communities) # via breadth-first search (BFS) after removing a set of cut edges. # # root is the root node of the current BFS. # # MARKED_NODES are the nodes that have been found so far by the BFS from the root node. # # UNMARKED_NODES are the nodes have not yet been identified as being any connected component. # param root; set MARKED_NODES ordered default {}; set UNMARKED_NODES ordered default {}; ############################################################################################### ######################### Sets and Parameters for Community Structure ######################### # The (n*(n-1))/2 node pairs are partitioned into two sets. A node pair (i,j) is connected if # if i and j are in the same community (connected component) and disconnected otherwise. # # The edges of the graph are partitioned into two sets. Edge (i,j) is an inter-community edge # if nodes i and j are in the different communities (connected components) and is an intra-community # edge otherwise. # # component[i] is the id number of the component that contains node i # in the current iteration. # # best_component[i] is id number of the component that contains node i # in the best solution found so far. # # num_components is the number of components (communities) in the current solution. # # min_component_density is the density of the sparsest component in the current community structure. # # density is the density of the current component (communitiy). # set CONNECTED_PAIRS within {i in 1 .. n-1, j in i+1 .. n} default {i in V, j in V: i < j}; set DISCONNECTED_PAIRS within {i in 1 .. n-1, j in i+1 .. n} default {}; set INTER within E default {}; # The set of inter-community edges set INTRA within E default E; # The set of intra-community edges param component {V} default 0; param best_component {V} default 0; param num_components default 1; param component_size default 0; param max_component_size default 0; param min_component_density default 0; param density default 1; ############################################################################################### ################################## MCFP Sets and Parameters #################################### # # The algorithm solves the MCFP for the selected component. The right-hand side constants for the # LP formulation of the MCFP are thus determined by the nodes, edges, and node pairs (demand pairs) # within the selected component which are represented by NODES_IN_COMPONENT, EDGES_IN_COMPONENT, # and PAIRS_IN_COMPONENT, respectively. # # Saturated_Edges is the set of edges that are saturated by the MCF in the selected component. # # Critical_Edges is the set of edges that must be saturated by any MCF in the selected component. # Note that these edges are only determined if the perturb parameter is set to 1. # # Cut_Edges are the subset of the Saturated_Edges that are removed to cut (partition) the nodes # in the selected component. # # Separated_Demands are the set of pairs of nodes that are separated by the Cut_Edges. # # current_z is the value of the MCF for the selected component. # # perturbed_z is the the value of the MCF for the selected component after the capacity # of one of the Saturated_Edges is perturbed by a small amount. If perturbed_z < current_z, # then the pertubed edge is critical. # # orig_C is the original (given) capacity of a perturbed edge. # # edge_flow[i,j] is the total flow on edge (i,j) in the MCF for the selected component. # # cut_density is the density of the partition created by removing the Cut_Edges from the selected # component. set NODES_IN_COMPONENT within V default V; set EDGES_IN_COMPONENT within E default E; set PAIRS_IN_COMPONENT within {i in 1 .. n-1, j in i+1 .. n} default {i in 1 .. n-1, j in i+1 .. n}; set Saturated_Edges within E default {}; set Critical_Edges within E default {}; set Cut_Edges within E default {}; set Separated_Demands within {k in 1 .. n-1, j in k+1 .. n}; param current_z default 0; param pertubed_z default 0; param orig_C default 1; param edge_flow {E} default 0; param cut_density default 1; ############################################################################################### ################################### Parameters for Q Metric ################################## # Q is the Q value for the current partition into communitites. # # lastQ is the Q value for the previous partition. # # Qcounter is the number of iterations since the last improvement in the Q metric. # # ADJ[i,j] = A[i,j] - (weighted_degree[i]*weighted_degree[j]/2*weighted_m) in equation (9) from # Newman, M. E. J. .Analysis of Weighted Networks,. Phys. Rev. E 70, 056131, (2004). # Note that this is a constant that is independent of the community structure. # param Q default 0; param last_Q default 0; param best_Q default 0; param Qcounter default 0; param ADJ {V, V} default 0; param weighted_degree {i in V} := sum{j in V: (min(i,j),max(i,j)) in E} w[min(i,j),max(i,j)]; param weighted_m := sum{(i,j) in E} w[i,j]; ############################################################################################### ############################## Miscellanous Sets and Parameters ############################### # CORRECT_INTRACOM_EDGES and CORRECT_INTERCOM_EDGES are the edges that the algorithm correctly # identifies as being intra- and intercommunity edges, respectively. Note that these sets are # only used if the data file contains the set INTRACOM_EDGES which is the set of user-specified # intracommunity edges. # # counter is used to count the number of nodes that are printed on a line when # displaying the community structure of the current solution. # # degree[i] is the degree of node i. # # done = 1 if stopping conditions are met. # # iteration is the the current iteration number. # # m is the number of edges in the graph. # # Any number <= the value of the zero parameter is treated as zero. # # M is the M statistic from "The Use of Sparsest Cuts to Reveal the Hierarchical Community Structure of Social Networks" # by Mann, Matula, and Olinick. Note that M is only calculated if the data file contains the set INTRACOM_EDGES. # set CORRECT_INTRACOM_EDGES within E default {}; set CORRECT_INTERCOM_EDGES within E default {}; param counter default 0; param degree {i in V} := card({j in V: (i,j) in E or (j,i) in E}); param done default 0; param iteration default 0; param m := card(E); param zero default 1.0e-6; param M default 0; ############################################################################################### ###################### Display the Name of the Data File and the Component Selection Criterion ######################### printf "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; printf "MCF Cut Community Structure Algorithm\n"; printf "Data file: %s\n", filename; printf "Selection Criterion: "; if selection_criterion = 0 then printf "largest component\n"; else printf "sparsest component\n"; ###################### Code to Initialize Data Structures ######################### # Calculate ADJ values for {i in 1 .. n, j in 1 .. n} { if (min(i,j),max(i,j)) in E then let ADJ[i,j] := w[min(i,j),max(i,j)]; else let ADJ[i,j] := 0; let ADJ[i,j] := ADJ[i,j] - ((weighted_degree[i]*weighted_degree[j])/(2*weighted_m)); } # Determine the initial community structure of the graph let UNMARKED_NODES := V; let MARKED_NODES := {}; let {i in V} component[i] := 0; let num_components := 0; let max_component_size := 0; repeat { let component_size := 0; let num_components := num_components + 1; let MARKED_NODES := {first(UNMARKED_NODES)}; let UNMARKED_NODES := UNMARKED_NODES diff {first(UNMARKED_NODES)}; repeat { let root := first(MARKED_NODES); let component_size := component_size + 1; let component[root] := num_components; let MARKED_NODES := MARKED_NODES diff {root}; for {i in UNMARKED_NODES: (min(i,root),max(i,root)) in INTRA} { let component[i] := num_components; let MARKED_NODES := MARKED_NODES union {i}; let UNMARKED_NODES := UNMARKED_NODES diff {i}; } } until card(MARKED_NODES) == 0; # calculate the density of this component let NODES_IN_COMPONENT := {i in V: component[i] == num_components}; let EDGES_IN_COMPONENT := {(i,j) in INTRA: component[i] == num_components and component[j] == num_components}; let density := 2*sum{(i,j) in EDGES_IN_COMPONENT} w[i,j]/(card(NODES_IN_COMPONENT)*(card(NODES_IN_COMPONENT)-1)); # See if this the next component to select if selection_criterion = 1 then { if density > zero and density < min_component_density then { let selected_component := num_components; let min_component_density := density; } } else if selection_criterion = 0 then { if component_size > max_component_size then { let selected_component := num_components; let max_component_size := component_size; } } } until card(UNMARKED_NODES) == 0; let DISCONNECTED_PAIRS := DISCONNECTED_PAIRS union {i in 1 .. n-1, j in i+1 .. n: component[i] != component[j]}; let CONNECTED_PAIRS := CONNECTED_PAIRS diff DISCONNECTED_PAIRS; ######################### Main Loop ######################### repeat until done == 1 { if output_level > 1 then printf "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; let iteration := iteration + 1; if output_level > 1 then printf "Community Structure at Iteration %d\n",iteration; for {i in 1 .. num_components} { if output_level > 1 then printf "\nNodes in Component %d:",i; let counter := 0; for {j in V: component[j] == i} { let component_size := component_size + 1; if counter == 0 and output_level > 1 then printf "\n"; let counter := counter + 1; if output_level > 1 then printf "%d ",j; if counter == nodes_per_line then let counter := 0; } if output_level > 1 then printf "\n"; let NODES_IN_COMPONENT := {j in V: component[j] == i}; let EDGES_IN_COMPONENT := {(u,v) in INTRA: component[u] == i and component[v] == i}; let density := 2*sum{(u,v) in EDGES_IN_COMPONENT} w[u,v]/(card(NODES_IN_COMPONENT)*(card(NODES_IN_COMPONENT)-1)); if output_level > 1 then printf "density = %f\n",density; } # for {i in 1 .. num_components} # calculate the Q score let last_Q := Q; let Q := (sum{i in 1 .. n, j in 1 .. n: component[i] = component[j]} ADJ[i,j])/(2*weighted_m); if output_level > 1 then printf "\nQ = %f\n",max(Q,0); if Q > last_Q then { let Qcounter := 0; if Q > best_Q then { # This is best solution found so far according to Newman's Q metric let best_Q := Q; let {i in V} best_component[i] := component[i]; } } else { let Qcounter := Qcounter + 1; } # check the stopping criteria if max_component_size == 1 or Qcounter >= Qlimit then { if output_level > 1 then printf "\nStopping criteria reached.\n"; if output_level > 1 then printf "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; break; } # select the next component if output_level > 1 then printf "\nSelecting Component %d.\n",selected_component; let NODES_IN_COMPONENT := {i in V: component[i] == selected_component}; let EDGES_IN_COMPONENT := {(i,j) in INTRA: component[i] == selected_component and component[j] == selected_component}; let PAIRS_IN_COMPONENT := {(i,j) in CONNECTED_PAIRS: component[i] == selected_component and component[j] == selected_component}; # set up and solve LP for MCF in the selected component let {i in V, j in V} d[i,j] := 0; let {(i,j) in PAIRS_IN_COMPONENT} d[i,j] := 1; let {(i,j) in E diff EDGES_IN_COMPONENT} c[i,j] := 0; let {(i,j) in EDGES_IN_COMPONENT} c[i,j] := w[i,j]; # solve > /dev/null; solve; if output_level > 2 then printf "MCF in Component %d = %f\n",selected_component,z; # Find the set of edges saturated by the MCF let {(i,j) in EDGES_IN_COMPONENT} edge_flow[i,j] := 0; let {(i,j) in EDGES_IN_COMPONENT} edge_flow[i,j] := sum{k in 1 .. n-1} (flow[k,i,j] + flow[k,j,i]); let Saturated_Edges := {(i,j) in EDGES_IN_COMPONENT: c[i,j] - edge_flow[i,j] < zero}; if output_level > 2 then display Saturated_Edges; # Find the set of cut edges let Cut_Edges := {(i,j) in EDGES_IN_COMPONENT: abs(edge_capacity[i,j]) > zero}; if card(Cut_Edges intersect EDGES_IN_COMPONENT) = card(EDGES_IN_COMPONENT) then { if output_level > 2 then printf "\nAll edges are critical\n"; } if output_level > 1 then display Cut_Edges; if output_level > 2 then { printf "\nEdges Removed From Component %d:\n",selected_component; display Cut_Edges; } let INTRA := INTRA diff Cut_Edges; let INTER := INTER union Cut_Edges; # Determine the community structure of the current partitioning let UNMARKED_NODES := V; let MARKED_NODES := {}; let {i in V} component[i] := 0; let num_components := 0; let max_component_size := 0; let min_component_density := Infinity; repeat { let component_size := 0; let num_components := num_components + 1; let MARKED_NODES := {first(UNMARKED_NODES)}; let UNMARKED_NODES := UNMARKED_NODES diff {first(UNMARKED_NODES)}; repeat { let root := first(MARKED_NODES); let component_size := component_size + 1; let component[root] := num_components; let MARKED_NODES := MARKED_NODES diff {root}; for {i in UNMARKED_NODES: (min(i,root),max(i,root)) in INTRA} { let component[i] := num_components; let MARKED_NODES := MARKED_NODES union {i}; let UNMARKED_NODES := UNMARKED_NODES diff {i}; } } until card(MARKED_NODES) == 0; # calculate the density of this component let NODES_IN_COMPONENT := {i in V: component[i] == num_components}; let EDGES_IN_COMPONENT := {(i,j) in INTRA: component[i] == num_components and component[j] == num_components}; let density := 2*sum{(i,j) in EDGES_IN_COMPONENT} w[i,j]/(card(NODES_IN_COMPONENT)*(card(NODES_IN_COMPONENT)-1)); # See if this the next component to select if selection_criterion = 1 then { if density > zero and density < min_component_density then { let selected_component := num_components; let min_component_density := density; } } else if selection_criterion = 0 then { if component_size > max_component_size then { let selected_component := num_components; let max_component_size := component_size; } } } until card(UNMARKED_NODES) == 0; # Find the set of node-pairs separated by the cut let Separated_Demands := {(i,j) in PAIRS_IN_COMPONENT: component[i] != component[j]}; let DISCONNECTED_PAIRS := DISCONNECTED_PAIRS union Separated_Demands; let CONNECTED_PAIRS := CONNECTED_PAIRS diff Separated_Demands; if output_level > 2 then { printf "\nNode Pairs Separated by Cut:\n"; display Separated_Demands; } let cut_density := sum{(i,j) in Cut_Edges} w[i,j]/card(Separated_Demands); if output_level > 2 then printf "\nCut Density = %d/%d = %f\n", sum{(i,j) in Cut_Edges} w[i,j],card(Separated_Demands),cut_density; if output_level > 1 then printf "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; if max_component_size = 1 then let done := 1; } # until done == 1; let num_components := max{i in V} best_component[i]; printf "\n\nBest community structure found has %d components with Q = %f:\n",num_components,best_Q; for {i in 1 .. num_components} { let component_size := 0; printf "\nNodes in Component %d:",i; let counter := 0; for {j in V: best_component[j] == i} { let component_size := component_size + 1; if counter == 0 then printf "\n"; let counter := counter + 1; printf "%d ",j; if counter == 15 then let counter := 0; } printf "\n"; if component_size > max_component_size then { let selected_component := i; let max_component_size := component_size; } } printf "\nTotal CPU seconds to partition this graph = %f\n\n",_ampl_user_time + _total_solve_user_time; # If appropriate, calculate M score and report results if (card(INTRACOM_EDGES) > 0) then { printf "\nThere are %d inter-community edges between nodes in different communities:\n",card(E diff INTRACOM_EDGES); display E diff INTRACOM_EDGES; printf "\nThere are %d intra-community edges between nodes in the same community:\n",card(INTRACOM_EDGES); display INTRACOM_EDGES; let INTER := {(i,j) in E: best_component[i] <> best_component[j]}; let INTRA := {(i,j) in E: best_component[i] = best_component[j]}; printf "\nThere are %d algorithm-labeled inter-community edges:\n",card(INTER); display INTER; printf "\nThere are %d algorithm-labeled intra-community edges:\n",card(INTRA); display INTRA; let CORRECT_INTRACOM_EDGES := {(i,j) in INTRACOM_EDGES: best_component[i] = best_component[j]}; let CORRECT_INTERCOM_EDGES := {(i,j) in E diff INTRACOM_EDGES: best_component[i] <> best_component[j]}; printf "\nThe algorithm correctly labeled %d out of %d intra-community edges:\n",card(CORRECT_INTERCOM_EDGES),card(E diff INTRACOM_EDGES); display CORRECT_INTERCOM_EDGES; printf "\nThe algorithm correctly labeled %d out of %d intra-community edges:\n",card(CORRECT_INTRACOM_EDGES),card(INTRACOM_EDGES); display CORRECT_INTRACOM_EDGES; let M := (card(CORRECT_INTRACOM_EDGES) + card(CORRECT_INTERCOM_EDGES))/m; display m,M; }