diff --git a/CHANGELOG.md b/CHANGELOG.md
index 30638257c..c8c15f8e4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@ We follow SemVer as most of the Julia ecosystem. Below you might see the "breaki
 
 ## unreleased
 - `is_articulation(g, v)` for checking whether a single vertex is an articulation point
+- ECG community detection algorithm
 
 ## v1.14.0 - 2026-02-26
 
diff --git a/src/Graphs.jl b/src/Graphs.jl
index 5380f88f2..9df01764e 100644
--- a/src/Graphs.jl
+++ b/src/Graphs.jl
@@ -327,6 +327,8 @@ export
     triangles,
     label_propagation,
     louvain,
+    ecg,
+    ecg_weights,
     maximal_cliques,
     maximum_clique,
     clique_number,
@@ -553,6 +555,7 @@ include("centrality/radiality.jl")
 include("community/modularity.jl")
 include("community/label_propagation.jl")
 include("community/louvain.jl")
+include("community/ecg.jl")
 include("community/core-periphery.jl")
 include("community/clustering.jl")
 include("community/cliques.jl")
diff --git a/src/community/ecg.jl b/src/community/ecg.jl
new file mode 100644
index 000000000..f2c92397a
--- /dev/null
+++ b/src/community/ecg.jl
@@ -0,0 +1,154 @@
+"""
+    ecg(g; γ=1.0, ensemble_size::Integer=16, min_edge_weight::Real=0.05, distmx::AbstractArray{<:Number}=weights(g), max_moves::Integer=1000, max_merges::Integer=1000, move_tol::Real=10e-10, merge_tol::Real=10e-10, rng=nothing, seed=nothing)
+
+Community detection using ensemble clustering for graphs (ECG). Each edge is weighted by the
+fraction of merge-free Louvain runs in which its endpoints share a community, and a final
+Louvain on the re-weighted graph detects the communities.
+
+### Optional Arguments
+- `distmx=weights(g)`: distance matrix for weighted graphs (used by the ensemble runs).
+- `ensemble_size=16`: the number of merge-free Louvain runs in the ensemble (at least 1).
+- `min_edge_weight=0.05`: the minimum edge weight passed to the final Louvain (to retain the
+    original topology). Must lie in `[0, 1]`.
+- `γ=1.0`: where `γ > 0` is a resolution parameter. Higher resolutions lead to more
+    communities, while lower resolutions lead to fewer communities. Where `γ=1.0` it
+    leads to the traditional definition of the modularity.
+- `max_moves=1000`: maximum number of rounds moving vertices before merging for each Louvain.
+- `max_merges=1000`: maximum number of merges in the final Louvain.
+- `move_tol=10e-10`: necessary increase of modularity to move a vertex in each Louvain.
+- `merge_tol=10e-10`: necessary increase of modularity in the move stage to merge in the final Louvain.
+- `rng=nothing`: rng to use for reproducibility. May only pass one of rng or seed.
+- `seed=nothing`: seed to use for reproducibility. May only pass one of rng or seed.
+
+Throws an `ArgumentError` if `ensemble_size < 1` or `min_edge_weight` is outside `[0, 1]`.
+
+### References
+- [Valérie Poulin and François Théberge. Ensemble Clustering for Graphs: Comparisons and Applications. Applied Network Science, 4:4 (2019)](https://doi.org/10.1007/s41109-019-0162-z)
+
+# Examples
+```jldoctest
+julia> using Graphs
+
+julia> barbell = blockdiag(complete_graph(3), complete_graph(3));
+
+julia> add_edge!(barbell, 1, 4);
+
+julia> ecg(barbell)
+6-element Vector{Int64}:
+ 1
+ 1
+ 1
+ 2
+ 2
+ 2
+
+julia> ecg(barbell, γ=0.01)
+6-element Vector{Int64}:
+ 1
+ 1
+ 1
+ 1
+ 1
+ 1
+```
+"""
+function ecg(
+    g::AbstractGraph{T};
+    γ=1.0,
+    ensemble_size::Integer=16,
+    min_edge_weight::Real=0.05,
+    distmx::AbstractArray{<:Number}=weights(g),
+    max_moves::Integer=1000,
+    max_merges::Integer=1000,
+    move_tol::Real=10e-10,
+    merge_tol::Real=10e-10,
+    rng::Union{Nothing,AbstractRNG}=nothing,
+    seed::Union{Nothing,Integer}=nothing,
+) where {T}
+    ensemble_size >= 1 ||
+        throw(ArgumentError("ensemble_size must be at least 1, got $ensemble_size"))
+    0 <= min_edge_weight <= 1 ||
+        throw(ArgumentError("min_edge_weight must be in [0, 1], got $min_edge_weight"))
+    rng = rng_from_rng_or_seed(rng, seed)
+    if nv(g) == 0
+        return T[]
+    end
+    ensemble_weights = ecg_weights(
+        g;
+        γ=γ,
+        ensemble_size=ensemble_size,
+        distmx=distmx,
+        max_moves=max_moves,
+        move_tol=move_tol,
+        rng=rng,
+    )
+    # Blend with the unweighted adjacency so that every original edge keeps at least
+    # `min_edge_weight`. Not named `weights`, to avoid shadowing the function.
+    final_weights =
+        (1 - min_edge_weight) * ensemble_weights +
+        min_edge_weight * adjacency_matrix(g, Float64)
+    return louvain(
+        g;
+        γ=γ,
+        distmx=final_weights,
+        max_moves=max_moves,
+        max_merges=max_merges,
+        move_tol=move_tol,
+        merge_tol=merge_tol,
+        rng=rng,
+    )
+end
+
+"""
+    ecg_weights(g; γ=1.0, ensemble_size::Integer=16, distmx::AbstractArray{<:Number}=weights(g), max_moves::Integer=1000, move_tol::Real=10e-10, rng=nothing, seed=nothing)
+
+Compute edge weights via an ensemble of merge-free Louvain runs. The weight of each edge is
+the fraction of the `ensemble_size` runs in which its endpoints are in the same community.
+
+The result is a sparse matrix with one stored entry per adjacency entry of `g`; edges whose
+endpoints never share a community keep an explicit zero. Throws an `ArgumentError` if
+`ensemble_size < 1`.
+"""
+function ecg_weights(
+    g::AbstractGraph{T};
+    γ=1.0,
+    ensemble_size::Integer=16,
+    distmx::AbstractArray{<:Number}=weights(g),
+    max_moves::Integer=1000,
+    move_tol::Real=10e-10,
+    rng::Union{Nothing,AbstractRNG}=nothing,
+    seed::Union{Nothing,Integer}=nothing,
+) where {T}
+    ensemble_size >= 1 ||
+        throw(ArgumentError("ensemble_size must be at least 1, got $ensemble_size"))
+    rng = rng_from_rng_or_seed(rng, seed)
+    # Sparse adjacency pattern filled with explicit zeros; holds co-membership counts.
+    ensemble_weights = adjacency_matrix(g, Float64)
+    ensemble_weights.nzval .= 0
+
+    for _ in 1:ensemble_size
+        ensemble_communities = louvain(
+            g;
+            γ=γ,
+            distmx=distmx,
+            max_moves=max_moves,
+            max_merges=0,
+            move_tol=move_tol,
+            rng=rng,
+        )
+        for e in edges(g)
+            u, v = src(e), dst(e)
+            ensemble_communities[u] == ensemble_communities[v] || continue
+            ensemble_weights[u, v] += 1
+            # An undirected edge is mirrored; a self-loop is thus counted twice.
+            if !is_directed(g)
+                ensemble_weights[v, u] += 1
+            end
+        end
+    end
+
+    # Normalise once at the end so that repeated `+= 1 / ensemble_size` rounding error
+    # cannot accumulate (e.g. ten additions of 0.1 do not sum to exactly 1.0).
+    ensemble_weights.nzval ./= ensemble_size
+    return ensemble_weights
+end
diff --git a/test/community/ecg.jl b/test/community/ecg.jl
new file mode 100644
index 000000000..bcde1fca0
--- /dev/null
+++ b/test/community/ecg.jl
@@ -0,0 +1,139 @@
+@testset "ECG" begin
+    # Test ecg_weights
+    # Undirected
+    barbell = barbell_graph(3, 3)
+    c = sparse(
+        [
+            0.0 1.0 1.0 0.0 0.0 0.0;
+            1.0 0.0 1.0 0.0 0.0 0.0;
+            1.0 1.0 0.0 0.0 0.0 0.0;
+            0.0 0.0 0.0 0.0 1.0 1.0;
+            0.0 0.0 0.0 1.0 0.0 1.0;
+            0.0 0.0 0.0 1.0 1.0 0.0
+        ],
+    )
+    for g in test_generic_graphs(barbell)
+        r = ecg_weights(g)
+        dropzeros!(r)
+        @test c == r
+        # Weights stay exact fractions when 1 / ensemble_size is not representable
+        r = ecg_weights(g; ensemble_size=10)
+        dropzeros!(r)
+        @test c == r
+    end
+
+    # Empty, no edges
+    empty = SimpleGraph(10)
+    c = spzeros(10, 10)
+    for g in test_generic_graphs(empty)
+        r = @inferred ecg_weights(g)
+        dropzeros!(r)
+        @test c == r
+    end
+
+    # Empty, no nodes
+    empty = SimpleGraph()
+    c = spzeros(0, 0)
+    for g in test_generic_graphs(empty)
+        r = @inferred ecg_weights(g)
+        @test c == r
+    end
+
+    # Undirected loops
+    loops = complete_graph(2)
+    add_edge!(loops, 1, 1)
+    add_edge!(loops, 2, 2)
+    c = sparse([
+        2.0 0.0;
+        0.0 2.0
+    ])
+    for g in test_generic_graphs(loops)
+        r = ecg_weights(g)
+        dropzeros!(r)
+        @test c == r
+    end
+
+    # Directed
+    triangle = SimpleDiGraph(3)
+    add_edge!(triangle, 1, 2)
+    add_edge!(triangle, 2, 3)
+    add_edge!(triangle, 3, 1)
+
+    # Directed barbell
+    barbell = blockdiag(triangle, triangle)
+    add_edge!(barbell, 1, 4)
+    c = sparse(
+        [
+            0.0 1.0 0.0 0.0 0.0 0.0;
+            0.0 0.0 1.0 0.0 0.0 0.0;
+            1.0 0.0 0.0 0.0 0.0 0.0;
+            0.0 0.0 0.0 0.0 1.0 0.0;
+            0.0 0.0 0.0 0.0 0.0 1.0;
+            0.0 0.0 0.0 1.0 0.0 0.0
+        ],
+    )
+    for g in test_generic_graphs(barbell)
+        r = ecg_weights(g)
+        dropzeros!(r)
+        @test r == c
+    end
+
+    # Directed loops
+    barbell = SimpleDiGraph(2)
+    add_edge!(barbell, 1, 1)
+    add_edge!(barbell, 2, 2)
+    add_edge!(barbell, 1, 2)
+    c = sparse([
+        1.0 0.0;
+        0.0 1.0
+    ])
+    for g in test_generic_graphs(barbell)
+        r = ecg_weights(g)
+        dropzeros!(r)
+        @test r == c
+    end
+
+    # Test ECG
+    # Undirected
+    barbell = barbell_graph(3, 3)
+    c = [1, 1, 1, 2, 2, 2]
+    for g in test_generic_graphs(barbell)
+        r = ecg(g)
+        @test c == r
+    end
+
+    # Directed
+    triangle = SimpleDiGraph(3)
+    add_edge!(triangle, 1, 2)
+    add_edge!(triangle, 2, 3)
+    add_edge!(triangle, 3, 1)
+
+    barbell = blockdiag(triangle, triangle)
+    add_edge!(barbell, 1, 4)
+    c = [1, 1, 1, 2, 2, 2]
+    for g in test_generic_graphs(barbell)
+        r = ecg(g)
+        @test r == c
+    end
+
+    # Empty, no edges
+    empty = SimpleGraph(10)
+    c = collect(1:10)
+    for g in test_generic_graphs(empty)
+        r = ecg(g)
+        @test c == r
+    end
+
+    # Empty, no nodes
+    empty = SimpleGraph()
+    for g in test_generic_graphs(empty)
+        r = ecg(g)
+        @test length(r) == 0
+    end
+
+    # Invalid arguments
+    invalid = path_graph(2)
+    @test_throws ArgumentError ecg_weights(invalid; ensemble_size=0)
+    @test_throws ArgumentError ecg(invalid; ensemble_size=0)
+    @test_throws ArgumentError ecg(invalid; min_edge_weight=1.5)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index c4da7ef47..d5da00643 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -118,6 +118,7 @@ tests = [
     "traversals/all_simple_paths",
     "community/cliques",
     "community/core-periphery",
+    "community/ecg",
     "community/independent_sets",
     "community/label_propagation",
     "community/louvain",