diff --git a/graphconstructor/operators/doubly_stochastic.py b/graphconstructor/operators/doubly_stochastic.py index 0213d7a..a4622b0 100644 --- a/graphconstructor/operators/doubly_stochastic.py +++ b/graphconstructor/operators/doubly_stochastic.py @@ -1,9 +1,12 @@ from dataclasses import dataclass +import networkx as nx import numpy as np from ..graph import Graph from .base import GraphOperator +# Z. 48-57: +# https://gitlab.liris.cnrs.fr/coregraphie/netbone/-/blob/main/netbone/structural/doubly_stochastic.py?ref_type=heads @dataclass(slots=True) class DoublyStochastic(GraphOperator): """ @@ -30,6 +33,7 @@ class DoublyStochastic(GraphOperator): Copy metadata frame if present. Default True. """ + backbone_method: bool = False tolerance: float = 1e-5 max_iter: int = 10_000 copy_meta: bool = True @@ -84,14 +88,12 @@ def apply(self, G: Graph) -> Graph: # Only check rows/cols that have edges (others stay 0 and are irrelevant) if row_has_edges.any(): - rows_ok = np.all((row_sums[row_has_edges] >= min_thres) & - (row_sums[row_has_edges] <= max_thres)) + rows_ok = np.all((row_sums[row_has_edges] >= min_thres) & (row_sums[row_has_edges] <= max_thres)) else: rows_ok = True if col_has_edges.any(): - cols_ok = np.all((col_sums[col_has_edges] >= min_thres) & - (col_sums[col_has_edges] <= max_thres)) + cols_ok = np.all((col_sums[col_has_edges] >= min_thres) & (col_sums[col_has_edges] <= max_thres)) else: cols_ok = True @@ -105,6 +107,41 @@ def apply(self, G: Graph) -> Graph: # col scaling A_scaled.data *= c[A_scaled.indices] + # step 2 + if self.backbone_method: + i = 0 + + rows, cols = A_scaled.nonzero() + vals = A_scaled.data + + order = np.argsort(vals)[::-1] + rows = rows[order] + cols = cols[order] + vals = vals[order] + print(rows, cols, order) + + if not G.directed: + G_filtered = nx.Graph() + while ( + nx.number_connected_components(G_filtered) != 1 + or len(G_filtered) < A_scaled.shape[0] + or not nx.is_connected(G_filtered) + ): + if i == A_scaled.shape[0]: + break + G_filtered.add_edge(rows[i], cols[i], weight=vals[i]) + i += 1 + G_csr = nx.to_scipy_sparse_array(G_filtered) + + return Graph.from_csr( + G_csr, + directed=G.directed, + weighted=True, + mode=G.mode, + # meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), + sym_op="max", + ) + return Graph.from_csr( A_scaled, directed=G.directed, diff --git a/tests/test_doubly_stochastic.py b/tests/test_doubly_stochastic.py index c0febd5..bfa99b2 100644 --- a/tests/test_doubly_stochastic.py +++ b/tests/test_doubly_stochastic.py @@ -16,12 +16,15 @@ def _csr(data, rows, cols, n): # ----------------- Positive dense matrix: converges to ~doubly stochastic ----------------- def test_doubly_stochastic_converges_on_positive_dense(): # Strictly positive, symmetric 4x4 (undirected) - M = np.array([ - [0.2, 0.8, 0.5, 0.3], - [0.7, 0.1, 0.4, 0.6], - [0.3, 0.9, 0.2, 0.5], - [0.5, 0.2, 0.7, 0.4], - ], dtype=float) + M = np.array( + [ + [0.2, 0.8, 0.5, 0.3], + [0.7, 0.1, 0.4, 0.6], + [0.3, 0.9, 0.2, 0.5], + [0.5, 0.2, 0.7, 0.4], + ], + dtype=float, + ) # Zero the diagonal (typical adjacency semantics) np.fill_diagonal(M, 0.0) @@ -43,6 +46,49 @@ def test_doubly_stochastic_converges_on_positive_dense(): assert not G.directed and G.weighted +def test_doubly_stochastic_with_backbone_method(): + # Strictly positive, asymmetric matrix (to make backbone relevant) + M = np.array( + [ + [0.2, 0.8, 0.5, 0.3], + [0.7, 0.1, 0.4, 0.6], + [0.3, 0.9, 0.2, 0.5], + [0.5, 0.2, 0.7, 0.4], + ], + dtype=float, + ) + + # Zero diagonal + np.fill_diagonal(M, 0.0) + + G0 = Graph.from_dense(M, directed=False, weighted=True, mode="similarity", sym_op="max") + + op = DoublyStochastic(tolerance=1e-6, max_iter=10_000, backbone_method=True) + + G = op.apply(G0) + A = G.adj + + # No NaNs or infs + assert np.isfinite(A.data).all() + + # Check that only backbone edges remain (sparser than original) + assert A.nnz <= G0.adj.nnz + + # Rows/cols should still approximately sum to 1 (on non-isolated nodes) + row_sums = np.asarray(A.sum(axis=1)).ravel() + col_sums = np.asarray(A.sum(axis=0)).ravel() + + # Only check nodes that still have edges + nonzero_rows = row_sums > 0 + nonzero_cols = col_sums > 0 + + assert np.allclose(row_sums[nonzero_rows], row_sums[nonzero_rows][0], atol=1e-6) + assert np.allclose(col_sums[nonzero_cols], col_sums[nonzero_cols][0], atol=1e-6) + + # Graph properties preserved + assert not G.directed and G.weighted + + # ----------------- Sparse graph with isolates: zero rows/cols remain zero, others ~1 ----------------- def test_doubly_stochastic_sparse_with_isolates(): # 5 nodes, node 4 is isolated @@ -65,7 +111,7 @@ def test_doubly_stochastic_sparse_with_isolates(): col_sums = np.asarray(A2.sum(axis=0)).ravel() # Indices with edges - rows_with = (np.diff(A2.indptr) > 0) + rows_with = np.diff(A2.indptr) > 0 cols_with = (sp.csc_matrix(A2).indptr[1:] - sp.csc_matrix(A2).indptr[:-1]) > 0 # Non-isolated rows/cols sum ~1 @@ -81,6 +127,35 @@ def test_doubly_stochastic_sparse_with_isolates(): assert not G.directed and G.weighted +def test_doubly_stochastic_sparse_with_isolates_backbone(): + A = _csr( + data=[0.4, 0.6, 0.3, 0.7, 0.2, 0.5], + rows=[0, 0, 1, 1, 2, 3], + cols=[1, 2, 2, 3, 3, 2], + n=5, + ) + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") + + op = DoublyStochastic(tolerance=1e-6, max_iter=10_000, backbone_method=True) + G = op.apply(G0) + A2 = G.adj + + assert np.isfinite(A2.data).all() + + row_sums = np.asarray(A2.sum(axis=1)).ravel() + col_sums = np.asarray(A2.sum(axis=0)).ravel() + rows_with = np.diff(A2.indptr) > 0 + cols_with = (sp.csc_matrix(A2).indptr[1:] - sp.csc_matrix(A2).indptr[:-1]) > 0 + + if rows_with.any(): + assert np.all(row_sums[rows_with] > 0) + if cols_with.any(): + assert np.all(col_sums[cols_with] > 0) + + # Isolated node (4) stays isolated (not in the graph) + assert len(row_sums) == 4 + + # ----------------- Directed case: rows and cols ~1 for nonzero rows/cols ----------------- def test_doubly_stochastic_directed_graph_unsolvable(): # Directed 4x4 with zeros on diagonal, not symmetric @@ -100,12 +175,7 @@ def test_doubly_stochastic_directed_graph_unsolvable(): # No NaNs or infs assert np.isfinite(A2.data).all() - expected_result = np.array([ - [0. , 0.5, 0.5, 0. ], - [0. , 0. , 1. , 0. ], - [0.5, 0. , 0. , 0.5], - [0. , 1. , 0. , 0. ] - ]) + expected_result = np.array([[0.0, 0.5, 0.5, 0.0], [0.0, 0.0, 1.0, 0.0], [0.5, 0.0, 0.0, 0.5], [0.0, 1.0, 0.0, 0.0]]) assert np.allclose(A2.toarray(), expected_result, atol=1e-4) # Directed flag preserved