From 0cda4c124f4b711ca9c5073ff0b4bd1ae1cfe0e8 Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Mon, 27 Oct 2025 20:34:33 +0100 Subject: [PATCH 1/8] larger update of Graph class (add distance/similarity attribute) --- graphconstructor/graph.py | 95 ++++++++++++++++++++++++++++++++++++--- tests/test_graph.py | 74 +++++++++++++++--------------- 2 files changed, 128 insertions(+), 41 deletions(-) diff --git a/graphconstructor/graph.py b/graphconstructor/graph.py index 6605836..ab89f07 100644 --- a/graphconstructor/graph.py +++ b/graphconstructor/graph.py @@ -1,9 +1,11 @@ +import warnings from dataclasses import dataclass from typing import Iterable, Literal, Sequence import numpy as np import pandas as pd import scipy.sparse as sp from scipy.sparse.csgraph import connected_components +from .utils import ConversionMethod, Mode, convert_adjacency_mode SymOp = Literal["max", "min", "average"] @@ -22,11 +24,14 @@ class Graph: - `adj`: CSR adjacency of shape (n, n) - `directed`: True if directed, else undirected (stored symmetric) - `weighted`: True if edge weights are meaningful; if False, all edges are 1.0 + - `mode`: "distance" or "similarity" (for interpretation of weights) - `meta`: pandas DataFrame with n rows (optional). May have a 'name' column. 
+ - `ignore_selfloops`: If True, self-loops are ignored/removed (default for undirected graphs) """ adj: sp.csr_matrix directed: bool weighted: bool + mode: str meta: pd.DataFrame | None = None ignore_selfloops: bool = None @@ -34,6 +39,9 @@ def __post_init__(self): # Default: ignore self-loops for undirected graphs if self.ignore_selfloops is None: self.ignore_selfloops = not self.directed + # Check mode + if self.mode not in {"distance", "similarity"}: + raise ValueError("mode must be 'distance' or 'similarity'.") # -------- Construction helpers -------- @staticmethod @@ -59,6 +67,7 @@ def _symmetrize(A: sp.csr_matrix, how: SymOp = "max") -> sp.csr_matrix: def from_csr( cls, adj: sp.spmatrix | np.ndarray, + mode: str, *, directed: bool = False, weighted: bool = True, @@ -88,21 +97,28 @@ def from_csr( if len(meta) != n: raise ValueError(f"meta has {len(meta)} rows but adjacency is {n}x{n}.") meta = meta.reset_index(drop=True) - return cls(adj=A.astype(float, copy=False), directed=directed, weighted=weighted, meta=meta) + return cls( + adj=A.astype(float, copy=False), + directed=directed, weighted=weighted, + mode=mode, ignore_selfloops=ignore_selfloops, + meta=meta + ) @classmethod def from_dense( cls, adj: np.ndarray, + mode: str, **kwargs, ) -> "Graph": - return cls.from_csr(adj, **kwargs) + return cls.from_csr(adj, mode=mode, **kwargs) @classmethod def from_edges( cls, n: int, edges: Sequence[tuple[int, int]] | np.ndarray, + mode: str, weights: Sequence[float] | np.ndarray | None = None, *, directed: bool = False, @@ -138,8 +154,10 @@ def from_edges( return cls.from_csr( A, directed=directed, weighted=weighted_eff, + mode=mode, ignore_selfloops=ignore_selfloops, - meta=meta, sym_op=sym_op) + meta=meta, sym_op=sym_op + ) # -------- Core properties -------- @property @@ -197,7 +215,73 @@ def drop(self, nodes: Iterable[int | str]) -> "Graph": A2 = self.adj[keep_mask][:, keep_mask].tocsr(copy=False) meta2 = self.meta.loc[keep_mask].reset_index(drop=True) if 
self.meta is not None else None - return Graph(adj=A2, directed=self.directed, weighted=self.weighted, meta=meta2) + return Graph(adj=A2, directed=self.directed, weighted=self.weighted, mode=self.mode, meta=meta2) + + # ----- Convert distance/similarity ----- + def convert_mode( + self, + target_mode: Mode, + method: ConversionMethod = "reciprocal", + inplace: bool = False, + **kwargs + ) -> "Graph": + """ + Convert graph weights between distance and similarity representations. + + Parameters + ---------- + target_mode : {"distance", "similarity"} + Desired mode for edge weights. + method : str or callable, default="reciprocal" + Conversion method to use. Options: + + - "reciprocal": similarity = 1/distance (default, bidirectional) + - "negative": similarity = -distance (bidirectional, for optimization) + - "exp": similarity = exp(-distance) (distance -> similarity only) + - "gaussian": similarity = exp(-distance^2/(2*sigma^2)) (distance -> similarity only) + - Custom callable: func(weights) -> converted_weights + inplace : bool, default=False + If True, modify this graph in place and return self. + If False (default), create and return a new Graph instance. + **kwargs + Additional parameters for conversion: + + - epsilon (float): Small value to avoid division by zero (default: 1e-10) + - sigma (float): Bandwidth for gaussian method (default: 1.0) + """ + if target_mode not in ("distance", "similarity"): + raise ValueError( + f"target_mode must be 'distance' or 'similarity', got '{target_mode}'" + ) + + if self.mode == target_mode: + warnings.warn( + f"Graph is already in '{target_mode}' mode. " + "Returning unchanged." 
+ ) + return self + + # Convert the adjacency matrix + new_adj = convert_adjacency_mode( + self.adj, + source_mode=self.mode, + target_mode=target_mode, + method=method, + inplace=inplace, + **kwargs + ) + + if inplace: + self.adj = new_adj + self.mode = target_mode + return self + else: + # Create a new Graph instance + return Graph( + adj=new_adj, + mode=target_mode, + directed=self.directed + ) # -------- Exporters -------- def to_networkx(self): @@ -250,6 +334,7 @@ def copy(self) -> "Graph": adj=self.adj.copy(), directed=self.directed, weighted=self.weighted, + mode=self.mode, meta=None if self.meta is None else self.meta.copy(), ) @@ -260,7 +345,7 @@ def sorted_by(self, col: str) -> "Graph": order = np.argsort(self.meta[col].to_numpy()) A2 = self.adj[order][:, order] meta2 = self.meta.iloc[order].reset_index(drop=True) - return Graph(adj=A2, directed=self.directed, weighted=self.weighted, meta=meta2) + return Graph(adj=A2, directed=self.directed, weighted=self.weighted, mode=self.mode, meta=meta2) def degree(self, ignore_weights: bool = False) -> np.ndarray | tuple[np.ndarray, np.ndarray]: """Return node degree(s). 
diff --git a/tests/test_graph.py b/tests/test_graph.py index 2ef08a4..8e5934f 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -34,7 +34,7 @@ def test_from_csr_enforces_square_and_drops_self_loops_and_symmetrizes_max(): cols=[0, 1, 0, 2, 2], n=3, ) - G = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G = Graph.from_csr(A, directed=False, weighted=True, mode="distance", sym_op="max") # self-loops removed assert np.allclose(G.adj.diagonal(), 0.0) @@ -44,23 +44,25 @@ def test_from_csr_enforces_square_and_drops_self_loops_and_symmetrizes_max(): assert G.adj[1, 0] == pytest.approx(5.0) # flags assert not G.directed and G.weighted + # Check mode + assert G.mode == "distance" def test_symmetrization_operations(): """Test min, max, average symmetrization.""" A = _csr([5.0, 2.0], [0, 1], [1, 0], 2) - G_max = Graph.from_csr(A, directed=False, sym_op="max") + G_max = Graph.from_csr(A, directed=False, mode="distance", sym_op="max") assert G_max.adj[0, 1] == pytest.approx(5.0) - G_min = Graph.from_csr(A, directed=False, sym_op="min") + G_min = Graph.from_csr(A, directed=False, mode="distance", sym_op="min") assert G_min.adj[0, 1] == pytest.approx(2.0) - G_avg = Graph.from_csr(A, directed=False, sym_op="average") + G_avg = Graph.from_csr(A, directed=False, mode="distance", sym_op="average") assert G_avg.adj[0, 1] == pytest.approx(3.5) with pytest.raises(ValueError, match="Unsupported symmetrization"): - Graph.from_csr(A, directed=False, sym_op="invalid") + Graph.from_csr(A, directed=False, mode="distance", sym_op="invalid") @@ -72,14 +74,14 @@ def test_from_csr_enforces_square_and_keep_selfloops(): cols=[0, 1, 0, 2, 2], n=3, ) - G = Graph.from_csr(A, directed=True, weighted=True, sym_op="max") + G = Graph.from_csr(A, directed=True, weighted=True, mode="distance", sym_op="max") # self-loops removed assert np.allclose(G.adj.diagonal(), np.array([1., 0., 2.])) def test_from_csr_unweighted_forces_unit_weights(): A = _csr([0.2, 0.8], [0, 1], [1, 0], 2) 
- G = Graph.from_csr(A, directed=False, weighted=False) + G = Graph.from_csr(A, directed=False, weighted=False, mode="distance") assert np.allclose(G.adj.data, 1.0) assert not G.weighted @@ -87,12 +89,12 @@ def test_from_csr_unweighted_forces_unit_weights(): def test_from_csr_metadata_alignment_and_names(): A = _csr([1.0], [0], [1], 2) meta = pd.DataFrame({"name": ["a", "b"], "cls": [0, 1]}) - G = Graph.from_csr(A, directed=False, weighted=True, meta=meta) + G = Graph.from_csr(A, directed=False, weighted=True, mode="distance", meta=meta) assert G.node_names == ["a", "b"] assert list(G.meta.columns) == ["name", "cls"] with pytest.raises(ValueError, match="meta has .* rows"): - Graph.from_csr(A, directed=False, weighted=True, meta=meta.iloc[:1]) + Graph.from_csr(A, directed=False, weighted=True, mode="distance", meta=meta.iloc[:1]) def test_from_edges_missing_weights(): @@ -100,29 +102,29 @@ def test_from_edges_missing_weights(): edges = np.array([[0, 1], [1, 2]]) weights = None with pytest.raises(ValueError, match="weights must be provided"): - _ = Graph.from_edges(n=3, edges=edges, weights=weights, directed=False, weighted=True) + _ = Graph.from_edges(n=3, edges=edges, mode="distance", weights=weights, directed=False, weighted=True) def test_from_edges_invalid_indices(): """Negative or out-of-bounds indices should raise error.""" edges = np.array([[0, 1], [1, 5]]) # 5 >= n=3 with pytest.raises((ValueError, IndexError)): - Graph.from_edges(n=3, edges=edges, weights=[1.0, 1.0]) + Graph.from_edges(n=3, edges=edges, mode="distance", weights=[1.0, 1.0]) edges = np.array([[-1, 1], [1, 2]]) # negative index with pytest.raises((ValueError, IndexError)): - Graph.from_edges(n=3, edges=edges, weights=[1.0, 1.0]) + Graph.from_edges(n=3, edges=edges, mode="distance", weights=[1.0, 1.0]) def test_from_edges_and_from_dense_equivalence_to_from_csr(): edges = np.array([[0, 1], [1, 2]]) weights = np.array([2.0, 3.0]) - G1 = Graph.from_edges(n=3, edges=edges, weights=weights, 
directed=False, weighted=True) + G1 = Graph.from_edges(n=3, edges=edges, mode="distance", weights=weights, directed=False, weighted=True) D = np.zeros((3, 3), float) D[0, 1] = 2.0 D[1, 2] = 3.0 - G2 = Graph.from_dense(D, directed=False, weighted=True) + G2 = Graph.from_dense(D, directed=False, weighted=True, mode="distance") assert (G1.adj != G2.adj).nnz == 0 @@ -130,7 +132,7 @@ def test_from_edges_and_from_dense_equivalence_to_from_csr(): # ----------------- properties ----------------- def test_n_nodes_n_edges_undirected_counts_upper_triangle(): A = _csr([1, 1, 1, 1], [0, 0, 1, 2], [1, 2, 2, 2], 3) - G = Graph.from_csr(A, directed=False, weighted=True) + G = Graph.from_csr(A, directed=False, weighted=True, mode="distance") # After sym, edges are: (0,1), (0,2), (1,2) => 3 undirected edges assert G.n_nodes == 3 assert G.n_edges == 3 @@ -138,13 +140,13 @@ def test_n_nodes_n_edges_undirected_counts_upper_triangle(): def test_n_edges_directed_counts_arcs(): A = _csr([1, 1, 1], [0, 1, 2], [1, 2, 0], 3) - G = Graph.from_csr(A, directed=True, weighted=True) + G = Graph.from_csr(A, directed=True, weighted=True, mode="distance") assert G.n_edges == 3 def test_has_self_loops_property_false_by_default(): A = _csr([5.0], [0], [0], 2) - G = Graph.from_csr(A, directed=False, weighted=True) + G = Graph.from_csr(A, directed=False, weighted=True, mode="distance") assert not G.has_self_loops @@ -152,7 +154,7 @@ def test_has_self_loops_property_false_by_default(): def test_drop_by_index_and_name_updates_adj_and_meta(): A = _csr([1, 1, 1], [0, 0, 1], [1, 2, 2], 3) meta = pd.DataFrame({"name": ["a", "b", "c"], "cls": [0, 1, 1]}) - G = Graph.from_csr(A, directed=False, weighted=True, meta=meta) + G = Graph.from_csr(A, directed=False, weighted=True, mode="distance", meta=meta) G2 = G.drop(["b"]) # drop name "b" (index 1) assert G2.n_nodes == 2 @@ -165,7 +167,7 @@ def test_drop_by_index_and_name_updates_adj_and_meta(): def test_drop_nonexistent_node(): """Should raise KeyError/IndexError 
for invalid nodes.""" A = _csr([1], [0], [1], 2) - G = Graph.from_csr(A, directed=False) + G = Graph.from_csr(A, mode="distance", directed=False) with pytest.raises(IndexError): G.drop([5]) # out of range @@ -177,7 +179,7 @@ def test_drop_nonexistent_node(): def test_drop_empty_list(): """Dropping empty list should return same graph.""" A = _csr([1], [0], [1], 2) - G = Graph.from_csr(A, directed=False) + G = Graph.from_csr(A, directed=False, mode="distance") G2 = G.drop([]) assert G2.n_nodes == G.n_nodes @@ -186,7 +188,7 @@ def test_drop_single_value(): """Test dropping a single int/str (not in list).""" A = _csr([1], [0], [1], 3) meta = pd.DataFrame({"name": ["a", "b", "c"]}) - G = Graph.from_csr(A, directed=False, meta=meta) + G = Graph.from_csr(A, directed=False, mode="distance", meta=meta) G2 = G.drop(1) # single int assert G2.n_nodes == 2 @@ -198,7 +200,7 @@ def test_drop_single_value(): def test_sorted_by_permuted_order(): A = _csr([1, 1], [0, 1], [1, 2], 3) meta = pd.DataFrame({"name": ["c", "a", "b"], "score": [3, 1, 2]}) - G = Graph.from_csr(A, directed=False, weighted=True, meta=meta) + G = Graph.from_csr(A, directed=False, weighted=True, mode="distance", meta=meta) G2 = G.sorted_by("score") # names should be ordered by score ascending: a, b, c assert G2.node_names == ["a", "b", "c"] @@ -210,7 +212,7 @@ def test_copy_creates_independent_graph(): """Modifications to copy shouldn't affect original.""" A = _csr([1], [0], [1], 2) meta = pd.DataFrame({"name": ["a", "b"]}) - G = Graph.from_csr(A, meta=meta) + G = Graph.from_csr(A, mode="distance", meta=meta) G2 = G.copy() G2.adj.data[0] = 999.0 @@ -224,29 +226,29 @@ def test_copy_creates_independent_graph(): def test_graph_is_connected_method(): # Connected undirected graph A1 = _csr([1, 1, 1, 1], [0, 0, 1, 2], [1, 2, 2, 0], 3) - G1 = Graph.from_csr(A1, directed=False, weighted=True) + G1 = Graph.from_csr(A1, directed=False, weighted=True, mode="distance") assert G1.is_connected() # Disconnected undirected 
graph A2 = _csr([1, 1], [0, 0], [1, 2], 4) - G2 = Graph.from_csr(A2, directed=False, weighted=True) + G2 = Graph.from_csr(A2, directed=False, weighted=True, mode="distance") assert not G2.is_connected() # Strongly connected directed graph A3 = _csr([1, 1, 1, 1], [0, 1, 2, 2], [1, 2, 0, 1], 3) - G3 = Graph.from_csr(A3, directed=True, weighted=True) + G3 = Graph.from_csr(A3, directed=True, weighted=True, mode="distance") assert G3.is_connected() # Not strongly connected directed graph A4 = _csr([1, 1], [0, 1], [1, 2], 3) - G4 = Graph.from_csr(A4, directed=True, weighted=True) + G4 = Graph.from_csr(A4, directed=True, weighted=True, mode="distance") assert not G4.is_connected() def test_graph_connected_components_method(): # Undirected graph with 2 components A = _csr([1, 1, 1, 1], [0, 0, 3, 3], [1, 2, 3, 4], 5) - G = Graph.from_csr(A, directed=False, weighted=True) + G = Graph.from_csr(A, directed=False, weighted=True, mode="distance") n_components, labels = G.connected_components(return_labels=True) assert n_components == 2 assert set(labels) == {0, 1} @@ -255,7 +257,7 @@ def test_graph_connected_components_method(): def test_graph_degree_method_weighted_and_unweighted(): A = _csr([2.0, 3.0, 4.0, 5.0], [0, 1, 3, 3], [1, 2, 2, 0], 4) - G_weighted = Graph.from_csr(A, directed=False, weighted=True) + G_weighted = Graph.from_csr(A, directed=False, weighted=True, mode="distance") deg_weighted = G_weighted.degree() assert np.allclose(deg_weighted, np.array([7.0, 5.0, 7.0, 9.0])) @@ -263,7 +265,7 @@ def test_graph_degree_method_weighted_and_unweighted(): deg_weighted = G_weighted.degree(ignore_weights=True) assert np.allclose(deg_weighted, np.array([2, 2, 2, 2])) - G_unweighted = Graph.from_csr(A, directed=False, weighted=False) + G_unweighted = Graph.from_csr(A, directed=False, weighted=False, mode="distance") deg_unweighted = G_unweighted.degree() assert np.allclose(deg_unweighted, np.array([2, 2, 2, 2])) @@ -275,12 +277,12 @@ def 
test_graph_degree_method_selfloops_counted_twice_unweighted(): [1, 0, 0] ]) # undirected case - G = Graph.from_dense(S, directed=False, weighted=True, ignore_selfloops=False, sym_op="max") + G = Graph.from_dense(S, directed=False, weighted=True, ignore_selfloops=False, mode="distance", sym_op="max") deg = G.degree() assert np.allclose(deg, np.array([3, 2, 1])) # node 0 and 1 have self-loop counted twice # directed case - G = Graph.from_dense(S, directed=True, weighted=True, ignore_selfloops=False, sym_op="max") + G = Graph.from_dense(S, directed=True, weighted=True, ignore_selfloops=False, mode="distance", sym_op="max") deg_out, deg_in = G.degree() assert np.allclose(deg_in, np.array([2, 1, 0])) assert np.allclose(deg_out, np.array([1, 1, 1])) @@ -292,12 +294,12 @@ def test_graph_degree_method_selfloops_counted_twice_weighted(): [0.25, 0, 0] ]) # undirected case - G = Graph.from_dense(S, directed=False, weighted=True, ignore_selfloops=False, sym_op="max") + G = Graph.from_dense(S, directed=False, weighted=True, ignore_selfloops=False, mode="distance", sym_op="max") deg = G.degree() assert np.allclose(deg, np.array([2.25, 5, 0.25])) # node 0 and 1 have self-loop counted twice # directed case - G = Graph.from_dense(S, directed=True, weighted=True, ignore_selfloops=False, sym_op="max") + G = Graph.from_dense(S, directed=True, weighted=True, ignore_selfloops=False, mode="distance", sym_op="max") deg_out, deg_in = G.degree() assert np.allclose(deg_in, np.array([1.25, 2.5, 0])) assert np.allclose(deg_out, np.array([1, 2.5, 0.25])) @@ -308,7 +310,7 @@ def test_graph_degree_method_selfloops_counted_twice_weighted(): def test_to_networkx_types_and_node_attributes(): A = _csr([0.5, 0.7], [0, 1], [1, 2], 3) meta = pd.DataFrame({"name": ["n0", "n1", "n2"], "cls": [0, 1, 1]}) - G = Graph.from_csr(A, directed=False, weighted=True, meta=meta) + G = Graph.from_csr(A, directed=False, weighted=True, mode="distance", meta=meta) nxG = G.to_networkx() import networkx as nx @@ -324,7 
+326,7 @@ def test_to_networkx_types_and_node_attributes(): def test_to_igraph_types_and_attributes(): A = _csr([0.2, 0.9, 0.3], [0, 1, 2], [1, 2, 0], 3) meta = pd.DataFrame({"name": ["a", "b", "c"], "label": [10, 20, 30]}) - G = Graph.from_csr(A, directed=True, weighted=True, meta=meta) + G = Graph.from_csr(A, directed=True, weighted=True, mode="distance", meta=meta) igG = G.to_igraph() import igraph as ig From 1ff6bd3e2858b79460745c8b057f9fcd3415d829 Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Mon, 27 Oct 2025 20:53:44 +0100 Subject: [PATCH 2/8] adjust importers --- README.md | 2 +- graphconstructor/importers.py | 14 ++-- graphconstructor/utils.py | 154 +++++++++++++++++++++++++++++++++- tests/test_importers.py | 6 +- 4 files changed, 165 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 376f9ab..048c041 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ D = (D + D.T) / 2 np.fill_diagonal(D, 0.0) # Import (from dense array) -G0 = from_dense(D, directed=False) +G0 = from_dense(D, directed=False, mode="distance") ``` ### 2) Refine a graph (operators) diff --git a/graphconstructor/importers.py b/graphconstructor/importers.py index 95d5d4e..313e734 100644 --- a/graphconstructor/importers.py +++ b/graphconstructor/importers.py @@ -7,12 +7,12 @@ Mode = Literal["distance", "similarity"] -def from_dense(arr, *, directed=False, weighted=True, meta=None, sym_op="max") -> Graph: - return Graph.from_dense(arr, directed=directed, weighted=weighted, meta=meta, sym_op=sym_op) +def from_dense(arr, mode, *, directed=False, weighted=True, meta=None, sym_op="max") -> Graph: + return Graph.from_dense(arr, directed=directed, weighted=weighted, mode=mode, meta=meta, sym_op=sym_op) -def from_csr(adj, *, directed=False, weighted=True, meta=None, sym_op="max") -> Graph: - return Graph.from_csr(adj, directed=directed, weighted=weighted, meta=meta, sym_op=sym_op) +def from_csr(adj, mode, *, directed=False, weighted=True, meta=None, sym_op="max") -> Graph: 
+ return Graph.from_csr(adj, directed=directed, weighted=weighted, mode=mode, meta=meta, sym_op=sym_op) def from_knn(indices, distances, *, store_weights=True, directed=False, meta=None, sym_op="max") -> Graph: @@ -33,7 +33,8 @@ def from_knn(indices, distances, *, store_weights=True, directed=False, meta=Non # Infer full graph size from neighbor ids n_full = _infer_n_from_indices(ind) A = sp.csr_matrix((weights, (rows, cols)), shape=(n_full, n_full)) - return Graph.from_csr(A, directed=directed, weighted=store_weights, meta=meta, sym_op=sym_op) + return Graph.from_csr(A, directed=directed, weighted=store_weights, mode="distance", + meta=meta, sym_op=sym_op) def from_ann(ann, query_data, k: int, *, store_weights=True, directed=False, meta=None, sym_op="max") -> Graph: @@ -45,7 +46,8 @@ def from_ann(ann, query_data, k: int, *, store_weights=True, directed=False, met if query_data is None: raise TypeError("from_ann requires query_data when index has no cached neighbors.") ind, dist = idx.query(query_data, k=k) - return from_knn(ind, dist, store_weights=store_weights, directed=directed, meta=meta, sym_op=sym_op) + return from_knn(ind, dist, store_weights=store_weights, directed=directed, + meta=meta, sym_op=sym_op) # helper functions --------------------------------------------- diff --git a/graphconstructor/utils.py b/graphconstructor/utils.py index 91f08f2..0d89b2d 100644 --- a/graphconstructor/utils.py +++ b/graphconstructor/utils.py @@ -1,4 +1,4 @@ -from typing import Tuple +from typing import Callable, Literal, Tuple, Union import numpy as np import scipy.sparse as sp from numpy.typing import NDArray @@ -6,6 +6,14 @@ from .types import MatrixMode +# Type aliases for clarity +Mode = Literal["distance", "similarity"] +ConversionMethod = Union[ + Literal["reciprocal", "negative", "exp", "gaussian"], + Callable[[np.ndarray], np.ndarray] + ] + + def _validate_square_matrix(M: np.ndarray) -> None: if M.ndim != 2 or M.shape[0] != M.shape[1]: raise TypeError("Matrix must 
be square (n x n).") @@ -130,3 +138,147 @@ def _knn_from_matrix(M: NDArray | spmatrix, k: int, *, mode: MatrixMode) -> Tupl nn_idx = np.argpartition(-arr, kth=k-1, axis=1)[:, :k] nn_val = np.take_along_axis(arr, nn_idx, axis=1) return nn_idx, nn_val + + +def convert_weights( + weights: np.ndarray, + source_mode: Mode, + target_mode: Mode, + method: ConversionMethod = "reciprocal", + **kwargs +) -> np.ndarray: + """ + Convert weight values between distance and similarity representations. + + Parameters + ---------- + weights : np.ndarray + Array of weight values to convert. + source_mode : {"distance", "similarity"} + The current interpretation of weights. + target_mode : {"distance", "similarity"} + The desired interpretation of weights. + method : str or callable, default="reciprocal" + Conversion method to use. Built-in options: + + - "reciprocal": w_new = 1 / w_old (handles zeros with small epsilon) + - "negative": w_new = -w_old (useful for optimization contexts) + - "exp": w_new = exp(-w_old) (distance to similarity only) + - "gaussian": w_new = exp(-w_old^2 / (2*sigma^2)) (distance to similarity only) + + Or provide a custom callable: f(weights) -> converted_weights + **kwargs + Additional arguments passed to conversion functions: + + - epsilon : float, default=1e-10 + Small value added to denominators to avoid division by zero. + - sigma : float, default=1.0 + Bandwidth parameter for "gaussian" method. + """ + if source_mode == target_mode: + raise ValueError( + f"source_mode and target_mode are both '{source_mode}'. " + "No conversion needed." 
+ ) + + if source_mode not in ("distance", "similarity"): + raise ValueError(f"Invalid source_mode: '{source_mode}'") + if target_mode not in ("distance", "similarity"): + raise ValueError(f"Invalid target_mode: '{target_mode}'") + + # Handle custom callable + if callable(method): + return method(weights) + + # Handle built-in methods + epsilon = kwargs.get("epsilon", 1e-10) + + if method == "reciprocal": + # Works in both directions + return 1.0 / (weights + epsilon) + + elif method == "negative": + # Works in both directions + return -weights + + elif method == "exp": + # Only distance -> similarity + if source_mode != "distance": + raise ValueError( + "Method 'exp' only supports distance -> similarity conversion" + ) + return np.exp(-weights) + + elif method == "gaussian": + # Only distance -> similarity + if source_mode != "distance": + raise ValueError( + "Method 'gaussian' only supports distance -> similarity conversion" + ) + sigma = kwargs.get("sigma", 1.0) + return np.exp(-weights**2 / (2 * sigma**2)) + + else: + raise ValueError( + f"Unknown conversion method: '{method}'. " + "Use 'reciprocal', 'negative', 'exp', 'gaussian', or provide a callable." + ) + + +def convert_adjacency_mode( + adj: csr_matrix, + source_mode: Mode, + target_mode: Mode, + method: ConversionMethod = "reciprocal", + inplace: bool = False, + **kwargs +) -> csr_matrix: + """ + Convert a sparse adjacency matrix between distance and similarity modes. + + Only the non-zero elements (edges) are converted. Zero elements (non-edges) + remain zero. + + Parameters + ---------- + adj : scipy.sparse.csr_matrix + Sparse adjacency matrix to convert. + source_mode : {"distance", "similarity"} + Current interpretation of edge weights. + target_mode : {"distance", "similarity"} + Desired interpretation of edge weights. + method : str or callable, default="reciprocal" + Conversion method. See `convert_weights` for options. 
+ inplace : bool, default=False + If True, modify the matrix in place (faster, but mutates input). + If False, create a copy before converting. + **kwargs + Additional arguments passed to conversion function. + """ + if not issparse(adj): + raise TypeError("adj must be a sparse matrix") + + if source_mode == target_mode: + raise ValueError( + f"source_mode and target_mode are both '{source_mode}'. " + "No conversion needed." + ) + + # Work with CSR format for efficiency + if not isinstance(adj, csr_matrix): + adj = adj.tocsr() + inplace = False # Can't modify in place after format conversion + + if not inplace: + adj = adj.copy() + + # Convert only the non-zero edge weights + adj.data = convert_weights( + adj.data, + source_mode=source_mode, + target_mode=target_mode, + method=method, + **kwargs + ) + + return adj diff --git a/tests/test_importers.py b/tests/test_importers.py index d229564..114b65f 100644 --- a/tests/test_importers.py +++ b/tests/test_importers.py @@ -19,12 +19,12 @@ def _csr(data, rows, cols, n): # ----------------- from_csr / from_dense ----------------- def test_from_csr_and_from_dense_basic_flags_and_symmetry(): A = _csr([0.4, 0.9], [0, 1], [1, 0], 2) - G1 = from_csr(A, directed=False, weighted=True) + G1 = from_csr(A, directed=False, weighted=True, mode="similarity") assert not G1.directed and G1.weighted assert (G1.adj != G1.adj.T).nnz == 0 D = np.array([[0.0, 0.4], [0.9, 0.0]]) - G2 = from_dense(D, directed=False, weighted=True) + G2 = from_dense(D, directed=False, weighted=True, mode="similarity") assert (G1.adj != G2.adj).nnz == 0 @@ -97,5 +97,5 @@ def test_importers_preserve_directed_flag_and_allow_metadata(): [0.0, 0.0, 0.0], ]) meta = None # can replace with a DataFrame; here we just test directed flag - G = from_dense(D, directed=True, weighted=True, meta=meta) + G = from_dense(D, directed=True, weighted=True, mode="similarity", meta=meta) assert G.directed and G.weighted From 66529b6b5b473e0ba788a60c4bfbc683765a3b1d Mon Sep 17 
00:00:00 2001 From: Florian Huber Date: Mon, 27 Oct 2025 20:54:07 +0100 Subject: [PATCH 3/8] add mode check to base class --- graphconstructor/operators/base.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/graphconstructor/operators/base.py b/graphconstructor/operators/base.py index d05bb71..67e748b 100644 --- a/graphconstructor/operators/base.py +++ b/graphconstructor/operators/base.py @@ -3,6 +3,14 @@ class GraphOperator(ABC): + """Base class for graph operators.""" + supported_modes = [] # Specify supported modes from ["distance", "similarity"] + """Pure transform: Graph -> Graph.""" @abstractmethod def apply(self, G: Graph) -> Graph: ... + + def _check_mode_supported(self, G: Graph): + if G.mode not in self.supported_modes: + raise ValueError(f"{self.__class__.__name__} only supports modes: {self.supported_modes}, got {G.mode}") + \ No newline at end of file From d89066c18012d982978e3889de5ef223ba884cb7 Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Mon, 27 Oct 2025 20:54:32 +0100 Subject: [PATCH 4/8] refactor to new mode attribute --- graphconstructor/operators/disparity.py | 10 +++++++++- .../operators/doubly_stochastic.py | 4 ++++ graphconstructor/operators/knn_selector.py | 3 +++ .../locally_adaptive_sparsification.py | 6 +++++- .../operators/marginal_likelihood.py | 9 ++++++++- graphconstructor/operators/noise_corrected.py | 7 ++++++- .../operators/weight_threshold.py | 3 +++ tests/test_disparity.py | 10 +++++----- tests/test_doubly_stochastic.py | 19 ++++++++++++------- tests/test_knn_selector.py | 8 ++++---- tests/test_locally_adaptive_sparsification.py | 12 ++++++------ tests/test_marginal_likelihood_filter.py | 10 +++++----- tests/test_noise_corrected.py | 18 +++++++++++++----- tests/test_weight_threshold.py | 8 ++++---- 14 files changed, 87 insertions(+), 40 deletions(-) diff --git a/graphconstructor/operators/disparity.py b/graphconstructor/operators/disparity.py index e6070f6..d90913a 100644 --- 
a/graphconstructor/operators/disparity.py +++ b/graphconstructor/operators/disparity.py @@ -15,6 +15,10 @@ class DisparityFilter(GraphOperator): Disparity Filter backbone (Serrano, Boguñá, Vespignani, 2009). Works on nonnegative, real-valued weights; no integer casting needed. + Input requirements: + - mode: "similarity" (higher weights = stronger connections) + - weights: continuous, non-negative + Undirected: P = A / rowSums(A) pval_undirected = min( (1-P)^(k_row-1), (1-P^T)^(k_col-1) ) @@ -37,10 +41,10 @@ class DisparityFilter(GraphOperator): copy_meta : bool Copy metadata (True) or keep reference (False). """ - alpha: float = 0.05 rule: UndirectedRule = "or" copy_meta: bool = True + supported_modes = ["similarity"] def _undirected(self, G: Graph) -> Graph: A = G.adj.tocsr(copy=False) @@ -49,6 +53,7 @@ def _undirected(self, G: Graph) -> Graph: if A.nnz == 0: return Graph.from_csr(A.copy(), directed=False, weighted=G.weighted, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), + mode="similarity", sym_op="max") # strengths and degrees (row-wise) @@ -88,6 +93,7 @@ def _undirected(self, G: Graph) -> Graph: # Symmetrize to be safe (weights preserved as in input) A_f = A_f.maximum(A_f.T) return Graph.from_csr(A_f, directed=False, weighted=G.weighted, + mode=G.mode, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), sym_op="max") @@ -127,9 +133,11 @@ def _directed(self, G: Graph) -> Graph: keep = np.minimum(pval_out, pval_in) <= self.alpha A_f = sp.csr_matrix((w[keep], (rows[keep], cols[keep])), shape=A.shape) return Graph.from_csr(A_f, directed=True, weighted=G.weighted, + mode=G.mode, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta)) def apply(self, G: Graph) -> Graph: + self._check_mode_supported(G) if G.directed: return self._directed(G) return self._undirected(G) diff --git a/graphconstructor/operators/doubly_stochastic.py b/graphconstructor/operators/doubly_stochastic.py index 
99ce704..0213d7a 100644 --- a/graphconstructor/operators/doubly_stochastic.py +++ b/graphconstructor/operators/doubly_stochastic.py @@ -33,8 +33,10 @@ class DoublyStochastic(GraphOperator): tolerance: float = 1e-5 max_iter: int = 10_000 copy_meta: bool = True + supported_modes = ["similarity"] def apply(self, G: Graph) -> Graph: + self._check_mode_supported(G) A = G.adj.tocsr(copy=False) if A.shape[0] != A.shape[1]: @@ -49,6 +51,7 @@ def apply(self, G: Graph) -> Graph: A.copy(), directed=G.directed, weighted=True, + mode=G.mode, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), sym_op="max", ) @@ -106,6 +109,7 @@ def apply(self, G: Graph) -> Graph: A_scaled, directed=G.directed, weighted=True, + mode=G.mode, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), sym_op="max", ) diff --git a/graphconstructor/operators/knn_selector.py b/graphconstructor/operators/knn_selector.py index a18adb0..6c5bc52 100644 --- a/graphconstructor/operators/knn_selector.py +++ b/graphconstructor/operators/knn_selector.py @@ -32,8 +32,10 @@ class KNNSelector(GraphOperator): mutual: bool = False mutual_k: Optional[int] = None mode: Mode = "distance" + supported_modes = ["similarity", "distance"] def apply(self, G: Graph) -> Graph: + self._check_mode_supported(G) csr = G.adj.tocsr(copy=False) n = csr.shape[0] largest = (self.mode == "similarity") @@ -72,6 +74,7 @@ def apply(self, G: Graph) -> Graph: A, directed=G.directed, weighted=G.weighted, + mode=self.mode, meta=G.meta.copy() if G.meta is not None else None, sym_op="max" if not G.directed else "max", ) diff --git a/graphconstructor/operators/locally_adaptive_sparsification.py b/graphconstructor/operators/locally_adaptive_sparsification.py index 66cf22e..7e09a52 100644 --- a/graphconstructor/operators/locally_adaptive_sparsification.py +++ b/graphconstructor/operators/locally_adaptive_sparsification.py @@ -43,10 +43,10 @@ class LocallyAdaptiveSparsification(GraphOperator): If True, 
copy metadata frame onto the result graph. """ - alpha: float = 0.05 rule: UndirectedRule = "or" copy_meta: bool = True + supported_modes = ["similarity"] # ---- helpers ---- @staticmethod @@ -129,6 +129,7 @@ def _apply_directed(self, G: Graph) -> Graph: return Graph.from_csr( A_kept, directed=True, weighted=G.weighted, + mode=G.mode, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta) ) @@ -139,6 +140,7 @@ def _apply_undirected(self, G: Graph) -> Graph: if A.nnz == 0: return Graph.from_csr( A.copy(), directed=False, weighted=G.weighted, + mode=G.mode, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), sym_op="max", ) @@ -161,11 +163,13 @@ def _apply_undirected(self, G: Graph) -> Graph: return Graph.from_csr( A_kept, directed=False, weighted=G.weighted, + mode=G.mode, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), sym_op="max", ) def apply(self, G: Graph) -> Graph: + self._check_mode_supported(G) if G.directed: return self._apply_directed(G) return self._apply_undirected(G) diff --git a/graphconstructor/operators/marginal_likelihood.py b/graphconstructor/operators/marginal_likelihood.py index 7956d7b..2898b7f 100644 --- a/graphconstructor/operators/marginal_likelihood.py +++ b/graphconstructor/operators/marginal_likelihood.py @@ -47,11 +47,11 @@ class MarginalLikelihoodFilter(GraphOperator): - We only test existing edges (sparse support). No densification. - Undirected graphs are treated on the UPPER TRIANGLE only and mirrored back. """ - alpha: float float_scaling: float = 20. 
assume_loopless: bool = False copy_meta: bool = True + supported_modes = ["similarity"] def _cast_weights_to_int(self, w: np.ndarray, max_weight=None) -> np.ndarray: """Map floats to integers by scaling and rounding.""" @@ -73,6 +73,7 @@ def _undirected_filter(self, G: Graph) -> Graph: if T <= 0: # degenerate: no edges to keep return Graph.from_csr(A.copy() * 0.0, directed=False, weighted=G.weighted, + mode=G.mode, meta=G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta, sym_op="max") @@ -80,6 +81,7 @@ def _undirected_filter(self, G: Graph) -> Graph: Au = sp.triu(A, k=1).tocoo() if Au.nnz == 0: return Graph.from_csr(A.copy() * 0.0, directed=False, weighted=G.weighted, + mode=G.mode, meta=G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta, sym_op="max") @@ -113,6 +115,7 @@ def _undirected_filter(self, G: Graph) -> Graph: A_f = sp.csr_matrix((data_full, (rows_full, cols_full)), shape=A.shape) return Graph.from_csr(A_f, directed=False, weighted=G.weighted, + mode=G.mode, meta=G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta, sym_op="max") @@ -124,11 +127,13 @@ def _directed_filter(self, G: Graph) -> Graph: T = float(kout.sum()) if T <= 0: return Graph.from_csr(A.copy() * 0.0, directed=True, weighted=G.weighted, + mode=G.mode, meta=G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta) coo = A.tocoo() if coo.nnz == 0: return Graph.from_csr(A.copy() * 0.0, directed=True, weighted=G.weighted, + mode=G.mode, meta=G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta) # Optionally exclude self-edges (currently Graph anyway drops them) @@ -153,9 +158,11 @@ def _directed_filter(self, G: Graph) -> Graph: A_f = sp.csr_matrix((data, (rows, cols)), shape=A.shape) return Graph.from_csr(A_f, directed=True, weighted=G.weighted, + mode=G.mode, meta=G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta) def apply(self, G: Graph) -> Graph: + self._check_mode_supported(G) if G.directed: 
return self._directed_filter(G) else: diff --git a/graphconstructor/operators/noise_corrected.py b/graphconstructor/operators/noise_corrected.py index 1d81a33..868cf7a 100644 --- a/graphconstructor/operators/noise_corrected.py +++ b/graphconstructor/operators/noise_corrected.py @@ -26,10 +26,10 @@ class NoiseCorrected(GraphOperator): copy_meta : bool If True, copy metadata DataFrame; otherwise keep reference. """ - delta: float = 1.64 derivative: str = "constant" copy_meta: bool = True + supported_modes = ["similarity"] # ---------- Bayesian shrinkage for P_ij ---------- def _posterior_mean_p( @@ -117,6 +117,7 @@ def _apply_directed(self, G: Graph) -> Graph: A.copy(), directed=True, weighted=G.weighted, + mode=G.mode, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), ) @@ -134,6 +135,7 @@ def _apply_directed(self, G: Graph) -> Graph: A_f, directed=True, weighted=G.weighted, + mode=G.mode, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), ) @@ -146,6 +148,7 @@ def _apply_undirected(self, G: Graph) -> Graph: A.copy(), directed=False, weighted=G.weighted, + mode=G.mode, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), sym_op="max", ) @@ -171,9 +174,11 @@ def _apply_undirected(self, G: Graph) -> Graph: A_f, directed=False, weighted=G.weighted, + mode=G.mode, meta=(G.meta.copy() if (self.copy_meta and G.meta is not None) else G.meta), sym_op="max", ) def apply(self, G: Graph) -> Graph: + self._check_mode_supported(G) return self._apply_directed(G) if G.directed else self._apply_undirected(G) diff --git a/graphconstructor/operators/weight_threshold.py b/graphconstructor/operators/weight_threshold.py index 0a13423..b0ec12d 100644 --- a/graphconstructor/operators/weight_threshold.py +++ b/graphconstructor/operators/weight_threshold.py @@ -22,8 +22,10 @@ class WeightThreshold(GraphOperator): """ threshold: float mode: Mode = "distance" + supported_modes = ["similarity", "distance"] def 
apply(self, G: Graph) -> Graph: + self._check_mode_supported(G) csr = G.adj.tocsr(copy=False) coo = csr.tocoo() keep = (coo.data <= self.threshold) if self.mode == "distance" else (coo.data >= self.threshold) @@ -31,4 +33,5 @@ def apply(self, G: Graph) -> Graph: w = coo.data[keep] if G.weighted else np.ones(keep.sum(), dtype=float) A = sp.csr_matrix((w, (rows, cols)), shape=csr.shape) return Graph.from_csr(A, directed=G.directed, weighted=G.weighted, + mode=self.mode, meta=None if G.meta is None else G.meta.copy(), sym_op="max") diff --git a/tests/test_disparity.py b/tests/test_disparity.py index 1dc69e9..41a4add 100644 --- a/tests/test_disparity.py +++ b/tests/test_disparity.py @@ -21,7 +21,7 @@ def test_disparity_directed_min_out_in_formula(): cols=[1, 2, 2, 0, 3], n=4, ) - G0 = Graph.from_csr(A, directed=True, weighted=True) + G0 = Graph.from_csr(A, directed=True, weighted=True, mode="similarity") s_out = np.asarray(A.sum(axis=1)).ravel() s_in = np.asarray(A.sum(axis=0)).ravel() @@ -64,7 +64,7 @@ def test_disparity_undirected_or_superset_and(): cols=[1, 2, 2, 3, 0], n=4, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") alpha = 0.3 G_or = DisparityFilter(alpha=alpha, rule="or").apply(G0) @@ -88,7 +88,7 @@ def test_disparity_undirected_degree1_not_always_kept_for_tiny_alpha(): cols=[1, 2, 3], n=4, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") out = DisparityFilter(alpha=1e-6, rule="or").apply(G0) # It's valid that (0,1) may be dropped at tiny alpha; the test asserts only that @@ -100,7 +100,7 @@ def test_disparity_undirected_degree1_not_always_kept_for_tiny_alpha(): # ----------------- Negative weights rejected ----------------- def test_disparity_rejects_negative_weights(): A = _csr([-0.5, 0.2], [0, 1], [1, 0], 2) - G0 = Graph.from_csr(A, 
directed=True, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=True, weighted=True, mode="similarity", sym_op="max") with pytest.raises(ValueError, match="nonnegative"): DisparityFilter(alpha=0.1).apply(G0) @@ -109,7 +109,7 @@ def test_disparity_rejects_negative_weights(): def test_disparity_preserves_flags_and_copies_metadata(): meta = pd.DataFrame({"name": ["a", "b", "c"], "grp": [1, 0, 1]}) A = _csr([0.6, 0.4, 0.7], [0, 1, 2], [1, 2, 0], 3) - G0 = Graph.from_csr(A, directed=False, weighted=True, meta=meta, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", meta=meta, sym_op="max") out = DisparityFilter(alpha=0.2, rule="or", copy_meta=True).apply(G0) assert not out.directed and out.weighted diff --git a/tests/test_doubly_stochastic.py b/tests/test_doubly_stochastic.py index b4d2184..c0febd5 100644 --- a/tests/test_doubly_stochastic.py +++ b/tests/test_doubly_stochastic.py @@ -25,7 +25,7 @@ def test_doubly_stochastic_converges_on_positive_dense(): # Zero the diagonal (typical adjacency semantics) np.fill_diagonal(M, 0.0) - G0 = Graph.from_dense(M, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_dense(M, directed=False, weighted=True, mode="similarity", sym_op="max") op = DoublyStochastic(tolerance=1e-6, max_iter=10_000) G = op.apply(G0) @@ -52,7 +52,7 @@ def test_doubly_stochastic_sparse_with_isolates(): cols=[1, 2, 2, 3, 3, 2], n=5, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") op = DoublyStochastic(tolerance=1e-6, max_iter=10_000) G = op.apply(G0) @@ -91,7 +91,7 @@ def test_doubly_stochastic_directed_graph_unsolvable(): cols=[1, 2, 2, 0, 3, 1], n=4, ) - G0 = Graph.from_csr(A, directed=True, weighted=True) + G0 = Graph.from_csr(A, directed=True, weighted=True, mode="similarity") op = DoublyStochastic(tolerance=1e-6, max_iter=10_000) G = op.apply(G0) @@ -115,19 +115,24 @@ def 
test_doubly_stochastic_directed_graph_unsolvable(): # ----------------- Error cases ----------------- def test_doubly_stochastic_rejects_negative_weights(): A = _csr([-0.2, 0.5], [0, 1], [1, 0], 2) - G0 = Graph.from_csr(A, directed=True, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=True, weighted=True, mode="similarity", sym_op="max") op = DoublyStochastic() with pytest.raises(ValueError, match="nonnegative"): op.apply(G0) -# (We cannot test non-square here because Graph.from_csr enforces square on construction.) +def test_doubly_stochastic_rejects_distances(): + A = _csr([0.2, 0.5], [0, 1], [1, 0], 2) + G0 = Graph.from_csr(A, directed=True, weighted=True, mode="distance", sym_op="max") + op = DoublyStochastic() + with pytest.raises(ValueError, match="only supports modes"): + op.apply(G0) # ----------------- Trivial all-zero matrix: returned unchanged ----------------- def test_doubly_stochastic_all_zero_matrix_noop(): A = sp.csr_matrix((4, 4), dtype=float) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") op = DoublyStochastic() G = op.apply(G0) assert G.adj.nnz == 0 @@ -139,7 +144,7 @@ def test_doubly_stochastic_all_zero_matrix_noop(): def test_doubly_stochastic_preserves_flags_and_copies_metadata(): meta = pd.DataFrame({"name": ["a", "b", "c"], "group": [1, 0, 1]}) A = _csr([0.4, 0.6, 0.3], [0, 1, 2], [1, 2, 0], 3) - G0 = Graph.from_csr(A, directed=False, weighted=True, meta=meta, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", meta=meta, sym_op="max") op = DoublyStochastic(tolerance=1e-6, max_iter=10_000, copy_meta=True) G = op.apply(G0) diff --git a/tests/test_knn_selector.py b/tests/test_knn_selector.py index 8abdc01..a3afd0e 100644 --- a/tests/test_knn_selector.py +++ b/tests/test_knn_selector.py @@ -26,7 +26,7 @@ def test_knn_selector_distance_k2_undirected_symmetric_max(): cols=[1, 2, 3, 0, 
2, 3, 0, 1, 3, 0, 1, 2], n=4, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") out = KNNSelector(k=2, mutual=False, mode="distance").apply(G0) A2 = out.adj @@ -56,7 +56,7 @@ def test_knn_selector_similarity_k1_mode_override_and_symmetry(): cols=[1, 2, 0, 2, 0, 1], n=3, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") out = KNNSelector(k=1, mutual=False, mode="similarity").apply(G0) A2 = out.adj @@ -84,7 +84,7 @@ def test_knn_selector_mutual_true_and_mutual_k_limits_per_row(): cols=[1, 2, 2, 0, 1, 0], n=3, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") out = KNNSelector(k=1, mutual=True, mutual_k=2, mode="distance").apply(G0) A2 = out.adj @@ -114,7 +114,7 @@ def test_knn_selector_preserves_directed_flag_and_unit_weights_when_graph_is_unw n=3, ) # Mark as unweighted => data coerced to 1.0 by Graph.from_csr - G0 = Graph.from_csr(A, directed=True, weighted=False) + G0 = Graph.from_csr(A, directed=True, weighted=False, mode="similarity") out = KNNSelector(k=1, mutual=False, mode="similarity").apply(G0) assert out.directed and not out.weighted diff --git a/tests/test_locally_adaptive_sparsification.py b/tests/test_locally_adaptive_sparsification.py index bd9b8b4..01efeee 100644 --- a/tests/test_locally_adaptive_sparsification.py +++ b/tests/test_locally_adaptive_sparsification.py @@ -24,7 +24,7 @@ def test_lans_undirected_symmetry_and_weights_preserved(): cols=[1, 2, 2, 3], n=4, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") # alpha=0.5 should keep 0->1 from node 0's perspective (p=0.5), drop 0->2 (p=1.0) G = 
LocallyAdaptiveSparsification(alpha=0.5, rule="or").apply(G0) @@ -48,7 +48,7 @@ def test_lans_undirected_and_subset_or_and_alpha_monotonicity(): cols=[1, 2, 2, 3, 0], n=4, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") G_or = LocallyAdaptiveSparsification(alpha=0.30, rule="or").apply(G0) G_and = LocallyAdaptiveSparsification(alpha=0.30, rule="and").apply(G0) @@ -80,7 +80,7 @@ def test_lans_directed_requires_both_endpoints(): cols=[1, 2, 1, 1, 3], n=4, ) - G0 = Graph.from_csr(A, directed=True, weighted=True) + G0 = Graph.from_csr(A, directed=True, weighted=True, mode="similarity") # With a fairly small alpha, keep only edges significant on BOTH out and in sides G = LocallyAdaptiveSparsification(alpha=0.2).apply(G0) @@ -93,7 +93,7 @@ def test_lans_directed_requires_both_endpoints(): # ----------------- Negative weights rejected ----------------- def test_lans_rejects_negative_weights(): A = _csr([-0.3, 0.4], [0, 1], [2, 1], 3) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="average") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="average") with pytest.raises(ValueError, match="nonnegative"): LocallyAdaptiveSparsification(alpha=0.1).apply(G0) @@ -102,7 +102,7 @@ def test_lans_rejects_negative_weights(): def test_lans_handles_zero_strength_rows(): # Node 3 has no edges; node 2 only incoming (from 1) -> fine. 
A = _csr([0.5, 0.2], [0, 1], [1, 2], 4) - G0 = Graph.from_csr(A, directed=True, weighted=True) + G0 = Graph.from_csr(A, directed=True, weighted=True, mode="similarity") G = LocallyAdaptiveSparsification(alpha=0.5).apply(G0) # Graph remains well-formed, only existing arcs possibly filtered @@ -114,7 +114,7 @@ def test_lans_handles_zero_strength_rows(): def test_lans_preserves_metadata(): meta = pd.DataFrame({"name": ["a", "b", "c"], "grp": [1, 0, 1]}) A = _csr([0.6, 0.4, 0.7], [0, 1, 2], [1, 2, 0], 3) - G0 = Graph.from_csr(A, directed=False, weighted=True, meta=meta, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", meta=meta, sym_op="max") out = LocallyAdaptiveSparsification(alpha=0.25, rule="or", copy_meta=True).apply(G0) assert not out.directed and out.weighted diff --git a/tests/test_marginal_likelihood_filter.py b/tests/test_marginal_likelihood_filter.py index fef96ed..bf17d68 100644 --- a/tests/test_marginal_likelihood_filter.py +++ b/tests/test_marginal_likelihood_filter.py @@ -27,7 +27,7 @@ def test_mlf_undirected_matches_binomial_tail(): cols=[1, 2, 2, 3], n=4, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") # Compute strengths and T from the symmetrized adjacency A_sym = G0.adj # already symmetric, no self-loops @@ -74,7 +74,7 @@ def test_mlf_directed_uses_out_in_degrees(): cols=[1, 2, 0, 2], n=3, ) - G0 = Graph.from_csr(A, directed=True, weighted=True) + G0 = Graph.from_csr(A, directed=True, weighted=True, mode="similarity") A_dir = G0.adj.tocsr() kout = np.asarray(A_dir.sum(axis=1)).ravel() @@ -111,7 +111,7 @@ def test_mlf_alpha_monotonicity(): cols=[1, 2, 2, 3, 0, 1], n=4, ) - G0 = Graph.from_csr(A, directed=False, weighted=True) + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity") G_small = MarginalLikelihoodFilter(alpha=0.01).apply(G0) G_large = 
MarginalLikelihoodFilter(alpha=0.2).apply(G0) @@ -124,7 +124,7 @@ def test_mlf_alpha_monotonicity(): # ----------------- Degenerate T = 0 case ----------------- def test_mlf_handles_no_edges_T_zero(): A = sp.csr_matrix((3, 3), dtype=float) # all zeros - G0 = Graph.from_csr(A, directed=False, weighted=True) + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity") out = MarginalLikelihoodFilter(alpha=0.05).apply(G0) assert out.adj.nnz == 0 assert out.adj.shape == (3, 3) @@ -134,7 +134,7 @@ def test_mlf_handles_no_edges_T_zero(): def test_mlf_preserves_flags_and_copies_metadata_when_requested(): meta = pd.DataFrame({"name": ["a", "b", "c"], "group": [1, 0, 1]}) A = _csr([1, 2], [0, 1], [1, 2], 3) - G0 = Graph.from_csr(A, directed=False, weighted=True, meta=meta) + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", meta=meta) op = MarginalLikelihoodFilter(alpha=0.5, copy_meta=True) out = op.apply(G0) diff --git a/tests/test_noise_corrected.py b/tests/test_noise_corrected.py index 8a92464..c5b46c2 100644 --- a/tests/test_noise_corrected.py +++ b/tests/test_noise_corrected.py @@ -22,7 +22,7 @@ def test_nc_undirected_symmetry_and_monotonicity(): cols=[1, 2, 2, 3, 0, 4, 1], n=5, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") # Larger delta -> sparser backbone G_lo = NoiseCorrected(delta=1.0).apply(G0) @@ -45,7 +45,7 @@ def test_nc_directed_monotonicity_and_no_negatives(): cols=[1, 2, 2, 3, 0], n=4, ) - G0 = Graph.from_csr(A, directed=True, weighted=True) + G0 = Graph.from_csr(A, directed=True, weighted=True, mode="similarity") G1 = NoiseCorrected(delta=1.0).apply(G0) G2 = NoiseCorrected(delta=2.0).apply(G0) @@ -62,15 +62,23 @@ def test_nc_directed_monotonicity_and_no_negatives(): # ----------------- Negative weights rejected ----------------- def test_nc_rejects_negative_weights(): A = _csr([-0.1, 0.4], [0, 1], [1, 0], 2) 
- G0 = Graph.from_csr(A, directed=True, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=True, weighted=True, mode="similarity", sym_op="max") with pytest.raises(ValueError, match="nonnegative"): NoiseCorrected().apply(G0) +# ----------------- Distance mode rejected ----------------- +def test_nc_rejects_distances(): + A = _csr([0.2, 0.5], [0, 1], [1, 0], 2) + G0 = Graph.from_csr(A, directed=True, weighted=True, mode="distance", sym_op="max") + with pytest.raises(ValueError, match="only supports modes"): + NoiseCorrected().apply(G0) + + # ----------------- All-zero graph: noop ----------------- def test_nc_all_zero_noop(): A = sp.csr_matrix((3, 3), dtype=float) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") G = NoiseCorrected().apply(G0) assert G.adj.nnz == 0 and G.adj.shape == (3, 3) @@ -79,7 +87,7 @@ def test_nc_all_zero_noop(): def test_nc_preserves_flags_and_copies_metadata(): meta = pd.DataFrame({"name": ["a", "b", "c"], "grp": [1, 0, 1]}) A = _csr([0.5, 0.3, 0.7], [0, 1, 2], [1, 2, 0], 3) - G0 = Graph.from_csr(A, directed=False, weighted=True, meta=meta, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", meta=meta, sym_op="max") out = NoiseCorrected(delta=1.64, copy_meta=True).apply(G0) assert not out.directed and out.weighted diff --git a/tests/test_weight_threshold.py b/tests/test_weight_threshold.py index 9ba8444..a0e3680 100644 --- a/tests/test_weight_threshold.py +++ b/tests/test_weight_threshold.py @@ -21,7 +21,7 @@ def test_weight_threshold_distance_keeps_lt_eps_and_symmetrizes_max(): cols=[1, 2, 0, 2, 0, 1], n=3, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="min") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="min") out = WeightThreshold(threshold=0.5, mode="distance").apply(G0) A2 = out.adj @@ -49,7 +49,7 @@ def 
test_weight_threshold_similarity_keeps_gt_tau_and_symmetrizes_max(): cols=[1, 2, 0, 2, 0, 1], n=3, ) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="similarity", sym_op="max") out = WeightThreshold(threshold=tau, mode="similarity").apply(G0) A2 = out.adj @@ -75,7 +75,7 @@ def test_weight_threshold_preserves_directed_and_unweighted_flags(): cols=[1, 2, 2, 1], n=3, ) - G0 = Graph.from_csr(A, directed=True, weighted=False) # unweighted -> all ones internally + G0 = Graph.from_csr(A, directed=True, weighted=False, mode="similarity") # unweighted -> all ones internally out = WeightThreshold(threshold=0.8, mode="similarity").apply(G0) @@ -100,7 +100,7 @@ def test_weight_threshold_sparse_path_and_no_densification(): data = np.array([0.35, 0.6, 0.2, 0.39, 0.8, 0.1]) A = sp.csr_matrix((data, (rows, cols)), shape=(4, 4)) - G0 = Graph.from_csr(A, directed=False, weighted=True, sym_op="max") + G0 = Graph.from_csr(A, directed=False, weighted=True, mode="distance", sym_op="max") out = WeightThreshold(threshold=0.4, mode="distance").apply(G0) A2 = out.adj From 804ee78c1da93916674cd28745051f7885625860 Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Tue, 28 Oct 2025 22:31:25 +0100 Subject: [PATCH 5/8] drop diagonal (without removing ALL zeros) --- graphconstructor/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/graphconstructor/utils.py b/graphconstructor/utils.py index 0d89b2d..74721b4 100644 --- a/graphconstructor/utils.py +++ b/graphconstructor/utils.py @@ -33,6 +33,13 @@ def _make_symmetric_csr(A: csr_matrix, option: str = "max") -> csr_matrix: raise ValueError("Unsupported option for symmetrization.") +def _drop_diagonal(A: sp.csr_matrix) -> sp.csr_matrix: + # Remove diagonal entries without touching other zeros in csr matrix + coo = A.tocoo() + mask = coo.row != coo.col + return sp.csr_matrix((coo.data[mask], (coo.row[mask], coo.col[mask])), shape=A.shape) + + def 
_coerce_knn_inputs(indices, distances) -> Tuple[np.ndarray, np.ndarray]: ind = _to_numpy(indices) dist = _to_numpy(distances) From 404082e9f22ae18cafbf0081d4047311fb000244 Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Tue, 28 Oct 2025 22:31:47 +0100 Subject: [PATCH 6/8] handle explicit zeros (in particular for distance graphs) --- graphconstructor/graph.py | 96 +++++++++++++++++++++++++++---------- tests/test_graph.py | 99 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 169 insertions(+), 26 deletions(-) diff --git a/graphconstructor/graph.py b/graphconstructor/graph.py index ab89f07..1342cc4 100644 --- a/graphconstructor/graph.py +++ b/graphconstructor/graph.py @@ -1,11 +1,10 @@ -import warnings from dataclasses import dataclass from typing import Iterable, Literal, Sequence import numpy as np import pandas as pd import scipy.sparse as sp from scipy.sparse.csgraph import connected_components -from .utils import ConversionMethod, Mode, convert_adjacency_mode +from .utils import ConversionMethod, Mode, _drop_diagonal, convert_adjacency_mode SymOp = Literal["max", "min", "average"] @@ -27,6 +26,7 @@ class Graph: - `mode`: "distance" or "similarity" (for interpretation of weights) - `meta`: pandas DataFrame with n rows (optional). May have a 'name' column. 
- `ignore_selfloops`: If True, self-loops are ignored/removed (default for undirected graphs) + - `keep_explicit_zeros`: If True, explicit zeros in adjacency are kept (default for distance graphs) """ adj: sp.csr_matrix directed: bool @@ -34,11 +34,15 @@ class Graph: mode: str meta: pd.DataFrame | None = None ignore_selfloops: bool = None + keep_explicit_zeros: bool = None def __post_init__(self): # Default: ignore self-loops for undirected graphs if self.ignore_selfloops is None: self.ignore_selfloops = not self.directed + # Default: keep explicit zeros for distance graphs + if self.keep_explicit_zeros is None: + self.keep_explicit_zeros = self.mode == "distance" # Check mode if self.mode not in {"distance", "similarity"}: raise ValueError("mode must be 'distance' or 'similarity'.") @@ -53,15 +57,39 @@ def _ensure_csr(M: sp.spmatrix | np.ndarray) -> sp.csr_matrix: raise TypeError("Adjacency must be square (n x n).") return sp.csr_matrix(arr) + + @staticmethod + def _preserve_explicit_zeros(original: sp.csr_matrix, result: sp.csr_matrix) -> sp.csr_matrix: + """Reinsert explicit zeros that were present in `original` into `result` (CSR). + This avoids CSR ops (like max/min/avg) pruning stored zeros.""" + coo = original.tocoo() + zmask = (coo.data == 0) + if not np.any(zmask): + return result + zr = coo.row[zmask] + zc = coo.col[zmask] + # Merge by concatenating coordinates with zero data; CSR will coalesce duplicates. 
+ res_coo = result.tocoo() + rows = np.concatenate([res_coo.row, zr]) + cols = np.concatenate([res_coo.col, zc]) + data = np.concatenate([res_coo.data, np.zeros(zr.size, dtype=float)]) + return sp.csr_matrix((data, (rows, cols)), shape=result.shape) + @staticmethod - def _symmetrize(A: sp.csr_matrix, how: SymOp = "max") -> sp.csr_matrix: + def _symmetrize(A: sp.csr_matrix, how: SymOp = "max", + *, preserve_zeros_from: sp.csr_matrix | None = None) -> sp.csr_matrix: if how == "max": - return A.maximum(A.T) - if how == "min": - return A.minimum(A.T) - if how == "average": - return (A + A.T) * 0.5 - raise ValueError("Unsupported symmetrization op. Use 'max', 'min', or 'average'.") + B = A.maximum(A.T) + elif how == "min": + B = A.minimum(A.T) + elif how == "average": + B = (A + A.T) * 0.5 + else: + raise ValueError("Unsupported symmetrization op. Use 'max', 'min', or 'average'.") + # If asked, reinsert explicit zeros that existed before symmetrization. + if preserve_zeros_from is not None: + B = Graph._preserve_explicit_zeros(preserve_zeros_from, B) + return B @classmethod def from_csr( @@ -73,24 +101,31 @@ def from_csr( weighted: bool = True, meta: pd.DataFrame | None = None, ignore_selfloops: bool = None, + keep_explicit_zeros: bool = None, sym_op: SymOp = "max", copy: bool = False, ) -> "Graph": + + # Ignore self-loops (unless directed or specified otherwise) + if ignore_selfloops is None: + ignore_selfloops = not directed + # Keep explicit zeros (unless similarity or specified otherwise) + if keep_explicit_zeros is None: + keep_explicit_zeros = mode == "distance" + A = cls._ensure_csr(adj) if not weighted: if not copy and sp.issparse(adj): A = A.copy() A.data[:] = 1.0 if not directed: - A = cls._symmetrize(A, how=sym_op) - # Ignore self-loops (unless directed or specified otherwise) - if ignore_selfloops is None: - ignore_selfloops = not directed - if ignore_selfloops and A.diagonal().any(): - A = A.tolil(copy=False) - A.setdiag(0) - A = A.tocsr(copy=False) - 
A.eliminate_zeros() + preserve_src = A if keep_explicit_zeros else None + A = cls._symmetrize(A, how=sym_op, preserve_zeros_from=preserve_src) + + if mode == "similarity" and ignore_selfloops and A.diagonal().any(): + A = _drop_diagonal(A) + if mode == "distance" and ignore_selfloops and (A.diagonal() == 0).any(): + A = _drop_diagonal(A) n = A.shape[0] if meta is not None: @@ -101,7 +136,8 @@ def from_csr( adj=A.astype(float, copy=False), directed=directed, weighted=weighted, mode=mode, ignore_selfloops=ignore_selfloops, - meta=meta + meta=meta, + keep_explicit_zeros=keep_explicit_zeros, ) @classmethod @@ -125,9 +161,18 @@ def from_edges( weighted: bool = True, meta: pd.DataFrame | None = None, ignore_selfloops: bool = None, + keep_explicit_zeros: bool = None, sym_op: SymOp = "max", ) -> "Graph": """Build from an edge list. For undirected=True, we symmetrize later.""" + + # Ignore self-loops (unless directed or specified otherwise) + if ignore_selfloops is None: + ignore_selfloops = not directed + # Keep explicit zeros (unless similarity or specified otherwise) + if keep_explicit_zeros is None: + keep_explicit_zeros = mode == "distance" + if isinstance(edges, np.ndarray): if edges.ndim != 2 or edges.shape[1] != 2: raise TypeError("edges ndarray must be shape (m, 2).") @@ -156,7 +201,8 @@ def from_edges( weighted=weighted_eff, mode=mode, ignore_selfloops=ignore_selfloops, - meta=meta, sym_op=sym_op + meta=meta, sym_op=sym_op, + keep_explicit_zeros=keep_explicit_zeros, ) # -------- Core properties -------- @@ -255,11 +301,9 @@ def convert_mode( ) if self.mode == target_mode: - warnings.warn( - f"Graph is already in '{target_mode}' mode. " - "Returning unchanged." + raise ValueError( + f"Graph is already in mode '{target_mode}'. No conversion needed." 
) - return self # Convert the adjacency matrix new_adj = convert_adjacency_mode( @@ -280,7 +324,9 @@ def convert_mode( return Graph( adj=new_adj, mode=target_mode, - directed=self.directed + directed=self.directed, + weighted=self.weighted, + meta=None if self.meta is None else self.meta.copy(), ) # -------- Exporters -------- diff --git a/tests/test_graph.py b/tests/test_graph.py index 8e5934f..eab7d05 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -25,6 +25,36 @@ def _csr(data, rows, cols, n): ) +@pytest.fixture +def S_dense(): + """Simple 4-node dense similarity matrix.""" + return np.array([ + [0.0, 0.2, 0.8, 0.5], + [0.2, 0.0, 0.6, 0.3], + [0.8, 0.6, 0.0, 0.4], + [0.5, 0.3, 0.4, 0.0], + ], dtype=float) + + +@pytest.fixture +def S_csr(): + """Simple 4-node sparse similarity matrix.""" + return _csr( + data=[0.2, 0.8, 0.5, 0.6, 0.3, 0.4], + rows=[0, 0, 0, 1, 1, 2], + cols=[1, 2, 3, 2, 3, 3], + n=4, + ) + + +@pytest.fixture +def meta_df(): + """Simple metadata DataFrame for 4 nodes.""" + return pd.DataFrame({ + "name": ["node0", "node1", "node2", "node3"], + "group": [0, 1, 0, 2], + }) + # ----------------- construction invariants ----------------- def test_from_csr_enforces_square_and_drops_self_loops_and_symmetrizes_max(): # 3x3 with asymmetry + self-loops @@ -65,7 +95,6 @@ def test_symmetrization_operations(): Graph.from_csr(A, directed=False, mode="distance", sym_op="invalid") - def test_from_csr_enforces_square_and_keep_selfloops(): # 3x3 with asymmetry + self-loops A = _csr( @@ -79,6 +108,21 @@ def test_from_csr_enforces_square_and_keep_selfloops(): # self-loops removed assert np.allclose(G.adj.diagonal(), np.array([1., 0., 2.])) + +def test_from_csr_enforces_square_and_keep_selfloops(S_dense): + G_no_selfloops = Graph.from_dense( + S_dense, directed=True, weighted=True, mode="distance", sym_op="max", + ignore_selfloops=True + ) + G_selfloops = Graph.from_dense( + S_dense, directed=True, weighted=True, mode="distance", sym_op="max", + 
ignore_selfloops=False + ) + assert np.allclose(G_no_selfloops.adj.diagonal(), 0.0) + assert np.allclose(G_selfloops.adj.diagonal(), S_dense.diagonal()) + + + def test_from_csr_unweighted_forces_unit_weights(): A = _csr([0.2, 0.8], [0, 1], [1, 0], 2) G = Graph.from_csr(A, directed=False, weighted=False, mode="distance") @@ -337,3 +381,56 @@ def test_to_igraph_types_and_attributes(): assert igG.vs["label"] == [10, 20, 30] # edge weights exist assert "weight" in igG.es.attributes() + + +# ----------------- Distance/similarity conversion ----------------- +def test_convert_mode_distance_to_similarity_and_back_dense(S_dense, meta_df): + G = Graph.from_dense( + S_dense, directed=False, weighted=True, mode="similarity", meta=meta_df + ) + G_dist = G.convert_mode("distance") + assert G_dist.mode == "distance" + G_sim = G_dist.convert_mode("similarity") + assert G_sim.mode == "similarity" + assert np.allclose(G_sim.adj.toarray(), G.adj.toarray()) + # Metadata preserved + assert G_sim.meta.equals(G.meta) + + +def test_convert_mode_distance_to_similarity_and_back_csr(S_csr, meta_df): + G = Graph.from_csr( + S_csr, directed=False, weighted=True, mode="similarity", meta=meta_df + ) + G_dist = G.convert_mode("distance") + assert G_dist.mode == "distance" + G_sim = G_dist.convert_mode("similarity") + assert G_sim.mode == "similarity" + assert np.allclose(G_sim.adj.toarray(), G.adj.toarray()) + # Metadata preserved + assert G_sim.meta.equals(G.meta) + + # Conversion to dense should lead to different values (in place of 0s) + G_dense = Graph.from_dense( + S_csr.toarray(), directed=False, weighted=True, mode="similarity", meta=meta_df + ) + G_dist_dense = G_dense.convert_mode("distance") + assert G_dense.adj.data.shape == G.adj.data.shape # no change in dense conversion + assert G_dist_dense.adj.data.shape == G_dist.adj.data.shape # no change in dense conversion + + +# --------------- explicit zero handling --------------- +def test_keep_explicit_zeros_in_symmetrization(): + D_csr
=_csr( + data=[0.2, 0.8, 0.5, 0.0, 0.3, 0.4], + rows=[0, 0, 0, 1, 1, 2], + cols=[1, 2, 3, 2, 3, 3], + n=4, + ) + G = Graph.from_csr(D_csr, mode="distance") + assert np.allclose(G.adj.data, np.array([0.2, 0.8, 0.5, 0.2, 0. , 0.3, 0.8, 0.4, 0.5, 0.3, 0.4])) + + G_no_zeros = Graph.from_csr(D_csr, mode="distance", keep_explicit_zeros=False) + assert np.allclose(G_no_zeros.adj.data, np.array([0.2, 0.8, 0.5, 0.2, 0.3, 0.8, 0.4, 0.5, 0.3, 0.4])) + + G_sim = Graph.from_csr(D_csr, mode="similarity") + assert np.allclose(G_sim.adj.data, np.array([0.2, 0.8, 0.5, 0.2, 0.3, 0.8, 0.4, 0.5, 0.3, 0.4])) From 161efee2041a0872e52ffa6f9a74c16668564d6f Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Tue, 28 Oct 2025 22:34:53 +0100 Subject: [PATCH 7/8] fix test --- tests/test_graph.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_graph.py b/tests/test_graph.py index eab7d05..86c4057 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -109,7 +109,7 @@ def test_from_csr_enforces_square_and_keep_selfloops(): assert np.allclose(G.adj.diagonal(), np.array([1., 0., 2.])) -def test_from_csr_enforces_square_and_keep_selfloops(S_dense): +def test_from_dense_enforces_square_and_keep_selfloops(S_dense): G_no_selfloops = Graph.from_dense( S_dense, directed=True, weighted=True, mode="distance", sym_op="max", ignore_selfloops=True @@ -122,7 +122,6 @@ def test_from_csr_enforces_square_and_keep_selfloops(S_dense): assert np.allclose(G_selfloops.adj.diagonal(), S_dense.diagonal()) - def test_from_csr_unweighted_forces_unit_weights(): A = _csr([0.2, 0.8], [0, 1], [1, 0], 2) G = Graph.from_csr(A, directed=False, weighted=False, mode="distance") From c2bc08c1a88ef37826dc6e28dfa75fd432920767 Mon Sep 17 00:00:00 2001 From: Florian Huber Date: Wed, 29 Oct 2025 09:25:43 +0100 Subject: [PATCH 8/8] update code example --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 048c041..1dfc1f9 100644 --- a/README.md 
+++ b/README.md @@ -87,6 +87,7 @@ G0 = from_dense(D, directed=False, mode="distance") ``` ### 2) Refine a graph (operators) +Some operators can work on both distance- and similarity-based graphs, such as kNN and a simple weight thresholding method: ```python from graphconstructor.operators import KNNSelector, WeightThreshold @@ -98,6 +99,19 @@ G_refined = KNNSelector(k=5, mutual=True, mutual_k=20, mode="distance").apply(G0 G_pruned = WeightThreshold(threshold=0.3, mode="distance").apply(G_refined) ``` +Other operators specifically require either similarity or distance values as weights. In such cases it can be necessary to switch from distance to similarity measures (or vice versa). + +```python +from graphconstructor.operators import NoiseCorrected + +# There are various methods for distance/similarity conversions (including using a custom function) +G0_sim = G0.convert_mode("similarity", method="exp") + +# Once converted to similarities, other operators become available +G_refined = NoiseCorrected().apply(G0_sim) +``` + + ### 3) Export when needed ```python