From 07d30fb8b884270b3e5da2b080f96fd72f9a8275 Mon Sep 17 00:00:00 2001 From: Alex Tong Date: Tue, 30 Mar 2021 22:24:30 -0400 Subject: [PATCH 1/4] Add SCOT method to multimodal data integration task --- .../requirements.txt | 1 + .../methods/scot.py | 92 +++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 openproblems/tasks/multimodal_data_integration/methods/scot.py diff --git a/docker/openproblems-python-extras/requirements.txt b/docker/openproblems-python-extras/requirements.txt index cee39838ca..c3d8473423 100644 --- a/docker/openproblems-python-extras/requirements.txt +++ b/docker/openproblems-python-extras/requirements.txt @@ -5,3 +5,4 @@ phate pyensembl pybedtools git+https://github.com/czbiohub/molecular-cross-validation +git+https://github.com/atong01/SCOT diff --git a/openproblems/tasks/multimodal_data_integration/methods/scot.py b/openproblems/tasks/multimodal_data_integration/methods/scot.py new file mode 100644 index 0000000000..ccb227f9d7 --- /dev/null +++ b/openproblems/tasks/multimodal_data_integration/methods/scot.py @@ -0,0 +1,92 @@ +from ....tools.decorators import method +from ....tools.normalize import log_cpm +from ....tools.normalize import log_scran_pooling +from ....tools.normalize import sqrt_cpm +from ....tools.utils import check_version + +import sklearn.decomposition + + +def _scot(adata, n_svd=100, balanced=False): + from SCOT import SCOT + + # PCA reduction + n_svd = min([n_svd, min(adata.X.shape) - 1, min(adata.obsm["mode2"].shape) - 1]) + X_pca = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(adata.X) + Y_pca = sklearn.decomposition.TruncatedSVD(n_svd).fit_transform(adata.obsm["mode2"]) + + # Initialize SCOT + scot = SCOT(X_pca, Y_pca) + + # call the unbalanced alignment + # From https://github.com/rsinghlab/SCOT/blob/master/examples/unbalanced_GW_SNAREseq.ipynb + X_new_unbal, y_new_unbal = scot.align( + k=50, e=1e-3, rho=0.0005, normalize=True, balanced=balanced + ) + adata.obsm["aligned"] = X_new_unbal 
+ adata.obsm["mode2_aligned"] = y_new_unbal + + return adata + + +@method( + method_name="Single Cell Optimal Transport (sqrt CPM unbalanced)", + paper_name="Gromov-Wasserstein optimal transport to align single-cell multi-omics data", + paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", + paper_year=2020, + code_url="https://github.com/rsinghlab/SCOT", + code_version=check_version("SCOT"), + image="openproblems-python-extras", +) +def scot_sqrt_cpm_unbalanced(adata, n_svd=100, balanced=False): + sqrt_cpm(adata) + log_cpm(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") + _scot(adata, n_svd=n_svd, balanced=balanced) + return adata + +@method( + method_name="Single Cell Optimal Transport (sqrt CPM balanced)", + paper_name="Gromov-Wasserstein optimal transport to align single-cell multi-omics data", + paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", + paper_year=2020, + code_url="https://github.com/rsinghlab/SCOT", + code_version=check_version("SCOT"), + image="openproblems-python-extras", +) +def scot_sqrt_cpm_balanced(adata, n_svd=100, balanced=True): + sqrt_cpm(adata) + log_cpm(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") + _scot(adata, n_svd=n_svd, balanced=balanced) + return adata + + +@method( + method_name="Single Cell Optimal Transport (log scran unbalanced)", + paper_name="Gromov-Wasserstein optimal transport to align single-cell multi-omics data", + paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", + paper_year=2020, + code_url="https://github.com/rsinghlab/SCOT", + code_version=check_version("SCOT"), + image="openproblems-python-extras", +) +def scot_log_scran_pooling_unbalanced(adata, n_svd=100, balanced=False): + log_scran_pooling(adata) + log_cpm(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") + _scot(adata, n_svd=n_svd, balanced=balanced) + return adata + + +@method( + method_name="Single Cell Optimal Transport (log scran balanced)", + paper_name="Gromov-Wasserstein 
optimal transport to align single-cell multi-omics data", + paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", + paper_year=2020, + code_url="https://github.com/rsinghlab/SCOT", + code_version=check_version("SCOT"), + image="openproblems-python-extras", +) +def scot_log_scran_pooling_balanced(adata, n_svd=100, balanced=True): + log_scran_pooling(adata) + log_cpm(adata, obsm="mode2", obs="mode2_obs", var="mode2_var") + _scot(adata, n_svd=n_svd, balanced=balanced) + return adata From 78b60af59cb55ecfdfb66a85083bcc453fcf4654 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 31 Mar 2021 02:33:01 +0000 Subject: [PATCH 2/4] pre-commit --- openproblems/tasks/multimodal_data_integration/methods/scot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openproblems/tasks/multimodal_data_integration/methods/scot.py b/openproblems/tasks/multimodal_data_integration/methods/scot.py index ccb227f9d7..9fad460520 100644 --- a/openproblems/tasks/multimodal_data_integration/methods/scot.py +++ b/openproblems/tasks/multimodal_data_integration/methods/scot.py @@ -44,6 +44,7 @@ def scot_sqrt_cpm_unbalanced(adata, n_svd=100, balanced=False): _scot(adata, n_svd=n_svd, balanced=balanced) return adata + @method( method_name="Single Cell Optimal Transport (sqrt CPM balanced)", paper_name="Gromov-Wasserstein optimal transport to align single-cell multi-omics data", From 406c40501c07a3ff1b20f97ba9609b904f28c0cc Mon Sep 17 00:00:00 2001 From: Alex Tong Date: Wed, 31 Mar 2021 06:55:07 -0400 Subject: [PATCH 3/4] Fix flake things --- .../multimodal_data_integration/methods/scot.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/openproblems/tasks/multimodal_data_integration/methods/scot.py b/openproblems/tasks/multimodal_data_integration/methods/scot.py index ccb227f9d7..27ef9a3eb0 100644 --- a/openproblems/tasks/multimodal_data_integration/methods/scot.py +++ 
b/openproblems/tasks/multimodal_data_integration/methods/scot.py @@ -19,7 +19,7 @@ def _scot(adata, n_svd=100, balanced=False): scot = SCOT(X_pca, Y_pca) # call the unbalanced alignment - # From https://github.com/rsinghlab/SCOT/blob/master/examples/unbalanced_GW_SNAREseq.ipynb + # From https://github.com/rsinghlab/SCOT/blob/master/examples/unbalanced_GW_SNAREseq.ipynb # noqa: 501 X_new_unbal, y_new_unbal = scot.align( k=50, e=1e-3, rho=0.0005, normalize=True, balanced=balanced ) @@ -31,7 +31,8 @@ def _scot(adata, n_svd=100, balanced=False): @method( method_name="Single Cell Optimal Transport (sqrt CPM unbalanced)", - paper_name="Gromov-Wasserstein optimal transport to align single-cell multi-omics data", + paper_name="Gromov-Wasserstein optimal transport " + "to align single-cell multi-omics data", paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", paper_year=2020, code_url="https://github.com/rsinghlab/SCOT", @@ -44,9 +45,11 @@ def scot_sqrt_cpm_unbalanced(adata, n_svd=100, balanced=False): _scot(adata, n_svd=n_svd, balanced=balanced) return adata + @method( method_name="Single Cell Optimal Transport (sqrt CPM balanced)", - paper_name="Gromov-Wasserstein optimal transport to align single-cell multi-omics data", + paper_name="Gromov-Wasserstein optimal transport to " + "align single-cell multi-omics data", paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", paper_year=2020, code_url="https://github.com/rsinghlab/SCOT", @@ -62,7 +65,8 @@ def scot_sqrt_cpm_balanced(adata, n_svd=100, balanced=True): @method( method_name="Single Cell Optimal Transport (log scran unbalanced)", - paper_name="Gromov-Wasserstein optimal transport to align single-cell multi-omics data", + paper_name="Gromov-Wasserstein optimal transport to " + "align single-cell multi-omics data", paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", paper_year=2020, code_url="https://github.com/rsinghlab/SCOT", @@ -78,7 +82,8 @@ def 
scot_log_scran_pooling_unbalanced(adata, n_svd=100, balanced=False): @method( method_name="Single Cell Optimal Transport (log scran balanced)", - paper_name="Gromov-Wasserstein optimal transport to align single-cell multi-omics data", + paper_name="Gromov-Wasserstein optimal transport to " + "align single-cell multi-omics data", paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", paper_year=2020, code_url="https://github.com/rsinghlab/SCOT", From 97312bd523940048b0ffee361c2f15749e6ad57f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 31 Mar 2021 11:04:59 +0000 Subject: [PATCH 4/4] pre-commit --- .../tasks/multimodal_data_integration/methods/scot.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openproblems/tasks/multimodal_data_integration/methods/scot.py b/openproblems/tasks/multimodal_data_integration/methods/scot.py index 27ef9a3eb0..6df7632fca 100644 --- a/openproblems/tasks/multimodal_data_integration/methods/scot.py +++ b/openproblems/tasks/multimodal_data_integration/methods/scot.py @@ -32,7 +32,7 @@ def _scot(adata, n_svd=100, balanced=False): @method( method_name="Single Cell Optimal Transport (sqrt CPM unbalanced)", paper_name="Gromov-Wasserstein optimal transport " - "to align single-cell multi-omics data", + "to align single-cell multi-omics data", paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", paper_year=2020, code_url="https://github.com/rsinghlab/SCOT", @@ -49,7 +49,7 @@ def scot_sqrt_cpm_unbalanced(adata, n_svd=100, balanced=False): @method( method_name="Single Cell Optimal Transport (sqrt CPM balanced)", paper_name="Gromov-Wasserstein optimal transport to " - "align single-cell multi-omics data", + "align single-cell multi-omics data", paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", paper_year=2020, code_url="https://github.com/rsinghlab/SCOT", @@ -66,7 +66,7 @@ def 
scot_sqrt_cpm_balanced(adata, n_svd=100, balanced=True): @method( method_name="Single Cell Optimal Transport (log scran unbalanced)", paper_name="Gromov-Wasserstein optimal transport to " - "align single-cell multi-omics data", + "align single-cell multi-omics data", paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", paper_year=2020, code_url="https://github.com/rsinghlab/SCOT", @@ -83,7 +83,7 @@ def scot_log_scran_pooling_unbalanced(adata, n_svd=100, balanced=False): @method( method_name="Single Cell Optimal Transport (log scran balanced)", paper_name="Gromov-Wasserstein optimal transport to " - "align single-cell multi-omics data", + "align single-cell multi-omics data", paper_url="https://www.biorxiv.org/content/10.1101/2020.04.28.066787", paper_year=2020, code_url="https://github.com/rsinghlab/SCOT",