Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion dte_adj/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
SimpleStratifiedDistributionEstimator,
AdjustedStratifiedDistributionEstimator,
)
from dte_adj.util import compute_ldte, compute_lpte
from dte_adj.util import compute_ldte, compute_lpte, _convert_to_ndarray


class SimpleLocalDistributionEstimator(SimpleStratifiedDistributionEstimator):
Expand Down Expand Up @@ -47,6 +47,7 @@ def fit(
Returns:
SimpleLocalDistributionEstimator: The fitted estimator.
"""
treatment_indicator = _convert_to_ndarray(treatment_indicator)
super().fit(covariates, treatment_arms, outcomes, strata)
self.treatment_indicator = treatment_indicator

Expand Down Expand Up @@ -215,6 +216,7 @@ def fit(
Returns:
AdjustedLocalDistributionEstimator: The fitted estimator.
"""
treatment_indicator = _convert_to_ndarray(treatment_indicator)
super().fit(covariates, treatment_arms, outcomes, strata)
self.treatment_indicator = treatment_indicator

Expand Down
9 changes: 9 additions & 0 deletions dte_adj/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
SimpleStratifiedDistributionEstimator,
AdjustedStratifiedDistributionEstimator,
)
from dte_adj.util import _convert_to_ndarray


class SimpleDistributionEstimator(SimpleStratifiedDistributionEstimator):
Expand Down Expand Up @@ -58,6 +59,10 @@ def fit(
Returns:
SimpleDistributionEstimator: The fitted estimator.
"""
covariates = _convert_to_ndarray(covariates)
treatment_arms = _convert_to_ndarray(treatment_arms)
outcomes = _convert_to_ndarray(outcomes)

if covariates.shape[0] != treatment_arms.shape[0]:
raise ValueError("The shape of covariates and treatment_arm should be same")

Expand Down Expand Up @@ -118,6 +123,10 @@ def fit(
Returns:
AdjustedDistributionEstimator: The fitted estimator.
"""
covariates = _convert_to_ndarray(covariates)
treatment_arms = _convert_to_ndarray(treatment_arms)
outcomes = _convert_to_ndarray(outcomes)

if covariates.shape[0] != treatment_arms.shape[0]:
raise ValueError("The shape of covariates and treatment_arm should be same")

Expand Down
11 changes: 11 additions & 0 deletions dte_adj/stratified.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Tuple, Any
from copy import deepcopy
from dte_adj.base import DistributionEstimatorBase
from dte_adj.util import _convert_to_ndarray


class SimpleStratifiedDistributionEstimator(DistributionEstimatorBase):
Expand All @@ -25,6 +26,11 @@ def fit(
Returns:
DistributionEstimatorBase: The fitted estimator.
"""
covariates = _convert_to_ndarray(covariates)
treatment_arms = _convert_to_ndarray(treatment_arms)
outcomes = _convert_to_ndarray(outcomes)
strata = _convert_to_ndarray(strata)

if covariates.shape[0] != treatment_arms.shape[0]:
raise ValueError("The shape of covariates and treatment_arm should be same")

Expand Down Expand Up @@ -184,6 +190,11 @@ def fit(
Returns:
DistributionEstimatorBase: The fitted estimator.
"""
covariates = _convert_to_ndarray(covariates)
treatment_arms = _convert_to_ndarray(treatment_arms)
outcomes = _convert_to_ndarray(outcomes)
strata = _convert_to_ndarray(strata)

if covariates.shape[0] != treatment_arms.shape[0]:
raise ValueError("The shape of covariates and treatment_arm should be same")

Expand Down
7 changes: 7 additions & 0 deletions dte_adj/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@
)


def _convert_to_ndarray(data: object) -> np.ndarray:
    """Convert pandas-like objects and plain sequences to ``np.ndarray``.

    Args:
        data: The input to normalise. Typically an ``np.ndarray``, a
            ``pd.Series`` / ``pd.DataFrame`` (anything exposing
            ``to_numpy``), or a plain ``list`` / ``tuple``.

    Returns:
        np.ndarray: ``data`` itself when it is already an ndarray, the result
        of ``data.to_numpy()`` when that method exists, or
        ``np.asarray(data)`` for lists and tuples. Any other object
        (including ``None``) is returned unchanged.
    """
    # Fast path: hand ndarrays back untouched so no copy is made.
    if isinstance(data, np.ndarray):
        return data
    # Duck-typed so pandas does not need to be imported by this module.
    if hasattr(data, "to_numpy"):
        return data.to_numpy()
    # Plain Python sequences have no ``.shape``; convert them so the shape
    # checks performed by callers work instead of raising AttributeError.
    if isinstance(data, (list, tuple)):
        return np.asarray(data)
    return data


def compute_confidence_intervals(
vec_y: np.ndarray,
vec_d: np.ndarray,
Expand Down
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ dev = [
"ruff>=0.12.2,<0.16.0",
"sphinx>=7.3.7,<8.2.0",
"scikit-learn>=1.5,<1.9",
"pre-commit>=4.0.1,<4.6.0"
"pre-commit>=4.0.1,<4.6.0",
"pandas>=2.0"
]

[tool.setuptools.packages.find]
Expand All @@ -47,7 +48,8 @@ dev-dependencies = [
"ruff>=0.12.2,<0.16.0",
"sphinx>=7.3.7,<8.2.0",
"scikit-learn>=1.5,<1.9",
"pre-commit>=4.0.1,<4.6.0"
"pre-commit>=4.0.1,<4.6.0",
"pandas>=2.0"
]

[tool.ruff.lint]
Expand Down
161 changes: 161 additions & 0 deletions tests/test_pandas_input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import unittest
import numpy as np
import pandas as pd
from unittest.mock import MagicMock
from sklearn.linear_model import LogisticRegression
from dte_adj import (
SimpleDistributionEstimator,
AdjustedDistributionEstimator,
SimpleStratifiedDistributionEstimator,
AdjustedStratifiedDistributionEstimator,
SimpleLocalDistributionEstimator,
AdjustedLocalDistributionEstimator,
)


class TestPandasInputSimple(unittest.TestCase):
    """Check that the Simple/Adjusted distribution estimators take pandas inputs."""

    # Fitted-estimator attributes verified (in this order) to be ndarrays.
    _NDARRAY_ATTRS = ("covariates", "treatment_arms", "outcomes")

    def setUp(self):
        """Build a 20-row DataFrame/Series fixture with two arms of 10 rows."""
        np.random.seed(42)
        sample_size = 20
        feature_names = [f"x{i}" for i in range(5)]
        self.covariates_df = pd.DataFrame(
            np.zeros((sample_size, 5)), columns=feature_names
        )
        arm_labels = np.hstack([np.zeros(10), np.ones(10)])
        self.treatment_arms_series = pd.Series(arm_labels)
        self.outcomes_series = pd.Series(np.arange(sample_size, dtype=float))

    def test_simple_estimator_with_dataframe_and_series(self):
        """fit() on pandas inputs stores each input as an np.ndarray."""
        fitted = SimpleDistributionEstimator().fit(
            self.covariates_df, self.treatment_arms_series, self.outcomes_series
        )

        for attr_name in self._NDARRAY_ATTRS:
            self.assertIsInstance(getattr(fitted, attr_name), np.ndarray)

    def test_simple_estimator_predict_after_pandas_fit(self):
        """predict() yields the expected values after fitting on pandas inputs."""
        model = SimpleDistributionEstimator()
        model.fit(self.covariates_df, self.treatment_arms_series, self.outcomes_series)

        # NOTE(review): 0.4 / 0.7 presumably are the empirical CDF of arm 0's
        # outcomes (0..9) at locations 3 and 6 -- confirm against predict().
        np.testing.assert_array_almost_equal(
            model.predict(0, np.array([3, 6])),
            np.array([0.4, 0.7]),
            decimal=2,
        )

    def test_adjusted_estimator_with_dataframe_and_series(self):
        """The adjusted estimator, given a mocked model, also stores ndarrays."""
        mock_model = MagicMock()
        mock_model.predict_proba.side_effect = lambda x, y: x
        fitted = AdjustedDistributionEstimator(mock_model, folds=2).fit(
            self.covariates_df, self.treatment_arms_series, self.outcomes_series
        )

        for attr_name in self._NDARRAY_ATTRS:
            self.assertIsInstance(getattr(fitted, attr_name), np.ndarray)


class TestPandasInputStratified(unittest.TestCase):
    """Check that the stratified estimators take pandas inputs."""

    # Fitted-estimator attributes verified (in this order) to be ndarrays.
    _NDARRAY_ATTRS = ("covariates", "treatment_arms", "outcomes", "strata")

    def setUp(self):
        """Build a seeded 100-row pandas fixture including a strata Series."""
        np.random.seed(42)
        sample_size = 100
        feature_names = [f"x{i}" for i in range(5)]
        # The random draws below stay in this exact order so the seeded
        # fixture values are reproducible.
        self.covariates_df = pd.DataFrame(
            np.random.randn(sample_size, 5), columns=feature_names
        )
        self.treatment_arms_series = pd.Series(
            np.random.choice([0, 1], size=sample_size)
        )
        self.outcomes_series = pd.Series(np.random.randn(sample_size))
        self.strata_series = pd.Series(np.random.choice([0, 1, 2], size=sample_size))

    def _fit_inputs(self):
        """Return the positional fit() arguments shared by both tests."""
        return (
            self.covariates_df,
            self.treatment_arms_series,
            self.outcomes_series,
            self.strata_series,
        )

    def test_simple_stratified_with_pandas(self):
        """The simple stratified estimator stores every input as an ndarray."""
        fitted = SimpleStratifiedDistributionEstimator().fit(*self._fit_inputs())

        for attr_name in self._NDARRAY_ATTRS:
            self.assertIsInstance(getattr(fitted, attr_name), np.ndarray)

    def test_adjusted_stratified_with_pandas(self):
        """The adjusted stratified estimator stores every input as an ndarray."""
        classifier = LogisticRegression(random_state=42)
        fitted = AdjustedStratifiedDistributionEstimator(classifier, folds=2).fit(
            *self._fit_inputs()
        )

        for attr_name in self._NDARRAY_ATTRS:
            self.assertIsInstance(getattr(fitted, attr_name), np.ndarray)


class TestPandasInputLocal(unittest.TestCase):
    """Check that the local estimators take pandas inputs."""

    # Fitted-estimator attributes verified (in this order) to be ndarrays.
    _NDARRAY_ATTRS = (
        "covariates",
        "treatment_arms",
        "treatment_indicator",
        "outcomes",
        "strata",
    )

    def setUp(self):
        """Build a seeded 100-row pandas fixture with a treatment indicator."""
        np.random.seed(42)
        sample_size = 100
        feature_names = [f"x{i}" for i in range(3)]
        # The random draws below stay in this exact order so the seeded
        # fixture values are reproducible.
        self.covariates_df = pd.DataFrame(
            np.random.randn(sample_size, 3), columns=feature_names
        )
        self.treatment_arms_series = pd.Series(
            np.random.choice([0, 1], size=sample_size)
        )
        self.treatment_indicator_series = pd.Series(
            np.random.choice([0, 1], size=sample_size)
        )
        self.outcomes_series = pd.Series(np.random.randn(sample_size))
        self.strata_series = pd.Series(np.random.choice([0, 1], size=sample_size))

    def _fit_inputs(self):
        """Return the positional fit() arguments shared by both tests."""
        return (
            self.covariates_df,
            self.treatment_arms_series,
            self.treatment_indicator_series,
            self.outcomes_series,
            self.strata_series,
        )

    def test_simple_local_with_pandas(self):
        """The simple local estimator stores every input as an ndarray."""
        fitted = SimpleLocalDistributionEstimator().fit(*self._fit_inputs())

        for attr_name in self._NDARRAY_ATTRS:
            self.assertIsInstance(getattr(fitted, attr_name), np.ndarray)

    def test_adjusted_local_with_pandas(self):
        """The adjusted local estimator stores every input as an ndarray."""
        classifier = LogisticRegression(random_state=42)
        fitted = AdjustedLocalDistributionEstimator(base_model=classifier).fit(
            *self._fit_inputs()
        )

        for attr_name in self._NDARRAY_ATTRS:
            self.assertIsInstance(getattr(fitted, attr_name), np.ndarray)


class TestNumpyInputStillWorks(unittest.TestCase):
    """Regression check: plain np.ndarray inputs are still accepted."""

    def test_simple_estimator_with_numpy(self):
        """fit() on ndarray inputs keeps the stored attributes as ndarrays."""
        model = SimpleDistributionEstimator()
        fit_args = (
            np.zeros((20, 5)),  # covariates
            np.hstack([np.zeros(10), np.ones(10)]),  # treatment arms
            np.arange(20, dtype=float),  # outcomes
        )

        fitted = model.fit(*fit_args)

        for attr_name in ("covariates", "treatment_arms", "outcomes"):
            self.assertIsInstance(getattr(fitted, attr_name), np.ndarray)
Loading