diff --git a/CLAUDE.md b/CLAUDE.md
index 901192d60..6280e7706 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -175,6 +175,18 @@ Plots support both matplotlib (static) and plotly (interactive) backends.
 - `regression/` - Regression test data
 - `testdata/` - Sample data files (symlinked from docs/data)
 
+## Git Workflow
+
+**IMPORTANT: Never commit directly to the main branch.**
+
+Always create a feature branch and submit a pull request:
+```bash
+git checkout -b feature/descriptive-name
+# Make changes and commit
+git push -u origin feature/descriptive-name
+gh pr create # Create pull request
+```
+
 ## Important Notes
 
 - The package depends on MIKE IO (`mikeio`) for reading MIKE file formats (dfs0, dfs2, dfsu)
diff --git a/src/modelskill/comparison/_comparer_plotter.py b/src/modelskill/comparison/_comparer_plotter.py
index 5467eafc2..0f548dfc8 100644
--- a/src/modelskill/comparison/_comparer_plotter.py
+++ b/src/modelskill/comparison/_comparer_plotter.py
@@ -746,7 +746,14 @@ def taylor(
     df = df.rename(columns={"_std_obs": "obs_std", "_std_mod": "std"})
 
     pts = [
-        TaylorPoint(name=r.model, obs_std=r.obs_std, std=r.std, cc=r.cc, marker=marker, marker_size=marker_size)
+        TaylorPoint(
+            name=r.model,
+            obs_std=r.obs_std,
+            std=r.std,
+            cc=r.cc,
+            marker=marker,
+            marker_size=marker_size,
+        )
         for r in df.itertuples()
     ]
 
diff --git a/src/modelskill/comparison/_comparison.py b/src/modelskill/comparison/_comparison.py
index cedc91d81..321139350 100644
--- a/src/modelskill/comparison/_comparison.py
+++ b/src/modelskill/comparison/_comparison.py
@@ -55,6 +55,15 @@ def _parse_dataset(data: xr.Dataset) -> xr.Dataset:
     # matched_data = self._matched_data_to_xarray(matched_data)
     assert "Observation" in data.data_vars
 
+    # Normalize datetime precision to avoid xarray interp issues with pandas 3.0
+    # Different data sources may have different precisions (datetime64[s], datetime64[us], etc.)
+    # Use nanoseconds (ns) for backward compatibility with pandas 2.x
+    if data.time.dtype.kind == "M":  # M = datetime
+        time_pd = data.time.to_index()  # Preserves freq attribute
+        if time_pd.dtype != "datetime64[ns]":
+            time_index = time_pd.as_unit("ns")
+            data = data.assign_coords(time=time_index)
+
     # no missing values allowed in Observation
     if data["Observation"].isnull().any():
         raise ValueError("Observation data must not contain missing values.")
@@ -331,12 +340,12 @@ def _matched_data_to_xarray(
         )
 
         # check that items.obs and items.model are numeric
-        if not np.issubdtype(df[items.obs].dtype, np.number):
+        if not pd.api.types.is_numeric_dtype(df[items.obs].dtype):
             raise ValueError(
                 "Observation data is of type {df[items.obs].dtype}, it must be numeric"
             )
         for m in items.model:
-            if not np.issubdtype(df[m].dtype, np.number):
+            if not pd.api.types.is_numeric_dtype(df[m].dtype):
                 raise ValueError(
                     f"Model data: {m} is of type {df[m].dtype}, it must be numeric"
                 )
@@ -451,8 +460,10 @@ def __init__(
             else {
                 # key: ModelResult(value, gtype=self.data.gtype, name=key, x=self.x, y=self.y)
                 str(key): PointModelResult(self.data[[str(key)]], name=str(key))
-                for key, value in matched_data.data_vars.items()
-                if value.attrs["kind"] == "model"
+                for key, value in self.data.data_vars.items()
+                # Use .get("kind") instead of ["kind"] to avoid KeyError
+                # Auxiliary variables (e.g., "leadtime", "wind") may not have "kind" attribute
+                if value.attrs.get("kind") == "model"
             }
         )
 
@@ -762,7 +773,9 @@ def merge(
         if isinstance(other, Comparer) and (self.name == other.name):
            raw_mod_data = self.raw_mod_data.copy()
            raw_mod_data.update(other.raw_mod_data)  # TODO!
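A quick sketch (not part of the patch) of the merge semantics pinned explicitly in `Comparer.merge` above; the datasets and variable names here are made up for illustration:

```python
# Sketch only: illustrates the keywords passed to Dataset.merge above.
# compat="no_conflicts" permits overlapping variables as long as any
# overlapping values agree (NaNs are filled from the other side);
# join="outer" keeps the union of both time axes, not the intersection.
import pandas as pd
import xarray as xr

t1 = pd.date_range("2020-01-01", periods=3, freq="h")
t2 = pd.date_range("2020-01-01 02:00", periods=3, freq="h")

a = xr.Dataset({"Observation": ("time", [1.0, 2.0, 3.0])}, coords={"time": t1})
b = xr.Dataset({"m2": ("time", [30.0, 40.0, 50.0])}, coords={"time": t2})

merged = a.merge(b, compat="no_conflicts", join="outer")
print(merged.sizes["time"])  # 5: union of the two axes; gaps become NaN
```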
-            matched = self.data.merge(other.data).dropna(dim="time")
+            matched = self.data.merge(
+                other.data, compat="no_conflicts", join="outer"
+            ).dropna(dim="time")
 
             cmp = Comparer(matched_data=matched, raw_mod_data=raw_mod_data)
             return cmp
diff --git a/src/modelskill/comparison/_utils.py b/src/modelskill/comparison/_utils.py
index 5b4bde01f..7bd20c8db 100644
--- a/src/modelskill/comparison/_utils.py
+++ b/src/modelskill/comparison/_utils.py
@@ -36,9 +36,9 @@ def _add_spatial_grid_to_df(
     )
     # cut and get bin centre
     df["xBin"] = pd.cut(df.x, bins=bins_x)
-    df["xBin"] = df["xBin"].apply(lambda x: x.mid)
+    df["xBin"] = df["xBin"].apply(lambda x: x.mid if pd.notna(x) else x)
     df["yBin"] = pd.cut(df.y, bins=bins_y)
-    df["yBin"] = df["yBin"].apply(lambda x: x.mid)
+    df["yBin"] = df["yBin"].apply(lambda x: x.mid if pd.notna(x) else x)
 
     return df
diff --git a/src/modelskill/metrics.py b/src/modelskill/metrics.py
index 30e4b02bc..22099df9b 100644
--- a/src/modelskill/metrics.py
+++ b/src/modelskill/metrics.py
@@ -588,9 +588,11 @@ def peak_ratio(
 
     time = obs.index
     # Calculate number of years
-    dt_int = (time[1:].values - time[0:-1].values).view("int64")
-    dt_int_mode = float(stats.mode(dt_int, keepdims=False)[0]) / 1e9  # in seconds
-    N_years = dt_int_mode / 24 / 3600 / 365.25 * len(time)
+    # Use total_seconds() to handle any datetime precision (ns, us, ms, s)
+    dt = time[1:] - time[:-1]
+    dt_seconds = dt.total_seconds().values
+    dt_mode_seconds = float(stats.mode(dt_seconds, keepdims=False)[0])
+    N_years = dt_mode_seconds / 24 / 3600 / 365.25 * len(time)
     peak_index, AAP_ = _partial_duration_series(
         time,
         obs,
diff --git a/src/modelskill/model/dummy.py b/src/modelskill/model/dummy.py
index 6a1efea0d..251598123 100644
--- a/src/modelskill/model/dummy.py
+++ b/src/modelskill/model/dummy.py
@@ -28,7 +28,7 @@ class DummyModelResult:
     --------
     >>> import pandas as pd
     >>> import modelskill as ms
-    >>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="H", periods=2))
+    >>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="h", periods=2))
     >>> obs = ms.PointObservation(df, name="foo")
     >>> mr = ms.DummyModelResult(strategy='mean')
     >>> pmr = mr.extract(obs)
diff --git a/src/modelskill/timeseries/_point.py b/src/modelskill/timeseries/_point.py
index 5e2a0c86a..43675a4e3 100644
--- a/src/modelskill/timeseries/_point.py
+++ b/src/modelskill/timeseries/_point.py
@@ -122,6 +122,17 @@ def _convert_to_dataset(
         data = data.rename({time_dim_name: "time"})
         ds = data
 
+    # Normalize datetime precision to avoid xarray interp issues with pandas 3.0
+    # Different data sources (dfs0 files, DataFrames) may have different precisions
+    # (datetime64[s], datetime64[us], datetime64[ns], etc.), and xarray.interp()
+    # fails when interpolating between datasets with mismatched precisions.
+    # Use nanoseconds (ns) for backward compatibility with pandas 2.x
+    if ds.time.dtype.kind == "M":  # M = datetime
+        time_pd = ds.time.to_index()  # Preserves freq attribute
+        if time_pd.dtype != "datetime64[ns]":
+            time_index = time_pd.as_unit("ns")
+            ds = ds.assign_coords(time=time_index)
+
     name = _validate_data_var_name(varname)
 
     n_unique_times = len(ds.time.to_index().unique())
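Both precision-normalization blocks lean on the same pandas >= 2.0 behavior: datetime indexes may carry non-nanosecond units, and `DatetimeIndex.as_unit` converts between them. A minimal sketch of that round trip (index values are illustrative):

```python
# Sketch only: pandas >= 2.0 allows non-nanosecond datetime indexes, which is
# how mixed-precision time axes arise in the first place; as_unit("ns")
# converts them back to the unit that pandas 2.x used everywhere.
import pandas as pd

idx = pd.date_range("2020-01-01", periods=3, freq="h").as_unit("s")
print(idx.dtype)                # datetime64[s]
print(idx.as_unit("ns").dtype)  # datetime64[ns]
```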
diff --git a/tests/integration/test_integration_grid.py b/tests/integration/test_integration_grid.py
index e9fe57103..8ad736c8e 100644
--- a/tests/integration/test_integration_grid.py
+++ b/tests/integration/test_integration_grid.py
@@ -2,6 +2,7 @@
 import pytest
 import xarray as xr
 
+
 def test_extract_point_from_3d():
     mr = ms.GridModelResult(
         "tests/testdata/cmems_mod_med_phy-sal_anfc_4.2km-3D_PT1H-m_1705916517624.nc",
@@ -27,4 +28,4 @@ def test_extract_point_from_3d():
     sc = cmp.score()
 
     # "Observed" data is extracted from the 3D model result, so the score should be 0.0
-    assert sc["MedSea"] == pytest.approx(0.0)
\ No newline at end of file
+    assert sc["MedSea"] == pytest.approx(0.0)
diff --git a/tests/model/test_point.py b/tests/model/test_point.py
index 1dfbd73ea..dcbd2b695 100644
--- a/tests/model/test_point.py
+++ b/tests/model/test_point.py
@@ -248,7 +248,6 @@ def test_point_model_result_from_nc_file():
 
 
 def test_interp_time():
-
     df = pd.DataFrame(
         {
             "WL": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
diff --git a/tests/test_combine_comparers.py b/tests/test_combine_comparers.py
index 97735ece1..7402501a8 100644
--- a/tests/test_combine_comparers.py
+++ b/tests/test_combine_comparers.py
@@ -1,4 +1,3 @@
-import pandas as pd
 import pytest
 
 import modelskill as ms
diff --git a/tests/test_comparercollection.py b/tests/test_comparercollection.py
index a291cd0f5..f884ff7f4 100644
--- a/tests/test_comparercollection.py
+++ b/tests/test_comparercollection.py
@@ -570,12 +570,31 @@ def test_plot_accepts_figsize(cc_plot_function):
     assert a, b == figsize
 
 
-def test_peak_ratio(cc):
-    """Non existent peak ratio"""
-    cc = cc.sel(model="m1")
-    sk = cc.skill(metrics=["peak_ratio"])
-
-    assert sk.loc["fake point obs", "peak_ratio"] == pytest.approx(1.119999999)
+def test_peak_ratio():
+    """Test peak_ratio with synthetic data containing clear, verifiable peaks"""
+    # Create data with 2 clear peaks:
+    # Peak 1: obs=5.0, model=5.5 → ratio=1.1
+    # Peak 2: obs=6.0, model=6.6 → ratio=1.1
+    # Expected peak_ratio = mean([1.1, 1.1]) = 1.1
+    times = pd.date_range("2020-01-01", periods=100, freq="h")
+    obs_vals = np.zeros(100)
+    mod_vals = np.zeros(100)
+
+    # Create peak 1 around index 10
+    obs_vals[8:13] = [0, 1, 5, 1, 0]
+    mod_vals[8:13] = [0, 1.1, 5.5, 1.1, 0]
+
+    # Create peak 2 around index 50
+    obs_vals[48:53] = [0, 1, 6, 1, 0]
+    mod_vals[48:53] = [0, 1.1, 6.6, 1.1, 0]
+
+    df = pd.DataFrame({"Observation": obs_vals, "model": mod_vals}, index=times)
+
+    cmp = ms.from_matched(df, obs_item=0, name="synthetic_peaks")
+    sk = cmp.skill(metrics=["peak_ratio"])
+
+    # Model peaks are 1.1x observation peaks
+    assert sk.to_dataframe()["peak_ratio"].values == pytest.approx(1.1, abs=0.01)
 
 
 def test_peak_ratio_2(cc_pr):
diff --git a/tests/test_consistency.py b/tests/test_consistency.py
index 4c2329aa7..38dbf965c 100644
--- a/tests/test_consistency.py
+++ b/tests/test_consistency.py
@@ -1,4 +1,5 @@
 """Test consitency across input formats and classes"""
+
 from functools import partial
 import pytest
 import mikeio
diff --git a/tests/test_grid_skill.py b/tests/test_grid_skill.py
index 5d1343b62..bac970a8b 100644
--- a/tests/test_grid_skill.py
+++ b/tests/test_grid_skill.py
@@ -1,6 +1,5 @@
 import pytest
 import pandas as pd
-import xarray as xr
 
 import modelskill as ms
diff --git a/tests/test_multimodelcompare.py b/tests/test_multimodelcompare.py
index 729d7af75..5c5609be4 100644
--- a/tests/test_multimodelcompare.py
+++ b/tests/test_multimodelcompare.py
@@ -1,7 +1,6 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pytest
-from matplotlib.table import Table
 
 import modelskill as ms
 import modelskill.metrics as mtr
diff --git a/tests/test_simple_compare.py b/tests/test_simple_compare.py
index 65eecd300..f94911a00 100644
--- a/tests/test_simple_compare.py
+++ b/tests/test_simple_compare.py
@@ -124,12 +124,12 @@ def test_matching_pointobservation_with_trackmodelresult_is_not_possible():
     # ignore the data
     tdf = pd.DataFrame(
         {"x": [1, 2], "y": [1, 2], "m1": [0, 0]},
-        index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4S"),
+        index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4s"),
     )
     mr = ms.TrackModelResult(tdf, item="m1", x_item="x", y_item="y")
     pdf = pd.DataFrame(
         data={"level": [0.0, 0.0]},
-        index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4S"),
+        index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4s"),
     )
     obs = ms.PointObservation(pdf, item="level")
     with pytest.raises(TypeError, match="TrackModelResult"):
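The `freq="H"` to `freq="h"` and `freq="4S"` to `freq="4s"` updates in the docstring and tests above follow the pandas 2.2 deprecation of uppercase offset aliases; a minimal check of the lowercase spelling (timestamps here are illustrative):

```python
# Sketch only: pandas 2.2 deprecated uppercase offset aliases such as "H" and
# "S" in favor of "h" and "s", so the lowercase form works on pandas 2.x and 3.0.
import pandas as pd

idx = pd.date_range("2017-10-27 13:00:01", periods=2, freq="4s")
print(idx)  # two timestamps 4 seconds apart
```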