12 changes: 12 additions & 0 deletions CLAUDE.md
@@ -175,6 +175,18 @@ Plots support both matplotlib (static) and plotly (interactive) backends.
- `regression/` - Regression test data
- `testdata/` - Sample data files (symlinked from docs/data)

## Git Workflow

**IMPORTANT: Never commit directly to the main branch.**

Always create a feature branch and submit a pull request:
```bash
git checkout -b feature/descriptive-name
# Make changes and commit
git push -u origin feature/descriptive-name
gh pr create # Create pull request
```

## Important Notes

- The package depends on MIKE IO (`mikeio`) for reading MIKE file formats (dfs0, dfs2, dfsu)
9 changes: 8 additions & 1 deletion src/modelskill/comparison/_comparer_plotter.py
@@ -746,7 +746,14 @@ def taylor(
df = df.rename(columns={"_std_obs": "obs_std", "_std_mod": "std"})

pts = [
TaylorPoint(name=r.model, obs_std=r.obs_std, std=r.std, cc=r.cc, marker=marker, marker_size=marker_size)
TaylorPoint(
name=r.model,
obs_std=r.obs_std,
std=r.std,
cc=r.cc,
marker=marker,
marker_size=marker_size,
)
for r in df.itertuples()
]

23 changes: 18 additions & 5 deletions src/modelskill/comparison/_comparison.py
@@ -55,6 +55,15 @@ def _parse_dataset(data: xr.Dataset) -> xr.Dataset:
# matched_data = self._matched_data_to_xarray(matched_data)
assert "Observation" in data.data_vars

# Normalize datetime precision to avoid xarray interp issues with pandas 3.0
# Different data sources may have different precisions (datetime64[s], datetime64[us], etc.)
# Use nanoseconds (ns) for backward compatibility with pandas 2.x
if data.time.dtype.kind == "M": # M = datetime
time_pd = data.time.to_index() # Preserves freq attribute
if time_pd.dtype != "datetime64[ns]":
time_index = time_pd.as_unit("ns")
data = data.assign_coords(time=time_index)

# no missing values allowed in Observation
if data["Observation"].isnull().any():
raise ValueError("Observation data must not contain missing values.")
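
A minimal standalone sketch of the normalization this hunk introduces, using made-up data (it assumes pandas >= 2.0, where `DatetimeIndex.as_unit` exists; whether a non-ns precision even survives `Dataset` construction depends on the xarray version):

```python
# Illustrative data only; assumes pandas >= 2.0 (DatetimeIndex.as_unit).
import pandas as pd
import xarray as xr

time = pd.date_range("2020-01-01", periods=3, freq="h").as_unit("s")  # datetime64[s]
ds = xr.Dataset({"Observation": ("time", [1.0, 2.0, 3.0])}, coords={"time": time})

if ds.time.dtype.kind == "M":                  # "M" = datetime64
    idx = ds.time.to_index()                   # pandas DatetimeIndex, keeps freq
    if idx.dtype != "datetime64[ns]":          # older xarray may already have cast to ns
        ds = ds.assign_coords(time=idx.as_unit("ns"))

print(ds.time.dtype)                           # datetime64[ns]
```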
@@ -331,12 +340,12 @@ def _matched_data_to_xarray(
)

# check that items.obs and items.model are numeric
if not np.issubdtype(df[items.obs].dtype, np.number):
if not pd.api.types.is_numeric_dtype(df[items.obs].dtype):
raise ValueError(
"Observation data is of type {df[items.obs].dtype}, it must be numeric"
)
for m in items.model:
if not np.issubdtype(df[m].dtype, np.number):
if not pd.api.types.is_numeric_dtype(df[m].dtype):
raise ValueError(
f"Model data: {m} is of type {df[m].dtype}, it must be numeric"
)
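
For context on the swapped dtype check: `pd.api.types.is_numeric_dtype` also recognises pandas nullable extension dtypes, while `np.issubdtype` only understands plain numpy dtypes. A small, hedged illustration (the exact failure mode of `np.issubdtype` varies with the numpy version):

```python
import numpy as np
import pandas as pd

s = pd.Series([1, 2, None], dtype="Int64")       # pandas nullable integer (extension dtype)

print(pd.api.types.is_numeric_dtype(s.dtype))    # True

# np.issubdtype expects a numpy dtype; a pandas extension dtype may raise
# instead of returning False, depending on the numpy version.
try:
    print(np.issubdtype(s.dtype, np.number))
except TypeError as err:
    print("np.issubdtype failed:", err)
```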
@@ -451,8 +460,10 @@ def __init__(
else {
# key: ModelResult(value, gtype=self.data.gtype, name=key, x=self.x, y=self.y)
str(key): PointModelResult(self.data[[str(key)]], name=str(key))
for key, value in matched_data.data_vars.items()
if value.attrs["kind"] == "model"
for key, value in self.data.data_vars.items()
# Use .get("kind") instead of ["kind"] to avoid KeyError
# Auxiliary variables (e.g., "leadtime", "wind") may not have "kind" attribute
if value.attrs.get("kind") == "model"
}
)
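
The move from `attrs["kind"]` to `attrs.get("kind")` matters because auxiliary variables may carry no `"kind"` attribute at all. A toy illustration of the filter, with made-up variable names:

```python
import xarray as xr

ds = xr.Dataset(
    {
        "m1": xr.DataArray([0.1, 0.2], dims="time", attrs={"kind": "model"}),
        "leadtime": xr.DataArray([0, 1], dims="time"),   # auxiliary, no "kind" attribute
    },
    coords={"time": [0, 1]},
)

# attrs.get() skips variables without the attribute instead of raising KeyError
models = {k: v for k, v in ds.data_vars.items() if v.attrs.get("kind") == "model"}
print(list(models))                                      # ['m1']
```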

@@ -762,7 +773,9 @@ def merge(
if isinstance(other, Comparer) and (self.name == other.name):
raw_mod_data = self.raw_mod_data.copy()
raw_mod_data.update(other.raw_mod_data) # TODO!
matched = self.data.merge(other.data).dropna(dim="time")
matched = self.data.merge(
other.data, compat="no_conflicts", join="outer"
).dropna(dim="time")
cmp = Comparer(matched_data=matched, raw_mod_data=raw_mod_data)

return cmp
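
A rough sketch of what the explicit merge options do on toy data: `join="outer"` keeps the union of the two time axes, and `compat="no_conflicts"` only raises if overlapping values actually disagree:

```python
import xarray as xr

a = xr.Dataset({"Observation": ("time", [1.0, 2.0])}, coords={"time": [0, 1]})
b = xr.Dataset({"m2": ("time", [1.5, 2.5])}, coords={"time": [1, 2]})

# join="outer": union of the two time axes; compat="no_conflicts": only error
# out if values on the overlap actually conflict.
merged = a.merge(b, compat="no_conflicts", join="outer")
print(merged.sizes)    # time dimension now has 3 entries
```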
4 changes: 2 additions & 2 deletions src/modelskill/comparison/_utils.py
@@ -36,9 +36,9 @@ def _add_spatial_grid_to_df(
)
# cut and get bin centre
df["xBin"] = pd.cut(df.x, bins=bins_x)
df["xBin"] = df["xBin"].apply(lambda x: x.mid)
df["xBin"] = df["xBin"].apply(lambda x: x.mid if pd.notna(x) else x)
df["yBin"] = pd.cut(df.y, bins=bins_y)
df["yBin"] = df["yBin"].apply(lambda x: x.mid)
df["yBin"] = df["yBin"].apply(lambda x: x.mid if pd.notna(x) else x)

return df

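The `pd.notna` guard is needed because `pd.cut` returns NaN for values that fall outside the bins, and a bare NaN has no `.mid` attribute. A small illustration with made-up values:

```python
import pandas as pd

x = pd.Series([0.5, 1.5, 9.0])               # 9.0 falls outside the bins
binned = pd.cut(x, bins=[0, 1, 2])           # out-of-range values become NaN

# Without the guard, NaN (a plain float) would raise AttributeError on .mid
mids = binned.apply(lambda b: b.mid if pd.notna(b) else b)
print(mids.tolist())                         # [0.5, 1.5, nan]
```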
8 changes: 5 additions & 3 deletions src/modelskill/metrics.py
@@ -588,9 +588,11 @@ def peak_ratio(
time = obs.index

# Calculate number of years
dt_int = (time[1:].values - time[0:-1].values).view("int64")
dt_int_mode = float(stats.mode(dt_int, keepdims=False)[0]) / 1e9 # in seconds
N_years = dt_int_mode / 24 / 3600 / 365.25 * len(time)
# Use total_seconds() to handle any datetime precision (ns, us, ms, s)
dt = time[1:] - time[:-1]
dt_seconds = dt.total_seconds().values
dt_mode_seconds = float(stats.mode(dt_seconds, keepdims=False)[0])
N_years = dt_mode_seconds / 24 / 3600 / 365.25 * len(time)
peak_index, AAP_ = _partial_duration_series(
time,
obs,
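For reference, the replaced integer view divided by 1e9 assumed nanosecond-resolution timestamps, whereas `total_seconds` works for any resolution. A minimal sketch with illustrative values (assumes pandas >= 2.0 for `as_unit`):

```python
import pandas as pd
from scipy import stats

# Second-resolution timestamps, as they might come from a dfs0 file
time = pd.date_range("2020-01-01", periods=5, freq="h").as_unit("s")

dt_seconds = (time[1:] - time[:-1]).total_seconds().values
dt_mode_seconds = float(stats.mode(dt_seconds, keepdims=False)[0])
print(dt_mode_seconds)    # 3600.0, regardless of the underlying datetime unit
```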
2 changes: 1 addition & 1 deletion src/modelskill/model/dummy.py
@@ -28,7 +28,7 @@ class DummyModelResult:
--------
>>> import pandas as pd
>>> import modelskill as ms
>>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="H", periods=2))
>>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="h", periods=2))
>>> obs = ms.PointObservation(df, name="foo")
>>> mr = ms.DummyModelResult(strategy='mean')
>>> pmr = mr.extract(obs)
11 changes: 11 additions & 0 deletions src/modelskill/timeseries/_point.py
@@ -122,6 +122,17 @@ def _convert_to_dataset(
data = data.rename({time_dim_name: "time"})
ds = data

# Normalize datetime precision to avoid xarray interp issues with pandas 3.0
# Different data sources (dfs0 files, DataFrames) may have different precisions
# (datetime64[s], datetime64[us], datetime64[ns], etc.), and xarray.interp()
# fails when interpolating between datasets with mismatched precisions.
# Use nanoseconds (ns) for backward compatibility with pandas 2.x
if ds.time.dtype.kind == "M": # M = datetime
time_pd = ds.time.to_index() # Preserves freq attribute
if time_pd.dtype != "datetime64[ns]":
time_index = time_pd.as_unit("ns")
ds = ds.assign_coords(time=time_index)

name = _validate_data_var_name(varname)

n_unique_times = len(ds.time.to_index().unique())
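A sketch of the interpolation scenario the comment above describes: model and observation times with different precisions are aligned to nanoseconds before calling `interp` (toy data; whether the un-normalized call actually fails depends on the pandas and xarray versions in play):

```python
import pandas as pd
import xarray as xr

model_time = pd.date_range("2020-01-01", periods=4, freq="h")   # datetime64[ns]
obs_time = pd.DatetimeIndex(
    ["2020-01-01 00:30", "2020-01-01 01:30"]
).as_unit("s")                                                   # datetime64[s]

mod = xr.Dataset({"WL": ("time", [0.0, 1.0, 2.0, 3.0])}, coords={"time": model_time})

# Normalizing both axes to ns before interpolating sidesteps the precision mismatch
interped = mod.interp(time=obs_time.as_unit("ns"))
print(interped["WL"].values)                                     # [0.5 1.5]
```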
3 changes: 2 additions & 1 deletion tests/integration/test_integration_grid.py
@@ -2,6 +2,7 @@
import pytest
import xarray as xr


def test_extract_point_from_3d():
mr = ms.GridModelResult(
"tests/testdata/cmems_mod_med_phy-sal_anfc_4.2km-3D_PT1H-m_1705916517624.nc",
@@ -27,4 +28,4 @@ def test_extract_point_from_3d():
sc = cmp.score()

# "Observed" data is extracted from the 3D model result, so the score should be 0.0
assert sc["MedSea"] == pytest.approx(0.0)
assert sc["MedSea"] == pytest.approx(0.0)
1 change: 0 additions & 1 deletion tests/model/test_point.py
@@ -248,7 +248,6 @@ def test_point_model_result_from_nc_file():


def test_interp_time():

df = pd.DataFrame(
{
"WL": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
1 change: 0 additions & 1 deletion tests/test_combine_comparers.py
@@ -1,4 +1,3 @@
import pandas as pd
import pytest

import modelskill as ms
31 changes: 25 additions & 6 deletions tests/test_comparercollection.py
@@ -570,12 +570,31 @@ def test_plot_accepts_figsize(cc_plot_function):
assert a, b == figsize


def test_peak_ratio(cc):
"""Non existent peak ratio"""
cc = cc.sel(model="m1")
sk = cc.skill(metrics=["peak_ratio"])

assert sk.loc["fake point obs", "peak_ratio"] == pytest.approx(1.119999999)
def test_peak_ratio():
"""Test peak_ratio with synthetic data containing clear, verifiable peaks"""
# Create data with 2 clear peaks:
# Peak 1: obs=5.0, model=5.5 → ratio=1.1
# Peak 2: obs=6.0, model=6.6 → ratio=1.1
# Expected peak_ratio = mean([1.1, 1.1]) = 1.1
times = pd.date_range("2020-01-01", periods=100, freq="h")
obs_vals = np.zeros(100)
mod_vals = np.zeros(100)

# Create peak 1 around index 10
obs_vals[8:13] = [0, 1, 5, 1, 0]
mod_vals[8:13] = [0, 1.1, 5.5, 1.1, 0]

# Create peak 2 around index 50
obs_vals[48:53] = [0, 1, 6, 1, 0]
mod_vals[48:53] = [0, 1.1, 6.6, 1.1, 0]

df = pd.DataFrame({"Observation": obs_vals, "model": mod_vals}, index=times)

cmp = ms.from_matched(df, obs_item=0, name="synthetic_peaks")
sk = cmp.skill(metrics=["peak_ratio"])

# Model peaks are 1.1x observation peaks
assert sk.to_dataframe()["peak_ratio"].values == pytest.approx(1.1, abs=0.01)


def test_peak_ratio_2(cc_pr):
1 change: 1 addition & 0 deletions tests/test_consistency.py
@@ -1,4 +1,5 @@
"""Test consitency across input formats and classes"""

from functools import partial
import pytest
import mikeio
1 change: 0 additions & 1 deletion tests/test_grid_skill.py
@@ -1,6 +1,5 @@
import pytest
import pandas as pd
import xarray as xr

import modelskill as ms

1 change: 0 additions & 1 deletion tests/test_multimodelcompare.py
@@ -1,7 +1,6 @@
import matplotlib.pyplot as plt
import numpy as np
import pytest
from matplotlib.table import Table

import modelskill as ms
import modelskill.metrics as mtr
4 changes: 2 additions & 2 deletions tests/test_simple_compare.py
@@ -124,12 +124,12 @@ def test_matching_pointobservation_with_trackmodelresult_is_not_possible():
# ignore the data
tdf = pd.DataFrame(
{"x": [1, 2], "y": [1, 2], "m1": [0, 0]},
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4S"),
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4s"),
)
mr = ms.TrackModelResult(tdf, item="m1", x_item="x", y_item="y")
pdf = pd.DataFrame(
data={"level": [0.0, 0.0]},
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4S"),
index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4s"),
)
obs = ms.PointObservation(pdf, item="level")
with pytest.raises(TypeError, match="TrackModelResult"):