From 780486c96581ac3c8481da6dd1c3f37e7df8e89c Mon Sep 17 00:00:00 2001 From: Daniel Thom Date: Sun, 10 May 2026 12:30:46 -0600 Subject: [PATCH] Treat empty/comment-only YAML configs as empty list silently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit yaml.safe_load returns None for files containing only comments — which is exactly what `datasight generate` writes for time_series.yaml when no timestamp candidates are detected. The list-shape check was then warning on every load. Treat None as an empty list (silent) at all five list-loader sites; keep warning for genuinely wrong shapes like a top-level mapping. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/datasight/config.py | 20 ++++++++++++++++++++ src/datasight/validation.py | 3 +++ tests/test_config_extra.py | 31 ++++++++++++++++++++++++++++++- 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/datasight/config.py b/src/datasight/config.py index bffa9758..a636b991 100644 --- a/src/datasight/config.py +++ b/src/datasight/config.py @@ -265,6 +265,11 @@ def load_example_queries(path: str | None, project_dir: str) -> list[dict[str, A except yaml.YAMLError as e: logger.warning(f"Failed to parse {path}: {e}") return [] + if data is None: + # Empty or comment-only YAML — yaml.safe_load returns None. + # `datasight generate` writes such scaffolds when nothing is detected, + # so treat this as an empty config rather than warning on every load. + return [] if not isinstance(data, list): logger.warning(f"Expected a list in {path}, got {type(data).__name__}") return [] @@ -300,6 +305,11 @@ def load_measure_overrides(path: str | None, project_dir: str) -> list[dict[str, except yaml.YAMLError as e: logger.warning(f"Failed to parse {path}: {e}") return [] + if data is None: + # Empty or comment-only YAML — yaml.safe_load returns None. + # `datasight generate` writes such scaffolds when nothing is detected, + # so treat this as an empty config rather than warning on every load. + return [] if not isinstance(data, list): logger.warning(f"Expected a list in {path}, got {type(data).__name__}") return [] @@ -436,6 +446,11 @@ def load_time_series_config(path: str | None, project_dir: str) -> list[dict[str except yaml.YAMLError as e: logger.warning(f"Failed to parse {path}: {e}") return [] + if data is None: + # Empty or comment-only YAML — yaml.safe_load returns None. + # `datasight generate` writes such scaffolds when nothing is detected, + # so treat this as an empty config rather than warning on every load. + return [] if not isinstance(data, list): logger.warning(f"Expected a list in {path}, got {type(data).__name__}") return [] @@ -490,6 +505,11 @@ def load_joins_config(path: str | None, project_dir: str) -> list[dict[str, Any] except yaml.YAMLError as e: logger.warning(f"Failed to parse {path}: {e}") return [] + if data is None: + # Empty or comment-only YAML — yaml.safe_load returns None. + # `datasight generate` writes such scaffolds when nothing is detected, + # so treat this as an empty config rather than warning on every load. + return [] if not isinstance(data, list): logger.warning(f"Expected a list in {path}, got {type(data).__name__}") return [] diff --git a/src/datasight/validation.py b/src/datasight/validation.py index c999acd5..bc721e75 100644 --- a/src/datasight/validation.py +++ b/src/datasight/validation.py @@ -42,6 +42,9 @@ def load_validation_config( except yaml.YAMLError as e: logger.warning(f"Failed to parse {path}: {e}") return [] + if data is None: + # Empty or comment-only YAML — yaml.safe_load returns None. + return [] if not isinstance(data, list): logger.warning(f"Expected a list in {path}, got {type(data).__name__}") return [] diff --git a/tests/test_config_extra.py b/tests/test_config_extra.py index 62a85a03..07da11d0 100644 --- a/tests/test_config_extra.py +++ b/tests/test_config_extra.py @@ -253,10 +253,39 @@ def test_load_time_series_config_invalid_yaml(tmp_path): assert load_time_series_config(None, str(tmp_path)) == [] +def _capture_loguru_warnings(): + """Datasight uses loguru, which bypasses pytest's caplog by default.""" + from loguru import logger as _logger + + captured: list[str] = [] + sink_id = _logger.add(lambda msg: captured.append(str(msg)), level="WARNING") + return captured, lambda: _logger.remove(sink_id) + + def test_load_time_series_config_non_list(tmp_path): p = tmp_path / "time_series.yaml" p.write_text("foo: bar\n", encoding="utf-8") - assert load_time_series_config(None, str(tmp_path)) == [] + captured, cleanup = _capture_loguru_warnings() + try: + assert load_time_series_config(None, str(tmp_path)) == [] + finally: + cleanup() + # Genuine wrong-shape input still warns. + assert any("Expected a list" in line for line in captured) + + +def test_load_time_series_config_comment_only_is_silent(tmp_path): + """An all-comments scaffold (what `datasight generate` writes when it + finds no timestamp candidates) parses to None — treat as empty silently + rather than warning on every load.""" + p = tmp_path / "time_series.yaml" + p.write_text("# datasight time series declarations\n# nothing detected\n", encoding="utf-8") + captured, cleanup = _capture_loguru_warnings() + try: + assert load_time_series_config(None, str(tmp_path)) == [] + finally: + cleanup() + assert not any("Expected a list" in line for line in captured) def test_load_time_series_config_valid_and_invalid_entries(tmp_path):