From 780486c96581ac3c8481da6dd1c3f37e7df8e89c Mon Sep 17 00:00:00 2001
From: Daniel Thom <daniel.thom@gmail.com>
Date: Sun, 10 May 2026 12:30:46 -0600
Subject: [PATCH] Treat empty/comment-only YAML configs as empty list silently
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

yaml.safe_load returns None for empty files and for files that contain
only comments — the latter is exactly what `datasight generate` writes
for time_series.yaml when no timestamp candidates are detected. The
list-shape check then warned on every load. Treat None as an empty list
(silently) at all five list-loader sites; keep warning for genuinely
wrong shapes such as a top-level mapping.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/datasight/config.py     | 20 ++++++++++++++++++++
 src/datasight/validation.py |  3 +++
 tests/test_config_extra.py  | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/src/datasight/config.py b/src/datasight/config.py
index bffa9758..a636b991 100644
--- a/src/datasight/config.py
+++ b/src/datasight/config.py
@@ -265,6 +265,11 @@ def load_example_queries(path: str | None, project_dir: str) -> list[dict[str, A
         except yaml.YAMLError as e:
             logger.warning(f"Failed to parse {path}: {e}")
             return []
+    if data is None:
+        # Empty or comment-only YAML — yaml.safe_load returns None.
+        # `datasight generate` writes such scaffolds when nothing is detected,
+        # so treat this as an empty config rather than warning on every load.
+        return []
     if not isinstance(data, list):
         logger.warning(f"Expected a list in {path}, got {type(data).__name__}")
         return []
@@ -300,6 +305,11 @@ def load_measure_overrides(path: str | None, project_dir: str) -> list[dict[str,
         except yaml.YAMLError as e:
             logger.warning(f"Failed to parse {path}: {e}")
             return []
+    if data is None:
+        # Empty or comment-only YAML — yaml.safe_load returns None.
+        # `datasight generate` writes such scaffolds when nothing is detected,
+        # so treat this as an empty config rather than warning on every load.
+        return []
     if not isinstance(data, list):
         logger.warning(f"Expected a list in {path}, got {type(data).__name__}")
         return []
@@ -436,6 +446,11 @@ def load_time_series_config(path: str | None, project_dir: str) -> list[dict[str
         except yaml.YAMLError as e:
             logger.warning(f"Failed to parse {path}: {e}")
             return []
+    if data is None:
+        # Empty or comment-only YAML — yaml.safe_load returns None.
+        # `datasight generate` writes such scaffolds when nothing is detected,
+        # so treat this as an empty config rather than warning on every load.
+        return []
     if not isinstance(data, list):
         logger.warning(f"Expected a list in {path}, got {type(data).__name__}")
         return []
@@ -490,6 +505,11 @@ def load_joins_config(path: str | None, project_dir: str) -> list[dict[str, Any]
         except yaml.YAMLError as e:
             logger.warning(f"Failed to parse {path}: {e}")
             return []
+    if data is None:
+        # Empty or comment-only YAML — yaml.safe_load returns None.
+        # `datasight generate` writes such scaffolds when nothing is detected,
+        # so treat this as an empty config rather than warning on every load.
+        return []
     if not isinstance(data, list):
         logger.warning(f"Expected a list in {path}, got {type(data).__name__}")
         return []
diff --git a/src/datasight/validation.py b/src/datasight/validation.py
index c999acd5..bc721e75 100644
--- a/src/datasight/validation.py
+++ b/src/datasight/validation.py
@@ -42,6 +42,9 @@ def load_validation_config(
         except yaml.YAMLError as e:
             logger.warning(f"Failed to parse {path}: {e}")
             return []
+    if data is None:
+        # Empty or comment-only YAML — yaml.safe_load returns None.
+        return []
     if not isinstance(data, list):
         logger.warning(f"Expected a list in {path}, got {type(data).__name__}")
         return []
diff --git a/tests/test_config_extra.py b/tests/test_config_extra.py
index 62a85a03..07da11d0 100644
--- a/tests/test_config_extra.py
+++ b/tests/test_config_extra.py
@@ -253,10 +253,39 @@ def test_load_time_series_config_invalid_yaml(tmp_path):
     assert load_time_series_config(None, str(tmp_path)) == []
 
 
+def _capture_loguru_warnings():
+    """Capture loguru warnings (pytest's caplog misses them); return (messages, cleanup)."""
+    from loguru import logger as _logger
+
+    captured: list[str] = []
+    sink_id = _logger.add(lambda msg: captured.append(str(msg)), level="WARNING")
+    return captured, lambda: _logger.remove(sink_id)
+
+
 def test_load_time_series_config_non_list(tmp_path):
     p = tmp_path / "time_series.yaml"
     p.write_text("foo: bar\n", encoding="utf-8")
-    assert load_time_series_config(None, str(tmp_path)) == []
+    captured, cleanup = _capture_loguru_warnings()
+    try:
+        assert load_time_series_config(None, str(tmp_path)) == []
+    finally:
+        cleanup()
+    # Genuine wrong-shape input still warns.
+    assert any("Expected a list" in line for line in captured)
+
+
+def test_load_time_series_config_comment_only_is_silent(tmp_path):
+    """An all-comments scaffold (what `datasight generate` writes when it
+    finds no timestamp candidates) parses to None — treat as empty silently
+    rather than warning on every load."""
+    p = tmp_path / "time_series.yaml"
+    p.write_text("# datasight time series declarations\n# nothing detected\n", encoding="utf-8")
+    captured, cleanup = _capture_loguru_warnings()
+    try:
+        assert load_time_series_config(None, str(tmp_path)) == []
+    finally:
+        cleanup()
+    assert not any("Expected a list" in line for line in captured)
 
 
 def test_load_time_series_config_valid_and_invalid_entries(tmp_path):