diff --git a/README.md b/README.md
index e4f3d4f..280db2d 100644
--- a/README.md
+++ b/README.md
@@ -110,6 +110,39 @@ for feature, explanation in engineer.explain_features().items():
     print(f"{feature}: {explanation}")
 ```
 
+## Command-Line Interface
+
+FeatCopilot ships a `featcopilot` CLI for shell, scripting, and agentic
+(LLM tool-use) workflows — no Python glue required. All subcommands accept
+`--json` for machine-readable stdout; errors are written to stderr with a
+non-zero exit code so agents can parse failures deterministically.
+
+```bash
+# Discover capabilities (engines, selection methods, I/O formats)
+featcopilot info --json
+
+# Run feature engineering on a CSV / JSON file
+featcopilot transform \
+    --input data.csv --target label --output features.csv \
+    --engines tabular --max-features 50 --json
+
+# Inspect generated features (name, explanation, code) as JSON for an LLM
+featcopilot explain --input data.csv --target label
+
+# Equivalent module form
+python -m featcopilot info --json
+```
+
+Pass `--config config.json` to provide nested keys such as `llm_config`;
+explicit CLI flags override values from the config file.
+
+> **Parquet I/O.** FeatCopilot's base install does not pin a parquet engine.
+> To use `--input file.parquet` / `--output file.parquet` (or the
+> `parquet` value in `--input-format` / `--output-format`), install one of
+> `pyarrow` or `fastparquet`. `featcopilot info --json` reports
+> `"parquet_available": true` only when an engine is importable in the
+> current environment.
+
 ## Engines
 
 ### Tabular Engine
diff --git a/featcopilot/__main__.py b/featcopilot/__main__.py
new file mode 100644
index 0000000..0cce4e0
--- /dev/null
+++ b/featcopilot/__main__.py
@@ -0,0 +1,6 @@
+"""Enable ``python -m featcopilot`` to dispatch to the CLI."""
+
+from featcopilot.cli import main
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/featcopilot/cli.py b/featcopilot/cli.py
new file mode 100644
index 0000000..6f019f5
--- /dev/null
+++ b/featcopilot/cli.py
@@ -0,0 +1,1473 @@
+"""
+FeatCopilot command-line interface.
+
+Provides a stable, agent-friendly CLI for invoking FeatCopilot from shells,
+notebooks, agentic workflows (e.g. Copilot/LLM tool-use), and CI pipelines
+without writing Python glue code.
+
+Subcommands
+-----------
+info
+    Print version and supported engines/methods. Always machine-readable
+    when ``--json`` is passed.
+transform
+    Run :class:`featcopilot.AutoFeatureEngineer` on a tabular input file
+    (CSV / Parquet / JSON) and write engineered features to an output file.
+    Emits a JSON status line on stdout when ``--json`` is passed so that
+    agents can parse the result deterministically.
+explain
+    Fit the engineer and print a JSON document describing each generated
+    feature (name, explanation, code) for downstream LLM consumption.
+
+Examples
+--------
+Agentic usage (machine-readable result on stdout, errors on stderr)::
+
+    featcopilot info --json
+    featcopilot transform \\
+        --input data.csv --target label --output features.csv \\
+        --engines tabular --max-features 50 --json
+    featcopilot explain --input data.csv --target label --json
+
+Equivalent module invocation::
+
+    python -m featcopilot info --json
+
+Parquet I/O is supported only when ``pyarrow`` or ``fastparquet`` is
+installed (FeatCopilot's base distribution does not pin either); ``info``
+reports the runtime availability via ``parquet_available``.
+"""
+
+from __future__ import annotations
+
+import argparse
+import contextlib
+import json
+import logging
+import sys
+import threading
+import warnings
+from pathlib import Path
+from typing import Any
+
+from featcopilot import __version__
+from featcopilot.transformers.sklearn_compat import AutoFeatureEngineer
+from featcopilot.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+SUPPORTED_INPUT_FORMATS = ("csv", "parquet", "json")
+SUPPORTED_OUTPUT_FORMATS = ("csv", "parquet", "json")
+
+
+def _parquet_engine_available() -> bool:
+    """Return ``True`` if a parquet engine (pyarrow or fastparquet) can be imported.
+
+    FeatCopilot's base install pins neither ``pyarrow`` nor ``fastparquet``;
+    parquet I/O is therefore opportunistic. ``info`` uses this probe so the
+    machine-readable capability output reflects what will actually work in
+    the current environment, rather than always advertising parquet.
+
+    Uses ``__import__`` (not ``importlib.util.find_spec``) so the probe is
+    *correct* even on environments with a broken native install:
+    ``find_spec`` only confirms a distribution is on ``sys.path``; it does
+    not prove the C extensions can actually load. A real import is the
+    only way to verify the engine is usable.
+    """
+    for name in ("pyarrow", "fastparquet"):
+        try:
+            __import__(name)
+            return True
+        except Exception:  # noqa: BLE001  - any import-time failure means unusable
+            continue
+    return False
+
+
+def _detect_format(path: Path, override: str | None) -> str:
+    """Return one of ``SUPPORTED_INPUT_FORMATS`` for ``path``.
+
+    Parameters
+    ----------
+    path : pathlib.Path
+        File path whose suffix is inspected when ``override`` is ``None``.
+    override : str or None
+        Explicit format override (``csv`` / ``parquet`` / ``json``).
+
+    Raises
+    ------
+    ValueError
+        If the format cannot be determined or is not supported.
+    """
+    if override is not None:
+        fmt = override.lower()
+        if fmt not in SUPPORTED_INPUT_FORMATS:
+            raise ValueError(f"Unsupported format {override!r}; expected one of {SUPPORTED_INPUT_FORMATS}")
+        return fmt
+
+    suffix = path.suffix.lower().lstrip(".")
+    aliases = {"pq": "parquet", "parq": "parquet"}
+    fmt = aliases.get(suffix, suffix)
+    if fmt not in SUPPORTED_INPUT_FORMATS:
+        raise ValueError(
+            f"Cannot infer format from extension {path.suffix!r}; "
+            f"pass --input-format / --output-format (one of {SUPPORTED_INPUT_FORMATS})."
+        )
+    return fmt
+
+
+def _read_table(path: Path, fmt: str, *, nrows: int | None = None, suppress_truncation_warning: bool = False):
+    """Read a tabular file into a pandas DataFrame.
+
+    All user-facing failure modes (missing parquet engine, ``--input``
+    pointing at a directory, permission denied, malformed JSON/CSV,
+    decoding errors) are normalized into :class:`ValueError` so the CLI's
+    top-level handler routes them to the deterministic ``exit 2``
+    user-error path. The generic ``exit 1`` backstop is reserved for
+    truly unexpected (i.e. CLI-internal) errors.
+
+    Parameters
+    ----------
+    path : pathlib.Path
+        File to read.
+    fmt : str
+        One of ``csv`` / ``parquet`` / ``json``.
+    nrows : int or None, optional
+        Cap the number of rows returned. For ``csv``, this is propagated
+        directly to :func:`pandas.read_csv` so the underlying read is
+        memory-bounded. For ``parquet`` and ``json``, pandas does not
+        expose a native row limit, so the file is fully read and then
+        truncated; a :class:`UserWarning` is issued in that case (unless
+        ``suppress_truncation_warning`` is true) so the caller knows the
+        bound is post-read (not memory-bounded). The ``nrows`` cap is
+        applied with a deterministic head slice so re-runs on the same
+        input produce the same metadata.
+    suppress_truncation_warning : bool, optional
+        When True, the post-read truncation notice (parquet / JSON only)
+        is *not* emitted from this helper. Used by callers that emit
+        their own consolidated, user-facing warning so users don't see
+        a confusing pair of messages — see ``_cmd_explain``'s
+        ``--explain-sample-size`` handling.
+    """
+    import pandas as pd
+
+    if path.is_dir():
+        raise ValueError(f"--input expects a file, but {str(path)!r} is a directory.")
+
+    if fmt == "csv":
+        try:
+            # ``nrows`` is the only memory-bound knob native to read_csv;
+            # passing it here is what lets ``--explain-sample-size`` actually
+            # cap memory on huge CSV inputs (rather than loading the entire
+            # file and then trimming).
+            df = pd.read_csv(path, nrows=nrows)
+        except (
+            OSError,
+            pd.errors.ParserError,
+            pd.errors.EmptyDataError,
+            UnicodeDecodeError,
+        ) as exc:
+            # ``EmptyDataError`` fires for headerless / zero-byte CSVs;
+            # without it, those inputs would fall into the generic exit-1
+            # "unexpected error" path instead of the documented exit-2
+            # user-input error.
+            raise ValueError(f"Failed to read CSV from {str(path)!r}: {exc}") from exc
+    elif fmt == "parquet":
+        try:
+            df = pd.read_parquet(path)
+        except ImportError as exc:
+            raise ValueError(
+                f"Reading parquet requires a parquet engine (pyarrow or fastparquet); "
+                f"install one of them, or convert the input to CSV/JSON. Original error: {exc}"
+            ) from exc
+        except Exception as exc:
+            # Catch *any* backend failure (``OSError`` for I/O,
+            # ``pyarrow.lib.ArrowInvalid`` for corrupt files,
+            # ``ValueError`` from ``fastparquet`` for malformed metadata,
+            # etc.) and surface it via the deterministic exit-2 path.
+            # Catching ``Exception`` is appropriate here because the entire
+            # operation is delegated to a third-party backend; any error
+            # raised is by definition an I/O or data issue, not a CLI bug.
+            raise ValueError(f"Failed to read parquet from {str(path)!r}: {exc}") from exc
+        if nrows is not None and len(df) > nrows:
+            if not suppress_truncation_warning:
+                warnings.warn(
+                    f"--explain-sample-size cap is applied post-read for parquet "
+                    f"(loaded {len(df)} rows, truncating to {nrows}). pandas "
+                    "does not expose a native parquet row-limit, so the full "
+                    "file is materialized in memory before the cap. For hard "
+                    "memory bounds on huge inputs, convert to CSV first.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+            df = df.iloc[:nrows]
+    elif fmt == "json":
+        # ``orient='records'`` is the agent-friendly default; fall back to
+        # pandas' auto-detection when the file isn't a records list.
+        try:
+            df = pd.read_json(path, orient="records")
+        except ValueError:
+            try:
+                df = pd.read_json(path)
+            except ValueError as exc:
+                raise ValueError(f"Failed to read JSON from {str(path)!r}: {exc}") from exc
+        except OSError as exc:
+            raise ValueError(f"Failed to read JSON from {str(path)!r}: {exc}") from exc
+        if nrows is not None and len(df) > nrows:
+            if not suppress_truncation_warning:
+                warnings.warn(
+                    f"--explain-sample-size cap is applied post-read for JSON "
+                    f"(loaded {len(df)} rows, truncating to {nrows}). pandas "
+                    "does not expose a native JSON row-limit, so the full "
+                    "file is materialized in memory before the cap. For hard "
+                    "memory bounds on huge inputs, convert to CSV first.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+            df = df.iloc[:nrows]
+    else:
+        raise ValueError(f"Unsupported input format: {fmt}")
+
+    # Reject "header-only" / empty inputs across every supported format.
+    # ``DataFrame.empty`` returns ``True`` for both zero-row AND
+    # zero-column frames, but those are very different user errors that
+    # warrant different remediation paths, so check the two cases
+    # explicitly. ``pd.read_csv`` returns an empty DataFrame (no
+    # exception) when the CSV has headers but zero data rows; the same
+    # goes for an empty parquet file or ``[]`` JSON body. Without this
+    # check, the CLI would pass an empty frame into ``TabularEngine``,
+    # which divides by ``len(X)`` while fitting categorical encoding and
+    # exits via the generic ``unexpected error`` path. Surface both
+    # cases as clean exit-2 user-input errors.
+    if len(df.columns) == 0:
+        raise ValueError(
+            f"Input file {str(path)!r} has no columns. "
+            "Feature engineering requires at least one input feature column "
+            "(e.g. a JSON array of ``{}`` objects, or a table that only "
+            "preserved an index, would hit this error)."
+        )
+    if len(df) == 0:
+        raise ValueError(
+            f"Input file {str(path)!r} is empty (zero data rows). "
+            "Feature engineering requires at least one row of data."
+        )
+    return df
+
+
+def _write_table(df, path: Path, fmt: str) -> None:
+    """Write a pandas DataFrame to ``path`` in ``fmt``.
+
+    All user-facing failure modes (missing parquet engine, ``--output``
+    pointing at a directory, permission denied, parent-directory creation
+    failures) are normalized into :class:`ValueError` so the CLI surfaces a
+    clean stderr message via the standard ``exit 2`` path instead of the
+    generic ``exit 1`` "unexpected error" backstop.
+    """
+    if path.exists() and path.is_dir():
+        raise ValueError(f"--output expects a file, but {str(path)!r} is an existing directory.")
+
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+    except OSError as exc:
+        raise ValueError(f"Cannot create parent directory for {str(path)!r}: {exc}") from exc
+
+    if fmt == "csv":
+        try:
+            df.to_csv(path, index=False)
+        except OSError as exc:
+            raise ValueError(f"Failed to write CSV to {str(path)!r}: {exc}") from exc
+    elif fmt == "parquet":
+        try:
+            df.to_parquet(path, index=False)
+        except ImportError as exc:
+            raise ValueError(
+                f"Writing parquet requires a parquet engine (pyarrow or fastparquet); "
+                f"install one of them, or pick CSV/JSON via --output-format. Original error: {exc}"
+            ) from exc
+        except Exception as exc:
+            # Same broad-catch rationale as ``_read_table``: parquet write
+            # is fully delegated to a backend (``pyarrow``/``fastparquet``)
+            # whose errors include ``OSError`` (I/O), engine-specific type
+            # / conversion exceptions for unsupported column values, etc.
+            # All of these are user-facing data issues, not CLI bugs, so
+            # they should produce a clean exit-2 failure.
+            raise ValueError(f"Failed to write parquet to {str(path)!r}: {exc}") from exc
+    elif fmt == "json":
+        try:
+            df.to_json(path, orient="records", indent=2)
+        except OSError as exc:
+            raise ValueError(f"Failed to write JSON to {str(path)!r}: {exc}") from exc
+    else:
+        raise ValueError(f"Unsupported output format: {fmt}")
+
+
+# Top-level keys recognized in a ``--config`` JSON file. The CLI rejects
+# any other top-level key with a precise exit-2 error so typos like
+# ``max_feature`` (no s) fail fast in automation rather than silently
+# running with defaults.
+_KNOWN_CONFIG_KEYS = frozenset(
+    {
+        "engines",
+        "selection_methods",
+        "max_features",
+        "correlation_threshold",
+        "leakage_guard",
+        "gate_n_jobs",
+        "llm_config",
+        "verbose",
+        "explain_sample_size",
+    }
+)
+
+
+def _load_config(config_path: str | None) -> dict[str, Any]:
+    """Load a JSON config file (or return an empty dict).
+
+    Normalizes user-input mistakes (missing path, directory passed instead
+    of a file, invalid JSON, non-object root, unknown top-level keys) into
+    :class:`ValueError` / :class:`FileNotFoundError` so the CLI's top-level
+    error handler can route them all to the deterministic ``exit 2``
+    user-error path (rather than e.g. ``IsADirectoryError`` falling into
+    the generic ``exit 1`` "unexpected error" backstop, or a typo silently
+    being ignored).
+    """
+    if config_path is None:
+        return {}
+    path = Path(config_path)
+    if not path.exists():
+        raise FileNotFoundError(f"Config file not found: {config_path}")
+    if path.is_dir():
+        raise ValueError(f"--config expects a JSON file, but {config_path!r} is a directory.")
+    try:
+        with path.open("r", encoding="utf-8") as fh:
+            data = json.load(fh)
+    except json.JSONDecodeError as exc:
+        raise ValueError(f"Config file {config_path!r} is not valid JSON: {exc}") from exc
+    except OSError as exc:
+        # Catch-all for unreadable files (permission denied, broken symlink,
+        # etc.). Surface as a user-facing error rather than the generic
+        # exit-1 backstop.
+        raise ValueError(f"Config file {config_path!r} could not be read: {exc}") from exc
+    if not isinstance(data, dict):
+        raise ValueError(f"Config file {config_path!r} must contain a JSON object at the top level")
+    unknown = sorted(set(data.keys()) - _KNOWN_CONFIG_KEYS)
+    if unknown:
+        raise ValueError(
+            f"Config file {config_path!r} has unknown top-level key(s): {unknown}. "
+            f"Recognized keys: {sorted(_KNOWN_CONFIG_KEYS)}."
+        )
+    return data
+
+
+def _emit(payload: dict[str, Any], *, as_json: bool, stream=None) -> None:
+    """Emit a payload to stdout, JSON-encoded when ``as_json`` is true."""
+    stream = stream if stream is not None else sys.stdout
+    if as_json:
+        stream.write(json.dumps(payload, default=str, sort_keys=True))
+        stream.write("\n")
+    else:
+        for key, value in payload.items():
+            stream.write(f"{key}: {value}\n")
+    stream.flush()
+
+
+def _check_scalar_type(
+    name: str,
+    value: Any,
+    expected: tuple[type, ...],
+    *,
+    allow_none: bool = False,
+    allow_bool: bool = True,
+) -> None:
+    """Validate a scalar value's type for ``--config``-supplied keys.
+
+    Raises :class:`ValueError` (caught by ``main()`` -> exit 2) when the
+    value's type does not match. ``bool`` is a subclass of ``int`` in
+    Python; pass ``allow_bool=False`` to reject ``True``/``False`` for
+    numeric-only fields like ``max_features`` / ``correlation_threshold``.
+    """
+    if value is None:
+        if allow_none:
+            return
+        raise ValueError(f"`{name}` must not be null in --config")
+    if not allow_bool and isinstance(value, bool):
+        raise ValueError(
+            f"`{name}` in --config must be a {' or '.join(t.__name__ for t in expected)}; " f"got bool={value!r}."
+        )
+    if not isinstance(value, expected):
+        raise ValueError(
+            f"`{name}` in --config must be a {' or '.join(t.__name__ for t in expected)}; "
+            f"got {type(value).__name__}={value!r}."
+        )
+
+
+def _build_engineer(args: argparse.Namespace, *, include_selection_config: bool = True) -> AutoFeatureEngineer:
+    """Construct an :class:`AutoFeatureEngineer` from parsed CLI args.
+
+    Precedence: explicit CLI flags override values from ``--config``;
+    explicit config values (including empty lists) override the defaults.
+    Empty / non-list values are propagated unchanged so that
+    :meth:`AutoFeatureEngineer._validate_configuration` produces its
+    canonical (and deterministic) error path — the CLI's wrapper must not
+    silently rewrite a misconfigured config into something that looks
+    different from what the user wrote.
+
+    ``include_selection_config=False`` (used by the ``explain`` subcommand)
+    skips reading selection-only config keys (``selection_methods``,
+    ``correlation_threshold``) so a shared config file with selection
+    settings does not cause ``explain`` to fail config-validation for keys
+    that are inert at runtime (selection is disabled in ``explain``).
+    """
+    config = _load_config(args.config)
+
+    def pick(flag_value, config_key, default):
+        # Explicit CLI flag wins. Otherwise honor an explicit config entry
+        # — even a falsy one such as ``[]`` — so AutoFeatureEngineer can
+        # raise its own clear "must contain at least one" error rather than
+        # the CLI silently swapping in defaults. Only fall back to the
+        # default when the key is *absent* from the config.
+        if flag_value is not None:
+            return flag_value
+        if config_key in config:
+            return config[config_key]
+        return default
+
+    engines = pick(args.engines, "engines", ["tabular"])
+    # ``explain`` exposes ``--engines`` and ``--max-features`` (engine-level
+    # caps) but not the selection-only flags ``--selection-methods`` and
+    # ``--correlation-threshold``. When ``include_selection_config`` is
+    # False (i.e. we're called from ``explain``) we also skip reading the
+    # selection-only keys from the config file, so a shared transform/explain
+    # config with selection settings won't trip ``explain`` over keys that
+    # have no effect on its runtime behavior.
+    if include_selection_config:
+        selection_methods = pick(
+            getattr(args, "selection_methods", None),
+            "selection_methods",
+            ["mutual_info", "importance"],
+        )
+        correlation_threshold = pick(getattr(args, "correlation_threshold", None), "correlation_threshold", 0.85)
+    else:
+        selection_methods = ["mutual_info", "importance"]
+        correlation_threshold = 0.85
+    max_features = pick(args.max_features, "max_features", None)
+    leakage_guard = pick(args.leakage_guard, "leakage_guard", "warn")
+    gate_n_jobs = pick(args.gate_n_jobs, "gate_n_jobs", 1)
+
+    # Type-check scalar config fields here so the CLI surfaces a clean
+    # exit-2 error instead of a downstream ``TypeError`` (e.g. from
+    # ``self.max_features <= 0`` when the JSON config supplied a string).
+    # ``argparse`` already enforces types for the flag side; this only
+    # guards against malformed ``--config`` JSON.
+    _check_scalar_type("max_features", max_features, (int,), allow_none=True, allow_bool=False)
+    _check_scalar_type("correlation_threshold", correlation_threshold, (int, float), allow_bool=False)
+    _check_scalar_type("gate_n_jobs", gate_n_jobs, (int,), allow_bool=False)
+    _check_scalar_type("leakage_guard", leakage_guard, (str,))
+
+    # Range-check ``correlation_threshold``: it's only meaningful in
+    # ``[0.0, 1.0]``. Values above 1 silently disable redundancy
+    # elimination (``FeatureSelector.fit`` only runs it when threshold
+    # < 1.0); values below 0 effectively treat every numeric pair as
+    # redundant. Reject out-of-range up front so the CLI doesn't quietly
+    # change selector behavior.
+    if not (0.0 <= float(correlation_threshold) <= 1.0):
+        raise ValueError(f"`correlation_threshold` must be in the range [0.0, 1.0]; got {correlation_threshold!r}.")
+    # ``max_features`` must be positive when set (matches
+    # AutoFeatureEngineer's own validation). Surface that here too so
+    # the message says ``max_features`` rather than the more cryptic
+    # transformer error.
+    if max_features is not None and max_features <= 0:
+        raise ValueError(f"`max_features` must be a positive integer when set; got {max_features!r}.")
+
+    # Validate ``llm_config`` is a JSON object (i.e. a Python dict) before
+    # forwarding it. Without this check, a misconfigured non-dict value
+    # would only fail at engine-construction time inside
+    # ``AutoFeatureEngineer._create_engine`` via ``self.llm_config.get(...)``,
+    # raising an ``AttributeError`` that bypasses the structured exit-2
+    # user-error path (the CLI would surface it as exit 1 "unexpected
+    # error", which is a poor agent contract for a documented config key).
+    llm_config_raw = config.get("llm_config")
+    if llm_config_raw is None:
+        llm_config: dict[str, Any] = {}
+    elif isinstance(llm_config_raw, dict):
+        llm_config = llm_config_raw
+    else:
+        raise ValueError(
+            "`llm_config` in the --config file must be a JSON object (mapping); "
+            f"got {type(llm_config_raw).__name__}={llm_config_raw!r}."
+        )
+
+    # ``verbose`` is type-checked before being forwarded so a malformed
+    # config like ``{"verbose": "false"}`` (truthy string) does NOT silently
+    # turn verbose mode on — instead it raises a clean exit-2 error
+    # consistent with the other scalar fields. ``args.verbose`` is already
+    # a bool / None thanks to ``BooleanOptionalAction``; only the config
+    # path can introduce a non-bool.
+    verbose_raw = pick(args.verbose, "verbose", False)
+    _check_scalar_type("verbose", verbose_raw, (bool,))
+    verbose = bool(verbose_raw)
+
+    # Pass ``engines`` / ``selection_methods`` through *unchanged* (no
+    # ``list(...)`` wrapping). Coercion would convert a misconfigured
+    # JSON string like ``"tabular"`` into ``['t','a','b','u','l','a','r']``,
+    # turning a clear type error into a confusing "Unknown engines" path.
+    # AutoFeatureEngineer.__init__ rejects non-list/tuple inputs with a
+    # precise message — let it.
+    return AutoFeatureEngineer(
+        engines=engines,
+        max_features=max_features,
+        selection_methods=selection_methods,
+        correlation_threshold=correlation_threshold,
+        llm_config=llm_config,
+        verbose=verbose,
+        leakage_guard=leakage_guard,
+        gate_n_jobs=gate_n_jobs,
+    )
+
+
+def _split_xy(df, target: str | None):
+    """Split a DataFrame into ``(X, y)``; ``y`` is ``None`` when no target."""
+    if target is None:
+        return df, None
+    if target not in df.columns:
+        raise ValueError(
+            f"Target column {target!r} not found in input. "
+            f"Available columns: {list(df.columns)[:20]}{'...' if len(df.columns) > 20 else ''}"
+        )
+    y = df[target]
+    X = df.drop(columns=[target])
+    return X, y
+
+
+def _cmd_info(args: argparse.Namespace) -> int:
+    """Print version + supported engines/methods.
+
+    Parquet appears in ``supported_input_formats`` / ``supported_output_formats``
+    only when an actual parquet engine (``pyarrow`` or ``fastparquet``) can
+    be imported in the current environment — otherwise the ``info`` output
+    would advertise a format that immediately fails on use, which is
+    misleading for the agentic capability-discovery the CLI is designed to
+    support.
+    """
+    parquet_ok = _parquet_engine_available()
+    input_formats = [f for f in SUPPORTED_INPUT_FORMATS if f != "parquet" or parquet_ok]
+    output_formats = [f for f in SUPPORTED_OUTPUT_FORMATS if f != "parquet" or parquet_ok]
+    payload = {
+        "version": __version__,
+        "supported_engines": sorted(AutoFeatureEngineer.SUPPORTED_ENGINES),
+        "supported_selection_methods": sorted(AutoFeatureEngineer.SUPPORTED_SELECTION_METHODS),
+        "supported_leakage_guards": sorted(AutoFeatureEngineer.SUPPORTED_LEAKAGE_GUARDS),
+        "supported_input_formats": input_formats,
+        "supported_output_formats": output_formats,
+        "parquet_available": parquet_ok,
+    }
+    _emit(payload, as_json=args.json)
+    return 0
+
+
+def _fit_transform_capturing_warnings(engineer, X, y, **kwargs):
+    """Thin convenience wrapper: run ``engineer.fit_transform(X, y, **kwargs)``
+    inside a single ``_capture_featcopilot_messages()`` block.
+
+    This helper is *not* used by the CLI subcommands themselves anymore
+    — they wrap a wider region (read + fit_transform + write) in one
+    capture so warnings emitted during pandas / pyarrow read or write
+    phases are also surfaced via the JSON ``warnings`` field instead of
+    leaking onto stderr. It is preserved as a public-ish helper for
+    external test code that just wants to capture messages around a
+    plain ``fit_transform`` call.
+
+    Returns
+    -------
+    (messages, result)
+        ``messages`` is a list of ``str`` (warnings then logs, in
+        emission order). ``result`` is whatever ``fit_transform`` returned.
+    """
+    with _capture_featcopilot_messages() as captured:
+        result = engineer.fit_transform(X, y, **kwargs)
+    return captured, result
+
+
+class _ThreadCaptureState:
+    """Holds per-thread capture *stacks* with strict per-thread isolation
+    and a *narrow* cross-thread fallback for LLM-client log records.
+
+    Each thread maps to a stack of capture lists. Nested
+    :func:`_capture_featcopilot_messages` calls on the same thread push
+    onto the stack; the innermost active capture is always at the top
+    and receives records / warnings until its block exits, at which
+    point the outer capture (if any) becomes active again.
+
+    **Strict thread isolation by default.** :meth:`get` returns a target
+    list ONLY for the calling thread itself. This avoids the
+    misattribution that an unconditional single-active-capture fallback
+    would cause: any featcopilot log on any thread would otherwise be
+    silently swallowed into the active CLI run's payload, including
+    output from unrelated background work happening in the same process.
+
+    **Narrow LLM-client fallback.** :meth:`get_for_llm_record` is the
+    one exception: when a record originates from one of the *exact*
+    sync LLM client modules whitelisted in
+    :attr:`_LLM_FALLBACK_LOGGER_NAMES` (the modules whose synchronous
+    entry points fall back to ``ThreadPoolExecutor`` in event-loop
+    environments), and exactly one capture is active in the process,
+    the record is routed to that capture even when emitted on a worker
+    thread. This addresses the common case where an LLM client's
+    mock-mode startup warning fires on a worker that ``submit()``
+    spawned and would otherwise bleed onto stderr. The whitelist is an
+    explicit set of module names — *not* a prefix — so unrelated
+    ``featcopilot.llm.*`` loggers (e.g. ``semantic_engine``,
+    ``code_generator``, ``transform_rule_generator``, ``explainer``)
+    that never run on worker threads do not trigger the fallback.
+
+    **Atomic per-record decision.** :meth:`resolve_for_record` caches
+    the lookup on the record itself the first time it is called, so
+    :class:`_ThreadRoutingHandler` (which routes records) and
+    :class:`_SuppressCapturingFilter` (which suppresses them from
+    stderr) cannot disagree under concurrent push/pop activity from
+    other threads. Without that caching, ``filter`` could see one
+    state and the later ``emit`` could see another, breaking the
+    "captured XOR on stderr" invariant.
+
+    Shared by :class:`_ThreadRoutingHandler` (writes records),
+    :class:`_SuppressCapturingFilter` (suppresses stderr), and the
+    routing ``warnings.showwarning`` override.
+    """
+
+    # Logger names whose records are eligible for the narrow cross-thread
+    # fallback. Only the synchronous LLM client modules whose ``run`` /
+    # batch entry points fall back to ``ThreadPoolExecutor`` workers in
+    # event-loop environments are listed; their startup / mock-mode
+    # warnings legitimately fire from those worker threads. Other
+    # ``featcopilot.llm.*`` loggers are intentionally NOT included so
+    # cross-thread records from unrelated background work cannot be
+    # silently swallowed into an active CLI capture.
+    _LLM_FALLBACK_LOGGER_NAMES = frozenset(
+        {
+            "featcopilot.llm.copilot_client",
+            "featcopilot.llm.litellm_client",
+            "featcopilot.llm.openai_client",
+        }
+    )
+
+    # Sentinel for "no cached decision yet" on a log record. Distinct
+    # from ``None``, which is itself a valid resolved decision meaning
+    # "no capture target — let the record flow to stderr".
+    _UNCACHED = object()
+
+    def __init__(self):
+        self._per_thread: dict[int, list[list[str]]] = {}
+        self._lock = threading.Lock()
+
+    def push(self, tid: int, target: list[str]) -> None:
+        with self._lock:
+            self._per_thread.setdefault(tid, []).append(target)
+
+    def pop(self, tid: int) -> None:
+        with self._lock:
+            stack = self._per_thread.get(tid)
+            if stack:
+                stack.pop()
+                if not stack:
+                    del self._per_thread[tid]
+
+    def get(self, tid: int) -> list[str] | None:
+        # Strict per-thread lookup. No cross-thread fallback: an
+        # unconditional fallback was too broad and could silently
+        # swallow unrelated background log output into a CLI run's
+        # payload. The narrow LLM-client fallback lives in
+        # :meth:`get_for_llm_record` instead, opted into by name.
+        with self._lock:
+            stack = self._per_thread.get(tid)
+            if stack:
+                return stack[-1]
+            return None
+
+    def get_for_llm_record(self, tid: int, logger_name: str) -> list[str] | None:
+        """Per-thread lookup with a narrow cross-thread fallback for LLM
+        client records.
+
+        When the calling thread has its own active capture, that's used.
+        Otherwise — and only when the record originates from a
+        whitelisted sync LLM client module
+        (:attr:`_LLM_FALLBACK_LOGGER_NAMES`) AND exactly one capture is
+        active in the process — the record is routed to that single
+        capture so it doesn't bleed onto stderr. The whitelist is an
+        explicit set of module names so unrelated ``featcopilot.llm.*``
+        loggers (e.g. ``semantic_engine``, ``code_generator``) that
+        never hop onto worker threads do not trigger the fallback.
+
+        This method takes the lock once and returns a snapshot
+        decision; callers should generally use :meth:`resolve_for_record`
+        to additionally cache the result on the record so that paired
+        filter / emit calls can never disagree under concurrent
+        push/pop on other threads.
+        """
+        with self._lock:
+            stack = self._per_thread.get(tid)
+            if stack:
+                return stack[-1]
+            if logger_name not in self._LLM_FALLBACK_LOGGER_NAMES:
+                return None
+            if len(self._per_thread) != 1:
+                # Either no captures are active (nothing to route to) or
+                # multiple are active (ambiguous — keep strict isolation
+                # so concurrent CLI calls don't cross-contaminate).
+                return None
+            only_stack = next(iter(self._per_thread.values()))
+            if only_stack:
+                return only_stack[-1]
+            return None
+
+    def resolve_for_record(self, record: logging.LogRecord) -> list[str] | None:
+        """Resolve the capture target for ``record`` exactly once.
+
+        The first call computes the decision via
+        :meth:`get_for_llm_record` and caches it on the record itself
+        as ``record._featcopilot_capture_target``; subsequent calls
+        return the cached value. This is what makes the routing
+        handler's ``emit`` and the suppression filter's ``filter``
+        atomic with respect to each other: they always see the same
+        decision for a given record even if another thread pops or
+        pushes a capture between the two calls.
+
+        Logging records are produced and dispatched to handlers in the
+        same thread, so caching directly on the record is safe — there
+        is no concurrent reader of the same record's attributes.
+        """
+        cached = getattr(record, "_featcopilot_capture_target", self._UNCACHED)
+        if cached is not self._UNCACHED:
+            return cached
+        target = self.get_for_llm_record(threading.get_ident(), record.name)
+        record._featcopilot_capture_target = target
+        return target
+
+
+class _ThreadRoutingHandler(logging.Handler):
+    """Logging handler that routes records to the calling thread's capture list.
+
+    Attached once to the ``featcopilot`` root logger. Records propagated
+    from any ``featcopilot.*`` child logger reach this handler in the same
+    way they reach the existing stderr handler. If the calling thread has
+    a registered capture list, the record is appended to it.
+    Otherwise, for records originating from a ``featcopilot.llm.*``
+    logger AND when exactly one capture is active in the process, the
+    record is routed to that capture (the narrow LLM-client cross-thread
+    fallback — see :class:`_ThreadCaptureState`). Records from any
+    other thread / logger combination flow through to the existing
+    stderr handler.
+    """
+
+    def __init__(self, state: _ThreadCaptureState):
+        super().__init__(logging.DEBUG)
+        self._state = state
+        self.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
+
+    def emit(self, record: logging.LogRecord) -> None:
+        # Use the cached resolver so this handler and the paired
+        # ``_SuppressCapturingFilter`` always see the same decision
+        # for a given record, even if another thread pushes or pops
+        # a capture between the filter and emit phases.
+        target = self._state.resolve_for_record(record)
+        if target is None:
+            return
+        try:
+            target.append(self.format(record))
+        except Exception:  # pragma: no cover - never let logging crash the CLI
+            target.append(record.getMessage())
+
+
+class _SuppressCapturingFilter(logging.Filter):
+    """Filter for the *existing* handlers: drops records being captured.
+
+    Without this filter, every record routed by
+    :class:`_ThreadRoutingHandler` to a capture list would still hit the
+    featcopilot root logger's stderr ``StreamHandler`` and bleed onto
+    stderr — breaking the CLI's "stderr reserved for failures" contract.
+    The filter mirrors the routing handler's policy so the two stay in
+    lockstep: anything captured (current-thread record OR cross-thread
+    LLM-client record under the narrow fallback) is also suppressed
+    from the original handlers; anything else (records from
+    non-capturing threads / unrelated background work) flows through to
+    stderr unchanged.
+    """
+
+    def __init__(self, state: _ThreadCaptureState):
+        super().__init__()
+        self._state = state
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        # Use the cached resolver so this filter and the paired
+        # ``_ThreadRoutingHandler`` always see the same decision for a
+        # given record, even if another thread pushes or pops a capture
+        # between this call and the routing handler's emit. Without the
+        # cache, the two could disagree and the same record could end
+        # up both captured and on stderr (or suppressed without being
+        # captured) — breaking the "stderr reserved for failures"
+        # contract under concurrent CLI calls.
+        return self._state.resolve_for_record(record) is None
+
+
+# Module-level singletons. Installed exactly once on the featcopilot root
+# logger / its existing handlers; subsequent ``_capture_featcopilot_messages``
+# calls just push/pop thread state. No global lock is held during the slow
+# ``fit_transform`` body — concurrent threads each capture their own records
+# independently.
+_capture_state = _ThreadCaptureState()
+_routing_handler = _ThreadRoutingHandler(_capture_state)
+_suppress_filter = _SuppressCapturingFilter(_capture_state)
+_install_lock = threading.Lock()
+_install_done = False
+# Captures the original ``warnings.showwarning`` at first install so the
+# routing override can chain to it for non-capturing threads (and so we
+# never mutate it again on subsequent capture calls — the previous
+# per-call save/restore raced under concurrent overlapping captures).
+_original_showwarning = None
+
+
+def _routing_showwarning(message, category, filename, lineno, file=None, line=None):
+    """Permanent ``warnings.showwarning`` override (installed once).
+
+    Routes warnings to the *innermost* capturing list for the current
+    thread (via :class:`_ThreadCaptureState` stack lookup). If the
+    current thread is not capturing, chains to the original
+    ``warnings.showwarning`` so non-capturing threads keep their normal
+    behavior.
+
+    Installed once globally — *not* swapped per-call — so concurrent
+    overlapping captures on different threads cannot race on the
+    process-global ``warnings.showwarning`` slot.
+    """
+    target = _capture_state.get(threading.get_ident())
+    if target is not None:
+        target.append(str(message))
+        return
+    if _original_showwarning is not None:
+        _original_showwarning(message, category, filename, lineno, file, line)
+
+
+def _install_capture_hooks_once() -> None:
+    """Install the routing handler + suppress filter + showwarning override.
+
+    The logger handler and filter are installed exactly once (idempotent).
+    The ``warnings.showwarning`` override is re-installed every call if
+    something else has replaced it — this is necessary because external
+    code (most commonly ``warnings.catch_warnings()`` blocks) can reset
+    the global ``warnings.showwarning`` and undo a previous install. The
+    fresh re-install captures the current (caller's) ``showwarning`` as
+    the new "original" to chain to, so non-capturing threads still see
+    whatever warning behavior the caller had set up.
+
+    All hooks themselves dispatch on :class:`_ThreadCaptureState` which
+    uses a per-thread stack, so they are no-ops for threads that aren't
+    currently capturing.
+    """
+    global _install_done, _original_showwarning
+    with _install_lock:
+        # Logger handler/filter install (truly once — these can't be
+        # silently undone by external code in the way ``warnings.showwarning``
+        # can).
+        if not _install_done:
+            fc_root = logging.getLogger("featcopilot")
+            if _routing_handler not in fc_root.handlers:
+                fc_root.addHandler(_routing_handler)
+            for handler in list(fc_root.handlers):
+                if handler is _routing_handler:
+                    continue
+                if _suppress_filter not in handler.filters:
+                    handler.addFilter(_suppress_filter)
+            _install_done = True
+
+        # ``warnings.showwarning`` install — re-check every entry. A
+        # caller's ``warnings.catch_warnings()`` block restores the
+        # previous ``showwarning`` on exit, undoing our install. Re-
+        # installing on next entry is what makes overlapping captures
+        # robust against caller-side warning context manipulation.
+        if warnings.showwarning is not _routing_showwarning:
+            _original_showwarning = warnings.showwarning
+            warnings.showwarning = _routing_showwarning
+
+
+@contextlib.contextmanager
+def _capture_featcopilot_messages():
+    """Capture FeatCopilot log records and ``warnings.warn`` calls emitted
+    on the *current thread*.
+
+    Yields a list that the caller can read after the with-block exits.
+    The list contains formatted log records (in emission order) and any
+    Python warning messages emitted during the with-block on this thread.
+
+    Concurrency model
+    -----------------
+    * **Logger records** are routed *per-thread* via
+      :class:`_ThreadRoutingHandler` (added once to the ``featcopilot``
+      root logger) and a :class:`_SuppressCapturingFilter` on the existing
+      handlers. Two threads can capture concurrently without blocking
+      each other; each sees only its own records, and other threads'
+      records still flow normally to stderr.
+    * **``warnings.warn`` records** are intercepted via a permanent
+      :func:`_routing_showwarning` override installed once. The override
+      routes by ``threading.get_ident()`` and chains to the original
+      ``warnings.showwarning`` for non-capturing threads. The override is
+      *not* swapped per-call, so concurrent overlapping captures on
+      different threads cannot race on the process-global
+      ``warnings.showwarning`` slot.
+    * **Nested captures** on the same thread are supported via a
+      per-thread stack in :class:`_ThreadCaptureState`. Records and
+      warnings always go to the innermost active capture; when the inner
+      block exits, the outer capture is automatically reactivated.
+
+    The contextmanager does NOT hold any lock for the duration of the
+    with-block — only briefly during install/push/pop — so long-running
+    ``fit_transform`` calls in one thread do not block other threads
+    from running concurrently.
+    """
+    _install_capture_hooks_once()
+
+    captured: list[str] = []
+    tid = threading.get_ident()
+    _capture_state.push(tid, captured)
+    try:
+        yield captured
+    finally:
+        _capture_state.pop(tid)
+
+
+def _cmd_transform(args: argparse.Namespace) -> int:
+    """Read input, fit/transform, write output.
+
+    The "successful runs keep stderr empty" CLI contract requires that
+    *every* phase that can legitimately emit a Python warning be wrapped
+    in the message capture, not just ``fit_transform``. Concretely:
+
+    * **Read** — ``pd.read_csv`` can emit ``DtypeWarning`` on mixed-type
+      columns and parquet/JSON readers can emit pyarrow / pandas
+      future-warnings on a successful read.
+    * **Fit / transform** — engineers themselves emit warnings (e.g.
+      ``AutoFeatureEngineer.fit`` for leakage-prone column names under
+      ``leakage_guard='warn'``).
+    * **Write** — ``DataFrame.to_csv`` / ``to_parquet`` / ``to_json``
+      can emit pandas / pyarrow ``FutureWarning`` / ``UserWarning``
+      during a successful write.
+
+    All three phases now live inside one capture block so any warnings
+    they emit are surfaced via the JSON ``warnings`` field rather than
+    leaking onto stderr.
+    """
+    input_path = Path(args.input)
+    if not input_path.exists():
+        raise FileNotFoundError(f"Input file not found: {args.input}")
+    output_path = Path(args.output)
+
+    in_fmt = _detect_format(input_path, args.input_format)
+    out_fmt = _detect_format(output_path, args.output_format)
+
+    # Single capture context spans read + fit_transform + write so any
+    # legitimate-but-noisy warnings from pandas/pyarrow during read or
+    # write end up in the JSON payload's ``warnings`` field instead of
+    # bleeding to stderr. ``_capture_featcopilot_messages`` is a no-op
+    # for non-warning code paths so wrapping the broader region has no
+    # side effects on the happy path.
+    with _capture_featcopilot_messages() as captured_warnings:
+        df = _read_table(input_path, in_fmt)
+        X, y = _split_xy(df, args.target)
+
+        # Build the engineer first: ``_build_engineer`` runs all scalar / list /
+        # dict type validation on the merged CLI-flag + config view, so any
+        # malformed value (e.g. ``"max_features": "5"``, ``"verbose": "false"``)
+        # surfaces a precise exit-2 error here rather than down the wrong
+        # ``--target is required`` rabbit hole.
+        engineer = _build_engineer(args)
+
+        # Selection requires a target column to fit against. ``AutoFeatureEngineer``
+        # only actually fits a selector when ``y is not None`` AND ``max_features``
+        # is set; without ``max_features`` the call is a raw feature-generation
+        # run and does not need a target. The CLI mirrors that contract: only
+        # require ``--target`` when both selection is enabled (the default) AND
+        # ``max_features`` is configured (CLI flag or config), so commands like
+        # ``featcopilot transform --input in.csv --output out.csv`` (no target,
+        # no cap) still work. Using ``engineer.max_features`` here means the
+        # value has already been type-validated, so we never report
+        # ``--target is required`` when the real problem is a malformed
+        # ``max_features`` config value.
+        if not args.no_selection and args.target is None and engineer.max_features is not None:
+            raise ValueError(
+                "--target is required when feature selection is applied "
+                "(i.e. when --max-features / config max_features is set). "
+                "Pass --target <column>, or pass --no-selection / drop --max-features to skip selection."
+            )
+
+        transformed = engineer.fit_transform(
+            X,
+            y,
+            task_description=args.task_description or "prediction task",
+            target_name=args.target,
+            apply_selection=not args.no_selection,
+        )
+
+        if args.include_target and y is not None:
+            # Re-attach the target column so downstream training scripts can
+            # consume the engineered file as a single artifact. Detect column
+            # collisions: if an engineered feature happens to share the
+            # target's column name (e.g. a target named ``foo_pow2`` matching
+            # a tabular-engine derived feature), blindly assigning ``transformed[
+            # target_name] = y.values`` would silently overwrite the engineered
+            # column. Surface that as a clean exit-2 error instead. Callers
+            # who knowingly want to overwrite can rename their target before
+            # invoking ``transform`` (or skip ``--include-target``).
+            target_name = args.target if args.target in df.columns else "target"
+            if target_name in transformed.columns:
+                raise ValueError(
+                    f"--include-target would overwrite engineered feature {target_name!r} "
+                    "with the target values. Rename the target column in the input file, "
+                    "or drop --include-target."
+                )
+            transformed = transformed.copy()
+            transformed[target_name] = y.values
+
+        _write_table(transformed, output_path, out_fmt)
+
+    payload = {
+        "status": "ok",
+        "input": str(input_path),
+        "output": str(output_path),
+        "input_format": in_fmt,
+        "output_format": out_fmt,
+        "n_rows": int(transformed.shape[0]),
+        "n_features": int(transformed.shape[1]),
+        "n_input_columns": int(X.shape[1]),
+        "n_generated_features": len(engineer.get_feature_names()),
+        "engines": list(engineer.engines),
+        "selection_methods": list(engineer.selection_methods),
+        "max_features": engineer.max_features,
+        "target": args.target,
+        "selection_applied": engineer._selector is not None,
+        "warnings": captured_warnings,
+    }
+    _emit(payload, as_json=args.json)
+    return 0
+
+
+# Default ``explain`` behavior is to use the full input so the metadata
+# is a faithful description of what a corresponding ``transform`` run
+# would do — engines like ``TabularEngine._fit_categorical_encoding``
+# use ``n_rows`` and per-category counts to decide e.g. one-hot vs.
+# target-encoding, so subsampling can silently change which features
+# appear. Callers who knowingly accept that trade-off can opt in via
+# ``--explain-sample-size`` (set to ``None``/absent to disable, any
+# positive integer to cap).
+def _cmd_explain(args: argparse.Namespace) -> int:
+    """Fit + transform engines and print feature explanations + code as JSON.
+
+    The built-in engines populate their internal feature-name registry during
+    :meth:`transform`, not :meth:`fit` (planning happens in ``fit`` but feature
+    objects are materialized in ``transform``). We therefore call
+    :meth:`AutoFeatureEngineer.fit_transform` so ``get_feature_names()``,
+    :meth:`explain_features` and :meth:`get_feature_code` all return the
+    actual generated features. Selection is intentionally skipped here so the
+    payload describes every candidate feature the engines produced, not just
+    the post-selection survivors.
+
+    Performance vs. faithfulness
+    ---------------------------
+    By default ``explain`` runs on the *full* input so the reported
+    metadata is a faithful description of what a corresponding
+    ``transform`` would generate. Some engines (notably
+    :class:`TabularEngine`) consult row counts and per-category
+    statistics when deciding which features to plan, so blind
+    subsampling can silently change the result.
+
+    For very large inputs where the metadata-only nature of ``explain``
+    really should not pay full memory / compute cost, callers can pass
+    ``--explain-sample-size N`` (or set ``"explain_sample_size": N`` in
+    ``--config``) to cap the rows fed to the engineer. The cap is a
+    deterministic *head slice* (the first N rows): for CSV the cap is
+    threaded through ``pd.read_csv(nrows=N)`` so memory is bounded
+    natively; for parquet/JSON pandas has no native row-limit so the
+    file is fully read and then truncated, with a UserWarning explaining
+    the limitation. The cap is NOT a random sample — callers who need
+    randomness should sample externally before invoking ``explain``.
+    A "metadata may differ" UserWarning is emitted (captured into the
+    JSON payload's ``warnings`` field) only when the cap actually
+    truncated the input. The ``n_rows_used`` field reports the effective
+    sample size.
+    """
+    input_path = Path(args.input)
+    if not input_path.exists():
+        raise FileNotFoundError(f"Input file not found: {args.input}")
+
+    in_fmt = _detect_format(input_path, args.input_format)
+
+    # Apply opt-in sample cap from CLI flag or config (CLI flag wins).
+    # Resolve and validate it BEFORE reading the input so the cap can be
+    # threaded into ``_read_table(... nrows=sample_size)`` to bound memory
+    # on huge inputs (CSV uses ``pd.read_csv(nrows=...)`` natively;
+    # parquet/JSON fall back to post-read truncation with a UserWarning
+    # since pandas doesn't expose a native row-limit for those formats).
+    sample_size = getattr(args, "explain_sample_size", None)
+    if sample_size is None and args.config is not None:
+        sample_size = _load_config(args.config).get("explain_sample_size")
+    if sample_size is not None:
+        _check_scalar_type("explain_sample_size", sample_size, (int,), allow_bool=False)
+        if sample_size <= 0:
+            raise ValueError(f"`explain_sample_size` must be a positive integer when set; got {sample_size!r}.")
+
+    engineer = _build_engineer(args, include_selection_config=False)
+
+    # Run the sample-warning AND ``fit_transform`` inside a single
+    # capture context so the sampling notice ends up in the JSON
+    # payload's ``warnings`` field instead of bleeding onto stderr.
+    with _capture_featcopilot_messages() as captured_warnings:
+        # Choose a read strategy that gives us:
+        #   1. a memory bound where pandas allows it (CSV ``nrows``), and
+        #   2. enough information to detect truncation without ``_read_table``
+        #      emitting its own (slightly off-by-one) warning that would
+        #      then double up with ours.
+        #
+        # For CSV we ask for ``sample_size + 1`` rows: ``pd.read_csv``
+        # reads at most that many, AND ``len(df) > sample_size`` becomes
+        # a strict proof of truncation. We pass ``suppress_truncation_warning``
+        # so ``_read_table`` doesn't emit its own message — ``_cmd_explain``
+        # is the single source of truth for the sampling notice and uses
+        # the user-facing ``sample_size`` value.
+        #
+        # For parquet/JSON pandas exposes no native row-limit, so we
+        # always read fully (``nrows=None``) and let ``_cmd_explain``
+        # both detect truncation and emit a single notice that includes
+        # the "memory bound is post-read" caveat. Asking ``_read_table``
+        # for a limit there would only have caused it to truncate at
+        # ``sample_size + 1`` and emit a confusing duplicate warning.
+        if sample_size is not None and in_fmt == "csv":
+            read_nrows: int | None = sample_size + 1
+        else:
+            read_nrows = None
+        df = _read_table(input_path, in_fmt, nrows=read_nrows, suppress_truncation_warning=True)
+        X, y = _split_xy(df, args.target)
+        n_sampled = len(X)
+        if sample_size is not None and n_sampled > sample_size:
+            # Strict proof of truncation: file had at least one more row
+            # than the requested cap. Trim to the exact cap and emit the
+            # "metadata may differ" notice.
+            X = X.iloc[:sample_size]
+            if y is not None:
+                y = y.iloc[:sample_size]
+            original_len = n_sampled
+            n_sampled = sample_size
+            msg = f"explain: capping input to {sample_size} rows (head slice). "
+            if in_fmt != "csv":
+                # For parquet/JSON we read the whole file before truncation
+                # (no native row-limit). Surface that fact so callers know
+                # memory wasn't bounded.
+                msg += (
+                    f"For {in_fmt}, pandas does not expose a native row-limit, "
+                    f"so the full file ({original_len}+ rows) was loaded into "
+                    "memory before truncation. For hard memory bounds on huge "
+                    "inputs, convert to CSV first. "
+                )
+            msg += (
+                "Some engines (e.g. TabularEngine categorical encoding) decide which "
+                "features to plan based on row counts and per-category statistics, "
+                "so the reported metadata may differ from a full-input transform run."
+            )
+            warnings.warn(msg, UserWarning, stacklevel=2)
+
+        engineer.fit_transform(
+            X,
+            y,
+            task_description=args.task_description or "prediction task",
+            target_name=args.target,
+            apply_selection=False,
+        )
+
+        # ``explain_features`` / ``get_feature_code`` consult engine
+        # internals that may legitimately emit pandas / pyarrow warnings
+        # (e.g. when stringifying expression trees touching deprecated
+        # APIs). Keep them inside the capture so any such warning ends
+        # up in the JSON payload's ``warnings`` field instead of bleeding
+        # to stderr — same "stderr reserved for failures" contract as
+        # the read / fit_transform phases above.
+        explanations = engineer.explain_features()
+        code = engineer.get_feature_code()
+        feature_names = engineer.get_feature_names()
+
+    payload = {
+        "status": "ok",
+        "input": str(input_path),
+        "n_features": len(feature_names),
+        "n_rows_used": n_sampled,
+        "engines": list(engineer.engines),
+        "features": [
+            {
+                "name": name,
+                "explanation": explanations.get(name, ""),
+                "code": code.get(name, ""),
+            }
+            for name in feature_names
+        ],
+        "warnings": captured_warnings,
+    }
+
+    # explain always emits JSON to stdout (it's the only sensible format),
+    # but we still respect ``--json`` for symmetry with other subcommands.
+    _emit(payload, as_json=True)
+    return 0
+
+
+class _StructuredArgumentParser(argparse.ArgumentParser):
+    """``argparse.ArgumentParser`` that emits the CLI's structured single-line
+    error format on usage failures.
+
+    The default ``argparse`` ``error()`` method writes the multi-line
+    usage banner ("usage: featcopilot ...\\n featcopilot: error: ...") to
+    stderr before raising :class:`SystemExit`. That breaks the CLI
+    contract that stderr carries exactly one ``featcopilot: error: ...``
+    line per failure — agents parsing stderr deterministically would see
+    the banner and the actual error mixed together, with no easy way to
+    tell which is which.
+
+    This subclass overrides :meth:`error` to write a single line and
+    skip the banner, so usage failures (missing required argument,
+    unknown flag, missing subcommand, etc.) follow the same single-line
+    contract as the rest of the CLI's exit-2 paths.
+    """
+
+    def error(self, message: str) -> None:  # type: ignore[override]
+        sys.stderr.write(f"featcopilot: error: {message}\n")
+        # ``ArgumentParser.error`` is documented to terminate; ``SystemExit(2)``
+        # is what the parent class would do after writing the banner.
+        # ``main()``'s ``except SystemExit`` handler converts this to an int
+        # return value so callers still see the documented exit-2 contract.
+        raise SystemExit(2)
+
+
+def _build_parser() -> argparse.ArgumentParser:
+    parser = _StructuredArgumentParser(
+        prog="featcopilot",
+        description=(
+            "FeatCopilot CLI — automated feature engineering from the command line. "
+            "Designed for scripting and agentic usage; pass --json to any subcommand "
+            "for machine-readable stdout."
+        ),
+    )
+    parser.add_argument(
+        "-V",
+        "--version",
+        action="version",
+        version=f"featcopilot {__version__}",
+    )
+    # Use the structured parser class for subparsers too so any
+    # subcommand-specific usage error (unknown flag, missing required
+    # arg) follows the same single-line stderr contract.
+    subparsers = parser.add_subparsers(
+        dest="command", required=True, metavar="COMMAND", parser_class=_StructuredArgumentParser
+    )
+
+    # ----- info ---------------------------------------------------------
+    p_info = subparsers.add_parser(
+        "info",
+        help="Print version and supported engines/methods.",
+        description="Print the installed FeatCopilot version and the supported engines, "
+        "selection methods, leakage guards, and I/O formats.",
+    )
+    p_info.add_argument("--json", action="store_true", help="Emit JSON to stdout.")
+    p_info.set_defaults(func=_cmd_info)
+
+    # ----- transform ----------------------------------------------------
+    p_transform = subparsers.add_parser(
+        "transform",
+        help="Run feature engineering on a tabular file.",
+        description="Read INPUT, run AutoFeatureEngineer, and write engineered features to OUTPUT.",
+    )
+    _add_io_args(p_transform)
+    _add_engineer_args(p_transform)
+    p_transform.add_argument(
+        "--no-selection",
+        action="store_true",
+        help="Disable feature selection (skip do-no-harm gate).",
+    )
+    p_transform.add_argument(
+        "--include-target",
+        action="store_true",
+        help="Include the target column in the output file.",
+    )
+    p_transform.add_argument("--json", action="store_true", help="Emit a JSON status line on stdout.")
+    p_transform.set_defaults(func=_cmd_transform)
+
+    # ----- explain ------------------------------------------------------
+    p_explain = subparsers.add_parser(
+        "explain",
+        help="Print JSON feature explanations and code for agent consumption.",
+        description="Fit AutoFeatureEngineer on INPUT and emit a JSON document "
+        "describing each generated feature (name, explanation, code). Selection is "
+        "intentionally disabled, so all candidate features are reported.",
+    )
+    p_explain.add_argument("--input", "-i", required=True, help="Path to input file (CSV / Parquet / JSON).")
+    p_explain.add_argument("--input-format", choices=SUPPORTED_INPUT_FORMATS, help="Override input format detection.")
+    p_explain.add_argument(
+        "--target",
+        "-t",
+        help="Target column name. Used by leakage-guard checks and as task context "
+        "for the LLM engine. (Selection is disabled in `explain`, so this flag "
+        "does not gate selector behavior.)",
+    )
+    p_explain.add_argument(
+        "--task-description",
+        help="Natural-language ML task description (used by the LLM engine).",
+    )
+    p_explain.add_argument(
+        "--explain-sample-size",
+        type=int,
+        default=None,
+        help="Cap the input fed to the engineer at this many rows. The cap is "
+        "applied as a deterministic head slice (the first N rows of the input — "
+        "for CSV via `pd.read_csv(nrows=N)` so memory is bounded; for parquet/JSON "
+        "the file is fully read and then truncated, with a warning). OFF by default: "
+        "the full input is used so the metadata is a faithful description of what a "
+        "corresponding `transform` would generate. Pass a positive integer ONLY when "
+        "you knowingly accept that (a) the analyzed rows are the first N (not a "
+        "random sample), and (b) some engines (e.g. TabularEngine categorical "
+        "encoding) decide which features to plan based on row counts and per-category "
+        "statistics, so the reported metadata may differ from a full-input run.",
+    )
+    _add_engineer_args(p_explain, include_selection_args=False)
+    p_explain.add_argument("--json", action="store_true", help="(Always JSON — flag accepted for symmetry.)")
+    p_explain.set_defaults(func=_cmd_explain)
+
+    return parser
+
+
+def _add_io_args(p: argparse.ArgumentParser) -> None:
+    p.add_argument("--input", "-i", required=True, help="Path to input file (CSV / Parquet / JSON).")
+    p.add_argument("--output", "-o", required=True, help="Path to output file (CSV / Parquet / JSON).")
+    p.add_argument("--input-format", choices=SUPPORTED_INPUT_FORMATS, help="Override input format detection.")
+    p.add_argument("--output-format", choices=SUPPORTED_OUTPUT_FORMATS, help="Override output format detection.")
+    p.add_argument(
+        "--target",
+        "-t",
+        help="Target column name. Required when feature selection is applied "
+        "(i.e. when --max-features / config max_features is set so the "
+        "selector actually fits). With no max_features, raw feature "
+        "generation runs without a target.",
+    )
+    p.add_argument(
+        "--task-description",
+        help="Natural-language ML task description (used by the LLM engine).",
+    )
+
+
+def _add_engineer_args(p: argparse.ArgumentParser, *, include_selection_args: bool = True) -> None:
+    """Add ``AutoFeatureEngineer``-related flags to a subparser.
+
+    ``include_selection_args=False`` omits selection-only flags
+    (``--selection-methods`` and ``--correlation-threshold``) — these are
+    silently ignored by the ``explain`` subcommand, which always runs with
+    selection disabled. ``--max-features`` is *not* selection-only:
+    ``AutoFeatureEngineer`` forwards it into engine construction (e.g. the
+    tabular engine uses it to cap the number of generated features), so it
+    is exposed even when ``include_selection_args=False`` to give callers
+    a CLI-level handle on the engine output size.
+    """
+    p.add_argument(
+        "--engines",
+        nargs="+",
+        choices=sorted(AutoFeatureEngineer.SUPPORTED_ENGINES),
+        help="Engines to use (default: tabular).",
+    )
+    # ``--max-features`` is exposed on every engineer-using subcommand
+    # because it caps engine output, not just selection — see the
+    # ``AutoFeatureEngineer`` constructor and ``TabularEngine``.
+    p.add_argument(
+        "--max-features",
+        type=int,
+        help="Maximum number of features to generate / keep (forwarded to engines and selector).",
+    )
+    if include_selection_args:
+        p.add_argument(
+            "--selection-methods",
+            nargs="+",
+            choices=sorted(AutoFeatureEngineer.SUPPORTED_SELECTION_METHODS),
+            help="Selection methods (default: mutual_info importance).",
+        )
+        p.add_argument(
+            "--correlation-threshold",
+            type=float,
+            help="Maximum pairwise correlation in redundancy elimination (default: 0.85).",
+        )
+    p.add_argument(
+        "--leakage-guard",
+        choices=sorted(AutoFeatureEngineer.SUPPORTED_LEAKAGE_GUARDS),
+        help="How to handle suspicious column names (default: warn).",
+    )
+    p.add_argument(
+        "--gate-n-jobs",
+        type=int,
+        help="Parallelism for the do-no-harm gate's RF (default: 1; -1 = all cores).",
+    )
+    p.add_argument(
+        "--config",
+        help="Path to a JSON config file. CLI flags take precedence over config keys. "
+        "Use this to pass nested keys such as ``llm_config``.",
+    )
+    # ``BooleanOptionalAction`` (Python 3.9+) provides both ``--verbose``
+    # and ``--no-verbose`` so a config-supplied ``"verbose": true`` can be
+    # explicitly turned off from the command line. ``default=None`` so the
+    # absence of either flag means "fall through to config / default".
+    p.add_argument(
+        "--verbose",
+        action=argparse.BooleanOptionalAction,
+        default=None,
+        help="Enable verbose logging (or --no-verbose to override config).",
+    )
+
+
+def main(argv: list[str] | None = None) -> int:
+    """CLI entry point.
+
+    Returns the process exit code; suitable for both the ``console_scripts``
+    entry point (``featcopilot``) and ``python -m featcopilot``. Argparse
+    usage errors (missing subcommand, unknown flag) and the cooperative
+    ``--help`` / ``--version`` actions all normally raise :class:`SystemExit`;
+    we trap those here and return their exit code so that programmatic
+    callers (and agent harnesses) get a consistent integer-returning API.
+    """
+    parser = _build_parser()
+
+    try:
+        args = parser.parse_args(argv)
+    except SystemExit as exc:
+        # argparse uses SystemExit(0) for ``--help`` / ``--version`` and
+        # SystemExit(2) for usage errors (also writing to stderr). We let the
+        # output through but convert the exit into a return value so
+        # ``main(argv) -> int`` is honored even on parse-time failures.
+        code = exc.code
+        if code is None:
+            return 0
+        if isinstance(code, int):
+            return code
+        # Non-int code (e.g. error string): print to stderr, return 2.
+        sys.stderr.write(f"{code}\n")
+        return 2
+
+    try:
+        return args.func(args)
+    except (FileNotFoundError, ValueError) as exc:
+        # User-facing input/config errors: print a clean message to stderr
+        # without a traceback so agents can parse the failure.
+        sys.stderr.write(f"featcopilot: error: {exc}\n")
+        return 2
+    except KeyboardInterrupt:
+        sys.stderr.write("featcopilot: interrupted\n")
+        return 130
+    except Exception as exc:  # pragma: no cover - defensive backstop
+        # Single deterministic stderr line so agents can parse the failure.
+        # We deliberately do NOT call ``logger.exception(...)`` here:
+        # FeatCopilot loggers write to stderr, which would append a second
+        # timestamped traceback after our structured line and break the
+        # CLI's "stderr is exactly one error message" contract. Internal
+        # failure introspection is the caller's job (e.g. set
+        # ``PYTHONFAULTHANDLER=1`` or attach a debugger).
+        sys.stderr.write(f"featcopilot: unexpected error: {type(exc).__name__}: {exc}\n")
+        return 1
+
+
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(main())
diff --git a/pyproject.toml b/pyproject.toml
index f904b9c..583f2eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -86,6 +86,9 @@ Homepage = "https://github.com/thinkall/featcopilot"
 Documentation = "https://github.com/thinkall/featcopilot#readme"
 Repository = "https://github.com/thinkall/featcopilot"
 
+[project.scripts]
+featcopilot = "featcopilot.cli:main"
+
 [tool.setuptools.packages.find]
 where = ["."]
 include = ["featcopilot*"]
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..2f8c1f3
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,3427 @@
+"""Tests for the featcopilot CLI."""
+
+from __future__ import annotations
+
+import argparse
+import io
+import json
+import logging
+import sys
+import threading
+import warnings
+from contextlib import redirect_stderr, redirect_stdout
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from featcopilot import __version__
+from featcopilot import cli as fc_cli
+
+
+def _run(argv: list[str]) -> tuple[int, str, str]:
+    """Invoke ``cli.main(argv)`` and capture exit code, stdout, stderr.
+
+    The featcopilot logger installs a ``StreamHandler(sys.stderr)`` at
+    import time, which holds a reference to the *original* ``sys.stderr``
+    object. ``redirect_stderr`` only swaps the ``sys.stderr`` module
+    attribute, so without also redirecting the handler's ``stream`` any
+    log output the suppression filter doesn't catch would still go to
+    the real terminal — leaving every ``err == ""`` assertion in this
+    file vacuously satisfied even in the presence of a leak. This helper
+    therefore both redirects ``sys.stderr`` AND temporarily re-points
+    every ``StreamHandler`` on the ``featcopilot`` root logger at the
+    same ``err`` buffer for the duration of the call, so the captured
+    ``err`` value reflects what would actually have been written to the
+    user's terminal.
+    """
+    out, err = io.StringIO(), io.StringIO()
+    fc_logger = logging.getLogger("featcopilot")
+    saved_streams: list[tuple[logging.StreamHandler, object]] = []
+    for handler in list(fc_logger.handlers):
+        if isinstance(handler, logging.StreamHandler):
+            saved_streams.append((handler, handler.stream))
+            handler.stream = err
+    try:
+        with redirect_stdout(out), redirect_stderr(err):
+            rc = fc_cli.main(argv)
+    finally:
+        for handler, original_stream in saved_streams:
+            handler.stream = original_stream
+    return rc, out.getvalue(), err.getvalue()
+
+
+@pytest.fixture
+def tabular_csv(tmp_path: Path) -> Path:
+    """A small classification dataset written to CSV."""
+    rng = np.random.default_rng(42)
+    n = 200
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            "x3": rng.integers(0, 5, size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    path = tmp_path / "in.csv"
+    df.to_csv(path, index=False)
+    return path
+
+
+# --------------------------------------------------------------------- info
+
+
+def test_info_json_emits_supported_options():
+    rc, out, err = _run(["info", "--json"])
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["version"] == __version__
+    assert "tabular" in payload["supported_engines"]
+    assert "mutual_info" in payload["supported_selection_methods"]
+    assert "warn" in payload["supported_leakage_guards"]
+    # CSV/JSON are always supported; parquet is gated on engine availability.
+    assert {"csv", "json"} <= set(payload["supported_input_formats"])
+    assert {"csv", "json"} <= set(payload["supported_output_formats"])
+    assert isinstance(payload["parquet_available"], bool)
+    if payload["parquet_available"]:
+        assert "parquet" in payload["supported_input_formats"]
+        assert "parquet" in payload["supported_output_formats"]
+    else:
+        assert "parquet" not in payload["supported_input_formats"]
+        assert "parquet" not in payload["supported_output_formats"]
+
+
+def test_info_excludes_parquet_when_engine_missing(monkeypatch):
+    """When no parquet engine can be imported, ``info`` must not advertise it."""
+    monkeypatch.setattr(fc_cli, "_parquet_engine_available", lambda: False)
+    rc, out, _ = _run(["info", "--json"])
+    assert rc == 0
+    payload = json.loads(out)
+    assert payload["parquet_available"] is False
+    assert "parquet" not in payload["supported_input_formats"]
+    assert "parquet" not in payload["supported_output_formats"]
+
+
+def test_info_includes_parquet_when_engine_present(monkeypatch):
+    monkeypatch.setattr(fc_cli, "_parquet_engine_available", lambda: True)
+    rc, out, _ = _run(["info", "--json"])
+    assert rc == 0
+    payload = json.loads(out)
+    assert payload["parquet_available"] is True
+    assert "parquet" in payload["supported_input_formats"]
+    assert "parquet" in payload["supported_output_formats"]
+
+
+def test_info_text_mode_is_human_readable():
+    rc, out, _ = _run(["info"])
+    assert rc == 0
+    # Not JSON: parsing should fail.
+    with pytest.raises(json.JSONDecodeError):
+        json.loads(out)
+    assert "version" in out
+    assert __version__ in out
+
+
+def test_top_level_version_flag(capsys):
+    # ``--version`` (argparse action) prints to stdout; main() now traps the
+    # SystemExit and returns the code so the API contract is consistent.
+    rc = fc_cli.main(["--version"])
+    assert rc == 0
+    assert __version__ in capsys.readouterr().out
+
+
+# ----------------------------------------------------------------- transform
+
+
+def test_transform_csv_to_csv(tmp_path: Path, tabular_csv: Path):
+    out_path = tmp_path / "out.csv"
+    rc, out, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--max-features",
+            "10",
+            "--json",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["status"] == "ok"
+    assert payload["target"] == "y"
+    assert payload["engines"] == ["tabular"]
+    assert payload["selection_applied"] is True
+    assert payload["n_input_columns"] == 3  # x1, x2, x3 (y is the target)
+
+    # The output file exists and is readable as CSV.
+    assert out_path.exists()
+    written = pd.read_csv(out_path)
+    assert written.shape[0] == 200
+    assert "y" not in written.columns  # target excluded by default
+
+
+def test_transform_include_target_round_trip(tmp_path: Path, tabular_csv: Path):
+    out_path = tmp_path / "out.csv"
+    rc, _, err = _run(
+        [
+            "transform",
+            "-i",
+            str(tabular_csv),
+            "-o",
+            str(out_path),
+            "-t",
+            "y",
+            "--max-features",
+            "10",
+            "--include-target",
+        ]
+    )
+    assert rc == 0, err
+    written = pd.read_csv(out_path)
+    assert "y" in written.columns
+
+
+def test_transform_parquet_round_trip(tmp_path: Path):
+    pytest.importorskip("pyarrow")
+    rng = np.random.default_rng(0)
+    df = pd.DataFrame({"a": rng.normal(size=120), "b": rng.normal(size=120), "y": rng.integers(0, 2, size=120)})
+    in_path = tmp_path / "in.parquet"
+    out_path = tmp_path / "out.parquet"
+    df.to_parquet(in_path, index=False)
+
+    rc, out, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--max-features",
+            "8",
+            "--json",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["input_format"] == "parquet"
+    assert payload["output_format"] == "parquet"
+    pd.read_parquet(out_path)  # readable
+
+
+def test_transform_json_round_trip(tmp_path: Path):
+    rng = np.random.default_rng(0)
+    df = pd.DataFrame({"a": rng.normal(size=80), "b": rng.normal(size=80), "y": rng.integers(0, 2, size=80)})
+    in_path = tmp_path / "in.json"
+    out_path = tmp_path / "out.json"
+    df.to_json(in_path, orient="records")
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 0, err
+    written = pd.read_json(out_path, orient="records")
+    assert written.shape[0] == 80
+
+
+def test_transform_no_selection_skips_selector(tmp_path: Path, tabular_csv: Path):
+    out_path = tmp_path / "out.csv"
+    rc, out, err = _run(
+        [
+            "transform",
+            "-i",
+            str(tabular_csv),
+            "-o",
+            str(out_path),
+            "-t",
+            "y",
+            "--no-selection",
+            "--max-features",
+            "5",
+            "--json",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["selection_applied"] is False
+
+
+def test_transform_config_file_supplies_engineer_kwargs(tmp_path: Path, tabular_csv: Path):
+    config_path = tmp_path / "cfg.json"
+    config_path.write_text(
+        json.dumps(
+            {
+                "engines": ["tabular"],
+                "selection_methods": ["mutual_info"],
+                "max_features": 7,
+                "correlation_threshold": 0.9,
+                "leakage_guard": "off",
+            }
+        )
+    )
+    out_path = tmp_path / "out.csv"
+    rc, out, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--config",
+            str(config_path),
+            "--json",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["selection_methods"] == ["mutual_info"]
+    assert payload["max_features"] == 7
+
+
+def test_transform_cli_flags_override_config(tmp_path: Path, tabular_csv: Path):
+    config_path = tmp_path / "cfg.json"
+    config_path.write_text(json.dumps({"max_features": 5, "engines": ["tabular"]}))
+    out_path = tmp_path / "out.csv"
+    rc, out, _ = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--config",
+            str(config_path),
+            "--max-features",
+            "12",
+            "--json",
+        ]
+    )
+    assert rc == 0
+    assert json.loads(out)["max_features"] == 12
+
+
+# ----------------------- _build_engineer config validation
+
+
+def test_string_engines_in_config_returns_clean_exit_2(tmp_path: Path, tabular_csv: Path):
+    """A misconfigured ``"engines": "tabular"`` (string instead of list) must
+    surface ``AutoFeatureEngineer``'s precise type-validation error via the
+    standard exit-2 path — *not* be silently coerced into a per-character list.
+    """
+    config_path = tmp_path / "cfg.json"
+    config_path.write_text(json.dumps({"engines": "tabular"}))
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(config_path),
+        ]
+    )
+    assert rc == 2
+    assert "engines must be a list or tuple" in err
+
+
+def test_empty_engines_list_in_config_returns_clean_exit_2(tmp_path: Path, tabular_csv: Path):
+    """An explicit empty ``engines`` list in the config must propagate to the
+    transformer's validation so the user sees the documented error, instead
+    of being silently rewritten into the defaults.
+    """
+    config_path = tmp_path / "cfg.json"
+    config_path.write_text(json.dumps({"engines": []}))
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(config_path),
+        ]
+    )
+    assert rc == 2
+    assert "at least one engine" in err.lower() or "empty sequence" in err.lower()
+
+
+def test_empty_selection_methods_list_in_config_returns_clean_exit_2(tmp_path: Path, tabular_csv: Path):
+    config_path = tmp_path / "cfg.json"
+    config_path.write_text(json.dumps({"selection_methods": []}))
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(config_path),
+        ]
+    )
+    assert rc == 2
+    assert "at least one method" in err.lower() or "empty sequence" in err.lower()
+
+
+# ----------------------- scalar config-type validation
+
+
+@pytest.mark.parametrize(
+    "key,value,fragment",
+    [
+        ("max_features", "10", "max_features"),
+        ("max_features", True, "max_features"),  # bool rejected for numeric field
+        ("correlation_threshold", "0.9", "correlation_threshold"),
+        ("correlation_threshold", True, "correlation_threshold"),
+        ("gate_n_jobs", "2", "gate_n_jobs"),
+        ("leakage_guard", 42, "leakage_guard"),
+    ],
+)
+def test_scalar_type_mismatch_in_config_returns_exit_2(tmp_path: Path, tabular_csv: Path, key, value, fragment):
+    """A malformed JSON config (string in a numeric field, etc.) must hit the
+    deterministic exit-2 user-error path with a precise message — not bubble
+    up as a downstream ``TypeError`` (exit 1).
+    """
+    config_path = tmp_path / "cfg.json"
+    config_path.write_text(json.dumps({key: value}))
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(config_path),
+        ]
+    )
+    assert rc == 2
+    assert fragment in err
+
+
+@pytest.mark.parametrize("threshold", [-0.1, 1.1, 5.0, -1.0])
+def test_correlation_threshold_out_of_range_returns_exit_2(tmp_path: Path, tabular_csv: Path, threshold):
+    """``correlation_threshold`` is only meaningful in [0.0, 1.0]. Out-of-range
+    values silently change selector behavior (>1 disables redundancy elim,
+    <0 treats every numeric pair as redundant), so the CLI rejects them up
+    front with a precise exit-2 error.
+    """
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--correlation-threshold",
+            str(threshold),
+            "--max-features",
+            "5",
+        ]
+    )
+    assert rc == 2
+    assert "correlation_threshold" in err
+    assert "[0.0, 1.0]" in err or "0.0" in err
+
+
+def test_correlation_threshold_in_config_out_of_range_returns_exit_2(tmp_path: Path, tabular_csv: Path):
+    """The same range check applies when ``correlation_threshold`` arrives
+    from ``--config`` rather than the CLI flag.
+    """
+    cfg = tmp_path / "cfg.json"
+    cfg.write_text(json.dumps({"correlation_threshold": 2.5}))
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--max-features",
+            "5",
+            "--config",
+            str(cfg),
+        ]
+    )
+    assert rc == 2
+    assert "correlation_threshold" in err
+
+
+def test_correlation_threshold_boundary_values_accepted(tmp_path: Path, tabular_csv: Path):
+    """The boundaries (0.0 and 1.0) must be accepted — they're the inclusive
+    valid range. Default 0.85 is also exercised throughout the suite.
+    """
+    out_path = tmp_path / "out.csv"
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--correlation-threshold",
+            "0.0",
+            "--max-features",
+            "5",
+        ]
+    )
+    assert rc == 0, err
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--correlation-threshold",
+            "1.0",
+            "--max-features",
+            "5",
+        ]
+    )
+    assert rc == 0, err
+
+
+# ----------------------- --verbose / --no-verbose
+
+
+def test_no_verbose_overrides_config_verbose_true(tmp_path: Path, tabular_csv: Path):
+    """``--no-verbose`` (BooleanOptionalAction) must override a config-level
+    ``"verbose": true`` to false — the documented precedence rule.
+    """
+    config_path = tmp_path / "cfg.json"
+    config_path.write_text(json.dumps({"verbose": True}))
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(config_path),
+            "--no-verbose",
+            "--max-features",
+            "5",
+            "--json",
+        ]
+    )
+    assert rc == 0, err
+
+
+def test_verbose_overrides_config_verbose_false(tmp_path: Path, tabular_csv: Path):
+    config_path = tmp_path / "cfg.json"
+    config_path.write_text(json.dumps({"verbose": False}))
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(config_path),
+            "--verbose",
+            "--max-features",
+            "5",
+        ]
+    )
+    assert rc == 0, err
+
+
+@pytest.mark.parametrize(
+    "value",
+    ["true", "false", 1, 0],
+)
+def test_non_bool_verbose_in_config_returns_exit_2(tmp_path: Path, tabular_csv: Path, value):
+    """A malformed ``"verbose": <non-bool>`` config must hit exit 2 with a
+    precise message, not silently turn verbose mode on/off via Python's
+    truthiness rules.
+    """
+    config_path = tmp_path / "cfg.json"
+    config_path.write_text(json.dumps({"verbose": value}))
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(config_path),
+            "--max-features",
+            "5",
+        ]
+    )
+    assert rc == 2
+    assert "verbose" in err
+
+
+def test_transform_missing_target_with_selection_returns_exit_2(tmp_path: Path, tabular_csv: Path):
+    """Without ``--target``, selection silently degrades to a no-op. The CLI
+    must surface that as a clean exit-2 user error so automation can react.
+    """
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--max-features",
+            "5",
+        ]
+    )
+    assert rc == 2
+    assert "--target" in err
+    assert "selection" in err.lower()
+
+
+def test_transform_missing_target_with_no_selection_succeeds(tmp_path: Path, tabular_csv: Path):
+    """Once selection is opted out, the missing target is no longer an error
+    (selection requires a target; raw transform doesn't).
+    """
+    # Drop the target column so we can run without --target.
+    in_path = tmp_path / "in_notarget.csv"
+    pd.read_csv(tabular_csv).drop(columns=["y"]).to_csv(in_path, index=False)
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--no-selection",
+        ]
+    )
+    assert rc == 0, err
+
+
+def test_transform_missing_target_no_max_features_succeeds(tmp_path: Path, tabular_csv: Path):
+    """Without ``--max-features`` (and the corresponding config key),
+    ``AutoFeatureEngineer`` doesn't actually fit a selector even with
+    ``apply_selection=True``, so requiring ``--target`` would be a false
+    positive. Raw feature generation without target / without cap must
+    therefore succeed.
+    """
+    in_path = tmp_path / "in_notarget.csv"
+    pd.read_csv(tabular_csv).drop(columns=["y"]).to_csv(in_path, index=False)
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+        ]
+    )
+    assert rc == 0, err
+
+
+def test_transform_missing_target_max_features_in_config_returns_exit_2(tmp_path: Path, tabular_csv: Path):
+    """The ``--target`` requirement also fires when ``max_features`` comes
+    from ``--config`` (not just the CLI flag), since the selector will
+    actually run in that case.
+    """
+    config_path = tmp_path / "cfg.json"
+    config_path.write_text(json.dumps({"max_features": 5}))
+    in_path = tmp_path / "in_notarget.csv"
+    pd.read_csv(tabular_csv).drop(columns=["y"]).to_csv(in_path, index=False)
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--config",
+            str(config_path),
+        ]
+    )
+    assert rc == 2
+    assert "--target" in err
+
+
+def test_explain_ignores_selection_only_config_keys(tmp_path: Path, tabular_csv: Path):
+    """A shared transform/explain config with selection-only keys
+    (``selection_methods`` / ``correlation_threshold``) must not break
+    ``explain``: those keys are inert at runtime (selection is disabled
+    in ``explain``) and ``_build_engineer(include_selection_config=False)``
+    skips reading them so config-validation does not fire.
+    """
+    config_path = tmp_path / "cfg.json"
+    # Use *valid* selection_methods values; the point is they''re ignored.
+    config_path.write_text(
+        json.dumps(
+            {
+                "engines": ["tabular"],
+                "selection_methods": ["mutual_info"],
+                "correlation_threshold": 0.5,
+                "max_features": 5,
+            }
+        )
+    )
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(tabular_csv),
+            "--target",
+            "y",
+            "--config",
+            str(config_path),
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["status"] == "ok"
+
+
+# ----------------------- explain subparser doesn't expose selection-only flags
+
+
+def test_explain_rejects_selection_methods_flag(tmp_path: Path, tabular_csv: Path):
+    """``explain`` always disables selection, so accepting ``--selection-methods``
+    on the CLI would silently mis-configure the user. The subparser must not
+    advertise it.
+    """
+    rc, _, err = _run(
+        [
+            "explain",
+            "--input",
+            str(tabular_csv),
+            "--target",
+            "y",
+            "--selection-methods",
+            "mutual_info",
+        ]
+    )
+    assert rc == 2
+    assert "unrecognized" in err.lower() or "--selection-methods" in err.lower()
+
+
+def test_explain_accepts_max_features_flag(tmp_path: Path, tabular_csv: Path):
+    """``--max-features`` is *not* selection-only — ``AutoFeatureEngineer``
+    forwards it into engine construction (e.g. the tabular engine uses it
+    to cap how many features it generates). ``explain`` must therefore
+    expose it so callers can bound the size of the explanation payload.
+    """
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(tabular_csv),
+            "--target",
+            "y",
+            "--max-features",
+            "5",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["status"] == "ok"
+
+
+def test_explain_rejects_correlation_threshold_flag(tmp_path: Path, tabular_csv: Path):
+    rc, _, err = _run(
+        [
+            "explain",
+            "--input",
+            str(tabular_csv),
+            "--target",
+            "y",
+            "--correlation-threshold",
+            "0.9",
+        ]
+    )
+    assert rc == 2
+
+
+def test_explain_target_help_no_longer_says_required_for_selection():
+    """The ``--target`` help on ``explain`` must not claim it gates selection
+    (selection is intentionally disabled in ``explain``).
+    """
+    parser = fc_cli._build_parser()
+    # argparse stores subparsers under a special action attribute
+    explain_parser = next(
+        action.choices["explain"] for action in parser._actions if isinstance(action, argparse._SubParsersAction)
+    )
+    target_help = next(a.help for a in explain_parser._actions if "--target" in a.option_strings)
+    assert "required for selection" not in target_help
+    assert "leakage" in target_help.lower() or "task context" in target_help.lower()
+
+
+# ----------------------- I/O OSError normalization
+
+
+def test_input_directory_returns_exit_2(tmp_path: Path):
+    """Pointing ``--input`` at a directory must surface as exit 2."""
+    in_dir = tmp_path / "i_am_a_dir.csv"
+    in_dir.mkdir()
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_dir),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "directory" in err.lower()
+
+
+def test_output_directory_returns_exit_2(tmp_path: Path, tabular_csv: Path):
+    """Pointing ``--output`` at an existing directory must surface as exit 2."""
+    out_dir = tmp_path / "i_am_a_dir.csv"
+    out_dir.mkdir()
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_dir),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "directory" in err.lower()
+
+
+def test_unwritable_output_returns_exit_2(tmp_path: Path, tabular_csv: Path, monkeypatch):
+    """An ``OSError`` on write (e.g. permission denied) must surface as exit 2."""
+    import pandas as pd
+
+    def _raise_oserror(self, *args, **kwargs):
+        raise PermissionError("simulated write failure")
+
+    monkeypatch.setattr(pd.DataFrame, "to_csv", _raise_oserror, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "failed to write" in err.lower()
+
+
+def test_unreadable_input_csv_returns_exit_2(tmp_path: Path, tabular_csv: Path, monkeypatch):
+    """An ``OSError`` while reading the input must surface as exit 2."""
+    import pandas as pd
+
+    def _raise_oserror(*args, **kwargs):
+        raise PermissionError("simulated read failure")
+
+    monkeypatch.setattr(pd, "read_csv", _raise_oserror, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "failed to read" in err.lower()
+
+
+def test_empty_csv_input_returns_exit_2(tmp_path: Path):
+    """A zero-byte / headerless CSV triggers ``pandas.errors.EmptyDataError``,
+    which must be normalized to the documented exit-2 user-input error path
+    rather than falling through to the generic exit-1 backstop.
+    """
+    in_path = tmp_path / "empty.csv"
+    in_path.write_text("")  # zero bytes -> EmptyDataError on read
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "failed to read csv" in err.lower()
+
+
+def test_headerless_csv_input_returns_exit_2(tmp_path: Path):
+    """A CSV with no header and no rows is also empty-data territory and
+    must surface as exit 2.
+    """
+    in_path = tmp_path / "headerless.csv"
+    in_path.write_text("\n\n\n")  # only newlines, no header
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "failed to read csv" in err.lower()
+
+
+def test_header_only_csv_input_returns_exit_2(tmp_path: Path):
+    """A CSV that has a header line but ZERO data rows is read by pandas
+    as an *empty* DataFrame (no exception). Without the explicit empty
+    check, the CLI would feed it into ``TabularEngine`` which divides by
+    ``len(X)`` and exits via the generic exit-1 backstop. The CLI must
+    surface this as a clean exit-2 user-input error.
+    """
+    in_path = tmp_path / "header_only.csv"
+    in_path.write_text("x1,x2,y\n")  # header but no data
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "empty" in err.lower()
+    assert "zero data rows" in err.lower()
+
+
+def test_empty_json_input_returns_exit_2(tmp_path: Path):
+    """An empty JSON array is parsed as an empty DataFrame and must be
+    rejected up front like header-only CSV.
+    """
+    in_path = tmp_path / "empty.json"
+    in_path.write_text("[]")
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "empty" in err.lower()
+
+
+def test_empty_parquet_input_returns_exit_2(tmp_path: Path):
+    """A parquet file with schema but zero rows is rejected up front."""
+    pytest.importorskip("pyarrow")
+    in_path = tmp_path / "empty.parquet"
+    pd.DataFrame({"x1": [], "x2": [], "y": []}).to_parquet(in_path, index=False)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "empty" in err.lower()
+
+
+def test_explain_header_only_csv_returns_exit_2(tmp_path: Path):
+    """The empty-input check is applied to ``explain`` too."""
+    in_path = tmp_path / "header_only.csv"
+    in_path.write_text("x1,x2,y\n")
+
+    rc, _, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "empty" in err.lower()
+
+
+def test_transform_include_target_collision_returns_exit_2(tmp_path: Path):
+    """``--include-target`` would silently overwrite an engineered feature
+    if it happens to share the target column's name. The CLI must detect
+    that collision and fail with exit 2 instead of losing the engineered
+    feature.
+
+    A target named ``x1_pow2`` (which the tabular engine generates as a
+    derived feature from a numeric column ``x1``) provokes the collision.
+    """
+    rng = np.random.default_rng(0)
+    n = 200
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            # Target column has a name that the tabular engine would also
+            # generate (``x1_pow2`` etc. is in the tabular engine's
+            # derived feature catalog).
+            "x1_pow2": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "collision.csv"
+    df.to_csv(in_path, index=False)
+    out_path = tmp_path / "out.csv"
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(out_path),
+            "--target",
+            "x1_pow2",
+            "--include-target",
+            "--max-features",
+            "5",
+        ]
+    )
+    # Either the engineered set actually contains the colliding name (in
+    # which case we MUST exit 2), or selection happened to drop it. Skip
+    # if the engine didn't materialize the colliding feature this run —
+    # the test is about the contract, not whether ``x1_pow2`` is always
+    # generated.
+    if rc == 2:
+        assert "include-target would overwrite" in err.lower()
+        assert "x1_pow2" in err
+    else:
+        # No collision actually occurred; the test is a no-op for this
+        # input. Future engine changes that always emit ``x1_pow2`` will
+        # expose the collision branch.
+        assert rc == 0, err
+
+
+def test_transform_include_target_collision_deterministic(tmp_path: Path, tabular_csv: Path, monkeypatch):
+    """Deterministic version of the collision test: monkey-patch the
+    engineer so its transformed frame contains a column with the target's
+    name. This guarantees we exercise the exit-2 collision branch
+    regardless of which features the real engineer picks.
+    """
+    from featcopilot.transformers.sklearn_compat import AutoFeatureEngineer
+
+    real_fit_transform = AutoFeatureEngineer.fit_transform
+
+    def _patched_fit_transform(self, X, y=None, **kwargs):
+        result = real_fit_transform(self, X, y, **kwargs)
+        # Inject a column named ``y`` into the result so it collides with
+        # the target column the test will pass.
+        result = result.copy()
+        result["y"] = result.iloc[:, 0]  # arbitrary engineered values
+        return result
+
+    monkeypatch.setattr(AutoFeatureEngineer, "fit_transform", _patched_fit_transform)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--include-target",
+            "--max-features",
+            "5",
+        ]
+    )
+    assert rc == 2
+    assert "include-target would overwrite" in err.lower()
+    assert "'y'" in err
+
+
+def test_unreadable_input_json_returns_exit_2(tmp_path: Path, tabular_csv: Path, monkeypatch):
+    """``OSError`` from ``pd.read_json`` is surfaced as exit 2 too."""
+    import pandas as pd
+
+    in_path = tmp_path / "in.json"
+    in_path.write_text("[]")  # contents irrelevant; we'll intercept
+
+    def _raise_oserror(*args, **kwargs):
+        raise PermissionError("simulated read failure")
+
+    monkeypatch.setattr(pd, "read_json", _raise_oserror, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "failed to read json" in err.lower()
+
+
+def test_unreadable_input_parquet_returns_exit_2(tmp_path: Path, monkeypatch):
+    """``OSError`` from ``pd.read_parquet`` (e.g. corrupt file) is exit 2."""
+    import pandas as pd
+
+    in_path = tmp_path / "in.parquet"
+    in_path.write_bytes(b"")
+
+    def _raise_oserror(*args, **kwargs):
+        raise OSError("simulated parquet read failure")
+
+    monkeypatch.setattr(pd, "read_parquet", _raise_oserror, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "failed to read parquet" in err.lower()
+
+
+def test_unwritable_output_json_returns_exit_2(tmp_path: Path, tabular_csv: Path, monkeypatch):
+    import pandas as pd
+
+    def _raise_oserror(self, *args, **kwargs):
+        raise PermissionError("simulated json write failure")
+
+    monkeypatch.setattr(pd.DataFrame, "to_json", _raise_oserror, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.json"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "failed to write json" in err.lower()
+
+
+def test_unwritable_output_parquet_returns_exit_2(tmp_path: Path, tabular_csv: Path, monkeypatch):
+    """``OSError`` (vs ``ImportError``) from ``DataFrame.to_parquet`` -> exit 2."""
+    import pandas as pd
+
+    def _raise_oserror(self, *args, **kwargs):
+        raise OSError("simulated parquet write failure")
+
+    monkeypatch.setattr(pd.DataFrame, "to_parquet", _raise_oserror, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.parquet"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "failed to write parquet" in err.lower()
+
+
+def test_parquet_read_engine_error_returns_exit_2(tmp_path: Path, monkeypatch):
+    """A non-OSError parquet *backend* error (e.g. ``pyarrow.lib.ArrowInvalid``
+    for a corrupt file) must surface as exit 2, not the generic exit 1
+    "unexpected error" backstop. The CLI catches ``Exception`` for parquet
+    operations because they are fully delegated to a third-party backend
+    whose failures are by definition user-facing data issues.
+    """
+    import pandas as pd
+
+    in_path = tmp_path / "fake.parquet"
+    in_path.write_bytes(b"\x00\x01\x02\x03")  # not a real parquet file
+
+    class _FakeArrowInvalid(Exception):
+        """Stand-in for ``pyarrow.lib.ArrowInvalid`` (also subclasses Exception)."""
+
+    def _raise_backend_error(*args, **kwargs):
+        raise _FakeArrowInvalid("simulated corrupt parquet")
+
+    monkeypatch.setattr(pd, "read_parquet", _raise_backend_error, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "failed to read parquet" in err.lower()
+
+
+def test_parquet_write_engine_error_returns_exit_2(tmp_path: Path, tabular_csv: Path, monkeypatch):
+    """Same coverage on the write side: a backend-level pyarrow exception
+    that is *not* an ``OSError`` (e.g. an unsupported column-type
+    conversion error) must produce exit 2, not exit 1.
+    """
+    import pandas as pd
+
+    class _FakeArrowTypeError(Exception):
+        pass
+
+    def _raise_backend_error(self, *args, **kwargs):
+        raise _FakeArrowTypeError("simulated unsupported column dtype for parquet")
+
+    monkeypatch.setattr(pd.DataFrame, "to_parquet", _raise_backend_error, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.parquet"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "failed to write parquet" in err.lower()
+
+
+def test_uncreatable_parent_directory_returns_exit_2(tmp_path: Path, tabular_csv: Path, monkeypatch):
+    """If creating the output's parent directory fails, exit 2 with a clean message."""
+    real_mkdir = Path.mkdir
+
+    def _raise_oserror(self, *args, **kwargs):
+        # Only fail for our test's would-be output parent so other calls (e.g.
+        # tmp_path operations under the hood) still work.
+        if "deep" in self.parts:
+            raise PermissionError("simulated mkdir failure")
+        return real_mkdir(self, *args, **kwargs)
+
+    monkeypatch.setattr(Path, "mkdir", _raise_oserror, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "deep" / "nested" / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "create parent directory" in err.lower()
+
+
+# ----------------------- stderr is reserved for failures (warnings captured)
+
+
+def test_transform_leakage_warning_does_not_pollute_stderr(tmp_path: Path):
+    """``leakage_guard='warn'`` (the default) must not bleed
+    ``warnings.warn(...)`` onto stderr on a successful run; the warnings
+    are captured and surfaced inside the JSON payload's ``warnings`` field
+    instead, so agents can keep treating non-empty stderr as failure metadata.
+    """
+    rng = np.random.default_rng(0)
+    n = 200
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            # ``label_encoded`` is detected as leakage-prone ("label" + "encoded"
+            # both appear in the stoplist).
+            "label_encoded": rng.integers(0, 2, size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "in_with_leakage.csv"
+    df.to_csv(in_path, index=False)
+    out_path = tmp_path / "out.csv"
+
+    rc, out, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--max-features",
+            "5",
+            "--json",
+        ]
+    )
+    assert rc == 0, err
+    assert err == "", f"stderr should be empty on success but got: {err!r}"
+    payload = json.loads(out)
+    assert payload["status"] == "ok"
+    # ``warnings`` field is always present; it MAY contain the leakage
+    # warning depending on the heuristic. The contract being tested is
+    # that stderr stays clean — not that any specific warning was emitted
+    # (the leakage detector heuristics evolve).
+    assert "warnings" in payload
+    assert isinstance(payload["warnings"], list)
+
+
+def test_explain_leakage_warning_does_not_pollute_stderr(tmp_path: Path):
+    """``explain`` has the same stderr-cleanliness contract as ``transform``."""
+    rng = np.random.default_rng(0)
+    n = 200
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "label_encoded": rng.integers(0, 2, size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "in.csv"
+    df.to_csv(in_path, index=False)
+
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 0, err
+    assert err == "", f"stderr should be empty on success but got: {err!r}"
+    payload = json.loads(out)
+    assert payload["status"] == "ok"
+    # The ``warnings`` field is always present and is a list. Whether or
+    # not the leakage heuristic fires is not guaranteed (it evolves); the
+    # contract under test is that stderr stays clean.
+    assert isinstance(payload["warnings"], list)
+
+
+def test_transform_logger_warning_does_not_pollute_stderr(tmp_path: Path, tabular_csv: Path):
+    """The CLI captures ``logger.warning(...)`` records (in addition to
+    ``warnings.warn``), so any successful run that exercises a code path
+    emitting a logger message — for example the do-no-harm gate's
+    fallback — keeps stderr empty. The captured records appear in the
+    JSON payload's ``warnings`` field.
+    """
+    out_path = tmp_path / "out.csv"
+    rc, out, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--max-features",
+            "5",
+            "--verbose",  # exercises ``logger.info(...)`` paths in engines
+            "--json",
+        ]
+    )
+    assert rc == 0, err
+    assert err == "", f"stderr should be empty on success but got: {err!r}"
+    payload = json.loads(out)
+    assert payload["status"] == "ok"
+    assert isinstance(payload["warnings"], list)
+
+
+def test_transform_verbose_logger_info_captured_not_on_stderr(tmp_path: Path, tabular_csv: Path):
+    """``--verbose`` enables ``logger.info(...)`` calls in
+    ``AutoFeatureEngineer`` and the engines. Those records must end up
+    in the JSON payload's ``warnings`` field, not on stderr.
+    """
+    out_path = tmp_path / "out.csv"
+    rc, out, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--max-features",
+            "5",
+            "--verbose",
+            "--json",
+        ]
+    )
+    assert rc == 0, err
+    assert err == "", f"stderr should be empty on success but got: {err!r}"
+    payload = json.loads(out)
+    # ``--verbose`` reliably emits "Fitted tabular engine" via logger.info,
+    # and selection / engineer calls also log. We don't pin the exact
+    # messages (they evolve) — just check at least one log record is
+    # present in the captured payload.
+    assert isinstance(payload["warnings"], list)
+    assert len(payload["warnings"]) >= 1
+
+
+def test_capture_featcopilot_messages_intercepts_logger_warning():
+    """Direct unit test for the contextmanager so the docstring contract is
+    not just covered transitively via the CLI subcommands.
+    """
+    fc_logger = logging.getLogger("featcopilot.test_cli")
+    # Reset Python's warning-deduplication state for the duration of the
+    # test so a previous test that fired ``warnings.warn`` at the same
+    # source location does not suppress this one.
+    with warnings.catch_warnings():
+        warnings.simplefilter("always")
+        with fc_cli._capture_featcopilot_messages() as captured:
+            fc_logger.warning("captured-warning-message")
+            warnings.warn("captured-runtime-warning", UserWarning, stacklevel=2)
+    assert any("captured-warning-message" in m for m in captured)
+    assert any("captured-runtime-warning" in m for m in captured)
+
+
+def test_capture_featcopilot_messages_does_not_mutate_logger_state_per_call():
+    """The contextmanager installs hooks *once* (lazily) and then never
+    mutates the featcopilot logger again — so successive captures don't
+    add or remove handlers, regardless of test ordering. The earlier
+    "restores handlers" test (asserting equality with pre-first-call
+    state) was order-dependent: on the very first capture in a process,
+    ``_install_capture_hooks_once()`` permanently adds
+    ``_routing_handler`` and that's a one-way change. We instead assert
+    *stability* across an exception-propagating with-block, which is the
+    real behavioral contract.
+    """
+    # First, force install via a no-op capture.
+    with fc_cli._capture_featcopilot_messages():
+        pass
+
+    fc_root = logging.getLogger("featcopilot")
+    handlers_before = list(fc_root.handlers)
+    level_before = fc_root.level
+    showwarning_before = warnings.showwarning
+
+    with pytest.raises(RuntimeError):
+        with fc_cli._capture_featcopilot_messages():
+            raise RuntimeError("boom")
+
+    # Hooks remain installed (handler stays, level unchanged, showwarning
+    # override remains in place); per-call state has been popped.
+    assert fc_root.handlers == handlers_before
+    assert fc_root.level == level_before
+    assert warnings.showwarning is showwarning_before
+
+
+def test_capture_featcopilot_messages_thread_safety():
+    """Concurrent ``_capture_featcopilot_messages`` invocations must not
+    steal each other's records. Implementation uses per-thread routing
+    (no global lock held during the body), so threads execute concurrently.
+    """
+    import threading
+
+    fc_logger = logging.getLogger("featcopilot.test_concurrent")
+
+    results: list[list[str]] = []
+    barrier = threading.Barrier(2)
+
+    def worker(tag: str):
+        # Force both threads to enter the with-block at roughly the same
+        # time so the routing dispatch is genuinely contended.
+        barrier.wait()
+        with fc_cli._capture_featcopilot_messages() as captured:
+            for i in range(20):
+                fc_logger.warning(f"{tag}-{i}")
+        results.append(captured)
+
+    t1 = threading.Thread(target=worker, args=("A",))
+    t2 = threading.Thread(target=worker, args=("B",))
+    t1.start()
+    t2.start()
+    t1.join()
+    t2.join()
+
+    assert len(results) == 2
+    # Each capture list must contain exactly its own thread's records and
+    # nothing from the other thread.
+    for res in results:
+        # Find which tag this list belongs to.
+        tag = "A" if any("A-" in m for m in res) else "B"
+        assert all(f"{tag}-" in m for m in res), f"Thread isolation violated in capture {tag!r}: got {res!r}"
+        assert len(res) == 20
+
+
+def test_capture_does_not_block_concurrent_callers():
+    """Two concurrent ``_capture_featcopilot_messages`` blocks must run in
+    parallel — i.e. the design does NOT serialize the body via a global
+    lock. Verified by timing: a worker that sleeps inside the block must
+    not block another worker from also entering the block at the same
+    time.
+    """
+    import threading
+    import time
+
+    inside = []
+    inside_lock = threading.Lock()
+    seen_overlap = threading.Event()
+    barrier = threading.Barrier(2)
+
+    def worker():
+        barrier.wait()
+        with fc_cli._capture_featcopilot_messages():
+            with inside_lock:
+                inside.append(1)
+                if len(inside) >= 2:
+                    seen_overlap.set()
+            # Sleep long enough that, if the implementation serialized via
+            # a global lock, the second thread would never enter
+            # simultaneously.
+            time.sleep(0.2)
+            with inside_lock:
+                inside.pop()
+
+    t1 = threading.Thread(target=worker)
+    t2 = threading.Thread(target=worker)
+    t1.start()
+    t2.start()
+    t1.join(timeout=5)
+    t2.join(timeout=5)
+
+    assert seen_overlap.is_set(), (
+        "Both threads should have been inside _capture_featcopilot_messages "
+        "simultaneously; the implementation appears to serialize the body."
+    )
+
+
+def test_capture_warnings_warn_thread_isolated():
+    """``warnings.warn`` calls from one capturing thread must not leak into
+    another capturing thread's payload. The CLI overrides
+    ``warnings.showwarning`` per-thread (rather than using
+    ``warnings.catch_warnings(record=True)`` which is process-global).
+    """
+    import threading
+
+    barrier = threading.Barrier(2)
+    a_captured: list[str] = []
+    b_captured: list[str] = []
+
+    def worker(tag: str, target: list[str]):
+        barrier.wait()
+        with fc_cli._capture_featcopilot_messages() as captured:
+            for i in range(10):
+                # ``stacklevel=2`` is forwarded; reset filter state so we
+                # don't lose the warning to Python's default dedup.
+                warnings.warn(f"{tag}-warn-{i}", UserWarning, stacklevel=2)
+        target.extend(captured)
+
+    # Reset warning filters for this test so dedup doesn't suppress
+    # repeated emissions at the same source line.
+    with warnings.catch_warnings():
+        warnings.simplefilter("always")
+        t1 = threading.Thread(target=worker, args=("A", a_captured))
+        t2 = threading.Thread(target=worker, args=("B", b_captured))
+        t1.start()
+        t2.start()
+        t1.join()
+        t2.join()
+
+    assert all("A-warn-" in m for m in a_captured)
+    assert all("B-warn-" in m for m in b_captured)
+    assert not any("B-warn-" in m for m in a_captured)
+    assert not any("A-warn-" in m for m in b_captured)
+
+
+def test_nested_capture_on_same_thread_preserves_outer_list():
+    """A capture inside a capture on the same thread must:
+
+    1. Route records to the *innermost* list while the inner block is active.
+    2. Restore the outer list when the inner block exits, so subsequent
+       records flow into the outer payload.
+
+    The previous single-list-per-thread design clobbered the outer
+    registration; this test guards against that regression.
+    """
+    fc_logger = logging.getLogger("featcopilot.test_nested")
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("always")
+        with fc_cli._capture_featcopilot_messages() as outer:
+            fc_logger.warning("outer-before-nested")
+            with fc_cli._capture_featcopilot_messages() as inner:
+                fc_logger.warning("inner-only")
+                warnings.warn("inner-runtime", UserWarning, stacklevel=2)
+            fc_logger.warning("outer-after-nested")
+
+    # Inner contains only the records emitted while it was the active
+    # capture.
+    assert any("inner-only" in m for m in inner)
+    assert any("inner-runtime" in m for m in inner)
+    assert not any("outer-before-nested" in m for m in inner)
+    assert not any("outer-after-nested" in m for m in inner)
+
+    # Outer contains records emitted before AND after the inner block,
+    # but NOT records emitted while inner was active (those went to inner).
+    assert any("outer-before-nested" in m for m in outer)
+    assert any("outer-after-nested" in m for m in outer)
+    assert not any("inner-only" in m for m in outer)
+    assert not any("inner-runtime" in m for m in outer)
+
+
+def test_overlapping_captures_with_out_of_order_exit():
+    """Two threads enter the capture block, then thread A exits *before*
+    thread B. The CLI must continue to capture B's warnings even after
+    A has exited — i.e. A's exit must not restore a global state that
+    disables B's capture.
+
+    This is the strict version of the warnings.showwarning race that
+    existed when the override was saved/restored per-call: A's exit
+    used to restore the original ``warnings.showwarning``, leaking B's
+    subsequent ``warnings.warn`` calls onto stderr.
+    """
+    import threading
+    import time
+
+    barrier = threading.Barrier(2)
+    a_done = threading.Event()
+    a_captured: list[str] = []
+    b_captured: list[str] = []
+
+    fc_logger = logging.getLogger("featcopilot.test_overlap")
+
+    def worker_a():
+        barrier.wait()
+        with fc_cli._capture_featcopilot_messages() as captured:
+            fc_logger.warning("A-1")
+            warnings.warn("A-warn-1", UserWarning, stacklevel=2)
+        a_captured.extend(captured)
+        a_done.set()  # signal: A has exited the capture block
+
+    def worker_b():
+        barrier.wait()
+        with fc_cli._capture_featcopilot_messages() as captured:
+            fc_logger.warning("B-1")
+            # Wait for A to fully exit before emitting B's tail records.
+            assert a_done.wait(timeout=5)
+            time.sleep(0.05)  # small grace so any racy restoration would have happened
+            fc_logger.warning("B-2-after-A-exit")
+            warnings.warn("B-warn-after-A-exit", UserWarning, stacklevel=2)
+        b_captured.extend(captured)
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("always")
+        t_a = threading.Thread(target=worker_a)
+        t_b = threading.Thread(target=worker_b)
+        t_b.start()  # start B first so it's already in the block
+        time.sleep(0.05)
+        t_a.start()
+        t_a.join(timeout=5)
+        t_b.join(timeout=5)
+
+    # B's records — including the ones emitted *after* A exited — must
+    # all be captured. None of A's records should have leaked into B.
+    assert any("B-1" in m for m in b_captured)
+    assert any("B-2-after-A-exit" in m for m in b_captured)
+    assert any("B-warn-after-A-exit" in m for m in b_captured)
+    assert not any("A-1" in m for m in b_captured)
+    assert not any("A-warn-1" in m for m in b_captured)
+    # A's payload likewise contains only A's records.
+    assert any("A-1" in m for m in a_captured)
+    assert any("A-warn-1" in m for m in a_captured)
+    assert not any("B-" in m for m in a_captured)
+
+
+def test_unexpected_error_writes_single_stderr_line(monkeypatch, tmp_path: Path, tabular_csv: Path):
+    """An unexpected (non-ValueError) exception must produce exactly one
+    structured stderr line — no second timestamped traceback from
+    ``logger.exception(...)`` — so agents can parse failures
+    deterministically.
+    """
+    import pandas as pd
+
+    class _UnexpectedError(Exception):
+        """A non-ValueError, non-OSError exception that escapes the helpers."""
+
+    def _raise_unexpected(*args, **kwargs):
+        raise _UnexpectedError("simulated internal failure")
+
+    # Monkey-patch ``pd.read_csv`` directly. Since ``_read_table``'s CSV
+    # branch normally catches ``OSError`` / ``ParserError`` / ``UnicodeDecodeError``,
+    # raising a different exception type forces us into the generic exit-1
+    # backstop in ``main()``.
+    monkeypatch.setattr(pd, "read_csv", _raise_unexpected, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 1, err
+    # Exactly one non-empty line on stderr.
+    err_lines = [line for line in err.splitlines() if line.strip()]
+    assert len(err_lines) == 1, f"Expected single-line stderr, got: {err!r}"
+    assert err_lines[0].startswith("featcopilot: unexpected error:")
+    assert "_UnexpectedError" in err_lines[0]
+    assert "simulated internal failure" in err_lines[0]
+    # No traceback signature.
+    assert "Traceback" not in err
+    assert 'File "' not in err
+
+
+# ----------------------- --target help text accuracy
+
+
+def test_transform_target_help_reflects_actual_contract():
+    """The ``--target`` help on ``transform`` must say the flag is required
+    only when ``--max-features`` is set (which is when the selector
+    actually fits), not whenever selection is enabled by default.
+    """
+    parser = fc_cli._build_parser()
+    transform_parser = next(
+        action.choices["transform"] for action in parser._actions if isinstance(action, argparse._SubParsersAction)
+    )
+    target_help = next(a.help for a in transform_parser._actions if "--target" in a.option_strings)
+    assert "max_features" in target_help.lower() or "max-features" in target_help.lower()
+    # The old ("required when selection is applied (the default ...)")
+    # phrasing was misleading — guard against regressions.
+    assert "the default" not in target_help.lower()
+
+
+# ----------------------- target check runs after type validation
+
+
+def test_invalid_max_features_in_config_takes_precedence_over_target_check(tmp_path: Path, tabular_csv: Path):
+    """A malformed ``max_features`` in ``--config`` (string, negative, etc.)
+    must surface its real validation error rather than ``--target is
+    required``. The CLI now builds the engineer first (which type-validates
+    every scalar config field) and only checks ``--target`` after.
+    """
+    in_path = tmp_path / "in_notarget.csv"
+    pd.read_csv(tabular_csv).drop(columns=["y"]).to_csv(in_path, index=False)
+
+    cfg = tmp_path / "cfg.json"
+    cfg.write_text(json.dumps({"max_features": "5"}))  # string, not int
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--config",
+            str(cfg),
+        ]
+    )
+    assert rc == 2
+    # The real error is the type mismatch, NOT --target missing.
+    assert "max_features" in err
+    assert "--target" not in err
+
+
+def test_check_scalar_type_rejects_none_when_required():
+    """Direct unit test for ``_check_scalar_type`` to exercise the
+    ``allow_none=False`` + ``value is None`` branch, which the integration
+    path doesn't naturally hit (every scalar with ``allow_none=False`` has
+    a non-None default).
+    """
+    with pytest.raises(ValueError, match="must not be null"):
+        fc_cli._check_scalar_type("foo", None, (int,), allow_none=False)
+
+
+# -------------------------------------------------------------- error paths
+
+
+def test_transform_missing_input_returns_exit_2(tmp_path: Path):
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tmp_path / "nope.csv"),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "Input file not found" in err
+
+
+def test_transform_unknown_target_returns_exit_2(tmp_path: Path, tabular_csv: Path):
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "does_not_exist",
+        ]
+    )
+    assert rc == 2
+    assert "does_not_exist" in err
+
+
+def test_transform_unknown_extension_without_override(tmp_path: Path, tabular_csv: Path):
+    out_path = tmp_path / "out.weird"
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "infer format" in err.lower()
+
+
+def test_transform_format_override_accepted(tmp_path: Path, tabular_csv: Path):
+    out_path = tmp_path / "out.weird"
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--output-format",
+            "csv",
+        ]
+    )
+    assert rc == 0, err
+    assert out_path.exists()
+
+
+def test_invalid_config_file_returns_exit_2(tmp_path: Path, tabular_csv: Path):
+    bad = tmp_path / "bad.json"
+    bad.write_text("[1, 2, 3]")  # JSON, but not an object
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "o.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(bad),
+        ]
+    )
+    assert rc == 2
+    assert "JSON object" in err
+
+
+def test_unknown_config_top_level_key_returns_exit_2(tmp_path: Path, tabular_csv: Path):
+    """A typo in a top-level config key (``max_feature`` instead of
+    ``max_features``, etc.) must fail fast with a precise exit-2 message
+    listing the recognized keys — not silently run with defaults.
+    """
+    cfg = tmp_path / "cfg.json"
+    cfg.write_text(json.dumps({"max_feature": 5}))  # missing 's'
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "o.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(cfg),
+        ]
+    )
+    assert rc == 2
+    assert "max_feature" in err
+    assert "Recognized keys" in err or "recognized keys" in err.lower()
+
+
+def test_unknown_config_top_level_key_lists_known_keys(tmp_path: Path, tabular_csv: Path):
+    """The error message must enumerate the recognized keys so users can
+    self-correct without reading the source.
+    """
+    cfg = tmp_path / "cfg.json"
+    cfg.write_text(json.dumps({"selection_method": ["mutual_info"]}))  # missing 's'
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "o.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(cfg),
+        ]
+    )
+    assert rc == 2
+    assert "selection_method" in err
+    # Recognized-keys list must include the canonical names.
+    assert "selection_methods" in err
+    assert "max_features" in err
+
+
+def test_directory_as_config_returns_exit_2(tmp_path: Path, tabular_csv: Path):
+    """Pointing ``--config`` at a directory must surface as exit 2, not the
+    generic ``exit 1`` backstop (``IsADirectoryError``).
+    """
+    cfg_dir = tmp_path / "not_a_file"
+    cfg_dir.mkdir()
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "o.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(cfg_dir),
+        ]
+    )
+    assert rc == 2
+    assert "directory" in err.lower()
+
+
+def test_malformed_json_config_returns_exit_2(tmp_path: Path, tabular_csv: Path):
+    bad = tmp_path / "bad.json"
+    bad.write_text("{not valid json,}")
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "o.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(bad),
+        ]
+    )
+    assert rc == 2
+    assert "valid json" in err.lower()
+
+
+def test_non_dict_llm_config_returns_exit_2(tmp_path: Path, tabular_csv: Path):
+    """A non-mapping ``llm_config`` (e.g. a string) must be rejected at
+    config-load time with a clean exit 2, not bubble up as an
+    ``AttributeError`` from ``.get(...)`` deep inside engine construction.
+    """
+    cfg = tmp_path / "cfg.json"
+    cfg.write_text(json.dumps({"engines": ["tabular"], "llm_config": "gpt-5"}))
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "o.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(cfg),
+        ]
+    )
+    assert rc == 2
+    assert "llm_config" in err
+    assert "JSON object" in err or "mapping" in err.lower()
+
+
+def test_no_subcommand_exits_nonzero(capsys):
+    # main() now returns the argparse-reported exit code (2 for usage error)
+    # rather than letting SystemExit propagate, so programmatic callers get
+    # an integer back even on parse-time failures.
+    rc = fc_cli.main([])
+    assert rc == 2
+
+
+def test_unknown_flag_returns_exit_2(capsys):
+    rc = fc_cli.main(["transform", "--no-such-flag"])
+    assert rc == 2
+
+
+def test_argparse_usage_error_emits_single_structured_line(tmp_path: Path, tabular_csv: Path):
+    """``argparse`` defaults to writing a multi-line usage banner before its
+    error message, mixing two pieces of information on stderr that agents
+    must then parse apart. The CLI's ``_StructuredArgumentParser`` collapses
+    those into the single canonical ``featcopilot: error: <message>`` line
+    so usage failures match the rest of the exit-2 contract.
+    """
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--no-such-flag",  # genuine unknown flag (not a missing-required)
+        ]
+    )
+    assert rc == 2
+    err_lines = [line for line in err.splitlines() if line.strip()]
+    # Exactly one non-empty stderr line.
+    assert len(err_lines) == 1, f"Expected single-line stderr, got {err_lines!r}"
+    assert err_lines[0].startswith("featcopilot: error: ")
+    # No multi-line ``argparse`` usage banner.
+    assert "usage:" not in err.lower()
+    # Still mentions the offending flag.
+    assert "--no-such-flag" in err
+
+
+def test_argparse_missing_subcommand_emits_single_structured_line():
+    rc, _, err = _run([])
+    assert rc == 2
+    err_lines = [line for line in err.splitlines() if line.strip()]
+    assert len(err_lines) == 1, f"Expected single-line stderr, got {err_lines!r}"
+    assert err_lines[0].startswith("featcopilot: error: ")
+    assert "usage:" not in err.lower()
+
+
+def test_help_flag_returns_zero(capsys):
+    rc = fc_cli.main(["--help"])
+    assert rc == 0
+    captured = capsys.readouterr()
+    assert "featcopilot" in captured.out
+
+
+# ------------------------------------------------------------------ explain
+
+
+def test_explain_emits_json_payload(tmp_path: Path, tabular_csv: Path):
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(tabular_csv),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["status"] == "ok"
+    assert payload["engines"] == ["tabular"]
+    assert isinstance(payload["features"], list)
+    # The tabular engine actually generates derived features, and the explain
+    # subcommand must materialize them by running the full fit_transform
+    # pipeline (engines populate _feature_names during transform()).
+    assert payload["n_features"] > 0
+    assert len(payload["features"]) == payload["n_features"]
+    # Each feature entry is a dict with the expected keys.
+    entry = payload["features"][0]
+    assert {"name", "explanation", "code"} <= set(entry.keys())
+    assert entry["name"]
+
+
+def test_explain_uses_full_input_by_default(tmp_path: Path):
+    """``explain`` defaults to using the FULL input — no implicit
+    sub-sampling. Some engines (e.g. ``TabularEngine`` categorical
+    encoding) decide which features to plan based on row counts and
+    per-category statistics, so silent sampling would change the
+    advertised metadata. Sampling is opt-in via ``--explain-sample-size``.
+    """
+    rng = np.random.default_rng(0)
+    n = 1500  # arbitrary
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "big.csv"
+    df.to_csv(in_path, index=False)
+
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["status"] == "ok"
+    # Default: no sampling — full input is used.
+    assert payload["n_rows_used"] == n
+
+
+def test_explain_caps_input_size_when_sample_size_set(tmp_path: Path):
+    """When ``--explain-sample-size N`` is passed, the input is capped at
+    ``N`` rows (with a captured warning) so callers can opt into bounded
+    cost on huge inputs. The default remains full-input.
+    """
+    rng = np.random.default_rng(0)
+    n = 5000
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "big.csv"
+    df.to_csv(in_path, index=False)
+
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+            "--explain-sample-size",
+            "1000",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    # Sampling cap was enforced.
+    assert payload["n_rows_used"] == 1000
+    assert payload["n_features"] > 0
+    # The CLI emits a warning when sampling so callers can detect that
+    # metadata may not match a full-input transform run.
+    assert any("capping input" in w.lower() or "sampling" in w.lower() for w in payload["warnings"])
+
+
+def test_explain_sample_size_smaller_than_input_no_op(tmp_path: Path):
+    """When ``--explain-sample-size`` exceeds the actual input, no sampling
+    happens (and no warning is emitted).
+    """
+    rng = np.random.default_rng(0)
+    n = 50
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "small.csv"
+    df.to_csv(in_path, index=False)
+
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+            "--explain-sample-size",
+            "1000",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["n_rows_used"] == n
+    assert not any("capping input" in w.lower() or "sampling" in w.lower() for w in payload["warnings"])
+
+
+def test_explain_sample_size_via_config(tmp_path: Path):
+    """``explain_sample_size`` is also recognized in ``--config`` JSON."""
+    rng = np.random.default_rng(0)
+    n = 5000
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "big.csv"
+    df.to_csv(in_path, index=False)
+
+    cfg = tmp_path / "cfg.json"
+    cfg.write_text(json.dumps({"explain_sample_size": 500}))
+
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+            "--config",
+            str(cfg),
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["n_rows_used"] == 500
+
+
+@pytest.mark.parametrize("bad_value", [0, -1, -100])
+def test_explain_sample_size_rejects_non_positive(tmp_path: Path, bad_value):
+    """``--explain-sample-size`` must be a positive integer."""
+    rc, _, err = _run(
+        [
+            "explain",
+            "--input",
+            str(tmp_path / "in.csv"),  # missing — but flag check happens first
+            "--target",
+            "y",
+            "--explain-sample-size",
+            str(bad_value),
+        ]
+    )
+    # We accept either argparse-level rejection or our own ValueError;
+    # both surface as exit 2.
+    assert rc == 2
+
+
+def test_explain_sample_size_rejects_string_in_config(tmp_path: Path, tabular_csv: Path):
+    """Type-validation: ``"explain_sample_size": "100"`` (string) is rejected."""
+    cfg = tmp_path / "cfg.json"
+    cfg.write_text(json.dumps({"explain_sample_size": "100"}))
+    rc, _, err = _run(
+        [
+            "explain",
+            "--input",
+            str(tabular_csv),
+            "--target",
+            "y",
+            "--config",
+            str(cfg),
+        ]
+    )
+    assert rc == 2
+    assert "explain_sample_size" in err
+
+
+def test_explain_sample_size_rejects_zero_in_config(tmp_path: Path, tabular_csv: Path):
+    cfg = tmp_path / "cfg.json"
+    cfg.write_text(json.dumps({"explain_sample_size": 0}))
+    rc, _, err = _run(
+        [
+            "explain",
+            "--input",
+            str(tabular_csv),
+            "--target",
+            "y",
+            "--config",
+            str(cfg),
+        ]
+    )
+    assert rc == 2
+    assert "explain_sample_size" in err
+
+
+# --------------------------------------------------------------- parquet path
+
+
+def test_transform_parquet_missing_engine_returns_exit_2(tmp_path, tabular_csv, monkeypatch):
+    """When pyarrow/fastparquet is missing, the CLI should surface a clean
+    user-facing dependency error (exit 2) rather than the generic exit 1
+    backstop.
+    """
+    import pandas as pd
+
+    def _raise_import_error(self, *args, **kwargs):  # noqa: ANN001
+        raise ImportError("Missing optional dependency 'pyarrow' (simulated)")
+
+    monkeypatch.setattr(pd.DataFrame, "to_parquet", _raise_import_error, raising=True)
+
+    out_path = tmp_path / "out.parquet"
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--max-features",
+            "5",
+        ]
+    )
+    assert rc == 2
+    assert "parquet engine" in err.lower()
+
+
+def test_transform_read_parquet_missing_engine_returns_exit_2(tmp_path, tabular_csv, monkeypatch):
+    """Symmetric coverage for reading a .parquet input when no engine is installed.
+
+    The CLI must convert the ``ImportError`` from ``pd.read_parquet`` into
+    the deterministic exit-2 path (with a user-facing install hint),
+    just like the write path.
+    """
+    import pandas as pd
+
+    # Make sure the input path has a .parquet suffix so format detection picks parquet.
+    fake_pq = tmp_path / "fake.parquet"
+    fake_pq.write_bytes(b"")  # contents don't matter; we'll intercept read_parquet
+
+    def _raise_import_error(*args, **kwargs):
+        raise ImportError("Missing optional dependency 'pyarrow' (simulated)")
+
+    monkeypatch.setattr(pd, "read_parquet", _raise_import_error, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(fake_pq),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+        ]
+    )
+    assert rc == 2
+    assert "parquet engine" in err.lower()
+
+
+def test_parquet_engine_available_returns_false_when_neither_installed(monkeypatch):
+    """When ``__import__`` raises ``ImportError`` for both engines, the
+    function reports parquet as unavailable.
+    """
+    import builtins
+
+    real_import = builtins.__import__
+
+    def fake_import(name, *args, **kwargs):
+        if name in ("pyarrow", "fastparquet"):
+            raise ImportError(f"No module named '{name}' (simulated)")
+        return real_import(name, *args, **kwargs)
+
+    monkeypatch.setattr(builtins, "__import__", fake_import)
+    assert fc_cli._parquet_engine_available() is False
+
+
+def test_parquet_engine_available_returns_true_for_fastparquet_only(monkeypatch):
+    """Even without pyarrow, importing fastparquet must report parquet as available."""
+    import builtins
+
+    real_import = builtins.__import__
+
+    def fake_import(name, *args, **kwargs):
+        if name == "pyarrow":
+            raise ImportError("No module named 'pyarrow' (simulated)")
+        if name == "fastparquet":
+            # Simulate a successful import by short-circuiting; we don't
+            # actually need a real module object, just a non-raising return.
+            class _FakeModule:
+                pass
+
+            return _FakeModule()
+        return real_import(name, *args, **kwargs)
+
+    monkeypatch.setattr(builtins, "__import__", fake_import)
+    assert fc_cli._parquet_engine_available() is True
+
+
+def test_parquet_engine_available_returns_false_for_broken_native_install(monkeypatch):
+    """A distribution that's on sys.path but raises a non-ImportError at
+    import time (e.g. broken native bindings) is reported as unavailable.
+    Using ``__import__`` (rather than ``importlib.util.find_spec``) is what
+    makes this honest: ``find_spec`` would have returned a spec and lied.
+    """
+    import builtins
+
+    real_import = builtins.__import__
+
+    def fake_import(name, *args, **kwargs):
+        if name in ("pyarrow", "fastparquet"):
+            # Simulate a broken native install (loader-level failure).
+            raise OSError("broken native install: undefined symbol (simulated)")
+        return real_import(name, *args, **kwargs)
+
+    monkeypatch.setattr(builtins, "__import__", fake_import)
+    assert fc_cli._parquet_engine_available() is False
+
+
+def test_unreadable_config_returns_exit_2(tmp_path, tabular_csv, monkeypatch):
+    """An ``OSError`` while opening the config (permission denied, broken
+    symlink, etc.) is converted into the deterministic exit-2 path.
+    """
+    cfg = tmp_path / "cfg.json"
+    cfg.write_text("{}")
+
+    real_open = Path.open
+
+    def _raise_oserror(self, *args, **kwargs):
+        if self == cfg:
+            raise PermissionError("simulated read failure")
+        return real_open(self, *args, **kwargs)
+
+    monkeypatch.setattr(Path, "open", _raise_oserror, raising=True)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--config",
+            str(cfg),
+        ]
+    )
+    assert rc == 2
+    assert "could not be read" in err.lower()
+
+
+def test_explain_sample_size_handles_non_unique_index(tmp_path: Path):
+    """Sampling must keep X and y aligned even when the input frame has a
+    non-unique index — e.g. a parquet read that preserves a saved index
+    where labels can repeat. Positional sampling (``.iloc``) avoids the
+    label-based ``.loc`` expansion / reordering bug.
+    """
+    pytest.importorskip("pyarrow")  # parquet write needs an engine
+
+    rng = np.random.default_rng(0)
+    n = 4000
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    # Force a non-unique index — labels repeat (each label appears twice).
+    df.index = pd.Index([i // 2 for i in range(n)], name="duplicated_index")
+    in_path = tmp_path / "non_unique.parquet"
+    df.to_parquet(in_path, index=True)
+
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+            "--explain-sample-size",
+            "100",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["status"] == "ok"
+    # Sample size must be honored exactly, not expanded by ``.loc``-with-
+    # duplicate-labels behavior.
+    assert payload["n_rows_used"] == 100
+
+
+def test_include_target_collision_error_text_lists_only_actionable_options(
+    tmp_path: Path, tabular_csv: Path, monkeypatch
+):
+    """The error text emitted when ``--include-target`` would overwrite an
+    engineered feature must only suggest actions that are actually
+    possible from this command. The CLI does not offer auto-rename, so
+    the message must NOT mention "rename and retry" or any other phantom
+    option.
+    """
+    from featcopilot.transformers.sklearn_compat import AutoFeatureEngineer
+
+    real_fit_transform = AutoFeatureEngineer.fit_transform
+
+    def _patched_fit_transform(self, X, y=None, **kwargs):
+        result = real_fit_transform(self, X, y, **kwargs)
+        result = result.copy()
+        result["y"] = result.iloc[:, 0]
+        return result
+
+    monkeypatch.setattr(AutoFeatureEngineer, "fit_transform", _patched_fit_transform)
+
+    rc, _, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(tmp_path / "out.csv"),
+            "--target",
+            "y",
+            "--include-target",
+            "--max-features",
+            "5",
+        ]
+    )
+    assert rc == 2
+    # Must mention the real options.
+    assert "rename the target column" in err.lower()
+    assert "drop --include-target" in err
+    # Must NOT mention non-existent CLI options.
+    assert "accept the rename" not in err.lower()
+    assert "retry" not in err.lower()
+
+
+# ----------------------- explain --explain-sample-size memory bound
+
+
+def test_explain_sample_size_bounds_csv_read_with_nrows(tmp_path: Path, monkeypatch):
+    """``--explain-sample-size N`` must propagate to ``pd.read_csv`` as
+    ``nrows=N`` so the underlying read is memory-bounded for huge CSV
+    inputs (rather than fully loading the file and then trimming).
+    """
+    import pandas as pd
+
+    rng = np.random.default_rng(0)
+    n = 5000
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "big.csv"
+    df.to_csv(in_path, index=False)
+
+    real_read_csv = pd.read_csv
+    captured_kwargs: list[dict] = []
+
+    def _spy_read_csv(*args, **kwargs):
+        captured_kwargs.append(kwargs.copy())
+        return real_read_csv(*args, **kwargs)
+
+    monkeypatch.setattr(pd, "read_csv", _spy_read_csv, raising=True)
+
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+            "--explain-sample-size",
+            "200",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["n_rows_used"] == 200
+    # Must have called pd.read_csv with nrows=201 (sample_size + 1, the
+    # CLI requests one extra row so it can detect whether the input was
+    # actually larger than the cap and only emit the metadata-may-differ
+    # warning when truncation really happened). The full 5000-row file
+    # is never loaded.
+    explain_reads = [k for k in captured_kwargs if k.get("nrows") == 201]
+    assert explain_reads, f"expected pd.read_csv to be called with nrows=201; got {captured_kwargs!r}"
+
+
+def test_explain_sample_size_warns_post_read_for_parquet(tmp_path: Path):
+    """For parquet inputs, pandas has no native row-limit, so the bound
+    is applied post-read. The CLI must surface a warning describing the
+    limitation so callers know memory isn't strictly bounded. The
+    warning is emitted by ``_cmd_explain`` itself (not duplicated by
+    ``_read_table``) so the user sees one accurate message.
+    """
+    pytest.importorskip("pyarrow")
+    rng = np.random.default_rng(0)
+    n = 4000
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "big.parquet"
+    df.to_parquet(in_path, index=False)
+
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+            "--explain-sample-size",
+            "100",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["n_rows_used"] == 100
+    # The post-read truncation notice must appear in the captured warnings.
+    # The unified message says: "For parquet, pandas does not expose a
+    # native row-limit, so the full file ... was loaded into memory before
+    # truncation."
+    captured = " ".join(payload["warnings"]).lower()
+    assert "native row-limit" in captured or "post-read" in captured or "memory before truncation" in captured
+    # The user-facing message uses the actual sample_size (100), NOT the
+    # internal +1 read size, AND there is exactly one truncation notice.
+    assert "100 rows" in " ".join(payload["warnings"])
+    truncation_msgs = [w for w in payload["warnings"] if "truncat" in w.lower() or "capping" in w.lower()]
+    assert len(truncation_msgs) == 1, f"expected exactly one truncation notice, got {truncation_msgs!r}"
+
+
+# ----------------------- strict per-thread capture isolation
+
+
+def test_capture_does_not_route_unrelated_thread_records():
+    """The capture layer must use STRICT per-thread routing for non-LLM
+    records: records emitted on threads other than the one that opened
+    a capture flow through the normal handler chain (and reach stderr)
+    — they are NOT silently rolled into the single in-flight CLI run's
+    payload.
+
+    A previous "single-active-capture fallback" was too broad: when a
+    single CLI run was active, *any* featcopilot log on any thread
+    would have been swallowed into that command's payload, including
+    unrelated background work, causing misattribution. This test
+    guards against that regression for the non-LLM case (the narrow
+    LLM-only fallback is covered separately).
+    """
+    import threading
+
+    fc_logger = logging.getLogger("featcopilot.test_unrelated")
+
+    with fc_cli._capture_featcopilot_messages() as captured:
+        # Caller emits on its own thread (must be captured).
+        fc_logger.warning("from-caller")
+
+        # Spawn a separate, unrelated thread that ALSO emits via a
+        # NON-LLM featcopilot logger. With strict per-thread isolation
+        # for non-LLM records, that record must NOT appear in this
+        # capture's payload.
+        def _emit_elsewhere():
+            fc_logger.warning("from-other-thread")
+
+        t = threading.Thread(target=_emit_elsewhere)
+        t.start()
+        t.join()
+
+    assert any("from-caller" in m for m in captured)
+    # Strict per-thread isolation for non-LLM records: unrelated thread's
+    # record is NOT in this capture's payload.
+    assert not any("from-other-thread" in m for m in captured)
+
+
+def test_capture_routes_llm_client_worker_records_to_single_active_capture():
+    """The narrow LLM-client fallback: when a record originates from one
+    of the *whitelisted* sync LLM client modules
+    (``featcopilot.llm.copilot_client`` / ``litellm_client`` /
+    ``openai_client``) and exactly one capture is active, the record
+    is routed to that capture even when emitted from a worker thread.
+
+    This addresses the common case where an LLM sync client wrapping
+    ``ThreadPoolExecutor`` (the fallback used in event-loop
+    environments) emits a mock-mode startup warning on a worker thread
+    that ``submit()`` spawned. Without the narrow fallback, that
+    warning would bleed onto stderr on a successful run.
+    """
+    import threading
+    from concurrent.futures import ThreadPoolExecutor
+
+    # Use an actual whitelisted sync-client module name; an arbitrary
+    # ``featcopilot.llm.*`` name (e.g. ``test_client``) is intentionally
+    # NOT eligible — see ``test_capture_does_not_apply_llm_fallback_for_non_whitelisted_llm_loggers``.
+    llm_logger = logging.getLogger("featcopilot.llm.openai_client")
+
+    def _emit_llm_in_worker():
+        llm_logger.warning("llm-mock-mode-startup")
+        return "ok"
+
+    with fc_cli._capture_featcopilot_messages() as captured:
+        # Caller emits its own LLM record (current-thread path).
+        llm_logger.warning("llm-from-caller")
+        # ThreadPoolExecutor worker emits an LLM record (cross-thread,
+        # but the narrow LLM-only fallback should route it).
+        with ThreadPoolExecutor(max_workers=1) as pool:
+            assert pool.submit(_emit_llm_in_worker).result(timeout=5) == "ok"
+        # A raw threading.Thread emits an LLM record too.
+        t = threading.Thread(target=_emit_llm_in_worker)
+        t.start()
+        t.join()
+
+    # Caller's record + 2 worker records (one from pool, one from thread)
+    # are all in the capture.
+    assert any("llm-from-caller" in m for m in captured)
+    assert sum(1 for m in captured if "llm-mock-mode-startup" in m) >= 2
+
+
+def test_capture_does_not_apply_llm_fallback_for_non_whitelisted_llm_loggers():
+    """The narrow LLM-client fallback whitelist is an *exact* set of
+    sync-client module names — NOT a ``featcopilot.llm.*`` prefix.
+    Other ``featcopilot.llm.*`` loggers (e.g. ``semantic_engine``,
+    ``code_generator``, ``transform_rule_generator``, ``explainer``)
+    must keep strict per-thread isolation, so cross-thread records
+    from unrelated background work cannot be silently swallowed into
+    an active CLI capture.
+    """
+    import threading
+
+    non_whitelisted = [
+        "featcopilot.llm.semantic_engine",
+        "featcopilot.llm.code_generator",
+        "featcopilot.llm.transform_rule_generator",
+        "featcopilot.llm.explainer",
+        "featcopilot.llm.test_dummy",  # arbitrary subname — must NOT match
+    ]
+    captured_lists: list[list[str]] = []
+
+    for name in non_whitelisted:
+        other_logger = logging.getLogger(name)
+
+        def _emit_in_other_thread(logger=other_logger, tag=name):
+            logger.warning(f"{tag}-from-other-thread")
+
+        with fc_cli._capture_featcopilot_messages() as captured:
+            t = threading.Thread(target=_emit_in_other_thread)
+            t.start()
+            t.join()
+        captured_lists.append(list(captured))
+
+    for name, captured in zip(non_whitelisted, captured_lists, strict=True):
+        assert not any(
+            f"{name}-from-other-thread" in m for m in captured
+        ), f"Non-whitelisted LLM logger {name} unexpectedly tripped the cross-thread fallback"
+
+
+def test_capture_does_not_apply_llm_fallback_with_multiple_captures():
+    """When two captures are concurrently active, the narrow LLM
+    fallback stays disabled — strict per-thread isolation is preserved
+    so concurrent CLI calls don't cross-contaminate, even for LLM
+    records.
+    """
+    import threading
+    from concurrent.futures import ThreadPoolExecutor
+
+    llm_logger = logging.getLogger("featcopilot.llm.openai_client")
+    a_captured: list[str] = []
+    b_captured: list[str] = []
+    enter_barrier = threading.Barrier(2)
+    inside_barrier = threading.Barrier(2)
+    done_barrier = threading.Barrier(2)
+
+    def worker(tag: str, target: list[str]):
+        # Phase 0: both threads start at roughly the same time.
+        enter_barrier.wait()
+        with fc_cli._capture_featcopilot_messages() as captured:
+            llm_logger.warning(f"{tag}-direct")
+            # Phase 1: BOTH threads have entered their captures, so
+            # ``_state._per_thread`` has TWO entries when either thread's
+            # worker fires below — that's the multi-capture scenario the
+            # narrow LLM fallback must skip. Without this barrier the
+            # threads race: one thread's worker can fire before the other
+            # has pushed its capture, making ``len == 1`` and (incorrectly
+            # for this test's intent) tripping the fallback.
+            inside_barrier.wait()
+            with ThreadPoolExecutor(max_workers=1) as pool:
+                pool.submit(lambda t=tag: llm_logger.warning(f"{t}-worker")).result(timeout=5)
+            # Phase 2: BOTH threads' workers have completed before either
+            # exits its capture. This pins ``len == 2`` for the entire
+            # worker-emit window.
+            done_barrier.wait()
+        target.extend(captured)
+
+    t1 = threading.Thread(target=worker, args=("A", a_captured))
+    t2 = threading.Thread(target=worker, args=("B", b_captured))
+    t1.start()
+    t2.start()
+    t1.join()
+    t2.join()
+
+    # Each capture sees its own direct record (current-thread path).
+    assert any("A-direct" in m for m in a_captured)
+    assert any("B-direct" in m for m in b_captured)
+    # The worker record is NOT in either capture (fallback disabled
+    # because two captures were active during the worker emit).
+    assert not any("worker" in m for m in a_captured)
+    assert not any("worker" in m for m in b_captured)
+
+
+def test_capture_keeps_thread_isolation_with_multiple_active_captures():
+    """The single-active-capture fallback must NOT activate when two
+    threads are concurrently capturing — each must see only its own
+    thread's records, not records emitted on the other thread's
+    workers.
+    """
+    import threading
+
+    fc_logger = logging.getLogger("featcopilot.test_dual")
+    a_captured: list[str] = []
+    b_captured: list[str] = []
+    barrier = threading.Barrier(2)
+    inside = threading.Event()
+
+    def worker(tag: str, target: list[str]):
+        barrier.wait()
+        with fc_cli._capture_featcopilot_messages() as captured:
+            inside.set()
+            for i in range(10):
+                fc_logger.warning(f"{tag}-{i}")
+        target.extend(captured)
+
+    t1 = threading.Thread(target=worker, args=("A", a_captured))
+    t2 = threading.Thread(target=worker, args=("B", b_captured))
+    t1.start()
+    t2.start()
+    t1.join()
+    t2.join()
+
+    # Each capture must contain ONLY its own thread's records (no fallback
+    # cross-talk because two captures are active).
+    assert all("A-" in m for m in a_captured)
+    assert all("B-" in m for m in b_captured)
+    assert len(a_captured) == 10
+    assert len(b_captured) == 10
+
+
+def test_capture_decision_is_cached_per_record_for_atomic_filter_emit():
+    """The capture state must resolve each record's routing decision
+    *exactly once*, then cache the outcome on the record itself, so the
+    suppression filter and the routing handler always see the same
+    answer for that record. Otherwise a concurrent push/pop on another
+    thread could land between the filter (computed at handler-1 phase)
+    and the emit (computed at handler-2 phase), making the same record
+    both captured and emitted to stderr (or suppressed without being
+    captured) — breaking the CLI contract.
+    """
+    state = fc_cli._ThreadCaptureState()
+
+    class _CountingState:
+        """Wrap state so we can count ``get_for_llm_record`` calls."""
+
+        def __init__(self, inner):
+            self._inner = inner
+            self.calls: list[tuple[int, str]] = []
+
+        # Forward the attributes ``resolve_for_record`` reads.
+        @property
+        def _UNCACHED(self):
+            return self._inner._UNCACHED
+
+        def get_for_llm_record(self, tid, name):
+            self.calls.append((tid, name))
+            return self._inner.get_for_llm_record(tid, name)
+
+        # Re-bind ``resolve_for_record`` so calls are counted via the
+        # wrapped ``get_for_llm_record``.
+        def resolve_for_record(self, record):
+            return fc_cli._ThreadCaptureState.resolve_for_record(self, record)
+
+    counted = _CountingState(state)
+
+    record = logging.LogRecord(
+        name="featcopilot.llm.openai_client",
+        level=logging.WARNING,
+        pathname=__file__,
+        lineno=1,
+        msg="hello",
+        args=(),
+        exc_info=None,
+    )
+
+    # First call computes and caches; subsequent calls must not hit
+    # ``get_for_llm_record`` again.
+    first = counted.resolve_for_record(record)
+    second = counted.resolve_for_record(record)
+    third = counted.resolve_for_record(record)
+
+    assert first is second is third
+    assert len(counted.calls) == 1, (
+        "resolve_for_record must compute the decision exactly once per record; "
+        f"saw {len(counted.calls)} get_for_llm_record calls"
+    )
+
+    # The cached attribute is set on the record itself.
+    assert hasattr(record, "_featcopilot_capture_target")
+
+
+def test_capture_decision_stable_under_concurrent_pop_between_filter_and_emit():
+    """Regression test for the atomic filter/emit invariant: even if a
+    concurrent thread pops its capture between the moment a record is
+    filtered and the moment it is emitted, both phases see the SAME
+    decision because it was resolved and cached on the record once.
+    """
+    state = fc_cli._ThreadCaptureState()
+    cap_a: list[str] = []
+    state.push(threading.get_ident() ^ 1, cap_a)  # foreign-thread capture
+    try:
+        record = logging.LogRecord(
+            name="featcopilot.llm.copilot_client",
+            level=logging.WARNING,
+            pathname=__file__,
+            lineno=1,
+            msg="hi",
+            args=(),
+            exc_info=None,
+        )
+        # Phase 1: "filter" computes and caches ("len(_per_thread)==1"
+        # so the LLM fallback returns ``cap_a``).
+        first = state.resolve_for_record(record)
+        assert first is cap_a
+
+        # Concurrent pop: another thread tears its capture down. State
+        # would now produce a *different* answer for a fresh lookup.
+        state.pop(threading.get_ident() ^ 1)
+        fresh_lookup = state.get_for_llm_record(threading.get_ident(), record.name)
+        assert fresh_lookup is None  # state has indeed changed
+
+        # Phase 2: "emit" must still see the same decision via the cache.
+        second = state.resolve_for_record(record)
+        assert second is cap_a, (
+            "After a concurrent pop, resolve_for_record must still return the "
+            "originally cached decision so filter and emit cannot disagree"
+        )
+    finally:
+        # Clean up any stragglers.
+        state.pop(threading.get_ident() ^ 1)
+
+
+def test_run_helper_redirects_featcopilot_stream_handlers(monkeypatch):
+    """Regression test for the test helper itself: ``_run`` must
+    redirect every ``logging.StreamHandler`` on the ``featcopilot``
+    root logger so that any handler write that escapes the suppression
+    filter (the contract-violation scenario) lands in the captured
+    ``err`` buffer, NOT on the real terminal.
+
+    Without this redirect, every ``err == ""`` assertion in this file
+    would be vacuously satisfied because the ``StreamHandler`` installed
+    at import time holds a reference to the *original* ``sys.stderr``
+    object and ``redirect_stderr`` only swaps the module attribute.
+    """
+    fc_logger = logging.getLogger("featcopilot")
+    stream_handlers = [h for h in fc_logger.handlers if isinstance(h, logging.StreamHandler)]
+    assert stream_handlers, "featcopilot logger must have at least one StreamHandler"
+
+    # Stub ``cli.main`` to write directly through the StreamHandler's
+    # current ``stream`` attribute (which ``_run`` should have re-pointed
+    # at the captured ``err`` buffer).
+    def fake_main(argv):
+        for h in fc_logger.handlers:
+            if isinstance(h, logging.StreamHandler):
+                h.stream.write("HANDLER_LEAK_LINE\n")
+                h.stream.flush()
+        return 0
+
+    monkeypatch.setattr(fc_cli, "main", fake_main)
+    rc, _out, err = _run(["info"])
+    assert rc == 0
+    assert "HANDLER_LEAK_LINE" in err, (
+        "_run must redirect featcopilot StreamHandler streams; otherwise stderr-cleanliness " "assertions are vacuous"
+    )
+    # And the original stream is restored after the call.
+    for h in stream_handlers:
+        assert h.stream is sys.stderr or h.stream is sys.__stderr__
+
+
+# ----------------------- empty-input column-vs-row distinction
+
+
+def test_transform_zero_columns_input_distinguishes_from_zero_rows(tmp_path: Path):
+    """``DataFrame.empty`` is ``True`` for both zero-row AND zero-column
+    frames. The CLI must distinguish: a JSON array of empty objects
+    ``[{}, {}, ...]`` is a zero-COLUMN input (the user has no feature
+    columns), not a zero-ROW input. The error message must point at
+    the actual problem so callers can take the right remediation.
+    """
+    # JSON array of empty objects: pandas reads this as a frame with
+    # rows but no columns.
+    p = tmp_path / "empty_columns.json"
+    p.write_text("[{}, {}, {}]")
+    rc, _out, err = _run(["transform", "--input", str(p), "--output", str(tmp_path / "out.csv"), "--target", "y"])
+    assert rc == 2
+    assert "no columns" in err.lower(), err
+    assert "feature column" in err.lower(), err
+    assert "zero data rows" not in err.lower(), err
+
+
+def test_transform_zero_rows_input_still_uses_zero_rows_message(tmp_path: Path):
+    """The zero-row case (header but no data) still surfaces the
+    distinct "zero data rows" wording so the two failure modes are
+    distinguishable in CLI output.
+    """
+    p = tmp_path / "header_only.csv"
+    p.write_text("x1,x2,y\n")
+    rc, _out, err = _run(["transform", "--input", str(p), "--output", str(tmp_path / "out.csv"), "--target", "y"])
+    assert rc == 2
+    assert "zero data rows" in err.lower(), err
+    assert "no columns" not in err.lower(), err
+
+
+# ----------------------- transform read/write warnings captured (not stderr)
+
+
+def test_transform_read_warning_captured_not_on_stderr(tmp_path: Path, monkeypatch):
+    """``pd.read_csv`` can legitimately emit ``DtypeWarning`` on a
+    successful read with mixed-type columns. That warning must end up
+    in the JSON ``warnings`` field, NOT on stderr — the contract is
+    that successful runs keep stderr empty for agent callers.
+    """
+    import pandas as _pd
+
+    # Build a valid CSV input and a real fit_transform-able payload.
+    rng = np.random.default_rng(0)
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=50),
+            "x2": rng.integers(0, 5, size=50),
+            "y": rng.integers(0, 2, size=50),
+        }
+    )
+    in_path = tmp_path / "in.csv"
+    df.to_csv(in_path, index=False)
+    out_path = tmp_path / "out.csv"
+
+    # Patch ``pd.read_csv`` so that calling it emits a real Python
+    # ``warnings.warn`` (mirroring DtypeWarning on a successful read)
+    # while still returning the same DataFrame.
+    real_read_csv = _pd.read_csv
+
+    def warning_emitting_read_csv(*a, **kw):
+        warnings.warn("pandas-mock-read-csv: DtypeWarning equivalent", UserWarning, stacklevel=2)
+        return real_read_csv(*a, **kw)
+
+    monkeypatch.setattr(_pd, "read_csv", warning_emitting_read_csv)
+
+    rc, out, err = _run(
+        [
+            "transform",
+            "--input",
+            str(in_path),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--no-selection",
+            "--json",
+        ]
+    )
+    assert rc == 0, err
+    assert err == "", f"read-time warning leaked to stderr: {err!r}"
+    payload = json.loads(out)
+    assert any("pandas-mock-read-csv" in w for w in payload["warnings"]), payload["warnings"]
+
+
+def test_transform_write_warning_captured_not_on_stderr(tmp_path: Path, monkeypatch, tabular_csv: Path):
+    """Pandas/pyarrow can legitimately emit ``FutureWarning`` /
+    ``UserWarning`` during ``DataFrame.to_csv`` / ``to_parquet`` /
+    ``to_json`` on a successful write. Those warnings must end up in
+    the JSON ``warnings`` field, NOT on stderr.
+    """
+    out_path = tmp_path / "out.csv"
+
+    # Patch ``DataFrame.to_csv`` so calling it emits a warning while
+    # still actually writing the file.
+    real_to_csv = pd.DataFrame.to_csv
+
+    def warning_emitting_to_csv(self, *a, **kw):
+        warnings.warn("pandas-mock-to-csv: FutureWarning equivalent", FutureWarning, stacklevel=2)
+        return real_to_csv(self, *a, **kw)
+
+    monkeypatch.setattr(pd.DataFrame, "to_csv", warning_emitting_to_csv)
+
+    rc, out, err = _run(
+        [
+            "transform",
+            "--input",
+            str(tabular_csv),
+            "--output",
+            str(out_path),
+            "--target",
+            "y",
+            "--no-selection",
+            "--json",
+        ]
+    )
+    assert rc == 0, err
+    assert err == "", f"write-time warning leaked to stderr: {err!r}"
+    payload = json.loads(out)
+    assert any("pandas-mock-to-csv" in w for w in payload["warnings"]), payload["warnings"]
+
+
+# ----------------------- explain captures explain_features warnings
+
+
+def test_explain_features_warnings_captured_not_on_stderr(tmp_path: Path, monkeypatch, tabular_csv: Path):
+    """``explain_features`` / ``get_feature_code`` are now inside the
+    same capture as the read + ``fit_transform``, so any warning they
+    emit goes to the JSON ``warnings`` field, not stderr.
+    """
+    from featcopilot.transformers import sklearn_compat as _sc
+
+    real_explain = _sc.AutoFeatureEngineer.explain_features
+
+    def warning_emitting_explain(self):
+        warnings.warn("explain-features-mock-warning", UserWarning, stacklevel=2)
+        return real_explain(self)
+
+    monkeypatch.setattr(_sc.AutoFeatureEngineer, "explain_features", warning_emitting_explain)
+
+    rc, out, err = _run(["explain", "--input", str(tabular_csv), "--target", "y"])
+    assert rc == 0, err
+    assert err == "", f"explain_features warning leaked to stderr: {err!r}"
+    payload = json.loads(out)
+    assert any("explain-features-mock-warning" in w for w in payload["warnings"]), payload["warnings"]
+
+
+# ----------------------- explain --explain-sample-size warning hygiene
+
+
+def test_explain_no_sampling_warning_when_input_fits_exactly(tmp_path: Path):
+    """When the input has exactly ``--explain-sample-size`` rows, no
+    truncation actually happens, so the "metadata may differ" warning
+    must NOT fire. The success payload was previously inaccurate when
+    the warning fired on the boundary case.
+    """
+    rng = np.random.default_rng(0)
+    n = 200  # exactly the sample-size we'll request
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "exact.csv"
+    df.to_csv(in_path, index=False)
+
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+            "--explain-sample-size",
+            "200",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["n_rows_used"] == 200
+    # No "metadata may differ" warning — input fit naturally.
+    assert not any("capping input" in w.lower() or "metadata may differ" in w.lower() for w in payload["warnings"])
+
+
+def test_explain_no_sampling_warning_when_input_smaller_than_sample(tmp_path: Path):
+    """When the input has fewer rows than ``--explain-sample-size``,
+    obviously no truncation happens. Belt-and-suspenders coverage of
+    the "<= cap, no warning" branch.
+    """
+    rng = np.random.default_rng(0)
+    n = 50
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "small.csv"
+    df.to_csv(in_path, index=False)
+
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+            "--explain-sample-size",
+            "200",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["n_rows_used"] == n
+    assert not any("capping input" in w.lower() or "metadata may differ" in w.lower() for w in payload["warnings"])
+
+
+def test_explain_sampling_warning_fires_when_input_strictly_larger(tmp_path: Path):
+    """Strict proof of truncation: input has at least one MORE row than
+    the cap. The warning must fire, and the payload must report
+    ``n_rows_used == sample_size``.
+    """
+    rng = np.random.default_rng(0)
+    n = 201  # exactly one more than the cap
+    df = pd.DataFrame(
+        {
+            "x1": rng.normal(size=n),
+            "x2": rng.normal(size=n),
+            "y": rng.integers(0, 2, size=n),
+        }
+    )
+    in_path = tmp_path / "barely_over.csv"
+    df.to_csv(in_path, index=False)
+
+    rc, out, err = _run(
+        [
+            "explain",
+            "--input",
+            str(in_path),
+            "--target",
+            "y",
+            "--explain-sample-size",
+            "200",
+        ]
+    )
+    assert rc == 0, err
+    payload = json.loads(out)
+    assert payload["n_rows_used"] == 200
+    assert any("capping input" in w.lower() for w in payload["warnings"])
+
+
+def test_explain_sample_size_help_text_describes_head_slice_not_random_seed():
+    """The ``--explain-sample-size`` help text must accurately describe
+    the actual semantics (deterministic head slice, NOT a seeded random
+    sample). Guards against misleading users / agents who would expect
+    an unbiased sample.
+    """
+    parser = fc_cli._build_parser()
+    explain_parser = next(
+        action.choices["explain"] for action in parser._actions if isinstance(action, argparse._SubParsersAction)
+    )
+    sample_help = next(a.help for a in explain_parser._actions if "--explain-sample-size" in a.option_strings)
+    # Must accurately describe the implementation.
+    assert "head slice" in sample_help.lower() or "first n" in sample_help.lower()
+    # Must NOT use the misleading old phrasing.
+    assert "deterministic seed" not in sample_help.lower()
+    assert "random sample" not in sample_help.lower() or "not a random sample" in sample_help.lower()
+
+
+# ----------------------- python -m
+
+
+def test_dunder_main_module_runs(monkeypatch, capsys):
+    """``cli.main`` is invoked via the same code path as ``python -m featcopilot``."""
+    monkeypatch.setattr(sys, "argv", ["featcopilot", "info", "--json"])
+    rc = fc_cli.main(["info", "--json"])
+    assert rc == 0
+
+
+def test_dunder_main_subprocess_invocation():
+    """``python -m featcopilot info --json`` must succeed in a real subprocess.
+
+    Exercises ``featcopilot/__main__.py`` end-to-end so a regression in
+    module-form invocation (e.g. a broken import path) actually breaks the
+    test, not just the unit-level call to ``cli.main``.
+    """
+    import subprocess
+
+    result = subprocess.run(
+        [sys.executable, "-m", "featcopilot", "info", "--json"],
+        capture_output=True,
+        text=True,
+        timeout=60,
+        check=False,
+    )
+    assert result.returncode == 0, result.stderr
+    payload = json.loads(result.stdout)
+    assert payload["version"] == __version__
+    assert "tabular" in payload["supported_engines"]
+
+
+def test_dunder_main_subprocess_version_flag():
+    """``python -m featcopilot --version`` must print and exit 0."""
+    import subprocess
+
+    result = subprocess.run(
+        [sys.executable, "-m", "featcopilot", "--version"],
+        capture_output=True,
+        text=True,
+        timeout=30,
+        check=False,
+    )
+    assert result.returncode == 0, result.stderr
+    assert __version__ in result.stdout
+
+
+# ------------------------------------------------------- console script
+
+
+def _featcopilot_package_is_installed() -> bool:
+    """Return True iff the ``featcopilot`` distribution is installed in the
+    current environment (i.e. the entry-point machinery should have placed
+    the console script on ``PATH``).
+
+    Used by the console-script tests to distinguish two cases:
+
+    * Running tests directly against the source tree (``python -m pytest``
+      from a clean checkout, no ``pip install -e .``): the package is
+      *not* installed; the script is legitimately missing and the test
+      should ``skip`` rather than report a packaging bug.
+    * Running tests after ``pip install`` (the CI flow): the package IS
+      installed, so the script MUST be on ``PATH``. If it isn't, that's a
+      real ``[project.scripts]`` regression and the test should ``fail``,
+      not silently pass via skip.
+    """
+    try:
+        from importlib.metadata import PackageNotFoundError, distribution
+    except ImportError:  # pragma: no cover - py3.10+ always has this
+        return False
+    try:
+        distribution("featcopilot")
+    except PackageNotFoundError:
+        return False
+    return True
+
+
+def test_console_script_subprocess_invocation():
+    """The installed ``featcopilot`` console script must be on PATH and runnable.
+
+    Exercises the ``[project.scripts] featcopilot = "featcopilot.cli:main"``
+    entry point end-to-end so a typo or packaging regression in
+    ``pyproject.toml`` would actually break the suite. When the
+    ``featcopilot`` distribution is installed, the script must be on
+    ``PATH``: a missing script in that case is a real packaging
+    regression, not a test environment quirk, so we ``fail`` (not
+    ``skip``). The skip is reserved for the rare case of running tests
+    against an un-installed source tree.
+    """
+    import shutil
+    import subprocess
+
+    script = shutil.which("featcopilot")
+    if script is None:
+        if _featcopilot_package_is_installed():
+            pytest.fail(
+                "featcopilot package is installed but the `featcopilot` console "
+                "script is missing from PATH. This is a `[project.scripts]` "
+                "regression in pyproject.toml."
+            )
+        pytest.skip(
+            "featcopilot package is not installed in this environment; install "
+            "it with `pip install -e .` to exercise the console-script entry point."
+        )
+
+    result = subprocess.run(
+        [script, "info", "--json"],
+        capture_output=True,
+        text=True,
+        timeout=60,
+        check=False,
+    )
+    assert result.returncode == 0, result.stderr
+    payload = json.loads(result.stdout)
+    assert payload["version"] == __version__
+    assert "tabular" in payload["supported_engines"]
+
+
+def test_console_script_version_flag():
+    """Same install-aware skip/fail policy as
+    :func:`test_console_script_subprocess_invocation`.
+    """
+    import shutil
+    import subprocess
+
+    script = shutil.which("featcopilot")
+    if script is None:
+        if _featcopilot_package_is_installed():
+            pytest.fail(
+                "featcopilot package is installed but the `featcopilot` console "
+                "script is missing from PATH. This is a `[project.scripts]` "
+                "regression in pyproject.toml."
+            )
+        pytest.skip("featcopilot package is not installed in this environment.")
+
+    result = subprocess.run(
+        [script, "--version"],
+        capture_output=True,
+        text=True,
+        timeout=30,
+        check=False,
+    )
+    assert result.returncode == 0, result.stderr
+    assert __version__ in result.stdout