From b1765779a10056c3e001a4e339af067dcc9ee0dc Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Wed, 13 May 2026 00:53:42 -0400 Subject: [PATCH 1/2] refactor: rename precompile surface to preprocess --- README.md | 5 +- docs/DescriptionAndMilestones.md | 2 +- docs/llm-preprocessor.md | 3 +- .../integrations/litellm/with_preprocessor.py | 32 +++---- ...compiler_precall_hook_with_preprocessor.py | 24 ++--- .../open_webui_pipe_with_preprocessor.py | 46 +++++----- experimental/preprocessor/README.md | 15 ++-- experimental/preprocessor/__init__.py | 32 ++++--- experimental/preprocessor/constants.py | 10 +-- .../preprocessor/heuristic_precompiler.py | 7 -- .../preprocessor/heuristic_preprocessor.py | 56 ++++++------ .../preprocessor/output_validation.py | 56 ++++-------- src/context_compiler/repl.py | 14 +-- .../fixtures/preprocessor/public-api-v1.json | 17 ++-- tests/test_litellm_checkpoint_integration.py | 52 +++++------ tests/test_litellm_integration_error_paths.py | 10 +-- .../test_litellm_preprocessor_model_config.py | 18 ++-- tests/test_openwebui_preprocessor_pipe.py | 90 +++++++++---------- tests/test_preprocessor_conformance.py | 12 +-- tests/test_preprocessor_heuristic.py | 52 +++++------ .../test_preprocessor_heuristic_properties.py | 52 +++++------ tests/test_preprocessor_output_validation.py | 50 ++++------- .../test_preprocessor_validator_properties.py | 6 +- tests/test_repl.py | 48 ++++------ 24 files changed, 319 insertions(+), 390 deletions(-) delete mode 100644 experimental/preprocessor/heuristic_precompiler.py diff --git a/README.md b/README.md index ce729af..8145c1d 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,6 @@ context-compiler --with-preprocessor (heuristic + validation only). Near-miss inputs are not rewritten and are passed through to the engine, which continues to return clarify behavior for those forms. -`--with-precompiler` remains supported as a compatibility alias in 0.6.x. 
Or in code: ```python @@ -368,9 +367,7 @@ into canonical directives before compilation. It is designed to be conservative and must be used with validation: - reject-first; directive-adjacent unsafe forms abstain instead of rewriting -- all outputs must be validated with `parse_preprocessor_output(...)` (the - preferred preprocessor validation function; `parse_precompiler_output(...)` - remains as a compatibility alias) +- all outputs must be validated with `parse_preprocessor_output(...)` - no directive grammar expansion - raw outputs must not be passed directly to the compiler diff --git a/docs/DescriptionAndMilestones.md b/docs/DescriptionAndMilestones.md index a0811de..2bf1f90 100644 --- a/docs/DescriptionAndMilestones.md +++ b/docs/DescriptionAndMilestones.md @@ -116,7 +116,7 @@ Make engine behavior inspectable and externally controllable without guessing. - `--initial-state-file` - REPL LLM fallback as explicit optional mode: - `--with-llm-fallback` - - requires `--with-precompiler` + - requires `--with-preprocessor` - never implicit - inspectable via preview / JSON output - Explicit preprocessor policy for multi-line, multi-sentence, and conversational-prefix input diff --git a/docs/llm-preprocessor.md b/docs/llm-preprocessor.md index 5172117..25b08b0 100644 --- a/docs/llm-preprocessor.md +++ b/docs/llm-preprocessor.md @@ -25,8 +25,7 @@ Recommended conceptual flow: Otherwise pass the original input unchanged. All preprocessor outputs, including heuristic outputs, must be validated with -`parse_preprocessor_output(...)` (preferred; `parse_precompiler_output(...)` -remains as a compatibility alias) before being applied. +`parse_preprocessor_output(...)` before being applied. Raw heuristic/LLM outputs must not be passed directly to the compiler. 
diff --git a/examples/integrations/litellm/with_preprocessor.py b/examples/integrations/litellm/with_preprocessor.py index b250a37..34ae484 100644 --- a/examples/integrations/litellm/with_preprocessor.py +++ b/examples/integrations/litellm/with_preprocessor.py @@ -27,9 +27,9 @@ from context_compiler import State, get_policy_items, get_premise_value from context_compiler.engine import Engine from experimental.preprocessor import ( - PRECOMPILE_OUTCOME_DIRECTIVE, + PREPROCESS_OUTCOME_DIRECTIVE, parse_preprocessor_output, - precompile_heuristic, + preprocess_heuristic, render_prompt, ) @@ -197,7 +197,7 @@ def _prompt_file_path() -> Traversable: return _PROMPTS_DIR.joinpath("default.txt") -def _llm_fallback_precompile(message: str, state: State) -> str | None: +def _llm_fallback_preprocess(message: str, state: State) -> str | None: with as_file(_prompt_file_path()) as prompt_path: prompt = render_prompt(prompt_path, state) if prompt is None: @@ -242,13 +242,13 @@ def _llm_fallback_precompile(message: str, state: State) -> str | None: return parsed -def _precompile_user_input(message: str, state: State) -> str | None: +def _preprocess_user_input(message: str, state: State) -> str | None: # Heuristic first (fast + high precision), then optional LLM fallback. 
     try:
-        heuristic_result = precompile_heuristic(message)
+        heuristic_result = preprocess_heuristic(message)
         logger.debug("preprocessor: heuristic_outcome=%s", heuristic_result["outcome"])
         if (
-            heuristic_result["outcome"] == PRECOMPILE_OUTCOME_DIRECTIVE
+            heuristic_result["outcome"] == PREPROCESS_OUTCOME_DIRECTIVE
             and heuristic_result["directive"]
         ):
             parsed = parse_preprocessor_output(heuristic_result["directive"])
@@ -262,7 +262,7 @@ def _precompile_user_input(message: str, state: State) -> str | No
         return None
 
     try:
-        fallback_directive = _llm_fallback_precompile(message, state)
+        fallback_directive = _llm_fallback_preprocess(message, state)
         logger.debug("preprocessor: fallback_directive=%r", fallback_directive)
         return fallback_directive
     except Exception:
@@ -427,15 +427,15 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
     _restore_session_checkpoint_if_needed(engine, session_key)
     checkpoint_before = engine.export_checkpoint()
     pending_before = checkpoint_before.get("pending")
-    precompiled: str | None = None
+    preprocessed: str | None = None
     if pending_before is not None:
         compile_input = user_input
     else:
-        precompiled = _precompile_user_input(user_input, engine.state)
-        compile_input = precompiled if precompiled else user_input
+        preprocessed = _preprocess_user_input(user_input, engine.state)
+        compile_input = preprocessed if preprocessed else user_input
     logger.debug(
         "preprocessor: engine_input=%s",
-        "directive" if precompiled else f"user_input len={len(user_input)}",
+        "directive" if preprocessed else f"user_input len={len(user_input)}",
     )
 
     decision = engine.step(compile_input)
@@ -450,7 +450,7 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
             response_text,
             original_input=user_input,
             compiler_input=compile_input,
-            preprocessor_output=precompiled,
+            preprocessor_output=preprocessed,
             decision=decision,
             state_before=checkpoint_before.get("authoritative_state"),
             state_after=engine.state,
@@ -461,7 +461,7 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
             near_miss_prompt,
             original_input=user_input,
             compiler_input=compile_input,
-            preprocessor_output=precompiled,
+            preprocessor_output=preprocessed,
             decision={"kind": "clarify", "prompt_to_user": near_miss_prompt},
             state_before=checkpoint_before.get("authoritative_state"),
             state_after=engine.state,
@@ -474,7 +474,7 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
             response_text,
             original_input=user_input,
             compiler_input=compile_input,
-            preprocessor_output=precompiled,
+            preprocessor_output=preprocessed,
             decision=decision,
             state_before=checkpoint_before.get("authoritative_state"),
             state_after=engine.state,
@@ -486,7 +486,7 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
             response_text,
             original_input=user_input,
             compiler_input=compile_input,
-            preprocessor_output=precompiled,
+            preprocessor_output=preprocessed,
             decision=decision,
             state_before=checkpoint_before.get("authoritative_state"),
             state_after=engine.state,
@@ -500,7 +500,7 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
             response_text,
             original_input=user_input,
             compiler_input=compile_input,
-            preprocessor_output=precompiled,
+            preprocessor_output=preprocessed,
             decision=decision,
             state_before=checkpoint_before.get("authoritative_state"),
             state_after=compiled_state,
diff --git a/examples/integrations/litellm_proxy/context_compiler_precall_hook_with_preprocessor.py b/examples/integrations/litellm_proxy/context_compiler_precall_hook_with_preprocessor.py
index 27acbcc..b3e4b23 100644
--- a/examples/integrations/litellm_proxy/context_compiler_precall_hook_with_preprocessor.py
+++ b/examples/integrations/litellm_proxy/context_compiler_precall_hook_with_preprocessor.py
@@ -33,9 +33,9 @@ class CustomLogger:  # type: ignore[no-redef]
     get_premise_value,
 )
 from experimental.preprocessor import (
-    PRECOMPILE_OUTCOME_DIRECTIVE,
+    PREPROCESS_OUTCOME_DIRECTIVE,
     parse_preprocessor_output,
-    precompile_heuristic,
+    preprocess_heuristic,
     render_prompt,
 )
@@ -140,7 +140,7 @@ def _get_litellm_completion() -> Callable[..., object]:
     return cast(Callable[..., object], litellm_module.completion)
 
 
-def _llm_fallback_precompile(message: str, state: State) -> str | None:
+def _llm_fallback_preprocess(message: str, state: State) -> str | None:
     with as_file(_prompt_file_path()) as prompt_path:
         prompt = render_prompt(prompt_path, state)
     if prompt is None:
@@ -196,11 +196,11 @@ def _state_before_last_message(user_transcript: Transcript) -> State | None:
     return replay["state"]
 
 
-def _precompile_last_user_message(message: str, state: State | None) -> str | None:
+def _preprocess_last_user_message(message: str, state: State | None) -> str | None:
     try:
-        heuristic_result = precompile_heuristic(message)
+        heuristic_result = preprocess_heuristic(message)
         if (
-            heuristic_result["outcome"] == PRECOMPILE_OUTCOME_DIRECTIVE
+            heuristic_result["outcome"] == PREPROCESS_OUTCOME_DIRECTIVE
             and heuristic_result["directive"]
         ):
             parsed = parse_preprocessor_output(heuristic_result["directive"])
@@ -213,7 +213,7 @@ def _precompile_last_user_message(message: str, state: State | No
         return None
 
     try:
-        return _llm_fallback_precompile(message, state)
+        return _llm_fallback_preprocess(message, state)
     except Exception:
         logger.debug("litellm_proxy: fallback_exception", exc_info=True)
         return None
@@ -240,16 +240,16 @@ async def async_pre_call_hook(
         transcript_for_replay = user_transcript
         replaced_last_user_message = False
-        precompiled: str | None = None
+        preprocessed: str | None = None
         if user_transcript:
             last_user_content = cast(str, user_transcript[-1]["content"])
             prior_state = _state_before_last_message(user_transcript)
-            precompiled = _precompile_last_user_message(last_user_content, prior_state)
-            logger.debug("litellm_proxy: precompiled=%r", precompiled)
-            if precompiled:
+            preprocessed = _preprocess_last_user_message(last_user_content, prior_state)
+            logger.debug("litellm_proxy: preprocessed=%r", preprocessed)
+            if preprocessed:
                 transcript_for_replay = [*user_transcript]
-                transcript_for_replay[-1] = {"role": "user", "content": precompiled}
+                transcript_for_replay[-1] = {"role": "user", "content": preprocessed}
                 replaced_last_user_message = True
 
         logger.debug("litellm_proxy: replaced_last_user_message=%s", replaced_last_user_message)
diff --git a/examples/integrations/openwebui/open_webui_pipe_with_preprocessor.py b/examples/integrations/openwebui/open_webui_pipe_with_preprocessor.py
index d943a28..b7c3331 100644
--- a/examples/integrations/openwebui/open_webui_pipe_with_preprocessor.py
+++ b/examples/integrations/openwebui/open_webui_pipe_with_preprocessor.py
@@ -50,9 +50,9 @@ def Field(*, default: Any, description: str = "") -> Any:  # type: ignore[no-red
 from context_compiler import State, create_engine, get_policy_items, get_premise_value
 from context_compiler.engine import Engine
 from experimental.preprocessor import (
-    PRECOMPILE_OUTCOME_DIRECTIVE,
+    PREPROCESS_OUTCOME_DIRECTIVE,
     parse_preprocessor_output,
-    precompile_heuristic,
+    preprocess_heuristic,
     render_prompt,
 )
@@ -629,7 +629,7 @@ async def _validate_configured_model_ids(
             )
         return None
 
-    async def _llm_fallback_precompile(
+    async def _llm_fallback_preprocess(
         self,
         message: str,
         state: State,
@@ -676,7 +676,7 @@ async def _llm_fallback_precompile(
             return None, None
         return parsed, None
 
-    async def _precompile_user_input(
+    async def _preprocess_user_input(
         self,
         message: str,
         state: State,
@@ -688,10 +688,10 @@ async def _precompile_user_input(
     ) -> tuple[str | None, str | None]:
         # Heuristic first for precision, determinism, and low latency.
         # If heuristic does not produce a directive, try Open WebUI-native fallback.
- heuristic_result = precompile_heuristic(message) + heuristic_result = preprocess_heuristic(message) if ( - heuristic_result["outcome"] == PRECOMPILE_OUTCOME_DIRECTIVE + heuristic_result["outcome"] == PREPROCESS_OUTCOME_DIRECTIVE and heuristic_result["directive"] ): parsed = parse_preprocessor_output(heuristic_result["directive"]) @@ -702,12 +702,12 @@ async def _precompile_user_input( return None, None # In debug mode with missing base/preprocessor model ids, skip fallback - # precompile entirely so we never attempt an empty-model LLM call. + # preprocess entirely so we never attempt an empty-model LLM call. model_id = _normalize_model_id(model_id) if model_id is None: return None, None - return await self._llm_fallback_precompile( + return await self._llm_fallback_preprocess( message, state, request=request, @@ -816,7 +816,7 @@ async def pipe( ) -> Any: # Open WebUI integration entrypoint: # 1) extract latest user input - # 2) run precompile (heuristic -> LLM fallback) + # 2) run preprocess (heuristic -> LLM fallback) # 3) pass directive or original input to engine.step(...) 
         # 4) map decision back to Open WebUI response behavior
         raw_messages = body.get("messages")
@@ -882,10 +882,10 @@ async def pipe(
         checkpoint_before = engine.export_checkpoint()
         pending_before = checkpoint_before.get("pending")
-        precompiled: str | None = None
-        precompile_error: str | None = None
+        preprocessed: str | None = None
+        preprocess_error: str | None = None
         if pending_before is None:
-            precompiled, precompile_error = await self._precompile_user_input(
+            preprocessed, preprocess_error = await self._preprocess_user_input(
                 latest_user_text,
                 engine.state,
                 request=__request__,
@@ -893,13 +893,13 @@
                 prompt_profile=self.valves.PREPROCESSOR_PROMPT_PROFILE,
                 model_id=effective_preprocessor_model,
             )
-            if precompile_error is not None:
-                return precompile_error
+            if preprocess_error is not None:
+                return preprocess_error
 
-        logger.debug("preprocessor: precompiled=%r", precompiled)
-        # Preserve core behavior: if precompile yields no directive, use raw user
+        logger.debug("preprocessor: preprocessed=%r", preprocessed)
+        # Preserve core behavior: if preprocess yields no directive, use raw user
         # text so the compiler still decides clarify/passthrough/update.
-        compile_input = precompiled if precompiled is not None else latest_user_text
+        compile_input = preprocessed if preprocessed is not None else latest_user_text
         logger.debug("preprocessor: engine_input=%r", compile_input)
 
         decision = engine.step(compile_input)
@@ -920,7 +920,7 @@
                 decision=decision,
                 state_before=state_before,
                 state_after=state_after,
-                preprocessor_output=precompiled,
+                preprocessor_output=preprocessed,
                 llm_called=False,
             )
         if near_miss_prompt is not None and kind == "passthrough":
@@ -931,7 +931,7 @@
                 decision={"kind": "clarify", "prompt_to_user": near_miss_prompt},
                 state_before=state_before,
                 state_after=state_after,
-                preprocessor_output=precompiled,
+                preprocessor_output=preprocessed,
                 llm_called=False,
             )
         if kind == "passthrough":
@@ -948,7 +948,7 @@
                 decision=decision,
                 state_before=state_before,
                 state_after=state_after,
-                preprocessor_output=precompiled,
+                preprocessor_output=preprocessed,
                 llm_called=base_model_id is not None,
             )
         if kind == "update":
@@ -961,7 +961,7 @@
                 decision=decision,
                 state_before=state_before,
                 state_after=state_after,
-                preprocessor_output=precompiled,
+                preprocessor_output=preprocessed,
                 llm_called=False,
             )
             response = await self._forward_update(
@@ -978,7 +978,7 @@
                 decision=decision,
                 state_before=state_before,
                 state_after=state_after,
-                preprocessor_output=precompiled,
+                preprocessor_output=preprocessed,
                 llm_called=base_model_id is not None,
                 state_injected=_active_state_summary(state_after),
             )
@@ -996,6 +996,6 @@
             decision=decision,
             state_before=state_before,
             state_after=state_after,
-            preprocessor_output=precompiled,
+            preprocessor_output=preprocessed,
             llm_called=base_model_id is not None,
         )
diff --git a/experimental/preprocessor/README.md b/experimental/preprocessor/README.md
index d46de13..5f80724 100644
--- a/experimental/preprocessor/README.md
+++ b/experimental/preprocessor/README.md
@@ -12,14 +12,11 @@ Integrations should import this
package from the installed environment rather than using repo-relative preprocessor paths. Compatibility note: -- Prefer `heuristic_preprocessor.py` and `parse_preprocessor_output(...)`. -- `heuristic_precompiler.py` and `parse_precompiler_output(...)` remain - supported compatibility aliases in 0.6.x. +- Use `heuristic_preprocessor.py` and `parse_preprocessor_output(...)`. ## Modules - `heuristic_preprocessor.py`: conservative structural preprocessing pass. -- `heuristic_precompiler.py`: compatibility re-export for older imports. - `output_validation.py`: shared normalization/validation boundary. - `prompt_utils.py`: state-aware prompt rendering helper. - `constants.py`: shared protocol literals and directive validation patterns. @@ -31,12 +28,10 @@ Compatibility note: Public validator entry point: - `parse_preprocessor_output(raw_output: object, *, source_input: str | None = None) -> str | None` -- `parse_precompiler_output(raw_output: object, *, source_input: str | None = None) -> str | None` (compatibility alias) -- `validate_precompiler_output(raw_output: object, *, source_input: str | None = None) -> dict` +- `validate_preprocessor_output(raw_output: object, *, source_input: str | None = None) -> dict` All preprocessor outputs (heuristic or LLM) must be validated with -`parse_preprocessor_output(...)` (preferred; `parse_precompiler_output(...)` -remains as a compatibility alias) before being applied. +`parse_preprocessor_output(...)` before being applied. Classification contract: @@ -65,10 +60,10 @@ canonical directives accepted by the compiler. ## Safe usage pattern -1. Run `precompile_heuristic(message)`. +1. Run `preprocess_heuristic(message)`. 2. If a heuristic candidate directive exists, validate it with `parse_preprocessor_output(...)`. -3. If no valid directive was produced, run LLM fallback precompile. +3. If no valid directive was produced, run LLM fallback preprocess. 4. 
Validate fallback output with `parse_preprocessor_output(..., source_input=message)`. 5. If a valid directive is produced, pass it through a normal compiler input path. diff --git a/experimental/preprocessor/__init__.py b/experimental/preprocessor/__init__.py index d622f78..cf08253 100644 --- a/experimental/preprocessor/__init__.py +++ b/experimental/preprocessor/__init__.py @@ -1,30 +1,28 @@ """Experimental preprocessor integration helpers and prompts.""" from .constants import ( - PRECOMPILE_OUTCOME_DIRECTIVE, - PRECOMPILE_OUTCOME_NO_DIRECTIVE, - PRECOMPILE_OUTCOME_UNKNOWN, - PRECOMPILER_NO_DIRECTIVE_SENTINEL, - PrecompileOutcome, + PREPROCESS_OUTCOME_DIRECTIVE, + PREPROCESS_OUTCOME_NO_DIRECTIVE, + PREPROCESS_OUTCOME_UNKNOWN, + PREPROCESSOR_NO_DIRECTIVE_SENTINEL, + PreprocessOutcome, ) -from .heuristic_preprocessor import PrecompileResult, precompile_heuristic +from .heuristic_preprocessor import PreprocessResult, preprocess_heuristic from .output_validation import ( - parse_precompiler_output, parse_preprocessor_output, - validate_precompiler_output, + validate_preprocessor_output, ) from .prompt_utils import render_prompt __all__ = [ - "PRECOMPILE_OUTCOME_DIRECTIVE", - "PRECOMPILE_OUTCOME_NO_DIRECTIVE", - "PRECOMPILE_OUTCOME_UNKNOWN", - "PRECOMPILER_NO_DIRECTIVE_SENTINEL", - "PrecompileResult", - "PrecompileOutcome", + "PREPROCESS_OUTCOME_DIRECTIVE", + "PREPROCESS_OUTCOME_NO_DIRECTIVE", + "PREPROCESS_OUTCOME_UNKNOWN", + "PREPROCESSOR_NO_DIRECTIVE_SENTINEL", + "PreprocessResult", + "PreprocessOutcome", "parse_preprocessor_output", - "parse_precompiler_output", - "precompile_heuristic", + "preprocess_heuristic", "render_prompt", - "validate_precompiler_output", + "validate_preprocessor_output", ] diff --git a/experimental/preprocessor/constants.py b/experimental/preprocessor/constants.py index 7f8d62a..c4da269 100644 --- a/experimental/preprocessor/constants.py +++ b/experimental/preprocessor/constants.py @@ -3,12 +3,12 @@ import re from typing import Final, 
Literal -PRECOMPILER_NO_DIRECTIVE_SENTINEL: Final = "" +PREPROCESSOR_NO_DIRECTIVE_SENTINEL: Final = "" -PRECOMPILE_OUTCOME_DIRECTIVE: Final = "directive" -PRECOMPILE_OUTCOME_NO_DIRECTIVE: Final = "no_directive" -PRECOMPILE_OUTCOME_UNKNOWN: Final = "unknown" -PrecompileOutcome = Literal["directive", "no_directive", "unknown"] +PREPROCESS_OUTCOME_DIRECTIVE: Final = "directive" +PREPROCESS_OUTCOME_NO_DIRECTIVE: Final = "no_directive" +PREPROCESS_OUTCOME_UNKNOWN: Final = "unknown" +PreprocessOutcome = Literal["directive", "no_directive", "unknown"] PROMPT_TOKEN_NULL_OR_VALUE: Final = "" PROMPT_TOKEN_POLICY_SET: Final = "" diff --git a/experimental/preprocessor/heuristic_precompiler.py b/experimental/preprocessor/heuristic_precompiler.py deleted file mode 100644 index 2e7cd4f..0000000 --- a/experimental/preprocessor/heuristic_precompiler.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Compatibility shim for heuristic preprocessing module name. - -Prefer importing from ``experimental.preprocessor.heuristic_preprocessor``. -This module is kept to avoid breaking older imports. 
-""" - -from .heuristic_preprocessor import * # noqa: F403 diff --git a/experimental/preprocessor/heuristic_preprocessor.py b/experimental/preprocessor/heuristic_preprocessor.py index 8ec35f2..df34143 100644 --- a/experimental/preprocessor/heuristic_preprocessor.py +++ b/experimental/preprocessor/heuristic_preprocessor.py @@ -13,24 +13,24 @@ from .constants import ( CANONICAL_DIRECTIVE_EXACT, CANONICAL_DIRECTIVE_PATTERNS, - PRECOMPILE_OUTCOME_DIRECTIVE, - PRECOMPILE_OUTCOME_NO_DIRECTIVE, - PRECOMPILE_OUTCOME_UNKNOWN, - PrecompileOutcome, + PREPROCESS_OUTCOME_DIRECTIVE, + PREPROCESS_OUTCOME_NO_DIRECTIVE, + PREPROCESS_OUTCOME_UNKNOWN, + PreprocessOutcome, ) except ImportError: # pragma: no cover - direct module loading in tests/evals from experimental.preprocessor.constants import ( CANONICAL_DIRECTIVE_EXACT, CANONICAL_DIRECTIVE_PATTERNS, - PRECOMPILE_OUTCOME_DIRECTIVE, - PRECOMPILE_OUTCOME_NO_DIRECTIVE, - PRECOMPILE_OUTCOME_UNKNOWN, - PrecompileOutcome, + PREPROCESS_OUTCOME_DIRECTIVE, + PREPROCESS_OUTCOME_NO_DIRECTIVE, + PREPROCESS_OUTCOME_UNKNOWN, + PreprocessOutcome, ) -class PrecompileResult(TypedDict): - outcome: PrecompileOutcome +class PreprocessResult(TypedDict): + outcome: PreprocessOutcome directive: str | None rule_id: str | None @@ -140,14 +140,14 @@ def _is_quoted_or_backtick_wrapped(message: str) -> bool: return (stripped[0], stripped[-1]) in {('"', '"'), ("'", "'"), ("`", "`")} -def precompile_heuristic(message: str) -> PrecompileResult: +def preprocess_heuristic(message: str) -> PreprocessResult: """Run the conservative structural heuristic preprocessing pass. Args: message: Raw user text to evaluate as a possible directive. 
Returns: - A PrecompileResult with: + A PreprocessResult with: - outcome="directive" and a canonical directive string when matched - outcome="no_directive" when the heuristic abstains/rejects - outcome="unknown" when unresolved and LLM fallback may be attempted @@ -158,7 +158,7 @@ def precompile_heuristic(message: str) -> PrecompileResult: """ if _LIST_MARKER_PATTERN.match(message): return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.list_or_enumeration", } @@ -167,49 +167,49 @@ def precompile_heuristic(message: str) -> PrecompileResult: if "?" in message and _DIRECTIVE_CUE_PATTERN.search(normalized): return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.question_form", } if _META_PREFIX_PATTERN.match(normalized): return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.meta_or_reporting", } if _MULTI_SEGMENT_PATTERN.match(normalized): return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.multi_segment_or_mixed_prose", } if normalized in _MULTI_INSTRUCTION_CASES: return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.multi_instruction", } if _contains_reporting_bracket_mention(message): return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.quoted_reported_bracket", } if _is_quoted_or_backtick_wrapped(message): return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.quoted_exact", } if normalized in _QUOTED_OR_REPORTED_CASES: return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": 
"reject.quoted_reported", } @@ -218,14 +218,14 @@ def precompile_heuristic(message: str) -> PrecompileResult: if normalized in _NEAR_MISS_ALIAS_CASES: return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.near_miss_alias", } if normalized in _ADMIN_NEAR_MISS_CASES: return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.admin_near_miss_alias", } @@ -235,21 +235,21 @@ def precompile_heuristic(message: str) -> PrecompileResult: and " instead of " not in normalized_candidate ) or (" in stead of " in normalized_candidate): return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.malformed_replacement_syntax", } if _MULTI_CANDIDATE_DIRECTIVE_PATTERN.search(normalized_candidate): return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.multi_candidate_directive", } if normalized_candidate in CANONICAL_DIRECTIVE_EXACT: return { - "outcome": PRECOMPILE_OUTCOME_DIRECTIVE, + "outcome": PREPROCESS_OUTCOME_DIRECTIVE, "directive": normalized_candidate, "rule_id": "canonical.full_match", } @@ -257,20 +257,20 @@ def precompile_heuristic(message: str) -> PrecompileResult: for pattern in CANONICAL_DIRECTIVE_PATTERNS: if pattern.fullmatch(normalized_candidate): return { - "outcome": PRECOMPILE_OUTCOME_DIRECTIVE, + "outcome": PREPROCESS_OUTCOME_DIRECTIVE, "directive": normalized_candidate, "rule_id": "canonical.full_match", } if _DIRECTIVE_CUE_PATTERN.search(normalized_candidate): return { - "outcome": PRECOMPILE_OUTCOME_UNKNOWN, + "outcome": PREPROCESS_OUTCOME_UNKNOWN, "directive": None, "rule_id": "reject.directive_adjacent_unsafe", } return { - "outcome": PRECOMPILE_OUTCOME_NO_DIRECTIVE, + "outcome": PREPROCESS_OUTCOME_NO_DIRECTIVE, "directive": None, "rule_id": 
"reject.confident_non_directive", } diff --git a/experimental/preprocessor/output_validation.py b/experimental/preprocessor/output_validation.py index b0bb646..73602f4 100644 --- a/experimental/preprocessor/output_validation.py +++ b/experimental/preprocessor/output_validation.py @@ -2,8 +2,7 @@ Public API: - parse_preprocessor_output -- parse_precompiler_output (compatibility alias) -- validate_precompiler_output +- validate_preprocessor_output Internal helpers are implementation details and may change. """ @@ -15,19 +14,18 @@ from .constants import ( CANONICAL_DIRECTIVE_EXACT, CANONICAL_DIRECTIVE_PATTERNS, - PRECOMPILER_NO_DIRECTIVE_SENTINEL, - PrecompileOutcome, + PREPROCESSOR_NO_DIRECTIVE_SENTINEL, + PreprocessOutcome, ) __all__ = [ "parse_preprocessor_output", - "parse_precompiler_output", - "validate_precompiler_output", + "validate_preprocessor_output", ] -class PrecompilerValidationResult(TypedDict): - classification: PrecompileOutcome +class PreprocessorValidationResult(TypedDict): + classification: PreprocessOutcome output: str | None @@ -39,15 +37,15 @@ class PrecompilerValidationResult(TypedDict): _CHANGE_PREMISE_MISSING_TO_NEAR_MISS_PATTERN = re.compile(r"^change premise\s+(?!to\b)(.+\S)\s*$") -def _unknown() -> PrecompilerValidationResult: +def _unknown() -> PreprocessorValidationResult: return {"classification": "unknown", "output": None} -def _directive(output: str) -> PrecompilerValidationResult: +def _directive(output: str) -> PreprocessorValidationResult: return {"classification": "directive", "output": output} -def _no_directive() -> PrecompilerValidationResult: +def _no_directive() -> PreprocessorValidationResult: return {"classification": "no_directive", "output": None} @@ -82,7 +80,7 @@ def _is_safe_fallback_directive_rewrite(source_input: str, directive_output: str return True -def _validate_structured_output(raw_output: object) -> PrecompilerValidationResult: +def _validate_structured_output(raw_output: object) -> 
PreprocessorValidationResult: if not isinstance(raw_output, dict): return _unknown() @@ -119,12 +117,12 @@ def _validate_structured_output(raw_output: object) -> PrecompilerValidationResu return _unknown() -def _validate_text_output(raw_output: str) -> PrecompilerValidationResult: +def _validate_text_output(raw_output: str) -> PreprocessorValidationResult: stripped = raw_output.strip() if not stripped: return _unknown() - if stripped.upper() == PRECOMPILER_NO_DIRECTIVE_SENTINEL: + if stripped.upper() == PREPROCESSOR_NO_DIRECTIVE_SENTINEL: return _no_directive() if _contains_multiple_candidate_directives(stripped): @@ -143,9 +141,9 @@ def _validate_text_output(raw_output: str) -> PrecompilerValidationResult: return _unknown() -def validate_precompiler_output( +def validate_preprocessor_output( raw_output: object, *, source_input: str | None = None -) -> PrecompilerValidationResult: +) -> PreprocessorValidationResult: """Validate raw preprocessor output into a strict classification/output result. Contract: @@ -169,29 +167,9 @@ def validate_precompiler_output( return validated -def parse_precompiler_output(raw_output: object, *, source_input: str | None = None) -> str | None: - """Compatibility wrapper returning only validated directive output. - - Args: - raw_output: Raw value produced by heuristic or LLM preprocessing. - - Returns: - Canonical directive string when valid, else None. - - Notes: - This is the public validation boundary. Preprocessor outputs must be - passed through this function before being applied to compiler paths. 
- """ - validated = validate_precompiler_output(raw_output, source_input=source_input) +def parse_preprocessor_output(raw_output: object, *, source_input: str | None = None) -> str | None: + """Public validation boundary returning only validated directive output.""" + validated = validate_preprocessor_output(raw_output, source_input=source_input) if validated["classification"] == "directive": return validated["output"] return None - - -def parse_preprocessor_output(raw_output: object, *, source_input: str | None = None) -> str | None: - """Preferred name for the preprocessor validation boundary. - - Backward compatibility note: - ``parse_precompiler_output`` remains supported as an alias. - """ - return parse_precompiler_output(raw_output, source_input=source_input) diff --git a/src/context_compiler/repl.py b/src/context_compiler/repl.py index fc7eab7..c3603dd 100644 --- a/src/context_compiler/repl.py +++ b/src/context_compiler/repl.py @@ -102,8 +102,8 @@ def _has_pending_clarification(engine: Engine) -> bool: return checkpoint["pending"] is not None -def _compile_input(raw_input: str, engine: Engine, *, use_precompiler: bool) -> str: - if not use_precompiler: +def _compile_input(raw_input: str, engine: Engine, *, use_preprocessor: bool) -> str: + if not use_preprocessor: return raw_input if _has_pending_clarification(engine): return raw_input @@ -111,7 +111,7 @@ def _compile_input(raw_input: str, engine: Engine, *, use_precompiler: bool) -> return parsed if parsed is not None else raw_input -def run_repl(in_stream: TextIO, out_stream: TextIO, *, use_precompiler: bool = False) -> None: +def run_repl(in_stream: TextIO, out_stream: TextIO, *, use_preprocessor: bool = False) -> None: engine = create_engine() if _is_interactive(in_stream, out_stream): @@ -135,7 +135,7 @@ def run_repl(in_stream: TextIO, out_stream: TextIO, *, use_precompiler: bool = F _print_interactive_help(out_stream) continue - compile_input = _compile_input(user_input, engine, 
use_precompiler=use_precompiler) + compile_input = _compile_input(user_input, engine, use_preprocessor=use_preprocessor) decision = engine.step(compile_input) _print_decision_lines(decision, out_stream, leading_blank=True) return @@ -147,7 +147,7 @@ def run_repl(in_stream: TextIO, out_stream: TextIO, *, use_precompiler: bool = F user_input = line.rstrip("\n") if user_input.strip().lower() in _EXIT_TOKENS: return - compile_input = _compile_input(user_input, engine, use_precompiler=use_precompiler) + compile_input = _compile_input(user_input, engine, use_preprocessor=use_preprocessor) decision = engine.step(compile_input) _print_decision_lines(decision, out_stream, leading_blank=False) @@ -166,8 +166,8 @@ def main() -> int: # pragma: no cover print(__version__, file=sys.stdout) return 0 - if args in (["--with-preprocessor"], ["--with-precompiler"]): - run_repl(sys.stdin, sys.stdout, use_precompiler=True) + if args == ["--with-preprocessor"]: + run_repl(sys.stdin, sys.stdout, use_preprocessor=True) return 0 bad_arg = args[0] diff --git a/tests/fixtures/preprocessor/public-api-v1.json b/tests/fixtures/preprocessor/public-api-v1.json index c314bdb..4067ea1 100644 --- a/tests/fixtures/preprocessor/public-api-v1.json +++ b/tests/fixtures/preprocessor/public-api-v1.json @@ -3,16 +3,15 @@ "kind": "api-contract", "module": "experimental.preprocessor", "required_exports": [ - "PRECOMPILE_OUTCOME_DIRECTIVE", - "PRECOMPILE_OUTCOME_NO_DIRECTIVE", - "PRECOMPILE_OUTCOME_UNKNOWN", - "PRECOMPILER_NO_DIRECTIVE_SENTINEL", - "PrecompileResult", - "PrecompileOutcome", + "PREPROCESS_OUTCOME_DIRECTIVE", + "PREPROCESS_OUTCOME_NO_DIRECTIVE", + "PREPROCESS_OUTCOME_UNKNOWN", + "PREPROCESSOR_NO_DIRECTIVE_SENTINEL", + "PreprocessResult", + "PreprocessOutcome", "parse_preprocessor_output", - "parse_precompiler_output", - "precompile_heuristic", + "preprocess_heuristic", "render_prompt", - "validate_precompiler_output" + "validate_preprocessor_output" ] } diff --git 
a/tests/test_litellm_checkpoint_integration.py b/tests/test_litellm_checkpoint_integration.py index ca96074..1ab043c 100644 --- a/tests/test_litellm_checkpoint_integration.py +++ b/tests/test_litellm_checkpoint_integration.py @@ -73,8 +73,8 @@ def _track_litellm(_messages: list[dict[str, str]]) -> str: return "ok" module._call_litellm = _track_litellm - if hasattr(module, "_precompile_user_input"): - module._precompile_user_input = lambda _text, _state: None + if hasattr(module, "_preprocess_user_input"): + module._preprocess_user_input = lambda _text, _state: None try: checkpoints["s1"] = "ckpt-in" @@ -137,7 +137,7 @@ def test_litellm_with_preprocessor_checkpoint_restore_and_persist_points() -> No ("no", "State unchanged."), ], ) -def test_litellm_with_preprocessor_bypasses_precompile_while_pending( +def test_litellm_with_preprocessor_bypasses_preprocess_while_pending( confirmation: str, expected_response: str ) -> None: module = _load_module( @@ -175,10 +175,10 @@ def step(self, text: str) -> dict[str, object]: return {"kind": "passthrough", "state": None} call_litellm = cast(Any, module._call_litellm) - precompile_user_input = cast(Any, module._precompile_user_input) + preprocess_user_input = cast(Any, module._preprocess_user_input) - def _fail_precompile(_text: str, _state: dict[str, object]) -> None: - raise AssertionError("should not precompile") + def _fail_preprocess(_text: str, _state: dict[str, object]) -> None: + raise AssertionError("should not preprocess") litellm_calls = 0 @@ -188,13 +188,13 @@ def _track_litellm(_messages: list[dict[str, str]]) -> str: return "ok" module._call_litellm = _track_litellm - module._precompile_user_input = _fail_precompile + module._preprocess_user_input = _fail_preprocess try: engine = _PendingEngine() result = module.handle_turn(confirmation, engine) finally: module._call_litellm = call_litellm - module._precompile_user_input = precompile_user_input + module._preprocess_user_input = preprocess_user_input assert result 
== expected_response assert engine.step_inputs == [confirmation] @@ -221,16 +221,16 @@ def test_litellm_with_preprocessor_checkpoint_resume_yes_no_end_to_end( restored.clear() call_litellm = cast(Any, module._call_litellm) - precompile_user_input = cast(Any, module._precompile_user_input) + preprocess_user_input = cast(Any, module._preprocess_user_input) - precompile_inputs: list[str] = [] + preprocess_inputs: list[str] = [] - def _precompile_before_pending(text: str, _state: dict[str, object]) -> None: - precompile_inputs.append(text) + def _preprocess_before_pending(text: str, _state: dict[str, object]) -> None: + preprocess_inputs.append(text) return None - def _fail_precompile(_text: str, _state: dict[str, object]) -> None: - raise AssertionError("precompile should be bypassed while pending is restored") + def _fail_preprocess(_text: str, _state: dict[str, object]) -> None: + raise AssertionError("preprocess should be bypassed while pending is restored") litellm_calls = 0 @@ -240,7 +240,7 @@ def _track_litellm(_messages: list[dict[str, str]]) -> str: return "ok" module._call_litellm = _track_litellm - module._precompile_user_input = _precompile_before_pending + module._preprocess_user_input = _preprocess_before_pending session_key = "resume-e2e" try: @@ -251,10 +251,10 @@ def _track_litellm(_messages: list[dict[str, str]]) -> str: session_key=session_key, ) assert clarify == 'Did you mean to use "kubectl" instead?' 
- assert precompile_inputs == ["use kubectl instead of docker"] + assert preprocess_inputs == ["use kubectl instead of docker"] assert session_key in checkpoints - module._precompile_user_input = _fail_precompile + module._preprocess_user_input = _fail_preprocess second_engine = create_engine() resumed = module.handle_turn(confirmation, second_engine, session_key=session_key) assert resumed == expected_response @@ -268,7 +268,7 @@ def _track_litellm(_messages: list[dict[str, str]]) -> str: assert litellm_calls == 0 finally: module._call_litellm = call_litellm - module._precompile_user_input = precompile_user_input + module._preprocess_user_input = preprocess_user_input @pytest.mark.parametrize( @@ -490,8 +490,8 @@ def _track_litellm(_messages: list[dict[str, str]]) -> str: litellm_calls += 1 raise AssertionError("downstream model should not be called for update summaries") - if hasattr(module, "_llm_fallback_precompile"): - module._llm_fallback_precompile = lambda _message, _state: None + if hasattr(module, "_llm_fallback_preprocess"): + module._llm_fallback_preprocess = lambda _message, _state: None module._call_litellm = _track_litellm try: @@ -608,15 +608,15 @@ def _track_litellm(_messages: list[dict[str, str]]) -> str: fallback_calls = 0 fallback_original = None - if hasattr(module, "_llm_fallback_precompile"): - fallback_original = module._llm_fallback_precompile + if hasattr(module, "_llm_fallback_preprocess"): + fallback_original = module._llm_fallback_preprocess def _track_fallback(_message: str, _state: dict[str, object]) -> None: nonlocal fallback_calls fallback_calls += 1 raise AssertionError("fallback should not be called for near-miss directive input") - module._llm_fallback_precompile = _track_fallback + module._llm_fallback_preprocess = _track_fallback module._call_litellm = _track_litellm try: @@ -644,7 +644,7 @@ def _track_fallback(_message: str, _state: dict[str, object]) -> None: finally: module._call_litellm = call_litellm if fallback_original 
is not None: - module._llm_fallback_precompile = fallback_original + module._llm_fallback_preprocess = fallback_original assert litellm_calls == 0 @@ -685,7 +685,7 @@ def test_litellm_with_preprocessor_trace_on_includes_preprocessor_output() -> No ) module.SHOW_CONTEXT_COMPILER_TRACE = True module._call_litellm = lambda _messages: "ok" - module._precompile_user_input = lambda _text, _state: "prohibit peanuts" + module._preprocess_user_input = lambda _text, _state: "prohibit peanuts" engine = create_engine() result = module.handle_turn("please use docker", engine) @@ -703,7 +703,7 @@ def test_litellm_with_preprocessor_trace_on_passthrough_includes_trace() -> None ) module.SHOW_CONTEXT_COMPILER_TRACE = True module._call_litellm = lambda _messages: "ok" - module._precompile_user_input = lambda _text, _state: None + module._preprocess_user_input = lambda _text, _state: None engine = create_engine() result = module.handle_turn("hello", engine) diff --git a/tests/test_litellm_integration_error_paths.py b/tests/test_litellm_integration_error_paths.py index e1ba5d0..9382250 100644 --- a/tests/test_litellm_integration_error_paths.py +++ b/tests/test_litellm_integration_error_paths.py @@ -242,13 +242,13 @@ def _completion(**_: Any) -> dict[str, object]: def test_with_preprocessor_fallback_failure_preserves_basic_behavior(monkeypatch) -> None: module = _load_module("litellm_with_preproc_fallback_failure", LITELLM_WITH_PREPROC_PATH) - seen_precompile_inputs: list[str] = [] + seen_preprocess_inputs: list[str] = [] def _heuristic(_text: str) -> dict[str, object]: return {"outcome": "no_directive", "directive": None} def _fallback(_message: str, _state: dict[str, object]) -> str | None: - seen_precompile_inputs.append("called") + seen_preprocess_inputs.append("called") raise RuntimeError("fallback failed") seen_engine_inputs: list[str] = [] @@ -268,13 +268,13 @@ def step(self, text: str) -> dict[str, object]: seen_engine_inputs.append(text) return self._engine.step(text) - 
monkeypatch.setattr(module, "precompile_heuristic", _heuristic) - monkeypatch.setattr(module, "_llm_fallback_precompile", _fallback) + monkeypatch.setattr(module, "preprocess_heuristic", _heuristic) + monkeypatch.setattr(module, "_llm_fallback_preprocess", _fallback) monkeypatch.setattr(module, "_call_litellm", lambda _messages: "ok") engine = _ProxyEngine() result = module.handle_turn("hello world", engine) assert result == "ok" - assert seen_precompile_inputs == ["called"] + assert seen_preprocess_inputs == ["called"] assert seen_engine_inputs == ["hello world"] diff --git a/tests/test_litellm_preprocessor_model_config.py b/tests/test_litellm_preprocessor_model_config.py index 3858f2c..02739d6 100644 --- a/tests/test_litellm_preprocessor_model_config.py +++ b/tests/test_litellm_preprocessor_model_config.py @@ -54,7 +54,7 @@ def _completion(**kwargs): monkeypatch.setattr(module, "render_prompt", lambda *_: "prompt") monkeypatch.setattr(module, "parse_preprocessor_output", lambda value, **_kwargs: value) - result = module._llm_fallback_precompile("please use docker", None) + result = module._llm_fallback_preprocess("please use docker", None) assert result == "use docker" assert seen["model"] == "openai/main-model" @@ -77,7 +77,7 @@ def _completion(**kwargs): monkeypatch.setattr(module, "render_prompt", lambda *_: "prompt") monkeypatch.setattr(module, "parse_preprocessor_output", lambda value, **_kwargs: value) - result = module._llm_fallback_precompile("please use docker", None) + result = module._llm_fallback_preprocess("please use docker", None) assert result == "use docker" assert seen["model"] == "openai/preprocessor-model" @@ -102,7 +102,7 @@ def _completion(**kwargs): monkeypatch.setattr(module, "render_prompt", lambda *_: "prompt") monkeypatch.setattr(module, "parse_preprocessor_output", lambda value, **_kwargs: value) - result = module._llm_fallback_precompile("please use docker", None) + result = module._llm_fallback_preprocess("please use docker", None) 
assert result == "use docker" assert seen["model"] == "openai/main-model" @@ -127,7 +127,7 @@ def _completion(**kwargs): monkeypatch.setattr(module, "render_prompt", lambda *_: "prompt") monkeypatch.setattr(module, "parse_preprocessor_output", lambda value, **_kwargs: value) - result = module._llm_fallback_precompile("please use docker", None) + result = module._llm_fallback_preprocess("please use docker", None) assert result == "use docker" assert seen["model"] == "openai/preprocessor-model" @@ -149,7 +149,7 @@ def _completion(**kwargs): monkeypatch.setattr(module, "_get_litellm_completion", lambda: _completion) monkeypatch.setattr(module, "render_prompt", lambda *_: "prompt") - result = module._llm_fallback_precompile("set premise to concise replies", None) + result = module._llm_fallback_preprocess("set premise to concise replies", None) assert result is None @@ -169,16 +169,16 @@ def _completion(**kwargs): monkeypatch.setattr(module, "_get_litellm_completion", lambda: _completion) monkeypatch.setattr(module, "render_prompt", lambda *_: "prompt") - result = module._llm_fallback_precompile("change premise concise replies", None) + result = module._llm_fallback_preprocess("change premise concise replies", None) assert result is None -def test_litellm_precompile_skips_fallback_for_directive_shaped_malformed_inputs(monkeypatch): +def test_litellm_preprocess_skips_fallback_for_directive_shaped_malformed_inputs(monkeypatch): module = _load_module("litellm_with_preproc_skip_fallback_malformed", LITELLM_WITH_PREPROC_PATH) monkeypatch.setattr( module, - "precompile_heuristic", + "preprocess_heuristic", lambda _text: {"outcome": "no_directive", "directive": None}, ) @@ -196,7 +196,7 @@ def _downstream(_messages: list[dict[str, str]]) -> str: downstream_calls += 1 raise AssertionError("downstream should not be called for clarify output") - monkeypatch.setattr(module, "_llm_fallback_precompile", _fallback) + monkeypatch.setattr(module, "_llm_fallback_preprocess", _fallback) 
monkeypatch.setattr(module, "_call_litellm", _downstream) cases = [ diff --git a/tests/test_openwebui_preprocessor_pipe.py b/tests/test_openwebui_preprocessor_pipe.py index c09059b..6e5bf74 100644 --- a/tests/test_openwebui_preprocessor_pipe.py +++ b/tests/test_openwebui_preprocessor_pipe.py @@ -180,8 +180,8 @@ async def _chat_completion(_: object, payload: dict[str, Any], __: object) -> di return {"ok": True} module.generate_chat_completion = _chat_completion - module.precompile_heuristic = lambda _text: { - "outcome": module.PRECOMPILE_OUTCOME_DIRECTIVE, + module.preprocess_heuristic = lambda _text: { + "outcome": module.PREPROCESS_OUTCOME_DIRECTIVE, "directive": "use docker", } module.parse_preprocessor_output = lambda value, **_kwargs: value @@ -219,7 +219,7 @@ async def _chat_completion(_: object, payload: dict[str, Any], __: object) -> di return {"ok": True} module.generate_chat_completion = _chat_completion - module.precompile_heuristic = lambda _text: { + module.preprocess_heuristic = lambda _text: { "outcome": "no_directive", "directive": None, } @@ -262,7 +262,7 @@ async def _chat_completion(_: object, payload: dict[str, Any], __: object) -> di return {"ok": True} module.generate_chat_completion = _chat_completion - module.precompile_heuristic = lambda _text: { + module.preprocess_heuristic = lambda _text: { "outcome": "no_directive", "directive": None, } @@ -336,7 +336,7 @@ def test_preprocessor_fallback_uses_preprocessor_model_only(monkeypatch) -> None async def _chat_completion(_: object, payload: dict[str, Any], __: object) -> dict[str, object]: calls.append(str(payload.get("model", ""))) - # First call is fallback precompile completion; return no directive. + # First call is fallback preprocess completion; return no directive. # Second call is main forward passthrough. 
if len(calls) == 1: return {"choices": [{"message": {"content": "no_directive"}}]} @@ -351,7 +351,7 @@ def _heuristic(_: str) -> dict[str, object]: module.generate_chat_completion = _chat_completion module.get_all_models = _models - module.precompile_heuristic = _heuristic + module.preprocess_heuristic = _heuristic pipe.valves.BASE_MODEL_ID = "base-model" pipe.valves.PREPROCESSOR_MODEL_ID = "prep-model" @@ -384,7 +384,7 @@ async def _chat_completion(_: object, payload: dict[str, Any], __: object) -> di module.render_prompt = lambda *_: "prompt" directive, error = asyncio.run( - pipe._llm_fallback_precompile( + pipe._llm_fallback_preprocess( "set premise to concise replies", {"premise": None, "policies": {}, "version": 2}, request=object(), @@ -430,7 +430,7 @@ async def _chat_completion(_: object, payload: dict[str, Any], __: object) -> di return {"ok": True} module.generate_chat_completion = _chat_completion - module.precompile_heuristic = lambda _text: {"outcome": "no_directive", "directive": None} + module.preprocess_heuristic = lambda _text: {"outcome": "no_directive", "directive": None} result = asyncio.run( pipe.pipe( @@ -466,7 +466,7 @@ async def _chat_completion(_: object, payload: dict[str, Any], __: object) -> di return {"ok": True} module.generate_chat_completion = _chat_completion - module.precompile_heuristic = lambda _text: {"outcome": "no_directive", "directive": None} + module.preprocess_heuristic = lambda _text: {"outcome": "no_directive", "directive": None} result = asyncio.run( pipe.pipe( @@ -532,7 +532,7 @@ def _create_engine(): return engine monkeypatch.setattr(module, "create_engine", _create_engine) - monkeypatch.setattr(module, "precompile_heuristic", lambda _text: {"outcome": "no_directive"}) + monkeypatch.setattr(module, "preprocess_heuristic", lambda _text: {"outcome": "no_directive"}) monkeypatch.setattr(module, "parse_preprocessor_output", lambda _value, **_kwargs: None) pipe = module.Pipe() @@ -579,15 +579,15 @@ def 
test_preprocessor_pipe_normal_update_forwards_with_state_and_persists_checkp monkeypatch.setattr( module, - "precompile_heuristic", + "preprocess_heuristic", lambda text: ( { - "outcome": module.PRECOMPILE_OUTCOME_DIRECTIVE, + "outcome": module.PREPROCESS_OUTCOME_DIRECTIVE, "directive": "remove policy peanuts", } if "remove policy peanuts" in text.lower() else { - "outcome": module.PRECOMPILE_OUTCOME_DIRECTIVE, + "outcome": module.PREPROCESS_OUTCOME_DIRECTIVE, "directive": "prohibit peanuts", } ), @@ -784,7 +784,7 @@ async def _track_downstream( ("no",), ], ) -def test_preprocessor_pipe_bypasses_precompile_while_pending( +def test_preprocessor_pipe_bypasses_preprocess_while_pending( monkeypatch, confirmation: str ) -> None: module = _load_module_with_openwebui_stubs("owui_preproc_pending_bypass", monkeypatch) @@ -826,10 +826,10 @@ def step(self, text: str) -> dict[str, object]: engine = _PendingEngine() monkeypatch.setattr(module, "create_engine", lambda: engine) - def _fail_precompile(_: str) -> dict[str, object]: - raise AssertionError("should not precompile") + def _fail_preprocess(_: str) -> dict[str, object]: + raise AssertionError("should not preprocess") - monkeypatch.setattr(module, "precompile_heuristic", _fail_precompile) + monkeypatch.setattr(module, "preprocess_heuristic", _fail_preprocess) forwarded_payloads: list[dict[str, Any]] = [] @@ -886,11 +886,11 @@ def test_preprocessor_pipe_checkpoint_resume_yes_no_end_to_end( def _heuristic(text: str) -> dict[str, object]: if text in {"yes", "no"}: - raise AssertionError("heuristic precompile should be bypassed while pending") + raise AssertionError("heuristic preprocess should be bypassed while pending") heuristic_inputs.append(text) return {"outcome": "no_directive", "directive": None} - monkeypatch.setattr(module, "precompile_heuristic", _heuristic) + monkeypatch.setattr(module, "preprocess_heuristic", _heuristic) pipe = module.Pipe() pipe.valves.BASE_MODEL_ID = "base-model" @@ -991,7 +991,7 @@ def 
test_preprocessor_pipe_skips_fallback_for_directive_shaped_malformed_inputs( monkeypatch.setattr( module, - "precompile_heuristic", + "preprocess_heuristic", lambda _text: {"outcome": "no_directive", "directive": None}, ) @@ -1019,7 +1019,7 @@ async def _downstream(_: object, payload: dict[str, Any], __: object) -> dict[st downstream_calls += 1 raise AssertionError(f"downstream model should not be called: {payload.get('model')}") - monkeypatch.setattr(module.Pipe, "_llm_fallback_precompile", _fallback) + monkeypatch.setattr(module.Pipe, "_llm_fallback_preprocess", _fallback) monkeypatch.setattr(module, "generate_chat_completion", _downstream) pipe = module.Pipe() @@ -1049,7 +1049,7 @@ def test_preprocessor_pipe_near_miss_directives_return_deterministic_clarify_wit monkeypatch.setattr( module, - "precompile_heuristic", + "preprocess_heuristic", lambda _text: {"outcome": "no_directive", "directive": None}, ) @@ -1077,7 +1077,7 @@ async def _downstream(_: object, payload: dict[str, Any], __: object) -> dict[st downstream_calls += 1 raise AssertionError(f"downstream model should not be called: {payload.get('model')}") - monkeypatch.setattr(module.Pipe, "_llm_fallback_precompile", _fallback) + monkeypatch.setattr(module.Pipe, "_llm_fallback_preprocess", _fallback) monkeypatch.setattr(module, "generate_chat_completion", _downstream) pipe = module.Pipe() @@ -1121,7 +1121,7 @@ async def _chat_completion(_: object, payload: dict[str, Any], __: object) -> di return {"choices": [{"message": {"content": "downstream"}}]} monkeypatch.setattr(module, "generate_chat_completion", _chat_completion) - monkeypatch.setattr(module, "precompile_heuristic", lambda _text: {"outcome": "no_directive"}) + monkeypatch.setattr(module, "preprocess_heuristic", lambda _text: {"outcome": "no_directive"}) monkeypatch.setattr(module, "parse_preprocessor_output", lambda _value, **_kwargs: None) pipe = module.Pipe() @@ -1152,9 +1152,9 @@ async def _chat_completion(_: object, payload: dict[str, Any], 
__: object) -> di monkeypatch.setattr(module, "generate_chat_completion", _chat_completion) monkeypatch.setattr( module, - "precompile_heuristic", + "preprocess_heuristic", lambda _text: { - "outcome": module.PRECOMPILE_OUTCOME_DIRECTIVE, + "outcome": module.PREPROCESS_OUTCOME_DIRECTIVE, "directive": "prohibit peanuts", }, ) @@ -1197,7 +1197,7 @@ async def _chat_completion(_: object, payload: dict[str, Any], __: object) -> di return {"choices": [{"message": {"content": "downstream"}}]} monkeypatch.setattr(module, "generate_chat_completion", _chat_completion) - monkeypatch.setattr(module, "precompile_heuristic", lambda _text: {"outcome": "no_directive"}) + monkeypatch.setattr(module, "preprocess_heuristic", lambda _text: {"outcome": "no_directive"}) monkeypatch.setattr(module, "parse_preprocessor_output", lambda _value, **_kwargs: None) pipe = module.Pipe() @@ -1239,7 +1239,7 @@ async def _chat_completion(_: object, payload: dict[str, Any], __: object) -> An return _streaming_response() monkeypatch.setattr(module, "generate_chat_completion", _chat_completion) - monkeypatch.setattr(module, "precompile_heuristic", lambda _text: {"outcome": "no_directive"}) + monkeypatch.setattr(module, "preprocess_heuristic", lambda _text: {"outcome": "no_directive"}) monkeypatch.setattr(module, "parse_preprocessor_output", lambda _value, **_kwargs: None) pipe = module.Pipe() @@ -1281,7 +1281,7 @@ def test_preprocessor_pipe_trace_on_clarify_shows_prompt_and_no_downstream_call( monkeypatch.setattr( module, - "precompile_heuristic", + "preprocess_heuristic", lambda _text: {"outcome": "no_directive", "directive": None}, ) @@ -1341,10 +1341,10 @@ def __init__(self, content: str) -> None: def _heuristic(text: str) -> dict[str, object]: if "use docker" in text.lower(): - return {"outcome": module.PRECOMPILE_OUTCOME_DIRECTIVE, "directive": "use docker"} + return {"outcome": module.PREPROCESS_OUTCOME_DIRECTIVE, "directive": "use docker"} return {"outcome": "no_directive", "directive": None} - 
monkeypatch.setattr(module, "precompile_heuristic", _heuristic) + monkeypatch.setattr(module, "preprocess_heuristic", _heuristic) monkeypatch.setattr(module, "parse_preprocessor_output", lambda value, **_kwargs: value) forwarded_payloads: list[dict[str, object]] = [] @@ -1412,10 +1412,10 @@ async def _iter() -> object: def _heuristic(text: str) -> dict[str, object]: if "use docker" in text.lower(): - return {"outcome": module.PRECOMPILE_OUTCOME_DIRECTIVE, "directive": "use docker"} + return {"outcome": module.PREPROCESS_OUTCOME_DIRECTIVE, "directive": "use docker"} return {"outcome": "no_directive", "directive": None} - monkeypatch.setattr(module, "precompile_heuristic", _heuristic) + monkeypatch.setattr(module, "preprocess_heuristic", _heuristic) monkeypatch.setattr(module, "parse_preprocessor_output", lambda value, **_kwargs: value) forwarded_payloads: list[dict[str, object]] = [] @@ -1506,7 +1506,7 @@ def test_preprocessor_pipe_trace_update_clear_reset_paths_single_and_consistent( monkeypatch.setattr( module, - "precompile_heuristic", + "preprocess_heuristic", lambda _text: {"outcome": "no_directive", "directive": None}, ) monkeypatch.setattr(module, "parse_preprocessor_output", lambda _value, **_kwargs: None) @@ -1563,7 +1563,7 @@ def test_preprocessor_pipe_clear_state_trace_not_duplicated_when_model_echoes_hi monkeypatch.setattr( module, - "precompile_heuristic", + "preprocess_heuristic", lambda _text: {"outcome": "no_directive", "directive": None}, ) monkeypatch.setattr(module, "parse_preprocessor_output", lambda _value, **_kwargs: None) @@ -1640,7 +1640,7 @@ def test_preprocessor_pipe_clear_state_strips_preexisting_contradictory_trace_fr monkeypatch.setattr( module, - "precompile_heuristic", + "preprocess_heuristic", lambda _text: {"outcome": "no_directive", "directive": None}, ) monkeypatch.setattr(module, "parse_preprocessor_output", lambda _value, **_kwargs: None) @@ -1709,9 +1709,9 @@ def 
test_preprocessor_pipe_update_trace_and_injection_when_heuristic_emits_direc monkeypatch.setattr( module, - "precompile_heuristic", + "preprocess_heuristic", lambda text: ( - {"outcome": module.PRECOMPILE_OUTCOME_DIRECTIVE, "directive": "use docker"} + {"outcome": module.PREPROCESS_OUTCOME_DIRECTIVE, "directive": "use docker"} if "i think we should use docker" in text.lower() else {"outcome": "no_directive", "directive": None} ), @@ -1771,13 +1771,13 @@ def test_preprocessor_pipe_replacement_update_trace_and_injection_when_heuristic monkeypatch.setattr( module, - "precompile_heuristic", + "preprocess_heuristic", lambda text: ( - {"outcome": module.PRECOMPILE_OUTCOME_DIRECTIVE, "directive": "use docker"} + {"outcome": module.PREPROCESS_OUTCOME_DIRECTIVE, "directive": "use docker"} if text.strip().lower() == "use docker" else ( { - "outcome": module.PRECOMPILE_OUTCOME_DIRECTIVE, + "outcome": module.PREPROCESS_OUTCOME_DIRECTIVE, "directive": "use podman instead of docker", } if "switch to podman instead of docker" in text.lower() @@ -1840,7 +1840,7 @@ def test_preprocessor_pipe_ambiguous_text_passthrough_trace_streaming(monkeypatc module._ENGINES_BY_CHAT_KEY.clear() module._CHECKPOINTS_BY_CHAT_KEY.clear() - monkeypatch.setattr(module, "precompile_heuristic", lambda _text: {"outcome": "no_directive"}) + monkeypatch.setattr(module, "preprocess_heuristic", lambda _text: {"outcome": "no_directive"}) monkeypatch.setattr(module, "parse_preprocessor_output", lambda _value, **_kwargs: None) class _StreamingResponse: @@ -1946,16 +1946,16 @@ def test_preprocessor_pipe_pending_clarification_bypasses_preprocessing_for_ambi def _heuristic(text: str) -> dict[str, object]: if text.strip().lower() == "yeah probably": - raise AssertionError("heuristic precompile should be bypassed while pending") + raise AssertionError("heuristic preprocess should be bypassed while pending") heuristic_calls.append(text) if "use podman instead of kubectl" in text.lower(): return { - "outcome": 
module.PRECOMPILE_OUTCOME_DIRECTIVE, + "outcome": module.PREPROCESS_OUTCOME_DIRECTIVE, "directive": "use podman instead of kubectl", } return {"outcome": "no_directive", "directive": None} - monkeypatch.setattr(module, "precompile_heuristic", _heuristic) + monkeypatch.setattr(module, "preprocess_heuristic", _heuristic) monkeypatch.setattr(module, "parse_preprocessor_output", lambda value, **_kwargs: value) downstream_calls = 0 diff --git a/tests/test_preprocessor_conformance.py b/tests/test_preprocessor_conformance.py index c9337e0..abfee40 100644 --- a/tests/test_preprocessor_conformance.py +++ b/tests/test_preprocessor_conformance.py @@ -2,8 +2,8 @@ import re from pathlib import Path -from experimental.preprocessor.heuristic_preprocessor import precompile_heuristic -from experimental.preprocessor.output_validation import validate_precompiler_output +from experimental.preprocessor.heuristic_preprocessor import preprocess_heuristic +from experimental.preprocessor.output_validation import validate_preprocessor_output _PREPROCESSOR_FIXTURES_DIR = Path(__file__).resolve().parent / "fixtures" / "preprocessor" @@ -17,7 +17,7 @@ def _load_fixture(path: Path) -> dict[str, object]: def _normalize_result(message: str) -> dict[str, object]: - result = precompile_heuristic(message) + result = preprocess_heuristic(message) output = result["directive"] if result["outcome"] == "directive" else None normalized = { "classification": result["outcome"], @@ -25,7 +25,7 @@ def _normalize_result(message: str) -> dict[str, object]: } # Enforce the validation boundary: only validated directive output may pass. 
- validated = validate_precompiler_output(output) + validated = validate_preprocessor_output(output) if normalized["classification"] == "directive": assert validated["classification"] == "directive" assert validated["output"] == output @@ -39,7 +39,7 @@ def _normalize_result(message: str) -> dict[str, object]: def _normalize_validator_result( raw_output: object, source_input: str | None = None ) -> dict[str, object]: - validated = validate_precompiler_output(raw_output, source_input=source_input) + validated = validate_preprocessor_output(raw_output, source_input=source_input) return { "classification": validated["classification"], "output": validated["output"], @@ -119,5 +119,5 @@ def test_engine_owned_near_misses_are_reject_only_for_fallback_rewrites() -> Non continue for candidate in _derived_risky_rewrite_candidates(input_text): - validated = validate_precompiler_output(candidate, source_input=input_text) + validated = validate_preprocessor_output(candidate, source_input=input_text) assert validated["classification"] != "directive", fixture_name diff --git a/tests/test_preprocessor_heuristic.py b/tests/test_preprocessor_heuristic.py index 62bf776..8744ad7 100644 --- a/tests/test_preprocessor_heuristic.py +++ b/tests/test_preprocessor_heuristic.py @@ -1,6 +1,6 @@ import pytest -from experimental.preprocessor.heuristic_preprocessor import precompile_heuristic +from experimental.preprocessor.heuristic_preprocessor import preprocess_heuristic def test_heuristic_rejects_consistent_high_risk_non_directives() -> None: @@ -23,7 +23,7 @@ def test_heuristic_rejects_consistent_high_risk_non_directives() -> None: ] for message in cases: - result = precompile_heuristic(message) + result = preprocess_heuristic(message) assert result["outcome"] == "unknown" assert result["directive"] is None assert result["rule_id"] is not None @@ -36,7 +36,7 @@ def test_heuristic_accepts_trailing_period_or_bang_for_whole_message_directives( ("use docker.", "use docker"), ] for message, 
expected in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "directive", "directive": expected, "rule_id": "canonical.full_match", @@ -49,7 +49,7 @@ def test_heuristic_allows_exact_full_message_wrappers_for_directives() -> None: ("[prohibit peanuts]", "prohibit peanuts"), ] for message, expected in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "directive", "directive": expected, "rule_id": "canonical.full_match", @@ -63,7 +63,7 @@ def test_heuristic_rejects_quoted_or_backticked_exact_directives() -> None: "'reset policies'", ] for message in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "unknown", "directive": None, "rule_id": "reject.quoted_exact", @@ -77,7 +77,7 @@ def test_heuristic_case_normalizes_exact_command_shapes() -> None: ("Prohibit Peanuts", "prohibit peanuts"), ] for message, expected in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "directive", "directive": expected, "rule_id": "canonical.full_match", @@ -85,7 +85,7 @@ def test_heuristic_case_normalizes_exact_command_shapes() -> None: def test_heuristic_question_mark_only_non_directive_is_confident() -> None: - assert precompile_heuristic("can you help with lunch?") == { + assert preprocess_heuristic("can you help with lunch?") == { "outcome": "no_directive", "directive": None, "rule_id": "reject.confident_non_directive", @@ -99,7 +99,7 @@ def test_heuristic_rejects_directive_adjacent_question_mark_as_unknown() -> None "can you use pytest instead of unittest?", ] for message in cases: - result = precompile_heuristic(message) + result = preprocess_heuristic(message) assert result["outcome"] == "unknown" assert result["directive"] is None assert result["rule_id"] is not None @@ -114,7 +114,7 @@ def test_heuristic_rejects_meta_reporting_or_example_prefixes() -> None: 
'example: "use docker"', ] for message in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "unknown", "directive": None, "rule_id": "reject.meta_or_reporting", @@ -128,7 +128,7 @@ def test_heuristic_rejects_list_or_enumeration_inputs() -> None: "* prohibit peanuts", ] for message in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "unknown", "directive": None, "rule_id": "reject.list_or_enumeration", @@ -142,7 +142,7 @@ def test_heuristic_rejects_multi_segment_or_mixed_prose_inputs() -> None: "use docker and prohibit peanuts", ] for message in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "unknown", "directive": None, "rule_id": "reject.multi_segment_or_mixed_prose", @@ -155,7 +155,7 @@ def test_heuristic_rejects_malformed_replacement_syntax() -> None: "use podman in stead of docker", ] for message in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "unknown", "directive": None, "rule_id": "reject.malformed_replacement_syntax", @@ -168,7 +168,7 @@ def test_heuristic_rejects_admin_near_miss_aliases() -> None: "remove policies docker", ] for message in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "unknown", "directive": None, "rule_id": "reject.admin_near_miss_alias", @@ -182,7 +182,7 @@ def test_heuristic_rejects_notes_and_reporting_with_bracketed_mentions() -> None "I wrote down [change premise to concise replies] yesterday", ] for message in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "unknown", "directive": None, "rule_id": "reject.quoted_reported_bracket", @@ -190,7 +190,7 @@ def test_heuristic_rejects_notes_and_reporting_with_bracketed_mentions() -> None def 
test_heuristic_accepts_bracket_wrapper_without_reporting_marker() -> None: - assert precompile_heuristic("[clear state]") == { + assert preprocess_heuristic("[clear state]") == { "outcome": "directive", "directive": "clear state", "rule_id": "canonical.full_match", @@ -203,7 +203,7 @@ def test_heuristic_set_premise_to_forms_are_unknown_not_rewritten() -> None: "set premise to formal tone", ] for message in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "unknown", "directive": None, "rule_id": "reject.directive_adjacent_unsafe", @@ -216,7 +216,7 @@ def test_heuristic_dont_use_forms_are_unknown_not_rewritten() -> None: "do not use peanuts", ] for message in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "unknown", "directive": None, "rule_id": "reject.directive_adjacent_unsafe", @@ -224,7 +224,7 @@ def test_heuristic_dont_use_forms_are_unknown_not_rewritten() -> None: def test_heuristic_does_not_canonicalize_set_premise_to_with_empty_payload() -> None: - assert precompile_heuristic("set premise to ") == { + assert preprocess_heuristic("set premise to ") == { "outcome": "unknown", "directive": None, "rule_id": "reject.directive_adjacent_unsafe", @@ -232,7 +232,7 @@ def test_heuristic_does_not_canonicalize_set_premise_to_with_empty_payload() -> def test_heuristic_does_not_canonicalize_set_premise_to_when_not_whole_message() -> None: - assert precompile_heuristic("please set premise to concise replies") == { + assert preprocess_heuristic("please set premise to concise replies") == { "outcome": "unknown", "directive": None, "rule_id": "reject.directive_adjacent_unsafe", @@ -245,7 +245,7 @@ def test_heuristic_change_premise_missing_to_forms_are_unknown_not_rewritten() - "change premise formal tone", ] for message in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "unknown", "directive": None, 
"rule_id": "reject.directive_adjacent_unsafe", @@ -253,7 +253,7 @@ def test_heuristic_change_premise_missing_to_forms_are_unknown_not_rewritten() - def test_heuristic_does_not_canonicalize_change_premise_with_empty_payload() -> None: - assert precompile_heuristic("change premise ") == { + assert preprocess_heuristic("change premise ") == { "outcome": "unknown", "directive": None, "rule_id": "reject.directive_adjacent_unsafe", @@ -261,7 +261,7 @@ def test_heuristic_does_not_canonicalize_change_premise_with_empty_payload() -> def test_heuristic_does_not_canonicalize_change_premise_when_not_whole_message() -> None: - assert precompile_heuristic("please change premise concise replies") == { + assert preprocess_heuristic("please change premise concise replies") == { "outcome": "unknown", "directive": None, "rule_id": "reject.directive_adjacent_unsafe", @@ -282,7 +282,7 @@ def test_heuristic_accepts_strict_canonical_directives() -> None: ] for directive in directives: - result = precompile_heuristic(directive) + result = preprocess_heuristic(directive) assert result == { "outcome": "directive", "directive": directive, @@ -294,7 +294,7 @@ def test_heuristic_returns_unknown_for_unresolved_cases() -> None: unresolved = ["Could we maybe use uv later"] for message in unresolved: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "unknown", "directive": None, "rule_id": "reject.directive_adjacent_unsafe", @@ -307,7 +307,7 @@ def test_heuristic_returns_no_directive_for_ordinary_non_directive_content() -> "thanks for the help", ] for message in cases: - assert precompile_heuristic(message) == { + assert preprocess_heuristic(message) == { "outcome": "no_directive", "directive": None, "rule_id": "reject.confident_non_directive", @@ -316,5 +316,5 @@ def test_heuristic_returns_no_directive_for_ordinary_non_directive_content() -> @pytest.mark.parametrize("message", ['""', "''", "()", "[]", "``"]) def 
test_heuristic_empty_wrappers_do_not_produce_directive(message: str) -> None: - result = precompile_heuristic(message) + result = preprocess_heuristic(message) assert result["directive"] is None diff --git a/tests/test_preprocessor_heuristic_properties.py b/tests/test_preprocessor_heuristic_properties.py index 438a9ee..a28193e 100644 --- a/tests/test_preprocessor_heuristic_properties.py +++ b/tests/test_preprocessor_heuristic_properties.py @@ -4,11 +4,11 @@ from hypothesis import strategies as st from experimental.preprocessor.constants import ( - PRECOMPILE_OUTCOME_DIRECTIVE, - PRECOMPILE_OUTCOME_NO_DIRECTIVE, - PRECOMPILE_OUTCOME_UNKNOWN, + PREPROCESS_OUTCOME_DIRECTIVE, + PREPROCESS_OUTCOME_NO_DIRECTIVE, + PREPROCESS_OUTCOME_UNKNOWN, ) -from experimental.preprocessor.heuristic_preprocessor import precompile_heuristic +from experimental.preprocessor.heuristic_preprocessor import preprocess_heuristic from experimental.preprocessor.output_validation import ( _is_allowed_directive, parse_preprocessor_output, @@ -46,16 +46,16 @@ def test_heuristic_accepts_canonical_directive_with_trailing_period_or_bang( directive: str, punctuation: str ) -> None: - result = precompile_heuristic(f"{directive}{punctuation}") - assert result["outcome"] == PRECOMPILE_OUTCOME_DIRECTIVE + result = preprocess_heuristic(f"{directive}{punctuation}") + assert result["outcome"] == PREPROCESS_OUTCOME_DIRECTIVE parsed = parse_preprocessor_output(result["directive"]) assert parsed == result["directive"] @given(st.sampled_from(CANONICAL_DIRECTIVES)) def test_heuristic_question_suffix_never_produces_directive(directive: str) -> None: - result = precompile_heuristic(f"{directive}?") - assert result["outcome"] == PRECOMPILE_OUTCOME_UNKNOWN + result = preprocess_heuristic(f"{directive}?") + assert result["outcome"] == PREPROCESS_OUTCOME_UNKNOWN assert result["directive"] is None @@ -64,8 +64,8 @@ def test_heuristic_accepts_single_layer_exact_wrapper( directive: str, wrapper: tuple[str, str] ) -> None: 
left, right = wrapper - result = precompile_heuristic(f"{left}{directive}{right}") - assert result["outcome"] == PRECOMPILE_OUTCOME_DIRECTIVE + result = preprocess_heuristic(f"{left}{directive}{right}") + assert result["outcome"] == PREPROCESS_OUTCOME_DIRECTIVE parsed = parse_preprocessor_output(result["directive"]) assert parsed == result["directive"] @@ -75,8 +75,8 @@ def test_heuristic_quoted_exact_wrappers_never_directive( directive: str, wrapper: tuple[str, str] ) -> None: left, right = wrapper - result = precompile_heuristic(f"{left}{directive}{right}") - assert result["outcome"] == PRECOMPILE_OUTCOME_UNKNOWN + result = preprocess_heuristic(f"{left}{directive}{right}") + assert result["outcome"] == PREPROCESS_OUTCOME_UNKNOWN assert result["directive"] is None @@ -90,22 +90,22 @@ def test_heuristic_rejects_wrapped_directive_with_surrounding_meta_text( ) -> None: left, right = wrapper message = f"{prefix} {left}{directive}{right}" - result = precompile_heuristic(message) - assert result["outcome"] != PRECOMPILE_OUTCOME_DIRECTIVE + result = preprocess_heuristic(message) + assert result["outcome"] != PREPROCESS_OUTCOME_DIRECTIVE @given(st.text(max_size=60), st.text(max_size=60)) def test_heuristic_question_mark_is_always_rejected(prefix: str, suffix: str) -> None: message = f"{prefix}?{suffix}" - result = precompile_heuristic(message) - assert result["outcome"] in {PRECOMPILE_OUTCOME_NO_DIRECTIVE, PRECOMPILE_OUTCOME_UNKNOWN} + result = preprocess_heuristic(message) + assert result["outcome"] in {PREPROCESS_OUTCOME_NO_DIRECTIVE, PREPROCESS_OUTCOME_UNKNOWN} assert result["directive"] is None @given(st.text(max_size=120)) def test_heuristic_directive_output_is_always_validator_safe(message: str) -> None: - result = precompile_heuristic(message) - if result["outcome"] != PRECOMPILE_OUTCOME_DIRECTIVE: + result = preprocess_heuristic(message) + if result["outcome"] != PREPROCESS_OUTCOME_DIRECTIVE: return directive = result["directive"] assert isinstance(directive, str) 
@@ -122,15 +122,15 @@ def test_heuristic_whole_message_discipline_for_surrounded_directive( assume(suffix.strip() not in {'"', "'", "`", ")", "]"}) assume(not message.strip().lower().startswith("change premise ")) assume(not _is_allowed_directive(normalized)) - result = precompile_heuristic(message) - assert result["outcome"] != PRECOMPILE_OUTCOME_DIRECTIVE + result = preprocess_heuristic(message) + assert result["outcome"] != PREPROCESS_OUTCOME_DIRECTIVE @given(NON_EMPTY_TEXT) def test_heuristic_list_or_enumeration_prefix_never_directive(rest: str) -> None: for prefix in ("1. ", "- ", "* "): - result = precompile_heuristic(f"{prefix}{rest}") - assert result["outcome"] != PRECOMPILE_OUTCOME_DIRECTIVE + result = preprocess_heuristic(f"{prefix}{rest}") + assert result["outcome"] != PREPROCESS_OUTCOME_DIRECTIVE @given(st.sampled_from(CANONICAL_DIRECTIVES)) @@ -143,8 +143,8 @@ def test_heuristic_meta_reporting_prefix_never_directive(directive: str) -> None f'he said "{directive}"', ] for message in samples: - result = precompile_heuristic(message) - assert result["outcome"] != PRECOMPILE_OUTCOME_DIRECTIVE + result = preprocess_heuristic(message) + assert result["outcome"] != PREPROCESS_OUTCOME_DIRECTIVE @given(st.sampled_from(["use docker", "clear state", "prohibit peanuts"]), NON_EMPTY_TEXT) @@ -157,5 +157,5 @@ def test_heuristic_mixed_prose_connector_forms_never_directive( f"{directive_seed} and {detail}", ] for message in messages: - result = precompile_heuristic(message) - assert result["outcome"] != PRECOMPILE_OUTCOME_DIRECTIVE + result = preprocess_heuristic(message) + assert result["outcome"] != PREPROCESS_OUTCOME_DIRECTIVE diff --git a/tests/test_preprocessor_output_validation.py b/tests/test_preprocessor_output_validation.py index 8220d62..91fd73d 100644 --- a/tests/test_preprocessor_output_validation.py +++ b/tests/test_preprocessor_output_validation.py @@ -1,10 +1,7 @@ -from importlib import import_module - from experimental.preprocessor.output_validation 
import ( _is_allowed_directive, - parse_precompiler_output, parse_preprocessor_output, - validate_precompiler_output, + validate_preprocessor_output, ) @@ -16,7 +13,7 @@ def test_is_allowed_directive_accepts_canonical_shapes() -> None: def test_validate_text_accepts_canonical_directive() -> None: - result = validate_precompiler_output("prohibit peanuts") + result = validate_preprocessor_output("prohibit peanuts") assert result == { "classification": "directive", "output": "prohibit peanuts", @@ -24,7 +21,7 @@ def test_validate_text_accepts_canonical_directive() -> None: def test_validate_text_accepts_exact_no_directive_sentinel() -> None: - result = validate_precompiler_output("") + result = validate_preprocessor_output("") assert result == { "classification": "no_directive", "output": None, @@ -32,22 +29,22 @@ def test_validate_text_accepts_exact_no_directive_sentinel() -> None: def test_validate_text_rejects_malformed_or_mixed_output_as_unknown() -> None: - assert validate_precompiler_output("") == { + assert validate_preprocessor_output("") == { "classification": "unknown", "output": None, } - assert validate_precompiler_output("set premise to concise replies") == { + assert validate_preprocessor_output("set premise to concise replies") == { "classification": "unknown", "output": None, } - assert validate_precompiler_output("prohibit peanuts and use almonds") == { + assert validate_preprocessor_output("prohibit peanuts and use almonds") == { "classification": "unknown", "output": None, } def test_validate_structured_output_accepts_strict_contract_shape() -> None: - assert validate_precompiler_output( + assert validate_preprocessor_output( { "classification": "directive", "output": "clear state", @@ -57,7 +54,7 @@ def test_validate_structured_output_accepts_strict_contract_shape() -> None: "output": "clear state", } - assert validate_precompiler_output( + assert validate_preprocessor_output( { "classification": "no_directive", "output": None, @@ -67,7 +64,7 @@ 
def test_validate_structured_output_accepts_strict_contract_shape() -> None: "output": None, } - assert validate_precompiler_output( + assert validate_preprocessor_output( { "classification": "unknown", "output": None, @@ -96,7 +93,7 @@ def test_validate_structured_output_rejects_malformed_shape_or_payload_as_unknow {"action": "prohibit", "item": "peanuts"}, ] for raw in cases: - assert validate_precompiler_output(raw) == { + assert validate_preprocessor_output(raw) == { "classification": "unknown", "output": None, } @@ -104,41 +101,28 @@ def test_validate_structured_output_rejects_malformed_shape_or_payload_as_unknow def test_validate_text_parses_and_validates_json_contract() -> None: raw = '{"classification":"directive","output":"use docker"}' - assert validate_precompiler_output(raw) == { + assert validate_preprocessor_output(raw) == { "classification": "directive", "output": "use docker", } def test_parse_returns_validated_directive_only() -> None: - assert parse_precompiler_output("prohibit peanuts") == "prohibit peanuts" - assert parse_precompiler_output("") is None - assert parse_precompiler_output("set premise to concise replies") is None - - -def test_parse_preprocessor_output_alias_matches_compat_parser() -> None: assert parse_preprocessor_output("prohibit peanuts") == "prohibit peanuts" assert parse_preprocessor_output("") is None assert parse_preprocessor_output("set premise to concise replies") is None -def test_heuristic_module_new_and_compat_import_paths_both_work() -> None: - new_mod = import_module("experimental.preprocessor.heuristic_preprocessor") - compat_mod = import_module("experimental.preprocessor.heuristic_precompiler") - message = "use docker" - assert new_mod.precompile_heuristic(message) == compat_mod.precompile_heuristic(message) - - def test_parse_with_source_input_rejects_premise_near_miss_canonicalization() -> None: assert ( - parse_precompiler_output( + parse_preprocessor_output( "set premise concise replies", source_input="set 
premise to concise replies", ) is None ) assert ( - parse_precompiler_output( + parse_preprocessor_output( "change premise to concise replies", source_input="change premise concise replies", ) @@ -147,14 +131,14 @@ def test_parse_with_source_input_rejects_premise_near_miss_canonicalization() -> def test_validation_with_source_input_rejects_premise_near_miss_canonicalization() -> None: - assert validate_precompiler_output( + assert validate_preprocessor_output( "set premise concise replies", source_input="set premise to concise replies", ) == { "classification": "unknown", "output": None, } - assert validate_precompiler_output( + assert validate_preprocessor_output( "change premise to concise replies", source_input="change premise concise replies", ) == { @@ -164,14 +148,14 @@ def test_validation_with_source_input_rejects_premise_near_miss_canonicalization def test_validation_with_source_input_allows_other_directives() -> None: - assert validate_precompiler_output( + assert validate_preprocessor_output( "prohibit peanuts", source_input="prohibit peanuts", ) == { "classification": "directive", "output": "prohibit peanuts", } - assert validate_precompiler_output( + assert validate_preprocessor_output( "use coconut milk", source_input="what is a simple curry recipe?", ) == { diff --git a/tests/test_preprocessor_validator_properties.py b/tests/test_preprocessor_validator_properties.py index 8a30631..dfafeeb 100644 --- a/tests/test_preprocessor_validator_properties.py +++ b/tests/test_preprocessor_validator_properties.py @@ -4,7 +4,7 @@ from experimental.preprocessor.output_validation import ( _is_allowed_directive, parse_preprocessor_output, - validate_precompiler_output, + validate_preprocessor_output, ) CANONICAL_DIRECTIVES = [ @@ -102,7 +102,7 @@ def test_validate_malformed_abstain_negative_boundaries_are_unknown() -> None: "": "no_directive", } for raw, expected_cls in cases.items(): - validated = validate_precompiler_output(raw) + validated = 
validate_preprocessor_output(raw) assert validated["classification"] == expected_cls assert validated["output"] is None @@ -135,7 +135,7 @@ def test_parse_output_idempotent(raw_output: object) -> None: ) ) def test_validate_output_always_has_null_for_non_directive(raw_output: object) -> None: - validated = validate_precompiler_output(raw_output) + validated = validate_preprocessor_output(raw_output) if validated["classification"] == "directive": assert isinstance(validated["output"], str) else: diff --git a/tests/test_repl.py b/tests/test_repl.py index 42ca966..024a64c 100644 --- a/tests/test_repl.py +++ b/tests/test_repl.py @@ -114,11 +114,11 @@ def test_main_without_args_runs_repl_as_before(monkeypatch: pytest.MonkeyPatch) called: dict[str, object] = {} def _fake_run_repl( - in_stream: TextIO, out_stream: TextIO, *, use_precompiler: bool = False + in_stream: TextIO, out_stream: TextIO, *, use_preprocessor: bool = False ) -> None: called["in_stream"] = in_stream called["out_stream"] = out_stream - called["use_precompiler"] = use_precompiler + called["use_preprocessor"] = use_preprocessor monkeypatch.setattr(repl_module, "run_repl", _fake_run_repl) monkeypatch.setattr(sys, "argv", ["context-compiler"]) @@ -128,31 +128,28 @@ def _fake_run_repl( assert result == 0 assert called["in_stream"] is sys.stdin assert called["out_stream"] is sys.stdout - assert called["use_precompiler"] is False + assert called["use_preprocessor"] is False -@pytest.mark.parametrize("flag", ["--with-preprocessor", "--with-precompiler"]) -def test_main_with_preprocessor_flag_runs_repl_with_flag( - monkeypatch: pytest.MonkeyPatch, flag: str -) -> None: +def test_main_with_preprocessor_flag_runs_repl_with_flag(monkeypatch: pytest.MonkeyPatch) -> None: called: dict[str, object] = {} def _fake_run_repl( - in_stream: TextIO, out_stream: TextIO, *, use_precompiler: bool = False + in_stream: TextIO, out_stream: TextIO, *, use_preprocessor: bool = False ) -> None: called["in_stream"] = in_stream 
called["out_stream"] = out_stream - called["use_precompiler"] = use_precompiler + called["use_preprocessor"] = use_preprocessor monkeypatch.setattr(repl_module, "run_repl", _fake_run_repl) - monkeypatch.setattr(sys, "argv", ["context-compiler", flag]) + monkeypatch.setattr(sys, "argv", ["context-compiler", "--with-preprocessor"]) result = repl_module.main() assert result == 0 assert called["in_stream"] is sys.stdin assert called["out_stream"] is sys.stdout - assert called["use_precompiler"] is True + assert called["use_preprocessor"] is True def test_main_unknown_flag_prints_error_hint_and_exits_nonzero( @@ -173,10 +170,8 @@ def test_main_unknown_flag_prints_error_hint_and_exits_nonzero( @pytest.mark.parametrize( "args, expected_bad_arg", [ - (["--with-precompiler", "foo"], "--with-precompiler"), (["--with-preprocessor", "foo"], "--with-preprocessor"), (["--help", "--version"], "--help"), - (["--version", "--with-precompiler"], "--version"), (["--version", "--with-preprocessor"], "--version"), ], ) @@ -248,43 +243,34 @@ def test_repl_non_interactive_uses_human_readable_output() -> None: assert lines == ["passthrough"] -def test_repl_with_precompiler_parses_directive_before_engine_step() -> None: +def test_repl_with_preprocessor_parses_directive_before_engine_step() -> None: out = StringIO() run_repl( StringIO('{"classification":"directive","output":"prohibit peanuts"}\nquit\n'), out, - use_precompiler=True, + use_preprocessor=True, ) lines = out.getvalue().splitlines() assert lines == ["updated", "premise: (none)", "policies:", "- prohibit peanuts"] -def test_repl_with_precompiler_near_miss_passes_through_and_clarifies() -> None: +def test_repl_with_preprocessor_near_miss_passes_through_and_clarifies() -> None: out = StringIO() - run_repl(StringIO("set premise to concise replies\nquit\n"), out, use_precompiler=True) + run_repl(StringIO("set premise to concise replies\nquit\n"), out, use_preprocessor=True) lines = out.getvalue().splitlines() assert lines == 
["confirm: Did you mean 'set premise concise replies'?"] -def test_repl_with_precompiler_non_directive_passthrough() -> None: +def test_repl_with_preprocessor_non_directive_passthrough() -> None: out = StringIO() - run_repl(StringIO("what is a simple curry recipe?\nquit\n"), out, use_precompiler=True) + run_repl(StringIO("what is a simple curry recipe?\nquit\n"), out, use_preprocessor=True) lines = out.getvalue().splitlines() assert lines == ["passthrough"] -def test_cli_with_precompiler_pipe_smoke_emits_clarify_without_update() -> None: - result = _run_repl_cli("--with-precompiler", input_text="set premise to concise replies\n") - - assert result.returncode == 0 - assert "Did you mean 'set premise concise replies'?" in result.stdout - assert "updated" not in result.stdout - assert result.stderr == "" - - def test_cli_with_preprocessor_pipe_smoke_emits_clarify_without_update() -> None: result = _run_repl_cli("--with-preprocessor", input_text="set premise to concise replies\n") @@ -294,7 +280,7 @@ def test_cli_with_preprocessor_pipe_smoke_emits_clarify_without_update() -> None assert result.stderr == "" -def test_repl_with_precompiler_bypasses_parsing_while_pending( +def test_repl_with_preprocessor_bypasses_parsing_while_pending( monkeypatch: pytest.MonkeyPatch, ) -> None: seen: list[tuple[object, str | None]] = [] @@ -311,7 +297,7 @@ def _parse(raw_output: object, *, source_input: str | None = None) -> str | None run_repl( StringIO("use podman instead of docker\nyes\nquit\n"), out, - use_precompiler=True, + use_preprocessor=True, ) assert seen == [("use podman instead of docker", "use podman instead of docker")] @@ -320,7 +306,7 @@ def _parse(raw_output: object, *, source_input: str | None = None) -> str | None assert _contains_subsequence(lines, ["updated", "premise: (none)", "policies:", "- use podman"]) -def test_repl_without_precompiler_does_not_parse_inputs(monkeypatch: pytest.MonkeyPatch) -> None: +def 
test_repl_without_preprocessor_does_not_parse_inputs(monkeypatch: pytest.MonkeyPatch) -> None: def _fail_parse(_raw: object, *, source_input: str | None = None) -> str | None: del source_input raise AssertionError("parse_preprocessor_output should not be called") From 658b834200ba68b967e87568a165970288c24bfe Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Wed, 13 May 2026 00:54:51 -0400 Subject: [PATCH 2/2] chore: bump version to 0.6.19 --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4d3ea6f..24566d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "context-compiler" -version = "0.6.18" +version = "0.6.19" description = "Deterministic conversational state engine for LLM applications." readme = "README.md" requires-python = ">=3.11" diff --git a/uv.lock b/uv.lock index f57caed..6b95f79 100644 --- a/uv.lock +++ b/uv.lock @@ -468,7 +468,7 @@ wheels = [ [[package]] name = "context-compiler" -version = "0.6.18" +version = "0.6.19" source = { editable = "." } [package.optional-dependencies]