README.md (1 addition, 4 deletions)
@@ -39,7 +39,6 @@ context-compiler --with-preprocessor
(heuristic + validation only). Near-miss inputs are not rewritten and are
passed through to the engine, which continues to return clarify behavior for
those forms.
-`--with-precompiler` remains supported as a compatibility alias in 0.6.x.

Or in code:
```python
@@ -368,9 +367,7 @@ into canonical directives before compilation.
It is designed to be conservative and must be used with validation:

- reject-first; directive-adjacent unsafe forms abstain instead of rewriting
-- all outputs must be validated with `parse_preprocessor_output(...)` (the
-preferred preprocessor validation function; `parse_precompiler_output(...)`
-remains as a compatibility alias)
+- all outputs must be validated with `parse_preprocessor_output(...)`
- no directive grammar expansion
- raw outputs must not be passed directly to the compiler
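
Taken together, the contract above is: accept only high-confidence directives, validate them, and abstain otherwise. A minimal sketch of that validate-before-apply shape, using the `experimental.preprocessor` names imported by the example integrations later in this diff (the wrapper function itself is hypothetical, not the shipped API):

```python
from experimental.preprocessor import (
    PREPROCESS_OUTCOME_DIRECTIVE,
    parse_preprocessor_output,
    preprocess_heuristic,
)


def safe_preprocess(message: str) -> str | None:
    """Return a validated directive, or None to pass the input through unchanged."""
    result = preprocess_heuristic(message)
    # Reject-first: anything that is not a confident directive abstains.
    if result["outcome"] != PREPROCESS_OUTCOME_DIRECTIVE or not result["directive"]:
        return None
    # Raw outputs are never handed to the compiler; validate first.
    return parse_preprocessor_output(result["directive"])
```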

docs/DescriptionAndMilestones.md (1 addition, 1 deletion)
@@ -116,7 +116,7 @@ Make engine behavior inspectable and externally controllable without guessing.
- `--initial-state-file`
- REPL LLM fallback as explicit optional mode:
- `--with-llm-fallback`
-- requires `--with-precompiler`
+- requires `--with-preprocessor`
- never implicit
- inspectable via preview / JSON output
- Explicit preprocessor policy for multi-line, multi-sentence, and conversational-prefix input
docs/llm-preprocessor.md (1 addition, 2 deletions)
@@ -25,8 +25,7 @@ Recommended conceptual flow:
Otherwise pass the original input unchanged.

All preprocessor outputs, including heuristic outputs, must be validated with
-`parse_preprocessor_output(...)` (preferred; `parse_precompiler_output(...)`
-remains as a compatibility alias) before being applied.
+`parse_preprocessor_output(...)` before being applied.

Raw heuristic/LLM outputs must not be passed directly to the compiler.
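
A sketch of that flow end to end — heuristic first, validated LLM fallback second, original input otherwise — where `llm_fallback` is a hypothetical caller-supplied hook (the integrations in this diff each wire in their own):

```python
from collections.abc import Callable

from experimental.preprocessor import (
    PREPROCESS_OUTCOME_DIRECTIVE,
    parse_preprocessor_output,
    preprocess_heuristic,
)


def preprocess_or_passthrough(
    message: str,
    llm_fallback: Callable[[str], str | None] | None = None,
) -> str:
    # Heuristic first: fast, deterministic, high precision.
    result = preprocess_heuristic(message)
    if result["outcome"] == PREPROCESS_OUTCOME_DIRECTIVE and result["directive"]:
        parsed = parse_preprocessor_output(result["directive"])
        if parsed:
            return parsed
    # Optional LLM fallback; its raw output is validated the same way.
    if llm_fallback is not None:
        raw = llm_fallback(message)
        if raw:
            parsed = parse_preprocessor_output(raw)
            if parsed:
                return parsed
    # Otherwise pass the original input unchanged.
    return message
```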

examples/integrations/litellm/with_preprocessor.py (16 additions, 16 deletions)
@@ -27,9 +27,9 @@
from context_compiler import State, get_policy_items, get_premise_value
from context_compiler.engine import Engine
from experimental.preprocessor import (
-PRECOMPILE_OUTCOME_DIRECTIVE,
+PREPROCESS_OUTCOME_DIRECTIVE,
parse_preprocessor_output,
-precompile_heuristic,
+preprocess_heuristic,
render_prompt,
)

@@ -197,7 +197,7 @@ def _prompt_file_path() -> Traversable:
return _PROMPTS_DIR.joinpath("default.txt")


-def _llm_fallback_precompile(message: str, state: State) -> str | None:
+def _llm_fallback_preprocess(message: str, state: State) -> str | None:
with as_file(_prompt_file_path()) as prompt_path:
prompt = render_prompt(prompt_path, state)
if prompt is None:
@@ -242,13 +242,13 @@ def _llm_fallback_precompile(message: str, state: State) -> str | None:
return parsed


-def _precompile_user_input(message: str, state: State) -> str | None:
+def _preprocess_user_input(message: str, state: State) -> str | None:
# Heuristic first (fast + high precision), then optional LLM fallback.
try:
-heuristic_result = precompile_heuristic(message)
+heuristic_result = preprocess_heuristic(message)
logger.debug("preprocessor: heuristic_outcome=%s", heuristic_result["outcome"])
if (
heuristic_result["outcome"] == PRECOMPILE_OUTCOME_DIRECTIVE
heuristic_result["outcome"] == PREPROCESS_OUTCOME_DIRECTIVE
and heuristic_result["directive"]
):
parsed = parse_preprocessor_output(heuristic_result["directive"])
@@ -262,7 +262,7 @@ def _precompile_user_input(message: str, state: State) -> str | None:
return None

try:
-fallback_directive = _llm_fallback_precompile(message, state)
+fallback_directive = _llm_fallback_preprocess(message, state)
logger.debug("preprocessor: fallback_directive=%r", fallback_directive)
return fallback_directive
except Exception:
@@ -427,15 +427,15 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
_restore_session_checkpoint_if_needed(engine, session_key)
checkpoint_before = engine.export_checkpoint()
pending_before = checkpoint_before.get("pending")
-precompiled: str | None = None
+preprocessed: str | None = None
if pending_before is not None:
compile_input = user_input
else:
-precompiled = _precompile_user_input(user_input, engine.state)
-compile_input = precompiled if precompiled else user_input
+preprocessed = _preprocess_user_input(user_input, engine.state)
+compile_input = preprocessed if preprocessed else user_input
logger.debug(
"preprocessor: engine_input=%s",
"directive" if precompiled else f"user_input len={len(user_input)}",
"directive" if preprocessd else f"user_input len={len(user_input)}",
)

decision = engine.step(compile_input)
@@ -450,7 +450,7 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
response_text,
original_input=user_input,
compiler_input=compile_input,
-preprocessor_output=precompiled,
+preprocessor_output=preprocessed,
decision=decision,
state_before=checkpoint_before.get("authoritative_state"),
state_after=engine.state,
@@ -461,7 +461,7 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
near_miss_prompt,
original_input=user_input,
compiler_input=compile_input,
-preprocessor_output=precompiled,
+preprocessor_output=preprocessed,
decision={"kind": "clarify", "prompt_to_user": near_miss_prompt},
state_before=checkpoint_before.get("authoritative_state"),
state_after=engine.state,
@@ -474,7 +474,7 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
response_text,
original_input=user_input,
compiler_input=compile_input,
-preprocessor_output=precompiled,
+preprocessor_output=preprocessed,
decision=decision,
state_before=checkpoint_before.get("authoritative_state"),
state_after=engine.state,
@@ -486,7 +486,7 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
response_text,
original_input=user_input,
compiler_input=compile_input,
-preprocessor_output=precompiled,
+preprocessor_output=preprocessed,
decision=decision,
state_before=checkpoint_before.get("authoritative_state"),
state_after=engine.state,
@@ -500,7 +500,7 @@ def handle_turn(user_input: str, engine: Engine, *, session_key: str | None = No
response_text,
original_input=user_input,
compiler_input=compile_input,
-preprocessor_output=precompiled,
+preprocessor_output=preprocessed,
decision=decision,
state_before=checkpoint_before.get("authoritative_state"),
state_after=compiled_state,
@@ -33,9 +33,9 @@ class CustomLogger: # type: ignore[no-redef]
get_premise_value,
)
from experimental.preprocessor import (
-PRECOMPILE_OUTCOME_DIRECTIVE,
+PREPROCESS_OUTCOME_DIRECTIVE,
parse_preprocessor_output,
-precompile_heuristic,
+preprocess_heuristic,
render_prompt,
)

@@ -140,7 +140,7 @@ def _get_litellm_completion() -> Callable[..., object]:
return cast(Callable[..., object], litellm_module.completion)


-def _llm_fallback_precompile(message: str, state: State) -> str | None:
+def _llm_fallback_preprocess(message: str, state: State) -> str | None:
with as_file(_prompt_file_path()) as prompt_path:
prompt = render_prompt(prompt_path, state)
if prompt is None:
@@ -196,11 +196,11 @@ def _state_before_last_message(user_transcript: Transcript) -> State | None:
return replay["state"]


-def _precompile_last_user_message(message: str, state: State | None) -> str | None:
+def _preprocess_last_user_message(message: str, state: State | None) -> str | None:
try:
-heuristic_result = precompile_heuristic(message)
+heuristic_result = preprocess_heuristic(message)
if (
heuristic_result["outcome"] == PRECOMPILE_OUTCOME_DIRECTIVE
heuristic_result["outcome"] == PREPROCESS_OUTCOME_DIRECTIVE
and heuristic_result["directive"]
):
parsed = parse_preprocessor_output(heuristic_result["directive"])
@@ -213,7 +213,7 @@ def _precompile_last_user_message(message: str, state: State | No
return None

try:
-return _llm_fallback_precompile(message, state)
+return _llm_fallback_preprocess(message, state)
except Exception:
logger.debug("litellm_proxy: fallback_exception", exc_info=True)
return None
@@ -240,16 +240,16 @@ async def async_pre_call_hook(

transcript_for_replay = user_transcript
replaced_last_user_message = False
-precompiled: str | None = None
+preprocessed: str | None = None

if user_transcript:
last_user_content = cast(str, user_transcript[-1]["content"])
prior_state = _state_before_last_message(user_transcript)
-precompiled = _precompile_last_user_message(last_user_content, prior_state)
-logger.debug("litellm_proxy: precompiled=%r", precompiled)
-if precompiled:
+preprocessed = _preprocess_last_user_message(last_user_content, prior_state)
+logger.debug("litellm_proxy: preprocessed=%r", preprocessed)
+if preprocessed:
transcript_for_replay = [*user_transcript]
-transcript_for_replay[-1] = {"role": "user", "content": precompiled}
+transcript_for_replay[-1] = {"role": "user", "content": preprocessed}
replaced_last_user_message = True

logger.debug("litellm_proxy: replaced_last_user_message=%s", replaced_last_user_message)
@@ -50,9 +50,9 @@ def Field(*, default: Any, description: str = "") -> Any: # type: ignore[no-red
from context_compiler import State, create_engine, get_policy_items, get_premise_value
from context_compiler.engine import Engine
from experimental.preprocessor import (
-PRECOMPILE_OUTCOME_DIRECTIVE,
+PREPROCESS_OUTCOME_DIRECTIVE,
parse_preprocessor_output,
-precompile_heuristic,
+preprocess_heuristic,
render_prompt,
)

@@ -629,7 +629,7 @@ async def _validate_configured_model_ids(
)
return None

-async def _llm_fallback_precompile(
+async def _llm_fallback_preprocess(
self,
message: str,
state: State,
@@ -676,7 +676,7 @@ async def _llm_fallback_precompile(
return None, None
return parsed, None

-async def _precompile_user_input(
+async def _preprocess_user_input(
self,
message: str,
state: State,
@@ -688,10 +688,10 @@ async def _precompile_user_input(
) -> tuple[str | None, str | None]:
# Heuristic first for precision, determinism, and low latency.
# If heuristic does not produce a directive, try Open WebUI-native fallback.
-heuristic_result = precompile_heuristic(message)
+heuristic_result = preprocess_heuristic(message)

if (
heuristic_result["outcome"] == PRECOMPILE_OUTCOME_DIRECTIVE
heuristic_result["outcome"] == PREPROCESS_OUTCOME_DIRECTIVE
and heuristic_result["directive"]
):
parsed = parse_preprocessor_output(heuristic_result["directive"])
@@ -702,12 +702,12 @@ async def _precompile_user_input(
return None, None

# In debug mode with missing base/preprocessor model ids, skip fallback
-# precompile entirely so we never attempt an empty-model LLM call.
+# preprocess entirely so we never attempt an empty-model LLM call.
model_id = _normalize_model_id(model_id)
if model_id is None:
return None, None

-return await self._llm_fallback_precompile(
+return await self._llm_fallback_preprocess(
message,
state,
request=request,
@@ -816,7 +816,7 @@ async def pipe(
) -> Any:
# Open WebUI integration entrypoint:
# 1) extract latest user input
-# 2) run precompile (heuristic -> LLM fallback)
+# 2) run preprocess (heuristic -> LLM fallback)
# 3) pass directive or original input to engine.step(...)
# 4) map decision back to Open WebUI response behavior
raw_messages = body.get("messages")
@@ -882,24 +882,24 @@ async def pipe(
checkpoint_before = engine.export_checkpoint()
pending_before = checkpoint_before.get("pending")

-precompiled: str | None = None
-precompile_error: str | None = None
+preprocessed: str | None = None
+preprocess_error: str | None = None
if pending_before is None:
-precompiled, precompile_error = await self._precompile_user_input(
+preprocessed, preprocess_error = await self._preprocess_user_input(
latest_user_text,
engine.state,
request=__request__,
user_payload=__user__,
prompt_profile=self.valves.PREPROCESSOR_PROMPT_PROFILE,
model_id=effective_preprocessor_model,
)
-if precompile_error is not None:
-return precompile_error
+if preprocess_error is not None:
+return preprocess_error

logger.debug("preprocessor: precompiled=%r", precompiled)
# Preserve core behavior: if precompile yields no directive, use raw user
logger.debug("preprocessor: preprocessd=%r", preprocessd)
# Preserve core behavior: if preprocess yields no directive, use raw user
# text so the compiler still decides clarify/passthrough/update.
-compile_input = precompiled if precompiled is not None else latest_user_text
+compile_input = preprocessed if preprocessed is not None else latest_user_text

logger.debug("preprocessor: engine_input=%r", compile_input)
decision = engine.step(compile_input)
@@ -920,7 +920,7 @@ async def pipe(
decision=decision,
state_before=state_before,
state_after=state_after,
-preprocessor_output=precompiled,
+preprocessor_output=preprocessed,
llm_called=False,
)
if near_miss_prompt is not None and kind == "passthrough":
Expand All @@ -931,7 +931,7 @@ async def pipe(
decision={"kind": "clarify", "prompt_to_user": near_miss_prompt},
state_before=state_before,
state_after=state_after,
-preprocessor_output=precompiled,
+preprocessor_output=preprocessed,
llm_called=False,
)
if kind == "passthrough":
Expand All @@ -948,7 +948,7 @@ async def pipe(
decision=decision,
state_before=state_before,
state_after=state_after,
-preprocessor_output=precompiled,
+preprocessor_output=preprocessed,
llm_called=base_model_id is not None,
)
if kind == "update":
Expand All @@ -961,7 +961,7 @@ async def pipe(
decision=decision,
state_before=state_before,
state_after=state_after,
-preprocessor_output=precompiled,
+preprocessor_output=preprocessed,
llm_called=False,
)
response = await self._forward_update(
Expand All @@ -978,7 +978,7 @@ async def pipe(
decision=decision,
state_before=state_before,
state_after=state_after,
-preprocessor_output=precompiled,
+preprocessor_output=preprocessed,
llm_called=base_model_id is not None,
state_injected=_active_state_summary(state_after),
)
@@ -996,6 +996,6 @@ async def pipe(
decision=decision,
state_before=state_before,
state_after=state_after,
-preprocessor_output=precompiled,
+preprocessor_output=preprocessed,
llm_called=base_model_id is not None,
)