From 879c55746542cfc0bff35d8ad0931326381f229f Mon Sep 17 00:00:00 2001
From: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>
Date: Wed, 8 Apr 2026 00:16:55 +0200
Subject: [PATCH 1/2] results: qwen3.5:9b SimpleQA 59.0% (100q, source_based)

---

Notes:
    The strategy key `source_based` and the `source-based/` directory
    introduced here are relabeled to `langgraph_agent` /
    `langgraph-agent/` by PATCH 2/2 of this series.

 .../serper/qwen3.5-9b_2026-04-07.yaml         | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 results/simpleqa/source-based/serper/qwen3.5-9b_2026-04-07.yaml

diff --git a/results/simpleqa/source-based/serper/qwen3.5-9b_2026-04-07.yaml b/results/simpleqa/source-based/serper/qwen3.5-9b_2026-04-07.yaml
new file mode 100644
index 0000000..370d940
--- /dev/null
+++ b/results/simpleqa/source-based/serper/qwen3.5-9b_2026-04-07.yaml
@@ -0,0 +1,22 @@
+model: qwen3.5:9b
+model_provider: OLLAMA
+search_engine: serper
+results:
+  dataset: SimpleQA
+  total_questions: 100
+  source_based:
+    accuracy: "59.0% (59/100)"
+    iterations: 10
+    questions_per_iteration: 1
+    avg_time_per_question: "9m 31s"
+configuration:
+  temperature: 0.7
+  context_window: 36352  # captured at benchmark start
+  max_tokens: 30000  # captured at benchmark start
+evaluator:
+  model: qwen3.5:9b
+  provider: ollama
+  temperature: 0
+versions:
+  ldr_version: 1.5.6
+date_tested: 2026-04-07

From b294009deaedea9915f470b1b549b372dc05333d Mon Sep 17 00:00:00 2001
From: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>
Date: Wed, 8 Apr 2026 00:18:02 +0200
Subject: [PATCH 2/2] fix: relabel as langgraph_agent (exporter bug mislabeled
 it source_based)

---
 .../serper/qwen3.5-9b_2026-04-07.yaml         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename results/simpleqa/{source-based => langgraph-agent}/serper/qwen3.5-9b_2026-04-07.yaml (96%)

diff --git a/results/simpleqa/source-based/serper/qwen3.5-9b_2026-04-07.yaml b/results/simpleqa/langgraph-agent/serper/qwen3.5-9b_2026-04-07.yaml
similarity index 96%
rename from results/simpleqa/source-based/serper/qwen3.5-9b_2026-04-07.yaml
rename to results/simpleqa/langgraph-agent/serper/qwen3.5-9b_2026-04-07.yaml
index 370d940..f1576cc 100644
--- a/results/simpleqa/source-based/serper/qwen3.5-9b_2026-04-07.yaml
+++ b/results/simpleqa/langgraph-agent/serper/qwen3.5-9b_2026-04-07.yaml
@@ -4,7 +4,7 @@ search_engine: serper
 results:
   dataset: SimpleQA
   total_questions: 100
-  source_based:
+  langgraph_agent:
     accuracy: "59.0% (59/100)"
     iterations: 10
     questions_per_iteration: 1