diff --git a/results/simpleqa/langgraph-agent/serper/qwen3.5-9b_2026-04-07.yaml b/results/xbench-deepsearch/langgraph-agent/serper/qwen3.5-9b_2026-04-07.yaml
similarity index 94%
rename from results/simpleqa/langgraph-agent/serper/qwen3.5-9b_2026-04-07.yaml
rename to results/xbench-deepsearch/langgraph-agent/serper/qwen3.5-9b_2026-04-07.yaml
index f1576cc..d327ea6 100644
--- a/results/simpleqa/langgraph-agent/serper/qwen3.5-9b_2026-04-07.yaml
+++ b/results/xbench-deepsearch/langgraph-agent/serper/qwen3.5-9b_2026-04-07.yaml
@@ -2,7 +2,7 @@ model: qwen3.5:9b
 model_provider: OLLAMA
 search_engine: serper
 results:
-  dataset: SimpleQA
+  dataset: xbench_deepsearch
   total_questions: 100
   langgraph_agent:
     accuracy: "59.0% (59/100)"