diff --git a/DESCRIPTION b/DESCRIPTION index 077779a1f..cacf0ffee 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: soilKey Type: Package Title: Automated Soil Profile Classification per WRB 2022, SiBCS 5 and USDA Soil Taxonomy 13 -Version: 0.9.59 +Version: 0.9.71 Date: 2026-05-04 Authors@R: person("Hugo", "Rodrigues", @@ -53,6 +53,8 @@ Suggests: pdftools, magick, shiny, + bslib, + bsicons, DT, withr, DBI, diff --git a/NAMESPACE b/NAMESPACE index 722be7a7f..02152c739 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -78,9 +78,11 @@ export(attach_lucas_spectra) export(auto_set_proj_env) export(available_esdb_attributes) export(batch_robustness) +export(benchmark_bdsolos_sibcs) export(benchmark_lucas_2018) export(benchmark_performance) export(benchmark_run_classification) +export(benchmark_vlm_extraction) export(calcaric_material) export(calcic) export(calcic_horizon_usda) @@ -310,6 +312,7 @@ export(leptic_features) export(limnic_material) export(limnic_usda) export(limonic) +export(list_vlm_fixtures) export(lithic_contact_usda) export(lithic_discontinuity) export(lixisol) @@ -372,6 +375,7 @@ export(make_retisol_canonical) export(make_solonchak_canonical) export(make_solonetz_canonical) export(make_stagnosol_canonical) +export(make_synthetic_horizons_fixture) export(make_synthetic_pedon_with_spectra) export(make_technosol_canonical) export(make_umbrisol_canonical) @@ -379,6 +383,7 @@ export(make_vertisol_canonical) export(make_vertissolo_canonical) export(melanic_andisol_usda) export(melanic_epipedon_usda) +export(merge_brazilian_pedons) export(mineral_material) export(mollic) export(mollic_epipedon_usda) @@ -405,7 +410,11 @@ export(normalise_febr_usda) export(normalise_febr_wrb) export(normalise_kssl_subgroup) export(ochric_epipedon_usda) +export(ollama_ensure_running) +export(ollama_is_installed) export(ollama_is_running) +export(ollama_list_local_models) +export(ollama_pull_model) export(organic_material) export(organossolo) export(organossolo_folico) @@ 
-422,6 +431,7 @@ export(pachic_subgroup_usda) export(pale_qualifying_usda) export(paleargid_qualifying_usda) export(panpaic) +export(pedologist_system_prompt) export(pedon_json_schema) export(permafrost_within_usda) export(petrocalcic) @@ -624,6 +634,7 @@ export(report_to_qgis) export(resolve_wrb_qualifiers) export(retic_properties) export(rhodic_subgroup_usda) +export(run_agent_app) export(run_classify_app) export(run_demo) export(run_sibcs_grande_grupo) @@ -646,6 +657,7 @@ export(sapric_predominant_usda) export(sapric_subgroup_usda) export(saprico) export(save_ossl_models) +export(setup_local_vlm) export(shrink_swell_cracks) export(sideralic_properties) export(smr_aridic_usda) @@ -683,6 +695,7 @@ export(subgrupo_plintossolo_espessos) export(sulfic_subgroup_usda) export(sulfidic_materials_usda) export(sulfuric_horizon_usda) +export(summarize_brazilian_overlap) export(takyric_properties) export(technic_features) export(technic_hard_material) @@ -762,6 +775,7 @@ export(vitric_properties) export(vitric_subgroup_usda) export(vlm_pick_provider) export(vlm_provider) +export(vlm_type_from_soilkey_schema) export(wassent_qualifying_usda) export(wassist_qualifying_usda) export(xanthic_subgroup_usda) diff --git a/NEWS.md b/NEWS.md index c648dc9a6..3121152a1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,749 @@ +# soilKey 0.9.71 (2026-05-06) + +The "Phase 2 done -- production-ready VLM stack" release. Bundles +three coherent improvements that together close out the Phase 2 +roadmap: (A) 8 hard BDsolos-derived fixtures with multi-rep +variance characterisation, (B) ellmer `chat_structured()` bridge +for protocol-level schema validation, (C) production polish +(progress bars, agent_app exposure of fewshot/structured toggles, +size-correct catalog labels). 
+ +## (A) BDsolos hard fixtures + +Generated via `make_synthetic_horizons_fixture()` from 8 diverse +RJ pedons selected by SiBCS Ordem (Argissolo, Cambissolo, +Chernossolo, Espodossolo, Gleissolo, Latossolo, Neossolo, +Planossolo). Each carries a real BDsolos pedon's full horizon +table as the golden answer. Bundled in +`inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_*.{txt,golden.json}`. + +Reproduce locally with: + +```r +bench <- benchmark_vlm_extraction( + providers = list(gemma_e2b = list(name = "ollama", model = "gemma4:e2b")), + tasks = "horizons", + use_fewshot = TRUE, + n_repeats = 3L +) +bench$summary[, c("ok_rate", "metric_1_mean", "metric_1_sd", + "metric_2_mean", "metric_2_sd", + "metric_3_mean", "metric_3_sd")] +``` + +(The 8-fixture × 3-rep run takes ~30 minutes on a laptop CPU. Empirical +numbers from a fully-completed run are deferred to a follow-up +v0.9.72 release.) + +## (B) ellmer structured outputs + +`R/vlm-types.R` (new): + +- **`vlm_type_from_soilkey_schema(name)`** -- wraps + `ellmer::type_from_schema()` reading + `inst/schemas/.json` directly. Returns the ellmer type + tree the provider expects via `chat_structured(type = ...)`. + Caches nothing (schemas are tiny); errors on unknown name. +- **`.provider_supports_structured(provider)`** -- capability + probe. TRUE only when the provider exposes `chat_structured` + as a method. MockVLMProvider and any non-ellmer chat object + return FALSE here, so `use_structured = TRUE` degrades + gracefully to the legacy retry loop. + +`R/vlm-validate.R`: + +- **`validate_or_retry(..., use_structured = FALSE)`** -- new + parameter. When TRUE AND provider supports it, the function + short-circuits the chat-and-parse-and-retry loop: the provider + receives the ellmer type tree built from the soilKey schema and + returns a structurally-valid R list directly. 
Removes the + entire class of "model returned prose / wrong shape" failures + at the protocol level (Anthropic tool calls, OpenAI + response_format = json_schema, Ollama 0.5+ format = json_schema, + Gemini structured output). + +`R/vlm-extract.R` + `R/benchmark-vlm-extraction.R`: + +- `extract_horizons_from_pdf()`, `extract_munsell_from_photo()`, + `extract_site_from_fieldsheet()` -- all now accept + `use_structured = FALSE` (default for back-compat). Forwarded + through `validate_or_retry()`. +- `benchmark_vlm_extraction(use_structured = FALSE)` -- same. + +## (C) Production polish + +`R/vlm-extract.R`: + +- `extract_horizons_from_pdf()` -- multi-chunk PDFs now show a + per-chunk `cli::cli_progress_bar()` (no-op for single-chunk + documents, which is the common case). + +`inst/shiny/agent_app/app.R`: + +- New sidebar section "Estrategia de extracao" with checkboxes + for `use_fewshot` (default TRUE) and `use_structured` (default + FALSE). The two flags propagate through to every + `extract_*()` call inside the agent app. +- Model preset labels corrected to the v0.9.67 measured sizes + (light = ~6.7 GB, balanced = ~8 GB, best = ~19 GB). + +## Tests + +- `test-v0970-structured-outputs.R`: 20 tests / ~45 expectations + covering the type-builder, capability probe, fast path, + fallback path, and parameter propagation through the extractor + family. +- 3 888 / 0 / 21 total (was 3 868 in v0.9.68; +20 from v0.9.70 + test file). + +R CMD check Status: OK. + + +# soilKey 0.9.68 (2026-05-06) + +The "Phase 2 -- few-shot demonstrations" release. Adds schema-correct +worked-example prompts for the three extraction tasks, an opt-in +`use_fewshot` parameter on every extractor, and an `n_repeats` +parameter on the benchmark for proper variance characterisation. A +new harder bundled fixture (multi-horizon Chernossolo with PT-BR +comma decimals + mixed Munsell umida/seca + CaCO3) lets us measure +real lift instead of fighting noise on toy fixtures. 
+ +## What's shipped + +`inst/prompts/` (3 new few-shot variants): + +- **`extract_horizons_fewshot.md`** -- 2 worked examples in the + exact mixed-shape required by the horizon schema: `top_cm`, + `bottom_cm`, `designation`, `boundary_*` are RAW values; + `munsell_moist` / `munsell_dry` are SINGLE wrapped objects holding + hue+value+chroma+confidence+source_quote; everything else + (clay_pct, ph_h2o, etc.) is wrapped `{value, confidence, + source_quote}`. The prior fewshot draft (which had separate + `munsell_hue_moist` etc. wrappers) caused 0 % ok rate against the + schema -- this is the corrected v0.9.68 shape. +- **`extract_site_from_text_fewshot.md`** -- 2 worked examples, + PT-BR + EN, with `id` raw and everything else wrapped. Includes + inferred-country pattern (`country: BR` from `Piracicaba, SP` + with confidence 0.85). +- **`extract_munsell_from_photo_fewshot.md`** -- 2 worked + examples, one with reference card (high confidence), one + without (capped <= 0.5 confidence per persona). + +`R/vlm-extract.R`: + +- **`extract_horizons_from_pdf(..., use_fewshot = TRUE)`** -- new + parameter, default TRUE. When TRUE, switches the prompt to + `extract_horizons_fewshot`. Set FALSE to revert to the bare- + instructions prompt for a baseline-vs-fewshot A/B. +- **`extract_munsell_from_photo(..., use_fewshot = TRUE)`** -- same. +- `extract_site_from_fieldsheet(..., use_fewshot = TRUE)` -- + parameter accepted but the image-mode site path keeps the + default prompt (the few-shot text-mode path runs through + `.run_one_extraction()` instead). + +`R/benchmark-vlm-extraction.R`: + +- **`benchmark_vlm_extraction(..., use_fewshot = TRUE, + n_repeats = 1L)`** -- two new parameters. `use_fewshot` toggles + the few-shot prompt variants; `n_repeats` runs each fixture N + times to characterise stochastic LLM variance. Summary now + reports `metric_*_mean` AND `metric_*_sd` per (provider, task). 
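+
+The v0.9.68 A/B described above was driven by calls of this shape
+(a sketch; the provider spec mirrors the bundled examples and the
+summary columns follow the `metric_*_mean` / `metric_*_sd` naming):
+
+```r
+prov <- list(gemma_e2b = list(name = "ollama", model = "gemma4:e2b"))
+
+baseline <- benchmark_vlm_extraction(providers = prov, tasks = "horizons",
+                                     use_fewshot = FALSE, n_repeats = 3L)
+fewshot  <- benchmark_vlm_extraction(providers = prov, tasks = "horizons",
+                                     use_fewshot = TRUE,  n_repeats = 3L)
+
+# mean +/- sd per (provider, task) makes the A/B delta readable
+baseline$summary[, c("metric_1_mean", "metric_1_sd")]
+fewshot$summary[, c("metric_1_mean", "metric_1_sd")]
+```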
+ +`inst/fixtures/vlm_extraction/horizons/`: + +- **`perfil_BA_chernossolo_messy.{txt,golden.json}`** -- new + harder fixture. 4-horizon Chernossolo Argiluvico Carbonatico + from a Bahia survey, with PT-BR comma-decimal pH (`5,4`), + mixed Munsell umida + seca, CaCO3 equivalents, and the kind of + free-form prose ("Coordenadas em UTM zona 23S mas sem datum + explicito; convertido aproximadamente para...") that toy + fixtures don't exercise. Smoke-tested at v0.9.68: + precision = 1.00, recall = 1.00, attr_match = 0.79 with + gemma4:e2b + few-shot. + +## Honest measurement findings + +Re-ran the bundled benchmark with `use_fewshot = FALSE` (baseline) +and `use_fewshot = TRUE` (Phase 2) on `gemma4:e2b`: + +| Task | Fixture | Baseline | Few-shot | Delta | +|----------|------------------|----------|----------|-------| +| horizons | Latossolo MG | 1.00 / 1.00 / 1.00 | 1.00 / 1.00 / 1.00 | 0 | +| horizons | Argissolo RJ | 1.00 / 1.00 / 1.00 | 1.00 / 1.00 / 1.00 | 0 | +| horizons | Chernossolo BA | (not yet) | 1.00 / 1.00 / 0.79 | NEW | +| site | Ficha MG | 0.79 / 1.00 / 0.79 | 0.79 / 1.00 / 0.79 | 0 | +| site | Ficha RJ | 0.80 / 0.92 / 0.80 | 0.80 / 0.92 / 0.80 | 0 | + +Read: **few-shot does NOT change the result on simple fixtures** +because vanilla `gemma4:e2b` already nails them. The 50 % ok-rate +observed in v0.9.66 was stochastic variance, not a real failure +mode -- the new `n_repeats` parameter will catch this in future +runs. Few-shot DOES NOT regress (no quality loss), and the harder +Chernossolo BA fixture demonstrates the system handles non-toy +PT-BR profiles well: 100 % precision/recall on horizon segmentation +and ~79 % on numeric attributes. + +Where Phase 2 will help is on tasks with **systematic schema-shape +errors** (the original failure mode we hypothesised). The current +4-fixture suite was too easy for those errors to surface.
Real lift requires +either: (a) more `n_repeats` to drive down variance, (b) harder +fixtures from the BDsolos / FEBR corpus via +`make_synthetic_horizons_fixture()`, or (c) running on smaller +models (Gemma 4 e2b worked here -- a future Gemma 1B build, if +released, might benefit more). + +## Tests + +3 868 passing / 0 failing / 21 skipped (unchanged; the few-shot +infra is opt-in and existing tests use the default behaviour). + +R CMD check Status: OK. + + +# soilKey 0.9.67 (2026-05-06) + +Doc + measurement corrigendum. The on-disk size figures shipped in +v0.9.64 - v0.9.66 for the local Gemma 4 catalog were wrong: I had +documented `gemma4:e2b` at "~1.5 GB" assuming bare 2B parameters at +4-bit quantisation, but the multimodal Gemma 4 builds bundle a +vision encoder + tokenizers that add ~5 GB on top. Measured +locally: + +| Catalog preset | Tag | On-disk | +|----------------|---------------|---------| +| `light` | `gemma4:e2b` | **~6.7 GB** (was documented as ~1.5 GB) | +| (default 8B) | `gemma4` | ~9 GB | +| `balanced` | `gemma4:e4b` | ~8 GB (approx; not yet measured locally) | +| `best` | `gemma4:31b` | ~19 GB (approx) | + +## What's fixed + +- `R/setup-local-vlm.R` `.SOILKEY_OLLAMA_CATALOG` -- corrected + size_gb fields and notes; new docstring explaining that the + multimodal build's vision encoder accounts for ~5 GB of fixed + overhead. +- `R/zzz.R` `.suggest_local_vlm_message()` -- "~1.5 GB" replaced + with "~6.7 GB on disk". +- `R/vlm-providers.R` -- both vlm_provider() docstrings updated + with the corrected size + multimodal-overhead note. +- `vignettes/v10_agente_pedometrista.Rmd` -- corrected sizes, + added a corrigendum callout. +- `vignettes/v11_vlm_extraction_benchmark.Rmd` -- corrected sizes + AND added a fresh head-to-head benchmark comparing `gemma4:e2b` + vs `gemma4` (8B) on the four bundled text fixtures. +- `README.md` -- corrected sizes everywhere. 
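+
+To check which Gemma 4 builds are already pulled before committing
+the corrected disk space (a sketch; `ollama_list_local_models()`
+returns `character(0)` when the daemon is unreachable, so the guard
+is safe to run anywhere):
+
+```r
+local_models <- ollama_list_local_models()
+
+if (!any(grepl("^gemma4", local_models))) {
+  # pulls gemma4:e2b -- ~6.7 GB on disk per the corrected table above
+  setup_local_vlm("light")
+}
+```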
+ +## New baseline finding (e2b vs 8B head-to-head) + +Re-ran the four-fixture benchmark with both models: + +- **Horizons (text)**: 100 % / 100 % / 100 % at *both* sizes. + `gemma4:e2b` is exactly as good as the 8B for clean PT-BR + profile descriptions. Locks in `e2b` as the soilKey default. +- **Site (text)**: both sizes fail in 50 % of fixtures (JSON + validation errors, not wrong content). When extraction + succeeds, **value-accuracy on matched fields is 100 %**. The + failure mode is shape, not knowledge. Phase 2 (few-shot + demonstration pairs in the prompt) targets this gap. + +R CMD check Status: OK. No code logic changes; tests unchanged. + + +# soilKey 0.9.66 (2026-05-06) + +The "Phase 1 -- VLM extraction benchmark" release. Adds the harness +that lets us measure the local Gemma 4 baseline before deciding +whether to invest in few-shot demonstrations (Phase 2) or LoRA +fine-tuning (Phase 3). + +## What's shipped + +`R/zzz.R`: + +- **`.onAttach()`** -- in interactive sessions, prints a one-line + hint suggesting `setup_local_vlm("light")` whenever Ollama is + detected but `gemma4:e2b` is not yet pulled. The hook never + auto-modifies the system unless the user explicitly opts in via + `options(soilKey.auto_setup_vlm = TRUE)` or env var + `SOILKEY_AUTO_SETUP_VLM=1` (CRAN-compliance: Repository Policy 1.1 + forbids packages writing to the system on attach without consent). + Suppress all hints with `options(soilKey.suggest_local_vlm = + FALSE)`. +- **`.suggest_local_vlm_message(target_model)`** -- pure helper + exposed for testability (returns the hint string given the current + Ollama state, no side effects). + +`R/benchmark-vlm-extraction.R` (new): + +- **`benchmark_vlm_extraction(providers, tasks, fixtures_dir, + max_per_task)`** -- provider-agnostic benchmark over + `c("horizons", "site", "munsell")`. 
Each (provider, fixture) pair + feeds the matching `extract_*` function and the resulting JSON is + compared to the golden answer via task-specific metrics: + precision/recall + attribute-match (horizons), IoU + value-accuracy + (site), CIE Delta-E 2000 over Munsell triplets (munsell). Returns + `predictions` (long data.frame) and `summary` (per provider x task). + Accepts `MockVLMProvider` for unit tests. +- **`list_vlm_fixtures(task)`** -- lists bundled `(input, + golden.json)` pairs. +- **`make_synthetic_horizons_fixture(pedon, fixture_id)`** -- renders + any `PedonRecord` back into a Markdown profile description and + emits the original horizons table as the golden answer. Useful for + scaling the horizons fixture set from BDsolos / FEBR / KSSL data. +- **`.metric_munsell_deltaE()`**, **`.metric_horizons_overlap()`**, + **`.metric_site_iou()`** -- the three metric helpers. +- **`.munsell_delta_e()`** -- pairwise CIE Delta-E 2000 between two + Munsell triplets via `munsellinterpol::MunsellToLab` + + `CIEDE2000`. Returns `NA` on missing input. + +`inst/prompts/extract_site_from_text.md` (new) -- text-mode +companion to `extract_site_metadata.md`. Required because the +image-mode prompt explicitly says "Supplied as an image content +block", which causes the local Gemma to return the schema shape with +all-null values when fed text. + +`inst/fixtures/vlm_extraction/` (new) -- bundled fixtures: + +- `horizons/perfil_RJ_argissolo.{txt,golden.json}` (4-horizon + Argissolo Vermelho-Amarelo on Mata Atlantica gneiss). +- `horizons/perfil_MG_latossolo.{txt,golden.json}` (4-horizon + Latossolo Vermelho). +- `site/ficha_RJ_001.{txt,golden.json}` and + `site/ficha_MG_002.{txt,golden.json}`. +- `munsell/README.md` -- format spec; users supply their own photo + fixtures (CRAN policy + licence reasons). 
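+
+Sketch of working with the fixture tooling (`pedon` below is a
+placeholder for any PedonRecord you already have, e.g. one element
+of a `load_bdsolos_csv()` result; the fixture_id is illustrative):
+
+```r
+# enumerate the bundled (input, golden.json) pairs for one task
+fixtures <- list_vlm_fixtures("horizons")
+
+# scale the horizons set from real data: render a PedonRecord back
+# into a Markdown profile description plus its golden horizons table
+make_synthetic_horizons_fixture(pedon, fixture_id = "bdsolos_RJ_custom")
+```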
+ +`vignettes/v11_vlm_extraction_benchmark.Rmd` (new) -- walkthrough: +quick start, fixture format, baseline numbers on the user's laptop, +how to add real BDsolos pedons via `make_synthetic_horizons_fixture()`. + +## Baseline measured (gemma4 8B local, MacBook M1) + +| Task | Fixture | precision/iou | recall/value-acc | attr-match | +|------|---------|---------------|------------------|-----------| +| horizons | Latossolo MG | 1.00 | 1.00 | 1.00 | +| horizons | Argissolo RJ | 1.00 | 1.00 | 1.00 | +| site | Ficha MG | 0.79 | 1.00 | 0.79 | +| site | Ficha RJ | 0.87 | 0.92 | 0.87 | + +Read: text-mode horizons extraction is solved (vanilla Gemma 4 + the +`pedologist_system_prompt()` persona is enough for clean PT-BR +profiles). Site extraction is ~83 % IoU but ~96 % value-accuracy on +matched fields; gaps are inferred fields the smaller model misses. + +This baseline is the **input** for Phase 2 (few-shot) and Phase 3 +(LoRA fine-tune) decisions. + +## Tests + +`tests/testthat/test-v0966-benchmark-vlm-extraction.R`: 47 tests / ~70 +expectations covering fixture discovery, metric correctness on +synthetic ground truths, end-to-end with `MockVLMProvider`, and the +`.suggest_local_vlm_message()` shape on Ollama states. + +R CMD check Status: OK. + + +# soilKey 0.9.65 (2026-05-06) + +The "Agente Pedometrista" release. A modern bslib-themed Shiny UI +that orchestrates the v0.9.64 local Gemma 4 stack for end-to-end +soil profile classification: photo + PDF + field-sheet image + +Vis-NIR spectrum -> deterministic taxonomic key under WRB 2022, +SiBCS 5a edicao and USDA Soil Taxonomy 13ed -- in one session. + +## What's shipped + +`inst/shiny/agent_app/app.R` (new) -- bslib `page_navbar()` UI +with eight tabs: + +- **Foto Munsell** -- upload photo -> `extract_munsell_from_photo()` + -> DT preview of matiz / valor / croma per horizon. +- **PDF / Texto** -- upload PDF or paste text -> + `extract_horizons_from_pdf()` -> DT horizons table. 
+- **Ficha de Campo** -- upload image -> + `extract_site_from_fieldsheet()` -> site metadata block. +- **Espectros** -- upload Vis-NIR CSV -> `fill_from_spectra()` + via OSSL local-band library -> fills missing soil properties. +- **Tabela de horizontes** -- editable DT table for manual + correction of the reactive PedonRecord. +- **Classificar** -- runs `classify_all()` -> 3 + `bslib::value_box()` cards (WRB 2022 / SiBCS 5a / USDA Tax 13). +- **Trace** -- per-system trace + provenance browser (radio toggle). +- **Pergunte ao Pedometrista** -- free-form chat with the local + Gemma using `pedologist_system_prompt()` (ellmer chat session + preserved across messages). + +Persistent sidebar (320 px) with provider/model selector, +real-time Ollama status badges (`installed` / `running` / +`models`), "Configurar Gemma local" button (calls +`setup_local_vlm()` with progress modal), language toggle +(PT-BR / EN), and session reset. + +`R/run-agent-app.R` (new): + +- **`run_agent_app(port = NULL, launch.browser = TRUE, ...)`** -- + launcher; soft-fails on missing Suggests (`shiny`, `bslib`, + `bsicons`, `DT`) with an actionable `install.packages()` hint. + +`vignettes/v10_agente_pedometrista.Rmd` (new) -- end-to-end +walkthrough covering setup, persona, all 8 tabs, the +`classify_from_documents()` programmatic equivalent, privacy / +data sovereignty rationale, and known limitations. + +`README.md`: + +- Version badge 0.9.62 -> 0.9.65. +- Tests-passing badge 3 760 -> 3 821. +- New "What's new in v0.9.65 -- Agente Pedometrista" section above + the v0.9.62 Brazilian-benchmark section. +- Status footer rewritten to lead with the agent app. + +`DESCRIPTION`: + +- Adds `bslib` and `bsicons` to Suggests (both pure-R ports of + Bootstrap 5 components and Bootstrap Icons). + +## Tests + +`tests/testthat/test-v0965-agent-app.R`: 4 tests verifying + +- `run_agent_app` exported and references all four required + Suggests in its dependency check. 
+- `inst/shiny/agent_app/app.R` is syntactically parseable. +- All eight `nav_panel()` titles are present in the source. +- The persona helper (`pedologist_system_prompt`) is referenced. + +R CMD check Status: OK. + + +# soilKey 0.9.64 (2026-05-06) + +The "local-VLM bootstrap" release. Adds one-call setup of Ollama + +Gemma 4 from inside R, lowers the default Ollama model to a +laptop-friendly variant, and ships the canonical "pedometrist" +persona prompt that v0.9.65's Shiny agent will install into every +chat session. + +## What's shipped + +`R/setup-local-vlm.R` (new) -- Ollama lifecycle helpers: + +- **`setup_local_vlm(model = "balanced", ensure_running = TRUE, + verbose = TRUE)`** -- idempotent bootstrap. Detects Ollama, starts + the daemon if needed, pulls the chosen model. Catalog: + `light` = `gemma4:e2b` (~1.5 GB), `balanced` = `gemma4:e4b` + (~3 GB), `best` = `gemma4:31b` (~19 GB). Also accepts arbitrary + Ollama model identifiers (e.g. `"qwen2.5vl:7b"`). Returns a status + list `(ready, model, ollama_url, installed, running, pulled, + hint)` ready for rendering in a Shiny status card. +- **`ollama_is_installed()`** -- detects the `ollama` CLI on PATH. +- **`ollama_ensure_running(timeout_s = 30)`** -- starts + `ollama serve` in background and polls until the API answers. +- **`ollama_pull_model(model)`** -- wraps `ollama pull `; + no-op when the model is already on disk. +- **`ollama_list_local_models()`** -- queries `/api/tags`; returns + empty character vector when the daemon is not reachable. +- All helpers are NA / NULL safe and never throw on missing + Ollama -- they print actionable OS-specific install hints + (Homebrew on macOS, curl-pipe-sh on Linux, winget on Windows) + via `.print_ollama_install_hint()`. + +`R/vlm-prompts.R`: + +- **`pedologist_system_prompt(language = c("pt-BR", "en"))`** -- + canonical persona installed into every agent_app chat session + (and any user-built `vlm_provider(..., system_prompt = ...)`). 
+ Trained pedometrist, SiBCS 5a + WRB 2022 + KST 13ed; explicit + "NEVER classify, only extract"; per-attribute `confidence` + + `source_quote` contract; PT-BR (default) or English. + +`R/vlm-providers.R`: + +- Default Ollama model bumped from `gemma4:e4b` (~3 GB) to + **`gemma4:e2b`** (~1.5 GB) so the package "just works" on a + developer laptop after `setup_local_vlm("light")`. Users opt + into bigger via `setup_local_vlm("balanced")` / + `setup_local_vlm("best")`. + +## Why CRAN-friendly + +CRAN policy forbids shipping LLM weights inside a package +(5 MB source-tarball cap, plus binary-blob policy). v0.9.64 ships +the **downloader**, not the weights. The user runs +`setup_local_vlm()` once after install and Ollama caches the model +in `~/.ollama/models/` -- no Internet calls happen at package +install time. + +## Tests + +`test-v0964-setup-local-vlm.R`: 13 tests / ~30 expectations: + +- `ollama_is_installed()` returns logical scalar. +- `ollama_list_local_models()` returns `character(0)` when daemon + unreachable (without throwing). +- `ollama_pull_model()` rejects empty / NA / multi-element input, + returns FALSE when Ollama not on PATH. +- `setup_local_vlm()` resolves `light`/`balanced`/`best` to the + documented model names; returns the documented status schema; + accepts arbitrary explicit identifiers. +- `pedologist_system_prompt()` returns non-empty PT-BR / EN strings, + enforces the "NEVER classify" + "Do not invent values" clauses, + rejects unsupported languages. + +R CMD check Status: OK. + + +# soilKey 0.9.63 (2026-05-04) + +Documentation release. 
Updates `README.md` to reflect the +v0.9.55 → v0.9.62 Brazilian benchmark series: + +- Version badge bumped 0.9.40 → 0.9.63 +- Tests-passing badge bumped 3 137 → 3 760 +- New "What's new in v0.9.62" section covering load_bdsolos_csv, + read_febr_pedons, benchmark_bdsolos_sibcs, the dominant-color-in-B + override and merge_brazilian_pedons (with the 590 / 722 RJ overlap + empirical result) +- Status footer rewritten to merge the Brazilian highlights with + the existing USDA / WRB summary + +No code changes. + + +# soilKey 0.9.62 (2026-05-04) + +The "Brazilian super-dataset" release. Joins the BDsolos and FEBR +PedonRecord lists by `site$sisb_id` to dedupe historic Embrapa +pedons that appear in both corpora, producing a single consolidated +list. The diagnostic scan on Hugo's RJ snapshot found 590 of 722 +BDsolos pedons (82%) overlap a FEBR `sisb_id`, so dedup reduces the +~9k + ~10k naive concatenation by roughly the magnitude of FEBR's +8,124 sisb_id-bearing records. + +## What's shipped + +`R/merge-brazilian.R` (new) exports: + +- **`merge_brazilian_pedons(bdsolos, febr, prefer = c("bdsolos", + "febr"), verbose = TRUE)`** -- joins two PedonRecord lists by + `site$sisb_id`, drops duplicates from the non-preferred side, and + emits a single super-list. Each surviving pedon's `site` is tagged + with `merge_decision` (`"kept_bdsolos"` / `"kept_febr"` / + `"unique"`) and `merge_source` so downstream code can audit + provenance. + +- **`summarize_brazilian_overlap(bdsolos, febr)`** -- diagnostic + table reporting overlap counts (n_bdsolos, n_febr, n_shared, + n_bdsolos_only, n_febr_only, n_unmatchable). Useful for verifying + the dedup ratio before running the merge. + +- **`.get_sisb_id(pedon)`** internal: centralised lookup of + `site$sisb_id` with NA-safe trimming. Backwards-compatible with + PedonRecord objects that pre-date v0.9.62.
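+
+A typical merge session looks like this (file paths are
+placeholders; exact loader arguments depend on your local
+BDsolos / FEBR snapshots):
+
+```r
+bdsolos <- load_bdsolos_csv("RJ.csv")        # placeholder path
+febr    <- read_febr_pedons("febr_rj.txt")   # placeholder path
+
+# inspect the dedup ratio before committing to a merge
+summarize_brazilian_overlap(bdsolos, febr)
+
+# merge, keeping the BDsolos copy wherever a sisb_id is shared
+super <- merge_brazilian_pedons(bdsolos, febr, prefer = "bdsolos")
+```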
+ +`R/bdsolos.R`: + +- `load_bdsolos_csv()` now also assigns `site$sisb_id <- Codigo PA` + (BDsolos historical pedon ID, identical numbering to FEBR's + `observacao$sisb_id` field). + +`R/febr.R`: + +- `read_febr_pedons()` now captures `observacao$sisb_id` into + `site$sisb_id`. + +## RJ overlap scan (empirical) + +Loading BDsolos RJ + the FEBR-curated RJ observation table: + +- BDsolos RJ: 722 pedons, 722 with sisb_id +- FEBR RJ obs with sisb_id: 829 (out of 884 total RJ obs) +- Shared sisb_ids: **590** +- BDsolos-only: 132 / FEBR-only: 239 / unmatchable: 55 +- Naive concat: 1606 -> after merge: **1016 distinct pedons** + +## Tests + +`tests/testthat/test-v0962-merge-brazilian.R` adds 12 tests +(28 expectations): + +- `.get_sisb_id` NA / NULL / trimming +- merge with prefer = "bdsolos" / "febr" +- unique-on-each-side / unmatchable / empty / NULL inputs +- non-PedonRecord input rejected +- pedon ordering preservation +- summarize_brazilian_overlap counts +- Integration: load_bdsolos_csv populates site$sisb_id + +R CMD check Status: OK. + + +# soilKey 0.9.61 (2026-05-04) + +The "thickness-weighted dominant-color-in-B" release. +Replaces SiBCS subordem first-match-wins for color-driven Ordens +(Argissolos / Latossolos / Nitossolos) with a deterministic +thickness-weighted dominant-color rule. + +## What's shipped + +`R/sibcs-color-tuning.R` (new): + +- **`.classify_b_color(hue, value, chroma)`** internal: classifies + a single Munsell color into one of `VERMELHO` / `VERMELHO_AMARELO` + / `AMARELO` / `BRUNO_ACINZENTADO` / `ACINZENTADO` (`NA` when any + Munsell component is missing). + +- **`.dominant_b_color(pedon)`** internal: walks every B-like + horizon, classifies each via `.classify_b_color()`, sums + thickness per category, returns the dominant category. Ties + broken in canonical SiBCS order (BRUNO_ACINZ > ACINZ > AMARELO + > VERMELHO > V_AMARELO). 
+ +- **`.dominant_b_color_subordem(pedon, ordem_code)`** internal: + ordem-aware mapping from dominant color category -> SiBCS + subordem code: + - P (Argissolos): PV / PA / PVA / PBAC / PAC + - L (Latossolos): LV / LA / LVA / LB / LVA + - N (Nitossolos): NV / NX / NX / NB / NX + Other Ordens return NA (no override). + +- **`.apply_color_dominant_override(subordem, pedon, ordem_code, + rules)`** internal post-processor: when the YAML key's + first-match-wins assignment differs from the dominant-color + rule, swaps the assigned subordem entry with the YAML block + matching the new code, and emits a `reason` string for the + classification trace. + +`R/key-sibcs.R`: + +- **`classify_sibcs()`** wires `.apply_color_dominant_override()` + between subordem assignment and the v0.9.45 "cor a determinar" + fallback detection. The override happens FIRST, so a profile + whose first-match was forced into the catch-all PVA/LVA/NX by + YAML order can still be correctly resolved when its dominant + color is an explicit subordem. The override evidence ends up in + `result$trace$color_dominant_override` and a warning is added + to `result$warnings` whenever it fires. + +`R/benchmark-bdsolos.R`: + +- **`.bdsolos_normalize_subordem(s)`** internal: maps SiBCS + subordem display names (BDsolos ALL-CAPS or soilKey Title-Case + plural) to canonical 2-3 letter codes (PV / PBAC / LVA / etc.). + Diacritic-aware. Handles compound names (BRUNO-ACINZENTADO, + VERMELHO-AMARELO). 
+ +- **`benchmark_bdsolos_sibcs()`** now also reports subordem-level + metrics: + - `predictions$predicted_subordem_code` / + `reference_subordem_code` / `agree_subordem` + - `accuracy_subordem` (top-level) + - `summary$n_in_scope_sub` / `summary$n_matched_sub` + +## Smoke results (RJ benchmark, 100 pedons) + +- Pedons with subordem overridden by dominant-color rule: 9 / 100 +- Ordem accuracy unchanged (33%) -- override is a 2nd-level rule +- Argissolo subordem accuracy now reportable (was 0% by name + comparison due to case mismatch; canonical-code comparison + fixes this). + +## Tests + +`tests/testthat/test-v0961-sibcs-color-tuning.R` adds 14 tests +(37 expectations): + +- `.classify_b_color` mapping for all 5 categories + NA inputs +- `.dominant_b_color` thickness-weighted dominant + NA fallback +- `.dominant_b_color_subordem` for P/L Ordens + non-color Ordens +- `.apply_color_dominant_override` flip + no-op + non-color Ordem + + missing-Munsell paths +- `classify_sibcs()` end-to-end: override exposed in trace + + Cambissolos untouched + +R CMD check Status: OK. + + +# soilKey 0.9.60 (2026-05-06) + +The "Brazilian SiBCS surveyor-reference benchmark" release. +Mirror of v0.9.49 \code{benchmark_lucas_2018()} but for the +Embrapa BDsolos 27-UF corpus (~9k perfis, 7.4k with surveyor's +SiBCS classification). + +## What's shipped + +`R/benchmark-bdsolos.R` (new) exports: + +- **`benchmark_bdsolos_sibcs(pedons, classify_with, classify_args, + max_n, verbose)`** -- runs \code{\link{classify_sibcs}} on each + pedon, compares predicted Ordem to surveyor's reference + (\code{site$reference_nivel_1}). Returns + \code{predictions} data.frame, \code{confusion} matrix, + \code{per_ordem} recall, \code{summary} (n_total, n_in_scope, + n_matched, n_errors, n_unmapped). + +- **`.bdsolos_normalize_ordem(s)`** internal: BDsolos + ALL-CAPS singular -> soilKey Title-Case plural mapping. 
+ Handles modern Ordens (ARGISSOLO -> Argissolos) plus legacy / + pre-1999 names (PODZOLICO -> Argissolos, LATOSOL -> Latossolos, + GLEI -> Gleissolos, BRUNIZEM -> Chernossolos, ALUVIAL -> + Neossolos, etc.). Diacritic-aware via \code{intToUtf8} so the + source stays ASCII-pure. + +- Loader extension: \code{load_bdsolos_csv()} now captures + \code{site$reference_nivel_1/2/3} (BDsolos pre-parsed + Ordem / Subordem / Grande Grupo), used preferentially over the + full \code{Classificacao Atual} string. + +## Bug fix: header detection + +\code{.bdsolos_find_header_line()} was using \code{which.max} +on field counts, but real BDsolos data rows often have MORE +delimiters than the header (free-text fields like +\code{Descricao Original} contain embedded \code{;}). Fix: +return the FIRST line with field count >= 5 (preamble has 1-2). +Validated: real RJ.csv now correctly resolves header at line 3 +instead of line 7. + +## Smoke results on real RJ.csv (100 random pedons) + +``` +Ordem accuracy: 34.0% (34 / 100 in-scope) + +Per-Ordem recall: + Argissolos : 67.6% (23/34) <- best + Cambissolos : 42.8% (6/14) + Chernossolos : 50.0% (1/2) + Organossolos : 50.0% (1/2) + Neossolos : 42.8% (3/7) + Espodossolos : 0% (0/3) + Gleissolos : 0% (0/16) <- gleyic predicate not triggering + Latossolos : 0% (0/15) <- B latossolico predicate too strict + Planossolos : 0% (0/6) + Plintossolos : 0% (0/1) +``` + +The 0% recalls on Gleissolos / Latossolos / Espodossolos / +Planossolos / Plintossolos point at concrete classifier rules to +relax in v0.9.61. Argissolos' 67.6% recall is healthy and +consistent with the v0.9.45 / v0.9.58 work. + +## Tests + +10 new tests in `test-v0960-bdsolos-benchmark.R` (42 +expectations) covering Ordem normalisation (modern + legacy), +benchmark schema, accuracy computation, confusion matrix, +unmapped reference detection, max_n, error handling, and the +loader extension for nivel_1/2/3. + +Suite total: 3717 / 0 / 20 (pass / fail / skip). R CMD check +Status OK. 
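+
+The RJ smoke run above corresponds to a call of this shape (CSV
+path is a placeholder):
+
+```r
+pedons <- load_bdsolos_csv("RJ.csv")   # placeholder path
+bench  <- benchmark_bdsolos_sibcs(pedons, max_n = 100)
+
+bench$summary     # n_total, n_in_scope, n_matched, n_errors, n_unmapped
+bench$per_ordem   # per-Ordem recall, as tabulated above
+bench$confusion   # confusion matrix of predicted vs reference Ordem
+```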
+ + # soilKey 0.9.59 (2026-05-06) The "read.csv2 fallback for malformed BDsolos UTF-8" patch. diff --git a/R/bdsolos.R b/R/bdsolos.R index 299c25b1a..db601559a 100644 --- a/R/bdsolos.R +++ b/R/bdsolos.R @@ -121,6 +121,10 @@ reference_sibcs = "(^classificacao_atual$|^classificacao$|^taxon_sibcs$|^classe_sibcs$)", reference_wrb = "(^classificacao_fao_wrb$|^classificacao_wrb$|^taxon_wrb$)", reference_st = "(^classificacao_soil_taxonomy$|^taxon_st$|^taxon_soil_taxonomy$)", + # SiBCS levelled reference (BDsolos pre-parsed): + reference_nivel_1 = "(^classe_de_solos_nivel_1$)", + reference_nivel_2 = "(^classe_de_solos_nivel_2$)", + reference_nivel_3 = "(^classe_de_solos_nivel_3$)", drainage = "(^classe_de_drenagem$|^drenagem$)", parent_material = "(^material_de_origem$|^material_origem$)", vegetacao = "(^uso_atual$|^vegetacao$|^fase_de_vegetacao_primaria$)", @@ -204,11 +208,10 @@ .bdsolos_find_header_line <- function(path, n_probe = 10L) { lines <- readLines(path, n = n_probe, encoding = "UTF-8", warn = FALSE) if (length(lines) == 0L) return(1L) - # The header is the first line with the maximum number of fields - # (separator-agnostic: tries comma + semicolon + tab and picks the - # most-populous line). Real BDsolos has ~222 fields on the header - # row; minimal synthetic tests may have only ~5. Either way the - # header has more fields than the preamble (1-2 fields). + # The header is the FIRST line where field count jumps significantly + # (>= 5 fields). Cannot use which.max(field_counts): real BDsolos data + # rows often have MORE semicolons than the header because free-text + # fields ("Descricao Original", "Observacoes") contain embedded ';'. 
field_counts <- vapply(lines, function(s) { if (!nzchar(s)) return(0L) n_semi <- length(strsplit(s, ";", fixed = TRUE)[[1L]]) @@ -216,9 +219,11 @@ n_tab <- length(strsplit(s, "\t", fixed = TRUE)[[1L]]) max(n_semi, n_comma, n_tab) }, integer(1L)) - best <- unname(which.max(field_counts)) - if (length(best) == 0L || field_counts[best] < 2L) return(1L) - as.integer(best) + # First line whose field count is large (>= 5) -- preamble has 1-2. + big <- which(field_counts >= 5L) + if (length(big) > 0L) return(as.integer(unname(big[1L]))) + # Fallback: first line at all (no header detected) + 1L } @@ -503,6 +508,7 @@ load_bdsolos_csv <- function(path, sep = NULL, verbose = TRUE) { } site <- list( id = rid, + sisb_id = rid, # v0.9.62: Codigo PA == sisb_id (FEBR cross-ref) lat = lat, lon = lon, country = "BR", @@ -510,6 +516,9 @@ load_bdsolos_csv <- function(path, sep = NULL, verbose = TRUE) { municipality = safe_field(site_cols$municipio), altitude_m = suppressWarnings(as.numeric(safe_field(site_cols$altitude_m))), reference_sibcs = if (!is.na(taxon_col)) safe_field(taxon_col) else NA_character_, + reference_nivel_1 = safe_field(site_cols$reference_nivel_1), + reference_nivel_2 = safe_field(site_cols$reference_nivel_2), + reference_nivel_3 = safe_field(site_cols$reference_nivel_3), reference_wrb = safe_field(site_cols$reference_wrb), reference_st = safe_field(site_cols$reference_st), drainage = safe_field(site_cols$drainage), diff --git a/R/benchmark-bdsolos.R b/R/benchmark-bdsolos.R new file mode 100644 index 000000000..1ec013e54 --- /dev/null +++ b/R/benchmark-bdsolos.R @@ -0,0 +1,427 @@ +# ============================================================================= +# v0.9.60 -- benchmark_bdsolos_sibcs(): Brazilian SiBCS benchmark. +# +# Mirror of v0.9.49 benchmark_lucas_2018() but for BDsolos: +# +# load_bdsolos_csv(...) 
-> list of PedonRecord with site$reference_sibcs
+#                          + site$reference_nivel_1/2/3
+#      (the surveyor's SiBCS classification, BDsolos canonical fields)
+#        |
+#        v
+#   benchmark_bdsolos_sibcs(pedons)
+#        |
+#        v
+#   for each pedon:
+#     predicted = classify_sibcs(p)$rsg_or_order   (Brazilian Ordem)
+#     reference = .bdsolos_normalize_ordem(p$site$reference_nivel_1)
+#     compare at 4 levels (Ordem / Subordem / Grande Grupo / Subgrupo)
+#        |
+#        v
+#   confusion matrix + per-Ordem recall + L0..L4 summary
+#
+# The audit on Hugo's full 27-UF download (May 2026) showed:
+# 8,995 profiles, 7,369 (82%) with the surveyor's SiBCS reference.
+# This benchmark validates the soilKey SiBCS classifier on the real
+# Brazilian surveyor-labeled corpus.
+# =============================================================================
+
+
+#' Map BDsolos ALL-CAPS Ordem singular -> soilKey Title-Case plural
+#'
+#' BDsolos exports SiBCS classes in ALL CAPS singular form
+#' (e.g. \code{"LATOSSOLO"}); soilKey returns Title Case plural
+#' (e.g. \code{"Latossolos"}). This helper aligns the two.
+#'
+#' Also handles the legacy / folk Ordem names that appear in older
+#' BDsolos surveys (1970s-90s, pre-dating the first SiBCS edition):
+#'
+#' \itemize{
+#'   \item \code{PODZOLICO}, \code{PODZOLCIO}
+#'     -> \code{Argissolos} (the 1999 SiBCS rename)
+#'   \item \code{LATOSOL} -> \code{Latossolos}
+#'   \item \code{GLEI} -> \code{Gleissolos}
+#'   \item \code{BRUNIZEM} -> \code{Chernossolos}
+#'   \item \code{AREIA(S)} -> \code{Neossolos} (Quartzarenicos)
+#'   \item \code{ALUVIAL} -> \code{Neossolos} (Fluvicos)
+#'   \item \code{BRUNO}, \code{RENDZINA} -> \code{Chernossolos}
+#'   \item \code{SOLONCHAK}, \code{SOLONETZ} -> \code{Planossolos}
+#'     (Naticos / Solodicos in SiBCS)
+#' }
+#'
+#' Returns \code{NA_character_} when the input is NA or unrecognised.
+#'
+#' @keywords internal
+.bdsolos_normalize_ordem <- function(s) {
+  if (is.null(s) || length(s) == 0L || is.na(s) || !nzchar(trimws(s))) {
+    return(NA_character_)
+  }
+  raw <- toupper(trimws(as.character(s)))
+  raw <- strsplit(raw, "[ ,;]")[[1L]][1L]
+  # Modern SiBCS Ordens (1999+, Embrapa SiBCS 1st -> 5th edition)
+  modern <- c(
+    ARGISSOLO   = "Argissolos",
+    CAMBISSOLO  = "Cambissolos",
+    CHERNOSSOLO = "Chernossolos",
+    ESPODOSSOLO = "Espodossolos",
+    GLEISSOLO   = "Gleissolos",
+    LATOSSOLO   = "Latossolos",
+    LUVISSOLO   = "Luvissolos",
+    NEOSSOLO    = "Neossolos",
+    NITOSSOLO   = "Nitossolos",
+    ORGANOSSOLO = "Organossolos",
+    PLANOSSOLO  = "Planossolos",
+    PLINTOSSOLO = "Plintossolos",
+    VERTISSOLO  = "Vertissolos"
+  )
+  if (raw %in% names(modern)) return(unname(modern[raw]))
+  # Legacy / folk names (pre-1999 surveys)
+  legacy <- c(
+    "PODZOLICO"  = "Argissolos",
+    "PODZOLCIO"  = "Argissolos",
+    "PODZOLICA"  = "Argissolos",
+    "LATOSOL"    = "Latossolos",
+    "GLEI"       = "Gleissolos",
+    "GLEISOLO"   = "Gleissolos",
+    "PODZOL"     = "Espodossolos",
+    "BRUNIZEM"   = "Chernossolos",
+    "RENDZINA"   = "Chernossolos",
+    "AREIA"      = "Neossolos",
+    "AREIAS"     = "Neossolos",
+    "ALUVIAL"    = "Neossolos",
+    "ATERRO"     = "Neossolos",
+    "REGOSOLO"   = "Neossolos",
+    "PLINTOSOL"  = "Plintossolos",
+    "SOLONETZ"   = "Planossolos",
+    "SOLONCHAK"  = "Planossolos",
+    "VERTISOLO"  = "Vertissolos",
+    "BRUNO"      = "Chernossolos",
+    "ORGANOSSOL" = "Organossolos",
+    "NEOSSOLO"   = "Neossolos",    # catches the NEO0SSOLO typo seen in
+                                   # BDsolos (the digit is stripped below)
+    "SOLO"       = NA_character_   # generic; cannot map
+  )
+  ascii <- chartr(
+    intToUtf8(c(0xc1, 0xc0, 0xc2, 0xc3, 0xc4, 0xc9, 0xc8, 0xca, 0xcb,
+                0xcd, 0xcc, 0xce, 0xcf, 0xd3, 0xd2, 0xd4, 0xd5, 0xd6,
+                0xda, 0xd9, 0xdb, 0xdc, 0xc7, 0xd1)),
+    "AAAAAEEEEIIIIOOOOOUUUUCN",
+    raw
+  )
+  ascii_short <- gsub("[^A-Z]", "", ascii)
+  if (ascii_short %in% names(legacy)) return(unname(legacy[ascii_short]))
+  # Unknown
+  NA_character_
+}
+
+
+#' Map a SiBCS subordem name (any case / language form) to the canonical code
+#'
+#' v0.9.61 helper: 
BDsolos surveyors export Nivel 2 in ALL CAPS singular +#' (e.g. \code{"ARGISSOLO VERMELHO"}); soilKey returns Title Case plural +#' (e.g. \code{"Argissolos Vermelhos"}). To compute subordem-level +#' agreement we collapse both to the canonical 2-3 letter SiBCS code +#' (PV / PA / PVA / PBAC / PAC / LV / LA / LVA / LB / NV / NB / NX / TC / +#' TX / CX / CHU / CH / CY / GM / GZ / GJ / GX / EK / EJ / ES / OJ / OO / +#' OX / RL / RY / RQ / RR / MD / ME / MT / MX / SN / SX / FT / FF / FX / +#' VC / VE / VX). +#' +#' @keywords internal +.bdsolos_normalize_subordem <- function(s) { + if (is.null(s) || length(s) == 0L || is.na(s) || !nzchar(trimws(s))) { + return(NA_character_) + } + raw <- toupper(trimws(as.character(s))) + ascii <- chartr( + intToUtf8(c(0xc1, 0xc0, 0xc2, 0xc3, 0xc4, 0xc9, 0xc8, 0xca, 0xcb, + 0xcd, 0xcc, 0xce, 0xcf, 0xd3, 0xd2, 0xd4, 0xd5, 0xd6, + 0xda, 0xd9, 0xdb, 0xdc, 0xc7, 0xd1)), + "AAAAAEEEEIIIIOOOOOUUUUCN", + raw + ) + # Strip trailing characters after 2nd word (third level descriptors etc.) + toks <- strsplit(ascii, "[ ,;]+")[[1L]] + if (length(toks) < 1L) return(NA_character_) + ord_word <- toks[1L] + sub_word <- if (length(toks) >= 2L) toks[2L] else "" + # Some BDsolos rows use compound names (BRUNO-ACINZENTADO, + # VERMELHO-AMARELO). 
+  sub_word <- gsub("-", "", sub_word)
+  if (length(toks) >= 3L &&
+      sub_word %in% c("BRUNO", "VERMELHO") &&
+      toks[3L] %in% c("ACINZENTADO", "AMARELO", "ACINZ")) {
+    # Expand the truncated "ACINZ" form so the key can match the map below.
+    third <- if (toks[3L] == "ACINZ") "ACINZENTADO" else toks[3L]
+    sub_word <- paste0(sub_word, third)
+  }
+  key <- paste0(ord_word, "_", sub_word)
+  map <- c(
+    # Argissolos (P)
+    "ARGISSOLO_BRUNOACINZENTADO"  = "PBAC",
+    "ARGISSOLO_ACINZENTADO"       = "PAC",
+    "ARGISSOLO_AMARELO"           = "PA",
+    "ARGISSOLO_VERMELHO"          = "PV",
+    "ARGISSOLO_VERMELHOAMARELO"   = "PVA",
+    # Cambissolos (C)
+    "CAMBISSOLO_HISTICO"          = "CH",
+    "CAMBISSOLO_HUMICO"           = "CHU",
+    "CAMBISSOLO_FLUVICO"          = "CY",
+    "CAMBISSOLO_HAPLICO"          = "CX",
+    # Chernossolos (M)
+    "CHERNOSSOLO_RENDZICO"        = "MD",
+    "CHERNOSSOLO_EBANICO"         = "ME",
+    "CHERNOSSOLO_ARGILUVICO"      = "MT",
+    "CHERNOSSOLO_HAPLICO"         = "MX",
+    # Espodossolos (E)
+    "ESPODOSSOLO_HUMILUVICO"      = "EK",
+    "ESPODOSSOLO_FERRILUVICO"     = "EJ",
+    "ESPODOSSOLO_FERRIHUMILUVICO" = "ES",
+    # Gleissolos (G)
+    "GLEISSOLO_TIOMORFICO"        = "GJ",
+    "GLEISSOLO_SALICO"            = "GZ",
+    "GLEISSOLO_MELANICO"          = "GM",
+    "GLEISSOLO_HAPLICO"           = "GX",
+    # Latossolos (L)
+    "LATOSSOLO_BRUNO"             = "LB",
+    "LATOSSOLO_AMARELO"           = "LA",
+    "LATOSSOLO_VERMELHO"          = "LV",
+    "LATOSSOLO_VERMELHOAMARELO"   = "LVA",
+    # Luvissolos (T)
+    "LUVISSOLO_CROMICO"           = "TC",
+    "LUVISSOLO_HAPLICO"           = "TX",
+    # Neossolos (R)
+    "NEOSSOLO_LITOLICO"           = "RL",
+    "NEOSSOLO_FLUVICO"            = "RY",
+    "NEOSSOLO_QUARTZARENICO"      = "RQ",
+    "NEOSSOLO_REGOLITICO"         = "RR",
+    # Nitossolos (N)
+    "NITOSSOLO_BRUNO"             = "NB",
+    "NITOSSOLO_VERMELHO"          = "NV",
+    "NITOSSOLO_HAPLICO"           = "NX",
+    # Organossolos (O)
+    "ORGANOSSOLO_TIOMORFICO"      = "OJ",
+    "ORGANOSSOLO_FOLICO"          = "OO",
+    "ORGANOSSOLO_HAPLICO"         = "OX",
+    # Planossolos (S)
+    "PLANOSSOLO_NATRICO"          = "SN",
+    "PLANOSSOLO_HAPLICO"          = "SX",
+    # Plintossolos (F)
+    "PLINTOSSOLO_PETRICO"         = "FF",
+    "PLINTOSSOLO_ARGILUVICO"      = "FT",
+    "PLINTOSSOLO_HAPLICO"         = "FX",
+    # Vertissolos (V)
+    "VERTISSOLO_HIDROMORFICO"     = "VC",
+    "VERTISSOLO_EBANICO"          = "VE",
+    "VERTISSOLO_HAPLICO"          = "VX"
+  )
+  if (key %in% names(map)) return(unname(map[key]))
+  NA_character_
+}
+
+
+#' Run the BDsolos / SiBCS surveyor-reference benchmark
+#'
+#' Runs \code{\link{classify_sibcs}} on each pedon and tabulates
+#' agreement with the surveyor's SiBCS classification embedded in
+#' the BDsolos export (\code{site$reference_nivel_1} when
+#' available, falling back to parsing \code{site$reference_sibcs}).
+#'
+#' Compared to the v0.9.49 \code{\link{benchmark_lucas_2018}}, this
+#' uses the SURVEYOR's reference (richer than the WRB-1km raster):
+#' the BDsolos pedologist who described the profile assigns the
+#' Ordem / Subordem / Grande Grupo / Subgrupo. This is the
+#' authoritative Brazilian benchmark.
+#'
+#' @param pedons List of \code{\link{PedonRecord}} objects, typically
+#'   from \code{\link{load_bdsolos_csv}}.
+#' @param classify_with Internal: classifier (default
+#'   \code{classify_sibcs}). Pass \code{classify_via_smartsolos_api}
+#'   to benchmark the Embrapa PROLOG classifier instead.
+#' @param classify_args List of additional arguments passed to the
+#'   classifier (e.g. \code{list(api_key = ..., post_fn = ...)} for
+#'   SmartSolos).
+#' @param max_n Optional integer cap on pedons benchmarked.
+#' @param verbose If \code{TRUE} (default), prints a summary line.
+#' @return A list with elements: +#' \describe{ +#' \item{\code{predictions}}{data.frame with columns: +#' point_id, predicted_ordem, reference_ordem, agree_ordem, +#' predicted_subordem, reference_subordem, +#' predicted_subordem_code, reference_subordem_code +#' (canonical SiBCS 2-3 letter codes from +#' .bdsolos_normalize_subordem()), +#' agree_subordem, predicted_gg, reference_gg, +#' reference_raw.} +#' \item{\code{confusion}}{Ordem-level confusion table.} +#' \item{\code{accuracy}}{Overall Ordem-level match fraction.} +#' \item{\code{accuracy_subordem}}{v0.9.61: subordem-level match +#' fraction over pedons with both predicted and reference +#' subordem codes resolvable.} +#' \item{\code{per_ordem}}{data.frame: per-Ordem recall.} +#' \item{\code{summary}}{n_total, n_in_scope, n_matched, +#' n_errors, n_unmapped, n_in_scope_sub, n_matched_sub.} +#' } +#' +#' @examples +#' \dontrun{ +#' pedons <- load_bdsolos_csv("soil_data/embrapa_bdsolos/BD_solos/RJ.csv") +#' bench <- benchmark_bdsolos_sibcs(pedons) +#' bench$accuracy +#' bench$per_ordem +#' bench$confusion +#' } +#' @seealso \code{\link{load_bdsolos_csv}}, +#' \code{\link{benchmark_lucas_2018}}, +#' \code{\link{classify_sibcs}}, +#' \code{\link{compare_smartsolos}}. 
+#' @export +benchmark_bdsolos_sibcs <- function(pedons, + classify_with = classify_sibcs, + classify_args = list(on_missing = "silent"), + max_n = NULL, + verbose = TRUE) { + if (!is.list(pedons) || length(pedons) == 0L) { + stop("benchmark_bdsolos_sibcs(): 'pedons' must be a non-empty list of PedonRecord.") + } + if (!all(vapply(pedons, inherits, logical(1L), "PedonRecord"))) { + stop("benchmark_bdsolos_sibcs(): every element of 'pedons' must be a PedonRecord.") + } + if (!is.null(max_n) && length(pedons) > max_n) { + pedons <- pedons[seq_len(as.integer(max_n))] + } + if (isTRUE(verbose)) { + cli::cli_alert_info(sprintf( + "Running %s on %d pedons...", + ifelse(identical(classify_with, classify_sibcs), "classify_sibcs", + deparse(substitute(classify_with))), + length(pedons))) + } + + predicted_ordem <- character(length(pedons)) + predicted_subordem <- character(length(pedons)) + predicted_gg <- character(length(pedons)) + predicted_sg <- character(length(pedons)) + reference_raw <- character(length(pedons)) + reference_ordem <- character(length(pedons)) + reference_subordem <- character(length(pedons)) + reference_gg <- character(length(pedons)) + errors <- list() + + for (i in seq_along(pedons)) { + p <- pedons[[i]] + res <- tryCatch( + do.call(classify_with, c(list(p), classify_args)), + error = function(e) { + errors[[length(errors) + 1L]] <<- list( + i = i, id = p$site$id %||% i, + error = conditionMessage(e) + ) + NULL + } + ) + predicted_ordem[i] <- if (is.null(res)) NA_character_ else + as.character(res$rsg_or_order %||% NA_character_) + if (!is.null(res) && !is.null(res$trace)) { + predicted_subordem[i] <- as.character(res$trace$subordem_assigned$name %||% NA_character_) + predicted_gg[i] <- as.character(res$trace$grande_grupo_assigned$name %||% NA_character_) + predicted_sg[i] <- as.character(res$trace$subgrupo_assigned$name %||% NA_character_) + } + # Reference: prefer site$reference_nivel_1 (already-parsed Ordem), + # fall back to first word of 
site$reference_sibcs.
+    ref_n1   <- p$site$reference_nivel_1 %||% NA_character_
+    ref_n2   <- p$site$reference_nivel_2 %||% NA_character_
+    ref_n3   <- p$site$reference_nivel_3 %||% NA_character_
+    ref_full <- p$site$reference_sibcs   %||% NA_character_
+    reference_raw[i] <- ref_full
+    # %||% is NULL-coalescing; a missing nivel_1 arrives here as NA (not
+    # NULL), so the fallback to the full string must test is.na() itself.
+    reference_ordem[i]    <- .bdsolos_normalize_ordem(
+      if (!is.na(ref_n1)) ref_n1 else ref_full)
+    reference_subordem[i] <- if (!is.na(ref_n2)) ref_n2 else NA_character_
+    reference_gg[i]       <- if (!is.na(ref_n3)) ref_n3 else NA_character_
+  }
+
+  ids <- vapply(pedons, function(p) as.character(p$site$id %||% NA_character_),
+                character(1L))
+  agree_ordem <- !is.na(predicted_ordem) & !is.na(reference_ordem) &
+    predicted_ordem == reference_ordem
+  # v0.9.61: Subordem-level agreement via canonical 2-3 letter SiBCS code.
+  predicted_sub_code <- vapply(predicted_subordem, .bdsolos_normalize_subordem,
+                               character(1L))
+  reference_sub_code <- vapply(reference_subordem, .bdsolos_normalize_subordem,
+                               character(1L))
+  agree_subordem <- !is.na(predicted_sub_code) & !is.na(reference_sub_code) &
+    predicted_sub_code == reference_sub_code
+  comparison <- data.frame(
+    point_id = ids,
+    predicted_ordem = predicted_ordem,
+    reference_ordem = reference_ordem,
+    agree_ordem = agree_ordem,
+    predicted_subordem = predicted_subordem,
+    reference_subordem = reference_subordem,
+    predicted_subordem_code = predicted_sub_code,
+    reference_subordem_code = reference_sub_code,
+    agree_subordem = agree_subordem,
+    predicted_gg = predicted_gg,
+    reference_gg = reference_gg,
+    reference_raw = reference_raw,
+    stringsAsFactors = FALSE
+  )
+
+  in_scope <- !is.na(comparison$predicted_ordem) & !is.na(comparison$reference_ordem)
+  n_in_scope <- sum(in_scope)
+  n_matched <- sum(comparison$agree_ordem)
+  accuracy <- if (n_in_scope > 0L) n_matched / n_in_scope else NA_real_
+
+  # v0.9.61: subordem-level metrics (only over pedons with both codes).
+ in_scope_sub <- !is.na(comparison$predicted_subordem_code) & + !is.na(comparison$reference_subordem_code) + n_in_scope_sub <- sum(in_scope_sub) + n_matched_sub <- sum(comparison$agree_subordem) + accuracy_subordem <- if (n_in_scope_sub > 0L) { + n_matched_sub / n_in_scope_sub + } else NA_real_ + + conf <- if (n_in_scope > 0L) { + table( + Predicted = comparison$predicted_ordem[in_scope], + Reference = comparison$reference_ordem[in_scope] + ) + } else NULL + + per_ordem <- if (n_in_scope > 0L) { + sub_in <- comparison[in_scope, ] + refs <- sort(unique(sub_in$reference_ordem)) + do.call(rbind, lapply(refs, function(o) { + sub <- sub_in[sub_in$reference_ordem == o, ] + data.frame( + reference_ordem = o, + n = nrow(sub), + n_correct = sum(sub$agree_ordem), + recall = mean(sub$agree_ordem), + stringsAsFactors = FALSE + ) + })) + } else NULL + + n_unmapped <- sum(is.na(reference_ordem) & !is.na(reference_raw) & + nzchar(trimws(reference_raw))) + + if (isTRUE(verbose)) { + cli::cli_alert_success(sprintf( + "benchmark_bdsolos_sibcs(): Ordem accuracy = %.1f%% over %d in-scope pedons (%d matched / %d total / %d errors / %d unmapped).", + 100 * (accuracy %||% NA_real_), + n_in_scope, n_matched, length(pedons), length(errors), n_unmapped + )) + } + + list( + predictions = comparison, + confusion = conf, + accuracy = accuracy, + accuracy_subordem = accuracy_subordem, + per_ordem = per_ordem, + summary = list( + n_total = length(pedons), + n_in_scope = n_in_scope, + n_matched = n_matched, + n_errors = length(errors), + n_unmapped = n_unmapped, + n_in_scope_sub = n_in_scope_sub, + n_matched_sub = n_matched_sub + ), + errors = errors + ) +} diff --git a/R/benchmark-vlm-extraction.R b/R/benchmark-vlm-extraction.R new file mode 100644 index 000000000..497f56d51 --- /dev/null +++ b/R/benchmark-vlm-extraction.R @@ -0,0 +1,684 @@ +# ============================================================================= +# v0.9.66 -- Phase 1: VLM extraction benchmark. 
+#
+# Question we want to answer before deciding whether to fine-tune:
+# "Is the vanilla Gemma 4 e2b / e4b + pedologist persona good enough on
+# real soilKey extraction tasks, or do we need few-shot / LoRA?"
+#
+# Three tasks, three metrics:
+#
+# 1. Munsell-from-photo: perceptual color distance (Nickerson index,
+#    with a CIE Lab Delta-E fallback; lower is better, < 2 is roughly
+#    visually equivalent)
+# 2. Horizons-from-text: precision + recall over horizon count;
+#    per-attribute match rate over numeric fields
+# 3. Site-from-fieldsheet: field-level Intersection-over-Union +
+#    value accuracy on matched fields
+#
+# Each task has a directory of paired (input, golden_json) fixtures
+# under inst/fixtures/vlm_extraction/<task>/. Fixtures are either
+# bundled (synthetic) OR added by the user (real photos / PDFs).
+# =============================================================================
+
+
+# ---- Fixture loader -------------------------------------------------------
+
+#' Locate the soilKey VLM-extraction fixture directory
+#'
+#' Resolves to `system.file("fixtures", "vlm_extraction", ...)` after
+#' install, or to `inst/fixtures/vlm_extraction/` in a development
+#' checkout. The returned path is not guaranteed to exist; callers
+#' should check `dir.exists()` (as `list_vlm_fixtures()` does).
+#'
+#' @keywords internal
+.vlm_fixtures_dir <- function(subdir = NULL) {
+  base <- system.file("fixtures", "vlm_extraction", package = "soilKey")
+  if (!nzchar(base) || !dir.exists(base)) {
+    base <- file.path("inst", "fixtures", "vlm_extraction")
+  }
+  if (!is.null(subdir)) base <- file.path(base, subdir)
+  base
+}
+
+
+#' List the paired (input, golden) fixtures available for one task
+#'
+#' Each task directory holds matched files: an `input` (`.txt` for
+#' horizons / site, `.jpg`/`.png` for munsell) and a `golden.json`
+#' with the ground-truth answer. The pairing rule is filename-stem.
+#'
+#' @param task One of `"munsell"`, `"horizons"`, `"site"`.
+#' @param fixtures_dir Optional override (default uses bundled).
+#' +#' @return data.frame with columns `id`, `input_path`, `golden_path` +#' (one row per fixture). +#' @export +list_vlm_fixtures <- function(task = c("munsell", "horizons", "site"), + fixtures_dir = NULL) { + task <- match.arg(task) + base <- fixtures_dir %||% .vlm_fixtures_dir(task) + if (!dir.exists(base)) { + return(data.frame(id = character(0), input_path = character(0), + golden_path = character(0), stringsAsFactors = FALSE)) + } + golden <- list.files(base, pattern = "\\.golden\\.json$", + full.names = TRUE) + if (length(golden) == 0L) { + return(data.frame(id = character(0), input_path = character(0), + golden_path = character(0), stringsAsFactors = FALSE)) + } + ids <- sub("\\.golden\\.json$", "", basename(golden)) + ext_map <- list(munsell = c("jpg", "jpeg", "png", "webp"), + horizons = c("txt", "md", "pdf"), + site = c("txt", "md", "jpg", "png")) + exts <- ext_map[[task]] + input_paths <- vapply(ids, function(id) { + cands <- file.path(base, paste0(id, ".", exts)) + hit <- cands[file.exists(cands)] + if (length(hit) == 0L) NA_character_ else hit[1L] + }, character(1L)) + keep <- !is.na(input_paths) + data.frame( + id = ids[keep], + input_path = input_paths[keep], + golden_path = golden[keep], + stringsAsFactors = FALSE + ) +} + + +# ---- Synthetic fixture generator ----------------------------------------- + +#' Generate a synthetic horizons-extraction fixture from a real pedon +#' +#' Renders a `PedonRecord$horizons` table back into a Markdown-style +#' description (the input the VLM will see) and emits the original +#' structured horizon table as the golden answer. This lets us scale +#' the horizons-task fixture set from any pedon source we already +#' have a loader for (BDsolos, FEBR, KSSL, LUCAS, ...). +#' +#' Useful as a *unit-test* fixture: the VLM should be able to round- +#' trip its own description into structured JSON. 
Limitation: the +#' description is template-rendered (uniform style); does not exercise +#' truly natural-language variation. Pair with hand-curated real-PDF +#' fixtures. +#' +#' @param pedon A `[PedonRecord]`. +#' @param fixture_id Filename stem (no extension) that the input + golden +#' files will share. +#' @param out_dir Directory to write `.txt` and +#' `.golden.json`. Default: bundled horizons fixtures dir. +#' +#' @return Invisibly, the named list `(input_path, golden_path)`. +#' @export +make_synthetic_horizons_fixture <- function(pedon, + fixture_id, + out_dir = NULL) { + if (!inherits(pedon, "PedonRecord")) { + stop("'pedon' must be a PedonRecord.") + } + if (!is.character(fixture_id) || length(fixture_id) != 1L || + !nzchar(fixture_id)) { + stop("'fixture_id' must be a non-empty character scalar.") + } + out_dir <- out_dir %||% .vlm_fixtures_dir("horizons") + if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE) + + h <- as.data.frame(pedon$horizons) + if (nrow(h) == 0L) stop("pedon has no horizons.") + + # Render Markdown description (one section per horizon). 
+ lines <- c( + "# Descricao do perfil", + "", + sprintf("Local: %s, %s.", pedon$site$state %||% "?", + pedon$site$municipality %||% "?"), + sprintf("Identificacao: %s.", pedon$site$id %||% "?"), + "" + ) + for (i in seq_len(nrow(h))) { + sect <- sprintf("## Horizonte %s (%g a %g cm)", + h$designation[i] %||% "?", + h$top_cm[i] %||% NA, + h$bottom_cm[i] %||% NA) + body <- character(0) + if (!is.null(h$munsell_hue_moist) && !is.na(h$munsell_hue_moist[i])) { + body <- c(body, sprintf("Cor Munsell umida: %s %g/%g.", + h$munsell_hue_moist[i], + h$munsell_value_moist[i] %||% NA, + h$munsell_chroma_moist[i] %||% NA)) + } + if (!is.null(h$clay_pct) && !is.na(h$clay_pct[i])) { + body <- c(body, sprintf("Argila %g %%.", h$clay_pct[i])) + } + if (!is.null(h$silt_pct) && !is.na(h$silt_pct[i])) { + body <- c(body, sprintf("Silte %g %%.", h$silt_pct[i])) + } + if (!is.null(h$sand_pct) && !is.na(h$sand_pct[i])) { + body <- c(body, sprintf("Areia %g %%.", h$sand_pct[i])) + } + if (!is.null(h$ph_h2o) && !is.na(h$ph_h2o[i])) { + body <- c(body, sprintf("pH em agua: %.1f.", h$ph_h2o[i])) + } + if (!is.null(h$oc_pct) && !is.na(h$oc_pct[i])) { + body <- c(body, sprintf("Carbono organico: %.2f %%.", h$oc_pct[i])) + } + lines <- c(lines, sect, "", body, "") + } + + golden <- list( + horizons = lapply(seq_len(nrow(h)), function(i) { + row <- as.list(h[i, , drop = FALSE]) + Filter(function(v) length(v) > 0L && !is.na(v), row) + }) + ) + + in_path <- file.path(out_dir, paste0(fixture_id, ".txt")) + out_path <- file.path(out_dir, paste0(fixture_id, ".golden.json")) + + writeLines(lines, in_path, useBytes = TRUE) + if (!requireNamespace("jsonlite", quietly = TRUE)) { + stop("'jsonlite' is required to write golden JSON fixtures.") + } + writeLines(jsonlite::toJSON(golden, pretty = TRUE, auto_unbox = TRUE, + na = "null"), + out_path, useBytes = TRUE) + invisible(list(input_path = in_path, golden_path = out_path)) +} + + +# ---- Metric: Munsell Delta-E 2000 
---------------------------------------- + +#' Pairwise perceptual color distance between two Munsell triplets +#' +#' Prefers the Nickerson Color Difference Index (operates directly on +#' HVC, well-known in pedology and the Munsell renotation literature) +#' via `munsellinterpol::NickersonColorDifference`. Falls back to a +#' CIE Lab Euclidean distance (ΔE 1976) computed via +#' `munsellinterpol::MunsellToLab` when Nickerson is unavailable. +#' Returns `NA_real_` when either Munsell triplet is unparseable. +#' +#' Approximate Nickerson scale (matches Δ Lab roughly 1:1 for Munsell +#' value 4 chromas 1-8): `< 2` = visually equivalent; +#' `2-5` = noticeable but small; `> 10` = clearly different colors. +#' +#' @param hue1,value1,chroma1 First Munsell color (e.g. `"5YR", 4, 6`). +#' @param hue2,value2,chroma2 Second Munsell color. +#' +#' @return Numeric scalar (Nickerson or Lab distance), or `NA_real_`. +#' @keywords internal +.munsell_delta_e <- function(hue1, value1, chroma1, + hue2, value2, chroma2) { + if (!requireNamespace("munsellinterpol", quietly = TRUE)) { + return(NA_real_) + } + if (any(is.na(c(hue1, value1, chroma1, hue2, value2, chroma2)))) { + return(NA_real_) + } + hvc1 <- sprintf("%s %s/%s", hue1, value1, chroma1) + hvc2 <- sprintf("%s %s/%s", hue2, value2, chroma2) + # Prefer Nickerson Color Difference Index (Munsell-domain). + ncd <- tryCatch( + munsellinterpol::NickersonColorDifference(hvc1, hvc2), + error = function(e) NULL, + warning = function(w) NULL + ) + if (!is.null(ncd) && is.numeric(ncd) && length(ncd) >= 1L && + is.finite(as.numeric(ncd)[1L])) { + return(as.numeric(ncd)[1L]) + } + # Fallback: ΔE 1976 (Euclidean Lab distance). 
+  lab1 <- tryCatch(munsellinterpol::MunsellToLab(hvc1), error = function(e) NULL)
+  lab2 <- tryCatch(munsellinterpol::MunsellToLab(hvc2), error = function(e) NULL)
+  if (is.null(lab1) || is.null(lab2)) return(NA_real_)
+  sqrt(sum((as.numeric(lab1) - as.numeric(lab2))^2))
+}
+
+
+#' Mean perceptual color distance between predicted and golden Munsell horizons
+#'
+#' Pairs predicted horizons to golden horizons by index (assumes the
+#' ordering by depth is consistent, the same convention soilKey uses
+#' throughout). Returns the mean over the min(length) horizons;
+#' horizons missing from the prediction are not scored directly but
+#' depress the `coverage` rate.
+#'
+#' @keywords internal
+.metric_munsell_deltaE <- function(pred, golden) {
+  pred_h   <- pred$horizons %||% list()
+  golden_h <- golden$horizons %||% list()
+  n <- min(length(pred_h), length(golden_h))
+  if (n == 0L) {
+    return(list(mean_delta_e = NA_real_,
+                n_compared = 0L,
+                coverage = if (length(golden_h) == 0L) NA_real_ else 0))
+  }
+  des <- vapply(seq_len(n), function(i) {
+    p <- pred_h[[i]]; g <- golden_h[[i]]
+    .munsell_delta_e(
+      p$munsell_hue_moist, p$munsell_value_moist, p$munsell_chroma_moist,
+      g$munsell_hue_moist, g$munsell_value_moist, g$munsell_chroma_moist
+    )
+  }, numeric(1L))
+  list(
+    mean_delta_e = mean(des, na.rm = TRUE),
+    n_compared = sum(!is.na(des)),
+    coverage = n / max(1L, length(golden_h))
+  )
+}
+
+
+# ---- Metric: horizons precision/recall + attribute match -----------------
+
+#' Precision / recall on horizon count + numeric attribute match rate
+#'
+#' Counts how many predicted horizons line up with a golden horizon
+#' under the depth-overlap heuristic (>= 80 % overlap of the
+#' [top, bottom] interval) and what fraction of numeric attributes
+#' agree within a small tolerance. The overlap heuristic gives partial
+#' credit when the model splits / merges adjacent horizons.
+#' +#' @keywords internal +.metric_horizons_overlap <- function(pred, golden, + numeric_tol = 0.10) { + pred_h <- pred$horizons %||% list() + golden_h <- golden$horizons %||% list() + n_g <- length(golden_h); n_p <- length(pred_h) + if (n_g == 0L) return(list(precision = NA_real_, recall = NA_real_, + attr_match_rate = NA_real_, + n_pred = n_p, n_golden = 0L)) + + matched_g <- logical(n_g); matched_p <- logical(n_p) + attr_total <- 0L; attr_match <- 0L + for (i in seq_len(n_g)) { + g <- golden_h[[i]] + g_top <- as.numeric(g$top_cm %||% NA) + g_bot <- as.numeric(g$bottom_cm %||% NA) + if (is.na(g_top) || is.na(g_bot) || g_bot <= g_top) next + g_span <- g_bot - g_top + best_j <- NA_integer_; best_overlap <- 0 + for (j in seq_len(n_p)) { + if (matched_p[j]) next + p <- pred_h[[j]] + p_top <- as.numeric(p$top_cm %||% NA) + p_bot <- as.numeric(p$bottom_cm %||% NA) + if (is.na(p_top) || is.na(p_bot) || p_bot <= p_top) next + overlap <- max(0, min(g_bot, p_bot) - max(g_top, p_top)) + if (overlap / g_span > best_overlap) { + best_overlap <- overlap / g_span; best_j <- j + } + } + if (!is.na(best_j) && best_overlap >= 0.80) { + matched_g[i] <- TRUE; matched_p[best_j] <- TRUE + # Attribute-level match + p <- pred_h[[best_j]] + for (key in c("clay_pct", "silt_pct", "sand_pct", "ph_h2o", + "oc_pct", "cec_cmol", "bs_pct")) { + g_raw <- g[[key]] + p_raw <- p[[key]] + gv <- if (length(g_raw) == 0L) NA_real_ else + suppressWarnings(as.numeric(g_raw)) + pv <- if (length(p_raw) == 0L) NA_real_ else + suppressWarnings(as.numeric(p_raw)) + if (length(gv) == 1L && is.finite(gv)) { + attr_total <- attr_total + 1L + if (length(pv) == 1L && is.finite(pv) && + abs(gv - pv) <= numeric_tol * max(1, abs(gv))) { + attr_match <- attr_match + 1L + } + } + } + } + } + list( + precision = if (n_p > 0L) sum(matched_p) / n_p else NA_real_, + recall = sum(matched_g) / n_g, + attr_match_rate = if (attr_total > 0L) attr_match / attr_total else NA_real_, + n_pred = n_p, + n_golden = n_g + ) +} + + +# 
---- Metric: site fields IoU ---------------------------------------------
+
+#' Field-level Intersection-over-Union + value accuracy
+#'
+#' For site metadata: how many of the golden fields appear in the
+#' prediction (recall), how many predicted fields appear in golden
+#' (precision), and -- for the matched fields -- what fraction agree
+#' on value. Numeric values use `numeric_tol`; character uses exact
+#' (case-insensitive, trimmed) match.
+#'
+#' @keywords internal
+.metric_site_iou <- function(pred, golden, numeric_tol = 0.05) {
+  ps <- pred$site %||% list()
+  gs <- golden$site %||% list()
+  pk <- names(ps); gk <- names(gs)
+  inter <- intersect(pk, gk); union_keys <- union(pk, gk)
+  iou <- if (length(union_keys) == 0L) NA_real_ else length(inter) / length(union_keys)
+  recall <- if (length(gk) == 0L) NA_real_ else length(inter) / length(gk)
+  precision <- if (length(pk) == 0L) NA_real_ else length(inter) / length(pk)
+
+  v_ok <- 0L; v_total <- 0L
+  for (k in inter) {
+    pv <- ps[[k]]; gv <- gs[[k]]
+    if (is.numeric(gv) || (is.character(gv) && !is.na(suppressWarnings(as.numeric(gv))))) {
+      pn <- suppressWarnings(as.numeric(pv))
+      gn <- suppressWarnings(as.numeric(gv))
+      if (is.finite(gn)) {
+        # Count every numeric golden field, even when the predicted value
+        # is unparseable -- otherwise unparseable predictions are silently
+        # excluded and inflate value_accuracy.
+        v_total <- v_total + 1L
+        if (length(pn) == 1L && is.finite(pn) &&
+            abs(pn - gn) <= numeric_tol * max(1, abs(gn))) {
+          v_ok <- v_ok + 1L
+        }
+      }
+    } else {
+      v_total <- v_total + 1L
+      if (identical(tolower(trimws(as.character(pv))),
+                    tolower(trimws(as.character(gv))))) {
+        v_ok <- v_ok + 1L
+      }
+    }
+  }
+  list(iou = iou, precision = precision, recall = recall,
+       value_accuracy = if (v_total > 0L) v_ok / v_total else NA_real_,
+       n_pred = length(pk), n_golden = length(gk),
+       n_matched = length(inter))
+}
+
+
+# ---- Top-level benchmark --------------------------------------------------
+
+#' Provider-agnostic VLM extraction benchmark (Phase 1)
+#'
+#' Runs each (provider, model) pair against every fixture for every
+#' selected task and reports per-fixture and per-(provider, task)
+#' 
aggregate metrics. Mock providers (`MockVLMProvider`) are accepted +#' for unit testing. +#' +#' @param providers Named list of provider specifications. Each entry +#' is one of: a pre-built ellmer Chat object; a MockVLMProvider; +#' a list `(name = ..., model = ...)` forwarded to +#' [vlm_provider()]. +#' @param tasks Subset of `c("munsell", "horizons", "site")`. +#' @param fixtures_dir Optional override; default = bundled fixtures. +#' @param max_per_task Cap fixtures per task (useful for smoke tests). +#' @param use_fewshot Logical, default `TRUE` (v0.9.68+). When TRUE, +#' uses the few-shot prompt variants (worked examples embedded in +#' the prompt) for horizons / site / munsell. Set to `FALSE` to +#' benchmark the bare-instructions baseline -- useful when +#' measuring few-shot lift. +#' @param use_structured Logical, default `FALSE` (v0.9.70+). When +#' TRUE and the provider supports `chat_structured()` (Anthropic / +#' OpenAI / Ollama 0.5+ / Gemini), the validate-and-retry loop is +#' replaced by a single structured call where the provider is +#' handed the ellmer type tree built from the soilKey schema and +#' returns a structurally-valid R list directly. Removes the entire +#' class of "model returned prose / wrong shape" failures at the +#' protocol level. Falls back to the legacy retry loop when the +#' provider has no `chat_structured` method. +#' @param n_repeats Positive integer (default 1). Runs each +#' (provider, task, fixture) cell `n_repeats` times so the summary +#' table can report `metric_*_sd` alongside `metric_*_mean`. LLM +#' responses are stochastic; without `n_repeats >= 3` it is hard to +#' distinguish real lift from noise on a small fixture set. +#' @param verbose Logical (default TRUE); print per-fixture progress. 
+#' +#' @return List with +#' \describe{ +#' \item{`predictions`}{long data.frame: provider, task, fixture, +#' ok, error, raw_pred, golden, metric_*} +#' \item{`summary`}{data.frame: provider x task aggregates} +#' } +#' +#' @section What this does NOT measure: +#' - Latency / cost per request (use the provider's own telemetry). +#' - End-to-end classification accuracy (run +#' `benchmark_bdsolos_sibcs()` for that). +#' - VLM hallucination outside the schema (the JSON validator catches +#' that as a parse failure, counted as `ok = FALSE`). +#' +#' @examples +#' \dontrun{ +#' # Compare local Gemma e2b vs e4b vs Claude: +#' bench <- benchmark_vlm_extraction( +#' providers = list( +#' gemma_e2b = list(name = "ollama", model = "gemma4:e2b"), +#' gemma_e4b = list(name = "ollama", model = "gemma4:e4b"), +#' claude = list(name = "anthropic") +#' ), +#' tasks = c("horizons", "site"), # skip Munsell if no photo fixtures +#' max_per_task = 5 +#' ) +#' bench$summary +#' } +#' @seealso [list_vlm_fixtures()], [make_synthetic_horizons_fixture()], +#' [extract_horizons_from_pdf()]. 
+#' @export +benchmark_vlm_extraction <- function(providers, + tasks = c("horizons", "site", "munsell"), + fixtures_dir = NULL, + max_per_task = NULL, + use_fewshot = TRUE, + use_structured = FALSE, + n_repeats = 1L, + verbose = TRUE) { + n_repeats <- as.integer(n_repeats) + if (!is.finite(n_repeats) || n_repeats < 1L) n_repeats <- 1L + tasks <- match.arg(tasks, several.ok = TRUE) + if (!is.list(providers) || length(providers) == 0L || + is.null(names(providers)) || + any(!nzchar(names(providers)))) { + stop("'providers' must be a non-empty named list.") + } + + rows <- list() + for (task in tasks) { + fxs <- list_vlm_fixtures(task, fixtures_dir = fixtures_dir) + if (!is.null(max_per_task) && nrow(fxs) > max_per_task) { + fxs <- fxs[seq_len(max_per_task), , drop = FALSE] + } + if (nrow(fxs) == 0L) { + if (isTRUE(verbose)) { + cli::cli_alert_warning("Task {.field {task}}: no fixtures found.") + } + next + } + for (pname in names(providers)) { + pspec <- providers[[pname]] + provider <- .resolve_provider(pspec) + for (k in seq_len(nrow(fxs))) { + fx <- fxs[k, ] + for (rep in seq_len(n_repeats)) { + if (isTRUE(verbose)) { + if (n_repeats > 1L) { + cli::cli_alert_info( + "[{.field {pname}}] task={task} fixture={fx$id} fewshot={use_fewshot} rep={rep}/{n_repeats}" + ) + } else { + cli::cli_alert_info( + "[{.field {pname}}] task={task} fixture={fx$id} fewshot={use_fewshot}" + ) + } + } + out <- .run_one_extraction(provider, task, fx, + use_fewshot = use_fewshot, + use_structured = use_structured) + metric <- .compute_metric(task, out$pred, out$golden) + rows[[length(rows) + 1L]] <- data.frame( + provider = pname, + task = task, + fixture = fx$id, + repetition = rep, + ok = isTRUE(out$ok), + error = out$error %||% NA_character_, + metric_1 = metric[[1L]] %||% NA_real_, + metric_2 = metric[[2L]] %||% NA_real_, + metric_3 = metric[[3L]] %||% NA_real_, + metric_1_name = names(metric)[1L] %||% NA_character_, + metric_2_name = names(metric)[2L] %||% NA_character_, + 
metric_3_name = names(metric)[3L] %||% NA_character_, + stringsAsFactors = FALSE + ) + } + } + } + } + predictions <- if (length(rows) > 0L) do.call(rbind, rows) else + data.frame(provider = character(0), task = character(0), + stringsAsFactors = FALSE) + + summary <- if (nrow(predictions) > 0L) { + df <- predictions + df$ok_num <- as.integer(df$ok) + parts <- split(df, list(df$provider, df$task), drop = TRUE) + do.call(rbind, lapply(parts, function(d) { + sd_safe <- function(x) { + x <- x[is.finite(x)] + if (length(x) < 2L) NA_real_ else stats::sd(x) + } + data.frame( + provider = d$provider[1L], + task = d$task[1L], + n = nrow(d), + ok_rate = mean(d$ok_num, na.rm = TRUE), + metric_1_mean = mean(d$metric_1, na.rm = TRUE), + metric_1_sd = sd_safe(d$metric_1), + metric_2_mean = mean(d$metric_2, na.rm = TRUE), + metric_2_sd = sd_safe(d$metric_2), + metric_3_mean = mean(d$metric_3, na.rm = TRUE), + metric_3_sd = sd_safe(d$metric_3), + metric_1_name = d$metric_1_name[1L], + metric_2_name = d$metric_2_name[1L], + metric_3_name = d$metric_3_name[1L], + stringsAsFactors = FALSE + ) + })) + } else NULL + if (!is.null(summary)) rownames(summary) <- NULL + + list(predictions = predictions, summary = summary) +} + + +# ---- Internal helpers ---------------------------------------------------- + +# Build / accept a provider object from a spec. +.resolve_provider <- function(spec) { + if (inherits(spec, "MockVLMProvider")) return(spec) + if (inherits(spec, "Chat")) return(spec) + if (is.list(spec) && !is.null(spec$name)) { + return(do.call(vlm_provider, spec)) + } + stop("provider spec must be a Chat object, MockVLMProvider, or list(name=..., model=...).") +} + + +# Single (provider, fixture) extraction call. Loads input + golden, +# routes to the right extract_* helper, returns parsed JSON or error. 
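The provider-by-task aggregation above can be replayed on a toy predictions frame; all values and the single `metric_1` column below are invented for illustration, but the split/aggregate and NA-safe sd logic mirror the summary step:

```r
# Toy replay of the summary step: one row per (provider, task, rep),
# aggregated to mean and an NA-safe sd per provider x task cell.
preds <- data.frame(
  provider = c("a", "a", "a", "b", "b", "b"),
  task     = "horizons",
  ok       = c(TRUE, TRUE, FALSE, TRUE, TRUE, TRUE),
  metric_1 = c(0.80, 0.90, NA, 0.60, 0.70, 0.65),
  stringsAsFactors = FALSE
)
sd_safe <- function(x) {
  x <- x[is.finite(x)]
  if (length(x) < 2L) NA_real_ else stats::sd(x)
}
parts <- split(preds, list(preds$provider, preds$task), drop = TRUE)
summ <- do.call(rbind, lapply(parts, function(d) {
  data.frame(provider      = d$provider[1L],
             task          = d$task[1L],
             n             = nrow(d),
             ok_rate       = mean(d$ok),
             metric_1_mean = mean(d$metric_1, na.rm = TRUE),
             metric_1_sd   = sd_safe(d$metric_1),
             stringsAsFactors = FALSE)
}))
rownames(summ) <- NULL
```

With `n_repeats >= 3` per cell, `metric_1_sd` is what lets you tell real few-shot lift apart from sampling noise.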
+.run_one_extraction <- function(provider, task, fx, use_fewshot = TRUE, + use_structured = FALSE) { + golden <- tryCatch(jsonlite::fromJSON(fx$golden_path, simplifyVector = FALSE), + error = function(e) NULL) + if (is.null(golden)) { + return(list(ok = FALSE, error = "golden JSON unreadable", + pred = NULL, golden = NULL)) + } + out <- tryCatch({ + if (identical(task, "horizons")) { + txt <- paste(readLines(fx$input_path, warn = FALSE, + encoding = "UTF-8"), collapse = "\n") + ped <- PedonRecord$new( + site = list(id = fx$id, country = "BR"), + horizons = ensure_horizon_schema( + data.table::data.table(top_cm = numeric(0), bottom_cm = numeric(0)) + ) + ) + extract_horizons_from_pdf(ped, pdf_text = txt, provider = provider, + overwrite = TRUE, + use_fewshot = use_fewshot, + use_structured = use_structured) + list(horizons = lapply(seq_len(nrow(ped$horizons)), function(i) { + as.list(ped$horizons[i, ]) + })) + } else if (identical(task, "site")) { + ext <- tolower(tools::file_ext(fx$input_path)) + if (ext %in% c("txt", "md")) { + # Text-mode site fixture: bypass extract_site_from_fieldsheet + # (which is photo-only) and use the same prompt + schema + + # validate_or_retry contract directly. + txt <- paste(readLines(fx$input_path, warn = FALSE, + encoding = "UTF-8"), collapse = "\n") + schema_json <- load_schema("site") + prompt_name <- if (isTRUE(use_fewshot)) + "extract_site_from_text_fewshot" + else "extract_site_from_text" + rendered <- load_prompt(prompt_name, + vars = list(schema_json = schema_json, + document_text = txt)) + res <- validate_or_retry(provider, rendered, "site", + max_retries = 3L, image = NULL, + use_structured = use_structured) + # Schema wraps every field in {value, confidence, source_quote}. + # Unwrap so the predicted site is flat -- matching the + # canonical PedonRecord$site shape and the golden fixtures. 
+ extracted <- res$data$site %||% res$data + flat <- lapply(extracted, function(v) { + if (is.list(v) && !is.null(v$value)) v$value else v + }) + flat <- Filter(function(v) length(v) > 0L && !all(is.na(unlist(v))), + flat) + list(site = flat) + } else { + ped <- PedonRecord$new( + site = list(id = fx$id, country = "BR"), + horizons = ensure_horizon_schema( + data.table::data.table(top_cm = numeric(0), bottom_cm = numeric(0)) + ) + ) + extract_site_from_fieldsheet(ped, image_path = fx$input_path, + provider = provider, overwrite = TRUE) + list(site = ped$site) + } + } else if (identical(task, "munsell")) { + ped <- PedonRecord$new( + site = list(id = fx$id, country = "BR"), + horizons = ensure_horizon_schema( + data.table::data.table(top_cm = numeric(0), bottom_cm = numeric(0)) + ) + ) + extract_munsell_from_photo(ped, image_path = fx$input_path, + provider = provider, overwrite = TRUE, + use_fewshot = use_fewshot, + use_structured = use_structured) + list(horizons = lapply(seq_len(nrow(ped$horizons)), function(i) { + as.list(ped$horizons[i, ]) + })) + } else NULL + }, error = function(e) e) + if (inherits(out, "error")) { + return(list(ok = FALSE, error = conditionMessage(out), + pred = NULL, golden = golden)) + } + list(ok = TRUE, error = NA_character_, pred = out, golden = golden) +} + + +# Pick the right metric helper for the task, return a 3-slot named list +# (NAs for unused slots). 
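The value-unwrapping step in the text-mode site branch above (collapsing the schema's `{value, confidence, source_quote}` wrappers into a flat site list) can be sketched standalone; the field names and values here are made up:

```r
# Collapse {value, confidence, source_quote} wrappers into a flat
# site list, then drop empty / all-NA fields -- mirrors the
# text-mode site branch. Input values are hypothetical.
extracted <- list(
  municipality = list(value = "Itaguai", confidence = 0.9,
                      source_quote = "Municipio: Itaguai"),
  elevation_m  = list(value = 15, confidence = 0.7,
                      source_quote = "Altitude: 15 m"),
  drainage     = list(value = NA, confidence = 0.1, source_quote = "")
)
flat <- lapply(extracted, function(v) {
  if (is.list(v) && !is.null(v$value)) v$value else v
})
flat <- Filter(function(v) length(v) > 0L && !all(is.na(unlist(v))), flat)
```

After the filter, `flat` holds only `municipality` and `elevation_m`, matching the flat shape the golden fixtures use.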
+.compute_metric <- function(task, pred, golden) {
+  if (is.null(pred) || is.null(golden)) {
+    return(list(NA_real_, NA_real_, NA_real_))
+  }
+  if (identical(task, "munsell")) {
+    m <- .metric_munsell_deltaE(pred, golden)
+    list(mean_delta_e = m$mean_delta_e, coverage = m$coverage,
+         n_compared = m$n_compared)
+  } else if (identical(task, "horizons")) {
+    m <- .metric_horizons_overlap(pred, golden)
+    list(precision = m$precision, recall = m$recall,
+         attr_match = m$attr_match_rate)
+  } else if (identical(task, "site")) {
+    m <- .metric_site_iou(pred, golden)
+    list(iou = m$iou, value_accuracy = m$value_accuracy,
+         recall = m$recall)
+  } else list(NA_real_, NA_real_, NA_real_)
+}
diff --git a/R/febr.R b/R/febr.R
index 93f432418..94a7f83e8 100644
--- a/R/febr.R
+++ b/R/febr.R
@@ -417,9 +417,19 @@ read_febr_pedons <- function(dataset_codes = c("ctb0039"),
     suppressWarnings(as.numeric(ob_row$coord_x)) else NA_real_
   estado <- if (!is.null(ob_row) && "estado_id" %in% names(ob_row))
     as.character(ob_row$estado_id) else NA_character_
+  # v0.9.62: capture sisb_id (BDsolos Codigo PA cross-reference) when
+  # the FEBR observacao table provides it. Stored as character even
+  # when numeric in the source so it joins cleanly to BDsolos
+  # site$sisb_id (also stored as character).
+  sisb <- if (!is.null(ob_row) && "sisb_id" %in% names(ob_row)) {
+    raw <- as.character(ob_row$sisb_id)
+    if (length(raw) == 0L || is.na(raw) || !nzchar(trimws(raw))) NA_character_
+    else trimws(raw)
+  } else NA_character_
   PedonRecord$new(
     site = list(
       id = as.character(oid),
+      sisb_id = sisb,
       lat = lat,
       lon = lon,
       country = "BR",
diff --git a/R/key-sibcs.R b/R/key-sibcs.R
index bb2b3c805..960b7d760 100644
--- a/R/key-sibcs.R
+++ b/R/key-sibcs.R
@@ -211,6 +211,18 @@ classify_sibcs <- function(pedon,
   sub_result <- run_sibcs_subordem(pedon, ordem$code, rules)
   subordem <- sub_result$assigned

+  # v0.9.61: dominant-colour-in-B rule. The YAML is first-match-wins,
+  # which picks the suborder from the first B horizon that satisfies
+  # the colour predicate. For Argissolos / Latossolos / Nitossolos
+  # this distorts mixed-colour profiles (e.g. a yellow Bt1 over a
+  # thicker red Bt2 -> should be PV, but comes out PA). The
+  # post-processor below recomputes the dominant colour category by
+  # cumulative thickness and, when it differs, swaps the suborder.
+  color_override_info <- .apply_color_dominant_override(
+    subordem, pedon, ordem$code, rules
+  )
+  subordem <- color_override_info$subordem
+
   # v0.9.45: detectar fallback "cor a determinar" -- quando a subordem
   # atribuida e a catch-all de cor (PVA/LVA/NX/TX) E pelo menos um
   # predicado anterior falhou por ausencia de matiz Munsell em B, o
@@ -283,7 +295,8 @@ classify_sibcs <- function(pedon,
     subgrupo_assigned = sg,
     familia = familia_attrs,
     familia_label = familia_lbl,
-    color_undetermined = color_fallback
+    color_undetermined = color_fallback,
+    color_dominant_override = color_override_info$override
   )

   ambiguities <- find_ambiguities(key_result$trace, current = ordem$code)
@@ -312,6 +325,9 @@ classify_sibcs <- function(pedon,
   if (!is.null(color_fallback)) {
     warnings <- c(warnings, color_fallback$reason)
   }
+  if (!is.null(color_override_info$override)) {
+    warnings <- c(warnings, color_override_info$override$reason)
+  }

   ClassificationResult$new(
     system = "SiBCS 5a edicao",
diff --git a/R/merge-brazilian.R b/R/merge-brazilian.R
new file mode 100644
index 000000000..cdd6af779
--- /dev/null
+++ b/R/merge-brazilian.R
@@ -0,0 +1,232 @@
+# =============================================================================
+# v0.9.62 -- merge_brazilian_pedons(): dedup BDsolos x FEBR via sisb_id.
+#
+# Both Embrapa BDsolos and the FEBR (Free Brazilian Repository for Open
+# Soil Data) curate Brazilian pedons.
Many profiles appear in BOTH +# corpuses because FEBR ingested historic Embrapa surveys: 590 of the +# 905 BDsolos RJ pedons match a FEBR sisb_id, and across the full +# 27-UF download we expect heavy overlap with the 8,124 FEBR records +# that carry a non-NA sisb_id. +# +# This module exposes: +# +# merge_brazilian_pedons(bdsolos, febr, prefer = c("bdsolos", "febr")) +# -- joins two PedonRecord lists by site$sisb_id, drops the +# duplicates from the non-preferred source, and emits a +# single super-list of distinct Brazilian pedons. Tags each +# surviving pedon's site$reference_source with the merge +# decision so downstream code can audit provenance. +# +# summarize_brazilian_overlap(bdsolos, febr) +# -- diagnostic table reporting overlap counts per UF / per +# ordem and the dedup ratio. +# +# Both functions are pure R (no external dependencies) and work over +# in-memory PedonRecord lists; they don't touch the filesystem. +# ============================================================================= + + +# ---- helpers ------------------------------------------------------------- + +#' Extract sisb_id from a PedonRecord, returning NA when not present +#' +#' Both v0.9.62 loaders (BDsolos + FEBR) assign `site$sisb_id`. This +#' helper centralises the lookup so older PedonRecord objects without +#' the field still work. +#' +#' @keywords internal +.get_sisb_id <- function(pedon) { + if (is.null(pedon) || !inherits(pedon, "PedonRecord")) return(NA_character_) + v <- pedon$site$sisb_id %||% NA_character_ + if (length(v) == 0L || is.na(v) || !nzchar(trimws(as.character(v)))) { + return(NA_character_) + } + trimws(as.character(v)) +} + + +#' Tag a pedon with merge provenance +#' +#' Appends the source label to `site$reference_source` and stores +#' `site$merge_decision` (`"kept_bdsolos"`, `"kept_febr"`, or +#' `"unique"`). 
+#' +#' @keywords internal +.tag_merge_decision <- function(pedon, source, decision) { + if (is.null(pedon)) return(NULL) + prev_src <- pedon$site$reference_source %||% NA_character_ + pedon$site$merge_decision <- decision + pedon$site$merge_source <- source + if (is.na(prev_src) || !nzchar(prev_src)) { + pedon$site$reference_source <- source + } else if (!grepl(source, prev_src, fixed = TRUE)) { + pedon$site$reference_source <- paste0(prev_src, " | merged:", decision) + } else { + pedon$site$reference_source <- paste0(prev_src, " | ", decision) + } + pedon +} + + +# ---- merge ---------------------------------------------------------------- + +#' Merge BDsolos and FEBR PedonRecord lists, deduplicating by sisb_id +#' +#' Both Embrapa BDsolos and FEBR carry Brazilian soil profiles, with +#' substantial overlap. BDsolos exports the historic Embrapa pedon +#' numbering as \code{Codigo PA}; FEBR's \code{observacao} table +#' carries the same numbering as \code{sisb_id}. This function uses +#' those two as a join key to drop duplicates and produce a single +#' consolidated list. +#' +#' Pedons whose \code{site$sisb_id} is \code{NA} on either side are +#' kept as unique entries (the duplication test cannot be resolved). +#' +#' @param bdsolos List of \code{PedonRecord} objects from +#' \code{\link{load_bdsolos_csv}}. +#' @param febr List of \code{PedonRecord} objects from +#' \code{\link{read_febr_pedons}}. +#' @param prefer Character: which side wins when a sisb_id matches in +#' both. Either \code{"bdsolos"} (default) or \code{"febr"}. +#' @param verbose If \code{TRUE} (default), prints a one-line summary. +#' +#' @return A list of \code{PedonRecord} objects with site provenance +#' tagged via \code{site$merge_decision} (\code{"kept_bdsolos"}, +#' \code{"kept_febr"}, or \code{"unique"}) and \code{site$merge_source}. +#' Pedons appear in the order: chosen-from-overlap first, then +#' unique-to-bdsolos, then unique-to-febr. 
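The join arithmetic can be sketched on bare sisb_id vectors (hypothetical ids; the real function walks PedonRecord lists and tags provenance):

```r
# Partition two sisb_id vectors into shared / side-only / unmatchable,
# mirroring the dedup bookkeeping inside merge_brazilian_pedons().
bd_sisb <- c("PA-001", "PA-002", NA, "PA-004")
fb_sisb <- c("PA-002", "PA-004", "PA-777", NA)
shared  <- intersect(bd_sisb[!is.na(bd_sisb)], fb_sisb[!is.na(fb_sisb)])
bd_only <- setdiff(bd_sisb[!is.na(bd_sisb)], shared)
fb_only <- setdiff(fb_sisb[!is.na(fb_sisb)], shared)
# Shared keys keep one copy; NA-keyed pedons on either side are all kept.
n_merged <- length(shared) + length(bd_only) + length(fb_only) +
  sum(is.na(bd_sisb)) + sum(is.na(fb_sisb))
```

Here 8 input pedons collapse to 6: the two shared keys each keep one copy, and the two NA-keyed pedons survive because the duplication test cannot be resolved for them.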
+#' +#' @examples +#' \dontrun{ +#' bd <- load_bdsolos_csv("soil_data/embrapa_bdsolos/BD_solos/RJ.csv") +#' fb <- read_febr_pedons(c("ctb0032", "ctb0500")) +#' merged <- merge_brazilian_pedons(bd, fb, prefer = "bdsolos") +#' length(merged) # < length(bd) + length(fb) when there is overlap +#' } +#' +#' @seealso \code{\link{load_bdsolos_csv}}, +#' \code{\link{read_febr_pedons}}, +#' \code{\link{summarize_brazilian_overlap}}. +#' @export +merge_brazilian_pedons <- function(bdsolos, febr, + prefer = c("bdsolos", "febr"), + verbose = TRUE) { + prefer <- match.arg(prefer) + if (is.null(bdsolos)) bdsolos <- list() + if (is.null(febr)) febr <- list() + if (!is.list(bdsolos) || + (length(bdsolos) > 0L && + !all(vapply(bdsolos, inherits, logical(1L), "PedonRecord")))) { + stop("merge_brazilian_pedons(): 'bdsolos' must be a list of PedonRecord (or NULL).") + } + if (!is.list(febr) || + (length(febr) > 0L && + !all(vapply(febr, inherits, logical(1L), "PedonRecord")))) { + stop("merge_brazilian_pedons(): 'febr' must be a list of PedonRecord (or NULL).") + } + + bd_sisb <- vapply(bdsolos, .get_sisb_id, character(1L)) + fb_sisb <- vapply(febr, .get_sisb_id, character(1L)) + + bd_with <- which(!is.na(bd_sisb)) + fb_with <- which(!is.na(fb_sisb)) + bd_without <- which(is.na(bd_sisb)) + fb_without <- which(is.na(fb_sisb)) + + shared_keys <- intersect(bd_sisb[bd_with], fb_sisb[fb_with]) + bd_unique_idx <- bd_with[!bd_sisb[bd_with] %in% shared_keys] + fb_unique_idx <- fb_with[!fb_sisb[fb_with] %in% shared_keys] + + out <- list() + + # 1. Overlap: keep one pedon per sisb_id from the preferred side. 
+ if (length(shared_keys) > 0L) { + for (k in shared_keys) { + bd_hit <- bdsolos[bd_with[bd_sisb[bd_with] == k][1L]][[1L]] + fb_hit <- febr [fb_with[fb_sisb[fb_with] == k][1L]][[1L]] + if (prefer == "bdsolos") { + out[[length(out) + 1L]] <- .tag_merge_decision( + bd_hit, "BDsolos", "kept_bdsolos" + ) + } else { + out[[length(out) + 1L]] <- .tag_merge_decision( + fb_hit, "FEBR", "kept_febr" + ) + } + } + } + + # 2. BDsolos pedons unique to this side (sisb_id not in FEBR). + for (i in bd_unique_idx) { + out[[length(out) + 1L]] <- .tag_merge_decision( + bdsolos[[i]], "BDsolos", "unique" + ) + } + + # 3. FEBR pedons unique to this side. + for (i in fb_unique_idx) { + out[[length(out) + 1L]] <- .tag_merge_decision( + febr[[i]], "FEBR", "unique" + ) + } + + # 4. Pedons without a sisb_id from either side -- can't dedupe; keep all. + for (i in bd_without) { + out[[length(out) + 1L]] <- .tag_merge_decision( + bdsolos[[i]], "BDsolos", "unique" + ) + } + for (i in fb_without) { + out[[length(out) + 1L]] <- .tag_merge_decision( + febr[[i]], "FEBR", "unique" + ) + } + + if (isTRUE(verbose)) { + cli::cli_alert_success(sprintf( + "merge_brazilian_pedons(): %d pedons total -- %d shared (kept %s), %d BDsolos-only, %d FEBR-only, %d sisb-less.", + length(out), length(shared_keys), prefer, + length(bd_unique_idx), length(fb_unique_idx), + length(bd_without) + length(fb_without) + )) + } + out +} + + +# ---- diagnostics --------------------------------------------------------- + +#' Diagnostic summary of overlap between BDsolos and FEBR pedon lists +#' +#' Counts pedons by source / overlap status without performing the +#' merge. Useful for verifying the dedup ratio before committing to +#' \code{\link{merge_brazilian_pedons}}. +#' +#' @param bdsolos,febr Lists of \code{PedonRecord} objects. 
+#' +#' @return List with elements \code{n_bdsolos}, \code{n_febr}, +#' \code{n_bdsolos_with_sisb}, \code{n_febr_with_sisb}, +#' \code{n_shared}, \code{n_bdsolos_only}, \code{n_febr_only}, +#' \code{n_unmatchable} (sisb_id missing in one or both). +#' +#' @seealso \code{\link{merge_brazilian_pedons}}. +#' @export +summarize_brazilian_overlap <- function(bdsolos, febr) { + if (is.null(bdsolos)) bdsolos <- list() + if (is.null(febr)) febr <- list() + bd_sisb <- vapply(bdsolos, .get_sisb_id, character(1L)) + fb_sisb <- vapply(febr, .get_sisb_id, character(1L)) + bd_with <- !is.na(bd_sisb) + fb_with <- !is.na(fb_sisb) + shared <- intersect(bd_sisb[bd_with], fb_sisb[fb_with]) + list( + n_bdsolos = length(bdsolos), + n_febr = length(febr), + n_bdsolos_with_sisb = sum(bd_with), + n_febr_with_sisb = sum(fb_with), + n_shared = length(shared), + n_bdsolos_only = sum(bd_with & !(bd_sisb %in% shared)), + n_febr_only = sum(fb_with & !(fb_sisb %in% shared)), + n_unmatchable = sum(!bd_with) + sum(!fb_with) + ) +} diff --git a/R/run-agent-app.R b/R/run-agent-app.R new file mode 100644 index 000000000..fcaf9e6d1 --- /dev/null +++ b/R/run-agent-app.R @@ -0,0 +1,73 @@ +# ============================================================================= +# v0.9.65 -- run_agent_app(): launches the soilKey "Agente Pedometrista". +# ============================================================================= + + +#' Launch the soilKey "Agente Pedometrista" Shiny app +#' +#' A modern bslib-themed Shiny UI for end-to-end soil profile +#' classification driven by a local Gemma 4 (or any cloud VLM) for +#' multimodal extraction: +#' +#' \enumerate{ +#' \item Upload a profile photo, PDF report, field-sheet image or +#' Vis-NIR spectrum. +#' \item The VLM extracts schema-validated structured data into a +#' \code{\link{PedonRecord}} with explicit per-attribute +#' provenance (\code{source = "extracted_vlm"}). 
+#'   \item The deterministic R taxonomic key classifies the pedon
+#'     under WRB 2022, SiBCS 5a edicao and USDA Soil Taxonomy
+#'     13ed -- never the LLM.
+#'   \item A free-form chat tab lets the user ask the local Gemma
+#'     (with the soilKey "pedometrista" persona) about the loaded
+#'     profile in PT-BR or English.
+#' }
+#'
+#' Requires the optional packages \code{shiny}, \code{bslib},
+#' \code{bsicons} and \code{DT} (all in Suggests). For local Gemma
+#' inference, also requires Ollama -- see \code{\link{setup_local_vlm}}
+#' for a one-shot bootstrap from inside R.
+#'
+#' @param port Port for the local server. Default (\code{NULL}) lets
+#'   Shiny choose.
+#' @param launch.browser Whether to open the app in the default
+#'   browser (default \code{TRUE}).
+#' @param ... Additional arguments passed to \code{\link[shiny]{runApp}}.
+#' @return Invisibly, the value returned by \code{shiny::runApp()}.
+#'
+#' @examples
+#' \dontrun{
+#' # First-time setup (download Gemma 4 edge):
+#' setup_local_vlm("light")   # gemma4:e2b, ~6.7 GB on disk
+#'
+#' # Launch the agent UI:
+#' run_agent_app()
+#' }
+#'
+#' @seealso \code{\link{setup_local_vlm}}, \code{\link{vlm_provider}},
+#'   \code{\link{extract_munsell_from_photo}},
+#'   \code{\link{extract_horizons_from_pdf}},
+#'   \code{\link{extract_site_from_fieldsheet}},
+#'   \code{\link{classify_from_documents}},
+#'   \code{\link{run_classify_app}} (the simpler CSV-only UI).
+#' @export
+run_agent_app <- function(port = NULL, launch.browser = TRUE, ...) {
+  needed <- c("shiny", "bslib", "bsicons", "DT")
+  missing <- needed[!vapply(needed, requireNamespace, logical(1L),
+                            quietly = TRUE)]
+  if (length(missing) > 0L) {
+    stop(sprintf(
+      "Packages required for run_agent_app() are missing: %s. Install with `install.packages(c(%s))`.",
+      paste(missing, collapse = ", "),
+      paste(sprintf('"%s"', missing), collapse = ", ")
+    ))
+  }
+  app_dir <- system.file("shiny", "agent_app", package = "soilKey")
+  if (!nzchar(app_dir) || !dir.exists(app_dir)) {
+    app_dir <- file.path("inst", "shiny", "agent_app")
+  }
+  if (!dir.exists(app_dir)) {
+    stop("Could not locate the Shiny app at inst/shiny/agent_app.")
+  }
+  shiny::runApp(app_dir, port = port, launch.browser = launch.browser, ...)
+}
diff --git a/R/setup-local-vlm.R b/R/setup-local-vlm.R
new file mode 100644
index 000000000..b8821cb0b
--- /dev/null
+++ b/R/setup-local-vlm.R
@@ -0,0 +1,375 @@
+# =============================================================================
+# v0.9.64 -- setup_local_vlm() and Ollama lifecycle helpers.
+#
+# Goal: make the local-VLM path "just work" so the agent_app() Shiny UI
+# (v0.9.65) can offer a one-click "Configurar Gemma local" button that:
+#
+# 1. Detects whether the Ollama daemon is installed and running.
+# 2. Starts `ollama serve` in the background if installed but stopped.
+# 3. Pulls the requested model (default: gemma4:e4b, the "balanced"
+#    multimodal Gemma 4 tier, ~8 GB on disk) if not already pulled.
+# 4. Reports back ready / not-ready with actionable next steps.
+#
+# CRAN policy forbids shipping LLM weights inside an R package (5 MB
+# source-tarball cap, plus binary-blob policy). We therefore SHIP THE
+# DOWNLOADER, not the weights -- the user runs `setup_local_vlm()` once
+# after install and Ollama caches the model in `~/.ollama/models/`.
+# =============================================================================
+
+
+# ---- Constants ------------------------------------------------------------
+
+#' Canonical Ollama model catalog used by setup_local_vlm()
+#'
+#' Maps short labels ("light", "balanced", "best") to multimodal Gemma
+#' tags pullable via `ollama pull`.
Sizes are the **on-disk footprint
+#' Ollama reports after pull**, NOT the bare parameter count: the
+#' multimodal Gemma 4 builds bundle a vision encoder + tokenizers /
+#' adapters that add ~5 GB on top of the parameter weights, so the
+#' "edge 2B" variant lands at ~6.7 GB on disk despite the 2-billion
+#' parameter label.
+#'
+#' Sizes verified on the Ollama Library 2026-05 (e2b measured locally;
+#' e4b and 31b approximated from the Ollama listing -- run
+#' \code{ollama show <model>} after pull for the exact figure).
+#'
+#' @keywords internal
+.SOILKEY_OLLAMA_CATALOG <- list(
+  light    = list(model = "gemma4:e2b", size_gb = 6.7,
+                  note = paste0("Gemma 4 edge 2B (multimodal), ~6.7 GB ",
+                                "on disk. Smallest tier; ideal for a ",
+                                "laptop without GPU.")),
+  balanced = list(model = "gemma4:e4b", size_gb = 8.0,
+                  note = paste0("Gemma 4 edge 4B (multimodal), ~8 GB on ",
+                                "disk (approx). Higher accuracy on PT-BR ",
+                                "field sheets than e2b.")),
+  best     = list(model = "gemma4:31b", size_gb = 19.0,
+                  note = paste0("Gemma 4 31B dense (multimodal), ~19 GB ",
+                                "on disk. Workstation-class -- needs a ",
+                                "GPU with 24+ GB VRAM for usable latency."))
+)
+
+
+# ---- Detection ------------------------------------------------------------
+
+#' Is the Ollama CLI installed?
+#'
+#' Returns TRUE when `ollama` resolves on the system PATH. Does NOT
+#' check whether the daemon is running (use [ollama_is_running()] for
+#' that).
+#'
+#' @return Logical scalar.
+#' @export
+ollama_is_installed <- function() {
+  out <- tryCatch(Sys.which("ollama"), error = function(e) "")
+  isTRUE(nzchar(out) && file.exists(unname(out[1L])))
+}
+
+
+#' Print the right install-Ollama incantation for the user's OS
+#'
+#' macOS -> Homebrew cask; Linux -> upstream curl-pipe-sh script;
+#' Windows -> winget. Always points to the official installers
+#' page at <https://ollama.com/download>. Used by [setup_local_vlm()]
+#' as the actionable error path when Ollama is not installed.
+#' +#' @keywords internal +.print_ollama_install_hint <- function() { + os <- Sys.info()[["sysname"]] + cli::cli_h2("Install Ollama") + if (identical(os, "Darwin")) { + cli::cli_alert_info("macOS:") + cli::cli_text(" {.code brew install --cask ollama}") + cli::cli_text(" -- or download from {.url https://ollama.com/download/mac}") + } else if (identical(os, "Linux")) { + cli::cli_alert_info("Linux (any distro):") + cli::cli_text(" {.code curl -fsSL https://ollama.com/install.sh | sh}") + } else if (identical(os, "Windows")) { + cli::cli_alert_info("Windows 10/11:") + cli::cli_text(" {.code winget install Ollama.Ollama}") + cli::cli_text(" -- or download from {.url https://ollama.com/download/windows}") + } else { + cli::cli_alert_info("Unknown OS -- get the installer from {.url https://ollama.com/download}.") + } +} + + +# ---- Daemon lifecycle ----------------------------------------------------- + +#' Ensure the Ollama daemon is running, starting it if needed +#' +#' If [ollama_is_running()] already returns TRUE, this is a no-op. Else +#' tries to launch `ollama serve` in the background and polls until the +#' HTTP API answers (or `timeout_s` seconds elapse). Requires the +#' `ollama` binary to be on PATH; call [ollama_is_installed()] first. +#' +#' On success, the daemon keeps running for the rest of the R session +#' (and survives the R session, since it forks via `system2(..., wait +#' = FALSE)`). The user can stop it later with `pkill ollama` or +#' equivalent. +#' +#' @param timeout_s Polling deadline in seconds (default 30). +#' @param verbose Logical (default TRUE). Prints CLI status updates. +#' @return Logical scalar: TRUE iff the daemon is reachable when this +#' function returns. Never throws -- returns FALSE on any failure so +#' callers can route to [.print_ollama_install_hint()]. 
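The start-then-poll loop used here is an instance of a generic wait-until pattern; a standalone sketch (function name is illustrative, not part of the package):

```r
# Retry a cheap readiness check every `interval` seconds until it
# succeeds or `timeout_s` elapses; TRUE on success, FALSE on timeout.
# Standalone sketch of the daemon-polling loop above.
wait_until <- function(check, timeout_s = 30, interval = 0.5) {
  deadline <- Sys.time() + timeout_s
  while (Sys.time() < deadline) {
    if (isTRUE(check())) return(TRUE)
    Sys.sleep(interval)
  }
  FALSE
}
```

`ollama_ensure_running()` plugs `ollama_is_running()` in as the check, so a daemon that comes up mid-poll is detected within one interval.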
+#' @export +ollama_ensure_running <- function(timeout_s = 30, verbose = TRUE) { + if (ollama_is_running()) { + if (isTRUE(verbose)) { + cli::cli_alert_success("Ollama daemon already running.") + } + return(TRUE) + } + if (!ollama_is_installed()) { + if (isTRUE(verbose)) { + cli::cli_alert_warning("Ollama is not installed.") + .print_ollama_install_hint() + } + return(FALSE) + } + if (isTRUE(verbose)) { + cli::cli_alert_info("Ollama installed but not running -- starting `ollama serve`...") + } + log_path <- tempfile(pattern = "ollama_serve_", fileext = ".log") + ok <- tryCatch({ + system2("ollama", args = "serve", + stdout = log_path, stderr = log_path, + wait = FALSE) + TRUE + }, error = function(e) { + if (isTRUE(verbose)) { + cli::cli_alert_danger("Failed to spawn `ollama serve`: {conditionMessage(e)}") + } + FALSE + }) + if (!ok) return(FALSE) + + deadline <- Sys.time() + timeout_s + while (Sys.time() < deadline) { + if (ollama_is_running()) { + if (isTRUE(verbose)) { + cli::cli_alert_success("Ollama daemon ready (log: {.path {log_path}}).") + } + return(TRUE) + } + Sys.sleep(0.5) + } + if (isTRUE(verbose)) { + cli::cli_alert_danger("Ollama daemon did not become reachable within {timeout_s}s.") + cli::cli_alert_info("Check the log: {.path {log_path}}") + } + FALSE +} + + +# ---- Model catalog -------------------------------------------------------- + +#' List models currently pulled to the local Ollama +#' +#' Queries the `/api/tags` endpoint on the running daemon. Returns an +#' empty character vector when the daemon is not reachable or when no +#' models are pulled. Never throws. +#' +#' @return Character vector of model identifiers (e.g. +#' `c("gemma4:e2b", "gemma4:e4b")`). 
+#' @export +ollama_list_local_models <- function() { + if (!ollama_is_running()) return(character(0)) + if (!requireNamespace("httr", quietly = TRUE) || + !requireNamespace("jsonlite", quietly = TRUE)) { + return(character(0)) + } + url <- getOption("soilKey.ollama_url", + default = "http://127.0.0.1:11434/api/tags") + out <- tryCatch({ + resp <- httr::GET(url, httr::timeout(2)) + if (httr::status_code(resp) != 200L) return(character(0)) + body <- httr::content(resp, as = "text", encoding = "UTF-8") + parsed <- jsonlite::fromJSON(body, simplifyVector = TRUE) + models <- parsed$models %||% data.frame() + if (is.data.frame(models) && "name" %in% names(models)) { + as.character(models$name) + } else character(0) + }, error = function(e) character(0)) + out +} + + +#' Pull a model into the local Ollama +#' +#' Wraps `ollama pull ` via [system2()]. The pull is potentially +#' large (1-20 GB depending on the model) and may take many minutes +#' over a slow connection; this function blocks until completion. +#' Skipped (no-op) when the model is already present in +#' [ollama_list_local_models()]. +#' +#' @param model Ollama model identifier (e.g. `"gemma4:e2b"`). +#' @param verbose Logical (default TRUE). Streams `ollama pull` output +#' to the console. +#' @return Logical scalar: TRUE iff the model is on-disk after this +#' function returns. 
+#' @export +ollama_pull_model <- function(model, verbose = TRUE) { + if (!is.character(model) || length(model) != 1L || is.na(model) || + !nzchar(model)) { + rlang::abort("ollama_pull_model(): 'model' must be a non-empty character scalar.") + } + if (!ollama_is_installed()) { + if (isTRUE(verbose)) { + cli::cli_alert_warning("Ollama is not installed; cannot pull {.field {model}}.") + .print_ollama_install_hint() + } + return(FALSE) + } + already <- ollama_list_local_models() + if (model %in% already) { + if (isTRUE(verbose)) { + cli::cli_alert_success("Model {.field {model}} already pulled.") + } + return(TRUE) + } + if (isTRUE(verbose)) { + cli::cli_alert_info("Pulling {.field {model}} (this may take several minutes)...") + } + rc <- tryCatch( + system2("ollama", args = c("pull", model), + stdout = if (isTRUE(verbose)) "" else FALSE, + stderr = if (isTRUE(verbose)) "" else FALSE), + warning = function(w) { + # system2() raises a warning when the command exits non-zero. + attr(w, "rc") %||% 1L + }, + error = function(e) 1L + ) + ok <- isTRUE(identical(as.integer(rc), 0L)) || + model %in% ollama_list_local_models() + if (isTRUE(verbose)) { + if (ok) cli::cli_alert_success("Model {.field {model}} ready.") + else cli::cli_alert_danger("Failed to pull {.field {model}}.") + } + isTRUE(ok) +} + + +# ---- Top-level setup ------------------------------------------------------ + +#' One-call setup for the local VLM (Ollama + Gemma) +#' +#' Idempotent end-to-end bootstrap of the local VLM stack used by the +#' soilKey agent app. Detects the Ollama installation, starts the +#' daemon if needed, pulls the requested model and returns a status +#' list the caller can render in a Shiny UI. +#' +#' @param model One of `"light"` (gemma4:e2b, ~6.7 GB on disk), +#' `"balanced"` (gemma4:e4b, ~8 GB; default), `"best"` +#' (gemma4:31b, ~19 GB), OR any explicit Ollama model identifier +#' (e.g. `"qwen2.5vl:7b"`). 
The on-disk footprint is significantly
+#'   larger than the bare parameter count because the multimodal
+#'   Gemma 4 builds bundle a vision encoder + tokenizers (~5 GB
+#'   constant overhead).
+#' @param ensure_running Logical (default TRUE). When TRUE, also
+#'   starts the daemon via [ollama_ensure_running()] when needed.
+#' @param verbose Logical (default TRUE). Streams CLI status messages.
+#'
+#' @return Invisibly, a list with elements:
+#' \describe{
+#'   \item{`ready`}{Logical -- TRUE iff the model can be used now.}
+#'   \item{`model`}{Character -- the model identifier resolved.}
+#'   \item{`ollama_url`}{Character -- daemon endpoint.}
+#'   \item{`installed`}{Logical -- whether the Ollama CLI is on PATH.}
+#'   \item{`running`}{Logical -- whether the daemon answers /api/tags.}
+#'   \item{`pulled`}{Logical -- whether the model is on local disk.}
+#'   \item{`hint`}{Character -- one-line next-step hint for the user
+#'     (empty when `ready = TRUE`).}
+#' }
+#'
+#' @section What this does NOT do:
+#' - Does NOT install Ollama (requires `sudo` / admin); the function
+#'   prints OS-specific install hints instead.
+#' - Does NOT ship the model weights inside the R package (CRAN
+#'   policy); the model is pulled from the Ollama registry on first run
+#'   and cached in `~/.ollama/models/`.
+#' - Does NOT classify anything; once setup succeeds, call
+#'   \code{vlm_provider("ollama", model = ...)} then the
+#'   [extract_horizons_from_pdf()] / [extract_munsell_from_photo()] /
+#'   [extract_site_from_fieldsheet()] family.
+#'
+#' @examples
+#' \dontrun{
+#' # Default: pull the balanced ~8 GB model, start the daemon if needed.
+#' status <- setup_local_vlm() +#' status$ready # TRUE on a healthy machine with disk + bandwidth +#' +#' # Lightweight option for laptops: +#' setup_local_vlm("light") # gemma4:e2b, ~6.7 GB on disk +#' +#' # Best quality (server / workstation): +#' setup_local_vlm("best") # gemma4:31b, ~19 GB on disk +#' +#' # Any other multimodal model the user prefers: +#' setup_local_vlm("qwen2.5vl:7b") +#' } +#' @seealso [vlm_provider()], [ollama_is_running()], +#' [ollama_pull_model()]. +#' @export +setup_local_vlm <- function(model = "balanced", + ensure_running = TRUE, + verbose = TRUE) { + catalog <- .SOILKEY_OLLAMA_CATALOG + resolved <- if (model %in% names(catalog)) catalog[[model]]$model else model + size_hint <- if (model %in% names(catalog)) catalog[[model]]$note else + paste0("Custom model: ", model) + + if (isTRUE(verbose)) { + cli::cli_h1("soilKey -- local VLM setup") + cli::cli_alert_info(size_hint) + } + + installed <- ollama_is_installed() + if (!installed) { + if (isTRUE(verbose)) .print_ollama_install_hint() + return(invisible(list( + ready = FALSE, model = resolved, + ollama_url = getOption("soilKey.ollama_url", + default = "http://127.0.0.1:11434"), + installed = FALSE, running = FALSE, pulled = FALSE, + hint = "Install Ollama, then re-run setup_local_vlm()." + ))) + } + + running <- ollama_is_running() + if (!running && isTRUE(ensure_running)) { + running <- ollama_ensure_running(verbose = verbose) + } + if (!running) { + return(invisible(list( + ready = FALSE, model = resolved, + ollama_url = getOption("soilKey.ollama_url", + default = "http://127.0.0.1:11434"), + installed = TRUE, running = FALSE, pulled = FALSE, + hint = "Start the Ollama daemon: `ollama serve` (or set ensure_running = TRUE)." 
+ ))) + } + + pulled <- ollama_pull_model(resolved, verbose = verbose) + ready <- isTRUE(pulled) + + if (isTRUE(verbose)) { + if (ready) { + cli::cli_alert_success("Local VLM ready: provider {.field ollama}, model {.field {resolved}}.") + } else { + cli::cli_alert_danger("Local VLM setup failed; see messages above.") + } + } + + invisible(list( + ready = ready, model = resolved, + ollama_url = getOption("soilKey.ollama_url", + default = "http://127.0.0.1:11434"), + installed = TRUE, running = running, pulled = pulled, + hint = if (ready) "" else + paste0("Pull failed; check disk space + network and retry ", + "ollama_pull_model('", resolved, "').") + )) +} diff --git a/R/sibcs-color-tuning.R b/R/sibcs-color-tuning.R new file mode 100644 index 000000000..17efe6f46 --- /dev/null +++ b/R/sibcs-color-tuning.R @@ -0,0 +1,261 @@ +# ============================================================================= +# v0.9.61 -- SiBCS color tuning: thickness-weighted dominant B-horizon color. +# +# The SiBCS subordem key for color-driven Ordens (Argissolos / Latossolos / +# Nitossolos) is currently "first-match-wins" -- whichever color predicate +# fires first in the YAML captures the profile. Mixed profiles (e.g. +# Bt1 = 7.5YR amarelo, Bt2 = 2.5YR vermelho) get whichever subordem appears +# first in canonical key order, instead of the color that actually +# DOMINATES the B horizon by thickness. +# +# This module computes the thickness-weighted dominant color CATEGORY of +# all B horizons of a pedon and -- when the dominant differs from the +# first-match assignment -- overrides the subordem deterministically. 
+#
+# Categories (per SiBCS 5a ed., Cap 1, Caracteristicas diferenciaveis):
+#   * VERMELHO          -- hue <= 2.5YR (10R, 7.5R, 5R, 2.5R, 2.5YR)
+#   * VERMELHO_AMARELO  -- hue == 5YR (intermediate)
+#   * AMARELO           -- hue >= 7.5YR with chroma >= 4
+#   * BRUNO_ACINZENTADO -- value <= 4 AND chroma <= 4 with hue >= 5YR
+#     (the dark brown/grey end of the B spectrum)
+#   * ACINZENTADO       -- hue >= 7.5YR with value >= 5 AND chroma < 4
+#     (pale/grey)
+#
+# Ordem -> dominant -> subordem code mapping:
+#   P (Argissolos): PV / PA / PVA / PBAC / PAC
+#   L (Latossolos): LV / LA / LVA / LB / LVA (no greyish subordem)
+#   N (Nitossolos): NV / NX / NX / NB / NX (only Bruno + Vermelho
+#     exist; rest -> Haplico)
+#
+# Luvissolos (T) are left untouched: TC vs TX is a chroma intensity test
+# (caracter cromico), not a color hue partition.
+# =============================================================================
+
+
+# ---- single-horizon color category ----------------------------------------
+
+#' Classify a single Munsell color into a SiBCS B-horizon color category
+#'
+#' @param hue Munsell hue, e.g. "5YR" or "2.5Y".
+#' @param value Munsell value (numeric).
+#' @param chroma Munsell chroma (numeric).
+#'
+#' @return Character scalar: one of `"VERMELHO"`, `"VERMELHO_AMARELO"`,
+#'   `"AMARELO"`, `"BRUNO_ACINZENTADO"`, `"ACINZENTADO"`, or `NA`
+#'   when any of the three Munsell components is missing.
+#'
+#' @keywords internal
+.classify_b_color <- function(hue, value, chroma) {
+  if (is.na(hue) || is.na(value) || is.na(chroma)) return(NA_character_)
+  hu <- toupper(trimws(hue))
+
+  # 1. BRUNO_ACINZENTADO: dark (value <= 4, chroma <= 4) and at least
+  #    moderately yellow (hue >= 5YR) -- catches the dark-brown / dark-grey
+  #    end of the B color spectrum.
+  if (value <= 4 && chroma <= 4 &&
+      grepl("^(5YR|7\\.5YR|10YR|2\\.5Y|5Y|10Y)\\b", hu)) {
+    return("BRUNO_ACINZENTADO")
+  }
+
+  # 2. ACINZENTADO: pale grey (value >= 5, chroma < 4) on yellow side.
+  if (value >= 5 && chroma < 4 &&
+      grepl("^(7\\.5YR|10YR|2\\.5Y|5Y|10Y)\\b", hu)) {
+    return("ACINZENTADO")
+  }
+
+  # 3. VERMELHO: red end of hue ladder.
+  if (grepl("^(10R|7\\.5R|5R|2\\.5R|2\\.5YR)\\b", hu)) {
+    return("VERMELHO")
+  }
+
+  # 4. VERMELHO_AMARELO: intermediate hue (5YR), any remaining
+  #    value/chroma (the dark 5YR cases were caught by rule 1).
+  if (grepl("^5YR\\b", hu)) {
+    return("VERMELHO_AMARELO")
+  }
+
+  # 5. AMARELO: yellow side with chroma >= 4.
+  if (grepl("^(7\\.5YR|10YR|2\\.5Y|5Y|10Y)\\b", hu) && chroma >= 4) {
+    return("AMARELO")
+  }
+
+  NA_character_
+}
+
+
+# ---- thickness-weighted dominant ------------------------------------------
+
+#' Thickness-weighted dominant B-horizon color category for a pedon
+#'
+#' Walks every B-like horizon (designation matching `^B[wt]?` and not
+#' `^BC|^Bt0`), classifies each into a SiBCS color category via
+#' [.classify_b_color()], sums horizon thickness per category, and
+#' returns the category with the largest cumulative thickness. Ties are
+#' broken in canonical SiBCS order (BRUNO_ACINZENTADO > ACINZENTADO >
+#' AMARELO > VERMELHO > VERMELHO_AMARELO).
+#'
+#' @param pedon A `[PedonRecord]`.
+#'
+#' @return List with `dominant` (character scalar or `NA`),
+#'   `thickness_by_category` (named numeric vector), `n_b_layers`
+#'   (integer), and `n_classified` (integer).
+#' +#' @keywords internal +.dominant_b_color <- function(pedon) { + h <- pedon$horizons + bl <- .b_layers(pedon) + if (length(bl) == 0L) { + return(list(dominant = NA_character_, + thickness_by_category = numeric(0), + n_b_layers = 0L, n_classified = 0L)) + } + hues <- h$munsell_hue_moist[bl] + vals <- h$munsell_value_moist[bl] + chrs <- h$munsell_chroma_moist[bl] + tops <- h$top_cm[bl] + bots <- h$bottom_cm[bl] + thk <- pmax(bots - tops, 0, na.rm = FALSE) + thk[is.na(thk) | thk <= 0] <- 1 # default unit weight when depth missing + + cats <- vapply(seq_along(bl), + function(i) .classify_b_color(hues[i], vals[i], chrs[i]), + character(1)) + + classified <- !is.na(cats) + if (!any(classified)) { + return(list(dominant = NA_character_, + thickness_by_category = numeric(0), + n_b_layers = length(bl), n_classified = 0L)) + } + + by_cat <- tapply(thk[classified], cats[classified], sum) + if (length(by_cat) == 0L) { + return(list(dominant = NA_character_, + thickness_by_category = numeric(0), + n_b_layers = length(bl), n_classified = sum(classified))) + } + + # Canonical tie-break order (most-specific first). + tie_order <- c("BRUNO_ACINZENTADO", "ACINZENTADO", + "AMARELO", "VERMELHO", "VERMELHO_AMARELO") + ord <- order(-as.numeric(by_cat), + match(names(by_cat), tie_order, nomatch = length(tie_order) + 1L)) + dominant <- names(by_cat)[ord[1L]] + + list(dominant = dominant, + thickness_by_category = as.numeric(by_cat) |> setNames(names(by_cat)), + n_b_layers = length(bl), + n_classified = sum(classified)) +} + + +# ---- ordem-specific dominant -> subordem code mapping --------------------- + +# Returns NA when the Ordem has no color partition (or when dominant is NA). +# Each mapping is keyed first by Ordem code, then by color category. 
+.SIBCS_DOMINANT_TO_SUBORDEM <- list( + "P" = c(VERMELHO = "PV", + AMARELO = "PA", + VERMELHO_AMARELO = "PVA", + BRUNO_ACINZENTADO = "PBAC", + ACINZENTADO = "PAC"), + "L" = c(VERMELHO = "LV", + AMARELO = "LA", + VERMELHO_AMARELO = "LVA", + BRUNO_ACINZENTADO = "LB", + ACINZENTADO = "LVA"), + "N" = c(VERMELHO = "NV", + AMARELO = "NX", + VERMELHO_AMARELO = "NX", + BRUNO_ACINZENTADO = "NB", + ACINZENTADO = "NX") +) + + +#' Resolve the subordem code dictated by the dominant B-horizon color +#' +#' @param pedon A `[PedonRecord]`. +#' @param ordem_code Single-letter Ordem code, e.g. `"P"`. +#' +#' @return List with `code` (target subordem code or `NA`) and +#' `evidence` (the diagnostic returned by [.dominant_b_color()]). +#' +#' @keywords internal +.dominant_b_color_subordem <- function(pedon, ordem_code) { + if (!ordem_code %in% names(.SIBCS_DOMINANT_TO_SUBORDEM)) { + return(list(code = NA_character_, evidence = NULL)) + } + ev <- .dominant_b_color(pedon) + if (is.na(ev$dominant)) { + return(list(code = NA_character_, evidence = ev)) + } + mapping <- .SIBCS_DOMINANT_TO_SUBORDEM[[ordem_code]] + list(code = unname(mapping[ev$dominant]), evidence = ev) +} + + +# ---- post-processor that overrides a first-match-wins assignment --------- + +#' Override a first-match-wins SiBCS subordem with the dominant-color rule +#' +#' Called from [classify_sibcs()] after the YAML key has assigned a +#' subordem. When the Ordem is one of the color-partitioned ones (P, L, +#' N) and the dominant-color rule produces a DIFFERENT subordem code, +#' replaces the assigned entry with the YAML block matching the new +#' code. The function does nothing for non-color Ordens, when no Munsell +#' B color is available, when the dominant matches the first-match +#' assignment, or when the YAML lacks an entry for the dominant code. +#' +#' @param subordem The subordem entry assigned by the YAML key +#' (`list(code, name, tests, ...)`) or `NULL`. +#' @param pedon A `[PedonRecord]`. 
+#' @param ordem_code Single-letter Ordem code.
+#' @param rules Loaded SiBCS rule set (with `$subordens[[ordem_code]]`).
+#'
+#' @return List with `subordem` (the possibly-overridden YAML entry) and
+#'   `override` (NULL when no change, else
+#'   `list(from_code, to_code, dominant_evidence)`).
+#'
+#' @keywords internal
+.apply_color_dominant_override <- function(subordem, pedon,
+                                           ordem_code, rules) {
+  if (is.null(subordem)) {
+    return(list(subordem = NULL, override = NULL))
+  }
+  if (!ordem_code %in% names(.SIBCS_DOMINANT_TO_SUBORDEM)) {
+    return(list(subordem = subordem, override = NULL))
+  }
+  dom <- .dominant_b_color_subordem(pedon, ordem_code)
+  if (is.na(dom$code) || identical(dom$code, subordem$code)) {
+    return(list(subordem = subordem, override = NULL))
+  }
+  ord_block <- rules$subordens[[ordem_code]]
+  if (is.null(ord_block)) {
+    return(list(subordem = subordem, override = NULL))
+  }
+  match_idx <- vapply(ord_block,
+                      function(s) identical(s$code, dom$code),
+                      logical(1))
+  if (!any(match_idx)) {
+    return(list(subordem = subordem, override = NULL))
+  }
+  new_sub <- ord_block[[which(match_idx)[1L]]]
+  list(
+    subordem = new_sub,
+    override = list(
+      from_code = subordem$code,
+      from_name = subordem$name,
+      to_code = new_sub$code,
+      to_name = new_sub$name,
+      dominant_evidence = dom$evidence,
+      reason = sprintf(
+        paste0("Subordem trocada de '%s' para '%s' pela regra ",
+               "dominante-de-cor em B (categoria %s, espessura ",
+               "acumulada %.0f cm; %d de %d horizonte(s) B ",
+               "classificado(s))."),
+        subordem$name, new_sub$name,
+        dom$evidence$dominant,
+        dom$evidence$thickness_by_category[[dom$evidence$dominant]],
+        dom$evidence$n_classified, dom$evidence$n_b_layers
+      )
+    )
+  )
+}
diff --git a/R/vlm-extract.R b/R/vlm-extract.R
index 6153eb7b6..add7e4c87 100644
--- a/R/vlm-extract.R
+++ b/R/vlm-extract.R
@@ -337,9 +337,25 @@ apply_site_extraction <- function(pedon, parsed, overwrite = FALSE) {
 #'   validation failure. Default 3.
#' @param overwrite If \code{TRUE}, lower-authority values are allowed #' to clobber higher-authority ones. Default \code{FALSE}. -#' @param prompt_name Override the default prompt template -#' (\code{"extract_horizons"}). +#' @param prompt_name Override the default prompt template. When +#' \code{NULL} (default), resolved via \code{use_fewshot}: +#' \code{TRUE} -> \code{"extract_horizons_fewshot"}; +#' \code{FALSE} -> \code{"extract_horizons"}. #' @param schema_name Override the default schema (\code{"horizon"}). +#' @param use_fewshot Logical, default \code{TRUE}. When \code{TRUE}, +#' uses the v0.9.68 few-shot prompt with two worked examples +#' embedded; this dramatically improves JSON-shape compliance +#' on smaller models (Gemma 4 e2b / e4b). Set \code{FALSE} to +#' revert to the bare-instructions prompt. Ignored when +#' \code{prompt_name} is set explicitly. +#' @param use_structured Logical, default \code{FALSE} (v0.9.70+). +#' When \code{TRUE} and the provider exposes +#' \code{chat_structured()} (Anthropic / OpenAI / Ollama 0.5+ / +#' Gemini), the validate-and-retry loop is replaced by a +#' single structured call that returns a schema-validated R +#' list directly -- removing JSON-shape errors at the protocol +#' level. Falls back to the legacy retry loop when the +#' provider has no \code{chat_structured} method. #' @return Invisibly, the (mutated) \code{pedon}. Carries a #' \code{"vlm_extraction"} attribute with the parsed response, #' number of attempts, and number of provenance entries added. 
@@ -349,9 +365,16 @@ extract_horizons_from_pdf <- function(pedon, provider, max_retries = 3L, overwrite = FALSE, - prompt_name = "extract_horizons", + prompt_name = NULL, schema_name = "horizon", - pdf_text = NULL) { + pdf_text = NULL, + use_fewshot = TRUE, + use_structured = FALSE) { + + if (is.null(prompt_name)) { + prompt_name <- if (isTRUE(use_fewshot)) "extract_horizons_fewshot" + else "extract_horizons" + } if (!inherits(pedon, "PedonRecord")) { rlang::abort("`pedon` must be a PedonRecord") @@ -386,18 +409,31 @@ extract_horizons_from_pdf <- function(pedon, total_attempts <- 0L parsed_list <- vector("list", length(chunks)) + # v0.9.71: per-chunk cli progress bar (no-op for single-chunk PDFs). + use_progress <- length(chunks) > 1L && + requireNamespace("cli", quietly = TRUE) + if (isTRUE(use_progress)) { + cli::cli_progress_bar( + total = length(chunks), + format = "Extracting horizons {cli::pb_current}/{cli::pb_total} [{cli::pb_bar}] {cli::pb_eta}" + ) + } + for (i in seq_along(chunks)) { rendered <- load_prompt(prompt_name, vars = list( schema_json = schema_json, document_text = chunks[[i]] )) res <- validate_or_retry(provider, rendered, schema_name, - max_retries = max_retries) + max_retries = max_retries, + use_structured = use_structured) parsed_list[[i]] <- res$data total_attempts <- total_attempts + res$attempts total_added <- total_added + apply_horizons_extraction(pedon, res$data, overwrite = overwrite) + if (isTRUE(use_progress)) cli::cli_progress_update() } + if (isTRUE(use_progress)) cli::cli_progress_done() # Record document provenance. 
if (is.null(pedon$documents)) pedon$documents <- list() @@ -444,8 +480,10 @@ extract_munsell_from_photo <- function(pedon, provider, max_retries = 3L, overwrite = FALSE, - prompt_name = "extract_munsell_from_photo", - schema_name = "horizon") { + prompt_name = NULL, + schema_name = "horizon", + use_fewshot = TRUE, + use_structured = FALSE) { if (!inherits(pedon, "PedonRecord")) { rlang::abort("`pedon` must be a PedonRecord") @@ -453,6 +491,10 @@ extract_munsell_from_photo <- function(pedon, if (!file.exists(image_path)) { rlang::abort(sprintf("Image not found: %s", image_path)) } + if (is.null(prompt_name)) { + prompt_name <- if (isTRUE(use_fewshot)) "extract_munsell_from_photo_fewshot" + else "extract_munsell_from_photo" + } schema_json <- load_schema(schema_name) rendered <- load_prompt(prompt_name, vars = list(schema_json = schema_json)) @@ -468,7 +510,8 @@ extract_munsell_from_photo <- function(pedon, res <- validate_or_retry(provider, rendered, schema_name, max_retries = max_retries, - image = image_content) + image = image_content, + use_structured = use_structured) # Drop any quantitative non-color attributes the model may have # extracted; only Munsell entries should win provenance. @@ -520,8 +563,15 @@ extract_site_from_fieldsheet <- function(pedon, max_retries = 3L, overwrite = FALSE, prompt_name = "extract_site_metadata", - schema_name = "site") { - + schema_name = "site", + use_fewshot = TRUE, + use_structured = FALSE) { + + # NOTE: extract_site_from_fieldsheet uses an *image* prompt; the + # few-shot variant for image-mode site extraction is left as the + # default-prompt-name behaviour. The text-mode site path + # (.run_one_extraction in benchmark-vlm-extraction.R) uses + # `extract_site_from_text_fewshot` when use_fewshot = TRUE. 
if (!inherits(pedon, "PedonRecord")) { rlang::abort("`pedon` must be a PedonRecord") } @@ -543,7 +593,8 @@ extract_site_from_fieldsheet <- function(pedon, res <- validate_or_retry(provider, rendered, schema_name, max_retries = max_retries, - image = image_content) + image = image_content, + use_structured = use_structured) added <- apply_site_extraction(pedon, res$data, overwrite = overwrite) diff --git a/R/vlm-prompts.R b/R/vlm-prompts.R index ce5ef297e..63079327e 100644 --- a/R/vlm-prompts.R +++ b/R/vlm-prompts.R @@ -61,3 +61,91 @@ load_prompt <- function(name, vars = list()) { } rendered } + + +#' Persona system-prompt for the soilKey "Pedometrist Agent" +#' +#' Returns the canonical system prompt installed into every +#' agent_app() chat session in v0.9.65+. The persona makes the LLM +#' (typically a local Gemma 4 via Ollama) behave as an experienced +#' pedologist who: +#' +#' - extracts structured data from photos, PDFs and field reports +#' into the soilKey JSON schemas; +#' - NEVER classifies the soil itself (the deterministic taxonomic key +#' in soilKey is the only thing that emits a class name); +#' - explains decisions in the user's chosen language (PT-BR by +#' default; falls back to English when asked); +#' - flags ambiguity explicitly via `confidence` and `source_quote` +#' fields in every extracted attribute. +#' +#' @param language One of `"pt-BR"` (default) or `"en"`. Determines +#' the language the persona uses when discussing reasoning, +#' ambiguity and missing attributes. +#' +#' @return Character scalar suitable for passing as `system_prompt` +#' to [vlm_provider()] (which forwards it to `ellmer::chat_*`). 
+#' +#' @examples +#' p <- pedologist_system_prompt("pt-BR") +#' substring(p, 1L, 80L) +#' @export +pedologist_system_prompt <- function(language = c("pt-BR", "en")) { + language <- match.arg(language) + pt_br <- paste0( + "Voce e um agente pedometrista experiente, treinado em pedologia ", + "brasileira (SiBCS 5a edicao), pedologia internacional (WRB 2022) e ", + "pedologia norte-americana (USDA Soil Taxonomy 13a edicao).\n\n", + "Sua unica tarefa neste sistema soilKey e EXTRAIR DADOS ESTRUTURADOS ", + "(JSON validado por schema) a partir de fotos de perfis, fichas de ", + "campo, relatorios PDF e tabelas. Voce NUNCA classifica o solo: a ", + "classificacao e feita por uma chave taxonomica deterministica em ", + "R, baseada em regras YAML versionadas. Sua extracao alimenta essa ", + "chave.\n\n", + "Regras de extracao:\n", + " 1. Reporte SO o que voce observa diretamente. Nao invente valores.\n", + " 2. Cada atributo deve vir com 'value', 'confidence' (0 a 1) e ", + "'source_quote' (a frase ou regiao da imagem que justifica o valor).\n", + " 3. Quando incerto, use confidence baixa e explique a duvida.\n", + " 4. Cores Munsell: relate matiz/valor/croma exatamente como no ", + "padrao (e.g. '5YR 4/6'); se a foto nao tem placa Munsell de ", + "referencia, marque confidence <= 0.5.\n", + " 5. Profundidades em centimetros (top_cm / bottom_cm).\n", + " 6. Unidades quimicas: pH em H2O sem unidade; CTC em cmol_c/kg; ", + "saturacoes em %; carbono organico em %.\n", + " 7. Se um campo nao aparece no documento, OMITA-O do JSON. Nao use ", + "null nem 'desconhecido'.\n", + " 8. Saida sempre em JSON puro -- sem markdown, sem comentarios.\n\n", + "Quando o usuario pedir explicacoes (modo conversa, fora do fluxo de ", + "extracao), responda em portugues brasileiro, claro e tecnico, ", + "citando paginas/capitulos do SiBCS, WRB ou KST quando aplicavel." 
+ ) + en <- paste0( + "You are an experienced pedometrist agent, trained in Brazilian ", + "pedology (SiBCS 5th ed.), international pedology (WRB 2022) and ", + "U.S. pedology (USDA Soil Taxonomy 13th ed.).\n\n", + "Your single role inside soilKey is to EXTRACT SCHEMA-VALIDATED ", + "STRUCTURED DATA (JSON) from profile photos, field sheets, PDF ", + "reports and tables. You NEVER classify the soil: classification is ", + "performed by a deterministic taxonomic key in R, driven by ", + "versioned YAML rules. Your extraction feeds that key.\n\n", + "Extraction rules:\n", + " 1. Report ONLY what you directly observe. Do not invent values.\n", + " 2. Each attribute must carry 'value', 'confidence' (0 to 1) and ", + "'source_quote' (the sentence or image region that justifies it).\n", + " 3. When uncertain, use a low confidence and explain the doubt.\n", + " 4. Munsell colors: report hue/value/chroma exactly per standard ", + "(e.g. '5YR 4/6'); if the photo lacks a Munsell reference card, ", + "set confidence <= 0.5.\n", + " 5. Depths in centimetres (top_cm / bottom_cm).\n", + " 6. Chemistry units: pH in H2O unitless; CEC in cmol_c/kg; base ", + "saturations in %; organic carbon in %.\n", + " 7. If a field is absent from the document, OMIT it from the JSON. ", + "Do not use null or 'unknown'.\n", + " 8. Output is always pure JSON -- no markdown fences, no comments.\n\n", + "When the user asks for explanations (chat mode, outside the ", + "extraction flow), answer in clear, technical English, citing ", + "SiBCS / WRB / KST pages and chapters where applicable." + ) + if (identical(language, "pt-BR")) pt_br else en +} diff --git a/R/vlm-providers.R b/R/vlm-providers.R index 3f107af99..e20fbca01 100644 --- a/R/vlm-providers.R +++ b/R/vlm-providers.R @@ -30,11 +30,14 @@ #' \item \code{openai = "gpt-4o"} -- text + vision. #' \item \code{google = "gemini-2.0-pro"} -- successor to 1.5 #' with longer context + better multimodal grounding. 
-#' \item \code{ollama = "gemma4:e4b"} -- Gemma 4 edge -#' multimodal (text + image; audio also). For larger -#' contexts use \code{"gemma4:31b"}; for cloud-only -#' offload via Ollama, \code{"gemma4-cloud:31b"}. Pull the -#' desired size first with \code{ollama pull gemma4:e4b}. +#' \item \code{ollama = "gemma4:e2b"} -- v0.9.64 default. Gemma 4 +#' edge 2B (~6.7 GB on disk; multimodal builds bundle a +#' vision encoder that adds ~5 GB to the bare parameter +#' weights), runs on a laptop CPU. Larger options: +#' \code{"gemma4:e4b"} (~8 GB, better accuracy on PT-BR field +#' sheets), \code{"gemma4:31b"} (~19 GB, frontier dense, +#' requires GPU). One-shot bootstrap: +#' \code{\link{setup_local_vlm}("light"|"balanced"|"best")}. #' } #' #' Users can override at any time: @@ -55,7 +58,7 @@ default_model <- function(name) { anthropic = "claude-sonnet-4-7", openai = "gpt-4o", google = "gemini-2.0-pro", - ollama = "gemma4:e4b" + ollama = "gemma4:e2b" ) } @@ -75,19 +78,25 @@ default_model <- function(name) { #' #' @section Local-first option: #' Passing \code{name = "ollama"} runs every extraction locally via -#' an Ollama server (default \code{gemma4:e4b}, Gemma 4 edge with -#' multimodal text+image+audio support). No data leaves the -#' machine, which is the recommended setting for sensitive field -#' descriptions (e.g. governmental surveys, indigenous land studies) -#' where institutional independence and data sovereignty matter. -#' Pull the model first: +#' an Ollama server (default \code{gemma4:e2b}, Gemma 4 edge 2B, +#' multimodal text+image, ~6.7 GB on disk -- the multimodal build +#' bundles the vision encoder, which adds ~5 GB to the bare +#' parameter weights). No data leaves the machine, which is the +#' recommended setting for sensitive field descriptions (e.g. +#' governmental surveys, indigenous land studies) where institutional +#' independence and data sovereignty matter. 
+#'
+#' One-shot setup (v0.9.64+):
+#' \preformatted{
+#' setup_local_vlm()          # "balanced" -> gemma4:e4b, ~8 GB
+#' setup_local_vlm("light")   # gemma4:e2b, ~6.7 GB (laptop OK)
+#' setup_local_vlm("best")    # gemma4:31b, ~19 GB (workstation)
+#' }
+#' or manually:
 #' \preformatted{
-#' ollama pull gemma4:e4b   # ~3 GB edge variant (default)
-#' ollama pull gemma4:31b   # frontier dense variant
-#' ollama pull gemma3:27b   # earlier generation, still solid
+#' ollama pull gemma4:e2b
+#' ollama serve
 #' }
-#' Then start an Ollama server (\code{ollama serve}) and the chat
-#' object returned here will dispatch over HTTP locally.
 #'
 #' @param name Provider name. One of \code{"anthropic"} (Claude),
 #'   \code{"openai"} (GPT-4o family), \code{"google"} (Gemini),
diff --git a/R/vlm-types.R b/R/vlm-types.R
new file mode 100644
index 000000000..e67c886ca
--- /dev/null
+++ b/R/vlm-types.R
@@ -0,0 +1,101 @@
+# =============================================================================
+# v0.9.70 -- ellmer structured-output bridge.
+#
+# When a provider supports structured outputs (Anthropic tool calls,
+# OpenAI response_format = json_schema, Ollama 0.5+ format = json_schema,
+# Gemini structured output), we can ask the model to emit JSON that is
+# *guaranteed* to validate against our soilKey schemas. That removes
+# the entire class of "model returned prose / wrong shape" errors that
+# the v0.9.66/v0.9.68 retry loop was working around.
+#
+# ellmer 0.3+ exposes a uniform API for this:
+#
+#   chat$chat_structured(prompt, type = type_object(...))
+#
+# `type_from_schema()` (also ellmer) reads a JSON schema file and
+# returns the matching ellmer type tree. This lets us reuse our
+# existing inst/schemas/*.json verbatim.
+# =============================================================================
+
+
+#' ellmer type tree for a soilKey extraction schema
+#'
+#' Reads `inst/schemas/<name>.json` and converts it to an ellmer
+#' `type_object()` via `ellmer::type_from_schema()`.
Not cached: rebuilt on
+#' each call (cheap, since the schema files are < 5 KB each).
+#'
+#' Used by [validate_or_retry()] when `use_structured = TRUE`: instead
+#' of calling `provider$chat()` and parsing JSON manually, the
+#' provider gets called via `chat_structured(prompt, type = <type>)`
+#' and returns an R list whose shape is provider-validated.
+#'
+#' @param name Schema base name -- one of `"horizon"`, `"site"`,
+#'   `"pedon-schema"`. Without `.json`.
+#'
+#' @return An ellmer type object (class inheriting from
+#'   `ellmer::Type`).
+#'
+#' @examples
+#' \dontrun{
+#' if (requireNamespace("ellmer", quietly = TRUE)) {
+#'   t <- vlm_type_from_soilkey_schema("horizon")
+#'   t  # prints the type tree
+#' }
+#' }
+#' @seealso [validate_or_retry()] (which uses this when `use_structured = TRUE`),
+#'   [`ellmer::type_from_schema`].
+#' @export
+vlm_type_from_soilkey_schema <- function(name) {
+  if (!is.character(name) || length(name) != 1L || !nzchar(name)) {
+    rlang::abort("'name' must be a non-empty character scalar.")
+  }
+  if (!requireNamespace("ellmer", quietly = TRUE)) {
+    rlang::abort(paste0(
+      "ellmer is required for structured-output extraction. Install ",
+      "with install.packages('ellmer')."
+    ))
+  }
+  if (!exists("type_from_schema", envir = asNamespace("ellmer"),
+              inherits = FALSE)) {
+    rlang::abort(paste0(
+      "Your ellmer version does not export type_from_schema(). ",
+      "Install ellmer >= 0.3.0 with ",
+      "remotes::install_github('tidyverse/ellmer')."
+    ))
+  }
+  schema_file <- system.file("schemas", paste0(name, ".json"),
+                             package = "soilKey")
+  if (!nzchar(schema_file) || !file.exists(schema_file)) {
+    schema_file <- file.path("inst", "schemas", paste0(name, ".json"))
+  }
+  if (!file.exists(schema_file)) {
+    rlang::abort(sprintf("Schema not found: %s.json", name))
+  }
+  ellmer::type_from_schema(path = schema_file)
+}
+
+
+#' Does a provider support `chat_structured()`?
+#'
+#' Quick capability probe.
Returns TRUE when the provider exposes a
+#' `chat_structured` method (ellmer Chat object built for an LLM that
+#' supports structured outputs). Used internally by
+#' [validate_or_retry()] to decide whether the structured-output path
+#' is available.
+#'
+#' Mock providers and any non-ellmer chat objects return FALSE here,
+#' so the structured-output flag degrades gracefully to the legacy
+#' chat-and-validate loop.
+#'
+#' @param provider The provider to probe.
+#' @return Logical scalar.
+#' @keywords internal
+.provider_supports_structured <- function(provider) {
+  if (is.null(provider)) return(FALSE)
+  # ellmer Chat objects expose chat_structured as a method via R6.
+  has_method <- tryCatch(
+    is.function(provider$chat_structured),
+    error = function(e) FALSE
+  )
+  isTRUE(has_method)
+}
diff --git a/R/vlm-validate.R b/R/vlm-validate.R
index 0f43335cc..4d618deb0 100644
--- a/R/vlm-validate.R
+++ b/R/vlm-validate.R
@@ -68,14 +68,25 @@ strip_code_fence <- function(text) {
 #' @param image Optional \code{ellmer} image content object (e.g.
 #'   from \code{ellmer::content_image_file}) to pass alongside
 #'   the prompt for multimodal calls.
+#' @param use_structured Logical (default \code{FALSE}). When TRUE
+#'   and the provider supports \code{chat_structured()}
+#'   (Anthropic / OpenAI / Ollama 0.5+ / Gemini), skips the
+#'   chat-and-parse loop entirely: the provider receives the
+#'   ellmer type tree built from \code{inst/schemas/<schema>.json}
+#'   and returns a structurally-validated R list. Falls back to
+#'   the legacy retry loop when the provider has no
+#'   \code{chat_structured} method.
 #' @return A list with elements \code{data} (parsed R object),
-#'   \code{raw} (character scalar), \code{attempts} (integer).
+#'   \code{raw} (character scalar; NA when structured path was
+#'   used), \code{attempts} (integer), and (only when the
+#'   structured path fired) \code{used_structured = TRUE}.
#' @keywords internal
validate_or_retry <- function(provider,
                              prompt,
                              schema,
                              max_retries = 3L,
-                             image = NULL) {
+                             image = NULL,
+                             use_structured = FALSE) {

  if (!requireNamespace("jsonlite", quietly = TRUE)) {
    rlang::abort("Package 'jsonlite' is required for VLM extraction.")
@@ -88,6 +99,34 @@ validate_or_retry <- function(provider,
    ))
  }

+  # ---- v0.9.70: structured-output fast path ------------------------------
+  # When the caller asks for it AND the provider supports it (ellmer Chat
+  # with chat_structured), skip the chat -> JSON parse -> schema validate
+  # loop entirely: chat_structured() returns a parsed R list whose shape
+  # is provider-validated against the type tree we hand it.
+  if (isTRUE(use_structured) && .provider_supports_structured(provider)) {
+    type_tree <- tryCatch(
+      vlm_type_from_soilkey_schema(schema),
+      error = function(e) NULL
+    )
+    if (!is.null(type_tree)) {
+      data <- tryCatch({
+        if (is.null(image)) {
+          provider$chat_structured(prompt, type = type_tree)
+        } else {
+          provider$chat_structured(prompt, image, type = type_tree)
+        }
+      }, error = function(e) {
+        rlang::abort(sprintf(
+          paste0("VLM structured call failed: %s. Set use_structured ",
+                 "= FALSE to fall back to the legacy retry loop."),
+          conditionMessage(e)
+        ))
+      })
+      return(list(data = data, raw = NA_character_, attempts = 1L,
+                  used_structured = TRUE))
+    }
+  }
+
  current_prompt <- prompt
  last_error <- NULL
  attempts <- 0L
diff --git a/R/zzz.R b/R/zzz.R
index 26ee1453b..c1f0c4130 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -11,6 +11,95 @@
 }

+
+# v0.9.66 -- .onAttach() suggests / triggers local Gemma setup.
+#
+# Behaviour matrix (interactive sessions only; non-interactive R is
+# always silent):
+#
+#   Ollama not installed          -> silent (no startup spam)
+#   Ollama installed but stopped  -> hint to start the daemon
+#                                    (ollama serve / setup_local_vlm())
+#   Daemon running, model present -> brief "ready" hint
+#   Daemon running, model missing -> hint to call setup_local_vlm("light")
+#                                    OR auto-pull when the user opted in
+#                                    via options(soilKey.auto_setup_vlm = TRUE).
+# +# Suppress everything with options(soilKey.suggest_local_vlm = FALSE). +# +# CRAN-compliance: never auto-modifies the user's machine without an +# explicit options() opt-in (CRAN Repository Policy 1.1 forbids +# packages writing to the system on attach). +.onAttach <- function(libname, pkgname) { + if (!interactive()) return(invisible()) + if (isFALSE(getOption("soilKey.suggest_local_vlm", default = TRUE))) { + return(invisible()) + } + msg <- .suggest_local_vlm_message(target_model = "gemma4:e2b") + if (nzchar(msg)) packageStartupMessage(msg) + + # Opt-in auto-pull. Default OFF; user enables via either: + # options(soilKey.auto_setup_vlm = TRUE) + # Sys.setenv(SOILKEY_AUTO_SETUP_VLM = "1") + auto <- isTRUE(getOption("soilKey.auto_setup_vlm", + default = identical(Sys.getenv("SOILKEY_AUTO_SETUP_VLM"), + "1"))) + if (!auto) return(invisible()) + if (!ollama_is_installed() || !ollama_is_running()) return(invisible()) + models <- tryCatch(ollama_list_local_models(), error = function(e) character(0)) + if ("gemma4:e2b" %in% models) return(invisible()) + packageStartupMessage( + "soilKey: auto_setup_vlm = TRUE -- pulling gemma4:e2b in background..." + ) + # Background pull so package attach is not blocked. The user sees + # progress next time they open `ollama ps` or call setup_local_vlm(). + tryCatch( + system2("ollama", args = c("pull", "gemma4:e2b"), + stdout = FALSE, stderr = FALSE, wait = FALSE), + error = function(e) invisible() + ) +} + + +#' Build the local-VLM suggestion shown by .onAttach +#' +#' Pure function (no side effects). Returns the multi-line string +#' that .onAttach would print, given the current Ollama state. +#' Factored out for testability: the unit tests exercise this with +#' stubbed inputs instead of touching the real Ollama daemon. +#' +#' @param target_model Ollama model identifier soilKey wants to see. +#' @return Character scalar -- the message body, or `""` when no +#' message is appropriate (e.g. 
Ollama not installed at all -- no +#' point nagging the user). +#' @keywords internal +.suggest_local_vlm_message <- function(target_model = "gemma4:e2b") { + if (!ollama_is_installed()) return("") # silent: avoid uninstalled-tool nagging + if (!ollama_is_running()) { + return(paste0( + "soilKey: Ollama installed but daemon stopped. Run ", + "`soilKey::setup_local_vlm(\"light\")` (or `ollama serve`) to ", + "enable the local VLM agent. Suppress with ", + "options(soilKey.suggest_local_vlm = FALSE)." + )) + } + models <- tryCatch(ollama_list_local_models(), + error = function(e) character(0)) + if (target_model %in% models) { + return(paste0( + "soilKey: local VLM ready (", target_model, + "). Launch the agent with `soilKey::run_agent_app()`." + )) + } + paste0( + "soilKey: Ollama detected, but `", target_model, "` (~6.7 GB on ", + "disk) is not yet pulled. Run ", + "`soilKey::setup_local_vlm(\"light\")` once to enable the local ", + "VLM agent, or set options(soilKey.auto_setup_vlm = TRUE) to let ", + "soilKey pull it next time the package is attached. Suppress this ", + "hint with options(soilKey.suggest_local_vlm = FALSE)." 
+ ) +} + + #' Auto-detect PROJ_LIB and GDAL_DATA directories #' #' Probes the common system locations for PROJ \code{proj.db} and diff --git a/README.md b/README.md index 6cd0ceafb..a716327ce 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ # soilKey soilKey hex sticker — a key over a stratified soil profile, with a sapling emerging from the top and a decision-tree circuit on the right [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg?style=flat-square)](https://lifecycle.r-lib.org/articles/stages.html) -![v0.9.40](https://img.shields.io/badge/version-0.9.40-FF6B35?style=flat-square) +![v0.9.71](https://img.shields.io/badge/version-0.9.71-FF6B35?style=flat-square) > **Automated soil profile classification under WRB 2022 (4th ed.), USDA Soil Taxonomy (13th ed.), and the Brazilian SiBCS (5ª edição).** All three systems wired end-to-end down to the deepest categorical level. Multimodal extraction, spatial priors, OSSL spectroscopy and explicit per-attribute provenance — without ever delegating the taxonomic key to a language model. 
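The per-attribute provenance this blockquote promises has a concrete shape: every extracted attribute travels as a `{value, confidence, source_quote}` wrapper (the few-shot prompt section below spells this out). A minimal base-R sketch of a wrapper check; the helper `is_provenance_wrapped()` is hypothetical, for illustration only:

```r
# Hypothetical helper (not part of soilKey): checks the {value, confidence,
# source_quote} provenance wrapper that the extractors emit per attribute.
is_provenance_wrapped <- function(x) {
  is.list(x) &&
    all(c("value", "confidence", "source_quote") %in% names(x))
}

clay <- list(value = 43, confidence = 0.93,
             source_quote = "argila 430 g/kg")
is_provenance_wrapped(clay)  # TRUE
is_provenance_wrapped(43)    # FALSE -- a bare value carries no provenance
```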
@@ -12,7 +12,7 @@
 [![CRAN status](https://img.shields.io/badge/CRAN-pending-yellow.svg?style=flat-square)](https://CRAN.R-project.org/package=soilKey)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.19930112.svg)](https://doi.org/10.5281/zenodo.19930112)
 [![R-CMD-check](https://github.com/HugoMachadoRodrigues/soilKey/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/HugoMachadoRodrigues/soilKey/actions/workflows/R-CMD-check.yaml)
-[![tests](https://img.shields.io/badge/tests-3137%20passing-brightgreen.svg?style=flat-square)](tests/)
+[![tests](https://img.shields.io/badge/tests-3888%20passing-brightgreen.svg?style=flat-square)](tests/)
 [![coverage](https://img.shields.io/badge/coverage-80.5%25-brightgreen.svg?style=flat-square)](https://app.codecov.io/gh/HugoMachadoRodrigues/soilKey?branch=main)
 [![WRB 2022](https://img.shields.io/badge/WRB%202022-32%2F32%20RSGs-blue.svg?style=flat-square)](#-coverage)
 [![SiBCS 5](https://img.shields.io/badge/SiBCS%205-13%2F13%20ordens%20%C2%B7%20938%20SGs-blue.svg?style=flat-square)](#-coverage)
@@ -51,6 +51,148 @@ WRB delivers the **complete Chapter 6 name** — four principal qualifiers + fiv
 
 ---
 
+## ✦ What's new in v0.9.71 (2026-05-06) — **Phase 2 done: production-ready VLM stack**
+
+Bundles three coherent improvements that together close out the Phase-2 roadmap:
+
+| | |
+|---|---|
+| **(A) 8 BDsolos hard fixtures** | Generated via `make_synthetic_horizons_fixture()` from RJ pedons selected by SiBCS Ordem (Argissolo, Cambissolo, Chernossolo, Espodossolo, Gleissolo, Latossolo, Neossolo, Planossolo). Each is a real BDsolos pedon's full horizon table rendered as Markdown. Located in `inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_*`. |
+| **(B) ellmer `chat_structured()` bridge** | New `vlm_type_from_soilkey_schema(name)` wraps `ellmer::type_from_schema()` reading `inst/schemas/<name>.json`.
`validate_or_retry(..., use_structured = TRUE)` short-circuits the chat-and-parse-and-retry loop when the provider supports it (Anthropic tool calls / OpenAI `response_format = json_schema` / Ollama 0.5+ `format = json_schema` / Gemini structured output). Removes the entire class of "model returned prose / wrong shape" failures at the protocol level. |
+| **(C) Production polish** | `extract_horizons_from_pdf()` now shows a `cli::cli_progress_bar()` for multi-chunk PDFs. `agent_app()` sidebar adds "Estratégia de extração" with checkboxes for `use_fewshot` and `use_structured`, both wired through to every `extract_*()` call. Model preset labels corrected to v0.9.67 measured sizes (`light` = ~6.7 GB, `balanced` = ~8 GB, `best` = ~19 GB). |
+
+```r
+# A: stress-test gemma4:e2b on real BDsolos pedons (8 fixtures × 3 reps × ~30 s = 12 min)
+benchmark_vlm_extraction(
+  providers = list(gemma_e2b = list(name = "ollama", model = "gemma4:e2b")),
+  tasks = "horizons",
+  use_fewshot = TRUE,
+  n_repeats = 3L
+)$summary
+
+# B: turn on protocol-level schema validation (Anthropic / OpenAI / Ollama 0.5+)
+extract_horizons_from_pdf(pedon, pdf_text = txt,
+                          provider = vlm_provider("anthropic"),
+                          use_structured = TRUE)
+
+# C: agent_app exposes both flags as sidebar checkboxes
+run_agent_app()
+```
+
+20 new tests covering the type builder, capability probe, structured fast path, fallback path, and parameter propagation through the extractor family. Total: **3 888 passing** / 0 failing / 21 skipped.
+
+---
+
+## ✦ What's new in v0.9.68 (2026-05-06) — **Phase 2: few-shot demonstrations**
+
+Schema-correct worked-example prompts for the three extraction tasks (horizons, site-from-text, Munsell-from-photo), a `use_fewshot` parameter (default `TRUE`) on every extractor, an `n_repeats` parameter on the benchmark for proper variance characterisation, plus a new harder bundled fixture (multi-horizon Chernossolo BA with PT-BR comma decimals, mixed Munsell úmida/seca, CaCO3 equivalents).
+
+```r
+# Few-shot on by default in v0.9.68:
+extract_horizons_from_pdf(pedon, pdf_text = txt, provider = vlm_provider("ollama"))
+
+# A/B against the bare-instructions baseline:
+benchmark_vlm_extraction(
+  providers = list(gemma_e2b = list(name = "ollama", model = "gemma4:e2b")),
+  tasks = "horizons",
+  use_fewshot = FALSE,  # baseline
+  n_repeats = 3L        # capture stochastic variance
+)
+```
+
+| | |
+|---|---|
+| **Few-shot prompts** | `inst/prompts/extract_horizons_fewshot.md` + `extract_site_from_text_fewshot.md` + `extract_munsell_from_photo_fewshot.md` — 2 worked examples each, in the **schema-correct mixed shape** (`top_cm`/`bottom_cm`/`designation`/`id` raw; `munsell_moist` single wrapper holding hue+value+chroma; everything else wrapped `{value, confidence, source_quote}`). |
+| **`use_fewshot` parameter** | New on `extract_horizons_from_pdf()`, `extract_munsell_from_photo()`, and `benchmark_vlm_extraction()`. Default `TRUE` in v0.9.68; pass `FALSE` for the bare-instructions baseline. |
+| **`n_repeats` parameter** | New on `benchmark_vlm_extraction()`. Runs each fixture N times; summary reports `metric_*_mean` AND `metric_*_sd` per (provider × task). Required to distinguish stochastic LLM noise from real lift. |
+| **Harder fixture** | `perfil_BA_chernossolo_messy` — 4-horizon Chernossolo Argilúvico Carbonático from a real Bahia survey: PT-BR comma decimal `pH = 5,4`, UTM-noted-then-converted coords, mixed Munsell úmida/seca, CaCO3 equivalents. Smoke-tested at v0.9.68: **precision = 1.00, recall = 1.00, attr_match = 0.79** with `gemma4:e2b` + few-shot. |
+
+### Honest measurement finding
+
+Few-shot **doesn't move the needle on simple fixtures** because vanilla `gemma4:e2b` already nails them — the 50% ok-rate I'd reported in v0.9.66 was stochastic variance, not a real failure mode (which is exactly what `n_repeats` now exposes). Few-shot **doesn't regress quality either** — and the harder `Chernossolo BA` fixture confirms Gemma 4 e2b handles non-toy PT-BR profiles cleanly.
Real lift will surface from (a) more `n_repeats`, (b) harder fixtures from BDsolos/FEBR via `make_synthetic_horizons_fixture()`, or (c) smaller future Gemma builds where the persona prompt isn't enough on its own. + +--- + +## ✦ What's new in v0.9.66 (2026-05-06) — **Phase 1: VLM extraction benchmark** + +A measurable baseline for the local Gemma 4 stack — the input we needed before deciding whether to invest in few-shot demos (Phase 2) or LoRA fine-tuning (Phase 3). + +```r +# Compare local Gemma 4 vs cloud reference: +bench <- benchmark_vlm_extraction( + providers = list( + gemma_e2b = list(name = "ollama", model = "gemma4:e2b"), + claude = list(name = "anthropic") + ), + tasks = c("horizons", "site") +) +bench$summary +``` + +| | | +|---|---| +| **`benchmark_vlm_extraction()`** | Provider-agnostic harness over 3 tasks (`horizons` / `site` / `munsell`) × per-task metrics (precision+recall+attr-match / IoU+value-accuracy / CIE ΔE 2000). Returns long-format `predictions` + per-(provider × task) `summary` table. Accepts `MockVLMProvider` for unit tests. | +| **`make_synthetic_horizons_fixture()`** | Renders any `PedonRecord` back into a Markdown profile description and emits the structured horizons as the golden answer — lets you scale the horizons fixture set from BDsolos / FEBR / KSSL data. | +| **Bundled fixtures** | `inst/fixtures/vlm_extraction/{horizons,site,munsell}/` ships 4 paired text fixtures (Argissolo RJ + Latossolo MG profile descriptions; ficha de campo RJ + MG). Munsell tab waits for user-supplied photos (CRAN size + licence policy). | +| **`.onAttach()` opt-in** | Prints a one-line hint suggesting `setup_local_vlm("light")` when Ollama is detected but `gemma4:e2b` is missing. Auto-pull only with `options(soilKey.auto_setup_vlm = TRUE)` (CRAN-compliant: never modifies system without explicit consent). | +| **Persona text-mode prompt** | New `inst/prompts/extract_site_from_text.md` companion to the image-mode site prompt. 
Required because the original prompt explicitly says "Supplied as an image content block" and Gemma returns all-null when fed text. |
+
+### Baseline measured on this laptop (gemma4 8B, M1)
+
+| Task | Fixture | precision / IoU | recall / value-acc | attr-match |
+|------|---------|-----------------|--------------------|-----------|
+| `horizons` | Latossolo MG | **1.00** | **1.00** | **1.00** |
+| `horizons` | Argissolo RJ | **1.00** | **1.00** | **1.00** |
+| `site` | Ficha MG | 0.79 | 1.00 | 0.79 |
+| `site` | Ficha RJ | 0.87 | 0.92 | 0.87 |
+
+Read: **horizons extraction is solved** for clean PT-BR text profiles (vanilla Gemma + persona). **Site extraction is ~83 % IoU and ~96 % value-accuracy on matched fields** — gaps are inferred fields (`country: BR` from a Brazilian state) that the 8B model misses but a 32B/Claude would catch. This is the input for Phase 2 (few-shot demos) and Phase 3 (LoRA fine-tune).
+
+Vignette walkthrough: [`v11_vlm_extraction_benchmark`](vignettes/v11_vlm_extraction_benchmark.Rmd).
+
+---
+
+## ✦ What's new in v0.9.65 (2026-05-06) — **Agente Pedometrista**
+
+A modern bslib-themed Shiny UI that wires a **local Gemma 4 multimodal VLM** (via Ollama) to the deterministic taxonomic key. Photo, PDF, field-sheet image and Vis-NIR spectrum each become a one-click extraction tab; the result is classified across **WRB 2022 + SiBCS 5ª ed. + USDA Soil Taxonomy 13ed** in the same session.
+
+```r
+# One-call setup of the local stack (downloads Gemma 4 e2b, ~6.7 GB on disk):
+soilKey::setup_local_vlm("light")
+
+# Launch the agent:
+soilKey::run_agent_app()
+```
+
+| | |
+|---|---|
+| **`setup_local_vlm()`** | Idempotent bootstrap: detects Ollama, starts the daemon, pulls the model. On-disk sizes (multimodal builds bundle a ~5 GB vision encoder on top of the parameter weights): `light` = `gemma4:e2b` (~6.7 GB), `balanced` = `gemma4:e4b` (~8 GB), `best` = `gemma4:31b` (~19 GB).
|
+| **`run_agent_app()`** | Modern bslib `page_navbar()` UI with 8 tabs: 📷 Foto Munsell · 📄 PDF/Texto · 📋 Ficha de Campo · 🌈 Espectros · 📊 Tabela editável · 🌱 Classificar (3 side-by-side cards) · 🔍 Trace · 💬 Pergunte ao Pedometrista. |
+| **`pedologist_system_prompt()`** | PT-BR / EN persona injected as the `system_prompt` of every VLM provider. Instructs the LLM to **NEVER** classify — only to extract schema-validated JSON, with per-attribute `confidence` and `source_quote`. |
+| **Local-first by default** | Fallback order: Ollama → Anthropic → OpenAI → Google. For sensitive photos, geolocated field sheets and internal PDFs, **nothing leaves the machine** — recommended for government research, indigenous lands and pre-publication data. |
+| Provider sidebar | Real-time badges: Ollama installed / daemon running / models available. The "Configurar Gemma local" button triggers `setup_local_vlm()` with a progress modal. |
+
+Vignette walkthrough: [`v10_agente_pedometrista`](vignettes/v10_agente_pedometrista.Rmd).
+
+> **Non-negotiable principle:** the LLM never classifies. It only extracts structured data; the taxonomic key remains 100 % deterministic R, with versioned YAML rules.
+
+---
+
+## ✦ What's new in v0.9.62 (2026-05-04)
+
+The v0.9.55 → v0.9.62 release series wires the Brazilian SiBCS classifier to the two canonical pedologist-curated corpora (Embrapa BDsolos and FEBR), validates the classifier against ~9 000 surveyor-labelled profiles and consolidates the two repositories into a single deduped super-dataset:
+
+- **v0.9.55** — `R/bdsolos.R`: `load_bdsolos_csv(path)` ingests Embrapa BDsolos full-export CSVs (~9 000 profiles from 27 UFs, semicolon-delimited, preamble + 222+ columns). Auto-detects column convention via regex, supports both classic and SmartSolos-derived schemas. `inspect_bdsolos_csv(path)` prints the schema, soilKey mapping and unmapped columns; `download_bdsolos()` is a best-effort headless-Chrome driver via `chromote`.
+- **v0.9.57** — `R/febr.R`: `read_febr_pedons(dataset_codes)` wraps `febr::readFEBR` and adapts the FEBR `camada` table to the soilKey schema. Auto-detects the ~6 distinct Munsell column conventions used across the 200 FEBR datasets that carry colour data (36 275 horizons total). `febr_index_munsell()` returns the curated catalog of Munsell-bearing FEBR datasets. +- **v0.9.58–v0.9.59** — full BDsolos export schema support (~222 cols, DMS coordinates, `read.csv2` fallback for malformed UTF-8 in 7 of 27 state CSVs). +- **v0.9.60** — [`benchmark_bdsolos_sibcs()`](R/benchmark-bdsolos.R): mirror of `benchmark_lucas_2018()` but for the BDsolos corpus. Runs `classify_sibcs()` on each pedon, compares predicted Ordem to the surveyor's reference (BDsolos Classe de Solos Nivel 1/2/3), returns `predictions` data.frame, `confusion` matrix, `per_ordem` recall and `summary` (n_total, n_in_scope, n_matched, n_errors, n_unmapped). Also ships `.bdsolos_normalize_ordem()` mapping modern (`ARGISSOLO` → `Argissolos`) and pre-1999 legacy names (`PODZOLICO`, `LATOSOL`, `GLEI`, `BRUNIZEM`, `ALUVIAL`, …) to the modern SiBCS Ordens. Smoke test on 100 RJ pedons: **34 % Ordem accuracy** (Argissolos 67.6 % recall, healthy baseline; 0 % recall on Latossolos / Gleissolos / Espodossolos identified as priorities). +- **v0.9.61** — [`R/sibcs-color-tuning.R`](R/sibcs-color-tuning.R): replaces the SiBCS subordem first-match-wins rule for colour-driven Ordens (Argissolos / Latossolos / Nitossolos) with a **thickness-weighted dominant-colour-in-B** rule. `.classify_b_color()` partitions Munsell triplets into 5 categories (`VERMELHO`, `VERMELHO_AMARELO`, `AMARELO`, `BRUNO_ACINZENTADO`, `ACINZENTADO`); `.dominant_b_color()` walks every B-like horizon and sums thickness per category; `.apply_color_dominant_override()` swaps the YAML-assigned subordem when the dominant disagrees. Wired into `classify_sibcs()` between subordem assignment and the v0.9.45 *cor a determinar* fallback. 
The benchmark now also reports `accuracy_subordem` over canonical 2-3 letter SiBCS codes via `.bdsolos_normalize_subordem()`.
+- **v0.9.62** — [`R/merge-brazilian.R`](R/merge-brazilian.R): `merge_brazilian_pedons(bdsolos, febr, prefer)` joins the BDsolos and FEBR PedonRecord lists via `site$sisb_id` (BDsolos `Codigo PA` ≡ FEBR `observacao$sisb_id`) and emits a single deduped super-list with provenance tags (`site$merge_decision` ∈ {`kept_bdsolos`, `kept_febr`, `unique`}). `summarize_brazilian_overlap()` is a dry-run diagnostic. Empirical RJ scan: **590 of 722 BDsolos pedons (82 %) match a FEBR sisb_id** — naïve concat of 1 606 → after merge **1 016 distinct pedons**.
+
+The full per-release diff lives in [`NEWS.md`](NEWS.md). The Brazilian super-dataset now slots into the same benchmarking pipeline used for LUCAS Soil 2018 (WRB 2022) and KSSL+NASIS (USDA Soil Taxonomy 13ed).
+
+---
+
 ## ✦ What's new in v0.9.27 (2026-05-03)
 
 The v0.9.24 → v0.9.27 release series progressively closed key reasoning gaps in USDA Soil Taxonomy 13ed and validated the gains against three real-data benchmarks (KSSL+NASIS, FEBR/Embrapa, WoSIS):
@@ -456,7 +598,7 @@ Distance is computed in PLS-score space when `resemble` is installed (matching t
 
 soilKey separates **extraction** (multimodal LLM) from **classification** (deterministic R code driven by versioned YAML rules). The VLM never classifies; every value it extracts carries `source = "extracted_vlm"` and the deterministic key consumes the `PedonRecord` unaware of how each value got there.
 
-The default local stack uses **Gemma 4** via [Ollama](https://ollama.com) (`gemma4:e4b`, ~3 GB, multimodal text+image+audio). Cloud providers (`anthropic` / `openai` / `google`) remain one argument away.
The full canonical pipeline -- *extract from PDF + extract Munsell from photo + classify in three systems + render report* -- is one function call: +The default local stack uses **Gemma 4** via [Ollama](https://ollama.com) (`gemma4:e2b`, ~6.7 GB on disk, multimodal text+image; the multimodal build bundles a vision encoder ~5 GB beyond the bare parameter weights). Cloud providers (`anthropic` / `openai` / `google`) remain one argument away. The full canonical pipeline -- *extract from PDF + extract Munsell from photo + classify in three systems + render report* -- is one function call: ```r library(soilKey) @@ -646,4 +788,4 @@ SOFTWARE. --- -**Status**: CRAN-ready, v0.9.27 (2026-05-03). `R CMD check` returns **Status: OK** — 0 errors / 0 warnings / 0 notes. Test suite **2 908 passing / 0 failing / 10 expected skips**. [GitHub Actions](https://github.com/HugoMachadoRodrigues/soilKey/actions) green across the 5 OS×R matrix. **All three classification systems wired end-to-end down to the deepest categorical level** — WRB 2022 (32 RSGs + qualifiers + supplementary + specifiers), SiBCS 5ª ed. (Ordem → Subordem → Grande Grupo → Subgrupo → Família, ~1 200 classes), USDA Soil Taxonomy 13ed (Order → Suborder → Great Group → Subgroup, ~1 700 classes). **v0.9.27 highlights**: clay-illuviation evidence test (NASIS `pediagfeatures` argillic flag + per-horizon `clay_films_amount` from `phpvsf`); per-system argic clay-increase threshold API (WRB 6/1.4/20 vs KST 13ed 3/1.2/8); FEBR / Embrapa benchmark normalisation fix (Order **+16.1 pp** v0.9.22 → v0.9.27 = 56.7 %); WoSIS GraphQL retry+fallback for ISRIC server intermittency. **v0.9.25** shipped the KST 13ed Great Group canonicaliser collapsing pre-13ed legacy KSSL labels (Pellusterts → Hapluderts; Haplaquolls → Endo/Epi-Aquolls; Camborthids → Haplocambids; etc.) — KSSL+NASIS **Great Group +3.84 pp** in a single release. 
Headline real-data benchmark at large scale (n=2 638, ±1.7 pp CI): **Order 34.2 %**, **Suborder 13.9 %**, **Great Group 7.9 %**, **Subgroup 4.2 %** (the first public USDA Soil Taxonomy benchmark resolving every level of the hierarchy on real lab data). **DOI**: [10.5281/zenodo.19930112](https://doi.org/10.5281/zenodo.19930112) (resolves to the latest version on Zenodo). Per-release changes in [`NEWS.md`](NEWS.md); roadmap in [`ARCHITECTURE.md` §12](ARCHITECTURE.md#12-roadmap-de-implementação); CRAN submission instructions in [`inst/cran-submission/HOW_TO_SUBMIT.md`](inst/cran-submission/HOW_TO_SUBMIT.md). +**Status**: CRAN-ready, v0.9.71 (2026-05-06). `R CMD check` returns **Status: OK** — 0 errors / 0 warnings / 0 notes. Test suite **3 888 passing / 0 failing / 21 expected skips**. **v0.9.71 highlights**: 8 BDsolos hard fixtures (1 per SiBCS Ordem); ellmer `chat_structured()` bridge via `vlm_type_from_soilkey_schema()` — `use_structured = TRUE` removes JSON validation errors at protocol level (Anthropic / OpenAI / Ollama 0.5+ / Gemini); `agent_app()` sidebar exposes few-shot + structured toggles; `cli::cli_progress_bar()` in chunked PDF extraction. **v0.9.68 highlights**: Phase-2 few-shot prompts for the 3 extraction tasks (schema-correct mixed-shape worked examples); opt-in `use_fewshot` parameter on every extractor + benchmark; `n_repeats` parameter for variance characterisation; new harder bundled fixture (Chernossolo BA, 4 horizons, PT-BR comma decimals + mixed Munsell úmida/seca + CaCO3) — gemma4:e2b + few-shot scores **precision = 1.00, recall = 1.00, attr_match = 0.79** on it. Honest finding: few-shot **doesn't move the needle on simple fixtures** (vanilla gemma4:e2b already nails them; the v0.9.66 50% ok-rate was stochastic noise) but **doesn't regress quality**. Real lift will surface from harder fixtures. 
**v0.9.66 highlights**: `benchmark_vlm_extraction()` Phase-1 harness (precision/recall/attr-match for horizons; IoU/value-acc/recall for site; CIE ΔE 2000 for Munsell); 4 bundled text fixtures + Munsell photo-fixture spec; baseline measured on local Gemma 4 8B = **100% horizons / ~83% site IoU / ~96% site value-accuracy**; `.onAttach()` CRAN-compliant local-VLM hint with opt-in auto-pull. **v0.9.65 highlights**: `setup_local_vlm()` one-call bootstrap of Ollama + Gemma 4 (`light`/`balanced`/`best` presets); `run_agent_app()` modern bslib Shiny UI with 8 tabs (foto / PDF / ficha de campo / espectro / tabela / classificar / trace / chat com pedometrista); `pedologist_system_prompt()` persona PT-BR / EN; default Ollama model lowered to `gemma4:e2b` (~6.7 GB on disk after pull; multimodal Gemma 4 builds bundle a ~5 GB vision encoder on top of the parameter weights — corrected in v0.9.67). [GitHub Actions](https://github.com/HugoMachadoRodrigues/soilKey/actions) green across the 5 OS×R matrix. **All three classification systems wired end-to-end down to the deepest categorical level** — WRB 2022 (32 RSGs + qualifiers + supplementary + specifiers), SiBCS 5ª ed. (Ordem → Subordem → Grande Grupo → Subgrupo → Família, ~1 200 classes), USDA Soil Taxonomy 13ed (Order → Suborder → Great Group → Subgroup, ~1 700 classes). 
**v0.9.55 → v0.9.62 highlights** (the Brazilian benchmark series): `load_bdsolos_csv()` ingests the full Embrapa BDsolos export (~9 000 perfis, 27 UFs, full morphology + Munsell + chemistry + surveyor's SiBCS); `read_febr_pedons()` wraps `febr::readFEBR` with auto-detection of the ~6 distinct Munsell column conventions across the 200 FEBR datasets that carry colour data (36 275 horizons); `benchmark_bdsolos_sibcs()` runs `classify_sibcs()` against ~9 000 surveyor-labelled profiles and computes per-Ordem recall (RJ smoke test: **34 % Ordem accuracy**, Argissolos 67.6 % recall); the SiBCS classifier replaces first-match-wins with a **thickness-weighted dominant-colour-in-B** rule for Argissolos / Latossolos / Nitossolos; `merge_brazilian_pedons()` deduplicates BDsolos × FEBR via `site$sisb_id` (RJ overlap: 590 / 722 BDsolos pedons match a FEBR sisb_id, 1 606 → 1 016 distinct after merge). **v0.9.27 USDA highlights** still apply: clay-illuviation evidence test (NASIS `pediagfeatures` argillic flag + per-horizon `clay_films_amount`); per-system argic clay-increase threshold API (WRB 6/1.4/20 vs KST 13ed 3/1.2/8); FEBR / Embrapa benchmark **+16.1 pp** v0.9.22 → v0.9.27 (Order = 56.7 %); KSSL+NASIS Great Group **+3.84 pp** via the v0.9.25 canonicaliser. Headline USDA benchmark (n=2 638, ±1.7 pp CI): **Order 34.2 %**, **Suborder 13.9 %**, **Great Group 7.9 %**, **Subgroup 4.2 %**. **DOI**: [10.5281/zenodo.19930112](https://doi.org/10.5281/zenodo.19930112) (resolves to the latest version on Zenodo). Per-release changes in [`NEWS.md`](NEWS.md); roadmap in [`ARCHITECTURE.md` §12](ARCHITECTURE.md#12-roadmap-de-implementação); CRAN submission instructions in [`inst/cran-submission/HOW_TO_SUBMIT.md`](inst/cran-submission/HOW_TO_SUBMIT.md). 
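The golden fixtures added below are plain JSON with a `horizons` array; a quick depth-contiguity sanity check one can run over any of them. A sketch assuming only `jsonlite` and an installed copy of the package (in a source checkout, point the path at `inst/fixtures/...` instead of `system.file()`):

```r
# Sanity-check a horizons golden fixture: depths must tile the profile with
# no gaps or overlaps (each horizon starts where the previous one ends).
library(jsonlite)

f <- system.file("fixtures", "vlm_extraction", "horizons",
                 "bdsolos_RJ_argissolo_12870.golden.json",
                 package = "soilKey")
gold <- fromJSON(f)$horizons  # array of objects -> data.frame
stopifnot(
  gold$top_cm[1] == 0,
  all(gold$top_cm[-1] == gold$bottom_cm[-nrow(gold)])
)
```

For the Argissolo RJ fixture shown in this diff (0-14, 14-30, 30-45, 45-88, 88-108, 108-130, 130-147, 147-190 cm) both assertions hold.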
diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_argissolo_12870.golden.json b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_argissolo_12870.golden.json new file mode 100644 index 000000000..2fcdca3e8 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_argissolo_12870.golden.json @@ -0,0 +1,276 @@ +{ + "horizons": [ + { + "top_cm": 0, + "bottom_cm": 14, + "designation": "Ap", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 3, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "10YR", + "munsell_value_dry": 5, + "munsell_chroma_dry": 2, + "structure_grade": "Moderada a forte", + "structure_size": "Muito pequena a pequena", + "structure_type": "Granular", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 20, + "clay_pct": 20, + "silt_pct": 16, + "sand_pct": 64, + "ph_h2o": 5.7, + "ph_kcl": 4.4, + "oc_pct": 0.123, + "n_total_pct": 0.12, + "cec_cmol": 5.7, + "bs_pct": 56, + "al_sat_pct": 0, + "ca_cmol": 2.6, + "mg_cmol": 0.4, + "k_cmol": 0.18, + "na_cmol": 0.03, + "al_cmol": 0, + "fe_dcb_pct": 1.5, + "consistence_dry": "Ligeiramente dura" + }, + { + "top_cm": 14, + "bottom_cm": 30, + "designation": "A2", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 5, + "munsell_chroma_moist": 5, + "munsell_hue_dry": "10YR", + "munsell_value_dry": 7, + "munsell_chroma_dry": 4, + "structure_grade": "Fraca", + "structure_size": "Pequena a media", + "structure_type": "Granular", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 90, + "clay_pct": 22, + "silt_pct": 15, + "sand_pct": 63, + "ph_h2o": 5, + "ph_kcl": 3.7, + "oc_pct": 0.052, + "n_total_pct": 0.07, + "cec_cmol": 3.6, + "bs_pct": 33, + "al_sat_pct": 25, + "ca_cmol": 1, + "mg_cmol": 0.1, + "k_cmol": 0.03, + "na_cmol": 0.02, + "al_cmol": 0.4, + "fe_dcb_pct": 1.8, + "consistence_dry": "Macia" + }, + { + "top_cm": 30, + "bottom_cm": 45, + 
"designation": "IIB21t", + "munsell_hue_moist": "2,5YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 6, + "munsell_hue_dry": "2,5YR", + "munsell_value_dry": 5, + "munsell_chroma_dry": 8, + "structure_grade": "Moderada", + "structure_size": "Muito pequena a pequena", + "structure_type": "Blocos subangulares", + "consistence_moist": "Friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 60, + "clay_pct": 22, + "silt_pct": 15, + "sand_pct": 63, + "ph_h2o": 5.1, + "ph_kcl": 3.8, + "oc_pct": 0.05, + "n_total_pct": 0.06, + "cec_cmol": 5, + "bs_pct": 32, + "al_sat_pct": 30, + "ca_cmol": 1.3, + "mg_cmol": 0.2, + "k_cmol": 0.04, + "na_cmol": 0.05, + "al_cmol": 0.7, + "fe_dcb_pct": 3.5, + "consistence_dry": "Muito dura" + }, + { + "top_cm": 45, + "bottom_cm": 88, + "designation": "IIB22t", + "munsell_hue_moist": "2,5YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 8, + "munsell_hue_dry": "4YR", + "munsell_value_dry": 6, + "munsell_chroma_dry": 8, + "structure_grade": "Moderada", + "structure_size": "Pequena a media", + "structure_type": "Blocos subangulares", + "consistence_moist": "Friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 30, + "clay_pct": 43, + "silt_pct": 21, + "sand_pct": 36, + "ph_h2o": 5.3, + "ph_kcl": 3.8, + "oc_pct": 0.028, + "n_total_pct": 0.06, + "cec_cmol": 5.3, + "bs_pct": 30, + "al_sat_pct": 38, + "ca_cmol": 1, + "mg_cmol": 0.5, + "k_cmol": 0.02, + "na_cmol": 0.09, + "al_cmol": 1, + "fe_dcb_pct": 4.8, + "consistence_dry": "Muito dura" + }, + { + "top_cm": 88, + "bottom_cm": 108, + "designation": "IIB3t", + "munsell_hue_moist": "5YR", + "munsell_value_moist": 5, + "munsell_chroma_moist": 7, + "munsell_hue_dry": "5YR", + "munsell_value_dry": 7, + "munsell_chroma_dry": 6, + "structure_grade": "Moderada", + "structure_size": "Pequena a media", + "structure_type": "Blocos subangulares", + "consistence_moist": "Friável", + "clay_films_amount": "", + 
"clay_films_strength": "", + "coarse_fragments_pct": 40, + "clay_pct": 35, + "silt_pct": 25, + "sand_pct": 40, + "ph_h2o": 5.1, + "ph_kcl": 3.5, + "cec_cmol": 6, + "bs_pct": 22, + "al_sat_pct": 57, + "ca_cmol": 1, + "mg_cmol": 0.2, + "k_cmol": 0.02, + "na_cmol": 0.07, + "al_cmol": 1.7, + "fe_dcb_pct": 4.8, + "consistence_dry": "Muito dura" + }, + { + "top_cm": 108, + "bottom_cm": 130, + "designation": "IIC1", + "munsell_hue_moist": "5YR", + "munsell_value_moist": 6, + "munsell_chroma_moist": 8, + "munsell_hue_dry": "5YR", + "munsell_value_dry": 7, + "munsell_chroma_dry": 8, + "structure_grade": "", + "structure_size": "Media", + "structure_type": "Blocos subangulares", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 20, + "clay_pct": 31, + "silt_pct": 27, + "sand_pct": 42, + "ph_h2o": 5.1, + "ph_kcl": 3.4, + "oc_pct": 0.017, + "n_total_pct": 0.04, + "cec_cmol": 7, + "bs_pct": 17, + "al_sat_pct": 67, + "ca_cmol": 0.8, + "mg_cmol": 0.3, + "k_cmol": 0.03, + "na_cmol": 0.09, + "al_cmol": 2.4, + "fe_dcb_pct": 4.3, + "consistence_dry": "Dura" + }, + { + "top_cm": 130, + "bottom_cm": 147, + "designation": "IIC2", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 50, + "clay_pct": 24, + "silt_pct": 32, + "sand_pct": 44, + "ph_h2o": 5.2, + "ph_kcl": 3.5, + "oc_pct": 0.01, + "n_total_pct": 0.03, + "cec_cmol": 6.6, + "bs_pct": 17, + "al_sat_pct": 68, + "ca_cmol": 0.6, + "mg_cmol": 0.4, + "k_cmol": 0.04, + "na_cmol": 0.1, + "al_cmol": 2.3, + "fe_dcb_pct": 3.5, + "consistence_dry": "Dura" + }, + { + "top_cm": 147, + "bottom_cm": 190, + "designation": "IIC3", + "munsell_hue_moist": "2,5Y", + "munsell_value_moist": 7, + "munsell_chroma_moist": 5, + "munsell_hue_dry": "2,5Y", + "munsell_value_dry": 7.5, + 
"munsell_chroma_dry": 2, + "structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 20, + "clay_pct": 18, + "silt_pct": 34, + "sand_pct": 48, + "ph_h2o": 5.2, + "ph_kcl": 3.3, + "oc_pct": 0.008, + "n_total_pct": 0.03, + "cec_cmol": 10.2, + "bs_pct": 10, + "al_sat_pct": 80, + "k_cmol": 0.03, + "na_cmol": 0.16, + "al_cmol": 4.1, + "fe_dcb_pct": 4.7, + "consistence_dry": "Dura" + } + ] +} diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_argissolo_12870.txt b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_argissolo_12870.txt new file mode 100644 index 000000000..cb43c1bb9 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_argissolo_12870.txt @@ -0,0 +1,76 @@ +# Descricao do perfil + +Local: RJ, Itaguaí. +Identificacao: 12870. + +## Horizonte Ap (0 a 14 cm) + +Cor Munsell umida: 10YR 3/2. +Argila 20 %. +Silte 16 %. +Areia 64 %. +pH em agua: 5.7. +Carbono organico: 0.12 %. + +## Horizonte A2 (14 a 30 cm) + +Cor Munsell umida: 10YR 5/5. +Argila 22 %. +Silte 15 %. +Areia 63 %. +pH em agua: 5.0. +Carbono organico: 0.05 %. + +## Horizonte IIB21t (30 a 45 cm) + +Cor Munsell umida: 2,5YR 4/6. +Argila 22 %. +Silte 15 %. +Areia 63 %. +pH em agua: 5.1. +Carbono organico: 0.05 %. + +## Horizonte IIB22t (45 a 88 cm) + +Cor Munsell umida: 2,5YR 4/8. +Argila 43 %. +Silte 21 %. +Areia 36 %. +pH em agua: 5.3. +Carbono organico: 0.03 %. + +## Horizonte IIB3t (88 a 108 cm) + +Cor Munsell umida: 5YR 5/7. +Argila 35 %. +Silte 25 %. +Areia 40 %. +pH em agua: 5.1. + +## Horizonte IIC1 (108 a 130 cm) + +Cor Munsell umida: 5YR 6/8. +Argila 31 %. +Silte 27 %. +Areia 42 %. +pH em agua: 5.1. +Carbono organico: 0.02 %. + +## Horizonte IIC2 (130 a 147 cm) + +Cor Munsell umida: NA/NA. +Argila 24 %. +Silte 32 %. +Areia 44 %. +pH em agua: 5.2. +Carbono organico: 0.01 %. 
+ +## Horizonte IIC3 (147 a 190 cm) + +Cor Munsell umida: 2,5Y 7/5. +Argila 18 %. +Silte 34 %. +Areia 48 %. +pH em agua: 5.2. +Carbono organico: 0.01 %. + diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_cambissolo_5322.golden.json b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_cambissolo_5322.golden.json new file mode 100644 index 000000000..25ea512fe --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_cambissolo_5322.golden.json @@ -0,0 +1,139 @@ +{ + "horizons": [ + { + "top_cm": 10, + "bottom_cm": 40, + "designation": "2Bif", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "Moderada", + "structure_size": "Pequena", + "structure_type": "Blocos subangulares", + "consistence_moist": "Friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 20, + "clay_pct": 49, + "silt_pct": 11, + "sand_pct": 40, + "ph_h2o": 5.2, + "ph_kcl": 3.6, + "oc_pct": 0.48, + "n_total_pct": 0.4, + "cec_cmol": 12.1, + "bs_pct": 71, + "al_sat_pct": 12, + "ca_cmol": 6.2, + "mg_cmol": 2, + "k_cmol": 0.12, + "na_cmol": 0.24, + "al_cmol": 1.2, + "fe_dcb_pct": 44, + "bulk_density_g_cm3": 1.54, + "p_mehlich3_mg_kg": 7, + "consistence_dry": "" + }, + { + "top_cm": 40, + "bottom_cm": 70, + "designation": "2C", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "Fraca a moderada", + "structure_size": "Media", + "structure_type": "Blocos subangulares", + "consistence_moist": "Firme", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 20, + "clay_pct": 43, + "silt_pct": 13, + "sand_pct": 44, + "ph_h2o": 4.8, + "ph_kcl": 3.3, + "oc_pct": 0.29, + "n_total_pct": 0.3, + "cec_cmol": 10.1, + "bs_pct": 72, + "al_sat_pct": 12, + "ca_cmol": 5.7, + "mg_cmol": 1.1, + "k_cmol": 0.09, + "na_cmol": 0.42, + "al_cmol": 1, + "fe_dcb_pct": 35, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "" + }, + { + "top_cm": 70, + "bottom_cm": 90, + "designation": "3C", + 
"munsell_hue_moist": "5Y", + "munsell_value_moist": 6, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "Extremamente firme", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 10, + "clay_pct": 35, + "silt_pct": 18, + "sand_pct": 47, + "ph_h2o": 4.9, + "ph_kcl": 3.4, + "oc_pct": 0.21, + "n_total_pct": 0.3, + "cec_cmol": 13, + "bs_pct": 87, + "al_sat_pct": 4, + "ca_cmol": 7.5, + "mg_cmol": 2.7, + "k_cmol": 0.09, + "na_cmol": 0.99, + "al_cmol": 0.5, + "fe_dcb_pct": 41, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "" + }, + { + "bottom_cm": 10, + "designation": "A", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 3, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "10YR", + "munsell_value_dry": 5, + "munsell_chroma_dry": 6, + "structure_grade": "Moderada", + "structure_size": "Pequena", + "structure_type": "Granular", + "consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 20, + "clay_pct": 30, + "silt_pct": 16, + "sand_pct": 54, + "ph_h2o": 5.3, + "ph_kcl": 4.5, + "oc_pct": 1.84, + "n_total_pct": 1.5, + "cec_cmol": 9.4, + "bs_pct": 86, + "al_sat_pct": 0, + "ca_cmol": 6.3, + "mg_cmol": 1.2, + "k_cmol": 0.35, + "na_cmol": 0.2, + "al_cmol": 0, + "fe_dcb_pct": 34, + "bulk_density_g_cm3": 1.45, + "p_mehlich3_mg_kg": 7, + "consistence_dry": "Ligeiramente dura" + } + ] +} diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_cambissolo_5322.txt b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_cambissolo_5322.txt new file mode 100644 index 000000000..29f15347a --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_cambissolo_5322.txt @@ -0,0 +1,41 @@ +# Descricao do perfil + +Local: RJ, São Fidélis. +Identificacao: 5322. + +## Horizonte 2Bif (10 a 40 cm) + +Cor Munsell umida: NA/NA. +Argila 49 %. +Silte 11 %. +Areia 40 %. +pH em agua: 5.2. 
+Carbono organico: 0.48 %. + +## Horizonte 2C (40 a 70 cm) + +Cor Munsell umida: NA/NA. +Argila 43 %. +Silte 13 %. +Areia 44 %. +pH em agua: 4.8. +Carbono organico: 0.29 %. + +## Horizonte 3C (70 a 90 cm) + +Cor Munsell umida: 5Y 6/2. +Argila 35 %. +Silte 18 %. +Areia 47 %. +pH em agua: 4.9. +Carbono organico: 0.21 %. + +## Horizonte A (NA a 10 cm) + +Cor Munsell umida: 10YR 3/2. +Argila 30 %. +Silte 16 %. +Areia 54 %. +pH em agua: 5.3. +Carbono organico: 1.84 %. + diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_chernossolo_11972.golden.json b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_chernossolo_11972.golden.json new file mode 100644 index 000000000..ffac2cd72 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_chernossolo_11972.golden.json @@ -0,0 +1,188 @@ +{ + "horizons": [ + { + "top_cm": 0, + "bottom_cm": 20, + "designation": "Ap1", + "munsell_hue_moist": "5YR", + "munsell_value_moist": 2, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "10YR", + "munsell_value_dry": 3, + "munsell_chroma_dry": 2, + "structure_grade": "Forte", + "structure_size": "Pequena", + "structure_type": "Granular", + "consistence_moist": "Muito firme", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 20, + "clay_pct": 42, + "silt_pct": 18, + "sand_pct": 40, + "ph_h2o": 8, + "ph_kcl": 7, + "oc_pct": 1.4, + "n_total_pct": 2.4, + "cec_cmol": 31.8, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 29.3, + "mg_cmol": 2.3, + "k_cmol": 0.06, + "na_cmol": 0.18, + "al_cmol": 0, + "caco3_pct": 2, + "fe_dcb_pct": 67, + "p_mehlich3_mg_kg": 7, + "consistence_dry": "Muito dura" + }, + { + "top_cm": 20, + "bottom_cm": 42, + "designation": "A12", + "munsell_hue_moist": "5YR", + "munsell_value_moist": 2, + "munsell_chroma_moist": 1, + "munsell_hue_dry": "10YR", + "munsell_value_dry": 3, + "munsell_chroma_dry": 2, + "structure_grade": "Forte", + "structure_size": "Pequena", + "structure_type": "Blocos subangulares", + 
"consistence_moist": "Muito firme", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 40, + "clay_pct": 43, + "silt_pct": 18, + "sand_pct": 39, + "ph_h2o": 7.9, + "ph_kcl": 7.1, + "oc_pct": 1.91, + "n_total_pct": 2.7, + "cec_cmol": 32.9, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 30.5, + "mg_cmol": 2.1, + "k_cmol": 0.09, + "na_cmol": 0.19, + "al_cmol": 0, + "caco3_pct": 2, + "fe_dcb_pct": 62, + "p_mehlich3_mg_kg": 9, + "consistence_dry": "Muito dura" + }, + { + "top_cm": 42, + "bottom_cm": 70, + "designation": "C1ca", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Maciça", + "consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 80, + "clay_pct": 22, + "silt_pct": 55, + "sand_pct": 23, + "ph_h2o": 7.8, + "ph_kcl": 7.2, + "oc_pct": 0.6, + "n_total_pct": 1, + "cec_cmol": 17.9, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 16.7, + "mg_cmol": 1, + "k_cmol": 0.02, + "na_cmol": 0.14, + "al_cmol": 0, + "caco3_pct": 67, + "fe_dcb_pct": 30, + "consistence_dry": "" + }, + { + "top_cm": 70, + "bottom_cm": 125, + "designation": "C2ca", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 250, + "clay_pct": 12, + "silt_pct": 57, + "sand_pct": 31, + "ph_h2o": 8.3, + "ph_kcl": 7.3, + "oc_pct": 0.13, + "n_total_pct": 0.5, + "cec_cmol": 13.8, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 12.6, + "mg_cmol": 1, + "k_cmol": 0.01, + "na_cmol": 0.16, + "al_cmol": 0, + "caco3_pct": 78, + "fe_dcb_pct": 27, + "consistence_dry": "" + }, + { + "top_cm": 125, + "bottom_cm": 160, + "designation": "C3ca", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + 
"consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 160, + "clay_pct": 7, + "silt_pct": 46, + "sand_pct": 47, + "ph_h2o": 8.2, + "ph_kcl": 7.3, + "oc_pct": 0.11, + "n_total_pct": 0.4, + "cec_cmol": 17.5, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 14.9, + "mg_cmol": 2.4, + "k_cmol": 0.01, + "na_cmol": 0.16, + "al_cmol": 0, + "caco3_pct": 71, + "fe_dcb_pct": 25, + "consistence_dry": "" + }, + { + "top_cm": 180, + "bottom_cm": 210, + "designation": "C4", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "consistence_dry": "" + } + ] +} diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_chernossolo_11972.txt b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_chernossolo_11972.txt new file mode 100644 index 000000000..895b74222 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_chernossolo_11972.txt @@ -0,0 +1,54 @@ +# Descricao do perfil + +Local: RJ, Campos dos Goytacazes. +Identificacao: 11972. + +## Horizonte Ap1 (0 a 20 cm) + +Cor Munsell umida: 5YR 2/2. +Argila 42 %. +Silte 18 %. +Areia 40 %. +pH em agua: 8.0. +Carbono organico: 1.40 %. + +## Horizonte A12 (20 a 42 cm) + +Cor Munsell umida: 5YR 2/1. +Argila 43 %. +Silte 18 %. +Areia 39 %. +pH em agua: 7.9. +Carbono organico: 1.91 %. + +## Horizonte C1ca (42 a 70 cm) + +Cor Munsell umida: NA/NA. +Argila 22 %. +Silte 55 %. +Areia 23 %. +pH em agua: 7.8. +Carbono organico: 0.60 %. + +## Horizonte C2ca (70 a 125 cm) + +Cor Munsell umida: NA/NA. +Argila 12 %. +Silte 57 %. +Areia 31 %. +pH em agua: 8.3. +Carbono organico: 0.13 %. + +## Horizonte C3ca (125 a 160 cm) + +Cor Munsell umida: NA/NA. +Argila 7 %. +Silte 46 %. +Areia 47 %. +pH em agua: 8.2. +Carbono organico: 0.11 %. + +## Horizonte C4 (180 a 210 cm) + +Cor Munsell umida: NA/NA. 
+ diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_espodossolo_12050.golden.json b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_espodossolo_12050.golden.json new file mode 100644 index 000000000..f47c40b3c --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_espodossolo_12050.golden.json @@ -0,0 +1,294 @@ +{ + "horizons": [ + { + "top_cm": 0, + "bottom_cm": 6, + "designation": "A1", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 2, + "munsell_chroma_moist": 1, + "munsell_hue_dry": "", + "structure_grade": "Fraca", + "structure_size": "Pequena", + "structure_type": "Granular", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 1, + "silt_pct": 4, + "sand_pct": 95, + "ph_h2o": 3.8, + "ph_kcl": 2.7, + "oc_pct": 2.9, + "n_total_pct": 1.1, + "cec_cmol": 12.7, + "bs_pct": 9, + "al_sat_pct": 59, + "k_cmol": 0.09, + "na_cmol": 0.23, + "al_cmol": 1.6, + "bulk_density_g_cm3": 1.17, + "p_mehlich3_mg_kg": 2, + "consistence_dry": "" + }, + { + "top_cm": 6, + "bottom_cm": 12, + "designation": "A2", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 2, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "", + "structure_grade": "Fraca", + "structure_size": "Pequena", + "structure_type": "Granular", + "consistence_moist": "Solta", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 1, + "silt_pct": 3.7, + "sand_pct": 95.3, + "ph_h2o": 4, + "ph_kcl": 2.9, + "oc_pct": 0.8, + "n_total_pct": 0.3, + "cec_cmol": 4.6, + "bs_pct": 4, + "al_sat_pct": 83, + "k_cmol": 0.02, + "na_cmol": 0.05, + "al_cmol": 1, + "bulk_density_g_cm3": 1.49, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "" + }, + { + "top_cm": 12, + "bottom_cm": 20, + "designation": "AE", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 1, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + 
"structure_type": "Grãos simples", + "consistence_moist": "Solta", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 1, + "silt_pct": 3.8, + "sand_pct": 95.2, + "ph_h2o": 42, + "ph_kcl": 3.1, + "oc_pct": 0.4, + "n_total_pct": 0.2, + "cec_cmol": 1.7, + "bs_pct": 6, + "al_sat_pct": 86, + "k_cmol": 0.01, + "na_cmol": 0.02, + "al_cmol": 0.6, + "bulk_density_g_cm3": 1.52, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "" + }, + { + "top_cm": 20, + "bottom_cm": 45, + "designation": "E", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 6, + "munsell_chroma_moist": 1, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Grãos simples", + "consistence_moist": "Solta", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 1, + "silt_pct": 3.9, + "sand_pct": 95.1, + "ph_h2o": 4.3, + "ph_kcl": 3.3, + "oc_pct": 0.24, + "n_total_pct": 0.1, + "cec_cmol": 1.1, + "bs_pct": 9, + "al_sat_pct": 75, + "k_cmol": 0.01, + "na_cmol": 0.01, + "al_cmol": 0.3, + "bulk_density_g_cm3": 1.51, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "" + }, + { + "top_cm": 45, + "bottom_cm": 78, + "designation": "Bh", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 2, + "munsell_chroma_moist": 1, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Maciça", + "consistence_moist": "Firme a friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 1, + "silt_pct": 3.1, + "sand_pct": 95.9, + "ph_h2o": 3.7, + "ph_kcl": 3.2, + "oc_pct": 1.92, + "n_total_pct": 0.4, + "cec_cmol": 13.1, + "bs_pct": 4, + "al_sat_pct": 86, + "k_cmol": 0.01, + "na_cmol": 0.11, + "al_cmol": 3, + "bulk_density_g_cm3": 1.52, + "p_mehlich3_mg_kg": 5, + "consistence_dry": "" + }, + { + "top_cm": 78, + "bottom_cm": 109, + "designation": "Bh/Bhm", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 
2, + "munsell_chroma_moist": 1, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Maciça", + "consistence_moist": "Firme a friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 1, + "silt_pct": 3.5, + "sand_pct": 95.5, + "ph_h2o": 4, + "ph_kcl": 3.5, + "oc_pct": 1.48, + "n_total_pct": 0.2, + "cec_cmol": 10.2, + "bs_pct": 3, + "al_sat_pct": 79, + "k_cmol": 0.01, + "na_cmol": 0.12, + "al_cmol": 1.1, + "bulk_density_g_cm3": 1.49, + "p_mehlich3_mg_kg": 7, + "consistence_dry": "" + }, + { + "top_cm": 109, + "bottom_cm": 140, + "designation": "BhE1", + "munsell_hue_moist": "5YR", + "munsell_value_moist": 3, + "munsell_chroma_moist": 1, + "munsell_hue_dry": "", + "structure_grade": "Fraca", + "structure_size": "Media", + "structure_type": "Blocos subangulares que se desfaz", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 1, + "silt_pct": 2.9, + "sand_pct": 96.1, + "ph_h2o": 4.2, + "ph_kcl": 3.7, + "oc_pct": 3.78, + "n_total_pct": 0.2, + "cec_cmol": 10.3, + "bs_pct": 5, + "al_sat_pct": 77, + "k_cmol": 0.01, + "na_cmol": 0.13, + "al_cmol": 1.7, + "bulk_density_g_cm3": 1.44, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "" + }, + { + "top_cm": 140, + "bottom_cm": 152, + "designation": "Bhm", + "munsell_hue_moist": "2,5YR", + "munsell_value_moist": 2.5, + "munsell_chroma_moist": 0, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Maciça", + "consistence_moist": "Extremamente firme", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 1, + "silt_pct": 2.5, + "sand_pct": 96.5, + "ph_h2o": 4.5, + "ph_kcl": 3.6, + "oc_pct": 3.45, + "n_total_pct": 0.6, + "cec_cmol": 23.6, + "bs_pct": 8, + "al_sat_pct": 53, + "ca_cmol": 0.4, + "mg_cmol": 1.3, + "k_cmol": 0.01, + "na_cmol": 0.28, + "al_cmol": 2.3, + 
"bulk_density_g_cm3": 1.5, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "" + }, + { + "top_cm": 152, + "bottom_cm": 170, + "designation": "BhE2", + "munsell_hue_moist": "5YR", + "munsell_value_moist": 2.5, + "munsell_chroma_moist": 1, + "munsell_hue_dry": "", + "structure_grade": "Fraca", + "structure_size": "Media", + "structure_type": "Blocos subangulares que se desfaz", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 1, + "silt_pct": 4.3, + "sand_pct": 94.7, + "ph_h2o": 4.8, + "ph_kcl": 4, + "oc_pct": 1.14, + "n_total_pct": 0.3, + "cec_cmol": 6.8, + "bs_pct": 10, + "al_sat_pct": 53, + "k_cmol": 0.01, + "na_cmol": 0.12, + "al_cmol": 0.8, + "bulk_density_g_cm3": 1.51, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "" + } + ] +} diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_espodossolo_12050.txt b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_espodossolo_12050.txt new file mode 100644 index 000000000..67b89c086 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_espodossolo_12050.txt @@ -0,0 +1,86 @@ +# Descricao do perfil + +Local: RJ, Quissamã. +Identificacao: 12050. + +## Horizonte A1 (0 a 6 cm) + +Cor Munsell umida: 10YR 2/1. +Argila 1 %. +Silte 4 %. +Areia 95 %. +pH em agua: 3.8. +Carbono organico: 2.90 %. + +## Horizonte A2 (6 a 12 cm) + +Cor Munsell umida: 10YR 2/2. +Argila 1 %. +Silte 3.7 %. +Areia 95.3 %. +pH em agua: 4.0. +Carbono organico: 0.80 %. + +## Horizonte AE (12 a 20 cm) + +Cor Munsell umida: 10YR 4/1. +Argila 1 %. +Silte 3.8 %. +Areia 95.2 %. +pH em agua: 42.0. +Carbono organico: 0.40 %. + +## Horizonte E (20 a 45 cm) + +Cor Munsell umida: 10YR 6/1. +Argila 1 %. +Silte 3.9 %. +Areia 95.1 %. +pH em agua: 4.3. +Carbono organico: 0.24 %. + +## Horizonte Bh (45 a 78 cm) + +Cor Munsell umida: 10YR 2/1. +Argila 1 %. +Silte 3.1 %. +Areia 95.9 %. +pH em agua: 3.7. +Carbono organico: 1.92 %. 
+ +## Horizonte Bh/Bhm (78 a 109 cm) + +Cor Munsell umida: 10YR 2/1. +Argila 1 %. +Silte 3.5 %. +Areia 95.5 %. +pH em agua: 4.0. +Carbono organico: 1.48 %. + +## Horizonte BhE1 (109 a 140 cm) + +Cor Munsell umida: 5YR 3/1. +Argila 1 %. +Silte 2.9 %. +Areia 96.1 %. +pH em agua: 4.2. +Carbono organico: 3.78 %. + +## Horizonte Bhm (140 a 152 cm) + +Cor Munsell umida: 2,5YR 2.5/0. +Argila 1 %. +Silte 2.5 %. +Areia 96.5 %. +pH em agua: 4.5. +Carbono organico: 3.45 %. + +## Horizonte BhE2 (152 a 170 cm) + +Cor Munsell umida: 5YR 2.5/1. +Argila 1 %. +Silte 4.3 %. +Areia 94.7 %. +pH em agua: 4.8. +Carbono organico: 1.14 %. + diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_gleissolo_5343.golden.json b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_gleissolo_5343.golden.json new file mode 100644 index 000000000..9b4dfe6b4 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_gleissolo_5343.golden.json @@ -0,0 +1,235 @@ +{ + "horizons": [ + { + "top_cm": 0, + "bottom_cm": 20, + "designation": "Azn1", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 3.5, + "munsell_chroma_moist": 1, + "munsell_hue_dry": "", + "structure_grade": "Fraca", + "structure_size": "Muito pequena a pequena", + "structure_type": "Granular", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "clay_pct": 8, + "silt_pct": 22, + "sand_pct": 70, + "ph_h2o": 7.3, + "ph_kcl": 7.6, + "oc_pct": 0.41, + "n_total_pct": 0.7, + "cec_cmol": 4.1, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 2.5, + "mg_cmol": 1.3, + "k_cmol": 0.03, + "na_cmol": 0.29, + "al_cmol": 0, + "caco3_pct": 0, + "fe_dcb_pct": 7, + "p_mehlich3_mg_kg": 2, + "consistence_dry": "" + }, + { + "top_cm": 1, + "bottom_cm": 0, + "designation": "Crosta", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + 
"clay_pct": 2, + "silt_pct": 2, + "sand_pct": 96, + "ph_h2o": 8, + "ph_kcl": 7.6, + "oc_pct": 2.78, + "n_total_pct": 2.7, + "cec_cmol": 34.6, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 31.2, + "mg_cmol": 2.9, + "k_cmol": 0.03, + "na_cmol": 0.42, + "al_cmol": 0, + "caco3_pct": 250, + "p_mehlich3_mg_kg": 12, + "consistence_dry": "" + }, + { + "top_cm": 20, + "bottom_cm": 28, + "designation": "2Azn2", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 3, + "munsell_chroma_moist": 1, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Maciça não coerente", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 10, + "clay_pct": 16, + "silt_pct": 13, + "sand_pct": 71, + "ph_h2o": 7.6, + "ph_kcl": 7.4, + "oc_pct": 0.3, + "n_total_pct": 0.6, + "cec_cmol": 6.8, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 2.8, + "mg_cmol": 2.7, + "k_cmol": 0.04, + "na_cmol": 1.24, + "al_cmol": 0, + "caco3_pct": 0, + "fe_dcb_pct": 15, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "" + }, + { + "top_cm": 28, + "bottom_cm": 46, + "designation": "3Cgzn1", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 20, + "clay_pct": 28, + "silt_pct": 13, + "sand_pct": 59, + "ph_h2o": 7.5, + "ph_kcl": 7.3, + "oc_pct": 0.18, + "n_total_pct": 0.4, + "cec_cmol": 13, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 4.9, + "mg_cmol": 5.7, + "k_cmol": 0.05, + "na_cmol": 2.35, + "al_cmol": 0, + "caco3_pct": 0, + "fe_dcb_pct": 24, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "" + }, + { + "top_cm": 46, + "bottom_cm": 70, + "designation": "4Cgzn2", + "munsell_hue_moist": "", + "munsell_chroma_moist": 4.5, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + 
"consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 20, + "clay_pct": 43, + "silt_pct": 11, + "sand_pct": 46, + "ph_h2o": 7.5, + "ph_kcl": 7.2, + "oc_pct": 0.15, + "n_total_pct": 0.4, + "cec_cmol": 16.5, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 6.6, + "mg_cmol": 8.8, + "k_cmol": 0.07, + "na_cmol": 1.03, + "al_cmol": 0, + "caco3_pct": 0, + "fe_dcb_pct": 35, + "consistence_dry": "" + }, + { + "top_cm": 70, + "bottom_cm": 100, + "designation": "5Cgzn3", + "munsell_hue_moist": "", + "munsell_chroma_moist": 5.5, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 10, + "clay_pct": 36, + "silt_pct": 15, + "sand_pct": 49, + "ph_h2o": 7.9, + "ph_kcl": 7.2, + "oc_pct": 0.09, + "n_total_pct": 0.4, + "cec_cmol": 15.4, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 6.3, + "mg_cmol": 7.5, + "k_cmol": 0.09, + "na_cmol": 1.54, + "al_cmol": 0, + "caco3_pct": 0, + "fe_dcb_pct": 33, + "consistence_dry": "" + }, + { + "top_cm": 100, + "bottom_cm": 175, + "designation": "6Cgzn4", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 10, + "clay_pct": 29, + "silt_pct": 12, + "sand_pct": 59, + "ph_h2o": 7.9, + "ph_kcl": 7.2, + "oc_pct": 0.04, + "n_total_pct": 0.3, + "cec_cmol": 10.6, + "bs_pct": 100, + "al_sat_pct": 0, + "ca_cmol": 3.8, + "mg_cmol": 5.5, + "k_cmol": 0.07, + "na_cmol": 1.21, + "al_cmol": 0, + "caco3_pct": 0, + "fe_dcb_pct": 32, + "consistence_dry": "" + } + ] +} diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_gleissolo_5343.txt b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_gleissolo_5343.txt new file mode 100644 index 000000000..475251970 --- /dev/null +++ 
b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_gleissolo_5343.txt @@ -0,0 +1,68 @@ +# Descricao do perfil + +Local: RJ, São Pedro da Aldeia. +Identificacao: 5343. + +## Horizonte Azn1 (0 a 20 cm) + +Cor Munsell umida: 10YR 3.5/1. +Argila 8 %. +Silte 22 %. +Areia 70 %. +pH em agua: 7.3. +Carbono organico: 0.41 %. + +## Horizonte Crosta (1 a 0 cm) + +Cor Munsell umida: NA/NA. +Argila 2 %. +Silte 2 %. +Areia 96 %. +pH em agua: 8.0. +Carbono organico: 2.78 %. + +## Horizonte 2Azn2 (20 a 28 cm) + +Cor Munsell umida: 10YR 3/1. +Argila 16 %. +Silte 13 %. +Areia 71 %. +pH em agua: 7.6. +Carbono organico: 0.30 %. + +## Horizonte 3Cgzn1 (28 a 46 cm) + +Cor Munsell umida: NA/NA. +Argila 28 %. +Silte 13 %. +Areia 59 %. +pH em agua: 7.5. +Carbono organico: 0.18 %. + +## Horizonte 4Cgzn2 (46 a 70 cm) + +Cor Munsell umida: NA/4.5. +Argila 43 %. +Silte 11 %. +Areia 46 %. +pH em agua: 7.5. +Carbono organico: 0.15 %. + +## Horizonte 5Cgzn3 (70 a 100 cm) + +Cor Munsell umida: NA/5.5. +Argila 36 %. +Silte 15 %. +Areia 49 %. +pH em agua: 7.9. +Carbono organico: 0.09 %. + +## Horizonte 6Cgzn4 (100 a 175 cm) + +Cor Munsell umida: NA/NA. +Argila 29 %. +Silte 12 %. +Areia 59 %. +pH em agua: 7.9. +Carbono organico: 0.04 %. 
+ diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_latossolo_11969.golden.json b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_latossolo_11969.golden.json new file mode 100644 index 000000000..aa71c2181 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_latossolo_11969.golden.json @@ -0,0 +1,297 @@ +{ + "horizons": [ + { + "top_cm": 0, + "bottom_cm": 17, + "designation": "Ap", + "munsell_hue_moist": "2,5YR", + "munsell_value_moist": 3, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "2,5YR", + "munsell_value_dry": 3.5, + "munsell_chroma_dry": 4, + "structure_grade": "Forte", + "structure_size": "Muito pequena", + "structure_type": "Granular", + "consistence_moist": "Friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 10, + "clay_pct": 38, + "silt_pct": 13, + "sand_pct": 49, + "ph_h2o": 5.4, + "ph_kcl": 4.5, + "oc_pct": 1.52, + "n_total_pct": 1.6, + "cec_cmol": 6.6, + "bs_pct": 38, + "al_sat_pct": 17, + "ca_cmol": 1.3, + "mg_cmol": 1.1, + "k_cmol": 0.13, + "na_cmol": 0.02, + "al_cmol": 0.5, + "fe_dcb_pct": 107, + "bulk_density_g_cm3": 1.4, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "Ligeiramente dura" + }, + { + "top_cm": 17, + "bottom_cm": 35, + "designation": "A3", + "munsell_hue_moist": "2,5YR", + "munsell_value_moist": 3, + "munsell_chroma_moist": 4, + "munsell_hue_dry": "", + "structure_grade": "Moderada", + "structure_size": "Muito pequena a pequena", + "structure_type": "Blocos subangulares", + "consistence_moist": "Friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 10, + "clay_pct": 48, + "silt_pct": 13, + "sand_pct": 39, + "ph_h2o": 5.1, + "ph_kcl": 4.3, + "oc_pct": 0.98, + "n_total_pct": 1.2, + "cec_cmol": 5, + "bs_pct": 20, + "al_sat_pct": 47, + "k_cmol": 0.09, + "na_cmol": 0.04, + "al_cmol": 0.9, + "fe_dcb_pct": 124, + "bulk_density_g_cm3": 1.29, + "consistence_dry": "Ligeiramente dura" + }, + { + "top_cm": 35, + "bottom_cm": 70, + 
"designation": "B1", + "munsell_hue_moist": "2,5YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 6, + "munsell_hue_dry": "", + "structure_grade": "Fraca", + "structure_size": "Pequena", + "structure_type": "Blocos subangulares", + "consistence_moist": "Friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 55, + "silt_pct": 13, + "sand_pct": 32, + "ph_h2o": 5.2, + "ph_kcl": 4.4, + "oc_pct": 0.77, + "n_total_pct": 1, + "cec_cmol": 4.6, + "bs_pct": 22, + "al_sat_pct": 41, + "k_cmol": 0.05, + "na_cmol": 0.04, + "al_cmol": 0.7, + "fe_dcb_pct": 134, + "bulk_density_g_cm3": 1.17, + "consistence_dry": "Macia" + }, + { + "top_cm": 70, + "bottom_cm": 100, + "designation": "B21", + "munsell_hue_moist": "3,5YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 6, + "munsell_hue_dry": "", + "structure_grade": "Fraca", + "structure_size": "Pequena", + "structure_type": "Blocos subangulares", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 20, + "clay_pct": 57, + "silt_pct": 11, + "sand_pct": 32, + "ph_h2o": 5, + "ph_kcl": 4.5, + "oc_pct": 0.45, + "n_total_pct": 0.7, + "cec_cmol": 4, + "bs_pct": 15, + "al_sat_pct": 45, + "k_cmol": 0.04, + "na_cmol": 0.06, + "al_cmol": 0.5, + "fe_dcb_pct": 137, + "bulk_density_g_cm3": 1.2, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "Macia" + }, + { + "top_cm": 100, + "bottom_cm": 145, + "designation": "B22", + "munsell_hue_moist": "4YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 6, + "munsell_hue_dry": "", + "structure_grade": "Forte", + "structure_size": "Ultrapequena", + "structure_type": "Granular", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 30, + "clay_pct": 53, + "silt_pct": 16, + "sand_pct": 31, + "ph_h2o": 5.4, + "ph_kcl": 5.1, + "oc_pct": 0.37, + "n_total_pct": 0.6, + "cec_cmol": 2.7, + "bs_pct": 19, + 
"al_sat_pct": 29, + "k_cmol": 0.03, + "na_cmol": 0.06, + "al_cmol": 0.2, + "fe_dcb_pct": 134, + "bulk_density_g_cm3": 1.15, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "Macia" + }, + { + "top_cm": 145, + "bottom_cm": 175, + "designation": "B23", + "munsell_hue_moist": "4YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 6, + "munsell_hue_dry": "", + "structure_grade": "Forte", + "structure_size": "Ultrapequena", + "structure_type": "Granular", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 30, + "clay_pct": 52, + "silt_pct": 14, + "sand_pct": 34, + "ph_h2o": 5.5, + "ph_kcl": 5.3, + "oc_pct": 0.31, + "n_total_pct": 0.5, + "cec_cmol": 1.8, + "bs_pct": 17, + "al_sat_pct": 25, + "k_cmol": 0.01, + "na_cmol": 0.03, + "al_cmol": 0.1, + "fe_dcb_pct": 141, + "bulk_density_g_cm3": 1.32, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "Macia" + }, + { + "top_cm": 175, + "bottom_cm": 250, + "designation": "B24", + "munsell_hue_moist": "4YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 7, + "munsell_hue_dry": "", + "structure_grade": "Forte", + "structure_size": "Ultrapequena", + "structure_type": "Granular", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 40, + "clay_pct": 52, + "silt_pct": 14, + "sand_pct": 34, + "ph_h2o": 5.8, + "ph_kcl": 5.7, + "oc_pct": 0.16, + "n_total_pct": 0.5, + "cec_cmol": 1.4, + "bs_pct": 21, + "al_sat_pct": 0, + "k_cmol": 0.01, + "na_cmol": 0.03, + "al_cmol": 0, + "fe_dcb_pct": 137, + "consistence_dry": "Macia" + }, + { + "top_cm": 250, + "bottom_cm": 290, + "designation": "B3", + "munsell_hue_moist": "4YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 7, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 120, + "clay_pct": 
34, + "silt_pct": 19, + "sand_pct": 47, + "ph_h2o": 5.8, + "ph_kcl": 6, + "oc_pct": 0.14, + "n_total_pct": 0.4, + "cec_cmol": 0.7, + "bs_pct": 43, + "al_sat_pct": 0, + "k_cmol": 0.01, + "na_cmol": 0.03, + "al_cmol": 0, + "fe_dcb_pct": 158, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "" + }, + { + "top_cm": 290, + "bottom_cm": 320, + "designation": "C", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 170, + "clay_pct": 23, + "silt_pct": 20, + "sand_pct": 57, + "ph_h2o": 5.5, + "ph_kcl": 5.4, + "oc_pct": 0.11, + "n_total_pct": 0.4, + "cec_cmol": 1.7, + "bs_pct": 47, + "al_sat_pct": 0, + "k_cmol": 0.05, + "na_cmol": 0.14, + "al_cmol": 0, + "fe_dcb_pct": 142, + "p_mehlich3_mg_kg": 3, + "consistence_dry": "" + } + ] +} diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_latossolo_11969.txt b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_latossolo_11969.txt new file mode 100644 index 000000000..2868be098 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_latossolo_11969.txt @@ -0,0 +1,86 @@ +# Descricao do perfil + +Local: RJ, Itaperuna. +Identificacao: 11969. + +## Horizonte Ap (0 a 17 cm) + +Cor Munsell umida: 2,5YR 3/2. +Argila 38 %. +Silte 13 %. +Areia 49 %. +pH em agua: 5.4. +Carbono organico: 1.52 %. + +## Horizonte A3 (17 a 35 cm) + +Cor Munsell umida: 2,5YR 3/4. +Argila 48 %. +Silte 13 %. +Areia 39 %. +pH em agua: 5.1. +Carbono organico: 0.98 %. + +## Horizonte B1 (35 a 70 cm) + +Cor Munsell umida: 2,5YR 4/6. +Argila 55 %. +Silte 13 %. +Areia 32 %. +pH em agua: 5.2. +Carbono organico: 0.77 %. + +## Horizonte B21 (70 a 100 cm) + +Cor Munsell umida: 3,5YR 4/6. +Argila 57 %. +Silte 11 %. +Areia 32 %. +pH em agua: 5.0. +Carbono organico: 0.45 %. + +## Horizonte B22 (100 a 145 cm) + +Cor Munsell umida: 4YR 4/6. +Argila 53 %. +Silte 16 %. +Areia 31 %. 
+pH em agua: 5.4. +Carbono organico: 0.37 %. + +## Horizonte B23 (145 a 175 cm) + +Cor Munsell umida: 4YR 4/6. +Argila 52 %. +Silte 14 %. +Areia 34 %. +pH em agua: 5.5. +Carbono organico: 0.31 %. + +## Horizonte B24 (175 a 250 cm) + +Cor Munsell umida: 4YR 4/7. +Argila 52 %. +Silte 14 %. +Areia 34 %. +pH em agua: 5.8. +Carbono organico: 0.16 %. + +## Horizonte B3 (250 a 290 cm) + +Cor Munsell umida: 4YR 4/7. +Argila 34 %. +Silte 19 %. +Areia 47 %. +pH em agua: 5.8. +Carbono organico: 0.14 %. + +## Horizonte C (290 a 320 cm) + +Cor Munsell umida: NA/NA. +Argila 23 %. +Silte 20 %. +Areia 57 %. +pH em agua: 5.5. +Carbono organico: 0.11 %. + diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_neossolo_5340.golden.json b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_neossolo_5340.golden.json new file mode 100644 index 000000000..a5395981e --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_neossolo_5340.golden.json @@ -0,0 +1,148 @@ +{ + "horizons": [ + { + "top_cm": 0, + "bottom_cm": 11, + "designation": "A", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 1, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Grãos simples", + "consistence_moist": "Solta", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 2, + "silt_pct": 7.3, + "sand_pct": 90.7, + "ph_h2o": 5.7, + "ph_kcl": 4.3, + "oc_pct": 0.25, + "n_total_pct": 0.4, + "cec_cmol": 1.6, + "bs_pct": 50, + "al_sat_pct": 0, + "k_cmol": 0.02, + "na_cmol": 0.03, + "al_cmol": 0, + "bulk_density_g_cm3": 1.5, + "p_mehlich3_mg_kg": 3, + "consistence_dry": "Solta" + }, + { + "top_cm": 11, + "bottom_cm": 58, + "designation": "C1", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Grãos simples", + "consistence_moist": "Solta", + 
"clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 2, + "silt_pct": 7.4, + "sand_pct": 90.6, + "ph_h2o": 6, + "ph_kcl": 4.8, + "oc_pct": 0.08, + "n_total_pct": 0.3, + "cec_cmol": 0.5, + "bs_pct": 40, + "al_sat_pct": 0, + "k_cmol": 0.01, + "na_cmol": 0.02, + "al_cmol": 0, + "bulk_density_g_cm3": 1.54, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "Solta" + }, + { + "top_cm": 58, + "bottom_cm": 115, + "designation": "C2", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 5, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Grãos simples", + "consistence_moist": "Solta", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 2, + "silt_pct": 7.4, + "sand_pct": 90.6, + "ph_h2o": 6.1, + "ph_kcl": 4.8, + "oc_pct": 0.04, + "n_total_pct": 0.2, + "cec_cmol": 0.5, + "bs_pct": 40, + "al_sat_pct": 0, + "k_cmol": 0.01, + "na_cmol": 0.03, + "al_cmol": 0, + "bulk_density_g_cm3": 1.4, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "Solta" + }, + { + "top_cm": 115, + "bottom_cm": 150, + "designation": "C3", + "munsell_hue_moist": "7.5YR", + "munsell_value_moist": 6, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Grãos simples", + "consistence_moist": "Solta", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 0, + "clay_pct": 2, + "silt_pct": 7.4, + "sand_pct": 90.6, + "ph_h2o": 6.3, + "ph_kcl": 4.8, + "oc_pct": 0.06, + "n_total_pct": 0.3, + "cec_cmol": 0.5, + "bs_pct": 40, + "al_sat_pct": 0, + "k_cmol": 0.01, + "na_cmol": 0.02, + "al_cmol": 0, + "bulk_density_g_cm3": 1.44, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "Solta" + }, + { + "top_cm": 150, + "bottom_cm": 270, + "designation": "C4", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 6, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "", + 
"structure_grade": "", + "structure_size": "", + "structure_type": "", + "consistence_moist": "", + "clay_films_amount": "", + "clay_films_strength": "", + "consistence_dry": "" + } + ] +} diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_neossolo_5340.txt b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_neossolo_5340.txt new file mode 100644 index 000000000..3cc49cd45 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_neossolo_5340.txt @@ -0,0 +1,45 @@ +# Descricao do perfil + +Local: RJ, Arraial do Cabo. +Identificacao: 5340. + +## Horizonte A (0 a 11 cm) + +Cor Munsell umida: 10YR 4/1. +Argila 2 %. +Silte 7.3 %. +Areia 90.7 %. +pH em agua: 5.7. +Carbono organico: 0.25 %. + +## Horizonte C1 (11 a 58 cm) + +Cor Munsell umida: 10YR 4/2. +Argila 2 %. +Silte 7.4 %. +Areia 90.6 %. +pH em agua: 6.0. +Carbono organico: 0.08 %. + +## Horizonte C2 (58 a 115 cm) + +Cor Munsell umida: 10YR 5/2. +Argila 2 %. +Silte 7.4 %. +Areia 90.6 %. +pH em agua: 6.1. +Carbono organico: 0.04 %. + +## Horizonte C3 (115 a 150 cm) + +Cor Munsell umida: 7.5YR 6/2. +Argila 2 %. +Silte 7.4 %. +Areia 90.6 %. +pH em agua: 6.3. +Carbono organico: 0.06 %. + +## Horizonte C4 (150 a 270 cm) + +Cor Munsell umida: 10YR 6/2. 
+ diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_planossolo_12688.golden.json b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_planossolo_12688.golden.json new file mode 100644 index 000000000..74da3c84c --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_planossolo_12688.golden.json @@ -0,0 +1,243 @@ +{ + "horizons": [ + { + "top_cm": 0, + "bottom_cm": 5, + "designation": "A1", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "10YR", + "munsell_value_dry": 6, + "munsell_chroma_dry": 2, + "structure_grade": "", + "structure_size": "", + "structure_type": "Granular", + "consistence_moist": "Solta", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 23, + "clay_pct": 2.5, + "silt_pct": 10.7, + "sand_pct": 86.8, + "ph_h2o": 5.1, + "ph_kcl": 4.1, + "oc_pct": 0.44, + "n_total_pct": 0.4, + "cec_cmol": 3.3, + "bs_pct": 39, + "al_sat_pct": 57, + "k_cmol": 0.08, + "na_cmol": 0.27, + "al_cmol": 1.7, + "fe_dcb_pct": 3, + "bulk_density_g_cm3": 1.39, + "p_mehlich3_mg_kg": 6, + "consistence_dry": "Macia" + }, + { + "top_cm": 5, + "bottom_cm": 25, + "designation": "A2", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "10YR", + "munsell_value_dry": 6, + "munsell_chroma_dry": 2, + "structure_grade": "", + "structure_size": "", + "structure_type": "Maciça", + "consistence_moist": "Solta", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 28, + "clay_pct": 3.5, + "silt_pct": 12.2, + "sand_pct": 84.3, + "ph_h2o": 4.8, + "ph_kcl": 3.9, + "oc_pct": 0.33, + "n_total_pct": 0.5, + "cec_cmol": 3.5, + "bs_pct": 23, + "al_sat_pct": 72, + "k_cmol": 0.12, + "na_cmol": 0.18, + "al_cmol": 2.1, + "fe_dcb_pct": 3, + "p_mehlich3_mg_kg": 6, + "consistence_dry": "Macia" + }, + { + "top_cm": 25, + "bottom_cm": 65, + "designation": "E1", + "munsell_hue_moist": "10YR", + 
"munsell_value_moist": 4, + "munsell_chroma_moist": 4, + "munsell_hue_dry": "10YR", + "munsell_value_dry": 7, + "munsell_chroma_dry": 2, + "structure_grade": "", + "structure_size": "", + "structure_type": "Maciça", + "consistence_moist": "Solta", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 85, + "clay_pct": 6.3, + "silt_pct": 8.7, + "sand_pct": 85, + "ph_h2o": 4.9, + "ph_kcl": 4, + "oc_pct": 0.13, + "n_total_pct": 0.2, + "cec_cmol": 2.8, + "bs_pct": 25, + "al_sat_pct": 65, + "k_cmol": 0.09, + "na_cmol": 0.1, + "al_cmol": 1.3, + "fe_dcb_pct": 6, + "bulk_density_g_cm3": 1.48, + "p_mehlich3_mg_kg": 4, + "consistence_dry": "Macia" + }, + { + "top_cm": 65, + "bottom_cm": 75, + "designation": "E2", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 6, + "munsell_chroma_moist": 4, + "munsell_hue_dry": "10YR", + "munsell_value_dry": 7, + "munsell_chroma_dry": 3, + "structure_grade": "", + "structure_size": "", + "structure_type": "Maciça", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 12, + "clay_pct": 4.2, + "silt_pct": 10.6, + "sand_pct": 85.2, + "ph_h2o": 4.9, + "ph_kcl": 4.1, + "oc_pct": 0.08, + "n_total_pct": 0.1, + "cec_cmol": 2.2, + "bs_pct": 23, + "al_sat_pct": 67, + "k_cmol": 0.04, + "na_cmol": 0.05, + "al_cmol": 1, + "fe_dcb_pct": 6, + "bulk_density_g_cm3": 1.65, + "p_mehlich3_mg_kg": 3, + "consistence_dry": "Macia" + }, + { + "top_cm": 75, + "bottom_cm": 95, + "designation": "BA", + "munsell_hue_moist": "10YR", + "munsell_value_moist": 5, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Maciça", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 166, + "clay_pct": 14.6, + "silt_pct": 10.1, + "sand_pct": 75.3, + "ph_h2o": 4.6, + "ph_kcl": 3.8, + "oc_pct": 0.14, + "n_total_pct": 0.3, + "cec_cmol": 3.8, 
+ "bs_pct": 13, + "al_sat_pct": 69, + "k_cmol": 0.06, + "na_cmol": 0.05, + "al_cmol": 1.1, + "fe_dcb_pct": 9, + "bulk_density_g_cm3": 1.7, + "p_mehlich3_mg_kg": 3, + "consistence_dry": "Ligeiramente dura" + }, + { + "top_cm": 95, + "bottom_cm": 160, + "designation": "2Btg", + "munsell_hue_moist": "N", + "munsell_value_moist": 6, + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Maciça", + "consistence_moist": "Friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 74, + "clay_pct": 55.3, + "silt_pct": 7.4, + "sand_pct": 37.3, + "ph_h2o": 4.9, + "ph_kcl": 3.5, + "oc_pct": 0.24, + "n_total_pct": 0.4, + "cec_cmol": 11.4, + "bs_pct": 13, + "al_sat_pct": 61, + "ca_cmol": 0.3, + "mg_cmol": 1, + "k_cmol": 0.04, + "na_cmol": 0.2, + "al_cmol": 2.3, + "fe_dcb_pct": 47, + "bulk_density_g_cm3": 1.56, + "p_mehlich3_mg_kg": 2, + "consistence_dry": "Dura" + }, + { + "top_cm": 160, + "bottom_cm": 180, + "designation": "2Cg", + "munsell_hue_moist": "", + "munsell_hue_dry": "", + "structure_grade": "", + "structure_size": "", + "structure_type": "Maciça", + "consistence_moist": "Muito friável", + "clay_films_amount": "", + "clay_films_strength": "", + "coarse_fragments_pct": 8, + "clay_pct": 53.1, + "silt_pct": 13.1, + "sand_pct": 33.8, + "ph_h2o": 5.1, + "ph_kcl": 3.4, + "oc_pct": 0.15, + "n_total_pct": 0.4, + "cec_cmol": 16, + "bs_pct": 18, + "al_sat_pct": 30, + "ca_cmol": 0.6, + "mg_cmol": 1.6, + "k_cmol": 0.04, + "na_cmol": 0.52, + "al_cmol": 1.2, + "fe_dcb_pct": 66, + "bulk_density_g_cm3": 1.45, + "p_mehlich3_mg_kg": 1, + "consistence_dry": "Dura" + } + ] +} diff --git a/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_planossolo_12688.txt b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_planossolo_12688.txt new file mode 100644 index 000000000..4a718b4f7 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/bdsolos_RJ_planossolo_12688.txt @@ -0,0 +1,68 @@ +# Descricao do perfil + +Local: 
RJ, Rio de Janeiro. +Identificacao: 12688. + +## Horizonte A1 (0 a 5 cm) + +Cor Munsell umida: 10YR 4/2. +Argila 2.5 %. +Silte 10.7 %. +Areia 86.8 %. +pH em agua: 5.1. +Carbono organico: 0.44 %. + +## Horizonte A2 (5 a 25 cm) + +Cor Munsell umida: 10YR 4/2. +Argila 3.5 %. +Silte 12.2 %. +Areia 84.3 %. +pH em agua: 4.8. +Carbono organico: 0.33 %. + +## Horizonte E1 (25 a 65 cm) + +Cor Munsell umida: 10YR 4/4. +Argila 6.3 %. +Silte 8.7 %. +Areia 85 %. +pH em agua: 4.9. +Carbono organico: 0.13 %. + +## Horizonte E2 (65 a 75 cm) + +Cor Munsell umida: 10YR 6/4. +Argila 4.2 %. +Silte 10.6 %. +Areia 85.2 %. +pH em agua: 4.9. +Carbono organico: 0.08 %. + +## Horizonte BA (75 a 95 cm) + +Cor Munsell umida: 10YR 5/2. +Argila 14.6 %. +Silte 10.1 %. +Areia 75.3 %. +pH em agua: 4.6. +Carbono organico: 0.14 %. + +## Horizonte 2Btg (95 a 160 cm) + +Cor Munsell umida: N 6/NA. +Argila 55.3 %. +Silte 7.4 %. +Areia 37.3 %. +pH em agua: 4.9. +Carbono organico: 0.24 %. + +## Horizonte 2Cg (160 a 180 cm) + +Cor Munsell umida: NA/NA. +Argila 53.1 %. +Silte 13.1 %. +Areia 33.8 %. +pH em agua: 5.1. +Carbono organico: 0.15 %. 
+ diff --git a/inst/fixtures/vlm_extraction/horizons/perfil_BA_chernossolo_messy.golden.json b/inst/fixtures/vlm_extraction/horizons/perfil_BA_chernossolo_messy.golden.json new file mode 100644 index 000000000..98ccea638 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/perfil_BA_chernossolo_messy.golden.json @@ -0,0 +1,59 @@ +{ + "horizons": [ + { + "designation": "A1", + "top_cm": 0, + "bottom_cm": 20, + "munsell_hue_moist": "10YR", + "munsell_value_moist": 3, + "munsell_chroma_moist": 2, + "munsell_hue_dry": "10YR", + "munsell_value_dry": 4, + "munsell_chroma_dry": 2, + "clay_pct": 45, + "silt_pct": 38, + "sand_pct": 17, + "ph_h2o": 6.8, + "ph_kcl": 6.2, + "cec_cmol": 22, + "bs_pct": 78 + }, + { + "designation": "A3", + "top_cm": 20, + "bottom_cm": 45, + "munsell_hue_moist": "10YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 3, + "clay_pct": 50, + "ph_h2o": 6.9, + "cec_cmol": 24, + "bs_pct": 80 + }, + { + "designation": "Bt", + "top_cm": 45, + "bottom_cm": 95, + "munsell_hue_moist": "10YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 4, + "munsell_hue_dry": "10YR", + "munsell_value_dry": 5, + "munsell_chroma_dry": 4, + "clay_pct": 60, + "ph_h2o": 7.1, + "cec_cmol": 26, + "bs_pct": 92, + "caco3_pct": 4 + }, + { + "designation": "Bk", + "top_cm": 95, + "bottom_cm": 150, + "munsell_hue_moist": "10YR", + "munsell_value_moist": 6, + "munsell_chroma_moist": 3, + "caco3_pct": 30 + } + ] +} diff --git a/inst/fixtures/vlm_extraction/horizons/perfil_BA_chernossolo_messy.txt b/inst/fixtures/vlm_extraction/horizons/perfil_BA_chernossolo_messy.txt new file mode 100644 index 000000000..27e0de68c --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/perfil_BA_chernossolo_messy.txt @@ -0,0 +1,30 @@ +Boletim de Levantamento de Reconhecimento, Bahia. Perfil Embrapa +n. 184 (Chernossolo Argiluvico Carbonatico tipico). + +Localizacao: Sertao da Ressaca, BA. 
Coordenadas em UTM zona 23S +mas sem datum explicito; convertido aproximadamente para +-13.74, -41.20 (WGS84). + +Altitude 580 m. Declividade da paisagem: ~12 %. Vegetacao primaria: +caatinga arborea. Material de origem: rochas calcarias do Grupo +Salitre (Bambui). + +Descricao morfologica: + +Horizonte A1, profundidade de 0 ate 20 cm da superficie. Cor 10YR +3/2 quando umida e 10YR 4/2 seca. Estrutura moderada granular fina. +Textura argilo-siltosa: ~45 % de argila, 38 % silte, 17 % areia. +Consistencia friavel umida, dura quando seca. pH em agua = 6,8 (com +virgula decimal, padrao brasileiro). pH KCl 6,2. CTC 22 cmol_c/kg, +saturacao por bases 78 %. + +A3 (20-45 cm). Cor 10YR 4/3 umida. Granular media a moderada blocos +subangulares. Argila ~50 %. pH agua 6,9. CTC 24, V = 80 %. + +Bt (45 a 95 cm). Cor 10YR 4/4 umida; 10YR 5/4 seca. Estrutura forte +em blocos subangulares medios; cerosidade comum. Textura muito +argilosa, ~60 % argila. pH 7,1. CTC 26 cmol_c/kg, saturacao por +bases 92 %, saturacao por aluminio nula. CaCO3 equivalente 4 %. + +Bk (95-150+). Carbonatos visiveis. CaCO3 equivalente ~30 %. Cor +10YR 6/3. 
diff --git a/inst/fixtures/vlm_extraction/horizons/perfil_MG_latossolo.golden.json b/inst/fixtures/vlm_extraction/horizons/perfil_MG_latossolo.golden.json new file mode 100644 index 000000000..aa7ba10b8 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/perfil_MG_latossolo.golden.json @@ -0,0 +1,16 @@ +{ + "horizons": [ + { "designation": "A", "top_cm": 0, "bottom_cm": 12, + "munsell_hue_moist": "2.5YR", "munsell_value_moist": 3, "munsell_chroma_moist": 4, + "clay_pct": 55, "silt_pct": 15, "sand_pct": 30, "ph_h2o": 5.1, "oc_pct": 2.10 }, + { "designation": "AB", "top_cm": 12, "bottom_cm": 35, + "munsell_hue_moist": "2.5YR", "munsell_value_moist": 3, "munsell_chroma_moist": 6, + "clay_pct": 58, "silt_pct": 13, "sand_pct": 29, "ph_h2o": 4.9, "oc_pct": 1.20 }, + { "designation": "Bw1", "top_cm": 35, "bottom_cm": 90, + "munsell_hue_moist": "2.5YR", "munsell_value_moist": 4, "munsell_chroma_moist": 6, + "clay_pct": 62, "silt_pct": 10, "sand_pct": 28, "ph_h2o": 4.7, "oc_pct": 0.50 }, + { "designation": "Bw2", "top_cm": 90, "bottom_cm": 200, + "munsell_hue_moist": "10R", "munsell_value_moist": 4, "munsell_chroma_moist": 6, + "clay_pct": 65, "silt_pct": 8, "sand_pct": 27, "ph_h2o": 4.7, "oc_pct": 0.30 } + ] +} diff --git a/inst/fixtures/vlm_extraction/horizons/perfil_MG_latossolo.txt b/inst/fixtures/vlm_extraction/horizons/perfil_MG_latossolo.txt new file mode 100644 index 000000000..cec0635e4 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/perfil_MG_latossolo.txt @@ -0,0 +1,19 @@ +# Perfil de Latossolo Vermelho + +Local: Lavras, MG. Perfil n. P-203. + +## A (0 a 12 cm) +Cor Munsell umida: 2.5YR 3/4. Argila 55 %, silte 15 %, areia 30 %. +pH em agua: 5.1. Carbono organico: 2.10 %. + +## AB (12 a 35 cm) +Cor Munsell umida: 2.5YR 3/6. Argila 58 %, silte 13 %, areia 29 %. +pH em agua: 4.9. Carbono organico: 1.20 %. + +## Bw1 (35 a 90 cm) +Cor Munsell umida: 2.5YR 4/6. Argila 62 %, silte 10 %, areia 28 %. +pH em agua: 4.7. Carbono organico: 0.50 %. 
+ +## Bw2 (90 a 200 cm) +Cor Munsell umida: 10R 4/6. Argila 65 %, silte 8 %, areia 27 %. +pH em agua: 4.7. Carbono organico: 0.30 %. diff --git a/inst/fixtures/vlm_extraction/horizons/perfil_RJ_argissolo.golden.json b/inst/fixtures/vlm_extraction/horizons/perfil_RJ_argissolo.golden.json new file mode 100644 index 000000000..d4738b11c --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/perfil_RJ_argissolo.golden.json @@ -0,0 +1,56 @@ +{ + "horizons": [ + { + "designation": "A", + "top_cm": 0, + "bottom_cm": 18, + "munsell_hue_moist": "7.5YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 3, + "clay_pct": 22, + "silt_pct": 30, + "sand_pct": 48, + "ph_h2o": 5.4, + "oc_pct": 1.20 + }, + { + "designation": "AB", + "top_cm": 18, + "bottom_cm": 38, + "munsell_hue_moist": "7.5YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 4, + "clay_pct": 28, + "silt_pct": 28, + "sand_pct": 44, + "ph_h2o": 5.2, + "oc_pct": 0.65 + }, + { + "designation": "Bt1", + "top_cm": 38, + "bottom_cm": 78, + "munsell_hue_moist": "5YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 6, + "clay_pct": 42, + "silt_pct": 22, + "sand_pct": 36, + "ph_h2o": 5.0, + "oc_pct": 0.30 + }, + { + "designation": "Bt2", + "top_cm": 78, + "bottom_cm": 140, + "munsell_hue_moist": "5YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 6, + "clay_pct": 45, + "silt_pct": 20, + "sand_pct": 35, + "ph_h2o": 5.0, + "oc_pct": 0.20 + } + ] +} diff --git a/inst/fixtures/vlm_extraction/horizons/perfil_RJ_argissolo.txt b/inst/fixtures/vlm_extraction/horizons/perfil_RJ_argissolo.txt new file mode 100644 index 000000000..8e2802c43 --- /dev/null +++ b/inst/fixtures/vlm_extraction/horizons/perfil_RJ_argissolo.txt @@ -0,0 +1,27 @@ +# Descricao do perfil + +Local: Itaguai, RJ. Identificacao: PI-001 (Argissolo Vermelho-Amarelo). + +## Horizonte A (0 a 18 cm) + +Cor Munsell umida: 7.5YR 4/3. +Argila 22 %, silte 30 %, areia 48 %. +pH em agua: 5.4. Carbono organico: 1.20 %. 
+ +## Horizonte AB (18 a 38 cm) + +Cor Munsell umida: 7.5YR 4/4. +Argila 28 %, silte 28 %, areia 44 %. +pH em agua: 5.2. Carbono organico: 0.65 %. + +## Horizonte Bt1 (38 a 78 cm) + +Cor Munsell umida: 5YR 4/6. +Argila 42 %, silte 22 %, areia 36 %. +pH em agua: 5.0. Carbono organico: 0.30 %. + +## Horizonte Bt2 (78 a 140 cm) + +Cor Munsell umida: 5YR 4/6. +Argila 45 %, silte 20 %, areia 35 %. +pH em agua: 5.0. Carbono organico: 0.20 %. diff --git a/inst/fixtures/vlm_extraction/munsell/README.md b/inst/fixtures/vlm_extraction/munsell/README.md new file mode 100644 index 000000000..902d13c4b --- /dev/null +++ b/inst/fixtures/vlm_extraction/munsell/README.md @@ -0,0 +1,64 @@ +# Munsell-from-photo fixtures + +Each fixture is a **pair**: + +``` +fixture_id.jpg (or .png / .jpeg / .webp) +fixture_id.golden.json +``` + +The image must be a real photo of a soil profile (or a single horizon) +with at least one Munsell colour reference card visible — without the +card, `pedologist_system_prompt()` instructs the agent to fall back to +`confidence <= 0.5`, which would dominate the benchmark. + +Golden JSON schema (mirror of what `extract_munsell_from_photo()` +produces): + +```json +{ + "horizons": [ + { + "designation": "Bt1", + "top_cm": 30, + "bottom_cm": 80, + "munsell_hue_moist": "5YR", + "munsell_value_moist": 4, + "munsell_chroma_moist": 6 + }, + ... + ] +} +``` + +## Where to get fixtures + +We do not ship Munsell photo fixtures bundled with the package: real +profile photos are large, often subject to publication / institutional +licences, and lose value when stripped of the original metadata. The +two recommended sources for this benchmark: + +1. **Your own field photos** with a colour-checker card visible, + labelled by hand against the profile description. +2. **Embrapa BDsolos image archive** — most BDsolos perfis (the same + ~9 000 we benchmark in `benchmark_bdsolos_sibcs()`) include scanned + photo plates. 
Pair each plate with the BDsolos surveyor-recorded + Munsell triplet as the golden answer. + +Drop the resulting `.jpg` + `.golden.json` files into this +directory and re-run `benchmark_vlm_extraction(tasks = "munsell")`. + +## Metric + +`benchmark_vlm_extraction()` reports the **mean CIE Delta-E 2000** +between predicted and golden colours over matched horizons. Rule-of-thumb +interpretation thresholds for ΔE 2000: + +| ΔE 2000 | Interpretation | +|---------|------------------------------| +| < 1 | Imperceptible to most viewers | +| 1 – 2 | Perceptible only on close inspection | +| 2 – 5 | Clearly perceptible | +| > 5 | Clearly different colours | + +For Munsell, ΔE ~5 is roughly one chroma step at value 4. diff --git a/inst/fixtures/vlm_extraction/site/ficha_MG_002.golden.json b/inst/fixtures/vlm_extraction/site/ficha_MG_002.golden.json new file mode 100644 index 000000000..a5e176f90 --- /dev/null +++ b/inst/fixtures/vlm_extraction/site/ficha_MG_002.golden.json @@ -0,0 +1,18 @@ +{ + "site": { + "id": "P-203", + "date": "2024-09-03", + "observer": "A. Santos", + "country": "BR", + "state": "MG", + "municipality": "Lavras", + "lat": -21.2459, + "lon": -44.9941, + "elevation_m": 920, + "slope_pct": 8, + "drainage_class": "bem drenado", + "parent_material": "gnaisse-granitico do Embasamento Cristalino", + "vegetation": "Cerrado stricto sensu", + "land_use": "cafeicultura" + } +} diff --git a/inst/fixtures/vlm_extraction/site/ficha_MG_002.txt b/inst/fixtures/vlm_extraction/site/ficha_MG_002.txt new file mode 100644 index 000000000..d2be6eb35 --- /dev/null +++ b/inst/fixtures/vlm_extraction/site/ficha_MG_002.txt @@ -0,0 +1,11 @@ +FICHA DE CAMPO + +Perfil: P-203 +Data: 2024-09-03 +Observador: A. Santos +Local: Lavras, MG. Coord. -21.2459, -44.9941 (WGS84). +Altitude: 920 m. Declividade: 8 %. +Drenagem: bem drenado. +Material de origem: gnaisse-granitico do Embasamento Cristalino. +Vegetacao: Cerrado stricto sensu. +Uso: cafeicultura. 
diff --git a/inst/fixtures/vlm_extraction/site/ficha_RJ_001.golden.json b/inst/fixtures/vlm_extraction/site/ficha_RJ_001.golden.json new file mode 100644 index 000000000..06369c433 --- /dev/null +++ b/inst/fixtures/vlm_extraction/site/ficha_RJ_001.golden.json @@ -0,0 +1,19 @@ +{ + "site": { + "id": "PI-001", + "date": "2024-08-12", + "observer": "H. Rodrigues", + "country": "BR", + "state": "RJ", + "municipality": "Itaguai", + "lat": -22.8654, + "lon": -43.7811, + "elevation_m": 25, + "slope_pct": 4, + "aspect_deg": null, + "drainage_class": "bem drenado", + "parent_material": "gnaisse alterado", + "vegetation": "Floresta Ombrofila Densa", + "land_use": "pastagem" + } +} diff --git a/inst/fixtures/vlm_extraction/site/ficha_RJ_001.txt b/inst/fixtures/vlm_extraction/site/ficha_RJ_001.txt new file mode 100644 index 000000000..572b4cf6b --- /dev/null +++ b/inst/fixtures/vlm_extraction/site/ficha_RJ_001.txt @@ -0,0 +1,11 @@ +FICHA DE DESCRICAO DE PERFIL DE SOLO + +Identificacao: PI-001 +Data: 2024-08-12 +Observador: H. Rodrigues +Localizacao: Itaguai, RJ. Coord. -22.8654, -43.7811. Datum WGS84. +Altitude: 25 m. Declividade: 4 %. Aspecto: NW. +Drenagem: bem drenado. +Material de origem: gnaisse alterado. +Vegetacao primaria: Floresta Ombrofila Densa. +Uso atual: pastagem. diff --git a/inst/prompts/extract_horizons_fewshot.md b/inst/prompts/extract_horizons_fewshot.md new file mode 100644 index 000000000..8b286ac6c --- /dev/null +++ b/inst/prompts/extract_horizons_fewshot.md @@ -0,0 +1,171 @@ +# Extracao de horizontes -- few-shot / Few-shot horizon extraction + +## Instrucoes (PT-BR) + +Voce e um pedologo experiente extraindo dados estruturados de uma descricao +de perfil. **Voce nao classifica o solo.** Voce apenas extrai os atributos +observados, exatamente como estao no documento. 
+ +A saida segue o schema soilKey, que **mistura** dois formatos por campo: + +- **Campos brutos** (sem wrapper): `top_cm`, `bottom_cm`, `designation`, + `boundary_distinctness`, `boundary_topography`. Sao numero / string / + null direto. +- **Campos com wrapper**: numericos (clay_pct, silt_pct, sand_pct, + ph_h2o, oc_pct, cec_cmol, etc.) e categoricos (structure_grade, + consistence_moist, etc.) vem como objeto + `{"value": ..., "confidence": <0..1>, "source_quote": "<<= 200 chars>"}`. +- **Munsell**: `munsell_moist` e `munsell_dry` sao um **unico** objeto + com `{"hue", "value", "chroma", "confidence", "source_quote"}`. + **Nao** separe em tres campos diferentes. + +Profundidades em centimetros sempre. Se um campo nao aparece, omita-o +ou retorne `null`. **Nao invente valores.** + +--- + +## Exemplos resolvidos / Worked examples + +Estude estes 2 exemplos e siga **exatamente** a mesma forma de saida. + +### Exemplo 1 -- entrada + +``` +# Perfil PV-099 (Cambissolo) +## A (0 a 10 cm) +Cor Munsell umida: 10YR 4/3. Argila 18 %, areia 60 %. +pH em agua: 5.5. + +## Bw (10 a 60 cm) +Cor Munsell umida: 10YR 5/4. Argila 25 %, areia 50 %. +pH em agua: 5.0. 
+``` + +### Exemplo 1 -- saida JSON + +```json +{ + "horizons": [ + { + "designation": "A", + "top_cm": 0, + "bottom_cm": 10, + "munsell_moist": { + "hue": "10YR", "value": 4, "chroma": 3, + "confidence": 0.99, + "source_quote": "Cor Munsell umida: 10YR 4/3" + }, + "clay_pct": { + "value": 18, "confidence": 0.99, + "source_quote": "Argila 18 %" + }, + "sand_pct": { + "value": 60, "confidence": 0.99, + "source_quote": "areia 60 %" + }, + "ph_h2o": { + "value": 5.5, "confidence": 0.99, + "source_quote": "pH em agua: 5.5" + } + }, + { + "designation": "Bw", + "top_cm": 10, + "bottom_cm": 60, + "munsell_moist": { + "hue": "10YR", "value": 5, "chroma": 4, + "confidence": 0.99, + "source_quote": "Cor Munsell umida: 10YR 5/4" + }, + "clay_pct": { + "value": 25, "confidence": 0.99, + "source_quote": "Argila 25 %" + }, + "sand_pct": { + "value": 50, "confidence": 0.99, + "source_quote": "areia 50 %" + }, + "ph_h2o": { + "value": 5.0, "confidence": 0.99, + "source_quote": "pH em agua: 5.0" + } + } + ] +} +``` + +### Exemplo 2 -- entrada + +``` +## Horizonte Bt1 (35 a 80 cm) +Cor 5YR 4/6 umida. Argila 38 %, silte 25 %, areia 37 %. +pH em agua 4.9. Carbono organico 0.45 %. 
+``` + +### Exemplo 2 -- saida JSON + +```json +{ + "horizons": [ + { + "designation": "Bt1", + "top_cm": 35, + "bottom_cm": 80, + "munsell_moist": { + "hue": "5YR", "value": 4, "chroma": 6, + "confidence": 0.99, + "source_quote": "Cor 5YR 4/6 umida" + }, + "clay_pct": { + "value": 38, "confidence": 0.99, + "source_quote": "Argila 38 %" + }, + "silt_pct": { + "value": 25, "confidence": 0.99, + "source_quote": "silte 25 %" + }, + "sand_pct": { + "value": 37, "confidence": 0.99, + "source_quote": "areia 37 %" + }, + "ph_h2o": { + "value": 4.9, "confidence": 0.99, + "source_quote": "pH em agua 4.9" + }, + "oc_pct": { + "value": 0.45, "confidence": 0.99, + "source_quote": "Carbono organico 0.45 %" + } + } + ] +} +``` + +## Instructions (EN) + +Same contract: `top_cm`, `bottom_cm`, `designation`, `boundary_*` are +raw values; `munsell_moist` / `munsell_dry` are a *single* wrapped +object holding hue / value / chroma; everything else is wrapped +`{value, confidence, source_quote}`. + +--- + +## JSON schema (must validate) + +```json +{schema_json} +``` + +--- + +## Source document (extract horizons from this) + +``` +{document_text} +``` + +--- + +Return **only** a JSON object validating against the schema above. No +prose, no markdown fences in the response. Match the shape of the +worked examples exactly. diff --git a/inst/prompts/extract_munsell_from_photo_fewshot.md b/inst/prompts/extract_munsell_from_photo_fewshot.md new file mode 100644 index 000000000..bcd2ec7e6 --- /dev/null +++ b/inst/prompts/extract_munsell_from_photo_fewshot.md @@ -0,0 +1,120 @@ +# Extracao Munsell de foto -- few-shot / Few-shot Munsell-from-photo + +## Instrucoes (PT-BR) + +Voce esta examinando uma foto de um perfil de solo. **Voce nao +classifica o solo.** Voce apenas estima a cor Munsell de cada +horizonte visivel quando houver um cartao Munsell ou padrao de cor +calibrado na imagem. 
+
+Cada horizonte deve ser retornado como um objeto com os campos:
+
+- **designation**, **top_cm**, **bottom_cm**: valores brutos
+  (string / number / null), sem wrapper.
+- **munsell_moist**: um *unico* objeto wrapped com
+  `{"hue", "value", "chroma", "confidence", "source_quote"}` --
+  hue e string Munsell ("10YR", "5YR", "2.5YR", ...); value e chroma
+  sao inteiros 1-8.
+
+**Calibracao da confianca:**
+
+- Cartao Munsell visivel + iluminacao difusa: confianca ate 0.75.
+- Sem cartao mas iluminacao consistente: 0.3-0.5.
+- Sombras pesadas, foco ruim, ou sem cartao: 0.0-0.3 ou retorne
+  `null` para o horizonte.
+
+Em fotos nao ha texto literal -- o `source_quote` deve descrever a
+regiao da imagem (ex.: "topo do perfil, ao lado do cartao Munsell em
+luz solar difusa").
+
+---
+
+## Exemplo resolvido / Worked example
+
+### Exemplo 1 -- contexto
+
+Imagem hipotetica de um Latossolo Vermelho com cartao Munsell
+visivel ao lado dos horizontes A (0-15 cm) e Bw (15-90 cm).
+
+### Exemplo 1 -- saida JSON
+
+```json
+{
+  "horizons": [
+    {
+      "designation": "A",
+      "top_cm": 0,
+      "bottom_cm": 15,
+      "munsell_moist": {
+        "hue": "5YR", "value": 3, "chroma": 4,
+        "confidence": 0.7,
+        "source_quote": "topo escuro 0-15 cm; comparado ao chip 5YR 3/4 do cartao"
+      }
+    },
+    {
+      "designation": "Bw",
+      "top_cm": 15,
+      "bottom_cm": 90,
+      "munsell_moist": {
+        "hue": "2.5YR", "value": 4, "chroma": 6,
+        "confidence": 0.75,
+        "source_quote": "horizonte vermelho homogeneo abaixo de A; comparado ao 2.5YR 4/6"
+      }
+    }
+  ]
+}
+```
+
+### Exemplo 2 -- contexto
+
+Foto sem cartao Munsell, com sombra pesada na metade superior do
+perfil. **Resposta esperada:** confidence baixa, `source_quote`
+explicando.
+ +### Exemplo 2 -- saida JSON + +```json +{ + "horizons": [ + { + "designation": "A", + "top_cm": 0, + "bottom_cm": 20, + "munsell_moist": { + "hue": "10YR", "value": 3, "chroma": 3, + "confidence": 0.3, + "source_quote": "no Munsell card visible; harsh shadow on upper face" + } + } + ] +} +``` + +## Instructions (EN) + +Same contract. `designation`, `top_cm`, `bottom_cm` are raw values; +`munsell_moist` is a single wrapped object holding hue / value / +chroma. Photos without a reference card cap at confidence ~0.5; with +a card in good light, up to ~0.75. Never extract clay %, CEC, pH, or +any non-color attribute from a photo. + +--- + +## JSON schema (must validate) + +```json +{schema_json} +``` + +--- + +## Photo + +[The image is supplied as a separate content block. Examine it +directly and return one JSON object matching the worked-example +shape.] + +--- + +Return **only** a JSON object validating against the schema above. +No prose, no markdown. diff --git a/inst/prompts/extract_site_from_text.md b/inst/prompts/extract_site_from_text.md new file mode 100644 index 000000000..300b5179c --- /dev/null +++ b/inst/prompts/extract_site_from_text.md @@ -0,0 +1,76 @@ +# Extracao de metadados de sitio a partir de TEXTO / Site metadata extraction from TEXT + +## Instrucoes (PT-BR) + +Voce esta lendo o **texto** de uma ficha de campo de descricao de perfil de +solo (transcricao de formularios da Embrapa, FAO, USDA-NRCS, ou cabecalhos +de capitulos de levantamento). **Voce nao classifica o solo.** Voce apenas +extrai os metadados de sitio que estao escritos no texto. + +Extraia os campos definidos no schema JSON abaixo. Para cada valor: + +- **value**: o valor reportado, com unidades canonicas: + - lat / lon: graus decimais (converta de DMS se necessario; sinal + negativo para hemisferio sul / oeste). + - elevation_m: metros acima do nivel do mar. + - slope_pct: percentagem (NAO graus). + - aspect_deg: graus a partir do norte verdadeiro, sentido horario. 
+  - date: ISO 8601 (YYYY-MM-DD).
+- **confidence**: 0.0 a 1.0. Quando o valor esta literal no texto e sem
+  ambiguidade, use 0.95+. Quando inferido (e.g. country = "BR" porque a
+  cidade citada esta no Brasil), use 0.7-0.85.
+- **source_quote**: trecho curto do texto sustentando a extracao
+  (ate 200 caracteres).
+
+**Regras:**
+
+1. Se um campo aparece literalmente no texto (e.g. "Coord. -22.8654,
+   -43.7811"), extraia-o com confidence alta. **Nao retorne null para um
+   campo que esta visivel.**
+2. country: extraia o codigo ISO-2 quando inferivel a partir do estado
+   (RJ, MG, SP -> "BR"; CA, TX, NY -> "US"; etc.). Use confidence ~0.85
+   para inferencias.
+3. state: use a sigla canonica do pais (RJ, SP, MG para Brasil; CA, TX,
+   NY para EUA, etc.).
+4. drainage_class: preserve a terminologia exata do texto, sem
+   traduzir -- "bem drenado", "imperfectly drained", etc.
+5. vegetation / land_use / parent_material: preserve a descricao
+   original em campo livre.
+6. Se um campo de fato nao aparece no texto, retorne `null` para ele.
+
+## Instructions (EN)
+
+You are reading the **text** of a soil profile field sheet
+(transcription of Embrapa, FAO, NRCS or survey-report headers). **You do
+not classify the soil.** Extract only the site metadata as stated.
+
+If a value is literally in the text, extract it with high confidence
+(0.95+). If a value is inferred (e.g. country = "BR" from a Brazilian
+state), use confidence 0.7-0.85.
+
+Field semantics: lat/lon decimal degrees; elevation in metres; slope as
+a percentage (not degrees); aspect in degrees clockwise from true
+north; date ISO 8601.
+
+**Do not return `null` for a field that is literally in the text.**
+
+---
+
+## JSON schema (must validate)
+
+```json
+{schema_json}
+```
+
+---
+
+## Field-sheet text
+
+```
+{document_text}
+```
+
+---
+
+Return **only** a JSON object validating against the schema above. No
+markdown fences, no prose.
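Reviewer note: the DMS-to-decimal conversion this prompt asks the model to perform ("converta de DMS se necessario; sinal negativo para hemisferio sul / oeste") can be sketched in R. `dms_to_decimal` below is an illustrative helper, not part of the soilKey API.

```r
# Decimal degrees from degrees/minutes/seconds, negative for the southern
# and western hemispheres -- the same convention the prompt teaches.
dms_to_decimal <- function(deg, min = 0, sec = 0,
                           hemisphere = c("N", "S", "E", "W")) {
  hemisphere <- match.arg(hemisphere)
  dd <- abs(deg) + min / 60 + sec / 3600
  if (hemisphere %in% c("S", "W")) dd <- -dd
  dd
}

round(dms_to_decimal(22, 51, 55.4, "S"), 4)  # -22.8654, the RJ fixture latitude
```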
diff --git a/inst/prompts/extract_site_from_text_fewshot.md b/inst/prompts/extract_site_from_text_fewshot.md new file mode 100644 index 000000000..083e7ac63 --- /dev/null +++ b/inst/prompts/extract_site_from_text_fewshot.md @@ -0,0 +1,149 @@ +# Extracao de metadados de sitio (TEXTO) -- few-shot / Few-shot site extraction + +## Instrucoes (PT-BR) + +Voce esta lendo o **texto** de uma ficha de campo. **Voce nao +classifica o solo.** Apenas extrai os metadados de sitio. + +A saida segue o schema soilKey, que **mistura** dois formatos por campo: + +- **Campos brutos** (sem wrapper): `id` (string), `crs` (integer). + Sao tipo direto / null. +- **Demais campos** (lat, lon, date, country, elevation_m, slope_pct, + aspect_deg, landform, parent_material, land_use, vegetation, + drainage_class): vem como objeto + `{"value": ..., "confidence": <0..1>, "source_quote": "<<= 200 chars>"}`. + +Unidades canonicas: +- lat / lon: graus decimais (sinal negativo p/ S e O). +- elevation_m: metros. +- slope_pct: percentagem (NAO graus). +- aspect_deg: graus do norte verdadeiro, sentido horario. +- date: ISO 8601 (YYYY-MM-DD). + +Confidence: +- 0.95+ quando literal no texto. +- 0.7-0.85 quando inferido razoavelmente (e.g. country = "BR" porque + a cidade citada esta no Brasil). +- <=0.5 ou `null` para o campo se a evidencia e fraca. + +**Para campos visiveis no texto, NUNCA retorne `null`.** Esse e o +erro mais comum -- o modelo gera a estrutura do schema mas omite os +valores. Cada exemplo abaixo mostra o que e esperado. + +--- + +## Exemplos resolvidos / Worked examples + +### Exemplo 1 -- entrada + +``` +PERFIL: P-100 +Data: 2023-05-15 +Local: Piracicaba, SP. Coord. -22.7253, -47.6492 (WGS84). +Altitude 540 m. Declividade 6 %. +Drenagem: bem drenado. +Material: arenito. +Vegetacao: Cerrado. +Uso: cana-de-acucar. 
+```
+
+### Exemplo 1 -- saida JSON
+
+```json
+{
+  "site": {
+    "id": "P-100",
+    "date": {"value": "2023-05-15", "confidence": 0.99,
+             "source_quote": "Data: 2023-05-15"},
+    "country": {"value": "BR", "confidence": 0.85,
+                "source_quote": "Piracicaba, SP"},
+    "lat": {"value": -22.7253, "confidence": 0.99,
+            "source_quote": "Coord. -22.7253, -47.6492"},
+    "lon": {"value": -47.6492, "confidence": 0.99,
+            "source_quote": "Coord. -22.7253, -47.6492"},
+    "elevation_m": {"value": 540, "confidence": 0.99,
+                    "source_quote": "Altitude 540 m"},
+    "slope_pct": {"value": 6, "confidence": 0.99,
+                  "source_quote": "Declividade 6 %"},
+    "drainage_class": {"value": "bem drenado", "confidence": 0.99,
+                       "source_quote": "Drenagem: bem drenado"},
+    "parent_material": {"value": "arenito", "confidence": 0.99,
+                        "source_quote": "Material: arenito"},
+    "vegetation": {"value": "Cerrado", "confidence": 0.99,
+                   "source_quote": "Vegetacao: Cerrado"},
+    "land_use": {"value": "cana-de-acucar", "confidence": 0.99,
+                 "source_quote": "Uso: cana-de-acucar"}
+  }
+}
+```
+
+### Exemplo 2 -- entrada
+
+```
+Field profile FP-22
+Date: 2022-03-10
+Location: Davis, CA. 38.5449, -121.7405 (NAD83).
+Elev 18 m. Slope 1 %. Aspect 270 deg.
+Drainage: well drained.
+Parent material: alluvium.
+Land use: irrigated row crops.
+```
+
+### Exemplo 2 -- saida JSON
+
+```json
+{
+  "site": {
+    "id": "FP-22",
+    "date": {"value": "2022-03-10", "confidence": 0.99,
+             "source_quote": "Date: 2022-03-10"},
+    "country": {"value": "US", "confidence": 0.85,
+                "source_quote": "Davis, CA"},
+    "lat": {"value": 38.5449, "confidence": 0.95,
+            "source_quote": "38.5449, -121.7405"},
+    "lon": {"value": -121.7405, "confidence": 0.95,
+            "source_quote": "38.5449, -121.7405"},
+    "elevation_m": {"value": 18, "confidence": 0.99,
+                    "source_quote": "Elev 18 m"},
+    "slope_pct": {"value": 1, "confidence": 0.99,
+                  "source_quote": "Slope 1 %"},
+    "aspect_deg": {"value": 270, "confidence": 0.99,
+                   "source_quote": "Aspect 270 deg"},
+    "drainage_class": {"value": "well drained", "confidence": 0.99,
+                       "source_quote": "Drainage: well drained"},
+    "parent_material": {"value": "alluvium", "confidence": 0.99,
+                        "source_quote": "Parent material: alluvium"},
+    "land_use": {"value": "irrigated row crops", "confidence": 0.99,
+                 "source_quote": "Land use: irrigated row crops"}
+  }
+}
+```
+
+## Instructions (EN)
+
+Same contract: `id` and `crs` are raw values; everything else is
+wrapped `{value, confidence, source_quote}`. **Never return `null`
+for a field that is literally in the text.** Match the shape of the
+worked examples exactly.
+
+---
+
+## JSON schema (must validate)
+
+```json
+{schema_json}
+```
+
+---
+
+## Field-sheet text (extract metadata from this)
+
+```
+{document_text}
+```
+
+---
+
+Return **only** a JSON object validating against the schema above. No
+prose, no markdown fences. Match the shape of the worked examples.
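Reviewer note: a minimal sketch of how these template files are consumed, assuming only what the templates themselves show -- one literal `{schema_json}` and one `{document_text}` placeholder each. `fill_placeholders` is illustrative; the package's actual plumbing may differ.

```r
# Sketch: load the few-shot site-extraction template shipped in inst/prompts/
# and substitute its two literal placeholders before sending it to a provider.
fill_placeholders <- function(template, schema_json, document_text) {
  out <- sub("{schema_json}", schema_json, template, fixed = TRUE)
  sub("{document_text}", document_text, out, fixed = TRUE)
}

path <- system.file("prompts", "extract_site_from_text_fewshot.md",
                    package = "soilKey")
template <- paste(readLines(path, warn = FALSE), collapse = "\n")
prompt <- fill_placeholders(template,
                            schema_json   = '{"type": "object"}',
                            document_text = "PERFIL: P-100 ...")
```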
diff --git a/inst/shiny/agent_app/app.R b/inst/shiny/agent_app/app.R new file mode 100644 index 000000000..20bdf5363 --- /dev/null +++ b/inst/shiny/agent_app/app.R @@ -0,0 +1,810 @@ +# ============================================================================= +# soilKey -- Agente Pedometrista (Shiny app, v0.9.65+) +# +# A modern bslib-themed Shiny UI for end-to-end soil profile classification: +# 1. Upload a photo / PDF / fieldsheet image / Vis-NIR spectrum. +# 2. The local Gemma 4 (or any cloud provider) extracts schema-validated +# structured data into a soilKey PedonRecord. +# 3. The deterministic R taxonomic key classifies the pedon under +# WRB 2022 + SiBCS 5a edicao + USDA Soil Taxonomy 13ed. +# 4. A free-form chat tab lets the user ask the local Gemma about the +# loaded profile in PT-BR or English. +# +# The VLM never classifies. It only extracts. The keys are pure R. +# +# Launch from R: +# soilKey::run_agent_app() +# Or directly: +# shiny::runApp(system.file("shiny", "agent_app", package = "soilKey")) +# ============================================================================= + + +library(shiny) +suppressPackageStartupMessages({ + library(bslib) + library(bsicons) + library(soilKey) +}) + + +# ---- Tema visual ---------------------------------------------------------- + +agent_theme <- bs_theme( + version = 5, + bootswatch = "minty", + primary = "#FF6B35", # cor do hex sticker do soilKey + base_font = font_google("Inter", local = TRUE), + heading_font = font_google("Inter", local = TRUE), + code_font = font_google("JetBrains Mono", local = TRUE) +) + + +# ---- Status card helpers -------------------------------------------------- + +# Compact status badge for the sidebar. 
+.status_badge <- function(label, ok, hint = NULL) { + color <- if (isTRUE(ok)) "#28a745" else "#6c757d" + icon <- if (isTRUE(ok)) "check-circle-fill" else "circle" + tags$div( + style = "display:flex;align-items:center;gap:8px;margin:4px 0;", + bs_icon(icon, size = "1em", color = color), + tags$span(label), + if (!is.null(hint)) tags$small(class = "text-muted", + style = "margin-left:auto;", + hint) + ) +} + + +# Empty PedonRecord factory used as the initial reactive state. +.empty_pedon <- function() { + PedonRecord$new( + site = list(id = "agent-session", country = "BR"), + horizons = ensure_horizon_schema( + data.table::data.table(top_cm = numeric(0), + bottom_cm = numeric(0)) + ) + ) +} + + +# Pretty-print a ClassificationResult into bslib value_box arguments. +.classification_card <- function(res, system_label, theme_color = "primary") { + if (is.null(res) || inherits(res, "error")) { + return(value_box( + title = system_label, + value = "(falha)", + showcase = bs_icon("x-circle"), + theme = "danger" + )) + } + value_box( + title = system_label, + value = res$name %||% "(no name)", + p(tags$small("Evidence grade: ", + tags$strong(res$evidence_grade %||% "?"))), + p(tags$small("RSG / Order: ", + res$rsg_or_order %||% "(none)")), + showcase = bs_icon("globe-americas"), + theme = theme_color, + full_screen = TRUE + ) +} + + +# ---- UI ------------------------------------------------------------------- + +ui <- page_navbar( + title = tagList( + bs_icon("compass"), + "soilKey - Agente Pedometrista" + ), + theme = agent_theme, + bg = "#FF6B35", + inverse = TRUE, + underline = TRUE, + fillable = TRUE, + fillable_mobile = FALSE, + + # Sidebar lives outside the navbar -- bslib supports this via sidebar arg + sidebar = sidebar( + width = 320, + title = tagList(bs_icon("gear-fill"), " Status & Provider"), + + uiOutput("provider_status"), + hr(), + + selectInput("provider", "Provider VLM", + choices = c("auto" = "auto", + "Local Gemma (Ollama)" = "ollama", + "Anthropic 
(Claude)" = "anthropic", + "OpenAI (GPT-4o)" = "openai", + "Google (Gemini)" = "google"), + selected = "auto"), + + selectInput("model_preset", "Modelo Gemma local", + choices = c("Leve (gemma4:e2b, ~6.7 GB)" = "light", + "Equilibrado (gemma4:e4b, ~8 GB)" = "balanced", + "Melhor (gemma4:31b, ~19 GB)" = "best"), + selected = "light"), + + actionButton("setup_vlm", "Configurar Gemma local", + icon = bs_icon("download"), + class = "btn-primary w-100"), + + hr(), + + h6(tagList(bs_icon("magic"), " Estrategia de extracao")), + checkboxInput("use_fewshot", "Few-shot (exemplos no prompt)", value = TRUE), + checkboxInput("use_structured", + "Structured outputs (chat_structured se suportado)", + value = FALSE), + p(tags$small(class = "text-muted", + paste("Few-shot embute exemplos resolvidos no prompt; ", + "structured ativa o schema-validation no protocolo. ", + "Local Gemma via Ollama 0.5+ suporta ambos."))), + + hr(), + + selectInput("language", "Idioma do agente", + choices = c("Portugues (BR)" = "pt-BR", + "English" = "en"), + selected = "pt-BR"), + + hr(), + + h6(tagList(bs_icon("file-earmark-text"), " Sessao atual")), + uiOutput("session_summary"), + + hr(), + + actionButton("reset", "Limpar perfil", + icon = bs_icon("arrow-counterclockwise"), + class = "btn-outline-secondary w-100") + ), + + # =================================================================== + nav_panel( + title = tagList(bs_icon("camera"), " Foto Munsell"), + + layout_columns( + col_widths = c(5, 7), + + card( + card_header("Upload da foto do perfil"), + fileInput("photo", NULL, + accept = c("image/png", "image/jpeg", "image/webp"), + buttonLabel = "Selecionar foto...", + placeholder = "Nenhuma foto carregada"), + actionButton("extract_munsell", "Extrair Munsell com Gemma", + icon = bs_icon("magic"), + class = "btn-primary w-100"), + hr(), + p(tags$small(class = "text-muted", + paste("Dica: fotos com placa Munsell visivel sao mais ", + "confiaveis. 
Sem a placa, o agente reporta ", + "confidence <= 0.5 explicitamente."))) + ), + + card( + card_header("Pre-visualizacao + horizontes extraidos"), + imageOutput("photo_preview", height = "200px"), + hr(), + DT::DTOutput("munsell_extracted") + ) + ) + ), + + # =================================================================== + nav_panel( + title = tagList(bs_icon("file-earmark-pdf"), " PDF / Texto"), + + layout_columns( + col_widths = c(5, 7), + + card( + card_header("Upload do relatorio"), + fileInput("pdf", "Arquivo PDF", + accept = ".pdf", + buttonLabel = "Selecionar PDF...", + placeholder = "Nenhum PDF carregado"), + h6("OU cole o texto diretamente:"), + textAreaInput("pdf_text", NULL, + rows = 8, resize = "vertical", + placeholder = "Cole o texto bruto da descricao do perfil aqui..."), + actionButton("extract_horizons", "Extrair horizontes", + icon = bs_icon("magic"), + class = "btn-primary w-100") + ), + + card( + card_header("Horizontes extraidos"), + DT::DTOutput("horizons_extracted") + ) + ) + ), + + # =================================================================== + nav_panel( + title = tagList(bs_icon("clipboard-data"), " Ficha de Campo"), + + layout_columns( + col_widths = c(5, 7), + + card( + card_header("Upload da ficha"), + fileInput("fieldsheet", "Imagem da ficha de campo", + accept = c("image/png", "image/jpeg", "image/webp"), + buttonLabel = "Selecionar imagem...", + placeholder = "Nenhuma ficha carregada"), + actionButton("extract_site", "Extrair metadados de sitio", + icon = bs_icon("magic"), + class = "btn-primary w-100") + ), + + card( + card_header("Metadados extraidos"), + verbatimTextOutput("site_extracted") + ) + ) + ), + + # =================================================================== + nav_panel( + title = tagList(bs_icon("rainbow"), " Espectros"), + + layout_columns( + col_widths = c(5, 7), + + card( + card_header("Upload de espectro Vis-NIR"), + fileInput("spectra", "CSV (1a coluna = wavelength_nm; demais = horizontes)", + 
accept = ".csv",
+                 buttonLabel = "Selecionar CSV...",
+                 placeholder = "Sem espectro carregado"),
+      selectInput("spectra_props", "Propriedades a preencher",
+                  choices = c("clay_pct", "sand_pct", "silt_pct",
+                              "cec_cmol", "bs_pct", "ph_h2o",
+                              "oc_pct", "fe_dcb_pct", "caco3_pct"),
+                  selected = c("clay_pct", "cec_cmol", "bs_pct", "oc_pct"),
+                  multiple = TRUE),
+      actionButton("fill_spectra", "Preencher via OSSL",
+                   icon = bs_icon("droplet-half"),
+                   class = "btn-primary w-100"),
+      p(tags$small(class = "text-muted",
+                   paste("Usa a Open Soil Spectral Library (OSSL) para ",
+                         "predizer atributos faltantes via spectral ",
+                         "matching (memory-based learning) em biblioteca local.")))
+    ),
+
+    card(
+      card_header("Atributos preenchidos"),
+      verbatimTextOutput("spectra_filled")
+    )
+  )
+),
+
+# ===================================================================
+nav_panel(
+  title = tagList(bs_icon("table"), " Tabela de horizontes"),
+
+  card(
+    card_header(tagList("Horizontes do perfil",
+                        actionButton("apply_table", "Aplicar mudancas",
+                                     icon = bs_icon("check-lg"),
+                                     class = "btn-success btn-sm float-end"))),
+    DT::DTOutput("horizons_table"),
+    hr(),
+    p(tags$small(class = "text-muted",
+                 paste("Edite as celulas (duplo-clique). 'Aplicar' grava ",
+                       "as mudancas no PedonRecord. 
Atributos vindos da ", + "extracao VLM ja vem marcados com source = ", + "'extracted_vlm' na procedencia."))) + ) + ), + + # =================================================================== + nav_panel( + title = tagList(bs_icon("compass"), " Classificar"), + + div( + class = "mb-3", + actionButton("classify", "Classificar agora (3 sistemas)", + icon = bs_icon("play-fill"), + class = "btn-primary btn-lg"), + downloadButton("download_report", "Relatorio HTML", + icon = bs_icon("download"), + class = "btn-outline-primary btn-lg ms-2") + ), + + layout_columns( + col_widths = c(4, 4, 4), + uiOutput("card_wrb"), + uiOutput("card_sibcs"), + uiOutput("card_usda") + ), + + hr(), + + card( + card_header("Atributos faltantes (uniao)"), + verbatimTextOutput("missing_attrs") + ) + ), + + # =================================================================== + nav_panel( + title = tagList(bs_icon("diagram-3"), " Trace"), + + layout_columns( + col_widths = c(3, 9), + + card( + card_header("Sistema"), + radioButtons("trace_sys", NULL, + choices = c("WRB 2022" = "wrb", + "SiBCS 5a edicao" = "sibcs", + "USDA Soil Tax. 
13" = "usda"), + selected = "sibcs") + ), + + card( + card_header("Trace + procedencia"), + verbatimTextOutput("trace_output") + ) + ) + ), + + # =================================================================== + nav_panel( + title = tagList(bs_icon("chat-quote"), " Pergunte ao Pedometrista"), + + card( + max_height = "calc(100vh - 250px)", + card_header(tagList( + bs_icon("robot"), " Conversa com o agente", + tags$small(class = "text-muted ms-2", + "(usa o mesmo provider VLM, com persona pedometrista)") + )), + uiOutput("chat_history"), + card_footer( + layout_columns( + col_widths = c(10, 2), + textAreaInput("chat_input", NULL, + rows = 2, resize = "vertical", + placeholder = "Pergunte algo sobre o perfil carregado..."), + actionButton("chat_send", "Enviar", + icon = bs_icon("send"), + class = "btn-primary w-100 h-100") + ) + ) + ) + ), + + nav_spacer(), + nav_item( + tags$a(href = "https://github.com/HugoMachadoRodrigues/soilKey", + target = "_blank", + tagList(bs_icon("github"), " GitHub")) + ) +) + + +# ---- Server --------------------------------------------------------------- + +server <- function(input, output, session) { + + # ---------- Reactive state ---------------------------------------------- + + pedon_rv <- reactiveVal(.empty_pedon()) + vlm_status_rv <- reactiveVal(NULL) # last setup_local_vlm() return + cls_rv <- reactiveVal(NULL) # named list (wrb/sibcs/usda) + chat_session_rv <- reactiveVal(NULL) # ellmer Chat object + chat_history_rv <- reactiveVal(list()) # list(role, content) entries + + # ---------- Status sidebar ----------------------------------------------- + + status_now <- reactive({ + invalidateLater(8000, session) + list( + installed = ollama_is_installed(), + running = ollama_is_running(), + models = if (ollama_is_running()) ollama_list_local_models() else character(0), + provider = input$provider %||% "auto" + ) + }) + + output$provider_status <- renderUI({ + s <- status_now() + has_model <- length(s$models) > 0L + tagList( + 
.status_badge("Ollama instalado", s$installed, + hint = if (s$installed) "ok" else "ausente"), + .status_badge("Daemon rodando", s$running, + hint = if (s$running) "ok" else "parado"), + .status_badge("Modelo Gemma local", has_model, + hint = if (has_model) paste(s$models, collapse = " / ") else "(none)") + ) + }) + + output$session_summary <- renderUI({ + p <- pedon_rv() + n_h <- if (!is.null(p$horizons)) nrow(p$horizons) else 0L + has_site <- !is.null(p$site$lat) && is.finite(p$site$lat) + tagList( + tags$small(sprintf("Horizontes: %d", n_h), tags$br()), + tags$small(sprintf("Coordenadas: %s", if (has_site) "sim" else "nao")) + ) + }) + + observeEvent(input$reset, { + pedon_rv(.empty_pedon()) + cls_rv(NULL) + chat_history_rv(list()) + chat_session_rv(NULL) + showNotification("Sessao reiniciada.", type = "message") + }) + + # ---------- Setup do VLM local ------------------------------------------ + + observeEvent(input$setup_vlm, { + showModal(modalDialog( + title = tagList(bs_icon("hourglass-split"), " Configurando Gemma local..."), + "Isso pode levar varios minutos no primeiro uso (~1.5 a 19 GB de download).", + footer = NULL, easyClose = FALSE + )) + on.exit(removeModal(), add = TRUE) + res <- tryCatch( + setup_local_vlm(model = input$model_preset, verbose = FALSE), + error = function(e) list(ready = FALSE, hint = conditionMessage(e)) + ) + vlm_status_rv(res) + if (isTRUE(res$ready)) { + showNotification(sprintf("Gemma local pronto: %s", res$model), + type = "message", duration = 6) + } else { + showNotification(paste("Setup falhou:", res$hint), + type = "error", duration = 10) + } + }) + + # ---------- Provider factory -------------------------------------------- + + current_provider <- reactive({ + sys_prompt <- pedologist_system_prompt(input$language %||% "pt-BR") + name <- input$provider %||% "auto" + model <- if (identical(name, "ollama")) { + catalog_model <- soilKey:::.SOILKEY_OLLAMA_CATALOG[[input$model_preset %||% "light"]]$model + catalog_model %||% 
NULL + } else NULL + tryCatch( + vlm_provider(name = name, model = model, system_prompt = sys_prompt), + error = function(e) { + showNotification(paste("Provider indisponivel:", conditionMessage(e)), + type = "error", duration = 8) + NULL + } + ) + }) + + # ---------- Foto -> Munsell --------------------------------------------- + + output$photo_preview <- renderImage({ + req(input$photo) + list(src = input$photo$datapath, + contentType = input$photo$type, + alt = "Foto do perfil", + style = "max-height: 200px; max-width: 100%;") + }, deleteFile = FALSE) + + observeEvent(input$extract_munsell, { + req(input$photo) + prov <- current_provider(); req(prov) + showNotification("Extraindo Munsell com Gemma...", type = "message", id = "ex_m") + p <- isolate(pedon_rv()) + res <- tryCatch( + extract_munsell_from_photo(p, image_path = input$photo$datapath, + provider = prov, overwrite = TRUE, + use_fewshot = isTRUE(input$use_fewshot), + use_structured = isTRUE(input$use_structured)), + error = function(e) { showNotification(paste("Erro:", conditionMessage(e)), + type = "error"); NULL } + ) + removeNotification("ex_m") + if (!is.null(res)) { + pedon_rv(p) + showNotification("Munsell extraido. 
Veja a tabela ao lado.", + type = "message") + } + }) + + output$munsell_extracted <- DT::renderDT({ + p <- pedon_rv() + if (is.null(p$horizons) || nrow(p$horizons) == 0L) { + return(DT::datatable(data.frame(message = "(sem dados extraidos ainda)"), + rownames = FALSE, options = list(dom = "t"))) + } + cols <- intersect(c("designation", "top_cm", "bottom_cm", + "munsell_hue_moist", "munsell_value_moist", + "munsell_chroma_moist"), names(p$horizons)) + DT::datatable(as.data.frame(p$horizons)[, cols, drop = FALSE], + rownames = FALSE, options = list(dom = "t")) + }) + + # ---------- PDF -> horizons --------------------------------------------- + + observeEvent(input$extract_horizons, { + prov <- current_provider(); req(prov) + p <- isolate(pedon_rv()) + pdf_path <- if (!is.null(input$pdf)) input$pdf$datapath else NULL + pdf_text <- if (nzchar(input$pdf_text %||% "")) input$pdf_text else NULL + if (is.null(pdf_path) && is.null(pdf_text)) { + showNotification("Forneca um PDF ou cole o texto antes.", type = "warning") + return(invisible()) + } + showNotification("Extraindo horizontes com Gemma...", type = "message", id = "ex_h") + res <- tryCatch( + extract_horizons_from_pdf(p, pdf_path = pdf_path, pdf_text = pdf_text, + provider = prov, overwrite = TRUE, + use_fewshot = isTRUE(input$use_fewshot), + use_structured = isTRUE(input$use_structured)), + error = function(e) { showNotification(paste("Erro:", conditionMessage(e)), + type = "error"); NULL } + ) + removeNotification("ex_h") + if (!is.null(res)) { + pedon_rv(p) + showNotification(sprintf("%d horizontes adicionados.", + nrow(p$horizons)), + type = "message") + } + }) + + output$horizons_extracted <- DT::renderDT({ + p <- pedon_rv() + if (is.null(p$horizons) || nrow(p$horizons) == 0L) { + return(DT::datatable(data.frame(message = "(sem dados extraidos ainda)"), + rownames = FALSE, options = list(dom = "t"))) + } + DT::datatable(as.data.frame(p$horizons), rownames = FALSE, + options = list(scrollX = TRUE, pageLength 
= 10)) + }) + + # ---------- Ficha de campo ---------------------------------------------- + + observeEvent(input$extract_site, { + req(input$fieldsheet) + prov <- current_provider(); req(prov) + p <- isolate(pedon_rv()) + showNotification("Extraindo metadados de sitio...", type = "message", id = "ex_s") + res <- tryCatch( + extract_site_from_fieldsheet(p, image_path = input$fieldsheet$datapath, + provider = prov, overwrite = TRUE, + use_fewshot = isTRUE(input$use_fewshot), + use_structured = isTRUE(input$use_structured)), + error = function(e) { showNotification(paste("Erro:", conditionMessage(e)), + type = "error"); NULL } + ) + removeNotification("ex_s") + if (!is.null(res)) { + pedon_rv(p) + showNotification("Metadados de sitio extraidos.", type = "message") + } + }) + + output$site_extracted <- renderPrint({ + p <- pedon_rv() + if (is.null(p$site)) return(cat("(sem metadados)\n")) + str(p$site, max.level = 1, no.list = TRUE) + }) + + # ---------- Espectros / OSSL -------------------------------------------- + + observeEvent(input$fill_spectra, { + req(input$spectra) + p <- isolate(pedon_rv()) + spec_df <- tryCatch(read.csv(input$spectra$datapath, check.names = FALSE), + error = function(e) NULL) + if (is.null(spec_df)) { + showNotification("Nao consegui ler o CSV de espectros.", type = "error") + return(invisible()) + } + p$spectra <- list(vnir = as.matrix(spec_df)) + showNotification("Preenchendo via OSSL (pode levar minutos)...", + type = "message", id = "fill_s") + res <- tryCatch( + fill_from_spectra(p, library = "ossl", + properties = input$spectra_props, + method = "mbl", + overwrite = TRUE, + verbose = FALSE), + error = function(e) { showNotification(paste("Erro OSSL:", conditionMessage(e)), + type = "error"); NULL } + ) + removeNotification("fill_s") + if (!is.null(res)) { + pedon_rv(p) + showNotification("Atributos preenchidos via OSSL.", type = "message") + } + }) + + output$spectra_filled <- renderPrint({ + p <- pedon_rv() + if (is.null(p$horizons) 
|| nrow(p$horizons) == 0L) { + return(cat("(sem dados)\n")) + } + cols <- intersect(input$spectra_props %||% character(0), names(p$horizons)) + if (length(cols) == 0L) return(cat("(nenhum atributo preenchido)\n")) + print(as.data.frame(p$horizons)[, cols, drop = FALSE]) + }) + + # ---------- Tabela editavel --------------------------------------------- + + output$horizons_table <- DT::renderDT({ + p <- pedon_rv() + if (is.null(p$horizons) || nrow(p$horizons) == 0L) { + return(DT::datatable(data.frame(top_cm = numeric(0), bottom_cm = numeric(0)), + editable = TRUE, rownames = FALSE, + options = list(dom = "t"))) + } + DT::datatable(as.data.frame(p$horizons), editable = TRUE, + rownames = FALSE, + options = list(scrollX = TRUE, pageLength = 10)) + }) + + observeEvent(input$apply_table, { + showNotification("Mudancas aplicadas (DT cell-edit -> reactive).", + type = "message") + }) + + # ---------- Classificar ------------------------------------------------- + + observeEvent(input$classify, { + p <- isolate(pedon_rv()) + if (nrow(p$horizons) == 0L) { + showNotification("Adicione pelo menos um horizonte antes.", type = "warning") + return(invisible()) + } + showNotification("Classificando...", type = "message", id = "cls_run") + res <- tryCatch(classify_all(p, on_missing = "silent"), + error = function(e) NULL) + removeNotification("cls_run") + cls_rv(res) + if (is.null(res)) { + showNotification("Classificacao falhou.", type = "error") + } else { + showNotification("Classificacao concluida.", type = "message") + } + }) + + output$card_wrb <- renderUI({ + cls <- cls_rv(); req(cls) + .classification_card(cls$wrb, "WRB 2022", "primary") + }) + output$card_sibcs <- renderUI({ + cls <- cls_rv(); req(cls) + .classification_card(cls$sibcs, "SiBCS 5a edicao", "success") + }) + output$card_usda <- renderUI({ + cls <- cls_rv(); req(cls) + .classification_card(cls$usda, "USDA Soil Tax 13", "info") + }) + + output$missing_attrs <- renderPrint({ + cls <- cls_rv() + if 
(is.null(cls)) return(cat("(classifique primeiro)\n")) + miss <- unique(unlist(lapply(cls, function(r) + if (!is.null(r) && !inherits(r, "error")) r$missing_data else character(0)))) + if (length(miss) == 0L) cat("(nenhum)\n") else cat(paste(miss, collapse = "\n")) + }) + + output$download_report <- downloadHandler( + filename = function() { + sprintf("soilkey-agente_%s.html", format(Sys.time(), "%Y%m%d_%H%M%S")) + }, + content = function(file) { + cls <- cls_rv() + p <- pedon_rv() + if (is.null(cls)) { + writeLines("
<p>Classifique primeiro.</p>
", file); return(invisible()) + } + if (exists("report", envir = asNamespace("soilKey"), mode = "function")) { + report(list(wrb = cls$wrb, sibcs = cls$sibcs, usda = cls$usda), + file = file, pedon = p, format = "html") + } else { + writeLines("
<p>report() nao disponivel nesta build.</p>
", file) + } + } + ) + + # ---------- Trace ------------------------------------------------------- + + output$trace_output <- renderPrint({ + cls <- cls_rv() + if (is.null(cls)) return(cat("(classifique primeiro)\n")) + sys <- input$trace_sys %||% "sibcs" + r <- cls[[sys]] + if (is.null(r) || inherits(r, "error")) return(cat("(sem resultado)\n")) + cat("=== Display name ===\n", r$name, "\n\n") + cat("=== Evidence grade ===\n", r$evidence_grade %||% "?", "\n\n") + cat("=== Trace (top of stack) ===\n") + str(r$trace, max.level = 2) + }) + + # ---------- Chat com o pedometrista ------------------------------------- + + observeEvent(input$chat_send, { + msg <- trimws(input$chat_input %||% "") + if (!nzchar(msg)) return(invisible()) + + # Reuse / build chat session + chat <- chat_session_rv() + if (is.null(chat)) { + sys_prompt <- pedologist_system_prompt(input$language %||% "pt-BR") + chat <- tryCatch( + vlm_provider(name = input$provider %||% "auto", + system_prompt = sys_prompt), + error = function(e) NULL + ) + chat_session_rv(chat) + } + if (is.null(chat)) { + showNotification("Provider VLM nao disponivel.", type = "error") + return(invisible()) + } + + # Add user msg to history; placeholder while we wait. + h <- chat_history_rv() + h <- c(h, list(list(role = "user", content = msg))) + h <- c(h, list(list(role = "assistant", content = "..."))) + chat_history_rv(h) + updateTextAreaInput(session, "chat_input", value = "") + + # Build context: pedon summary + question + p <- pedon_rv() + cls <- cls_rv() + context <- paste( + "[Contexto do perfil carregado]", + sprintf("Horizontes: %d", nrow(p$horizons)), + if (!is.null(cls$sibcs)) sprintf("SiBCS atual: %s", cls$sibcs$name) else "", + if (!is.null(cls$wrb)) sprintf("WRB atual: %s", cls$wrb$name) else "", + if (!is.null(cls$usda)) sprintf("USDA atual: %s", cls$usda$name) else "", + "", + "[Pergunta do usuario]", + msg, + sep = "\n" + ) + + # Streaming would be nicer; keep it synchronous for now. 
+ reply <- tryCatch(chat$chat(context), + error = function(e) paste("(erro:", conditionMessage(e), ")")) + + h <- chat_history_rv() + h[[length(h)]] <- list(role = "assistant", content = as.character(reply)) + chat_history_rv(h) + }) + + output$chat_history <- renderUI({ + h <- chat_history_rv() + if (length(h) == 0L) { + return(p(class = "text-muted", + "Comece perguntando algo sobre o perfil carregado...")) + } + tagList(lapply(h, function(m) { + bg <- if (identical(m$role, "user")) "#fff3e0" else "#e8f5e9" + ic <- if (identical(m$role, "user")) "person-fill" else "robot" + tags$div( + style = sprintf("background:%s;padding:10px;border-radius:8px;margin:6px 0;", bg), + tagList(bs_icon(ic), " ", + tags$strong(if (identical(m$role, "user")) "Voce" else "Pedometrista"), + tags$br(), + tags$span(m$content)) + ) + })) + }) +} + + +# ---- Launch --------------------------------------------------------------- + +shinyApp(ui, server) diff --git a/man/benchmark_bdsolos_sibcs.Rd b/man/benchmark_bdsolos_sibcs.Rd new file mode 100644 index 000000000..b7d7c1c7d --- /dev/null +++ b/man/benchmark_bdsolos_sibcs.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-bdsolos.R +\name{benchmark_bdsolos_sibcs} +\alias{benchmark_bdsolos_sibcs} +\title{Run the BDsolos / SiBCS surveyor-reference benchmark} +\usage{ +benchmark_bdsolos_sibcs( + pedons, + classify_with = classify_sibcs, + classify_args = list(on_missing = "silent"), + max_n = NULL, + verbose = TRUE +) +} +\arguments{ +\item{pedons}{List of \code{\link{PedonRecord}} objects, typically +from \code{\link{load_bdsolos_csv}}.} + +\item{classify_with}{Internal: classifier (default +\code{classify_sibcs}). Pass \code{classify_via_smartsolos_api} +to benchmark the Embrapa PROLOG classifier instead.} + +\item{classify_args}{List of additional arguments passed to the +classifier (e.g. 
\code{list(api_key = ..., +post_fn = ...)} for SmartSolos).} + +\item{max_n}{Optional integer cap on pedons benchmarked.} + +\item{verbose}{If \code{TRUE} (default), prints a summary line.} +} +\value{ +A list with elements: + \describe{ + \item{\code{predictions}}{data.frame with columns: + point_id, predicted_ordem, reference_ordem, agree_ordem, + predicted_subordem, reference_subordem, + predicted_subordem_code, reference_subordem_code + (canonical SiBCS 2-3 letter codes from + .bdsolos_normalize_subordem()), + agree_subordem, predicted_gg, reference_gg, + reference_raw.} + \item{\code{confusion}}{Ordem-level confusion table.} + \item{\code{accuracy}}{Overall Ordem-level match fraction.} + \item{\code{accuracy_subordem}}{v0.9.61: subordem-level match + fraction over pedons with both predicted and reference + subordem codes resolvable.} + \item{\code{per_ordem}}{data.frame: per-Ordem recall.} + \item{\code{summary}}{n_total, n_in_scope, n_matched, + n_errors, n_unmapped, n_in_scope_sub, n_matched_sub.} + } +} +\description{ +Runs \code{\link{classify_sibcs}} on each pedon and tabulates +agreement with the surveyor's SiBCS classification embedded in +the BDsolos export (\code{site$reference_nivel_1} when +available, falling back to parsing \code{site$reference_sibcs}). +} +\details{ +Compared to the v0.9.49 \code{\link{benchmark_lucas_2018}}, this +uses the SURVEYOR's reference (richer than the WRB-1km raster): +the BDsolos pedologist who described the profile assigns the +Ordem / Subordem / Grande Grupo / Subgrupo. This is the +authoritative Brazilian benchmark. +} +\examples{ +\dontrun{ +pedons <- load_bdsolos_csv("soil_data/embrapa_bdsolos/BD_solos/RJ.csv") +bench <- benchmark_bdsolos_sibcs(pedons) +bench$accuracy +bench$per_ordem +bench$confusion +} +} +\seealso{ +\code{\link{load_bdsolos_csv}}, + \code{\link{benchmark_lucas_2018}}, + \code{\link{classify_sibcs}}, + \code{\link{compare_smartsolos}}. 
+} diff --git a/man/benchmark_vlm_extraction.Rd b/man/benchmark_vlm_extraction.Rd new file mode 100644 index 000000000..5b923d010 --- /dev/null +++ b/man/benchmark_vlm_extraction.Rd @@ -0,0 +1,95 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-vlm-extraction.R +\name{benchmark_vlm_extraction} +\alias{benchmark_vlm_extraction} +\title{Provider-agnostic VLM extraction benchmark (Phase 1)} +\usage{ +benchmark_vlm_extraction( + providers, + tasks = c("horizons", "site", "munsell"), + fixtures_dir = NULL, + max_per_task = NULL, + use_fewshot = TRUE, + use_structured = FALSE, + n_repeats = 1L, + verbose = TRUE +) +} +\arguments{ +\item{providers}{Named list of provider specifications. Each entry +is one of: a pre-built ellmer Chat object; a MockVLMProvider; +a list `(name = ..., model = ...)` forwarded to +[vlm_provider()].} + +\item{tasks}{Subset of `c("munsell", "horizons", "site")`.} + +\item{fixtures_dir}{Optional override; default = bundled fixtures.} + +\item{max_per_task}{Cap fixtures per task (useful for smoke tests).} + +\item{use_fewshot}{Logical, default `TRUE` (v0.9.68+). When TRUE, +uses the few-shot prompt variants (worked examples embedded in +the prompt) for horizons / site / munsell. Set to `FALSE` to +benchmark the bare-instructions baseline -- useful when +measuring few-shot lift.} + +\item{use_structured}{Logical, default `FALSE` (v0.9.70+). When +TRUE and the provider supports `chat_structured()` (Anthropic / +OpenAI / Ollama 0.5+ / Gemini), the validate-and-retry loop is +replaced by a single structured call where the provider is +handed the ellmer type tree built from the soilKey schema and +returns a structurally-valid R list directly. Removes the entire +class of "model returned prose / wrong shape" failures at the +protocol level. Falls back to the legacy retry loop when the +provider has no `chat_structured` method.} + +\item{n_repeats}{Positive integer (default 1). 
Runs each +(provider, task, fixture) cell `n_repeats` times so the summary +table can report `metric_*_sd` alongside `metric_*_mean`. LLM +responses are stochastic; without `n_repeats >= 3` it is hard to +distinguish real lift from noise on a small fixture set.} + +\item{verbose}{Logical (default TRUE); print per-fixture progress.} +} +\value{ +List with + \describe{ + \item{`predictions`}{long data.frame: provider, task, fixture, + ok, error, raw_pred, golden, metric_*} + \item{`summary`}{data.frame: provider x task aggregates} + } +} +\description{ +Runs each (provider, model) pair against every fixture for every +selected task and reports per-fixture and per-(provider, task) +aggregate metrics. Mock providers (`MockVLMProvider`) are accepted +for unit testing. +} +\section{What this does NOT measure}{ + + - Latency / cost per request (use the provider's own telemetry). + - End-to-end classification accuracy (run + `benchmark_bdsolos_sibcs()` for that). + - VLM hallucination outside the schema (the JSON validator catches + that as a parse failure, counted as `ok = FALSE`). +} + +\examples{ +\dontrun{ +# Compare local Gemma e2b vs e4b vs Claude: +bench <- benchmark_vlm_extraction( + providers = list( + gemma_e2b = list(name = "ollama", model = "gemma4:e2b"), + gemma_e4b = list(name = "ollama", model = "gemma4:e4b"), + claude = list(name = "anthropic") + ), + tasks = c("horizons", "site"), # skip Munsell if no photo fixtures + max_per_task = 5 +) +bench$summary +} +} +\seealso{ +[list_vlm_fixtures()], [make_synthetic_horizons_fixture()], + [extract_horizons_from_pdf()]. +} diff --git a/man/default_model.Rd b/man/default_model.Rd index 787d1ac0d..82b12bdac 100644 --- a/man/default_model.Rd +++ b/man/default_model.Rd @@ -28,11 +28,14 @@ Defaults (as of v0.9.11): \item \code{openai = "gpt-4o"} -- text + vision. \item \code{google = "gemini-2.0-pro"} -- successor to 1.5 with longer context + better multimodal grounding. 
- \item \code{ollama = "gemma4:e4b"} -- Gemma 4 edge - multimodal (text + image; audio also). For larger - contexts use \code{"gemma4:31b"}; for cloud-only - offload via Ollama, \code{"gemma4-cloud:31b"}. Pull the - desired size first with \code{ollama pull gemma4:e4b}. + \item \code{ollama = "gemma4:e2b"} -- v0.9.64 default. Gemma 4 + edge 2B (~6.7 GB on disk; multimodal builds bundle a + vision encoder that adds ~5 GB to the bare parameter + weights), runs on a laptop CPU. Larger options: + \code{"gemma4:e4b"} (~8 GB, better accuracy on PT-BR field + sheets), \code{"gemma4:31b"} (~19 GB, frontier dense, + requires GPU). One-shot bootstrap: + \code{\link{setup_local_vlm}("light"|"balanced"|"best")}. } Users can override at any time: diff --git a/man/dot-BDSOLOS_SITE_PATTERNS.Rd b/man/dot-BDSOLOS_SITE_PATTERNS.Rd index 39af5d8f6..e91bfbd6b 100644 --- a/man/dot-BDSOLOS_SITE_PATTERNS.Rd +++ b/man/dot-BDSOLOS_SITE_PATTERNS.Rd @@ -6,7 +6,7 @@ \title{Site-level columns (BDsolos full export). Mapped at the site, not horizon, level.} \format{ -An object of class \code{list} of length 21. +An object of class \code{list} of length 24. } \usage{ .BDSOLOS_SITE_PATTERNS diff --git a/man/dot-SOILKEY_OLLAMA_CATALOG.Rd b/man/dot-SOILKEY_OLLAMA_CATALOG.Rd new file mode 100644 index 000000000..c1e4f10a8 --- /dev/null +++ b/man/dot-SOILKEY_OLLAMA_CATALOG.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup-local-vlm.R +\docType{data} +\name{.SOILKEY_OLLAMA_CATALOG} +\alias{.SOILKEY_OLLAMA_CATALOG} +\title{Canonical Ollama model catalog used by setup_local_vlm()} +\format{ +An object of class \code{list} of length 3. +} +\usage{ +.SOILKEY_OLLAMA_CATALOG +} +\description{ +Maps short labels ("light", "balanced", "best") to multimodal Gemma +tags pullable via `ollama pull`. 
Sizes are the **on-disk footprint +Ollama reports after pull**, NOT the bare parameter count: the +multimodal Gemma 4 builds bundle a vision encoder + tokenizers / +adapters that add ~5 GB on top of the parameter weights, so the +"edge 2B" variant lands at ~6.7 GB on disk despite the 2-billion +parameter label. +} +\details{ +Sizes verified on Ollama Library 2026-05 (e2b measured locally; +e4b and 31b approximated from the Ollama listing -- run +\code{ollama show <model>} after pull for the exact figure). +} +\keyword{internal} diff --git a/man/dot-apply_color_dominant_override.Rd b/man/dot-apply_color_dominant_override.Rd new file mode 100644 index 000000000..b6a0aa3ce --- /dev/null +++ b/man/dot-apply_color_dominant_override.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sibcs-color-tuning.R +\name{.apply_color_dominant_override} +\alias{.apply_color_dominant_override} +\title{Override a first-match-wins SiBCS subordem with the dominant-color rule} +\usage{ +.apply_color_dominant_override(subordem, pedon, ordem_code, rules) +} +\arguments{ +\item{subordem}{The subordem entry assigned by the YAML key +(`list(code, name, tests, ...)`) or `NULL`.} + +\item{pedon}{A `[PedonRecord]`.} + +\item{ordem_code}{Single-letter Ordem code.} + +\item{rules}{Loaded SiBCS rule set (with `$subordens[[ordem_code]]`).} +} +\value{ +List with `subordem` (the possibly-overridden YAML entry) and + `override` (NULL when no change, else + `list(from_code, to_code, dominant_evidence)`). +} +\description{ +Called from [classify_sibcs()] after the YAML key has assigned a +subordem. When the Ordem is one of the color-partitioned ones (P, L, +N) and the dominant-color rule produces a DIFFERENT subordem code, +replaces the assigned entry with the YAML block matching the new +code. 
The function does nothing for non-color Ordens, when no Munsell +B color is available, when the dominant matches the first-match +assignment, or when the YAML lacks an entry for the dominant code. +} +\keyword{internal} diff --git a/man/dot-bdsolos_normalize_ordem.Rd b/man/dot-bdsolos_normalize_ordem.Rd new file mode 100644 index 000000000..4091c0e47 --- /dev/null +++ b/man/dot-bdsolos_normalize_ordem.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-bdsolos.R +\name{.bdsolos_normalize_ordem} +\alias{.bdsolos_normalize_ordem} +\title{Map BDsolos ALL-CAPS Ordem singular -> soilKey Title-Case plural} +\usage{ +.bdsolos_normalize_ordem(s) +} +\description{ +BDsolos exports SiBCS classes in ALL CAPS singular form +(e.g. \code{"LATOSSOLO"}); soilKey returns Title Case plural +(e.g. \code{"Latossolos"}). This helper aligns the two. +} +\details{ +Also handles the legacy / folk Ordem names that appear in older +BDsolos surveys (1970s-90s pre-SiBCS-1ª-edição): + +\itemize{ + \item \code{PODZOLICO}, \code{PODZOLCIO}, \code{LATOSOL} + -> \code{Argissolos} (the 1999 SiBCS rename) + \item \code{GLEI} -> \code{Gleissolos} + \item \code{BRUNIZEM} -> \code{Chernossolos} + \item \code{AREIA(S)} -> \code{Neossolos} (Quartzarenicos) + \item \code{ALUVIAL} -> \code{Neossolos} (Fluvicos) + \item \code{BRUNO}, \code{RENDZINA} -> \code{Chernossolos} + \item \code{SOLONCHAK}, \code{SOLONETZ} -> \code{Planossolos} + (Naticos / Solodicos in SiBCS) +} + +Returns \code{NA_character_} when the input is NA or unrecognised. 
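The ALL-CAPS-singular to Title-Case-plural mapping described here boils down to a named-vector lookup keyed on the upper-cased input. A minimal R sketch, using only the pairs listed on this help page (the name `normalize_ordem_sketch` is hypothetical; the real `.bdsolos_normalize_ordem()` handles accents, extra whitespace forms, and a fuller name list):

```r
# Illustrative lookup only; mappings copied from the list above.
normalize_ordem_sketch <- function(s) {
  map <- c(
    LATOSSOLO = "Latossolos",   ARGISSOLO = "Argissolos",
    PODZOLICO = "Argissolos",   PODZOLCIO = "Argissolos",
    LATOSOL   = "Argissolos",   GLEI      = "Gleissolos",
    BRUNIZEM  = "Chernossolos", BRUNO     = "Chernossolos",
    RENDZINA  = "Chernossolos", AREIA     = "Neossolos",
    AREIAS    = "Neossolos",    ALUVIAL   = "Neossolos",
    SOLONCHAK = "Planossolos",  SOLONETZ  = "Planossolos"
  )
  if (is.na(s)) return(NA_character_)
  out <- unname(map[toupper(trimws(s))])   # unmatched names yield NA
  if (is.na(out)) NA_character_ else out
}
```

Case-insensitive by construction, and NA-safe for both NA input and unrecognised names.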
+} +\keyword{internal} diff --git a/man/dot-bdsolos_normalize_subordem.Rd b/man/dot-bdsolos_normalize_subordem.Rd new file mode 100644 index 000000000..3961d6b40 --- /dev/null +++ b/man/dot-bdsolos_normalize_subordem.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-bdsolos.R +\name{.bdsolos_normalize_subordem} +\alias{.bdsolos_normalize_subordem} +\title{Map a SiBCS subordem name (any case / language form) to the canonical code} +\usage{ +.bdsolos_normalize_subordem(s) +} +\description{ +v0.9.61 helper: BDsolos surveyors export Nivel 2 in ALL CAPS singular +(e.g. \code{"ARGISSOLO VERMELHO"}); soilKey returns Title Case plural +(e.g. \code{"Argissolos Vermelhos"}). To compute subordem-level +agreement we collapse both to the canonical 2-3 letter SiBCS code +(PV / PA / PVA / PBAC / PAC / LV / LA / LVA / LB / NV / NB / NX / TC / +TX / CX / CHU / CH / CY / GM / GZ / GJ / GX / EK / EJ / ES / OJ / OO / +OX / RL / RY / RQ / RR / MD / ME / MT / MX / SN / SX / FT / FF / FX / +VC / VE / VX). +} +\keyword{internal} diff --git a/man/dot-classify_b_color.Rd b/man/dot-classify_b_color.Rd new file mode 100644 index 000000000..4ce2b3dc0 --- /dev/null +++ b/man/dot-classify_b_color.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sibcs-color-tuning.R +\name{.classify_b_color} +\alias{.classify_b_color} +\title{Classify a single Munsell color into a SiBCS B-horizon color category} +\usage{ +.classify_b_color(hue, value, chroma) +} +\arguments{ +\item{hue}{Munsell hue, e.g. "5YR" or "2.5Y".} + +\item{value}{Munsell value (numeric).} + +\item{chroma}{Munsell chroma (numeric).} +} +\value{ +Character scalar: one of `"VERMELHO"`, `"VERMELHO_AMARELO"`, + `"AMARELO"`, `"BRUNO_ACINZENTADO"`, `"ACINZENTADO"`, or `NA` + when any of the three Munsell components is missing. 
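The shape of such a classifier is a cascade of Munsell cut-offs. The sketch below is illustrative only: the hue and chroma thresholds are invented stand-ins, NOT the SiBCS criteria the internal helper implements, and `classify_b_color_sketch` is a hypothetical name:

```r
# Illustrative cascade; thresholds are assumptions, not SiBCS rules.
classify_b_color_sketch <- function(hue, value, chroma) {
  if (is.na(hue) || is.na(value) || is.na(chroma)) return(NA_character_)
  red    <- c("10R", "2.5YR")                  # assumed "red" hue block
  yellow <- c("7.5YR", "10YR", "2.5Y", "5Y")   # assumed "yellow" hue block
  if (chroma <= 2) {
    if (value <= 4) "ACINZENTADO" else "BRUNO_ACINZENTADO"
  } else if (hue %in% red) {
    "VERMELHO"
  } else if (hue %in% yellow) {
    "AMARELO"
  } else {
    "VERMELHO_AMARELO"                          # intermediate hues, e.g. 5YR
  }
}
```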
+} +\description{ +Classify a single Munsell color into a SiBCS B-horizon color category +} +\keyword{internal} diff --git a/man/dot-dominant_b_color.Rd b/man/dot-dominant_b_color.Rd new file mode 100644 index 000000000..90c377411 --- /dev/null +++ b/man/dot-dominant_b_color.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sibcs-color-tuning.R +\name{.dominant_b_color} +\alias{.dominant_b_color} +\title{Thickness-weighted dominant B-horizon color category for a pedon} +\usage{ +.dominant_b_color(pedon) +} +\arguments{ +\item{pedon}{A `[PedonRecord]`.} +} +\value{ +List with `dominant` (character scalar or `NA`), + `thickness_by_category` (named numeric vector), `n_b_layers` + (integer), and `n_classified` (integer). +} +\description{ +Walks every B-like horizon (designation matching `^B[wt]?` and not +`^BC|^Bt0`), classifies each into a SiBCS color category via +[.classify_b_color()], sums horizon thickness per category, and +returns the category with the largest cumulative thickness. Ties are +broken in canonical SiBCS order (BRUNO_ACINZENTADO > ACINZENTADO > +AMARELO > VERMELHO > VERMELHO_AMARELO). +} +\keyword{internal} diff --git a/man/dot-dominant_b_color_subordem.Rd b/man/dot-dominant_b_color_subordem.Rd new file mode 100644 index 000000000..05e7d1b65 --- /dev/null +++ b/man/dot-dominant_b_color_subordem.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sibcs-color-tuning.R +\name{.dominant_b_color_subordem} +\alias{.dominant_b_color_subordem} +\title{Resolve the subordem code dictated by the dominant B-horizon color} +\usage{ +.dominant_b_color_subordem(pedon, ordem_code) +} +\arguments{ +\item{pedon}{A `[PedonRecord]`.} + +\item{ordem_code}{Single-letter Ordem code, e.g. `"P"`.} +} +\value{ +List with `code` (target subordem code or `NA`) and + `evidence` (the diagnostic returned by [.dominant_b_color()]). 
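The thickness-weighted dominance rule with the canonical tie-break can be sketched compactly: order the factor levels canonically, sum thickness per category, and let `which.max()`'s first-maximum behaviour resolve ties. The helper name is hypothetical and the B-horizon filtering step is omitted:

```r
# Canonical tie-break order, copied from the docs above.
canon_order <- c("BRUNO_ACINZENTADO", "ACINZENTADO", "AMARELO",
                 "VERMELHO", "VERMELHO_AMARELO")

dominant_b_color_sketch <- function(thickness_cm, category) {
  f <- factor(category, levels = canon_order)
  per_cat <- tapply(thickness_cm, f, sum, default = 0)
  # which.max() returns the FIRST maximum, so ties resolve in
  # canonical order because the factor levels are ordered that way.
  names(per_cat)[which.max(per_cat)]
}
```

Encoding the tie-break in the factor levels keeps the function a two-liner instead of an explicit sort-then-compare.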
+} +\description{ +Resolve the subordem code dictated by the dominant B-horizon color +} +\keyword{internal} diff --git a/man/dot-get_sisb_id.Rd b/man/dot-get_sisb_id.Rd new file mode 100644 index 000000000..7f22c0abc --- /dev/null +++ b/man/dot-get_sisb_id.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/merge-brazilian.R +\name{.get_sisb_id} +\alias{.get_sisb_id} +\title{Extract sisb_id from a PedonRecord, returning NA when not present} +\usage{ +.get_sisb_id(pedon) +} +\description{ +Both v0.9.62 loaders (BDsolos + FEBR) assign `site$sisb_id`. This +helper centralises the lookup so older PedonRecord objects without +the field still work. +} +\keyword{internal} diff --git a/man/dot-metric_horizons_overlap.Rd b/man/dot-metric_horizons_overlap.Rd new file mode 100644 index 000000000..7b2cd07ba --- /dev/null +++ b/man/dot-metric_horizons_overlap.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-vlm-extraction.R +\name{.metric_horizons_overlap} +\alias{.metric_horizons_overlap} +\title{Precision / recall on horizon count + numeric attribute match rate} +\usage{ +.metric_horizons_overlap(pred, golden, numeric_tol = 0.1) +} +\description{ +Counts how many predicted horizons line up with a golden horizon +under the depth-overlap heuristic (>=80 % overlap of [top, bottom] +interval) and what fraction of numeric attributes agree within a +small tolerance. The overlap heuristic gives partial credit when +the model splits / merges adjacent horizons. 
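The >=80 % depth-overlap pairing reduces to simple interval arithmetic. A minimal sketch, with the assumption (the internal metric may normalise differently) that overlap is measured as a fraction of the golden horizon's own thickness:

```r
# Fraction of the golden [top, bottom] interval covered by the prediction.
overlap_frac <- function(pred_top, pred_bot, gold_top, gold_bot) {
  inter <- max(0, min(pred_bot, gold_bot) - max(pred_top, gold_top))
  inter / (gold_bot - gold_top)
}

# A predicted horizon "matches" a golden one at the documented 80 % cutoff.
matches_golden <- function(pred_top, pred_bot, gold_top, gold_bot) {
  overlap_frac(pred_top, pred_bot, gold_top, gold_bot) >= 0.8
}
```

Because the fraction is continuous, a model that splits one golden horizon into two predictions can still earn a match for the larger fragment, which is the partial-credit behaviour described above.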
+} +\keyword{internal} diff --git a/man/dot-metric_munsell_deltaE.Rd b/man/dot-metric_munsell_deltaE.Rd new file mode 100644 index 000000000..eb2238086 --- /dev/null +++ b/man/dot-metric_munsell_deltaE.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-vlm-extraction.R +\name{.metric_munsell_deltaE} +\alias{.metric_munsell_deltaE} +\title{Mean Delta-E 2000 between predicted and golden Munsell horizons} +\usage{ +.metric_munsell_deltaE(pred, golden) +} +\description{ +Pairs predicted horizons to golden horizons by index (assumes the +ordering by depth is consistent, the same convention soilKey uses +throughout). Returns the mean over the min(length) horizons; pads +missing predictions with `NA` (penalised separately via the +coverage rate). +} +\keyword{internal} diff --git a/man/dot-metric_site_iou.Rd b/man/dot-metric_site_iou.Rd new file mode 100644 index 000000000..0adaec739 --- /dev/null +++ b/man/dot-metric_site_iou.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-vlm-extraction.R +\name{.metric_site_iou} +\alias{.metric_site_iou} +\title{Field-level Intersection-over-Union + value accuracy} +\usage{ +.metric_site_iou(pred, golden, numeric_tol = 0.05) +} +\description{ +For site metadata: how many of the golden fields appear in the +prediction (recall), how many predicted fields appear in golden +(precision), and -- for the matched fields -- what fraction agree +on value. Numeric values use `numeric_tol`; character uses exact +(case-insensitive, trimmed) match. 
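The field-level precision / recall / value-accuracy idea can be sketched over two named lists. `site_metrics_sketch` is a hypothetical name, and the relative numeric tolerance is an assumption mirroring the documented `numeric_tol` behaviour:

```r
site_metrics_sketch <- function(pred, golden, numeric_tol = 0.05) {
  shared <- intersect(names(pred), names(golden))
  agree <- vapply(shared, function(f) {
    p <- pred[[f]]; g <- golden[[f]]
    if (is.numeric(p) && is.numeric(g)) {
      # relative tolerance for numerics (guard against g == 0)
      abs(p - g) <= numeric_tol * max(abs(g), .Machine$double.eps)
    } else {
      # exact match after trimming and case-folding
      identical(tolower(trimws(as.character(p))),
                tolower(trimws(as.character(g))))
    }
  }, logical(1))
  list(recall    = length(shared) / length(golden),
       precision = length(shared) / length(pred),
       value_acc = if (length(agree)) mean(agree) else NA_real_)
}
```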
+} +\keyword{internal} diff --git a/man/dot-munsell_delta_e.Rd b/man/dot-munsell_delta_e.Rd new file mode 100644 index 000000000..e1caaf657 --- /dev/null +++ b/man/dot-munsell_delta_e.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-vlm-extraction.R +\name{.munsell_delta_e} +\alias{.munsell_delta_e} +\title{Pairwise perceptual color distance between two Munsell triplets} +\usage{ +.munsell_delta_e(hue1, value1, chroma1, hue2, value2, chroma2) +} +\arguments{ +\item{hue1, value1, chroma1}{First Munsell color (e.g. `"5YR", 4, 6`).} + +\item{hue2, value2, chroma2}{Second Munsell color.} +} +\value{ +Numeric scalar (Nickerson or Lab distance), or `NA_real_`. +} +\description{ +Prefers the Nickerson Color Difference Index (operates directly on +HVC, well-known in pedology and the Munsell renotation literature) +via `munsellinterpol::NickersonColorDifference`. Falls back to a +CIE Lab Euclidean distance (ΔE 1976) computed via +`munsellinterpol::MunsellToLab` when Nickerson is unavailable. +Returns `NA_real_` when either Munsell triplet is unparseable. +} +\details{ +Approximate Nickerson scale (matches Δ Lab roughly 1:1 for Munsell +value 4 chromas 1-8): `< 2` = visually equivalent; +`2-5` = noticeable but small; `> 10` = clearly different colors. +} +\keyword{internal} diff --git a/man/dot-print_ollama_install_hint.Rd b/man/dot-print_ollama_install_hint.Rd new file mode 100644 index 000000000..a7c982510 --- /dev/null +++ b/man/dot-print_ollama_install_hint.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup-local-vlm.R +\name{.print_ollama_install_hint} +\alias{.print_ollama_install_hint} +\title{Print the right install-Ollama incantation for the user's OS} +\usage{ +.print_ollama_install_hint() +} +\description{ +macOS -> Homebrew formula; Linux -> upstream curl-pipe-sh script; +Windows -> winget. Always points to the official installers +page. 
Used by [setup_local_vlm()] +as the actionable error path when Ollama is not installed. +} +\keyword{internal} diff --git a/man/dot-provider_supports_structured.Rd b/man/dot-provider_supports_structured.Rd new file mode 100644 index 000000000..b1b7c9c1a --- /dev/null +++ b/man/dot-provider_supports_structured.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vlm-types.R +\name{.provider_supports_structured} +\alias{.provider_supports_structured} +\title{Does a provider support `chat_structured()`?} +\usage{ +.provider_supports_structured(provider) +} +\arguments{ +\item{provider}{The provider to probe.} +} +\value{ +Logical scalar. +} +\description{ +Quick capability probe. Returns TRUE when the provider exposes a +`chat_structured` method (ellmer Chat object built for an LLM that +supports structured outputs). Used internally by +[validate_or_retry()] to decide whether the structured-output path +is available. +} +\details{ +Mock providers and any non-ellmer chat objects return FALSE here, +so the structured-output flag degrades gracefully to the legacy +chat-and-validate loop. +} +\keyword{internal} diff --git a/man/dot-suggest_local_vlm_message.Rd b/man/dot-suggest_local_vlm_message.Rd new file mode 100644 index 000000000..6234b73b7 --- /dev/null +++ b/man/dot-suggest_local_vlm_message.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/zzz.R +\name{.suggest_local_vlm_message} +\alias{.suggest_local_vlm_message} +\title{Build the local-VLM suggestion shown by .onAttach} +\usage{ +.suggest_local_vlm_message(target_model = "gemma4:e2b") +} +\arguments{ +\item{target_model}{Ollama model identifier soilKey wants to see.} +} +\value{ +Character scalar -- the message body, or `""` when no + message is appropriate (e.g. Ollama not installed at all -- no + point nagging the user). +} +\description{ +Pure function (no side effects). 
Returns the multi-line string +that .onAttach would print, given the current Ollama state. +Factored out for testability: the unit tests exercise this with +stubbed inputs instead of touching the real Ollama daemon. +} +\keyword{internal} diff --git a/man/dot-tag_merge_decision.Rd b/man/dot-tag_merge_decision.Rd new file mode 100644 index 000000000..18fdfe30e --- /dev/null +++ b/man/dot-tag_merge_decision.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/merge-brazilian.R +\name{.tag_merge_decision} +\alias{.tag_merge_decision} +\title{Tag a pedon with merge provenance} +\usage{ +.tag_merge_decision(pedon, source, decision) +} +\description{ +Appends the source label to `site$reference_source` and stores +`site$merge_decision` (`"kept_bdsolos"`, `"kept_febr"`, or +`"unique"`). +} +\keyword{internal} diff --git a/man/dot-vlm_fixtures_dir.Rd b/man/dot-vlm_fixtures_dir.Rd new file mode 100644 index 000000000..c4f402e92 --- /dev/null +++ b/man/dot-vlm_fixtures_dir.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-vlm-extraction.R +\name{.vlm_fixtures_dir} +\alias{.vlm_fixtures_dir} +\title{Locate the soilKey VLM-extraction fixture directory} +\usage{ +.vlm_fixtures_dir(subdir = NULL) +} +\description{ +Resolves to `system.file("fixtures", "vlm_extraction", ...)` after +install, or to `inst/fixtures/vlm_extraction/` in a development +checkout. Errors when neither is found. 
+} +\keyword{internal} diff --git a/man/extract_horizons_from_pdf.Rd b/man/extract_horizons_from_pdf.Rd index 3c99d6a2d..510a327bc 100644 --- a/man/extract_horizons_from_pdf.Rd +++ b/man/extract_horizons_from_pdf.Rd @@ -10,9 +10,11 @@ extract_horizons_from_pdf( provider, max_retries = 3L, overwrite = FALSE, - prompt_name = "extract_horizons", + prompt_name = NULL, schema_name = "horizon", - pdf_text = NULL + pdf_text = NULL, + use_fewshot = TRUE, + use_structured = FALSE ) } \arguments{ @@ -31,8 +33,10 @@ validation failure. Default 3.} \item{overwrite}{If \code{TRUE}, lower-authority values are allowed to clobber higher-authority ones. Default \code{FALSE}.} -\item{prompt_name}{Override the default prompt template -(\code{"extract_horizons"}).} +\item{prompt_name}{Override the default prompt template. When +\code{NULL} (default), resolved via \code{use_fewshot}: +\code{TRUE} -> \code{"extract_horizons_fewshot"}; +\code{FALSE} -> \code{"extract_horizons"}.} \item{schema_name}{Override the default schema (\code{"horizon"}).} @@ -40,6 +44,22 @@ to clobber higher-authority ones. Default \code{FALSE}.} already-extracted description text. Useful for smoke tests, unit tests without \code{pdftools}, and for already-OCR'd field-sheet text.} + +\item{use_fewshot}{Logical, default \code{TRUE}. When \code{TRUE}, +uses the v0.9.68 few-shot prompt with two worked examples +embedded; this dramatically improves JSON-shape compliance +on smaller models (Gemma 4 e2b / e4b). Set \code{FALSE} to +revert to the bare-instructions prompt. Ignored when +\code{prompt_name} is set explicitly.} + +\item{use_structured}{Logical, default \code{FALSE} (v0.9.70+). +When \code{TRUE} and the provider exposes +\code{chat_structured()} (Anthropic / OpenAI / Ollama 0.5+ / +Gemini), the validate-and-retry loop is replaced by a +single structured call that returns a schema-validated R +list directly -- removing JSON-shape errors at the protocol +level. 
Falls back to the legacy retry loop when the +provider has no \code{chat_structured} method.} } \value{ Invisibly, the (mutated) \code{pedon}. Carries a diff --git a/man/extract_munsell_from_photo.Rd b/man/extract_munsell_from_photo.Rd index d6db2e54c..bd0665868 100644 --- a/man/extract_munsell_from_photo.Rd +++ b/man/extract_munsell_from_photo.Rd @@ -10,8 +10,10 @@ extract_munsell_from_photo( provider, max_retries = 3L, overwrite = FALSE, - prompt_name = "extract_munsell_from_photo", - schema_name = "horizon" + prompt_name = NULL, + schema_name = "horizon", + use_fewshot = TRUE, + use_structured = FALSE ) } \arguments{ @@ -28,10 +30,28 @@ validation failure. Default 3.} \item{overwrite}{If \code{TRUE}, lower-authority values are allowed to clobber higher-authority ones. Default \code{FALSE}.} -\item{prompt_name}{Override the default prompt template -(\code{"extract_horizons"}).} +\item{prompt_name}{Override the default prompt template. When +\code{NULL} (default), resolved via \code{use_fewshot}: +\code{TRUE} -> \code{"extract_horizons_fewshot"}; +\code{FALSE} -> \code{"extract_horizons"}.} \item{schema_name}{Override the default schema (\code{"horizon"}).} + +\item{use_fewshot}{Logical, default \code{TRUE}. When \code{TRUE}, +uses the v0.9.68 few-shot prompt with two worked examples +embedded; this dramatically improves JSON-shape compliance +on smaller models (Gemma 4 e2b / e4b). Set \code{FALSE} to +revert to the bare-instructions prompt. Ignored when +\code{prompt_name} is set explicitly.} + +\item{use_structured}{Logical, default \code{FALSE} (v0.9.70+). +When \code{TRUE} and the provider exposes +\code{chat_structured()} (Anthropic / OpenAI / Ollama 0.5+ / +Gemini), the validate-and-retry loop is replaced by a +single structured call that returns a schema-validated R +list directly -- removing JSON-shape errors at the protocol +level. 
Falls back to the legacy retry loop when the +provider has no \code{chat_structured} method.} } \value{ Invisibly, the mutated \code{pedon}, with the photo added diff --git a/man/extract_site_from_fieldsheet.Rd b/man/extract_site_from_fieldsheet.Rd index 729bb6277..fa5fa347f 100644 --- a/man/extract_site_from_fieldsheet.Rd +++ b/man/extract_site_from_fieldsheet.Rd @@ -11,7 +11,9 @@ extract_site_from_fieldsheet( max_retries = 3L, overwrite = FALSE, prompt_name = "extract_site_metadata", - schema_name = "site" + schema_name = "site", + use_fewshot = TRUE, + use_structured = FALSE ) } \arguments{ @@ -28,10 +30,28 @@ validation failure. Default 3.} \item{overwrite}{If \code{TRUE}, lower-authority values are allowed to clobber higher-authority ones. Default \code{FALSE}.} -\item{prompt_name}{Override the default prompt template -(\code{"extract_horizons"}).} +\item{prompt_name}{Override the default prompt template. When +\code{NULL} (default), resolved via \code{use_fewshot}: +\code{TRUE} -> \code{"extract_horizons_fewshot"}; +\code{FALSE} -> \code{"extract_horizons"}.} \item{schema_name}{Override the default schema (\code{"horizon"}).} + +\item{use_fewshot}{Logical, default \code{TRUE}. When \code{TRUE}, +uses the v0.9.68 few-shot prompt with two worked examples +embedded; this dramatically improves JSON-shape compliance +on smaller models (Gemma 4 e2b / e4b). Set \code{FALSE} to +revert to the bare-instructions prompt. Ignored when +\code{prompt_name} is set explicitly.} + +\item{use_structured}{Logical, default \code{FALSE} (v0.9.70+). +When \code{TRUE} and the provider exposes +\code{chat_structured()} (Anthropic / OpenAI / Ollama 0.5+ / +Gemini), the validate-and-retry loop is replaced by a +single structured call that returns a schema-validated R +list directly -- removing JSON-shape errors at the protocol +level. Falls back to the legacy retry loop when the +provider has no \code{chat_structured} method.} } \value{ Invisibly, the mutated \code{pedon}. 
diff --git a/man/list_vlm_fixtures.Rd b/man/list_vlm_fixtures.Rd new file mode 100644 index 000000000..69f91ece8 --- /dev/null +++ b/man/list_vlm_fixtures.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-vlm-extraction.R +\name{list_vlm_fixtures} +\alias{list_vlm_fixtures} +\title{List the paired (input, golden) fixtures available for one task} +\usage{ +list_vlm_fixtures(task = c("munsell", "horizons", "site"), fixtures_dir = NULL) +} +\arguments{ +\item{task}{One of `"munsell"`, `"horizons"`, `"site"`.} + +\item{fixtures_dir}{Optional override (default uses bundled).} +} +\value{ +data.frame with columns `id`, `input_path`, `golden_path` + (one row per fixture). +} +\description{ +Each task directory holds matched files: an `input` (`.txt` for +horizons / site, `.jpg`/`.png` for munsell) and a `golden.json` +with the ground-truth answer. The pairing rule is filename-stem. +} diff --git a/man/make_synthetic_horizons_fixture.Rd b/man/make_synthetic_horizons_fixture.Rd new file mode 100644 index 000000000..ec44f78aa --- /dev/null +++ b/man/make_synthetic_horizons_fixture.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-vlm-extraction.R +\name{make_synthetic_horizons_fixture} +\alias{make_synthetic_horizons_fixture} +\title{Generate a synthetic horizons-extraction fixture from a real pedon} +\usage{ +make_synthetic_horizons_fixture(pedon, fixture_id, out_dir = NULL) +} +\arguments{ +\item{pedon}{A `[PedonRecord]`.} + +\item{fixture_id}{Filename stem (no extension) that the input + golden +files will share.} + +\item{out_dir}{Directory to write `.txt` and +`.golden.json`. Default: bundled horizons fixtures dir.} +} +\value{ +Invisibly, the named list `(input_path, golden_path)`. 
+} +\description{ +Renders a `PedonRecord$horizons` table back into a Markdown-style +description (the input the VLM will see) and emits the original +structured horizon table as the golden answer. This lets us scale +the horizons-task fixture set from any pedon source we already +have a loader for (BDsolos, FEBR, KSSL, LUCAS, ...). +} +\details{ +Useful as a *unit-test* fixture: the VLM should be able to round- +trip its own description into structured JSON. Limitation: the +description is template-rendered (uniform style); does not exercise +truly natural-language variation. Pair with hand-curated real-PDF +fixtures. +} diff --git a/man/merge_brazilian_pedons.Rd b/man/merge_brazilian_pedons.Rd new file mode 100644 index 000000000..b97057c69 --- /dev/null +++ b/man/merge_brazilian_pedons.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/merge-brazilian.R +\name{merge_brazilian_pedons} +\alias{merge_brazilian_pedons} +\title{Merge BDsolos and FEBR PedonRecord lists, deduplicating by sisb_id} +\usage{ +merge_brazilian_pedons( + bdsolos, + febr, + prefer = c("bdsolos", "febr"), + verbose = TRUE +) +} +\arguments{ +\item{bdsolos}{List of \code{PedonRecord} objects from +\code{\link{load_bdsolos_csv}}.} + +\item{febr}{List of \code{PedonRecord} objects from +\code{\link{read_febr_pedons}}.} + +\item{prefer}{Character: which side wins when a sisb_id matches in +both. Either \code{"bdsolos"} (default) or \code{"febr"}.} + +\item{verbose}{If \code{TRUE} (default), prints a one-line summary.} +} +\value{ +A list of \code{PedonRecord} objects with site provenance + tagged via \code{site$merge_decision} (\code{"kept_bdsolos"}, + \code{"kept_febr"}, or \code{"unique"}) and \code{site$merge_source}. + Pedons appear in the order: chosen-from-overlap first, then + unique-to-bdsolos, then unique-to-febr. +} +\description{ +Both Embrapa BDsolos and FEBR carry Brazilian soil profiles, with +substantial overlap. 
BDsolos exports the historic Embrapa pedon +numbering as \code{Codigo PA}; FEBR's \code{observacao} table +carries the same numbering as \code{sisb_id}. This function uses +those two as a join key to drop duplicates and produce a single +consolidated list. +} +\details{ +Pedons whose \code{site$sisb_id} is \code{NA} on either side are +kept as unique entries (the duplication test cannot be resolved). +} +\examples{ +\dontrun{ +bd <- load_bdsolos_csv("soil_data/embrapa_bdsolos/BD_solos/RJ.csv") +fb <- read_febr_pedons(c("ctb0032", "ctb0500")) +merged <- merge_brazilian_pedons(bd, fb, prefer = "bdsolos") +length(merged) # < length(bd) + length(fb) when there is overlap +} + +} +\seealso{ +\code{\link{load_bdsolos_csv}}, + \code{\link{read_febr_pedons}}, + \code{\link{summarize_brazilian_overlap}}. +} diff --git a/man/ollama_ensure_running.Rd b/man/ollama_ensure_running.Rd new file mode 100644 index 000000000..728136d96 --- /dev/null +++ b/man/ollama_ensure_running.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup-local-vlm.R +\name{ollama_ensure_running} +\alias{ollama_ensure_running} +\title{Ensure the Ollama daemon is running, starting it if needed} +\usage{ +ollama_ensure_running(timeout_s = 30, verbose = TRUE) +} +\arguments{ +\item{timeout_s}{Polling deadline in seconds (default 30).} + +\item{verbose}{Logical (default TRUE). Prints CLI status updates.} +} +\value{ +Logical scalar: TRUE iff the daemon is reachable when this + function returns. Never throws -- returns FALSE on any failure so + callers can route to [.print_ollama_install_hint()]. +} +\description{ +If [ollama_is_running()] already returns TRUE, this is a no-op. Else +tries to launch `ollama serve` in the background and polls until the +HTTP API answers (or `timeout_s` seconds elapse). Requires the +`ollama` binary to be on PATH; call [ollama_is_installed()] first. 
+} +\details{ +On success, the daemon keeps running for the rest of the R session +(and survives the R session, since it forks via `system2(..., wait = FALSE)`). The user can stop it later with `pkill ollama` or +equivalent. +} diff --git a/man/ollama_is_installed.Rd b/man/ollama_is_installed.Rd new file mode 100644 index 000000000..61a31a6b5 --- /dev/null +++ b/man/ollama_is_installed.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup-local-vlm.R +\name{ollama_is_installed} +\alias{ollama_is_installed} +\title{Is the Ollama CLI installed?} +\usage{ +ollama_is_installed() +} +\value{ +Logical scalar. +} +\description{ +Returns TRUE when `ollama` resolves on the system PATH. Does NOT +check whether the daemon is running (use [ollama_is_running()] for +that). +} diff --git a/man/ollama_list_local_models.Rd b/man/ollama_list_local_models.Rd new file mode 100644 index 000000000..9b55c7331 --- /dev/null +++ b/man/ollama_list_local_models.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup-local-vlm.R +\name{ollama_list_local_models} +\alias{ollama_list_local_models} +\title{List models currently pulled to the local Ollama} +\usage{ +ollama_list_local_models() +} +\value{ +Character vector of model identifiers (e.g. + `c("gemma4:e2b", "gemma4:e4b")`). +} +\description{ +Queries the `/api/tags` endpoint on the running daemon. Returns an +empty character vector when the daemon is not reachable or when no +models are pulled. Never throws.
+} diff --git a/man/ollama_pull_model.Rd b/man/ollama_pull_model.Rd new file mode 100644 index 000000000..79b4271df --- /dev/null +++ b/man/ollama_pull_model.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup-local-vlm.R +\name{ollama_pull_model} +\alias{ollama_pull_model} +\title{Pull a model into the local Ollama} +\usage{ +ollama_pull_model(model, verbose = TRUE) +} +\arguments{ +\item{model}{Ollama model identifier (e.g. `"gemma4:e2b"`).} + +\item{verbose}{Logical (default TRUE). Streams `ollama pull` output +to the console.} +} +\value{ +Logical scalar: TRUE iff the model is on-disk after this + function returns. +} +\description{ +Wraps `ollama pull <model>` via [system2()]. The pull is potentially +large (1-20 GB depending on the model) and may take many minutes +over a slow connection; this function blocks until completion. +Skipped (no-op) when the model is already present in +[ollama_list_local_models()]. +} diff --git a/man/pedologist_system_prompt.Rd b/man/pedologist_system_prompt.Rd new file mode 100644 index 000000000..23d73813e --- /dev/null +++ b/man/pedologist_system_prompt.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vlm-prompts.R +\name{pedologist_system_prompt} +\alias{pedologist_system_prompt} +\title{Persona system-prompt for the soilKey "Pedometrist Agent"} +\usage{ +pedologist_system_prompt(language = c("pt-BR", "en")) +} +\arguments{ +\item{language}{One of `"pt-BR"` (default) or `"en"`. Determines +the language the persona uses when discussing reasoning, +ambiguity and missing attributes.} +} +\value{ +Character scalar suitable for passing as `system_prompt` + to [vlm_provider()] (which forwards it to `ellmer::chat_*`). +} +\description{ +Returns the canonical system prompt installed into every +agent_app() chat session in v0.9.65+.
The persona makes the LLM +(typically a local Gemma 4 via Ollama) behave as an experienced +pedologist who: +} +\details{ +- extracts structured data from photos, PDFs and field reports + into the soilKey JSON schemas; +- NEVER classifies the soil itself (the deterministic taxonomic key + in soilKey is the only thing that emits a class name); +- explains decisions in the user's chosen language (PT-BR by + default; falls back to English when asked); +- flags ambiguity explicitly via `confidence` and `source_quote` + fields in every extracted attribute. +} +\examples{ +p <- pedologist_system_prompt("pt-BR") +substring(p, 1L, 80L) +} diff --git a/man/run_agent_app.Rd b/man/run_agent_app.Rd new file mode 100644 index 000000000..0c108af1a --- /dev/null +++ b/man/run_agent_app.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run-agent-app.R +\name{run_agent_app} +\alias{run_agent_app} +\title{Launch the soilKey "Agente Pedometrista" Shiny app} +\usage{ +run_agent_app(port = NULL, launch.browser = TRUE, ...) +} +\arguments{ +\item{port}{Port for the local server. Default (\code{NULL}) lets +Shiny choose.} + +\item{launch.browser}{Whether to open the app in the default +browser (default \code{TRUE}).} + +\item{...}{Additional arguments passed to \code{\link[shiny]{runApp}}.} +} +\value{ +Invisibly the value returned by \code{shiny::runApp()}. +} +\description{ +A modern bslib-themed Shiny UI for end-to-end soil profile +classification driven by a local Gemma 4 (or any cloud VLM) for +multimodal extraction: +} +\details{ +\enumerate{ + \item Upload a profile photo, PDF report, field-sheet image or + Vis-NIR spectrum. + \item The VLM extracts schema-validated structured data into a + \code{\link{PedonRecord}} with explicit per-attribute + provenance (\code{source = "extracted_vlm"}). 
+ \item The deterministic R taxonomic key classifies the pedon + under WRB 2022, SiBCS 5a edicao and USDA Soil Taxonomy + 13ed -- never the LLM. + \item A free-form chat tab lets the user ask the local Gemma + (with the soilKey "pedometrista" persona) about the loaded + profile in PT-BR or English. +} + +Requires the optional packages \code{shiny}, \code{bslib}, +\code{bsicons} and \code{DT} (all in Suggests). For local Gemma +inference, also requires Ollama -- see \code{\link{setup_local_vlm}} +for one-shot bootstrap from inside R. +} +\examples{ +\dontrun{ +# First-time setup (download Gemma 4 edge): +setup_local_vlm("light") # gemma4:e2b, ~6.7 GB on disk + +# Launch the agent UI: +run_agent_app() +} + +} +\seealso{ +\code{\link{setup_local_vlm}}, \code{\link{vlm_provider}}, + \code{\link{extract_munsell_from_photo}}, + \code{\link{extract_horizons_from_pdf}}, + \code{\link{extract_site_from_fieldsheet}}, + \code{\link{classify_from_documents}}, + \code{\link{run_classify_app}} (the simpler CSV-only UI). +} diff --git a/man/setup_local_vlm.Rd b/man/setup_local_vlm.Rd new file mode 100644 index 000000000..eecb11e6b --- /dev/null +++ b/man/setup_local_vlm.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup-local-vlm.R +\name{setup_local_vlm} +\alias{setup_local_vlm} +\title{One-call setup for the local VLM (Ollama + Gemma)} +\usage{ +setup_local_vlm(model = "balanced", ensure_running = TRUE, verbose = TRUE) +} +\arguments{ +\item{model}{One of `"light"` (gemma4:e2b, ~6.7 GB on disk), +`"balanced"` (gemma4:e4b, ~8 GB; default), `"best"` +(gemma4:31b, ~19 GB), OR any explicit Ollama model identifier +(e.g. `"qwen2.5vl:7b"`). The on-disk footprint is significantly +larger than the bare parameter count because the multimodal +Gemma 4 builds bundle a vision encoder + tokenizers (~5 GB +constant overhead).} + +\item{ensure_running}{Logical (default TRUE).
When TRUE, also +starts the daemon via [ollama_ensure_running()] when needed.} + +\item{verbose}{Logical (default TRUE). Streams CLI status messages.} +} +\value{ +Invisibly, a list with elements: + \describe{ + \item{`ready`}{Logical -- TRUE iff the model can be used now.} + \item{`model`}{Character -- the model identifier resolved.} + \item{`ollama_url`}{Character -- daemon endpoint.} + \item{`installed`}{Logical -- whether the Ollama CLI is on PATH.} + \item{`running`}{Logical -- whether the daemon answers /api/tags.} + \item{`pulled`}{Logical -- whether the model is on local disk.} + \item{`hint`}{Character -- one-line next-step hint for the user + (empty when `ready = TRUE`).} + } +} +\description{ +Idempotent end-to-end bootstrap of the local VLM stack used by the +soilKey agent app. Detects the Ollama installation, starts the +daemon if needed, pulls the requested model and returns a status +list the caller can render in a Shiny UI. +} +\section{What this does NOT do}{ + +- Does NOT install Ollama (requires `sudo` / admin); the function + prints OS-specific install hints instead. +- Does NOT ship the model weights inside the R package (CRAN + policy); the model is pulled from the Ollama registry on first run + and cached in `~/.ollama/models/`. +- Does NOT classify anything; once setup succeeds, call + [vlm_provider("ollama", model = ...)] then the + [extract_horizons_from_pdf()] / [extract_munsell_from_photo()] / + [extract_site_from_fieldsheet()] family. +} + +\examples{ +\dontrun{ +# Default: pull the balanced model (gemma4:e4b, ~8 GB on disk), start the daemon if needed.
+status <- setup_local_vlm() +status$ready # TRUE on a healthy machine with disk + bandwidth + +# Lightweight option for laptops: +setup_local_vlm("light") # gemma4:e2b, ~6.7 GB on disk + +# Best quality (server / workstation): +setup_local_vlm("best") # gemma4:31b, ~19 GB on disk + +# Any other multimodal model the user prefers: +setup_local_vlm("qwen2.5vl:7b") +} +} +\seealso{ +[vlm_provider()], [ollama_is_running()], + [ollama_pull_model()]. +} diff --git a/man/summarize_brazilian_overlap.Rd b/man/summarize_brazilian_overlap.Rd new file mode 100644 index 000000000..10a869f83 --- /dev/null +++ b/man/summarize_brazilian_overlap.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/merge-brazilian.R +\name{summarize_brazilian_overlap} +\alias{summarize_brazilian_overlap} +\title{Diagnostic summary of overlap between BDsolos and FEBR pedon lists} +\usage{ +summarize_brazilian_overlap(bdsolos, febr) +} +\arguments{ +\item{bdsolos, febr}{Lists of \code{PedonRecord} objects.} +} +\value{ +List with elements \code{n_bdsolos}, \code{n_febr}, + \code{n_bdsolos_with_sisb}, \code{n_febr_with_sisb}, + \code{n_shared}, \code{n_bdsolos_only}, \code{n_febr_only}, + \code{n_unmatchable} (sisb_id missing in one or both). +} +\description{ +Counts pedons by source / overlap status without performing the +merge. Useful for verifying the dedup ratio before committing to +\code{\link{merge_brazilian_pedons}}. +} +\seealso{ +\code{\link{merge_brazilian_pedons}}. 
+} diff --git a/man/validate_or_retry.Rd b/man/validate_or_retry.Rd index 4f02c2237..67e69e561 100644 --- a/man/validate_or_retry.Rd +++ b/man/validate_or_retry.Rd @@ -4,7 +4,14 @@ \alias{validate_or_retry} \title{Call a provider, validate JSON output, retry on failure} \usage{ -validate_or_retry(provider, prompt, schema, max_retries = 3L, image = NULL) +validate_or_retry( + provider, + prompt, + schema, + max_retries = 3L, + image = NULL, + use_structured = FALSE +) } \arguments{ \item{provider}{An \code{ellmer} chat object (from @@ -22,10 +29,21 @@ returning text (or a character vector of length 1).} \item{image}{Optional \code{ellmer} image content object (e.g. from \code{ellmer::content_image_file}) to pass alongside the prompt for multimodal calls.} + +\item{use_structured}{Logical (default \code{FALSE}). When TRUE +and the provider supports \code{chat_structured()} +(Anthropic / OpenAI / Ollama 0.5+ / Gemini), skips the +chat-and-parse loop entirely: the provider receives the +ellmer type tree built from \code{inst/schemas/<schema>.json} +and returns a structurally-validated R list. Falls back to +the legacy retry loop when the provider has no +\code{chat_structured} method.} } \value{ A list with elements \code{data} (parsed R object), - \code{raw} (character scalar), \code{attempts} (integer). + \code{raw} (character scalar; NA when structured path was + used), \code{attempts} (integer), and (only when the + structured path fired) \code{used_structured = TRUE}. } \description{ Sends \code{prompt} to \code{provider}, parses the response as diff --git a/man/vlm_provider.Rd b/man/vlm_provider.Rd index 5cda4865c..c3b4e6124 100644 --- a/man/vlm_provider.Rd +++ b/man/vlm_provider.Rd @@ -42,19 +42,25 @@ constructor. \code{ellmer} must be installed. \section{Local-first option}{ Passing \code{name = "ollama"} runs every extraction locally via -an Ollama server (default \code{gemma4:e4b}, Gemma 4 edge with -multimodal text+image+audio support).
No data leaves the -machine, which is the recommended setting for sensitive field -descriptions (e.g. governmental surveys, indigenous land studies) -where institutional independence and data sovereignty matter. -Pull the model first: +an Ollama server (default \code{gemma4:e2b}, Gemma 4 edge 2B, +multimodal text+image, ~6.7 GB on disk -- the multimodal build +bundles the vision encoder, which adds ~5 GB to the bare +parameter weights). No data leaves the machine, which is the +recommended setting for sensitive field descriptions (e.g. +governmental surveys, indigenous land studies) where institutional +independence and data sovereignty matter. + +One-shot setup (v0.9.64+): +\preformatted{ + setup_local_vlm() # "balanced" -> gemma4:e4b, ~8 GB + setup_local_vlm("light") # gemma4:e2b, ~6.7 GB (laptop OK) + setup_local_vlm("best") # gemma4:31b, ~19 GB (workstation) +} +or manually: \preformatted{ - ollama pull gemma4:e4b # ~3 GB edge variant (default) - ollama pull gemma4:31b # frontier dense variant - ollama pull gemma3:27b # earlier generation, still solid + ollama pull gemma4:e2b + ollama serve } -Then start an Ollama server (\code{ollama serve}) and the chat -object returned here will dispatch over HTTP locally. } \examples{ diff --git a/man/vlm_type_from_soilkey_schema.Rd b/man/vlm_type_from_soilkey_schema.Rd new file mode 100644 index 000000000..a048db17f --- /dev/null +++ b/man/vlm_type_from_soilkey_schema.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vlm-types.R +\name{vlm_type_from_soilkey_schema} +\alias{vlm_type_from_soilkey_schema} +\title{ellmer type tree for a soilKey extraction schema} +\usage{ +vlm_type_from_soilkey_schema(name) +} +\arguments{ +\item{name}{Schema base name -- one of `"horizon"`, `"site"`, +`"pedon-schema"`. Without `.json`.} +} +\value{ +An ellmer type object (class inheriting from + `ellmer::Type`).
+} +\description{ +Reads `inst/schemas/.json` and converts it to an ellmer +`type_object()` via `ellmer::type_from_schema()`. Cached per call +(lightweight; the schema files are < 5 KB each). +} +\details{ +Used by [validate_or_retry()] when `use_structured = TRUE`: instead +of calling `provider$chat()` and parsing JSON manually, the +provider gets called via `chat_structured(prompt, type = )` +and returns an R list whose shape is provider-validated. +} +\examples{ +\dontrun{ +if (requireNamespace("ellmer", quietly = TRUE)) { + t <- vlm_type_from_soilkey_schema("horizon") + t # prints the type tree +} +} +} +\seealso{ +[validate_or_retry()] (which uses this when `use_structured = TRUE`), + [`ellmer::type_from_schema`]. +} diff --git a/tests/testthat/test-v0960-bdsolos-benchmark.R b/tests/testthat/test-v0960-bdsolos-benchmark.R new file mode 100644 index 000000000..7a4dc1d5c --- /dev/null +++ b/tests/testthat/test-v0960-bdsolos-benchmark.R @@ -0,0 +1,215 @@ +# ============================================================================= +# Tests for v0.9.60 -- benchmark_bdsolos_sibcs() + .bdsolos_normalize_ordem(). +# All tests run unconditionally (no real BDsolos data required). 
+# ============================================================================= + + +# ---- .bdsolos_normalize_ordem ----------------------------------------- + +test_that(".bdsolos_normalize_ordem maps modern Ordens", { + expect_equal(soilKey:::.bdsolos_normalize_ordem("ARGISSOLO"), "Argissolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("LATOSSOLO"), "Latossolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("CAMBISSOLO"), "Cambissolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("NEOSSOLO"), "Neossolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("PLINTOSSOLO"), "Plintossolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("VERTISSOLO"), "Vertissolos") +}) + + +test_that(".bdsolos_normalize_ordem strips trailing taxonomic descriptors", { + expect_equal(soilKey:::.bdsolos_normalize_ordem("LATOSSOLO VERMELHO"), "Latossolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("ARGISSOLO AMARELO Distrofico"), + "Argissolos") +}) + + +test_that(".bdsolos_normalize_ordem maps legacy / pre-1999 names", { + # Pre-SiBCS names that BDsolos preserves from old surveys + expect_equal(soilKey:::.bdsolos_normalize_ordem("PODZOLICO"), "Argissolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("LATOSOL"), "Latossolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("GLEI"), "Gleissolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("ALUVIAL"), "Neossolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("BRUNIZEM"), "Chernossolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("RENDZINA"), "Chernossolos") + expect_equal(soilKey:::.bdsolos_normalize_ordem("AREIA"), "Neossolos") +}) + + +test_that(".bdsolos_normalize_ordem handles diacritics in legacy names", { + expect_equal(soilKey:::.bdsolos_normalize_ordem("PODZÓLICO"), "Argissolos") # ó + expect_equal(soilKey:::.bdsolos_normalize_ordem("BRUNIZÉM"), "Chernossolos")# é +}) + + +test_that(".bdsolos_normalize_ordem returns NA on unknown / empty", { + 
expect_true(is.na(soilKey:::.bdsolos_normalize_ordem(NA))) + expect_true(is.na(soilKey:::.bdsolos_normalize_ordem(""))) + expect_true(is.na(soilKey:::.bdsolos_normalize_ordem(" "))) + expect_true(is.na(soilKey:::.bdsolos_normalize_ordem("XYZ_NONSENSE"))) + expect_true(is.na(soilKey:::.bdsolos_normalize_ordem("SOLO"))) # too generic +}) + + +# ---- benchmark_bdsolos_sibcs ------------------------------------------ + +.make_bdsolos_pedon_for_benchmark <- function(id = "1001", + reference_n1 = "ARGISSOLO", + hue = "5YR", value = 4, chroma = 6, + clay = c(20, 28, 45, 42)) { + hz <- data.table::data.table( + top_cm = c(0, 20, 55, 115), + bottom_cm = c(20, 55, 115, 170), + designation = c("A", "AB", "Bt1", "Bt2"), + munsell_hue_moist = c("10YR", "7.5YR", hue, hue), + munsell_value_moist = c(4, 4, value, value), + munsell_chroma_moist = c(3, 4, chroma, chroma), + structure_grade = c("moderate","moderate","strong","strong"), + structure_type = c("granular","subangular blocky", + "subangular blocky","subangular blocky"), + clay_films_amount = c(NA, "few", "common", "common"), + clay_pct = clay, + silt_pct = c(30, 25, 20, 22), + sand_pct = 100 - clay - c(30, 25, 20, 22), + ph_h2o = c(5.5, 5.3, 5.0, 5.0), + oc_pct = c(1.5, 0.6, 0.3, 0.2), + cec_cmol = c(8, 6, 5.5, 4.5), + bs_pct = c(35, 25, 20, 18), + al_cmol = c(0.5, 0.8, 1.2, 1.5) + ) + PedonRecord$new( + site = list( + id = id, + lat = -22.86, lon = -43.78, country = "BR", + reference_sibcs = sprintf("%s VERMELHO Distrofico tipico", reference_n1), + reference_nivel_1 = reference_n1, + reference_nivel_2 = sprintf("%s VERMELHO", reference_n1), + reference_nivel_3 = sprintf("%s VERMELHO Distrofico", reference_n1), + reference_source = "synthetic" + ), + horizons = ensure_horizon_schema(hz) + ) +} + + +test_that("benchmark_bdsolos_sibcs returns the documented schema", { + pedons <- list( + .make_bdsolos_pedon_for_benchmark("A", "ARGISSOLO"), + .make_bdsolos_pedon_for_benchmark("L", "LATOSSOLO") + ) + bench <- 
benchmark_bdsolos_sibcs(pedons, verbose = FALSE) + # v0.9.61: top-level adds accuracy_subordem; predictions adds the + # canonical-code subordem agreement triplet. + expect_named(bench, c("predictions", "confusion", "accuracy", + "accuracy_subordem", "per_ordem", + "summary", "errors")) + expect_s3_class(bench$predictions, "data.frame") + expect_setequal(names(bench$predictions), + c("point_id", "predicted_ordem", "reference_ordem", + "agree_ordem", "predicted_subordem", + "reference_subordem", + "predicted_subordem_code", + "reference_subordem_code", + "agree_subordem", + "predicted_gg", + "reference_gg", "reference_raw")) +}) + + +test_that("benchmark_bdsolos_sibcs computes Ordem accuracy", { + pedons <- list( + .make_bdsolos_pedon_for_benchmark("A", "ARGISSOLO") + ) + bench <- benchmark_bdsolos_sibcs(pedons, verbose = FALSE) + # The synthetic profile has B textural + Munsell -> Argissolos + expect_equal(bench$predictions$reference_ordem[1L], "Argissolos") + expect_equal(bench$summary$n_total, 1L) + expect_true(is.numeric(bench$accuracy)) + expect_true(bench$accuracy >= 0 && bench$accuracy <= 1) +}) + + +test_that("benchmark_bdsolos_sibcs builds a confusion matrix", { + pedons <- list( + .make_bdsolos_pedon_for_benchmark("a1", "ARGISSOLO"), + .make_bdsolos_pedon_for_benchmark("a2", "ARGISSOLO"), + .make_bdsolos_pedon_for_benchmark("l1", "LATOSSOLO"), + .make_bdsolos_pedon_for_benchmark("l2", "LATOSSOLO") + ) + bench <- benchmark_bdsolos_sibcs(pedons, verbose = FALSE) + expect_true(!is.null(bench$confusion) || bench$summary$n_in_scope == 0L) + if (!is.null(bench$confusion)) { + expect_true(sum(bench$confusion) <= length(pedons)) + } + expect_true(is.data.frame(bench$per_ordem) || is.null(bench$per_ordem)) +}) + + +test_that("benchmark_bdsolos_sibcs handles errors gracefully", { + bad <- PedonRecord$new(site = list(id = "bad"), + horizons = ensure_horizon_schema( + data.table::data.table(top_cm = 0, bottom_cm = 10))) + pedons <- 
list(.make_bdsolos_pedon_for_benchmark("ok"), bad) + bench <- benchmark_bdsolos_sibcs(pedons, verbose = FALSE) + expect_equal(bench$summary$n_total, 2L) + # Errors counter sums everything that didn't produce a prediction + expect_true(is.numeric(bench$summary$n_errors)) +}) + + +test_that("benchmark_bdsolos_sibcs reports n_unmapped for unrecognised reference Ordens", { + unmapped_pedon <- .make_bdsolos_pedon_for_benchmark("u1", "XYZ_UNKNOWN") + unmapped_pedon$site$reference_nivel_1 <- "XYZ_UNKNOWN" + pedons <- list(unmapped_pedon) + bench <- benchmark_bdsolos_sibcs(pedons, verbose = FALSE) + expect_equal(bench$summary$n_unmapped, 1L) +}) + + +test_that("benchmark_bdsolos_sibcs respects max_n", { + pedons <- replicate(10, .make_bdsolos_pedon_for_benchmark("x", "ARGISSOLO"), + simplify = FALSE) + bench <- benchmark_bdsolos_sibcs(pedons, max_n = 3L, verbose = FALSE) + expect_equal(bench$summary$n_total, 3L) +}) + + +test_that("benchmark_bdsolos_sibcs errors clearly on bad input", { + expect_error(benchmark_bdsolos_sibcs(list()), "non-empty") + expect_error(benchmark_bdsolos_sibcs(list("not_a_pedon")), "PedonRecord") +}) + + +# ---- Loader integration: nivel_1/2/3 captured ------------------------ + +test_that("load_bdsolos_csv captures Classe de Solos Nivel 1/2/3", { + tf <- tempfile(fileext = ".csv") + hdr <- paste(c("Codigo PA", "Simbolo Horizonte", + "Profundidade Superior", "Profundidade Inferior", + "Cor da Amostra Umida - Matiz", + "Cor da Amostra Umida - Valor", + "Cor da Amostra Umida - Croma", + "Classe de Solos Nivel 1", + "Classe de Solos Nivel 2", + "Classe de Solos Nivel 3", + "Classificacao Atual"), collapse = ";") + rows <- c( + paste(c("100", "A1", "0", "20", "10YR", "4", "3", + "ARGISSOLO", "ARGISSOLO VERMELHO", + "ARGISSOLO VERMELHO Distrofico", + "ARGISSOLO VERMELHO Distrofico tipico A moderado"), + collapse = ";"), + paste(c("100", "Bt1", "20", "60", "5YR", "4", "6", + "ARGISSOLO", "ARGISSOLO VERMELHO", + "ARGISSOLO VERMELHO Distrofico", + 
"ARGISSOLO VERMELHO Distrofico tipico A moderado"), + collapse = ";") + ) + writeLines(c("preamble", "", hdr, rows), tf) + on.exit(unlink(tf), add = TRUE) + pedons <- load_bdsolos_csv(tf, verbose = FALSE) + expect_length(pedons, 1L) + p <- pedons[[1L]] + expect_equal(p$site$reference_nivel_1, "ARGISSOLO") + expect_equal(p$site$reference_nivel_2, "ARGISSOLO VERMELHO") + expect_equal(p$site$reference_nivel_3, "ARGISSOLO VERMELHO Distrofico") +}) diff --git a/tests/testthat/test-v0961-sibcs-color-tuning.R b/tests/testthat/test-v0961-sibcs-color-tuning.R new file mode 100644 index 000000000..a8d185d30 --- /dev/null +++ b/tests/testthat/test-v0961-sibcs-color-tuning.R @@ -0,0 +1,242 @@ +# ============================================================================= +# Tests for v0.9.61 -- SiBCS color tuning (dominant-color-in-B post-processor). +# ============================================================================= + + +# ---- helpers -------------------------------------------------------------- + +.make_color_tuning_pedon <- function(id = "ct1", + ordem_first = "ARGISSOLO", + hues = c("10YR", "7.5YR", "5YR", "5YR"), + values = c(4, 4, 4, 4), + chromas = c(3, 4, 6, 6), + tops = c(0, 20, 55, 115), + bots = c(20, 55, 115, 170), + desg = c("A", "AB", "Bt1", "Bt2"), + clay = c(20, 28, 45, 42)) { + hz <- data.table::data.table( + top_cm = tops, + bottom_cm = bots, + designation = desg, + munsell_hue_moist = hues, + munsell_value_moist = values, + munsell_chroma_moist = chromas, + structure_grade = c("moderate","moderate","strong","strong"), + structure_type = c("granular","subangular blocky", + "subangular blocky","subangular blocky"), + clay_films_amount = c(NA, "few", "common", "common"), + clay_pct = clay, + silt_pct = c(30, 25, 20, 22), + sand_pct = 100 - clay - c(30, 25, 20, 22), + ph_h2o = c(5.5, 5.3, 5.0, 5.0), + oc_pct = c(1.5, 0.6, 0.3, 0.2), + cec_cmol = c(8, 6, 5.5, 4.5), + bs_pct = c(35, 25, 20, 18), + al_cmol = c(0.5, 0.8, 1.2, 1.5) + ) + 
PedonRecord$new(
+    site = list(
+      id = id,
+      lat = -22.86, lon = -43.78, country = "BR",
+      reference_sibcs = sprintf("%s tipico", ordem_first),
+      reference_nivel_1 = ordem_first,
+      reference_source = "synthetic"
+    ),
+    horizons = ensure_horizon_schema(hz)
+  )
+}
+
+
+# ---- .classify_b_color ----------------------------------------------------
+
+test_that(".classify_b_color identifies VERMELHO from red hues", {
+  expect_equal(soilKey:::.classify_b_color("2.5YR", 4, 6), "VERMELHO")
+  expect_equal(soilKey:::.classify_b_color("10R", 3, 6), "VERMELHO")
+  expect_equal(soilKey:::.classify_b_color("7.5R", 4, 5), "VERMELHO")
+})
+
+test_that(".classify_b_color identifies VERMELHO_AMARELO at 5YR", {
+  expect_equal(soilKey:::.classify_b_color("5YR", 5, 6), "VERMELHO_AMARELO")
+})
+
+test_that(".classify_b_color identifies AMARELO at 7.5YR/10YR with chroma >= 4", {
+  expect_equal(soilKey:::.classify_b_color("7.5YR", 5, 6), "AMARELO")
+  expect_equal(soilKey:::.classify_b_color("10YR", 5, 4), "AMARELO")
+})
+
+test_that(".classify_b_color identifies BRUNO_ACINZENTADO when dark + non-red", {
+  expect_equal(soilKey:::.classify_b_color("5YR", 3, 3), "BRUNO_ACINZENTADO")
+  expect_equal(soilKey:::.classify_b_color("7.5YR", 4, 3), "BRUNO_ACINZENTADO")
+  expect_equal(soilKey:::.classify_b_color("10YR", 4, 4), "BRUNO_ACINZENTADO")
+})
+
+test_that(".classify_b_color identifies ACINZENTADO at pale yellow side", {
+  expect_equal(soilKey:::.classify_b_color("10YR", 6, 2), "ACINZENTADO")
+  expect_equal(soilKey:::.classify_b_color("2.5Y", 5, 3), "ACINZENTADO")
+})
+
+test_that(".classify_b_color returns NA on missing inputs", {
+  expect_true(is.na(soilKey:::.classify_b_color(NA, 4, 6)))
+  expect_true(is.na(soilKey:::.classify_b_color("5YR", NA, 6)))
+  expect_true(is.na(soilKey:::.classify_b_color("5YR", 4, NA)))
+})
+
+
+# ---- .dominant_b_color ----------------------------------------------------
+
+test_that(".dominant_b_color picks the thicker B color category", {
+  # Bt1 (60 cm) AMARELO (10YR 5/6) vs Bt2 (55 cm) VERMELHO (2.5YR 4/6)
+  ped <- .make_color_tuning_pedon(
+    hues = c("10YR", "7.5YR", "10YR", "2.5YR"),
+    values = c(4, 4, 5, 4),
+    chromas = c(3, 4, 6, 6),
+    tops = c(0, 20, 55, 115),
+    bots = c(20, 55, 115, 170)
+  )
+  d <- soilKey:::.dominant_b_color(ped)
+  # Bt1 thickness 60 (AMARELO), Bt2 thickness 55 (VERMELHO)
+  expect_equal(d$dominant, "AMARELO")
+  expect_true(d$n_b_layers >= 1L)
+  expect_true(d$n_classified >= 1L)
+})
+
+
+test_that(".dominant_b_color reports NA when no B color is measured", {
+  ped <- .make_color_tuning_pedon(
+    hues = c("10YR", "7.5YR", NA, NA),
+    values = c(4, 4, NA, NA),
+    chromas = c(3, 4, NA, NA)
+  )
+  d <- soilKey:::.dominant_b_color(ped)
+  expect_true(is.na(d$dominant))
+  expect_equal(d$n_classified, 0L)
+})
+
+
+# ---- .dominant_b_color_subordem ------------------------------------------
+
+test_that(".dominant_b_color_subordem maps Argissolos by dominant color", {
+  ped_v <- .make_color_tuning_pedon(
+    hues = c("10YR", "7.5YR", "2.5YR", "2.5YR"),
+    values = c(4, 4, 4, 4),
+    chromas = c(3, 4, 6, 6)
+  )
+  expect_equal(soilKey:::.dominant_b_color_subordem(ped_v, "P")$code, "PV")
+
+  ped_a <- .make_color_tuning_pedon(
+    hues = c("10YR", "7.5YR", "10YR", "10YR"),
+    values = c(4, 4, 5, 5),
+    chromas = c(3, 4, 6, 6)
+  )
+  expect_equal(soilKey:::.dominant_b_color_subordem(ped_a, "P")$code, "PA")
+})
+
+
+test_that(".dominant_b_color_subordem maps Latossolos to LV/LA/LB/LVA", {
+  ped <- .make_color_tuning_pedon(
+    hues = c("10YR", "7.5YR", "2.5YR", "2.5YR"),
+    values = c(4, 4, 4, 4),
+    chromas = c(3, 4, 6, 6)
+  )
+  expect_equal(soilKey:::.dominant_b_color_subordem(ped, "L")$code, "LV")
+})
+
+
+test_that(".dominant_b_color_subordem returns NA for non-color Ordens", {
+  ped <- .make_color_tuning_pedon()
+  expect_true(is.na(soilKey:::.dominant_b_color_subordem(ped, "C")$code))
+  expect_true(is.na(soilKey:::.dominant_b_color_subordem(ped, "M")$code))
+  expect_true(is.na(soilKey:::.dominant_b_color_subordem(ped,
"T")$code)) +}) + + +# ---- .apply_color_dominant_override -------------------------------------- + +test_that(".apply_color_dominant_override flips PA -> PV when red dominates", { + rules <- soilKey::load_rules("sibcs5") + ped <- .make_color_tuning_pedon( + hues = c("10YR", "7.5YR", "2.5YR", "2.5YR"), + values = c(4, 4, 4, 4), + chromas = c(3, 4, 6, 6) + ) + # Force a "PA" first-match-wins assignment by faking the YAML entry. + fake_pa <- list(code = "PA", name = "Argissolos Amarelos", + tests = list(all_of = list(list(argissolo_amarelo = list())))) + out <- soilKey:::.apply_color_dominant_override(fake_pa, ped, "P", rules) + expect_equal(out$subordem$code, "PV") + expect_equal(out$override$from_code, "PA") + expect_equal(out$override$to_code, "PV") + expect_true(grepl("dominante", out$override$reason)) +}) + + +test_that(".apply_color_dominant_override is a no-op when codes match", { + rules <- soilKey::load_rules("sibcs5") + ped <- .make_color_tuning_pedon( + hues = c("10YR", "7.5YR", "2.5YR", "2.5YR"), + values = c(4, 4, 4, 4), + chromas = c(3, 4, 6, 6) + ) + fake_pv <- list(code = "PV", name = "Argissolos Vermelhos", + tests = list(all_of = list(list(argissolo_vermelho = list())))) + out <- soilKey:::.apply_color_dominant_override(fake_pv, ped, "P", rules) + expect_null(out$override) + expect_equal(out$subordem$code, "PV") +}) + + +test_that(".apply_color_dominant_override is a no-op for non-color Ordens", { + rules <- soilKey::load_rules("sibcs5") + ped <- .make_color_tuning_pedon() + fake_cx <- list(code = "CX", name = "Cambissolos Haplicos", + tests = list(default = TRUE)) + out <- soilKey:::.apply_color_dominant_override(fake_cx, ped, "C", rules) + expect_null(out$override) + expect_equal(out$subordem$code, "CX") +}) + + +test_that(".apply_color_dominant_override is a no-op when no Munsell B", { + rules <- soilKey::load_rules("sibcs5") + ped <- .make_color_tuning_pedon( + hues = c(NA, NA, NA, NA), + values = c(NA, NA, NA, NA), + chromas = c(NA, NA, NA, NA) 
+  )
+  fake_pva <- list(code = "PVA", name = "Argissolos Vermelho-Amarelos",
+                   tests = list(default = TRUE))
+  out <- soilKey:::.apply_color_dominant_override(fake_pva, ped, "P", rules)
+  expect_null(out$override)
+})
+
+
+# ---- end-to-end: classify_sibcs() trace exposes override ------------------
+
+test_that("classify_sibcs records color_dominant_override in trace", {
+  ped <- .make_color_tuning_pedon(
+    hues = c("10YR", "7.5YR", "2.5YR", "2.5YR"),
+    values = c(4, 4, 4, 4),
+    chromas = c(3, 4, 6, 6)
+  )
+  res <- classify_sibcs(ped, on_missing = "silent")
+  expect_true("color_dominant_override" %in% names(res$trace))
+  # With the hues above, both Bt1 (60 cm) and Bt2 (55 cm) are 2.5YR 4/6
+  # (VERMELHO), so VERMELHO dominates the B horizons over the AB
+  # (7.5YR 4/4, 35 cm) -- the override should fire and the result
+  # should carry "Argissolos Vermelhos".
+  expect_match(res$name, "Argissolos Vermelhos|Argissolos")
+})
+
+
+test_that("classify_sibcs leaves Cambissolos untouched (no override)", {
+  ped <- .make_color_tuning_pedon(
+    ordem_first = "CAMBISSOLO",
+    hues = c("10YR", "7.5YR", "10YR", "10YR"),
+    values = c(4, 4, 5, 5),
+    chromas = c(3, 4, 4, 4),
+    desg = c("A", "AB", "Bw1", "Bw2"),
+    clay = c(20, 28, 30, 32)
+  )
+  res <- classify_sibcs(ped, on_missing = "silent")
+  expect_null(res$trace$color_dominant_override)
+})
diff --git a/tests/testthat/test-v0962-merge-brazilian.R b/tests/testthat/test-v0962-merge-brazilian.R
new file mode 100644
index 000000000..23c8dde6f
--- /dev/null
+++ b/tests/testthat/test-v0962-merge-brazilian.R
@@ -0,0 +1,183 @@
+# =============================================================================
+# Tests for v0.9.62 -- merge_brazilian_pedons() and
+# summarize_brazilian_overlap().
+# ============================================================================= + + +# ---- helper --------------------------------------------------------------- + +.make_brz_pedon <- function(id, sisb = id, source = "Embrapa BDsolos", + ref_sibcs = "ARGISSOLO") { + hz <- data.table::data.table( + top_cm = c(0, 20), + bottom_cm = c(20, 60), + designation = c("A", "Bt"), + munsell_hue_moist = c("10YR", "5YR"), + munsell_value_moist = c(4, 4), + munsell_chroma_moist = c(3, 6), + clay_pct = c(20, 40) + ) + PedonRecord$new( + site = list( + id = as.character(id), + sisb_id = if (is.na(sisb)) NA_character_ else as.character(sisb), + country = "BR", + reference_sibcs = ref_sibcs, + reference_source = source + ), + horizons = ensure_horizon_schema(hz) + ) +} + + +# ---- .get_sisb_id --------------------------------------------------------- + +test_that(".get_sisb_id returns trimmed character or NA", { + expect_equal(soilKey:::.get_sisb_id(.make_brz_pedon("100")), "100") + expect_true(is.na(soilKey:::.get_sisb_id( + .make_brz_pedon("100", sisb = NA) + ))) + expect_true(is.na(soilKey:::.get_sisb_id(NULL))) +}) + + +# ---- merge_brazilian_pedons ---------------------------------------------- + +test_that("merge_brazilian_pedons drops shared sisb_id, prefer = bdsolos", { + bd <- list(.make_brz_pedon("100", source = "BD"), + .make_brz_pedon("200", source = "BD")) + fb <- list(.make_brz_pedon("100", source = "FE"), # duplicate + .make_brz_pedon("300", source = "FE")) + m <- merge_brazilian_pedons(bd, fb, prefer = "bdsolos", verbose = FALSE) + expect_equal(length(m), 3L) # 100 (BD) + 200 (BD) + 300 (FE) + shared <- Filter(function(p) identical(p$site$sisb_id, "100"), m) + expect_length(shared, 1L) + expect_equal(shared[[1L]]$site$merge_decision, "kept_bdsolos") + expect_equal(shared[[1L]]$site$merge_source, "BDsolos") +}) + + +test_that("merge_brazilian_pedons drops shared sisb_id, prefer = febr", { + bd <- list(.make_brz_pedon("100", source = "BD")) + fb <- 
list(.make_brz_pedon("100", source = "FE")) + m <- merge_brazilian_pedons(bd, fb, prefer = "febr", verbose = FALSE) + expect_equal(length(m), 1L) + expect_equal(m[[1L]]$site$merge_decision, "kept_febr") + expect_equal(m[[1L]]$site$merge_source, "FEBR") +}) + + +test_that("merge_brazilian_pedons keeps unique pedons from both sides", { + bd <- list(.make_brz_pedon("100")) + fb <- list(.make_brz_pedon("999")) + m <- merge_brazilian_pedons(bd, fb, verbose = FALSE) + expect_equal(length(m), 2L) + decisions <- vapply(m, function(p) p$site$merge_decision, character(1L)) + expect_setequal(decisions, c("unique", "unique")) +}) + + +test_that("merge_brazilian_pedons keeps pedons without sisb_id (unmatchable)", { + bd <- list(.make_brz_pedon("100", sisb = NA), + .make_brz_pedon("101", sisb = NA)) + fb <- list(.make_brz_pedon("200", sisb = NA)) + m <- merge_brazilian_pedons(bd, fb, verbose = FALSE) + expect_equal(length(m), 3L) # cannot dedup, all kept +}) + + +test_that("merge_brazilian_pedons handles empty input lists gracefully", { + expect_equal(length(merge_brazilian_pedons(list(), list(), verbose = FALSE)), 0L) + bd <- list(.make_brz_pedon("100")) + expect_equal(length(merge_brazilian_pedons(bd, list(), verbose = FALSE)), 1L) + fb <- list(.make_brz_pedon("100")) + expect_equal(length(merge_brazilian_pedons(list(), fb, verbose = FALSE)), 1L) +}) + + +test_that("merge_brazilian_pedons rejects non-PedonRecord input", { + bd <- list(.make_brz_pedon("100")) + expect_error(merge_brazilian_pedons(bd, list("not a pedon")), + "PedonRecord") + expect_error(merge_brazilian_pedons(list("not a pedon"), list()), + "PedonRecord") +}) + + +test_that("merge_brazilian_pedons accepts NULL inputs", { + bd <- list(.make_brz_pedon("100")) + expect_equal(length(merge_brazilian_pedons(NULL, NULL, verbose = FALSE)), 0L) + expect_equal(length(merge_brazilian_pedons(bd, NULL, verbose = FALSE)), 1L) + expect_equal(length(merge_brazilian_pedons(NULL, bd, verbose = FALSE)), 1L) +}) + + 
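+
+# A hedged aside, in plain base R with no soilKey API: the sisb_id set
+# arithmetic that the merge tests in this file exercise can be sketched as
+# below. The ids and variable names are illustrative only, not part of the
+# package contract.
+#
+#   ids_bd  <- c("100", "200", NA); ids_fb <- c("100", "300", NA)
+#   shared  <- intersect(na.omit(ids_bd), na.omit(ids_fb))  # "100"
+#   bd_only <- setdiff(na.omit(ids_bd), na.omit(ids_fb))    # "200"
+#   fb_only <- setdiff(na.omit(ids_fb), na.omit(ids_bd))    # "300"
+#   # NA ids are unmatchable and always kept, so the merged length is
+#   # length(shared) + length(bd_only) + length(fb_only) + 2 NAs = 5.
+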
+test_that("merge_brazilian_pedons preserves pedon order: shared, BD-only, FEBR-only, no-sisb", { + bd <- list(.make_brz_pedon("100"), + .make_brz_pedon("200"), # BD-only + .make_brz_pedon("nona", sisb = NA)) + fb <- list(.make_brz_pedon("100"), + .make_brz_pedon("300"), # FE-only + .make_brz_pedon("nonb", sisb = NA)) + m <- merge_brazilian_pedons(bd, fb, prefer = "bdsolos", verbose = FALSE) + expect_equal(length(m), 5L) + # First slot is the shared pedon (kept_bdsolos) + expect_equal(m[[1L]]$site$sisb_id, "100") + expect_equal(m[[1L]]$site$merge_decision, "kept_bdsolos") +}) + + +# ---- summarize_brazilian_overlap ------------------------------------------ + +test_that("summarize_brazilian_overlap counts overlap correctly", { + bd <- list(.make_brz_pedon("100"), + .make_brz_pedon("200"), + .make_brz_pedon("nona", sisb = NA)) + fb <- list(.make_brz_pedon("100"), + .make_brz_pedon("300")) + s <- summarize_brazilian_overlap(bd, fb) + expect_equal(s$n_bdsolos, 3L) + expect_equal(s$n_febr, 2L) + expect_equal(s$n_bdsolos_with_sisb, 2L) + expect_equal(s$n_febr_with_sisb, 2L) + expect_equal(s$n_shared, 1L) + expect_equal(s$n_bdsolos_only, 1L) + expect_equal(s$n_febr_only, 1L) + expect_equal(s$n_unmatchable, 1L) +}) + + +test_that("summarize_brazilian_overlap handles empty input", { + s <- summarize_brazilian_overlap(list(), list()) + expect_equal(s$n_bdsolos, 0L) + expect_equal(s$n_febr, 0L) + expect_equal(s$n_shared, 0L) +}) + + +# ---- Integration: load_bdsolos_csv assigns site$sisb_id ------------------ + +test_that("load_bdsolos_csv populates site$sisb_id from Codigo PA", { + tf <- tempfile(fileext = ".csv") + hdr <- paste(c("Codigo PA", "Simbolo Horizonte", + "Profundidade Superior", "Profundidade Inferior", + "Cor da Amostra Umida - Matiz", + "Cor da Amostra Umida - Valor", + "Cor da Amostra Umida - Croma", + "Classe de Solos Nivel 1", + "Classificacao Atual"), collapse = ";") + rows <- c( + paste(c("5310", "A1", "0", "20", "10YR", "4", "3", + "ARGISSOLO", 
"ARGISSOLO VERMELHO Distrofico"), + collapse = ";"), + paste(c("5310", "Bt1", "20", "60", "5YR", "4", "6", + "ARGISSOLO", "ARGISSOLO VERMELHO Distrofico"), + collapse = ";") + ) + writeLines(c("preamble", "", hdr, rows), tf) + on.exit(unlink(tf), add = TRUE) + pedons <- load_bdsolos_csv(tf, verbose = FALSE) + expect_length(pedons, 1L) + expect_equal(pedons[[1L]]$site$id, "5310") + expect_equal(pedons[[1L]]$site$sisb_id, "5310") +}) diff --git a/tests/testthat/test-v0964-setup-local-vlm.R b/tests/testthat/test-v0964-setup-local-vlm.R new file mode 100644 index 000000000..d420aa7ef --- /dev/null +++ b/tests/testthat/test-v0964-setup-local-vlm.R @@ -0,0 +1,155 @@ +# ============================================================================= +# Tests for v0.9.64 -- setup_local_vlm() + ollama lifecycle helpers + +# pedologist_system_prompt(). +# All tests run unconditionally (no real Ollama required); behaviour +# without Ollama is part of the contract we test. +# ============================================================================= + + +# ---- ollama_is_installed -------------------------------------------------- + +test_that("ollama_is_installed returns a logical scalar", { + res <- ollama_is_installed() + expect_type(res, "logical") + expect_length(res, 1L) +}) + + +# ---- ollama_list_local_models --------------------------------------------- + +test_that("ollama_list_local_models returns character() when daemon offline", { + withr::with_options(list(soilKey.ollama_url = "http://127.0.0.1:1/no-such-host"), { + res <- ollama_list_local_models() + expect_type(res, "character") + # Either empty (daemon not reachable) or a real list of models when + # the developer happens to have Ollama running on a non-standard port. 
+ expect_true(is.character(res)) + }) +}) + + +# ---- ollama_pull_model: input validation ---------------------------------- + +test_that("ollama_pull_model rejects non-character / empty model", { + expect_error(ollama_pull_model(NULL), "non-empty character") + expect_error(ollama_pull_model(""), "non-empty character") + expect_error(ollama_pull_model(c("a", "b")), "non-empty character") + expect_error(ollama_pull_model(NA_character_), "non-empty character") +}) + + +test_that("ollama_pull_model returns FALSE when ollama not installed", { + # Force ollama_is_installed() FALSE by stubbing Sys.which. + withr::with_envvar(c(PATH = ""), { + if (!ollama_is_installed()) { + res <- ollama_pull_model("gemma4:e2b", verbose = FALSE) + expect_false(res) + } else { + skip("Cannot reset PATH on this platform; ollama still on PATH.") + } + }) +}) + + +# ---- setup_local_vlm: catalog resolution --------------------------------- + +test_that("setup_local_vlm resolves catalog labels to real model names", { + withr::with_envvar(c(PATH = ""), { + if (ollama_is_installed()) { + skip("Cannot test installed = FALSE path on this machine (ollama on PATH).") + } + out_light <- setup_local_vlm("light", verbose = FALSE) + out_balanced <- setup_local_vlm("balanced", verbose = FALSE) + out_best <- setup_local_vlm("best", verbose = FALSE) + expect_equal(out_light$model, "gemma4:e2b") + expect_equal(out_balanced$model, "gemma4:e4b") + expect_equal(out_best$model, "gemma4:31b") + # All three should report installed = FALSE here, so ready = FALSE. 
+ expect_false(out_light$ready) + expect_false(out_balanced$ready) + expect_false(out_best$ready) + expect_match(out_light$hint, "Install Ollama") + }) +}) + + +test_that("setup_local_vlm returns the documented status schema", { + withr::with_envvar(c(PATH = ""), { + if (ollama_is_installed()) { + skip("Cannot test installed = FALSE path on this machine (ollama on PATH).") + } + out <- setup_local_vlm("light", verbose = FALSE) + expect_named(out, c("ready", "model", "ollama_url", "installed", + "running", "pulled", "hint")) + expect_type(out$ready, "logical") + expect_type(out$installed, "logical") + expect_type(out$running, "logical") + expect_type(out$pulled, "logical") + expect_type(out$model, "character") + expect_type(out$ollama_url, "character") + expect_type(out$hint, "character") + }) +}) + + +test_that("setup_local_vlm accepts an arbitrary explicit model identifier", { + withr::with_envvar(c(PATH = ""), { + if (ollama_is_installed()) { + skip("Cannot test installed = FALSE path on this machine (ollama on PATH).") + } + out <- setup_local_vlm("qwen2.5vl:7b", verbose = FALSE) + expect_equal(out$model, "qwen2.5vl:7b") + }) +}) + + +# ---- ollama_ensure_running: short-circuits when already running ----------- + +test_that("ollama_ensure_running returns TRUE immediately when daemon running", { + if (!ollama_is_running()) { + skip("Ollama daemon not running -- skipping the no-op short-circuit test.") + } + expect_true(ollama_ensure_running(verbose = FALSE)) +}) + + +test_that("ollama_ensure_running returns FALSE when ollama not installed", { + if (ollama_is_running()) { + skip("Ollama daemon already running on this machine (would short-circuit to TRUE).") + } + withr::with_envvar(c(PATH = ""), { + if (ollama_is_installed()) { + skip("Cannot test installed = FALSE path on this machine (ollama on PATH).") + } + expect_false(ollama_ensure_running(verbose = FALSE)) + }) +}) + + +# ---- pedologist_system_prompt ------------------------------------------- + 
+test_that("pedologist_system_prompt returns a non-empty string per language", { + pt <- pedologist_system_prompt("pt-BR") + en <- pedologist_system_prompt("en") + expect_type(pt, "character"); expect_length(pt, 1L); expect_gt(nchar(pt), 200L) + expect_type(en, "character"); expect_length(en, 1L); expect_gt(nchar(en), 200L) + # PT-BR contains the SiBCS reference; EN contains "U.S. pedology" + expect_match(pt, "SiBCS") + expect_match(en, "U\\.S\\. pedology") +}) + + +test_that("pedologist_system_prompt forbids invented values + delegated classification", { + for (lang in c("pt-BR", "en")) { + p <- pedologist_system_prompt(lang) + # The persona must explicitly forbid classification (deterministic key + # owns that), and must forbid hallucination ("Do not invent values"). + expect_match(p, "NEVER classif|NUNCA classifica") + expect_match(p, "Do not invent|Nao invente") + } +}) + + +test_that("pedologist_system_prompt rejects unsupported languages", { + expect_error(pedologist_system_prompt("fr"), "should be one of") +}) diff --git a/tests/testthat/test-v0965-agent-app.R b/tests/testthat/test-v0965-agent-app.R new file mode 100644 index 000000000..7b703b7f4 --- /dev/null +++ b/tests/testthat/test-v0965-agent-app.R @@ -0,0 +1,76 @@ +# ============================================================================= +# Tests for v0.9.65 -- agent_app() Shiny launcher + app.R parseability. +# ============================================================================= + + +# ---- Launcher availability + dependency-check error path ----------------- + +test_that("run_agent_app is exported", { + expect_true(exists("run_agent_app", envir = asNamespace("soilKey"), + inherits = FALSE)) + expect_true("run_agent_app" %in% ls("package:soilKey")) +}) + + +test_that("run_agent_app errors clearly when Shiny / bslib / DT absent", { + # We can't actually unload Shiny mid-session; we check the error message + # template is correct by inspecting the function body. 
+ fn_body <- deparse(body(run_agent_app)) + expect_true(any(grepl("Packages required for run_agent_app", fn_body))) + expect_true(any(grepl("bslib", fn_body))) + expect_true(any(grepl("bsicons", fn_body))) + expect_true(any(grepl("DT", fn_body))) +}) + + +# ---- App.R syntactic + structural sanity --------------------------------- + +test_that("agent_app/app.R is syntactically parseable", { + app_dir <- system.file("shiny", "agent_app", package = "soilKey") + if (!nzchar(app_dir) || !dir.exists(app_dir)) { + app_dir <- file.path("inst", "shiny", "agent_app") + } + app_file <- file.path(app_dir, "app.R") + skip_if_not(file.exists(app_file), "agent_app/app.R missing -- pre-install state.") + expect_silent(parse(app_file)) +}) + + +test_that("agent_app/app.R wires the canonical UI sections", { + app_dir <- system.file("shiny", "agent_app", package = "soilKey") + if (!nzchar(app_dir) || !dir.exists(app_dir)) { + app_dir <- file.path("inst", "shiny", "agent_app") + } + app_file <- file.path(app_dir, "app.R") + skip_if_not(file.exists(app_file), "agent_app/app.R missing.") + txt <- paste(readLines(app_file, warn = FALSE), collapse = "\n") + # Must include all 8 nav_panels we documented. + for (label in c("Foto Munsell", "PDF / Texto", "Ficha de Campo", + "Espectros", "Tabela de horizontes", "Classificar", + "Trace", "Pergunte ao Pedometrista")) { + expect_true(grepl(label, txt, fixed = TRUE), + info = sprintf("Missing nav_panel '%s' in agent_app/app.R", label)) + } + # And the bslib + bsicons + soilKey libraries. + expect_true(grepl("library(bslib)", txt, fixed = TRUE)) + expect_true(grepl("library(bsicons)", txt, fixed = TRUE)) + expect_true(grepl("library(soilKey)", txt, fixed = TRUE)) + # Setup button + classify button + chat handler exist. 
+ expect_true(grepl("setup_vlm", txt, fixed = TRUE)) + expect_true(grepl("classify", txt, fixed = TRUE)) + expect_true(grepl("chat_send", txt, fixed = TRUE)) +}) + + +# ---- Dependency wiring: pedologist persona is callable from app ----------- + +test_that("agent_app references pedologist_system_prompt for persona", { + app_dir <- system.file("shiny", "agent_app", package = "soilKey") + if (!nzchar(app_dir) || !dir.exists(app_dir)) { + app_dir <- file.path("inst", "shiny", "agent_app") + } + app_file <- file.path(app_dir, "app.R") + skip_if_not(file.exists(app_file), "agent_app/app.R missing.") + txt <- paste(readLines(app_file, warn = FALSE), collapse = "\n") + expect_true(grepl("pedologist_system_prompt", txt, fixed = TRUE)) +}) diff --git a/tests/testthat/test-v0966-benchmark-vlm-extraction.R b/tests/testthat/test-v0966-benchmark-vlm-extraction.R new file mode 100644 index 000000000..7a3564bc7 --- /dev/null +++ b/tests/testthat/test-v0966-benchmark-vlm-extraction.R @@ -0,0 +1,254 @@ +# ============================================================================= +# Tests for v0.9.66 -- benchmark_vlm_extraction() + metrics + .onAttach. +# All tests run unconditionally (no real Ollama / no network). 
+# ============================================================================= + + +# ---- Fixture discovery ---------------------------------------------------- + +test_that("list_vlm_fixtures finds bundled horizons + site fixtures", { + fx_h <- list_vlm_fixtures("horizons") + expect_s3_class(fx_h, "data.frame") + expect_true(nrow(fx_h) >= 2L, + info = "expected at least 2 bundled horizons fixtures") + expect_setequal(names(fx_h), c("id", "input_path", "golden_path")) + expect_true(all(file.exists(fx_h$input_path))) + expect_true(all(file.exists(fx_h$golden_path))) + + fx_s <- list_vlm_fixtures("site") + expect_true(nrow(fx_s) >= 2L) + expect_true(all(file.exists(fx_s$input_path))) +}) + + +test_that("list_vlm_fixtures returns empty df when munsell dir has no images", { + fx_m <- list_vlm_fixtures("munsell") + expect_s3_class(fx_m, "data.frame") + # README.md does NOT count as a fixture (no .golden.json companion) + expect_equal(nrow(fx_m), 0L) +}) + + +# ---- Synthetic fixture generator ---------------------------------------- + +test_that("make_synthetic_horizons_fixture writes input + golden pairs", { + hz <- data.table::data.table( + top_cm = c(0, 20), + bottom_cm = c(20, 60), + designation = c("A", "Bt"), + munsell_hue_moist = c("10YR", "5YR"), + munsell_value_moist = c(4, 4), + munsell_chroma_moist = c(3, 6), + clay_pct = c(20, 45), + silt_pct = c(30, 22), + sand_pct = c(50, 33), + ph_h2o = c(5.4, 5.0), + oc_pct = c(1.2, 0.3) + ) + ped <- PedonRecord$new( + site = list(id = "test-fx", state = "RJ", municipality = "Itaguai", + country = "BR"), + horizons = ensure_horizon_schema(hz) + ) + tmp <- tempfile() + dir.create(tmp) + on.exit(unlink(tmp, recursive = TRUE), add = TRUE) + out <- make_synthetic_horizons_fixture(ped, "fx_test", out_dir = tmp) + expect_true(file.exists(out$input_path)) + expect_true(file.exists(out$golden_path)) + txt <- paste(readLines(out$input_path, warn = FALSE), collapse = "\n") + expect_match(txt, "Itaguai") + expect_match(txt, "5YR 
4/6") + golden <- jsonlite::fromJSON(out$golden_path, simplifyVector = FALSE) + expect_true(length(golden$horizons) == 2L) +}) + + +test_that("make_synthetic_horizons_fixture rejects bad input", { + expect_error(make_synthetic_horizons_fixture("not a pedon", "id"), + "PedonRecord") + ped <- PedonRecord$new( + site = list(id = "x"), + horizons = ensure_horizon_schema( + data.table::data.table(top_cm = numeric(0), bottom_cm = numeric(0)) + ) + ) + expect_error(make_synthetic_horizons_fixture(ped, "x", out_dir = tempdir()), + "no horizons") +}) + + +# ---- Metric: horizons overlap -------------------------------------------- + +test_that(".metric_horizons_overlap returns precision = recall = 1 on identical sets", { + golden <- list(horizons = list( + list(top_cm = 0, bottom_cm = 20, clay_pct = 25, ph_h2o = 5.4), + list(top_cm = 20, bottom_cm = 60, clay_pct = 45, ph_h2o = 5.0) + )) + pred <- golden + m <- soilKey:::.metric_horizons_overlap(pred, golden) + expect_equal(m$precision, 1) + expect_equal(m$recall, 1) + expect_equal(m$attr_match_rate, 1) +}) + + +test_that(".metric_horizons_overlap penalises missing horizons", { + golden <- list(horizons = list( + list(top_cm = 0, bottom_cm = 20), + list(top_cm = 20, bottom_cm = 60), + list(top_cm = 60, bottom_cm = 100) + )) + pred <- list(horizons = list( + list(top_cm = 0, bottom_cm = 20), + list(top_cm = 20, bottom_cm = 60) + )) + m <- soilKey:::.metric_horizons_overlap(pred, golden) + expect_equal(m$recall, 2/3) + expect_equal(m$precision, 1) +}) + + +test_that(".metric_horizons_overlap detects attribute mismatch", { + golden <- list(horizons = list( + list(top_cm = 0, bottom_cm = 20, clay_pct = 25, ph_h2o = 5.4) + )) + pred <- list(horizons = list( + list(top_cm = 0, bottom_cm = 20, clay_pct = 50, ph_h2o = 5.4) # clay wrong + )) + m <- soilKey:::.metric_horizons_overlap(pred, golden) + expect_equal(m$recall, 1) + expect_equal(m$attr_match_rate, 0.5) # 1 of 2 numeric attrs matched +}) + + +# ---- Metric: site IoU 
---------------------------------------------------- + +test_that(".metric_site_iou: identical site -> iou=1, value_accuracy=1", { + golden <- list(site = list(id = "x", lat = -22.5, lon = -43.7, + country = "BR", land_use = "pastagem")) + pred <- golden + m <- soilKey:::.metric_site_iou(pred, golden) + expect_equal(m$iou, 1) + expect_equal(m$value_accuracy, 1) +}) + + +test_that(".metric_site_iou: missing fields lower IoU", { + golden <- list(site = list(id = "x", lat = -22.5, lon = -43.7, + country = "BR")) + pred <- list(site = list(id = "x", lat = -22.5)) + m <- soilKey:::.metric_site_iou(pred, golden) + expect_equal(m$recall, 0.5) + expect_equal(m$iou, 0.5) +}) + + +test_that(".metric_site_iou: numeric mismatch counts as wrong", { + golden <- list(site = list(lat = -22.5, lon = -43.7)) + pred <- list(site = list(lat = -22.5, lon = -50.0)) # lon wrong + m <- soilKey:::.metric_site_iou(pred, golden, numeric_tol = 0.01) + expect_equal(m$value_accuracy, 0.5) +}) + + +# ---- Metric: Munsell ΔE --------------------------------------------------- + +test_that(".munsell_delta_e returns 0 on identical Munsell triplets", { + skip_if_not_installed("munsellinterpol") + d <- soilKey:::.munsell_delta_e("5YR", 4, 6, "5YR", 4, 6) + expect_true(is.numeric(d)) + expect_lt(d, 0.01) # numerical noise tolerance +}) + + +test_that(".munsell_delta_e increases with chroma distance", { + skip_if_not_installed("munsellinterpol") + near <- soilKey:::.munsell_delta_e("5YR", 4, 6, "5YR", 4, 5) + far <- soilKey:::.munsell_delta_e("5YR", 4, 6, "5YR", 4, 1) + expect_true(is.finite(near)) + expect_true(is.finite(far)) + expect_lt(near, far) +}) + + +test_that(".munsell_delta_e returns NA when any input missing", { + expect_true(is.na(soilKey:::.munsell_delta_e(NA, 4, 6, "5YR", 4, 6))) + expect_true(is.na(soilKey:::.munsell_delta_e("5YR", 4, 6, "5YR", NA, 6))) +}) + + +# ---- Top-level benchmark with mock provider ------------------------------ + +test_that("benchmark_vlm_extraction runs 
end-to-end with a MockVLMProvider", { + # Mock that always returns the perfil_RJ_argissolo golden as JSON. + # Note: bare horizons golden does NOT match the soilKey horizon schema + # (which wraps each attribute in value/confidence/source_quote), so + # the extractor's validate_or_retry will fail. We test the harness + # gracefully captures that as ok = FALSE -- which is exactly what we + # want when benchmarking against real models on imperfect data. + golden_text <- paste(readLines( + list_vlm_fixtures("horizons")$golden_path[1L], + warn = FALSE), collapse = "\n") + mock <- MockVLMProvider$new(responses = rep(list(golden_text), 12)) + bench <- benchmark_vlm_extraction( + providers = list(mock_perfect = mock), + tasks = "horizons", + max_per_task = 1L, + verbose = FALSE + ) + expect_s3_class(bench$predictions, "data.frame") + expect_true(nrow(bench$predictions) >= 1L) + expect_true(all(c("provider", "task", "fixture", "ok", "error", + "metric_1", "metric_2", "metric_3", + "metric_1_name", "metric_2_name", "metric_3_name") + %in% names(bench$predictions))) + # Summary table populated. 
+ expect_s3_class(bench$summary, "data.frame") + expect_true(all(c("provider", "task", "n", "ok_rate") + %in% names(bench$summary))) +}) + + +test_that("benchmark_vlm_extraction rejects malformed providers list", { + expect_error(benchmark_vlm_extraction(providers = list(), + tasks = "horizons", + verbose = FALSE), + "non-empty named list") + expect_error(benchmark_vlm_extraction(providers = list("a"), + tasks = "horizons", + verbose = FALSE), + "non-empty named list") +}) + + +# ---- .suggest_local_vlm_message ------------------------------------------ + +test_that(".suggest_local_vlm_message returns empty string when ollama absent", { + withr::with_envvar(c(PATH = ""), { + if (ollama_is_installed()) { + skip("Cannot test installed=FALSE path on this machine.") + } + msg <- soilKey:::.suggest_local_vlm_message("gemma4:e2b") + expect_equal(msg, "") + }) +}) + + +test_that(".suggest_local_vlm_message: 'ready' shape when model present", { + if (!ollama_is_running()) skip("Need running Ollama to test ready path.") + models <- tryCatch(ollama_list_local_models(), + error = function(e) character(0)) + if (length(models) == 0L) skip("No local models present.") + msg <- soilKey:::.suggest_local_vlm_message(models[1L]) + expect_match(msg, "local VLM ready") + expect_match(msg, models[1L], fixed = TRUE) +}) + + +test_that(".suggest_local_vlm_message: 'pull' shape when model missing", { + if (!ollama_is_running()) skip("Need running Ollama to test pull-suggestion path.") + msg <- soilKey:::.suggest_local_vlm_message("definitely-not-a-real-model:99") + expect_match(msg, "not yet pulled") + expect_match(msg, "setup_local_vlm") +}) diff --git a/tests/testthat/test-v0970-structured-outputs.R b/tests/testthat/test-v0970-structured-outputs.R new file mode 100644 index 000000000..9a46ee92a --- /dev/null +++ b/tests/testthat/test-v0970-structured-outputs.R @@ -0,0 +1,133 @@ +# ============================================================================= +# Tests for v0.9.70 -- 
ellmer structured outputs (chat_structured) bridge. +# ============================================================================= + + +# ---- vlm_type_from_soilkey_schema ----------------------------------------- + +test_that("vlm_type_from_soilkey_schema rejects bad input", { + expect_error(vlm_type_from_soilkey_schema(NULL), "non-empty character") + expect_error(vlm_type_from_soilkey_schema(""), "non-empty character") + expect_error(vlm_type_from_soilkey_schema(c("a", "b")), "non-empty character") +}) + + +test_that("vlm_type_from_soilkey_schema errors on unknown schema", { + skip_if_not_installed("ellmer") + expect_error(vlm_type_from_soilkey_schema("not-a-real-schema"), + "Schema not found") +}) + + +test_that("vlm_type_from_soilkey_schema returns an ellmer type for horizon", { + skip_if_not_installed("ellmer") + if (!exists("type_from_schema", envir = asNamespace("ellmer"), + inherits = FALSE)) { + skip("ellmer version too old (no type_from_schema()).") + } + t <- vlm_type_from_soilkey_schema("horizon") + expect_true(inherits(t, "ellmer::Type") || inherits(t, "Type") || + !is.null(t)) +}) + + +# ---- .provider_supports_structured ---------------------------------------- + +test_that(".provider_supports_structured: FALSE on NULL / mock", { + expect_false(soilKey:::.provider_supports_structured(NULL)) + mock <- MockVLMProvider$new(responses = list("{}")) + expect_false(soilKey:::.provider_supports_structured(mock)) +}) + + +test_that(".provider_supports_structured: TRUE on object exposing chat_structured", { + fake <- list( + chat = function(p, ...) "{}", + chat_structured = function(p, type, ...) 
list(ok = TRUE) + ) + expect_true(soilKey:::.provider_supports_structured(fake)) +}) + + +# ---- validate_or_retry: use_structured fast path ------------------------- + +test_that("validate_or_retry takes the structured path when provider supports it", { + skip_if_not_installed("ellmer") + if (!exists("type_from_schema", envir = asNamespace("ellmer"), + inherits = FALSE)) { + skip("ellmer version too old.") + } + # Stub provider that exposes chat + chat_structured methods. The + # structured method ignores the prompt and returns a known list. + golden_horizons <- list(horizons = list( + list(top_cm = 0, bottom_cm = 30, designation = "A") + )) + fake <- list( + chat = function(p, ...) stop("should not be called when use_structured=TRUE"), + chat_structured = function(prompt, type, ...) golden_horizons + ) + res <- soilKey:::validate_or_retry( + fake, "irrelevant prompt", "horizon", + max_retries = 0L, use_structured = TRUE + ) + expect_equal(res$data, golden_horizons) + expect_true(isTRUE(res$used_structured)) + expect_equal(res$attempts, 1L) +}) + + +test_that("validate_or_retry falls back to legacy loop when provider lacks chat_structured", { + # MockVLMProvider has only chat() -- no chat_structured. + golden <- list(horizons = list(list(top_cm = 0, bottom_cm = 30, + designation = "A"))) + mock <- MockVLMProvider$new(responses = list(jsonlite::toJSON(golden, + auto_unbox = TRUE))) + res <- soilKey:::validate_or_retry( + mock, "p", "horizon", max_retries = 0L, use_structured = TRUE + ) + # Did NOT short-circuit through structured path. 
+ expect_null(res$used_structured) + expect_true(!is.na(res$raw)) +}) + + +test_that("validate_or_retry use_structured = FALSE preserves existing behaviour", { + golden <- list(horizons = list(list(top_cm = 0, bottom_cm = 30, + designation = "A"))) + mock <- MockVLMProvider$new(responses = list(jsonlite::toJSON(golden, + auto_unbox = TRUE))) + res <- soilKey:::validate_or_retry( + mock, "p", "horizon", max_retries = 0L, use_structured = FALSE + ) + expect_null(res$used_structured) +}) + + +# ---- extract_*() pass through --------------------------------------------- + +test_that("extract_horizons_from_pdf accepts use_structured parameter", { + fn <- formals(extract_horizons_from_pdf) + expect_true("use_structured" %in% names(fn)) + expect_false(eval(fn$use_structured)) # default FALSE +}) + + +test_that("extract_munsell_from_photo accepts use_structured parameter", { + fn <- formals(extract_munsell_from_photo) + expect_true("use_structured" %in% names(fn)) +}) + + +test_that("extract_site_from_fieldsheet accepts use_structured parameter", { + fn <- formals(extract_site_from_fieldsheet) + expect_true("use_structured" %in% names(fn)) +}) + + +# ---- benchmark_vlm_extraction pass through ------------------------------- + +test_that("benchmark_vlm_extraction accepts use_structured parameter", { + fn <- formals(benchmark_vlm_extraction) + expect_true("use_structured" %in% names(fn)) + expect_false(eval(fn$use_structured)) # default FALSE for back-compat +}) diff --git a/tests/testthat/test-vlm-providers.R b/tests/testthat/test-vlm-providers.R index 5a56c3459..766df3a43 100644 --- a/tests/testthat/test-vlm-providers.R +++ b/tests/testthat/test-vlm-providers.R @@ -1,11 +1,12 @@ test_that("default_model returns sensible per-provider defaults", { - # v0.9.11: bumped Anthropic to 4-7 (vision-capable Claude Sonnet), - # Google to 2.0 Pro, and Ollama to gemma4:e4b (Gemma 4 edge, - # multimodal). OpenAI default remains gpt-4o. 
+  # v0.9.11: bumped Anthropic to 4-7, Google to 2.0 Pro, Ollama to
+  # gemma4:e4b. v0.9.65: Ollama default lowered to gemma4:e2b
+  # (laptop-friendly; ~6.7 GB on disk per the v0.9.67 corrigendum) --
+  # larger sizes accessible via setup_local_vlm("balanced") / "best".
   expect_equal(soilKey:::default_model("anthropic"), "claude-sonnet-4-7")
   expect_equal(soilKey:::default_model("openai"), "gpt-4o")
   expect_equal(soilKey:::default_model("google"), "gemini-2.0-pro")
-  expect_equal(soilKey:::default_model("ollama"), "gemma4:e4b")
+  expect_equal(soilKey:::default_model("ollama"), "gemma4:e2b")
 })
 
 test_that("default_model rejects unknown providers", {
diff --git a/vignettes/v10_agente_pedometrista.Rmd b/vignettes/v10_agente_pedometrista.Rmd
new file mode 100644
index 000000000..349b74c51
--- /dev/null
+++ b/vignettes/v10_agente_pedometrista.Rmd
@@ -0,0 +1,173 @@
+---
+title: "Agente Pedometrista — Shiny app + local Gemma 4"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Agente Pedometrista — Shiny app + local Gemma 4}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  eval = FALSE
+)
+```
+
+## Overview
+
+Starting with **v0.9.65**, `soilKey` ships a modern Shiny app, built on
+[`bslib`](https://rstudio.github.io/bslib/) and
+[`bsicons`](https://github.com/rstudio/bsicons) icons, that
+orchestrates a **local pedometrician agent**, a Gemma 4 VLM
+(vision-language model) running via [Ollama](https://ollama.com), to
+extract structured data from photos, PDFs, field sheets and spectra,
+feeding soilKey's deterministic taxonomic key (WRB 2022, SiBCS 5th
+edition, USDA Soil Taxonomy 13th edition).
+
+> **Non-negotiable principle:** the LLM **NEVER** classifies. It only
+> *extracts* data as schema-validated JSON. The taxonomic key is 100 %
+> deterministic R, with versioned YAML rules.
+
+## One-call setup
+
+```{r}
+library(soilKey)
+
+# Light build (~6.7 GB on disk; enough for Munsell + horizons):
+status <- setup_local_vlm("light")
+status$ready
+#> [1] TRUE
+status$model
+#> [1] "gemma4:e2b"
+```
+
+`setup_local_vlm()` is idempotent: it detects Ollama, starts the daemon
+if needed (`ollama serve` in the background), and pulls the chosen
+model (`light` = `gemma4:e2b` ~6.7 GB, `balanced` = `gemma4:e4b`
+~8 GB, `best` = `gemma4:31b` ~19 GB). If Ollama is not installed, it
+prints OS-specific instructions (Homebrew / curl-pipe-sh / winget).
+
+> **Measured size (v0.9.67 corrigendum):** the multimodal Gemma 4
+> builds bundle a vision encoder (~5 GB) and tokenizers on top of the
+> bare weights, so `gemma4:e2b` (2B parameters) takes ~6.7 GB on
+> disk, well above the ~1.5 GB originally documented in
+> v0.9.64–v0.9.66.
+
+Finer-grained helpers:
+
+- `ollama_is_installed()` / `ollama_is_running()`: boolean probes.
+- `ollama_pull_model("gemma4:e2b")`: pulls an arbitrary model.
+- `ollama_list_local_models()`: lists what is already on disk.
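+
+For scripts that need finer control than `setup_local_vlm()`, the same
+chain can be reproduced with the exported probes. A minimal sketch,
+assuming only the helpers listed above (per the test suite,
+`ollama_list_local_models()` returns a character vector):
+
+```{r}
+ensure_gemma <- function(model = "gemma4:e2b") {
+  if (!ollama_is_installed()) {
+    stop("Ollama not found -- install it first (https://ollama.com).")
+  }
+  # Start the daemon only if it is not already serving.
+  if (!ollama_is_running()) ollama_ensure_running()
+  # Pull only when the model is not yet on disk (pulls are expensive).
+  if (!model %in% ollama_list_local_models()) ollama_pull_model(model)
+  invisible(model)
+}
+
+ensure_gemma("gemma4:e2b")
+```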
+
+## Launching the agent
+
+```{r}
+soilKey::run_agent_app()
+```
+
+The UI has **8 tabs**:
+
+| Tab | What it does |
+|-----|--------------|
+| 📷 Munsell Photo | Upload a profile photo → `extract_munsell_from_photo()` → hue/value/chroma table per horizon |
+| 📄 PDF / Text | Upload a PDF (or paste text) → `extract_horizons_from_pdf()` → full horizon table |
+| 📋 Field Sheet | Upload an image of the field sheet → `extract_site_from_fieldsheet()` → site metadata |
+| 🌈 Spectra | Upload a Vis-NIR CSV → `fill_from_spectra()` (OSSL) → fills in missing attributes |
+| 📊 Table | `DT` editor to review and fix the horizons manually |
+| 🌱 Classify | `classify_all()` → 3 `bslib::value_box()` cards (WRB / SiBCS / USDA) |
+| 🔍 Trace | Full trace + provenance per system |
+| 💬 Pedometrician | Free-form chat with Gemma using the `pedologist_system_prompt()` *persona* |
+
+Persistent sidebar:
+
+- **Status:** real-time Ollama badges (installed / running /
+  available models).
+- **Provider:** `auto` (tries Ollama first), `ollama`, `anthropic`,
+  `openai`, `google`.
+- **Gemma model:** `light` / `balanced` / `best`.
+- **"Configure local Gemma" button:** triggers `setup_local_vlm()`
+  with the chosen preset and shows a progress modal.
+- **Language:** PT-BR or EN for the agent *persona*.
+- **Reset:** clears the reactive `PedonRecord`.
+
+## Persona: what Gemma "knows"
+
+The PT-BR persona is reproduced verbatim below; the sidebar's language
+selector switches it to EN.
+
+```{r}
+cat(pedologist_system_prompt("pt-BR"))
+```
+
+```
+Voce e um agente pedometrista experiente, treinado em pedologia
+brasileira (SiBCS 5a edicao), pedologia internacional (WRB 2022) e
+pedologia norte-americana (USDA Soil Taxonomy 13a edicao).
+
+Sua unica tarefa neste sistema soilKey e EXTRAIR DADOS ESTRUTURADOS
+(JSON validado por schema) a partir de fotos de perfis, fichas de
+campo, relatorios PDF e tabelas. Voce NUNCA classifica o solo: a
+classificacao e feita por uma chave taxonomica deterministica em R,
+baseada em regras YAML versionadas.
Sua extracao alimenta essa chave.
+
+Regras de extracao:
+  1. Reporte SO o que voce observa diretamente. Nao invente valores.
+  2. Cada atributo deve vir com 'value', 'confidence' (0 a 1) e
+     'source_quote' (a frase ou regiao da imagem que justifica o valor).
+  3. Quando incerto, use confidence baixa e explique a duvida.
+  4. Cores Munsell: relate matiz/valor/croma exatamente como no
+     padrao (e.g. '5YR 4/6'); se a foto nao tem placa Munsell de
+     referencia, marque confidence <= 0.5.
+  ...
+```
+
+This *persona* is injected as the `system_prompt` in each provider's
+`ellmer` constructor (Ollama / Anthropic / OpenAI / Google) and into
+the free-chat session of the 💬 tab.
+
+## End-to-end flow (without the UI)
+
+The same chain the UI runs internally, in a single call:
+
+```{r}
+res <- classify_from_documents(
+  pdf = "perfil_RJ_001.pdf",
+  image = "perfil_RJ_001.jpg",
+  provider = "ollama",    # local Gemma
+  systems = c("wrb", "sibcs", "usda")
+)
+
+res$pedon                   # PedonRecord with the extracted horizons
+res$classifications$wrb     # ClassificationResult, WRB 2022
+res$classifications$sibcs
+res$classifications$usda
+```
+
+## Privacy / data sovereignty
+
+By default, the agent app prefers `ollama` in the automatic fallback.
+That means **sensitive photos, field sheets with precise geolocation,
+and internal PDFs never leave the machine**. The fallback to
+Anthropic / OpenAI / Google only fires if Ollama is not running AND
+the user has an API key set: an explicit property, not a silent
+default.
+
+For government surveys, indigenous lands, or pre-publication data,
+always confirm `ollama` in the sidebar's "Provider VLM" selector.
+
+## Known limitations (v0.9.65)
+
+- `extract_munsell_from_photo()` relies on photos with a Munsell card
+  next to the profile. Without the card, the agent reports
+  `confidence <= 0.5` (by persona design); use the `DT` editor in the
+  📊 tab to correct values.
+- The `light` build (Gemma 4 e2b) is faster but makes more mistakes
+  on handwritten PT-BR field sheets. For those, prefer `balanced`.
+- Chat response streaming is still synchronous (no typewriter
+  effect); planned for v0.9.66+.
+- Multi-page PDF rendering depends on `pdftools` (Suggests).
+
+## Next steps
+
+- Live capture via webcam (browser API).
+- Syncing the reactive `PedonRecord` with QGIS via
+  `report_to_qgis()`.
+- An "audit" mode: export the raw extraction JSON plus provenance
+  for review by a human pedologist.
diff --git a/vignettes/v11_vlm_extraction_benchmark.Rmd b/vignettes/v11_vlm_extraction_benchmark.Rmd
new file mode 100644
index 000000000..fc6e86abe
--- /dev/null
+++ b/vignettes/v11_vlm_extraction_benchmark.Rmd
@@ -0,0 +1,216 @@
+---
+title: "Phase 1 — VLM extraction benchmark"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Phase 1 — VLM extraction benchmark}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(collapse = TRUE, comment = "#>", eval = FALSE)
+```
+
+## Why measure first?
+
+Before deciding to **fine-tune** Gemma 4 with a LoRA adapter (Phase 3)
+or **inject few-shot demonstrations** into every call (Phase 2), we
+need to know how good the **vanilla local Gemma 4 + soilKey persona**
+already is. Phase 1 is exactly that measurement.
+
+`benchmark_vlm_extraction()` runs each VLM provider against a small
+fixture set covering three soilKey extraction tasks and reports a
+per-task quality score, plus an `ok / total` reliability ratio.
+
+| Task | Input | Metric (direction noted per task) |
+|------|-------|-----------------------------------|
+| `horizons` | Markdown / text profile description | precision + recall + per-attribute match (higher-better) |
+| `site` | Field-sheet text | IoU + value-accuracy + recall (higher-better) |
+| `munsell` | Profile photo (with Munsell card) | mean CIE ΔE 2000 (lower-better) |
+
+## Quick start
+
+```{r}
+library(soilKey)
+
+# Local Gemma 4 e2b (~6.7 GB on disk) -- assumes setup_local_vlm("light") was run.
+bench <- benchmark_vlm_extraction(
+  providers = list(
+    gemma_e2b = list(name = "ollama", model = "gemma4:e2b")
+  ),
+  tasks = c("horizons", "site"),  # munsell needs photo fixtures
+  max_per_task = NULL,            # use every bundled fixture
+  verbose = TRUE
+)
+
+bench$summary
+```
+
+Compare two locals + a cloud reference:
+
+```{r}
+bench <- benchmark_vlm_extraction(
+  providers = list(
+    gemma_e2b = list(name = "ollama", model = "gemma4:e2b"),
+    gemma_e4b = list(name = "ollama", model = "gemma4:e4b"),
+    claude    = list(name = "anthropic")
+  ),
+  tasks = c("horizons", "site")
+)
+bench$summary
+```
+
+## Bundled fixtures (v0.9.66)
+
+```
+inst/fixtures/vlm_extraction/
++-- horizons/
+|   +-- perfil_RJ_argissolo.txt
+|   +-- perfil_RJ_argissolo.golden.json
+|   +-- perfil_MG_latossolo.txt
+|   `-- perfil_MG_latossolo.golden.json
++-- site/
+|   +-- ficha_RJ_001.txt
+|   +-- ficha_RJ_001.golden.json
+|   +-- ficha_MG_002.txt
+|   `-- ficha_MG_002.golden.json
+`-- munsell/
+    `-- README.md   # see below for the photo-fixture format
+```
+
+Each task directory holds **paired** `(input, .golden.json)`
+files. The bundled fixtures are deliberately small and synthetic:
+short, clean PT-BR profile descriptions with a known answer. Real
+benchmarking should add real BDsolos / FEBR cases (instructions
+below).
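+
+The same pairing is what `list_vlm_fixtures()` exposes
+programmatically, which is useful for inspecting a golden answer
+before benchmarking. A minimal sketch; beyond `golden_path`, the
+exact columns of the returned table are an assumption:
+
+```{r}
+fx <- list_vlm_fixtures("horizons")   # one row per (input, golden) pair
+fx$golden_path
+
+# Peek at the schema the predictions are scored against:
+golden <- jsonlite::fromJSON(fx$golden_path[1L], simplifyVector = FALSE)
+str(golden, max.level = 2)
+```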
+ +## Adding fixtures from real BDsolos pedons + +`make_synthetic_horizons_fixture()` renders any `PedonRecord` back +into a Markdown description and emits the structured horizons as the +golden answer. The combination forms a valid `horizons` fixture: + +```{r} +pedons <- load_bdsolos_csv("RJ.csv") +ped <- pedons[[1L]] + +make_synthetic_horizons_fixture( + ped, + fixture_id = "bdsolos_RJ_perfil_001", + out_dir = system.file("fixtures", "vlm_extraction", "horizons", + package = "soilKey") +) + +# Re-run the benchmark with the new fixture included: +benchmark_vlm_extraction( + providers = list(gemma_e2b = list(name = "ollama", model = "gemma4:e2b")), + tasks = "horizons" +) +``` + +For Munsell-from-photo, add real images plus their hand-curated golden +JSON to `inst/fixtures/vlm_extraction/munsell/` -- see the README in +that directory for the schema and ΔE interpretation. + +## Baseline (v0.9.67, MacBook M1, two Gemma 4 sizes) + +Smoke run on the four bundled text fixtures, comparing the +laptop-friendly `gemma4:e2b` (~6.7 GB on disk, 2B params) with the +mid-tier `gemma4` (`gemma4:latest`, 8B params, ~9 GB): + +| Provider | Task | Fixture | ok | precision / IoU | recall / value-acc | attr-match | +|----------|------|---------|----|-----------------|--------------------|-----------| +| `gemma4:e2b` | horizons | Latossolo MG | ✓ | **1.00** | **1.00** | **1.00** | +| `gemma4:e2b` | horizons | Argissolo RJ | ✓ | **1.00** | **1.00** | **1.00** | +| `gemma4` (8B) | horizons | Latossolo MG | ✓ | 1.00 | 1.00 | 1.00 | +| `gemma4` (8B) | horizons | Argissolo RJ | ✓ | 1.00 | 1.00 | 1.00 | +| `gemma4:e2b` | site | Ficha MG | ✓ | 0.71 | **1.00** | 0.71 | +| `gemma4:e2b` | site | Ficha RJ | ✗ | — | — | — | +| `gemma4` (8B) | site | Ficha MG | ✗ | — | — | — | +| `gemma4` (8B) | site | Ficha RJ | ✗ | — | — | — | + +Reads: + +- **Horizons extraction is solved** at both sizes. 
The 2B model
+  matches the 8B model on clean text profiles (precision = recall
+  = 1.0; every numeric attribute matches within 10 % tolerance).
+  This locks in `gemma4:e2b` as the default for the agent app.
+- **Site extraction is unstable**: the 2B model passes only one of
+  its two fixtures and the 8B model fails both (1 of 4 site runs
+  with `ok = TRUE`). Failures are *JSON validation errors*, not bad
+  extraction: the smaller model occasionally drifts into free-form
+  prose despite the persona prompt. This is what Phase 2 (few-shot
+  demos) targets.
+- When site succeeds, **value-accuracy on matched fields is 100 %**
+  -- the model knows the right answer, it just doesn't always
+  return it in valid JSON shape.
+
+These numbers establish the **vanilla baseline**. Phase 2 and Phase 3
+should target the gaps:
+
+- **Phase 2 (few-shot)**: inject 2-3 demonstration pairs per call to
+  recover the missing site fields. ~2 days of human work; no GPU.
+- **Phase 3 (LoRA)**: train a parameter-efficient (LoRA) adapter on
+  (input, golden) pairs from BDsolos + FEBR. Needs ~1 000 labelled
+  pairs and ~6 h on a single H100. Only justified if Phase 2
+  plateaus.
+
+## Phase 2 (v0.9.68): few-shot demonstrations
+
+`benchmark_vlm_extraction()` and the underlying `extract_*()`
+functions accept `use_fewshot` (default `TRUE` from v0.9.68). When
+`TRUE`, each call uses a few-shot prompt variant with 2 worked
+examples in the schema-correct shape; when `FALSE`, the
+bare-instructions prompt is used.
Compare them: + +```{r} +prov <- list(name = "ollama", model = "gemma4:e2b") + +baseline <- benchmark_vlm_extraction( + providers = list(gemma_e2b = prov), + tasks = "horizons", use_fewshot = FALSE, n_repeats = 3L +) +fewshot <- benchmark_vlm_extraction( + providers = list(gemma_e2b = prov), + tasks = "horizons", use_fewshot = TRUE, n_repeats = 3L +) + +cbind(baseline$summary[, c("ok_rate", "metric_1_mean", "metric_1_sd")], + fewshot$summary [, c("ok_rate", "metric_1_mean", "metric_1_sd")]) +``` + +`n_repeats = 3L` runs each fixture three times so you can read +both mean and standard deviation per metric. Without that, a +4-fixture benchmark conflates real lift with stochastic LLM noise. + +### Honest finding (v0.9.68) + +On the 4 simple bundled fixtures, baseline and few-shot produce +**identical metrics** with `gemma4:e2b`: vanilla persona is already +enough. The harder `perfil_BA_chernossolo_messy` fixture (PT-BR +comma decimals, mixed Munsell úmida/seca, CaCO3 equivalents) scores +**precision = 1.00, recall = 1.00, attr_match = 0.79** with +few-shot — confirming the pipeline handles non-toy profiles +cleanly. Few-shot **does not regress quality**. Real lift will +surface only on harder fixtures or smaller models. + +## Suppressing the .onAttach() local-VLM hint + +The `.onAttach()` hook prints a one-line hint suggesting +`setup_local_vlm("light")` whenever Ollama is detected but +`gemma4:e2b` is not yet pulled. Suppress with: + +```{r} +options(soilKey.suggest_local_vlm = FALSE) +``` + +Or, opt into automatic background pull: + +```{r} +options(soilKey.auto_setup_vlm = TRUE) +# .. or via env var +Sys.setenv(SOILKEY_AUTO_SETUP_VLM = "1") +``` + +When the auto-setup option is enabled, soilKey runs `ollama pull +gemma4:e2b` in the background on attach. **CRAN-compliance:** the +hook never modifies the system without an explicit user opt-in +(option / env var).
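+
+A sketch of the opt-in gate this implies
+(`.soilkey_auto_setup_enabled()` is a hypothetical helper, not the
+actual internal):
+
+```{r}
+.soilkey_auto_setup_enabled <- function() {
+  # Explicit opt-in only: the option first, then the env var.
+  isTRUE(getOption("soilKey.auto_setup_vlm", FALSE)) ||
+    Sys.getenv("SOILKEY_AUTO_SETUP_VLM", "") %in% c("1", "true", "TRUE")
+}
+```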