From 3e04e790f821f1642e423e3790814b872a2459e8 Mon Sep 17 00:00:00 2001 From: Hugo Rodrigues Date: Sat, 9 May 2026 15:51:30 -0400 Subject: [PATCH] docs(v0.9.95): post-lazy-fetch sweep verification + CITATION.cff bump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verifies that the v0.9.94 lazy-fetch architecture did not regress any empirical numbers, captures a fresh sweep artefact, fixes a sweep-script bug, and brings CITATION.cff current. ## Sweep verification Dataset | v0.9.87 default | v0.9.95 default --------------------|----------------:|----------------: BDsolos RJ Order | 40.3% | 40.3% ✓ BDsolos RJ Lat | 14.9% | 14.9% ✓ Redape Order | 45.7% | 45.7% ✓ KSSL+NASIS | 21.2% | 21.2% ✓ AfSP | 21.7% | 21.7% ✓ WoSIS strat | 17.7% | 17.7% ✓ Best-config numbers identical except BDsolos RJ, which was lifted by v0.9.89 (texture-morph) and v0.9.90 (argic Bt+films): BDsolos RJ Order best: 44.4% -> 46.8% (+2.4pp) BDsolos RJ Lat best: 28.1% -> 28.9% (+0.8pp) ## Sweep script bug fix The v0.9.87 sweep script bypassed the v0.9.88/v0.9.91 alias logic by reading RDS directly. v0.9.95 routes through the loaders so WoSIS / KSSL+NASIS report their honest accuracy numbers. ## CITATION.cff refresh version: 0.9.39 -> 0.9.95 date-released: 2026-05-03 -> 2026-05-09 ## Artefact inst/benchmarks/reports/sweep_v0995_2026-05-09.txt captures the v0.9.95 sweep output for cran-comments + reproducibility. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- CITATION.cff | 6 +- DESCRIPTION | 2 +- NEWS.md | 60 +++++++++++++++++++ .../reports/sweep_v0995_2026-05-09.txt | 46 ++++++++++++++ inst/benchmarks/run_v0987_post_086_sweep.R | 18 +++--- 5 files changed, 120 insertions(+), 12 deletions(-) create mode 100644 inst/benchmarks/reports/sweep_v0995_2026-05-09.txt diff --git a/CITATION.cff b/CITATION.cff index 8ee1fa22f..a3e203bdf 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -18,8 +18,8 @@ authors: email: "rodrigues.machado.hugo@gmail.com" affiliation: "Universidade Federal Rural do Rio de Janeiro (UFRRJ), Departamento de Solos" orcid: "https://orcid.org/0000-0002-8070-8126" -version: "0.9.39" -date-released: "2026-05-03" +version: "0.9.95" +date-released: "2026-05-09" doi: "10.5281/zenodo.19930112" license: "MIT" repository-code: "https://github.com/HugoMachadoRodrigues/soilKey" @@ -50,7 +50,7 @@ preferred-citation: given-names: "Hugo" email: "rodrigues.machado.hugo@gmail.com" orcid: "https://orcid.org/0000-0002-8070-8126" - version: "0.9.39" + version: "0.9.95" year: 2026 doi: "10.5281/zenodo.19930112" url: "https://github.com/HugoMachadoRodrigues/soilKey" diff --git a/DESCRIPTION b/DESCRIPTION index 41fe8e5c1..91c26aece 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: soilKey Type: Package Title: Automated Soil Profile Classification per WRB 2022, SiBCS 5 and USDA Soil Taxonomy 13 -Version: 0.9.94 +Version: 0.9.95 Date: 2026-05-09 Authors@R: person("Hugo", "Rodrigues", diff --git a/NEWS.md b/NEWS.md index 40b8653df..992509c9e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,63 @@ +# soilKey 0.9.95 (2026-05-09) + +The "**post-lazy-fetch sweep + CITATION.cff bump**" release. +Verifies that the v0.9.94 lazy-fetch architecture did not regress +any empirical numbers, and brings the CITATION.cff version / +date-released stamps current with the v0.9.95 release. Pure +artefact / no package R code change (sweep script fix only). 
+ +## Sweep verification (post-v0.9.94) + +\code{Rscript inst/benchmarks/run_v0987_post_086_sweep.R} on the +v0.9.94 stack reproduces the v0.9.87 numbers to the pedon, with +two improvements driven by v0.9.89 / v0.9.90 already accounted +for in their own NEWS entries: + +| Dataset | n | v0.9.87 default | v0.9.95 default | v0.9.87 best | v0.9.95 best | +|---------------------|----:|----------------:|----------------:|-------------:|-------------:| +| SiBCS BDsolos RJ | 722 | 40.3\\% | 40.3\\% | 44.4\\% | **46.8\\%** | +| SiBCS BDsolos RJ Lat| 114 | 14.9\\% | 14.9\\% | 28.1\\% | **28.9\\%** | +| SiBCS Redape Order | 94 | 45.7\\% | 45.7\\% | 58.5\\% | 58.5\\% | +| WRB KSSL+NASIS | 99 | 21.2\\% | 21.2\\% | 24.2\\% | 24.2\\% | +| WRB AfSP | 120 | 21.7\\% | 21.7\\% | 30.8\\% | 30.8\\% | +| WRB WoSIS strat | 130 | 0\\%/17.7\\% | 17.7\\% | 0\\%/19.2\\% | 18.5\\% | + +The BDsolos RJ \code{best} numbers move 44.4\\%->46.8\\% (Order) +and 28.1\\%->28.9\\% (Latossolo) because the v0.9.89 texture-morph +fallback (PR #42) and the v0.9.90 argic designation-inference +fallback (PR #43) auto-fire under \code{engine = "aqp"}; both +were already documented in their respective releases. + +## Sweep script bug fix + +The v0.9.87 sweep script read RDS files directly via +\code{readRDS()} for KSSL+NASIS and WoSIS, bypassing the +v0.9.88 / v0.9.91 \code{reference_wrb} alias logic embedded in +the loaders. v0.9.95 routes both through +\code{load_kssl_nasis_sample()} and +\code{load_wosis_stratified_sample()} so the alias fires and +WoSIS reports its honest 17.7\\% / 18.5\\% accuracy instead +of the misleading 0 / 0 in_scope. + +The pre-fix WoSIS line printed in the v0.9.94 NEWS as +"0 / 0" was an artefact of this sweep-script bypass and not a +real regression; the v0.9.91 \code{load_wosis_stratified_sample()} +loader has always returned 130 / 130 pedons with populated +\code{reference_wrb}. 
+ +## CITATION.cff refresh + +\code{CITATION.cff} \code{version:} stamp bumped 0.9.39 -> 0.9.95 +and \code{date-released:} bumped 2026-05-03 -> 2026-05-09. GitHub's citation +parser will render the new version on the repo home page. + +## Artefact + +\code{inst/benchmarks/reports/sweep_v0995_2026-05-09.txt} captures +the v0.9.95 sweep output for cran-comments + downstream +reproducibility audits. + + # soilKey 0.9.94 (2026-05-09) The "**lazy-fetch architecture for the four large benchmark diff --git a/inst/benchmarks/reports/sweep_v0995_2026-05-09.txt b/inst/benchmarks/reports/sweep_v0995_2026-05-09.txt new file mode 100644 index 000000000..6828d1af6 --- /dev/null +++ b/inst/benchmarks/reports/sweep_v0995_2026-05-09.txt @@ -0,0 +1,46 @@ +============================================================== +v0.9.87 cumulative benchmark sweep (post v0.9.86 stack) +============================================================== + +---- 1. BDsolos RJ (n=722 with 114 Lat / 232 Arg / 90 Cam / 270 Neo) ---- +Loaded 722 BDsolos RJ pedons. + + [default canonical] SiBCS Order accuracy = 40.3% (286 / 710 in_scope) + Latossolo recall: 17 / 114 (14.9%) + [engine=aqp] SiBCS Order accuracy = 46.8% (332 / 710 in_scope) + Latossolo recall: 33 / 114 (28.9%) + +---- 2. Redape (94 SiBCS, 4 levels) ---- +Loaded 94 Redape pedons. + + [default canonical] + level=order acc= 45.7% (43 / 94) + level=subordem acc= 30.9% (29 / 94) + level=gde_grupo acc= 29.1% (25 / 86) + level=subgrupo acc= 15.1% (13 / 86) + [engine=aqp + opt-ins] + level=order acc= 58.5% (55 / 94) + level=subordem acc= 39.4% (37 / 94) + level=gde_grupo acc= 35.2% (31 / 88) + level=subgrupo acc= 25.0% (22 / 88) + +---- 3. KSSL+NASIS (n=99) ---- +Loaded 99 KSSL+NASIS pedons. + + [default ] WRB acc = 21.2% (21 / 99) + [engine=aqp ] WRB acc = 24.2% (24 / 99) + [engine=aqp + andic_proxy + spodic_engine_aware ] WRB acc = 24.2% (24 / 99) + +---- 4. AfSP (n=120) ---- +Loaded 120 AfSP pedons. 
+ + [default ] WRB acc = 21.7% (26 / 120) + [engine=aqp + andic_proxy + extend ] WRB acc = 30.8% (37 / 120) + +---- 5. WoSIS stratified (n=130) ---- +Loaded 130 WoSIS stratified pedons. + + [default ] WRB acc = 17.7% (23 / 130) + [engine=aqp + opt-ins ] WRB acc = 18.5% (24 / 130) + +[v0.9.87 sweep] DONE diff --git a/inst/benchmarks/run_v0987_post_086_sweep.R b/inst/benchmarks/run_v0987_post_086_sweep.R index 9a04f8586..525eae8a3 100755 --- a/inst/benchmarks/run_v0987_post_086_sweep.R +++ b/inst/benchmarks/run_v0987_post_086_sweep.R @@ -57,10 +57,10 @@ if (dir.exists(RED_DIR)) { } cat("\n---- 3. KSSL+NASIS (n=99) ----\n") -fp <- system.file("extdata", "kssl_nasis_sample.rds", package = "soilKey") -if (!nzchar(fp)) fp <- "inst/extdata/kssl_nasis_sample.rds" -if (file.exists(fp)) { - s <- readRDS(fp) +# v0.9.91+: must use load_kssl_nasis_sample() so the +# reference_wrb_from_usda -> reference_wrb alias is applied. +s <- tryCatch(load_kssl_nasis_sample(), error = function(e) NULL) +if (!is.null(s)) { peds_ks <- s$pedons %||% s cat(sprintf("Loaded %d KSSL+NASIS pedons.\n\n", length(peds_ks))) for (label in c("default", "engine=aqp", "engine=aqp + andic_proxy + spodic_engine_aware")) { @@ -125,10 +125,12 @@ if (file.exists(fp)) { } cat("\n---- 5. WoSIS stratified (n=130) ----\n") -fp <- system.file("extdata", "wosis_stratified_sample.rds", package = "soilKey") -if (!nzchar(fp)) fp <- "inst/extdata/wosis_stratified_sample.rds" -if (file.exists(fp)) { - s <- readRDS(fp) +# v0.9.91+: must use load_wosis_stratified_sample() so the +# wosis_rsg -> reference_wrb alias is applied. Reading the RDS +# directly bypasses the alias and benchmark loops report 0/0 +# because reference_wrb is NULL on every pedon. +s <- tryCatch(load_wosis_stratified_sample(), error = function(e) NULL) +if (!is.null(s)) { peds_w <- s$pedons %||% s cat(sprintf("Loaded %d WoSIS stratified pedons.\n\n", length(peds_w))) for (label in c("default", "engine=aqp + opt-ins")) {