diff --git a/CHANGELOG.md b/CHANGELOG.md index 67361e6..4802f10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,14 @@ # Changelog -## [1.1.0] - 2026-03-31 — Petit Press Fork +## [1.0.1] - 2026-04-09 + +### Bugfixes +- Fix: scalar params with `arrays_enabled=true` now deduplicate on first match (same as `arrays_enabled=false`) +- Fix: commit null terminator in `WS_Release` to prevent spurious cache misses on Varnish 6 + +--- + +## [1.0.0] - 2026-03-31 — Petit Press Fork > This fork is maintained by [Petit Press, a.s.](https://github.com/petitpress/libvmod-queryfilter). > Forked from [nytimes/libvmod-queryfilter](https://github.com/nytimes/libvmod-queryfilter) (last upstream release: v1.0.1, 2023-01-09). diff --git a/README.md b/README.md index 6597e7c..a520318 100644 --- a/README.md +++ b/README.md @@ -99,11 +99,27 @@ set req.url = queryfilter.filterparams(req.url, "id,q,vals[]", true); ``` #### Query Arrays -When query arrays are disabled, libvmod-queryfilter assumes query parameters are -individual name/value pairs (e.g. `a=1&b=2...`). Support for arrays in query -parameters - e.g. `a[]=1&a[]=2...` or `a[0]=1&a[1]=2...` - can be enabled by passing `true` for the `arrays_enabled` -argument. When this option is enabled, array parameters will be -preserved - in order - in the output URI. +RFC 3986 does not define semantics for duplicate query parameters or array +notation. The `[]` bracket convention (e.g. `a[]=1&a[]=2`) is a PHP-originated +de facto standard adopted by many web frameworks. + +**`arrays_enabled=false` (default)** + +All parameters are treated as individual scalar name/value pairs. When the same +parameter name appears more than once, only the **first** occurrence is kept and +the rest are dropped, producing a stable, deduplicated cache key. + +**`arrays_enabled=true`** + +Deduplication is decided **per filter entry**: + +- **Scalar filters** (no `[` in the filter name, e.g. `"date"`, `"id"`) — same + as disabled: first occurrence wins, duplicates are dropped. +- **Array filters** (filter name contains `[`, e.g. `"vals[]"`) — all + occurrences are collected and preserved in order in the output URI. + +This allows a mixed filter list such as `"date,id,vals[]"` to correctly +deduplicate scalar params while collecting all elements of an array param. The module supports both traditional array notation (`item[]=value`) and indexed array notation (`item[0]=value`, `item[1]=value`, etc.). Both notations diff --git a/src/tests/filterparams_check_duplicate_scalar_arrays_enabled.vtc b/src/tests/filterparams_check_duplicate_scalar_arrays_enabled.vtc new file mode 100644 index 0000000..63d958f --- /dev/null +++ b/src/tests/filterparams_check_duplicate_scalar_arrays_enabled.vtc @@ -0,0 +1,54 @@ +varnishtest "Test queryfilter vmod: scalar duplicate deduplication with arrays_enabled=true" + +# Verifies that scalar filters use first-wins deduplication even when +# arrays_enabled=true, while array filters still collect all occurrences. +# This is the mixed-filter production scenario: e.g. "date,id,urls[]". + +server s1 { + # req 1: date deduped to first value; both urls[] kept + rxreq + expect req.url == "/archive?date=2016-08-14&urls[]=foo&urls[]=bar" + txresp + + # req 2: date=2016-08-14 (same first value, different trailing) - cache HIT + # req 3: same - cache HIT +} -start + +varnish v1 -vcl+backend { + import std; + import queryfilter from "${vmod_topbuild}/src/.libs/libvmod_queryfilter.so"; + + sub vcl_hash { + std.syslog(180, "queryfilter before: " + req.url); + set req.url = queryfilter.filterparams(req.url, "date,urls[]", true); + std.syslog(180, "queryfilter after: " + req.url); + } +} -start + +client c1 { + # Duplicate scalar: first date wins; array: both urls[] kept + txreq -url "/archive?date=2016-08-14&date=2016-08-10&urls[]=foo&urls[]=bar" + rxresp + expect resp.status == 200 + + # Different trailing date, same leading - scalar dedupes to same key -> cache HIT + txreq -url "/archive?date=2016-08-14&date=2025-03-25&urls[]=foo&urls[]=bar" + rxresp + expect resp.status == 200 + + # Three duplicate dates - still first wins -> cache HIT + txreq -url "/archive?date=2016-08-14&date=2020-01-01&date=2021-06-15&urls[]=foo&urls[]=bar" + rxresp + expect resp.status == 200 +} + +varnish v1 -expect n_object == 0 +varnish v1 -expect cache_miss == 0 +varnish v1 -expect cache_hit == 0 + +client c1 -run +delay .1 + +varnish v1 -expect n_object == 1 +varnish v1 -expect cache_miss == 1 +varnish v1 -expect cache_hit == 2 diff --git a/src/tests/filterparams_check_for_duplicate_param.vtc b/src/tests/filterparams_check_for_duplicate_param.vtc new file mode 100644 index 0000000..5fa6117 --- /dev/null +++ b/src/tests/filterparams_check_for_duplicate_param.vtc @@ -0,0 +1,48 @@ +varnishtest "Test queryfilter vmod: duplicate params - first occurrence wins" + +server s1 { + # Only one backend request expected - all 3 client requests should + # normalize to the same cache key (?date=2016-08-14) + rxreq + expect req.url == "/archive?date=2016-08-14" + txresp +} -start + +varnish v1 -vcl+backend { + import std; + import queryfilter from "${vmod_topbuild}/src/.libs/libvmod_queryfilter.so"; + + sub vcl_hash { + std.syslog(180, "queryfilter before: " + req.url); + set req.url = queryfilter.filterparams(req.url, "date", false); + std.syslog(180, "queryfilter after: " + req.url); + } +} -start + +client c1 { + # First request - first value (2016-08-14) wins + txreq -url "/archive?date=2016-08-14&date=2016-08-10" + rxresp + expect resp.status == 200 + + # Same first date, different trailing date - normalizes to same cache key + txreq -url "/archive?date=2016-08-14&date=2025-03-25" + rxresp + expect resp.status == 200 + + # Three duplicates - first one still wins + txreq -url "/archive?date=2016-08-14&date=2020-01-01&date=2021-06-15" + rxresp + expect resp.status == 200 +} + +varnish v1 -expect n_object == 0 +varnish v1 -expect cache_miss == 0 +varnish v1 -expect cache_hit == 0 + +client c1 -run +delay .1 + +varnish v1 -expect n_object == 1 +varnish v1 -expect cache_miss == 1 +varnish v1 -expect cache_hit == 2 diff --git a/src/vmod_queryfilter.c b/src/vmod_queryfilter.c index 07faaaf..391057c 100644 --- a/src/vmod_queryfilter.c +++ b/src/vmod_queryfilter.c @@ -397,20 +397,23 @@ vmod_filterparams(req_ctx* sp, const char* uri, const char* params_in, unsigned /* After the first param, swap the separator: */ sep = '&'; - /* If arrays are not enabled (default), we just break after the - * first match to avoid unnecessary checks. However, for arrays it - * is necessary to keep iterating through the list to find - * additional matches. A side effect of this is that all elements of - * a given array will be rewritten in sequence next to each other in - * the output array: */ - if( !arrays_enabled ) { + /* Scalar filters break after the first match so that duplicate + * params (e.g. ?date=old&date=new) are deduplicated to a stable + * cache key. Array filters (e.g. urls[]) must keep iterating to + * collect all elements. */ + if( !arrays_enabled || strchr(filter_name, '[') == NULL ) { break; } }; }; release_okay: - WS_Release(workspace, (new_uri_end-new_uri)); + /* Include the null terminator in the release (+1). sprintf writes '\0' + * at new_uri_end but doesn't count it, so without +1 ws->f lands on that + * byte and the next workspace allocation (e.g. a VCL syslog string concat) + * overwrites it. hash_data() then walks past the intended end of req.url + * and produces an inconsistent hash, causing spurious cache misses. */ + WS_Release(workspace, (new_uri_end-new_uri) + 1); return new_uri; release_bail: