Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
# Changelog

## [1.1.0] - 2026-03-31 — Petit Press Fork
## [1.0.1] - 2026-04-09

### Bugfixes
- Fix: scalar params with `arrays_enabled=true` now deduplicate on first match (same as `arrays_enabled=false`)
- Fix: commit null terminator in `WS_Release` to prevent spurious cache misses on Varnish 6

---

## [1.0.0] - 2026-03-31 — Petit Press Fork

> This fork is maintained by [Petit Press, a.s.](https://github.com/petitpress/libvmod-queryfilter).
> Forked from [nytimes/libvmod-queryfilter](https://github.com/nytimes/libvmod-queryfilter) (last upstream release: v1.0.1, 2023-01-09).
Expand Down
26 changes: 21 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,27 @@ set req.url = queryfilter.filterparams(req.url, "id,q,vals[]", true);
```

#### Query Arrays
When query arrays are disabled, libvmod-queryfilter assumes query parameters are
individual name/value pairs (e.g. `a=1&b=2...`). Support for arrays in query
parameters - e.g. `a[]=1&a[]=2...` or `a[0]=1&a[1]=2...` - can be enabled by passing `true` for the `arrays_enabled`
argument. When this option is enabled, array parameters will be
preserved - in order - in the output URI.
RFC 3986 does not define semantics for duplicate query parameters or array
notation. The `[]` bracket convention (e.g. `a[]=1&a[]=2`) is a PHP-originated
de facto standard adopted by many web frameworks.

**`arrays_enabled=false` (default)**

All parameters are treated as individual scalar name/value pairs. When the same
parameter name appears more than once, only the **first** occurrence is kept and
the rest are dropped, producing a stable, deduplicated cache key.

**`arrays_enabled=true`**

Deduplication is decided **per filter entry**:

- **Scalar filters** (no `[` in the filter name, e.g. `"date"`, `"id"`) — same
as disabled: first occurrence wins, duplicates are dropped.
- **Array filters** (filter name contains `[`, e.g. `"vals[]"`) — all
occurrences are collected and preserved in order in the output URI.

This allows a mixed filter list such as `"date,id,vals[]"` to correctly
deduplicate scalar params while collecting all elements of an array param.

The module supports both traditional array notation (`item[]=value`) and
indexed array notation (`item[0]=value`, `item[1]=value`, etc.). Both notations
Expand Down
54 changes: 54 additions & 0 deletions src/tests/filterparams_check_duplicate_scalar_arrays_enabled.vtc
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
varnishtest "Test queryfilter vmod: scalar duplicate deduplication with arrays_enabled=true"

# Verifies that scalar filters use first-wins deduplication even when
# arrays_enabled=true, while array filters still collect all occurrences.
# This is the mixed-filter production scenario: e.g. "date,id,urls[]".

server s1 {
# req 1: date deduped to first value; both urls[] kept
rxreq
expect req.url == "/archive?date=2016-08-14&urls[]=foo&urls[]=bar"
txresp

# req 2: date=2016-08-14 (same first value, different trailing) - cache HIT
# req 3: same - cache HIT
} -start

varnish v1 -vcl+backend {
import std;
import queryfilter from "${vmod_topbuild}/src/.libs/libvmod_queryfilter.so";

sub vcl_hash {
std.syslog(180, "queryfilter before: " + req.url);
set req.url = queryfilter.filterparams(req.url, "date,urls[]", true);
std.syslog(180, "queryfilter after: " + req.url);
}
} -start

client c1 {
# Duplicate scalar: first date wins; array: both urls[] kept
txreq -url "/archive?date=2016-08-14&date=2016-08-10&urls[]=foo&urls[]=bar"
rxresp
expect resp.status == 200

# Different trailing date, same leading - scalar dedupes to same key -> cache HIT
txreq -url "/archive?date=2016-08-14&date=2025-03-25&urls[]=foo&urls[]=bar"
rxresp
expect resp.status == 200

# Three duplicate dates - still first wins -> cache HIT
txreq -url "/archive?date=2016-08-14&date=2020-01-01&date=2021-06-15&urls[]=foo&urls[]=bar"
rxresp
expect resp.status == 200
}

varnish v1 -expect n_object == 0
varnish v1 -expect cache_miss == 0
varnish v1 -expect cache_hit == 0

client c1 -run
delay .1

varnish v1 -expect n_object == 1
varnish v1 -expect cache_miss == 1
varnish v1 -expect cache_hit == 2
48 changes: 48 additions & 0 deletions src/tests/filterparams_check_for_duplicate_param.vtc
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
varnishtest "Test queryfilter vmod: duplicate params - first occurrence wins"

server s1 {
# Only one backend request expected - all 3 client requests should
# normalize to the same cache key (?date=2016-08-14)
rxreq
expect req.url == "/archive?date=2016-08-14"
txresp
} -start

varnish v1 -vcl+backend {
import std;
import queryfilter from "${vmod_topbuild}/src/.libs/libvmod_queryfilter.so";

sub vcl_hash {
std.syslog(180, "queryfilter before: " + req.url);
set req.url = queryfilter.filterparams(req.url, "date", false);
std.syslog(180, "queryfilter after: " + req.url);
}
} -start

client c1 {
# First request - first value (2016-08-14) wins
txreq -url "/archive?date=2016-08-14&date=2016-08-10"
rxresp
expect resp.status == 200

# Same first date, different trailing date - normalizes to same cache key
txreq -url "/archive?date=2016-08-14&date=2025-03-25"
rxresp
expect resp.status == 200

# Three duplicates - first one still wins
txreq -url "/archive?date=2016-08-14&date=2020-01-01&date=2021-06-15"
rxresp
expect resp.status == 200
}

varnish v1 -expect n_object == 0
varnish v1 -expect cache_miss == 0
varnish v1 -expect cache_hit == 0

client c1 -run
delay .1

varnish v1 -expect n_object == 1
varnish v1 -expect cache_miss == 1
varnish v1 -expect cache_hit == 2
19 changes: 11 additions & 8 deletions src/vmod_queryfilter.c
Original file line number Diff line number Diff line change
Expand Up @@ -397,20 +397,23 @@ vmod_filterparams(req_ctx* sp, const char* uri, const char* params_in, unsigned
/* After the first param, swap the separator: */
sep = '&';

/* If arrays are not enabled (default), we just break after the
* first match to avoid unnecessary checks. However, for arrays it
* is necessary to keep iterating through the list to find
* additional matches. A side effect of this is that all elements of
* a given array will be rewritten in sequence next to each other in
* the output array: */
if( !arrays_enabled ) {
/* Scalar filters break after the first match so that duplicate
* params (e.g. ?date=old&date=new) are deduplicated to a stable
* cache key. Array filters (e.g. urls[]) must keep iterating to
* collect all elements. */
if( !arrays_enabled || strchr(filter_name, '[') == NULL ) {
break;
}
};
};

release_okay:
WS_Release(workspace, (new_uri_end-new_uri));
/* Include the null terminator in the release (+1). sprintf writes '\0'
* at new_uri_end but doesn't count it, so without +1 ws->f lands on that
* byte and the next workspace allocation (e.g. a VCL syslog string concat)
* overwrites it. hash_data() then walks past the intended end of req.url
* and produces an inconsistent hash, causing spurious cache misses. */
WS_Release(workspace, (new_uri_end-new_uri) + 1);
return new_uri;

release_bail:
Expand Down
Loading