diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..9b2fc43 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,153 @@ +name: Build + +on: + pull_request: + branches: + - main + paths-ignore: + - "**/*.md" + +env: + # cross-platform-actions/action@v0.32.0 runs on Node.js 20 which is deprecated. + # Opt into Node.js 24 now ahead of the June 2026 forced migration. + # Remove once a new cross-platform-actions release with Node.js 24 support is available. + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + +jobs: + build: + strategy: + fail-fast: false + matrix: + varnish: ["7.6.5", "7.7.3", "8.0.1"] + platform: + - { name: Ubuntu, runner: ubuntu-latest } + - { name: Rocky, runner: ubuntu-latest, container: rockylinux:9 } + - { name: macOS, runner: macos-latest } + - { name: FreeBSD, runner: ubuntu-latest } + exclude: + # Varnish 7.7.3 uses endian.h which does not exist on macOS (uses machine/endian.h instead). + # Upstream issue: https://github.com/varnishcache/varnish-cache/issues/4299 + - varnish: "7.7.3" + platform: { name: macOS, runner: macos-latest } + runs-on: ${{ matrix.platform.runner }} + container: ${{ matrix.platform.container }} + name: ${{ matrix.platform.name }} / Varnish ${{ matrix.varnish }} + steps: + - uses: actions/checkout@v6 + + - name: Install dependencies (Ubuntu) + if: "matrix.platform.name == 'Ubuntu'" + run: | + VERSION_SHORT=$(echo "${{ matrix.varnish }}" | cut -d. -f1,2 | tr -d '.') + curl -s "https://packagecloud.io/install/repositories/varnishcache/varnish${VERSION_SHORT}/script.deb.sh" | sudo bash + docker run --rm -v /tmp/pcre3:/out debian:bookworm-slim \ + bash -c 'apt-get update -qq && apt-get download -q \ + libpcre3 libpcre3-dev libpcre16-3 libpcre32-3 libpcrecpp0v5 \ + && mv *.deb /out/' + sudo dpkg -i /tmp/pcre3/*.deb + sudo apt-get install -y \ + varnish-dev \ + autoconf \ + automake \ + libtool \ + pkg-config \ + python3-docutils + + - name: Install dependencies (Rocky Linux) + if: "matrix.platform.name == 'Rocky'" + run: | + VERSION_SHORT=$(echo "${{ matrix.varnish }}" | cut -d. -f1,2 | tr -d '.') + dnf install -y epel-release + curl -s "https://packagecloud.io/install/repositories/varnishcache/varnish${VERSION_SHORT}/script.rpm.sh" | bash + dnf install -y \ + varnish-devel \ + autoconf \ + automake \ + libtool \ + pkgconfig \ + pcre-devel \ + python3-docutils + + - name: Install dependencies (macOS) + if: "matrix.platform.name == 'macOS'" + run: | + brew install \ + autoconf \ + automake \ + libtool \ + pkg-config \ + pcre \ + pcre2 \ + docutils \ + sphinx-doc + echo "$(brew --prefix sphinx-doc)/bin" >> "$GITHUB_PATH" + + - name: Build and install Varnish from source + if: "matrix.platform.name == 'macOS'" + run: | + git clone --recursive \ + --branch varnish-${{ matrix.varnish }} \ + https://code.vinyl-cache.org/vinyl-cache/vinyl-cache.git \ + /tmp/varnish-src + cd /tmp/varnish-src + ./autogen.sh + ./configure --prefix=/usr/local + make + sudo make install + + - name: Build and test + if: "matrix.platform.name != 'FreeBSD'" + run: | + ./autogen.sh + ./configure && make && make check + + - name: Install dependencies (FreeBSD) + if: "matrix.platform.name == 'FreeBSD'" + uses: cross-platform-actions/action@v0.32.0 + with: + operating_system: freebsd + version: "14.2" + shutdown_vm: false + sync_files: runner-to-vm + run: | + sudo pkg install -y \ + autoconf \ + automake \ + libtool \ + pkgconf \ + pcre \ + pcre2 \ + python3 \ + py311-docutils \ + py311-sphinx \ + git + + - name: Build and install Varnish from source (FreeBSD) + if: "matrix.platform.name == 'FreeBSD'" + uses: cross-platform-actions/action@v0.32.0 + with: + operating_system: freebsd + version: "14.2" + shutdown_vm: false + sync_files: false + run: | + git clone --recursive \ + --branch varnish-${{ matrix.varnish }} \ + https://code.vinyl-cache.org/vinyl-cache/vinyl-cache.git \ + /tmp/varnish-src + cd /tmp/varnish-src + ./autogen.sh + ./configure --prefix=/usr/local + make + sudo make install + + - name: Build and test (FreeBSD) + if: "matrix.platform.name == 'FreeBSD'" + uses: cross-platform-actions/action@v0.32.0 + with: + operating_system: freebsd + version: "14.2" + sync_files: false + run: | + ./autogen.sh + PKG_CONFIG_PATH=/usr/local/lib/pkgconfig ./configure && make && make check diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..bbed5a4 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,28 @@ +name: Tests + +on: + pull_request: + branches: + - main + paths-ignore: + - "**/*.md" + +jobs: + docker-tests: + runs-on: ubuntu-latest + strategy: + matrix: + varnish_version: + - "6.0.17" + - "7.6.5" + - "7.7.3" + - "8.0.1" + name: Varnish ${{ matrix.varnish_version }} + steps: + - uses: actions/checkout@v6 + + - name: Build and test + run: | + docker build . \ + --build-arg "VARNISH_VERSION=${{ matrix.varnish_version }}" \ + -t "libvmod-queryfilter:local-${{ matrix.varnish_version }}" diff --git a/.gitignore b/.gitignore index bdad4d8..9ce31b6 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,9 @@ vmod_queryfilter.3 # Things not to ignore !m4/ax_* + +# AI - commit skills (except gitnexus), ignore everything else +.claude/* +!.claude/skills/ +.claude/skills/gitnexus/ +.gitnexus/ diff --git a/CHANGELOG.md b/CHANGELOG.md index ab9bff5..57449c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,30 @@ -v1.0.1 2022/0109: +# Changelog + +## [Unreleased] — Petit Press Fork + +> This fork is maintained by [Petit Press, a.s.](https://github.com/petitpress/libvmod-queryfilter). +> Forked from [nytimes/libvmod-queryfilter](https://github.com/nytimes/libvmod-queryfilter) (last upstream release: v1.0.1, 2023-01-09). + +### Features +- Add support for indexed array notation (`item[0]=value`, `item[1]=value`) in addition to traditional (`item[]=value`) +- Add URL decoding of parameter names before matching: percent-encoding (`%5B` → `[`) and plus-encoding (`+` → space); original encoding is preserved verbatim in the output URI +- Add Varnish 8.x API compatibility +- Fix: plain filter (e.g. `item`) no longer matches array parameters (e.g. `item[]`) when arrays are enabled +- Fix: correct handling of long URL-encoded parameter names + +### Build & CI +- Introduce GitHub Actions workflows with build matrix for Linux, macOS, and FreeBSD +- Add Varnish version matrix for Linux builds (covering Varnish 6.x, 7.x, and 8.x) +- Add python3 as a build dependency + +--- + +## Upstream History (nytimes/libvmod-queryfilter) + +> Original changelog from https://github.com/nytimes/libvmod-queryfilter. +> Maintained until 2023 by The New York Times Company and contributors. + +v1.0.1 2022/01/09: ----------------- ### Updates to fix issue #6: @@ -59,7 +85,6 @@ v0.1.0 2015/07/09 re-ordered) - Use md2man (when available) to generate manpage for make dist - v0.0.4 2015/02/06 ----------------- Remove a kludge and save a few bytes of WS on success. @@ -69,13 +94,11 @@ v0.0.3 2015/02/03 ### Bugfixes Check for empty query parameter values. - v0.0.2 2015/01/29 ----------------- ### Bugfixes Removed linked list node removal to patch bug where match search could run off the end of the list. - v0.0.1 2015/01/27 ----------------- ### Optimizations @@ -85,15 +108,13 @@ superfluous node traversal. ### Bugfixes #### Single query param results in filtering error The following config: - + set req.url = queryfilter.filterparams(req.url, "q,tag"); - + Was failing for URI's containing only one of the query terms, e.g.: - + curl "http://my_host/?q=1" - v0.0.0 2015/01/26 ----------------- Initial version. - diff --git a/Dockerfile b/Dockerfile index 8a1dbb0..69aeade 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,35 +8,48 @@ # vmod-queryfilter-dev-base: tooling required to build varnish + vmod #------------------------------------------------------------------------------- -FROM gcc:latest AS vmod-queryfilter-dev-base +#------------------------------------------------------------------------------- +# pcre3-debs: fetch libpcre3/libpcre3-dev from bookworm (dropped in trixie) +#------------------------------------------------------------------------------- +FROM debian:bookworm-slim AS pcre3-debs + +RUN apt-get update && \ + apt-get download libpcre3 libpcre3-dev libpcre16-3 libpcre32-3 libpcrecpp0v5 + +#------------------------------------------------------------------------------- +# vmod-queryfilter-dev-base: tooling required to build varnish + vmod +#------------------------------------------------------------------------------- +FROM gcc:14 AS vmod-queryfilter-dev-base -RUN apt-get update -RUN apt-get install -y \ - pip -RUN pip install \ - docutils \ - sphinx +COPY --from=pcre3-debs /*.deb /tmp/pcre3/ +RUN apt-get update && \ + apt-get install -y \ + git \ + python3-docutils \ + python3-sphinx \ + pkg-config && \ + dpkg -i /tmp/pcre3/*.deb || apt-get install -f -y && \ + rm -rf /tmp/pcre3 #------------------------------------------------------------------------------- # vmod-queryfilter-varnish: fresh varnish install from source #------------------------------------------------------------------------------- -ARG VARNISH_VERSION=7.2.1 +ARG VARNISH_VERSION=8.0.1 FROM vmod-queryfilter-dev-base AS vmod-queryfilter-varnish ENV VARNISH_VERSION=${VARNISH_VERSION} -ENV VARNISHSRC=/src/varnish-cache-varnish-${VARNISH_VERSION} -ENV CFLAGS="-Wno-error=format-overflow" - -# Download and prep source: -RUN mkdir -p /src \ - && cd /src \ - && wget https://github.com/varnishcache/varnish-cache/archive/refs/tags/varnish-${VARNISH_VERSION}.tar.gz \ - && tar -xzf varnish-${VARNISH_VERSION}.tar.gz \ - && cd varnish-cache-varnish-${VARNISH_VERSION} \ - && ./autogen.sh \ - && ./configure \ - --prefix=/usr/local \ - && make \ - && make install +ENV VARNISHSRC=/src/varnish-${VARNISH_VERSION} + +# Clone source with submodules (archive tarballs omit them, breaking configure): +RUN git clone --recursive \ + --branch varnish-${VARNISH_VERSION} \ + https://code.vinyl-cache.org/vinyl-cache/vinyl-cache.git \ + ${VARNISHSRC} && \ + cd ${VARNISHSRC} && \ + ./autogen.sh && \ + ./configure \ + --prefix=/usr/local && \ + make CFLAGS="-Wno-error=format-overflow" && \ + make install #------------------------------------------------------------------------------- # vmod-queryfilter-vmod: build and test the vmod @@ -45,12 +58,12 @@ FROM vmod-queryfilter-varnish AS vmod-queryfilter-test COPY . /src/libvmod-queryfilter -RUN cd /src/libvmod-queryfilter \ - && ./autogen.sh +RUN cd /src/libvmod-queryfilter && \ + ./autogen.sh -RUN mkdir -p /src/build \ - && cd /src/build \ - && ../libvmod-queryfilter/configure \ - --prefix=/usr/local \ - && make \ - && make check +RUN mkdir -p /src/build && \ + cd /src/build && \ + ../libvmod-queryfilter/configure \ + --prefix=/usr/local && \ + make && \ + make check diff --git a/README.md b/README.md index 3a3bccb..6597e7c 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,53 @@ import queryfilter; set req.url = queryfilter.filterparams(req.url, "id,q,vals[]", true); ``` +### Examples + +#### Basic Parameter Filtering +``` +# Input: /path?a=1&b=2&c=3&d=4 +# Filter: "a,c" +# Output: /path?a=1&c=3 +``` + +#### Traditional Array Notation +``` +# Input: /path?item[]=first&item[]=second&other=remove +# Filter: "item[]" (with arrays enabled) +# Output: /path?item[]=first&item[]=second +``` + +#### Indexed Array Notation +``` +# Input: /path?item[0]=first&item[1]=second&other=remove +# Filter: "item[]" (with arrays enabled) +# Output: /path?item[0]=first&item[1]=second +``` + +#### Mixed Array Notations +``` +# Input: /path?item[]=first&item[0]=second&item[]=third +# Filter: "item[]" (with arrays enabled) +# Output: /path?item[]=first&item[0]=second&item[]=third +``` + +#### URL-Encoded Parameters +``` +# Input: /path?param=hello%20world&other=test%2Bvalue +# Filter: "param" +# Output: /path?param=hello%20world +# Note: percent-encoding is preserved verbatim in the output URI +``` + +#### URL-Encoded Array Brackets +``` +# Input: /path?items%5B0%5D=first&items%5B1%5D=second +# Filter: "items[]" (with arrays enabled) +# Output: /path?items%5B0%5D=first&items%5B1%5D=second +# Note: %5B = '[', %5D = ']'; decoding is used only for name matching, +# the original encoding is kept in the output URI +``` + #### Query Arrays When query arrays are disabled, libvmod-queryfilter assumes query parameters are individual name/value pairs (e.g. `a=1&b=2...`). Support for arrays in query @@ -58,6 +105,24 @@ parameters - e.g. `a[]=1&a[]=2...` or `a[0]=1&a[1]=2...` - can be enabled by pas argument. When this option is enabled, array parameters will be preserved - in order - in the output URI. +The module supports both traditional array notation (`item[]=value`) and +indexed array notation (`item[0]=value`, `item[1]=value`, etc.). Both notations +can be mixed in the same query string and will be properly filtered when arrays +are enabled. + +#### URL Decoding +The module decodes URL-encoded parameter *names* before comparing them against +the filter list. This includes both percent-encoding (e.g., `%5B` → `[`) and +plus-encoding (e.g., `+` → space). The decoded name is used **only for +matching**; the original percent-encoded form is preserved verbatim in the +output URI. + +**What this means in practice:** +- `items%5B0%5D=v` is matched by a filter that specifies `items[]` (arrays enabled) +- The output still contains `items%5B0%5D=v` - nothing is re-encoded or decoded +- Parameter values are never decoded; they are always copied as-is +- Supported encodings: `%XX` (any hex pair), `+` (space in names) + Building -------- Libvmod-queryfilter attempts to be 100% C99 conformant. You should, therefore, @@ -80,7 +145,7 @@ simply invoke: ``` (See `./configure --help` for configure-time options) -This vmod can also be compiled against a pre-built Varnish Cache 3.x/4.x/5.x/6.x/7.x +This vmod can also be compiled against a pre-built Varnish Cache 3.x/4.x/5.x/6.x/7.x/8.x source by indicating the path to the (pre-compiled!) varnish source using the `VARNISHSRC` configuration variable, like so: diff --git a/autogen.sh b/autogen.sh index f46ae9f..6a85fae 100755 --- a/autogen.sh +++ b/autogen.sh @@ -1,10 +1,10 @@ #!/bin/sh warn() { - echo "WARNING: $@" 1>&2 + echo "WARNING: $*" 1>&2 } -case `uname -s` in +case $(uname -s) in Darwin) LIBTOOLIZE=glibtoolize ;; @@ -18,11 +18,11 @@ SunOS) LIBTOOLIZE=libtoolize ;; *) - warn "unrecognized platform:" `uname -s` + warn "unrecognized platform: $(uname -s)" LIBTOOLIZE=libtoolize esac -automake_version=`automake --version | tr ' ' '\n' | egrep '^[0-9]\.[0-9a-z.-]+'` +automake_version=$(automake --version | tr ' ' '\n' | grep -E '^[0-9]\.[0-9a-z.-]+') if [ -z "$automake_version" ] ; then warn "unable to determine automake version" else diff --git a/src/tests/filterparams_check_array_filter_no_match.vtc b/src/tests/filterparams_check_array_filter_no_match.vtc new file mode 100644 index 0000000..425b044 --- /dev/null +++ b/src/tests/filterparams_check_array_filter_no_match.vtc @@ -0,0 +1,52 @@ +varnishtest "Test that a plain filter 'foo' does not match 'foo[]' or 'foo[0]' params" + +server s1 { + # req 1: foo=val kept (exact match), foo[]=val and foo[0]=val removed + rxreq + expect req.url == "/path?foo=val" + txresp + + # req 2: nothing matches — foo[] does not satisfy plain filter 'foo' + rxreq + expect req.url == "/path" + txresp +} -start + +varnish v1 -vcl+backend { + import std; + import queryfilter from "${vmod_topbuild}/src/.libs/libvmod_queryfilter.so"; + + sub vcl_hash { + std.syslog(180, "queryfilter before: " + req.url); + set req.url = queryfilter.filterparams(req.url, "foo", true); + std.syslog(180, "queryfilter after: " + req.url); + } +} -start + +client c1 { + # Plain param matches plain filter — kept + txreq -url "/path?foo=val&foo[]=arr&foo[0]=indexed&remove=this" + rxresp + expect resp.status == 200 + + # Only array params present — none should match plain filter 'foo' + txreq -url "/path?foo[]=first&foo[]=second&remove=this" + rxresp + expect resp.status == 200 + + # Indexed array — should not match plain filter 'foo' + txreq -url "/path?foo[0]=first&foo[1]=second&remove=this" + rxresp + expect resp.status == 200 +} + +varnish v1 -expect n_object == 0 +varnish v1 -expect cache_miss == 0 +varnish v1 -expect cache_hit == 0 + +client c1 -run +delay .1 + +varnish v1 -expect n_object == 2 +varnish v1 -expect cache_miss == 2 +varnish v1 -expect cache_hit == 1 diff --git a/src/tests/filterparams_check_for_encoded_ampersands.vtc b/src/tests/filterparams_check_for_encoded_ampersands.vtc new file mode 100644 index 0000000..18ebddd --- /dev/null +++ b/src/tests/filterparams_check_for_encoded_ampersands.vtc @@ -0,0 +1,57 @@ +varnishtest "Test edge case: URL-encoded ampersands in parameter values" + +server s1 { + rxreq + expect req.url == "/some_path?param1=value%26with%26encoded%26amps" + txresp + + rxreq + expect req.url == "/some_path?param1=value" + txresp +} -start + +varnish v1 -vcl+backend { + import std; + import queryfilter from "${vmod_topbuild}/src/.libs/libvmod_queryfilter.so"; + + sub vcl_hash { + std.syslog(180, "queryfilter before: " + req.url); + set req.url = queryfilter.filterparams(req.url, "param1", false); + std.syslog(180, "queryfilter after: " + req.url); + } +} -start + +client c1 { + # Test parameter value containing encoded ampersand. + # %26 is a literal '&' in the value; strtok_r splits on real '&' only, + # so the encoded ampersand does NOT split the parameter. + # Filtered result: /some_path?param1=value%26with%26encoded%26amps + txreq -url "/some_path?param1=value%26with%26encoded%26amps&remove=this&other=that" + rxresp + expect resp.status == 200 + + # This request has literal (unencoded) '&' in the URL, so the tokenizer + # splits it into: param1=value, with, encoded, amps, extra=remove. + # Only param1 matches the filter; filtered result: /some_path?param1=value + # This is a different cache object from req 1. + txreq -url "/some_path?other=different¶m1=value&with&encoded&s&extra=remove" + rxresp + expect resp.status == 200 + + # Same encoded form as req 1 (just different extra params) - should be a cache HIT. + # Filtered result: /some_path?param1=value%26with%26encoded%26amps + txreq -url "/some_path?param1=value%26with%26encoded%26amps&extra=param" + rxresp + expect resp.status == 200 +} + +varnish v1 -expect n_object == 0 +varnish v1 -expect cache_miss == 0 +varnish v1 -expect cache_hit == 0 + +client c1 -run +delay .1 + +varnish v1 -expect n_object == 2 +varnish v1 -expect cache_miss == 2 +varnish v1 -expect cache_hit == 1 diff --git a/src/tests/filterparams_check_for_indexed_array.vtc b/src/tests/filterparams_check_for_indexed_array.vtc new file mode 100644 index 0000000..ce4df7a --- /dev/null +++ b/src/tests/filterparams_check_for_indexed_array.vtc @@ -0,0 +1,63 @@ +varnishtest "Test queryfilter vmod for indexed array notation support" + +server s1 { + rxreq + txresp + rxreq + txresp + rxreq + txresp + rxreq + txresp + rxreq + txresp +} -start + +varnish v1 -vcl+backend { + import std; + import queryfilter from "${vmod_topbuild}/src/.libs/libvmod_queryfilter.so"; + + sub vcl_hash { + std.syslog(180, "queryfilter before: " + req.url); + set req.url = queryfilter.filterparams(req.url, "item[]", true); + std.syslog(180, "queryfilter after: " + req.url); + } +} -start + +client c1 { + # Test indexed array notation: item[0]=value1&item[1]=value2 + txreq -url "/some_path?item[0]=first&item[1]=second&item[2]=third&other=remove" + rxresp + expect resp.status == 200 + + # Test mixed array notations: item[]=value1&item[0]=value2 + txreq -url "/some_path?item[]=first&other=remove&item[0]=second&item[]=third" + rxresp + expect resp.status == 200 + + # Test traditional array notation should still work + txreq -url "/some_path?item[]=first&item[]=second&other=remove" + rxresp + expect resp.status == 200 + + # This should be a cache HIT - same as first request after filtering + txreq -url "/some_path?other=different&item[0]=first&item[1]=second&item[2]=third&extra=params" + rxresp + expect resp.status == 200 + + # This should be a cache HIT - same as third request after filtering + txreq -url "/some_path?item[]=first&item[]=second&unwanted=param" + rxresp + expect resp.status == 200 +} + +varnish v1 -expect n_object == 0 +varnish v1 -expect cache_miss == 0 +varnish v1 -expect cache_hit == 0 + +client c1 -run +delay .1 + +varnish v1 -expect n_object == 3 +varnish v1 -expect cache_miss == 3 +varnish v1 -expect cache_hit == 2 diff --git a/src/tests/filterparams_check_for_url_decoding.vtc b/src/tests/filterparams_check_for_url_decoding.vtc new file mode 100644 index 0000000..fb4007b --- /dev/null +++ b/src/tests/filterparams_check_for_url_decoding.vtc @@ -0,0 +1,140 @@ +varnishtest "Test queryfilter vmod for comprehensive URL decoding support" + +server s1 { + # req 1: param1 + param2 kept (encoding preserved) + rxreq + expect req.url == "/some_path?param1=hello%20world¶m2=test%2Bvalue" + txresp + + # req 2: + encoding preserved verbatim + rxreq + expect req.url == "/some_path?param1=hello+world¶m2=another+test" + txresp + + # req 3: mixed encoded/plain + rxreq + expect req.url == "/some_path?param1=normal¶m2=url%20encoded" + txresp + + # req 4: %5B/%5D brackets preserved; matched via decoded name items[0]/items[1] + rxreq + expect req.url == "/some_path?items%5B0%5D=first&items%5B1%5D=second" + txresp + + # req 5: mixed bracket styles, encoding preserved + rxreq + expect req.url == "/some_path?items%5B%5D=hello%20world&items%5B0%5D=test" + txresp + + # req 6: various encoded values in array params + rxreq + expect req.url == "/some_path?items%5B0%5D=hello%2C%20world%21&items%5B1%5D=test%26more" + txresp + + # req 7 (client req 7) is a cache HIT - does not reach backend + # req 8 (client req 8) is a cache HIT - does not reach backend + + # req 9: special%2Dname%5B0%5D decodes to special-name[0], no match in filter list + rxreq + expect req.url == "/some_path" + txresp + + # req 10: encoded brackets with non-encoded values + rxreq + expect req.url == "/some_path?items%5B%5D=normal&items%5B0%5D=url%20encoded" + txresp + + # req 11: param1 value contains an encoded URL (embedded query params in value) + rxreq + expect req.url == "/some_path?param1=https%3A%2F%2Fexample.com%3Fid%3D123%26name%3Dtest¶m2=simple" + txresp + + # req 12: param1 value contains encoded query string + rxreq + expect req.url == "/some_path?param1=base%3Fa%3D1%26b%3D2%26c%3D3¶m2=normal" + txresp +} -start + +varnish v1 -vcl+backend { + import std; + import queryfilter from "${vmod_topbuild}/src/.libs/libvmod_queryfilter.so"; + + sub vcl_hash { + std.syslog(180, "queryfilter before: " + req.url); + set req.url = queryfilter.filterparams(req.url, "param1,param2,items[]", true); + std.syslog(180, "queryfilter after: " + req.url); + } +} -start + +client c1 { + # Test basic URL encoded parameter names and values + txreq -url "/some_path?param1=hello%20world¶m2=test%2Bvalue&remove=this" + rxresp + expect resp.status == 200 + + # Test URL encoded parameter with + signs + txreq -url "/some_path?param1=hello+world¶m2=another+test&remove=this" + rxresp + expect resp.status == 200 + + # Test mix of encoded and non-encoded parameters + txreq -url "/some_path?param1=normal¶m2=url%20encoded&remove=this" + rxresp + expect resp.status == 200 + + # Test percent-encoded brackets: %5B = [, %5D = ] + txreq -url "/some_path?items%5B0%5D=first&items%5B1%5D=second&remove=this" + rxresp + expect resp.status == 200 + + # Test mixed encoded brackets with values + txreq -url "/some_path?items%5B%5D=hello%20world&items%5B0%5D=test&remove=this" + rxresp + expect resp.status == 200 + + # Test various percent-encoded characters + txreq -url "/some_path?items%5B0%5D=hello%2C%20world%21&items%5B1%5D=test%26more&remove=this" + rxresp + expect resp.status == 200 + + # This should be a cache HIT - same filtered URL as req 1 + txreq -url "/some_path?unwanted=param¶m1=hello%20world¶m2=test%2Bvalue&extra=remove" + rxresp + expect resp.status == 200 + + # This should be a cache HIT - same filtered URL as req 4 + txreq -url "/some_path?other=remove&items%5B0%5D=first&items%5B1%5D=second&extra=param" + rxresp + expect resp.status == 200 + + # Test combination of encoded parameter names and values (unique - no filter match) + txreq -url "/some_path?special%2Dname%5B0%5D=value%2Dwith%2Ddashes&remove=this" + rxresp + expect resp.status == 200 + + # Test mix of encoded indexed and traditional arrays (unique) + txreq -url "/some_path?items%5B%5D=normal&items%5B0%5D=url%20encoded&remove=this" + rxresp + expect resp.status == 200 + + # Test URL-encoded parameter value containing embedded query parameters + txreq -url "/some_path?param1=https%3A%2F%2Fexample.com%3Fid%3D123%26name%3Dtest¶m2=simple&remove=this" + rxresp + expect resp.status == 200 + + # Test multiple embedded query parameters in one value + txreq -url "/some_path?param1=base%3Fa%3D1%26b%3D2%26c%3D3¶m2=normal&remove=this" + rxresp + expect resp.status == 200 +} + +varnish v1 -expect n_object == 0 +varnish v1 -expect cache_miss == 0 +varnish v1 -expect cache_hit == 0 + +client c1 -run +delay .1 + +varnish v1 -expect n_object == 10 +varnish v1 -expect cache_miss == 10 +varnish v1 -expect cache_hit == 2 diff --git a/src/tests/filterparams_check_long_encoded_name.vtc b/src/tests/filterparams_check_long_encoded_name.vtc new file mode 100644 index 0000000..329edd1 --- /dev/null +++ b/src/tests/filterparams_check_long_encoded_name.vtc @@ -0,0 +1,54 @@ +varnishtest "Test URL decoding of param names whose encoded form exceeds 255 bytes" + +# The param name is 86 'a' characters encoded as %61 each. +# Encoded length: 86 * 3 = 258 bytes (> MAX_DECODED_PARAM_NAME_LEN = 255) +# Decoded length: 86 bytes (<= MAX_DECODED_PARAM_NAME_LEN) +# The filter contains the plain 86-char name. Correct behaviour: the param +# is kept because decoding is not skipped just because encoded > 255. +# +# Filter name (86 a's): +# aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +# Encoded param name (86 x %61): +# %61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61 + +server s1 { + # req 1: encoded param kept (encoding preserved in output URL) + rxreq + expect req.url == "/path?%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61=value" + txresp +} -start + +varnish v1 -vcl+backend { + import std; + import queryfilter from "${vmod_topbuild}/src/.libs/libvmod_queryfilter.so"; + + sub vcl_hash { + std.syslog(180, "queryfilter before: " + req.url); + set req.url = queryfilter.filterparams(req.url, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", false); + std.syslog(180, "queryfilter after: " + req.url); + } +} -start + +client c1 { + # Encoded param name (258 encoded bytes -> 86 decoded bytes) must be kept. + # A second extra param is removed to confirm filtering is active. + txreq -url "/path?%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61=value&remove=this" + rxresp + expect resp.status == 200 + + # Same filtered URL — cache HIT (different extra params, same result) + txreq -url "/path?other=junk&%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61%61=value" + rxresp + expect resp.status == 200 +} + +varnish v1 -expect n_object == 0 +varnish v1 -expect cache_miss == 0 +varnish v1 -expect cache_hit == 0 + +client c1 -run +delay .1 + +varnish v1 -expect n_object == 1 +varnish v1 -expect cache_miss == 1 +varnish v1 -expect cache_hit == 1 diff --git a/src/vmod_queryfilter.c b/src/vmod_queryfilter.c index 5a72369..07faaaf 100644 --- a/src/vmod_queryfilter.c +++ b/src/vmod_queryfilter.c @@ -2,11 +2,11 @@ * libvmod-queryfilter: Simple VMOD for filtering/sorting query strings * * Copyright © 2014-2020 The New York Times Company - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software @@ -14,7 +14,7 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - * + * *===========================================================================*/ #include "config.h" @@ -23,6 +23,7 @@ #include #include #include +#include /*--- Varnish 3.x: ---*/ #if VARNISH_API_MAJOR == 3 @@ -41,8 +42,8 @@ typedef struct sess req_ctx; typedef const struct vrt_ctx req_ctx; #endif /* (VARNISH_API_MAJOR == 4 || VARNISH_API_MAJOR == 5 ) */ -/*--- Varnish 6.x ---*/ -#if (VARNISH_API_MAJOR == 6 || VARNISH_API_MAJOR == 7) +/*--- Varnish 6.x, 7.x and 8.x ---*/ +#if (VARNISH_API_MAJOR == 6 || VARNISH_API_MAJOR == 7 || VARNISH_API_MAJOR == 8) #include "cache/cache.h" #include "vcl.h" #include "vre.h" @@ -50,7 +51,7 @@ typedef const struct vrt_ctx req_ctx; #include "vsb.h" #include "vcc_if.h" typedef const struct vrt_ctx req_ctx; -#endif /* VARNISH_API_MAJOR == 6 */ +#endif /* (VARNISH_API_MAJOR == 6 || VARNISH_API_MAJOR == 7 || VARNISH_API_MAJOR == 8) */ /* WS_Reserve was deprecated in Varnish 6.3.0: */ #if (VARNISH_API_MAJOR < 6) || (VARNISH_API_MAJOR == 6 && VARNISH_API_MINOR < 3) @@ -58,6 +59,13 @@ typedef const struct vrt_ctx req_ctx; WS_Reserve(ws, 0) #endif /* Varnish Version >= 6.3 */ +/* WS_Reservation() was introduced in Varnish 6.4 to replace direct ws->f + * access (WS_Front() was removed in 7.0). For Varnish < 6.4, define a shim + * via the still-public ws->f member. */ +#if (VARNISH_API_MAJOR < 6) || (VARNISH_API_MAJOR == 6 && VARNISH_API_MINOR < 4) +#define WS_Reservation(ws) ((ws)->f) +#endif + /** Alignment macros ala varnish internals: */ #define PALIGN_SIZE (sizeof(void*)) @@ -69,6 +77,123 @@ typedef struct query_param { char* value; } query_param_t; +/** URL decode a string in-place. + * @param str the string to decode + * @return the length of the decoded string + */ +static int +url_decode_inplace(char* str) +{ + char* src = str; + char* dst = str; + + if (!str) return 0; + + while (*src) { + if (*src == '%' && src[1] && src[2] && + isxdigit((unsigned char)src[1]) && isxdigit((unsigned char)src[2])) { + int high = isdigit((unsigned char)src[1]) ? (src[1] - '0') : (toupper((unsigned char)src[1]) - 'A' + 10); + int low = isdigit((unsigned char)src[2]) ? (src[2] - '0') : (toupper((unsigned char)src[2]) - 'A' + 10); + *dst++ = (char)((high << 4) | low); + src += 3; + } else if (*src == '+') { + *dst++ = ' '; + src++; + } else { + *dst++ = *src++; + } + } + *dst = '\0'; + return dst - str; +} + +/** Maximum byte length of a parameter name that will be decoded for comparison. + * Names longer than this are compared as-is (without URL-decoding). */ +#define MAX_DECODED_PARAM_NAME_LEN 255 + +/** Extract the base parameter name from array notation. + * Handles both "param[]" and "param[0]", "param[1]" etc. + * @param param_name the parameter name to process + * @param base_len pointer to store the base name length + * @return pointer to start of base name, or NULL if not an array + */ +static const char* +extract_array_base_name_fast(const char* param_name, size_t* base_len) +{ + const char* bracket = strchr(param_name, '['); + if (!bracket) { + return NULL; /* Not an array parameter */ + } + + /* Require exactly one '[...]' group at the end of the name. + * strchr finds the first ']', so *(end+1) != '\0' rejects anything + * after the bracket (e.g. "param[0][1]" or "param[]extra"). */ + const char* end = strchr(bracket, ']'); + if (!end || *(end + 1) != '\0') { + return NULL; /* Not a valid single-level array notation */ + } + + *base_len = bracket - param_name; + return param_name; +} + +/** Check if two parameter names should be considered the same for array purposes. + * Temporarily decode parameter names for comparison without modifying originals. + * @param filter_name the name from the filter list + * @param param_name the name from the query string (may be URL-encoded) + * @param arrays_enabled whether array processing is enabled + * @return 1 if they match, 0 otherwise + */ +static int +param_names_match_with_decoding(const char* filter_name, const char* param_name, unsigned arrays_enabled) +{ + /* Each decoded byte is at most 3 encoded chars (%XX), so size the buffer + * for the worst-case encoded input. This handles names where the encoded + * form exceeds MAX_DECODED_PARAM_NAME_LEN but the decoded form does not. */ + char decoded_name[MAX_DECODED_PARAM_NAME_LEN * 3 + 1]; + const char* name_to_compare = param_name; + + size_t len = strlen(param_name); + if (len <= MAX_DECODED_PARAM_NAME_LEN * 3) { + memcpy(decoded_name, param_name, len + 1); + int decoded_len = url_decode_inplace(decoded_name); + /* Only use the decoded form if it fits within the comparison limit; + * pathologically long names fall back to raw comparison. */ + if ((size_t)decoded_len <= MAX_DECODED_PARAM_NAME_LEN) { + name_to_compare = decoded_name; + } + } + + if (!arrays_enabled) { + return strcmp(filter_name, name_to_compare) == 0; + } + + size_t filter_base_len, param_base_len; + const char* filter_base = extract_array_base_name_fast(filter_name, &filter_base_len); + const char* param_base = extract_array_base_name_fast(name_to_compare, ¶m_base_len); + + /* If filter expects arrays, match base names */ + if (filter_base && param_base) { + return filter_base_len == param_base_len && + strncmp(filter_base, param_base, filter_base_len) == 0; + } + + /* If filter expects arrays but param is not array, no match */ + if (filter_base && !param_base) { + return 0; + } + + /* If filter doesn't expect arrays but param is an array, no match. + * A plain filter "foo" must not implicitly absorb "foo[]" or "foo[0]"; + * callers who want array params must include "foo[]" in the filter list. */ + if (!filter_base && param_base) { + return 0; + } + + /* Both are regular parameters */ + return strcmp(filter_name, name_to_compare) == 0; +} + /** Query string tokenizer. This function takes a query string as input, and * yields array of name/value pairs. Allocation happens inside the * reserved workspace, pointed to by *ws_free. On error, no space is consumed. @@ -76,6 +201,7 @@ typedef struct query_param { * @param result pointer to query_param_t* at the head of the array * @param ws_free pointer to char* at the head of the reserved workspace * @param ws_remain the amount of reserved workspace remaining, in bytes + * @param query_str the query string to tokenize * @return the number of non-empty query params or -1 on OOM */ static int @@ -111,7 +237,7 @@ tokenize_querystring(query_param_t** result, char** ws_free, unsigned* remain, c /* "Allocate" space at the head of the workspace and place a node: */ query_param_t* param = (query_param_t*)ws_free_temp; param->name = param_str; - /* TODO: will varnish filter malformed queries, e.g.: "?=&"? + /* TODO: will varnish filter malformed queries, e.g.: "?=&"? * Else: this needs some more rigor: */ param->value = strchr(param_str,'='); @@ -205,7 +331,7 @@ vmod_filterparams(req_ctx* sp, const char* uri, const char* params_in, unsigned /* Reserve the *rest* of the workspace - it's okay, we're gonna release * almost all of it in the end ;) */ ws_remain = WS_ReserveAll(workspace); - ws_free = workspace->f; + ws_free = WS_Reservation(workspace); /* Duplicate the URI, bailing on OOM: */ new_uri = strtmp_append(&ws_free, &ws_remain, uri); @@ -254,7 +380,7 @@ vmod_filterparams(req_ctx* sp, const char* uri, const char* params_in, unsigned { for(i=0, current=head; iname)) { + if(!param_names_match_with_decoding(filter_name, current->name, arrays_enabled)) { continue; }; diff --git a/util/README.md b/util/README.md index 938da91..8836d26 100644 --- a/util/README.md +++ b/util/README.md @@ -1,4 +1,20 @@ # Util - `run_tests.sh`: Wrapper script used by the makefile to invoke varnishtest against the VCL files in this repo. - - `docker_tests.sh`: (**:warning:** this is a HACK/WIP!): export `VARNISH_VERSIONS` to be a space-separated list of varnish versions to test against and run `./util/docker_tests.sh`. + - `docker-tests.sh`: Build and test the VMOD against multiple Varnish versions using Docker. + + Usage: + ```bash + # Use default versions (6.0.17, 7.6.5, 7.7.3, 8.0.1) + ./util/docker-tests.sh + + # Test single version + VARNISH_VERSIONS=8.0.1 ./util/docker-tests.sh + + # Test multiple versions (space-separated string) + VARNISH_VERSIONS="6.0.17 8.0.1" ./util/docker-tests.sh + + # Using export also works + export VARNISH_VERSIONS="6.0.17 8.0.1" + ./util/docker-tests.sh + ``` diff --git a/util/docker-tests.sh b/util/docker-tests.sh index f7ff4db..d06b94a 100755 --- a/util/docker-tests.sh +++ b/util/docker-tests.sh @@ -1,15 +1,18 @@ -#!/usr/bin/env bash -repodir="$( cd ${0%/*}/.. ; echo $PWD )" +#!/bin/bash +set -eo pipefail +repodir="$( cd "${0%/*}"/.. || return ; echo "${PWD}" )" + +DEFAULT_VARNISH_VERSIONS="6.0.17 7.6.5 7.7.3 8.0.1" +VARNISH_VERSIONS="${VARNISH_VERSIONS:-$DEFAULT_VARNISH_VERSIONS}" queryfilter_test() { - cd ${repodir} + cd "${repodir}" || exit 1 docker build . \ - --build-arg "VARNISH_VERSION=${VARNISH_VERSION:-"7.2.1"}" \ - -t libvmod-queryfilter:local-${VARNISH_VERSION} + --build-arg "VARNISH_VERSION=${VARNISH_VERSION}" \ + -t "libvmod-queryfilter:local-${VARNISH_VERSION}" } -for VARNISH_VERSION in ${VARNISH_VERSIONS[@]}; do +for VARNISH_VERSION in ${VARNISH_VERSIONS}; do export VARNISH_VERSION queryfilter_test done - diff --git a/util/run_vtests.sh b/util/run_vtests.sh index 7a3bda2..5049e6b 100755 --- a/util/run_vtests.sh +++ b/util/run_vtests.sh @@ -1,13 +1,12 @@ #!/bin/bash -if [ -z "${VARNISHSRC}" -o ! -d ${VARNISHSRC} ] ; then - echo "Usage: VARNISHSRC= $(basename $0) TEST" +if [[ -z "${VARNISHSRC}" || ! -d "${VARNISHSRC}" ]] ; then + echo "Usage: VARNISHSRC= $(basename "$0") TEST" exit 1 fi -${VARNISHSRC}/bin/varnishtest/varnishtest \ - -Dvarnish_source=${VARNISHSRC} \ - -Dvarnishd=${VARNISHSRC}/bin/varnishd/varnishd \ - -Dvmod_topbuild=$(pwd) $* +"${VARNISHSRC}/bin/varnishtest/varnishtest" \ + -Dvarnish_source="${VARNISHSRC}" \ + -Dvarnishd="${VARNISHSRC}/bin/varnishd/varnishd" \ + -Dvmod_topbuild="$(pwd)" "$@" # EOF -