From ec55c5b4210ee3d8c564868e72add473c5f3f438 Mon Sep 17 00:00:00 2001 From: Adrian Gruntkowski Date: Wed, 6 May 2026 16:34:58 +0200 Subject: [PATCH 1/3] Exclude selected wildcard pattern matches from subsequent step suggestions --- extra/lib/plausible/stats/exploration.ex | 28 +++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/extra/lib/plausible/stats/exploration.ex b/extra/lib/plausible/stats/exploration.ex index 1bf0a5437349..769bedd99c28 100644 --- a/extra/lib/plausible/stats/exploration.ex +++ b/extra/lib/plausible/stats/exploration.ex @@ -299,6 +299,15 @@ defmodule Plausible.Stats.Exploration do from(s in q, where: ^step_condition) end) + last_step = List.last(steps) + + q_matches = + if last_step && last_step.includes_subpaths do + exclude_wildcard_matches(q_matches, last_step) + else + q_matches + end + # Fan out each q_combined row into up to two output rows (exact + wildcard) # using ARRAY JOIN over a small boolean array. # @@ -399,6 +408,15 @@ defmodule Plausible.Stats.Exploration do ) end + defp exclude_wildcard_matches(query, step) do + pattern = wildcard_pattern(step.pathname) + + from m in query, + where: + selected_as(:name) != ^step.name or + not fragment("match(?, ?)", selected_as(:pathname), ^pattern) + end + defp exclude_goal_matches(query, goals) do to_exclude = goals @@ -647,9 +665,7 @@ defmodule Plausible.Stats.Exploration do defp step_condition(step, count) when count <= @max_steps do cond do step.includes_subpaths -> - escaped = Regex.escape(step.pathname) - - pattern = "^#{escaped}(/.+)?$" + pattern = wildcard_pattern(step.pathname) dynamic( [s], @@ -675,6 +691,12 @@ defmodule Plausible.Stats.Exploration do end end + defp wildcard_pattern(pathname) when is_binary(pathname) do + escaped = Regex.escape(pathname) + + "^#{escaped}(/.+)?$" + end + defp maybe_search(query, search_term) do case String.trim(search_term) do term when byte_size(term) > 2 -> From 06dd58c8c8597154074e2075bb14db7b5d436788 Mon Sep 17 
00:00:00 2001 From: Adrian Gruntkowski Date: Wed, 6 May 2026 16:54:40 +0200 Subject: [PATCH 2/3] Exclude goal pattern matches the same way --- extra/lib/plausible/stats/exploration.ex | 27 +++++++++++++++--------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/extra/lib/plausible/stats/exploration.ex b/extra/lib/plausible/stats/exploration.ex index 769bedd99c28..42cb19561b35 100644 --- a/extra/lib/plausible/stats/exploration.ex +++ b/extra/lib/plausible/stats/exploration.ex @@ -298,15 +298,7 @@ defmodule Plausible.Stats.Exploration do from(s in q, where: ^step_condition) end) - - last_step = List.last(steps) - - q_matches = - if last_step && last_step.includes_subpaths do - exclude_wildcard_matches(q_matches, last_step) - else - q_matches - end + |> maybe_exclude_step_matches(List.last(steps)) # Fan out each q_combined row into up to two output rows (exact + wildcard) # using ARRAY JOIN over a small boolean array. @@ -408,7 +400,7 @@ defmodule Plausible.Stats.Exploration do ) end - defp exclude_wildcard_matches(query, step) do + defp maybe_exclude_step_matches(query, %{includes_subpaths: true} = step) do pattern = wildcard_pattern(step.pathname) from m in query, @@ -417,6 +409,21 @@ defmodule Plausible.Stats.Exploration do not fragment("match(?, ?)", selected_as(:pathname), ^pattern) end + defp maybe_exclude_step_matches(query, %{is_goal: true, name: "pageview"} = step) do + if String.contains?(step.pathname, "*") do + pattern = Filters.Utils.page_regex(step.pathname) + + from m in query, + where: + selected_as(:name) != ^step.name or + not fragment("match(?, ?)", selected_as(:pathname), ^pattern) + else + query + end + end + + defp maybe_exclude_step_matches(query, _), do: query + defp exclude_goal_matches(query, goals) do to_exclude = goals From e65fda2cdb3aaba1d54a8e8cfd1f5d31cdb2640c Mon Sep 17 00:00:00 2001 From: Adrian Gruntkowski Date: Thu, 7 May 2026 09:35:25 +0200 Subject: [PATCH 3/3] Fix case of empty goal exclusions and add tests --- 
extra/lib/plausible/stats/exploration.ex | 14 ++- test/plausible/stats/exploration_test.exs | 118 ++++++++++++++++++++++ 2 files changed, 127 insertions(+), 5 deletions(-) diff --git a/extra/lib/plausible/stats/exploration.ex b/extra/lib/plausible/stats/exploration.ex index 42cb19561b35..5dc9296e67e8 100644 --- a/extra/lib/plausible/stats/exploration.ex +++ b/extra/lib/plausible/stats/exploration.ex @@ -435,12 +435,16 @@ defmodule Plausible.Stats.Exploration do } end) - types = %{name: :string, pathname: :string} + if to_exclude != [] do + types = %{name: :string, pathname: :string} - from m in subquery(query), - left_join: g in values(to_exclude, types), - on: g.name == m.name and g.pathname == m.pathname, - where: g.name == "" or m.includes_subpaths + from m in subquery(query), + left_join: g in values(to_exclude, types), + on: g.name == m.name and g.pathname == m.pathname, + where: g.name == "" or m.includes_subpaths + else + query + end end # Expand each (name, pathname, user_id) row into all prefix paths via diff --git a/test/plausible/stats/exploration_test.exs b/test/plausible/stats/exploration_test.exs index 4cb916f594f3..79d18b7f449b 100644 --- a/test/plausible/stats/exploration_test.exs +++ b/test/plausible/stats/exploration_test.exs @@ -1056,6 +1056,124 @@ defmodule Plausible.Stats.ExplorationTest do assert next_step7.step.is_goal assert next_step7.visitors == 1 end + + test "suggestions matching implicit wildcard from previous step are excluded" do + now = DateTime.utc_now() + site = new_site() + + populate_stats(site, [ + build(:pageview, + user_id: 123, + pathname: "/a", + timestamp: DateTime.shift(now, minute: -300) + ), + build(:pageview, + user_id: 123, + pathname: "/a/b", + timestamp: DateTime.shift(now, minute: -290) + ), + build(:pageview, + user_id: 124, + pathname: "/a/b", + timestamp: DateTime.shift(now, minute: -300) + ), + build(:pageview, + user_id: 124, + pathname: "/a", + timestamp: DateTime.shift(now, minute: -290) + ), + 
build(:pageview, + user_id: 125, + pathname: "/a/b", + timestamp: DateTime.shift(now, minute: -300) + ), + build(:pageview, + user_id: 125, + pathname: "/a-blog", + timestamp: DateTime.shift(now, minute: -290) + ) + ]) + + query = QueryBuilder.build!(site, input_date_range: :all) + + journey = [ + %Exploration.Journey.Step{ + name: "pageview", + pathname: "/a", + includes_subpaths: true, + subpaths_count: 2 + } + ] + + assert {:ok, [next_step]} = Exploration.next_steps(site, query, journey) + + assert next_step.step.label == "/a-blog" + end + + test "suggestions matching goal pattern from previous step are excluded" do + now = DateTime.utc_now() + site = new_site() + + Plausible.Goals.create(site, %{"page_path" => "/a*"}) + + populate_stats(site, [ + build(:pageview, + user_id: 123, + pathname: "/a", + timestamp: DateTime.shift(now, minute: -300) + ), + build(:pageview, + user_id: 123, + pathname: "/a/b", + timestamp: DateTime.shift(now, minute: -290) + ), + build(:pageview, + user_id: 124, + pathname: "/a/b", + timestamp: DateTime.shift(now, minute: -300) + ), + build(:pageview, + user_id: 124, + pathname: "/a", + timestamp: DateTime.shift(now, minute: -290) + ), + build(:pageview, + user_id: 125, + pathname: "/a/b", + timestamp: DateTime.shift(now, minute: -300) + ), + build(:pageview, + user_id: 125, + pathname: "/a-blog", + timestamp: DateTime.shift(now, minute: -290) + ), + build(:pageview, + user_id: 126, + pathname: "/a", + timestamp: DateTime.shift(now, minute: -300) + ), + build(:pageview, + user_id: 126, + pathname: "/blog", + timestamp: DateTime.shift(now, minute: -290) + ) + ]) + + query = QueryBuilder.build!(site, input_date_range: :all) + + journey = [ + %Exploration.Journey.Step{ + label: "Visit /a*", + name: "pageview", + pathname: "/a*", + is_goal: true + } + ] + + assert {:ok, [next_step]} = Exploration.next_steps(site, query, journey) + + assert next_step.step.label == "/blog" + end end describe "implicit wildcard pathnames" do