Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

APIv2: Behavioral filtering #4980

Merged
merged 20 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ All notable changes to this project will be documented in this file.

### Changed

- Filters appear in the search bar as ?f=is,page,/docs,/blog&f=... instead of ?filters=((is,page,(/docs,/blog)),...) for Plausible links sent on various platforms to work reliably.
- Filters appear in the search bar as ?f=is,page,/docs,/blog&f=... instead of ?filters=((is,page,(/docs,/blog)),...) for Plausible links sent on various platforms to work reliably.
- Details modal search inputs are now case-insensitive.
- Improved report performance in cases where site has a lot of unique pathnames

Expand Down
7 changes: 6 additions & 1 deletion assets/js/types/query-api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ export type SimpleFilterDimensions =
export type CustomPropertyFilterDimensions = string;
export type GoalDimension = "event:goal";
export type TimeDimensions = "time" | "time:month" | "time:week" | "time:day" | "time:hour";
export type FilterTree = FilterEntry | FilterAndOr | FilterNot;
export type FilterTree = FilterEntry | FilterAndOr | FilterNot | FilterHasDone;
export type FilterEntry = FilterWithoutGoals | FilterWithGoals | FilterWithPattern | FilterForSegment;
/**
* @minItems 3
Expand Down Expand Up @@ -130,6 +130,11 @@ export type FilterAndOr = ["and" | "or", [FilterTree, ...FilterTree[]]];
* @maxItems 2
*/
export type FilterNot = ["not", FilterTree];
/**
* @minItems 2
* @maxItems 2
*/
export type FilterHasDone = ["has_done" | "has_not_done", FilterTree];
/**
* @minItems 2
* @maxItems 2
Expand Down
4 changes: 2 additions & 2 deletions extra/lib/plausible/stats/goal/revenue.ex
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ defmodule Plausible.Stats.Goal.Revenue do

The resulting data structure is attached to a `Query` and used below in `format_revenue_metric/3`.
"""
def preload(site, goals, metrics, dimensions) do
def preload(site, preloaded_goals, metrics, dimensions) do
cond do
not requested?(metrics) -> {nil, %{}}
not available?(site) -> {:revenue_goals_unavailable, %{}}
true -> preload(goals, dimensions)
true -> preload(preloaded_goals.matching_toplevel_filters, dimensions)
end
end

Expand Down
37 changes: 30 additions & 7 deletions lib/plausible/goals/filters.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,31 @@ defmodule Plausible.Goals.Filters do
import Ecto.Query
import Plausible.Stats.Filters.Utils, only: [page_regex: 1]

alias Plausible.Stats.Filters

@doc """
Preloads goals data if needed for query-building and related work.
"""
def preload_needed_goals(site, dimensions, filters) do
if Enum.member?(dimensions, "event:goal") or
Filters.filtering_on_dimension?(filters, "event:goal") do
goals = Plausible.Goals.for_site(site)

%{
# When grouping by event:goal, the later pipeline needs to know which goals match the filters exactly.
# This affects both the check of whether all goals share the same revenue currency and
# the decision of whether we should skip imports.
matching_toplevel_filters: goals_matching_toplevel_filters(goals, filters),
all: goals
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is all used and why?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question.

I needed to change the structure of preloaded_goals because we need to know which goals match the top-level is/contains filter for group-by and other logic, as this affects what gets displayed (e.g. whether all goals have the same currency, and whether we can include imports). This is why matching_toplevel_filters exists.

The behavioral filters has_done and has_not_done do not affect these bits of business logic, but we still need to know what condition(s) these goals have when building the Ecto query. Hence the all key, which is used only in this same file when building the Ecto query:

    Enum.reduce(clauses, false, fn clause, dynamic_statement ->
      condition =
        query.preloaded_goals.all
        |> filter_preloaded(filter, clause)
        |> build_condition(imported?)

      dynamic([e], ^condition or ^dynamic_statement)
    end)

}
else
%{
all: [],
matching_toplevel_filters: []
}
end
end

@doc """
Translates an event:goal filter into SQL. Similarly to other `add_filter` clauses in
`Plausible.Stats.SQL.WhereBuilder`, returns an `Ecto.Query.dynamic` expression.
Expand All @@ -26,17 +51,19 @@ defmodule Plausible.Goals.Filters do

Enum.reduce(clauses, false, fn clause, dynamic_statement ->
condition =
query.preloaded_goals
query.preloaded_goals.all
|> filter_preloaded(filter, clause)
|> build_condition(imported?)

dynamic([e], ^condition or ^dynamic_statement)
end)
end

def preload_needed_goals(site, filters) do
goals = Plausible.Goals.for_site(site)
# Narrows `goals` down to the ones for which `matches?/3` returns a truthy value
# for the given `filter` and `clause`. The surviving goals keep their original order.
defp filter_preloaded(goals, filter, clause) do
Enum.filter(goals, fn goal -> matches?(goal, filter, clause) end)
end

defp goals_matching_toplevel_filters(goals, filters) do
Enum.reduce(filters, goals, fn
[_, "event:goal" | _] = filter, goals ->
goals_matching_any_clause(goals, filter)
Expand All @@ -46,10 +73,6 @@ defmodule Plausible.Goals.Filters do
end)
end

defp filter_preloaded(preloaded_goals, filter, clause) do
Enum.filter(preloaded_goals, fn goal -> matches?(goal, filter, clause) end)
end

defp goals_matching_any_clause(goals, [_, _, clauses | _] = filter) do
goals
|> Enum.filter(fn goal ->
Expand Down
2 changes: 1 addition & 1 deletion lib/plausible/segments/filters.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ defmodule Plausible.Segments.Filters do
filters
|> Filters.traverse()
|> Enum.flat_map(fn
{[_operation, "segment", clauses], _depth} -> clauses
{[_operation, "segment", clauses], _} -> clauses
_ -> []
end)

Expand Down
8 changes: 4 additions & 4 deletions lib/plausible/stats/base.ex
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ defmodule Plausible.Stats.Base do
end
end

defp query_events(site, query) do
q = from(e in "events_v2", where: ^SQL.WhereBuilder.build(:events, site, query))
defp query_events(_site, query) do
q = from(e in "events_v2", where: ^SQL.WhereBuilder.build(:events, query))

on_ee do
q = Plausible.Stats.Sampling.add_query_hint(q, query)
Expand All @@ -36,8 +36,8 @@ defmodule Plausible.Stats.Base do
q
end

def query_sessions(site, query) do
q = from(s in "sessions_v2", where: ^SQL.WhereBuilder.build(:sessions, site, query))
def query_sessions(_site, query) do
q = from(s in "sessions_v2", where: ^SQL.WhereBuilder.build(:sessions, query))

on_ee do
q = Plausible.Stats.Sampling.add_query_hint(q, query)
Expand Down
2 changes: 1 addition & 1 deletion lib/plausible/stats/clickhouse.ex
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ defmodule Plausible.Stats.Clickhouse do
def top_sources_for_spike(site, query, limit, page) do
offset = (page - 1) * limit

{first_datetime, last_datetime} = Plausible.Stats.Time.utc_boundaries(query, site)
{first_datetime, last_datetime} = Plausible.Stats.Time.utc_boundaries(query)

referrers =
from(s in "sessions_v2",
Expand Down
66 changes: 51 additions & 15 deletions lib/plausible/stats/filters/filters.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ defmodule Plausible.Stats.Filters do
A module for parsing filters used in stat queries.
"""

alias Plausible.Stats.Query
alias Plausible.Stats.Filters.QueryParser
alias Plausible.Stats.Filters.StatsAPIFilterParser

Expand Down Expand Up @@ -89,15 +90,45 @@ defmodule Plausible.Stats.Filters do

def dimensions_used_in_filters(filters, opts \\ []) do
min_depth = Keyword.get(opts, :min_depth, 0)
max_depth = Keyword.get(opts, :max_depth, 999)
# :ignore or :only
behavioral_filter_option = Keyword.get(opts, :behavioral_filters, nil)

filters
|> traverse()
|> Enum.filter(fn {_filter, depth} -> depth >= min_depth end)
|> traverse(
{0, false},
fn {depth, is_behavioral_filter}, operator ->
{depth + 1, is_behavioral_filter or operator in [:has_done, :has_not_done]}
end
)
|> Enum.filter(fn {_filter, {depth, is_behavioral_filter}} ->
matches_behavioral_filter_option? =
case behavioral_filter_option do
:ignore -> not is_behavioral_filter
:only -> is_behavioral_filter
_ -> true
end

depth >= min_depth and depth <= max_depth and matches_behavioral_filter_option?
end)
|> Enum.map(fn {[_operator, dimension | _rest], _depth} -> dimension end)
end

def filtering_on_dimension?(query, dimension) do
dimension in dimensions_used_in_filters(query.filters)
def filtering_on_dimension?(query, dimension, opts \\ []) do
filters =
case query do
%Query{filters: filters} -> filters
%{filters: filters} -> filters
filters when is_list(filters) -> filters
end

dimension in dimensions_used_in_filters(filters, opts)
end

# Returns a flat list of every leaf filter found in the `filters` tree.
#
# `traverse/3` yields `{filter, state}` tuples for leaf nodes. No traversal state
# is needed here, so the state starts as `nil`, the transformer keeps it `nil`,
# and it is dropped from each tuple before returning.
def all_leaf_filters(filters) do
filters
|> traverse(nil, fn _, _ -> nil end)
|> Enum.map(fn {filter, _} -> filter end)
end

@doc """
Expand Down Expand Up @@ -144,12 +175,13 @@ defmodule Plausible.Stats.Filters do
defp transform_tree(filter, transformer) do
case {transformer.(filter), filter} do
# Transformer did not return that value - transform that subtree
{nil, [operation, child_filter]} when operation in [:not, :ignore_in_totals_query] ->
{nil, [operator, child_filter]}
when operator in [:not, :ignore_in_totals_query, :has_done, :has_not_done] ->
[transformed_child] = transform_tree(child_filter, transformer)
[[operation, transformed_child]]
[[operator, transformed_child]]

{nil, [operation, filters]} when operation in [:and, :or] ->
[[operation, transform_filters(filters, transformer)]]
{nil, [operator, filters]} when operator in [:and, :or] ->
[[operator, transform_filters(filters, transformer)]]

# Reached a leaf node, return existing value
{nil, filter} ->
Expand All @@ -161,22 +193,26 @@ defmodule Plausible.Stats.Filters do
end
end

def traverse(filters, depth \\ -1) do
@doc """
Traverses a filter tree while accumulating state.
"""
def traverse(filters, state \\ nil, state_transformer \\ fn state, _ -> state end) do
filters
|> Enum.flat_map(&traverse_tree(&1, depth + 1))
|> Enum.flat_map(&traverse_tree(&1, state, state_transformer))
end

defp traverse_tree(filter, depth) do
defp traverse_tree(filter, state, state_transformer) do
case filter do
[operation, child_filter] when operation in [:not, :ignore_in_totals_query] ->
traverse_tree(child_filter, depth + 1)
[operation, child_filter]
when operation in [:not, :ignore_in_totals_query, :has_done, :has_not_done] ->
traverse_tree(child_filter, state_transformer.(state, operation), state_transformer)

[operation, filters] when operation in [:and, :or] ->
traverse(filters, depth + 1)
traverse(filters, state_transformer.(state, operation), state_transformer)

# Leaf node
_ ->
[{filter, depth}]
[{filter, state}]
end
end
end
Loading
Loading