Average Scroll Depth Metric: imported data (#4915)
* include scroll_depth in full pages export

* import scroll_depth from CSV

* query scroll depth from imported data

* fix ordering by scroll depth with imported data

* fix imported scroll depth query + more tests

* enable scroll depth in top stats with imported data

* add main graph test

* fix test and native scroll depth sum select

* Update lib/plausible/exports.ex

Co-authored-by: ruslandoga <[email protected]>

* adjust test

* adjust test to catch error

* export/import/count pageleave_visitors

* extract base_q in export_pages_q

* rename total_visitors to pageleave_visitors

---------

Co-authored-by: ruslandoga <[email protected]>
RobertJoonas and ruslandoga authored Jan 7, 2025
1 parent 7b05cb2 commit 3473910
Showing 16 changed files with 648 additions and 47 deletions.
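The commit threads the id of the user requesting the export through the scheduling path, so scroll depth visibility can be checked for that user when building the pages query (see lib/plausible/exports.ex below). A hypothetical call site against the new three-argument signature — site, current_user, and the email address are placeholder assumptions:

# Hypothetical call site for the new schedule_s3_export/3; site and
# current_user are assumed to be loaded elsewhere.
{:ok, _job} =
  Plausible.Exports.schedule_s3_export(site.id, current_user.id, "owner@example.com")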
88 changes: 75 additions & 13 deletions lib/plausible/exports.ex
@@ -8,13 +8,15 @@ defmodule Plausible.Exports do
import Ecto.Query

@doc "Schedules CSV export job to S3 storage"
-@spec schedule_s3_export(pos_integer, String.t()) :: {:ok, Oban.Job.t()} | {:error, :no_data}
-def schedule_s3_export(site_id, email_to) do
+@spec schedule_s3_export(pos_integer, pos_integer | nil, String.t()) ::
+{:ok, Oban.Job.t()} | {:error, :no_data}
+def schedule_s3_export(site_id, current_user_id, email_to) do
with :ok <- ensure_has_data(site_id) do
args = %{
"storage" => "s3",
"site_id" => site_id,
"email_to" => email_to,
"current_user_id" => current_user_id,
"s3_bucket" => Plausible.S3.exports_bucket(),
"s3_path" => s3_export_key(site_id)
}
@@ -207,13 +209,13 @@ defmodule Plausible.Exports do
Builds Ecto queries to export data from `events_v2` and `sessions_v2`
tables into the format of `imported_*` tables for a website.
"""
-@spec export_queries(pos_integer,
+@spec export_queries(pos_integer, pos_integer | nil,
extname: String.t(),
date_range: Date.Range.t(),
timezone: String.t()
) ::
%{String.t() => Ecto.Query.t()}
-def export_queries(site_id, opts \\ []) do
+def export_queries(site_id, current_user_id, opts \\ []) do
extname = opts[:extname] || ".csv"
date_range = opts[:date_range]
timezone = opts[:timezone] || "UTC"
@@ -232,7 +234,8 @@
%{
filename.("imported_visitors") => export_visitors_q(site_id, timezone, date_range),
filename.("imported_sources") => export_sources_q(site_id, timezone, date_range),
filename.("imported_pages") => export_pages_q(site_id, timezone, date_range),
filename.("imported_pages") =>
export_pages_q(site_id, current_user_id, timezone, date_range),
filename.("imported_entry_pages") => export_entry_pages_q(site_id, timezone, date_range),
filename.("imported_exit_pages") => export_exit_pages_q(site_id, timezone, date_range),
filename.("imported_custom_events") =>
@@ -411,13 +414,71 @@
]
end

-defp export_pages_q(site_id, timezone, date_range) do
-from e in sampled("events_v2"),
-where: ^export_filter(site_id, date_range),
-where: [name: "pageview"],
-group_by: [selected_as(:date), e.pathname],
-order_by: selected_as(:date),
-select: [
+defp export_pages_q(site_id, current_user_id, timezone, date_range) do
+site = Plausible.Repo.get(Plausible.Site, site_id)
+current_user = current_user_id && Plausible.Repo.get(Plausible.Auth.User, current_user_id)
+
+scroll_depth_enabled? =
+PlausibleWeb.Api.StatsController.scroll_depth_enabled?(site, current_user)
+
+base_q =
+from(e in sampled("events_v2"),
+where: ^export_filter(site_id, date_range),
+where: [name: "pageview"],
+group_by: [selected_as(:date), selected_as(:page)],
+order_by: selected_as(:date)
+)
+
+if scroll_depth_enabled? do
+max_scroll_depth_per_visitor_q =
+from(e in "events_v2",
+where: ^export_filter(site_id, date_range),
+where: e.name == "pageleave" and e.scroll_depth <= 100,
+select: %{
+date: date(e.timestamp, ^timezone),
+page: selected_as(e.pathname, :page),
+user_id: e.user_id,
+max_scroll_depth: max(e.scroll_depth)
+},
+group_by: [e.user_id, selected_as(:date), selected_as(:page)]
+)
+
+scroll_depth_q =
+from(p in subquery(max_scroll_depth_per_visitor_q),
+select: %{
+date: p.date,
+page: p.page,
+scroll_depth:
+fragment(
+"if(isNull(sum(?)), NULL, toUInt64(sum(?)))",
+p.max_scroll_depth,
+p.max_scroll_depth
+),
+pageleave_visitors: count(p.user_id)
+},
+group_by: [:date, :page]
+)
+
+from(e in base_q,
+left_join: s in subquery(scroll_depth_q),
+on: s.date == selected_as(:date) and s.page == selected_as(:page),
+select: [
+date(e.timestamp, ^timezone),
+selected_as(fragment("any(?)", e.hostname), :hostname),
+selected_as(e.pathname, :page),
+selected_as(
+fragment("toUInt64(round(uniq(?)*any(_sample_factor)))", e.session_id),
+:visits
+),
+visitors(e),
+selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews),
+selected_as(fragment("any(?)", s.scroll_depth), :scroll_depth),
+selected_as(fragment("any(?)", s.pageleave_visitors), :pageleave_visitors)
+]
+)
+else
+base_q
+|> select([e], [
date(e.timestamp, ^timezone),
selected_as(fragment("any(?)", e.hostname), :hostname),
selected_as(e.pathname, :page),
@@ -427,7 +488,8 @@
),
visitors(e),
selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews)
-]
+])
+end
end

defp export_entry_pages_q(site_id, timezone, date_range) do
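Note that the exported scroll_depth column is a sum, not an average: export_pages_q first takes each visitor's maximum scroll depth per (date, page), then sums those maxima and counts the contributing visitors. A minimal plain-Elixir sketch of that two-step aggregation, using hypothetical in-memory pageleave events for one page and day:

# Mirrors max_scroll_depth_per_visitor_q and scroll_depth_q above,
# on hypothetical data.
pageleaves = [
  %{user_id: 1, scroll_depth: 40},
  %{user_id: 1, scroll_depth: 80},
  %{user_id: 2, scroll_depth: 60}
]

max_per_visitor =
  pageleaves
  |> Enum.group_by(& &1.user_id, & &1.scroll_depth)
  |> Enum.map(fn {_user_id, depths} -> Enum.max(depths) end)

scroll_depth = Enum.sum(max_per_visitor)      # => 140, exported as scroll_depth
pageleave_visitors = length(max_per_visitor)  # => 2, exported as pageleave_visitors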
2 changes: 1 addition & 1 deletion lib/plausible/imported/csv_importer.ex
@@ -161,7 +161,7 @@ defmodule Plausible.Imported.CSVImporter do
"imported_operating_systems" =>
"date Date, operating_system String, operating_system_version String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32, pageviews UInt64",
"imported_pages" =>
"date Date, hostname String, page String, visits UInt64, visitors UInt64, pageviews UInt64",
"date Date, hostname String, page String, visits UInt64, visitors UInt64, pageviews UInt64, scroll_depth Nullable(UInt64), pageleave_visitors UInt64",
"imported_sources" =>
"date Date, source String, referrer String, utm_source String, utm_medium String, utm_campaign String, utm_content String, utm_term String, pageviews UInt64, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32",
"imported_visitors" =>
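With the two extra columns, an imported_pages CSV row would look roughly like this (hypothetical values; scroll_depth is the per-day sum of visitor maxima and is nullable per the schema string above):

date,hostname,page,visits,visitors,pageviews,scroll_depth,pageleave_visitors
2024-01-01,example.com,/,10,8,12,140,2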
2 changes: 2 additions & 0 deletions lib/plausible/imported/page.ex
@@ -15,5 +15,7 @@ defmodule Plausible.Imported.Page do
field :pageviews, Ch, type: "UInt64"
field :exits, Ch, type: "UInt64"
field :time_on_page, Ch, type: "UInt64"
+field :scroll_depth, Ch, type: "Nullable(UInt64)"
+field :pageleave_visitors, Ch, type: "UInt64"
end
end
1 change: 1 addition & 0 deletions lib/plausible/stats/imported/imported.ex
@@ -345,6 +345,7 @@ defmodule Plausible.Stats.Imported do

defp can_order_by?(query) do
Enum.all?(query.order_by, fn
+{:scroll_depth, _} -> false
{metric, _direction} when is_atom(metric) -> metric in query.metrics
_ -> true
end)
20 changes: 20 additions & 0 deletions lib/plausible/stats/imported/sql/expression.ex
@@ -115,6 +115,13 @@ defmodule Plausible.Stats.Imported.SQL.Expression do
wrap_alias([i], %{pageviews: sum(i.pageviews), __internal_visits: sum(i.visits)})
end

+defp select_metric(:scroll_depth, "imported_pages") do
+wrap_alias([i], %{
+scroll_depth_sum: sum(i.scroll_depth),
+pageleave_visitors: sum(i.pageleave_visitors)
+})
+end
+
defp select_metric(_metric, _table), do: %{}

def group_imported_by(q, query) do
@@ -351,6 +358,19 @@ defmodule Plausible.Stats.Imported.SQL.Expression do
|> select_joined_metrics(rest)
end

+# The final `scroll_depth` gets selected at a later query-building step
+# (in `Plausible.Stats.SQL.SpecialMetrics.add/3`). But in order to avoid
+# having to join with imported data there again, we select the required
+# information from imported data here already.
+def select_joined_metrics(q, [:scroll_depth | rest]) do
+q
+|> select_merge_as([s, i], %{
+__internal_scroll_depth_sum: i.scroll_depth_sum,
+__internal_pageleave_visitors: i.pageleave_visitors
+})
+|> select_joined_metrics(rest)
+end
+
# Ignored as it's calculated separately
def select_joined_metrics(q, [metric | rest])
when metric in [:conversion_rate, :group_conversion_rate, :percentage] do
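Carrying sums and visitor counts through the imported-data join, rather than pre-computed averages, matters because averages do not compose across rows. A quick illustration with hypothetical per-day imported_pages rows for one page:

# day 1: scroll_depth sum 120 over 3 pageleave visitors (avg 40)
# day 2: scroll_depth sum 180 over 2 pageleave visitors (avg 90)
(120 + 180) / (3 + 2)  # => 60.0, the correct combined average
(40 + 90) / 2          # => 65.0, what averaging the averages would give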
63 changes: 53 additions & 10 deletions lib/plausible/stats/sql/special_metrics.ex
@@ -150,15 +150,12 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do
dim_shortnames
|> Enum.map(fn dim -> dynamic([p], field(p, ^dim)) end)

-scroll_depth_q =
+scroll_depth_sum_q =
subquery(max_per_visitor_q)
|> select([p], %{
-scroll_depth:
-fragment(
-"if(isFinite(avg(?)), toUInt8(round(avg(?))), NULL)",
-p.max_scroll_depth,
-p.max_scroll_depth
-)
+scroll_depth_sum:
+fragment("if(count(?) = 0, NULL, sum(?))", p.user_id, p.max_scroll_depth),
+pageleave_visitors: fragment("count(?)", p.user_id)
})
|> select_merge(^dim_select)
|> group_by(^dim_group_by)
@@ -173,9 +170,55 @@
|> Enum.reduce(fn condition, acc -> dynamic([], ^acc and ^condition) end)
end

-q
-|> join(:left, [e], s in subquery(scroll_depth_q), on: ^join_on_dim_condition)
-|> select_merge_as([_e, ..., s], %{scroll_depth: fragment("any(?)", s.scroll_depth)})
+joined_q =
+join(q, :left, [e], s in subquery(scroll_depth_sum_q), on: ^join_on_dim_condition)
+
+if query.include_imported do
+joined_q
+|> select_merge_as([..., s], %{
+scroll_depth:
+fragment(
+"""
+case
+when isNotNull(?) AND isNotNull(?) then
+toUInt8(round((? + ?) / (? + ?)))
+when isNotNull(?) then
+toUInt8(round(? / ?))
+when isNotNull(?) then
+toUInt8(round(? / ?))
+else
+NULL
+end
+""",
+# Case 1: Both imported and native scroll depth sums are present
+selected_as(:__internal_scroll_depth_sum),
+s.scroll_depth_sum,
+selected_as(:__internal_scroll_depth_sum),
+s.scroll_depth_sum,
+selected_as(:__internal_pageleave_visitors),
+s.pageleave_visitors,
+# Case 2: Only imported scroll depth sum is present
+selected_as(:__internal_scroll_depth_sum),
+selected_as(:__internal_scroll_depth_sum),
+selected_as(:__internal_pageleave_visitors),
+# Case 3: Only native scroll depth sum is present
+s.scroll_depth_sum,
+s.scroll_depth_sum,
+s.pageleave_visitors
+)
+})
+else
+joined_q
+|> select_merge_as([..., s], %{
+scroll_depth:
+fragment(
+"if(any(?) > 0, toUInt8(round(any(?) / any(?))), NULL)",
+s.pageleave_visitors,
+s.scroll_depth_sum,
+s.pageleave_visitors
+)
+})
+end
else
q
end
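The three CASE branches above reduce to a weighted average over whichever sums are present. A worked example with hypothetical numbers (imported sum 300 over 5 visitors, native sum 200 over 5 visitors):

# Case 1: both imported and native sums present
round((300 + 200) / (5 + 5))  # => 50
# Case 2: only the imported sum is present
round(300 / 5)                # => 60
# Case 3: only the native sum is present
round(200 / 5)                # => 40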
9 changes: 5 additions & 4 deletions lib/plausible_web/controllers/api/stats_controller.ex
@@ -393,15 +393,16 @@ defmodule PlausibleWeb.Api.StatsController do

defp fetch_other_top_stats(site, query, current_user) do
page_filter? = Filters.filtering_on_dimension?(query, "event:page")
+scroll_depth_enabled? = scroll_depth_enabled?(site, current_user)

metrics = [:visitors, :visits, :pageviews, :sample_percent]

metrics =
cond do
-page_filter? && query.include_imported ->
-metrics
+page_filter? && scroll_depth_enabled? && query.include_imported ->
+metrics ++ [:scroll_depth]

-page_filter? && scroll_depth_enabled?(site, current_user) ->
+page_filter? && scroll_depth_enabled? ->
metrics ++ [:bounce_rate, :scroll_depth, :time_on_page]

page_filter? ->
@@ -831,7 +832,7 @@
params = Map.put(params, "property", "event:page")
query = Query.from(site, params, debug_metadata(conn))

-include_scroll_depth? = !query.include_imported && scroll_depth_enabled?(site, current_user)
+include_scroll_depth? = scroll_depth_enabled?(site, current_user)

extra_metrics =
cond do
5 changes: 3 additions & 2 deletions lib/plausible_web/live/csv_export.ex
@@ -210,11 +210,12 @@ defmodule PlausibleWeb.Live.CSVExport do

@impl true
def handle_event("export", _params, socket) do
-%{storage: storage, site_id: site_id, email_to: email_to} = socket.assigns
+%{storage: storage, site_id: site_id, email_to: email_to, current_user: current_user} =
+socket.assigns

schedule_result =
case storage do
"s3" -> Exports.schedule_s3_export(site_id, email_to)
"s3" -> Exports.schedule_s3_export(site_id, current_user.id, email_to)
"local" -> Exports.schedule_local_export(site_id, email_to)
end

4 changes: 3 additions & 1 deletion lib/workers/export_analytics.ex
@@ -29,11 +29,13 @@ defmodule Plausible.Workers.ExportAnalytics do
"site_id" => site_id
} = args

+current_user_id = args["current_user_id"]
+
site = Plausible.Repo.get!(Plausible.Site, site_id)
%Date.Range{} = date_range = Exports.date_range(site.id, site.timezone)

queries =
-Exports.export_queries(site_id,
+Exports.export_queries(site_id, current_user_id,
date_range: date_range,
timezone: site.timezone,
extname: ".csv"
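For reference, the Oban job args this worker now consumes would look roughly like the following (hypothetical values, matching the map built in schedule_s3_export/3; bucket and path are placeholders):

%{
  "storage" => "s3",
  "site_id" => 42,
  "email_to" => "owner@example.com",
  "current_user_id" => 7,
  "s3_bucket" => "exports-bucket",
  "s3_path" => "exports/42"
}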
9 changes: 3 additions & 6 deletions test/plausible/exports_test.exs
@@ -7,7 +7,7 @@ defmodule Plausible.ExportsTest do

describe "export_queries/2" do
test "returns named ecto queries" do
-queries = Plausible.Exports.export_queries(_site_id = 1)
+queries = Plausible.Exports.export_queries(_site_id = 1, nil)
assert queries |> Map.values() |> Enum.all?(&match?(%Ecto.Query{}, &1))

assert Map.keys(queries) == [
@@ -26,7 +26,7 @@

test "with date range" do
queries =
-Plausible.Exports.export_queries(_site_id = 1,
+Plausible.Exports.export_queries(_site_id = 1, nil,
date_range: Date.range(~D[2023-01-01], ~D[2024-03-12])
)

@@ -45,10 +45,7 @@
end

test "with custom extension" do
-queries =
-Plausible.Exports.export_queries(_site_id = 1,
-extname: ".ch"
-)
+queries = Plausible.Exports.export_queries(_site_id = 1, nil, extname: ".ch")

assert Map.keys(queries) == [
"imported_browsers.ch",