From 985e9ac5120bc1a7ad08148b6fddf6d52bd68192 Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Mon, 16 Dec 2024 14:27:18 +0100 Subject: [PATCH 01/14] include scroll_depth in full pages export --- lib/plausible/exports.ex | 104 ++++++++++++++---- lib/plausible_web/live/csv_export.ex | 5 +- lib/workers/export_analytics.ex | 4 +- test/plausible/exports_test.exs | 9 +- test/plausible/imported/csv_importer_test.exs | 96 +++++++++++++++- 5 files changed, 186 insertions(+), 32 deletions(-) diff --git a/lib/plausible/exports.ex b/lib/plausible/exports.ex index 15019443756d..c58d599540d3 100644 --- a/lib/plausible/exports.ex +++ b/lib/plausible/exports.ex @@ -8,13 +8,15 @@ defmodule Plausible.Exports do import Ecto.Query @doc "Schedules CSV export job to S3 storage" - @spec schedule_s3_export(pos_integer, String.t()) :: {:ok, Oban.Job.t()} | {:error, :no_data} - def schedule_s3_export(site_id, email_to) do + @spec schedule_s3_export(pos_integer, pos_integer | nil, String.t()) :: + {:ok, Oban.Job.t()} | {:error, :no_data} + def schedule_s3_export(site_id, current_user_id, email_to) do with :ok <- ensure_has_data(site_id) do args = %{ "storage" => "s3", "site_id" => site_id, "email_to" => email_to, + "current_user_id" => current_user_id, "s3_bucket" => Plausible.S3.exports_bucket(), "s3_path" => s3_export_key(site_id) } @@ -207,13 +209,13 @@ defmodule Plausible.Exports do Builds Ecto queries to export data from `events_v2` and `sessions_v2` tables into the format of `imported_*` tables for a website. """ - @spec export_queries(pos_integer, + @spec export_queries(pos_integer, pos_integer | nil, extname: String.t(), date_range: Date.Range.t(), timezone: String.t() ) :: %{String.t() => Ecto.Query.t()} - def export_queries(site_id, opts \\ []) do + def export_queries(site_id, current_user_id, opts \\ []) do extname = opts[:extname] || ".csv" date_range = opts[:date_range] timezone = opts[:timezone] || "UTC" @@ -232,7 +234,8 @@ defmodule Plausible.Exports do %{ filename.("imported_visitors") => export_visitors_q(site_id, timezone, date_range), filename.("imported_sources") => export_sources_q(site_id, timezone, date_range), - filename.("imported_pages") => export_pages_q(site_id, timezone, date_range), + filename.("imported_pages") => + export_pages_q(site_id, current_user_id, timezone, date_range), filename.("imported_entry_pages") => export_entry_pages_q(site_id, timezone, date_range), filename.("imported_exit_pages") => export_exit_pages_q(site_id, timezone, date_range), filename.("imported_custom_events") => @@ -411,23 +414,80 @@ defmodule Plausible.Exports do ] end - defp export_pages_q(site_id, timezone, date_range) do - from e in sampled("events_v2"), - where: ^export_filter(site_id, date_range), - where: [name: "pageview"], - group_by: [selected_as(:date), e.pathname], - order_by: selected_as(:date), - select: [ - date(e.timestamp, ^timezone), - selected_as(fragment("any(?)", e.hostname), :hostname), - selected_as(e.pathname, :page), - selected_as( - fragment("toUInt64(round(uniq(?)*any(_sample_factor)))", e.session_id), - :visits - ), - visitors(e), - selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews) - ] + defp export_pages_q(site_id, current_user_id, timezone, date_range) do + site = Plausible.Repo.get(Plausible.Site, site_id) + current_user = current_user_id && Plausible.Repo.get(Plausible.Auth.User, current_user_id) + + scroll_depth_enabled? = + PlausibleWeb.Api.StatsController.scroll_depth_enabled?(site, current_user) + + if scroll_depth_enabled? do + max_scroll_depth_per_visitor_q = + from(e in "events_v2", + where: ^export_filter(site_id, date_range), + where: e.name == "pageleave" and e.scroll_depth <= 100, + select: %{ + date: date(e.timestamp, ^timezone), + page: selected_as(e.pathname, :page), + user_id: e.user_id, + max_scroll_depth: max(e.scroll_depth) + }, + group_by: [e.user_id, selected_as(:date), selected_as(:page)] + ) + + scroll_depth_q = + from(p in subquery(max_scroll_depth_per_visitor_q), + select: %{ + date: p.date, + page: p.page, + scroll_depth: + fragment( + "if(isNull(sum(?)), NULL, toInt64(sum(?)))", + p.max_scroll_depth, + p.max_scroll_depth + ) + }, + group_by: [:date, :page] + ) + + from(e in sampled("events_v2"), + where: ^export_filter(site_id, date_range), + where: [name: "pageview"], + left_join: s in subquery(scroll_depth_q), + on: s.date == selected_as(:date) and s.page == selected_as(:page), + select: [ + date(e.timestamp, ^timezone), + selected_as(fragment("any(?)", e.hostname), :hostname), + selected_as(e.pathname, :page), + selected_as( + fragment("toUInt64(round(uniq(?)*any(_sample_factor)))", e.session_id), + :visits + ), + visitors(e), + selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews), + selected_as(fragment("any(?)", s.scroll_depth), :scroll_depth) + ] + ) + |> group_by([e], [selected_as(:date), selected_as(:page)]) + |> order_by([e], selected_as(:date)) + else + from e in sampled("events_v2"), + where: ^export_filter(site_id, date_range), + where: [name: "pageview"], + group_by: [selected_as(:date), e.pathname], + order_by: selected_as(:date), + select: [ + date(e.timestamp, ^timezone), + selected_as(fragment("any(?)", e.hostname), :hostname), + selected_as(e.pathname, :page), + selected_as( + fragment("toUInt64(round(uniq(?)*any(_sample_factor)))", e.session_id), + :visits + ), + visitors(e), + selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews) + ] + end end defp export_entry_pages_q(site_id, timezone, date_range) do diff --git a/lib/plausible_web/live/csv_export.ex b/lib/plausible_web/live/csv_export.ex index f76741dc7412..bdf2b1c6e402 100644 --- a/lib/plausible_web/live/csv_export.ex +++ b/lib/plausible_web/live/csv_export.ex @@ -210,11 +210,12 @@ defmodule PlausibleWeb.Live.CSVExport do @impl true def handle_event("export", _params, socket) do - %{storage: storage, site_id: site_id, email_to: email_to} = socket.assigns + %{storage: storage, site_id: site_id, email_to: email_to, current_user: current_user} = + socket.assigns schedule_result = case storage do - "s3" -> Exports.schedule_s3_export(site_id, email_to) + "s3" -> Exports.schedule_s3_export(site_id, current_user.id, email_to) "local" -> Exports.schedule_local_export(site_id, email_to) end diff --git a/lib/workers/export_analytics.ex b/lib/workers/export_analytics.ex index 51a1c0af06cb..d5da9f05d0d2 100644 --- a/lib/workers/export_analytics.ex +++ b/lib/workers/export_analytics.ex @@ -29,11 +29,13 @@ defmodule Plausible.Workers.ExportAnalytics do "site_id" => site_id } = args + current_user_id = args["current_user_id"] + site = Plausible.Repo.get!(Plausible.Site, site_id) %Date.Range{} = date_range = Exports.date_range(site.id, site.timezone) queries = - Exports.export_queries(site_id, + Exports.export_queries(site_id, current_user_id, date_range: date_range, timezone: site.timezone, extname: ".csv" diff --git a/test/plausible/exports_test.exs b/test/plausible/exports_test.exs index eebc8d1136a4..9c320235ff54 100644 --- a/test/plausible/exports_test.exs +++ b/test/plausible/exports_test.exs @@ -7,7 +7,7 @@ defmodule Plausible.ExportsTest do describe "export_queries/2" do test "returns named ecto queries" do - queries = Plausible.Exports.export_queries(_site_id = 1) + queries = Plausible.Exports.export_queries(_site_id = 1, nil) assert queries |> Map.values() |> Enum.all?(&match?(%Ecto.Query{}, &1)) assert Map.keys(queries) == [ @@ -26,7 +26,7 @@ defmodule Plausible.ExportsTest do test "with date range" do queries = - Plausible.Exports.export_queries(_site_id = 1, + Plausible.Exports.export_queries(_site_id = 1, nil, date_range: Date.range(~D[2023-01-01], ~D[2024-03-12]) ) @@ -45,10 +45,7 @@ defmodule Plausible.ExportsTest do end test "with custom extension" do - queries = - Plausible.Exports.export_queries(_site_id = 1, - extname: ".ch" - ) + queries = Plausible.Exports.export_queries(_site_id = 1, nil, extname: ".ch") assert Map.keys(queries) == [ "imported_browsers.ch", diff --git a/test/plausible/imported/csv_importer_test.exs b/test/plausible/imported/csv_importer_test.exs index ce06ff8daca3..d48920638984 100644 --- a/test/plausible/imported/csv_importer_test.exs +++ b/test/plausible/imported/csv_importer_test.exs @@ -524,7 +524,8 @@ defmodule Plausible.Imported.CSVImporterTest do # export archive to s3 on_ee do - assert {:ok, _job} = Plausible.Exports.schedule_s3_export(exported_site.id, user.email) + assert {:ok, _job} = + Plausible.Exports.schedule_s3_export(exported_site.id, nil, user.email) else assert {:ok, %{args: %{"local_path" => local_path}}} = Plausible.Exports.schedule_local_export(exported_site.id, user.email) @@ -1013,6 +1014,99 @@ defmodule Plausible.Imported.CSVImporterTest do assert exported["conversion_rate"] == imported["conversion_rate"] end) end + + @tag :tmp_dir + test "scroll_depth", %{user: user, tmp_dir: tmp_dir} do + exported_site = new_site(owner: user) + imported_site = new_site(owner: user) + + t0 = ~N[2020-01-01 00:00:00] + [t1, t2, t3] = for i <- 1..3, do: NaiveDateTime.add(t0, i, :minute) + + stats = + [ + build(:pageview, user_id: 12, pathname: "/blog", timestamp: t0), + build(:pageleave, user_id: 12, pathname: "/blog", timestamp: t1, scroll_depth: 20), + build(:pageview, user_id: 12, pathname: "/another", timestamp: t1), + build(:pageleave, user_id: 12, pathname: "/another", timestamp: t2, scroll_depth: 24), + build(:pageview, user_id: 34, pathname: "/blog", timestamp: t0), + build(:pageleave, user_id: 34, pathname: "/blog", timestamp: t1, scroll_depth: 17), + build(:pageview, user_id: 34, pathname: "/another", timestamp: t1), + build(:pageleave, user_id: 34, pathname: "/another", timestamp: t2, scroll_depth: 26), + build(:pageview, user_id: 34, pathname: "/blog", timestamp: t2), + build(:pageleave, user_id: 34, pathname: "/blog", timestamp: t3, scroll_depth: 60), + build(:pageview, user_id: 56, pathname: "/blog", timestamp: t0), + build(:pageleave, user_id: 56, pathname: "/blog", timestamp: t1, scroll_depth: 100), + build(:pageview, pathname: "/blog", timestamp: NaiveDateTime.add(t0, 1, :day)) + ] + |> Enum.map(fn event -> Map.put(event, :hostname, "csv.test") end) + + populate_stats(exported_site, stats) + + # export archive to s3 + on_ee do + assert {:ok, _job} = + Plausible.Exports.schedule_s3_export(exported_site.id, nil, user.email) + else + assert {:ok, %{args: %{"local_path" => local_path}}} = + Plausible.Exports.schedule_local_export(exported_site.id, user.email) + end + + assert %{success: 1} = Oban.drain_queue(queue: :analytics_exports, with_safety: false) + + # download archive + on_ee do + ExAws.request!( + ExAws.S3.download_file( + Plausible.S3.exports_bucket(), + to_string(exported_site.id), + Path.join(tmp_dir, "plausible-export.zip") + ) + ) + else + File.rename!(local_path, Path.join(tmp_dir, "plausible-export.zip")) + end + + # unzip archive + {:ok, files} = + :zip.unzip(to_charlist(Path.join(tmp_dir, "plausible-export.zip")), cwd: tmp_dir) + + # upload csvs + uploads = + Enum.map(files, fn file -> + on_ee do + %{s3_url: s3_url} = Plausible.S3.import_presign_upload(imported_site.id, file) + [bucket, key] = String.split(URI.parse(s3_url).path, "/", parts: 2) + content = File.read!(file) + ExAws.request!(ExAws.S3.put_object(bucket, key, content)) + %{"filename" => Path.basename(file), "s3_url" => s3_url, "content" => content} + else + %{ + "filename" => Path.basename(file), + "local_path" => file, + "content" => File.read!(file) + } + end + end) + + # assert the intermediary imported_pages.csv file + pages = + uploads + |> Enum.find(&String.contains?(&1["filename"], "imported_pages")) + |> Map.get("content") + |> NimbleCSV.RFC4180.parse_string(skip_headers: false) + + assert pages == [ + ["date", "hostname", "page", "visits", "visitors", "pageviews", "scroll_depth"], + ["2020-01-01", "csv.test", "/another", "2", "2", "2", "50"], + ["2020-01-01", "csv.test", "/blog", "3", "3", "4", "180"], + ["2020-01-02", "csv.test", "/blog", "1", "1", "1", "\\N"] + ] + + # TODO: Import from these CSV files and compare scroll depth + # between imported_site and exported_site + _imported_site = imported_site + end end defp clean_buckets(_context) do From 91233e02d46dcff6104444215135e2ce878df778 Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Tue, 17 Dec 2024 14:22:42 +0100 Subject: [PATCH 02/14] import scroll_depth from CSV --- lib/plausible/imported/csv_importer.ex | 2 +- lib/plausible/imported/page.ex | 1 + test/plausible/imported/csv_importer_test.exs | 107 +++++++++++++++++- 3 files changed, 106 insertions(+), 4 deletions(-) diff --git a/lib/plausible/imported/csv_importer.ex b/lib/plausible/imported/csv_importer.ex index ff763e304b8f..fee6188fe8fe 100644 --- a/lib/plausible/imported/csv_importer.ex +++ b/lib/plausible/imported/csv_importer.ex @@ -161,7 +161,7 @@ defmodule Plausible.Imported.CSVImporter do "imported_operating_systems" => "date Date, operating_system String, operating_system_version String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32, pageviews UInt64", "imported_pages" => - "date Date, hostname String, page String, visits UInt64, visitors UInt64, pageviews UInt64", + "date Date, hostname String, page String, visits UInt64, visitors UInt64, pageviews UInt64, scroll_depth Nullable(UInt64)", "imported_sources" => "date Date, source String, referrer String, utm_source String, utm_medium String, utm_campaign String, utm_content String, utm_term String, pageviews UInt64, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32", "imported_visitors" => diff --git a/lib/plausible/imported/page.ex b/lib/plausible/imported/page.ex index 4720bef4f4cc..6b093e5c0bc3 100644 --- a/lib/plausible/imported/page.ex +++ b/lib/plausible/imported/page.ex @@ -15,5 +15,6 @@ defmodule Plausible.Imported.Page do field :pageviews, Ch, type: "UInt64" field :exits, Ch, type: "UInt64" field :time_on_page, Ch, type: "UInt64" + field :scroll_depth, Ch, type: "Nullable(UInt64)" end end diff --git a/test/plausible/imported/csv_importer_test.exs b/test/plausible/imported/csv_importer_test.exs index d48920638984..c72c8523ae17 100644 --- a/test/plausible/imported/csv_importer_test.exs +++ b/test/plausible/imported/csv_importer_test.exs @@ -363,6 +363,71 @@ defmodule Plausible.Imported.CSVImporterTest do assert Plausible.Stats.Clickhouse.imported_pageview_count(site) == 99 end + test "imports scroll_depth as null when the column does not exist in pages CSV", + %{site: site, user: user} = ctx do + _ = ctx + + pages_csv = + %{ + name: "imported_pages_20211230_20220101.csv", + body: """ + "date","visitors","pageviews","hostname","page" + "2021-12-30",1,1,"lucky.numbers.com","/14776416252794997127" + "2021-12-30",1,1,"lucky.numbers.com","/14776416252794997127" + "2021-12-30",6,6,"lucky.numbers.com","/14776416252794997127" + "2021-12-30",1,1,"lucky.numbers.com","/9102354072466236765" + "2021-12-30",1,1,"lucky.numbers.com","/7478911940502018071" + "2021-12-30",1,1,"lucky.numbers.com","/6402607186523575652" + "2021-12-30",2,2,"lucky.numbers.com","/9962503789684934900" + "2021-12-30",8,10,"lucky.numbers.com","/13595620304963848161" + "2021-12-30",2,2,"lucky.numbers.com","/17019199732013993436" + "2021-12-30",32,33,"lucky.numbers.com","/9874837495456455794" + "2021-12-31",4,4,"lucky.numbers.com","/14776416252794997127" + "2021-12-31",1,1,"lucky.numbers.com","/8738789417178304429" + "2021-12-31",1,1,"lucky.numbers.com","/7445073500314667742" + "2021-12-31",1,1,"lucky.numbers.com","/4897404798407749335" + "2021-12-31",1,2,"lucky.numbers.com","/11263893625781431659" + "2022-01-01",2,2,"lucky.numbers.com","/5878724061840196349" + """ + } + + uploads = + on_ee do + %{s3_url: s3_url} = Plausible.S3.import_presign_upload(site.id, pages_csv.name) + [bucket, key] = String.split(URI.parse(s3_url).path, "/", parts: 2) + ExAws.request!(ExAws.S3.put_object(bucket, key, pages_csv.body)) + %{"filename" => pages_csv.name, "s3_url" => s3_url} + else + local_path = Path.join(ctx.tmp_dir, pages_csv.name) + File.write!(local_path, pages_csv.body) + %{"filename" => pages_csv.name, "local_path" => local_path} + end + |> List.wrap() + + date_range = CSVImporter.date_range(uploads) + + {:ok, _job} = + CSVImporter.new_import(site, user, + start_date: date_range.first, + end_date: date_range.last, + uploads: uploads, + storage: on_ee(do: "s3", else: "local") + ) + + assert %{success: 1} = Oban.drain_queue(queue: :analytics_imports, with_safety?: false) + + assert %SiteImport{ + start_date: ~D[2021-12-30], + end_date: ~D[2022-01-01], + source: :csv, + status: :completed + } = Repo.get_by!(SiteImport, site_id: site.id) + + q = from(i in "imported_pages", where: i.site_id == ^site.id, select: i.scroll_depth) + + assert List.duplicate(nil, 16) == Plausible.IngestRepo.all(q) + end + test "accepts cells without quotes", %{site: site, user: user} = ctx do _ = ctx @@ -1103,9 +1168,45 @@ defmodule Plausible.Imported.CSVImporterTest do ["2020-01-02", "csv.test", "/blog", "1", "1", "1", "\\N"] ] - # TODO: Import from these CSV files and compare scroll depth - # between imported_site and exported_site - _imported_site = imported_site + # run importer + date_range = CSVImporter.date_range(uploads) + + {:ok, _job} = + CSVImporter.new_import(imported_site, user, + start_date: date_range.first, + end_date: date_range.last, + uploads: uploads, + storage: on_ee(do: "s3", else: "local") + ) + + assert %{success: 1} = Oban.drain_queue(queue: :analytics_imports, with_safety: false) + + # validate import + assert %SiteImport{ + start_date: ~D[2020-01-01], + end_date: ~D[2020-01-02], + source: :csv, + status: :completed + } = Repo.get_by!(SiteImport, site_id: imported_site.id) + + assert Plausible.Stats.Clickhouse.imported_pageview_count(exported_site) == 0 + assert Plausible.Stats.Clickhouse.imported_pageview_count(imported_site) == 7 + + # assert on the actual rows that got imported into the imported_pages table + imported_data = + from(i in "imported_pages", + where: i.site_id == ^imported_site.id, + select: %{ + date: i.date, + page: i.page, + scroll_depth: i.scroll_depth + } + ) + |> Plausible.IngestRepo.all() + + assert %{date: ~D[2020-01-01], page: "/another", scroll_depth: 50} in imported_data + assert %{date: ~D[2020-01-01], page: "/blog", scroll_depth: 180} in imported_data + assert %{date: ~D[2020-01-02], page: "/blog", scroll_depth: nil} in imported_data end end From 50f9b3d6448b70add9e5ee3199498108debab463 Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Wed, 18 Dec 2024 19:27:30 +0100 Subject: [PATCH 03/14] query scroll depth from imported data --- .../stats/imported/sql/expression.ex | 17 ++++++ lib/plausible/stats/sql/special_metrics.ex | 52 +++++++++++++---- .../controllers/api/stats_controller.ex | 2 +- .../api/stats_controller/imported_test.exs | 2 + .../api/stats_controller/pages_test.exs | 57 +++++++++++++++++-- .../controllers/stats_controller_test.exs | 11 +++- 6 files changed, 123 insertions(+), 18 deletions(-) diff --git a/lib/plausible/stats/imported/sql/expression.ex b/lib/plausible/stats/imported/sql/expression.ex index 057bc22d695a..2ea79e473ae4 100644 --- a/lib/plausible/stats/imported/sql/expression.ex +++ b/lib/plausible/stats/imported/sql/expression.ex @@ -115,6 +115,10 @@ defmodule Plausible.Stats.Imported.SQL.Expression do wrap_alias([i], %{pageviews: sum(i.pageviews), __internal_visits: sum(i.visits)}) end + defp select_metric(:scroll_depth, "imported_pages") do + wrap_alias([i], %{scroll_depth_sum: sum(i.scroll_depth), total_visitors: sum(i.visitors)}) + end + defp select_metric(_metric, _table), do: %{} def group_imported_by(q, query) do @@ -351,6 +355,19 @@ defmodule Plausible.Stats.Imported.SQL.Expression do |> select_joined_metrics(rest) end + # The final `scroll_depth` gets selected at a later querybuilding step + # (in `Plausible.Stats.SQL.SpecialMetrics.add/3`). But in order to avoid + # having to join with imported data there again, we select the required + # information from imported data here already. + def select_joined_metrics(q, [:scroll_depth | rest]) do + q + |> select_merge_as([s, i], %{ + __internal_scroll_depth_sum: i.scroll_depth_sum, + __internal_total_visitors: i.total_visitors + }) + |> select_joined_metrics(rest) + end + # Ignored as it's calculated separately def select_joined_metrics(q, [metric | rest]) when metric in [:conversion_rate, :group_conversion_rate, :percentage] do diff --git a/lib/plausible/stats/sql/special_metrics.ex b/lib/plausible/stats/sql/special_metrics.ex index b4a615a1cc83..0b4c5b975960 100644 --- a/lib/plausible/stats/sql/special_metrics.ex +++ b/lib/plausible/stats/sql/special_metrics.ex @@ -150,15 +150,11 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do dim_shortnames |> Enum.map(fn dim -> dynamic([p], field(p, ^dim)) end) - scroll_depth_q = + scroll_depth_sum_q = subquery(max_per_visitor_q) |> select([p], %{ - scroll_depth: - fragment( - "if(isFinite(avg(?)), toUInt8(round(avg(?))), NULL)", - p.max_scroll_depth, - p.max_scroll_depth - ) + scroll_depth_sum: fragment("sum(?)", p.max_scroll_depth), + total_visitors: fragment("count(?)", p.user_id) }) |> select_merge(^dim_select) |> group_by(^dim_group_by) @@ -173,9 +169,45 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do |> Enum.reduce(fn condition, acc -> dynamic([], ^acc and ^condition) end) end - q - |> join(:left, [e], s in subquery(scroll_depth_q), on: ^join_on_dim_condition) - |> select_merge_as([_e, ..., s], %{scroll_depth: fragment("any(?)", s.scroll_depth)}) + joined_q = + join(q, :left, [e], s in subquery(scroll_depth_sum_q), on: ^join_on_dim_condition) + + if query.include_imported do + joined_q + |> select_merge_as([..., s], %{ + scroll_depth: + fragment( + """ + if( + ? > 0 or ? > 0, + toUInt8( + round( + (? + ?) / (? + ?) + ) + ), + NULL + ) + """, + s.total_visitors, + selected_as(:__internal_total_visitors), + s.scroll_depth_sum, + selected_as(:__internal_scroll_depth_sum), + s.total_visitors, + selected_as(:__internal_total_visitors) + ) + }) + else + joined_q + |> select_merge_as([..., s], %{ + scroll_depth: + fragment( + "if(any(?) > 0, toUInt8(round(any(?) / any(?))), NULL)", + s.total_visitors, + s.scroll_depth_sum, + s.total_visitors + ) + }) + end else q end diff --git a/lib/plausible_web/controllers/api/stats_controller.ex b/lib/plausible_web/controllers/api/stats_controller.ex index a4261e2adfaf..36056145f6f2 100644 --- a/lib/plausible_web/controllers/api/stats_controller.ex +++ b/lib/plausible_web/controllers/api/stats_controller.ex @@ -831,7 +831,7 @@ defmodule PlausibleWeb.Api.StatsController do params = Map.put(params, "property", "event:page") query = Query.from(site, params, debug_metadata(conn)) - include_scroll_depth? = !query.include_imported && scroll_depth_enabled?(site, current_user) + include_scroll_depth? = scroll_depth_enabled?(site, current_user) extra_metrics = cond do diff --git a/test/plausible_web/controllers/api/stats_controller/imported_test.exs b/test/plausible_web/controllers/api/stats_controller/imported_test.exs index 4e36ba8effea..048fc2ccbf57 100644 --- a/test/plausible_web/controllers/api/stats_controller/imported_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/imported_test.exs @@ -882,6 +882,7 @@ defmodule PlausibleWeb.Api.StatsController.ImportedTest do "time_on_page" => 40, "visitors" => 3, "pageviews" => 4, + "scroll_depth" => nil, "name" => "/some-other-page" }, %{ @@ -889,6 +890,7 @@ defmodule PlausibleWeb.Api.StatsController.ImportedTest do "time_on_page" => 800.0, "visitors" => 2, "pageviews" => 2, + "scroll_depth" => nil, "name" => "/" } ] diff --git a/test/plausible_web/controllers/api/stats_controller/pages_test.exs b/test/plausible_web/controllers/api/stats_controller/pages_test.exs index 9661f1edd9af..6821f9aff6f6 100644 --- a/test/plausible_web/controllers/api/stats_controller/pages_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/pages_test.exs @@ -648,6 +648,46 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do ] end + test "calculates scroll_depth from native and imported data combined", %{ + conn: conn, + site: site + } do + populate_stats(site, [ + build(:pageview, user_id: @user_id, pathname: "/blog", timestamp: ~N[2020-01-01 00:00:00]), + build(:pageleave, + user_id: @user_id, + pathname: "/blog", + timestamp: ~N[2020-01-01 00:00:00], + scroll_depth: 80 + ), + build(:imported_pages, + date: ~D[2020-01-01], + visitors: 3, + pageviews: 3, + time_on_page: 90, + page: "/blog", + scroll_depth: 120 + ) + ]) + + conn = + get( + conn, + "/api/stats/#{site.domain}/pages?period=day&date=2020-01-01&detailed=true&with_imported=true" + ) + + assert json_response(conn, 200)["results"] == [ + %{ + "name" => "/blog", + "visitors" => 4, + "pageviews" => 4, + "bounce_rate" => 100, + "time_on_page" => 30.0, + "scroll_depth" => 50 + } + ] + end + test "can filter using the | (OR) filter", %{conn: conn, site: site} do populate_stats(site, [ @@ -1263,6 +1303,7 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do "time_on_page" => 800.0, "visitors" => 3, "pageviews" => 3, + "scroll_depth" => nil, "name" => "/" }, %{ @@ -1270,6 +1311,7 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do "time_on_page" => 60, "visitors" => 2, "pageviews" => 2, + "scroll_depth" => nil, "name" => "/some-other-page" } ] @@ -1341,7 +1383,8 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do "name" => "/", "pageviews" => 4, "time_on_page" => 90.0, - "visitors" => 4 + "visitors" => 4, + "scroll_depth" => nil } ] end @@ -1390,14 +1433,16 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do "name" => "/", "pageviews" => 4, "time_on_page" => 90.0, - "visitors" => 4 + "visitors" => 4, + "scroll_depth" => nil }, %{ "bounce_rate" => 100, "name" => "/a", "pageviews" => 1, "time_on_page" => 10.0, - "visitors" => 1 + "visitors" => 1, + "scroll_depth" => nil } ] end @@ -1446,14 +1491,16 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do "name" => "/aaa", "pageviews" => 4, "time_on_page" => 90.0, - "visitors" => 4 + "visitors" => 4, + "scroll_depth" => nil }, %{ "bounce_rate" => 100, "name" => "/a", "pageviews" => 1, "time_on_page" => 10.0, - "visitors" => 1 + "visitors" => 1, + "scroll_depth" => nil } ] end diff --git a/test/plausible_web/controllers/stats_controller_test.exs b/test/plausible_web/controllers/stats_controller_test.exs index 586cb5b6b658..743927ffb75b 100644 --- a/test/plausible_web/controllers/stats_controller_test.exs +++ b/test/plausible_web/controllers/stats_controller_test.exs @@ -547,8 +547,15 @@ defmodule PlausibleWeb.StatsControllerTest do {~c"pages.csv", data} -> assert parse_csv(data) == [ - ["name", "visitors", "pageviews", "bounce_rate", "time_on_page"], - ["/test", "1", "1", "0.0", "10.0"], + [ + "name", + "visitors", + "pageviews", + "bounce_rate", + "time_on_page", + "scroll_depth" + ], + ["/test", "1", "1", "0.0", "10.0", ""], [""] ] From 0c33d864ab55c919369a7d5cb10e05f3611b50f9 Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Thu, 19 Dec 2024 11:14:31 +0100 Subject: [PATCH 04/14] fix ordering by scroll depth with imported data --- lib/plausible/stats/imported/imported.ex | 1 + .../controllers/api/stats_controller/pages_test.exs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/plausible/stats/imported/imported.ex b/lib/plausible/stats/imported/imported.ex index b05e1d2112e0..9f7b038595d7 100644 --- a/lib/plausible/stats/imported/imported.ex +++ b/lib/plausible/stats/imported/imported.ex @@ -345,6 +345,7 @@ defmodule Plausible.Stats.Imported do defp can_order_by?(query) do Enum.all?(query.order_by, fn + {:scroll_depth, _} -> false {metric, _direction} when is_atom(metric) -> metric in query.metrics _ -> true end) diff --git a/test/plausible_web/controllers/api/stats_controller/pages_test.exs b/test/plausible_web/controllers/api/stats_controller/pages_test.exs index 6821f9aff6f6..4f5ba9854412 100644 --- a/test/plausible_web/controllers/api/stats_controller/pages_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/pages_test.exs @@ -673,7 +673,7 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do conn = get( conn, - "/api/stats/#{site.domain}/pages?period=day&date=2020-01-01&detailed=true&with_imported=true" + "/api/stats/#{site.domain}/pages?period=day&date=2020-01-01&detailed=true&with_imported=true&order_by=#{Jason.encode!([["scroll_depth", "desc"]])}" ) assert json_response(conn, 200)["results"] == [ From 60bb6beaf7a549fb28469fa5ccb8d52a9a220ecd Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Thu, 19 Dec 2024 12:15:20 +0100 Subject: [PATCH 05/14] fix imported scroll depth query + more tests --- .../stats/imported/sql/expression.ex | 5 +- lib/plausible/stats/sql/special_metrics.ex | 37 ++++-- .../api/stats_controller/pages_test.exs | 119 ++++++++++++++++++ 3 files changed, 147 insertions(+), 14 deletions(-) diff --git a/lib/plausible/stats/imported/sql/expression.ex b/lib/plausible/stats/imported/sql/expression.ex index 2ea79e473ae4..e2e813630a5d 100644 --- a/lib/plausible/stats/imported/sql/expression.ex +++ b/lib/plausible/stats/imported/sql/expression.ex @@ -116,7 +116,10 @@ defmodule Plausible.Stats.Imported.SQL.Expression do end defp select_metric(:scroll_depth, "imported_pages") do - wrap_alias([i], %{scroll_depth_sum: sum(i.scroll_depth), total_visitors: sum(i.visitors)}) + wrap_alias([i], %{ + scroll_depth_sum: sum(i.scroll_depth), + total_visitors: fragment("sumIf(?, isNotNull(?))", i.visitors, i.scroll_depth) + }) end defp select_metric(_metric, _table), do: %{} diff --git a/lib/plausible/stats/sql/special_metrics.ex b/lib/plausible/stats/sql/special_metrics.ex index 0b4c5b975960..4c67c58626b4 100644 --- a/lib/plausible/stats/sql/special_metrics.ex +++ b/lib/plausible/stats/sql/special_metrics.ex @@ -153,7 +153,8 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do scroll_depth_sum_q = subquery(max_per_visitor_q) |> select([p], %{ - scroll_depth_sum: fragment("sum(?)", p.max_scroll_depth), + scroll_depth_sum: + fragment("if(isNull(sum(?)), NULL, sum(?))", p.max_scroll_depth, p.max_scroll_depth), total_visitors: fragment("count(?)", p.user_id) }) |> select_merge(^dim_select) @@ -178,22 +179,32 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do scroll_depth: fragment( """ - if( - ? > 0 or ? > 0, - toUInt8( - round( - (? + ?) / (? + ?) - ) - ), - NULL - ) + case + when isNotNull(?) AND isNotNull(?) then + toUInt8(round((? + ?) / (? + ?))) + when isNotNull(?) then + toUInt8(round(? / ?)) + when isNotNull(?) then + toUInt8(round(? / ?)) + else + NULL + end """, - s.total_visitors, - selected_as(:__internal_total_visitors), + # Case 1: Both imported and native scroll depth sums are present + selected_as(:__internal_scroll_depth_sum), s.scroll_depth_sum, selected_as(:__internal_scroll_depth_sum), + s.scroll_depth_sum, + selected_as(:__internal_total_visitors), s.total_visitors, - selected_as(:__internal_total_visitors) + # Case 2: Only imported scroll depth sum is present + selected_as(:__internal_scroll_depth_sum), + selected_as(:__internal_scroll_depth_sum), + selected_as(:__internal_total_visitors), + # Case 3: Only native scroll depth sum is present + s.scroll_depth_sum, + s.scroll_depth_sum, + s.total_visitors ) }) else diff --git a/test/plausible_web/controllers/api/stats_controller/pages_test.exs b/test/plausible_web/controllers/api/stats_controller/pages_test.exs index 4f5ba9854412..442d48f5f733 100644 --- a/test/plausible_web/controllers/api/stats_controller/pages_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/pages_test.exs @@ -688,6 +688,125 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do ] end + test "handles missing scroll_depth data from native and imported sources", %{ + conn: conn, + site: site + } do + populate_stats(site, [ + build(:pageview, + user_id: @user_id, + pathname: "/native-and-imported", + timestamp: ~N[2020-01-01 00:00:00] + ), + build(:pageleave, + user_id: @user_id, + pathname: "/native-and-imported", + timestamp: ~N[2020-01-01 00:01:00], + scroll_depth: 80 + ), + build(:pageview, + user_id: @user_id, + pathname: "/native-only", + timestamp: ~N[2020-01-01 00:01:00] + ), + build(:pageleave, + user_id: @user_id, + pathname: "/native-only", + timestamp: ~N[2020-01-01 00:02:00], + scroll_depth: 40 + ), + build(:imported_pages, + date: ~D[2020-01-01], + visitors: 3, + pageviews: 3, + time_on_page: 180, + page: "/native-and-imported", + scroll_depth: 120 + ), + build(:imported_pages, + date: ~D[2020-01-01], + visitors: 10, + pageviews: 10, + time_on_page: 300, + page: "/imported-only", + scroll_depth: 100 + ) + ]) + + conn = + get( + conn, + "/api/stats/#{site.domain}/pages?period=day&date=2020-01-01&detailed=true&with_imported=true&order_by=#{Jason.encode!([["scroll_depth", "desc"]])}" + ) + + assert json_response(conn, 200)["results"] == [ + %{ + "name" => "/native-and-imported", + "visitors" => 4, + "pageviews" => 4, + "bounce_rate" => 0, + "time_on_page" => 60, + "scroll_depth" => 50 + }, + %{ + "name" => "/native-only", + "visitors" => 1, + "pageviews" => 1, + "bounce_rate" => 0, + "time_on_page" => nil, + "scroll_depth" => 40 + }, + %{ + "name" => "/imported-only", + "visitors" => 10, + "pageviews" => 10, + "bounce_rate" => 0, + "time_on_page" => 30.0, + "scroll_depth" => 10 + } + ] + end + + test "can query scroll depth only from imported data, ignoring rows where scroll depth doesn't exist", + %{ + conn: conn, + site: site + } do + populate_stats(site, [ + build(:imported_pages, + date: ~D[2020-01-01], + visitors: 10, + pageviews: 10, + page: "/blog", + scroll_depth: 100 + ), + build(:imported_pages, + date: ~D[2020-01-01], + visitors: 100, + pageviews: 150, + page: "/blog", + scroll_depth: nil + ) + ]) + + conn = + get( + conn, + "/api/stats/#{site.domain}/pages?period=7d&date=2020-01-02&detailed=true&with_imported=true" + ) + + assert json_response(conn, 200)["results"] == [ + %{ + "name" => "/blog", + "visitors" => 110, + "pageviews" => 160, + "bounce_rate" => 0, + "time_on_page" => 0.125, + "scroll_depth" => 10 + } + ] + end + test "can filter using the | (OR) filter", %{conn: conn, site: site} do populate_stats(site, [ From ac940bd747fcccbab5a0fde3f9d7149a25101342 Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Thu, 19 Dec 2024 12:44:27 +0100 Subject: [PATCH 06/14] enable scroll depth in top stats with imported data --- .../controllers/api/stats_controller.ex | 7 +++-- .../api/stats_controller/top_stats_test.exs | 29 +++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/lib/plausible_web/controllers/api/stats_controller.ex b/lib/plausible_web/controllers/api/stats_controller.ex index 36056145f6f2..1ab8792499c7 100644 --- a/lib/plausible_web/controllers/api/stats_controller.ex +++ b/lib/plausible_web/controllers/api/stats_controller.ex @@ -393,15 +393,16 @@ defmodule PlausibleWeb.Api.StatsController do defp fetch_other_top_stats(site, query, current_user) do page_filter? = Filters.filtering_on_dimension?(query, "event:page") + scroll_depth_enabled? = scroll_depth_enabled?(site, current_user) metrics = [:visitors, :visits, :pageviews, :sample_percent] metrics = cond do - page_filter? && query.include_imported -> - metrics + page_filter? && scroll_depth_enabled? && query.include_imported -> + metrics ++ [:scroll_depth] - page_filter? && scroll_depth_enabled?(site, current_user) -> + page_filter? && scroll_depth_enabled? -> metrics ++ [:bounce_rate, :scroll_depth, :time_on_page] page_filter? -> diff --git a/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs b/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs index 0bc99cdc3f26..8c52265ba0ba 100644 --- a/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs @@ -989,6 +989,35 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do ] end + test "returns scroll_depth with a page filter with imported data", %{conn: conn, site: site} do + site_import = insert(:site_import, site: site) + + populate_stats(site, site_import.id, [ + build(:pageview, user_id: 123, timestamp: ~N[2021-01-01 00:00:00]), + build(:pageleave, user_id: 123, timestamp: ~N[2021-01-01 00:00:10], scroll_depth: 40), + build(:pageview, user_id: 123, timestamp: ~N[2021-01-01 00:00:10]), + build(:pageleave, user_id: 123, timestamp: ~N[2021-01-01 00:00:20], scroll_depth: 60), + build(:pageview, user_id: 456, timestamp: ~N[2021-01-01 00:00:00]), + build(:pageleave, user_id: 456, timestamp: ~N[2021-01-01 00:00:10], scroll_depth: 80), + build(:imported_pages, page: "/", date: ~D[2021-01-01], visitors: 8, scroll_depth: 410), + build(:imported_pages, page: "/", date: ~D[2021-01-02], visitors: 100, scroll_depth: nil) + ]) + + filters = Jason.encode!([[:is, "event:page", ["/"]]]) + + conn = + get( + conn, + "/api/stats/#{site.domain}/top-stats?period=7d&date=2021-01-07&filters=#{filters}&with_imported=true" + ) + + res = json_response(conn, 200) + + assert %{"name" => "Scroll depth", "value" => 55, "graph_metric" => "scroll_depth"} in res[ + "top_stats" + ] + end + test "contains filter", %{conn: conn, site: site} do populate_stats(site, [ build(:pageview, pathname: "/some-blog-post"), From bc01a08e6b8829aeb01c88c3cf584a6fdb4f9cb5 Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Thu, 19 Dec 2024 12:56:42 +0100 Subject: [PATCH 07/14] add main graph test --- .../api/stats_controller/main_graph_test.exs | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs b/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs index 47704e616735..3434e8c85319 100644 --- a/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs @@ -633,6 +633,39 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do assert plot == [40, 20, 0, 0, 0, 0, 0] end + + test "returns scroll depth per day with imported data", %{conn: conn, site: site} do + site_import = insert(:site_import, site: site) + + populate_stats(site, site_import.id, [ + # 2020-01-01 - only native data + build(:pageview, user_id: 12, timestamp: ~N[2020-01-01 00:00:00]), + build(:pageleave, user_id: 12, timestamp: ~N[2020-01-01 00:01:00], scroll_depth: 20), + build(:pageview, user_id: 34, timestamp: ~N[2020-01-01 00:00:00]), + build(:pageleave, user_id: 34, timestamp: ~N[2020-01-01 00:01:00], scroll_depth: 17), + build(:pageview, user_id: 34, timestamp: ~N[2020-01-01 00:02:00]), + build(:pageleave, user_id: 34, timestamp: ~N[2020-01-01 00:03:00], scroll_depth: 60), + # 2020-01-02 - both imported and native data + build(:pageview, user_id: 56, timestamp: ~N[2020-01-02 00:00:00]), + build(:pageleave, user_id: 56, timestamp: ~N[2020-01-02 00:01:00], scroll_depth: 20), + build(:imported_pages, date: ~D[2020-01-02], page: "/", visitors: 1, scroll_depth: 40), + # 2020-01-03 - only imported data + build(:imported_pages, date: ~D[2020-01-03], page: "/", visitors: 1, scroll_depth: 90), + build(:imported_pages, date: ~D[2020-01-03], page: "/", visitors: 100, scroll_depth: nil) + ]) + + filters = Jason.encode!([[:is, "event:page", ["/"]]]) + + conn = + get( + conn, + "/api/stats/#{site.domain}/main-graph?period=7d&date=2020-01-07&metric=scroll_depth&filters=#{filters}&with_imported=true" + ) + + assert %{"plot" => plot} = json_response(conn, 200) + + assert plot == [40, 30, 90, 0, 0, 0, 0] + end end describe "GET /api/stats/main-graph - conversion_rate plot" do From 1d7beb7d119feb34645bae4a05a5fe2bc297aa1f Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Thu, 19 Dec 2024 13:53:25 +0100 Subject: [PATCH 08/14] fix test and native scroll depth sum select --- lib/plausible/stats/sql/special_metrics.ex | 2 +- .../controllers/api/stats_controller/top_stats_test.exs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/plausible/stats/sql/special_metrics.ex b/lib/plausible/stats/sql/special_metrics.ex index 4c67c58626b4..54f7703be163 100644 --- a/lib/plausible/stats/sql/special_metrics.ex +++ b/lib/plausible/stats/sql/special_metrics.ex @@ -154,7 +154,7 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do subquery(max_per_visitor_q) |> select([p], %{ scroll_depth_sum: - fragment("if(isNull(sum(?)), NULL, sum(?))", p.max_scroll_depth, p.max_scroll_depth), + fragment("if(count(?) = 0, NULL, sum(?))", p.user_id, p.max_scroll_depth), total_visitors: fragment("count(?)", p.user_id) }) |> select_merge(^dim_select) diff --git a/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs b/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs index 8c52265ba0ba..8060f3b5f29a 100644 --- a/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs @@ -603,7 +603,7 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do ] end - test ":is filter on page returns only visitors, visits and pageviews", %{ + test ":is filter on page returns only visitors, visits, pageviews and scroll_depth", %{ conn: conn, site: site } do @@ -638,7 +638,8 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do assert res["top_stats"] == [ %{"name" => "Unique visitors", "value" => 2, "graph_metric" => "visitors"}, %{"name" => "Total visits", "value" => 4, "graph_metric" => "visits"}, - %{"name" => "Total pageviews", "value" => 36, "graph_metric" => "pageviews"} + %{"name" => "Total pageviews", "value" => 36, "graph_metric" => "pageviews"}, + %{"name" => "Scroll depth", "value" => nil, "graph_metric" => "scroll_depth"} ] end end From a41fa8f32b63b221845b43462f8503732cbeed6d Mon Sep 17 00:00:00 2001 From: RobertJoonas <56999674+RobertJoonas@users.noreply.github.com> Date: Mon, 30 Dec 2024 10:51:14 +0000 Subject: [PATCH 09/14] Update lib/plausible/exports.ex Co-authored-by: ruslandoga --- lib/plausible/exports.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/plausible/exports.ex b/lib/plausible/exports.ex index c58d599540d3..a6bfcb6c7d8b 100644 --- a/lib/plausible/exports.ex +++ b/lib/plausible/exports.ex @@ -442,7 +442,7 @@ defmodule Plausible.Exports do page: p.page, scroll_depth: fragment( - "if(isNull(sum(?)), NULL, toInt64(sum(?)))", + "if(isNull(sum(?)), NULL, toUInt64(sum(?)))", p.max_scroll_depth, p.max_scroll_depth ) From 3eee925417ebb2a8146ffcf3946b386a3725ec33 Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Mon, 30 Dec 2024 12:03:06 +0000 Subject: [PATCH 10/14] adjust test --- test/plausible/imported/csv_importer_test.exs | 44 +++++++++++-------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/test/plausible/imported/csv_importer_test.exs b/test/plausible/imported/csv_importer_test.exs index c72c8523ae17..388f5436e65c 100644 --- a/test/plausible/imported/csv_importer_test.exs +++ b/test/plausible/imported/csv_importer_test.exs @@ -1081,7 +1081,7 @@ defmodule Plausible.Imported.CSVImporterTest do end @tag :tmp_dir - test "scroll_depth", %{user: user, tmp_dir: tmp_dir} do + test "scroll_depth", %{conn: conn, user: user, tmp_dir: tmp_dir} do exported_site = new_site(owner: user) imported_site = new_site(owner: user) @@ -1144,30 +1144,15 @@ defmodule Plausible.Imported.CSVImporterTest do [bucket, key] = String.split(URI.parse(s3_url).path, "/", parts: 2) content = File.read!(file) ExAws.request!(ExAws.S3.put_object(bucket, key, content)) - %{"filename" => Path.basename(file), "s3_url" => s3_url, "content" => content} + %{"filename" => Path.basename(file), "s3_url" => s3_url} else %{ "filename" => Path.basename(file), - "local_path" => file, - "content" => File.read!(file) + "local_path" => file } end end) - # assert the intermediary imported_pages.csv file - pages = - uploads - |> Enum.find(&String.contains?(&1["filename"], "imported_pages")) - |> Map.get("content") - |> NimbleCSV.RFC4180.parse_string(skip_headers: false) - - assert pages == [ - ["date", "hostname", "page", "visits", "visitors", "pageviews", "scroll_depth"], - ["2020-01-01", "csv.test", "/another", "2", "2", "2", "50"], - ["2020-01-01", "csv.test", "/blog", "3", "3", "4", "180"], - ["2020-01-02", "csv.test", "/blog", "1", "1", "1", "\\N"] - ] - # run importer date_range = CSVImporter.date_range(uploads) @@ -1207,6 +1192,29 @@ defmodule Plausible.Imported.CSVImporterTest do assert %{date: ~D[2020-01-01], page: "/another", scroll_depth: 50} in imported_data assert %{date: ~D[2020-01-01], page: "/blog", scroll_depth: 180} in imported_data assert %{date: ~D[2020-01-02], page: "/blog", scroll_depth: nil} in imported_data + + # assert via stats queries that scroll_depth from imported + # data matches the scroll_depth from native data + expected_results = [ + %{"dimensions" => ["/another"], "metrics" => [25]}, + %{"dimensions" => ["/blog"], "metrics" => [60]} + ] + + query_scroll_depth_per_page = fn conn, site -> + post(conn, "/api/v2/query-internal-test", %{ + "site_id" => site.domain, + "metrics" => ["scroll_depth"], + "date_range" => "all", + "order_by" => [["scroll_depth", "asc"]], + "include" => %{"imports" => true}, + "dimensions" => ["event:page"] + }) + |> json_response(200) + |> Map.fetch!("results") + end + + assert query_scroll_depth_per_page.(conn, exported_site) == expected_results + assert query_scroll_depth_per_page.(conn, imported_site) == expected_results end end From ed6eac87420d26e4e7756c42b7a58837df5f31a2 Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Tue, 31 Dec 2024 09:38:28 +0000 Subject: [PATCH 11/14] adjust test to catch error --- test/plausible/imported/csv_importer_test.exs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/plausible/imported/csv_importer_test.exs b/test/plausible/imported/csv_importer_test.exs index 76b2d728f0d1..aee93276478d 100644 --- a/test/plausible/imported/csv_importer_test.exs +++ b/test/plausible/imported/csv_importer_test.exs @@ -1105,6 +1105,9 @@ defmodule Plausible.Imported.CSVImporterTest do build(:pageleave, user_id: 34, pathname: "/blog", timestamp: t3, scroll_depth: 60), build(:pageview, user_id: 56, pathname: "/blog", timestamp: t0), build(:pageleave, user_id: 56, pathname: "/blog", timestamp: t1, scroll_depth: 100), + build(:pageview, user_id: 78, pathname: "/", timestamp: t0), + build(:pageleave, user_id: 78, pathname: "/", timestamp: t1, scroll_depth: 20), + build(:pageview, pathname: "/", timestamp: t1), build(:pageview, pathname: "/blog", timestamp: NaiveDateTime.add(t0, 1, :day)) ] |> Enum.map(fn event -> Map.put(event, :hostname, "csv.test") end) @@ -1178,7 +1181,7 @@ defmodule Plausible.Imported.CSVImporterTest do } = Repo.get_by!(SiteImport, site_id: imported_site.id) assert Plausible.Stats.Clickhouse.imported_pageview_count(exported_site) == 0 - assert Plausible.Stats.Clickhouse.imported_pageview_count(imported_site) == 7 + assert Plausible.Stats.Clickhouse.imported_pageview_count(imported_site) == 9 # assert on the actual rows that got imported into the imported_pages table imported_data = @@ -1199,6 +1202,7 @@ defmodule Plausible.Imported.CSVImporterTest do # assert via stats queries that scroll_depth from imported # data matches the scroll_depth from native data expected_results = [ + %{"dimensions" => ["/"], "metrics" => [20]}, %{"dimensions" => ["/another"], "metrics" => [25]}, %{"dimensions" => ["/blog"], "metrics" => [60]} ] From 14d2a2904ea62b9bdc2b037a9b2e4ed1da8577b7 Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Tue, 31 Dec 2024 11:38:42 +0000 Subject: [PATCH 12/14] export/import/count pageleave_visitors --- lib/plausible/exports.ex | 6 ++-- lib/plausible/imported/csv_importer.ex | 2 +- lib/plausible/imported/page.ex | 1 + .../stats/imported/sql/expression.ex | 2 +- test/plausible/imported/csv_importer_test.exs | 13 +++++--- .../api/stats_controller/main_graph_test.exs | 16 ++++++++-- .../api/stats_controller/pages_test.exs | 32 +++++++++++-------- .../api/stats_controller/top_stats_test.exs | 8 ++++- 8 files changed, 55 insertions(+), 25 deletions(-) diff --git a/lib/plausible/exports.ex b/lib/plausible/exports.ex index a6bfcb6c7d8b..9d8d31d3c167 100644 --- a/lib/plausible/exports.ex +++ b/lib/plausible/exports.ex @@ -445,7 +445,8 @@ defmodule Plausible.Exports do "if(isNull(sum(?)), NULL, toUInt64(sum(?)))", p.max_scroll_depth, p.max_scroll_depth - ) + ), + pageleave_visitors: count(p.user_id) }, group_by: [:date, :page] ) @@ -465,7 +466,8 @@ defmodule Plausible.Exports do ), visitors(e), selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews), - selected_as(fragment("any(?)", s.scroll_depth), :scroll_depth) + selected_as(fragment("any(?)", s.scroll_depth), :scroll_depth), + selected_as(fragment("any(?)", s.pageleave_visitors), :pageleave_visitors) ] ) |> group_by([e], [selected_as(:date), selected_as(:page)]) diff --git a/lib/plausible/imported/csv_importer.ex b/lib/plausible/imported/csv_importer.ex index fee6188fe8fe..277a9dd8da07 100644 --- a/lib/plausible/imported/csv_importer.ex +++ b/lib/plausible/imported/csv_importer.ex @@ -161,7 +161,7 @@ defmodule Plausible.Imported.CSVImporter do "imported_operating_systems" => "date Date, operating_system String, operating_system_version String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32, pageviews UInt64", "imported_pages" => - "date Date, hostname String, page String, visits UInt64, visitors UInt64, pageviews UInt64, scroll_depth Nullable(UInt64)", + "date Date, hostname String, page String, visits UInt64, visitors UInt64, pageviews UInt64, scroll_depth Nullable(UInt64), pageleave_visitors UInt64", "imported_sources" => "date Date, source String, referrer String, utm_source String, utm_medium String, utm_campaign String, utm_content String, utm_term String, pageviews UInt64, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32", "imported_visitors" => diff --git a/lib/plausible/imported/page.ex b/lib/plausible/imported/page.ex index 6b093e5c0bc3..d4ba204a3484 100644 --- a/lib/plausible/imported/page.ex +++ b/lib/plausible/imported/page.ex @@ -16,5 +16,6 @@ defmodule Plausible.Imported.Page do field :exits, Ch, type: "UInt64" field :time_on_page, Ch, type: "UInt64" field :scroll_depth, Ch, type: "Nullable(UInt64)" + field :pageleave_visitors, Ch, type: "UInt64" end end diff --git a/lib/plausible/stats/imported/sql/expression.ex b/lib/plausible/stats/imported/sql/expression.ex index e2e813630a5d..e01d59543a22 100644 --- a/lib/plausible/stats/imported/sql/expression.ex +++ b/lib/plausible/stats/imported/sql/expression.ex @@ -118,7 +118,7 @@ defmodule Plausible.Stats.Imported.SQL.Expression do defp select_metric(:scroll_depth, "imported_pages") do wrap_alias([i], %{ scroll_depth_sum: sum(i.scroll_depth), - total_visitors: fragment("sumIf(?, isNotNull(?))", i.visitors, i.scroll_depth) + total_visitors: sum(i.pageleave_visitors) }) end diff --git a/test/plausible/imported/csv_importer_test.exs b/test/plausible/imported/csv_importer_test.exs index aee93276478d..efc4b05443ce 100644 --- a/test/plausible/imported/csv_importer_test.exs +++ b/test/plausible/imported/csv_importer_test.exs @@ -1190,14 +1190,19 @@ defmodule Plausible.Imported.CSVImporterTest do select: %{ date: i.date, page: i.page, - scroll_depth: i.scroll_depth + scroll_depth: i.scroll_depth, + pageleave_visitors: i.pageleave_visitors } ) |> Plausible.IngestRepo.all() - assert %{date: ~D[2020-01-01], page: "/another", scroll_depth: 50} in imported_data - assert %{date: ~D[2020-01-01], page: "/blog", scroll_depth: 180} in imported_data - assert %{date: ~D[2020-01-02], page: "/blog", scroll_depth: nil} in imported_data + assert %{date: ~D[2020-01-01], page: "/", scroll_depth: 20, pageleave_visitors: 1} in imported_data + + assert %{date: ~D[2020-01-01], page: "/another", scroll_depth: 50, pageleave_visitors: 2} in imported_data + + assert %{date: ~D[2020-01-01], page: "/blog", scroll_depth: 180, pageleave_visitors: 3} in imported_data + + assert %{date: ~D[2020-01-02], page: "/blog", scroll_depth: nil, pageleave_visitors: 0} in imported_data # assert via stats queries that scroll_depth from imported # data matches the scroll_depth from native data diff --git a/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs b/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs index 3434e8c85319..38a05f725bc3 100644 --- a/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs @@ -648,9 +648,21 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do # 2020-01-02 - both imported and native data build(:pageview, user_id: 56, timestamp: ~N[2020-01-02 00:00:00]), build(:pageleave, user_id: 56, timestamp: ~N[2020-01-02 00:01:00], scroll_depth: 20), - build(:imported_pages, date: ~D[2020-01-02], page: "/", visitors: 1, scroll_depth: 40), + build(:imported_pages, + date: ~D[2020-01-02], + page: "/", + visitors: 1, + scroll_depth: 40, + pageleave_visitors: 1 + ), # 2020-01-03 - only imported data - build(:imported_pages, date: ~D[2020-01-03], page: "/", visitors: 1, scroll_depth: 90), + build(:imported_pages, + date: ~D[2020-01-03], + page: "/", + visitors: 1, + scroll_depth: 90, + pageleave_visitors: 1 + ), build(:imported_pages, date: ~D[2020-01-03], page: "/", visitors: 100, scroll_depth: nil) ]) diff --git a/test/plausible_web/controllers/api/stats_controller/pages_test.exs b/test/plausible_web/controllers/api/stats_controller/pages_test.exs index 442d48f5f733..6a70985668b3 100644 --- a/test/plausible_web/controllers/api/stats_controller/pages_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/pages_test.exs @@ -666,7 +666,8 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do pageviews: 3, time_on_page: 90, page: "/blog", - scroll_depth: 120 + scroll_depth: 120, + pageleave_visitors: 3 ) ]) @@ -717,19 +718,21 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do ), build(:imported_pages, date: ~D[2020-01-01], - visitors: 3, - pageviews: 3, + visitors: 4, + pageviews: 4, time_on_page: 180, page: "/native-and-imported", - scroll_depth: 120 + scroll_depth: 120, + pageleave_visitors: 3 ), build(:imported_pages, date: ~D[2020-01-01], - visitors: 10, - pageviews: 10, + visitors: 20, + pageviews: 30, time_on_page: 300, page: "/imported-only", - scroll_depth: 100 + scroll_depth: 100, + pageleave_visitors: 10 ) ]) @@ -742,10 +745,10 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do assert json_response(conn, 200)["results"] == [ %{ "name" => "/native-and-imported", - "visitors" => 4, - "pageviews" => 4, + "visitors" => 5, + "pageviews" => 5, "bounce_rate" => 0, - "time_on_page" => 60, + "time_on_page" => 48, "scroll_depth" => 50 }, %{ @@ -758,10 +761,10 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do }, %{ "name" => "/imported-only", - "visitors" => 10, - "pageviews" => 10, + "visitors" => 20, + "pageviews" => 30, "bounce_rate" => 0, - "time_on_page" => 30.0, + "time_on_page" => 10.0, "scroll_depth" => 10 } ] @@ -778,7 +781,8 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do visitors: 10, pageviews: 10, page: "/blog", - scroll_depth: 100 + scroll_depth: 100, + pageleave_visitors: 10 ), build(:imported_pages, date: ~D[2020-01-01], diff --git a/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs b/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs index 8060f3b5f29a..107be258178c 100644 --- a/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs @@ -1000,7 +1000,13 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do build(:pageleave, user_id: 123, timestamp: ~N[2021-01-01 00:00:20], scroll_depth: 60), build(:pageview, user_id: 456, timestamp: ~N[2021-01-01 00:00:00]), build(:pageleave, user_id: 456, timestamp: ~N[2021-01-01 00:00:10], scroll_depth: 80), - build(:imported_pages, page: "/", date: ~D[2021-01-01], visitors: 8, scroll_depth: 410), + build(:imported_pages, + page: "/", + date: ~D[2021-01-01], + visitors: 8, + scroll_depth: 410, + pageleave_visitors: 8 + ), build(:imported_pages, page: "/", date: ~D[2021-01-02], visitors: 100, scroll_depth: nil) ]) From 7b5eeeba0ff50f9df589280e8983dbdbfdbee20f Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Mon, 6 Jan 2025 17:00:24 +0000 Subject: [PATCH 13/14] extract base_q in export_pages_q --- lib/plausible/exports.ex | 42 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/lib/plausible/exports.ex b/lib/plausible/exports.ex index 9d8d31d3c167..46255d628cce 100644 --- a/lib/plausible/exports.ex +++ b/lib/plausible/exports.ex @@ -421,6 +421,14 @@ defmodule Plausible.Exports do scroll_depth_enabled? = PlausibleWeb.Api.StatsController.scroll_depth_enabled?(site, current_user) + base_q = + from(e in sampled("events_v2"), + where: ^export_filter(site_id, date_range), + where: [name: "pageview"], + group_by: [selected_as(:date), selected_as(:page)], + order_by: selected_as(:date) + ) + if scroll_depth_enabled? do max_scroll_depth_per_visitor_q = from(e in "events_v2", @@ -451,9 +459,7 @@ defmodule Plausible.Exports do group_by: [:date, :page] ) - from(e in sampled("events_v2"), - where: ^export_filter(site_id, date_range), - where: [name: "pageview"], + from(e in base_q, left_join: s in subquery(scroll_depth_q), on: s.date == selected_as(:date) and s.page == selected_as(:page), select: [ @@ -470,25 +476,19 @@ defmodule Plausible.Exports do selected_as(fragment("any(?)", s.pageleave_visitors), :pageleave_visitors) ] ) - |> group_by([e], [selected_as(:date), selected_as(:page)]) - |> order_by([e], selected_as(:date)) else - from e in sampled("events_v2"), - where: ^export_filter(site_id, date_range), - where: [name: "pageview"], - group_by: [selected_as(:date), e.pathname], - order_by: selected_as(:date), - select: [ - date(e.timestamp, ^timezone), - selected_as(fragment("any(?)", e.hostname), :hostname), - selected_as(e.pathname, :page), - selected_as( - fragment("toUInt64(round(uniq(?)*any(_sample_factor)))", e.session_id), - :visits - ), - visitors(e), - selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews) - ] + base_q + |> select([e], [ + date(e.timestamp, ^timezone), + selected_as(fragment("any(?)", e.hostname), :hostname), + selected_as(e.pathname, :page), + selected_as( + fragment("toUInt64(round(uniq(?)*any(_sample_factor)))", e.session_id), + :visits + ), + visitors(e), + selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews) + ]) end end From 3203218906a472873e657dab75bcdf851d4dd8c1 Mon Sep 17 00:00:00 2001 From: Robert Joonas Date: Mon, 6 Jan 2025 17:22:00 +0000 Subject: [PATCH 14/14] rename total_visitors to pageleave_visitors --- lib/plausible/stats/imported/sql/expression.ex | 4 ++-- lib/plausible/stats/sql/special_metrics.ex | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/plausible/stats/imported/sql/expression.ex b/lib/plausible/stats/imported/sql/expression.ex index e01d59543a22..d0fbfd4bf711 100644 --- a/lib/plausible/stats/imported/sql/expression.ex +++ b/lib/plausible/stats/imported/sql/expression.ex @@ -118,7 +118,7 @@ defmodule Plausible.Stats.Imported.SQL.Expression do defp select_metric(:scroll_depth, "imported_pages") do wrap_alias([i], %{ scroll_depth_sum: sum(i.scroll_depth), - total_visitors: sum(i.pageleave_visitors) + pageleave_visitors: sum(i.pageleave_visitors) }) end @@ -366,7 +366,7 @@ defmodule Plausible.Stats.Imported.SQL.Expression do q |> select_merge_as([s, i], %{ __internal_scroll_depth_sum: i.scroll_depth_sum, - __internal_total_visitors: i.total_visitors + __internal_pageleave_visitors: i.pageleave_visitors }) |> select_joined_metrics(rest) end diff --git a/lib/plausible/stats/sql/special_metrics.ex b/lib/plausible/stats/sql/special_metrics.ex index 54f7703be163..15eef2bed981 100644 --- a/lib/plausible/stats/sql/special_metrics.ex +++ b/lib/plausible/stats/sql/special_metrics.ex @@ -155,7 +155,7 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do |> select([p], %{ scroll_depth_sum: fragment("if(count(?) = 0, NULL, sum(?))", p.user_id, p.max_scroll_depth), - total_visitors: fragment("count(?)", p.user_id) + pageleave_visitors: fragment("count(?)", p.user_id) }) |> select_merge(^dim_select) |> group_by(^dim_group_by) @@ -195,16 +195,16 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do s.scroll_depth_sum, selected_as(:__internal_scroll_depth_sum), s.scroll_depth_sum, - selected_as(:__internal_total_visitors), - s.total_visitors, + selected_as(:__internal_pageleave_visitors), + s.pageleave_visitors, # Case 2: Only imported scroll depth sum is present selected_as(:__internal_scroll_depth_sum), selected_as(:__internal_scroll_depth_sum), - selected_as(:__internal_total_visitors), + selected_as(:__internal_pageleave_visitors), # Case 3: Only native scroll depth sum is present s.scroll_depth_sum, s.scroll_depth_sum, - s.total_visitors + s.pageleave_visitors ) }) else @@ -213,9 +213,9 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do scroll_depth: fragment( "if(any(?) > 0, toUInt8(round(any(?) / any(?))), NULL)", - s.total_visitors, + s.pageleave_visitors, s.scroll_depth_sum, - s.total_visitors + s.pageleave_visitors ) }) end