diff --git a/lib/plausible/exports.ex b/lib/plausible/exports.ex
index 15019443756d..46255d628cce 100644
--- a/lib/plausible/exports.ex
+++ b/lib/plausible/exports.ex
@@ -8,13 +8,15 @@ defmodule Plausible.Exports do
   import Ecto.Query

   @doc "Schedules CSV export job to S3 storage"
-  @spec schedule_s3_export(pos_integer, String.t()) :: {:ok, Oban.Job.t()} | {:error, :no_data}
-  def schedule_s3_export(site_id, email_to) do
+  @spec schedule_s3_export(pos_integer, pos_integer | nil, String.t()) ::
+          {:ok, Oban.Job.t()} | {:error, :no_data}
+  def schedule_s3_export(site_id, current_user_id, email_to) do
     with :ok <- ensure_has_data(site_id) do
       args = %{
         "storage" => "s3",
         "site_id" => site_id,
         "email_to" => email_to,
+        "current_user_id" => current_user_id,
         "s3_bucket" => Plausible.S3.exports_bucket(),
         "s3_path" => s3_export_key(site_id)
       }
@@ -207,13 +209,13 @@ defmodule Plausible.Exports do
   Builds Ecto queries to export data from `events_v2` and `sessions_v2`
   tables into the format of `imported_*` tables for a website.
   """
-  @spec export_queries(pos_integer,
+  @spec export_queries(pos_integer, pos_integer | nil,
          extname: String.t(),
         date_range: Date.Range.t(),
          timezone: String.t()
        ) :: %{String.t() => Ecto.Query.t()}
-  def export_queries(site_id, opts \\ []) do
+  def export_queries(site_id, current_user_id, opts \\ []) do
    extname = opts[:extname] || ".csv"
    date_range = opts[:date_range]
    timezone = opts[:timezone] || "UTC"
@@ -232,7 +234,8 @@ defmodule Plausible.Exports do
     %{
       filename.("imported_visitors") => export_visitors_q(site_id, timezone, date_range),
       filename.("imported_sources") => export_sources_q(site_id, timezone, date_range),
-      filename.("imported_pages") => export_pages_q(site_id, timezone, date_range),
+      filename.("imported_pages") =>
+        export_pages_q(site_id, current_user_id, timezone, date_range),
       filename.("imported_entry_pages") => export_entry_pages_q(site_id, timezone, date_range),
       filename.("imported_exit_pages") => export_exit_pages_q(site_id, timezone, date_range),
       filename.("imported_custom_events") =>
@@ -411,13 +414,71 @@ defmodule Plausible.Exports do
     ]
   end

-  defp export_pages_q(site_id, timezone, date_range) do
-    from e in sampled("events_v2"),
-      where: ^export_filter(site_id, date_range),
-      where: [name: "pageview"],
-      group_by: [selected_as(:date), e.pathname],
-      order_by: selected_as(:date),
-      select: [
+  defp export_pages_q(site_id, current_user_id, timezone, date_range) do
+    site = Plausible.Repo.get(Plausible.Site, site_id)
+    current_user = current_user_id && Plausible.Repo.get(Plausible.Auth.User, current_user_id)
+
+    scroll_depth_enabled? =
+      PlausibleWeb.Api.StatsController.scroll_depth_enabled?(site, current_user)
+
+    base_q =
+      from(e in sampled("events_v2"),
+        where: ^export_filter(site_id, date_range),
+        where: [name: "pageview"],
+        group_by: [selected_as(:date), selected_as(:page)],
+        order_by: selected_as(:date)
+      )
+
+    if scroll_depth_enabled? do
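+      # Scroll depth is exported as two columns per (date, page): the sum of each
+      # visitor's maximum scroll depth and the number of visitors with pageleave
+      # events (pageleave_visitors), so the average can be recomputed after import.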
+      max_scroll_depth_per_visitor_q =
+        from(e in "events_v2",
+          where: ^export_filter(site_id, date_range),
+          where: e.name == "pageleave" and e.scroll_depth <= 100,
+          select: %{
+            date: date(e.timestamp, ^timezone),
+            page: selected_as(e.pathname, :page),
+            user_id: e.user_id,
+            max_scroll_depth: max(e.scroll_depth)
+          },
+          group_by: [e.user_id, selected_as(:date), selected_as(:page)]
+        )
+
+      scroll_depth_q =
+        from(p in subquery(max_scroll_depth_per_visitor_q),
+          select: %{
+            date: p.date,
+            page: p.page,
+            scroll_depth:
+              fragment(
+                "if(isNull(sum(?)), NULL, toUInt64(sum(?)))",
+                p.max_scroll_depth,
+                p.max_scroll_depth
+              ),
+            pageleave_visitors: count(p.user_id)
+          },
+          group_by: [:date, :page]
+        )
+
+      from(e in base_q,
+        left_join: s in subquery(scroll_depth_q),
+        on: s.date == selected_as(:date) and s.page == selected_as(:page),
+        select: [
+          date(e.timestamp, ^timezone),
+          selected_as(fragment("any(?)", e.hostname), :hostname),
+          selected_as(e.pathname, :page),
+          selected_as(
+            fragment("toUInt64(round(uniq(?)*any(_sample_factor)))", e.session_id),
+            :visits
+          ),
+          visitors(e),
+          selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews),
+          selected_as(fragment("any(?)", s.scroll_depth), :scroll_depth),
+          selected_as(fragment("any(?)", s.pageleave_visitors), :pageleave_visitors)
+        ]
+      )
+    else
+      base_q
+      |> select([e], [
         date(e.timestamp, ^timezone),
         selected_as(fragment("any(?)", e.hostname), :hostname),
         selected_as(e.pathname, :page),
@@ -427,7 +488,8 @@ defmodule Plausible.Exports do
         ),
         visitors(e),
         selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews)
-      ]
+      ])
+    end
   end

   defp export_entry_pages_q(site_id, timezone, date_range) do
diff --git a/lib/plausible/imported/csv_importer.ex b/lib/plausible/imported/csv_importer.ex
index ff763e304b8f..277a9dd8da07 100644
--- a/lib/plausible/imported/csv_importer.ex
+++ b/lib/plausible/imported/csv_importer.ex
@@ -161,7 +161,7 @@ defmodule Plausible.Imported.CSVImporter do
       "imported_operating_systems" =>
         "date Date, operating_system String, operating_system_version String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32, pageviews UInt64",
       "imported_pages" =>
-        "date Date, hostname String, page String, visits UInt64, visitors UInt64, pageviews UInt64",
+        "date Date, hostname String, page String, visits UInt64, visitors UInt64, pageviews UInt64, scroll_depth Nullable(UInt64), pageleave_visitors UInt64",
       "imported_sources" =>
         "date Date, source String, referrer String, utm_source String, utm_medium String, utm_campaign String, utm_content String, utm_term String, pageviews UInt64, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32",
       "imported_visitors" =>
diff --git a/lib/plausible/imported/page.ex b/lib/plausible/imported/page.ex
index 4720bef4f4cc..d4ba204a3484 100644
--- a/lib/plausible/imported/page.ex
+++ b/lib/plausible/imported/page.ex
@@ -15,5 +15,7 @@ defmodule Plausible.Imported.Page do
     field :pageviews, Ch, type: "UInt64"
     field :exits, Ch, type: "UInt64"
     field :time_on_page, Ch, type: "UInt64"
+    field :scroll_depth, Ch, type: "Nullable(UInt64)"
+    field :pageleave_visitors, Ch, type: "UInt64"
   end
 end
diff --git a/lib/plausible/stats/imported/imported.ex b/lib/plausible/stats/imported/imported.ex
index b05e1d2112e0..9f7b038595d7 100644
--- a/lib/plausible/stats/imported/imported.ex
+++ b/lib/plausible/stats/imported/imported.ex
@@ -345,6 +345,7 @@ defmodule Plausible.Stats.Imported do
   defp can_order_by?(query) do
     Enum.all?(query.order_by, fn
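+      # The final scroll_depth value is only computed in a later query-building
+      # step (see SpecialMetrics), so it is not available for ordering here.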
+      {:scroll_depth, _} -> false
       {metric, _direction} when is_atom(metric) -> metric in query.metrics
       _ -> true
     end)
diff --git a/lib/plausible/stats/imported/sql/expression.ex b/lib/plausible/stats/imported/sql/expression.ex
index 057bc22d695a..d0fbfd4bf711 100644
--- a/lib/plausible/stats/imported/sql/expression.ex
+++ b/lib/plausible/stats/imported/sql/expression.ex
@@ -115,6 +115,13 @@ defmodule Plausible.Stats.Imported.SQL.Expression do
     wrap_alias([i], %{pageviews: sum(i.pageviews), __internal_visits: sum(i.visits)})
   end

+  defp select_metric(:scroll_depth, "imported_pages") do
+    wrap_alias([i], %{
+      scroll_depth_sum: sum(i.scroll_depth),
+      pageleave_visitors: sum(i.pageleave_visitors)
+    })
+  end
+
   defp select_metric(_metric, _table), do: %{}

   def group_imported_by(q, query) do
@@ -351,6 +358,19 @@ defmodule Plausible.Stats.Imported.SQL.Expression do
     |> select_joined_metrics(rest)
   end

+  # The final `scroll_depth` gets selected at a later query-building step
+  # (in `Plausible.Stats.SQL.SpecialMetrics.add/3`). But in order to avoid
+  # having to join with imported data there again, we select the required
+  # information from imported data here already.
+  def select_joined_metrics(q, [:scroll_depth | rest]) do
+    q
+    |> select_merge_as([s, i], %{
+      __internal_scroll_depth_sum: i.scroll_depth_sum,
+      __internal_pageleave_visitors: i.pageleave_visitors
+    })
+    |> select_joined_metrics(rest)
+  end
+
   # Ignored as it's calculated separately
   def select_joined_metrics(q, [metric | rest])
       when metric in [:conversion_rate, :group_conversion_rate, :percentage] do
diff --git a/lib/plausible/stats/sql/special_metrics.ex b/lib/plausible/stats/sql/special_metrics.ex
index b4a615a1cc83..15eef2bed981 100644
--- a/lib/plausible/stats/sql/special_metrics.ex
+++ b/lib/plausible/stats/sql/special_metrics.ex
@@ -150,15 +150,12 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do
         dim_shortnames
         |> Enum.map(fn dim -> dynamic([p], field(p, ^dim)) end)

-      scroll_depth_q =
+      scroll_depth_sum_q =
         subquery(max_per_visitor_q)
         |> select([p], %{
-          scroll_depth:
-            fragment(
-              "if(isFinite(avg(?)), toUInt8(round(avg(?))), NULL)",
-              p.max_scroll_depth,
-              p.max_scroll_depth
-            )
+          scroll_depth_sum:
+            fragment("if(count(?) = 0, NULL, sum(?))", p.user_id, p.max_scroll_depth),
+          pageleave_visitors: fragment("count(?)", p.user_id)
         })
         |> select_merge(^dim_select)
         |> group_by(^dim_group_by)
@@ -173,9 +170,55 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do
         |> Enum.reduce(fn condition, acc -> dynamic([], ^acc and ^condition) end)
       end

-      q
-      |> join(:left, [e], s in subquery(scroll_depth_q), on: ^join_on_dim_condition)
-      |> select_merge_as([_e, ..., s], %{scroll_depth: fragment("any(?)", s.scroll_depth)})
+      joined_q =
+        join(q, :left, [e], s in subquery(scroll_depth_sum_q), on: ^join_on_dim_condition)
+
+      if query.include_imported do
+        joined_q
+        |> select_merge_as([..., s], %{
+          scroll_depth:
+            fragment(
+              """
+              case
+                when isNotNull(?) AND isNotNull(?) then
+                  toUInt8(round((? + ?) / (? + ?)))
+                when isNotNull(?) then
+                  toUInt8(round(? / ?))
+                when isNotNull(?) then
+                  toUInt8(round(? / ?))
+                else
+                  NULL
+              end
+              """,
+              # Case 1: Both imported and native scroll depth sums are present
+              selected_as(:__internal_scroll_depth_sum),
+              s.scroll_depth_sum,
+              selected_as(:__internal_scroll_depth_sum),
+              s.scroll_depth_sum,
+              selected_as(:__internal_pageleave_visitors),
+              s.pageleave_visitors,
+              # Case 2: Only imported scroll depth sum is present
+              selected_as(:__internal_scroll_depth_sum),
+              selected_as(:__internal_scroll_depth_sum),
+              selected_as(:__internal_pageleave_visitors),
+              # Case 3: Only native scroll depth sum is present
+              s.scroll_depth_sum,
+              s.scroll_depth_sum,
+              s.pageleave_visitors
+            )
+        })
+      else
+        joined_q
+        |> select_merge_as([..., s], %{
+          scroll_depth:
+            fragment(
+              "if(any(?) > 0, toUInt8(round(any(?) / any(?))), NULL)",
+              s.pageleave_visitors,
+              s.scroll_depth_sum,
+              s.pageleave_visitors
+            )
+        })
+      end
     else
       q
     end
diff --git a/lib/plausible_web/controllers/api/stats_controller.ex b/lib/plausible_web/controllers/api/stats_controller.ex
index a4261e2adfaf..1ab8792499c7 100644
--- a/lib/plausible_web/controllers/api/stats_controller.ex
+++ b/lib/plausible_web/controllers/api/stats_controller.ex
@@ -393,15 +393,16 @@ defmodule PlausibleWeb.Api.StatsController do
   defp fetch_other_top_stats(site, query, current_user) do
     page_filter? = Filters.filtering_on_dimension?(query, "event:page")
+    scroll_depth_enabled? = scroll_depth_enabled?(site, current_user)
     metrics = [:visitors, :visits, :pageviews, :sample_percent]

     metrics =
       cond do
-        page_filter? && query.include_imported ->
-          metrics
+        page_filter? && scroll_depth_enabled? && query.include_imported ->
+          metrics ++ [:scroll_depth]

-        page_filter? && scroll_depth_enabled?(site, current_user) ->
+        page_filter? && scroll_depth_enabled? ->
           metrics ++ [:bounce_rate, :scroll_depth, :time_on_page]

         page_filter? ->
@@ -831,7 +832,7 @@ defmodule PlausibleWeb.Api.StatsController do
     params = Map.put(params, "property", "event:page")
     query = Query.from(site, params, debug_metadata(conn))

-    include_scroll_depth? = !query.include_imported && scroll_depth_enabled?(site, current_user)
+    include_scroll_depth? = scroll_depth_enabled?(site, current_user)

     extra_metrics =
       cond do
diff --git a/lib/plausible_web/live/csv_export.ex b/lib/plausible_web/live/csv_export.ex
index f76741dc7412..bdf2b1c6e402 100644
--- a/lib/plausible_web/live/csv_export.ex
+++ b/lib/plausible_web/live/csv_export.ex
@@ -210,11 +210,12 @@ defmodule PlausibleWeb.Live.CSVExport do
   @impl true
   def handle_event("export", _params, socket) do
-    %{storage: storage, site_id: site_id, email_to: email_to} = socket.assigns
+    %{storage: storage, site_id: site_id, email_to: email_to, current_user: current_user} =
+      socket.assigns

     schedule_result =
       case storage do
-        "s3" -> Exports.schedule_s3_export(site_id, email_to)
+        "s3" -> Exports.schedule_s3_export(site_id, current_user.id, email_to)
         "local" -> Exports.schedule_local_export(site_id, email_to)
       end
diff --git a/lib/workers/export_analytics.ex b/lib/workers/export_analytics.ex
index 51a1c0af06cb..d5da9f05d0d2 100644
--- a/lib/workers/export_analytics.ex
+++ b/lib/workers/export_analytics.ex
@@ -29,11 +29,13 @@ defmodule Plausible.Workers.ExportAnalytics do
       "site_id" => site_id
     } = args

+    current_user_id = args["current_user_id"]
+
     site = Plausible.Repo.get!(Plausible.Site, site_id)
     %Date.Range{} = date_range = Exports.date_range(site.id, site.timezone)

     queries =
-      Exports.export_queries(site_id,
+      Exports.export_queries(site_id, current_user_id,
         date_range: date_range,
         timezone: site.timezone,
         extname: ".csv"
diff --git a/test/plausible/exports_test.exs b/test/plausible/exports_test.exs
index eebc8d1136a4..9c320235ff54 100644
--- a/test/plausible/exports_test.exs
+++ b/test/plausible/exports_test.exs
@@ -7,7 +7,7 @@ defmodule Plausible.ExportsTest do
   describe "export_queries/2" do
     test "returns named ecto queries" do
-      queries = Plausible.Exports.export_queries(_site_id = 1)
+      queries = Plausible.Exports.export_queries(_site_id = 1, nil)

       assert queries |> Map.values() |> Enum.all?(&match?(%Ecto.Query{}, &1))

       assert Map.keys(queries) == [
@@ -26,7 +26,7 @@ defmodule Plausible.ExportsTest do
     test "with date range" do
       queries =
-        Plausible.Exports.export_queries(_site_id = 1,
+        Plausible.Exports.export_queries(_site_id = 1, nil,
           date_range: Date.range(~D[2023-01-01], ~D[2024-03-12])
         )

       assert Map.keys(queries) == [
@@ -45,10 +45,7 @@ defmodule Plausible.ExportsTest do
     end

     test "with custom extension" do
-      queries =
-        Plausible.Exports.export_queries(_site_id = 1,
-          extname: ".ch"
-        )
+      queries = Plausible.Exports.export_queries(_site_id = 1, nil, extname: ".ch")

       assert Map.keys(queries) == [
               "imported_browsers.ch",
diff --git a/test/plausible/imported/csv_importer_test.exs b/test/plausible/imported/csv_importer_test.exs
index 7e5831a952b2..efc4b05443ce 100644
--- a/test/plausible/imported/csv_importer_test.exs
+++ b/test/plausible/imported/csv_importer_test.exs
@@ -363,6 +363,71 @@ defmodule Plausible.Imported.CSVImporterTest do
       assert Plausible.Stats.Clickhouse.imported_pageview_count(site) == 99
     end

+    test "imports scroll_depth as null when the column does not exist in pages CSV",
+         %{site: site, user: user} = ctx do
+      _ = ctx
+
+      pages_csv =
+        %{
+          name: "imported_pages_20211230_20220101.csv",
+          body: """
+          "date","visitors","pageviews","hostname","page"
+          "2021-12-30",1,1,"lucky.numbers.com","/14776416252794997127"
+          "2021-12-30",1,1,"lucky.numbers.com","/14776416252794997127"
+          "2021-12-30",6,6,"lucky.numbers.com","/14776416252794997127"
+          "2021-12-30",1,1,"lucky.numbers.com","/9102354072466236765"
+          "2021-12-30",1,1,"lucky.numbers.com","/7478911940502018071"
"2021-12-30",1,1,"lucky.numbers.com","/6402607186523575652" + "2021-12-30",2,2,"lucky.numbers.com","/9962503789684934900" + "2021-12-30",8,10,"lucky.numbers.com","/13595620304963848161" + "2021-12-30",2,2,"lucky.numbers.com","/17019199732013993436" + "2021-12-30",32,33,"lucky.numbers.com","/9874837495456455794" + "2021-12-31",4,4,"lucky.numbers.com","/14776416252794997127" + "2021-12-31",1,1,"lucky.numbers.com","/8738789417178304429" + "2021-12-31",1,1,"lucky.numbers.com","/7445073500314667742" + "2021-12-31",1,1,"lucky.numbers.com","/4897404798407749335" + "2021-12-31",1,2,"lucky.numbers.com","/11263893625781431659" + "2022-01-01",2,2,"lucky.numbers.com","/5878724061840196349" + """ + } + + uploads = + on_ee do + %{s3_url: s3_url} = Plausible.S3.import_presign_upload(site.id, pages_csv.name) + [bucket, key] = String.split(URI.parse(s3_url).path, "/", parts: 2) + ExAws.request!(ExAws.S3.put_object(bucket, key, pages_csv.body)) + %{"filename" => pages_csv.name, "s3_url" => s3_url} + else + local_path = Path.join(ctx.tmp_dir, pages_csv.name) + File.write!(local_path, pages_csv.body) + %{"filename" => pages_csv.name, "local_path" => local_path} + end + |> List.wrap() + + date_range = CSVImporter.date_range(uploads) + + {:ok, _job} = + CSVImporter.new_import(site, user, + start_date: date_range.first, + end_date: date_range.last, + uploads: uploads, + storage: on_ee(do: "s3", else: "local") + ) + + assert %{success: 1} = Oban.drain_queue(queue: :analytics_imports, with_safety?: false) + + assert %SiteImport{ + start_date: ~D[2021-12-30], + end_date: ~D[2022-01-01], + source: :csv, + status: :completed + } = Repo.get_by!(SiteImport, site_id: site.id) + + q = from(i in "imported_pages", where: i.site_id == ^site.id, select: i.scroll_depth) + + assert List.duplicate(nil, 16) == Plausible.IngestRepo.all(q) + end + test "accepts cells without quotes", %{site: site, user: user} = ctx do _ = ctx @@ -524,7 +589,8 @@ defmodule Plausible.Imported.CSVImporterTest do # export archive to s3 on_ee do - assert {:ok, _job} = Plausible.Exports.schedule_s3_export(exported_site.id, user.email) + assert {:ok, _job} = + Plausible.Exports.schedule_s3_export(exported_site.id, nil, user.email) else assert {:ok, %{args: %{"local_path" => local_path}}} = Plausible.Exports.schedule_local_export(exported_site.id, user.email) @@ -1016,6 +1082,152 @@ defmodule Plausible.Imported.CSVImporterTest do assert exported["conversion_rate"] == imported["conversion_rate"] end) end + + @tag :tmp_dir + test "scroll_depth", %{conn: conn, user: user, tmp_dir: tmp_dir} do + exported_site = new_site(owner: user) + imported_site = new_site(owner: user) + + t0 = ~N[2020-01-01 00:00:00] + [t1, t2, t3] = for i <- 1..3, do: NaiveDateTime.add(t0, i, :minute) + + stats = + [ + build(:pageview, user_id: 12, pathname: "/blog", timestamp: t0), + build(:pageleave, user_id: 12, pathname: "/blog", timestamp: t1, scroll_depth: 20), + build(:pageview, user_id: 12, pathname: "/another", timestamp: t1), + build(:pageleave, user_id: 12, pathname: "/another", timestamp: t2, scroll_depth: 24), + build(:pageview, user_id: 34, pathname: "/blog", timestamp: t0), + build(:pageleave, user_id: 34, pathname: "/blog", timestamp: t1, scroll_depth: 17), + build(:pageview, user_id: 34, pathname: "/another", timestamp: t1), + build(:pageleave, user_id: 34, pathname: "/another", timestamp: t2, scroll_depth: 26), + build(:pageview, user_id: 34, pathname: "/blog", timestamp: t2), + build(:pageleave, user_id: 34, pathname: "/blog", timestamp: t3, scroll_depth: 60), + 
+          build(:pageview, user_id: 56, pathname: "/blog", timestamp: t0),
+          build(:pageleave, user_id: 56, pathname: "/blog", timestamp: t1, scroll_depth: 100),
+          build(:pageview, user_id: 78, pathname: "/", timestamp: t0),
+          build(:pageleave, user_id: 78, pathname: "/", timestamp: t1, scroll_depth: 20),
+          build(:pageview, pathname: "/", timestamp: t1),
+          build(:pageview, pathname: "/blog", timestamp: NaiveDateTime.add(t0, 1, :day))
+        ]
+        |> Enum.map(fn event -> Map.put(event, :hostname, "csv.test") end)
+
+      populate_stats(exported_site, stats)
+
+      # export archive to s3
+      on_ee do
+        assert {:ok, _job} =
+                 Plausible.Exports.schedule_s3_export(exported_site.id, nil, user.email)
+      else
+        assert {:ok, %{args: %{"local_path" => local_path}}} =
+                 Plausible.Exports.schedule_local_export(exported_site.id, user.email)
+      end
+
+      assert %{success: 1} = Oban.drain_queue(queue: :analytics_exports, with_safety: false)
+
+      # download archive
+      on_ee do
+        ExAws.request!(
+          ExAws.S3.download_file(
+            Plausible.S3.exports_bucket(),
+            to_string(exported_site.id),
+            Path.join(tmp_dir, "plausible-export.zip")
+          )
+        )
+      else
+        File.rename!(local_path, Path.join(tmp_dir, "plausible-export.zip"))
+      end
+
+      # unzip archive
+      {:ok, files} =
+        :zip.unzip(to_charlist(Path.join(tmp_dir, "plausible-export.zip")), cwd: tmp_dir)
+
+      # upload csvs
+      uploads =
+        Enum.map(files, fn file ->
+          on_ee do
+            %{s3_url: s3_url} = Plausible.S3.import_presign_upload(imported_site.id, file)
+            [bucket, key] = String.split(URI.parse(s3_url).path, "/", parts: 2)
+            content = File.read!(file)
+            ExAws.request!(ExAws.S3.put_object(bucket, key, content))
+            %{"filename" => Path.basename(file), "s3_url" => s3_url}
+          else
+            %{
+              "filename" => Path.basename(file),
+              "local_path" => file
+            }
+          end
+        end)
+
+      # run importer
+      date_range = CSVImporter.date_range(uploads)
+
+      {:ok, _job} =
+        CSVImporter.new_import(imported_site, user,
+          start_date: date_range.first,
+          end_date: date_range.last,
+          uploads: uploads,
+          storage: on_ee(do: "s3", else: "local")
+        )
+
+      assert %{success: 1} = Oban.drain_queue(queue: :analytics_imports, with_safety: false)
+
+      # validate import
+      assert %SiteImport{
+               start_date: ~D[2020-01-01],
+               end_date: ~D[2020-01-02],
+               source: :csv,
+               status: :completed
+             } = Repo.get_by!(SiteImport, site_id: imported_site.id)
+
+      assert Plausible.Stats.Clickhouse.imported_pageview_count(exported_site) == 0
+      assert Plausible.Stats.Clickhouse.imported_pageview_count(imported_site) == 9
+
+      # assert on the actual rows that got imported into the imported_pages table
+      imported_data =
+        from(i in "imported_pages",
+          where: i.site_id == ^imported_site.id,
+          select: %{
+            date: i.date,
+            page: i.page,
+            scroll_depth: i.scroll_depth,
+            pageleave_visitors: i.pageleave_visitors
+          }
+        )
+        |> Plausible.IngestRepo.all()
+
+      assert %{date: ~D[2020-01-01], page: "/", scroll_depth: 20, pageleave_visitors: 1} in imported_data
+
+      assert %{date: ~D[2020-01-01], page: "/another", scroll_depth: 50, pageleave_visitors: 2} in imported_data
+
+      assert %{date: ~D[2020-01-01], page: "/blog", scroll_depth: 180, pageleave_visitors: 3} in imported_data
+
+      assert %{date: ~D[2020-01-02], page: "/blog", scroll_depth: nil, pageleave_visitors: 0} in imported_data
+
+      # assert via stats queries that scroll_depth from imported
+      # data matches the scroll_depth from native data
+      expected_results = [
+        %{"dimensions" => ["/"], "metrics" => [20]},
+        %{"dimensions" => ["/another"], "metrics" => [25]},
+        %{"dimensions" => ["/blog"], "metrics" => [60]}
+      ]
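+      # expected averages from the native data above:
+      # "/" 20 / 1 = 20, "/another" (24 + 26) / 2 = 25, "/blog" (20 + 60 + 100) / 3 = 60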
+
+      query_scroll_depth_per_page = fn conn, site ->
+        post(conn, "/api/v2/query-internal-test", %{
+          "site_id" => site.domain,
+          "metrics" => ["scroll_depth"],
+          "date_range" => "all",
+          "order_by" => [["scroll_depth", "asc"]],
+          "include" => %{"imports" => true},
+          "dimensions" => ["event:page"]
+        })
+        |> json_response(200)
+        |> Map.fetch!("results")
+      end
+
+      assert query_scroll_depth_per_page.(conn, exported_site) == expected_results
+      assert query_scroll_depth_per_page.(conn, imported_site) == expected_results
+    end
   end

   defp clean_buckets(_context) do
diff --git a/test/plausible_web/controllers/api/stats_controller/imported_test.exs b/test/plausible_web/controllers/api/stats_controller/imported_test.exs
index 4e36ba8effea..048fc2ccbf57 100644
--- a/test/plausible_web/controllers/api/stats_controller/imported_test.exs
+++ b/test/plausible_web/controllers/api/stats_controller/imported_test.exs
@@ -882,6 +882,7 @@ defmodule PlausibleWeb.Api.StatsController.ImportedTest do
                  "time_on_page" => 40,
                  "visitors" => 3,
                  "pageviews" => 4,
+                 "scroll_depth" => nil,
                  "name" => "/some-other-page"
                },
                %{
@@ -889,6 +890,7 @@ defmodule PlausibleWeb.Api.StatsController.ImportedTest do
                  "time_on_page" => 800.0,
                  "visitors" => 2,
                  "pageviews" => 2,
+                 "scroll_depth" => nil,
                  "name" => "/"
                }
              ]
diff --git a/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs b/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs
index 47704e616735..38a05f725bc3 100644
--- a/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs
+++ b/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs
@@ -633,6 +633,51 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do

       assert plot == [40, 20, 0, 0, 0, 0, 0]
     end
+
+    test "returns scroll depth per day with imported data", %{conn: conn, site: site} do
+      site_import = insert(:site_import, site: site)
+
+      populate_stats(site, site_import.id, [
+        # 2020-01-01 - only native data
+        build(:pageview, user_id: 12, timestamp: ~N[2020-01-01 00:00:00]),
+        build(:pageleave, user_id: 12, timestamp: ~N[2020-01-01 00:01:00], scroll_depth: 20),
+        build(:pageview, user_id: 34, timestamp: ~N[2020-01-01 00:00:00]),
+        build(:pageleave, user_id: 34, timestamp: ~N[2020-01-01 00:01:00], scroll_depth: 17),
+        build(:pageview, user_id: 34, timestamp: ~N[2020-01-01 00:02:00]),
+        build(:pageleave, user_id: 34, timestamp: ~N[2020-01-01 00:03:00], scroll_depth: 60),
+        # 2020-01-02 - both imported and native data
+        build(:pageview, user_id: 56, timestamp: ~N[2020-01-02 00:00:00]),
+        build(:pageleave, user_id: 56, timestamp: ~N[2020-01-02 00:01:00], scroll_depth: 20),
+        build(:imported_pages,
+          date: ~D[2020-01-02],
+          page: "/",
+          visitors: 1,
+          scroll_depth: 40,
+          pageleave_visitors: 1
+        ),
+        # 2020-01-03 - only imported data
+        build(:imported_pages,
+          date: ~D[2020-01-03],
+          page: "/",
+          visitors: 1,
+          scroll_depth: 90,
+          pageleave_visitors: 1
+        ),
+        build(:imported_pages, date: ~D[2020-01-03], page: "/", visitors: 100, scroll_depth: nil)
+      ])
+
+      filters = Jason.encode!([[:is, "event:page", ["/"]]])
+
+      conn =
+        get(
+          conn,
+          "/api/stats/#{site.domain}/main-graph?period=7d&date=2020-01-07&metric=scroll_depth&filters=#{filters}&with_imported=true"
+        )
+
+      assert %{"plot" => plot} = json_response(conn, 200)
+
+      assert plot == [40, 30, 90, 0, 0, 0, 0]
+    end
   end

   describe "GET /api/stats/main-graph - conversion_rate plot" do
diff --git a/test/plausible_web/controllers/api/stats_controller/pages_test.exs b/test/plausible_web/controllers/api/stats_controller/pages_test.exs
index 9661f1edd9af..6a70985668b3 100644
--- a/test/plausible_web/controllers/api/stats_controller/pages_test.exs
+++ b/test/plausible_web/controllers/api/stats_controller/pages_test.exs
@@ -648,6 +648,169 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do
       ]
     end

+    test "calculates scroll_depth from native and imported data combined", %{
+      conn: conn,
+      site: site
+    } do
+      populate_stats(site, [
+        build(:pageview, user_id: @user_id, pathname: "/blog", timestamp: ~N[2020-01-01 00:00:00]),
+        build(:pageleave,
+          user_id: @user_id,
+          pathname: "/blog",
+          timestamp: ~N[2020-01-01 00:00:00],
+          scroll_depth: 80
+        ),
+        build(:imported_pages,
+          date: ~D[2020-01-01],
+          visitors: 3,
+          pageviews: 3,
+          time_on_page: 90,
+          page: "/blog",
+          scroll_depth: 120,
+          pageleave_visitors: 3
+        )
+      ])
+
+      conn =
+        get(
+          conn,
+          "/api/stats/#{site.domain}/pages?period=day&date=2020-01-01&detailed=true&with_imported=true&order_by=#{Jason.encode!([["scroll_depth", "desc"]])}"
+        )
+
+      assert json_response(conn, 200)["results"] == [
+               %{
+                 "name" => "/blog",
+                 "visitors" => 4,
+                 "pageviews" => 4,
+                 "bounce_rate" => 100,
+                 "time_on_page" => 30.0,
+                 "scroll_depth" => 50
+               }
+             ]
+    end
+
+    test "handles missing scroll_depth data from native and imported sources", %{
+      conn: conn,
+      site: site
+    } do
+      populate_stats(site, [
+        build(:pageview,
+          user_id: @user_id,
+          pathname: "/native-and-imported",
+          timestamp: ~N[2020-01-01 00:00:00]
+        ),
+        build(:pageleave,
+          user_id: @user_id,
+          pathname: "/native-and-imported",
+          timestamp: ~N[2020-01-01 00:01:00],
+          scroll_depth: 80
+        ),
+        build(:pageview,
+          user_id: @user_id,
+          pathname: "/native-only",
+          timestamp: ~N[2020-01-01 00:01:00]
+        ),
+        build(:pageleave,
+          user_id: @user_id,
+          pathname: "/native-only",
+          timestamp: ~N[2020-01-01 00:02:00],
+          scroll_depth: 40
+        ),
+        build(:imported_pages,
+          date: ~D[2020-01-01],
+          visitors: 4,
+          pageviews: 4,
+          time_on_page: 180,
+          page: "/native-and-imported",
+          scroll_depth: 120,
+          pageleave_visitors: 3
+        ),
+        build(:imported_pages,
+          date: ~D[2020-01-01],
+          visitors: 20,
+          pageviews: 30,
+          time_on_page: 300,
+          page: "/imported-only",
+          scroll_depth: 100,
+          pageleave_visitors: 10
+        )
+      ])
+
+      conn =
+        get(
+          conn,
+          "/api/stats/#{site.domain}/pages?period=day&date=2020-01-01&detailed=true&with_imported=true&order_by=#{Jason.encode!([["scroll_depth", "desc"]])}"
+        )
+
+      assert json_response(conn, 200)["results"] == [
+               %{
+                 "name" => "/native-and-imported",
+                 "visitors" => 5,
+                 "pageviews" => 5,
+                 "bounce_rate" => 0,
+                 "time_on_page" => 48,
+                 "scroll_depth" => 50
+               },
+               %{
+                 "name" => "/native-only",
+                 "visitors" => 1,
+                 "pageviews" => 1,
+                 "bounce_rate" => 0,
+                 "time_on_page" => nil,
+                 "scroll_depth" => 40
+               },
+               %{
+                 "name" => "/imported-only",
+                 "visitors" => 20,
+                 "pageviews" => 30,
+                 "bounce_rate" => 0,
+                 "time_on_page" => 10.0,
+                 "scroll_depth" => 10
+               }
+             ]
+    end
+
+    test "can query scroll depth only from imported data, ignoring rows where scroll depth doesn't exist",
+         %{
+           conn: conn,
+           site: site
+         } do
+      populate_stats(site, [
+        build(:imported_pages,
+          date: ~D[2020-01-01],
+          visitors: 10,
+          pageviews: 10,
+          page: "/blog",
+          scroll_depth: 100,
+          pageleave_visitors: 10
+        ),
+        build(:imported_pages,
+          date: ~D[2020-01-01],
+          visitors: 100,
+          pageviews: 150,
+          page: "/blog",
+          scroll_depth: nil
+        )
+      ])
+
+      conn =
+        get(
+          conn,
+          "/api/stats/#{site.domain}/pages?period=7d&date=2020-01-02&detailed=true&with_imported=true"
+        )
+
+      assert json_response(conn, 200)["results"] == [
200)["results"] == [ + %{ + "name" => "/blog", + "visitors" => 110, + "pageviews" => 160, + "bounce_rate" => 0, + "time_on_page" => 0.125, + "scroll_depth" => 10 + } + ] + end + test "can filter using the | (OR) filter", %{conn: conn, site: site} do populate_stats(site, [ @@ -1263,6 +1426,7 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do "time_on_page" => 800.0, "visitors" => 3, "pageviews" => 3, + "scroll_depth" => nil, "name" => "/" }, %{ @@ -1270,6 +1434,7 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do "time_on_page" => 60, "visitors" => 2, "pageviews" => 2, + "scroll_depth" => nil, "name" => "/some-other-page" } ] @@ -1341,7 +1506,8 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do "name" => "/", "pageviews" => 4, "time_on_page" => 90.0, - "visitors" => 4 + "visitors" => 4, + "scroll_depth" => nil } ] end @@ -1390,14 +1556,16 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do "name" => "/", "pageviews" => 4, "time_on_page" => 90.0, - "visitors" => 4 + "visitors" => 4, + "scroll_depth" => nil }, %{ "bounce_rate" => 100, "name" => "/a", "pageviews" => 1, "time_on_page" => 10.0, - "visitors" => 1 + "visitors" => 1, + "scroll_depth" => nil } ] end @@ -1446,14 +1614,16 @@ defmodule PlausibleWeb.Api.StatsController.PagesTest do "name" => "/aaa", "pageviews" => 4, "time_on_page" => 90.0, - "visitors" => 4 + "visitors" => 4, + "scroll_depth" => nil }, %{ "bounce_rate" => 100, "name" => "/a", "pageviews" => 1, "time_on_page" => 10.0, - "visitors" => 1 + "visitors" => 1, + "scroll_depth" => nil } ] end diff --git a/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs b/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs index 0bc99cdc3f26..107be258178c 100644 --- a/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs @@ -603,7 +603,7 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do ] end - test ":is filter on page returns only visitors, visits and pageviews", %{ + test ":is filter on page returns only visitors, visits, pageviews and scroll_depth", %{ conn: conn, site: site } do @@ -638,7 +638,8 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do assert res["top_stats"] == [ %{"name" => "Unique visitors", "value" => 2, "graph_metric" => "visitors"}, %{"name" => "Total visits", "value" => 4, "graph_metric" => "visits"}, - %{"name" => "Total pageviews", "value" => 36, "graph_metric" => "pageviews"} + %{"name" => "Total pageviews", "value" => 36, "graph_metric" => "pageviews"}, + %{"name" => "Scroll depth", "value" => nil, "graph_metric" => "scroll_depth"} ] end end @@ -989,6 +990,41 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do ] end + test "returns scroll_depth with a page filter with imported data", %{conn: conn, site: site} do + site_import = insert(:site_import, site: site) + + populate_stats(site, site_import.id, [ + build(:pageview, user_id: 123, timestamp: ~N[2021-01-01 00:00:00]), + build(:pageleave, user_id: 123, timestamp: ~N[2021-01-01 00:00:10], scroll_depth: 40), + build(:pageview, user_id: 123, timestamp: ~N[2021-01-01 00:00:10]), + build(:pageleave, user_id: 123, timestamp: ~N[2021-01-01 00:00:20], scroll_depth: 60), + build(:pageview, user_id: 456, timestamp: ~N[2021-01-01 00:00:00]), + build(:pageleave, user_id: 456, timestamp: ~N[2021-01-01 00:00:10], scroll_depth: 80), + build(:imported_pages, + page: "/", + date: ~D[2021-01-01], + visitors: 8, + scroll_depth: 410, + 
+          pageleave_visitors: 8
+        ),
+        build(:imported_pages, page: "/", date: ~D[2021-01-02], visitors: 100, scroll_depth: nil)
+      ])
+
+      filters = Jason.encode!([[:is, "event:page", ["/"]]])
+
+      conn =
+        get(
+          conn,
+          "/api/stats/#{site.domain}/top-stats?period=7d&date=2021-01-07&filters=#{filters}&with_imported=true"
+        )
+
+      res = json_response(conn, 200)
+
+      assert %{"name" => "Scroll depth", "value" => 55, "graph_metric" => "scroll_depth"} in res[
+               "top_stats"
+             ]
+    end
+
     test "contains filter", %{conn: conn, site: site} do
       populate_stats(site, [
         build(:pageview, pathname: "/some-blog-post"),
diff --git a/test/plausible_web/controllers/stats_controller_test.exs b/test/plausible_web/controllers/stats_controller_test.exs
index 586cb5b6b658..743927ffb75b 100644
--- a/test/plausible_web/controllers/stats_controller_test.exs
+++ b/test/plausible_web/controllers/stats_controller_test.exs
@@ -547,8 +547,15 @@ defmodule PlausibleWeb.StatsControllerTest do
         {~c"pages.csv", data} ->
           assert parse_csv(data) == [
-                   ["name", "visitors", "pageviews", "bounce_rate", "time_on_page"],
-                   ["/test", "1", "1", "0.0", "10.0"],
+                   [
+                     "name",
+                     "visitors",
+                     "pageviews",
+                     "bounce_rate",
+                     "time_on_page",
+                     "scroll_depth"
+                   ],
+                   ["/test", "1", "1", "0.0", "10.0", ""],
                    [""]
                  ]