From 92b1bc5f2a50c8b737fdebc3bf34bd399865eb92 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Tue, 15 Oct 2024 06:27:01 +0200 Subject: [PATCH 1/3] Set root domain `(none)` for ingests without hostname --- lib/plausible/ingestion/event.ex | 1 + test/plausible/ingestion/event_test.exs | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/lib/plausible/ingestion/event.ex b/lib/plausible/ingestion/event.ex index 94e13d04c299..b7685108d958 100644 --- a/lib/plausible/ingestion/event.ex +++ b/lib/plausible/ingestion/event.ex @@ -541,6 +541,7 @@ defmodule Plausible.Ingestion.Event do end end + defp get_root_domain("(none)"), do: "(none)" defp get_root_domain(nil), do: "(none)" defp get_root_domain(hostname) do diff --git a/test/plausible/ingestion/event_test.exs b/test/plausible/ingestion/event_test.exs index bc2a63696a57..88fd25aab4d6 100644 --- a/test/plausible/ingestion/event_test.exs +++ b/test/plausible/ingestion/event_test.exs @@ -397,4 +397,20 @@ defmodule Plausible.Ingestion.EventTest do assert {:ok, %{buffered: [event]}} = Event.build_and_buffer(request) assert event.clickhouse_event.hostname == "foo.netlify.app" end + + test "hostname is (none) when no hostname can be derived from the url" do + site = insert(:site, domain: "foo.example.com") + + payload = %{ + domain: site.domain, + name: "pageview", + url: "/no/hostname" + } + + conn = build_conn(:post, "/api/events", payload) + assert {:ok, request} = Request.build(conn) + + assert {:ok, %{buffered: [event]}} = Event.build_and_buffer(request) + assert event.clickhouse_event.hostname == "(none)" + end end From 24185ef695d8cafb57d013aa5fecbe7f58699a02 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Tue, 15 Oct 2024 06:46:18 +0200 Subject: [PATCH 2/3] No `nil` hostname should be allowed to enter ingestion --- lib/plausible/ingestion/event.ex | 1 - lib/plausible/ingestion/request.ex | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/plausible/ingestion/event.ex b/lib/plausible/ingestion/event.ex index b7685108d958..f177ee940816 100644 --- a/lib/plausible/ingestion/event.ex +++ b/lib/plausible/ingestion/event.ex @@ -542,7 +542,6 @@ defmodule Plausible.Ingestion.Event do end defp get_root_domain("(none)"), do: "(none)" - defp get_root_domain(nil), do: "(none)" defp get_root_domain(hostname) do case :inet.parse_ipv4_address(String.to_charlist(hostname)) do diff --git a/lib/plausible/ingestion/request.ex b/lib/plausible/ingestion/request.ex index fa2218611b84..b7c4296fa4dd 100644 --- a/lib/plausible/ingestion/request.ex +++ b/lib/plausible/ingestion/request.ex @@ -295,6 +295,8 @@ defmodule Plausible.Ingestion.Request do sanitize_hostname(hostname) end + def sanitize_hostname("(none)"), do: "(none)" + def sanitize_hostname(hostname) when is_binary(hostname) do hostname |> String.trim() From ed17b1e9a559d2fc02232a673fbd1148cb7f7103 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Tue, 15 Oct 2024 08:59:44 +0200 Subject: [PATCH 3/3] Invoke `sanitize_hostname` only when applicable --- lib/plausible/ingestion/request.ex | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/plausible/ingestion/request.ex b/lib/plausible/ingestion/request.ex index b7c4296fa4dd..407b4c032590 100644 --- a/lib/plausible/ingestion/request.ex +++ b/lib/plausible/ingestion/request.ex @@ -194,11 +194,11 @@ defmodule Plausible.Ingestion.Request do defp put_hostname(changeset) do host = case Changeset.get_field(changeset, :uri) do - %{host: host} when is_binary(host) and host != "" -> host + %{host: host} when is_binary(host) and host != "" -> sanitize_hostname(host) _ -> "(none)" end - Changeset.put_change(changeset, :hostname, sanitize_hostname(host)) + Changeset.put_change(changeset, :hostname, host) end @max_props 30 @@ -295,8 +295,6 @@ defmodule Plausible.Ingestion.Request do sanitize_hostname(hostname) end - def sanitize_hostname("(none)"), do: "(none)" - def sanitize_hostname(hostname) when is_binary(hostname) do hostname |> String.trim()