From d18f83d5e2d2ce853c0b377d570436f83e040e7a Mon Sep 17 00:00:00 2001
From: Alexandre Negrel
Date: Wed, 28 Feb 2024 11:39:24 +0100
Subject: [PATCH] fix: properly clean pageview path

---
 pkg/event/pageview.go                     | 17 ++++++++--------
 tests/bun/events/events_pageviews.test.ts | 24 +++++++++++++++++++++++
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/pkg/event/pageview.go b/pkg/event/pageview.go
index 6067ca2..fd9a414 100644
--- a/pkg/event/pageview.go
+++ b/pkg/event/pageview.go
@@ -2,6 +2,7 @@ package event
 
 import (
 	"net/url"
+	"path"
 	"time"
 
 	"github.com/prismelabs/analytics/pkg/services/ipgeolocator"
@@ -22,10 +23,12 @@ type PageView struct {
 }
 
 // NewPageView creates a new PageView event.
-func NewPageView(pvUrl *url.URL,
+func NewPageView(
+	pvUrl *url.URL,
 	cli uaparser.Client,
 	pageReferrer string,
-	countryCode ipgeolocator.CountryCode) (PageView, error) {
+	countryCode ipgeolocator.CountryCode,
+) (PageView, error) {
 	domain, err := ParseDomainName(pvUrl.Hostname())
 	if err != nil {
 		return PageView{}, err
@@ -36,17 +39,15 @@ func NewPageView(pvUrl *url.URL,
 		return PageView{}, err
 	}
 
-	path := pvUrl.Path
-	if path == "" {
-		path = "/"
-	} else if path[len(path)-1] == '/' && len(path) > 1 {
-		path = path[:len(path)-1]
+	pageviewPath := pvUrl.Path
+	if pageviewPath == "" {
+		pageviewPath = "/"
 	}
 
 	return PageView{
 		Timestamp:      time.Now().UTC(),
 		DomainName:     domain,
-		PathName:       path,
+		PathName:       path.Clean(pageviewPath),
 		Client:         cli,
 		ReferrerDomain: referrerDomain,
 		CountryCode:    countryCode,
diff --git a/tests/bun/events/events_pageviews.test.ts b/tests/bun/events/events_pageviews.test.ts
index ad1179b..ef3d1d1 100644
--- a/tests/bun/events/events_pageviews.test.ts
+++ b/tests/bun/events/events_pageviews.test.ts
@@ -205,6 +205,30 @@ test('valid pageview with US IP address', async () => {
   })
 })
 
+test('valid pageview with dirty path', async () => {
+  const response = await fetch(PRISME_PAGEVIEWS_URL, {
+    method: 'POST',
+    headers: {
+      'X-Forwarded-For': '8.8.8.8', // Google public DNS
+      Referer: 'http://foo.mywebsite.localhost///another/../another/foo?bar=baz#qux'
+    }
+  })
+  expect(response.status).toBe(200)
+
+  const data = await getLatestPageview()
+
+  expect(data).toMatchObject({
+    timestamp: expect.stringMatching(TIMESTAMP_REGEX),
+    domain: 'foo.mywebsite.localhost',
+    path: '/another/foo',
+    operating_system: 'Other',
+    browser_family: 'Other',
+    device: 'Other',
+    referrer_domain: 'direct',
+    country_code: 'US'
+  })
+})
+
 async function getLatestPageview (): Promise {
   // Wait for clickhouse to ingest batch.
   Bun.sleepSync(1000)