Skip to content

Commit

Permalink
Scroll depth: update site.scroll_depth_visible_at in ingestion (#5035)
Browse files Browse the repository at this point in the history
* WIP: Update scroll_depth_visible_at in ingestion

* Simplify code and test genserver directly

* No more check_scroll_depth_visible!

* Update a test

* Update a test

* GenServer -> ets

* Additional where

* Fix a test
  • Loading branch information
macobo authored Feb 3, 2025
1 parent de62f80 commit 84aea97
Show file tree
Hide file tree
Showing 11 changed files with 109 additions and 83 deletions.
1 change: 1 addition & 0 deletions lib/plausible/application.ex
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ defmodule Plausible.Application do
setup_geolocation()
Location.load_all()
Plausible.Ingestion.Source.init()
Plausible.Ingestion.ScrollDepthVisibleAt.init()
Plausible.Geo.await_loader()

Supervisor.start_link(List.flatten(children), opts)
Expand Down
4 changes: 1 addition & 3 deletions lib/plausible/exports.ex
Original file line number Diff line number Diff line change
Expand Up @@ -418,8 +418,6 @@ defmodule Plausible.Exports do
site = Plausible.Repo.get(Plausible.Site, site_id)
current_user = current_user_id && Plausible.Repo.get(Plausible.Auth.User, current_user_id)

include_scroll_depth? = Plausible.Stats.ScrollDepth.check_feature_visible!(site, current_user)

base_q =
from(e in sampled("events_v2"),
where: ^export_filter(site_id, date_range),
Expand All @@ -428,7 +426,7 @@ defmodule Plausible.Exports do
order_by: selected_as(:date)
)

if include_scroll_depth? do
if Plausible.Stats.ScrollDepth.feature_visible?(site, current_user) do
max_scroll_depth_per_visitor_q =
from(e in "events_v2",
where: ^export_filter(site_id, date_range),
Expand Down
12 changes: 11 additions & 1 deletion lib/plausible/ingestion/event.ex
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ defmodule Plausible.Ingestion.Event do
put_user_id: &put_user_id/2,
validate_clickhouse_event: &validate_clickhouse_event/2,
register_session: &register_session/2,
write_to_buffer: &write_to_buffer/2
write_to_buffer: &write_to_buffer/2,
register_scroll_depth_visible: &register_scroll_depth_visible/2
]
end

Expand Down Expand Up @@ -405,6 +406,15 @@ defmodule Plausible.Ingestion.Event do
event
end

defp register_scroll_depth_visible(%__MODULE__{clickhouse_event: ev} = event, _context) do
if is_nil(event.site.scroll_depth_visible_at) and ev.name in ["pageleave", "engagement"] and
not is_nil(ev.scroll_depth) and ev.scroll_depth >= 0 and ev.scroll_depth <= 100 do
Plausible.Ingestion.ScrollDepthVisibleAt.mark_scroll_depth_visible(event.site.id)
end

event
end

@click_id_params ["gclid", "gbraid", "wbraid", "msclkid", "fbclid", "twclid"]

defp get_click_id_param(nil), do: nil
Expand Down
44 changes: 44 additions & 0 deletions lib/plausible/ingestion/scroll_depth_visible_at.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
defmodule Plausible.Ingestion.ScrollDepthVisibleAt do
@moduledoc """
Module that updates the `scroll_depth_visible_at` column for a site when needed.
This is called in a hot loop in ingestion, so it:
1. Only updates the database once per site async (if SiteCache doesn't know about visibility yet)
2. Does not retry the update if it fails, to be retried on server restart
"""

require Logger
alias Plausible.Repo

import Ecto.Query

def init() do
:ets.new(__MODULE__, [
:named_table,
:set,
:public,
{:read_concurrency, true},
{:write_concurrency, true}
])
end

def mark_scroll_depth_visible(site_id) do
if :ets.insert_new(__MODULE__, {site_id}) do
Task.start(fn -> attempt_update_repo(site_id) end)
end
end

defp attempt_update_repo(site_id) do
Repo.update_all(
from(s in Plausible.Site, where: s.id == ^site_id and is_nil(s.scroll_depth_visible_at)),
set: [
scroll_depth_visible_at: DateTime.utc_now()
]
)
rescue
error ->
Logger.error(
"Error updating scroll_depth_visible_at for site #{site_id}: #{inspect(error)}. This will not be retried until server restart."
)
end
end
1 change: 1 addition & 0 deletions lib/plausible/site/cache.ex
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ defmodule Plausible.Site.Cache do
domain_changed_from
ingest_rate_limit_scale_seconds
ingest_rate_limit_threshold
scroll_depth_visible_at
)a

@impl true
Expand Down
49 changes: 0 additions & 49 deletions lib/plausible/stats/scroll_depth.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@ defmodule Plausible.Stats.ScrollDepth do
Module to check whether the scroll depth metric is available and visible for a site.
"""

import Ecto.Query
require Logger

alias Plausible.ClickhouseRepo

def feature_available?(site, user) do
FunWithFlags.enabled?(:scroll_depth, for: user) ||
FunWithFlags.enabled?(:scroll_depth, for: site)
Expand All @@ -16,48 +11,4 @@ defmodule Plausible.Stats.ScrollDepth do
def feature_visible?(site, user) do
feature_available?(site, user) && not is_nil(site.scroll_depth_visible_at)
end

@doc """
Checks whether the scroll depth feature is visible for a site and updates the site record if it is.
Note this function queries ClickHouse and may take a while to complete.
"""
def check_feature_visible!(site, user) do
cond do
not feature_available?(site, user) ->
false

not is_nil(site.scroll_depth_visible_at) ->
true

is_nil(site.scroll_depth_visible_at) ->
visible? = has_scroll_data_last_30d?(site)

if visible? do
Plausible.Sites.set_scroll_depth_visible_at(site)
end

visible?
end
end

defp has_scroll_data_last_30d?(site) do
try do
ClickhouseRepo.exists?(
from(e in "events_v2",
where:
e.site_id == ^site.id and
e.name == "pageleave" and
e.timestamp >= fragment("toStartOfDay(now() - toIntervalDay(30))") and
e.scroll_depth > 0 and e.scroll_depth <= 100
)
)
rescue
# Avoid propagating error to the user, bringing down the site.
error ->
Logger.error("Error checking scroll data for site #{site.id}: #{inspect(error)}")

false
end
end
end
12 changes: 3 additions & 9 deletions lib/plausible_web/controllers/stats_controller.ex
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ defmodule PlausibleWeb.StatsController do
use Plausible.Repo

alias Plausible.Sites
alias Plausible.Stats.{Filters, Query}
alias Plausible.Stats.{Filters, Query, ScrollDepth}
alias PlausibleWeb.Api

plug(PlausibleWeb.Plugs.AuthorizeSiteAccess when action in [:stats, :csv_export])
Expand All @@ -59,9 +59,6 @@ defmodule PlausibleWeb.StatsController do
dogfood_page_path = if demo, do: "/#{site.domain}", else: "/:dashboard"
skip_to_dashboard? = conn.params["skip_to_dashboard"] == "true"

scroll_depth_visible? =
Plausible.Stats.ScrollDepth.check_feature_visible!(site, current_user)

cond do
(stats_start_date && can_see_stats?) || (can_see_stats? && skip_to_dashboard?) ->
conn
Expand All @@ -72,7 +69,7 @@ defmodule PlausibleWeb.StatsController do
revenue_goals: list_revenue_goals(site),
funnels: list_funnels(site),
has_props: Plausible.Props.configured?(site),
scroll_depth_visible: scroll_depth_visible?,
scroll_depth_visible: ScrollDepth.feature_visible?(site, current_user),
stats_start_date: stats_start_date,
native_stats_start_date: NaiveDateTime.to_date(site.native_stats_start_at),
title: title(conn, site),
Expand Down Expand Up @@ -350,9 +347,6 @@ defmodule PlausibleWeb.StatsController do
shared_link = Plausible.Repo.preload(shared_link, site: :owner)
stats_start_date = Plausible.Sites.stats_start_date(shared_link.site)

scroll_depth_visible? =
Plausible.Stats.ScrollDepth.check_feature_visible!(shared_link.site, current_user)

conn
|> put_resp_header("x-robots-tag", "noindex, nofollow")
|> delete_resp_header("x-frame-options")
Expand All @@ -362,7 +356,7 @@ defmodule PlausibleWeb.StatsController do
revenue_goals: list_revenue_goals(shared_link.site),
funnels: list_funnels(shared_link.site),
has_props: Plausible.Props.configured?(shared_link.site),
scroll_depth_visible: scroll_depth_visible?,
scroll_depth_visible: ScrollDepth.feature_visible?(shared_link.site, current_user),
stats_start_date: stats_start_date,
native_stats_start_date: NaiveDateTime.to_date(shared_link.site.native_stats_start_at),
title: title(conn, shared_link.site),
Expand Down
4 changes: 1 addition & 3 deletions test/plausible/imported/csv_importer_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -1065,6 +1065,7 @@ defmodule Plausible.Imported.CSVImporterTest do
|> Enum.map(fn event -> Map.put(event, :hostname, "csv.test") end)

populate_stats(exported_site, stats)
Plausible.Sites.set_scroll_depth_visible_at(exported_site)

initial_context = %{
user: user,
Expand All @@ -1081,9 +1082,6 @@ defmodule Plausible.Imported.CSVImporterTest do
|> upload_csvs()
|> run_import()

assert %NaiveDateTime{} =
Plausible.Repo.reload!(exported_site).scroll_depth_visible_at

assert %SiteImport{
start_date: start_date,
end_date: end_date,
Expand Down
15 changes: 15 additions & 0 deletions test/plausible/ingestion/scroll_depth_visible_at_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
defmodule Plausible.Ingestion.ScrollDepthVisibleAtTest do
use Plausible.DataCase
use Plausible.Teams.Test

test "mark_scroll_depth_visible" do
site = new_site()
Plausible.Ingestion.ScrollDepthVisibleAt.mark_scroll_depth_visible(site.id)

assert Plausible.TestUtils.eventually(fn ->
site = Plausible.Repo.reload!(site)
success = not is_nil(site.scroll_depth_visible_at)
{success, success}
end)
end
end
30 changes: 30 additions & 0 deletions test/plausible_web/controllers/api/external_controller_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
use Plausible.ClickhouseRepo
use Plausible.Teams.Test

alias Plausible.Repo

@user_agent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
@user_agent_mobile "Mozilla/5.0 (Linux; Android 6.0; U007 Pro Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/44.0.2403.119 Mobile Safari/537.36"
@user_agent_tablet "Mozilla/5.0 (Linux; U; Android 4.2.2; it-it; Surfing TAB B 9.7 3G Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30"
Expand Down Expand Up @@ -1291,13 +1293,15 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
pageleave = get_events(site) |> Enum.find(&(&1.name == "pageleave"))

assert pageleave.scroll_depth == 255
assert not scroll_depth_visible_at_set?(site)
end

test "sd field is ignored if name is not pageleave", %{conn: conn, site: site} do
post(conn, "/api/event", %{n: "pageview", u: "https://test.com", d: site.domain, sd: 10})
post(conn, "/api/event", %{n: "custom_e", u: "https://test.com", d: site.domain, sd: 10})

assert [%{scroll_depth: 0}, %{scroll_depth: 0}] = get_events(site)
assert not scroll_depth_visible_at_set?(site)
end

test "ingests valid scroll_depth for a pageleave", %{conn: conn, site: site} do
Expand All @@ -1307,6 +1311,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
pageleave = get_events(site) |> Enum.find(&(&1.name == "pageleave"))

assert pageleave.scroll_depth == 25
assert scroll_depth_visible_at_set?(site)
end

test "ingests scroll_depth as 100 when sd > 100", %{conn: conn, site: site} do
Expand All @@ -1316,6 +1321,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
pageleave = get_events(site) |> Enum.find(&(&1.name == "pageleave"))

assert pageleave.scroll_depth == 100
assert scroll_depth_visible_at_set?(site)
end

test "ingests scroll_depth as 255 when sd is a string", %{conn: conn, site: site} do
Expand All @@ -1325,6 +1331,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
pageleave = get_events(site) |> Enum.find(&(&1.name == "pageleave"))

assert pageleave.scroll_depth == 255
assert not scroll_depth_visible_at_set?(site)
end

test "ingests scroll_depth as 255 when sd is a negative integer", %{conn: conn, site: site} do
Expand All @@ -1334,6 +1341,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
pageleave = get_events(site) |> Enum.find(&(&1.name == "pageleave"))

assert pageleave.scroll_depth == 255
assert not scroll_depth_visible_at_set?(site)
end

test "ingests valid scroll_depth for a engagement event", %{conn: conn, site: site} do
Expand All @@ -1343,6 +1351,23 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
event = get_events(site) |> Enum.find(&(&1.name == "engagement"))

assert event.scroll_depth == 25
assert scroll_depth_visible_at_set?(site)
end

test "does not update scroll_depth_visible_at twice", %{conn: conn, site: site} do
post(conn, "/api/event", %{n: "pageview", u: "https://test.com", d: site.domain})
post(conn, "/api/event", %{n: "pageleave", u: "https://test.com", d: site.domain, sd: 25})
Plausible.Event.WriteBuffer.flush()

site1 = Repo.reload!(site)
assert not is_nil(site1.scroll_depth_visible_at)

post(conn, "/api/event", %{n: "pageleave", u: "https://test.com", d: site.domain, sd: 50})
Plausible.Event.WriteBuffer.flush()

site2 = Repo.reload!(site)
assert not is_nil(site2.scroll_depth_visible_at)
assert site1.scroll_depth_visible_at == site2.scroll_depth_visible_at
end
end

Expand Down Expand Up @@ -3584,4 +3609,9 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do
)
)
end

defp scroll_depth_visible_at_set?(site) do
site = Repo.reload!(site)
not is_nil(site.scroll_depth_visible_at)
end
end
20 changes: 2 additions & 18 deletions test/plausible_web/controllers/stats_controller_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -85,23 +85,15 @@ defmodule PlausibleWeb.StatsControllerTest do

populate_stats(site, [build(:pageview)])

# No pageleaves yet - `scroll_depth_visible_at` will remain `nil`
html =
conn
|> get("/#{site.domain}")
|> html_response(200)

assert text_of_attr(html, @react_container, "data-scroll-depth-visible") == "false"

site = Repo.reload!(site)
assert is_nil(site.scroll_depth_visible_at)
Plausible.Sites.set_scroll_depth_visible_at(site)

populate_stats(site, [
build(:pageview, user_id: 123),
build(:pageleave, user_id: 123, scroll_depth: 20)
])

# Pageleaves exist now - `scroll_depth_visible_at` gets set to `utc_now`
html =
conn
|> get("/#{site.domain}")
Expand Down Expand Up @@ -1098,23 +1090,15 @@ defmodule PlausibleWeb.StatsControllerTest do
site = insert(:site)
link = insert(:shared_link, site: site)

# No pageleaves yet - `scroll_depth_visible_at` will remain `nil`
html =
conn
|> get("/share/#{site.domain}/?auth=#{link.slug}")
|> html_response(200)

assert text_of_attr(html, @react_container, "data-scroll-depth-visible") == "false"

site = Repo.reload!(site)
assert is_nil(site.scroll_depth_visible_at)

populate_stats(site, [
build(:pageview, user_id: 123),
build(:pageleave, user_id: 123, scroll_depth: 20)
])
Plausible.Sites.set_scroll_depth_visible_at(site)

# Pageleaves exist now - `scroll_depth_visible_at` gets set to `utc_now`
html =
conn
|> get("/share/#{site.domain}/?auth=#{link.slug}")
Expand Down

0 comments on commit 84aea97

Please sign in to comment.