diff --git a/lib/plausible/application.ex b/lib/plausible/application.ex index 1e0e6a23c551..98e2cfec58c9 100644 --- a/lib/plausible/application.ex +++ b/lib/plausible/application.ex @@ -120,6 +120,7 @@ defmodule Plausible.Application do setup_geolocation() Location.load_all() Plausible.Ingestion.Source.init() + Plausible.Ingestion.ScrollDepthVisibleAt.init() Plausible.Geo.await_loader() Supervisor.start_link(List.flatten(children), opts) diff --git a/lib/plausible/exports.ex b/lib/plausible/exports.ex index f082e5f5f2da..70f15b2b8f1e 100644 --- a/lib/plausible/exports.ex +++ b/lib/plausible/exports.ex @@ -418,8 +418,6 @@ defmodule Plausible.Exports do site = Plausible.Repo.get(Plausible.Site, site_id) current_user = current_user_id && Plausible.Repo.get(Plausible.Auth.User, current_user_id) - include_scroll_depth? = Plausible.Stats.ScrollDepth.check_feature_visible!(site, current_user) - base_q = from(e in sampled("events_v2"), where: ^export_filter(site_id, date_range), @@ -428,7 +426,7 @@ defmodule Plausible.Exports do order_by: selected_as(:date) ) - if include_scroll_depth? do + if Plausible.Stats.ScrollDepth.feature_visible?(site, current_user) do max_scroll_depth_per_visitor_q = from(e in "events_v2", where: ^export_filter(site_id, date_range), diff --git a/lib/plausible/ingestion/event.ex b/lib/plausible/ingestion/event.ex index e113ae8047ca..6b28d130c8ac 100644 --- a/lib/plausible/ingestion/event.ex +++ b/lib/plausible/ingestion/event.ex @@ -130,7 +130,8 @@ defmodule Plausible.Ingestion.Event do put_user_id: &put_user_id/2, validate_clickhouse_event: &validate_clickhouse_event/2, register_session: ®ister_session/2, - write_to_buffer: &write_to_buffer/2 + write_to_buffer: &write_to_buffer/2, + register_scroll_depth_visible: ®ister_scroll_depth_visible/2 ] end @@ -405,6 +406,15 @@ defmodule Plausible.Ingestion.Event do event end + defp register_scroll_depth_visible(%__MODULE__{clickhouse_event: ev} = event, _context) do + if is_nil(event.site.scroll_depth_visible_at) and ev.name in ["pageleave", "engagement"] and + not is_nil(ev.scroll_depth) and ev.scroll_depth >= 0 and ev.scroll_depth <= 100 do + Plausible.Ingestion.ScrollDepthVisibleAt.mark_scroll_depth_visible(event.site.id) + end + + event + end + @click_id_params ["gclid", "gbraid", "wbraid", "msclkid", "fbclid", "twclid"] defp get_click_id_param(nil), do: nil diff --git a/lib/plausible/ingestion/scroll_depth_visible_at.ex b/lib/plausible/ingestion/scroll_depth_visible_at.ex new file mode 100644 index 000000000000..215c9e880cd9 --- /dev/null +++ b/lib/plausible/ingestion/scroll_depth_visible_at.ex @@ -0,0 +1,44 @@ +defmodule Plausible.Ingestion.ScrollDepthVisibleAt do + @moduledoc """ + Module that updates the `scroll_depth_visible_at` column for a site when needed. + + This is called in a hot loop in ingestion, so it: + 1. Only updates the database once per site async (if SiteCache doesn't know about visibility yet) + 2. Does not retry the update if it fails, to be retried on server restart + """ + + require Logger + alias Plausible.Repo + + import Ecto.Query + + def init() do + :ets.new(__MODULE__, [ + :named_table, + :set, + :public, + {:read_concurrency, true}, + {:write_concurrency, true} + ]) + end + + def mark_scroll_depth_visible(site_id) do + if :ets.insert_new(__MODULE__, {site_id}) do + Task.start(fn -> attempt_update_repo(site_id) end) + end + end + + defp attempt_update_repo(site_id) do + Repo.update_all( + from(s in Plausible.Site, where: s.id == ^site_id and is_nil(s.scroll_depth_visible_at)), + set: [ + scroll_depth_visible_at: DateTime.utc_now() + ] + ) + rescue + error -> + Logger.error( + "Error updating scroll_depth_visible_at for site #{site_id}: #{inspect(error)}. This will not be retried until server restart." + ) + end +end diff --git a/lib/plausible/site/cache.ex b/lib/plausible/site/cache.ex index 1514bc006ae3..8cfba5196709 100644 --- a/lib/plausible/site/cache.ex +++ b/lib/plausible/site/cache.ex @@ -31,6 +31,7 @@ defmodule Plausible.Site.Cache do domain_changed_from ingest_rate_limit_scale_seconds ingest_rate_limit_threshold + scroll_depth_visible_at )a @impl true diff --git a/lib/plausible/stats/scroll_depth.ex b/lib/plausible/stats/scroll_depth.ex index 246c8a852723..4ebd80fe7c8b 100644 --- a/lib/plausible/stats/scroll_depth.ex +++ b/lib/plausible/stats/scroll_depth.ex @@ -3,11 +3,6 @@ defmodule Plausible.Stats.ScrollDepth do Module to check whether the scroll depth metric is available and visible for a site. """ - import Ecto.Query - require Logger - - alias Plausible.ClickhouseRepo - def feature_available?(site, user) do FunWithFlags.enabled?(:scroll_depth, for: user) || FunWithFlags.enabled?(:scroll_depth, for: site) @@ -16,48 +11,4 @@ defmodule Plausible.Stats.ScrollDepth do def feature_visible?(site, user) do feature_available?(site, user) && not is_nil(site.scroll_depth_visible_at) end - - @doc """ - Checks whether the scroll depth feature is visible for a site and updates the site record if it is. - - Note this function queries ClickHouse and may take a while to complete. - """ - def check_feature_visible!(site, user) do - cond do - not feature_available?(site, user) -> - false - - not is_nil(site.scroll_depth_visible_at) -> - true - - is_nil(site.scroll_depth_visible_at) -> - visible? = has_scroll_data_last_30d?(site) - - if visible? do - Plausible.Sites.set_scroll_depth_visible_at(site) - end - - visible? - end - end - - defp has_scroll_data_last_30d?(site) do - try do - ClickhouseRepo.exists?( - from(e in "events_v2", - where: - e.site_id == ^site.id and - e.name == "pageleave" and - e.timestamp >= fragment("toStartOfDay(now() - toIntervalDay(30))") and - e.scroll_depth > 0 and e.scroll_depth <= 100 - ) - ) - rescue - # Avoid propagating error to the user, bringing down the site. - error -> - Logger.error("Error checking scroll data for site #{site.id}: #{inspect(error)}") - - false - end - end end diff --git a/lib/plausible_web/controllers/stats_controller.ex b/lib/plausible_web/controllers/stats_controller.ex index 8d5dc4b8cbb3..4df58abbf2c8 100644 --- a/lib/plausible_web/controllers/stats_controller.ex +++ b/lib/plausible_web/controllers/stats_controller.ex @@ -45,7 +45,7 @@ defmodule PlausibleWeb.StatsController do use Plausible.Repo alias Plausible.Sites - alias Plausible.Stats.{Filters, Query} + alias Plausible.Stats.{Filters, Query, ScrollDepth} alias PlausibleWeb.Api plug(PlausibleWeb.Plugs.AuthorizeSiteAccess when action in [:stats, :csv_export]) @@ -59,9 +59,6 @@ defmodule PlausibleWeb.StatsController do dogfood_page_path = if demo, do: "/#{site.domain}", else: "/:dashboard" skip_to_dashboard? = conn.params["skip_to_dashboard"] == "true" - scroll_depth_visible? = - Plausible.Stats.ScrollDepth.check_feature_visible!(site, current_user) - cond do (stats_start_date && can_see_stats?) || (can_see_stats? && skip_to_dashboard?) -> conn @@ -72,7 +69,7 @@ defmodule PlausibleWeb.StatsController do revenue_goals: list_revenue_goals(site), funnels: list_funnels(site), has_props: Plausible.Props.configured?(site), - scroll_depth_visible: scroll_depth_visible?, + scroll_depth_visible: ScrollDepth.feature_visible?(site, current_user), stats_start_date: stats_start_date, native_stats_start_date: NaiveDateTime.to_date(site.native_stats_start_at), title: title(conn, site), @@ -350,9 +347,6 @@ defmodule PlausibleWeb.StatsController do shared_link = Plausible.Repo.preload(shared_link, site: :owner) stats_start_date = Plausible.Sites.stats_start_date(shared_link.site) - scroll_depth_visible? = - Plausible.Stats.ScrollDepth.check_feature_visible!(shared_link.site, current_user) - conn |> put_resp_header("x-robots-tag", "noindex, nofollow") |> delete_resp_header("x-frame-options") @@ -362,7 +356,7 @@ defmodule PlausibleWeb.StatsController do revenue_goals: list_revenue_goals(shared_link.site), funnels: list_funnels(shared_link.site), has_props: Plausible.Props.configured?(shared_link.site), - scroll_depth_visible: scroll_depth_visible?, + scroll_depth_visible: ScrollDepth.feature_visible?(shared_link.site, current_user), stats_start_date: stats_start_date, native_stats_start_date: NaiveDateTime.to_date(shared_link.site.native_stats_start_at), title: title(conn, shared_link.site), diff --git a/test/plausible/imported/csv_importer_test.exs b/test/plausible/imported/csv_importer_test.exs index a205355be5b9..41745a8f59bd 100644 --- a/test/plausible/imported/csv_importer_test.exs +++ b/test/plausible/imported/csv_importer_test.exs @@ -1065,6 +1065,7 @@ defmodule Plausible.Imported.CSVImporterTest do |> Enum.map(fn event -> Map.put(event, :hostname, "csv.test") end) populate_stats(exported_site, stats) + Plausible.Sites.set_scroll_depth_visible_at(exported_site) initial_context = %{ user: user, @@ -1081,9 +1082,6 @@ defmodule Plausible.Imported.CSVImporterTest do |> upload_csvs() |> run_import() - assert %NaiveDateTime{} = - Plausible.Repo.reload!(exported_site).scroll_depth_visible_at - assert %SiteImport{ start_date: start_date, end_date: end_date, diff --git a/test/plausible/ingestion/scroll_depth_visible_at_test.exs b/test/plausible/ingestion/scroll_depth_visible_at_test.exs new file mode 100644 index 000000000000..8eb23c7ef616 --- /dev/null +++ b/test/plausible/ingestion/scroll_depth_visible_at_test.exs @@ -0,0 +1,15 @@ +defmodule Plausible.Ingestion.ScrollDepthVisibleAtTest do + use Plausible.DataCase + use Plausible.Teams.Test + + test "mark_scroll_depth_visible" do + site = new_site() + Plausible.Ingestion.ScrollDepthVisibleAt.mark_scroll_depth_visible(site.id) + + assert Plausible.TestUtils.eventually(fn -> + site = Plausible.Repo.reload!(site) + success = not is_nil(site.scroll_depth_visible_at) + {success, success} + end) + end +end diff --git a/test/plausible_web/controllers/api/external_controller_test.exs b/test/plausible_web/controllers/api/external_controller_test.exs index c4d0e52b2315..75ecca940dad 100644 --- a/test/plausible_web/controllers/api/external_controller_test.exs +++ b/test/plausible_web/controllers/api/external_controller_test.exs @@ -3,6 +3,8 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do use Plausible.ClickhouseRepo use Plausible.Teams.Test + alias Plausible.Repo + @user_agent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36" @user_agent_mobile "Mozilla/5.0 (Linux; Android 6.0; U007 Pro Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/44.0.2403.119 Mobile Safari/537.36" @user_agent_tablet "Mozilla/5.0 (Linux; U; Android 4.2.2; it-it; Surfing TAB B 9.7 3G Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30" @@ -1291,6 +1293,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageleave = get_events(site) |> Enum.find(&(&1.name == "pageleave")) assert pageleave.scroll_depth == 255 + assert not scroll_depth_visible_at_set?(site) end test "sd field is ignored if name is not pageleave", %{conn: conn, site: site} do @@ -1298,6 +1301,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do post(conn, "/api/event", %{n: "custom_e", u: "https://test.com", d: site.domain, sd: 10}) assert [%{scroll_depth: 0}, %{scroll_depth: 0}] = get_events(site) + assert not scroll_depth_visible_at_set?(site) end test "ingests valid scroll_depth for a pageleave", %{conn: conn, site: site} do @@ -1307,6 +1311,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageleave = get_events(site) |> Enum.find(&(&1.name == "pageleave")) assert pageleave.scroll_depth == 25 + assert scroll_depth_visible_at_set?(site) end test "ingests scroll_depth as 100 when sd > 100", %{conn: conn, site: site} do @@ -1316,6 +1321,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageleave = get_events(site) |> Enum.find(&(&1.name == "pageleave")) assert pageleave.scroll_depth == 100 + assert scroll_depth_visible_at_set?(site) end test "ingests scroll_depth as 255 when sd is a string", %{conn: conn, site: site} do @@ -1325,6 +1331,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageleave = get_events(site) |> Enum.find(&(&1.name == "pageleave")) assert pageleave.scroll_depth == 255 + assert not scroll_depth_visible_at_set?(site) end test "ingests scroll_depth as 255 when sd is a negative integer", %{conn: conn, site: site} do @@ -1334,6 +1341,7 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do pageleave = get_events(site) |> Enum.find(&(&1.name == "pageleave")) assert pageleave.scroll_depth == 255 + assert not scroll_depth_visible_at_set?(site) end test "ingests valid scroll_depth for a engagement event", %{conn: conn, site: site} do @@ -1343,6 +1351,23 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do event = get_events(site) |> Enum.find(&(&1.name == "engagement")) assert event.scroll_depth == 25 + assert scroll_depth_visible_at_set?(site) + end + + test "does not update scroll_depth_visible_at twice", %{conn: conn, site: site} do + post(conn, "/api/event", %{n: "pageview", u: "https://test.com", d: site.domain}) + post(conn, "/api/event", %{n: "pageleave", u: "https://test.com", d: site.domain, sd: 25}) + Plausible.Event.WriteBuffer.flush() + + site1 = Repo.reload!(site) + assert not is_nil(site1.scroll_depth_visible_at) + + post(conn, "/api/event", %{n: "pageleave", u: "https://test.com", d: site.domain, sd: 50}) + Plausible.Event.WriteBuffer.flush() + + site2 = Repo.reload!(site) + assert not is_nil(site2.scroll_depth_visible_at) + assert site1.scroll_depth_visible_at == site2.scroll_depth_visible_at end end @@ -3584,4 +3609,9 @@ defmodule PlausibleWeb.Api.ExternalControllerTest do ) ) end + + defp scroll_depth_visible_at_set?(site) do + site = Repo.reload!(site) + not is_nil(site.scroll_depth_visible_at) + end end diff --git a/test/plausible_web/controllers/stats_controller_test.exs b/test/plausible_web/controllers/stats_controller_test.exs index 5fd9f620a66d..999c0d3818f4 100644 --- a/test/plausible_web/controllers/stats_controller_test.exs +++ b/test/plausible_web/controllers/stats_controller_test.exs @@ -85,7 +85,6 @@ defmodule PlausibleWeb.StatsControllerTest do populate_stats(site, [build(:pageview)]) - # No pageleaves yet - `scroll_depth_visible_at` will remain `nil` html = conn |> get("/#{site.domain}") @@ -93,15 +92,8 @@ defmodule PlausibleWeb.StatsControllerTest do assert text_of_attr(html, @react_container, "data-scroll-depth-visible") == "false" - site = Repo.reload!(site) - assert is_nil(site.scroll_depth_visible_at) + Plausible.Sites.set_scroll_depth_visible_at(site) - populate_stats(site, [ - build(:pageview, user_id: 123), - build(:pageleave, user_id: 123, scroll_depth: 20) - ]) - - # Pageleaves exist now - `scroll_depth_visible_at` gets set to `utc_now` html = conn |> get("/#{site.domain}") @@ -1098,7 +1090,6 @@ defmodule PlausibleWeb.StatsControllerTest do site = insert(:site) link = insert(:shared_link, site: site) - # No pageleaves yet - `scroll_depth_visible_at` will remain `nil` html = conn |> get("/share/#{site.domain}/?auth=#{link.slug}") @@ -1106,15 +1097,8 @@ defmodule PlausibleWeb.StatsControllerTest do assert text_of_attr(html, @react_container, "data-scroll-depth-visible") == "false" - site = Repo.reload!(site) - assert is_nil(site.scroll_depth_visible_at) - - populate_stats(site, [ - build(:pageview, user_id: 123), - build(:pageleave, user_id: 123, scroll_depth: 20) - ]) + Plausible.Sites.set_scroll_depth_visible_at(site) - # Pageleaves exist now - `scroll_depth_visible_at` gets set to `utc_now` html = conn |> get("/share/#{site.domain}/?auth=#{link.slug}")