diff --git a/CHANGELOG.md b/CHANGELOG.md index 2042f5df75ae..fa4292529979 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file. ### Added +- Allow querying `views_per_visit` with a time dimension in Stats API - Add `bounce_rate` to page-filtered Top Stats even when imports are included, but render a metric warning about imported data not included in `bounce_rate` tooltip. - Add `time_on_page` to page-filtered Top Stats even when imports are included, unless legacy time on page is in view. - Adds team_id to query debug metadata (saved in system.query_log log_comment column) @@ -21,6 +22,7 @@ All notable changes to this project will be documented in this file. ### Fixed +- Fixed Stats API timeseries returning time buckets falling outside the queried range - Fixed issue with all non-interactive events being counted as interactive - Fixed countries map countries staying highlighted on Chrome diff --git a/lib/plausible/stats/query_builder.ex b/lib/plausible/stats/query_builder.ex index 580daa5d1485..1d9f6e137ad4 100644 --- a/lib/plausible/stats/query_builder.ex +++ b/lib/plausible/stats/query_builder.ex @@ -514,11 +514,11 @@ defmodule Plausible.Stats.QueryBuilder do message: "Metric `#{metric}` cannot be queried with a filter on `event:page`." }} - length(query.dimensions) > 0 -> + Enum.any?(query.dimensions, &(not Time.time_dimension?(&1))) -> {:error, %QueryError{ code: :invalid_metrics, - message: "Metric `#{metric}` cannot be queried with `dimensions`." + message: "Metric `#{metric}` cannot be queried with non-time dimensions." }} true -> diff --git a/lib/plausible/stats/sql/expression.ex b/lib/plausible/stats/sql/expression.ex index c48ae9e0d09f..221c1900da0d 100644 --- a/lib/plausible/stats/sql/expression.ex +++ b/lib/plausible/stats/sql/expression.ex @@ -12,7 +12,7 @@ defmodule Plausible.Stats.SQL.Expression do import Ecto.Query - alias Plausible.Stats.{Query, Filters, SQL} + alias Plausible.Stats.{Query, Filters, SQL, Time} @no_ref "Direct / None" @no_channel "Direct" @@ -27,25 +27,61 @@ defmodule Plausible.Stats.SQL.Expression do end end - defmacrop time_slots(query, period_in_seconds) do + defmacrop time_slots(query, period_in_seconds, first, last) do quote do fragment( - "timeSlots(toTimeZone(?, ?), toUInt32(timeDiff(?, ?)), toUInt32(?))", + """ + timeSlots( + toTimeZone(greatest(?, ?), ?), + toUInt32(timeDiff(greatest(?, ?), least(?, ?))), + toUInt32(?) + ) + """, s.start, + ^unquote(first), ^unquote(query).timezone, s.start, + ^unquote(first), s.timestamp, + ^unquote(last), ^unquote(period_in_seconds) ) end end + def select_dimension(q, key, "time:month", :sessions, query) do + {_first, last_datetime} = Time.utc_boundaries(query) + + select_merge_as(q, [t], %{ + key => + fragment( + "toStartOfMonth(toTimeZone(least(?, ?), ?))", + t.timestamp, + ^last_datetime, + ^query.timezone + ) + }) + end + def select_dimension(q, key, "time:month", _table, query) do select_merge_as(q, [t], %{ key => fragment("toStartOfMonth(toTimeZone(?, ?))", t.timestamp, ^query.timezone) }) end + def select_dimension(q, key, "time:week", :sessions, query) do + {_first, last_datetime} = Time.utc_boundaries(query) + date_range = Query.date_range(query) + + select_merge_as(q, [t], %{ + key => + weekstart_not_before( + to_timezone(fragment("least(?, ?)", t.timestamp, ^last_datetime), ^query.timezone), + ^date_range.first + ) + }) + end + def select_dimension(q, key, "time:week", _table, query) do date_range = Query.date_range(query) @@ -58,6 +94,20 @@ defmodule Plausible.Stats.SQL.Expression do }) end + def select_dimension(q, key, "time:day", :sessions, query) do + {_first, last_datetime} = Time.utc_boundaries(query) + + select_merge_as(q, [t], %{ + key => + fragment( + "toDate(toTimeZone(least(?, ?), ?))", + t.timestamp, + ^last_datetime, + ^query.timezone + ) + }) + end + def select_dimension(q, key, "time:day", _table, query) do select_merge_as(q, [t], %{ key => fragment("toDate(toTimeZone(?, ?))", t.timestamp, ^query.timezone) @@ -69,8 +119,10 @@ defmodule Plausible.Stats.SQL.Expression do # timezone-aware. This means that for e.g. Asia/Katmandu (GMT+5:45) # to work, we divide time into 15-minute buckets and later combine these # via toStartOfHour + {first, last} = Time.utc_boundaries(query) + q - |> join(:inner, [s], time_slot in time_slots(query, 15 * 60), + |> join(:inner, [s], time_slot in time_slots(query, 15 * 60, first, last), as: :time_slot, hints: "ARRAY", on: true @@ -89,8 +141,10 @@ defmodule Plausible.Stats.SQL.Expression do # :NOTE: This is not exposed in Query APIv2 def select_dimension(q, key, "time:minute", :sessions, query) when query.smear_session_metrics do + {first, last} = Time.utc_boundaries(query) + q - |> join(:inner, [s], time_slot in time_slots(query, 60), + |> join(:inner, [s], time_slot in time_slots(query, 60, first, last), as: :time_slot, hints: "ARRAY", on: true diff --git a/test/plausible/stats/query/query_parse_and_build_test.exs b/test/plausible/stats/query/query_parse_and_build_test.exs index ef705cc883d1..eb4320b220db 100644 --- a/test/plausible/stats/query/query_parse_and_build_test.exs +++ b/test/plausible/stats/query/query_parse_and_build_test.exs @@ -2039,7 +2039,7 @@ defmodule Plausible.Stats.Query.QueryParseAndBuildTest do assert error == "Metric `views_per_visit` cannot be queried with a filter on `event:page`." end - test "fails validation with dimensions", %{site: site} do + test "fails validation with non-time dimensions", %{site: site} do params = %{ "site_id" => site.domain, "metrics" => ["views_per_visit"], @@ -2050,7 +2050,7 @@ defmodule Plausible.Stats.Query.QueryParseAndBuildTest do assert {:error, %QueryError{message: error}} = Query.parse_and_build(site, params, now: @now) - assert error == "Metric `views_per_visit` cannot be queried with `dimensions`." + assert error == "Metric `views_per_visit` cannot be queried with non-time dimensions." end end diff --git a/test/plausible/stats/query/query_test.exs b/test/plausible/stats/query/query_test.exs index 90c4671aabc8..60aa33ee793a 100644 --- a/test/plausible/stats/query/query_test.exs +++ b/test/plausible/stats/query/query_test.exs @@ -125,4 +125,196 @@ defmodule Plausible.Stats.QueryTest do ] end end + + describe "session smearing respects query date range boundaries" do + test "time:hour does not include buckets from outside the query range", + %{site: site} do + populate_stats(site, [ + build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 23:55:00]), + build(:pageview, user_id: 1, timestamp: ~N[2021-01-02 00:10:00]), + build(:pageview, user_id: 2, timestamp: ~N[2021-01-02 23:55:00]), + build(:pageview, user_id: 2, timestamp: ~N[2021-01-03 00:10:00]) + ]) + + {:ok, query} = + QueryBuilder.build(site, %ParsedQueryParams{ + metrics: [:visitors], + input_date_range: {:date_range, ~D[2021-01-02], ~D[2021-01-02]}, + dimensions: ["time:hour"], + include: %QueryInclude{total_rows: true} + }) + + %Stats.QueryResult{results: results, meta: meta} = Stats.query(site, query) + + assert results == [ + %{dimensions: ["2021-01-02 00:00:00"], metrics: [1]}, + %{dimensions: ["2021-01-02 23:00:00"], metrics: [1]} + ] + + assert meta[:total_rows] == 2 + end + + test "time:hour does not include buckets from outside the query range (non-UTC timezone)", + %{site: site} do + # America/New_York is UTC-5 in January + site = %{site | timezone: "America/New_York"} + + populate_stats(site, [ + # 2020-12-31 23:55 in NYC (outside of query range) + build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 04:55:00]), + # 2021-01-01 00:10 in NYC (in query range) + build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 05:10:00]), + # 2021-01-01 23:55 in NYC (in query range) + build(:pageview, user_id: 1, timestamp: ~N[2021-01-02 04:55:00]), + # 2021-01-02 00:10 in NYC (outside of query range) + build(:pageview, user_id: 1, timestamp: ~N[2021-01-02 05:10:00]) + ]) + + {:ok, query} = + QueryBuilder.build(site, %ParsedQueryParams{ + metrics: [:visitors], + input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-01-01]}, + dimensions: ["time:hour"] + }) + + %Stats.QueryResult{results: results} = Stats.query(site, query) + + assert results == [ + %{dimensions: ["2021-01-01 00:00:00"], metrics: [1]}, + %{dimensions: ["2021-01-01 23:00:00"], metrics: [1]} + ] + end + + test "time:minute does not include buckets from outside the query range", + %{site: site} do + populate_stats(site, [ + build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:05:00]), + build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:20:00]), + build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 00:08:00]), + build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 00:10:00]) + ]) + + {:ok, query} = + QueryBuilder.build(site, %ParsedQueryParams{ + metrics: [:visitors], + input_date_range: {:datetime_range, ~U[2021-01-01 00:08:00Z], ~U[2021-01-01 00:12:00Z]}, + dimensions: ["time:minute"], + include: %QueryInclude{total_rows: true} + }) + + %Stats.QueryResult{results: results, meta: meta} = Stats.query(site, query) + + assert results == [ + %{dimensions: ["2021-01-01 00:08:00"], metrics: [2]}, + %{dimensions: ["2021-01-01 00:09:00"], metrics: [2]}, + %{dimensions: ["2021-01-01 00:10:00"], metrics: [2]}, + %{dimensions: ["2021-01-01 00:11:00"], metrics: [1]}, + %{dimensions: ["2021-01-01 00:12:00"], metrics: [1]} + ] + + assert meta[:total_rows] == 5 + end + + test "time:minute does not include buckets from outside the query range (non-UTC timezone)", + %{site: site} do + # America/New_York is UTC-5 in January + site = %{site | timezone: "America/New_York"} + + populate_stats(site, [ + # 2020-12-31 23:59:00 in NYC (outside of queried range) + build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 04:59:00]), + # 2021-01-01 00:02:00 in NYC (in queried range) + build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 05:02:00]), + # 2021-01-01 23:59:00 in NYC (in queried range) + build(:pageview, user_id: 2, timestamp: ~N[2021-01-02 04:59:00]), + # 2021-01-02 00:01:00 in NYC (outside of queried range) + build(:pageview, user_id: 2, timestamp: ~N[2021-01-02 05:01:00]) + ]) + + {:ok, query} = + QueryBuilder.build(site, %ParsedQueryParams{ + metrics: [:visitors], + input_date_range: :day, + relative_date: ~D[2021-01-01], + dimensions: ["time:minute"] + }) + + %Stats.QueryResult{results: results} = Stats.query(site, query) + + assert results == [ + %{dimensions: ["2021-01-01 00:00:00"], metrics: [1]}, + %{dimensions: ["2021-01-01 00:01:00"], metrics: [1]}, + %{dimensions: ["2021-01-01 00:02:00"], metrics: [1]}, + %{dimensions: ["2021-01-01 23:59:00"], metrics: [1]} + ] + end + + test "time:day clamps sessions extending past the query range end into the last bucket", + %{site: site} do + populate_stats(site, [ + build(:pageview, user_id: 1, timestamp: ~N[2021-01-31 23:55:00]), + build(:pageview, user_id: 1, timestamp: ~N[2021-02-01 00:05:00]) + ]) + + {:ok, query} = + QueryBuilder.build(site, %ParsedQueryParams{ + metrics: [:visitors], + input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-01-31]}, + dimensions: ["time:day"] + }) + + %Stats.QueryResult{results: results} = Stats.query(site, query) + + # Without clamping the session would bucket to "2021-02-01" (outside range) + assert results == [ + %{dimensions: ["2021-01-31"], metrics: [1]} + ] + end + + test "time:week clamps sessions extending past the query range end into the last bucket", + %{site: site} do + populate_stats(site, [ + build(:pageview, user_id: 1, timestamp: ~N[2021-01-31 23:55:00]), + build(:pageview, user_id: 1, timestamp: ~N[2021-02-01 00:05:00]) + ]) + + {:ok, query} = + QueryBuilder.build(site, %ParsedQueryParams{ + metrics: [:visitors], + input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-01-31]}, + dimensions: ["time:week"] + }) + + %Stats.QueryResult{results: results} = Stats.query(site, query) + + # Without clamping the session would bucket to "2021-02-01" (outside range). + # Clamped to Jan 31 23:59:59 -> toMonday(Jan 31) = Jan 25. + assert results == [ + %{dimensions: ["2021-01-25"], metrics: [1]} + ] + end + + test "time:month clamps sessions extending past the query range end into the last bucket", + %{site: site} do + populate_stats(site, [ + build(:pageview, user_id: 1, timestamp: ~N[2021-02-28 23:55:00]), + build(:pageview, user_id: 1, timestamp: ~N[2021-03-01 00:05:00]) + ]) + + {:ok, query} = + QueryBuilder.build(site, %ParsedQueryParams{ + metrics: [:visitors], + input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-02-28]}, + dimensions: ["time:month"] + }) + + %Stats.QueryResult{results: results} = Stats.query(site, query) + + # Without clamping the session would bucket to "2021-03-01" (outside range). + # Clamped to Feb 28 23:59:59 -> toStartOfMonth -> Feb 1. + assert results == [ + %{dimensions: ["2021-02-01"], metrics: [1]} + ] + end + end end diff --git a/test/plausible_web/controllers/api/external_stats_controller/query_imported_test.exs b/test/plausible_web/controllers/api/external_stats_controller/query_imported_test.exs index 3073dd537b2f..d3317f874033 100644 --- a/test/plausible_web/controllers/api/external_stats_controller/query_imported_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/query_imported_test.exs @@ -341,6 +341,42 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryImportedTest do end end + describe "timeseries with imported data" do + setup :create_site_import + + test "views_per_visit breakdown by time:month", %{ + conn: conn, + site: site, + site_import: site_import + } do + populate_stats(site, site_import.id, [ + # January 2021 - only imported + build(:imported_visitors, date: ~D[2021-01-01], visits: 6, pageviews: 7), + # March 2021 - imported + native combined + build(:imported_visitors, date: ~D[2021-03-01], visits: 1, pageviews: 4), + build(:pageview, user_id: 1, timestamp: ~N[2021-03-15 00:00:00]), + build(:pageview, user_id: 1, timestamp: ~N[2021-03-15 00:05:00]), + # September 2021 - only native + build(:pageview, user_id: 2, timestamp: ~N[2021-09-01 00:00:00]) + ]) + + conn = + post(conn, "/api/v2/query", %{ + "site_id" => site.domain, + "metrics" => ["views_per_visit"], + "date_range" => ["2021-01-01", "2021-12-31"], + "dimensions" => ["time:month"], + "include" => %{"imports" => true} + }) + + assert json_response(conn, 200)["results"] == [ + %{"dimensions" => ["2021-01-01"], "metrics" => [1.17]}, + %{"dimensions" => ["2021-03-01"], "metrics" => [3.0]}, + %{"dimensions" => ["2021-09-01"], "metrics" => [1.0]} + ] + end + end + test "breaks down all metrics by visit:referrer with imported data", %{conn: conn, site: site} do site_import = insert(:site_import, diff --git a/test/plausible_web/controllers/api/external_stats_controller/query_test.exs b/test/plausible_web/controllers/api/external_stats_controller/query_test.exs index 810d7771f852..a318e543f099 100644 --- a/test/plausible_web/controllers/api/external_stats_controller/query_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/query_test.exs @@ -3527,6 +3527,34 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do %{"dimensions" => ["/"], "metrics" => [2, 2, 2, 2, 50, 300]} ] end + + test "views_per_visit in a time:week dimension query", %{ + conn: conn, + site: site + } do + populate_stats(site, [ + build(:pageview, user_id: 1, timestamp: ~N[2021-01-04 00:00:00]), + build(:pageview, user_id: 1, timestamp: ~N[2021-01-04 00:05:00]), + build(:pageview, user_id: 2, timestamp: ~N[2021-01-18 00:00:00]), + build(:pageview, user_id: 2, timestamp: ~N[2021-01-18 00:05:00]), + build(:pageview, user_id: 2, timestamp: ~N[2021-01-18 00:10:00]) + ]) + + conn = + post(conn, "/api/v2/query", %{ + "site_id" => site.domain, + "metrics" => ["views_per_visit"], + "date_range" => ["2021-01-01", "2021-01-28"], + "dimensions" => ["time:week"] + }) + + %{"results" => results} = json_response(conn, 200) + + assert results == [ + %{"dimensions" => ["2021-01-04"], "metrics" => [2.0]}, + %{"dimensions" => ["2021-01-18"], "metrics" => [3.0]} + ] + end end test "filtering by custom event property", %{conn: conn, site: site} do diff --git a/test/plausible_web/controllers/api/external_stats_controller/query_validations_test.exs b/test/plausible_web/controllers/api/external_stats_controller/query_validations_test.exs index 1418f1ca2de1..fedf806a71df 100644 --- a/test/plausible_web/controllers/api/external_stats_controller/query_validations_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/query_validations_test.exs @@ -196,10 +196,11 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryValidationsTest do } end - test "validates that metric views_per_visit cannot be used together with dimensions", %{ - conn: conn, - site: site - } do + test "validates that metric views_per_visit cannot be used together with non-time dimensions", + %{ + conn: conn, + site: site + } do conn = post(conn, "/api/v2/query", %{ "site_id" => site.domain, @@ -209,7 +210,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryValidationsTest do }) assert json_response(conn, 400) == %{ - "error" => "Metric `views_per_visit` cannot be queried with `dimensions`." + "error" => "Metric `views_per_visit` cannot be queried with non-time dimensions." } end diff --git a/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs b/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs index 21e1c52bc8fd..214ee469c5b4 100644 --- a/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs @@ -620,7 +620,7 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do assert %{"plot" => plot} = json_response(conn, 200) - assert plot == [1, 1, 0, 0, 0] + assert plot == [1, 1, 0, 0, 1] end end