Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file.

### Added

- Allow querying `views_per_visit` with a time dimension in Stats API
- Add `bounce_rate` to page-filtered Top Stats even when imports are included, but render a metric warning about imported data not included in `bounce_rate` tooltip.
- Add `time_on_page` to page-filtered Top Stats even when imports are included, unless legacy time on page is in view.
- Adds team_id to query debug metadata (saved in system.query_log log_comment column)
Expand All @@ -21,6 +22,7 @@ All notable changes to this project will be documented in this file.

### Fixed

- Fixed Stats API timeseries returning time buckets falling outside the queried range
- Fixed issue with all non-interactive events being counted as interactive
- Fixed countries map countries staying highlighted on Chrome

Expand Down
4 changes: 2 additions & 2 deletions lib/plausible/stats/query_builder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -514,11 +514,11 @@ defmodule Plausible.Stats.QueryBuilder do
message: "Metric `#{metric}` cannot be queried with a filter on `event:page`."
}}

length(query.dimensions) > 0 ->
Enum.any?(query.dimensions, &(not Time.time_dimension?(&1))) ->
{:error,
%QueryError{
code: :invalid_metrics,
message: "Metric `#{metric}` cannot be queried with `dimensions`."
message: "Metric `#{metric}` cannot be queried with non-time dimensions."
}}

true ->
Expand Down
64 changes: 59 additions & 5 deletions lib/plausible/stats/sql/expression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ defmodule Plausible.Stats.SQL.Expression do

import Ecto.Query

alias Plausible.Stats.{Query, Filters, SQL}
alias Plausible.Stats.{Query, Filters, SQL, Time}

@no_ref "Direct / None"
@no_channel "Direct"
Expand All @@ -27,25 +27,61 @@ defmodule Plausible.Stats.SQL.Expression do
end
end

defmacrop time_slots(query, period_in_seconds) do
defmacrop time_slots(query, period_in_seconds, first, last) do
quote do
fragment(
"timeSlots(toTimeZone(?, ?), toUInt32(timeDiff(?, ?)), toUInt32(?))",
"""
timeSlots(
toTimeZone(greatest(?, ?), ?),
toUInt32(timeDiff(greatest(?, ?), least(?, ?))),
toUInt32(?)
)
""",
s.start,
^unquote(first),
^unquote(query).timezone,
s.start,
^unquote(first),
s.timestamp,
^unquote(last),
^unquote(period_in_seconds)
)
end
end

def select_dimension(q, key, "time:month", :sessions, query) do
{_first, last_datetime} = Time.utc_boundaries(query)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of re-calculating, can we not access query.utc_time_range instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function itself uses query.utc_time_range under the hood. The function also evaluates start as the latest of site.native_stats_start_at and utc_time_range.first.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not a huge deal, and sort of micro-optimisation, but maybe it would be worth it to ensure Time.utc_boundaries is called once per query (given of course that none of the used parameters change. Perhaps its result could be stored under query.computed_utc_time_range or query.effective_utc_time_range?

BTW an absolutely out of scope nit but:

  defp beginning_of_time(candidate, native_stats_start_at) do
    if NaiveDateTime.after?(native_stats_start_at, candidate) do
      native_stats_start_at
    else
      candidate
    end
  end

could be rewritten as:

  defp beginning_of_time(candidate, native_stats_start_at) do
    Enum.max([candidate, native_stats_start_at], NaiveDateTime)
  end

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not a huge deal, and sort of micro-optimisation, but maybe it would be worth it to ensure Time.utc_boundaries is called once per query (given of course that none of the used parameters change. Perhaps its result could be stored under query.computed_utc_time_range or query.effective_utc_time_range?

Yeah, I guess the main difference between query.utc_time_range and Time.utc_boundaries is that utc_boundaries is strictly used for native queries, while utc_time_range also defines the range for imported data. So a fitting name would be native_utc_time_range.

There's also another difference -- one is NaiveDateTime and the other is DateTime.


select_merge_as(q, [t], %{
key =>
fragment(
"toStartOfMonth(toTimeZone(least(?, ?), ?))",
t.timestamp,
^last_datetime,
^query.timezone
)
})
end

def select_dimension(q, key, "time:month", _table, query) do
select_merge_as(q, [t], %{
key => fragment("toStartOfMonth(toTimeZone(?, ?))", t.timestamp, ^query.timezone)
})
end

def select_dimension(q, key, "time:week", :sessions, query) do
{_first, last_datetime} = Time.utc_boundaries(query)
date_range = Query.date_range(query)

select_merge_as(q, [t], %{
key =>
weekstart_not_before(
to_timezone(fragment("least(?, ?)", t.timestamp, ^last_datetime), ^query.timezone),
^date_range.first
)
})
end

def select_dimension(q, key, "time:week", _table, query) do
date_range = Query.date_range(query)

Expand All @@ -58,6 +94,20 @@ defmodule Plausible.Stats.SQL.Expression do
})
end

def select_dimension(q, key, "time:day", :sessions, query) do
{_first, last_datetime} = Time.utc_boundaries(query)

select_merge_as(q, [t], %{
key =>
fragment(
"toDate(toTimeZone(least(?, ?), ?))",
t.timestamp,
^last_datetime,
^query.timezone
)
})
end

def select_dimension(q, key, "time:day", _table, query) do
select_merge_as(q, [t], %{
key => fragment("toDate(toTimeZone(?, ?))", t.timestamp, ^query.timezone)
Expand All @@ -69,8 +119,10 @@ defmodule Plausible.Stats.SQL.Expression do
# timezone-aware. This means that for e.g. Asia/Katmandu (GMT+5:45)
# to work, we divide time into 15-minute buckets and later combine these
# via toStartOfHour
{first, last} = Time.utc_boundaries(query)

q
|> join(:inner, [s], time_slot in time_slots(query, 15 * 60),
|> join(:inner, [s], time_slot in time_slots(query, 15 * 60, first, last),
as: :time_slot,
hints: "ARRAY",
on: true
Expand All @@ -89,8 +141,10 @@ defmodule Plausible.Stats.SQL.Expression do
# :NOTE: This is not exposed in Query APIv2
def select_dimension(q, key, "time:minute", :sessions, query)
when query.smear_session_metrics do
{first, last} = Time.utc_boundaries(query)

q
|> join(:inner, [s], time_slot in time_slots(query, 60),
|> join(:inner, [s], time_slot in time_slots(query, 60, first, last),
as: :time_slot,
hints: "ARRAY",
on: true
Expand Down
4 changes: 2 additions & 2 deletions test/plausible/stats/query/query_parse_and_build_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2039,7 +2039,7 @@ defmodule Plausible.Stats.Query.QueryParseAndBuildTest do
assert error == "Metric `views_per_visit` cannot be queried with a filter on `event:page`."
end

test "fails validation with dimensions", %{site: site} do
test "fails validation with non-time dimensions", %{site: site} do
params = %{
"site_id" => site.domain,
"metrics" => ["views_per_visit"],
Expand All @@ -2050,7 +2050,7 @@ defmodule Plausible.Stats.Query.QueryParseAndBuildTest do
assert {:error, %QueryError{message: error}} =
Query.parse_and_build(site, params, now: @now)

assert error == "Metric `views_per_visit` cannot be queried with `dimensions`."
assert error == "Metric `views_per_visit` cannot be queried with non-time dimensions."
end
end

Expand Down
192 changes: 192 additions & 0 deletions test/plausible/stats/query/query_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,196 @@ defmodule Plausible.Stats.QueryTest do
]
end
end

describe "session smearing respects query date range boundaries" do
test "time:hour does not include buckets from outside the query range",
%{site: site} do
populate_stats(site, [
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 23:55:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-02 00:10:00]),
build(:pageview, user_id: 2, timestamp: ~N[2021-01-02 23:55:00]),
build(:pageview, user_id: 2, timestamp: ~N[2021-01-03 00:10:00])
])

{:ok, query} =
QueryBuilder.build(site, %ParsedQueryParams{
metrics: [:visitors],
input_date_range: {:date_range, ~D[2021-01-02], ~D[2021-01-02]},
dimensions: ["time:hour"],
include: %QueryInclude{total_rows: true}
})

%Stats.QueryResult{results: results, meta: meta} = Stats.query(site, query)

assert results == [
%{dimensions: ["2021-01-02 00:00:00"], metrics: [1]},
%{dimensions: ["2021-01-02 23:00:00"], metrics: [1]}
]

assert meta[:total_rows] == 2
end

test "time:hour does not include buckets from outside the query range (non-UTC timezone)",
%{site: site} do
# America/New_York is UTC-5 in January
site = %{site | timezone: "America/New_York"}

populate_stats(site, [
# 2020-12-31 23:55 in NYC (outside of query range)
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 04:55:00]),
# 2021-01-01 00:10 in NYC (in query range)
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 05:10:00]),
# 2021-01-01 23:55 in NYC (in query range)
build(:pageview, user_id: 1, timestamp: ~N[2021-01-02 04:55:00]),
# 2021-01-02 00:10 in NYC (outside of query range)
build(:pageview, user_id: 1, timestamp: ~N[2021-01-02 05:10:00])
])

{:ok, query} =
QueryBuilder.build(site, %ParsedQueryParams{
metrics: [:visitors],
input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-01-01]},
dimensions: ["time:hour"]
})

%Stats.QueryResult{results: results} = Stats.query(site, query)

assert results == [
%{dimensions: ["2021-01-01 00:00:00"], metrics: [1]},
%{dimensions: ["2021-01-01 23:00:00"], metrics: [1]}
]
end

test "time:minute does not include buckets from outside the query range",
%{site: site} do
populate_stats(site, [
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:05:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:20:00]),
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 00:08:00]),
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 00:10:00])
])

{:ok, query} =
QueryBuilder.build(site, %ParsedQueryParams{
metrics: [:visitors],
input_date_range: {:datetime_range, ~U[2021-01-01 00:08:00Z], ~U[2021-01-01 00:12:00Z]},
dimensions: ["time:minute"],
include: %QueryInclude{total_rows: true}
})

%Stats.QueryResult{results: results, meta: meta} = Stats.query(site, query)

assert results == [
%{dimensions: ["2021-01-01 00:08:00"], metrics: [2]},
%{dimensions: ["2021-01-01 00:09:00"], metrics: [2]},
%{dimensions: ["2021-01-01 00:10:00"], metrics: [2]},
%{dimensions: ["2021-01-01 00:11:00"], metrics: [1]},
%{dimensions: ["2021-01-01 00:12:00"], metrics: [1]}
]

assert meta[:total_rows] == 5
end

test "time:minute does not include buckets from outside the query range (non-UTC timezone)",
%{site: site} do
# America/New_York is UTC-5 in January
site = %{site | timezone: "America/New_York"}

populate_stats(site, [
# 2020-12-31 23:59:00 in NYC (outside of queried range)
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 04:59:00]),
# 2021-01-01 00:02:00 in NYC (in queried range)
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 05:02:00]),
# 2021-01-01 23:59:00 in NYC (in queried range)
build(:pageview, user_id: 2, timestamp: ~N[2021-01-02 04:59:00]),
# 2021-01-02 00:01:00 in NYC (outside of queried range)
build(:pageview, user_id: 2, timestamp: ~N[2021-01-02 05:01:00])
])

{:ok, query} =
QueryBuilder.build(site, %ParsedQueryParams{
metrics: [:visitors],
input_date_range: :day,
relative_date: ~D[2021-01-01],
dimensions: ["time:minute"]
})

%Stats.QueryResult{results: results} = Stats.query(site, query)

assert results == [
%{dimensions: ["2021-01-01 00:00:00"], metrics: [1]},
%{dimensions: ["2021-01-01 00:01:00"], metrics: [1]},
%{dimensions: ["2021-01-01 00:02:00"], metrics: [1]},
%{dimensions: ["2021-01-01 23:59:00"], metrics: [1]}
]
end

test "time:day clamps sessions extending past the query range end into the last bucket",
%{site: site} do
populate_stats(site, [
build(:pageview, user_id: 1, timestamp: ~N[2021-01-31 23:55:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-02-01 00:05:00])
])

{:ok, query} =
QueryBuilder.build(site, %ParsedQueryParams{
metrics: [:visitors],
input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-01-31]},
dimensions: ["time:day"]
})

%Stats.QueryResult{results: results} = Stats.query(site, query)

# Without clamping the session would bucket to "2021-02-01" (outside range)
assert results == [
%{dimensions: ["2021-01-31"], metrics: [1]}
]
end

test "time:week clamps sessions extending past the query range end into the last bucket",
%{site: site} do
populate_stats(site, [
build(:pageview, user_id: 1, timestamp: ~N[2021-01-31 23:55:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-02-01 00:05:00])
])

{:ok, query} =
QueryBuilder.build(site, %ParsedQueryParams{
metrics: [:visitors],
input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-01-31]},
dimensions: ["time:week"]
})

%Stats.QueryResult{results: results} = Stats.query(site, query)

# Without clamping the session would bucket to "2021-02-01" (outside range).
# Clamped to Jan 31 23:59:59 -> toMonday(Jan 31) = Jan 25.
assert results == [
%{dimensions: ["2021-01-25"], metrics: [1]}
]
end

test "time:month clamps sessions extending past the query range end into the last bucket",
%{site: site} do
populate_stats(site, [
build(:pageview, user_id: 1, timestamp: ~N[2021-02-28 23:55:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-03-01 00:05:00])
])

{:ok, query} =
QueryBuilder.build(site, %ParsedQueryParams{
metrics: [:visitors],
input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-02-28]},
dimensions: ["time:month"]
})

%Stats.QueryResult{results: results} = Stats.query(site, query)

# Without clamping the session would bucket to "2021-03-01" (outside range).
# Clamped to Feb 28 23:59:59 -> toStartOfMonth -> Feb 1.
assert results == [
%{dimensions: ["2021-02-01"], metrics: [1]}
]
end
end
end
Loading
Loading