Skip to content

Commit 27d019d

Browse files
authored
feat: add user activity indicator (#950)
* feat: add new User Activity indicator * feat: add user activity indicator to topics * tests: remove wrongly committed received files * tests: remove unnecessary tests * tests: fix failing tests * chore: add to changelog * feat: improve figure and add trend line * refactor: incorporate feedback and remove unneeded code * fix: fill years with no user activity with zeros * feat: do not create figure if no user activity * chore: rerun cassettes * refactor: remove unused vcr cassettes decorator * chore: rerun cassettes * refactor: remove unused pytest params
1 parent 4ce7999 commit 27d019d

15 files changed

+5963
-6
lines changed

CHANGELOG.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,6 @@
22

33
## Current Main
44

5-
### New Feature
6-
7-
- currentness: Optionally use ohsomeDB to compute Currentness indicator ([#944] [#949])
8-
95
### Bug Fixes
106

117
- fix(currentness): make monthly buckets starting at beginning of a month ([#953])
@@ -14,10 +10,15 @@
1410
- Only the land cover completeness indicator will request area/density for
1511
land-cover
1612

13+
### New Features
14+
15+
- currentness: Optionally use ohsomeDB to compute Currentness indicator ([#944] [#949])
16+
- user-activity: add new no-quality indicator about user-activity ([#950])
17+
1718
[#944]: https://github.com/GIScience/ohsome-quality-api/pull/944
1819
[#949]: https://github.com/GIScience/ohsome-quality-api/pull/949
1920
[#953]: https://github.com/GIScience/ohsome-quality-api/pull/953
20-
21+
[#950]: https://github.com/GIScience/ohsome-quality-api/pull/950
2122

2223
## Release 1.12.0
2324

ohsome_quality_api/indicators/indicators.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,10 @@ land-cover-completeness:
7878
quality_dimension: completeness
7979
description: >-
8080
Percentage of the area of interest that is covered by OpenStreetMap land cover data.
81+
user-activity:
82+
name: User Activity
83+
projects:
84+
- bkg
85+
quality_dimension: none
86+
description: >-
87+
User activity over time.

ohsome_quality_api/indicators/user_activity/__init__.py

Whitespace-only changes.
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
import json
2+
import logging
3+
from dataclasses import dataclass
4+
from pathlib import Path
5+
from statistics import median
6+
from string import Template
7+
8+
import numpy as np
9+
import plotly.graph_objects as pgo
10+
from geojson import Feature
11+
from ohsome_filter_to_sql.main import ohsome_filter_to_sql
12+
13+
from ohsome_quality_api.config import get_config_value
14+
from ohsome_quality_api.geodatabase import client
15+
from ohsome_quality_api.indicators.base import BaseIndicator
16+
from ohsome_quality_api.topics.models import BaseTopic as Topic
17+
18+
19+
@dataclass
class Bin:
    """Bin or bucket of users.

    Both lists are index-aligned: ``users_abs[i]`` is the user count for the
    period identified by ``timestamps[i]``.

    As filled by ``UserActivity.preprocess`` the periods are calendar months
    ordered from newest to oldest, so an index denotes the number of months
    since the latest timestamp.
    """

    users_abs: list  # number of distinct users per period
    timestamps: list  # period timestamps (month, truncated to its start by the query)
28+
29+
30+
class UserActivity(BaseIndicator):
    """Indicator reporting how many distinct users were active per month.

    ``preprocess`` fetches the monthly user counts from ohsomeDB,
    ``calculate`` writes the textual result description and
    ``create_figure`` builds a Plotly figure (bars, weighted average and a
    linear trend over the most recent 36 months).
    """

    def __init__(
        self,
        topic: Topic,
        feature: Feature,
    ) -> None:
        super().__init__(topic=topic, feature=feature)
        # Stays None until preprocess() received at least one row.
        self.bin_total = None

    async def preprocess(self) -> None:
        """Query monthly distinct user counts and store them in ``bin_total``.

        The rows are stored in reversed query order. The query sorts by month
        ascending, so index 0 of the bin is the most recent month.
        """
        where = ohsome_filter_to_sql(self.topic.filter)
        with open(
            Path(__file__).parent / "query.sql", "r", encoding="utf-8"
        ) as file:
            template = file.read()
        query = Template(template).substitute(
            {
                "filter": where,
                "contributions_table": get_config_value("ohsomedb_contributions_table"),
            }
        )
        results = await client.fetch(
            query, json.dumps(self.feature["geometry"]), database="ohsomedb"
        )
        if len(results) == 0:
            # Nothing fetched: bin_total stays None, which calculate() and
            # create_figure() treat as "no user activity".
            return
        timestamps = []
        users_abs = []
        for r in reversed(results):
            timestamps.append(r[0])
            users_abs.append(r[1])
        self.bin_total = Bin(
            users_abs,
            timestamps,
        )
        self.result.timestamp_osm = timestamps[0]

    def _total_users(self) -> int:
        """Return the sum of all monthly user counts (0 if nothing was fetched)."""
        if self.bin_total is None:
            return 0
        return sum(self.bin_total.users_abs)

    def calculate(self) -> None:
        """Write the result description.

        If no user activity was recorded only the edge case message is set.
        Otherwise the description states the median of monthly users for the
        covered period, followed by the label description.
        """
        edge_cases = check_major_edge_cases(self._total_users())
        if edge_cases:
            self.result.description = edge_cases
            return
        self.result.description = ""
        label_description = self.templates.label_description[self.result.label]
        self.result.description += Template(
            self.templates.result_description
        ).substitute(
            median_users=f"{int(median(self.bin_total.users_abs))}",
            # Bins are ordered newest first: last entry is the oldest month.
            from_timestamp=self.bin_total.timestamps[-1].strftime("%b %Y"),
            to_timestamp=self.bin_total.timestamps[0].strftime("%b %Y"),
        )
        self.result.description += "\n" + label_description

    def create_figure(self) -> None:
        """Build the Plotly figure and store it as dict in ``result.figure``.

        No figure is created if no user activity was recorded.
        """
        if check_major_edge_cases(self._total_users()):
            logging.info("No user activity. Skipping figure creation.")
            return
        fig = pgo.Figure()
        bucket = self.bin_total

        values = bucket.users_abs
        timestamps = bucket.timestamps

        # Linearly weighted average over a window of up to 12 entries. The
        # entry at index i gets the highest weight.
        # NOTE(review): values are ordered newest first (see preprocess), so
        # the window of index i consists of that month plus up to 11 *newer*
        # months. Confirm this direction is intended; a trailing average
        # would require chronological order.
        window = 12
        weights = np.arange(1, window + 1)
        weighted_avg = []

        for i in range(len(values)):
            start = max(0, i - window + 1)
            window_vals = values[start : i + 1]
            # Shorter windows at the start use the highest weights only.
            window_weights = weights[-len(window_vals) :]
            avg = np.dot(window_vals, window_weights) / window_weights.sum()
            weighted_avg.append(avg)

        # regression trend line for the last 36 months
        # (the first 36 entries, since index 0 is the most recent month)
        if len(values) >= 36:
            x = np.arange(len(values))
            x_last = x[:36]
            y_last = np.array(values[:36])

            coeffs = np.polyfit(x_last, y_last, 1)
            trend_y = np.polyval(coeffs, x_last)
            trend_timestamps = timestamps[:36]
        else:
            trend_timestamps = []
            trend_y = []

        customdata = list(
            zip(bucket.users_abs, [ts.strftime("%b %Y") for ts in bucket.timestamps])
        )

        hovertemplate = "%{y} Users were modifying in %{customdata[1]}<extra></extra>"

        fig.add_trace(
            pgo.Bar(
                name="Users per Month",
                x=timestamps,
                y=values,
                marker_color="lightgrey",
                customdata=customdata,
                hovertemplate=hovertemplate,
            )
        )

        fig.add_trace(
            pgo.Scatter(
                name="12-Month Weighted Avg",
                x=timestamps,
                y=weighted_avg,
                mode="lines",
                line=dict(color="steelblue", width=3),
                hovertemplate="Weighted Avg: %{y:.0f} Users<extra></extra>",
            )
        )

        if len(trend_timestamps) > 0:
            fig.add_trace(
                pgo.Scatter(
                    name="Last 36M Trend",
                    x=trend_timestamps,
                    y=trend_y,
                    mode="lines",
                    line=dict(color="red", width=4, dash="dash"),
                    hovertemplate="Trend: %{y:.0f} Users<extra></extra>",
                )
            )

        fig.update_layout(
            title=dict(
                text="User Activity",
                x=0.5,
                xanchor="center",
                font=dict(size=22),
            ),
            plot_bgcolor="white",
            legend=dict(
                x=0.02,
                y=0.95,
                bgcolor="rgba(255,255,255,0.66)",
                bordercolor="rgba(0,0,0,0.1)",
                borderwidth=1,
            ),
            margin=dict(l=60, r=30, t=60, b=60),
        )

        fig.update_xaxes(
            title_text="Date",
            ticklabelmode="period",
            minor=dict(
                ticks="inside",
                dtick="M1",
                tickcolor="rgba(128,128,128,0.66)",
            ),
            tickformat="%b %Y",
            ticks="outside",
            # Anchor the ticks at the oldest month.
            tick0=bucket.timestamps[-1],
            showgrid=True,
            gridcolor="rgba(200,200,200,0.3)",
        )

        fig.update_yaxes(
            title_text="Active Users [#]",
            showgrid=True,
            gridcolor="rgba(200,200,200,0.3)",
            zeroline=False,
        )

        raw = fig.to_dict()
        # remove boilerplate; tolerate a layout without template entry
        raw["layout"].pop("template", None)
        self.result.figure = raw
199+
200+
201+
def check_major_edge_cases(users_sum) -> str:
    """Return a description of a major edge case, or an empty string.

    A non-empty return value means the calculation should be cancelled.
    The only edge case checked is a total of zero users (no data).
    """
    no_data = users_sum == 0
    return "In this region no user activity was recorded. " if no_data else ""
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
-- Parsing the GeoJSON directly in the WHERE clause instead of
2+
-- in a WITH clause makes the query faster
3+
WITH serie AS (
4+
SELECT
5+
Generate_series(
6+
'2007-10-01'::timestamp,
7+
NOW()::timestamp,
8+
'1 month'::interval
9+
)::date AS month
10+
),
11+
user_count AS (
12+
SELECT
13+
Date_trunc('month', c.valid_from) AS month,
14+
COUNT(DISTINCT c.user_id) AS user_count
15+
FROM
16+
$contributions_table c
17+
WHERE 1=1
18+
-- TODO: this would be more performant but ohsome-filter-to-sql can not generate this
19+
-- clause because it does not know about "latest"
20+
-- AND status_geom_type = ANY(ARRAY[('latest', 'Polygon'), ('latest', 'MultiPolygon')]::_status_geom_type_type)
21+
-- ohsome-filter-to-sql generated clause
22+
AND ($filter)
23+
AND ST_Intersects(c.geom, ST_GeomFromGeoJSON($$1))
24+
GROUP BY
25+
month
26+
)
27+
SELECT
28+
Date_trunc('month', serie.month) as month,
29+
COALESCE(user_count, 0) as user_count
30+
FROM
31+
-- Filling monthly gaps (no data) with 0
32+
serie LEFT JOIN user_count ON (serie.month = user_count.month)
33+
ORDER BY
34+
month;
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
label_description:
3+
red: >-
4+
5+
yellow: >-
6+
7+
green: >-
8+
9+
undefined: >-
10+
This is a no-quality indicator. No quality estimation will be calculated.
11+
result_description: >-
12+
In the period between $from_timestamp and $to_timestamp a median of $median_users users added or edited objects of the selected topic in this region.

0 commit comments

Comments
 (0)