|
| 1 | +import json |
| 2 | +import logging |
| 3 | +from dataclasses import dataclass |
| 4 | +from pathlib import Path |
| 5 | +from statistics import median |
| 6 | +from string import Template |
| 7 | + |
| 8 | +import numpy as np |
| 9 | +import plotly.graph_objects as pgo |
| 10 | +from geojson import Feature |
| 11 | +from ohsome_filter_to_sql.main import ohsome_filter_to_sql |
| 12 | + |
| 13 | +from ohsome_quality_api.config import get_config_value |
| 14 | +from ohsome_quality_api.geodatabase import client |
| 15 | +from ohsome_quality_api.indicators.base import BaseIndicator |
| 16 | +from ohsome_quality_api.topics.models import BaseTopic as Topic |
| 17 | + |
| 18 | + |
@dataclass
class Bin:
    """Bin or bucket of users.

    Both lists are index-aligned. Indices count time periods back from the
    latest timestamp: index 0 is the most recent period and the last index
    is the oldest one.

    NOTE(review): an earlier wording said indices denote *years*. The figure
    code in this module labels the entries as months ("Users per Month",
    12-month window, 36-month trend) -- confirm the bin width against the
    SQL query that fills this bin.
    """

    users_abs: list  # absolute number of users per time period
    timestamps: list  # middle of time period
| 28 | + |
| 29 | + |
class UserActivity(BaseIndicator):
    """Indicator built from per-period user counts for a topic and feature.

    ``preprocess`` queries the contributions table and stores the rows in
    ``self.bin_total``; ``calculate`` writes a textual description to
    ``self.result``; ``create_figure`` writes a plotly figure dict to
    ``self.result.figure``.
    """

    def __init__(
        self,
        topic: Topic,
        feature: Feature,
    ) -> None:
        super().__init__(topic=topic, feature=feature)
        # Filled by preprocess(); stays None when the query returns no rows.
        self.bin_total = None

    async def preprocess(self) -> None:
        """Run the SQL query next to this module and store the result as a Bin.

        The topic filter and the contributions table name are substituted
        into the query text; the feature geometry is passed as a query
        argument. If the query returns no rows, ``self.bin_total`` is left as
        ``None``.
        """
        where = ohsome_filter_to_sql(self.topic.filter)
        with open(
            Path(__file__).parent / "query.sql", "r", encoding="utf-8"
        ) as file:
            template = file.read()
        query = Template(template).substitute(
            {
                "filter": where,
                "contributions_table": get_config_value("ohsomedb_contributions_table"),
            }
        )
        results = await client.fetch(
            query, json.dumps(self.feature["geometry"]), database="ohsomedb"
        )
        if not results:
            return
        # Rows are reversed so that index 0 holds the latest entry
        # (presumably the query returns rows oldest first -- verify against
        # query.sql). Column 0 is the timestamp, column 1 the user count.
        timestamps = [row[0] for row in reversed(results)]
        users_abs = [row[1] for row in reversed(results)]
        self.bin_total = Bin(
            users_abs,
            timestamps,
        )
        self.result.timestamp_osm = timestamps[0]

    def _total_users(self):
        """Return the sum of all user counts, or 0 if no data was loaded."""
        if self.bin_total is None:
            # preprocess() found no rows; treat this like "no activity"
            # instead of failing with an AttributeError on None.
            return 0
        return sum(self.bin_total.users_abs)

    def calculate(self):
        """Compose ``self.result.description`` from the stored bin.

        If there is no user activity (or no data at all), the description is
        set to the edge-case message and nothing else is computed.
        """
        edge_cases = check_major_edge_cases(self._total_users())
        if edge_cases:
            self.result.description = edge_cases
            return
        label_description = self.templates.label_description[self.result.label]
        result_description = Template(self.templates.result_description).substitute(
            median_users=f"{int(median(self.bin_total.users_abs))}",
            # Index -1 is the oldest entry, index 0 the latest one.
            from_timestamp=self.bin_total.timestamps[-1].strftime("%b %Y"),
            to_timestamp=self.bin_total.timestamps[0].strftime("%b %Y"),
        )
        self.result.description = result_description + "\n" + label_description

    def create_figure(self):
        """Build the bar/line figure and store it in ``self.result.figure``.

        The figure contains the per-period user counts as bars, a 12-entry
        weighted average line and, when at least 36 entries exist, a linear
        trend line over the 36 latest entries. Nothing is created when there
        is no user activity (or no data at all).
        """
        if check_major_edge_cases(self._total_users()):
            logging.info("No user activity. Skipping figure creation.")
            return
        fig = pgo.Figure()
        bucket = self.bin_total

        values = bucket.users_abs
        timestamps = bucket.timestamps

        window = 12
        weights = np.arange(1, window + 1)
        weighted_avg = []

        # NOTE(review): with newest-first ordering the window of entry i
        # covers entry i and up to 11 *more recent* entries, with the highest
        # weight on entry i itself -- confirm this direction is intended.
        for i in range(len(values)):
            start = max(0, i - window + 1)
            window_vals = values[start : i + 1]
            # Shorter windows at the start use the largest weights only.
            window_weights = weights[-len(window_vals) :]
            avg = np.dot(window_vals, window_weights) / window_weights.sum()
            weighted_avg.append(avg)

        # regression trend line for the last 36 months
        trend_window = 36
        if len(values) >= trend_window:
            x_last = np.arange(trend_window)
            y_last = np.array(values[:trend_window])

            coeffs = np.polyfit(x_last, y_last, 1)
            trend_y = np.polyval(coeffs, x_last)
            trend_timestamps = timestamps[:trend_window]
        else:
            trend_timestamps = []
            trend_y = []

        customdata = list(
            zip(bucket.users_abs, [ts.strftime("%b %Y") for ts in bucket.timestamps])
        )

        hovertemplate = "%{y} Users were modifying in %{customdata[1]}<extra></extra>"

        fig.add_trace(
            pgo.Bar(
                name="Users per Month",
                x=timestamps,
                y=values,
                marker_color="lightgrey",
                customdata=customdata,
                hovertemplate=hovertemplate,
            )
        )

        fig.add_trace(
            pgo.Scatter(
                name="12-Month Weighted Avg",
                x=timestamps,
                y=weighted_avg,
                mode="lines",
                line=dict(color="steelblue", width=3),
                hovertemplate="Weighted Avg: %{y:.0f} Users<extra></extra>",
            )
        )

        if len(trend_timestamps) > 0:
            fig.add_trace(
                pgo.Scatter(
                    name="Last 36M Trend",
                    x=trend_timestamps,
                    y=trend_y,
                    mode="lines",
                    line=dict(color="red", width=4, dash="dash"),
                    hovertemplate="Trend: %{y:.0f} Users<extra></extra>",
                )
            )

        fig.update_layout(
            title=dict(
                text="User Activity",
                x=0.5,
                xanchor="center",
                font=dict(size=22),
            ),
            plot_bgcolor="white",
            legend=dict(
                x=0.02,
                y=0.95,
                bgcolor="rgba(255,255,255,0.66)",
                bordercolor="rgba(0,0,0,0.1)",
                borderwidth=1,
            ),
            margin=dict(l=60, r=30, t=60, b=60),
        )

        fig.update_xaxes(
            title_text="Date",
            ticklabelmode="period",
            minor=dict(
                ticks="inside",
                dtick="M1",
                tickcolor="rgba(128,128,128,0.66)",
            ),
            tickformat="%b %Y",
            ticks="outside",
            tick0=bucket.timestamps[-1],
            showgrid=True,
            gridcolor="rgba(200,200,200,0.3)",
        )

        fig.update_yaxes(
            title_text="Active Users [#]",
            showgrid=True,
            gridcolor="rgba(200,200,200,0.3)",
            zeroline=False,
        )

        raw = fig.to_dict()
        # Remove template boilerplate; tolerate figures that carry no
        # template (e.g. when no default plotly template is configured).
        raw["layout"].pop("template", None)
        self.result.figure = raw
| 199 | + |
| 200 | + |
def check_major_edge_cases(users_sum) -> str:
    """Return a description for major edge cases, or an empty string.

    A major edge case means the calculation should be cancelled by the
    caller. The only case checked here is a user sum equal to zero (no data).
    """
    no_data = users_sum == 0
    return "In this region no user activity was recorded. " if no_data else ""
0 commit comments