|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +# (c) 2023 Andreas Motl <[email protected]> |
| 3 | +import calendar |
| 4 | +import json |
| 5 | +from decimal import Decimal |
| 6 | +from copy import deepcopy |
| 7 | +from datetime import datetime, date |
| 8 | + |
| 9 | +import crate.client.http |
| 10 | +import pytz |
| 11 | +import requests |
| 12 | +from crate import client |
| 13 | +from crate.client.exceptions import ProgrammingError |
| 14 | +from funcy import project |
| 15 | +from twisted.logger import Logger |
| 16 | + |
| 17 | +from kotori.daq.storage.util import format_chunk |
| 18 | + |
| 19 | +log = Logger() |
| 20 | + |
| 21 | + |
| 22 | +class CrateDBAdapter(object): |
| 23 | + """ |
| 24 | + Kotori database backend adapter for CrateDB. |
| 25 | +
|
| 26 | + CrateDB is a distributed SQL database for storing and analyzing |
| 27 | + massive amounts of data in real-time. Built on top of Lucene. |
| 28 | +
|
| 29 | + https://github.com/crate/crate |
| 30 | + """ |
| 31 | + |
| 32 | + def __init__(self, settings=None, database=None): |
| 33 | + """ |
| 34 | + Carry over connectivity parameters. |
| 35 | +
|
| 36 | + TODO: Verify with CrateDB Cloud. |
| 37 | + """ |
| 38 | + |
| 39 | + settings = deepcopy(settings) or {} |
| 40 | + settings.setdefault("host", "localhost") |
| 41 | + settings.setdefault("port", "4200") |
| 42 | + settings.setdefault("username", "crate") |
| 43 | + settings.setdefault("password", "") |
| 44 | + settings.setdefault("database", database) |
| 45 | + |
| 46 | + # TODO: Bring back pool size configuration. |
| 47 | + # settings.setdefault('pool_size', 10) |
| 48 | + |
| 49 | + settings["port"] = int(settings["port"]) |
| 50 | + |
| 51 | + # FIXME: This is bad style. Well, but it is currently |
| 52 | + # inherited from ~10 year old code, so c'est la vie. |
| 53 | + self.__dict__.update(**settings) |
| 54 | + |
| 55 | + # Bookkeeping for all databases having been touched already |
| 56 | + self.databases_written_once = set() |
| 57 | + |
| 58 | + self.host_uri = "{host}:{port}".format(**self.__dict__) |
| 59 | + |
| 60 | + # TODO: Bring back pool size configuration. |
| 61 | + # log.info('Storage target is {uri}, pool size is {pool_size}', uri=self.host_uri, pool_size=self.pool_size) |
| 62 | + log.info("Storage target is {uri}", uri=self.host_uri) |
| 63 | + self.db_client = client.connect( |
| 64 | + self.host_uri, username=self.username, password=self.password, pool_size=20, |
| 65 | + ) |
| 66 | + |
| 67 | + def get_tablename(self, meta): |
| 68 | + """ |
| 69 | + Get table name for SensorWAN channel. |
| 70 | + """ |
| 71 | + return f"{meta.database}.{meta.measurement}" |
| 72 | + |
| 73 | + def create_table(self, tablename): |
| 74 | + """ |
| 75 | + Create database table for SensorWAN channel. |
| 76 | + """ |
| 77 | + log.info(f"Creating table: {tablename}") |
| 78 | + sql_ddl = f""" |
| 79 | +CREATE TABLE IF NOT EXISTS {tablename} ( |
| 80 | + time TIMESTAMP WITH TIME ZONE DEFAULT NOW() NOT NULL, |
| 81 | + tags OBJECT(DYNAMIC), |
| 82 | + fields OBJECT(DYNAMIC) |
| 83 | +); |
| 84 | + """ |
| 85 | + cursor = self.db_client.cursor() |
| 86 | + cursor.execute(sql_ddl) |
| 87 | + cursor.close() |
| 88 | + |
| 89 | + def write(self, meta, data): |
| 90 | + """ |
| 91 | + Format ingress data chunk and store it into database table. |
| 92 | +
|
| 93 | + TODO: This dearly needs efficiency improvements. Currently, there is no |
| 94 | + batching, just single records/inserts. That yields bad performance. |
| 95 | + """ |
| 96 | + |
| 97 | + meta_copy = deepcopy(dict(meta)) |
| 98 | + data_copy = deepcopy(data) |
| 99 | + |
| 100 | + try: |
| 101 | + chunk = format_chunk(meta, data) |
| 102 | + |
| 103 | + except Exception as ex: |
| 104 | + log.failure( |
| 105 | + "Could not format chunk (ex={ex_name}: {ex}): data={data}, meta={meta}", |
| 106 | + ex_name=ex.__class__.__name__, |
| 107 | + ex=ex, |
| 108 | + meta=meta_copy, |
| 109 | + data=data_copy, |
| 110 | + ) |
| 111 | + raise |
| 112 | + |
| 113 | + try: |
| 114 | + success = self.write_chunk(meta, chunk) |
| 115 | + return success |
| 116 | + |
| 117 | + except requests.exceptions.ConnectionError as ex: |
| 118 | + log.failure( |
| 119 | + "Problem connecting to CrateDB at {uri}: {ex}", uri=self.host_uri, ex=ex |
| 120 | + ) |
| 121 | + raise |
| 122 | + |
| 123 | + except ProgrammingError as ex: |
| 124 | + if "SchemaUnknownException" in ex.message: |
| 125 | + db_table = self.get_tablename(meta) |
| 126 | + self.create_table(db_table) |
| 127 | + |
| 128 | + # Attempt second write |
| 129 | + success = self.write_chunk(meta, chunk) |
| 130 | + return success |
| 131 | + |
| 132 | + else: |
| 133 | + raise |
| 134 | + |
| 135 | + def write_chunk(self, meta, chunk): |
| 136 | + """ |
| 137 | + Run the SQL `INSERT` operation. |
| 138 | + """ |
| 139 | + db_table = self.get_tablename(meta) |
| 140 | + cursor = self.db_client.cursor() |
| 141 | + |
| 142 | + # With or without timestamp. |
| 143 | + if "time" in chunk: |
| 144 | + cursor.execute( |
| 145 | + f"INSERT INTO {db_table} (time, tags, fields) VALUES (?, ?, ?)", |
| 146 | + (chunk["time"], chunk["tags"], chunk["fields"]), |
| 147 | + ) |
| 148 | + else: |
| 149 | + cursor.execute( |
| 150 | + f"INSERT INTO {db_table} (tags, fields) VALUES (?, ?)", |
| 151 | + (chunk["tags"], chunk["fields"]), |
| 152 | + ) |
| 153 | + success = True |
| 154 | + self.databases_written_once.add(meta.database) |
| 155 | + cursor.close() |
| 156 | + if success: |
| 157 | + log.debug("Storage success: {chunk}", chunk=chunk) |
| 158 | + else: |
| 159 | + log.error("Storage failed: {chunk}", chunk=chunk) |
| 160 | + return success |
| 161 | + |
| 162 | + @staticmethod |
| 163 | + def get_tags(data): |
| 164 | + """ |
| 165 | + Derive tags from topology information. |
| 166 | +
|
| 167 | + TODO: Verify if this is used at all. |
| 168 | + """ |
| 169 | + return project(data, ["gateway", "node"]) |
| 170 | + |
| 171 | + |
| 172 | +class TimezoneAwareCrateJsonEncoder(json.JSONEncoder): |
| 173 | + epoch_aware = datetime(1970, 1, 1, tzinfo=pytz.UTC) |
| 174 | + epoch_naive = datetime(1970, 1, 1) |
| 175 | + |
| 176 | + def default(self, o): |
| 177 | + if isinstance(o, Decimal): |
| 178 | + return str(o) |
| 179 | + if isinstance(o, datetime): |
| 180 | + if o.tzinfo: |
| 181 | + delta = o - self.epoch_aware |
| 182 | + else: |
| 183 | + delta = o - self.epoch_naive |
| 184 | + return int(delta.microseconds / 1000.0 + |
| 185 | + (delta.seconds + delta.days * 24 * 3600) * 1000.0) |
| 186 | + if isinstance(o, date): |
| 187 | + return calendar.timegm(o.timetuple()) * 1000 |
| 188 | + return json.JSONEncoder.default(self, o) |
| 189 | + |
| 190 | + |
| 191 | +# Monkey patch. |
| 192 | +# TODO: Submit upstream. |
| 193 | +crate.client.http.CrateJsonEncoder = TimezoneAwareCrateJsonEncoder |
0 commit comments