diff --git a/bot/errors.py b/bot/errors.py index 7949dc1b4a..31d264686e 100644 --- a/bot/errors.py +++ b/bot/errors.py @@ -59,7 +59,7 @@ def __init__(self, converter: Converter, original: Exception, infraction_arg: in class BrandingMisconfigurationError(RuntimeError): - """Raised by the Branding cog when a misconfigured event is encountered.""" + """Raised by the Branding cog when branding misconfiguration is detected.""" diff --git a/bot/exts/backend/branding/_cog.py b/bot/exts/backend/branding/_cog.py index 433f152b43..154a94f195 100644 --- a/bot/exts/backend/branding/_cog.py +++ b/bot/exts/backend/branding/_cog.py @@ -342,14 +342,14 @@ async def synchronise(self) -> tuple[bool, bool]: """ log.debug("Synchronise: fetching current event.") - current_event, available_events = await self.repository.get_current_event() + try: + current_event, available_events = await self.repository.get_current_event() + except Exception: + log.exception("Synchronisation aborted: failed to fetch events.") + return False, False await self.populate_cache_events(available_events) - if current_event is None: - log.error("Failed to fetch event. Cannot synchronise!") - return False, False - return await self.enter_event(current_event) async def populate_cache_events(self, events: list[Event]) -> None: @@ -433,10 +433,6 @@ async def daemon_main(self) -> None: await self.populate_cache_events(available_events) - if new_event is None: - log.warning("Daemon main: failed to get current event from branding repository, will do nothing.") - return - if new_event.path != await self.cache_information.get("event_path"): log.debug("Daemon main: new event detected!") await self.enter_event(new_event) @@ -596,10 +592,24 @@ async def branding_calendar_refresh_cmd(self, ctx: commands.Context) -> None: log.info("Performing command-requested event cache refresh.") async with ctx.typing(): - available_events = await self.repository.get_events() - await self.populate_cache_events(available_events) + try: + available_events = await self.repository.get_events() + except Exception: + log.exception("Refresh aborted: failed to fetch events.") + resp = make_embed( + "Refresh aborted", + "Failed to fetch events. See log for details.", + success=False, + ) + else: + await self.populate_cache_events(available_events) + resp = make_embed( + "Refresh successful", + "The event calendar has been refreshed.", + success=True, + ) - await ctx.invoke(self.branding_calendar_group) + await ctx.send(embed=resp) # endregion # region: Command interface (branding daemon) diff --git a/bot/exts/backend/branding/_repository.py b/bot/exts/backend/branding/_repository.py index 20cad0a5dc..d661cd7a8c 100644 --- a/bot/exts/backend/branding/_repository.py +++ b/bot/exts/backend/branding/_repository.py @@ -2,6 +2,8 @@ from datetime import UTC, date, datetime import frontmatter +from aiohttp import ClientResponse, ClientResponseError +from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential from bot.bot import Bot from bot.constants import Keys @@ -71,6 +73,35 @@ def __str__(self) -> str: return f"" +class GitHubServerError(Exception): + """ + GitHub responded with 5xx status code. + + Such error shall be retried. + """ + + +def _raise_for_status(resp: ClientResponse) -> None: + """Raise custom error if resp status is 5xx.""" + # Use the response's raise_for_status so that we can + # attach the full traceback to our custom error. + log.trace(f"GitHub response status: {resp.status}") + try: + resp.raise_for_status() + except ClientResponseError as err: + if resp.status >= 500: + raise GitHubServerError from err + raise + + +_retry_server_error = retry( + retry=retry_if_exception_type(GitHubServerError), # Only retry this error. + stop=stop_after_attempt(5), # Up to 5 attempts. + wait=wait_exponential(), # Exponential backoff: 1, 2, 4, 8 seconds. + reraise=True, # After final failure, re-raise original exception. +) + + class BrandingRepository: """ Branding repository abstraction. @@ -93,6 +124,7 @@ class BrandingRepository: def __init__(self, bot: Bot) -> None: self.bot = bot + @_retry_server_error async def fetch_directory(self, path: str, types: t.Container[str] = ("file", "dir")) -> dict[str, RemoteObject]: """ Fetch directory found at `path` in the branding repository. @@ -105,14 +137,12 @@ async def fetch_directory(self, path: str, types: t.Container[str] = ("file", "d log.debug(f"Fetching directory from branding repository: '{full_url}'.") async with self.bot.http_session.get(full_url, params=PARAMS, headers=HEADERS) as response: - if response.status != 200: - raise RuntimeError(f"Failed to fetch directory due to status: {response.status}") - - log.debug("Fetch successful, reading JSON response.") + _raise_for_status(response) json_directory = await response.json() return {file["name"]: RemoteObject(file) for file in json_directory if file["type"] in types} + @_retry_server_error async def fetch_file(self, download_url: str) -> bytes: """ Fetch file as bytes from `download_url`. @@ -122,10 +152,7 @@ async def fetch_file(self, download_url: str) -> bytes: log.debug(f"Fetching file from branding repository: '{download_url}'.") async with self.bot.http_session.get(download_url, params=PARAMS, headers=HEADERS) as response: - if response.status != 200: - raise RuntimeError(f"Failed to fetch file due to status: {response.status}") - - log.debug("Fetch successful, reading payload.") + _raise_for_status(response) return await response.read() def parse_meta_file(self, raw_file: bytes) -> MetaFile: @@ -186,37 +213,34 @@ async def get_events(self) -> list[Event]: """ Discover available events in the branding repository. - Misconfigured events are skipped. May return an empty list in the catastrophic case. + Propagate errors if an event fails to fetch or deserialize. """ log.debug("Discovering events in branding repository.") - try: - event_directories = await self.fetch_directory("events", types=("dir",)) # Skip files. - except Exception: - log.exception("Failed to fetch 'events' directory.") - return [] + event_directories = await self.fetch_directory("events", types=("dir",)) # Skip files. instances: list[Event] = [] for event_directory in event_directories.values(): - log.trace(f"Attempting to construct event from directory: '{event_directory.path}'.") - try: - instance = await self.construct_event(event_directory) - except Exception as exc: - log.warning(f"Could not construct event '{event_directory.path}'.", exc_info=exc) - else: - instances.append(instance) + log.trace(f"Reading event directory: '{event_directory.path}'.") + instance = await self.construct_event(event_directory) + instances.append(instance) return instances - async def get_current_event(self) -> tuple[Event | None, list[Event]]: + async def get_current_event(self) -> tuple[Event, list[Event]]: """ Get the currently active event, or the fallback event. The second return value is a list of all available events. The caller may discard it, if not needed. Returning all events alongside the current one prevents having to query the API twice in some cases. - The current event may be None in the case that no event is active, and no fallback event is found. + Raise an error in the following cases: + * GitHub request fails + * The branding repo contains an invalid event + * No event is active and the fallback event is missing + + Events are validated in the branding repo. The bot assumes that events are valid. """ utc_now = datetime.now(tz=UTC) log.debug(f"Finding active event for: {utc_now}.") @@ -249,5 +273,4 @@ async def get_current_event(self) -> tuple[Event | None, list[Event]]: if event.meta.is_fallback: return event, available_events - log.warning("No event is currently active and no fallback event was found!") - return None, available_events + raise BrandingMisconfigurationError("No event is active and the fallback event is missing!") diff --git a/poetry.lock b/poetry.lock index 7cdd250b87..eae2b9446d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2396,6 +2396,21 @@ mslex = {version = ">=1.1.0,<2.0.0", markers = "sys_platform == \"win32\""} psutil = ">=5.7.2,<6.0.0" tomli = {version = ">=2.0.1,<3.0.0", markers = "python_version >= \"3.7\" and python_version < \"4.0\""} +[[package]] +name = "tenacity" +version = "9.0.0" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"}, + {file = "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] + [[package]] name = "tldextract" version = "5.1.3" @@ -2597,4 +2612,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "3.12.*" -content-hash = "2bcb9640e8089861ff8f7523c0e6968a720cb3aa2329951dc14dd7cdedb08820" +content-hash = "202778de32f1760a2f501e2a524ea61973b8b8b27d45a7c25c21356b18155c7e" diff --git a/pyproject.toml b/pyproject.toml index 8fbdad4c2b..325707c550 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ python-frontmatter = "1.1.0" rapidfuzz = "3.10.1" regex = "2024.11.6" sentry-sdk = "2.19.0" +tenacity = "9.0.0" tldextract = "5.1.3" pydantic = "2.10.1" pydantic-settings = "2.6.1"