Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 174 additions & 9 deletions src/metpy/remote/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,12 +176,86 @@

def _closest_result(self, it, dt):
    """Iterate over a sequence and return a result built from the closest match.

    Parameters
    ----------
    it : iterable
        Objects with a ``key`` attribute that `dt_from_key` can turn into a datetime
    dt : datetime.datetime
        Target date/time to match

    Returns
    -------
    object
        The value returned by `_build_result` for the object whose key time is closest
        to `dt`. When several objects are equally close, the first one encountered wins.

    Raises
    ------
    ValueError
        If `it` is empty or none of its keys could be parsed into a time

    """
    best_obj = None
    best_diff = None
    for obj in it:
        try:
            obj_dt = self.dt_from_key(obj.key)
        except (ValueError, IndexError):
            # A key that does not follow the expected naming pattern has no usable
            # time, so it is skipped rather than aborting the whole search.
            continue

        diff = abs(obj_dt - dt)
        if best_diff is None or diff < best_diff:
            best_obj = obj
            best_diff = diff

    if best_obj is None:
        raise ValueError('No matching products found.')
    return self._build_result(best_obj)

def _find_best_product(self, objects_iter, dt, filters=None):
    """Find the best product from a sequence based on time and optional filters.

    Parameters
    ----------
    objects_iter : iterable
        Iterable of S3 objects to search through
    dt : datetime.datetime
        Target datetime to match
    filters : dict, optional
        Dictionary of attribute names and values to filter objects by.
        For example, {'sector': 'M1', 'band': '02'}

    Returns
    -------
    object
        The value returned by `_build_result` for the best matching S3 object, i.e.
        the one passing the filters whose key time is closest to `dt`

    Raises
    ------
    ValueError
        If no matching products are found

    """
    best_obj = None
    best_diff = None

    for obj in objects_iter:
        try:
            # Skip if it doesn't match our filters
            if filters and not self._matches_filters(obj.key, filters):
                continue
            obj_dt = self.dt_from_key(obj.key)
        except (ValueError, IndexError):
            # The key does not follow the expected naming pattern, so it cannot be
            # filtered or compared by time; ignore it and keep searching.
            continue

        diff = abs(obj_dt - dt)
        if best_diff is None or diff < best_diff:
            best_obj = obj
            best_diff = diff

    if best_obj is None:
        filter_desc = f' matching filters {filters}' if filters else ''
        raise ValueError(f'No matching products found{filter_desc}.')

    return self._build_result(best_obj)

def _matches_filters(self, key, filters):
    """Report whether an object key satisfies every requested filter.

    This generic version accepts every key. Subclasses that need real filtering
    logic are expected to override it.

    Parameters
    ----------
    key : str
        The S3 object key to check
    filters : dict
        Dictionary of attribute names and values to filter by

    Returns
    -------
    bool
        Always True here; an override returns False for keys that fail a filter

    """
    return True

def _build_result(self, obj):
"""Build a basic product with no reader."""
Expand Down Expand Up @@ -470,6 +544,14 @@
This consists of individual GOES image files stored in netCDF format, across a variety
of sectors, bands, and modes.
GOES filenames follow the pattern:
OR_ABI-L1b-RadX-MYC##_G##_s########_e########_c########.nc
Where:
- X is the sector (F=Full Disk, C=CONUS, M1=Mesoscale 1, M2=Mesoscale 2)
- Y is the mode (3, 4, 6)
- ## is the channel/band (01-16)
"""

def __init__(self, satellite):
Expand Down Expand Up @@ -559,7 +641,22 @@
dt = datetime.now(timezone.utc) if dt is None else ensure_timezone(dt)
time_prefix = self._build_time_prefix(product, dt)
prod_prefix = self._subprod_prefix(time_prefix, mode, band)
return self._closest_result(self.objects(prod_prefix), dt)

# Extract sector from product name (e.g., 'M1' from 'ABI-L1b-RadM1')
sector = None
if product.endswith(('M1', 'M2')):
sector = product[-2:]

# Build filters dictionary for precise matching
filters = {}
if sector:
filters['sector'] = sector
if band is not None:
filters['band'] = f'{int(band):02d}' if isinstance(band, int) else band
if mode is not None:
filters['mode'] = str(mode)

return self._find_best_product(self.objects(prod_prefix), dt, filters)

def get_range(self, product, start, end, mode=None, band=None):
"""Yield products within a particular date/time range.
Expand Down Expand Up @@ -589,12 +686,80 @@
"""
start = ensure_timezone(start)
end = ensure_timezone(end)

# Extract sector from product name (e.g., 'M1' from 'ABI-L1b-RadM1')
sector = None
if product.endswith(('M1', 'M2')):
sector = product[-2:]

# Build filters dictionary for precise matching
filters = {}
if sector:
filters['sector'] = sector
if band is not None:
filters['band'] = f'{int(band):02d}' if isinstance(band, int) else band
if mode is not None:
filters['mode'] = str(mode)

for dt in date_iterator(start, end, hours=1):
time_prefix = self._build_time_prefix(product, dt)
prod_prefix = self._subprod_prefix(time_prefix, mode, band)
for obj in self.objects(prod_prefix):
if start <= self.dt_from_key(obj.key) < end:
yield self._build_result(obj)
obj_dt = self.dt_from_key(obj.key)
# Check if object is within time range and matches filters
matches_time = start <= obj_dt < end
matches_filters = not filters or self._matches_filters(obj.key, filters)
if matches_time and matches_filters:
# Only yield if it matches our filters
yield self._build_result(obj)

def _matches_filters(self, key, filters):
    """Check if a GOES product key matches all specified filters.

    The second underscore-separated field of the file name (for example
    ``ABI-L1b-RadM1-M6C02`` in ``OR_ABI-L1b-RadM1-M6C02_G18_s...``) is split into a
    product name, a scan mode and a band. Each requested filter is then compared
    against the corresponding piece instead of being searched for as a substring.

    Parameters
    ----------
    key : str
        The S3 object key to check
    filters : dict
        Dictionary of attribute names and values to filter by. The names used are
        ``'sector'`` (only the mesoscale values ``'M1'`` and ``'M2'`` are checked),
        ``'band'`` (int or str, zero-padded to two digits before comparing) and
        ``'mode'``. Any other name is ignored.

    Returns
    -------
    bool
        True if the key matches all filters, False otherwise

    """
    # Parse the filename from the key
    filename = key.rsplit('/', 1)[-1]
    fields = filename.split('_')
    if len(fields) < 2:
        return False

    # Product info from filename (e.g., 'ABI-L1b-RadM1-M6C02')
    product_info = fields[1]

    # Split off the trailing '-M<mode>[C<band>]' segment when one is present
    product, sep, scan = product_info.rpartition('-M')
    file_mode, _, file_band = scan.partition('C')
    if not (sep and file_mode.isdigit()):
        # No mode/band segment: either '-M' is absent or it matched inside the
        # product name itself, so treat the whole field as the product name.
        product, file_mode, file_band = product_info, None, None

    # Mesoscale sectors are the final characters of the product name, whatever the
    # product family is (RadM1, CMIPM1, MCMIPM2, ...)
    sector = filters.get('sector')
    if sector in ('M1', 'M2') and not product.endswith(sector):
        return False

    # Check band filter
    if 'band' in filters and str(filters['band']).zfill(2) != file_band:
        return False

    # Check mode filter
    if 'mode' in filters and str(filters['mode']) != file_mode:
        return False

    return True

def _build_result(self, obj):
"""Build a product that opens the data using `xarray.open_dataset`."""
Expand Down
45 changes: 45 additions & 0 deletions test_goes_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env python
# Copyright (c) 2015-2025 MetPy Developers.
"""Test script to verify the robustness of the GOES client at hour boundaries.

This script tests the recursive search implementation for finding products
across hour boundaries.
"""
import logging
from datetime import datetime, timezone

Check failure on line 9 in test_goes_client.py

View workflow job for this annotation

GitHub Actions / Run Lint Tools

[flake8] reported by reviewdog 🐶 I001 isort found an import in the wrong position Raw Output: ./test_goes_client.py:9:1: I001 isort found an import in the wrong position

from metpy.remote import GOESArchive

logger = logging.getLogger(__name__)

def test_goes_hour_boundary():
    """Test the GOES client's ability to find products across hour boundaries.

    Every case is attempted even when an earlier one fails. Each failure is logged
    with its traceback, and all failures are then reported together so the test
    actually fails instead of silently swallowing the errors.

    Raises
    ------
    AssertionError
        If requesting a product raised an exception for any of the cases

    """
    # Create a GOES client
    goes = GOESArchive(16)

    # (description used in the log message, requested time)
    cases = [
        ('at exact hour boundary', datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
        ('just after hour boundary', datetime(2025, 1, 1, 0, 0, 30, tzinfo=timezone.utc)),
        ('just before hour boundary', datetime(2025, 1, 1, 0, 59, 30, tzinfo=timezone.utc)),
        ('at day boundary', datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc)),
    ]

    failed = []
    for description, dt in cases:
        try:
            goes.get_product('ABI-L1b-RadC', dt, band=1)
        except Exception:
            # Keep going so one bad case does not hide the outcome of the others
            logger.exception('Failed to get product %s', description)
            failed.append(description)

    if failed:
        raise AssertionError(f'Failed to get product: {", ".join(failed)}')

if __name__ == '__main__':

Check failure on line 44 in test_goes_client.py

View workflow job for this annotation

GitHub Actions / Run Lint Tools

[flake8] reported by reviewdog 🐶 E305 expected 2 blank lines after class or function definition, found 1 Raw Output: ./test_goes_client.py:44:1: E305 expected 2 blank lines after class or function definition, found 1
test_goes_hour_boundary()
Loading
Loading