83 changes: 45 additions & 38 deletions src/metpy/io/text.py
@@ -5,6 +5,7 @@
 
 import contextlib
 from datetime import datetime, timezone
+import logging
 import re
 import string
 
@@ -16,6 +17,8 @@
 
 exporter = Exporter(globals())
 
+log = logging.getLogger(__name__)
+
 
 def _decode_coords(coordinates):
     """Turn a string of coordinates from WPC coded surface bulletin into a lon/lat tuple.
@@ -107,44 +110,48 @@ def parse_wpc_surface_bulletin(bulletin, year=None):
         # A single file may have multiple sets of data that are valid at different times. Set
         # the valid_time string that will correspond to all the following lines parsed, until
         # the next valid_time is found.
-        if parts[0] in ('VALID', 'SURFACE PROG VALID'):
-            dtstr = parts[-1]
-            valid_time = valid_time.replace(year=year or valid_time.year, month=int(dtstr[:2]),
-                                            day=int(dtstr[2:4]), hour=int(dtstr[4:6]),
-                                            minute=0, second=0, microsecond=0)
-        else:
-            feature, *info = parts
-            if feature in {'HIGHS', 'LOWS'}:
-                # For each pressure center, add its data as a new row
-                # While ideally these occur in pairs, some bulletins have had multiple
-                # locations for a single center strength value. So instead walk one at a time
-                # and keep track of the most recent strength.
-                strength = np.nan
-                for item in info:
-                    if len(item) <= 4 and item[0] in {'8', '9', '1'}:
-                        strength = int(item)
+        try:
+            if parts[0] in ('VALID', 'SURFACE PROG VALID'):
+                dtstr = parts[-1]
+                valid_time = valid_time.replace(year=year or valid_time.year,
+                                                month=int(dtstr[:2]), day=int(dtstr[2:4]),
+                                                hour=int(dtstr[4:6]), minute=0, second=0,
+                                                microsecond=0)
+            else:
+                feature, *info = parts
+                if feature in {'HIGHS', 'LOWS'}:
+                    # For each pressure center, add its data as a new row
+                    # While ideally these occur in pairs, some bulletins have had multiple
+                    # locations for a single center strength value. So instead walk one at a
+                    # time and keep track of the most recent strength.
+                    strength = np.nan
+                    for item in info:
+                        if len(item) <= 4 and item[0] in {'8', '9', '1'}:
+                            strength = int(item)
+                        else:
+                            parsed_text.append((valid_time, feature.rstrip('S'), strength,
+                                                Point(_decode_coords(item))))
+                elif feature in {'WARM', 'COLD', 'STNRY', 'OCFNT', 'TROF'}:
+                    # Some bulletins include 'WK', 'MDT', or 'STG' to indicate the front's
+                    # strength. If present, separate it from the rest of the info, which gives
+                    # the position of the front.
+                    if info[0][0] in string.ascii_letters:
+                        strength, *boundary = info
-                    else:
-                        parsed_text.append((valid_time, feature.rstrip('S'), strength,
-                                            Point(_decode_coords(item))))
-            elif feature in {'WARM', 'COLD', 'STNRY', 'OCFNT', 'TROF'}:
-                # Some bulletins include 'WK', 'MDT', or 'STG' to indicate the front's
-                # strength. If present, separate it from the rest of the info, which gives the
-                # position of the front.
-                if info[0][0] in string.ascii_letters:
-                    strength, *boundary = info
-                else:
-                    strength, boundary = np.nan, info
-
-                # Create a list of Points and create Line from points, if possible
-                boundary = [Point(_decode_coords(point)) for point in boundary]
-                boundary = LineString(boundary) if len(boundary) > 1 else boundary[0]
-
-                # Add new row in the data for each front
-                parsed_text.append((valid_time, feature, strength, boundary))
-            # Look for a year at the end of the line (from the product header)
-            elif (year is None and len(info) >= 2 and re.match(r'\d{4}', info[-1])
-                  and re.match(r'\d{2}', info[-2])):
-                with contextlib.suppress(ValueError):
-                    year = int(info[-1])
+                    else:
+                        strength, boundary = np.nan, info
+
+                    # Create a list of Points and create Line from points, if possible
+                    boundary = [Point(_decode_coords(point)) for point in boundary]
+                    boundary = LineString(boundary) if len(boundary) > 1 else boundary[0]
+
+                    # Add new row in the data for each front
+                    parsed_text.append((valid_time, feature, strength, boundary))
+                # Look for a year at the end of the line (from the product header)
+                elif (year is None and len(info) >= 2 and re.match(r'\d{4}', info[-1])
+                      and re.match(r'\d{2}', info[-2])):
+                    with contextlib.suppress(ValueError):
+                        year = int(info[-1])
+        except ValueError:
+            log.warning('Could not parse: %s', ' '.join(parts))
 
     return pd.DataFrame(parsed_text, columns=['valid', 'feature', 'strength', 'geometry'])
16 changes: 16 additions & 0 deletions tests/io/test_text.py
@@ -98,3 +98,19 @@ def test_negative_lat():
 """)
     df = parse_wpc_surface_bulletin(sample)
     assert df.geometry[0] == sgeom.Point([-51, -3])
+
+
+@needs_module('shapely')
+def test_bad_line_continue(caplog):
+    """Test decoding of a file with some bad characters."""
+    from io import BytesIO
+
+    sample = BytesIO(b"""VALID 062818Z
+HIGHS 1022 3961069 1020 3851069 1026 3750773 1022 4430845 1019 5520728
+LOWS 1016 4510934 1002 3441145 1003 4271229 1002 4471230 1009 4631181
+TROF 2971023 2831018 2691008 I2531003
+TROF 2911100 2681082 2511055 2431024
+""")
+    df = parse_wpc_surface_bulletin(sample)
+    assert len(df) == 11
+    assert 'Could not parse' in caplog.text
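
For context beyond the diff itself, here is a minimal usage sketch of the caller-facing behavior this change enables. It assumes parse_wpc_surface_bulletin is importable from metpy.io (as the tests do) and that shapely is installed; the corrupted token 'I2531003' is taken from the test fixture above. With this change, the bad TROF line is skipped with a logged warning instead of the whole bulletin failing with a ValueError.

import logging
from io import BytesIO

from metpy.io import parse_wpc_surface_bulletin

# Make the module logger's warning visible on the console.
logging.basicConfig(level=logging.WARNING)

# The first TROF line carries a corrupted coordinate group ('I2531003'), so the
# parser now logs 'Could not parse: ...' for that line and continues with the rest.
sample = BytesIO(b"""VALID 062818Z
TROF 2971023 2831018 2691008 I2531003
TROF 2911100 2681082 2511055 2431024
""")

df = parse_wpc_surface_bulletin(sample)
print(len(df))  # 1: only the well-formed TROF line produces a row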