Skip to content

Commit 0cc3287

Browse files
authored
Add support for file buffers (#50)
<!-- Provide a general summary of your changes in the Title above --> ## Description Previously, `parsnip` expected a path to a file (provided as either a str or a pathlib.Path), which was opened into a file buffer in the __init__ method. This is now optional, and a direct file buffer can now be provided. We also support raw text blocks (str containing newlines), which are preprocessed appropriately. **We make the assumption that all strings that do not point to a file, do not contain a portion of a valid path, and do not contain the ".cif" suffix should be treated as CIF data blocks.** ## Motivation and Context Resolves #49 Blocked by #51 ## Types of Changes <!-- Please select all items that apply, either now or after creating the pull request: --> - [ ] Documentation update - [ ] Bug fix - [x] New feature - [ ] Breaking change<sup>1</sup> <sup>1</sup>The change breaks (or has the potential to break) existing functionality and should be merged into the `breaking` branch ## Checklist: <!-- Please select all items that apply either now or after creating the pull request. --> - [x] I am familiar with the [Development Guidelines](https://github.com/glotzerlab/parsnip/blob/main/doc/source/development.rst) - [x] The changes introduced by this pull request are covered by existing or newly introduced tests. - [x] I have updated the [changelog](https://github.com/glotzerlab/parsnip/blob/main/ChangeLog.rst) and added my name to the [credits](https://github.com/glotzerlab/parsnip/blob/main/doc/source/credits.rst).
1 parent fc4d1f0 commit 0cc3287

File tree

8 files changed

+117
-24
lines changed

8 files changed

+117
-24
lines changed

changelog.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@ Changelog
44
The format is based on `Keep a Changelog <http://keepachangelog.com/en/1.1.0/>`__.
55
This project adheres to `Semantic Versioning <http://semver.org/spec/v2.0.0.html>`__.
66

7+
v0.4.0 - xxxx-xx-xx
8+
-------------------
9+
10+
Added
11+
~~~~~
12+
- Support for reading files via a context manager, text buffer, or string.
13+
14+
715
v0.3.1 - 2025-07-16
816
-------------------
917

parsnip/_errors.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,27 @@
11
# Copyright (c) 2025, The Regents of the University of Michigan
22
# This file is from the parsnip project, released under the BSD 3-Clause License.
33

4+
from pathlib import Path
5+
6+
7+
def _is_potentially_valid_path(file: str) -> bool:
    """Check whether a string could plausibly be intended as a file path.

    A string is treated as a path when any of the following hold:

    - it is a single line and ".cif" is one of its suffixes,
    - it points to something that exists on disk, or
    - it is a single line whose parent is an existing directory other than the
      current directory (possibly a typo in the file name, but still worth
      trying to open).

    Strings containing line breaks are only treated as paths when they point to
    something that exists. Otherwise a raw CIF data block that happens to end in
    ".cif", or whose text before the last "/" names a directory, would be
    mistaken for a file name.

    Parameters
    ----------
    file : str
        The candidate string: either a path or raw CIF text.

    Returns
    -------
    bool
        ``True`` if the string should be opened as a file, ``False`` if it should
        be treated as raw data.
    """
    is_multiline = "\n" in file or "\r" in file
    try:
        path = Path(file)
        if not is_multiline and ".cif" in path.suffixes:
            return True  # Probably intended to parse as file
        if path.exists():
            return True  # If it is a file, we definitely want to parse that
        # Possibly a typo, but we want to check that path regardless.
        return (
            not is_multiline
            and path.parent.is_dir()
            and path.parent != Path(".")
        )
    except (OSError, ValueError):
        # OSError: e.g. the string is too long to be a file name.
        # ValueError: embedded null bytes on interpreters whose pathlib does not
        # already swallow that error.
        return False
24+
425

526
class ParseWarning(Warning):
627
def __init__(self, message):

parsnip/parsnip.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,13 @@
7474
from fnmatch import fnmatch
7575
from importlib.util import find_spec
7676
from pathlib import Path
77-
from typing import ClassVar
77+
from typing import ClassVar, TextIO
7878

7979
import numpy as np
8080
from more_itertools import flatten, peekable
8181
from numpy.lib.recfunctions import structured_to_unstructured
8282

83-
from parsnip._errors import ParseWarning
83+
from parsnip._errors import ParseWarning, _is_potentially_valid_path
8484
from parsnip.patterns import (
8585
_accumulate_nonsimple_data,
8686
_box_from_lengths_and_angles,
@@ -111,7 +111,7 @@ class CifFile:
111111
>>> from parsnip import CifFile
112112
>>> cif = CifFile("example_file.cif")
113113
>>> print(cif)
114-
CifFile(fn=example_file.cif) : 12 data entries, 2 data loops
114+
CifFile(file=example_file.cif) : 12 data entries, 2 data loops
115115
116116
Data entries are accessible via the :attr:`~.pairs` and :attr:`~.loops` attributes:
117117
@@ -141,21 +141,38 @@ class CifFile:
141141
Default value = ``False``
142142
"""
143143

144-
def __init__(self, fn: str | Path, cast_values: bool = False):
145-
"""Create a CifFile object from a filename.
144+
def __init__(
    self, file: str | Path | TextIO | Iterable[str], cast_values: bool = False
):
    """Create a CifFile object from a filename, file object, or iterator over `str`.

    On construction, the entire file is parsed into key-value pairs and data loops.
    Comment lines are ignored.

    ``file`` is handled in one of three ways:

    - a :class:`pathlib.Path`, or a ``str`` that looks like a path, is opened and
      parsed line by line;
    - any other ``str`` is treated as a raw CIF data block, split into lines, and
      parsed (a ``RuntimeWarning`` is emitted so that mistyped paths are noticed);
    - anything else is iterated directly and is expected to yield lines.

    If ``cast_values`` is true, the flag is stored for use when values are read.
    """
    self._fn = file
    self._pairs = {}
    self._loops = []

    self._cpat = {k: re.compile(pattern) for (k, pattern) in self.PATTERNS.items()}
    self._cast_values = cast_values

    if isinstance(file, Path) or (
        isinstance(file, str) and _is_potentially_valid_path(file)
    ):
        # Use a separate name for the handle so the ``file`` argument is not
        # rebound to a (soon to be closed) file object.
        with open(file) as handle:
            self._parse(peekable(handle))
    # We expect a TextIO | IOBase, but allow users to pass any Iterable[string_like]
    # This includes a str that does not point to a file!
    elif isinstance(file, str):
        msg = (
            "\nFile input was parsed as a raw CIF data block. "
            "If you intended to read the input string as a file path, please "
            "ensure it is validly formatted."
        )
        warnings.warn(msg, RuntimeWarning, stacklevel=2)
        # Keep line endings so the parser sees the same lines a file would give.
        self._parse(peekable(file.splitlines(True)))
    else:
        self._parse(peekable(file))
160177

161178
_SYMPY_AVAILABLE = find_spec("sympy") is not None
@@ -919,7 +936,7 @@ def _parse(self, data_iter: Iterable):
919936
def __repr__(self):
920937
n_pairs = len(self.pairs)
921938
n_tabs = len(self.loops)
922-
return f"CifFile(fn={self._fn}) : {n_pairs} data entries, {n_tabs} data loops"
939+
return f"CifFile(file={self._fn}) : {n_pairs} data entries, {n_tabs} data loops"
923940

924941
PATTERNS: ClassVar = {
925942
"key_value_general": r"^(_[\w\.\-/\[\d\]]+)\s+([^#]+)",

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ known-first-party = ["parsnip"]
9090

9191
[tool.ruff.lint.per-file-ignores]
9292
"tests/*" = ["D", "B018", "F811"]
93-
"tests/conftest.py" = ["N816", "N806"]
9493
"parsnip/*" = ["E741"]
9594
"__init__.py" = ["F401"] # Do not remove "unused" imports in __init__.py files
95+
"tests/conftest.py" = ["N806", "N816"] # Allow mixed-case globals
9696

9797
[tool.ruff.format]
9898
quote-style = "double"

tests/conftest.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# ruff: noqa: N816. Allow mixed-case global variables
21
from __future__ import annotations
32

43
import os
@@ -37,6 +36,18 @@ def remove_invalid(s):
3736
return s.replace("\r", "")
3837

3938

39+
def _array_assertion_verbose(keys, test_data, real_data):
    """Assert that two arrays are equal, naming the keys whose values differ.

    Parameters
    ----------
    keys : array_like
        Labels for the entries of ``test_data`` and ``real_data``.
    test_data : array_like
        Values produced by the code under test.
    real_data : array_like
        Reference values to compare against.

    Raises
    ------
    AssertionError
        If the arrays differ in shape or in any element. The message lists the
        offending keys together with the mismatched values.
    """
    keys = np.asarray(keys)
    test_data = np.asarray(test_data)
    real_data = np.asarray(real_data)

    # An elementwise comparison of differently shaped arrays can raise (or give
    # a mask that cannot index the inputs) before any assertion is made, so
    # report the shape mismatch explicitly instead.
    if test_data.shape != real_data.shape:
        raise AssertionError(
            f"Shape mismatch for key(s) {keys}: "
            f"{test_data.shape} != {real_data.shape}"
        )

    mismatch = test_data != real_data  # computed once and reused below
    msg = (
        f"Key(s) {keys[mismatch]} did not match:\n"
        f"{test_data[mismatch]}!="
        f"{real_data[mismatch]}\n"
    )
    np.testing.assert_equal(test_data, real_data, err_msg=msg)
49+
50+
4051
def _gemmi_read_keys(filename, keys, as_number=True):
4152
try:
4253
file_block = cif.read_file(filename).sole_block()

tests/test_ciffile.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1+
# ruff: noqa: SIM115
12
import re
3+
from pathlib import Path
24

5+
import numpy as np
36
import pytest
4-
from conftest import cif_files_mark
7+
from conftest import _array_assertion_verbose, cif_files_mark
58

9+
from parsnip import CifFile
610
from parsnip._errors import ParseWarning
711

812

@@ -28,3 +32,28 @@ def test_cast_values(cif_data):
2832

2933
cif_data.file._pairs = uncast_pairs # Need to reset the data
3034
assert cif_data.file.pairs == uncast_pairs
35+
36+
37+
def _read_lines(fn):
    """Return the lines of ``fn`` as a list, closing the file before returning."""
    with open(fn) as handle:
        return handle.readlines()


def _read_text(fn):
    """Return the contents of ``fn`` as one string, closing the file first."""
    with open(fn) as handle:
        return handle.read()


@pytest.mark.parametrize(
    ("input_preprocessor", "expect_warning"),
    [
        (open, None),  # IOBase (closed by the test body)
        (lambda fn: fn, None),  # string file path
        (lambda fn: Path(fn), None),  # Path
        (_read_lines, None),  # list[str]
        (_read_text, RuntimeWarning),  # raw string
    ],
)
@cif_files_mark
def test_open_methods(cif_data, input_preprocessor, expect_warning):
    """Check that every supported input form yields the same key-value pairs.

    The reference file is converted with ``input_preprocessor`` into one of the
    input forms accepted by ``CifFile`` and parsed again; the values read back
    must match those of the already-parsed ``cif_data.file``. When
    ``expect_warning`` is set, constructing the ``CifFile`` must emit it.
    """
    keys = list(cif_data.file.pairs)
    stored_data = np.asarray(list(cif_data.file.pairs.values()))

    source = input_preprocessor(cif_data.filename)
    try:
        if expect_warning is None:
            cif = CifFile(source)
        else:
            with pytest.warns(expect_warning, match="parsed as a raw CIF data block."):
                cif = CifFile(source)
    finally:
        # Only the file-object case holds an open handle; str, Path, and list
        # inputs have no ``close`` attribute.
        if hasattr(source, "close"):
            source.close()

    _array_assertion_verbose(keys, cif.get_from_pairs(keys), stored_data)

tests/test_key_reader.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import numpy as np
22
from conftest import (
3+
_array_assertion_verbose,
34
_gemmi_read_keys,
45
all_files_mark,
56
bad_cif,
@@ -9,18 +10,6 @@
910
from more_itertools import flatten
1011

1112

12-
def _array_assertion_verbose(keys, test_data, real_data):
13-
keys = np.asarray(keys)
14-
test_data = np.asarray(test_data)
15-
real_data = np.asarray(real_data)
16-
msg = (
17-
f"Key(s) {keys[test_data != real_data]} did not match:\n"
18-
f"{test_data[test_data != real_data]}!="
19-
f"{real_data[test_data != real_data]}\n"
20-
)
21-
np.testing.assert_equal(test_data, real_data, err_msg=msg)
22-
23-
2413
@all_files_mark
2514
def test_read_key_value_pairs(cif_data):
2615
pycif = pycifrw_or_xfail(cif_data)

tests/test_utils.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
from pathlib import Path
2+
13
import pytest
24

3-
from parsnip._errors import ParseError, ParseWarning
5+
from parsnip._errors import ParseError, ParseWarning, _is_potentially_valid_path
46

57

68
def test_parse_error(capfd):
@@ -14,3 +16,19 @@ def test_parse_warning():
1416
raise ParseWarning("TEST_WARNING_RAISED")
1517

1618
assert "TEST_WARNING_RAISED" in str(warning.value)
19+
20+
21+
# Directory containing this test module; used to build real and fake paths.
_TESTS_DIR = Path(__file__).parent

# (candidate string, whether it should be treated as a path)
_PATH_CASES = [
    (str(Path(__file__)), True),  # existing file
    (str(_TESTS_DIR / "conftest.py"), True),  # real file
    (str(_TESTS_DIR / "nonexistent.txt"), True),  # parent dir exists
    (str(_TESTS_DIR / "fake_file.cif"), True),  # .cif suffix
    (str(_TESTS_DIR / "asdf/noparent.txt"), False),  # no parent
    ("asdfasdfasd", False),
    ("asdfasdfasd.cif", True),
]


@pytest.mark.parametrize(("path_str", "expected"), _PATH_CASES)
def test_is_potentially_valid_path(path_str, expected):
    """Verify the path heuristic on existing, plausible, and implausible strings."""
    verdict = _is_potentially_valid_path(path_str)
    assert verdict is expected

0 commit comments

Comments
 (0)