diff --git a/.gitignore b/.gitignore
index f0fbdb3cc..332dd9ccc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,3 +27,4 @@ docs/source/generated
 # gcloud cli
 google-cloud-cli-*
 google-cloud-sdk
+.venv/
diff --git a/malariagen_data/anopheles.py b/malariagen_data/anopheles.py
index ed9cb65cf..f6cbfe037 100644
--- a/malariagen_data/anopheles.py
+++ b/malariagen_data/anopheles.py
@@ -11,6 +11,7 @@
 import plotly.express as px  # type: ignore
 import plotly.graph_objects as go  # type: ignore
 from numpydoc_decorator import doc  # type: ignore
+from .util import parse_single_region
 
 
 from .anoph import (
@@ -553,6 +554,44 @@ def _sample_count_het(
         )
 
         return sample_id, sample_set, windows, counts
+
+    @check_types
+    @doc(
+        summary="Return windowed heterozygosity for a single sample over a genome region.",
+        returns=dict(
+            windows="Start and end positions of each window, shape (n_windows, 2).",
+            counts="Number of heterozygous sites in each window, shape (n_windows,).",
+        ),
+    )
+    def heterozygosity(
+        self,
+        sample: base_params.sample,
+        region: base_params.region,
+        window_size: het_params.window_size = het_params.window_size_default,
+        site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
+        sample_set: Optional[base_params.sample_set] = None,
+        chunks: base_params.chunks = base_params.native_chunks,
+        inline_array: base_params.inline_array = base_params.inline_array_default,
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        # N.B., the documentation for this method is supplied via @doc.
+
+        # Resolve the region argument to a single Region object.
+        resolved_region: Region = parse_single_region(self, region)
+        del region
+
+        # Delegate to the private helper; it also returns the sample
+        # identifier and sample set, which are not part of this API.
+        _, _, windows, counts = self._sample_count_het(
+            sample=sample,
+            region=resolved_region,
+            site_mask=site_mask,
+            window_size=window_size,
+            sample_set=sample_set,
+            chunks=chunks,
+            inline_array=inline_array,
+        )
+
+        return windows, counts
 
     @check_types
     @doc(
diff --git a/tests/anoph/test_heterozygosity.py b/tests/anoph/test_heterozygosity.py
new file mode 100644
index 000000000..ebdf5a177
--- /dev/null
+++ b/tests/anoph/test_heterozygosity.py
@@ -0,0 +1,60 @@
+import numpy as np
+import pytest
+
+from malariagen_data.anopheles import AnophelesDataResource
+from malariagen_data.util import Region
+
+
+@pytest.fixture
+def fake_windows_counts():
+    # Pretend we have two windows.
+    windows = np.array([[0, 10], [10, 20]])
+    counts = np.array([3, 7])
+    return windows, counts
+
+
+def test_heterozygosity_wraps_sample_count_het(monkeypatch, fake_windows_counts):
+    expected_windows, expected_counts = fake_windows_counts
+
+    # Define a dummy logger with a debug method.
+    class DummyLogger:
+        def debug(self, *args, **kwargs):
+            pass
+
+    # Monkey-patch __init__ so the resource can be constructed without
+    # arguments; the replacement only sets up a dummy _log attribute.
+    monkeypatch.setattr(
+        AnophelesDataResource,
+        "__init__",
+        lambda self, *args, **kwargs: setattr(self, "_log", DummyLogger()),
+    )
+
+    # Monkey-patch the private helper to record how it was called and to
+    # return (sample_id, sample_set, windows, counts).
+    helper_kwargs = {}
+
+    def fake_sample_count_het(self, **kwargs):
+        helper_kwargs.update(kwargs)
+        return "S1", "setA", expected_windows, expected_counts
+
+    monkeypatch.setattr(
+        AnophelesDataResource, "_sample_count_het", fake_sample_count_het
+    )
+
+    resource = AnophelesDataResource()
+
+    # Call the public method.
+    windows, counts = resource.heterozygosity(
+        sample="any_sample",
+        region=Region(contig="2L", start=100, end=200),
+        window_size=10,
+    )
+
+    # The public method must forward its arguments to the helper ...
+    assert helper_kwargs["sample"] == "any_sample"
+    assert helper_kwargs["window_size"] == 10
+    assert isinstance(helper_kwargs["region"], Region)
+
+    # ... and return exactly the arrays the helper produced.
+    assert np.array_equal(windows, expected_windows)
+    assert np.array_equal(counts, expected_counts)