diff --git a/src/cool_seq_tool/mappers/mane_transcript.py b/src/cool_seq_tool/mappers/mane_transcript.py index b02c2cb..18c05da 100644 --- a/src/cool_seq_tool/mappers/mane_transcript.py +++ b/src/cool_seq_tool/mappers/mane_transcript.py @@ -1374,11 +1374,12 @@ async def grch38_to_mane_c_p( mane_transcripts = set() # Used if getting longest compatible remaining for current_mane_data in mane_data: mane_c_ac = current_mane_data["RefSeq_nuc"] + mane_alt_ac = current_mane_data["GRCh38_chr"] mane_transcripts |= {mane_c_ac, current_mane_data["Ensembl_nuc"]} # GRCh38 -> MANE C mane_tx_genomic_data = await self.uta_db.get_mane_c_genomic_data( - mane_c_ac, None, start_pos, end_pos + ac=mane_c_ac, alt_ac=mane_alt_ac, start_pos=start_pos, end_pos=end_pos ) if not mane_tx_genomic_data: continue diff --git a/src/cool_seq_tool/sources/uta_database.py b/src/cool_seq_tool/sources/uta_database.py index 87651f2..45aba11 100644 --- a/src/cool_seq_tool/sources/uta_database.py +++ b/src/cool_seq_tool/sources/uta_database.py @@ -543,7 +543,8 @@ async def get_mane_c_genomic_data( self, ac: str, alt_ac: str | None, start_pos: int, end_pos: int ) -> GenomicTxMetadata | None: """Get MANE transcript and genomic data. Used when going from g. to MANE c. - representation. + representation. This function parses queried data from the tx_exon_aln_v + table, and sorts the queried data by the most recent genomic build >>> import asyncio >>> from cool_seq_tool.sources import UtaDatabase @@ -569,11 +570,17 @@ async def get_mane_c_genomic_data( successful """ results = await self.get_tx_exon_aln_v_data( - ac, start_pos, end_pos, alt_ac=alt_ac, use_tx_pos=False + tx_ac=ac, + start_pos=start_pos, + end_pos=end_pos, + alt_ac=alt_ac, + use_tx_pos=False, ) if not results: return None - result = results[0] + + # Sort by most recent chromosomal accession + result = results[-1] genomic_tx_data = self.data_from_result(result) if not genomic_tx_data: diff --git a/tests/sources/test_uta_database.py b/tests/sources/test_uta_database.py index c077320..c05cd63 100644 --- a/tests/sources/test_uta_database.py +++ b/tests/sources/test_uta_database.py @@ -198,6 +198,33 @@ async def test_mane_c_genomic_data(test_db): } assert resp == GenomicTxMetadata(**expected_params) + # Test example where sorting of tx_exon_aln_v is needed + resp = await test_db.get_mane_c_genomic_data( + "NM_000077.5", "NC_000009.12", 21971186, 21971187 + ) + expected_params = { + "gene": "CDKN2A", + "strand": Strand.NEGATIVE, + "tx_pos_range": (180, 487), + "alt_pos_range": (21970901, 21971208), + "alt_aln_method": "splign", + "tx_exon_id": 8314723, + "alt_exon_id": 8960507, + "coding_start_site": 30, + "coding_end_site": 501, + "pos_change": (21, 285), + "alt_pos_change_range": (21971187, 21971186), + "tx_ac": "NM_000077.5", + "alt_ac": "NC_000009.12", + } + assert resp == GenomicTxMetadata(**expected_params) + + # Test case where chromosomal accession is not provided + resp = await test_db.get_mane_c_genomic_data( + "NM_000077.5", None, 21971186, 21971187 + ) + assert resp == GenomicTxMetadata(**expected_params) + @pytest.mark.asyncio async def test_get_genomic_tx_data(test_db, genomic_tx_data):