Skip to content

Commit 994602a

Browse files
authored
Merge pull request #59 from bigbio/dev
Update changes in PTM diann handling
2 parents e26ad9b + 4128e98 commit 994602a

File tree

7 files changed

+144
-78
lines changed

7 files changed

+144
-78
lines changed

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,3 +169,9 @@ cython_debug/
169169

170170
.qodo
171171
/tests/test_data/RD139_Narrow_UPS1_0_1fmol_inj1.mzML
172+
173+
# DIA-NN config file generated by tests
174+
diann_config.cfg
175+
176+
# Parquet files generated by tests
177+
*.parquet

environment.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ channels:
66
- nodefaults
77
dependencies:
88
- click
9-
- sdrf-pipelines>=0.0.31
10-
- pyopenms>=3.3.0
9+
- sdrf-pipelines==0.0.33
10+
- pyopenms>=3.2.0
1111
- pandas
1212
- pyarrow>=16.1.0
1313
- scipy

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ packages = [
3131
[tool.poetry.dependencies]
3232
python = "*"
3333
click = "*"
34-
sdrf-pipelines = ">=0.0.32"
35-
pyopenms = ">=3.2.0"
34+
sdrf-pipelines = "==0.0.33"
35+
pyopenms = ">=3.3.0"
3636
pandas = "*"
3737
pyarrow = ">=16.1.0"
3838
scipy = "*"

quantmsutils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.23"
1+
__version__ = "0.0.24"

quantmsutils/diann/dianncfg.py

Lines changed: 117 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,38 @@
77
import logging
88
import re
99
from typing import List, Tuple
10-
10+
from collections import defaultdict
1111
import click
1212
from sdrf_pipelines.openms.unimod import UnimodDatabase
1313

1414
logging.basicConfig(format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.DEBUG)
1515
logger = logging.getLogger(__name__)
1616

17+
# Lazy initialization of UnimodDatabase for improved testability.
18+
# The database is created on first access rather than at module import time,
19+
# which allows tests to mock or replace it more easily.
20+
_unimod_database = None
21+
22+
23+
def get_unimod_database():
    """
    Return the shared UnimodDatabase, building it the first time it is requested.

    The instance is cached in the module-level ``_unimod_database`` variable, so
    importing this module does not construct the database. Resetting
    ``_unimod_database`` to None (e.g. from a test) makes the next call build a
    fresh instance.

    :return: The cached UnimodDatabase instance.
    """
    global _unimod_database
    # Fast path: reuse the instance created by an earlier call.
    if _unimod_database is not None:
        return _unimod_database
    _unimod_database = UnimodDatabase()
    return _unimod_database
37+
38+
39+
# Met-loss modification constant (UniMod:765) with mass shift and site specification
40+
MET_LOSS_MODIFICATION = "UniMod:765,-131.040485,*nM"
41+
1742

1843
@click.command("dianncfg", short_help="Create DIA-NN config file with enzyme and PTMs")
1944
@click.option("--enzyme", "-e", help="")
@@ -32,8 +57,7 @@ def dianncfg(ctx, enzyme, fix_mod, var_mod):
3257
:param var_mod: A string of variable modifications, separated by commas.
3358
"""
3459
cut = enzyme_cut(enzyme)
35-
unimod_database = UnimodDatabase()
36-
fix_ptm, var_ptm = convert_mod(unimod_database, fix_mod, var_mod)
60+
fix_ptm, var_ptm = convert_mod(fix_mod, var_mod)
3761

3862
var_ptm_str = " --var-mod "
3963
fix_ptm_str = " --fixed-mod "
@@ -42,83 +66,106 @@ def dianncfg(ctx, enzyme, fix_mod, var_mod):
4266
for mod in fix_ptm:
4367
diann_fix_ptm += fix_ptm_str + mod
4468
for mod in var_ptm:
45-
diann_var_ptm += var_ptm_str + mod
69+
if mod == MET_LOSS_MODIFICATION:
70+
diann_var_ptm += " --met-excision "
71+
else:
72+
diann_var_ptm += var_ptm_str + mod
4673

4774
with open("diann_config.cfg", "w") as file:
4875
file.write("--cut " + cut + diann_fix_ptm + diann_var_ptm)
4976

5077

51-
def convert_mod(unimod_database, fix_mod: str, var_mod: str) -> Tuple[List, List]:
78+
def get_mod(mod, mod_type):
    """
    Look up one modification in the Unimod database and format it for DIA-NN.

    The entry is expected to look like ``"<Unimod name> (<site>)"``, for example
    ``"Oxidation (M)"`` or ``"Acetyl (Protein N-term)"``. Surrounding whitespace
    is ignored.

    :param mod: The modification string: Unimod name, a space, then the site in parentheses.
    :param mod_type: The type of modification ('fixed_mod' or 'var_mod').
    :return: A tuple (diann_mod_accession, site). diann_mod_accession is
        ``"UniMod:<id>,<mono mass delta>"``; site is the residue letter(s), or
        ``"*n"`` / ``"n"`` for "Protein N-term" / "N-term".
    :raises SystemExit: With exit code 1 (after logging an error) when the name is
        not in Unimod, the modification is a labelling reagent, no parenthesised
        site is present, or the site combines a terminus with a residue (the only
        accepted combination is variable Met-loss on "Protein N-term M").
    """
    # Site names from the input mapped to the codes written into the DIA-NN string.
    terminal_codes = {"Protein N-term": "*n", "N-term": "n"}
    # Name fragments that identify labelling reagents; only LFQ is handled here.
    label_markers = ("TMT", "Label:", "iTRAQ", "mTRAQ", "Dimethyl:")

    mod = mod.strip()
    # Everything before the first space is the Unimod name, the rest holds the site.
    mod_name, _, site_spec = mod.partition(" ")

    unimod_entry = next(
        (
            candidate
            for candidate in get_unimod_database().modifications
            if candidate.get_name() == mod_name
        ),
        None,
    )
    if unimod_entry is None:
        logger.error(
            "Only Unimod modifications are currently supported for the DIA pipeline. "
            "Unsupported modification: %s",
            mod,
        )
        raise SystemExit(1)

    diann_mod_name = unimod_entry.get_name()
    # NOTE(review): reads the private attribute _delta_mono_mass, exactly as before;
    # switch to a public accessor if the Unimod entry class provides one.
    diann_mod_accession = (
        unimod_entry.get_accession().replace("UNIMOD:", "UniMod:")
        + ","
        + str(unimod_entry._delta_mono_mass)
    )

    # TODO support DIA multiplex
    if any(marker in diann_mod_name for marker in label_markers):
        logger.error(
            "quantms DIA-NN workflow only supports LFQ now! Unsupported modifications: %s",
            mod,
        )
        raise SystemExit(1)

    # The site is the content of the first parenthesised group after the name.
    site_match = re.search(r"\((.*?)\)", site_spec)
    if site_match is None:
        logger.error("No site specification found in modification string: %s", mod)
        raise SystemExit(1)
    site = site_match.group(1)

    if site in terminal_codes:
        return diann_mod_accession, terminal_codes[site]
    if " " not in site:
        # Plain residue site such as "M" or "C".
        return diann_mod_accession, site

    # Terminus plus residue, e.g. "Protein N-term M": the last word is the residue.
    position, _, residue = site.rpartition(" ")
    site = terminal_codes.get(position, position) + residue
    if site == "*nM" and diann_mod_name == "Met-loss" and mod_type == "var_mod":
        return diann_mod_accession, site

    logger.error(
        "Restricting to certain terminal AAs isn't directly supported. "
        "Please see https://github.com/vdemichev/DiaNN/issues/1791"
    )
    raise SystemExit(1)
144+
145+
146+
def convert_mod(fix_mod: str, var_mod: str) -> Tuple[List, List]:
    """
    Convert comma-separated modification lists into DIA-NN modification strings.

    Each entry is resolved with ``get_mod``; entries that resolve to the same
    accession/mass are merged into one string whose site part is the sorted,
    de-duplicated concatenation of their sites.

    :param fix_mod: Fixed modifications, separated by commas (may be empty or None).
    :param var_mod: Variable modifications, separated by commas (may be empty or None).
    :return: A tuple (fix_ptm, var_ptm) of lists of ``"<accession>,<mass>,<sites>"`` strings.
    :raises SystemExit: Propagated from ``get_mod`` when an entry is not supported.
    """
    # Fixed modifications are resolved first, so an unsupported fixed entry is
    # reported before any variable entry is examined.
    fix_ptm = _merge_mods(fix_mod, "fixed_mod")
    var_ptm = _merge_mods(var_mod, "var_mod")
    return fix_ptm, var_ptm


def _merge_mods(mods: str, mod_type: str) -> List[str]:
    """Resolve every entry of one comma-separated list and merge sites per modification."""
    if not mods:
        return []

    sites_by_mod = defaultdict(set)
    for entry in mods.split(","):
        entry = entry.strip()
        if not entry:
            # A stray or trailing comma yields an empty entry; there is nothing to resolve.
            continue
        diann_mod, site = get_mod(entry, mod_type)
        sites_by_mod[diann_mod].add(site)

    # Dict insertion order keeps the modifications in first-seen order.
    return [f"{name},{''.join(sorted(sites))}" for name, sites in sites_by_mod.items()]
124171

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
click
2-
sdrf-pipelines>=0.0.31
3-
pyopenms>=3.2.0
2+
sdrf-pipelines==0.0.33
3+
pyopenms>=3.3.0
44
pandas
55
pyarrow>=16.1.0
66
scipy

tests/test_commands.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,20 @@ def test_diann2mztab_example(self):
8484
]
8585
result = run_cli_command("diann2mztab", args)
8686
assert result.exit_code == 0
87-
# Additional assertions could check for expected output files
87+
88+
def test_dianncfg_example(self):
    """Run the dianncfg command on an example enzyme/PTM set and expect exit code 0."""
    variable_mods = ",".join(
        [
            "Oxidation (M)",
            "Phospho (S)",
            "Phospho (T)",
            "Phospho (Y)",
            "Acetyl (Protein N-term)",
            "Acetyl (K)",
            "Acetyl (R)",
            "Met-loss (Protein N-term M)",
        ]
    )
    cli_args = ["--enzyme", "Trypsin"]
    cli_args += ["--fix_mod", "Carbamidomethyl (C)"]
    cli_args += ["--var_mod", variable_mods]

    outcome = run_cli_command("dianncfg", cli_args)

    assert outcome.exit_code == 0
88101

89102

90103
class TestSamplesheetCommands:

0 commit comments

Comments
 (0)