Skip to content

Commit 08715f3

Browse files
authored
Merge pull request #57 from daichengxin/dev
fixed diann PTM parse bug
2 parents 9b473c1 + 9feb130 commit 08715f3

File tree

6 files changed

+100
-73
lines changed

6 files changed

+100
-73
lines changed

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ channels:
66
- nodefaults
77
dependencies:
88
- click
9-
- sdrf-pipelines>=0.0.31
9+
- sdrf-pipelines==0.0.33
1010
- pyopenms>=3.3.0
1111
- pandas
1212
- pyarrow>=16.1.0

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "quantms-utils"
33
description = "Python scripts and helpers for the quantMS workflow"
44
readme = "README.md"
55
license = "MIT"
6-
version = "0.0.23"
6+
version = "0.0.24"
77
authors = [
88
"Yasset Perez-Riverol <[email protected]>",
99
"Dai Chengxin <[email protected]>",
@@ -31,7 +31,7 @@ packages = [
3131
[tool.poetry.dependencies]
3232
python = "*"
3333
click = "*"
34-
sdrf-pipelines = ">=0.0.32"
34+
sdrf-pipelines = "==0.0.33"
3535
pyopenms = ">=3.2.0"
3636
pandas = "*"
3737
pyarrow = ">=16.1.0"

quantmsutils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.23"
1+
__version__ = "0.0.24"

quantmsutils/diann/dianncfg.py

Lines changed: 81 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@
77
import logging
88
import re
99
from typing import List, Tuple
10-
10+
from collections import defaultdict
1111
import click
1212
from sdrf_pipelines.openms.unimod import UnimodDatabase
1313

1414
logging.basicConfig(format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.DEBUG)
1515
logger = logging.getLogger(__name__)
16+
unimod_database = UnimodDatabase()
1617

1718

1819
@click.command("dianncfg", short_help="Create DIA-NN config file with enzyme and PTMs")
@@ -32,8 +33,7 @@ def dianncfg(ctx, enzyme, fix_mod, var_mod):
3233
:param var_mod: A string of variable modifications, separated by commas.
3334
"""
3435
cut = enzyme_cut(enzyme)
35-
unimod_database = UnimodDatabase()
36-
fix_ptm, var_ptm = convert_mod(unimod_database, fix_mod, var_mod)
36+
fix_ptm, var_ptm = convert_mod(fix_mod, var_mod)
3737

3838
var_ptm_str = " --var-mod "
3939
fix_ptm_str = " --fixed-mod "
@@ -42,83 +42,97 @@ def dianncfg(ctx, enzyme, fix_mod, var_mod):
4242
for mod in fix_ptm:
4343
diann_fix_ptm += fix_ptm_str + mod
4444
for mod in var_ptm:
45-
diann_var_ptm += var_ptm_str + mod
45+
if mod == "UniMod:765,-131.040485,*nM":
46+
diann_var_ptm += " --met-excision "
47+
else:
48+
diann_var_ptm += var_ptm_str + mod
4649

4750
with open("diann_config.cfg", "w") as file:
4851
file.write("--cut " + cut + diann_fix_ptm + diann_var_ptm)
4952

5053

51-
def convert_mod(unimod_database, fix_mod: str, var_mod: str) -> Tuple[List, List]:
54+
def get_mod(mod, mod_type):
    """Translate one modification string into the parts of a DIA-NN PTM entry.

    :param mod: Modification written as ``"<UniMod name> (<site>)"``, for example
        ``"Oxidation (M)"`` or ``"Met-loss (Protein N-term M)"``. Surrounding
        whitespace is ignored.
    :param mod_type: ``"fixed_mod"`` or ``"var_mod"``. Only used to recognise the
        variable N-terminal Met-loss case, which is the single terminus + residue
        combination that does not trigger a warning.
    :return: ``(accession_and_mass, site)``. The first item is the UniMod accession
        (``"UNIMOD:"`` rewritten to ``"UniMod:"``) joined by a comma to the
        monoisotopic delta mass. The site is ``"*n"`` for "Protein N-term", ``"n"``
        for "N-term", the text in parentheses as given, or a terminus prefix
        followed by the residue.
    :raises SystemExit: With exit code 1 when the name is not found in the UniMod
        database, when the modification is a labelling reagent, or when no
        ``(site)`` part is present.
    """
    # Text before the first space is the UniMod name; the rest carries "(site)".
    name, _, site_spec = mod.strip().partition(" ")

    match = next(
        (entry for entry in unimod_database.modifications if entry.get_name() == name),
        None,
    )
    if match is None:
        logging.error(
            "Currently only supported unimod modifications for DIA pipeline. Skipped: %s",
            mod,
        )
        raise SystemExit(1)

    diann_mod_name = match.get_name()
    # NOTE(review): relies on the private ``_delta_mono_mass`` attribute of the
    # sdrf-pipelines modification object, as the rest of this module does.
    diann_mod_accession = (
        match.get_accession().replace("UNIMOD:", "UniMod:")
        + ","
        + str(match._delta_mono_mass)
    )

    # TODO support DIA multiplex
    label_markers = ("TMT", "Label:", "iTRAQ", "mTRAQ", "Dimethyl:")
    if any(marker in diann_mod_name for marker in label_markers):
        logging.error(
            "quantms DIA-NN workflow only support LFQ now! Unsupported modifications: %s",
            mod,
        )
        raise SystemExit(1)

    site_match = re.search(r"\((.*?)\)", site_spec)
    if site_match is None:
        # Previously this surfaced as a bare IndexError with no hint about the input.
        logging.error("Modification has no '(site)' specification: %s", mod)
        raise SystemExit(1)
    site = site_match.group(1)

    termini = {"Protein N-term": "*n", "N-term": "n"}
    if site in termini:
        site = termini[site]
    elif " " in site:
        # "<terminus> <residue>", e.g. "Protein N-term M" -> "*nM".
        position, _, residue = site.rpartition(" ")
        site = termini.get(position, position) + residue
        is_met_excision = (
            site == "*nM" and diann_mod_name == "Met-loss" and mod_type == "var_mod"
        )
        if not is_met_excision:
            logging.warning(
                "Restricting to certain terminal AAs isn't directly supported. Please see https://github.com/vdemichev/DiaNN/issues/1791"
            )
    return diann_mod_accession, site
111+
112+
113+
def _merge_mod_sites(mods: str, mod_type: str) -> List[str]:
    """Build one ``"<modification>,<sites>"`` entry per distinct modification.

    Each comma-separated item of ``mods`` is resolved with ``get_mod``. Sites that
    resolve to the same modification are de-duplicated, sorted and concatenated.
    """
    sites_by_mod = defaultdict(list)
    for mod in mods.split(","):
        diann_mod, site = get_mod(mod, mod_type)
        sites_by_mod[diann_mod].append(site)

    entries = []
    for name, site_list in sites_by_mod.items():
        site_str = "".join(sorted(set(site_list)))
        entries.append(f"{name},{site_str}")
    return entries


def convert_mod(fix_mod: str, var_mod: str) -> Tuple[List, List]:
    """Convert fixed and variable modification strings into DIA-NN PTM entries.

    :param fix_mod: Comma-separated fixed modifications, e.g. ``"Carbamidomethyl (C)"``.
        An empty string (or ``None``) yields no entries.
    :param var_mod: Comma-separated variable modifications, same format.
    :return: ``(fix_ptm, var_ptm)``, two lists of ``"<modification>,<sites>"`` strings
        in first-seen order of each modification.
    """
    # Fixed modifications are resolved first, matching the original evaluation order.
    fix_ptm = _merge_mod_sites(fix_mod, "fixed_mod") if fix_mod else []
    var_ptm = _merge_mod_sites(var_mod, "var_mod") if var_mod else []
    return fix_ptm, var_ptm
124138

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
click
2-
sdrf-pipelines>=0.0.31
2+
sdrf-pipelines==0.0.33
33
pyopenms>=3.2.0
44
pandas
55
pyarrow>=16.1.0

tests/test_commands.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,20 @@ def test_diann2mztab_example(self):
8484
]
8585
result = run_cli_command("diann2mztab", args)
8686
assert result.exit_code == 0
87-
# Additional assertions could check for expected output files
87+
88+
def test_dianncfg_example(self):
    """Run ``dianncfg`` with an example enzyme and PTM set and expect exit code 0."""
    fixed_mods = "Carbamidomethyl (C)"
    variable_mods = "Oxidation (M),Phospho (S),Phospho (T),Phospho (Y),Acetyl (Protein N-term),Acetyl (K),Acetyl (R),Met-loss (Protein N-term M)"
    cli_args = ["--enzyme", "Trypsin", "--fix_mod", fixed_mods, "--var_mod", variable_mods]

    outcome = run_cli_command("dianncfg", cli_args)

    assert outcome.exit_code == 0
88101

89102

90103
class TestSamplesheetCommands:

0 commit comments

Comments
 (0)