77import logging
88import re
99from typing import List , Tuple
10-
10+ from collections import defaultdict
1111import click
1212from sdrf_pipelines .openms .unimod import UnimodDatabase
1313
# Configure the root logger once at import time: DEBUG level, with every record
# prefixed by a timestamp and the name of the function that emitted it.
1414logging .basicConfig (format = "%(asctime)s [%(funcName)s] - %(message)s" , level = logging .DEBUG )
# Module-level logger named after this module.
1515logger = logging .getLogger (__name__ )
# Single UnimodDatabase instance created at import time; get_mod iterates over
# its `modifications` to look up modifications by name.
16+ unimod_database = UnimodDatabase ()
1617
1718
1819@click .command ("dianncfg" , short_help = "Create DIA-NN config file with enzyme and PTMs" )
@@ -32,8 +33,7 @@ def dianncfg(ctx, enzyme, fix_mod, var_mod):
3233 :param var_mod: A string of variable modifications, separated by commas.
3334 """
3435 cut = enzyme_cut (enzyme )
35- unimod_database = UnimodDatabase ()
36- fix_ptm , var_ptm = convert_mod (unimod_database , fix_mod , var_mod )
36+ fix_ptm , var_ptm = convert_mod (fix_mod , var_mod )
3737
3838 var_ptm_str = " --var-mod "
3939 fix_ptm_str = " --fixed-mod "
@@ -42,83 +42,97 @@ def dianncfg(ctx, enzyme, fix_mod, var_mod):
4242 for mod in fix_ptm :
4343 diann_fix_ptm += fix_ptm_str + mod
4444 for mod in var_ptm :
45- diann_var_ptm += var_ptm_str + mod
45+ if mod == "UniMod:765,-131.040485,*nM" :
46+ diann_var_ptm += " --met-excision "
47+ else :
48+ diann_var_ptm += var_ptm_str + mod
4649
4750 with open ("diann_config.cfg" , "w" ) as file :
4851 file .write ("--cut " + cut + diann_fix_ptm + diann_var_ptm )
4952
5053
51- def convert_mod ( unimod_database , fix_mod : str , var_mod : str ) -> Tuple [ List , List ] :
def get_mod(mod: str, mod_type: str) -> Tuple[str, str]:
    """
    Translate one modification description into DIA-NN notation.

    The first space-separated token of ``mod`` is looked up by name in the
    module-level ``unimod_database``; the site is taken from the first
    parenthesised group in the remainder of the string.

    :param mod: Modification description: a UniMod name followed by the site in
        parentheses.
    :param mod_type: ``"var_mod"`` or ``"fixed_mod"``. Only used to decide whether
        the terminal-residue warning is emitted.
    :return: Tuple ``(accession_and_mass, site)`` where the first element is
        ``"UniMod:<id>,<monoisotopic delta mass>"`` and the second is the DIA-NN
        site code (``"*n"`` for protein N-term, ``"n"`` for N-term, optionally
        followed by a residue, or the raw site text otherwise).
    :raises SystemExit: With exit code 1 when the modification is not found in
        UniMod, is a labelling/multiplex modification, or has no parenthesised
        site.
    """
    pattern = re.compile(r"\((.*?)\)")
    mod = mod.strip()
    mod_name, _, site_spec = mod.partition(" ")

    diann_mod_accession = None
    diann_mod_name = None
    for modification in unimod_database.modifications:
        if modification.get_name() == mod_name:
            diann_mod_accession = (
                modification.get_accession().replace("UNIMOD:", "UniMod:")
                + ","
                + str(modification._delta_mono_mass)
            )
            diann_mod_name = modification.get_name()
            break

    if diann_mod_accession is None:
        logger.error("Currently only supported unimod modifications for DIA pipeline. Skipped: %s", mod)
        # `exit()` is injected by the `site` module and may be absent; raise directly.
        raise SystemExit(1)

    # TODO support DIA multiplex
    unsupported_markers = ("TMT", "Label:", "iTRAQ", "mTRAQ", "Dimethyl:")
    if any(marker in diann_mod_name for marker in unsupported_markers):
        logger.error("quantms DIA-NN workflow only support LFQ now! Unsupported modifications: %s", mod)
        raise SystemExit(1)

    sites = pattern.findall(site_spec)
    if not sites:
        # Without this guard the lookup below would fail with a bare IndexError.
        logger.error("No modification site found in parentheses. Skipped: %s", mod)
        raise SystemExit(1)
    site = sites[0]

    terminus_codes = {"Protein N-term": "*n", "N-term": "n"}
    if site in terminus_codes:
        site = terminus_codes[site]
    elif " " in site:
        # "<position> <residue>": translate the position part, keep the residue.
        position, _, residue = site.rpartition(" ")
        site = terminus_codes.get(position, position) + residue
        # Variable Met-loss on protein N-terminal M is the one residue-restricted
        # terminal modification that does not get the warning.
        is_met_excision = site == "*nM" and diann_mod_name == "Met-loss" and mod_type == "var_mod"
        if not is_met_excision:
            logger.warning(
                "Restricting to certain terminal AAs isn't directly supported. Please see https://github.com/vdemichev/DiaNN/issues/1791"
            )
    return diann_mod_accession, site
111+
112+
def _merge_mod_sites(mods: str, mod_type: str) -> List[str]:
    """Convert a comma-separated modification string into DIA-NN entries, one per modification."""
    merged = defaultdict(set)
    for mod in mods.split(","):
        mod = mod.strip()
        if not mod:
            # Tolerate trailing commas / stray whitespace instead of aborting on "".
            continue
        diann_mod, site = get_mod(mod, mod_type)
        merged[diann_mod].add(site)
    # Merge the same modification on different sites into a single entry whose
    # site codes are de-duplicated, sorted and concatenated.
    return [f"{name},{''.join(sorted(sites))}" for name, sites in merged.items()]


def convert_mod(fix_mod: str, var_mod: str) -> Tuple[List[str], List[str]]:
    """
    Convert fixed and variable modification lists into DIA-NN modification strings.

    Each entry is resolved through ``get_mod``; entries that resolve to the same
    modification are merged into one string of the form
    ``"<accession>,<mass>,<sites>"``.

    :param fix_mod: Fixed modifications, separated by commas. An empty string or
        ``None`` means no fixed modifications.
    :param var_mod: Variable modifications, separated by commas. An empty string or
        ``None`` means no variable modifications.
    :return: Tuple ``(fix_ptm, var_ptm)`` of DIA-NN modification strings, in order
        of first appearance.
    :raises SystemExit: Propagated from ``get_mod`` for unsupported modifications.
    """
    fix_ptm = _merge_mod_sites(fix_mod, "fixed_mod") if fix_mod else []
    var_ptm = _merge_mod_sites(var_mod, "var_mod") if var_mod else []
    return fix_ptm, var_ptm
124138
0 commit comments