Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ channels:
- nodefaults
dependencies:
- click
- sdrf-pipelines>=0.0.31
- sdrf-pipelines==0.0.33
- pyopenms>=3.3.0
- pandas
- pyarrow>=16.1.0
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "quantms-utils"
description = "Python scripts and helpers for the quantMS workflow"
readme = "README.md"
license = "MIT"
version = "0.0.23"
version = "0.0.24"
authors = [
"Yasset Perez-Riverol <[email protected]>",
"Dai Chengxin <[email protected]>",
Expand Down Expand Up @@ -31,7 +31,7 @@ packages = [
[tool.poetry.dependencies]
python = "*"
click = "*"
sdrf-pipelines = ">=0.0.32"
sdrf-pipelines = "==0.0.33"
pyopenms = ">=3.2.0"
pandas = "*"
pyarrow = ">=16.1.0"
Expand Down
2 changes: 1 addition & 1 deletion quantmsutils/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.23"
__version__ = "0.0.24"
148 changes: 81 additions & 67 deletions quantmsutils/diann/dianncfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
import logging
import re
from typing import List, Tuple

from collections import defaultdict
import click
from sdrf_pipelines.openms.unimod import UnimodDatabase

logging.basicConfig(format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.DEBUG)
logger = logging.getLogger(__name__)
unimod_database = UnimodDatabase()


@click.command("dianncfg", short_help="Create DIA-NN config file with enzyme and PTMs")
Expand All @@ -32,8 +33,7 @@ def dianncfg(ctx, enzyme, fix_mod, var_mod):
:param var_mod: A string of variable modifications, separated by commas.
"""
cut = enzyme_cut(enzyme)
unimod_database = UnimodDatabase()
fix_ptm, var_ptm = convert_mod(unimod_database, fix_mod, var_mod)
fix_ptm, var_ptm = convert_mod(fix_mod, var_mod)

var_ptm_str = " --var-mod "
fix_ptm_str = " --fixed-mod "
Expand All @@ -42,83 +42,97 @@ def dianncfg(ctx, enzyme, fix_mod, var_mod):
for mod in fix_ptm:
diann_fix_ptm += fix_ptm_str + mod
for mod in var_ptm:
diann_var_ptm += var_ptm_str + mod
if mod == "UniMod:765,-131.040485,*nM":
diann_var_ptm += " --met-excision "
else:
diann_var_ptm += var_ptm_str + mod

with open("diann_config.cfg", "w") as file:
file.write("--cut " + cut + diann_fix_ptm + diann_var_ptm)


def convert_mod(unimod_database, fix_mod: str, var_mod: str) -> Tuple[List, List]:
def get_mod(mod, mod_type):
    """Translate one modification string into DIA-NN accession and site notation.

    :param mod: Modification written as "<UniMod name> (<site>)", for example
        "Oxidation (M)", "Acetyl (Protein N-term)" or
        "Met-loss (Protein N-term M)". Surrounding whitespace is ignored.
    :param mod_type: "fixed_mod" or "var_mod". Only consulted to recognise
        variable N-terminal Met-loss, which does not trigger the
        terminal-residue warning.
    :return: Tuple ``(accession, site)`` where ``accession`` is
        "UniMod:<id>,<monoisotopic mass delta>" and ``site`` is the residue
        string, with "Protein N-term" encoded as "*n" and "N-term" as "n".
    :raises SystemExit: With exit code 1 when the name is not found in the
        UniMod database, when the modification is a labelling/multiplexing
        reagent, or when no "(site)" part is present.
    """
    mod = mod.strip()
    # Hoisted: the name to look up does not change while scanning the database.
    mod_name = mod.split(" ")[0]

    match = next(
        (
            modification
            for modification in unimod_database.modifications
            if modification.get_name() == mod_name
        ),
        None,
    )
    if match is None:
        logging.error(
            "Currently only supported unimod modifications for DIA pipeline. Skipped: %s",
            mod,
        )
        # SystemExit is what exit() raises, but it does not depend on the
        # `site` module having injected the `exit` helper into builtins.
        raise SystemExit(1)

    diann_mod_name = match.get_name()
    diann_mod_accession = (
        match.get_accession().replace("UNIMOD:", "UniMod:")
        + ","
        + str(match._delta_mono_mass)
    )

    # TODO support DIA multiplex
    label_markers = ("TMT", "Label:", "iTRAQ", "mTRAQ", "Dimethyl:")
    if any(marker in diann_mod_name for marker in label_markers):
        logging.error(
            "quantms DIA-NN workflow only support LFQ now! Unsupported modifications: %s",
            mod,
        )
        raise SystemExit(1)

    # The site is the text inside the first pair of parentheses after the name.
    sites = re.findall(r"\((.*?)\)", " ".join(mod.split(" ")[1:]))
    if not sites:
        logging.error("No modification site given in parentheses. Skipped: %s", mod)
        raise SystemExit(1)
    site = sites[0]

    terminus_codes = {"Protein N-term": "*n", "N-term": "n"}
    if site in terminus_codes:
        return diann_mod_accession, terminus_codes[site]

    parts = site.split(" ")
    if len(parts) >= 2:
        # "<position> <residue>", e.g. "Protein N-term M" -> "*nM".
        position = " ".join(parts[:-1])
        site = terminus_codes.get(position, position) + parts[-1]
        is_met_excision = (
            site == "*nM" and diann_mod_name == "Met-loss" and mod_type == "var_mod"
        )
        if not is_met_excision:
            logging.warning(
                "Restricting to certain terminal AAs isn't directly supported. Please see https://github.com/vdemichev/DiaNN/issues/1791"
            )
    return diann_mod_accession, site


def convert_mod(fix_mod: str, var_mod: str) -> Tuple[List, List]:
    """Convert comma-separated modification lists into DIA-NN modification strings.

    Each entry is resolved with ``get_mod``; entries that resolve to the same
    accession are merged into one string whose site part is the sorted,
    de-duplicated concatenation of their sites.

    :param fix_mod: Fixed modifications separated by commas, e.g.
        "Carbamidomethyl (C)". Empty or ``None`` means no fixed modifications.
    :param var_mod: Variable modifications separated by commas. Empty or
        ``None`` means no variable modifications.
    :return: Tuple ``(fix_ptm, var_ptm)`` of lists of "<accession>,<sites>"
        strings, in order of first appearance of each accession.
    """

    def _merge_sites(mods, mod_type):
        # Group sites per accession so one modification on several residues
        # becomes a single entry.
        merged = defaultdict(list)
        for mod in (mods or "").split(","):
            if not mod.strip():
                # Blank entries come from empty input or stray commas.
                continue
            diann_mod, site = get_mod(mod, mod_type)
            merged[diann_mod].append(site)
        return [
            f"{name},{''.join(sorted(set(site_list)))}"
            for name, site_list in merged.items()
        ]

    fix_ptm = _merge_sites(fix_mod, "fixed_mod")
    var_ptm = _merge_sites(var_mod, "var_mod")
    return fix_ptm, var_ptm

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
click
sdrf-pipelines>=0.0.31
sdrf-pipelines==0.0.33
pyopenms>=3.2.0
pandas
pyarrow>=16.1.0
Expand Down
15 changes: 14 additions & 1 deletion tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,20 @@ def test_diann2mztab_example(self):
]
result = run_cli_command("diann2mztab", args)
assert result.exit_code == 0
# Additional assertions could check for expected output files

def test_dianncfg_example(self):
    """Smoke-test ``dianncfg``: trypsin with common fixed and variable PTMs exits with code 0."""
    variable_mods = ",".join(
        [
            "Oxidation (M)",
            "Phospho (S)",
            "Phospho (T)",
            "Phospho (Y)",
            "Acetyl (Protein N-term)",
            "Acetyl (K)",
            "Acetyl (R)",
            "Met-loss (Protein N-term M)",
        ]
    )
    cli_args = ["--enzyme", "Trypsin"]
    cli_args += ["--fix_mod", "Carbamidomethyl (C)"]
    cli_args += ["--var_mod", variable_mods]

    outcome = run_cli_command("dianncfg", cli_args)

    assert outcome.exit_code == 0


class TestSamplesheetCommands:
Expand Down
Loading