Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
96d2fe1
Merge pull request #7 from bigbio/dev
daichengxin May 8, 2025
232e9ec
Merge pull request #9 from bigbio/dev
daichengxin Nov 21, 2025
65fbeb8
fixed diann PTM parse bug
daichengxin Nov 24, 2025
1a7b394
fixed
daichengxin Nov 25, 2025
c66ff69
Update dianncfg.py
daichengxin Nov 25, 2025
1b54714
Update test_commands.py
daichengxin Nov 25, 2025
4557f07
Update dianncfg.py
daichengxin Nov 25, 2025
b7cdfe7
Update dianncfg.py
daichengxin Nov 25, 2025
5f04fe4
Update dianncfg.py
daichengxin Nov 27, 2025
d76f991
Update test_commands.py
daichengxin Nov 27, 2025
9feb130
Update dianncfg.py
daichengxin Nov 27, 2025
08715f3
Merge pull request #57 from daichengxin/dev
ypriverol Nov 27, 2025
a88d479
Merge pull request #58 from bigbio/main
ypriverol Nov 27, 2025
6e14a1b
Initial plan
Copilot Nov 27, 2025
5008129
Update quantmsutils/diann/dianncfg.py
ypriverol Nov 27, 2025
51f8610
Update quantmsutils/diann/dianncfg.py
ypriverol Nov 27, 2025
2a03fcc
Define MET_LOSS_MODIFICATION constant to replace hardcoded magic value
Copilot Nov 27, 2025
4c436ad
Add diann_config.cfg to .gitignore
Copilot Nov 27, 2025
17ee180
Update quantmsutils/diann/dianncfg.py
daichengxin Nov 27, 2025
742695d
Update quantmsutils/diann/dianncfg.py
daichengxin Nov 27, 2025
88989e2
Merge pull request #10 from bigbio/dev
daichengxin Nov 27, 2025
af8aa49
update
daichengxin Nov 27, 2025
cd28956
Merge pull request #62 from daichengxin/dev
daichengxin Nov 27, 2025
a1dc5ac
Merge pull request #60 from bigbio/copilot/sub-pr-59
ypriverol Nov 27, 2025
465f606
Update environment.yml
ypriverol Nov 27, 2025
78c988d
Initial plan
Copilot Nov 27, 2025
2deda03
Implement lazy initialization for UnimodDatabase to improve testability
Copilot Nov 27, 2025
e5310ef
Clarify docstring for get_unimod_database testing usage
Copilot Nov 27, 2025
372dbeb
Merge pull request #63 from bigbio/copilot/sub-pr-59-another-one
ypriverol Nov 27, 2025
106abbf
Merge pull request #11 from bigbio/dev
daichengxin Nov 27, 2025
00008df
Initial plan
Copilot Nov 27, 2025
7adceb5
Remove unreachable else block in get_mod function
Copilot Nov 27, 2025
6255ef9
Remove parquet test artifacts and add to gitignore
Copilot Nov 27, 2025
a9251f7
Merge pull request #64 from bigbio/copilot/sub-pr-59-yet-again
daichengxin Nov 27, 2025
4631f79
Merge pull request #12 from bigbio/dev
daichengxin Nov 27, 2025
c82a93f
Update dianncfg.py
daichengxin Nov 27, 2025
8aea86b
Merge pull request #65 from daichengxin/dev
daichengxin Nov 27, 2025
9fdb40c
Update quantmsutils/diann/dianncfg.py
ypriverol Nov 27, 2025
1b0fe77
Update quantmsutils/diann/dianncfg.py
ypriverol Nov 27, 2025
4128e98
Update pyproject.toml
ypriverol Nov 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ channels:
- nodefaults
dependencies:
- click
- sdrf-pipelines>=0.0.31
- sdrf-pipelines==0.0.33
- pyopenms>=3.3.0
- pandas
- pyarrow>=16.1.0
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ packages = [
[tool.poetry.dependencies]
python = "*"
click = "*"
sdrf-pipelines = ">=0.0.32"
sdrf-pipelines = "==0.0.33"
pyopenms = ">=3.2.0"
pandas = "*"
pyarrow = ">=16.1.0"
Expand Down
2 changes: 1 addition & 1 deletion quantmsutils/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.23"
__version__ = "0.0.24"
148 changes: 81 additions & 67 deletions quantmsutils/diann/dianncfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
import logging
import re
from typing import List, Tuple

from collections import defaultdict
import click
from sdrf_pipelines.openms.unimod import UnimodDatabase

logging.basicConfig(format="%(asctime)s [%(funcName)s] - %(message)s", level=logging.DEBUG)
logger = logging.getLogger(__name__)
unimod_database = UnimodDatabase()


@click.command("dianncfg", short_help="Create DIA-NN config file with enzyme and PTMs")
Expand All @@ -32,8 +33,7 @@ def dianncfg(ctx, enzyme, fix_mod, var_mod):
:param var_mod: A string of variable modifications, separated by commas.
"""
cut = enzyme_cut(enzyme)
unimod_database = UnimodDatabase()
fix_ptm, var_ptm = convert_mod(unimod_database, fix_mod, var_mod)
fix_ptm, var_ptm = convert_mod(fix_mod, var_mod)

var_ptm_str = " --var-mod "
fix_ptm_str = " --fixed-mod "
Expand All @@ -42,83 +42,97 @@ def dianncfg(ctx, enzyme, fix_mod, var_mod):
for mod in fix_ptm:
diann_fix_ptm += fix_ptm_str + mod
for mod in var_ptm:
diann_var_ptm += var_ptm_str + mod
if mod == "UniMod:765,-131.040485,*nM":
diann_var_ptm += " --met-excision "
else:
diann_var_ptm += var_ptm_str + mod

with open("diann_config.cfg", "w") as file:
file.write("--cut " + cut + diann_fix_ptm + diann_var_ptm)


def convert_mod(unimod_database, fix_mod: str, var_mod: str) -> Tuple[List, List]:
def get_mod(mod, mod_type):
pattern = re.compile(r"\((.*?)\)")
Copy link

Copilot AI Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The regex pattern is compiled inside the get_mod() function, which means it will be recompiled for every modification processed. Since this function is called in a loop (lines 151, 162), consider moving the pattern compilation to module level (similar to how MET_LOSS_MODIFICATION is defined at line 40) to improve performance when processing multiple modifications.

Copilot uses AI. Check for mistakes.
tag = 0
diann_mod_accession = None
diann_mod_name = None
for modification in unimod_database.modifications:
if modification.get_name() == mod.split(" ")[0]:
Copy link

Copilot AI Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The modification name comparison uses mod.split(" ")[0] which assumes the modification string format is "ModName (Site)". However, this is fragile - if the input string has leading/trailing whitespace or inconsistent formatting, the comparison will fail. Consider stripping whitespace and adding validation for the expected format, or document the expected input format more clearly in the docstring.

Copilot uses AI. Check for mistakes.
diann_mod_accession = modification.get_accession().replace("UNIMOD:", "UniMod:") + "," + str(modification._delta_mono_mass)
Copy link

Copilot AI Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] Accessing the private attribute modification._delta_mono_mass (with leading underscore) breaks encapsulation and could be fragile if the underlying library changes. Consider using a public method if available, or document why direct access to this private attribute is necessary.

Suggested change
diann_mod_accession = modification.get_accession().replace("UNIMOD:", "UniMod:") + "," + str(modification._delta_mono_mass)
# Prefer public accessor for mono mass if available; fallback to private attribute with documentation.
try:
# Try to use a public method/property if available
mono_mass = modification.get_delta_mono_mass()
except AttributeError:
# No public accessor; document why direct access is necessary
# Direct access to _delta_mono_mass is required as no public method is available.
# This may be fragile if the underlying library changes.
mono_mass = modification._delta_mono_mass
diann_mod_accession = modification.get_accession().replace("UNIMOD:", "UniMod:") + "," + str(mono_mass)

Copilot uses AI. Check for mistakes.
diann_mod_name = modification.get_name()
tag = 1
break

if tag == 0:
logging.error(
"Currently only supported unimod modifications for DIA pipeline. Skipped: "
+ mod
)
exit(1)
Copy link

Copilot AI Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using exit(1) directly is not ideal in library code as it terminates the entire Python process. Consider raising a custom exception (e.g., ValueError or a custom ModificationNotSupportedError) instead, allowing callers to handle the error appropriately. This pattern appears in lines 71, 85, and 110.

Copilot uses AI. Check for mistakes.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot open a new pull request to apply changes based on this feedback

Copy link

Copilot AI Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using exit(1) directly is not a best practice in library code, as it terminates the entire Python process and prevents calling code from handling the error. Consider raising an appropriate exception (e.g., ValueError or a custom exception) instead, allowing the CLI command handler to catch it and exit with the appropriate code. This pattern applies to all exit(1) calls in this file (lines 104, 118, 125).

Copilot uses AI. Check for mistakes.

# TODO support DIA multiplex
if (
"TMT" in diann_mod_name
or "Label:" in diann_mod_name
or "iTRAQ" in diann_mod_name
or "mTRAQ" in diann_mod_name
or "Dimethyl:" in diann_mod_name
):
logging.error(
"quantms DIA-NN workflow only support LFQ now! Unsupported modifications: "
+ mod
)
exit(1)
Copy link

Copilot AI Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using exit(1) directly is not ideal in library code as it terminates the entire Python process. Consider raising a custom exception (e.g., ValueError or a custom ModificationNotSupportedError) instead, allowing callers to handle the error appropriately.

Copilot uses AI. Check for mistakes.
elif diann_mod_accession is not None:
site = re.findall(pattern, " ".join(mod.split(" ")[1:]))[0]
if site == "Protein N-term":
site = "*n"
elif site == "N-term":
site = "n"
elif len(site.split(" ")) >= 2:
pp = " ".join(site.split(" ")[:-1])
if pp == "Protein N-term":
pp = "*n"
elif pp == "N-term":
pp = "n"
aa = site.split(" ")[-1]
site = pp + aa
if site == "*nM" and diann_mod_name == "Met-loss" and mod_type == "var_mod":
return diann_mod_accession, site
else:
logging.warning("Restricting to certain terminal AAs isn't directly supported. Please see https://github.com/vdemichev/DiaNN/issues/1791")
return diann_mod_accession, site
else:
logging.error(
"Currently only supported unimod modifications for DIA pipeline. Skipped: "
+ mod
)
exit(1)
Copy link

Copilot AI Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using exit(1) directly is not ideal in library code as it terminates the entire Python process. Consider raising a custom exception (e.g., ValueError or a custom ModificationNotSupportedError) instead, allowing callers to handle the error appropriately.

Copilot uses AI. Check for mistakes.


def convert_mod(fix_mod: str, var_mod: str) -> Tuple[List, List]:
var_ptm = []
fix_ptm = []

if fix_mod != "":
merged = defaultdict(list)
for mod in fix_mod.split(","):
tag = 0
diann_mod = None
for modification in unimod_database.modifications:
if modification.get_name() == mod.split(" ")[0]:
diann_mod = modification.get_name() + "," + str(modification._delta_mono_mass)
tag = 1
break
if tag == 0:
logging.info(
"Warning: Currently only supported unimod modifications for DIA pipeline. Skipped: "
+ mod
)
continue
site = re.findall(pattern, " ".join(mod.split(" ")[1:]))[0]
if site == "Protein N-term":
site = "*n"
elif site == "N-term":
site = "n"

if (
"TMT" in diann_mod
or "Label" in diann_mod
or "iTRAQ" in diann_mod
or "mTRAQ" in diann_mod
):
fix_ptm.append(diann_mod + "," + site + "," + "label")
elif diann_mod is not None:
fix_ptm.append(diann_mod + "," + site)
else:
print(
"Warning: Currently only supported unimod modifications for DIA pipeline. Skipped: "
+ mod
)
diann_mod, site = get_mod(mod, "fixed_mod")
merged[diann_mod].append(site)

# merge same modification for different sites
for name, site_list in merged.items():
site_str = "".join(sorted(set(site_list)))
Copy link

Copilot AI Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The site merging logic concatenates sites without a delimiter (e.g., "STY" for phosphorylation on S, T, and Y). However, if a site is multi-character (like "*n" for Protein N-term), this concatenation could produce ambiguous results. For example, "*n" + "K" becomes "*nK" which could be interpreted as "Protein N-term K" (a two-part site from lines 131-138) rather than two separate sites. Consider documenting this behavior or adding validation to ensure sites don't create ambiguous concatenations.

Copilot uses AI. Check for mistakes.
fix_ptm.append(f"{name},{site_str}")
Comment on lines +155 to +158
Copy link

Copilot AI Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment "merge same modification for different sites" could be more descriptive. Consider explaining the purpose and format of the merge. For example: "Merge same modification for different sites by concatenating sorted unique sites. E.g., 'Phospho,S' and 'Phospho,T' become 'Phospho,ST'". This helps developers understand the expected output format.

Copilot uses AI. Check for mistakes.

if var_mod != "":
merged = defaultdict(list)
for mod in var_mod.split(","):
tag = 0
diann_mod = None
for modification in unimod_database.modifications:
if modification.get_name() == mod.split(" ")[0]:
diann_mod = modification.get_name() + "," + str(modification._delta_mono_mass)
tag = 1
break
if tag == 0:
print(
"Warning: Currently only supported unimod modifications for DIA pipeline. Skipped: "
+ mod
)
continue
site = re.findall(pattern, " ".join(mod.split(" ")[1:]))[0]
if site == "Protein N-term":
site = "*n"
elif site == "N-term":
site = "n"

if (
"TMT" in diann_mod
or "Label" in diann_mod
or "iTRAQ" in diann_mod
or "mTRAQ" in diann_mod
):
var_ptm.append(diann_mod + "," + site + "," + "label")
else:
var_ptm.append(diann_mod + "," + site)
diann_mod, site = get_mod(mod, "var_mod")
merged[diann_mod].append(site)
# merge same modification for different sites
for name, site_list in merged.items():
site_str = "".join(sorted(set(site_list)))
var_ptm.append(f"{name},{site_str}")

return fix_ptm, var_ptm

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
click
sdrf-pipelines>=0.0.31
sdrf-pipelines==0.0.33
pyopenms>=3.2.0
pandas
pyarrow>=16.1.0
Expand Down
15 changes: 14 additions & 1 deletion tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,20 @@ def test_diann2mztab_example(self):
]
result = run_cli_command("diann2mztab", args)
assert result.exit_code == 0
# Additional assertions could check for expected output files

def test_dianncfg_example(self):
"""Test generating the DIA-NN config with example data"""
args = [
"--enzyme",
"Trypsin",
"--fix_mod",
"Carbamidomethyl (C)",
"--var_mod",
"Oxidation (M),Phospho (S),Phospho (T),Phospho (Y),Acetyl (Protein N-term),Acetyl (K),Acetyl (R),Met-loss (Protein N-term M)",
]
result = run_cli_command("dianncfg", args)

assert result.exit_code == 0
Comment on lines +88 to +100
Copy link

Copilot AI Nov 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test only verifies that the command exits successfully but doesn't validate the actual output. Consider adding assertions to check that:

  1. The diann_config.cfg file is created
  2. The file contains expected content for the Met-loss modification (should use --met-excision flag)
  3. The modification merging logic works correctly when multiple sites are specified
  4. The format of fixed and variable modifications is correct

Copilot uses AI. Check for mistakes.


class TestSamplesheetCommands:
Expand Down
Loading