diff --git a/.github/auto_assign.yml b/.github/auto_assign.yml index b5a73ef6..e64a055c 100644 --- a/.github/auto_assign.yml +++ b/.github/auto_assign.yml @@ -2,9 +2,7 @@ addReviewers: true addAssignees: true reviewers: - MKoesters - skipKeywords: - wip - WIP - numberOfReviewers: 0 diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index cd898ea3..010656e1 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -1,5 +1,4 @@ name: Publish to pypi - on: push: tags: @@ -8,11 +7,11 @@ jobs: deploy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: - python-version: '3.9' + python-version: '3.13' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml new file mode 100644 index 00000000..6002eb7a --- /dev/null +++ b/.github/workflows/sphinx.yml @@ -0,0 +1,24 @@ +# For reference: https://github.com/JamesIves/github-pages-deploy-action +name: Build Sphinx docu. +on: + release: + types: [published] + workflow_dispatch: + +permissions: + contents: write +jobs: + build-and-deploy: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + - name: Install and Build HTML + run: | + pip install --upgrade pip + pip install ".[test]" + python -m sphinx.cmd.build ./docs/source ./docs/build + - name: Deploy HTML + uses: JamesIves/github-pages-deploy-action@v4 + with: + folder: docs/build/ # The folder the action should deploy. 
diff --git a/.github/workflows/tox_ci.yml b/.github/workflows/tox_ci.yml index b226e8f3..2700456e 100644 --- a/.github/workflows/tox_ci.yml +++ b/.github/workflows/tox_ci.yml @@ -1,31 +1,27 @@ name: Continious Integration - -on: - pull_request - +on: pull_request jobs: black: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Run black - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: 3.8 - name: install black run: pip install "black<23" - name: run black run: black --check pymzml - build: runs-on: ubuntu-latest strategy: matrix: - python: ['3.8', '3.9', '3.10'] + python: ['3.9', '3.10', '3.11'] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} - name: install tox diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 96ac581e..88ad0f8b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,14 +1,14 @@ repos: -- repo: https://github.com/pre-commit/pre-commit-hooks + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.0.1 hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - id: debug-statements -- repo: https://github.com/ambv/black + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: debug-statements + - repo: https://github.com/ambv/black rev: 18.9b0 hooks: - - id: black - language_version: python - exclude: ^docs/, ^tests/ + - id: black + language_version: python + exclude: ^docs/, ^tests/ diff --git a/.readthedocs.yml b/.readthedocs.yml index 25b0eb6e..b6a2de4d 100755 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -3,7 +3,6 @@ # Required version: 2 - # Set the OS, Python version and other tools you might need build: os: ubuntu-22.04 @@ -13,7 +12,6 @@ build: # nodejs: "20" # rust: "1.70" # golang: "1.20" - # Build documentation in the 
"docs/" directory with Sphinx sphinx: configuration: docs/source/conf.py @@ -21,7 +19,6 @@ sphinx: # builder: "dirhtml" # Fail on all warnings to avoid broken references # fail_on_warning: true - # Optionally build your docs in additional formats such as PDF and ePub # formats: # - pdf @@ -31,5 +28,8 @@ sphinx: # to build your documentation # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html python: - install: - - requirements: docs/requirements.txt + install: + - method: pip + path: . + extra_requirements: + - test diff --git a/README.rst b/README.rst index 43926464..e7c3de81 100755 --- a/README.rst +++ b/README.rst @@ -29,7 +29,7 @@ General information Module to parse mzML data in Python based on cElementTree -Copyright 2010-2021 by: +Copyright 2010-2024 by: | M. Kösters, | J. Leufken, @@ -88,8 +88,8 @@ Download Get the latest version via github | https://github.com/pymzml/pymzML -The complete Documentation can be found as pdf - | https://pymzml.readthedocs.io/_/downloads/en/latest/pdf/ +The complete Documentation can be found here + | https://pymzml.readthedocs.io/en/latest/ ******** diff --git a/codecov.yml b/codecov.yml index acc3ee88..0a1f428d 100644 --- a/codecov.yml +++ b/codecov.yml @@ -5,12 +5,10 @@ coverage: precision: 2 round: down range: "70...100" - status: project: yes patch: yes changes: no - parsers: gcov: branch_detection: @@ -19,9 +17,7 @@ parsers: method: False changes: False macro: False - comment: layout: "head, diff" behaviour: default require_change: no - diff --git a/docs/parse_example_scripts.py b/docs/parse_example_scripts.py index 3d4c3156..5d0c944f 100755 --- a/docs/parse_example_scripts.py +++ b/docs/parse_example_scripts.py @@ -1,45 +1,43 @@ #!/usr/bin/env python3 # encoding: utf-8 -''' +""" Originally created for Ursgal ( https://github.com/ursgal/ursgal ) -''' +""" import glob import os -if __name__ == '__main__': - print(''' +if __name__ == "__main__": + print( + """ Formatting example scripts into rst files for the docs 
-''') +""" + ) # input() example_script_path = os.path.join( os.path.dirname(__file__), os.pardir, - 'example_scripts', - '*.py', + "example_scripts", + "*.py", ) print(example_script_path) for example_script in glob.glob(example_script_path): if os.path.exists(example_script) is False: continue basename = os.path.basename(example_script) - print( - 'Reading: {0}'.format(example_script) - ) + print("Reading: {0}".format(example_script)) file_path = os.path.join( os.path.dirname(__file__), - 'source', - 'code_inc', - '{0}', + "source", + "code_inc", + "{0}", ) - with open( file_path.format( basename.replace('.py', '.inc')), 'w') as o: - print('''.. code-block:: python\n''', file=o) - with open( example_script ) as infile: + with open(file_path.format(basename.replace(".py", ".inc")), "w") as o: + print(""".. code-block:: python\n""", file=o) + with open(example_script) as infile: for line in infile: - print('\t{0}'.format( line.rstrip()), file=o) - - + print("\t{0}".format(line.rstrip()), file=o) diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100755 index 80d38366..00000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -sphinx==7.2.5 -sphinx_rtd_theme==2.0.0 -regex -numpydoc==1.6.0 diff --git a/docs/source/conf.py b/docs/source/conf.py index 0c937b1c..a8e493e5 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -25,7 +25,7 @@ os.path.dirname(__file__), os.pardir, os.pardir, - 'tests', + "tests", ) dir_path = os.path.join( os.path.dirname(__file__), @@ -36,7 +36,7 @@ os.path.dirname(__file__), os.pardir, os.pardir, - 'example_scripts', + "example_scripts", ) sys.path.insert(0, os.path.abspath(tests_path)) @@ -46,56 +46,49 @@ # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. 
They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.coverage', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'sphinx.ext.imgconverter', - 'numpydoc', + "sphinx.ext.autodoc", + "sphinx.ext.coverage", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx.ext.imgconverter", + "sphinx.ext.githubpages", + "numpydoc", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'pymzML' -copyright = u'2017, Kösters, M., Leufken, J., Schulze, S., Sugimoto, K., Zahedi, R. P., Hippler, M., Leidel, S. A. and Fufezan, C.' -author = u'Kösters, M., Leufken, J., Schulze, S., Sugimoto, K., Zahedi, R. P., Hippler, M., Leidel, S. A. and Fufezan, C.' +project = "pymzML" +copyright = "2017, Kösters, M., Leufken, J., Schulze, S., Sugimoto, K., Zahedi, R. P., Hippler, M., Leidel, S. A. and Fufezan, C." +author = "Kösters, M., Leufken, J., Schulze, S., Sugimoto, K., Zahedi, R. P., Hippler, M., Leidel, S. A. and Fufezan, C." # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. 
# -# We store our version number in a simple text file: -version_path = os.path.join( - os.path.dirname(__file__), - os.pardir, os.pardir, - 'pymzml', 'version.txt' -) -with open(version_path, 'r') as version_file: - pymzml_version = version_file.read().strip() +# Get version +from importlib.metadata import version -# The short X.Y version. -version = pymzml_version -# The full version, including alpha/beta/rc tags. -release = pymzml_version +release = version("pymzml") +version = release.removesuffix(".dev0") # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -106,9 +99,9 @@ # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -116,27 +109,27 @@ # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. 
-#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -147,37 +140,37 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'alabaster' -on_rtd = os.environ.get('READTHEDOCS', None) == 'True' +on_rtd = os.environ.get("READTHEDOCS", None) == "True" if on_rtd: - html_theme = 'default' + html_theme = "default" else: # html_theme = 'default' - html_theme = 'sphinx_rtd_theme' + html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -187,122 +180,121 @@ # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. 
-#html_extra_path = [] +# html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Language to be used for generating the HTML full-text search index. 
# Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' -#html_search_language = 'en' +# html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # Now only 'ja' uses this config value -#html_search_options = {'type': 'default'} +# html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' +# html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'pymzMLdoc' +htmlhelp_basename = "pymzMLdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', + # Latex figure (float) alignment + #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'pymzML.tex', 'pymzML Documentation', - 'Koesters, M. and Fufezan, C.', 'manual'), + ( + master_doc, + "pymzML.tex", + "pymzML Documentation", + "Koesters, M. and Fufezan, C.", + "manual", + ), ] # The name of an image file (relative to this directory) to place at the top of # the title page. 
-#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'pymzml', 'pymzML Documentation', - [author], 1) -] +man_pages = [(master_doc, "pymzml", "pymzML Documentation", [author], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -311,19 +303,25 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'pymzML', 'pymzML Documentation', - author, 'pymzML', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "pymzML", + "pymzML Documentation", + author, + "pymzML", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. 
-#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False diff --git a/docs/source/pymzml_spec.rst b/docs/source/pymzml_spec.rst index 950d493e..900e462f 100755 --- a/docs/source/pymzml_spec.rst +++ b/docs/source/pymzml_spec.rst @@ -30,8 +30,8 @@ Chromatogram :exclude-members: __repr__, __str__ -MS_Spectrum +MsData ----------- -.. autoclass:: pymzml.spec.MS_Spectrum +.. autoclass:: pymzml.msdata.MsData :members: diff --git a/example_scripts/_playground2.py b/example_scripts/_playground2.py index 69090f78..b4961aab 100644 --- a/example_scripts/_playground2.py +++ b/example_scripts/_playground2.py @@ -13,13 +13,13 @@ @click.command() -@click.argument('input_files', nargs=-1) -@click.option('--shuffle', '-s', is_flag=True) -@click.option('--suffix') -def main(input_files, shuffle, suffix=''): +@click.argument("input_files", nargs=-1) +@click.option("--shuffle", "-s", is_flag=True) +@click.option("--suffix") +def main(input_files, shuffle, suffix=""): print(suffix) - output_name = f'failing_spectra{suffix}.txt' - with open(output_name, 'wt') as fout: + output_name = f"failing_spectra{suffix}.txt" + with open(output_name, "wt") as fout: for file in input_files: print(file) fails = 0 @@ -35,20 +35,25 @@ def main(input_files, shuffle, suffix=''): print(f"Shuffled indices md5: {md5_digest}") # exit(1) number_of_specs = len(indices) - basic_off_set_dict = reader.info["file_object"].file_handler.offset_dict.copy() + basic_off_set_dict = reader.info[ + "file_object" + ].file_handler.offset_dict.copy() basic_seek_list = reader.info["file_object"].file_handler.seek_list.copy() for pos, i in enumerate(indices): # reader.info["file_object"].file_handler.offset_dict = basic_off_set_dict.copy() # reader.info["file_object"].file_handler.seek_list = basic_seek_list.copy() current_precentage = 100 * float(pos) / float(number_of_specs) - print(f"[{current_precentage:0>3.1f}%] Access spectrum {i:<10}".format(), end='\r') - #spec = reader[i] + print( + 
f"[{current_precentage:0>3.1f}%] Access spectrum {i:<10}".format(), + end="\r", + ) + # spec = reader[i] try: spec = reader[i] # 100 / 0 except Exception as e: fails += 1 - fout.write(f'{os.path.basename(file)}\t{i}\n') + fout.write(f"{os.path.basename(file)}\t{i}\n") print(f"failed on spec {i}") # pprint.pprint(reader.info["file_object"].file_handler.offset_dict) @@ -58,9 +63,9 @@ def main(input_files, shuffle, suffix=''): # break print() perc = fails / len(indices) * 100 - print(f'{fails} of {len(indices)} spectra could not be accessed ({perc}%)') - print('\n\n') + print(f"{fails} of {len(indices)} spectra could not be accessed ({perc}%)") + print("\n\n") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/example_scripts/access_spectra_and_chromatograms.py b/example_scripts/access_spectra_and_chromatograms.py new file mode 100644 index 00000000..2b8af2b0 --- /dev/null +++ b/example_scripts/access_spectra_and_chromatograms.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import sys +import pymzml + + +def main(mzml_file): + """ + Example script demonstrating how to access both spectra and chromatograms + using pymzML. 
+ + Usage: + ./access_spectra_and_chromatograms.py + """ + print("Initializing Reader...") + # Initialize with skip_chromatogram=False to include chromatograms during iteration + run = pymzml.run.Reader(mzml_file, skip_chromatogram=False) + + # Access the first spectrum using indexing (traditional way) + print("\nAccessing first spectrum using indexing (run[0]):") + try: + spectrum = run[0] + print(f"Spectrum ID: {spectrum.ID}") + print(f"MS Level: {spectrum.ms_level}") + print(f"Retention Time: {spectrum.scan_time_in_minutes()} minutes") + print(f"Number of peaks: {len(spectrum.peaks('raw'))}") + except Exception as e: + print(f"Error accessing spectrum: {e}") + + # Access the first spectrum using the new get_spectrum method + print("\nAccessing first spectrum using get_spectrum(0):") + try: + spectrum = run.get_spectrum(0) + print(f"Spectrum ID: {spectrum.ID}") + print(f"MS Level: {spectrum.ms_level}") + print(f"Retention Time: {spectrum.scan_time_in_minutes()} minutes") + print(f"Number of peaks: {len(spectrum.peaks('raw'))}") + except Exception as e: + print(f"Error accessing spectrum: {e}") + + # Access the TIC chromatogram using string identifier + print("\nAccessing TIC chromatogram using run['TIC']:") + try: + chromatogram = run["TIC"] + print(f"Chromatogram ID: {chromatogram.ID}") + print(f"Number of data points: {len(chromatogram.peaks())}") + + # Print the first few data points + print("\nFirst 5 data points (time, intensity):") + for i, (time, intensity) in enumerate(chromatogram.peaks()): + if i >= 5: + break + print(f" {time:.4f}, {intensity:.2f}") + except Exception as e: + print(f"Error accessing chromatogram: {e}") + + # Access the first chromatogram using the new get_chromatogram method + print("\nAccessing first chromatogram using get_chromatogram(0):") + try: + chromatogram = run.get_chromatogram(0) + print(f"Chromatogram ID: {chromatogram.ID}") + print(f"Number of data points: {len(chromatogram.peaks())}") + + # Print the first few data points 
+ print("\nFirst 5 data points (time, intensity):") + for i, (time, intensity) in enumerate(chromatogram.peaks()): + if i >= 5: + break + print(f" {time:.4f}, {intensity:.2f}") + except Exception as e: + print(f"Error accessing chromatogram: {e}") + + # Demonstrate iterating through all items (spectra and chromatograms) + print("\nIterating through first few items (spectra and chromatograms):") + count = 0 + for item in run: + if count >= 5: + break + if isinstance(item, pymzml.spec.Spectrum): + print(f" Spectrum {item.ID}, MS level {item.ms_level}, RT {item.scan_time_in_minutes():.2f} min") + elif hasattr(item, 'time') and hasattr(item, 'i'): + print(f" Chromatogram {item.ID}, {len(item.peaks())} data points") + count += 1 + + print("\nDone!") + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print(main.__doc__) + print("Please provide a path to an mzML file.") + sys.exit(1) + + mzml_file = sys.argv[1] + if not os.path.exists(mzml_file): + print(f"Error: File '{mzml_file}' not found.") + sys.exit(1) + + main(mzml_file) diff --git a/example_scripts/plot_chromatogram.py b/example_scripts/plot_chromatogram.py index d5b38a53..437ef63f 100755 --- a/example_scripts/plot_chromatogram.py +++ b/example_scripts/plot_chromatogram.py @@ -26,24 +26,24 @@ def main(mzml_file): pf.save( "chromatogram_{0}.html".format(mzml_basename), layout={ - "xaxis":{ + "xaxis": { "title": "Retention time", - "ticks": 'outside', + "ticks": "outside", "ticklen": 2, "tickwidth": 0.25, "showgrid": False, - "linecolor": 'black', - }, + "linecolor": "black", + }, "yaxis": { "title": "TIC", - "ticks": 'outside', + "ticks": "outside", "ticklen": 2, "tickwidth": 0.25, "showgrid": False, - "linecolor": 'black', + "linecolor": "black", }, - "plot_bgcolor": 'rgba(255, 255, 255, 0)', - "paper_bgcolor": 'rgba(255, 255, 255, 0)', + "plot_bgcolor": "rgba(255, 255, 255, 0)", + "paper_bgcolor": "rgba(255, 255, 255, 0)", }, ) return diff --git a/example_scripts/plot_spectrum.py 
b/example_scripts/plot_spectrum.py index ee667b45..8d8e4066 100755 --- a/example_scripts/plot_spectrum.py +++ b/example_scripts/plot_spectrum.py @@ -31,22 +31,22 @@ def main(): p.save( filename=filename, layout={ - "xaxis":{ - "ticks": 'outside', + "xaxis": { + "ticks": "outside", "ticklen": 2, "tickwidth": 0.25, "showgrid": False, - "linecolor": 'black', - }, + "linecolor": "black", + }, "yaxis": { - "ticks": 'outside', + "ticks": "outside", "ticklen": 2, "tickwidth": 0.25, "showgrid": False, - "linecolor": 'black', + "linecolor": "black", }, - "plot_bgcolor": 'rgba(255, 255, 255, 0)', - "paper_bgcolor": 'rgba(255, 255, 255, 0)', + "plot_bgcolor": "rgba(255, 255, 255, 0)", + "paper_bgcolor": "rgba(255, 255, 255, 0)", }, ) print("Plotted file: {0}".format(filename)) diff --git a/example_scripts/plot_spectrum_with_annotation.py b/example_scripts/plot_spectrum_with_annotation.py index b704eb00..0e348116 100755 --- a/example_scripts/plot_spectrum_with_annotation.py +++ b/example_scripts/plot_spectrum_with_annotation.py @@ -32,14 +32,14 @@ def main(): "tickwidth": 1, "ticks": "outside", "showgrid": False, - "linecolor": 'black', + "linecolor": "black", }, "yaxis": { "ticklen": 5, "tickwidth": 1, "ticks": "outside", "showgrid": False, - "linecolor": 'black', + "linecolor": "black", }, } @@ -196,8 +196,8 @@ def main(): for axis in layout.keys(): plot_layout["{0}3".format(axis)] = copy.copy(layout[axis]) - plot_layout["plot_bgcolor"] = 'rgba(255, 255, 255, 0)' - plot_layout["paper_bgcolor"] = 'rgba(255, 255, 255, 0)' + plot_layout["plot_bgcolor"] = "rgba(255, 255, 255, 0)" + plot_layout["paper_bgcolor"] = "rgba(255, 255, 255, 0)" # Save the plot in a file using the defined plot_layout filename = "example_plot_{0}_annotation.html".format(os.path.basename(example_file)) diff --git a/example_scripts/simple_parser.py b/example_scripts/simple_parser.py index 780233ee..176accae 100755 --- a/example_scripts/simple_parser.py +++ b/example_scripts/simple_parser.py @@ -5,11 +5,11 
@@ def main(mzml_file): """ - Basic example script to demonstrate the usage of pymzML. Requires a mzML + Basic example script to demonstrate the usage of pymzML. Requires a mzML file as first argument. usage: - + ./simple_parser.py Note: diff --git a/example_scripts/tic_calc.py b/example_scripts/tic_calc.py index c9dddc20..a9a3a6c4 100755 --- a/example_scripts/tic_calc.py +++ b/example_scripts/tic_calc.py @@ -5,11 +5,11 @@ def main(mzml_file): """ - Basic example script to demonstrate the usage of pymzML. Requires a mzML + Basic example script to demonstrate the usage of pymzML. Requires a mzML file as first argument. usage: - + ./simple_parser.py Note: diff --git a/pymzml/__init__.py b/pymzml/__init__.py index 07cdd5df..512ef378 100755 --- a/pymzml/__init__.py +++ b/pymzml/__init__.py @@ -24,23 +24,24 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -__all__ = ["run", "spec", "obo", "minimum", "plot", "file_classes"] +__all__ = ["run", "spec", "chromatogram", "obo", "minimum", "plot", "file_classes"] -import os import sys if not hasattr(sys, "version_info") or sys.version_info < (3, 4): raise RuntimeError("pymzML requires Python 3.4 or later.") # Set version -version_path = os.path.join(os.path.dirname(__file__), "version.txt") -with open(version_path, "r") as version_file: - __version__ = version_file.read().strip() +from importlib.metadata import version + +__version__ = version("pymzml").rstrip(".dev0") # Imports of individual modules import pymzml.run import pymzml.spec +import pymzml.chromatogram from pymzml.spec import MSDecoder +from pymzml.chromatogram import Chromatogram import pymzml.obo import pymzml.plot import pymzml.utils diff --git a/pymzml/chromatogram.py b/pymzml/chromatogram.py new file mode 100644 index 00000000..ccc6d9d4 --- /dev/null +++ b/pymzml/chromatogram.py @@ -0,0 +1,372 @@ +#!/usr/bin/env python3 +# -*- coding: latin-1 -*- +""" +The chromatogram class offers a python object for mass 
class Chromatogram(MsData):
    """
    Class for Chromatogram access and handling.

    A chromatogram wraps one ``<chromatogram>`` XML element. The time and
    intensity arrays are decoded lazily on first access and cached.
    """

    # cvParam accessions that are accepted as "chromatogram type".
    # NOTE(review): the human readable labels of these accessions were not
    # verified here; several entries (e.g. MS:1000814, MS:1000815 and
    # MS:1001475 - MS:1001480) should be checked against the PSI-MS
    # ontology -- TODO confirm.
    _CHROMATOGRAM_TYPE_ACCESSIONS = frozenset(
        [
            "MS:1000235",
            "MS:1000627",
            "MS:1000628",
            "MS:1000810",
            "MS:1000811",
            "MS:1000812",
            "MS:1000813",
            "MS:1000814",
            "MS:1000815",
            "MS:1001472",
            "MS:1001473",
            "MS:1001474",
            "MS:1001475",
            "MS:1001476",
            "MS:1001477",
            "MS:1001478",
            "MS:1001479",
            "MS:1001480",
        ]
    )

    # negative scan / positive scan
    _POLARITY_ACCESSIONS = frozenset(["MS:1000129", "MS:1000130"])

    # isolation window target m/z
    _ISOLATION_WINDOW_TARGET_MZ = "MS:1000827"

    def __init__(self, element, measured_precision=5e-6, *, obo_version=None):
        """
        Arguments:
            element (xml.etree.ElementTree.Element): chromatogram as xml Element

        Keyword Arguments:
            measured_precision (float): in ppm, i.e. 5e-6 equals to 5 ppm.
            obo_version (str, optional): obo version number.
        """
        self._measured_precision = measured_precision
        self.element = element
        self.noise_level_estimate = {}
        # Property variables (caches, filled on first access)
        self._time = None
        self._ms_level = None
        self._i = None
        self._t_mass_set = None
        self._peaks = None
        self._t_mz_set = None
        self._centroided_peaks = None
        self._reprofiled_peaks = None
        self._deconvoluted_peaks = None
        self._profile = None
        self._extreme_values = None
        self._centroided_peaks_sorted_by_i = None
        self._transformed_mz_with_error = None
        self._transformed_mass_with_error = None
        self._precursors = None
        self._ID = None
        self._chromatogram_type = None
        self._precursor_mz = None
        self._product_mz = None
        self._polarity = None
        self.obo_translator = OboTranslator.from_cache(obo_version)

        # Always define the namespace prefix so later lookups cannot fail with
        # an AttributeError. Compare against None explicitly: the truth value
        # of an Element is False when it has no children.
        self.ns = ""
        if self.element is not None:
            ns_match = re.match(r"\{.*\}", element.tag)
            if ns_match:
                self.ns = ns_match.group(0)

        self._decode = self._decode_to_numpy
        # constructor used to turn the cached profile list into an array
        self._array = np.array

    def __repr__(self):
        """
        Returns representative string for a chromatogram object class
        """
        return "<__main__.Chromatogram object with native ID {0} at {1}>".format(
            self.ID, hex(id(self))
        )

    def __str__(self):
        """
        Returns representative string for a chromatogram object class
        """
        return "<__main__.Chromatogram object with native ID {0} at {1}>".format(
            self.ID, hex(id(self))
        )

    @staticmethod
    def _is_cv_param(element):
        """Return True if element is a cvParam, with or without namespace."""
        tag = element.tag
        if not isinstance(tag, str):
            # comments / processing instructions carry a callable as tag
            return False
        return tag == "cvParam" or tag.endswith("}cvParam")

    def _first_cv_param_name(self, accessions):
        """
        Return the name of the first cvParam whose accession is in accessions.

        Arguments:
            accessions (collection): accepted accession strings

        Returns:
            name (str): name attribute of the first match, None if there is
                no element or no match
        """
        if self.element is None:
            return None
        for element in self.element.iter():
            if self._is_cv_param(element) and element.get("accession") in accessions:
                return element.get("name")
        return None

    def _isolation_window_target_mz(self, parent_tag):
        """
        Return the isolation window target m/z found below parent_tag.

        Arguments:
            parent_tag (str): local tag name, 'precursor' or 'product'

        Returns:
            target_mz (float): target m/z or None if not present
        """
        if self.element is None:
            return None
        parent = self.element.find(f".//{self.ns}{parent_tag}")
        if parent is None:
            return None
        isolation_window = parent.find(f".//{self.ns}isolationWindow")
        if isolation_window is None:
            return None
        for element in isolation_window.iter():
            if (
                self._is_cv_param(element)
                and element.get("accession") == self._ISOLATION_WINDOW_TARGET_MZ
            ):
                value = element.get("value")
                if value is not None:
                    return float(value)
        return None

    @property
    def ID(self):
        """
        Access the native id of the chromatogram.

        Returns:
            ID (str): native ID of the chromatogram
        """
        if self._ID is None:
            if self.element is not None:
                self._ID = self.element.get("id")
        return self._ID

    @property
    def mz(self):
        """
        Chromatogram has no property mz. This property is included for
        compatibility with the Spectrum class.

        Returns:
            time (list): list of time values from the chromatogram
        """
        print("Chromatogram has no property mz.\nReturn retention time instead")
        return self.time

    @property
    def time(self):
        """
        Returns the list of time values. If the time values are encoded, the
        function _decode() is used to decode the encoded data.\n
        To set time and intensity values, e.g. for theoretical data, assign a
        list of (time, intensity) tuples to the profile property.

        Returns:
            time (list): list of time values from the analyzed chromatogram

        """
        if self._time is None:
            params = self._get_encoding_parameters("time array")
            self._time = self._decode(*params)
        return self._time

    @property
    def i(self):
        """
        Returns the list of intensity values from the analyzed chromatogram.

        Returns:
            i (list): list of intensity values from the analyzed chromatogram
        """
        if self._i is None:
            params = self._get_encoding_parameters("intensity array")
            self._i = self._decode(*params)
        return self._i

    @property
    def profile(self):
        """
        Returns the list of peaks of the chromatogram as tuples (time, intensity).

        Returns:
            peaks (list): list of time, i tuples

        Example:

        >>> import pymzml
        >>> run = pymzml.run.Reader(
        ...     "spectra.mzML.gz",
        ...     skip_chromatogram=False,
        ... )
        >>> for entry in run:
        ...     if isinstance(entry, pymzml.chromatogram.Chromatogram):
        ...         for time, intensity in entry.profile:
        ...             print(time, intensity)

        Note:
            The profile property can also be set, e.g. for theoretical data.
            It requires a list of time/intensity tuples.

        """
        if self._profile is None:
            nothing_to_decode = self.element is None and (
                self._time is None or self._i is None
            )
            if nothing_to_decode:
                self._profile = []
            else:
                # Go through the lazy properties so that an array which was
                # not decoded yet is decoded now; checking only the cache
                # attributes returned an empty profile whenever just one of
                # time / i had been accessed before.
                times = self.time
                intensities = self.i
                self._profile = [[t, intensities[pos]] for pos, t in enumerate(times)]
        return self._array(self._profile)

    @profile.setter
    def profile(self, tuple_list):
        """
        Set the chromatogram profile.

        An empty list is ignored and leaves the current data untouched.

        Args:
            tuple_list (list): list of tuples (time, intensity)
        """
        if len(tuple_list) == 0:
            return
        self._time = []
        self._i = []
        for time, i in tuple_list:
            self._time.append(time)
            self._i.append(i)
        self._peaks = tuple_list
        # invalidate everything derived from the old data, otherwise the
        # getter keeps returning the previously cached profile
        self._profile = None
        self._reprofiled_peaks = None
        self._centroided_peaks = None

    def peaks(self):
        """
        Return the list of peaks of the chromatogram as tuples (time, intensity).

        Returns:
            peaks (list): list of time, intensity tuples

        Example:

        >>> import pymzml
        >>> run = pymzml.run.Reader(
        ...     "spectra.mzML.gz",
        ...     skip_chromatogram=False,
        ... )
        >>> for entry in run:
        ...     if isinstance(entry, pymzml.chromatogram.Chromatogram):
        ...         for time, intensity in entry.peaks():
        ...             print(time, intensity)

        Note:
            peaks is a method and cannot be assigned to. Use the profile
            property to set a list of time/intensity tuples.

        """
        return self.profile

    @property
    def chromatogram_type(self):
        """
        Returns the chromatogram type.

        Returns:
            chromatogram_type (str): name of the first cvParam with a known
                chromatogram type accession, None if there is none
        """
        if self._chromatogram_type is None:
            self._chromatogram_type = self._first_cv_param_name(
                self._CHROMATOGRAM_TYPE_ACCESSIONS
            )
        return self._chromatogram_type

    @property
    def polarity(self):
        """
        Returns the polarity of the chromatogram.

        Returns:
            polarity (str): polarity (positive scan or negative scan)
        """
        if self._polarity is None:
            self._polarity = self._first_cv_param_name(self._POLARITY_ACCESSIONS)
        return self._polarity

    @property
    def precursor_mz(self):
        """
        Returns the precursor m/z value for SRM/MRM chromatograms.

        Returns:
            precursor_mz (float): precursor m/z value
        """
        if self._precursor_mz is None:
            self._precursor_mz = self._isolation_window_target_mz("precursor")
        return self._precursor_mz

    @property
    def product_mz(self):
        """
        Returns the product m/z value for SRM/MRM chromatograms.

        Returns:
            product_mz (float): product m/z value
        """
        if self._product_mz is None:
            self._product_mz = self._isolation_window_target_mz("product")
        return self._product_mz

    def get_chromatogram_properties(self):
        """
        Returns a dictionary with the main properties of the chromatogram.

        Returns:
            properties (dict): dictionary with chromatogram properties
        """
        properties = {
            "id": self.ID,
            "chromatogram_type": self.chromatogram_type,
            "polarity": self.polarity,
            "precursor_mz": self.precursor_mz,
            "product_mz": self.product_mz,
        }
        return properties
import chromatogram from ..utils.GSGR import GSGR @@ -89,7 +90,7 @@ def __getitem__(self, identifier): data = self.Reader.read_block(identifier) element = XML(ns_prefix + data.decode("utf-8") + ns_suffix) if "chromatogram" in element[0].tag: - return spec.Chromatogram(list(element)[0], measured_precision=5e-6) + return chromatogram.Chromatogram(list(element)[0], measured_precision=5e-6) else: return spec.Spectrum(list(element)[0], measured_precision=5e-6) diff --git a/pymzml/file_classes/standardGzip.py b/pymzml/file_classes/standardGzip.py index fd808ac0..b5108f61 100755 --- a/pymzml/file_classes/standardGzip.py +++ b/pymzml/file_classes/standardGzip.py @@ -32,6 +32,7 @@ from .. import regex_patterns from .. import spec +from .. import chromatogram class StandardGzip(object): @@ -100,7 +101,9 @@ def __getitem__(self, identifier): elif element.tag.endswith("}chromatogram"): if element.get("id") == identifier: self.file_handler.seek(old_pos, 0) - return spec.Chromatogram(element, measured_precision=5e-6) + return chromatogram.Chromatogram( + element, measured_precision=5e-6 + ) if __name__ == "__main__": diff --git a/pymzml/file_classes/standardMzml.py b/pymzml/file_classes/standardMzml.py index 18223a53..565234e5 100755 --- a/pymzml/file_classes/standardMzml.py +++ b/pymzml/file_classes/standardMzml.py @@ -35,6 +35,7 @@ from xml.etree.ElementTree import XML, iterparse from .. import spec +from .. import chromatogram from .. 
import regex_patterns @@ -54,7 +55,7 @@ def __init__( self.index_regex = index_regex self.path = path self.file_handler = self.get_file_handler(encoding) - self.offset_dict = dict() + self.offset_dict = {} self.spec_open = regex_patterns.SPECTRUM_OPEN_PATTERN self.spec_close = regex_patterns.SPECTRUM_CLOSE_PATTERN @@ -97,7 +98,7 @@ def __getitem__(self, identifier): if element.tag.endswith("}chromatogram"): if element.get("id") == "TIC": found = True - spectrum = spec.Chromatogram( + spectrum = chromatogram.Chromatogram( element, measured_precision=5e-6 ) elif event == "STOP": @@ -116,7 +117,7 @@ def __getitem__(self, identifier): if data.startswith("]*id="([^"]*)"') - chromcntexp = re.compile(b'<\s*chromatogramList\s*count="([^"]*)"') - specexp = re.compile(b'<\s*spectrum[^>]*id="([^"]*)"') - speccntexp = re.compile(b'<\s*spectrumList\s*count="([^"]*)"') + chromexp = re.compile(b'<\\s*chromatogram[^>]*id="([^"]*)"') + chromcntexp = re.compile(b'<\\s*chromatogramList\\s*count="([^"]*)"') + specexp = re.compile(b'<\\s*spectrum[^>]*id="([^"]*)"') + speccntexp = re.compile(b'<\\s*spectrumList\\s*count="([^"]*)"') # go to start of file fh.seek(0) prev_chunk = "" @@ -449,7 +450,7 @@ def get_data_indices(fh, chunksize=8192, lookback_size=100): if indices is not None: tmp_dict = {} - item_list = sorted(list(indices.items()), key=lambda x: x[1]) + item_list = sorted(indices.items(), key=lambda x: x[1]) for i in range(len(item_list)): key = item_list[i][0] tmp_dict[key] = (item_list[i][1],) @@ -732,7 +733,7 @@ def _search_string_identifier(self, search_string, chunk_size=8): # NOTE: This needs to go intp regex_patterns.py regex_string = re.compile( - '<\s*spectrum[^>]*index="[0-9]+"\sid="({0})"\sdefaultArrayLength="[0-9]+">'.format( + '<\\s*spectrum[^>]*index="[0-9]+"\\sid="({0})"\\sdefaultArrayLength="[0-9]+">'.format( "".join([".*", search_string, ".*"]) ).encode() ) @@ -765,7 +766,7 @@ def _search_string_identifier(self, search_string, chunk_size=8): seeker.seek(start) 
chrom_string = seeker.read(end) xml_string = XML(chrom_string) - return spec.Chromatogram(xml_string) + return chromatogram.Chromatogram(xml_string) elif len(data) == 0: raise Exception("cant find specified string") diff --git a/pymzml/msdata.py b/pymzml/msdata.py new file mode 100644 index 00000000..1414e20d --- /dev/null +++ b/pymzml/msdata.py @@ -0,0 +1,371 @@ +#!/usr/bin/env python3 +# -*- coding: latin-1 -*- +""" +The MsData class offers a base class for mass spectrometry data. +It provides common functionality for both Spectrum and Chromatogram classes. +""" + +# Python mzML module - pymzml +# Copyright (C) 2010-2019 M. Kösters, C. Fufezan +# The MIT License (MIT) + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
class MsData(object):
    """
    General base class for mass spectrometry data handling.
    Provides common functionality for both Spectrum and Chromatogram classes.

    Subclasses are expected to provide ``self.element`` (the XML element),
    ``self.ns`` (namespace prefix, may be empty) and ``self.obo_translator``.
    """

    # value types of a binaryDataArray, probed in this order
    _NUMERIC_DATA_TYPES = (
        "32-bit float",
        "64-bit float",
        "32-bit integer",
        "64-bit integer",
    )
    _FALLBACK_DATA_TYPE = "null-terminated ASCII string"

    # value type name -> numpy dtype / struct format character
    _NUMPY_DTYPES = {
        "32-bit float": np.float32,
        "64-bit float": np.float64,
        "32-bit integer": np.int32,
        "64-bit integer": np.int64,
    }
    _STRUCT_CODES = {
        "32-bit float": "f",
        "64-bit float": "d",
        "32-bit integer": "i",
        "64-bit integer": "q",
    }

    # compression names that mark numpress encoded data
    _NUMPRESS_NAMES = (
        "ms-np-linear",
        "ms-np-pic",
        "ms-np-slof",
        "MS-Numpress linear prediction compression",
        "MS-Numpress positive integer compression",
        "MS-Numpress short logged float compression",
    )

    # numpress accession -> name of the MSDecoder function, in priority order
    _NUMPRESS_DECODERS = (
        ("MS:1002312", "decode_linear"),
        ("MS:1002313", "decode_pic"),
        ("MS:1002314", "decode_slof"),
    )

    def _read_accessions(self):
        """Collect name -> accession of all elements that carry an accession."""
        self.accessions = {}
        for element in self.element.iter():
            accession = element.get("accession")
            name = element.get("name")
            if accession is not None:
                self.accessions[name] = accession
        if "profile spectrum" in self.accessions:
            self._profile = True

    def get_element_by_name(self, name):
        """
        Get element from the original tree by it's unit name.

        Arguments:
            name (str): unit name of the mzml element.

        Returns:
            element (xml.etree.ElementTree.Element): first element whose
                name attribute equals name, None if there is none

        """
        for ele in self.element.iter():
            if ele.get("name", default=None) == name:
                return ele
        return None

    def get_element_by_path(self, hooks):
        """
        Find elements in spectrum by its path.

        Arguments:
            hooks (list): list of parent elements for the target element.

        Returns:
            elements (list): list of XML objects
                found in the path, None if hooks is empty

        Example:

            To access cvParam in scanWindow tag:

            >>> spec.get_element_by_path(['scanList', 'scan', 'scanWindowList',
            ...     'scanWindow', 'cvParam'])

        """
        return_ele = None
        if len(hooks) > 0:
            path_array = ["."]
            for hook in hooks:
                path_array.append("{ns}{hook}".format(ns=self.ns, hook=hook))
            path = "/".join(path_array)
            return_ele = self.element.findall(path)

        return return_ele

    def _register(self, decoded_tuple):
        """
        Store a decoded array on the matching cache attribute.

        Arguments:
            decoded_tuple (tuple): (d_type, array) with d_type being one of
                'mz', 'i' or 'time'
        """
        d_type, array = decoded_tuple
        if d_type == "mz":
            self._mz = array
        elif d_type == "i":
            self._i = array
        elif d_type == "time":
            self._time = array
        else:
            raise Exception("Unknown data Type ({0})".format(d_type))

    def _lookup_data_type(self, b_data_array, type_name):
        """
        Return the name of the cvParam describing value type type_name.

        Raises if the obo lookup fails or if b_data_array has no such cvParam.
        """
        query = "./{ns}cvParam[@accession='{Acc}']".format(
            ns=self.ns, Acc=self.obo_translator[type_name]["id"]
        )
        return b_data_array.find(query).get("name")

    def _find_data_type(self, b_data_array):
        """Probe the known value types of b_data_array in order."""
        for type_name in self._NUMERIC_DATA_TYPES:
            try:
                return self._lookup_data_type(b_data_array, type_name)
            except Exception:
                # not this type (or unknown to the obo), try the next one
                continue
        # last candidate is deliberately unguarded, as before: a data array
        # without any known value type is an error
        return self._lookup_data_type(b_data_array, self._FALLBACK_DATA_TYPE)

    def _get_encoding_parameters(self, array_type):
        """
        Find the correct parameter for decoding and return them as tuple.

        Arguments:
            array_type (str): data type of the array, e.g. m/z, time or
                intensity
        Returns:
            data (bytes)         : encoded data ('' if there is none)
            d_array_length (str) : length of the data array
            d_type (str)         : data type, None for numpress encoded data
            comp (list)          : names of the compression cvParams
        """
        array_query = (
            "./{ns}binaryDataArrayList/{ns}binaryDataArray/"
            "{ns}cvParam[@{attr}='{value}']/.."
        )
        # Compare with None explicitly; the truth value of an Element only
        # reflects whether it has children.
        b_data_array = self.element.find(
            array_query.format(ns=self.ns, attr="name", value=array_type)
        )
        if b_data_array is None:
            # non-standard data array
            b_data_array = self.element.find(
                array_query.format(ns=self.ns, attr="value", value=array_type)
            )

        comp = []
        if b_data_array is None:
            data = None
            d_array_length = 0
            d_type = "64-bit float"
        else:
            numpress_encoding = False
            for cv_param in b_data_array.iterfind("./{ns}cvParam".format(ns=self.ns)):
                name = cv_param.get("name") or ""
                if "compression" in name:
                    if "numpress" in name.lower():
                        numpress_encoding = True
                    comp.append(name)
            d_array_length = self.element.get("defaultArrayLength")
            if numpress_encoding:
                # compression is numpress, dont need data type here
                d_type = None
            else:
                d_type = self._find_data_type(b_data_array)
            binary = b_data_array.find("./{ns}binary".format(ns=self.ns))
            data = binary.text if binary is not None else None

        if data is not None:
            data = data.encode("utf-8")
        else:
            data = ""
        return (data, d_array_length, d_type, comp)

    @property
    def measured_precision(self):
        """
        Set the measured and internal precision.

        Returns:
            value (float): measured Precision (e.g. 5e-6)
        """
        return self._measured_precision

    @measured_precision.setter
    def measured_precision(self, value):
        self._measured_precision = value
        self.internal_precision = int(round(50000.0 / (value * 1e6)))

    def _uses_numpress(self, comp):
        """Return True if comp names a numpress compression."""
        return any(name in comp for name in self._NUMPRESS_NAMES)

    def _decode_to_numpy(self, data, d_array_length, data_type, comp):
        """
        Decode the b64 encoded and packed strings from data as numpy arrays.

        Arguments:
            data (bytes or str): base64 encoded data
            d_array_length: unused, just for compatibility
            data_type (str): value type name, None for numpress data
            comp (list): compression names

        Returns:
            data (np.ndarray): the unpacked data; an empty array if there is
                no raw data

        Raises:
            ValueError: if data_type is not supported
        """
        out_data = b64dec(data)
        if len(out_data) == 0:
            return np.array([])
        if "zlib" in comp or "zlib compression" in comp:
            out_data = zlib.decompress(out_data)
        if self._uses_numpress(comp):
            out_data = self._decodeNumpress_to_array(out_data, comp)
            if data_type is None:
                # numpress data carries no value type; it is already decoded
                return out_data
        dtype = self._NUMPY_DTYPES.get(data_type)
        if dtype is None:
            # TODO data_type == "null-terminated ASCII string"
            raise ValueError(f"Unsupported data type: {data_type}")
        return np.frombuffer(out_data, dtype)

    def _decode_to_tuple(self, data, d_array_length, float_type, comp):
        """
        Decode b64 encoded and packed strings.

        Arguments:
            data (bytes or str): base64 encoded data
            d_array_length (int or str): number of values in the array
            float_type (str): value type name
            comp (list): compression names

        Returns:
            data (tuple): Returns the unpacked data as a tuple.
                Returns an empty list if there is no raw data.

        Raises:
            ValueError: if float_type is not supported
        """
        dec_data = b64dec(data)
        if len(dec_data) == 0:
            return []
        if "zlib" in comp or "zlib compression" in comp:
            dec_data = zlib.decompress(dec_data)
        if self._uses_numpress(comp):
            # numpress data cannot be unpacked with struct
            return tuple(self._decodeNumpress_to_array(dec_data, comp))
        type_code = self._STRUCT_CODES.get(float_type)
        if type_code is None:
            raise ValueError(f"Unsupported data type: {float_type}")
        fmt = "{endian}{array_length}{float_type}".format(
            endian="<", array_length=d_array_length, float_type=type_code
        )
        return unpack(fmt, dec_data)

    def _decodeNumpress_to_array(self, data, compression):
        """
        Decode golomb-rice encoded data (aka numpress encoded data).

        Arguments:
            data (bytes)       : Encoded data
            compression (list) : names of the compression cvParams; they are
                translated to accessions to pick the decoder

        Returns:
            array (list): Returns the unpacked data as an array of floats,
                an empty list if no numpress accession was found.

        """
        # Prefer the translator of the calling reader if there is one, else
        # fall back to the translator the object owns itself.
        caller = getattr(self, "calling_instance", None)
        translator = caller.OT if caller is not None else self.obo_translator
        comp_ms_tags = [translator[name]["id"] for name in compression]
        data = np.frombuffer(data, dtype=np.uint8)
        for accession, decoder_name in self._NUMPRESS_DECODERS:
            if accession in comp_ms_tags:
                # imported lazily, only needed for numpress data
                from .decoder import MSDecoder

                return getattr(MSDecoder, decoder_name)(data)
        return []

    def _median(self, data):
        """
        Compute median.

        Arguments:
            data (list): list of numeric values

        Returns:
            median (float): median of the input data
        """
        return np.median(data)

    def to_string(self, encoding="latin-1", method="xml"):
        """
        Return string representation of the xml element the
        object was initialized with.

        Keyword Arguments:
            encoding (str) : text encoding of the returned string.\n
                             Default is latin-1.
            method (str)   : text format of the returned string.\n
                             Default is xml, alternatives are html and text.

        Returns:
            element (str)  : xml string representation of the element.
        """
        return ElementTree.tostring(self.element, encoding=encoding, method=method)
import regex_patterns from .file_interface import FileInterface @@ -84,7 +85,7 @@ def __init__( build_index_from_scratch=False, skip_chromatogram=True, index_regex=None, - **kwargs + **kwargs, ): """Initialize and set required attributes.""" self.index_regex = index_regex @@ -131,8 +132,8 @@ def __init__( # obo version not specified -> try to identify from mzML by self._init_iter self.info["obo_version"] = None - self.iter = self._init_iter() self.OT = self._init_obo_translator() + self.iter = self._init_iter() def __next__(self): """ @@ -166,7 +167,9 @@ def __next__(self): if element.tag.endswith("}chromatogram"): if self.skip_chromatogram: continue - spectrum = spec.Chromatogram(element, obo_version=self.OT.version) + spectrum = chromatogram.Chromatogram( + element, obo_version=self.OT.version + ) # if has_ref_group: # spectrum._set_params_from_reference_group( # self.info['referenceable_param_group_list_element'] @@ -183,26 +186,29 @@ def __next__(self): def __getitem__(self, identifier): """ - Access spectrum with native id 'identifier'. + Access spectrum or chromatogram with native id 'identifier'. 
Arguments: identifier (str or int): last number in the id tag of the spectrum - element + element or a chromatogram identifier like 'TIC' Returns: spectrum (Spectrum or Chromatogram): spectrum/chromatogram object with native id 'identifier' """ try: - if int(identifier) > self.get_spectrum_count(): + if isinstance(identifier, int) and identifier > self.get_spectrum_count(): raise Exception("Requested identifier is out of range") except: pass - spectrum = self.info["file_object"][identifier] - spectrum.obo_translator = self.OT - if isinstance(spectrum, spec.Spectrum): - spectrum.measured_precision = self.ms_precisions[spectrum.ms_level] - return spectrum + + element = self.info["file_object"][identifier] + element.obo_translator = self.OT + + if isinstance(element, spec.Spectrum): + element.measured_precision = self.ms_precisions[element.ms_level] + + return element def __enter__(self): return self @@ -295,6 +301,8 @@ def _obo_version_validator(version): 2017: "4.1.0", 2018: "4.1.10", 2019: "4.1.22", + 2024: "4.1.79", + 2025: "4.1.188", } version_fixed = None if obo_rgx.match(version): @@ -343,7 +351,7 @@ def _init_obo_translator(self): # parse obo, check MS tags and if they are ok in minimum.py (minimum # required) ... 
if self.info.get("obo_version", None) is None: - self.info["obo_version"] = "1.1.0" + self.info["obo_version"] = "4.1.79" obo_translator = obo.OboTranslator.from_cache(version=self.info["obo_version"]) return obo_translator @@ -403,6 +411,9 @@ def _init_iter(self): elif element.tag.endswith("}dataProcessingList"): self.info["data_processing_list"] = True self.info["data_processing_list_element"] = element + elif element.tag.endswith("}cvParam"): + if self.term_is_a_member(element.attrib.get("accession"), "MS:1000494"): + self.info["instrument_name"] = element.attrib.get("name") elif element.tag.endswith("}spectrumList"): spec_cnt = element.attrib.get("count") @@ -450,9 +461,96 @@ def get_chromatogram_count(self): """ return self.info["chromatogram_count"] + def get_spectrum(self, identifier): + """ + Access spectrum with the given identifier. + + Arguments: + identifier (str or int): Either a string identifier or an index (0-based) + to access spectra in order. + + Returns: + spectrum (Spectrum): spectrum object with the given identifier + + Note: + This method provides the same functionality as using the indexing syntax + (e.g., run[0]), but with a more explicit method name. + """ + return self[identifier] + + def get_chromatogram(self, identifier): + """ + Access chromatogram with the given identifier. + + Arguments: + identifier (str or int): Either a string identifier like 'TIC' or + an index (0-based) to access chromatograms in order. + + Returns: + chromatogram (Chromatogram): chromatogram object with the given identifier + + Note: + This method is only useful when skip_chromatogram is set to False + if you want to access chromatograms by index. If skip_chromatogram is True, + you can still access chromatograms by string identifiers (e.g., 'TIC'). 
+ """ + if isinstance(identifier, str): + return self[identifier] + + if isinstance(identifier, int): + if self.get_chromatogram_count() is None: + raise Exception("No chromatograms found in the file") + + if identifier >= self.get_chromatogram_count(): + raise Exception( + f"Chromatogram index {identifier} is out of range (0-{self.get_chromatogram_count()-1})" + ) + + # Reset the file pointer and iterate to find the chromatogram + temp_skip_chromatogram = self.skip_chromatogram + self.skip_chromatogram = False + + self.info["file_object"].close() + self.info["file_object"] = self._open_file( + self.path_or_file, build_index_from_scratch=False + ) + self.iter = self._init_iter() + + chrom_count = 0 + try: + for element in self: + if isinstance(element, chromatogram.Chromatogram): + if chrom_count == identifier: + return element + chrom_count += 1 + finally: + # Restore original skip_chromatogram setting + self.skip_chromatogram = temp_skip_chromatogram + + raise Exception(f"Chromatogram with index {identifier} not found") + + raise ValueError("Identifier must be a string or an integer") + def close(self): self.info["file_object"].close() + def term_is_a_member(self, tested_term, member_of_term): + """ + Use translated obo file to check if given term is_a member of the + + Returns: + is_member (bool) whether given term is a member of member_of_term + + """ + is_member = False + try: + term_in = self.OT[tested_term] + if term_in: + is_member = self.OT.id[tested_term]["is_a"].startswith(member_of_term) + except KeyError: + print(f"term not found ({tested_term})") + return is_member + if __name__ == "__main__": print(__doc__) diff --git a/pymzml/spec.py b/pymzml/spec.py index db9bc7c3..304b7570 100755 --- a/pymzml/spec.py +++ b/pymzml/spec.py @@ -69,10 +69,15 @@ PROTON = 1.00727646677 ISOTOPE_AVERAGE_DIFFERENCE = 1.002 +# Import Chromatogram from chromatogram.py for backward compatibility +# Import MsData from msdata.py +from .msdata import MsData -class 
MS_Spectrum(object): + +class MS_Spectrum(MsData): """ General spectrum class for data handling. + This class is kept for backward compatibility. """ def _read_accessions(self): @@ -392,7 +397,7 @@ def to_string(self, encoding="latin-1", method="xml"): return ElementTree.tostring(self.element, encoding=encoding, method=method) -class Spectrum(MS_Spectrum): +class Spectrum(MsData): """ Spectrum class which inherits from class :py:attr:`pymzml.spec.MS_Spectrum` @@ -904,8 +909,9 @@ def scan_time(self): scan_time_ele = self.element.find( ".//*[@accession='MS:1000016']".format(ns=self.ns) ) - self._scan_time = float(scan_time_ele.attrib.get("value")) - self._scan_time_unit = scan_time_ele.get("unitName", "unicorns") + if scan_time_ele is not None: + self._scan_time = float(scan_time_ele.attrib.get("value")) + self._scan_time_unit = scan_time_ele.get("unitName", "unicorns") return self._scan_time, self._scan_time_unit # @property @@ -1732,7 +1738,7 @@ def centroidedPeaks(self): return self.peaks("centroided") -class Chromatogram(MS_Spectrum): +class Chromatogram(MsData): """ Class for Chromatogram access and handling. 
""" diff --git a/pymzml/utils/GSGW.py b/pymzml/utils/GSGW.py index 8eebc299..bb8ce765 100755 --- a/pymzml/utils/GSGW.py +++ b/pymzml/utils/GSGW.py @@ -256,7 +256,7 @@ def _write_identifier(self, identifier): Arguments: identifier (str or int): identifier to write into index """ - id_format = "{0:\xAC>" + str(self.max_idx_len) + "}" + id_format = "{0:\xac>" + str(self.max_idx_len) + "}" identifier = str(identifier) identifier = id_format.format(identifier).encode("latin-1") self.file_out.write(identifier) @@ -270,7 +270,7 @@ def _write_offset(self, offset): offset (int): offset which will be formatted and written into file index """ - offset_format = "{0:\xAC>" + str(self.max_offset_len) + "}" + offset_format = "{0:\xac>" + str(self.max_offset_len) + "}" offset = str(offset) offset = offset_format.format(offset).encode("latin-1") self.file_out.write(offset) diff --git a/pymzml/utils/SQListeConnector.py b/pymzml/utils/SQListeConnector.py index 3fdf1582..fbc5bbe9 100755 --- a/pymzml/utils/SQListeConnector.py +++ b/pymzml/utils/SQListeConnector.py @@ -1,6 +1,7 @@ import sqlite3 import xml.etree.ElementTree as et from pymzml import spec +from pymzml import chromatogram from pymzml.run import Reader @@ -46,7 +47,7 @@ def __getitem__(self, key): if "spectrum" in element.tag: spectrum = spec.Spectrum(element) elif "chromatogram" in element.tag: - spectrum = spec.Chromatogram(element) + spectrum = chromatogram.Chromatogram(element) return spectrum def get_spectrum_count(self): diff --git a/pymzml/version.txt b/pymzml/version.txt deleted file mode 100644 index da6b0a8f..00000000 --- a/pymzml/version.txt +++ /dev/null @@ -1 +0,0 @@ -2.5.6 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..5df4564c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,83 @@ +[build-system] +requires = ["hatchling", "versioningit"] +build-backend = "hatchling.build" + +[project] +name = "pymzml" +description = "high-throughput mzML parsing" +authors = [ + { name = 'M. 
Koesters' }, + { name = 'J. Leufken' }, + { name = 'S. Schulze' }, + { name = 'K. Sugimoto' }, + { name = 'R. Zahedi' }, + { name = 'M. Hippler' }, + { name = 'C. Fufezan', email = 'christian@fufezan.net' } +] +urls = { Homepage = 'https://pymzml.github.com' } +requires-python = ">=3.7.0" +readme = "README.rst" +license = { text = 'The MIT license' } +classifiers = [ + 'Development Status :: 4 - Beta', + 'Environment :: Console', + 'Intended Audience :: Education', + 'Intended Audience :: Science/Research', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: Microsoft :: Windows', + 'Operating System :: POSIX', + 'Operating System :: POSIX :: SunOS/Solaris', + 'Operating System :: Unix', + 'Programming Language :: Python :: 3.5', + 'Topic :: Scientific/Engineering :: Bio-Informatics', + 'Topic :: Scientific/Engineering :: Chemistry', + 'Topic :: Scientific/Engineering :: Medical Science Apps.', +] +dependencies = [ + 'numpy >= 1.8.0', + 'regex', +] +dynamic = ["version"] + +[project.optional-dependencies] +full = [ + 'plotly<5.0', + 'matplotlib', + 'pynumpress>=0.0.4', + 'ms_deisotope==0.0.14', +] +plot = ['plotly<5.0', 'matplotlib',] +pynumpress = ['pynumpress>=0.0.4',] +deconvolution = ['ms_deisotope==0.0.14',] +test = [ + 'cython', + 'coverage >= 4.2', + 'codecov', + 'pytest', + 'tox', + 'sphinx', + 'sphinx_rtd_theme', + 'numpydoc==1.9.0', +] + +[tool.hatch.version] +source = "versioningit" + +[tool.versioningit] +default-version = "0.0.0+unknown" + +[tool.versioningit.next-version] +method = "smallest" + +[tool.versioningit.format] +distance = "{next_version}.dev{distance}" +dirty = "{next_version}.dev{distance}" +distance-dirty = "{next_version}.dev{distance}" + +[tool.hatch.build] +editable = true + +[tool.hatch.metadata] +dynamic = ["version"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index d66892b0..00000000 --- a/requirements.txt 
+++ /dev/null @@ -1,6 +0,0 @@ -# Installation -numpy -plotly==4.12.0 -pynumpress==0.0.5 -regex -ms_deisotope==0.0.14 \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index 4f3a6eba..00000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,10 +0,0 @@ -# install requirements --r requirements.txt - -# Testing and Development -cython -coverage >= 4.2 -codecov -pytest -tox -regex diff --git a/setup.py b/setup.py deleted file mode 100755 index dcb1a922..00000000 --- a/setup.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 -from setuptools import setup -import os - -version_path = os.path.join( - os.path.dirname(__file__), - 'pymzml', - 'version.txt' -) -with open(version_path, 'r') as version_file: - pymzml_version = version_file.read().strip() - -setup( - name = 'pymzml', - version = pymzml_version, - packages = ['pymzml', 'pymzml.file_classes', 'pymzml.utils'], - package_dir = {'pymzml': 'pymzml'}, - package_data = { - 'pymzml': [ - 'version.txt', - 'obo/*.obo.gz' - ] - }, - python_requires = '>=3.7.0', - install_requires = [ - 'numpy >= 1.8.0', - 'regex', - ], - extras_require = { - 'full': [ - 'plotly<5.0', - 'pynumpress>=0.0.4', - 'ms_deisotope', - ], - 'plot': ['plotly<5.0'], - 'pynumpress': ['pynumpress>=0.0.4'], - 'deconvolution': ['ms_deisotope==0.0.14'] - }, - description = 'high-throughput mzML parsing', - long_description = 'pymzML - python module for mzML parsing', - author = 'M. Koesters, J. Leufken, S. Schulze, K. Sugimoto, R. Zahedi, M. Hippler and C. 
Fufezan', - author_email = 'christian@fufezan.net', - url = 'http://pymzml.github.com', - license = 'The MIT license', - platforms = 'any that supports python 3.7', - classifiers = [ - 'Development Status :: 4 - Beta', - 'Environment :: Console', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: MIT License', - 'Operating System :: MacOS :: MacOS X', - 'Operating System :: Microsoft :: Windows', - 'Operating System :: POSIX', - 'Operating System :: POSIX :: SunOS/Solaris', - 'Operating System :: Unix', - 'Programming Language :: Python :: 3.5', - 'Topic :: Scientific/Engineering :: Bio-Informatics', - 'Topic :: Scientific/Engineering :: Chemistry', - 'Topic :: Scientific/Engineering :: Medical Science Apps.', - ] -) diff --git a/tests/access_spectra_and_chromatograms_test.py b/tests/access_spectra_and_chromatograms_test.py new file mode 100644 index 00000000..b5bddf18 --- /dev/null +++ b/tests/access_spectra_and_chromatograms_test.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Test cases for the new functionality in pymzml.run.Reader +related to accessing spectra and chromatograms. +""" +import os +import sys + +# Add parent directory to Python path +sys.path.append(os.path.abspath(os.path.dirname(os.path.dirname(__file__)))) + +import unittest +import pymzml.run as run +from pymzml.spec import Spectrum +import test_file_paths + + +class AccessSpectraAndChromatogramsTest(unittest.TestCase): + """ + Test cases for the new functionality in pymzml.run.Reader + related to accessing spectra and chromatograms. 
+ """ + + def setUp(self): + """Set up test cases.""" + self.paths = test_file_paths.paths + + # Use a file with chromatograms for testing + # mini.chrom.mzML is at index 3 + for i, path in enumerate(self.paths): + if "mini.chrom.mzML" in path and not path.endswith(".gz") and not path.endswith(".idx.gz"): + self.chrom_file = path + break + else: + # Fallback to a known index if the file name is not found + self.chrom_file = self.paths[3] # mini.chrom.mzML + + # Initialize readers with different settings + self.reader_with_chromatograms = run.Reader( + self.chrom_file, + skip_chromatogram=False + ) + + self.reader_skip_chromatograms = run.Reader( + self.chrom_file, + skip_chromatogram=True + ) + + def test_get_spectrum_method(self): + """Test the get_spectrum method.""" + # Check if the file has spectra + spec_count = self.reader_with_chromatograms.get_spectrum_count() + if spec_count is None or spec_count == 0: + self.skipTest("Test file does not contain spectra") + + # Test that get_spectrum(0) returns the same as reader[0] + try: + spectrum_by_index = self.reader_with_chromatograms[0] + spectrum_by_method = self.reader_with_chromatograms.get_spectrum(0) + + self.assertIsInstance(spectrum_by_index, Spectrum) + self.assertIsInstance(spectrum_by_method, Spectrum) + self.assertEqual(spectrum_by_index.ID, spectrum_by_method.ID) + + # Test accessing a spectrum by ID + spectrum_id = spectrum_by_index.ID + if isinstance(spectrum_id, str): + spectrum_by_id = self.reader_with_chromatograms[spectrum_id] + self.assertEqual(spectrum_by_index.ID, spectrum_by_id.ID) + except IndexError: + self.skipTest("Could not access spectrum at index 0") + + def test_get_chromatogram_method(self): + """Test the get_chromatogram method.""" + # Check if the file has chromatograms + chrom_count = self.reader_with_chromatograms.get_chromatogram_count() + if chrom_count is None or chrom_count == 0: + self.skipTest("Test file does not contain chromatograms") + + # Test accessing chromatogram by 
index + try: + chrom_by_index = self.reader_with_chromatograms.get_chromatogram(0) + self.assertTrue(hasattr(chrom_by_index, 'time') and hasattr(chrom_by_index, 'i')) + + # If we successfully got a chromatogram by index, try to get it by ID + chrom_id = chrom_by_index.ID + if chrom_id: + chrom_by_id = self.reader_with_chromatograms[chrom_id] + self.assertTrue(hasattr(chrom_by_id, 'time') and hasattr(chrom_by_id, 'i')) + self.assertEqual(chrom_by_id.ID, chrom_id) + except Exception as e: + self.skipTest(f"Could not access chromatogram at index 0: {e}") + + # Test that the chromatogram count is correct + self.assertIsNotNone(self.reader_with_chromatograms.get_chromatogram_count()) + + def test_skip_chromatogram_parameter(self): + """Test the skip_chromatogram parameter.""" + # Check if the file has both spectra and chromatograms + spec_count = self.reader_with_chromatograms.get_spectrum_count() + chrom_count = self.reader_with_chromatograms.get_chromatogram_count() + + if spec_count is None or spec_count == 0: + self.skipTest("Test file does not contain spectra") + if chrom_count is None or chrom_count == 0: + self.skipTest("Test file does not contain chromatograms") + + # With skip_chromatogram=False, we should see both spectra and chromatograms + # Reset the reader to ensure we start from the beginning + self.reader_with_chromatograms.close() + self.reader_with_chromatograms = run.Reader( + self.chrom_file, + skip_chromatogram=False + ) + + # Collect items + spectra_found = False + chromatograms_found = False + count = 0 + max_items = 20 # Increase the limit to ensure we see both types + + for item in self.reader_with_chromatograms: + if isinstance(item, Spectrum): + spectra_found = True + elif hasattr(item, 'time') and hasattr(item, 'i'): + chromatograms_found = True + + if spectra_found and chromatograms_found: + break + + count += 1 + if count >= max_items: + break + + # Check that we found both types + self.assertTrue(spectra_found, "No spectra found when 
iterating with skip_chromatogram=False") + self.assertTrue(chromatograms_found, "No chromatograms found when iterating with skip_chromatogram=False") + + # With skip_chromatogram=True, we should only see spectra + # Reset the reader to ensure we start from the beginning + self.reader_skip_chromatograms.close() + self.reader_skip_chromatograms = run.Reader( + self.chrom_file, + skip_chromatogram=True + ) + + # Collect items + items_without_chromatograms = [] + for item in self.reader_skip_chromatograms: + items_without_chromatograms.append(item) + if len(items_without_chromatograms) >= 10: # Limit to first 10 items + break + + # Check that we only have spectra (if any items were found) + if items_without_chromatograms: + only_spectra = all(isinstance(item, Spectrum) for item in items_without_chromatograms) + self.assertTrue(only_spectra, "Found non-spectrum items when iterating with skip_chromatogram=True") + + def test_chromatogram_index_out_of_range(self): + """Test that accessing a chromatogram with an out-of-range index raises an exception.""" + # Check if the file has chromatograms + chrom_count = self.reader_with_chromatograms.get_chromatogram_count() + if chrom_count is None or chrom_count == 0: + self.skipTest("Test file does not contain chromatograms") + + with self.assertRaises(Exception): + self.reader_with_chromatograms.get_chromatogram(100) # Assuming there are fewer than 100 chromatograms + + def test_chromatogram_invalid_identifier(self): + """Test that accessing a chromatogram with an invalid identifier raises an exception.""" + # Check if the file has chromatograms + chrom_count = self.reader_with_chromatograms.get_chromatogram_count() + if chrom_count is None or chrom_count == 0: + self.skipTest("Test file does not contain chromatograms") + + with self.assertRaises(Exception): + self.reader_with_chromatograms.get_chromatogram("NonExistentChromatogram") + + def tearDown(self): + """Clean up after tests.""" + self.reader_with_chromatograms.close() + 
self.reader_skip_chromatograms.close() + + +if __name__ == "__main__": + unittest.main(verbosity=3) diff --git a/tests/chromatogram_properties_test.py b/tests/chromatogram_properties_test.py new file mode 100644 index 00000000..87936bfb --- /dev/null +++ b/tests/chromatogram_properties_test.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Test cases for the new properties in the Chromatogram class. +""" +import os +import sys + +# Add parent directory to Python path +sys.path.append(os.path.abspath(os.path.dirname(os.path.dirname(__file__)))) + +import unittest +import pymzml.run as run +from pymzml.chromatogram import Chromatogram +import test_file_paths + + +class ChromatogramPropertiesTest(unittest.TestCase): + """ + Test cases for the new properties in the Chromatogram class. + """ + + def setUp(self): + """Set up test cases.""" + self.paths = test_file_paths.paths + + # Use a file with chromatograms for testing + # mini.chrom.mzML is at index 3 + for i, path in enumerate(self.paths): + if "mini.chrom.mzML" in path and not path.endswith(".gz") and not path.endswith(".idx.gz"): + self.chrom_file = path + break + else: + # Fallback to a known index if the file name is not found + self.chrom_file = self.paths[3] # mini.chrom.mzML + + # Initialize reader with chromatograms + self.reader = run.Reader( + self.chrom_file, + skip_chromatogram=False + ) + + def test_chromatogram_type(self): + """Test the chromatogram_type property.""" + # Get the first chromatogram + chromatogram = self.reader.get_chromatogram(0) + + # Test that chromatogram_type is accessible + chromatogram_type = chromatogram.chromatogram_type + + # The type might be None depending on the test file, but the property should be accessible + self.assertIsNotNone(chromatogram, "Chromatogram should not be None") + + # Print the chromatogram type for debugging + print(f"Chromatogram type: {chromatogram_type}") + + def test_polarity(self): + """Test the polarity property.""" + # 
Get the first chromatogram + chromatogram = self.reader.get_chromatogram(0) + + # Test that polarity is accessible + polarity = chromatogram.polarity + + # The polarity might be None depending on the test file, but the property should be accessible + self.assertIsNotNone(chromatogram, "Chromatogram should not be None") + + # Print the polarity for debugging + print(f"Polarity: {polarity}") + + def test_precursor_mz(self): + """Test the precursor_mz property.""" + # Get the first chromatogram + chromatogram = self.reader.get_chromatogram(0) + + # Test that precursor_mz is accessible + precursor_mz = chromatogram.precursor_mz + + # The precursor_mz might be None depending on the test file, but the property should be accessible + self.assertIsNotNone(chromatogram, "Chromatogram should not be None") + + # Print the precursor_mz for debugging + print(f"Precursor m/z: {precursor_mz}") + + def test_product_mz(self): + """Test the product_mz property.""" + # Get the first chromatogram + chromatogram = self.reader.get_chromatogram(0) + + # Test that product_mz is accessible + product_mz = chromatogram.product_mz + + # The product_mz might be None depending on the test file, but the property should be accessible + self.assertIsNotNone(chromatogram, "Chromatogram should not be None") + + # Print the product_mz for debugging + print(f"Product m/z: {product_mz}") + + def test_get_chromatogram_properties(self): + """Test the get_chromatogram_properties method.""" + # Get the first chromatogram + chromatogram = self.reader.get_chromatogram(0) + + # Test that get_chromatogram_properties returns a dictionary + properties = chromatogram.get_chromatogram_properties() + + self.assertIsInstance(properties, dict, "get_chromatogram_properties should return a dictionary") + + # Check that the dictionary contains the expected keys + expected_keys = ["id", "chromatogram_type", "polarity", "precursor_mz", "product_mz"] + for key in expected_keys: + self.assertIn(key, properties, f"Properties 
dictionary should contain key '{key}'") + + # Print the properties for debugging + print("Chromatogram properties:") + for key, value in properties.items(): + print(f" {key}: {value}") + + def test_all_chromatograms(self): + """Test all chromatograms in the file.""" + # Get the number of chromatograms + chrom_count = self.reader.get_chromatogram_count() + + if chrom_count is None or chrom_count == 0: + self.skipTest("Test file does not contain chromatograms") + + print(f"\nTesting {chrom_count} chromatograms:") + + # Test each chromatogram + for i in range(chrom_count): + chromatogram = self.reader.get_chromatogram(i) + + # Print information about the chromatogram + print(f"\nChromatogram {i}:") + print(f" ID: {chromatogram.ID}") + print(f" Type: {chromatogram.chromatogram_type}") + print(f" Polarity: {chromatogram.polarity}") + print(f" Precursor m/z: {chromatogram.precursor_mz}") + print(f" Product m/z: {chromatogram.product_mz}") + + # Verify that the chromatogram has time and intensity data + self.assertIsNotNone(chromatogram.time, "Chromatogram should have time data") + self.assertIsNotNone(chromatogram.i, "Chromatogram should have intensity data") + + # Verify that the peaks method returns data + peaks = chromatogram.peaks() + self.assertIsNotNone(peaks, "Chromatogram peaks should not be None") + + # Print the first few data points + print(" First 3 data points (time, intensity):") + for j, (time, intensity) in enumerate(peaks): + if j >= 3: + break + print(f" {time:.4f}, {intensity:.2f}") + + def tearDown(self): + """Clean up after tests.""" + self.reader.close() + + +if __name__ == "__main__": + unittest.main(verbosity=3) diff --git a/tests/file_io_indexed_gzip_test.py b/tests/file_io_indexed_gzip_test.py index d9ee11ce..0bfc4bc3 100755 --- a/tests/file_io_indexed_gzip_test.py +++ b/tests/file_io_indexed_gzip_test.py @@ -7,7 +7,8 @@ from pymzml.file_classes.indexedGzip import IndexedGzip import unittest import random -from pymzml.spec import Spectrum, 
Chromatogram +from pymzml.spec import Spectrum +from pymzml.chromatogram import Chromatogram import struct import re import test_file_paths diff --git a/tests/file_io_indexed_gzip_writer_test.py b/tests/file_io_indexed_gzip_writer_test.py index 84379b19..da954016 100755 --- a/tests/file_io_indexed_gzip_writer_test.py +++ b/tests/file_io_indexed_gzip_writer_test.py @@ -30,7 +30,18 @@ def setUp(self): def tearDown(self): """ """ self.Writer.close() - os.remove(os.path.abspath(os.path.join(".", "tests", "data", "unittest.mzml"))) + try: + os.remove( + os.path.abspath(os.path.join(".", "tests", "data", "unittest.mzml")) + ) + except FileNotFoundError: + pass + try: + os.remove( + os.path.abspath(os.path.join(".", "tests", "data", "unittest2.mzml")) + ) + except FileNotFoundError: + pass def test_init(self): self.assertEqual(self.Writer.crc32, 0) @@ -128,7 +139,15 @@ def test_add_data(self): "uncompressed", "CF_07062012_pH8_2_3A.mzML", ) - self.Writer = GSGW(test_file, max_idx=80, max_idx_len=8, max_offset_len=8) + self.Writer = GSGW( + test_file, + max_idx=80, + max_idx_len=8, + max_offset_len=8, + output_path=os.path.abspath( + os.path.join(".", "tests", "data", "unittest2.mzml") + ), + ) self.Writer.add_data(test_string, "a") self.Writer.close() file = open(self.paths[0], "rb") diff --git a/tests/file_io_standard_gzip_test.py b/tests/file_io_standard_gzip_test.py index c6d8e220..34d0ac24 100755 --- a/tests/file_io_standard_gzip_test.py +++ b/tests/file_io_standard_gzip_test.py @@ -7,7 +7,8 @@ from pymzml.file_classes.standardGzip import StandardGzip import unittest import random -from pymzml.spec import Spectrum, Chromatogram +from pymzml.spec import Spectrum +from pymzml.chromatogram import Chromatogram import re import struct import test_file_paths diff --git a/tests/file_io_standard_mzml_test.py b/tests/file_io_standard_mzml_test.py index dcf56324..570c7c8e 100755 --- a/tests/file_io_standard_mzml_test.py +++ b/tests/file_io_standard_mzml_test.py @@ -6,7 +6,8 @@ 
import os from pymzml.file_classes.standardMzml import StandardMzml import unittest -from pymzml.spec import Spectrum, Chromatogram +from pymzml.spec import Spectrum +from pymzml.chromatogram import Chromatogram import test_file_paths diff --git a/tests/main_ms_numpress_ext_test.py b/tests/main_ms_numpress_ext_test.py index 1ad3a03c..a5c830ae 100755 --- a/tests/main_ms_numpress_ext_test.py +++ b/tests/main_ms_numpress_ext_test.py @@ -12,7 +12,6 @@ @unittest.skipIf(np is None, "Numpy is required for this test.") class test_MSNumpress(unittest.TestCase): - """ unittest for MSNumpress en- and decoding """ diff --git a/tests/main_ms_numpress_test.py b/tests/main_ms_numpress_test.py index 7be5a7a7..336d7253 100755 --- a/tests/main_ms_numpress_test.py +++ b/tests/main_ms_numpress_test.py @@ -12,7 +12,6 @@ @unittest.skipIf(np is None, "Numpy is required for this test.") class test_MSNumpress(unittest.TestCase): - """ unittest for MSNumpress en- and decoding """ diff --git a/tests/main_reader_test.py b/tests/main_reader_test.py index e551daca..d511b5df 100755 --- a/tests/main_reader_test.py +++ b/tests/main_reader_test.py @@ -30,7 +30,7 @@ def setUp(self): self.reader_uncompressed_unindexed = run.Reader(file_uncompressed_unindexed) self.reader_bad_obo_version = run.Reader(file_bad_obo_version) self.reader_set_obo_version = run.Reader( - file_bad_obo_version, obo_version="3.25.0" + file_bad_obo_version, obo_version="4.1.79" ) self.reader_set_year_obo_version = run.Reader( file_uncompressed_indexed, obo_version="23:06:2017" @@ -76,7 +76,7 @@ def test_init_iter(self): run_id = self.reader_uncompressed_unindexed.info["run_id"] start_time = self.reader_uncompressed_unindexed.info["start_time"] self.assertEqual(mzml_version, "1.1.0") - self.assertEqual(obo_version, "3.25.0") + self.assertEqual(obo_version, "4.1.79") self.assertIsInstance(spec_count, int) self.assertEqual(run_id, "exp105-01-ds5562-Pos") self.assertEqual(start_time, "2013-09-10T10:31:08Z") @@ -87,7 +87,7 @@ def 
test_init_iter(self): run_id = self.reader_uncompressed_unindexed.info["run_id"] start_time = self.reader_uncompressed_unindexed.info["start_time"] self.assertEqual(mzml_version, "1.1.0") - self.assertEqual(obo_version, "3.25.0") + self.assertEqual(obo_version, "4.1.79") self.assertIsInstance(spec_count, int) self.assertEqual(run_id, "exp105-01-ds5562-Pos") self.assertEqual(start_time, "2013-09-10T10:31:08Z") @@ -98,7 +98,7 @@ def test_init_iter(self): run_id = self.reader_uncompressed_unindexed.info["run_id"] start_time = self.reader_uncompressed_unindexed.info["start_time"] self.assertEqual(mzml_version, "1.1.0") - self.assertEqual(obo_version, "3.25.0") + self.assertEqual(obo_version, "4.1.79") self.assertIsInstance(spec_count, int) self.assertEqual(run_id, "exp105-01-ds5562-Pos") self.assertEqual(start_time, "2013-09-10T10:31:08Z") @@ -110,7 +110,7 @@ def test_init_iter(self): start_time = self.reader_uncompressed_unindexed.info["start_time"] self.assertEqual(mzml_version, "1.1.0") - self.assertEqual(obo_version, "3.25.0") + self.assertEqual(obo_version, "4.1.79") self.assertIsInstance(spec_count, int) self.assertEqual(run_id, "exp105-01-ds5562-Pos") self.assertEqual(start_time, "2013-09-10T10:31:08Z") @@ -122,8 +122,8 @@ def test_init_iter(self): start_time = self.reader_bad_obo_version.info["start_time"] self.assertEqual(mzml_version, "1.1.0") - # run._obo_version_validator 2017 default obo = 4.1.0 - self.assertEqual(obo_version, "4.1.0") + # run._obo_version_validator 2017 default obo = 4.1.79 + self.assertEqual(obo_version, "4.1.79") self.assertIsInstance(spec_count, int) self.assertEqual(run_id, "exp105-01-ds5562-Pos") self.assertEqual(start_time, "2013-09-10T10:31:08Z") @@ -135,7 +135,7 @@ def test_init_iter(self): start_time = self.reader_set_obo_version.info["start_time"] self.assertEqual(mzml_version, "1.1.0") - self.assertEqual(obo_version, "3.25.0") + self.assertEqual(obo_version, "4.1.79") self.assertIsInstance(spec_count, int) 
self.assertEqual(run_id, "exp105-01-ds5562-Pos") self.assertEqual(start_time, "2013-09-10T10:31:08Z") @@ -147,7 +147,7 @@ def test_init_iter(self): start_time = self.reader_set_year_obo_version.info["start_time"] self.assertEqual(mzml_version, "1.1.0") - # run._obo_version_validator 2017 default obo = 4.1.0 + # run._obo_version_validator: year 2017 maps to obo 4.1.0 self.assertEqual(obo_version, "4.1.0") self.assertIsInstance(spec_count, int) self.assertEqual(run_id, "exp105-01-ds5562-Pos") @@ -174,7 +174,7 @@ def test_init_iter(self): self.assertEqual(mzml_version, "1.1.0") # run._obo_version_validator set invalid obo = 1.1.0 - self.assertEqual(obo_version, "1.1.0") + self.assertEqual(obo_version, "4.1.79") self.assertIsInstance(spec_count, int) self.assertEqual(run_id, "exp105-01-ds5562-Pos") self.assertEqual(start_time, "2013-09-10T10:31:08Z") diff --git a/tests/main_spec_test.py b/tests/main_spec_test.py index 4042460e..edd43274 100755 --- a/tests/main_spec_test.py +++ b/tests/main_spec_test.py @@ -3,7 +3,8 @@ sys.path.append(os.path.abspath(".")) import pymzml.run as run -from pymzml.spec import Spectrum, Chromatogram +from pymzml.spec import Spectrum +from pymzml.chromatogram import Chromatogram import random import statistics as stat import unittest diff --git a/tests/obo_test.py b/tests/obo_test.py index 390c1902..daefd9fb 100755 --- a/tests/obo_test.py +++ b/tests/obo_test.py @@ -51,7 +51,7 @@ def test_version(self): for fn in self.obo_files: v = self._get_file_version(os.path.join(self.obodir, fn)) _v = fn[7:-7].strip() - yield self._check_version, v, _v + self._check_version(v, _v) if __name__ == "__main__": diff --git a/tox.ini b/tox.ini index 58e20857..d91294c5 100644 --- a/tox.ini +++ b/tox.ini @@ -1,14 +1,12 @@ [tox] -envlist = py37,py38,py39,coverage,example_scripts,docu +envlist = py37,py38,py39,py312,py313,coverage,example_scripts,docu [testenv] deps = - numpy - pytest + .[full] + .[test] whitelist_externals = pytest -commands = - pip install -r
{toxinidir}/requirements_dev.txt - pytest {posargs} +commands = pytest {posargs} [testenv:docu] whitelist_externals = @@ -16,7 +14,7 @@ whitelist_externals = /bin/bash /usr/bin/bash deps = - -Ur{toxinidir}/docs/requirements.txt + -e .[test] changedir = {toxinidir}/docs commands = sphinx-build source build @@ -33,10 +31,9 @@ commands = passenv = CI TRAVIS TRAVIS_* deps = - numpy + -e .[full] + -e .[test] commands = - pip install -e . - pip install -q -r {toxinidir}/requirements_dev.txt coverage erase coverage run {envbindir}/pytest coverage report --omit=".tox/*","tests/*" @@ -44,10 +41,11 @@ commands = [testenv:example_scripts] deps = - numpy + -e .[full] + -e .[test] commands = - pip install -Ur{toxinidir}/requirements.txt python example_scripts/access_run_info.py + python example_scripts/access_spectra_and_chromatograms.py python example_scripts/compare_spectra.py python example_scripts/extract_ion_chromatogram.py python example_scripts/extreme_values.py