diff --git a/docs/usage.rst b/docs/usage.rst index ee91249..1b4f8b0 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -13,9 +13,22 @@ To create a CITATION.cff file of a release, you must supply the doi that is asso The generated file must be checked for correctness and you are encouraged to enrich it further. +The command above works well for Zenodo DOIs, which contain especially rich metadata. +It is also possible to produce a CITATION.cff file from any available information. This feature is experimental: + +.. code-block:: bash + + doi2cff init --experimental + + # For example + doi2cff init 10.1051/0004-6361/202037850 --experimental + + Whenever a new release is made of the software the CITATION.cff must be updated with new doi/version/release date. -This process can be automated by running +This process can be automated by running (only for Zenodo DOIs) .. code-block:: bash doi2cff update + + diff --git a/doi2cff/cli.py b/doi2cff/cli.py index cd4ac34..c27b453 100644 --- a/doi2cff/cli.py +++ b/doi2cff/cli.py @@ -4,6 +4,7 @@ import sys import re from datetime import datetime +from typing import Any, Tuple import click from nameparser import HumanName @@ -20,6 +21,7 @@ def main(): Current supported DOI types: * Zenodo upload of a GitHub release (https://guides.github.com/activities/citable-code/) + * Any DOI with suitable metadata (experimental). 
""" pass @@ -29,6 +31,65 @@ def is_software_zenodo(zenodo_record): return zenodo_record['metadata']['resource_type']['type'] == 'software' +def zenodo_record_to_cff_yaml(zenodo_record: dict, template) -> Tuple[ruamel.yaml.YAML, Any]: + yaml = ruamel.yaml.YAML() + data = yaml.load(template) + data['title'] = zenodo_record['metadata']['title'] + data['doi'] = zenodo_record['doi'] + tagurl = tagurl_of_zenodo(zenodo_record) + if 'version' in zenodo_record['metadata']: + data['version'] = re.sub('^(v)', '', zenodo_record['metadata']['version']) + else: + data['version'] = tagurl2version(tagurl) + data['license'] = zenodo_record['metadata']['license']['id'] + data['date-released'] = datetime.strptime(zenodo_record['metadata']['publication_date'], "%Y-%m-%d").date() + data['repository-code'] = tagurl2repo(tagurl) + data['authors'] = authors_of_zenodo(zenodo_record) + references = references_of_zenodo(zenodo_record) + fixme = 'FIXME generic is too generic, ' \ + 'see https://citation-file-format.github.io/1.2.0/specifications/#/reference-types for more specific types' + if references: + data['references'] = yaml.seq(references) + for idx, r in enumerate(references): + if r['type'] == 'generic': + data['references'].yaml_add_eol_comment(fixme, idx) + + return yaml, data + + +def csljson_to_cff_yaml(cffjson: dict, template) -> Tuple[ruamel.yaml.YAML, Any]: + yaml = ruamel.yaml.YAML() + data = yaml.load(template) + + data['title'] = cffjson['title'] + if '\n' in data['title']: + data.yaml_add_eol_comment("FIXME: title contains new line: this is strange", "title") + + data['doi'] = cffjson['DOI'] + + if 'license' in cffjson: + data['license'] = cffjson['license'] + + data['date-released'] = datetime(*cffjson['published']['date-parts'][0], 1).date() + data['authors'] = authors_of_csl(cffjson) + + references = cffjson.get('reference', None) + fixme = 'FIXME generic is too generic, ' \ + 'see https://citation-file-format.github.io/1.2.0/specifications/#/reference-types for more 
specific types' + if references: + data['references'] = yaml.seq(references) + for idx, r in enumerate(references): + if r.get('type', 'generic') == 'generic': + data['references'].yaml_add_eol_comment(fixme, idx) + + # In CFF 1.2.0 these fields are optional + # https://github.com/citation-file-format/citation-file-format/releases/tag/1.2.0 + del data['version'] + del data['repository-code'] + + return yaml, data + + @main.command() @click.argument('doi') @click.option('--cff_fn', @@ -36,14 +97,19 @@ def is_software_zenodo(zenodo_record): default='CITATION.cff', help='Name of citation formatted output file', show_default=True) -def init(doi, cff_fn): +@click.option('--experimental/--no-experimental', + is_flag=True, + default=False, + help='experimental parsing of non-zenodo links', + show_default=True) +def init(doi, cff_fn, experimental): """Generate CITATION.cff file based on a Zenodo DOI of a Github release. * DOI, The Digital Object Identifier (DOI) name of a Zenodo upload of a GitHub release """ template = '''# YAML 1.2 # Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/) -cff-version: 1.0.3 +cff-version: 1.2.0 message: If you use this software, please cite it using these metadata. 
# FIXME title as repository name might not be the best name, please make human readable title: x @@ -56,35 +122,24 @@ def init(doi, cff_fn): license: x ''' - if not doi_is_from_zenodo(doi): - raise click.UsageError('Unable to process DOI name, only accept DOI name which is a Zenodo upload') + if doi_is_from_zenodo(doi): + zenodo_record = fetch_zenodo_by_doiurl(doi) - zenodo_record = fetch_zenodo_by_doiurl(doi) + if not is_software_zenodo(zenodo_record): + raise click.UsageError('Unable to process DOI name, only accept DOI name ' + 'which is a Zenodo upload of type software') - if not is_software_zenodo(zenodo_record): - raise click.UsageError('Unable to process DOI name, only accept DOI name which is a Zenodo upload of type software') - - yaml = ruamel.yaml.YAML() - data = yaml.load(template) - data['title'] = zenodo_record['metadata']['title'] - data['doi'] = zenodo_record['doi'] - tagurl = tagurl_of_zenodo(zenodo_record) - if 'version' in zenodo_record['metadata']: - data['version'] = re.sub('^(v)', '', zenodo_record['metadata']['version']) + yaml, data = zenodo_record_to_cff_yaml(zenodo_record, template) else: - data['version'] = tagurl2version(tagurl) - data['license'] = zenodo_record['metadata']['license']['id'] - data['date-released'] = datetime.strptime(zenodo_record['metadata']['publication_date'], "%Y-%m-%d").date() - data['repository-code'] = tagurl2repo(tagurl) - data['authors'] = authors_of_zenodo(zenodo_record) - references = references_of_zenodo(zenodo_record) - fixme = 'FIXME generic is too generic, ' \ - 'see https://citation-file-format.github.io/1.0.3/specifications/#/reference-types for more specific types' - if references: - data['references'] = yaml.seq(references) - for idx, r in enumerate(references): - if r['type'] == 'generic': - data['references'].yaml_add_eol_comment(fixme, idx) + if experimental: + click.echo("Trying experimental parsing of arbitrary DOI") + csljson = fetch_csljson(doi) + yaml, data = csljson_to_cff_yaml(csljson, 
template) + else: + raise click.UsageError('Unable to process DOI name, normally we only accept DOI name ' + 'which is a Zenodo upload. ' + 'You can try experimental parsing of other DOIs ' + '(see --experimental option).') yaml.dump(data, cff_fn) @@ -103,6 +158,12 @@ def update(doi, cff_fn): * DOI, The Digital Object Identifier (DOI) name of a Zenodo upload of a GitHub release """ + + if not doi_is_from_zenodo(doi): + raise click.UsageError('CITATION.cff update is only possible with Zenodo DOI. ' + 'For non-Zenodo DOIs, please consider recreating the citation, enabling experimental features: ' + '`doi2cff init --experimental`') + update_version(doi, cff_fn) diff --git a/tests/conftest.py b/tests/conftest.py index 34f0d1b..7666736 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -250,7 +250,7 @@ def zenodo_1194353(): def cff_1200251(): return '''# YAML 1.2 # Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/) -cff-version: 1.0.3 +cff-version: 1.2.0 message: If you use this software, please cite it using these metadata. # FIXME title as repository name might not be the best name, please make human readable title: Xenon @@ -304,7 +304,7 @@ def cff_1200251(): def cff_1194353(): return '''# YAML 1.2 # Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/) -cff-version: 1.0.3 +cff-version: 1.2.0 message: If you use this software, please cite it using these metadata. # FIXME title as repository name might not be the best name, please make human readable title: 'NLeSC/Xenon: This is release 2.6.0 of Xenon.' @@ -357,7 +357,7 @@ def cff_1194353(): def cff_1194353_updated_1200251(): return '''# YAML 1.2 # Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/) -cff-version: 1.0.3 +cff-version: 1.2.0 message: If you use this software, please cite it using these metadata. 
# FIXME title as repository name might not be the best name, please make human readable title: 'NLeSC/Xenon: This is release 2.6.0 of Xenon.' @@ -496,7 +496,7 @@ def zenodo_1197761(): def cff_1197761(): return '''# YAML 1.2 # Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/) -cff-version: 1.0.3 +cff-version: 1.2.0 message: If you use this software, please cite it using these metadata. # FIXME title as repository name might not be the best name, please make human readable title: Xenon gRPC server @@ -517,7 +517,7 @@ def cff_1197761(): repository-code: https://github.com/NLeSC/xenon-grpc license: Apache-2.0 references: -- # FIXME generic is too generic, see https://citation-file-format.github.io/1.0.3/specifications/#/reference-types for more specific types +- # FIXME generic is too generic, see https://citation-file-format.github.io/1.2.0/specifications/#/reference-types for more specific types type: generic doi: 10.5281/zenodo.597993 title: Xenon @@ -991,7 +991,7 @@ def cslfor_58369(): def cff_58369(): return '''# YAML 1.2 # Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/) -cff-version: 1.0.3 +cff-version: 1.2.0 message: If you use this software, please cite it using these metadata. 
# FIXME title as repository name might not be the best name, please make human readable title: 'knime-sstea: v1.0.5' @@ -1007,7 +1007,7 @@ def cff_58369(): repository-code: https://github.com/3D-e-Chem/knime-sstea license: Apache-2.0 references: -- # FIXME generic is too generic, see https://citation-file-format.github.io/1.0.3/specifications/#/reference-types for more specific types +- # FIXME generic is too generic, see https://citation-file-format.github.io/1.2.0/specifications/#/reference-types for more specific types type: generic doi: 10.1186/1471-2105-12-332 title: 'ss-TEA: Entropy based identification of receptor specific ligand binding @@ -1028,3 +1028,391 @@ def cff_58369(): - given-names: Jan PG family-names: Klomp ''' + + +@pytest.fixture +def cff_202037850(): + return '''# YAML 1.2 +# Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/) +cff-version: 1.2.0 +message: If you use this software, please cite it using these metadata. +# FIXME title as repository name might not be the best name, please make human readable +title: Online data analysis system of the INTEGRAL telescope +doi: 10.1051/0004-6361/202037850 +# FIXME splitting of full names is error prone, please check if given/family name are correct +authors: +- given-names: A. + family-names: Neronov +- given-names: V. + family-names: Savchenko +- given-names: A. + family-names: Tramacere +- given-names: M. + family-names: Meharga +- given-names: C. + family-names: Ferrigno +- given-names: S. 
+ family-names: Paltani +date-released: 2021-04-27 +license: +- start: + date-parts: + - - 2021 + - 4 + - 27 + date-time: '2021-04-27T00:00:00Z' + timestamp: 1619481600000 + content-version: vor + delay-in-days: 0 + URL: https://www.edpsciences.org/en/authors/copyright-and-licensing +''' + + +@pytest.fixture +def cslfor_202037850(): + """Content was generated with: + + curl -L -H 'Accept: application/vnd.citationstyles.csl+json' \ + https://doi.org/10.1051/0004-6361/202037850 + """ + return { + "indexed": { + "date-parts": [ + [ + 2021, + 12, + 9 + ] + ], + "date-time": "2021-12-09T19:25:39Z", + "timestamp": 1639077939575 + }, + "reference-count": 0, + "publisher": "EDP Sciences", + "license": [ + { + "start": { + "date-parts": [ + [ + 2021, + 4, + 27 + ] + ], + "date-time": "2021-04-27T00:00:00Z", + "timestamp": 1619481600000 + }, + "content-version": "vor", + "delay-in-days": 0, + "URL": "https://www.edpsciences.org/en/authors/copyright-and-licensing" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": False + }, + "accepted": { + "date-parts": [ + [ + 2021, + 3, + 8 + ] + ] + }, + "DOI": "10.1051/0004-6361/202037850", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2021, + 4, + 27 + ] + ], + "date-time": "2021-04-27T08:30:49Z", + "timestamp": 1619512249000 + }, + "source": "Crossref", + "is-referenced-by-count": 0, + "title": "Online data analysis system of the INTEGRAL telescope", + "prefix": "10.1051", + "author": [ + { + "given": "A.", + "family": "Neronov", + "sequence": "first", + "affiliation": [] + }, + { + "given": "V.", + "family": "Savchenko", + "sequence": "additional", + "affiliation": [] + }, + { + "given": "A.", + "family": "Tramacere", + "sequence": "additional", + "affiliation": [] + }, + { + "given": "M.", + "family": "Meharga", + "sequence": "additional", + "affiliation": [] + }, + { + "given": "C.", + "family": "Ferrigno", + "sequence": "additional", + "affiliation": [] + }, + { + "given": "S.", + 
"family": "Paltani", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "250", + "published-online": { + "date-parts": [ + [ + 2021, + 4, + 27 + ] + ] + }, + "container-title": "Astronomy & Astrophysics", + "original-title": [], + "link": [ + { + "URL": "https://www.aanda.org/10.1051/0004-6361/202037850/pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2021, + 6, + 1 + ] + ], + "date-time": "2021-06-01T18:58:58Z", + "timestamp": 1622573938000 + }, + "score": 1, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2021, + 4, + 27 + ] + ] + }, + "references-count": 0, + "alternative-id": [ + "aa37850-20" + ], + "URL": "http://dx.doi.org/10.1051/0004-6361/202037850", + "relation": {}, + "ISSN": [ + "0004-6361", + "1432-0746" + ], + "subject": [ + "Space and Planetary Science", + "Astronomy and Astrophysics" + ], + "container-title-short": "A&A", + "published": { + "date-parts": [ + [ + 2021, + 4, + 27 + ] + ] + } +} + + + +@pytest.fixture +def cff_1995729(): + return '''# YAML 1.2 +# Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/) +cff-version: 1.2.0 +message: If you use this software, please cite it using these metadata. +# FIXME title as repository name might not be the best name, please make human readable +title: Albert Einstein to Max Born 1 +doi: 10.1063/1.1995729 +# FIXME splitting of full names is error prone, please check if given/family name are correct +authors: +- given-names: Albert + family-names: Einstein +date-released: 2005-05-01 +license: x +references: +- # FIXME generic is too generic, see https://citation-file-format.github.io/1.2.0/specifications/#/reference-types for more specific types + key: c1 + unstructured: 'M. 
Born , The Born–Einstein Letters 1916–1955: Freindship, Politics + and Physics in Uncertain Times, Macmillan, New York (2005), p. 88. Original letter + © The Hebrew University of Jerusalem, Israel.' +- # FIXME generic is too generic, see https://citation-file-format.github.io/1.2.0/specifications/#/reference-types for more specific types + key: c2 + volume-title: 'The End of the Uncertain World: The Life and Science of Max Born' + author: Greenspan N. T. + year: '2005' +''' + + +@pytest.fixture +def cslfor_1995729(): + """Content was generated with: + + curl -L -H 'Accept: application/vnd.citationstyles.csl+json' \ + https://doi.org/10.1063/1.1995729 + """ + return { + "indexed": { + "date-parts": [ + [ + 2021, + 12, + 16 + ] + ], + "date-time": "2021-12-16T18:54:42Z", + "timestamp": 1639680882985 + }, + "reference-count": 2, + "publisher": "AIP Publishing", + "issue": "5", + "content-domain": { + "domain": [], + "crossmark-restriction": True + }, + "published-print": { + "date-parts": [ + [ + 2005, + 5 + ] + ] + }, + "DOI": "10.1063/1.1995729", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2007, + 3, + 13 + ] + ], + "date-time": "2007-03-13T19:20:15Z", + "timestamp": 1173813615000 + }, + "page": "16-16", + "source": "Crossref", + "is-referenced-by-count": 0, + "title": "Albert Einstein to Max Born 1", + "prefix": "10.1063", + "volume": "58", + "author": [ + { + "given": "Albert", + "family": "Einstein", + "sequence": "first", + "affiliation": [] + } + ], + "member": "317", + "reference": [ + { + "key": "c1", + "unstructured": "M. Born , The Born–Einstein Letters 1916–1955: Freindship, Politics and Physics in Uncertain Times, Macmillan, New York (2005), p. 88. Original letter © The Hebrew University of Jerusalem, Israel." + }, + { + "key": "c2", + "volume-title": "The End of the Uncertain World: The Life and Science of Max Born", + "author": "Greenspan N. 
T.", + "year": "2005" + } + ], + "container-title": "Physics Today", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "http://physicstoday.scitation.org/doi/pdf/10.1063/1.1995729", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2016, + 12, + 28 + ] + ], + "date-time": "2016-12-28T18:02:11Z", + "timestamp": 1482948131000 + }, + "score": 1, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2005, + 5 + ] + ] + }, + "references-count": 2, + "journal-issue": { + "issue": "5", + "published-print": { + "date-parts": [ + [ + 2005, + 5 + ] + ] + } + }, + "alternative-id": [ + "10.1063/1.1995729" + ], + "URL": "http://dx.doi.org/10.1063/1.1995729", + "relation": {}, + "ISSN": [ + "0031-9228", + "1945-0699" + ], + "subject": [ + "General Physics and Astronomy" + ], + "container-title-short": "Physics Today", + "published": { + "date-parts": [ + [ + 2005, + 5 + ] + ] + } +} diff --git a/tests/test_cli.py b/tests/test_cli.py index a2189d3..2d4843e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -5,7 +5,7 @@ from io import StringIO import pytest -import yaml +import ruamel.yaml from click.testing import CliRunner import requests_mock @@ -13,6 +13,7 @@ from doi2cff import cli from doi2cff.cli import init, update_version +yaml = ruamel.yaml.YAML(typ='safe', pure=True) @pytest.fixture def runner(): @@ -87,10 +88,42 @@ def test_init_withnonzenodoref(runner, zenodo_58369, cslfor_58369, cff_58369): m.get('https://zenodo.org/api/records/58369', json=zenodo_58369) m.get('https://doi.org/10.1186/1471-2105-12-332', json=cslfor_58369) - runner.invoke(init, [doi]) + runner_result = runner.invoke(init, [doi]) + assert 'Trying experimental parsing of arbitrary DOI' not in runner_result.output with open('CITATION.cff', 'r') as f: result = f.read() expected = cff_58369 assert yaml.load(result) == yaml.load(expected) + + + +def 
test_init_csl_noref(runner, cff_202037850, cslfor_202037850): + doi = '10.1051/0004-6361/202037850' + + with runner.isolated_filesystem(), requests_mock.mock() as m: + m.get('https://doi.org/10.1051/0004-6361/202037850', json=cslfor_202037850) + + runner_result = runner.invoke(init, [doi, '--experimental', '--cff_fn', 'CITATION.cff'], catch_exceptions=False) + assert 'Trying experimental parsing of arbitrary DOI' in runner_result.output + + with open('CITATION.cff', 'r') as f: + result = f.read() + + assert yaml.load(result) == yaml.load(cff_202037850) + + +def test_init_csl_nolicense(runner, cff_1995729, cslfor_1995729): + # TODO: complete! + doi = '10.1063/1.1995729' + + with runner.isolated_filesystem(), requests_mock.mock() as m: + m.get('https://doi.org/10.1063/1.1995729', json=cslfor_1995729) + runner.invoke(init, [doi, '--experimental', '--cff_fn', 'CITATION.cff'], catch_exceptions=False) + + with open('CITATION.cff', 'r') as f: + result = f.read() + + assert yaml.load(result) == yaml.load(cff_1995729) + diff --git a/tox.ini b/tox.ini index 7084084..fbd2326 100644 --- a/tox.ini +++ b/tox.ini @@ -3,6 +3,8 @@ envlist = py35, flake8 [travis] python = + 3.9: py39 + 3.8: py38 3.7: py37 3.6: py36 3.5: py35