diff --git a/Dockerfile b/Dockerfile
index f8599ad0..9610a278 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -107,6 +107,11 @@ FROM dev AS python-dist-builder
 ARG python_package_version
 RUN echo "Setting version to: $version" && \
     uv version "$python_package_version"
+RUN python scripts/dev/update_readme.py \
+    --source=./doc/python_library.md \
+    --target=./build/python_library.md \
+    --source-base-path=doc \
+    --link-prefix=https://github.com/elifesciences/sciencebeam-parser/blob/main
 
 RUN uv build && \
     ls -l dist
diff --git a/Makefile b/Makefile
index 7f5d60ac..69729baa 100644
--- a/Makefile
+++ b/Makefile
@@ -137,6 +137,14 @@ dev-script-end-to-end-tests:
 	./scripts/dev/end-to-end-tests.sh
 
 
+dev-build-python-readme:
+	$(PYTHON) scripts/dev/update_readme.py \
+		--source=./doc/python_library.md \
+		--target=./build/python_library.md \
+		--source-base-path=doc \
+		--link-prefix=https://github.com/elifesciences/sciencebeam-parser/blob/main
+
+
 run:
 	$(PYTHON) -m sciencebeam_parser $(ARGS)
 
diff --git a/pyproject.toml b/pyproject.toml
index b3cc2d14..44423203 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "sciencebeam-parser"
 version = "0.0.0"
 description = "ScienceBeam Parser, parse scientific documents."
-readme = "README.md"
+readme = "build/python_library.md"
 requires-python = ">=3.9,<3.10"
 dependencies = [
     "fastapi[standard]>=0.124.0",
diff --git a/scripts/dev/update_readme.py b/scripts/dev/update_readme.py
new file mode 100644
index 00000000..51216655
--- /dev/null
+++ b/scripts/dev/update_readme.py
@@ -0,0 +1,117 @@
+import argparse
+import logging
+from pathlib import Path
+import posixpath
+import re
+
+
+LOGGER = logging.getLogger(__name__)
+
+
+# Inline markdown link `[label](target)`. Label and target use negated
+# character classes so that several links on one line are matched one by one
+# instead of being swallowed by a single greedy match.
+MARKDOWN_LINK_PATTERN = re.compile(r'(\[[^\]\n]*\]\()([^)\n]*)(\))')
+
+
+def strip_special_links(markdown: str) -> str:
+    """Remove badge lines (starting with `[![`) and trailing whitespace."""
+    return '\n'.join([
+        line.rstrip()
+        for line in markdown.splitlines()
+        if not line.startswith('[![')
+    ])
+
+
+def get_resolved_link(
+    link: str,
+    source_base_path: str,
+    link_prefix: str
+) -> str:
+    """Return `link` as an absolute URL below `link_prefix`.
+
+    Absolute URLs and in-page anchors are returned unchanged.
+    """
+    if '://' in link or link.startswith('#'):
+        return link
+    # posixpath keeps forward slashes on every platform (these are URLs)
+    result = posixpath.join(link_prefix, source_base_path, link)
+    if source_base_path and '/' not in source_base_path:
+        result = result.replace(f'/{source_base_path}/../', '/')
+    return result
+
+
+def get_markdown_with_resolved_links(
+    markdown_content: str,
+    source_base_path: str,
+    link_prefix: str
+) -> str:
+    """Rewrite every relative markdown link to an absolute URL."""
+    return MARKDOWN_LINK_PATTERN.sub(
+        lambda m: (
+            m.group(1)
+            + get_resolved_link(
+                m.group(2),
+                source_base_path=source_base_path,
+                link_prefix=link_prefix
+            )
+            + m.group(3)
+        ),
+        markdown_content
+    )
+
+
+def update_readme_text(
+    original_readme: str,
+    source_base_path: str,
+    link_prefix: str
+) -> str:
+    """Strip badge lines and resolve relative links of a readme."""
+    return get_markdown_with_resolved_links(
+        strip_special_links(original_readme),
+        source_base_path=source_base_path,
+        link_prefix=link_prefix
+    )
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser('Update README')
+    parser.add_argument('--source', required=True)
+    parser.add_argument('--target', required=True)
+    parser.add_argument('--source-base-path', required=True)
+    parser.add_argument('--link-prefix', required=True)
+    return parser.parse_args()
+
+
+def run(
+    source: str,
+    target: str,
+    source_base_path: str,
+    link_prefix: str
+):
+    """Read `source`, update its text and write the result to `target`."""
+    LOGGER.info('Updating readme: %r -> %r', source, target)
+    original_readme = Path(source).read_text(encoding='utf-8')
+    updated_readme = update_readme_text(
+        original_readme,
+        source_base_path=source_base_path,
+        link_prefix=link_prefix
+    )
+    target_path = Path(target)
+    target_path.parent.mkdir(parents=True, exist_ok=True)
+    target_path.write_text(updated_readme, encoding='utf-8')
+
+
+def main():
+    args = parse_args()
+    run(
+        source=args.source,
+        target=args.target,
+        source_base_path=args.source_base_path,
+        link_prefix=args.link_prefix
+    )
+
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.INFO)
+    main()
diff --git a/tests/dev/__init__.py b/tests/dev/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/dev/update_readme_test.py b/tests/dev/update_readme_test.py
new file mode 100644
index 00000000..ac63ee8c
--- /dev/null
+++ b/tests/dev/update_readme_test.py
@@ -0,0 +1,99 @@
+import textwrap
+
+from scripts.dev.update_readme import (
+    get_markdown_with_resolved_links,
+    update_readme_text
+)
+
+
+PROJECT_URL = 'https://github.com/user/project'
+RELEASE_VERSION_1 = '1.0.1'
+
+LINK_PREFIX_1 = f'{PROJECT_URL}/blob/{RELEASE_VERSION_1}'
+
+
+class TestGetMarkdownWithResolvedLinks:
+    def test_should_return_unchanged_markdown_without_links(self):
+        assert get_markdown_with_resolved_links(
+            '# Header 1',
+            source_base_path='doc',
+            link_prefix=LINK_PREFIX_1
+        ) == '# Header 1'
+
+    def test_should_return_change_relative_link_without_source_base_path(self):
+        assert get_markdown_with_resolved_links(
+            '# Header 1\n[link](other.md)',
+            source_base_path='',
+            link_prefix=LINK_PREFIX_1
+        ) == f'# Header 1\n[link]({LINK_PREFIX_1}/other.md)'
+
+    def test_should_return_change_relative_link_with_source_base_path(self):
+        assert get_markdown_with_resolved_links(
+            '# Header 1\n[link](other.md)',
+            source_base_path='doc',
+            link_prefix=LINK_PREFIX_1
+        ) == f'# Header 1\n[link]({LINK_PREFIX_1}/doc/other.md)'
+
+    def test_should_return_change_relative_link_path_outside_source_base_path(self):
+        assert get_markdown_with_resolved_links(
+            '# Header 1\n[link](../src/file.xyz)',
+            source_base_path='doc',
+            link_prefix=LINK_PREFIX_1
+        ) == f'# Header 1\n[link]({LINK_PREFIX_1}/src/file.xyz)'
+
+    def test_should_not_replace_absolute_urls(self):
+        assert get_markdown_with_resolved_links(
+            '# Header 1\n[link](https://host/file.xyz)',
+            source_base_path='doc',
+            link_prefix=LINK_PREFIX_1
+        ) == '# Header 1\n[link](https://host/file.xyz)'
+
+    def test_should_resolve_multiple_links_on_same_line(self):
+        assert get_markdown_with_resolved_links(
+            '[a](a.md) and [b](b.md)',
+            source_base_path='doc',
+            link_prefix=LINK_PREFIX_1
+        ) == f'[a]({LINK_PREFIX_1}/doc/a.md) and [b]({LINK_PREFIX_1}/doc/b.md)'
+
+    def test_should_not_replace_anchor_links(self):
+        assert get_markdown_with_resolved_links(
+            '[section](#section)',
+            source_base_path='doc',
+            link_prefix=LINK_PREFIX_1
+        ) == '[section](#section)'
+
+
+class TestUpdateReadmeText:
+    def test_should_keep_original_text(self):
+        assert update_readme_text(textwrap.dedent(
+            '''
+            Text
+            '''
+        ), source_base_path='doc', link_prefix=LINK_PREFIX_1).strip() == textwrap.dedent(
+            '''
+            Text
+            '''
+        ).strip()
+
+    def test_should_strip_special_links(self):
+        assert update_readme_text(textwrap.dedent(
+            '''
+            Text
+            [![Label](image_path)](link)
+            '''
+        ), source_base_path='doc', link_prefix=LINK_PREFIX_1).strip() == textwrap.dedent(
+            '''
+            Text
+            '''
+        ).strip()
+
+    def test_should_resolve_relative_links(self):
+        assert update_readme_text(textwrap.dedent(
+            '''
+            [label](../path/to/file.yml)
+            '''
+        ), source_base_path='doc', link_prefix=LINK_PREFIX_1).strip() == textwrap.dedent(
+            f'''
+            [label]({LINK_PREFIX_1}/path/to/file.yml)
+            '''
+        ).strip()