Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ FROM dev AS python-dist-builder
ARG python_package_version
RUN echo "Setting version to: $version" && \
uv version "$python_package_version"
RUN python scripts/dev/update_readme.py \
--source=./doc/python_library.md \
--target=./build/python_library.md \
--source-base-path=doc \
--link-prefix=https://github.com/elifesciences/sciencebeam-parser/blob/main
RUN uv build && \
ls -l dist

Expand Down
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,14 @@ dev-script-end-to-end-tests:
./scripts/dev/end-to-end-tests.sh


dev-build-python-readme:
$(PYTHON) scripts/dev/update_readme.py \
--source=./doc/python_library.md \
--target=./build/python_library.md \
--source-base-path=doc \
--link-prefix=https://github.com/elifesciences/sciencebeam-parser/blob/main


run:
$(PYTHON) -m sciencebeam_parser $(ARGS)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "sciencebeam-parser"
version = "0.0.0"
description = "ScienceBeam Parser, parse scientific documents."
readme = "README.md"
readme = "build/python_library.md"
requires-python = ">=3.9,<3.10"
dependencies = [
"fastapi[standard]>=0.124.0",
Expand Down
103 changes: 103 additions & 0 deletions scripts/dev/update_readme.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import argparse
import logging
import os
from pathlib import Path
import posixpath
import re
from typing import Optional, Sequence


# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)


def strip_special_links(markdown: str) -> str:
    """Drop badge-style lines and trailing whitespace from markdown text.

    A line is dropped when it starts with ``[![`` (an image wrapped in a
    link). Every remaining line is right-stripped and the lines are joined
    with a single newline character, so a trailing newline in the input is
    not preserved.
    """
    kept_lines = []
    for raw_line in markdown.splitlines():
        if raw_line.startswith('[!['):
            continue
        kept_lines.append(raw_line.rstrip())
    return '\n'.join(kept_lines)


def get_resolved_link(
    link: str,
    source_base_path: str,
    link_prefix: str
) -> str:
    """Turn a link found in a markdown file into an absolute URL.

    Args:
        link: The link target as written in the markdown source.
        source_base_path: Directory of the markdown file relative to the
            repository root (may be empty).
        link_prefix: URL prefix that repository-relative paths are appended
            to.

    Returns:
        ``link`` unchanged when it is empty, an in-page anchor (``#...``),
        protocol-relative (``//host/...``) or carries a URI scheme
        (``https://...``, ``mailto:...``). Otherwise ``link_prefix`` joined
        with the normalised repository-relative path.
    """
    if (
        not link
        or link.startswith(('#', '//'))
        or re.match(r'[A-Za-z][A-Za-z0-9+.-]*:', link)
    ):
        return link
    if link.startswith('/'):
        # Root-relative links are resolved against the repository root,
        # not against the directory of the source file.
        repo_path = link
    else:
        repo_path = posixpath.join(source_base_path, link)
    # URLs always use forward slashes, hence posixpath rather than os.path.
    # Only the repository-relative part is normalised so that the '//' of
    # the prefix's scheme is never collapsed.
    repo_path = posixpath.normpath(repo_path).lstrip('/')
    if repo_path == '.':
        repo_path = ''
    if repo_path and link.endswith('/'):
        # normpath drops a trailing slash; keep directory links intact.
        repo_path += '/'
    return posixpath.join(link_prefix, repo_path)


def get_markdown_with_resolved_links(
    markdown_content: str,
    source_base_path: str,
    link_prefix: str
) -> str:
    """Rewrite the target of every inline markdown link or image.

    Each ``](target)`` occurrence has its target passed through
    ``get_resolved_link``; all other text is returned untouched.

    Args:
        markdown_content: The markdown text to process.
        source_base_path: Forwarded to ``get_resolved_link``.
        link_prefix: Forwarded to ``get_resolved_link``.

    Returns:
        The markdown text with the link targets replaced.
    """
    def _replace_target(match: re.Match) -> str:
        resolved_target = get_resolved_link(
            match.group(2),
            source_base_path=source_base_path,
            link_prefix=link_prefix
        )
        return match.group(1) + resolved_target + match.group(3)

    # The target is matched with a negated character class instead of a
    # greedy '.*' so that several links on one line are each rewritten and a
    # later ')' on the same line is not swallowed into the target.
    return re.sub(
        r'(\]\()([^)\n]*)(\))',
        _replace_target,
        markdown_content
    )


def update_readme_text(
    original_readme: str,
    source_base_path: str,
    link_prefix: str
) -> str:
    """Return the readme text prepared for publishing.

    The text is first passed through ``strip_special_links`` and the result
    is then passed through ``get_markdown_with_resolved_links`` using the
    given ``source_base_path`` and ``link_prefix``.
    """
    stripped_readme = strip_special_links(original_readme)
    return get_markdown_with_resolved_links(
        stripped_readme,
        source_base_path=source_base_path,
        link_prefix=link_prefix
    )


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command line options of the readme update script.

    Args:
        argv: Arguments to parse; ``None`` (the default) makes argparse
            read them from ``sys.argv``.

    Returns:
        Namespace with ``source``, ``target``, ``source_base_path`` and
        ``link_prefix`` attributes (all required options).
    """
    # The text is the description; the first positional parameter of
    # ArgumentParser is `prog`, which would replace the program name shown
    # in the usage line.
    parser = argparse.ArgumentParser(description='Update README')
    parser.add_argument(
        '--source', required=True,
        help='path of the markdown file to read'
    )
    parser.add_argument(
        '--target', required=True,
        help='path the updated markdown is written to'
    )
    parser.add_argument(
        '--source-base-path', required=True,
        help='base path that relative links are resolved against'
    )
    parser.add_argument(
        '--link-prefix', required=True,
        help='URL prefix prepended to resolved relative links'
    )
    return parser.parse_args(argv)


def run(
    source: str,
    target: str,
    source_base_path: str,
    link_prefix: str
):
    """Read ``source``, update its text and write the result to ``target``.

    Both files are handled as UTF-8. Missing parent directories of
    ``target`` are created before writing.
    """
    LOGGER.info('Updating readme: %r -> %r', source, target)
    source_path = Path(source)
    output_path = Path(target)
    new_text = update_readme_text(
        source_path.read_text(encoding='utf-8'),
        source_base_path=source_base_path,
        link_prefix=link_prefix
    )
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(new_text, encoding='utf-8')


def main():
    """Parse the command line options and run the readme update with them."""
    cli_args = parse_args()
    source, target = cli_args.source, cli_args.target
    run(
        source=source,
        target=target,
        source_base_path=cli_args.source_base_path,
        link_prefix=cli_args.link_prefix
    )


if __name__ == '__main__':
    # Only configure logging when executed as a script, so that importing
    # this module does not change the logging setup of the importer.
    logging.basicConfig(level=logging.INFO)
    main()
Empty file added tests/dev/__init__.py
Empty file.
85 changes: 85 additions & 0 deletions tests/dev/update_readme_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import textwrap

from scripts.dev.update_readme import (
get_markdown_with_resolved_links,
update_readme_text
)


# Example values used to build the link prefix passed to the functions
# under test.
PROJECT_URL = 'https://github.com/user/project'
RELEASE_VERSION_1 = '1.0.1'

LINK_PREFIX_1 = f'{PROJECT_URL}/blob/{RELEASE_VERSION_1}'


class TestGetMarkdownWithResolvedLinks:
    """Tests for ``get_markdown_with_resolved_links``."""

    def test_should_return_unchanged_markdown_without_links(self):
        resolved = get_markdown_with_resolved_links(
            '# Header 1',
            source_base_path='doc',
            link_prefix=LINK_PREFIX_1
        )
        assert resolved == '# Header 1'

    def test_should_return_change_relative_link_without_source_base_path(self):
        expected = f'# Header 1\n[link]({LINK_PREFIX_1}/other.md)'
        resolved = get_markdown_with_resolved_links(
            '# Header 1\n[link](other.md)',
            source_base_path='',
            link_prefix=LINK_PREFIX_1
        )
        assert resolved == expected

    def test_should_return_change_relative_link_with_source_base_path(self):
        expected = f'# Header 1\n[link]({LINK_PREFIX_1}/doc/other.md)'
        resolved = get_markdown_with_resolved_links(
            '# Header 1\n[link](other.md)',
            source_base_path='doc',
            link_prefix=LINK_PREFIX_1
        )
        assert resolved == expected

    def test_should_return_change_relative_link_path_outside_source_base_path(self):
        expected = f'# Header 1\n[link]({LINK_PREFIX_1}/src/file.xyz)'
        resolved = get_markdown_with_resolved_links(
            '# Header 1\n[link](../src/file.xyz)',
            source_base_path='doc',
            link_prefix=LINK_PREFIX_1
        )
        assert resolved == expected

    def test_should_not_replace_absolute_urls(self):
        markdown = '# Header 1\n[link](https://host/file.xyz)'
        resolved = get_markdown_with_resolved_links(
            markdown,
            source_base_path='doc',
            link_prefix=LINK_PREFIX_1
        )
        assert resolved == '# Header 1\n[link](https://host/file.xyz)'


class TestUpdateReadmeText:
    """Tests for ``update_readme_text``; results are compared stripped."""

    def test_should_keep_original_text(self):
        original = textwrap.dedent(
            '''
            Text
            '''
        )
        expected = textwrap.dedent(
            '''
            Text
            '''
        ).strip()
        updated = update_readme_text(
            original, source_base_path='doc', link_prefix=LINK_PREFIX_1
        )
        assert updated.strip() == expected

    def test_should_strip_special_links(self):
        original = textwrap.dedent(
            '''
            Text
            [![Label](image_path)](link)
            '''
        )
        expected = textwrap.dedent(
            '''
            Text
            '''
        ).strip()
        updated = update_readme_text(
            original, source_base_path='doc', link_prefix=LINK_PREFIX_1
        )
        assert updated.strip() == expected

    def test_should_resolve_relative_links(self):
        original = textwrap.dedent(
            '''
            [label](../path/to/file.yml)
            '''
        )
        expected = textwrap.dedent(
            f'''
            [label]({LINK_PREFIX_1}/path/to/file.yml)
            '''
        ).strip()
        updated = update_readme_text(
            original, source_base_path='doc', link_prefix=LINK_PREFIX_1
        )
        assert updated.strip() == expected