Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 31 additions & 12 deletions src/axiomatic/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,41 @@ class DocumentHelper:
def __init__(self, ax_client: Axiomatic):
    """Keep a reference to the Axiomatic client used by the helper methods."""
    self._ax_client = ax_client

def pdf_from_url(self, url: str) -> ParseResponse:
    """Download a PDF document from a URL and parse it into a Markdown response.

    An arXiv abstract URL (one containing ``/abs/``) is rewritten to the
    matching ``/pdf/`` URL before downloading.

    Args:
        url: Location of the PDF, or of an arXiv abstract page.

    Returns:
        Whatever ``self._ax_client.document.parse`` returns for the
        downloaded bytes.

    Raises:
        Exception: If the download does not answer with HTTP 200.
    """
    if "arxiv" in url and "/abs/" in url:
        # Rewrite only the first "/abs/" path segment; a bare
        # replace("abs", "pdf") would also mangle any other "abs" substring
        # in the host, identifier or query string.
        url = url.replace("/abs/", "/pdf/", 1)
        print("The URL is an arXiv abstract page. Replacing 'abs' with 'pdf' to download the PDF.")

    download = requests.get(url)

    # Do not hand an HTML error page to the parser as if it were a PDF.
    if download.status_code != 200:
        raise Exception(f"Failed to download PDF. Status code: {download.status_code}")

    return self._ax_client.document.parse(file=download.content)

def pdf_from_file(self, path: str):
    """Open a PDF document from a file path and parse it into a Markdown response.

    Args:
        path: Path of the PDF file on the local filesystem.

    Returns:
        Whatever ``self._ax_client.document.parse`` returns for the upload.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import os

    with open(path, "rb") as f:
        file_bytes = f.read()

    # Create a tuple with (filename, content and content-type);
    # we do this because .parse expects a FastAPI UploadFile.
    # NOTE(review): a reviewer observed that the tuple form was apparently not
    # required before and may stem from an SDK update -- confirm.
    # os.path.basename honours the platform's path separators, unlike
    # splitting on "/" which leaves Windows-style paths untouched.
    file_name = os.path.basename(path)
    file_tuple = (file_name, file_bytes, "application/pdf")

    response = self._ax_client.document.parse(file=file_tuple)
    return response

def pdf_from_url(self, url: str):
    """Download a PDF document from a URL and parse it into a Markdown response.

    An arXiv abstract URL (one containing ``/abs/``) is rewritten to the
    matching ``/pdf/`` URL before downloading.

    Args:
        url: Location of the PDF, or of an arXiv abstract page.

    Returns:
        Whatever ``self._ax_client.document.parse`` returns for the upload.

    Raises:
        Exception: If the download does not answer with HTTP 200.
    """
    from urllib.parse import urlparse

    if "arxiv.org" in url and "/abs/" in url:
        # Rewrite only the first "/abs/" path segment; a bare
        # replace("abs", "pdf") would also mangle any other "abs" substring
        # in the identifier or query string.
        url = url.replace("/abs/", "/pdf/", 1)
        print("The URL is an arXiv abstract page. Replacing 'abs' with 'pdf' to download the PDF.")

    response = requests.get(url)

    if response.status_code != 200:
        raise Exception(f"Failed to download PDF. Status code: {response.status_code}")

    # Extract the filename from the URL *path* (ignoring any query string or
    # fragment), or fall back to a default name.
    file_name = urlparse(url).path.rsplit("/", 1)[-1]
    if not file_name.lower().endswith(".pdf"):
        file_name = "document.pdf"

    # Create a tuple with (filename, content and content-type);
    # we do this because .parse expects a FastAPI UploadFile.
    file_tuple = (file_name, response.content, "application/pdf")

    parse_response = self._ax_client.document.parse(file=file_tuple)
    return parse_response

def plot_b64_images(self, images: Dict[str, str]):
"""Plot a dictionary of base64 images."""
import ipywidgets as widgets # type: ignore
Expand Down