Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 33 additions & 12 deletions web_programming/covid_stats_via_xpath.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""
This is to show simple COVID19 info fetching from worldometers archive site using lxml
* The main motivation to use lxml in place of bs4 is that it is faster and therefore
more convenient to use in Python web projects (e.g. Django or Flask-based)
This script demonstrates fetching simple COVID-19 statistics from the
Worldometers archive site using lxml. lxml is chosen over BeautifulSoup
for its speed and convenience in Python web projects (such as Django or
Flask).
"""

# /// script
Expand All @@ -25,15 +26,35 @@ class CovidData(NamedTuple):


def covid_stats(
url: str = "https://web.archive.org/web/20250825095350/https://www.worldometers.info/coronavirus/",
url: str = (
"https://web.archive.org/web/20250825095350/"
"https://www.worldometers.info/coronavirus/"
),
) -> CovidData:
xpath_str = '//div[@class = "maincounter-number"]/span/text()'
return CovidData(
*html.fromstring(httpx.get(url, timeout=10).content).xpath(xpath_str)
try:
response = httpx.get(url, timeout=10)
response.raise_for_status()
Comment on lines +36 to +37
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
response = httpx.get(url, timeout=10)
response.raise_for_status()
response = httpx.get(url, timeout=10).raise_for_status()

except httpx.TimeoutException:
print(
"Request timed out. Please check your network connection "
"or try again later."
)
return CovidData("N/A", "N/A", "N/A")
except httpx.HTTPStatusError as e:
print(f"HTTP error occurred: {e}")
return CovidData("N/A", "N/A", "N/A")
data = html.fromstring(response.content).xpath(xpath_str)
if len(data) != 3:
print("Unexpected data format. The page structure may have changed.")
return CovidData("N/A", "N/A", "N/A")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return CovidData("N/A", "N/A", "N/A")
data = "N/A", "N/A", "N/A"

return CovidData(*data)


if __name__ == "__main__":
fmt = (
"Total COVID-19 cases in the world: {}\n"
"Total deaths due to COVID-19 in the world: {}\n"
"Total COVID-19 patients recovered in the world: {}"
)


fmt = """Total COVID-19 cases in the world: {}
Total deaths due to COVID-19 in the world: {}
Total COVID-19 patients recovered in the world: {}"""
print(fmt.format(*covid_stats()))
print(fmt.format(*covid_stats()))