Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@ If you run into any issues, consult the logs or reach out on the repository's [I
---

# Changelog
- v0.7615 - Parsing improvements
- Improved text formatting & escaping in complex markdown vs. html cases
- v0.7614 - Better stock market data fetching from Yahoo Finance
- Changes made to `src/api_get_stock_prices_yfinance.py`
- => More accurate ticker symbol searches, fallbacks, multi-day data etc.
Expand Down
3 changes: 2 additions & 1 deletion config/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ Enabled = True

# The preferred, more capable model to use by default (e.g., gpt-4o, gpt-4.5-preview).
# This model will be used until its daily token limit (PremiumTokenLimit) is reached.
PremiumModel = gpt-4o
# PremiumModel = gpt-4o
PremiumModel = gpt-4.1

# The cheaper model to switch to when the PremiumTokenLimit is reached (e.g., gpt-4o-mini).
# This model has its own daily token limit (MiniTokenLimit).
Expand Down
2 changes: 1 addition & 1 deletion src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# https://github.com/FlyingFathead/TelegramBot-OpenAI-API
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# version of this program
version_number = "0.7614"
version_number = "0.7615"

# Add the project root directory to Python's path
import sys
Expand Down
144 changes: 105 additions & 39 deletions src/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,47 +106,113 @@ def preserve_html_and_escape_text(text):
escaped_text += html.escape(text[last_end:])
return escaped_text

# markdown to html parsing (v0.737.2)
# v0.7615
def markdown_to_html(text):
    """
    Convert a simple subset of Markdown to HTML.

    Supported constructs: fenced code blocks (with an optional language tag),
    inline code, ATX headings (``#`` .. ``######`` at the start of a line),
    ``[text](url)`` links, ``**bold**`` and ``*italic*`` / ``_italic_``.

    Fenced code blocks, inline code spans and the opening ``<a href=...>`` tag
    of links are swapped for opaque placeholder tokens while the remaining
    rules run, so that the heading/bold/italic rules cannot rewrite code or
    URLs. The placeholders are restored in a single pass at the end.

    Args:
        text: The Markdown source string.

    Returns:
        The converted HTML string. If anything raises during conversion
        (e.g. ``text`` is not a ``str``), the exception message is returned
        instead -- this mirrors the function's historical best-effort
        contract, so callers always get a string back.
    """
    try:
        # NUL delimits the placeholder tokens below, so it must never occur
        # in the input itself.
        text = text.replace("\x00", "")

        # Finished HTML fragments that later rules must not touch.
        protected = []

        def stash(fragment):
            # The token is only NUL + digits + NUL: it contains none of the
            # characters the Markdown rules react to (* _ ` # [ ] ( )).
            protected.append(fragment)
            return f"\x00{len(protected) - 1}\x00"

        # 1) Fenced code blocks: ```lang\n ... ``` (language is optional).
        def stash_codeblock(match):
            language = match.group(1)
            escaped_code = html.escape(match.group(2).strip())
            if language:
                return stash(
                    f'<pre><code class="language-{language}">{escaped_code}</code></pre>'
                )
            return stash(f'<pre><code>{escaped_code}</code></pre>')

        # [\s\S] matches any character including newlines (no DOTALL needed).
        text = re.sub(r'```(\w+)?\n([\s\S]*?)```', stash_codeblock, text)

        # 2) Inline code with single backticks; protected early so names like
        #    `my_var_name` are not turned into italics.
        text = re.sub(
            r'`([^`]*)`',
            lambda m: stash(f'<code>{m.group(1)}</code>'),
            text,
        )

        # 3) Headings: 1-6 '#' at the start of a line, followed by spaces or
        #    tabs. Newlines are deliberately excluded so an empty heading
        #    marker cannot swallow the following line.
        text = re.sub(
            r'^#{1,6}[ \t]+(.*)',
            r'➤ <b>\1</b>',
            text,
            flags=re.MULTILINE,
        )

        # 4) Links of the form [text](url). Only the opening tag is stashed:
        #    the URL is shielded from the bold/italic rules while the link
        #    text can still carry formatting.
        def replace_markdown_link(match):
            opening_tag = stash(f'<a href="{html.escape(match.group(2))}">')
            return f'{opening_tag}{html.escape(match.group(1))}</a>'

        text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_markdown_link, text)

        # 5) Bold first, so '**' is not consumed by the single-'*' rule.
        text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)

        # 6) Italics: both *text* and _text_.
        text = re.sub(r'\*(.*?)\*', r'<i>\1</i>', text)
        text = re.sub(r'_(.*?)_', r'<i>\1</i>', text)

        # 7) Restore the protected fragments in one regex pass. A fragment
        #    may itself hold earlier placeholders (e.g. a URL containing
        #    inline code), hence the recursive substitution; it terminates
        #    because a fragment can only reference lower indices.
        placeholder_re = re.compile(r'\x00(\d+)\x00')

        def restore(match):
            return placeholder_re.sub(restore, protected[int(match.group(1))])

        return placeholder_re.sub(restore, text)

    except Exception as e:
        # Best-effort contract kept from the original: never raise to the
        # caller, hand back the error text instead.
        return str(e)

# # markdown to html parsing (v0.737.2)
# def markdown_to_html(text):
# try:
# # Handle the code blocks with optional language specification first
# def replace_codeblock(match):
# codeblock = match.group(2) # Get the actual code inside the block
# language = match.group(1) # Get the language identifier
# escaped_code = html.escape(codeblock.strip())
# if language:
# return f'<pre><code class="language-{language}">{escaped_code}</code></pre>'
# else:
# return f'<pre><code>{escaped_code}</code></pre>'

# # Replace code blocks with <pre><code> tags
# text = re.sub(r'```(\w+)?\n([\s\S]*?)```', replace_codeblock, text)

# # Now handle Markdown links and convert them to HTML
# def replace_markdown_link(match):
# link_text = match.group(1) # The text to display
# url = match.group(2) # The URL
# return f'<a href="{html.escape(url)}">{html.escape(link_text)}</a>'

# # Replace Markdown links [text](url) with HTML <a> tags
# text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_markdown_link, text)

# # Handle inline code and other markdown elements
# text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
# text = re.sub(r'\*(.*?)\*', r'<i>\1</i>', text)
# text = re.sub(r'_(.*?)_', r'<i>\1</i>', text)
# text = re.sub(r'`([^`]*)`', r'<code>\1</code>', text)
# text = re.sub(r'######\s*(.*)', r'➤ <b>\1</b>', text)
# text = re.sub(r'#####\s*(.*)', r'➤ <b>\1</b>', text)
# text = re.sub(r'####\s*(.*)', r'➤ <b>\1</b>', text)
# text = re.sub(r'###\s*(.*)', r'➤ <b>\1</b>', text)
# text = re.sub(r'##\s*(.*)', r'➤ <b>\1</b>', text)
# text = re.sub(r'#\s*(.*)', r'➤ <b>\1</b>', text)

# return text

# except Exception as e:
# return str(e)

# Check and update the global rate limit.
def check_global_rate_limit(max_requests_per_minute, global_request_count, rate_limit_reset_time):
Expand Down