Fix telegram.error.BadRequest: Can't parse entities: unsupported start tag 'li' at byte offset 1191

This commit is contained in:
inubimambo
2025-07-12 21:20:18 +08:00
parent b48ec0a48d
commit 2a3b6c0151

33
main.py
View File

@@ -31,27 +31,40 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE) ->
# Simple markdown to HTML conversion
html = markdown.markdown(response_text)
# Debug: print the HTML before cleaning (remove this later)
print(f"DEBUG - Original HTML: {html}")
# Basic cleanup for Telegram HTML compatibility
html = html.replace('<strong>', '<b>').replace('</strong>', '</b>')
html = html.replace('<em>', '<i>').replace('</em>', '</i>')
html = re.sub(r'<p>(.*?)</p>', r'\1\n\n', html, flags=re.DOTALL)
# Handle lists - convert to simple bullet points
html = re.sub(r'<ul>\s*', '', html)
html = re.sub(r'\s*</ul>', '', html)
html = re.sub(r'<li>(.*?)</li>', r'\1\n', html, flags=re.DOTALL)
# Remove ALL list-related tags more aggressively
html = re.sub(r'</?ul[^>]*>', '', html, flags=re.IGNORECASE)
html = re.sub(r'</?ol[^>]*>', '', html, flags=re.IGNORECASE)
html = re.sub(r'<li[^>]*>(.*?)</li>', r'\1\n', html, flags=re.DOTALL | re.IGNORECASE)
html = re.sub(r'</?li[^>]*>', '', html, flags=re.IGNORECASE) # Remove any remaining li tags
# Clean up any other unsupported tags that might be present
html = re.sub(r'<ol>\s*', '', html)
html = re.sub(r'\s*</ol>', '', html)
html = re.sub(r'<h[1-6]>(.*?)</h[1-6]>', r'<b>\1</b>\n', html, flags=re.DOTALL)
# Handle paragraphs
html = re.sub(r'<p[^>]*>(.*?)</p>', r'\1\n\n', html, flags=re.DOTALL | re.IGNORECASE)
html = re.sub(r'</?p[^>]*>', '', html, flags=re.IGNORECASE) # Remove any remaining p tags
# Handle headers
html = re.sub(r'<h[1-6][^>]*>(.*?)</h[1-6]>', r'<b>\1</b>\n', html, flags=re.DOTALL | re.IGNORECASE)
# Remove any other potentially problematic tags
html = re.sub(r'</?div[^>]*>', '', html, flags=re.IGNORECASE)
html = re.sub(r'</?span[^>]*>', '', html, flags=re.IGNORECASE)
# Clean up extra whitespace
html = re.sub(r'\n\s*\n', '\n\n', html)
html = html.strip()
# Debug: print the cleaned HTML (remove this later)
print(f"DEBUG - Cleaned HTML: {html}")
# Send the formatted message using HTML parse mode
await update.message.reply_text(html.strip(), parse_mode='HTML')
await update.message.reply_text(html, parse_mode='HTML')
if __name__ == "__main__":