Fix telegram.error.BadRequest ("Can't parse entities: unsupported start tag 'li' at byte offset 1191")
This commit is contained in:
33
main.py
33
main.py
@@ -31,27 +31,40 @@ async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE) ->
|
||||
|
||||
# Simple markdown to HTML conversion
|
||||
html = markdown.markdown(response_text)
|
||||
|
||||
# Debug: print the HTML before cleaning (remove this later)
|
||||
print(f"DEBUG - Original HTML: {html}")
|
||||
|
||||
# Basic cleanup for Telegram HTML compatibility
|
||||
html = html.replace('<strong>', '<b>').replace('</strong>', '</b>')
|
||||
html = html.replace('<em>', '<i>').replace('</em>', '</i>')
|
||||
html = re.sub(r'<p>(.*?)</p>', r'\1\n\n', html, flags=re.DOTALL)
|
||||
|
||||
# Handle lists - convert to simple bullet points
|
||||
html = re.sub(r'<ul>\s*', '', html)
|
||||
html = re.sub(r'\s*</ul>', '', html)
|
||||
html = re.sub(r'<li>(.*?)</li>', r'• \1\n', html, flags=re.DOTALL)
|
||||
# Remove ALL list-related tags more aggressively
|
||||
html = re.sub(r'</?ul[^>]*>', '', html, flags=re.IGNORECASE)
|
||||
html = re.sub(r'</?ol[^>]*>', '', html, flags=re.IGNORECASE)
|
||||
html = re.sub(r'<li[^>]*>(.*?)</li>', r'• \1\n', html, flags=re.DOTALL | re.IGNORECASE)
|
||||
html = re.sub(r'</?li[^>]*>', '', html, flags=re.IGNORECASE) # Remove any remaining li tags
|
||||
|
||||
# Clean up any other unsupported tags that might be present
|
||||
html = re.sub(r'<ol>\s*', '', html)
|
||||
html = re.sub(r'\s*</ol>', '', html)
|
||||
html = re.sub(r'<h[1-6]>(.*?)</h[1-6]>', r'<b>\1</b>\n', html, flags=re.DOTALL)
|
||||
# Handle paragraphs
|
||||
html = re.sub(r'<p[^>]*>(.*?)</p>', r'\1\n\n', html, flags=re.DOTALL | re.IGNORECASE)
|
||||
html = re.sub(r'</?p[^>]*>', '', html, flags=re.IGNORECASE) # Remove any remaining p tags
|
||||
|
||||
# Handle headers
|
||||
html = re.sub(r'<h[1-6][^>]*>(.*?)</h[1-6]>', r'<b>\1</b>\n', html, flags=re.DOTALL | re.IGNORECASE)
|
||||
|
||||
# Remove any other potentially problematic tags
|
||||
html = re.sub(r'</?div[^>]*>', '', html, flags=re.IGNORECASE)
|
||||
html = re.sub(r'</?span[^>]*>', '', html, flags=re.IGNORECASE)
|
||||
|
||||
# Clean up extra whitespace
|
||||
html = re.sub(r'\n\s*\n', '\n\n', html)
|
||||
html = html.strip()
|
||||
|
||||
# Debug: print the cleaned HTML (remove this later)
|
||||
print(f"DEBUG - Cleaned HTML: {html}")
|
||||
|
||||
# Send the formatted message using HTML parse mode
|
||||
await update.message.reply_text(html.strip(), parse_mode='HTML')
|
||||
await update.message.reply_text(html, parse_mode='HTML')
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
|
||||
Reference in New Issue
Block a user