# EduCatBot/main.py

import asyncio
import logging
import os
import re

import markdown
from dotenv import load_dotenv
from flowise import Flowise, PredictionData
from telegram import Update
from telegram.error import BadRequest
from telegram.ext import Application, CommandHandler, ContextTypes, MessageHandler, filters

# Shared state read by handle_message(): the Flowise client and the id of the
# chatflow to query. Both names are assigned in the ``__main__`` block at the
# bottom of this file.
# NOTE: a ``global`` statement at module scope has no effect; these two lines
# only signal intent and do not create the names.
global flowiseClient
global chatflowId

async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Greet the sender of a /start command.

    Replies to the incoming message with a MarkdownV2-formatted greeting that
    mentions the user who issued the command.

    Args:
        update: The incoming Telegram update carrying the /start command.
        context: Handler context supplied by the dispatcher (unused).
    """
    sender = update.effective_user
    send_reply = update.message.reply_markdown_v2
    # The exclamation mark is escaped because it is reserved in MarkdownV2.
    greeting = fr"Hi {sender.mention_markdown_v2()}\!"
    await send_reply(greeting)

logger = logging.getLogger(__name__)


def _html_to_telegram_html(html: str) -> str:
    """Reduce generic HTML to markup suitable for Telegram's HTML parse mode.

    Lists become bullet lines, paragraphs become blank-line separated text,
    headers become bold lines, and container tags are dropped.

    Args:
        html: HTML produced by the Markdown converter.

    Returns:
        The simplified markup with surrounding whitespace stripped.
    """
    html = html.replace('<strong>', '<b>').replace('</strong>', '</b>')
    html = html.replace('<em>', '<i>').replace('</em>', '</i>')

    # <br> and <hr> are not among the tags Telegram accepts; a hard line break
    # maps naturally onto a newline, a horizontal rule is simply dropped.
    html = re.sub(r'<br\b[^>]*>', '\n', html, flags=re.IGNORECASE)
    html = re.sub(r'<hr\b[^>]*>', '', html, flags=re.IGNORECASE)

    # Flatten lists into bullet lines. The trailing \b on every tag name makes
    # sure only that exact tag is matched (e.g. "li" but not "link").
    html = re.sub(r'</?ul\b[^>]*>', '', html, flags=re.IGNORECASE)
    html = re.sub(r'</?ol\b[^>]*>', '', html, flags=re.IGNORECASE)
    html = re.sub(r'<li\b[^>]*>(.*?)</li>', r'• \1\n', html, flags=re.DOTALL | re.IGNORECASE)
    html = re.sub(r'</?li\b[^>]*>', '', html, flags=re.IGNORECASE)  # unbalanced leftovers

    # Paragraphs -> text followed by a blank line. Without the \b these
    # patterns would also swallow <pre>, destroying code-block formatting.
    html = re.sub(r'<p\b[^>]*>(.*?)</p>', r'\1\n\n', html, flags=re.DOTALL | re.IGNORECASE)
    html = re.sub(r'</?p\b[^>]*>', '', html, flags=re.IGNORECASE)  # unbalanced leftovers

    # Headers -> a bold line.
    html = re.sub(r'<h[1-6]\b[^>]*>(.*?)</h[1-6]>', r'<b>\1</b>\n', html, flags=re.DOTALL | re.IGNORECASE)

    # Drop generic container tags but keep their content.
    html = re.sub(r'</?div\b[^>]*>', '', html, flags=re.IGNORECASE)
    html = re.sub(r'</?span\b[^>]*>', '', html, flags=re.IGNORECASE)

    # Collapse runs of blank (or whitespace-only) lines into one blank line.
    html = re.sub(r'\n\s*\n', '\n\n', html)
    return html.strip()


async def handle_message(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Forward a text message to Flowise and reply with the formatted answer.

    The sender's Telegram user id is used as the Flowise chat id. The answer is
    treated as Markdown, converted to HTML, reduced to Telegram-compatible
    markup and sent as a reply. If Telegram rejects the markup, the raw answer
    text is sent instead without a parse mode.

    Args:
        update: The incoming Telegram update.
        context: Handler context supplied by the dispatcher (unused).
    """
    message = update.message
    # A text filter also matches edited messages and channel posts, for which
    # ``update.message`` and/or ``from_user`` is None; ignore those updates
    # instead of failing with AttributeError.
    if message is None or message.from_user is None or not message.text:
        return

    # Extract the session id from the message
    session_id = message.from_user.id
    question = message.text

    def _ask_flowise() -> str:
        """Run the prediction and concatenate the text of every response item."""
        # NOTE(review): chatId receives the integer user id as-is; confirm
        # whether PredictionData expects a string here.
        prediction = flowiseClient.create_prediction(
            PredictionData(question=question, chatflowId=chatflowId, streaming=False, chatId=session_id)
        )
        return "".join(item["text"] for item in prediction)

    # The Flowise client is invoked synchronously (nothing to await), so run it
    # in a worker thread to keep the bot's event loop responsive meanwhile.
    response_text = await asyncio.get_running_loop().run_in_executor(None, _ask_flowise)

    # Simple markdown to HTML conversion
    raw_html = markdown.markdown(response_text)
    logger.debug("Original HTML: %s", raw_html)

    html = _html_to_telegram_html(raw_html)
    logger.debug("Cleaned HTML: %s", html)

    if not html:
        # Telegram refuses to send an empty message; there is nothing to reply.
        logger.warning("Flowise returned an empty answer for chat %s", session_id)
        return

    # Send the formatted message using HTML parse mode
    try:
        await message.reply_text(html, parse_mode='HTML')
    except BadRequest:
        # Most likely the answer contained markup Telegram cannot parse. Fall
        # back to the unformatted answer so the user still gets a reply; if
        # that fails too, the error propagates as before.
        logger.warning("Telegram rejected the HTML reply; sending plain text instead", exc_info=True)
        await message.reply_text(response_text)

if __name__ == "__main__":

    # Load environment variables before any setting is read.
    load_dotenv()

    # --- Flowise client ---------------------------------------------------
    FLOWISE_API_KEY = os.getenv("FLOWISE_API_KEY")
    FLOWISE_API_URL = os.getenv("FLOWISE_API_URL")
    chatflowId = os.getenv("FLOWISE_CHATFLOW_ID")

    # Every Flowise setting is mandatory; fail fast on the first missing one.
    _required_flowise_settings = (
        (FLOWISE_API_KEY, "FLOWISE_API_KEY not found in environment variables"),
        (FLOWISE_API_URL, "FLOWISE_API_URL not found in environment variables"),
        (chatflowId, "FLOWISE_CHATFLOW_ID not found in environment variables"),
    )
    for _setting_value, _missing_message in _required_flowise_settings:
        if not _setting_value:
            raise ValueError(_missing_message)

    flowiseClient = Flowise(api_key=FLOWISE_API_KEY, base_url=FLOWISE_API_URL)

    # --- Telegram bot -----------------------------------------------------
    TELEGRAM_API_KEY = os.getenv("TELEGRAM_API_KEY")
    if not TELEGRAM_API_KEY:
        raise ValueError("TELEGRAM_API_KEY not found in environment variables")

    _builder = Application.builder()
    application = _builder.token(TELEGRAM_API_KEY).build()

    # /start gets the greeting; every text update goes to the Flowise handler.
    application.add_handler(CommandHandler("start", start))
    application.add_handler(MessageHandler(filters.TEXT, handle_message))

    # Poll Telegram for updates until the process is stopped.
    application.run_polling()