From df1505bf483ce4b32c23f64bd81811152fd1ee50 Mon Sep 17 00:00:00 2001 From: inubimambo Date: Wed, 9 Jul 2025 22:51:51 +0800 Subject: [PATCH] Update README --- README.md | 225 +++++++++++++++++++++++++++++++++++--- server.js | 40 ++++++- views/partials/header.ejs | 7 ++ views/upload.ejs | 25 ++++- 4 files changed, 268 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 833eef0..0ec17da 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,50 @@ # EduCat - AI-Powered Note Revision Platform +EduCat is a modern web application that helps students improve their study notes using AI. Built with Node.js, Express, and EJS, it features secure file upload capabilities, AI-powered note revision, interactive quizzes, and an intelligent chatbot. + +## Features + +### ๐Ÿ” **Security & Authentication** +- Secure login and registration system +- Session-based authentication with flash messages +- Password hashing with bcrypt +- Strict file type whitelisting for secure uploads + +### ๐Ÿ“ **Document Processing** +- Upload notes in multiple formats (PDF, DOC, DOCX, XLSX, XLS, TXT, MD, JSON, CSV, XML) +- Secure document extraction with proper error handling +- File preview with extraction information and metadata +- Separate storage for original uploads and AI-revised notes + +### ๐Ÿค– **AI-Powered Features** +- **Note Revision**: Automatically improve, summarize, and generate study questions +- **Interactive Chatbot**: Chat with AI for study assistance +- **Quiz Generation**: Create interactive quizzes from uploaded documents +- **Smart Content Extraction**: Extract text from various document formats + +### ๐Ÿ“Š **Dashboard & Management** +- Modern dashboard to manage uploaded files +- Separate sections for original files and AI-revised notes +- File preview with extraction status and metadata +- Download and delete functionality for all file types + +### ๐ŸŽฏ **Quiz System** +- Generate quizzes from uploaded documents +- Multiple question types (multiple choice, 
true/false, short answer) +- Comprehensive quiz review with explanations +- Statistics tracking for quiz performance + +### 🎨 **Modern UI/UX** +- Beautiful, responsive design with Bootstrap 5 +- Custom SVG favicon and branded interface +- Improved file icons and visual indicators +- Mobile-friendly responsive layout + +### 🔗 **AI Integration** +- Connected to Flowise at https://flowise.suika.cc/ +- RAG (Retrieval-Augmented Generation) capabilities +- Secure API integration with proper error handling + EduCat is a modern web application that helps students improve their study notes using AI. Built with Node.js, Express, and EJS, it features file upload capabilities, AI-powered note revision, and an interactive chatbot. ## Features @@ -17,10 +62,15 @@ EduCat is a modern web application that helps students improve their study notes - **Backend**: Node.js, Express.js - **Frontend**: EJS templates, Bootstrap 5, Font Awesome - **Authentication**: bcrypt for password hashing, express-session -- **File Handling**: Multer for file uploads -- **AI Integration**: Flowise API integration +- **File Handling**: Multer for secure file uploads with type validation +- **Document Processing**: + - PDFParse for PDF extraction + - Mammoth for Word document processing + - ExcelJS for secure Excel file handling (replaced vulnerable xlsx) +- **AI Integration**: Flowise API integration with RAG capabilities - **Session Management**: Express Session with flash messages -- **Styling**: Custom CSS with Bootstrap +- **Security**: File type whitelisting, secure extraction methods +- **Styling**: Custom CSS with Bootstrap and custom SVG favicon ## Installation @@ -35,6 +85,17 @@ EduCat is a modern web application that helps students improve their study notes npm install ``` + **Key Dependencies:** + - `express` - Web framework + - `ejs` - Template engine + - `multer` - File upload handling + - `bcrypt` - Password hashing + - `express-session` - Session 
management + - `pdf-parse` - PDF text extraction + - `mammoth` - Word document processing + - `exceljs` - Secure Excel file handling + - `axios` - HTTP client for API calls + 3. **Configure environment variables**: - Copy `.env.example` to `.env` (if exists) or create a new `.env` file - Update the following variables: @@ -75,8 +136,12 @@ EduCat is a modern web application that helps students improve their study notes ### Uploading Notes 1. After logging in, click "Upload Notes" in the navigation 2. Drag and drop your file or click to browse -3. Select a file (PDF, DOC, TXT, or image) +3. Select a supported file type: + - **Documents**: PDF, DOC, DOCX + - **Spreadsheets**: XLSX, XLS + - **Text Files**: TXT, MD, JSON, CSV, XML 4. Click "Upload & Process" +5. View file preview with extraction information ### Revising Notes 1. Go to your Dashboard to see uploaded files @@ -86,11 +151,19 @@ EduCat is a modern web application that helps students improve their study notes - **Summarize**: Creates concise summaries - **Generate Questions**: Creates study questions 4. Click "Revise with AI" to process +5. Save and download revised notes from the "AI-Revised Notes" section + +### Quiz System +1. Upload a document to generate quizzes from +2. Navigate to the quiz section +3. Take interactive quizzes with multiple question types +4. Review answers with detailed explanations +5. Track your quiz performance and statistics ### Using the Chatbot 1. Navigate to the "Chat" section 2. Type your questions about study materials or academic topics -3. Get instant AI-powered responses +3. 
Get instant AI-powered responses using RAG technology ## Project Structure @@ -101,8 +174,10 @@ EduCat/ │ │ └── style.css │ ├── js/ │ │ └── main.js -│ └── images/ -│ └── logo.png +│ ├── images/ +│ │ └── favicon.svg +│ ├── favicon.svg +│ └── favicon-32x32.svg ├── views/ │ ├── partials/ │ │ ├── header.ejs @@ -112,8 +187,14 @@ EduCat/ │ ├── revise.ejs │ ├── chat.ejs │ ├── dashboard.ejs +│ ├── quiz.ejs │ └── error.ejs ├── uploads/ +│ └── revised-notes/ +├── data/ +│ ├── user-files/ +│ ├── revised-files/ +│ └── quiz-results/ ├── server.js ├── package.json └── .env @@ -121,14 +202,38 @@ EduCat/ ## API Endpoints +### Authentication & Core Routes - `GET /` - Home page +- `GET /login` - Login page +- `POST /login` - Handle login +- `GET /register` - Registration page +- `POST /register` - Handle registration +- `GET /logout` - Logout user + +### File Management - `GET /upload` - File upload page -- `POST /upload` - Handle file uploads +- `POST /upload` - Handle file uploads with validation +- `GET /dashboard` - User dashboard with file management +- `GET /api/files/:fileId/preview` - File preview with extraction info +- `DELETE /api/files/:fileId` - Delete uploaded file + +### AI-Powered Features - `GET /revise/:fileId` - Note revision page - `POST /api/revise` - AI revision endpoint +- `POST /api/save-revised` - Save revised notes +- `GET /api/download-revised/:fileId` - Download revised notes +- `GET /api/revised-files/:fileId/info` - Get revised file info +- `DELETE /api/revised-files/:fileId` - Delete revised file + +### Quiz System +- `GET /quiz` - Quiz interface +- `POST /api/quiz/generate` - Generate quiz from document +- `POST /api/quiz/submit` - Submit quiz answers +- `GET /api/quiz/results` - Get quiz statistics + +### Chat Integration - `GET /chat` - Chat 
interface -- `POST /api/chat` - Chat API endpoint -- `GET /dashboard` - User dashboard +- `POST /api/chat` - Chat API endpoint with RAG support ## Configuration @@ -144,24 +249,37 @@ EduCat/ ### File Upload Settings - **Maximum file size**: 10MB -- **Allowed formats**: PDF, DOC, DOCX, TXT, JPG, JPEG, PNG, GIF -- **Upload directory**: `uploads/` +- **Allowed formats**: PDF, DOC, DOCX, XLSX, XLS, TXT, MD, JSON, CSV, XML +- **Upload directory**: `uploads/` (original files) +- **Revised notes directory**: `uploads/revised-notes/` +- **Security**: Strict file type whitelisting with MIME type validation +- **Processing**: Automatic text extraction with error handling ## Customization ### Styling - Edit `public/css/style.css` to customize the appearance - The design uses Bootstrap 5 with custom CSS variables +- Custom SVG favicon and file icons for better visual consistency +- Responsive design optimized for mobile and desktop ### AI Integration - Modify the Flowise API calls in `server.js` - Update prompts in the `/api/revise` endpoint - Customize chat responses in the `/api/chat` endpoint +- Configure RAG settings for document-based queries + +### Security +- File type restrictions configured in server.js and main.js +- MIME type validation for uploaded files +- Secure document extraction methods +- Session security with proper secret management ### Adding Features - Add new routes in `server.js` - Create corresponding EJS templates in `views/` - Add client-side JavaScript in `public/js/main.js` +- Update CSS in `public/css/style.css` ## Troubleshooting @@ -169,17 +287,38 @@ EduCat/ 1. **File upload fails**: - Check file size (max 10MB) - - Verify file format is supported - - Ensure `uploads/` directory exists + - Verify file format is supported (PDF, DOC, DOCX, XLSX, XLS, TXT, MD, JSON, CSV, XML) + - Ensure `uploads/` and `uploads/revised-notes/` directories exist + - Check file type validation in both client and server -2. **AI responses don't work**: +2. 
**Document extraction errors**: + - Verify document is not corrupted + - Check extraction status in file preview + - Ensure proper permissions for file access + - Review server logs for specific extraction errors + +3. **AI responses don't work**: - Verify Flowise API URL is correct - Check if your chatflow ID is valid - Ensure Flowise server is accessible + - Verify RAG configuration for document-based queries -3. **Session issues**: - - Verify SESSION_SECRET is set +4. **Quiz generation fails**: + - Ensure document has sufficient text content + - Check if document extraction was successful + - Verify AI service is properly connected + - Review quiz generation prompts + +5. **Session issues**: + - Verify SESSION_SECRET is set in .env - Check if sessions are properly configured + - Clear browser cookies and try again + +6. **Revised notes not saving**: + - Ensure `uploads/revised-notes/` directory exists + - Check file permissions + - Verify sufficient disk space + - Review server logs for save errors ### Development @@ -189,6 +328,58 @@ npm install -g nodemon npm run dev ``` +## Recent Updates & Security Improvements + +### Version 2.0 Security Enhancements +- **๐Ÿ”’ Enhanced Security**: Replaced vulnerable `xlsx` library with secure `exceljs` for Excel processing +- **๐Ÿ›ก๏ธ File Type Whitelisting**: Implemented strict file type validation to prevent malicious uploads +- **๐Ÿ” MIME Type Validation**: Added comprehensive file type checking on both client and server +- **๐Ÿ—‚๏ธ Secure Document Processing**: Improved extraction methods with proper error handling + +### UI/UX Improvements +- **๐ŸŽจ Custom Favicon**: Added custom SVG favicon for brand consistency +- **๐Ÿ“ฑ Responsive Design**: Enhanced mobile-friendly interface with improved layouts +- **๐Ÿ”ง File Icons**: Updated file type icons for better visual clarity +- **๐Ÿ“Š Dashboard Enhancements**: Separate sections for original and revised files +- **๐Ÿท๏ธ Badge System**: Improved status indicators and 
badge alignment + +### New Features +- **๐Ÿ’พ Revised Notes Management**: Save, download, and manage AI-revised notes separately +- **๐ŸŽฏ Enhanced Quiz System**: Fixed short-answer questions with explanations +- **๐Ÿ“ˆ Quiz Statistics**: Comprehensive tracking of quiz performance +- **๐Ÿ” File Preview**: Detailed extraction information and metadata display +- **๐Ÿ“ Persistent Storage**: Improved file tracking and session management + +### Performance & Reliability +- **โšก Optimized Extraction**: Faster and more reliable document processing +- **๐Ÿ”„ Error Handling**: Comprehensive error handling for all file operations +- **๐Ÿ’ช Robust API**: Improved API endpoints with better validation +- **๐Ÿงน Code Refactoring**: Cleaner, more maintainable codebase + +## Deployment & Production + +### Production Considerations +- Set strong `SESSION_SECRET` in production environment +- Configure proper file upload limits based on server capacity +- Set up proper logging and monitoring +- Implement rate limiting for API endpoints +- Configure HTTPS for secure file uploads +- Set up backup procedures for uploaded files and data + +### Environment Setup +- Ensure Node.js 14+ is installed +- Create proper directory structure with correct permissions +- Configure environment variables for production +- Set up reverse proxy (nginx/Apache) if needed +- Configure SSL certificates for HTTPS + +### Monitoring & Maintenance +- Monitor disk space for uploads directory +- Set up log rotation for application logs +- Regular security updates for dependencies +- Monitor API usage and performance +- Backup user data and quiz results regularly + ## Contributing 1. 
Fork the repository diff --git a/server.js b/server.js index 61f5622..c0c006e 100644 --- a/server.js +++ b/server.js @@ -238,15 +238,43 @@ const upload = multer({ fileSize: 10 * 1024 * 1024 // 10MB limit }, fileFilter: (req, file, cb) => { - // Allow text files, PDFs, and images - const allowedTypes = /jpeg|jpg|png|gif|pdf|txt|doc|docx/; - const extname = allowedTypes.test(path.extname(file.originalname).toLowerCase()); - const mimetype = allowedTypes.test(file.mimetype); + // Define allowed file types for RAG processing - only text-extractable documents + const allowedExtensions = ['.pdf', '.txt', '.doc', '.docx', '.xlsx', '.xls', '.md', '.json', '.csv', '.xml']; + const allowedMimeTypes = [ + 'application/pdf', + 'text/plain', + 'text/markdown', + 'text/csv', + 'text/xml', + 'application/xml', + 'application/json', + 'application/msword', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'application/vnd.ms-excel', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + ]; - if (mimetype && extname) { + const fileExtension = path.extname(file.originalname).toLowerCase(); + const fileMimeType = file.mimetype.toLowerCase(); + + // Check if file extension is allowed + const isExtensionAllowed = allowedExtensions.includes(fileExtension); + + // Check if MIME type is allowed + const isMimeTypeAllowed = allowedMimeTypes.includes(fileMimeType); + + // Additional validation for specific file types + if (isExtensionAllowed && isMimeTypeAllowed) { return cb(null, true); } else { - cb(new Error('Only text files, PDFs, and images are allowed!')); + // Provide specific error messages for different rejection reasons + if (!isExtensionAllowed) { + return cb(new Error(`File type "${fileExtension}" is not supported. Only document files (PDF, Word, Excel, text files) are allowed for RAG processing.`)); + } else if (!isMimeTypeAllowed) { + return cb(new Error(`MIME type "${fileMimeType}" is not supported. 
Please upload valid document files only.`)); + } else { + return cb(new Error('Invalid file type. Only text-extractable documents are allowed to prevent RAG corruption.')); + } } } }); diff --git a/views/partials/header.ejs b/views/partials/header.ejs index 8563293..ac5b4dc 100644 --- a/views/partials/header.ejs +++ b/views/partials/header.ejs @@ -4,6 +4,13 @@ <%= title %> + + + + + + + diff --git a/views/upload.ejs b/views/upload.ejs index 14047d5..abfdde3 100644 --- a/views/upload.ejs +++ b/views/upload.ejs @@ -14,12 +14,18 @@
Drag & Drop your files here

or click to browse

- + +
+ + Supported file types: PDF, Word (.doc, .docx), Excel (.xlsx, .xls), text files (.txt, .md, .json, .csv, .xml) +
Only text-extractable documents are allowed to ensure optimal AI processing and prevent RAG corruption. +
+
@@ -46,25 +52,32 @@
-
+
PDF Files

Upload PDF documents

-
+
Word Documents

DOC & DOCX files

-
+
- + +
Excel Files
+

XLSX & XLS files

+
+
+
+
+
Text Files
-

Plain text documents

+

TXT, MD, JSON, CSV, XML