Improve text extraction in dashboard

This commit is contained in:
inubimambo
2025-07-08 21:00:17 +08:00
parent a87484b0e7
commit 9644f62dc5
4 changed files with 1193 additions and 22 deletions

View File

@@ -181,10 +181,8 @@ function previewFile(fileId) {
const file = result.file;
let content = '';
// Check file type and format content accordingly
const fileExtension = file.originalName.split('.').pop().toLowerCase();
if (['txt', 'md', 'json', 'js', 'html', 'css', 'py', 'java', 'cpp', 'c'].includes(fileExtension)) {
// Handle different preview types
if (file.previewType === 'text') {
// Text-based files - show with syntax highlighting
content = `
<div class="mb-3">
@@ -198,8 +196,35 @@ function previewFile(fileId) {
<pre style="margin: 0; white-space: pre-wrap; word-wrap: break-word;"><code>${escapeHtml(file.content)}</code></pre>
</div>
`;
} else if (['pdf', 'doc', 'docx'].includes(fileExtension)) {
// Document files - show basic info and content preview
} else if (file.previewType === 'extracted-text') {
// Successfully extracted text from document
const extractionInfo = file.extractionInfo || {};
const infoText = [];
if (extractionInfo.pages) infoText.push(`${extractionInfo.pages} pages`);
if (extractionInfo.sheets) infoText.push(`${extractionInfo.sheets} sheets`);
if (extractionInfo.totalLength) infoText.push(`${extractionInfo.totalLength} characters extracted`);
content = `
<div class="mb-3">
<h6><i class="fas fa-file-word me-2"></i>${file.originalName}</h6>
<small class="text-muted">
Size: ${Math.round(file.size / 1024)} KB |
Uploaded: ${new Date(file.uploadDate).toLocaleDateString()}
${infoText.length > 0 ? ' | ' + infoText.join(', ') : ''}
</small>
</div>
<div class="alert alert-success">
<i class="fas fa-check-circle me-2"></i>
${file.message || 'Text successfully extracted from document'}
</div>
<div class="border rounded p-3" style="background-color: #f8f9fa; max-height: 400px; overflow-y: auto;">
<pre style="margin: 0; white-space: pre-wrap; word-wrap: break-word;">${escapeHtml(file.content)}</pre>
</div>
${extractionInfo.truncated ? '<small class="text-muted mt-2 d-block"><i class="fas fa-info-circle me-1"></i>Full document content is available for AI processing</small>' : ''}
`;
} else if (file.previewType === 'binary') {
// Binary files - show info message
content = `
<div class="mb-3">
<h6><i class="fas fa-file-pdf me-2"></i>${file.originalName}</h6>
@@ -210,14 +235,47 @@ function previewFile(fileId) {
</div>
<div class="alert alert-info">
<i class="fas fa-info-circle me-2"></i>
Document preview: First few lines of extracted text
${file.message || 'This is a binary file that has been processed for AI use.'}
</div>
<div class="border rounded p-3" style="background-color: #f8f9fa; max-height: 400px; overflow-y: auto;">
<pre style="margin: 0; white-space: pre-wrap; word-wrap: break-word;">${escapeHtml(file.content.substring(0, 1000))}${file.content.length > 1000 ? '...' : ''}</pre>
${file.content && file.content !== 'File preview not available' ? `
<div class="border rounded p-3" style="background-color: #f8f9fa; max-height: 400px; overflow-y: auto;">
<small class="text-muted">Extracted content preview:</small>
<pre style="margin: 0; margin-top: 10px; white-space: pre-wrap; word-wrap: break-word;">${escapeHtml(file.content)}</pre>
</div>
` : ''}
`;
} else if (file.previewType === 'extraction-failed') {
// Failed to extract text
content = `
<div class="mb-3">
<h6><i class="fas fa-file-exclamation me-2"></i>${file.originalName}</h6>
<small class="text-muted">
Size: ${Math.round(file.size / 1024)} KB |
Uploaded: ${new Date(file.uploadDate).toLocaleDateString()}
</small>
</div>
<div class="alert alert-warning">
<i class="fas fa-exclamation-triangle me-2"></i>
${file.message || 'Could not extract text from this file.'}
</div>
`;
} else if (file.previewType === 'error') {
// Error reading file
content = `
<div class="mb-3">
<h6><i class="fas fa-file-times me-2"></i>${file.originalName}</h6>
<small class="text-muted">
Size: ${Math.round(file.size / 1024)} KB |
Uploaded: ${new Date(file.uploadDate).toLocaleDateString()}
</small>
</div>
<div class="alert alert-danger">
<i class="fas fa-exclamation-circle me-2"></i>
${file.message || 'Error reading file.'}
</div>
`;
} else {
// Other files - show basic info
// Fallback for unknown preview types
content = `
<div class="mb-3">
<h6><i class="fas fa-file me-2"></i>${file.originalName}</h6>