Add batch upload endpoint to avoid sequential upload+OCR bottleneck
Client sends all files in a single request to /api/upload/batch; the server receives them all upfront via multer, then processes OCR sequentially. This eliminates the network round-trip between each file.
This commit is contained in:
parent
b52d6b1a6f
commit
bce534486c
2 changed files with 76 additions and 9 deletions
|
|
@ -730,17 +730,19 @@ export const useGame = (defaultGameConfig?: GameConfig) => {
|
||||||
};
|
};
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const uploadDocument = async (file: File, useOcr: boolean = false, maxRetries = 5): Promise<ProcessedDocument> => {
|
const uploadDocuments = async (files: File[], useOcr: boolean = false, maxRetries = 5): Promise<ProcessedDocument[]> => {
|
||||||
if (!auth.user?.access_token) {
|
if (!auth.user?.access_token) {
|
||||||
throw new Error('Authentication required to upload documents');
|
throw new Error('Authentication required to upload documents');
|
||||||
}
|
}
|
||||||
|
|
||||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||||
const formData = new FormData();
|
const formData = new FormData();
|
||||||
formData.append('document', file);
|
for (const file of files) {
|
||||||
|
formData.append('documents', file);
|
||||||
|
}
|
||||||
formData.append('useOcr', String(useOcr));
|
formData.append('useOcr', String(useOcr));
|
||||||
|
|
||||||
const response = await fetch(`${BACKEND_URL}/api/upload`, {
|
const response = await fetch(`${BACKEND_URL}/api/upload/batch`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
'Authorization': `Bearer ${auth.user.access_token}`,
|
'Authorization': `Bearer ${auth.user.access_token}`,
|
||||||
|
|
@ -758,13 +760,13 @@ export const useGame = (defaultGameConfig?: GameConfig) => {
|
||||||
|
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
const error = await response.json();
|
const error = await response.json();
|
||||||
throw new Error(error.error || 'Failed to upload document');
|
throw new Error(error.error || 'Failed to upload documents');
|
||||||
}
|
}
|
||||||
|
|
||||||
return response.json();
|
return response.json();
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new Error('Failed to upload document after multiple retries');
|
throw new Error('Failed to upload documents after multiple retries');
|
||||||
};
|
};
|
||||||
|
|
||||||
const startQuizGen = async (options: {
|
const startQuizGen = async (options: {
|
||||||
|
|
@ -785,10 +787,7 @@ export const useGame = (defaultGameConfig?: GameConfig) => {
|
||||||
|
|
||||||
let documents: ProcessedDocument[] | undefined;
|
let documents: ProcessedDocument[] | undefined;
|
||||||
if (options.files && options.files.length > 0) {
|
if (options.files && options.files.length > 0) {
|
||||||
documents = [];
|
documents = await uploadDocuments(options.files, options.useOcr);
|
||||||
for (const file of options.files) {
|
|
||||||
documents.push(await uploadDocument(file, options.useOcr));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const generateOptions: GenerateQuizOptions = {
|
const generateOptions: GenerateQuizOptions = {
|
||||||
|
|
|
||||||
|
|
@ -149,6 +149,74 @@ router.post('/', upload.single('document'), async (req: AuthenticatedRequest, re
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const MAX_BATCH_FILES = 20;
|
||||||
|
|
||||||
|
router.post('/batch', upload.array('documents', MAX_BATCH_FILES), async (req: AuthenticatedRequest, res) => {
|
||||||
|
if (activeUploads >= MAX_CONCURRENT_UPLOADS && uploadQueue.length >= MAX_QUEUE_SIZE) {
|
||||||
|
return res.status(503).json({ error: 'Server busy processing uploads. Please try again shortly.' });
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
await waitForSlot();
|
||||||
|
} catch {
|
||||||
|
return res.status(504).json({ error: 'Upload timed out waiting in queue. Please try again.' });
|
||||||
|
}
|
||||||
|
|
||||||
|
activeUploads++;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const files = req.files as Express.Multer.File[];
|
||||||
|
if (!files || files.length === 0) {
|
||||||
|
activeUploads--;
|
||||||
|
return res.status(400).json({ error: 'No files uploaded' });
|
||||||
|
}
|
||||||
|
|
||||||
|
const useOcr = req.body?.useOcr === 'true' || req.body?.useOcr === true;
|
||||||
|
if (useOcr) {
|
||||||
|
const groups = req.user?.groups || [];
|
||||||
|
const hasGroupAccess = groups.includes('kaboot-ai-access');
|
||||||
|
const status = req.user ? getSubscriptionStatus(req.user.sub) : null;
|
||||||
|
const hasSubscriptionAccess = status?.status === 'active';
|
||||||
|
|
||||||
|
if (!hasGroupAccess && !hasSubscriptionAccess) {
|
||||||
|
return res.status(403).json({ error: 'OCR is available to Pro subscribers only.' });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const results = [];
|
||||||
|
for (const file of files) {
|
||||||
|
const normalizedMime = normalizeMimeType(file.mimetype, file.originalname);
|
||||||
|
const processed = await processDocument(file.buffer, normalizedMime, { useOcr });
|
||||||
|
|
||||||
|
if (processed.type === 'native') {
|
||||||
|
results.push({
|
||||||
|
type: 'native',
|
||||||
|
content: (processed.content as Buffer).toString('base64'),
|
||||||
|
mimeType: processed.mimeType,
|
||||||
|
originalName: file.originalname
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
results.push({
|
||||||
|
type: 'text',
|
||||||
|
content: processed.content as string,
|
||||||
|
mimeType: processed.mimeType,
|
||||||
|
originalName: file.originalname
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res.json(results);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Batch upload processing error:', error);
|
||||||
|
res.status(500).json({
|
||||||
|
error: error instanceof Error ? error.message : 'Failed to process documents'
|
||||||
|
});
|
||||||
|
} finally {
|
||||||
|
activeUploads--;
|
||||||
|
drainQueue();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
router.use((err: Error, _req: any, res: any, _next: any) => {
|
router.use((err: Error, _req: any, res: any, _next: any) => {
|
||||||
if (err instanceof multer.MulterError) {
|
if (err instanceof multer.MulterError) {
|
||||||
if (err.code === 'LIMIT_FILE_SIZE') {
|
if (err.code === 'LIMIT_FILE_SIZE') {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue