diff --git a/elastic/templates/elastic/upload.html b/elastic/templates/elastic/upload.html index e6dabf3..397f3e3 100644 --- a/elastic/templates/elastic/upload.html +++ b/elastic/templates/elastic/upload.html @@ -42,22 +42,33 @@ .preview-box {flex: 1; text-align: center; } .preview-box h3 {margin-top: 0;color: #334155; } .preview-box img { max-width: 100%;max-height: 300px;border: 1px solid #e2e8f0;border-radius: 8px;object-fit: contain;} - .preview-list {display: grid;grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));gap: 12px;} + .preview-list {display: grid;grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));gap: 12px; margin-top: 20px;} .preview-item {position: relative;} .preview-item img {width: 100%;max-height: 220px;border: 1px solid #e2e8f0;border-radius: 8px;object-fit: contain;} .preview-remove {position: absolute;top: 6px;right: 6px;border: none;border-radius: 999px;background: rgba(15,23,42,0.8);color: #fff;width: 24px;height: 24px;cursor: pointer;display: flex;align-items: center;justify-content: center;font-size: 14px;line-height: 1;} .result-box {flex: 1;} .result-box h3 { margin-top: 0; color: #334155;} .form-controls { display: flex;gap: 8px;margin-bottom: 12px;flex-wrap: wrap;} - #kvForm {border: 1px solid #e2e8f0; border-radius: 8px; padding: 12px; max-height: 400px; overflow: auto;margin-bottom: 12px;background: #f8fafc;} - .form-header { display: grid; grid-template-columns: 1fr 1fr auto; gap: 8px; margin-bottom: 8px; padding: 0 4px; font-weight: 600; color: #475569; font-size: 14px;} + .pending-item { background: #fff; border: 1px solid #e2e8f0; border-radius: 12px; padding: 20px; margin-bottom: 24px; box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1); } + .pending-item-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 16px; border-bottom: 1px solid #f1f5f9; padding-bottom: 12px; } + .pending-item-title { font-weight: 600; color: #1e293b; font-size: 16px; } + .pending-item-body { display: flex; gap: 20px; } + .pending-item-preview { flex: 0 0 240px; } + .pending-item-preview img { width: 100%; border-radius: 8px; border: 1px solid #f1f5f9; } + .pending-item-edit { flex: 1; } + .pending-item-footer { margin-top: 16px; text-align: right; } + @media (max-width: 992px) { + .pending-item-body { flex-direction: column; } + .pending-item-preview { flex: 0 0 auto; } + } .form-row {display: grid;grid-template-columns: 1fr 1fr auto;gap: 8px; margin-bottom: 6px; align-items: center;} .form-row input {padding: 8px;border: 1px solid #cbd5e1;border-radius: 4px; width: 100%; box-sizing: border-box;} - #resultBox { width: 100%;min-height: 200px;font-family: ui-monospace, SFMono-Regular, Menlo, monospace;font-size: 14px; padding: 12px; border: 1px solid #e2e8f0; - border-radius: 8px; resize: vertical;box-sizing: border-box; } + .kv-form-container {border: 1px solid #e2e8f0; border-radius: 8px; padding: 12px; max-height: 400px; overflow: auto; margin-bottom: 12px; background: #f8fafc;} + .form-header { display: grid; grid-template-columns: 1fr 1fr auto; gap: 8px; margin-bottom: 8px; padding: 0 4px; font-weight: 600; color: #475569; font-size: 14px;} + .result-textarea { width: 100%; min-height: 120px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 13px; padding: 10px; border: 1px solid #e2e8f0; border-radius: 8px; resize: vertical; box-sizing: border-box; } .status-message { padding: 10px; margin: 10px 0; border-radius: 6px; display: none; } .status-message.success { background-color: #d4edda; color: #155724; border: 1px solid #c3e6cb; } - .status-message.error { background-color: #f8d7da;color: #721c24; border: 1px solid #f5c6cb; } + .status-message.error { background-color: #f8d7da; color: #721c24; border: 1px solid #f5c6cb; } .action-buttons { margin-top: 16px; display: flex; gap: 8px; flex-wrap: wrap; } .progress {position: relative; height: 12px; background: #e2e8f0; border-radius: 8px; overflow: hidden;} .progress-bar {height: 100%; width: 0; background: linear-gradient(90deg, #4f46e5 0%, #60a5fa 100%); transition: width .2s ease;} @@ -96,6 +107,7 @@ {% csrf_token %} +

@@ -107,19 +119,11 @@
-
-

图片预览

-
-
-
-

识别结果(可编辑)

-
- - +

待处理文件列表

+
+
-
-
@@ -142,20 +146,17 @@ const uploadForm = document.getElementById('uploadForm'); const fileInput = document.getElementById('fileInput'); const fileHint = document.getElementById('fileHint'); const previewList = document.getElementById('previewList'); -const resultBox = document.getElementById('resultBox'); +const pendingItems = document.getElementById('pendingItems'); const uploadMsg = document.getElementById('uploadMsg'); const confirmBtn = document.getElementById('confirmBtn'); const clearBtn = document.getElementById('clearBtn'); const confirmMsg = document.getElementById('confirmMsg'); -const kvForm = document.getElementById('kvForm'); -const addFieldBtn = document.getElementById('addFieldBtn'); -const syncFromTextBtn = document.getElementById('syncFromTextBtn'); const dropArea = document.getElementById('dropArea'); const progressWrap = document.getElementById('progressWrap'); const progressBar = document.getElementById('progressBar'); const progressText = document.getElementById('progressText'); -let currentImageRel = []; +let currentItems = []; // 存储当前待处理的所有文件结果 let selectedFiles = []; function setProgress(p, text){ @@ -251,10 +252,15 @@ function setPreviewList(urls) { const idx = Number(item.dataset.index); if (!Number.isNaN(idx)) { selectedFiles.splice(idx, 1); - const urls = selectedFiles.map(f => URL.createObjectURL(f)); + const urls = selectedFiles.map(f => { + if (f.name.toLowerCase().endsWith('.pdf')) { + return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+'; + } + return URL.createObjectURL(f); + }); setPreviewList(urls); updateFileHint(); - setTimeout(() => urls.forEach(u => URL.revokeObjectURL(u)), 0); + setTimeout(() => urls.forEach(u => { if (u.startsWith('blob:')) URL.revokeObjectURL(u); }), 0); } }; item.appendChild(img); @@ -265,7 +271,7 @@ function setPreviewList(urls) { function updateFileHint() { const count = selectedFiles.length; - fileHint.textContent = count ? `已选择 ${count} 张` : '未选择文件'; + fileHint.textContent = count ? `已选择 ${count} 个文件` : '未选择文件'; } function addFiles(files) { @@ -280,18 +286,13 @@ function addFiles(files) { }); const urls = selectedFiles.map(f => { if (f.name.toLowerCase().endsWith('.pdf')) { - // 使用一个简单的 SVG PDF 图标 Data URI - return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+'; + return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+'; } return URL.createObjectURL(f); }); setPreviewList(urls); updateFileHint(); - setTimeout(() => { - urls.forEach(u => { - if (u.startsWith('blob:')) URL.revokeObjectURL(u); - }); - }, 0); + setTimeout(() => urls.forEach(u => { if (u.startsWith('blob:')) URL.revokeObjectURL(u); }), 0); } fileInput.addEventListener('change', function(e) { @@ -299,7 +300,7 @@ fileInput.addEventListener('change', function(e) { fileInput.value = ''; }); -function createRow(k = '', v = '') { +function createKvRow(k = '', v = '', onInput) { const row = document.createElement('div'); row.className = 'form-row'; const keyInput = document.createElement('input'); @@ -314,93 +315,147 @@ function createRow(k = '', v = '') { delBtn.type = 'button'; delBtn.className = 'btn btn-danger'; delBtn.textContent = '删除'; + delBtn.onclick = () => { - if (kvForm.querySelectorAll('.form-row').length > 1) { - kvForm.removeChild(row); + const container = row.parentElement; + if (container.querySelectorAll('.form-row').length > 1) { + container.removeChild(row); } else { keyInput.value = ''; valInput.value = ''; } - syncTextarea(); + if (onInput) onInput(); }; - keyInput.oninput = syncTextarea; - valInput.oninput = syncTextarea; + + keyInput.oninput = onInput; + valInput.oninput = onInput; + row.appendChild(keyInput); row.appendChild(valInput); row.appendChild(delBtn); return row; } -function renderFormFromObject(obj) { - kvForm.innerHTML = ` -
-
字段名
-
字段值
-
操作
-
- `; - Object.keys(obj || {}).forEach(k => { - kvForm.appendChild(createRow(k, obj[k])); - }); - if (kvForm.children.length <= 1) kvForm.appendChild(createRow()); - syncTextarea(); -} +function renderPendingItems(items) { + pendingItems.innerHTML = ''; + currentItems = items; -function objectFromForm() { - const obj = {}; - Array.from(kvForm.children).forEach(row => { - if (row.classList.contains('form-header')) return; - const [kInput, vInput] = row.querySelectorAll('input'); - const k = (kInput.value || '').trim(); - if (!k) return; - const raw = vInput.value; - try { - obj[k] = JSON.parse(raw); - } catch (_) { - obj[k] = raw; + items.forEach((item, index) => { + const itemEl = document.createElement('div'); + itemEl.className = 'pending-item'; + + const header = document.createElement('div'); + header.className = 'pending-item-header'; + header.innerHTML = `${index + 1}. ${item.name}`; + + const removeBtn = document.createElement('button'); + removeBtn.className = 'btn btn-danger'; + removeBtn.textContent = '忽略此项'; + removeBtn.onclick = () => { + currentItems.splice(index, 1); + renderPendingItems(currentItems); + }; + header.appendChild(removeBtn); + + const body = document.createElement('div'); + body.className = 'pending-item-body'; + + const preview = document.createElement('div'); + preview.className = 'pending-item-preview'; + const mainImg = document.createElement('img'); + mainImg.src = item.image_urls[0]; + preview.appendChild(mainImg); + if (item.image_urls.length > 1) { + const hint = document.createElement('p'); + hint.className = 'muted'; + hint.style.textAlign = 'center'; + hint.textContent = `共 ${item.image_urls.length} 页`; + preview.appendChild(hint); } + + const edit = document.createElement('div'); + edit.className = 'pending-item-edit'; + + const controls = document.createElement('div'); + controls.className = 'form-controls'; + const addBtn = document.createElement('button'); + addBtn.className = 'btn btn-secondary'; + addBtn.textContent = '添加字段'; + const syncBtn = document.createElement('button'); + syncBtn.className = 'btn btn-secondary'; + syncBtn.textContent = '刷新表单'; + controls.appendChild(addBtn); + controls.appendChild(syncBtn); + + const kvForm = document.createElement('div'); + kvForm.className = 'kv-form-container'; + kvForm.innerHTML = '
字段名
字段值
操作
'; + + const textarea = document.createElement('textarea'); + textarea.className = 'result-textarea'; + + const syncData = () => { + const obj = {}; + kvForm.querySelectorAll('.form-row').forEach(row => { + const inputs = row.querySelectorAll('input'); + const k = inputs[0].value.trim(); + if (!k) return; + try { obj[k] = JSON.parse(inputs[1].value); } catch(e) { obj[k] = inputs[1].value; } + }); + item.data = obj; + textarea.value = JSON.stringify(obj, null, 2); + }; + + Object.entries(item.data).forEach(([k, v]) => { + kvForm.appendChild(createKvRow(k, v, syncData)); + }); + if (kvForm.querySelectorAll('.form-row').length === 0) { + kvForm.appendChild(createKvRow('', '', syncData)); + } + + addBtn.onclick = () => { + kvForm.appendChild(createKvRow('', '', syncData)); + syncData(); + }; + + syncBtn.onclick = () => { + try { + const obj = JSON.parse(textarea.value); + kvForm.innerHTML = '
字段名
字段值
操作
'; + Object.entries(obj).forEach(([k, v]) => kvForm.appendChild(createKvRow(k, v, syncData))); + item.data = obj; + } catch(e) { alert('JSON格式错误'); } + }; + + textarea.value = JSON.stringify(item.data, null, 2); + textarea.oninput = () => { item.data = JSON.parse(textarea.value); }; + + edit.appendChild(controls); + edit.appendChild(kvForm); + edit.appendChild(textarea); + + body.appendChild(preview); + body.appendChild(edit); + + itemEl.appendChild(header); + itemEl.appendChild(body); + pendingItems.appendChild(itemEl); }); - return obj; + + confirmBtn.disabled = items.length === 0; } -function syncTextarea() { - const obj = objectFromForm(); - resultBox.value = JSON.stringify(obj, null, 2); -} - -addFieldBtn.addEventListener('click', () => { - kvForm.appendChild(createRow()); - syncTextarea(); -}); - -syncFromTextBtn.addEventListener('click', () => { - try { - const obj = JSON.parse(resultBox.value || '{}'); - renderFormFromObject(obj); - uploadMsg.textContent = '已从文本区刷新表单'; - uploadMsg.className = 'status-message success'; - uploadMsg.style.display = 'block'; - setTimeout(() => { - uploadMsg.style.display = 'none'; - }, 2000); - } catch (e) { - uploadMsg.textContent = '文本区不是有效JSON'; - uploadMsg.className = 'status-message error'; - uploadMsg.style.display = 'block'; - } -}); - uploadForm.addEventListener('submit', async (e) => { e.preventDefault(); uploadMsg.textContent = ''; confirmMsg.textContent = ''; confirmBtn.disabled = true; - resultBox.value = ''; - currentImageRel = []; + previewList.innerHTML = ''; + pendingItems.innerHTML = ''; + currentItems = []; - const files = Array.from(selectedFiles || []).filter(f => f && (f.type.startsWith('image/') || f.name.toLowerCase().endsWith('.pdf'))); - if (!files.length) { - uploadMsg.textContent = '请选择图片或PDF文件'; + if (!selectedFiles.length) { + uploadMsg.textContent = '请选择文件'; uploadMsg.className = 'status-message error'; uploadMsg.style.display = 'block'; return; @@ -409,10 +464,10 @@ uploadForm.addEventListener('submit', async (e) => { showProgress(); setProgress(5, '预处理中'); const formData = new FormData(); - for (let i = 0; i < files.length; i++) { - const file = files[i]; + for (let i = 0; i < selectedFiles.length; i++) { + const file = selectedFiles[i]; if (file.type.startsWith('image/')) { - setProgress(5 + Math.round((i/files.length)*45), '转换图片'); + setProgress(5 + Math.round((i/selectedFiles.length)*45), '转换图片'); try { const jpegFile = await convertToJpeg(file); formData.append('file', jpegFile); @@ -420,7 +475,6 @@ uploadForm.addEventListener('submit', async (e) => { formData.append('file', file); } } else { - // PDF 直接添加 formData.append('file', file); } } @@ -431,7 +485,8 @@ uploadForm.addEventListener('submit', async (e) => { const timer = setInterval(() => { prog = Math.min(95, prog + 1); setProgress(prog, '识别中'); - }, 120); + }, 200); + const resp = await fetch('/elastic/upload/', { method: 'POST', credentials: 'same-origin', @@ -447,11 +502,8 @@ uploadForm.addEventListener('submit', async (e) => { uploadMsg.textContent = data.message || '识别成功'; uploadMsg.className = 'status-message success'; uploadMsg.style.display = 'block'; - const urls = data.image_urls || (data.image_url ? [data.image_url] : []); - setPreviewList(urls); - renderFormFromObject(data.data || {}); - currentImageRel = data.images || (data.image ? [data.image] : []); - confirmBtn.disabled = false; + + renderPendingItems(data.items || []); setTimeout(hideProgress, 800); } catch (e) { uploadMsg.textContent = e.message || '发生错误'; @@ -462,9 +514,14 @@ uploadForm.addEventListener('submit', async (e) => { }); confirmBtn.addEventListener('click', async () => { - confirmMsg.textContent = ''; + confirmMsg.textContent = '正在录入...'; try { - const edited = objectFromForm(); + const payload = { + items: currentItems.map(it => ({ + data: it.data, + image: it.images + })) + }; const resp = await fetch('/elastic/confirm/', { method: 'POST', credentials: 'same-origin', @@ -472,7 +529,7 @@ confirmBtn.addEventListener('click', async () => { 'Content-Type': 'application/json', 'X-CSRFToken': getCookie('csrftoken') || '' }, - body: JSON.stringify({ data: edited, image: currentImageRel }) + body: JSON.stringify(payload) }); const data = await resp.json(); if (!resp.ok || data.status !== 'success') { @@ -480,6 +537,12 @@ confirmBtn.addEventListener('click', async () => { } confirmMsg.textContent = data.message || '录入成功'; confirmMsg.style.color = '#179957'; + // 录入成功后清空待处理列表 + pendingItems.innerHTML = ''; + currentItems = []; + selectedFiles = []; + updateFileHint(); + confirmBtn.disabled = true; } catch (e) { confirmMsg.textContent = e.message || '发生错误'; confirmMsg.style.color = '#d14343'; @@ -489,13 +552,11 @@ confirmBtn.addEventListener('click', async () => { clearBtn.addEventListener('click', () => { fileInput.value = ''; previewList.innerHTML = ''; - resultBox.value = ''; - kvForm.innerHTML = ''; - kvForm.appendChild(createRow()); // 保留一个空行 + pendingItems.innerHTML = ''; uploadMsg.textContent = ''; confirmMsg.textContent = ''; confirmBtn.disabled = true; - currentImageRel = []; + currentItems = []; selectedFiles = []; updateFileHint(); }); diff --git a/elastic/views.py b/elastic/views.py index ea3a5ea..92c45b0 100644 --- a/elastic/views.py +++ b/elastic/views.py @@ -607,107 +607,96 @@ def upload(request): images_dir = os.path.join(settings.MEDIA_ROOT, "images") os.makedirs(images_dir, exist_ok=True) - rel_paths = [] - image_urls = [] - data_list = [] - # 预处理文件列表,处理PDF转换 - processed_files = [] + # 按照原始文件进行分组处理 + file_results = [] + for f in files: - if f.name.lower().endswith('.pdf'): + group_images = [] # 存储该文件生成的所有图片路径信息 (abs_path, filename) + is_pdf = f.name.lower().endswith('.pdf') + + if is_pdf: if not HAS_PDF_SUPPORT: return JsonResponse({"status": "error", "message": f"服务器未安装PDF处理组件(PyMuPDF): {PDF_ERROR}"}, status=500) - # 将PDF保存到临时文件 with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp: for chunk in f.chunks(): tmp.write(chunk) tmp_path = tmp.name try: - # 转换PDF为图片列表 doc = fitz.open(tmp_path) for i in range(len(doc)): page = doc.load_page(i) - # 降低 DPI 从 200 到 150,在保证准确率的同时显著减小图片体积,加快上传速度 - pix = page.get_pixmap(dpi=150) + pix = page.get_pixmap(dpi=150) img_filename = f"{uuid.uuid4()}_page_{i+1}.jpg" img_abs_path = os.path.join(images_dir, img_filename) pix.save(img_abs_path) - processed_files.append((img_abs_path, img_filename, f"{f.name}_p{i+1}")) + group_images.append((img_abs_path, img_filename)) doc.close() except Exception as e: - return JsonResponse({"status": "error", "message": f"PDF转换失败: {str(e)}"}, status=500) + return JsonResponse({"status": "error", "message": f"PDF {f.name} 转换失败: {str(e)}"}, status=500) finally: if os.path.exists(tmp_path): os.remove(tmp_path) else: - # 普通图片处理 filename = f"{uuid.uuid4()}_{f.name}" abs_path = os.path.join(images_dir, filename) with open(abs_path, "wb") as dst: for chunk in f.chunks(): dst.write(chunk) - processed_files.append((abs_path, filename, f.name)) + group_images.append((abs_path, filename)) - # 使用线程池并行调用 OCR 接口以提升速度 - def run_ocr(file_info): - abs_path, filename, original_name = file_info - try: - data = ocr_and_extract_info(abs_path) - return (data, filename) - except Exception as e: - return (e, filename) + # 对该组图片并行进行 OCR 识别 + def run_ocr(img_info): + abs_p, fname = img_info + try: + data = ocr_and_extract_info(abs_p) + return data + except Exception: + return None - rel_paths = [] - image_urls = [] - data_list = [] + group_data_list = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor: + futures = [executor.submit(run_ocr, img_info) for img_info in group_images] + for future in concurrent.futures.as_completed(futures): + res = future.result() + if res: + group_data_list.append(res) - # 限制最大线程数为 8,避免过多并发导致 API 限制或资源耗尽 - with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor: - future_to_file = {executor.submit(run_ocr, f_info): f_info for f_info in processed_files} - for future in concurrent.futures.as_completed(future_to_file): - result, filename = future.result() - if isinstance(result, Exception): - return JsonResponse({"status": "error", "message": f"文件 {filename} 识别失败: {str(result)}"}, status=500) - - if result: - data_list.append(result) - - rel_path = f"images/{filename}" - rel_paths.append(rel_path) - image_urls.append(request.build_absolute_uri(settings.MEDIA_URL + rel_path)) + # 合并该文件的多页识别结果 + merged_group_data = {} + for item in group_data_list: + if not isinstance(item, dict): continue + for k, v in item.items(): + key = str(k).strip() + if not key: continue + if key not in merged_group_data or merged_group_data.get(key) in (None, ''): + merged_group_data[key] = v + elif merged_group_data.get(key) != v: + base = key + idx = 2 + while f"{base}_{idx}" in merged_group_data: idx += 1 + merged_group_data[f"{base}_{idx}"] = v - if not data_list: - return JsonResponse({"status": "error", "message": "无法识别图片内容"}, status=400) + if not merged_group_data: + # 如果没识别到,至少保留一个空结构或者包含文件名的提示 + merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"} - merged = {} - for item in data_list: - if not isinstance(item, dict): - continue - for k, v in item.items(): - key = str(k).strip() - if not key: - continue - if key not in merged or merged.get(key) in (None, ''): - merged[key] = v - continue - if merged.get(key) == v: - continue - base = key - idx = 2 - while f"{base}_{idx}" in merged: - idx += 1 - merged[f"{base}_{idx}"] = v + rel_paths = [f"images/{img[1]}" for img in group_images] + image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths] + + file_results.append({ + "name": f.name, + "data": merged_group_data, + "images": rel_paths, + "image_urls": image_urls, + }) return JsonResponse({ "status": "success", - "message": "识别成功,请确认数据后点击录入", - "data": merged, - "images": rel_paths, - "image_urls": image_urls, - "image": rel_paths[0] if rel_paths else "", - "image_url": image_urls[0] if image_urls else "", + "message": f"成功处理 {len(file_results)} 个文件,请确认数据后点击录入", + "items": file_results, }) @@ -715,7 +704,6 @@ def upload(request): @require_http_methods(["POST"]) def confirm(request): if request.session.get("user_id") is None: - # 允许从payload中带入user_id作为后备(便于前端已知用户时继续操作) try: payload_for_uid = json.loads(request.body.decode("utf-8")) except Exception: @@ -732,77 +720,97 @@ def confirm(request): except json.JSONDecodeError: return JsonResponse({"status": "error", "message": "JSON无效"}, status=400) - edited = payload.get("data") or {} - image_rel = payload.get("image") or "" - if not isinstance(edited, dict) or not edited: - return JsonResponse({"status": "error", "message": "数据不能为空"}, status=400) + # 支持单项或批量入库 + items = payload.get("items") + if not items: + # 兼容旧版本单项入库 + items = [{ + "data": payload.get("data"), + "image": payload.get("image") + }] - ensure_type_in_list(edited.get("数据类型")) - image_ref_to_store = "" - temp_files_to_delete = [] - image_rels = _parse_image_refs(image_rel) - if image_rels: - images_dir = os.path.join(settings.MEDIA_ROOT, "images") - os.makedirs(images_dir, exist_ok=True) - image_refs = [] - for rel in image_rels: - src_abs = os.path.join(settings.MEDIA_ROOT, rel) - if not os.path.isfile(src_abs): - return JsonResponse({"status": "error", "message": "图片文件不存在"}, status=400) - webp_name = f"{uuid.uuid4().hex}.webp" - webp_abs = os.path.join(images_dir, webp_name) - try: - with Image.open(src_abs) as im: - if im.mode in ("RGBA", "LA", "P"): - im = im.convert("RGBA") - else: - im = im.convert("RGB") - im.save(webp_abs, format="WEBP", quality=80) - except Exception: + success_count = 0 + errors = [] + + for item in items: + edited = item.get("data") or {} + image_rel = item.get("image") or "" + + if not isinstance(edited, dict) or not edited: + errors.append("数据项不能为空") + continue + + ensure_type_in_list(edited.get("数据类型")) + image_ref_to_store = "" + temp_files_to_delete = [] + image_rels = _parse_image_refs(image_rel) + + if image_rels: + images_dir = os.path.join(settings.MEDIA_ROOT, "images") + os.makedirs(images_dir, exist_ok=True) + image_refs = [] + for rel in image_rels: + src_abs = os.path.join(settings.MEDIA_ROOT, rel) + if not os.path.isfile(src_abs): + errors.append(f"图片文件 {rel} 不存在") + continue + + webp_name = f"{uuid.uuid4().hex}.webp" + webp_abs = os.path.join(images_dir, webp_name) try: - if os.path.isfile(webp_abs): - os.remove(webp_abs) + with Image.open(src_abs) as im: + if im.mode in ("RGBA", "LA", "P"): + im = im.convert("RGBA") + else: + im = im.convert("RGB") + im.save(webp_abs, format="WEBP", quality=80) except Exception: - pass - return JsonResponse({"status": "error", "message": "图片转换WEBP失败"}, status=500) + errors.append(f"图片 {rel} 转换WEBP失败") + continue - try: - object_name = f"images/{webp_name}" - from minio_storage.minio_connect import upload_file, is_minio_configured - if is_minio_configured(): - upload_file(webp_abs, object_name, content_type="image/webp") - image_refs.append(f"minio:{object_name}") - temp_files_to_delete.extend([src_abs, webp_abs]) - else: - # Fallback to local storage - image_refs.append(f"local:{object_name}") - # In local case, we keep the webp file and only delete the original temporary file - temp_files_to_delete.append(src_abs) - except Exception as e: - return JsonResponse({"status": "error", "message": f"存储图片失败: {e}"}, status=500) - if len(image_refs) == 1: - image_ref_to_store = image_refs[0] - elif len(image_refs) > 1: - image_ref_to_store = json_to_string(image_refs) + try: + object_name = f"images/{webp_name}" + from minio_storage.minio_connect import upload_file, is_minio_configured + if is_minio_configured(): + upload_file(webp_abs, object_name, content_type="image/webp") + image_refs.append(f"minio:{object_name}") + temp_files_to_delete.extend([src_abs, webp_abs]) + else: + image_refs.append(f"local:{object_name}") + temp_files_to_delete.append(src_abs) + except Exception as e: + errors.append(f"存储图片 {rel} 失败: {e}") + continue + + if len(image_refs) == 1: + image_ref_to_store = image_refs[0] + elif len(image_refs) > 1: + image_ref_to_store = json_to_string(image_refs) - to_store = { - "writer_id": str(request.session.get("user_id")), - "data": json_to_string(edited), - "image": image_ref_to_store, - } + to_store = { + "writer_id": str(request.session.get("user_id")), + "data": json_to_string(edited), + "image": image_ref_to_store, + } - ok = insert_data(to_store) - if not ok: - return JsonResponse({"status": "error", "message": "写入ES失败"}, status=500) + ok = insert_data(to_store) + if ok: + success_count += 1 + # 清理临时文件 + for p in temp_files_to_delete: + if p and os.path.isfile(p): + try: os.remove(p) + except: pass + else: + errors.append("写入ES失败") - try: - for p in temp_files_to_delete: - if p and os.path.isfile(p): - os.remove(p) - except Exception: - pass - - return JsonResponse({"status": "success", "message": "数据录入成功", "data": edited}) + if success_count > 0: + msg = f"成功录入 {success_count} 条数据" + if errors: + msg += f" (遇到 {len(errors)} 个错误)" + return JsonResponse({"status": "success", "message": msg}) + else: + return JsonResponse({"status": "error", "message": "录入失败: " + "; ".join(errors[:3])}, status=500) @require_http_methods(["GET"])