diff --git a/elastic/templates/elastic/upload.html b/elastic/templates/elastic/upload.html
index e6dabf3..397f3e3 100644
--- a/elastic/templates/elastic/upload.html
+++ b/elastic/templates/elastic/upload.html
@@ -42,22 +42,33 @@
.preview-box {flex: 1; text-align: center; }
.preview-box h3 {margin-top: 0;color: #334155; }
.preview-box img { max-width: 100%;max-height: 300px;border: 1px solid #e2e8f0;border-radius: 8px;object-fit: contain;}
- .preview-list {display: grid;grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));gap: 12px;}
+ .preview-list {display: grid;grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));gap: 12px; margin-top: 20px;}
.preview-item {position: relative;}
.preview-item img {width: 100%;max-height: 220px;border: 1px solid #e2e8f0;border-radius: 8px;object-fit: contain;}
.preview-remove {position: absolute;top: 6px;right: 6px;border: none;border-radius: 999px;background: rgba(15,23,42,0.8);color: #fff;width: 24px;height: 24px;cursor: pointer;display: flex;align-items: center;justify-content: center;font-size: 14px;line-height: 1;}
.result-box {flex: 1;}
.result-box h3 { margin-top: 0; color: #334155;}
.form-controls { display: flex;gap: 8px;margin-bottom: 12px;flex-wrap: wrap;}
- #kvForm {border: 1px solid #e2e8f0; border-radius: 8px; padding: 12px; max-height: 400px; overflow: auto;margin-bottom: 12px;background: #f8fafc;}
- .form-header { display: grid; grid-template-columns: 1fr 1fr auto; gap: 8px; margin-bottom: 8px; padding: 0 4px; font-weight: 600; color: #475569; font-size: 14px;}
+ .pending-item { background: #fff; border: 1px solid #e2e8f0; border-radius: 12px; padding: 20px; margin-bottom: 24px; box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1); }
+ .pending-item-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 16px; border-bottom: 1px solid #f1f5f9; padding-bottom: 12px; }
+ .pending-item-title { font-weight: 600; color: #1e293b; font-size: 16px; }
+ .pending-item-body { display: flex; gap: 20px; }
+ .pending-item-preview { flex: 0 0 240px; }
+ .pending-item-preview img { width: 100%; border-radius: 8px; border: 1px solid #f1f5f9; }
+ .pending-item-edit { flex: 1; }
+ .pending-item-footer { margin-top: 16px; text-align: right; }
+ @media (max-width: 992px) {
+ .pending-item-body { flex-direction: column; }
+ .pending-item-preview { flex: 0 0 auto; }
+ }
.form-row {display: grid;grid-template-columns: 1fr 1fr auto;gap: 8px; margin-bottom: 6px; align-items: center;}
.form-row input {padding: 8px;border: 1px solid #cbd5e1;border-radius: 4px; width: 100%; box-sizing: border-box;}
- #resultBox { width: 100%;min-height: 200px;font-family: ui-monospace, SFMono-Regular, Menlo, monospace;font-size: 14px; padding: 12px; border: 1px solid #e2e8f0;
- border-radius: 8px; resize: vertical;box-sizing: border-box; }
+ .kv-form-container {border: 1px solid #e2e8f0; border-radius: 8px; padding: 12px; max-height: 400px; overflow: auto; margin-bottom: 12px; background: #f8fafc;}
+ .form-header { display: grid; grid-template-columns: 1fr 1fr auto; gap: 8px; margin-bottom: 8px; padding: 0 4px; font-weight: 600; color: #475569; font-size: 14px;}
+ .result-textarea { width: 100%; min-height: 120px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 13px; padding: 10px; border: 1px solid #e2e8f0; border-radius: 8px; resize: vertical; box-sizing: border-box; }
.status-message { padding: 10px; margin: 10px 0; border-radius: 6px; display: none; }
.status-message.success { background-color: #d4edda; color: #155724; border: 1px solid #c3e6cb; }
- .status-message.error { background-color: #f8d7da;color: #721c24; border: 1px solid #f5c6cb; }
+ .status-message.error { background-color: #f8d7da; color: #721c24; border: 1px solid #f5c6cb; }
.action-buttons { margin-top: 16px; display: flex; gap: 8px; flex-wrap: wrap; }
.progress {position: relative; height: 12px; background: #e2e8f0; border-radius: 8px; overflow: hidden;}
.progress-bar {height: 100%; width: 0; background: linear-gradient(90deg, #4f46e5 0%, #60a5fa 100%); transition: width .2s ease;}
@@ -96,6 +107,7 @@
{% csrf_token %}
+
-
-
@@ -142,20 +146,17 @@ const uploadForm = document.getElementById('uploadForm');
const fileInput = document.getElementById('fileInput');
const fileHint = document.getElementById('fileHint');
const previewList = document.getElementById('previewList');
-const resultBox = document.getElementById('resultBox');
+const pendingItems = document.getElementById('pendingItems');
const uploadMsg = document.getElementById('uploadMsg');
const confirmBtn = document.getElementById('confirmBtn');
const clearBtn = document.getElementById('clearBtn');
const confirmMsg = document.getElementById('confirmMsg');
-const kvForm = document.getElementById('kvForm');
-const addFieldBtn = document.getElementById('addFieldBtn');
-const syncFromTextBtn = document.getElementById('syncFromTextBtn');
const dropArea = document.getElementById('dropArea');
const progressWrap = document.getElementById('progressWrap');
const progressBar = document.getElementById('progressBar');
const progressText = document.getElementById('progressText');
-let currentImageRel = [];
+let currentItems = []; // 存储当前待处理的所有文件结果
let selectedFiles = [];
function setProgress(p, text){
@@ -251,10 +252,15 @@ function setPreviewList(urls) {
const idx = Number(item.dataset.index);
if (!Number.isNaN(idx)) {
selectedFiles.splice(idx, 1);
- const urls = selectedFiles.map(f => URL.createObjectURL(f));
+ const urls = selectedFiles.map(f => {
+ if (f.name.toLowerCase().endsWith('.pdf')) {
+ return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+';
+ }
+ return URL.createObjectURL(f);
+ });
setPreviewList(urls);
updateFileHint();
- setTimeout(() => urls.forEach(u => URL.revokeObjectURL(u)), 0);
+ setTimeout(() => urls.forEach(u => { if (u.startsWith('blob:')) URL.revokeObjectURL(u); }), 0);
}
};
item.appendChild(img);
@@ -265,7 +271,7 @@ function setPreviewList(urls) {
function updateFileHint() {
const count = selectedFiles.length;
- fileHint.textContent = count ? `已选择 ${count} 张` : '未选择文件';
+ fileHint.textContent = count ? `已选择 ${count} 个文件` : '未选择文件';
}
function addFiles(files) {
@@ -280,18 +286,13 @@ function addFiles(files) {
});
const urls = selectedFiles.map(f => {
if (f.name.toLowerCase().endsWith('.pdf')) {
- // 使用一个简单的 SVG PDF 图标 Data URI
- return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+';
+ return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+';
}
return URL.createObjectURL(f);
});
setPreviewList(urls);
updateFileHint();
- setTimeout(() => {
- urls.forEach(u => {
- if (u.startsWith('blob:')) URL.revokeObjectURL(u);
- });
- }, 0);
+ setTimeout(() => urls.forEach(u => { if (u.startsWith('blob:')) URL.revokeObjectURL(u); }), 0);
}
fileInput.addEventListener('change', function(e) {
@@ -299,7 +300,7 @@ fileInput.addEventListener('change', function(e) {
fileInput.value = '';
});
-function createRow(k = '', v = '') {
+function createKvRow(k = '', v = '', onInput) {
const row = document.createElement('div');
row.className = 'form-row';
const keyInput = document.createElement('input');
@@ -314,93 +315,147 @@ function createRow(k = '', v = '') {
delBtn.type = 'button';
delBtn.className = 'btn btn-danger';
delBtn.textContent = '删除';
+
delBtn.onclick = () => {
- if (kvForm.querySelectorAll('.form-row').length > 1) {
- kvForm.removeChild(row);
+ const container = row.parentElement;
+ if (container.querySelectorAll('.form-row').length > 1) {
+ container.removeChild(row);
} else {
keyInput.value = '';
valInput.value = '';
}
- syncTextarea();
+ if (onInput) onInput();
};
- keyInput.oninput = syncTextarea;
- valInput.oninput = syncTextarea;
+
+ keyInput.oninput = onInput;
+ valInput.oninput = onInput;
+
row.appendChild(keyInput);
row.appendChild(valInput);
row.appendChild(delBtn);
return row;
}
-function renderFormFromObject(obj) {
- kvForm.innerHTML = `
-
- `;
- Object.keys(obj || {}).forEach(k => {
- kvForm.appendChild(createRow(k, obj[k]));
- });
- if (kvForm.children.length <= 1) kvForm.appendChild(createRow());
- syncTextarea();
-}
+/**
+ * Render one editable card per recognised upload and make `items` the
+ * pending batch (`currentItems`).
+ *
+ * Each card shows the first preview image, a key/value form and a JSON
+ * textarea. Edits made in either editor are written back to `item.data`.
+ * The confirm button is enabled only while at least one item is pending.
+ *
+ * @param {Array<{name: string, data: Object, images: string[], image_urls: string[]}>} items
+ *   Entries of the upload response's `items` array.
+ */
+function renderPendingItems(items) {
+  pendingItems.innerHTML = '';
+  currentItems = items;
-function objectFromForm() {
- const obj = {};
- Array.from(kvForm.children).forEach(row => {
- if (row.classList.contains('form-header')) return;
- const [kInput, vInput] = row.querySelectorAll('input');
- const k = (kInput.value || '').trim();
- if (!k) return;
- const raw = vInput.value;
- try {
- obj[k] = JSON.parse(raw);
- } catch (_) {
- obj[k] = raw;
+
+  // Only plain objects are valid item data; arrays and scalars are rejected.
+  const isPlainObject = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);
+
+  items.forEach((item, index) => {
+    const itemEl = document.createElement('div');
+    itemEl.className = 'pending-item';
+
+    const header = document.createElement('div');
+    header.className = 'pending-item-header';
+    // The file name is user-controlled, so it is set through textContent
+    // instead of being interpolated into innerHTML.
+    const title = document.createElement('span');
+    title.className = 'pending-item-title';
+    title.textContent = `${index + 1}. ${item.name}`;
+    header.appendChild(title);
+
+    const removeBtn = document.createElement('button');
+    removeBtn.type = 'button';
+    removeBtn.className = 'btn btn-danger';
+    removeBtn.textContent = '忽略此项';
+    removeBtn.onclick = () => {
+      // Re-rendering refreshes the captured indices of the remaining cards.
+      currentItems.splice(index, 1);
+      renderPendingItems(currentItems);
+    };
+    header.appendChild(removeBtn);
+
+    const body = document.createElement('div');
+    body.className = 'pending-item-body';
+
+    const preview = document.createElement('div');
+    preview.className = 'pending-item-preview';
+    const imageUrls = Array.isArray(item.image_urls) ? item.image_urls : [];
+    if (imageUrls.length > 0) {
+      const mainImg = document.createElement('img');
+      mainImg.src = imageUrls[0];
+      preview.appendChild(mainImg);
+    }
+    if (imageUrls.length > 1) {
+      const hint = document.createElement('p');
+      hint.className = 'muted';
+      hint.style.textAlign = 'center';
+      hint.textContent = `共 ${imageUrls.length} 页`;
+      preview.appendChild(hint);
}
+
+    const edit = document.createElement('div');
+    edit.className = 'pending-item-edit';
+
+    const controls = document.createElement('div');
+    controls.className = 'form-controls';
+    const addBtn = document.createElement('button');
+    addBtn.type = 'button';
+    addBtn.className = 'btn btn-secondary';
+    addBtn.textContent = '添加字段';
+    const syncBtn = document.createElement('button');
+    syncBtn.type = 'button';
+    syncBtn.className = 'btn btn-secondary';
+    syncBtn.textContent = '刷新表单';
+    controls.appendChild(addBtn);
+    controls.appendChild(syncBtn);
+
+    const kvForm = document.createElement('div');
+    kvForm.className = 'kv-form-container';
+    kvForm.innerHTML = '';
+
+    const textarea = document.createElement('textarea');
+    textarea.className = 'result-textarea';
+
+    // Rebuild item.data from the form rows and mirror it into the textarea.
+    const syncData = () => {
+      const obj = {};
+      kvForm.querySelectorAll('.form-row').forEach(row => {
+        const inputs = row.querySelectorAll('input');
+        const k = inputs[0].value.trim();
+        if (!k) return;
+        // Values that parse as JSON keep their type; anything else is stored as text.
+        try { obj[k] = JSON.parse(inputs[1].value); } catch (e) { obj[k] = inputs[1].value; }
+      });
+      item.data = obj;
+      textarea.value = JSON.stringify(obj, null, 2);
+    };
+
+    // Append one row per entry, always leaving at least one row to type into.
+    const fillRows = (obj) => {
+      Object.entries(obj).forEach(([k, v]) => {
+        kvForm.appendChild(createKvRow(k, v, syncData));
+      });
+      if (kvForm.querySelectorAll('.form-row').length === 0) {
+        kvForm.appendChild(createKvRow('', '', syncData));
+      }
+    };
+
+    if (!isPlainObject(item.data)) item.data = {};
+    fillRows(item.data);
+
+    addBtn.onclick = () => {
+      kvForm.appendChild(createKvRow('', '', syncData));
+      syncData();
+    };
+
+    syncBtn.onclick = () => {
+      let obj;
+      try {
+        obj = JSON.parse(textarea.value);
+      } catch (e) {
+        alert('JSON格式错误');
+        return;
+      }
+      // Validate before clearing, so a bad paste never wipes the current form.
+      if (!isPlainObject(obj)) {
+        alert('JSON格式错误');
+        return;
+      }
+      kvForm.innerHTML = '';
+      fillRows(obj);
+      item.data = obj;
+    };
+
+    textarea.value = JSON.stringify(item.data, null, 2);
+    textarea.oninput = () => {
+      // Mid-edit text is usually not valid JSON; keep the last valid object
+      // instead of throwing on every keystroke.
+      try {
+        const parsed = JSON.parse(textarea.value);
+        if (isPlainObject(parsed)) item.data = parsed;
+      } catch (e) {
+        // Incomplete JSON: ignored until the text parses again.
+      }
+    };
+
+    edit.appendChild(controls);
+    edit.appendChild(kvForm);
+    edit.appendChild(textarea);
+
+    body.appendChild(preview);
+    body.appendChild(edit);
+
+    itemEl.appendChild(header);
+    itemEl.appendChild(body);
+    pendingItems.appendChild(itemEl);
});
- return obj;
+
+  confirmBtn.disabled = items.length === 0;
}
-function syncTextarea() {
- const obj = objectFromForm();
- resultBox.value = JSON.stringify(obj, null, 2);
-}
-
-addFieldBtn.addEventListener('click', () => {
- kvForm.appendChild(createRow());
- syncTextarea();
-});
-
-syncFromTextBtn.addEventListener('click', () => {
- try {
- const obj = JSON.parse(resultBox.value || '{}');
- renderFormFromObject(obj);
- uploadMsg.textContent = '已从文本区刷新表单';
- uploadMsg.className = 'status-message success';
- uploadMsg.style.display = 'block';
- setTimeout(() => {
- uploadMsg.style.display = 'none';
- }, 2000);
- } catch (e) {
- uploadMsg.textContent = '文本区不是有效JSON';
- uploadMsg.className = 'status-message error';
- uploadMsg.style.display = 'block';
- }
-});
-
uploadForm.addEventListener('submit', async (e) => {
e.preventDefault();
uploadMsg.textContent = '';
confirmMsg.textContent = '';
confirmBtn.disabled = true;
- resultBox.value = '';
- currentImageRel = [];
+ previewList.innerHTML = '';
+ pendingItems.innerHTML = '';
+ currentItems = [];
- const files = Array.from(selectedFiles || []).filter(f => f && (f.type.startsWith('image/') || f.name.toLowerCase().endsWith('.pdf')));
- if (!files.length) {
- uploadMsg.textContent = '请选择图片或PDF文件';
+ if (!selectedFiles.length) {
+ uploadMsg.textContent = '请选择文件';
uploadMsg.className = 'status-message error';
uploadMsg.style.display = 'block';
return;
@@ -409,10 +464,10 @@ uploadForm.addEventListener('submit', async (e) => {
showProgress();
setProgress(5, '预处理中');
const formData = new FormData();
- for (let i = 0; i < files.length; i++) {
- const file = files[i];
+ for (let i = 0; i < selectedFiles.length; i++) {
+ const file = selectedFiles[i];
if (file.type.startsWith('image/')) {
- setProgress(5 + Math.round((i/files.length)*45), '转换图片');
+ setProgress(5 + Math.round((i/selectedFiles.length)*45), '转换图片');
try {
const jpegFile = await convertToJpeg(file);
formData.append('file', jpegFile);
@@ -420,7 +475,6 @@ uploadForm.addEventListener('submit', async (e) => {
formData.append('file', file);
}
} else {
- // PDF 直接添加
formData.append('file', file);
}
}
@@ -431,7 +485,8 @@ uploadForm.addEventListener('submit', async (e) => {
const timer = setInterval(() => {
prog = Math.min(95, prog + 1);
setProgress(prog, '识别中');
- }, 120);
+ }, 200);
+
const resp = await fetch('/elastic/upload/', {
method: 'POST',
credentials: 'same-origin',
@@ -447,11 +502,8 @@ uploadForm.addEventListener('submit', async (e) => {
uploadMsg.textContent = data.message || '识别成功';
uploadMsg.className = 'status-message success';
uploadMsg.style.display = 'block';
- const urls = data.image_urls || (data.image_url ? [data.image_url] : []);
- setPreviewList(urls);
- renderFormFromObject(data.data || {});
- currentImageRel = data.images || (data.image ? [data.image] : []);
- confirmBtn.disabled = false;
+
+ renderPendingItems(data.items || []);
setTimeout(hideProgress, 800);
} catch (e) {
uploadMsg.textContent = e.message || '发生错误';
@@ -462,9 +514,14 @@ uploadForm.addEventListener('submit', async (e) => {
});
confirmBtn.addEventListener('click', async () => {
- confirmMsg.textContent = '';
+ confirmMsg.textContent = '正在录入...';
try {
- const edited = objectFromForm();
+ const payload = {
+ items: currentItems.map(it => ({
+ data: it.data,
+ image: it.images
+ }))
+ };
const resp = await fetch('/elastic/confirm/', {
method: 'POST',
credentials: 'same-origin',
@@ -472,7 +529,7 @@ confirmBtn.addEventListener('click', async () => {
'Content-Type': 'application/json',
'X-CSRFToken': getCookie('csrftoken') || ''
},
- body: JSON.stringify({ data: edited, image: currentImageRel })
+ body: JSON.stringify(payload)
});
const data = await resp.json();
if (!resp.ok || data.status !== 'success') {
@@ -480,6 +537,12 @@ confirmBtn.addEventListener('click', async () => {
}
confirmMsg.textContent = data.message || '录入成功';
confirmMsg.style.color = '#179957';
+ // 录入成功后清空待处理列表
+ pendingItems.innerHTML = '';
+ currentItems = [];
+ selectedFiles = [];
+ updateFileHint();
+ confirmBtn.disabled = true;
} catch (e) {
confirmMsg.textContent = e.message || '发生错误';
confirmMsg.style.color = '#d14343';
@@ -489,13 +552,11 @@ confirmBtn.addEventListener('click', async () => {
clearBtn.addEventListener('click', () => {
fileInput.value = '';
previewList.innerHTML = '';
- resultBox.value = '';
- kvForm.innerHTML = '';
- kvForm.appendChild(createRow()); // 保留一个空行
+ pendingItems.innerHTML = '';
uploadMsg.textContent = '';
confirmMsg.textContent = '';
confirmBtn.disabled = true;
- currentImageRel = [];
+ currentItems = [];
selectedFiles = [];
updateFileHint();
});
diff --git a/elastic/views.py b/elastic/views.py
index ea3a5ea..92c45b0 100644
--- a/elastic/views.py
+++ b/elastic/views.py
@@ -607,107 +607,96 @@ def upload(request):
images_dir = os.path.join(settings.MEDIA_ROOT, "images")
os.makedirs(images_dir, exist_ok=True)
- rel_paths = []
- image_urls = []
- data_list = []
- # 预处理文件列表,处理PDF转换
- processed_files = []
+ # 按照原始文件进行分组处理
+ file_results = []
+
for f in files:
- if f.name.lower().endswith('.pdf'):
+ group_images = [] # 存储该文件生成的所有图片路径信息 (abs_path, filename)
+ is_pdf = f.name.lower().endswith('.pdf')
+
+ if is_pdf:
if not HAS_PDF_SUPPORT:
return JsonResponse({"status": "error", "message": f"服务器未安装PDF处理组件(PyMuPDF): {PDF_ERROR}"}, status=500)
- # 将PDF保存到临时文件
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
for chunk in f.chunks():
tmp.write(chunk)
tmp_path = tmp.name
try:
- # 转换PDF为图片列表
doc = fitz.open(tmp_path)
for i in range(len(doc)):
page = doc.load_page(i)
- # 降低 DPI 从 200 到 150,在保证准确率的同时显著减小图片体积,加快上传速度
- pix = page.get_pixmap(dpi=150)
+ pix = page.get_pixmap(dpi=150)
img_filename = f"{uuid.uuid4()}_page_{i+1}.jpg"
img_abs_path = os.path.join(images_dir, img_filename)
pix.save(img_abs_path)
- processed_files.append((img_abs_path, img_filename, f"{f.name}_p{i+1}"))
+ group_images.append((img_abs_path, img_filename))
doc.close()
except Exception as e:
- return JsonResponse({"status": "error", "message": f"PDF转换失败: {str(e)}"}, status=500)
+ return JsonResponse({"status": "error", "message": f"PDF {f.name} 转换失败: {str(e)}"}, status=500)
finally:
if os.path.exists(tmp_path):
os.remove(tmp_path)
else:
- # 普通图片处理
filename = f"{uuid.uuid4()}_{f.name}"
abs_path = os.path.join(images_dir, filename)
with open(abs_path, "wb") as dst:
for chunk in f.chunks():
dst.write(chunk)
- processed_files.append((abs_path, filename, f.name))
+ group_images.append((abs_path, filename))
- # 使用线程池并行调用 OCR 接口以提升速度
- def run_ocr(file_info):
- abs_path, filename, original_name = file_info
- try:
- data = ocr_and_extract_info(abs_path)
- return (data, filename)
- except Exception as e:
- return (e, filename)
+ # OCR every image produced from this file in parallel.
+ def run_ocr(img_info):
+     """Return the OCR result for one (abs_path, filename) pair, or None if OCR fails."""
+     abs_p, _fname = img_info
+     try:
+         return ocr_and_extract_info(abs_p)
+     except Exception:
+         # Best effort: an image that fails OCR is skipped rather than aborting the upload.
+         return None
- rel_paths = []
- image_urls = []
- data_list = []
+ group_data_list = []
+ # ThreadPoolExecutor raises ValueError when max_workers <= 0, so the pool is
+ # only created when this file actually produced images.
+ if group_images:
+     with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
+         # map() yields results in submission (page) order, unlike as_completed(),
+         # so the first-value-wins merge that follows is deterministic.
+         group_data_list = [res for res in executor.map(run_ocr, group_images) if res]
- # 限制最大线程数为 8,避免过多并发导致 API 限制或资源耗尽
- with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
- future_to_file = {executor.submit(run_ocr, f_info): f_info for f_info in processed_files}
- for future in concurrent.futures.as_completed(future_to_file):
- result, filename = future.result()
- if isinstance(result, Exception):
- return JsonResponse({"status": "error", "message": f"文件 {filename} 识别失败: {str(result)}"}, status=500)
-
- if result:
- data_list.append(result)
-
- rel_path = f"images/{filename}"
- rel_paths.append(rel_path)
- image_urls.append(request.build_absolute_uri(settings.MEDIA_URL + rel_path))
+ # 合并该文件的多页识别结果
+ merged_group_data = {}
+ for item in group_data_list:
+ if not isinstance(item, dict): continue
+ for k, v in item.items():
+ key = str(k).strip()
+ if not key: continue
+ if key not in merged_group_data or merged_group_data.get(key) in (None, ''):
+ merged_group_data[key] = v
+ elif merged_group_data.get(key) != v:
+ base = key
+ idx = 2
+ while f"{base}_{idx}" in merged_group_data: idx += 1
+ merged_group_data[f"{base}_{idx}"] = v
- if not data_list:
- return JsonResponse({"status": "error", "message": "无法识别图片内容"}, status=400)
+ if not merged_group_data:
+ # 如果没识别到,至少保留一个空结构或者包含文件名的提示
+ merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"}
- merged = {}
- for item in data_list:
- if not isinstance(item, dict):
- continue
- for k, v in item.items():
- key = str(k).strip()
- if not key:
- continue
- if key not in merged or merged.get(key) in (None, ''):
- merged[key] = v
- continue
- if merged.get(key) == v:
- continue
- base = key
- idx = 2
- while f"{base}_{idx}" in merged:
- idx += 1
- merged[f"{base}_{idx}"] = v
+ rel_paths = [f"images/{img[1]}" for img in group_images]
+ image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]
+
+ file_results.append({
+ "name": f.name,
+ "data": merged_group_data,
+ "images": rel_paths,
+ "image_urls": image_urls,
+ })
return JsonResponse({
"status": "success",
- "message": "识别成功,请确认数据后点击录入",
- "data": merged,
- "images": rel_paths,
- "image_urls": image_urls,
- "image": rel_paths[0] if rel_paths else "",
- "image_url": image_urls[0] if image_urls else "",
+ "message": f"成功处理 {len(file_results)} 个文件,请确认数据后点击录入",
+ "items": file_results,
})
@@ -715,7 +704,6 @@ def upload(request):
@require_http_methods(["POST"])
def confirm(request):
if request.session.get("user_id") is None:
- # 允许从payload中带入user_id作为后备(便于前端已知用户时继续操作)
try:
payload_for_uid = json.loads(request.body.decode("utf-8"))
except Exception:
@@ -732,77 +720,97 @@ def confirm(request):
except json.JSONDecodeError:
return JsonResponse({"status": "error", "message": "JSON无效"}, status=400)
- edited = payload.get("data") or {}
- image_rel = payload.get("image") or ""
- if not isinstance(edited, dict) or not edited:
- return JsonResponse({"status": "error", "message": "数据不能为空"}, status=400)
+ # 支持单项或批量入库
+ items = payload.get("items")
+ if not items:
+ # 兼容旧版本单项入库
+ items = [{
+ "data": payload.get("data"),
+ "image": payload.get("image")
+ }]
- ensure_type_in_list(edited.get("数据类型"))
- image_ref_to_store = ""
- temp_files_to_delete = []
- image_rels = _parse_image_refs(image_rel)
- if image_rels:
- images_dir = os.path.join(settings.MEDIA_ROOT, "images")
- os.makedirs(images_dir, exist_ok=True)
- image_refs = []
- for rel in image_rels:
- src_abs = os.path.join(settings.MEDIA_ROOT, rel)
- if not os.path.isfile(src_abs):
- return JsonResponse({"status": "error", "message": "图片文件不存在"}, status=400)
- webp_name = f"{uuid.uuid4().hex}.webp"
- webp_abs = os.path.join(images_dir, webp_name)
- try:
- with Image.open(src_abs) as im:
- if im.mode in ("RGBA", "LA", "P"):
- im = im.convert("RGBA")
- else:
- im = im.convert("RGB")
- im.save(webp_abs, format="WEBP", quality=80)
- except Exception:
+ success_count = 0
+ errors = []
+
+ for item in items:
+ edited = item.get("data") or {}
+ image_rel = item.get("image") or ""
+
+ if not isinstance(edited, dict) or not edited:
+ errors.append("数据项不能为空")
+ continue
+
+ ensure_type_in_list(edited.get("数据类型"))
+ image_ref_to_store = ""
+ temp_files_to_delete = []
+ image_rels = _parse_image_refs(image_rel)
+
+ if image_rels:
+ images_dir = os.path.join(settings.MEDIA_ROOT, "images")
+ os.makedirs(images_dir, exist_ok=True)
+ image_refs = []
+ for rel in image_rels:
+ src_abs = os.path.join(settings.MEDIA_ROOT, rel)
+ if not os.path.isfile(src_abs):
+ errors.append(f"图片文件 {rel} 不存在")
+ continue
+
+ webp_name = f"{uuid.uuid4().hex}.webp"
+ webp_abs = os.path.join(images_dir, webp_name)
try:
- if os.path.isfile(webp_abs):
- os.remove(webp_abs)
+ with Image.open(src_abs) as im:
+ if im.mode in ("RGBA", "LA", "P"):
+ im = im.convert("RGBA")
+ else:
+ im = im.convert("RGB")
+ im.save(webp_abs, format="WEBP", quality=80)
except Exception:
- pass
- return JsonResponse({"status": "error", "message": "图片转换WEBP失败"}, status=500)
+ errors.append(f"图片 {rel} 转换WEBP失败")
+ continue
- try:
- object_name = f"images/{webp_name}"
- from minio_storage.minio_connect import upload_file, is_minio_configured
- if is_minio_configured():
- upload_file(webp_abs, object_name, content_type="image/webp")
- image_refs.append(f"minio:{object_name}")
- temp_files_to_delete.extend([src_abs, webp_abs])
- else:
- # Fallback to local storage
- image_refs.append(f"local:{object_name}")
- # In local case, we keep the webp file and only delete the original temporary file
- temp_files_to_delete.append(src_abs)
- except Exception as e:
- return JsonResponse({"status": "error", "message": f"存储图片失败: {e}"}, status=500)
- if len(image_refs) == 1:
- image_ref_to_store = image_refs[0]
- elif len(image_refs) > 1:
- image_ref_to_store = json_to_string(image_refs)
+ try:
+ object_name = f"images/{webp_name}"
+ from minio_storage.minio_connect import upload_file, is_minio_configured
+ if is_minio_configured():
+ upload_file(webp_abs, object_name, content_type="image/webp")
+ image_refs.append(f"minio:{object_name}")
+ temp_files_to_delete.extend([src_abs, webp_abs])
+ else:
+ image_refs.append(f"local:{object_name}")
+ temp_files_to_delete.append(src_abs)
+ except Exception as e:
+ errors.append(f"存储图片 {rel} 失败: {e}")
+ continue
+
+ if len(image_refs) == 1:
+ image_ref_to_store = image_refs[0]
+ elif len(image_refs) > 1:
+ image_ref_to_store = json_to_string(image_refs)
- to_store = {
- "writer_id": str(request.session.get("user_id")),
- "data": json_to_string(edited),
- "image": image_ref_to_store,
- }
+ to_store = {
+ "writer_id": str(request.session.get("user_id")),
+ "data": json_to_string(edited),
+ "image": image_ref_to_store,
+ }
- ok = insert_data(to_store)
- if not ok:
- return JsonResponse({"status": "error", "message": "写入ES失败"}, status=500)
+ ok = insert_data(to_store)
+ if ok:
+ success_count += 1
+ # 清理临时文件
+ for p in temp_files_to_delete:
+ if p and os.path.isfile(p):
+ try: os.remove(p)
+ except: pass
+ else:
+ errors.append("写入ES失败")
- try:
- for p in temp_files_to_delete:
- if p and os.path.isfile(p):
- os.remove(p)
- except Exception:
- pass
-
- return JsonResponse({"status": "success", "message": "数据录入成功", "data": edited})
+ if success_count > 0:
+ msg = f"成功录入 {success_count} 条数据"
+ if errors:
+ msg += f" (遇到 {len(errors)} 个错误)"
+ return JsonResponse({"status": "success", "message": msg})
+ else:
+ return JsonResponse({"status": "error", "message": "录入失败: " + "; ".join(errors[:3])}, status=500)
@require_http_methods(["GET"])