From abc435afe6f24b927a68040c1247af607036f5b4 Mon Sep 17 00:00:00 2001 From: Viajero-tect <2737079298@qq.com> Date: Sat, 21 Feb 2026 16:35:06 +0800 Subject: [PATCH] =?UTF-8?q?=E7=89=88=E6=9C=AC=E6=9B=B4=E6=96=B0=EF=BC=9A?= =?UTF-8?q?=201=E3=80=81=E5=B7=B2=E5=AE=9E=E7=8E=B0=E5=A4=9A=E5=9B=BE?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E5=B9=B6=E5=85=A5=E5=BA=93=202=E3=80=81?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=9B=BE=E7=89=87=E4=B8=8A=E4=BC=A0=E6=97=B6?= =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=9B=BE=E7=89=87=E5=8A=9F=E8=83=BD=203?= =?UTF-8?q?=E3=80=81=E6=94=B9=E7=94=A8=E6=A8=A1=E5=9E=8Bglm-4.6v=E9=A2=84?= =?UTF-8?q?=E8=AE=A15=E6=9C=88=E4=BB=BD=E5=88=B0=E6=9C=9F=204=E3=80=81?= =?UTF-8?q?=E5=B7=B2=E5=AF=B9=E7=8E=AF=E5=A2=83txt=E5=81=9A=E6=9B=B4?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- elastic/templates/elastic/manage.html | 8 +- elastic/templates/elastic/upload.html | 123 +++++++++++---- elastic/views.py | 219 +++++++++++++++++++------- requirements.txt | 1 + 4 files changed, 257 insertions(+), 94 deletions(-) diff --git a/elastic/templates/elastic/manage.html b/elastic/templates/elastic/manage.html index 59041d9..6d7b160 100644 --- a/elastic/templates/elastic/manage.html +++ b/elastic/templates/elastic/manage.html @@ -328,7 +328,7 @@ function renderTable(data) { row.innerHTML = ` ${item._id || item.id || ''} - ${item.image_url ? `` : '无图片'} +
${buildImageCell(item)}
${escapeHtml(displayData)}
@@ -343,6 +343,12 @@ function renderTable(data) { }); } +function buildImageCell(item) { + const urls = Array.isArray(item.image_urls) ? item.image_urls : (item.image_url ? [item.image_url] : []); + if (!urls.length) return '无图片'; + return urls.map(u => ``).join(''); +} + // 转义HTML以防止XSS function escapeHtml(unsafe) { return unsafe diff --git a/elastic/templates/elastic/upload.html b/elastic/templates/elastic/upload.html index a4a443e..c7539df 100644 --- a/elastic/templates/elastic/upload.html +++ b/elastic/templates/elastic/upload.html @@ -42,6 +42,10 @@ .preview-box {flex: 1; text-align: center; } .preview-box h3 {margin-top: 0;color: #334155; } .preview-box img { max-width: 100%;max-height: 300px;border: 1px solid #e2e8f0;border-radius: 8px;object-fit: contain;} + .preview-list {display: grid;grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));gap: 12px;} + .preview-item {position: relative;} + .preview-item img {width: 100%;max-height: 220px;border: 1px solid #e2e8f0;border-radius: 8px;object-fit: contain;} + .preview-remove {position: absolute;top: 6px;right: 6px;border: none;border-radius: 999px;background: rgba(15,23,42,0.8);color: #fff;width: 24px;height: 24px;cursor: pointer;display: flex;align-items: center;justify-content: center;font-size: 14px;line-height: 1;} .result-box {flex: 1;} .result-box h3 { margin-top: 0; color: #334155;} .form-controls { display: flex;gap: 8px;margin-bottom: 12px;flex-wrap: wrap;} @@ -89,7 +93,8 @@

点击下方按钮选择图片,或拖拽图片到此区域

{% csrf_token %} - + +
@@ -103,7 +108,7 @@

图片预览

- 预览 +
@@ -134,7 +139,8 @@ function getCookie(name) { const uploadForm = document.getElementById('uploadForm'); const fileInput = document.getElementById('fileInput'); -const preview = document.getElementById('preview'); +const fileHint = document.getElementById('fileHint'); +const previewList = document.getElementById('previewList'); const resultBox = document.getElementById('resultBox'); const uploadMsg = document.getElementById('uploadMsg'); const confirmBtn = document.getElementById('confirmBtn'); @@ -148,7 +154,8 @@ const progressWrap = document.getElementById('progressWrap'); const progressBar = document.getElementById('progressBar'); const progressText = document.getElementById('progressText'); -let currentImageRel = ''; +let currentImageRel = []; +let selectedFiles = []; function setProgress(p, text){ const v = Math.max(0, Math.min(100, Math.round(p||0))); @@ -221,22 +228,64 @@ function handleDrop(e) { const dt = e.dataTransfer; const files = dt.files; if (files.length) { - fileInput.files = files; - const event = new Event('change', { bubbles: true }); - fileInput.dispatchEvent(event); + addFiles(files); } } -// 文件选择后预览 -fileInput.addEventListener('change', function(e) { - const file = e.target.files[0]; - if (file && file.type.startsWith('image/')) { - const reader = new FileReader(); - reader.onload = function(e) { - preview.src = e.target.result; +function setPreviewList(urls) { + previewList.innerHTML = ''; + (urls || []).forEach((url, index) => { + if (!url) return; + const item = document.createElement('div'); + item.className = 'preview-item'; + item.dataset.index = String(index); + const img = document.createElement('img'); + img.src = url; + img.alt = '预览'; + const btn = document.createElement('button'); + btn.type = 'button'; + btn.className = 'preview-remove'; + btn.textContent = '×'; + btn.onclick = () => { + const idx = Number(item.dataset.index); + if (!Number.isNaN(idx)) { + selectedFiles.splice(idx, 1); + const urls = selectedFiles.map(f => URL.createObjectURL(f)); + setPreviewList(urls); + updateFileHint(); + setTimeout(() => urls.forEach(u => URL.revokeObjectURL(u)), 0); + } }; - reader.readAsDataURL(file); - } + item.appendChild(img); + item.appendChild(btn); + previewList.appendChild(item); + }); +} + +function updateFileHint() { + const count = selectedFiles.length; + fileHint.textContent = count ? `已选择 ${count} 张` : '未选择文件'; +} + +function addFiles(files) { + const incoming = Array.from(files || []).filter(f => f && f.type.startsWith('image/')); + const existingKeys = new Set(selectedFiles.map(f => `${f.name}|${f.size}|${f.lastModified}`)); + incoming.forEach(f => { + const key = `${f.name}|${f.size}|${f.lastModified}`; + if (!existingKeys.has(key)) { + existingKeys.add(key); + selectedFiles.push(f); + } + }); + const urls = selectedFiles.map(f => URL.createObjectURL(f)); + setPreviewList(urls); + updateFileHint(); + setTimeout(() => urls.forEach(u => URL.revokeObjectURL(u)), 0); +} + +fileInput.addEventListener('change', function(e) { + addFiles(e.target.files || []); + fileInput.value = ''; }); function createRow(k = '', v = '') { @@ -329,10 +378,10 @@ uploadForm.addEventListener('submit', async (e) => { confirmMsg.textContent = ''; confirmBtn.disabled = true; resultBox.value = ''; - currentImageRel = ''; + currentImageRel = []; - const file = fileInput.files[0]; - if (!file) { + const files = Array.from(selectedFiles || []).filter(f => f && f.type.startsWith('image/')); + if (!files.length) { uploadMsg.textContent = '请选择图片文件'; uploadMsg.className = 'status-message error'; uploadMsg.style.display = 'block'; @@ -341,17 +390,21 @@ uploadForm.addEventListener('submit', async (e) => { showProgress(); setProgress(5, '转换为JPG'); - let jpegFile = file; - try { - jpegFile = await convertToJpeg(file); - setProgress(50, '转换为JPG'); - preview.src = URL.createObjectURL(jpegFile); - } catch (_) { - jpegFile = file; - setProgress(50, '转换为JPG'); - } const formData = new FormData(); - formData.append('file', jpegFile); + const converted = []; + for (let i = 0; i < files.length; i++) { + const file = files[i]; + let jpegFile = file; + try { + jpegFile = await convertToJpeg(file); + } catch (_) { + jpegFile = file; + } + converted.push(jpegFile); + const pct = 5 + Math.round(((i + 1) / files.length) * 45); + setProgress(pct, '转换为JPG'); + } + converted.forEach(f => formData.append('file', f)); try { let prog = 50; @@ -375,9 +428,10 @@ uploadForm.addEventListener('submit', async (e) => { uploadMsg.textContent = data.message || '识别成功'; uploadMsg.className = 'status-message success'; uploadMsg.style.display = 'block'; - preview.src = data.image_url; + const urls = data.image_urls || (data.image_url ? [data.image_url] : []); + setPreviewList(urls); renderFormFromObject(data.data || {}); - currentImageRel = data.image; + currentImageRel = data.images || (data.image ? [data.image] : []); confirmBtn.disabled = false; setTimeout(hideProgress, 800); } catch (e) { @@ -415,15 +469,20 @@ confirmBtn.addEventListener('click', async () => { clearBtn.addEventListener('click', () => { fileInput.value = ''; - preview.src = ''; + previewList.innerHTML = ''; resultBox.value = ''; kvForm.innerHTML = ''; kvForm.appendChild(createRow()); // 保留一个空行 uploadMsg.textContent = ''; confirmMsg.textContent = ''; confirmBtn.disabled = true; + currentImageRel = []; + selectedFiles = []; + updateFileHint(); }); +updateFileHint(); + // 退出登录处理 document.getElementById('logoutBtn').addEventListener('click', async () => { const msg = document.getElementById('logoutMsg'); diff --git a/elastic/views.py b/elastic/views.py index 8048e9d..3454adb 100644 --- a/elastic/views.py +++ b/elastic/views.py @@ -42,6 +42,29 @@ def _image_ref_to_url(request, image_ref: str) -> str: return '' +def _parse_image_refs(image_ref): + if not image_ref: + return [] + if isinstance(image_ref, (list, tuple)): + return [str(x) for x in image_ref if str(x).strip()] + if isinstance(image_ref, str): + s = image_ref.strip() + if not s: + return [] + parsed = None + if s[:1] in ('[', '"'): + try: + parsed = json.loads(s) + except Exception: + parsed = None + if isinstance(parsed, list): + return [str(x) for x in parsed if str(x).strip()] + if isinstance(parsed, str): + s = parsed.strip() + return [s] if s else [] + return [] + + def _attach_image_urls(request, items): out = [] for it in list(items or []): @@ -49,7 +72,11 @@ def _attach_image_urls(request, items): d = dict(it or {}) except Exception: continue - d['image_url'] = _image_ref_to_url(request, d.get('image', '')) + refs = _parse_image_refs(d.get('image', '')) + urls = [_image_ref_to_url(request, r) for r in refs if str(r).strip()] + urls = [u for u in urls if u] + d['image_urls'] = urls + d['image_url'] = urls[0] if urls else _image_ref_to_url(request, d.get('image', '')) out.append(d) return out @@ -180,7 +207,11 @@ def update_data(request, doc_id): if "writer_id" in payload: updated["writer_id"] = payload["writer_id"] if "image" in payload: - updated["image"] = payload["image"] + img_val = payload["image"] + if isinstance(img_val, list): + updated["image"] = json_to_string(img_val) + else: + updated["image"] = img_val if "data" in payload: v = payload["data"] if isinstance(v, dict): @@ -359,7 +390,7 @@ def string_to_json(s): # 移植自 a.py 的核心:调用大模型进行 OCR/信息抽取 def ocr_and_extract_info(image_path: str): - from openai import OpenAI + # from openai import OpenAI def encode_image(path: str) -> str: with open(path, "rb") as f: return base64.b64encode(f.read()).decode("utf-8") @@ -372,12 +403,42 @@ def ocr_and_extract_info(image_path: str): # raise RuntimeError("缺少 AISTUDIO_API_KEY,请在环境变量或 settings 中配置") - api_key = getattr(settings, "AISTUDIO_API_KEY", "") - base_url = getattr(settings, "OPENAI_BASE_URL", "") - if not api_key or not base_url: - raise RuntimeError("缺少模型服务配置,请设置 AISTUDIO_API_KEY 与 OPENAI_BASE_URL") - client = OpenAI(api_key=api_key, base_url=base_url) + # api_key = getattr(settings, "AISTUDIO_API_KEY", "") + # base_url = getattr(settings, "OPENAI_BASE_URL", "") + # if not api_key or not base_url: + # raise RuntimeError("缺少模型服务配置,请设置 AISTUDIO_API_KEY 与 OPENAI_BASE_URL") + # client = OpenAI(api_key=api_key, base_url=base_url) + # types = get_type_list() + # chat_completion = client.chat.completions.create( + # messages=[ + # {"role": "system", "content": "你是一个能理解图片和文本的助手,请根据用户提供的信息进行回答。"}, + # { + # "role": "user", + # "content": [ + # {"type": "text", "text": f"请识别这张图片中的信息,将你认为重要的数据转换为不包含嵌套的json,不要显示其它信息以便于解析,直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型,除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定,请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"}, + # {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}, + # ], + # }, + # ], + # model="glm-5", + # ) + # response_text = chat_completion.choices[0].message.content + from zai import ZhipuAiClient + import httpx + # api_key = ( + # getattr(settings, "ZHIPU_API_KEY", "") + # or getattr(settings, "ZAI_API_KEY", "") + # or getattr(settings, "AISTUDIO_API_KEY", "") + # ) + # if not api_key: + # raise RuntimeError("缺少模型服务配置,请设置 ZHIPU_API_KEY") + # base_url = ( + # getattr(settings, "ZHIPU_BASE_URL", "") + # or getattr(settings, "ZAI_BASE_URL", "") + # or "https://open.bigmodel.cn/api/paas/v4/" + # ) + client = ZhipuAiClient(api_key="fb83a3f91e8c4e45af811236548765a2.cX4kUhigHm7VNowf") types = get_type_list() chat_completion = client.chat.completions.create( messages=[ @@ -390,9 +451,8 @@ def ocr_and_extract_info(image_path: str): ], }, ], - model=getattr(settings, "OPENAI_MODEL_NAME", "ernie-4.5-turbo-vl-32k"), + model="glm-4.6v", ) - response_text = chat_completion.choices[0].message.content def parse_response(text: str): @@ -448,35 +508,66 @@ def upload(request): else: return JsonResponse({"status": "error", "message": "未登录"}, status=401) - file = request.FILES.get("file") - if not file: + files = request.FILES.getlist("file") + if not files: + one = request.FILES.get("file") + if one: + files = [one] + if not files: return JsonResponse({"status": "error", "message": "未选择文件"}, status=400) images_dir = os.path.join(settings.MEDIA_ROOT, "images") os.makedirs(images_dir, exist_ok=True) - filename = f"{uuid.uuid4()}_{file.name}" - abs_path = os.path.join(images_dir, filename) - - with open(abs_path, "wb") as dst: - for chunk in file.chunks(): - dst.write(chunk) - - try: - data = ocr_and_extract_info(abs_path) - if not data: - return JsonResponse({"status": "error", "message": "无法识别图片内容"}, status=400) - + rel_paths = [] + image_urls = [] + data_list = [] + for file in files: + filename = f"{uuid.uuid4()}_{file.name}" + abs_path = os.path.join(images_dir, filename) + with open(abs_path, "wb") as dst: + for chunk in file.chunks(): + dst.write(chunk) + try: + data = ocr_and_extract_info(abs_path) + except Exception as e: + return JsonResponse({"status": "error", "message": str(e)}, status=500) + if data: + data_list.append(data) rel_path = f"images/{filename}" - image_url = request.build_absolute_uri(settings.MEDIA_URL + rel_path) - return JsonResponse({ - "status": "success", - "message": "识别成功,请确认数据后点击录入", - "data": data, - "image": rel_path, - "image_url": image_url, - }) - except Exception as e: - return JsonResponse({"status": "error", "message": str(e)}, status=500) + rel_paths.append(rel_path) + image_urls.append(request.build_absolute_uri(settings.MEDIA_URL + rel_path)) + + if not data_list: + return JsonResponse({"status": "error", "message": "无法识别图片内容"}, status=400) + + merged = {} + for item in data_list: + if not isinstance(item, dict): + continue + for k, v in item.items(): + key = str(k).strip() + if not key: + continue + if key not in merged or merged.get(key) in (None, ''): + merged[key] = v + continue + if merged.get(key) == v: + continue + base = key + idx = 2 + while f"{base}_{idx}" in merged: + idx += 1 + merged[f"{base}_{idx}"] = v + + return JsonResponse({ + "status": "success", + "message": "识别成功,请确认数据后点击录入", + "data": merged, + "images": rel_paths, + "image_urls": image_urls, + "image": rel_paths[0] if rel_paths else "", + "image_url": image_urls[0] if image_urls else "", + }) # 确认并入库 @@ -508,38 +599,44 @@ def confirm(request): ensure_type_in_list(edited.get("数据类型")) image_ref_to_store = "" temp_files_to_delete = [] - if image_rel: + image_rels = _parse_image_refs(image_rel) + if image_rels: images_dir = os.path.join(settings.MEDIA_ROOT, "images") os.makedirs(images_dir, exist_ok=True) - src_abs = os.path.join(settings.MEDIA_ROOT, image_rel) - if not os.path.isfile(src_abs): - return JsonResponse({"status": "error", "message": "图片文件不存在"}, status=400) - - webp_name = f"{uuid.uuid4().hex}.webp" - webp_abs = os.path.join(images_dir, webp_name) - try: - with Image.open(src_abs) as im: - if im.mode in ("RGBA", "LA", "P"): - im = im.convert("RGBA") - else: - im = im.convert("RGB") - im.save(webp_abs, format="WEBP", quality=80) - except Exception: + image_refs = [] + for rel in image_rels: + src_abs = os.path.join(settings.MEDIA_ROOT, rel) + if not os.path.isfile(src_abs): + return JsonResponse({"status": "error", "message": "图片文件不存在"}, status=400) + webp_name = f"{uuid.uuid4().hex}.webp" + webp_abs = os.path.join(images_dir, webp_name) try: - if os.path.isfile(webp_abs): - os.remove(webp_abs) + with Image.open(src_abs) as im: + if im.mode in ("RGBA", "LA", "P"): + im = im.convert("RGBA") + else: + im = im.convert("RGB") + im.save(webp_abs, format="WEBP", quality=80) except Exception: - pass - return JsonResponse({"status": "error", "message": "图片转换WEBP失败"}, status=500) + try: + if os.path.isfile(webp_abs): + os.remove(webp_abs) + except Exception: + pass + return JsonResponse({"status": "error", "message": "图片转换WEBP失败"}, status=500) - try: - object_name = f"images/{webp_name}" - from minio_storage.minio_connect import upload_file - upload_file(webp_abs, object_name, content_type="image/webp") - image_ref_to_store = f"minio:{object_name}" - temp_files_to_delete.extend([src_abs, webp_abs]) - except Exception as e: - return JsonResponse({"status": "error", "message": f"上传到MinIO失败: {e}"}, status=500) + try: + object_name = f"images/{webp_name}" + from minio_storage.minio_connect import upload_file + upload_file(webp_abs, object_name, content_type="image/webp") + image_refs.append(f"minio:{object_name}") + temp_files_to_delete.extend([src_abs, webp_abs]) + except Exception as e: + return JsonResponse({"status": "error", "message": f"上传到MinIO失败: {e}"}, status=500) + if len(image_refs) == 1: + image_ref_to_store = image_refs[0] + elif len(image_refs) > 1: + image_ref_to_store = json_to_string(image_refs) to_store = { "writer_id": str(request.session.get("user_id")), diff --git a/requirements.txt b/requirements.txt index b1a6338..861eb21 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ elasticsearch-dsl==7.4.1 requests==2.32.3 openai==1.52.2 httpx==0.27.2 +zai-sdk==0.3.0 Pillow==10.4.0 minio>=7.2.0,<8 gunicorn==21.2.0