From abc435afe6f24b927a68040c1247af607036f5b4 Mon Sep 17 00:00:00 2001
From: Viajero-tect <2737079298@qq.com>
Date: Sat, 21 Feb 2026 16:35:06 +0800
Subject: [PATCH] =?UTF-8?q?=E7=89=88=E6=9C=AC=E6=9B=B4=E6=96=B0=EF=BC=9A?=
=?UTF-8?q?=201=E3=80=81=E5=B7=B2=E5=AE=9E=E7=8E=B0=E5=A4=9A=E5=9B=BE?=
=?UTF-8?q?=E8=AF=86=E5=88=AB=E5=B9=B6=E5=85=A5=E5=BA=93=202=E3=80=81?=
=?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=9B=BE=E7=89=87=E4=B8=8A=E4=BC=A0=E6=97=B6?=
=?UTF-8?q?=E5=88=A0=E9=99=A4=E5=9B=BE=E7=89=87=E5=8A=9F=E8=83=BD=203?=
=?UTF-8?q?=E3=80=81=E6=94=B9=E7=94=A8=E6=A8=A1=E5=9E=8Bglm-4.6v=E9=A2=84?=
=?UTF-8?q?=E8=AE=A15=E6=9C=88=E4=BB=BD=E5=88=B0=E6=9C=9F=204=E3=80=81?=
=?UTF-8?q?=E5=B7=B2=E5=AF=B9=E7=8E=AF=E5=A2=83txt=E5=81=9A=E6=9B=B4?=
=?UTF-8?q?=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
elastic/templates/elastic/manage.html | 8 +-
elastic/templates/elastic/upload.html | 123 +++++++++++----
elastic/views.py | 219 +++++++++++++++++++-------
requirements.txt | 1 +
4 files changed, 257 insertions(+), 94 deletions(-)
diff --git a/elastic/templates/elastic/manage.html b/elastic/templates/elastic/manage.html
index 59041d9..6d7b160 100644
--- a/elastic/templates/elastic/manage.html
+++ b/elastic/templates/elastic/manage.html
@@ -328,7 +328,7 @@ function renderTable(data) {
row.innerHTML = `
${item._id || item.id || ''} |
- ${item.image_url ? ` ` : '无图片'}
+ ${buildImageCell(item)}
|
${escapeHtml(displayData)}
@@ -343,6 +343,12 @@ function renderTable(data) {
});
}
+/**
+ * Build the HTML for a table row's image cell.
+ *
+ * Uses `item.image_urls` when it is an array, otherwise falls back to the
+ * single `item.image_url` field. Returns the placeholder text '无图片' when
+ * there is no URL to show.
+ *
+ * NOTE(review): the markup inside the original template literal is blank in
+ * this patch, so each URL rendered as a single space. The <img> below is a
+ * minimal reconstruction; confirm classes/sizing against the real template.
+ *
+ * @param {{image_urls?: string[], image_url?: string}} item - row data.
+ * @returns {string} HTML string for the cell.
+ */
+function buildImageCell(item) {
+  const candidates = Array.isArray(item.image_urls)
+    ? item.image_urls
+    : (item.image_url ? [item.image_url] : []);
+  const urls = candidates.filter(Boolean);
+  if (!urls.length) return '无图片';
+  // Escape each URL before interpolating it into the src attribute.
+  const escapeAttr = (value) => String(value)
+    .replace(/&/g, '&amp;')
+    .replace(/"/g, '&quot;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;');
+  return urls.map((u) => `<img src="${escapeAttr(u)}" alt="图片">`).join('');
+}
+
// 转义HTML以防止XSS
function escapeHtml(unsafe) {
return unsafe
diff --git a/elastic/templates/elastic/upload.html b/elastic/templates/elastic/upload.html
index a4a443e..c7539df 100644
--- a/elastic/templates/elastic/upload.html
+++ b/elastic/templates/elastic/upload.html
@@ -42,6 +42,10 @@
.preview-box {flex: 1; text-align: center; }
.preview-box h3 {margin-top: 0;color: #334155; }
.preview-box img { max-width: 100%;max-height: 300px;border: 1px solid #e2e8f0;border-radius: 8px;object-fit: contain;}
+ .preview-list {display: grid;grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));gap: 12px;}
+ .preview-item {position: relative;}
+ .preview-item img {width: 100%;max-height: 220px;border: 1px solid #e2e8f0;border-radius: 8px;object-fit: contain;}
+ .preview-remove {position: absolute;top: 6px;right: 6px;border: none;border-radius: 999px;background: rgba(15,23,42,0.8);color: #fff;width: 24px;height: 24px;cursor: pointer;display: flex;align-items: center;justify-content: center;font-size: 14px;line-height: 1;}
.result-box {flex: 1;}
.result-box h3 { margin-top: 0; color: #334155;}
.form-controls { display: flex;gap: 8px;margin-bottom: 12px;flex-wrap: wrap;}
@@ -89,7 +93,8 @@
点击下方按钮选择图片,或拖拽图片到此区域
@@ -103,7 +108,7 @@
图片预览
- ![预览]()
+
@@ -134,7 +139,8 @@ function getCookie(name) {
const uploadForm = document.getElementById('uploadForm');
const fileInput = document.getElementById('fileInput');
-const preview = document.getElementById('preview');
+const fileHint = document.getElementById('fileHint');
+const previewList = document.getElementById('previewList');
const resultBox = document.getElementById('resultBox');
const uploadMsg = document.getElementById('uploadMsg');
const confirmBtn = document.getElementById('confirmBtn');
@@ -148,7 +154,8 @@ const progressWrap = document.getElementById('progressWrap');
const progressBar = document.getElementById('progressBar');
const progressText = document.getElementById('progressText');
-let currentImageRel = '';
+let currentImageRel = [];
+let selectedFiles = [];
function setProgress(p, text){
const v = Math.max(0, Math.min(100, Math.round(p||0)));
@@ -221,22 +228,64 @@ function handleDrop(e) {
const dt = e.dataTransfer;
const files = dt.files;
if (files.length) {
- fileInput.files = files;
- const event = new Event('change', { bubbles: true });
- fileInput.dispatchEvent(event);
+ addFiles(files);
}
}
-// 文件选择后预览
-fileInput.addEventListener('change', function(e) {
- const file = e.target.files[0];
- if (file && file.type.startsWith('image/')) {
- const reader = new FileReader();
- reader.onload = function(e) {
- preview.src = e.target.result;
+// Rebuild the preview grid from a list of image URLs.
+// Clears previewList, then appends one tile (image + remove button) per
+// non-empty URL. Each tile records its position in `urls` as data-index.
+function setPreviewList(urls) {
+ previewList.innerHTML = '';
+ (urls || []).forEach((url, index) => {
+ if (!url) return;
+ const item = document.createElement('div');
+ item.className = 'preview-item';
+ item.dataset.index = String(index);
+ const img = document.createElement('img');
+ img.src = url;
+ img.alt = '预览';
+ const btn = document.createElement('button');
+ btn.type = 'button';
+ btn.className = 'preview-remove';
+ btn.textContent = '×';
+ // Remove: drop the file at this tile's index from selectedFiles, then
+ // re-render the whole grid from the remaining local files.
+ // NOTE(review): this function is also called with server URLs after a
+ // successful upload; removing a tile then does not touch currentImageRel —
+ // confirm that is intended.
+ btn.onclick = () => {
+ const idx = Number(item.dataset.index);
+ if (!Number.isNaN(idx)) {
+ selectedFiles.splice(idx, 1);
+ const urls = selectedFiles.map(f => URL.createObjectURL(f));
+ setPreviewList(urls);
+ updateFileHint();
+ // Object URLs are revoked on the next timer tick, after they were assigned to the new <img> elements.
+ setTimeout(() => urls.forEach(u => URL.revokeObjectURL(u)), 0);
+ }
};
- reader.readAsDataURL(file);
- }
+ item.appendChild(img);
+ item.appendChild(btn);
+ previewList.appendChild(item);
+ });
+}
+
+// Show how many files are currently queued, or a placeholder when none are.
+function updateFileHint() {
+  if (selectedFiles.length === 0) {
+    fileHint.textContent = '未选择文件';
+    return;
+  }
+  fileHint.textContent = `已选择 ${selectedFiles.length} 张`;
+}
+
+/**
+ * Queue newly picked or dropped images.
+ *
+ * Non-image entries are ignored, and a file already queued (same name, size
+ * and lastModified) is not added twice. Afterwards the preview grid and the
+ * file-count hint are refreshed from the full queue.
+ *
+ * @param {FileList|File[]|null|undefined} files - candidate files.
+ */
+function addFiles(files) {
+  const fingerprint = (f) => `${f.name}|${f.size}|${f.lastModified}`;
+  const images = Array.from(files || []).filter((f) => f && f.type.startsWith('image/'));
+  const seen = new Set(selectedFiles.map(fingerprint));
+  for (const candidate of images) {
+    const id = fingerprint(candidate);
+    if (seen.has(id)) continue;
+    seen.add(id);
+    selectedFiles.push(candidate);
+  }
+  const objectUrls = selectedFiles.map((f) => URL.createObjectURL(f));
+  setPreviewList(objectUrls);
+  updateFileHint();
+  // Revoke on the next timer tick, after setPreviewList has assigned the URLs.
+  // NOTE(review): confirm previews load reliably before revocation in all target browsers.
+  setTimeout(() => objectUrls.forEach((u) => URL.revokeObjectURL(u)), 0);
+}
+
+// Picker handler: queue whatever was chosen, then reset the input's value.
+// Presumably the reset lets the same file be picked again and still fire "change" — TODO confirm.
+fileInput.addEventListener('change', function(e) {
+ addFiles(e.target.files || []);
+ fileInput.value = '';
});
function createRow(k = '', v = '') {
@@ -329,10 +378,10 @@ uploadForm.addEventListener('submit', async (e) => {
confirmMsg.textContent = '';
confirmBtn.disabled = true;
resultBox.value = '';
- currentImageRel = '';
+ currentImageRel = [];
- const file = fileInput.files[0];
- if (!file) {
+ const files = Array.from(selectedFiles || []).filter(f => f && f.type.startsWith('image/'));
+ if (!files.length) {
uploadMsg.textContent = '请选择图片文件';
uploadMsg.className = 'status-message error';
uploadMsg.style.display = 'block';
@@ -341,17 +390,21 @@ uploadForm.addEventListener('submit', async (e) => {
showProgress();
setProgress(5, '转换为JPG');
- let jpegFile = file;
- try {
- jpegFile = await convertToJpeg(file);
- setProgress(50, '转换为JPG');
- preview.src = URL.createObjectURL(jpegFile);
- } catch (_) {
- jpegFile = file;
- setProgress(50, '转换为JPG');
- }
const formData = new FormData();
- formData.append('file', jpegFile);
+ const converted = [];
+ for (let i = 0; i < files.length; i++) {
+ const file = files[i];
+ let jpegFile = file;
+ try {
+ jpegFile = await convertToJpeg(file);
+ } catch (_) {
+ jpegFile = file;
+ }
+ converted.push(jpegFile);
+ const pct = 5 + Math.round(((i + 1) / files.length) * 45);
+ setProgress(pct, '转换为JPG');
+ }
+ converted.forEach(f => formData.append('file', f));
try {
let prog = 50;
@@ -375,9 +428,10 @@ uploadForm.addEventListener('submit', async (e) => {
uploadMsg.textContent = data.message || '识别成功';
uploadMsg.className = 'status-message success';
uploadMsg.style.display = 'block';
- preview.src = data.image_url;
+ const urls = data.image_urls || (data.image_url ? [data.image_url] : []);
+ setPreviewList(urls);
renderFormFromObject(data.data || {});
- currentImageRel = data.image;
+ currentImageRel = data.images || (data.image ? [data.image] : []);
confirmBtn.disabled = false;
setTimeout(hideProgress, 800);
} catch (e) {
@@ -415,15 +469,20 @@ confirmBtn.addEventListener('click', async () => {
clearBtn.addEventListener('click', () => {
fileInput.value = '';
- preview.src = '';
+ previewList.innerHTML = '';
resultBox.value = '';
kvForm.innerHTML = '';
kvForm.appendChild(createRow()); // 保留一个空行
uploadMsg.textContent = '';
confirmMsg.textContent = '';
confirmBtn.disabled = true;
+ currentImageRel = [];
+ selectedFiles = [];
+ updateFileHint();
});
+updateFileHint();
+
// 退出登录处理
document.getElementById('logoutBtn').addEventListener('click', async () => {
const msg = document.getElementById('logoutMsg');
diff --git a/elastic/views.py b/elastic/views.py
index 8048e9d..3454adb 100644
--- a/elastic/views.py
+++ b/elastic/views.py
@@ -42,6 +42,29 @@ def _image_ref_to_url(request, image_ref: str) -> str:
return ''
+def _parse_image_refs(image_ref):
+ # Normalize an image reference into a list of reference strings.
+ #
+ # - Falsy input yields [].
+ # - A list/tuple yields str() of each item whose stripped text is non-empty.
+ # - A string is stripped; if it starts with '[' or '"' it is tried as JSON
+ #   (parse errors are ignored). A JSON list is handled like a list above; a
+ #   JSON string replaces the working value with its stripped text.
+ # - Any other type yields [].
+ #
+ # NOTE(review): indentation is lost in this rendering of the patch, so it
+ # is unclear whether a plain (non-JSON) string reaches `return [s] ...` or
+ # falls through to the final `return []` — confirm against views.py.
+ if not image_ref:
+ return []
+ if isinstance(image_ref, (list, tuple)):
+ return [str(x) for x in image_ref if str(x).strip()]
+ if isinstance(image_ref, str):
+ s = image_ref.strip()
+ if not s:
+ return []
+ parsed = None
+ # Only attempt JSON decoding when the text looks like a JSON array or JSON string.
+ if s[:1] in ('[', '"'):
+ try:
+ parsed = json.loads(s)
+ except Exception:
+ parsed = None
+ if isinstance(parsed, list):
+ return [str(x) for x in parsed if str(x).strip()]
+ if isinstance(parsed, str):
+ s = parsed.strip()
+ return [s] if s else []
+ return []
+
+
def _attach_image_urls(request, items):
out = []
for it in list(items or []):
@@ -49,7 +72,11 @@ def _attach_image_urls(request, items):
d = dict(it or {})
except Exception:
continue
- d['image_url'] = _image_ref_to_url(request, d.get('image', ''))
+ refs = _parse_image_refs(d.get('image', ''))
+ urls = [_image_ref_to_url(request, r) for r in refs if str(r).strip()]
+ urls = [u for u in urls if u]
+ d['image_urls'] = urls
+ d['image_url'] = urls[0] if urls else _image_ref_to_url(request, d.get('image', ''))
out.append(d)
return out
@@ -180,7 +207,11 @@ def update_data(request, doc_id):
if "writer_id" in payload:
updated["writer_id"] = payload["writer_id"]
if "image" in payload:
- updated["image"] = payload["image"]
+ img_val = payload["image"]
+ if isinstance(img_val, list):
+ updated["image"] = json_to_string(img_val)
+ else:
+ updated["image"] = img_val
if "data" in payload:
v = payload["data"]
if isinstance(v, dict):
@@ -359,7 +390,7 @@ def string_to_json(s):
# 移植自 a.py 的核心:调用大模型进行 OCR/信息抽取
def ocr_and_extract_info(image_path: str):
- from openai import OpenAI
+ # from openai import OpenAI
def encode_image(path: str) -> str:
with open(path, "rb") as f:
return base64.b64encode(f.read()).decode("utf-8")
@@ -372,12 +403,42 @@ def ocr_and_extract_info(image_path: str):
# raise RuntimeError("缺少 AISTUDIO_API_KEY,请在环境变量或 settings 中配置")
- api_key = getattr(settings, "AISTUDIO_API_KEY", "")
- base_url = getattr(settings, "OPENAI_BASE_URL", "")
- if not api_key or not base_url:
- raise RuntimeError("缺少模型服务配置,请设置 AISTUDIO_API_KEY 与 OPENAI_BASE_URL")
- client = OpenAI(api_key=api_key, base_url=base_url)
+ # api_key = getattr(settings, "AISTUDIO_API_KEY", "")
+ # base_url = getattr(settings, "OPENAI_BASE_URL", "")
+ # if not api_key or not base_url:
+ # raise RuntimeError("缺少模型服务配置,请设置 AISTUDIO_API_KEY 与 OPENAI_BASE_URL")
+ # client = OpenAI(api_key=api_key, base_url=base_url)
+ # types = get_type_list()
+ # chat_completion = client.chat.completions.create(
+ # messages=[
+ # {"role": "system", "content": "你是一个能理解图片和文本的助手,请根据用户提供的信息进行回答。"},
+ # {
+ # "role": "user",
+ # "content": [
+ # {"type": "text", "text": f"请识别这张图片中的信息,将你认为重要的数据转换为不包含嵌套的json,不要显示其它信息以便于解析,直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型,除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定,请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"},
+ # {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}},
+ # ],
+ # },
+ # ],
+ # model="glm-5",
+ # )
+ # response_text = chat_completion.choices[0].message.content
+ from zai import ZhipuAiClient
+ import httpx
+ # Read the API key from Django settings rather than embedding it in source.
+ # The key that was committed on this line must be treated as leaked and
+ # rotated with the provider.
+ api_key = (
+     getattr(settings, "ZHIPU_API_KEY", "")
+     or getattr(settings, "ZAI_API_KEY", "")
+     or getattr(settings, "AISTUDIO_API_KEY", "")
+ )
+ if not api_key:
+     # Raised as RuntimeError; upload() turns any exception from this function into a 500 JSON response.
+     raise RuntimeError("缺少模型服务配置,请设置 ZHIPU_API_KEY")
+ # base_url = (
+ # getattr(settings, "ZHIPU_BASE_URL", "")
+ # or getattr(settings, "ZAI_BASE_URL", "")
+ # or "https://open.bigmodel.cn/api/paas/v4/"
+ # )
+ client = ZhipuAiClient(api_key=api_key)
types = get_type_list()
chat_completion = client.chat.completions.create(
messages=[
@@ -390,9 +451,8 @@ def ocr_and_extract_info(image_path: str):
],
},
],
- model=getattr(settings, "OPENAI_MODEL_NAME", "ernie-4.5-turbo-vl-32k"),
+ model="glm-4.6v",
)
-
response_text = chat_completion.choices[0].message.content
def parse_response(text: str):
@@ -448,35 +508,66 @@ def upload(request):
else:
return JsonResponse({"status": "error", "message": "未登录"}, status=401)
- file = request.FILES.get("file")
- if not file:
+ files = request.FILES.getlist("file")
+ if not files:
+ one = request.FILES.get("file")
+ if one:
+ files = [one]
+ if not files:
return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
images_dir = os.path.join(settings.MEDIA_ROOT, "images")
os.makedirs(images_dir, exist_ok=True)
- filename = f"{uuid.uuid4()}_{file.name}"
- abs_path = os.path.join(images_dir, filename)
-
- with open(abs_path, "wb") as dst:
- for chunk in file.chunks():
- dst.write(chunk)
-
- try:
- data = ocr_and_extract_info(abs_path)
- if not data:
- return JsonResponse({"status": "error", "message": "无法识别图片内容"}, status=400)
-
+ rel_paths = []
+ image_urls = []
+ data_list = []
+ for file in files:
+ filename = f"{uuid.uuid4()}_{file.name}"
+ abs_path = os.path.join(images_dir, filename)
+ with open(abs_path, "wb") as dst:
+ for chunk in file.chunks():
+ dst.write(chunk)
+ try:
+ data = ocr_and_extract_info(abs_path)
+ except Exception as e:
+ return JsonResponse({"status": "error", "message": str(e)}, status=500)
+ if data:
+ data_list.append(data)
rel_path = f"images/{filename}"
- image_url = request.build_absolute_uri(settings.MEDIA_URL + rel_path)
- return JsonResponse({
- "status": "success",
- "message": "识别成功,请确认数据后点击录入",
- "data": data,
- "image": rel_path,
- "image_url": image_url,
- })
- except Exception as e:
- return JsonResponse({"status": "error", "message": str(e)}, status=500)
+ rel_paths.append(rel_path)
+ image_urls.append(request.build_absolute_uri(settings.MEDIA_URL + rel_path))
+
+ if not data_list:
+ return JsonResponse({"status": "error", "message": "无法识别图片内容"}, status=400)
+
+ merged = {}
+ for item in data_list:
+ if not isinstance(item, dict):
+ continue
+ for k, v in item.items():
+ key = str(k).strip()
+ if not key:
+ continue
+ if key not in merged or merged.get(key) in (None, ''):
+ merged[key] = v
+ continue
+ if merged.get(key) == v:
+ continue
+ base = key
+ idx = 2
+ while f"{base}_{idx}" in merged:
+ idx += 1
+ merged[f"{base}_{idx}"] = v
+
+ return JsonResponse({
+ "status": "success",
+ "message": "识别成功,请确认数据后点击录入",
+ "data": merged,
+ "images": rel_paths,
+ "image_urls": image_urls,
+ "image": rel_paths[0] if rel_paths else "",
+ "image_url": image_urls[0] if image_urls else "",
+ })
# 确认并入库
@@ -508,38 +599,44 @@ def confirm(request):
ensure_type_in_list(edited.get("数据类型"))
image_ref_to_store = ""
temp_files_to_delete = []
- if image_rel:
+ image_rels = _parse_image_refs(image_rel)
+ if image_rels:
images_dir = os.path.join(settings.MEDIA_ROOT, "images")
os.makedirs(images_dir, exist_ok=True)
- src_abs = os.path.join(settings.MEDIA_ROOT, image_rel)
- if not os.path.isfile(src_abs):
- return JsonResponse({"status": "error", "message": "图片文件不存在"}, status=400)
-
- webp_name = f"{uuid.uuid4().hex}.webp"
- webp_abs = os.path.join(images_dir, webp_name)
- try:
- with Image.open(src_abs) as im:
- if im.mode in ("RGBA", "LA", "P"):
- im = im.convert("RGBA")
- else:
- im = im.convert("RGB")
- im.save(webp_abs, format="WEBP", quality=80)
- except Exception:
+ image_refs = []
+ for rel in image_rels:
+ src_abs = os.path.join(settings.MEDIA_ROOT, rel)
+ if not os.path.isfile(src_abs):
+ return JsonResponse({"status": "error", "message": "图片文件不存在"}, status=400)
+ webp_name = f"{uuid.uuid4().hex}.webp"
+ webp_abs = os.path.join(images_dir, webp_name)
try:
- if os.path.isfile(webp_abs):
- os.remove(webp_abs)
+ with Image.open(src_abs) as im:
+ if im.mode in ("RGBA", "LA", "P"):
+ im = im.convert("RGBA")
+ else:
+ im = im.convert("RGB")
+ im.save(webp_abs, format="WEBP", quality=80)
except Exception:
- pass
- return JsonResponse({"status": "error", "message": "图片转换WEBP失败"}, status=500)
+ try:
+ if os.path.isfile(webp_abs):
+ os.remove(webp_abs)
+ except Exception:
+ pass
+ return JsonResponse({"status": "error", "message": "图片转换WEBP失败"}, status=500)
- try:
- object_name = f"images/{webp_name}"
- from minio_storage.minio_connect import upload_file
- upload_file(webp_abs, object_name, content_type="image/webp")
- image_ref_to_store = f"minio:{object_name}"
- temp_files_to_delete.extend([src_abs, webp_abs])
- except Exception as e:
- return JsonResponse({"status": "error", "message": f"上传到MinIO失败: {e}"}, status=500)
+ try:
+ object_name = f"images/{webp_name}"
+ from minio_storage.minio_connect import upload_file
+ upload_file(webp_abs, object_name, content_type="image/webp")
+ image_refs.append(f"minio:{object_name}")
+ temp_files_to_delete.extend([src_abs, webp_abs])
+ except Exception as e:
+ return JsonResponse({"status": "error", "message": f"上传到MinIO失败: {e}"}, status=500)
+ if len(image_refs) == 1:
+ image_ref_to_store = image_refs[0]
+ elif len(image_refs) > 1:
+ image_ref_to_store = json_to_string(image_refs)
to_store = {
"writer_id": str(request.session.get("user_id")),
diff --git a/requirements.txt b/requirements.txt
index b1a6338..861eb21 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,7 @@ elasticsearch-dsl==7.4.1
requests==2.32.3
openai==1.52.2
httpx==0.27.2
+zai-sdk==0.3.0
Pillow==10.4.0
minio>=7.2.0,<8
gunicorn==21.2.0
|