Merge branch 'Django' of gitea.spdis.space:Viajero/Achievement_Inputing into Django
All checks were successful
CI / docker-ci (push) Has been skipped

This commit is contained in:
2026-03-23 11:06:59 +08:00
2 changed files with 105 additions and 98 deletions

View File

@@ -494,6 +494,11 @@ uploadForm.addEventListener('submit', async (e) => {
body: formData, body: formData,
}); });
clearInterval(timer); clearInterval(timer);
const ct = (resp.headers.get('content-type') || '').toLowerCase();
if (!ct.includes('application/json')) {
const text = await resp.text();
throw new Error(text ? String(text).slice(0, 200) : `HTTP ${resp.status}`);
}
const data = await resp.json(); const data = await resp.json();
if (!resp.ok || data.status !== 'success') { if (!resp.ok || data.status !== 'success') {
throw new Error(data.message || '上传识别失败'); throw new Error(data.message || '上传识别失败');

View File

@@ -722,115 +722,117 @@ def upload_page(request):
# 上传并识别(不入库) # 上传并识别(不入库)
@require_http_methods(["POST"]) @require_http_methods(["POST"])
def upload(request): def upload(request):
if request.session.get("user_id") is None: try:
fallback_uid = request.POST.get("user_id") or request.GET.get("user_id") if request.session.get("user_id") is None:
if fallback_uid: fallback_uid = request.POST.get("user_id") or request.GET.get("user_id")
request.session["user_id"] = fallback_uid if fallback_uid:
request.session.setdefault("permission", 1) request.session["user_id"] = fallback_uid
else: request.session.setdefault("permission", 1)
return JsonResponse({"status": "error", "message": "未登录"}, status=401) else:
return JsonResponse({"status": "error", "message": "未登录"}, status=401)
files = request.FILES.getlist("file") files = request.FILES.getlist("file")
if not files: if not files:
one = request.FILES.get("file") one = request.FILES.get("file")
if one: if one:
files = [one] files = [one]
if not files: if not files:
return JsonResponse({"status": "error", "message": "未选择文件"}, status=400) return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
images_dir = os.path.join(settings.MEDIA_ROOT, "images") images_dir = os.path.join(settings.MEDIA_ROOT, "images")
os.makedirs(images_dir, exist_ok=True) os.makedirs(images_dir, exist_ok=True)
# 按照原始文件进行分组处理
file_results = []
for f in files:
group_images = [] # 存储该文件生成的所有图片路径信息 (abs_path, filename)
is_pdf = f.name.lower().endswith('.pdf')
if is_pdf: file_results = []
if not HAS_PDF_SUPPORT:
return JsonResponse({"status": "error", "message": f"服务器未安装PDF处理组件(PyMuPDF): {PDF_ERROR}"}, status=500) for f in files:
group_images = []
is_pdf = f.name.lower().endswith('.pdf')
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp: if is_pdf:
for chunk in f.chunks(): if not HAS_PDF_SUPPORT:
tmp.write(chunk) return JsonResponse({"status": "error", "message": f"服务器未安装PDF处理组件(PyMuPDF): {PDF_ERROR}"}, status=500)
tmp_path = tmp.name
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
try: for chunk in f.chunks():
doc = fitz.open(tmp_path) tmp.write(chunk)
for i in range(len(doc)): tmp_path = tmp.name
page = doc.load_page(i)
pix = page.get_pixmap(dpi=150) try:
img_filename = f"{uuid.uuid4()}_page_{i+1}.jpg" doc = fitz.open(tmp_path)
img_abs_path = os.path.join(images_dir, img_filename) for i in range(len(doc)):
pix.save(img_abs_path) page = doc.load_page(i)
group_images.append((img_abs_path, img_filename)) pix = page.get_pixmap(dpi=150)
doc.close() img_filename = f"{uuid.uuid4()}_page_{i+1}.jpg"
except Exception as e: img_abs_path = os.path.join(images_dir, img_filename)
return JsonResponse({"status": "error", "message": f"PDF {f.name} 转换失败: {str(e)}"}, status=500) pix.save(img_abs_path)
finally: group_images.append((img_abs_path, img_filename))
if os.path.exists(tmp_path): doc.close()
os.remove(tmp_path) except Exception as e:
else: return JsonResponse({"status": "error", "message": f"PDF {f.name} 转换失败: {str(e)}"}, status=500)
filename = f"{uuid.uuid4()}_{f.name}" finally:
abs_path = os.path.join(images_dir, filename) if os.path.exists(tmp_path):
with open(abs_path, "wb") as dst: os.remove(tmp_path)
for chunk in f.chunks(): else:
dst.write(chunk) filename = f"{uuid.uuid4()}_{f.name}"
group_images.append((abs_path, filename)) abs_path = os.path.join(images_dir, filename)
with open(abs_path, "wb") as dst:
for chunk in f.chunks():
dst.write(chunk)
group_images.append((abs_path, filename))
# 对该组图片并行进行 OCR 识别 def run_ocr(img_info):
def run_ocr(img_info): abs_p, fname = img_info
abs_p, fname = img_info try:
try: data = ocr_and_extract_info(abs_p)
data = ocr_and_extract_info(abs_p) return data
return data except Exception:
except Exception: return None
return None
group_data_list = [] group_data_list = []
with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
futures = [executor.submit(run_ocr, img_info) for img_info in group_images] futures = [executor.submit(run_ocr, img_info) for img_info in group_images]
for future in concurrent.futures.as_completed(futures): for future in concurrent.futures.as_completed(futures):
res = future.result() res = future.result()
if res: if res:
group_data_list.append(res) group_data_list.append(res)
# 合并该文件的多页识别结果 merged_group_data = {}
merged_group_data = {} for item in group_data_list:
for item in group_data_list: if not isinstance(item, dict):
if not isinstance(item, dict): continue continue
for k, v in item.items(): for k, v in item.items():
key = str(k).strip() key = str(k).strip()
if not key: continue if not key:
if key not in merged_group_data or merged_group_data.get(key) in (None, ''): continue
merged_group_data[key] = v if key not in merged_group_data or merged_group_data.get(key) in (None, ''):
elif merged_group_data.get(key) != v: merged_group_data[key] = v
base = key elif merged_group_data.get(key) != v:
idx = 2 base = key
while f"{base}_{idx}" in merged_group_data: idx += 1 idx = 2
merged_group_data[f"{base}_{idx}"] = v while f"{base}_{idx}" in merged_group_data:
idx += 1
merged_group_data[f"{base}_{idx}"] = v
if not merged_group_data: if not merged_group_data:
# 如果没识别到,至少保留一个空结构或者包含文件名的提示 merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"}
merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"}
rel_paths = [f"images/{img[1]}" for img in group_images] rel_paths = [f"images/{img[1]}" for img in group_images]
image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths] image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]
file_results.append({ file_results.append({
"name": f.name, "name": f.name,
"data": merged_group_data, "data": merged_group_data,
"images": rel_paths, "images": rel_paths,
"image_urls": image_urls, "image_urls": image_urls,
})
return JsonResponse({
"status": "success",
"message": f"成功处理 {len(file_results)} 个文件,请确认数据后点击录入",
"items": file_results,
}) })
except Exception as e:
return JsonResponse({ return JsonResponse({"status": "error", "message": str(e) or "上传失败"}, status=500)
"status": "success",
"message": f"成功处理 {len(file_results)} 个文件,请确认数据后点击录入",
"items": file_results,
})
# 确认并入库 # 确认并入库