diff --git a/elastic/templates/elastic/upload.html b/elastic/templates/elastic/upload.html
index 397f3e3..0b59b79 100644
--- a/elastic/templates/elastic/upload.html
+++ b/elastic/templates/elastic/upload.html
@@ -494,6 +494,11 @@ uploadForm.addEventListener('submit', async (e) => {
body: formData,
});
clearInterval(timer);
+ const ct = (resp.headers.get('content-type') || '').toLowerCase();
+ if (!ct.includes('application/json')) {
+ const text = await resp.text();
+ throw new Error(text ? String(text).slice(0, 200) : `HTTP ${resp.status}`);
+ }
const data = await resp.json();
if (!resp.ok || data.status !== 'success') {
throw new Error(data.message || '上传识别失败');
diff --git a/elastic/views.py b/elastic/views.py
index 8fb674b..2fb3665 100644
--- a/elastic/views.py
+++ b/elastic/views.py
@@ -722,115 +722,156 @@ def upload_page(request):
# 上传并识别(不入库)
@require_http_methods(["POST"])
def upload(request):
- if request.session.get("user_id") is None:
- fallback_uid = request.POST.get("user_id") or request.GET.get("user_id")
- if fallback_uid:
- request.session["user_id"] = fallback_uid
- request.session.setdefault("permission", 1)
- else:
- return JsonResponse({"status": "error", "message": "未登录"}, status=401)
+    """Save the uploaded files, OCR them and return the result as JSON.
+
+    Nothing is written to the database here; the success message asks the
+    user to confirm the data before it is entered.
+
+    A non-PDF upload is written to ``MEDIA_ROOT/images`` unchanged. A
+    ``.pdf`` upload is rendered page by page at 150 dpi into ``.jpg``
+    files in the same directory. The images of one upload are OCR'd in
+    parallel and the per-image dicts are merged into a single dict.
+
+    Returns:
+        JsonResponse: ``status == "success"`` with one entry per uploaded
+        file under ``items``; otherwise ``status == "error"`` with a
+        ``message`` and HTTP 401 (no user id), 400 (no file) or 500.
+        The body runs under one ``try`` so that an unexpected exception
+        is still answered with JSON.
+    """
+    import logging
+
+    logger = logging.getLogger(__name__)
+    try:
+        if request.session.get("user_id") is None:
+            # SECURITY(review): this fallback stores a client-supplied
+            # user_id in the session, so a caller without a session can
+            # pick any identity. Kept unchanged for compatibility; confirm
+            # that this endpoint is not reachable by untrusted clients.
+            fallback_uid = request.POST.get("user_id") or request.GET.get("user_id")
+            if fallback_uid:
+                request.session["user_id"] = fallback_uid
+                request.session.setdefault("permission", 1)
+            else:
+                return JsonResponse({"status": "error", "message": "未登录"}, status=401)
- files = request.FILES.getlist("file")
- if not files:
- one = request.FILES.get("file")
- if one:
- files = [one]
- if not files:
- return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
+        files = request.FILES.getlist("file")
+        if not files:
+            one = request.FILES.get("file")
+            if one:
+                files = [one]
+        if not files:
+            return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
- images_dir = os.path.join(settings.MEDIA_ROOT, "images")
- os.makedirs(images_dir, exist_ok=True)
-
- # 按照原始文件进行分组处理
- file_results = []
-
- for f in files:
- group_images = [] # 存储该文件生成的所有图片路径信息 (abs_path, filename)
- is_pdf = f.name.lower().endswith('.pdf')
+        images_dir = os.path.join(settings.MEDIA_ROOT, "images")
+        os.makedirs(images_dir, exist_ok=True)
- if is_pdf:
- if not HAS_PDF_SUPPORT:
- return JsonResponse({"status": "error", "message": f"服务器未安装PDF处理组件(PyMuPDF): {PDF_ERROR}"}, status=500)
+        file_results = []
+
+        for f in files:
+            # (abs_path, filename) of every image produced for this upload.
+            group_images = []
+            is_pdf = f.name.lower().endswith('.pdf')
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
- for chunk in f.chunks():
- tmp.write(chunk)
- tmp_path = tmp.name
-
- try:
- doc = fitz.open(tmp_path)
- for i in range(len(doc)):
- page = doc.load_page(i)
- pix = page.get_pixmap(dpi=150)
- img_filename = f"{uuid.uuid4()}_page_{i+1}.jpg"
- img_abs_path = os.path.join(images_dir, img_filename)
- pix.save(img_abs_path)
- group_images.append((img_abs_path, img_filename))
- doc.close()
- except Exception as e:
- return JsonResponse({"status": "error", "message": f"PDF {f.name} 转换失败: {str(e)}"}, status=500)
- finally:
- if os.path.exists(tmp_path):
- os.remove(tmp_path)
- else:
- filename = f"{uuid.uuid4()}_{f.name}"
- abs_path = os.path.join(images_dir, filename)
- with open(abs_path, "wb") as dst:
- for chunk in f.chunks():
- dst.write(chunk)
- group_images.append((abs_path, filename))
+            if is_pdf:
+                if not HAS_PDF_SUPPORT:
+                    return JsonResponse({"status": "error", "message": f"服务器未安装PDF处理组件(PyMuPDF): {PDF_ERROR}"}, status=500)
+
+                with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
+                    for chunk in f.chunks():
+                        tmp.write(chunk)
+                    tmp_path = tmp.name
+
+                try:
+                    doc = fitz.open(tmp_path)
+                    try:
+                        for i in range(len(doc)):
+                            page = doc.load_page(i)
+                            pix = page.get_pixmap(dpi=150)
+                            img_filename = f"{uuid.uuid4()}_page_{i+1}.jpg"
+                            img_abs_path = os.path.join(images_dir, img_filename)
+                            pix.save(img_abs_path)
+                            group_images.append((img_abs_path, img_filename))
+                    finally:
+                        # Close even when a page fails to render: the handle
+                        # would otherwise leak and can make os.remove() fail.
+                        doc.close()
+                except Exception as e:
+                    logger.exception("PDF conversion failed for %s", f.name)
+                    return JsonResponse({"status": "error", "message": f"PDF {f.name} 转换失败: {str(e)}"}, status=500)
+                finally:
+                    if os.path.exists(tmp_path):
+                        os.remove(tmp_path)
+            else:
+                filename = f"{uuid.uuid4()}_{f.name}"
+                abs_path = os.path.join(images_dir, filename)
+                with open(abs_path, "wb") as dst:
+                    for chunk in f.chunks():
+                        dst.write(chunk)
+                group_images.append((abs_path, filename))
- # 对该组图片并行进行 OCR 识别
- def run_ocr(img_info):
- abs_p, fname = img_info
- try:
- data = ocr_and_extract_info(abs_p)
- return data
- except Exception:
- return None
+            def run_ocr(img_info):
+                """OCR one saved image; return its data or None on failure."""
+                abs_p, fname = img_info
+                try:
+                    return ocr_and_extract_info(abs_p)
+                except Exception:
+                    # Best effort: one unreadable page must not fail the
+                    # whole upload, but the failure is logged, not hidden.
+                    logger.exception("OCR failed for %s", fname)
+                    return None
- group_data_list = []
- with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
- futures = [executor.submit(run_ocr, img_info) for img_info in group_images]
- for future in concurrent.futures.as_completed(futures):
- res = future.result()
- if res:
- group_data_list.append(res)
+            group_data_list = []
+            # Zero images (e.g. a PDF without pages) would make max_workers 0,
+            # which ThreadPoolExecutor rejects with ValueError.
+            if group_images:
+                with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
+                    # map() yields results in submission (page) order, unlike
+                    # as_completed(), so the merge below is deterministic.
+                    group_data_list = [res for res in executor.map(run_ocr, group_images) if res]
- # 合并该文件的多页识别结果
- merged_group_data = {}
- for item in group_data_list:
- if not isinstance(item, dict): continue
- for k, v in item.items():
- key = str(k).strip()
- if not key: continue
- if key not in merged_group_data or merged_group_data.get(key) in (None, ''):
- merged_group_data[key] = v
- elif merged_group_data.get(key) != v:
- base = key
- idx = 2
- while f"{base}_{idx}" in merged_group_data: idx += 1
- merged_group_data[f"{base}_{idx}"] = v
+            # Merge the per-page results of this upload into one dict.
+            merged_group_data = {}
+            for item in group_data_list:
+                if not isinstance(item, dict):
+                    continue
+                for k, v in item.items():
+                    key = str(k).strip()
+                    if not key:
+                        continue
+                    if key not in merged_group_data or merged_group_data.get(key) in (None, ''):
+                        merged_group_data[key] = v
+                    elif merged_group_data.get(key) != v:
+                        # Conflicting value: keep both under key_2, key_3, ...
+                        base = key
+                        idx = 2
+                        while f"{base}_{idx}" in merged_group_data:
+                            idx += 1
+                        merged_group_data[f"{base}_{idx}"] = v
- if not merged_group_data:
- # 如果没识别到,至少保留一个空结构或者包含文件名的提示
- merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"}
+            if not merged_group_data:
+                # Nothing recognised: return a placeholder instead of an empty dict.
+                merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"}
- rel_paths = [f"images/{img[1]}" for img in group_images]
- image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]
+            rel_paths = [f"images/{img[1]}" for img in group_images]
+            image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]
- file_results.append({
- "name": f.name,
- "data": merged_group_data,
- "images": rel_paths,
- "image_urls": image_urls,
+            file_results.append({
+                "name": f.name,
+                "data": merged_group_data,
+                "images": rel_paths,
+                "image_urls": image_urls,
+            })
+
+        return JsonResponse({
+            "status": "success",
+            "message": f"成功处理 {len(file_results)} 个文件,请确认数据后点击录入",
+            "items": file_results,
})
-
- return JsonResponse({
- "status": "success",
- "message": f"成功处理 {len(file_results)} 个文件,请确认数据后点击录入",
- "items": file_results,
- })
+    except Exception as e:
+        # Top-level boundary: log the traceback server-side, answer in JSON.
+        logger.exception("upload failed")
+        return JsonResponse({"status": "error", "message": str(e) or "上传失败"}, status=500)
# 确认并入库