Compare commits
8 Commits
0.2.7.7
...
e92964ce71
| Author | SHA1 | Date | |
|---|---|---|---|
| | e92964ce71 | | |
| | 1a3aee39e0 | | |
| | 7fa7b42b1a | | |
| | 26452161f8 | | |
| | 07d3a4420c | | |
| | 2c3c2d6acf | | |
| | afc663844b | | |
| | 9e3fe7150b | | |
@@ -831,12 +831,8 @@ def get_user_by_username(username):
|
|||||||
def get_all_users():
|
def get_all_users():
|
||||||
"""获取所有用户"""
|
"""获取所有用户"""
|
||||||
try:
|
try:
|
||||||
search = UserDocument.search()
|
|
||||||
search = search.query("match_all")
|
|
||||||
response = search.execute()
|
|
||||||
|
|
||||||
users = []
|
users = []
|
||||||
for hit in response:
|
for hit in UserDocument.search().query("match_all").scan():
|
||||||
users.append({
|
users.append({
|
||||||
"user_id": hit.user_id,
|
"user_id": hit.user_id,
|
||||||
"username": hit.username,
|
"username": hit.username,
|
||||||
@@ -848,7 +844,6 @@ def get_all_users():
|
|||||||
"key": list(getattr(hit, 'key', []) or []),
|
"key": list(getattr(hit, 'key', []) or []),
|
||||||
"manage_key": list(getattr(hit, 'manage_key', []) or []),
|
"manage_key": list(getattr(hit, 'manage_key', []) or []),
|
||||||
})
|
})
|
||||||
|
|
||||||
return users
|
return users
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"获取所有用户失败: {str(e)}")
|
print(f"获取所有用户失败: {str(e)}")
|
||||||
|
|||||||
@@ -494,6 +494,11 @@ uploadForm.addEventListener('submit', async (e) => {
|
|||||||
body: formData,
|
body: formData,
|
||||||
});
|
});
|
||||||
clearInterval(timer);
|
clearInterval(timer);
|
||||||
|
const ct = (resp.headers.get('content-type') || '').toLowerCase();
|
||||||
|
if (!ct.includes('application/json')) {
|
||||||
|
const text = await resp.text();
|
||||||
|
throw new Error(text ? String(text).slice(0, 200) : `HTTP ${resp.status}`);
|
||||||
|
}
|
||||||
const data = await resp.json();
|
const data = await resp.json();
|
||||||
if (!resp.ok || data.status !== 'success') {
|
if (!resp.ok || data.status !== 'success') {
|
||||||
throw new Error(data.message || '上传识别失败');
|
throw new Error(data.message || '上传识别失败');
|
||||||
|
|||||||
194
elastic/views.py
194
elastic/views.py
@@ -722,115 +722,117 @@ def upload_page(request):
|
|||||||
# 上传并识别(不入库)
|
# 上传并识别(不入库)
|
||||||
@require_http_methods(["POST"])
|
@require_http_methods(["POST"])
|
||||||
def upload(request):
|
def upload(request):
|
||||||
if request.session.get("user_id") is None:
|
try:
|
||||||
fallback_uid = request.POST.get("user_id") or request.GET.get("user_id")
|
if request.session.get("user_id") is None:
|
||||||
if fallback_uid:
|
fallback_uid = request.POST.get("user_id") or request.GET.get("user_id")
|
||||||
request.session["user_id"] = fallback_uid
|
if fallback_uid:
|
||||||
request.session.setdefault("permission", 1)
|
request.session["user_id"] = fallback_uid
|
||||||
else:
|
request.session.setdefault("permission", 1)
|
||||||
return JsonResponse({"status": "error", "message": "未登录"}, status=401)
|
else:
|
||||||
|
return JsonResponse({"status": "error", "message": "未登录"}, status=401)
|
||||||
|
|
||||||
files = request.FILES.getlist("file")
|
files = request.FILES.getlist("file")
|
||||||
if not files:
|
if not files:
|
||||||
one = request.FILES.get("file")
|
one = request.FILES.get("file")
|
||||||
if one:
|
if one:
|
||||||
files = [one]
|
files = [one]
|
||||||
if not files:
|
if not files:
|
||||||
return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
|
return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
|
||||||
|
|
||||||
images_dir = os.path.join(settings.MEDIA_ROOT, "images")
|
images_dir = os.path.join(settings.MEDIA_ROOT, "images")
|
||||||
os.makedirs(images_dir, exist_ok=True)
|
os.makedirs(images_dir, exist_ok=True)
|
||||||
|
|
||||||
# 按照原始文件进行分组处理
|
file_results = []
|
||||||
file_results = []
|
|
||||||
|
|
||||||
for f in files:
|
for f in files:
|
||||||
group_images = [] # 存储该文件生成的所有图片路径信息 (abs_path, filename)
|
group_images = []
|
||||||
is_pdf = f.name.lower().endswith('.pdf')
|
is_pdf = f.name.lower().endswith('.pdf')
|
||||||
|
|
||||||
if is_pdf:
|
if is_pdf:
|
||||||
if not HAS_PDF_SUPPORT:
|
if not HAS_PDF_SUPPORT:
|
||||||
return JsonResponse({"status": "error", "message": f"服务器未安装PDF处理组件(PyMuPDF): {PDF_ERROR}"}, status=500)
|
return JsonResponse({"status": "error", "message": f"服务器未安装PDF处理组件(PyMuPDF): {PDF_ERROR}"}, status=500)
|
||||||
|
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
|
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
|
||||||
for chunk in f.chunks():
|
for chunk in f.chunks():
|
||||||
tmp.write(chunk)
|
tmp.write(chunk)
|
||||||
tmp_path = tmp.name
|
tmp_path = tmp.name
|
||||||
|
|
||||||
try:
|
try:
|
||||||
doc = fitz.open(tmp_path)
|
doc = fitz.open(tmp_path)
|
||||||
for i in range(len(doc)):
|
for i in range(len(doc)):
|
||||||
page = doc.load_page(i)
|
page = doc.load_page(i)
|
||||||
pix = page.get_pixmap(dpi=150)
|
pix = page.get_pixmap(dpi=150)
|
||||||
img_filename = f"{uuid.uuid4()}_page_{i+1}.jpg"
|
img_filename = f"{uuid.uuid4()}_page_{i+1}.jpg"
|
||||||
img_abs_path = os.path.join(images_dir, img_filename)
|
img_abs_path = os.path.join(images_dir, img_filename)
|
||||||
pix.save(img_abs_path)
|
pix.save(img_abs_path)
|
||||||
group_images.append((img_abs_path, img_filename))
|
group_images.append((img_abs_path, img_filename))
|
||||||
doc.close()
|
doc.close()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return JsonResponse({"status": "error", "message": f"PDF {f.name} 转换失败: {str(e)}"}, status=500)
|
return JsonResponse({"status": "error", "message": f"PDF {f.name} 转换失败: {str(e)}"}, status=500)
|
||||||
finally:
|
finally:
|
||||||
if os.path.exists(tmp_path):
|
if os.path.exists(tmp_path):
|
||||||
os.remove(tmp_path)
|
os.remove(tmp_path)
|
||||||
else:
|
else:
|
||||||
filename = f"{uuid.uuid4()}_{f.name}"
|
filename = f"{uuid.uuid4()}_{f.name}"
|
||||||
abs_path = os.path.join(images_dir, filename)
|
abs_path = os.path.join(images_dir, filename)
|
||||||
with open(abs_path, "wb") as dst:
|
with open(abs_path, "wb") as dst:
|
||||||
for chunk in f.chunks():
|
for chunk in f.chunks():
|
||||||
dst.write(chunk)
|
dst.write(chunk)
|
||||||
group_images.append((abs_path, filename))
|
group_images.append((abs_path, filename))
|
||||||
|
|
||||||
# 对该组图片并行进行 OCR 识别
|
def run_ocr(img_info):
|
||||||
def run_ocr(img_info):
|
abs_p, fname = img_info
|
||||||
abs_p, fname = img_info
|
try:
|
||||||
try:
|
data = ocr_and_extract_info(abs_p)
|
||||||
data = ocr_and_extract_info(abs_p)
|
return data
|
||||||
return data
|
except Exception:
|
||||||
except Exception:
|
return None
|
||||||
return None
|
|
||||||
|
|
||||||
group_data_list = []
|
group_data_list = []
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
|
||||||
futures = [executor.submit(run_ocr, img_info) for img_info in group_images]
|
futures = [executor.submit(run_ocr, img_info) for img_info in group_images]
|
||||||
for future in concurrent.futures.as_completed(futures):
|
for future in concurrent.futures.as_completed(futures):
|
||||||
res = future.result()
|
res = future.result()
|
||||||
if res:
|
if res:
|
||||||
group_data_list.append(res)
|
group_data_list.append(res)
|
||||||
|
|
||||||
# 合并该文件的多页识别结果
|
merged_group_data = {}
|
||||||
merged_group_data = {}
|
for item in group_data_list:
|
||||||
for item in group_data_list:
|
if not isinstance(item, dict):
|
||||||
if not isinstance(item, dict): continue
|
continue
|
||||||
for k, v in item.items():
|
for k, v in item.items():
|
||||||
key = str(k).strip()
|
key = str(k).strip()
|
||||||
if not key: continue
|
if not key:
|
||||||
if key not in merged_group_data or merged_group_data.get(key) in (None, ''):
|
continue
|
||||||
merged_group_data[key] = v
|
if key not in merged_group_data or merged_group_data.get(key) in (None, ''):
|
||||||
elif merged_group_data.get(key) != v:
|
merged_group_data[key] = v
|
||||||
base = key
|
elif merged_group_data.get(key) != v:
|
||||||
idx = 2
|
base = key
|
||||||
while f"{base}_{idx}" in merged_group_data: idx += 1
|
idx = 2
|
||||||
merged_group_data[f"{base}_{idx}"] = v
|
while f"{base}_{idx}" in merged_group_data:
|
||||||
|
idx += 1
|
||||||
|
merged_group_data[f"{base}_{idx}"] = v
|
||||||
|
|
||||||
if not merged_group_data:
|
if not merged_group_data:
|
||||||
# 如果没识别到,至少保留一个空结构或者包含文件名的提示
|
merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"}
|
||||||
merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"}
|
|
||||||
|
|
||||||
rel_paths = [f"images/{img[1]}" for img in group_images]
|
rel_paths = [f"images/{img[1]}" for img in group_images]
|
||||||
image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]
|
image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]
|
||||||
|
|
||||||
file_results.append({
|
file_results.append({
|
||||||
"name": f.name,
|
"name": f.name,
|
||||||
"data": merged_group_data,
|
"data": merged_group_data,
|
||||||
"images": rel_paths,
|
"images": rel_paths,
|
||||||
"image_urls": image_urls,
|
"image_urls": image_urls,
|
||||||
|
})
|
||||||
|
|
||||||
|
return JsonResponse({
|
||||||
|
"status": "success",
|
||||||
|
"message": f"成功处理 {len(file_results)} 个文件,请确认数据后点击录入",
|
||||||
|
"items": file_results,
|
||||||
})
|
})
|
||||||
|
except Exception as e:
|
||||||
return JsonResponse({
|
return JsonResponse({"status": "error", "message": str(e) or "上传失败"}, status=500)
|
||||||
"status": "success",
|
|
||||||
"message": f"成功处理 {len(file_results)} 个文件,请确认数据后点击录入",
|
|
||||||
"items": file_results,
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
# 确认并入库
|
# 确认并入库
|
||||||
@@ -1025,7 +1027,7 @@ def analytics_types_view(request):
|
|||||||
size_int = int(size) if size is not None else 10
|
size_int = int(size) if size is not None else 10
|
||||||
except Exception:
|
except Exception:
|
||||||
size_int = 10
|
size_int = 10
|
||||||
data = es_analytics_types(gte=gte, lte=lte, size=size_int)
|
data = es_analytics_types(gte=gte, lte=lte, limit=size_int)
|
||||||
return JsonResponse({"status": "success", "data": data})
|
return JsonResponse({"status": "success", "data": data})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return JsonResponse({"status": "error", "message": str(e)}, status=500)
|
return JsonResponse({"status": "error", "message": str(e)}, status=500)
|
||||||
|
|||||||
Reference in New Issue
Block a user