|
|
|
|
@@ -8,6 +8,7 @@ import base64
|
|
|
|
|
import json
|
|
|
|
|
import csv
|
|
|
|
|
import io
|
|
|
|
|
from datetime import datetime, timezone, timedelta
|
|
|
|
|
import tempfile
|
|
|
|
|
import concurrent.futures
|
|
|
|
|
from django.conf import settings
|
|
|
|
|
@@ -721,115 +722,117 @@ def upload_page(request):
|
|
|
|
|
# 上传并识别(不入库)
|
|
|
|
|
def _render_pdf_pages(pdf_path, images_dir):
    """Render every page of the PDF at *pdf_path* to an image in *images_dir*.

    Returns a list of ``(absolute_path, filename)`` tuples in page order.
    The document handle is closed even when rendering a page fails, so the
    caller can always delete the source file afterwards.
    """
    rendered = []
    doc = fitz.open(pdf_path)
    try:
        for page_index in range(len(doc)):
            pix = doc.load_page(page_index).get_pixmap(dpi=150)
            img_filename = f"{uuid.uuid4()}_page_{page_index + 1}.jpg"
            img_abs_path = os.path.join(images_dir, img_filename)
            pix.save(img_abs_path)
            rendered.append((img_abs_path, img_filename))
    finally:
        doc.close()
    return rendered


def _ocr_image_best_effort(abs_path):
    """Run OCR on one image; return its result, or ``None`` if OCR raised.

    A failing page must not abort the whole upload, so the exception is
    logged instead of propagated.
    """
    # Function-scope import: the module's import block is only partly visible.
    import logging

    try:
        return ocr_and_extract_info(abs_path)
    except Exception:
        logging.getLogger(__name__).exception("OCR failed for %s", abs_path)
        return None


def _merge_ocr_results(page_results):
    """Merge per-page OCR dicts into a single dict.

    Keys are stringified and stripped; blank keys and non-dict items are
    ignored. The first non-empty value seen for a key wins. A later value
    that differs is kept under ``<key>_2``, ``<key>_3``, ... so nothing that
    was recognised is lost.
    """
    merged = {}
    for item in page_results:
        if not isinstance(item, dict):
            continue
        for raw_key, value in item.items():
            key = str(raw_key).strip()
            if not key:
                continue
            if key not in merged or merged.get(key) in (None, ''):
                merged[key] = value
            elif merged.get(key) != value:
                idx = 2
                while f"{key}_{idx}" in merged:
                    idx += 1
                merged[f"{key}_{idx}"] = value
    return merged


@require_http_methods(["POST"])
def upload(request):
    """Store the uploaded files as images, OCR them, and return the results.

    Nothing is written to the database here; the response is meant to be
    reviewed by the user before a separate confirm step.

    Request: multipart POST with one or more ``file`` parts. PDFs are
    rendered page by page to images; any other file is stored as-is.

    Response JSON on success::

        {"status": "success", "message": ..., "items": [
            {"name": ..., "data": {...}, "images": [...], "image_urls": [...]}
        ]}

    Errors are returned as ``{"status": "error", "message": ...}`` with
    status 400 (no file), 401 (not logged in) or 500 (anything else).
    """
    try:
        # NOTE(review): security - when the session has no user, a
        # client-supplied ``user_id`` is trusted and written into the session.
        # Anyone can therefore act as any user id. Kept for compatibility with
        # existing clients; confirm whether this fallback is still required.
        if request.session.get("user_id") is None:
            fallback_uid = request.POST.get("user_id") or request.GET.get("user_id")
            if not fallback_uid:
                return JsonResponse({"status": "error", "message": "未登录"}, status=401)
            request.session["user_id"] = fallback_uid
            request.session.setdefault("permission", 1)

        files = request.FILES.getlist("file")
        if not files:
            one = request.FILES.get("file")
            if one:
                files = [one]
        if not files:
            return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)

        # Fail fast: reject the request before saving or OCR-ing anything if a
        # PDF was sent but PDF rendering is unavailable on this server.
        if not HAS_PDF_SUPPORT and any(f.name.lower().endswith('.pdf') for f in files):
            return JsonResponse({"status": "error", "message": f"服务器未安装PDF处理组件(PyMuPDF): {PDF_ERROR}"}, status=500)

        images_dir = os.path.join(settings.MEDIA_ROOT, "images")
        os.makedirs(images_dir, exist_ok=True)

        # Results are grouped per original uploaded file.
        file_results = []

        for f in files:
            # (abs_path, filename) of every image produced from this file.
            group_images = []
            is_pdf = f.name.lower().endswith('.pdf')

            if is_pdf:
                tmp_path = None
                try:
                    # delete=False: the file must outlive this handle so the
                    # PDF library can reopen it by path.
                    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
                        tmp_path = tmp.name
                        for chunk in f.chunks():
                            tmp.write(chunk)
                    try:
                        group_images = _render_pdf_pages(tmp_path, images_dir)
                    except Exception as e:
                        return JsonResponse({"status": "error", "message": f"PDF {f.name} 转换失败: {str(e)}"}, status=500)
                finally:
                    if tmp_path and os.path.exists(tmp_path):
                        os.remove(tmp_path)
            else:
                # NOTE(review): no content-type check - any non-PDF upload is
                # stored under the media "images" directory unchanged.
                filename = f"{uuid.uuid4()}_{f.name}"
                abs_path = os.path.join(images_dir, filename)
                with open(abs_path, "wb") as dst:
                    for chunk in f.chunks():
                        dst.write(chunk)
                group_images.append((abs_path, filename))

            # OCR the images of this file in parallel. A PDF with no pages
            # yields no images; ThreadPoolExecutor rejects max_workers=0, so
            # the pool is only created when there is work to do.
            page_results = []
            if group_images:
                worker_count = min(len(group_images), 8)
                with concurrent.futures.ThreadPoolExecutor(max_workers=worker_count) as executor:
                    # map() yields results in page order, which keeps the
                    # merge below deterministic regardless of thread timing.
                    ocr_outputs = executor.map(
                        _ocr_image_best_effort,
                        [img_abs for img_abs, _ in group_images],
                    )
                    page_results = [res for res in ocr_outputs if res]

            # Merge the multi-page recognition results of this file.
            merged_group_data = _merge_ocr_results(page_results)
            if not merged_group_data:
                # Nothing recognised: return a placeholder that carries the
                # file name and a hint instead of an empty structure.
                merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"}

            rel_paths = [f"images/{img[1]}" for img in group_images]
            image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]

            file_results.append({
                "name": f.name,
                "data": merged_group_data,
                "images": rel_paths,
                "image_urls": image_urls,
            })

        return JsonResponse({
            "status": "success",
            "message": f"成功处理 {len(file_results)} 个文件,请确认数据后点击录入",
            "items": file_results,
        })
    except Exception as e:
        # Top-level boundary: always answer with JSON rather than an HTML 500.
        return JsonResponse({"status": "error", "message": str(e) or "上传失败"}, status=500)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 确认并入库
|
|
|
|
|
@@ -1580,104 +1583,107 @@ def _dt_label(dt, interval: str):
|
|
|
|
|
|
|
|
|
|
@require_http_methods(["GET"])
def report_view(request):
    """Return record counts grouped by type and by time bucket as JSON.

    Access: requires a logged-in session, and either session permission 0
    (treated as admin here) or a non-empty ``manage_key`` on the user record.

    Query parameters (all optional):
        from, to  -- bounds applied to each record's ``time`` field.
        interval  -- bucket size passed to ``_floor_dt`` / ``_dt_label``;
                     defaults to ``"day"``.
        key       -- keep records whose writer owns this key, or whose
                     ``data`` contains it as a substring.
        type      -- keep records whose extracted type equals this value.

    Responds with 401 (not logged in), 403 (no permission), 500 (any
    unexpected error), or 200 with ``status``/``data`` where ``data`` holds
    ``generated_at``, ``range``, ``filters``, ``total``, ``by_type`` and
    ``by_time``.
    """
    try:
        session_user_id = request.session.get("user_id")
        if session_user_id is None:
            return JsonResponse({"status": "error", "message": "未登录"}, status=401)

        is_admin = int(request.session.get("permission", 1)) == 0
        me = get_user_by_id(session_user_id) or {}
        has_manage_key = bool(me.get("manage_key") or [])
        if not is_admin and not has_manage_key:
            return JsonResponse({"status": "error", "message": "无权限"}, status=403)

        gte = (request.GET.get("from") or "").strip()
        lte = (request.GET.get("to") or "").strip()
        # Default after stripping, so a whitespace-only value buckets by
        # "day" - the same interval that is reported back in "filters".
        interval = (request.GET.get("interval") or "").strip() or "day"
        key = (request.GET.get("key") or "").strip()
        typ = (request.GET.get("type") or "").strip()

        # NOTE(review): if _parse_dt returns None for unparseable input, a bad
        # "from"/"to" silently disables that bound - confirm this is intended.
        gte_dt = _parse_dt(gte) if gte else None
        lte_dt = _parse_dt(lte) if lte else None

        results = search_all()
        results = _filter_results_for_user(request, results)
        filtered = list(results or [])

        if key:
            # Best effort: if the user list cannot be loaded, only the
            # substring match on the record data remains.
            try:
                users = get_all_users() or []
            except Exception:
                users = []

            # Map user id -> set of keys that user owns; malformed user
            # entries are skipped rather than failing the whole report.
            writer_keys_by_id = {}
            for u in users:
                try:
                    u_id = str(u.get("user_id", "")).strip()
                except Exception:
                    u_id = ""
                if not u_id:
                    continue
                try:
                    u_keys = {str(k).strip() for k in (u.get("key") or []) if str(k).strip()}
                except Exception:
                    u_keys = set()
                writer_keys_by_id[u_id] = u_keys

            matched = []
            for r in filtered:
                writer_id = str(r.get("writer_id", "")).strip()
                writer_keys = writer_keys_by_id.get(writer_id)
                if writer_keys and key in writer_keys:
                    matched.append(r)
                elif key in str(r.get("data", "")):
                    matched.append(r)
            filtered = matched

        if typ:
            filtered = [r for r in filtered if _extract_type_from_data(r.get("data")) == typ]

        ranged = []
        for r in filtered:
            t = _parse_dt(r.get("time"))
            # A record without a usable time cannot satisfy a time range.
            if (gte_dt or lte_dt) and t is None:
                continue
            if gte_dt and t and t < gte_dt:
                continue
            if lte_dt and t and t > lte_dt:
                continue
            rr = dict(r)  # copy so the cached datetime never leaks into r
            rr["_time_dt"] = t
            ranged.append(rr)

        by_type = {}
        by_bucket = {}
        for r in ranged:
            tval = _extract_type_from_data(r.get("data"))
            if tval:
                by_type[tval] = by_type.get(tval, 0) + 1
            bucket_dt = _floor_dt(r.get("_time_dt"), interval)
            if bucket_dt:
                label = _dt_label(bucket_dt, interval)
                if label:
                    by_bucket[label] = by_bucket.get(label, 0) + 1

        # Types: most frequent first, ties by name. Buckets: by label.
        by_type_arr = [
            {"type": k, "count": v}
            for k, v in sorted(by_type.items(), key=lambda x: (-x[1], x[0]))
        ]
        by_time_arr = [
            {"bucket": k, "count": v}
            for k, v in sorted(by_bucket.items(), key=lambda x: x[0])
        ]

        return JsonResponse(
            {
                "status": "success",
                "data": {
                    "generated_at": datetime.now(timezone.utc).isoformat(),
                    "range": {"from": gte, "to": lte},
                    "filters": {"key": key, "type": typ, "interval": interval},
                    "total": len(ranged),
                    "by_type": by_type_arr,
                    "by_time": by_time_arr,
                },
            }
        )
    except Exception as e:
        # Top-level boundary: always answer with JSON rather than an HTML 500.
        return JsonResponse({"status": "error", "message": str(e) or "生成失败"}, status=500)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@require_http_methods(["GET"])
|
|
|
|
|
|