部分修改[0.2.8.14][ci]
All checks were successful
CI / docker-ci (push) Successful in 3m28s

This commit is contained in:
DSQ
2026-05-25 10:57:23 +08:00
parent 01a3b2dfdb
commit 27f8a64fdb
2 changed files with 44 additions and 7 deletions

View File

@@ -8,6 +8,7 @@ import base64
import json
import csv
import io
+import mimetypes
from datetime import datetime, timezone, timedelta
import tempfile
import concurrent.futures
@@ -40,6 +41,8 @@ except ImportError as e:
HAS_PDF_SUPPORT = False
PDF_ERROR = str(e)
+MAX_SINGLE_UPLOAD_COUNT = int(getattr(settings, "MAX_SINGLE_UPLOAD_COUNT", 3))
def _filter_results_for_user(request, results):
session_user_id = request.session.get("user_id")
@@ -614,6 +617,7 @@ def ocr_and_extract_info(image_path: str):
return base64.b64encode(f.read()).decode("utf-8")
base64_image = encode_image(image_path)
+mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
# api_key = getattr(settings, "AISTUDIO_API_KEY", "188f57db3766e02ed2c7e18373996d84f4112272")
# base_url = getattr(settings, "OPENAI_BASE_URL", "https://aistudio.baidu.com/llm/lmapi/v3")
@@ -665,7 +669,7 @@ def ocr_and_extract_info(image_path: str):
"role": "user",
"content": [
{"type": "text", "text": f"请识别这张图片中的信息将你认为重要的数据转换为不包含嵌套的json不要显示其它信息以便于解析直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"},
-{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}},
+{"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
],
},
],
@@ -715,6 +719,7 @@ def upload_page(request):
context = {
"user_id": user_id_qs or session_user_id,
"username": me.get("username"),
+"max_single_upload_count": MAX_SINGLE_UPLOAD_COUNT,
}
return render(request, "elastic/upload.html", context)
@@ -738,6 +743,14 @@ def upload(request):
files = [one]
if not files:
return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
+if len(files) > MAX_SINGLE_UPLOAD_COUNT:
+return JsonResponse(
+{
+"status": "error",
+"message": f"单次最多上传 {MAX_SINGLE_UPLOAD_COUNT} 个文件，请分批上传",
+},
+status=400,
+)
images_dir = os.path.join(settings.MEDIA_ROOT, "images")
os.makedirs(images_dir, exist_ok=True)
@@ -784,17 +797,20 @@ def upload(request):
abs_p, fname = img_info
try:
data = ocr_and_extract_info(abs_p)
-return data
-except Exception:
-return None
+return data, None
+except Exception as e:
+return None, f"{fname}: {str(e)}"
group_data_list = []
+group_errors = []
with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
futures = [executor.submit(run_ocr, img_info) for img_info in group_images]
for future in concurrent.futures.as_completed(futures):
-res = future.result()
+res, err = future.result()
if res:
group_data_list.append(res)
+elif err:
+group_errors.append(err)
merged_group_data = {}
for item in group_data_list:
@@ -814,7 +830,12 @@ def upload(request):
merged_group_data[f"{base}_{idx}"] = v
if not merged_group_data:
-merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"}
+merged_group_data = {
+"文件名": f.name,
+"提示": "未识别到具体内容" if not group_errors else "识别失败",
+}
+if group_errors:
+merged_group_data["错误信息"] = "".join(group_errors[:3])
rel_paths = [f"images/{img[1]}" for img in group_images]
image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]