部分修改[0.2.8.14][ci]
All checks were successful
CI / docker-ci (push) Successful in 3m28s

This commit is contained in:
DSQ
2026-05-25 10:57:23 +08:00
parent 01a3b2dfdb
commit 27f8a64fdb
2 changed files with 44 additions and 7 deletions

View File

@@ -103,6 +103,7 @@
<div class="upload-section" id="dropArea"> <div class="upload-section" id="dropArea">
<h3>上传文件</h3> <h3>上传文件</h3>
<p>点击下方按钮选择图片或PDF文件或拖拽文件到此区域</p> <p>点击下方按钮选择图片或PDF文件或拖拽文件到此区域</p>
<p style="margin: 8px 0 0; font-size: 13px; color: #64748b;">单次最多上传 {{ max_single_upload_count|default:"3" }} 个文件。</p>
<form id="uploadForm" enctype="multipart/form-data"> <form id="uploadForm" enctype="multipart/form-data">
{% csrf_token %} {% csrf_token %}
<input type="file" id="fileInput" name="file" accept="image/*,.pdf" multiple /> <input type="file" id="fileInput" name="file" accept="image/*,.pdf" multiple />
@@ -155,6 +156,7 @@ const dropArea = document.getElementById('dropArea');
const progressWrap = document.getElementById('progressWrap'); const progressWrap = document.getElementById('progressWrap');
const progressBar = document.getElementById('progressBar'); const progressBar = document.getElementById('progressBar');
const progressText = document.getElementById('progressText'); const progressText = document.getElementById('progressText');
const MAX_SINGLE_UPLOAD_COUNT = Number('{{ max_single_upload_count|default:"3" }}');
let currentItems = []; // 存储当前待处理的所有文件结果 let currentItems = []; // 存储当前待处理的所有文件结果
let selectedFiles = []; let selectedFiles = [];
@@ -277,13 +279,21 @@ function updateFileHint() {
function addFiles(files) { function addFiles(files) {
const incoming = Array.from(files || []).filter(f => f && (f.type.startsWith('image/') || f.name.toLowerCase().endsWith('.pdf'))); const incoming = Array.from(files || []).filter(f => f && (f.type.startsWith('image/') || f.name.toLowerCase().endsWith('.pdf')));
const existingKeys = new Set(selectedFiles.map(f => `${f.name}|${f.size}|${f.lastModified}`)); const existingKeys = new Set(selectedFiles.map(f => `${f.name}|${f.size}|${f.lastModified}`));
const rejected = [];
incoming.forEach(f => { incoming.forEach(f => {
const key = `${f.name}|${f.size}|${f.lastModified}`; const key = `${f.name}|${f.size}|${f.lastModified}`;
if (!existingKeys.has(key)) { if (!existingKeys.has(key) && selectedFiles.length < MAX_SINGLE_UPLOAD_COUNT) {
existingKeys.add(key); existingKeys.add(key);
selectedFiles.push(f); selectedFiles.push(f);
} else if (!existingKeys.has(key) && selectedFiles.length >= MAX_SINGLE_UPLOAD_COUNT) {
rejected.push(f.name);
} }
}); });
if (rejected.length) {
uploadMsg.textContent = `单次最多上传 ${MAX_SINGLE_UPLOAD_COUNT} 个文件,以下文件未加入:${rejected.join('、')}`;
uploadMsg.className = 'status-message error';
uploadMsg.style.display = 'block';
}
const urls = selectedFiles.map(f => { const urls = selectedFiles.map(f => {
if (f.name.toLowerCase().endsWith('.pdf')) { if (f.name.toLowerCase().endsWith('.pdf')) {
return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+'; return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+';
@@ -460,6 +470,12 @@ uploadForm.addEventListener('submit', async (e) => {
uploadMsg.style.display = 'block'; uploadMsg.style.display = 'block';
return; return;
} }
if (selectedFiles.length > MAX_SINGLE_UPLOAD_COUNT) {
uploadMsg.textContent = `单次最多上传 ${MAX_SINGLE_UPLOAD_COUNT} 个文件,请分批上传`;
uploadMsg.className = 'status-message error';
uploadMsg.style.display = 'block';
return;
}
showProgress(); showProgress();
setProgress(5, '预处理中'); setProgress(5, '预处理中');

View File

@@ -8,6 +8,7 @@ import base64
import json import json
import csv import csv
import io import io
import mimetypes
from datetime import datetime, timezone, timedelta from datetime import datetime, timezone, timedelta
import tempfile import tempfile
import concurrent.futures import concurrent.futures
@@ -40,6 +41,8 @@ except ImportError as e:
HAS_PDF_SUPPORT = False HAS_PDF_SUPPORT = False
PDF_ERROR = str(e) PDF_ERROR = str(e)
MAX_SINGLE_UPLOAD_COUNT = int(getattr(settings, "MAX_SINGLE_UPLOAD_COUNT", 3))
def _filter_results_for_user(request, results): def _filter_results_for_user(request, results):
session_user_id = request.session.get("user_id") session_user_id = request.session.get("user_id")
@@ -614,6 +617,7 @@ def ocr_and_extract_info(image_path: str):
return base64.b64encode(f.read()).decode("utf-8") return base64.b64encode(f.read()).decode("utf-8")
base64_image = encode_image(image_path) base64_image = encode_image(image_path)
mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
# api_key = getattr(settings, "AISTUDIO_API_KEY", "188f57db3766e02ed2c7e18373996d84f4112272") # api_key = getattr(settings, "AISTUDIO_API_KEY", "188f57db3766e02ed2c7e18373996d84f4112272")
# base_url = getattr(settings, "OPENAI_BASE_URL", "https://aistudio.baidu.com/llm/lmapi/v3") # base_url = getattr(settings, "OPENAI_BASE_URL", "https://aistudio.baidu.com/llm/lmapi/v3")
@@ -665,7 +669,7 @@ def ocr_and_extract_info(image_path: str):
"role": "user", "role": "user",
"content": [ "content": [
{"type": "text", "text": f"请识别这张图片中的信息将你认为重要的数据转换为不包含嵌套的json不要显示其它信息以便于解析直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"}, {"type": "text", "text": f"请识别这张图片中的信息将你认为重要的数据转换为不包含嵌套的json不要显示其它信息以便于解析直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"},
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}, {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
], ],
}, },
], ],
@@ -715,6 +719,7 @@ def upload_page(request):
context = { context = {
"user_id": user_id_qs or session_user_id, "user_id": user_id_qs or session_user_id,
"username": me.get("username"), "username": me.get("username"),
"max_single_upload_count": MAX_SINGLE_UPLOAD_COUNT,
} }
return render(request, "elastic/upload.html", context) return render(request, "elastic/upload.html", context)
@@ -738,6 +743,14 @@ def upload(request):
files = [one] files = [one]
if not files: if not files:
return JsonResponse({"status": "error", "message": "未选择文件"}, status=400) return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
if len(files) > MAX_SINGLE_UPLOAD_COUNT:
return JsonResponse(
{
"status": "error",
"message": f"单次最多上传 {MAX_SINGLE_UPLOAD_COUNT} 个文件,请分批上传",
},
status=400,
)
images_dir = os.path.join(settings.MEDIA_ROOT, "images") images_dir = os.path.join(settings.MEDIA_ROOT, "images")
os.makedirs(images_dir, exist_ok=True) os.makedirs(images_dir, exist_ok=True)
@@ -784,17 +797,20 @@ def upload(request):
abs_p, fname = img_info abs_p, fname = img_info
try: try:
data = ocr_and_extract_info(abs_p) data = ocr_and_extract_info(abs_p)
return data return data, None
except Exception: except Exception as e:
return None return None, f"{fname}: {str(e)}"
group_data_list = [] group_data_list = []
group_errors = []
with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
futures = [executor.submit(run_ocr, img_info) for img_info in group_images] futures = [executor.submit(run_ocr, img_info) for img_info in group_images]
for future in concurrent.futures.as_completed(futures): for future in concurrent.futures.as_completed(futures):
res = future.result() res, err = future.result()
if res: if res:
group_data_list.append(res) group_data_list.append(res)
elif err:
group_errors.append(err)
merged_group_data = {} merged_group_data = {}
for item in group_data_list: for item in group_data_list:
@@ -814,7 +830,12 @@ def upload(request):
merged_group_data[f"{base}_{idx}"] = v merged_group_data[f"{base}_{idx}"] = v
if not merged_group_data: if not merged_group_data:
merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"} merged_group_data = {
"文件名": f.name,
"提示": "未识别到具体内容" if not group_errors else "识别失败",
}
if group_errors:
merged_group_data["错误信息"] = "".join(group_errors[:3])
rel_paths = [f"images/{img[1]}" for img in group_images] rel_paths = [f"images/{img[1]}" for img in group_images]
image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths] image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]