2 Commits

Author SHA1 Message Date
DSQ
d4de99971a 修复了数据储存bug[0.2.8.15][ci]
All checks were successful
CI / docker-ci (push) Successful in 24s
2026-05-25 13:16:27 +08:00
DSQ
27f8a64fdb 部分修改[0.2.8.14][ci]
All checks were successful
CI / docker-ci (push) Successful in 3m28s
2026-05-25 10:57:23 +08:00
3 changed files with 53 additions and 12 deletions

View File

@@ -199,7 +199,8 @@ def get_registration_code(code: str):
def list_registration_codes(): def list_registration_codes():
try: try:
search = RegistrationCodeDocument.search() # 增加 size=1000 以支持返回更多注册码
search = RegistrationCodeDocument.search()[:1000]
body = { body = {
"sort": [{"created_at": {"order": "desc"}}], "sort": [{"created_at": {"order": "desc"}}],
"query": {"exists": {"field": "code"}} "query": {"exists": {"field": "code"}}
@@ -297,7 +298,8 @@ def search_data(query):
""" """
try: try:
# 使用Django-elasticsearch-dsl进行搜索 # 使用Django-elasticsearch-dsl进行搜索
search = AchievementDocument.search() # 增加 size=10000 以支持返回更多结果(ES 默认限制为 10000,如需更多需分页)
search = AchievementDocument.search()[:10000]
search = search.query("multi_match", query=query, fields=['*']) search = search.query("multi_match", query=query, fields=['*'])
response = search.execute() response = search.execute()
@@ -319,7 +321,8 @@ def search_data(query):
def search_all(): def search_all():
"""获取所有文档""" """获取所有文档"""
try: try:
search = AchievementDocument.search() # 增加 size=10000 以支持返回更多结果(ES 默认限制为 10000,如需更多需分页)
search = AchievementDocument.search()[:10000]
search = search.query("match_all") search = search.query("match_all")
response = search.execute() response = search.execute()
@@ -421,7 +424,8 @@ def search_by_any_field(keyword):
list: 包含搜索结果的列表 list: 包含搜索结果的列表
""" """
try: try:
search = AchievementDocument.search() # 增加 size=10000 以支持返回更多结果(ES 默认限制为 10000,如需更多需分页)
search = AchievementDocument.search()[:10000]
# 使用multi_match查询在所有字段中搜索 # 使用multi_match查询在所有字段中搜索
search = search.query("multi_match", search = search.query("multi_match",
@@ -988,7 +992,7 @@ def list_registration_code_manage_requests(status: str = None, limit: int = 200)
if status: if status:
must.append({"term": {"status": str(status)}}) must.append({"term": {"status": str(status)}})
body = { body = {
"size": max(1, min(int(limit or 200), 500)), "size": max(1, min(int(limit or 200), 2000)),
"query": {"bool": {"must": must}}, "query": {"bool": {"must": must}},
"sort": [{"created_at": {"order": "desc"}}], "sort": [{"created_at": {"order": "desc"}}],
} }

View File

@@ -103,6 +103,7 @@
<div class="upload-section" id="dropArea"> <div class="upload-section" id="dropArea">
<h3>上传文件</h3> <h3>上传文件</h3>
<p>点击下方按钮选择图片或PDF文件或拖拽文件到此区域</p> <p>点击下方按钮选择图片或PDF文件或拖拽文件到此区域</p>
<p style="margin: 8px 0 0; font-size: 13px; color: #64748b;">单次最多上传 {{ max_single_upload_count|default:"3" }} 个文件。</p>
<form id="uploadForm" enctype="multipart/form-data"> <form id="uploadForm" enctype="multipart/form-data">
{% csrf_token %} {% csrf_token %}
<input type="file" id="fileInput" name="file" accept="image/*,.pdf" multiple /> <input type="file" id="fileInput" name="file" accept="image/*,.pdf" multiple />
@@ -155,6 +156,7 @@ const dropArea = document.getElementById('dropArea');
const progressWrap = document.getElementById('progressWrap'); const progressWrap = document.getElementById('progressWrap');
const progressBar = document.getElementById('progressBar'); const progressBar = document.getElementById('progressBar');
const progressText = document.getElementById('progressText'); const progressText = document.getElementById('progressText');
const MAX_SINGLE_UPLOAD_COUNT = Number('{{ max_single_upload_count|default:"3" }}');
let currentItems = []; // 存储当前待处理的所有文件结果 let currentItems = []; // 存储当前待处理的所有文件结果
let selectedFiles = []; let selectedFiles = [];
@@ -277,13 +279,21 @@ function updateFileHint() {
function addFiles(files) { function addFiles(files) {
const incoming = Array.from(files || []).filter(f => f && (f.type.startsWith('image/') || f.name.toLowerCase().endsWith('.pdf'))); const incoming = Array.from(files || []).filter(f => f && (f.type.startsWith('image/') || f.name.toLowerCase().endsWith('.pdf')));
const existingKeys = new Set(selectedFiles.map(f => `${f.name}|${f.size}|${f.lastModified}`)); const existingKeys = new Set(selectedFiles.map(f => `${f.name}|${f.size}|${f.lastModified}`));
const rejected = [];
incoming.forEach(f => { incoming.forEach(f => {
const key = `${f.name}|${f.size}|${f.lastModified}`; const key = `${f.name}|${f.size}|${f.lastModified}`;
if (!existingKeys.has(key)) { if (!existingKeys.has(key) && selectedFiles.length < MAX_SINGLE_UPLOAD_COUNT) {
existingKeys.add(key); existingKeys.add(key);
selectedFiles.push(f); selectedFiles.push(f);
} else if (!existingKeys.has(key) && selectedFiles.length >= MAX_SINGLE_UPLOAD_COUNT) {
rejected.push(f.name);
} }
}); });
if (rejected.length) {
uploadMsg.textContent = `单次最多上传 ${MAX_SINGLE_UPLOAD_COUNT} 个文件,以下文件未加入:${rejected.join('、')}`;
uploadMsg.className = 'status-message error';
uploadMsg.style.display = 'block';
}
const urls = selectedFiles.map(f => { const urls = selectedFiles.map(f => {
if (f.name.toLowerCase().endsWith('.pdf')) { if (f.name.toLowerCase().endsWith('.pdf')) {
return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+'; return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+';
@@ -460,6 +470,12 @@ uploadForm.addEventListener('submit', async (e) => {
uploadMsg.style.display = 'block'; uploadMsg.style.display = 'block';
return; return;
} }
if (selectedFiles.length > MAX_SINGLE_UPLOAD_COUNT) {
uploadMsg.textContent = `单次最多上传 ${MAX_SINGLE_UPLOAD_COUNT} 个文件,请分批上传`;
uploadMsg.className = 'status-message error';
uploadMsg.style.display = 'block';
return;
}
showProgress(); showProgress();
setProgress(5, '预处理中'); setProgress(5, '预处理中');

View File

@@ -8,6 +8,7 @@ import base64
import json import json
import csv import csv
import io import io
import mimetypes
from datetime import datetime, timezone, timedelta from datetime import datetime, timezone, timedelta
import tempfile import tempfile
import concurrent.futures import concurrent.futures
@@ -40,6 +41,8 @@ except ImportError as e:
HAS_PDF_SUPPORT = False HAS_PDF_SUPPORT = False
PDF_ERROR = str(e) PDF_ERROR = str(e)
MAX_SINGLE_UPLOAD_COUNT = int(getattr(settings, "MAX_SINGLE_UPLOAD_COUNT", 3))
def _filter_results_for_user(request, results): def _filter_results_for_user(request, results):
session_user_id = request.session.get("user_id") session_user_id = request.session.get("user_id")
@@ -614,6 +617,7 @@ def ocr_and_extract_info(image_path: str):
return base64.b64encode(f.read()).decode("utf-8") return base64.b64encode(f.read()).decode("utf-8")
base64_image = encode_image(image_path) base64_image = encode_image(image_path)
mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
# api_key = getattr(settings, "AISTUDIO_API_KEY", "188f57db3766e02ed2c7e18373996d84f4112272") # api_key = getattr(settings, "AISTUDIO_API_KEY", "188f57db3766e02ed2c7e18373996d84f4112272")
# base_url = getattr(settings, "OPENAI_BASE_URL", "https://aistudio.baidu.com/llm/lmapi/v3") # base_url = getattr(settings, "OPENAI_BASE_URL", "https://aistudio.baidu.com/llm/lmapi/v3")
@@ -665,7 +669,7 @@ def ocr_and_extract_info(image_path: str):
"role": "user", "role": "user",
"content": [ "content": [
{"type": "text", "text": f"请识别这张图片中的信息将你认为重要的数据转换为不包含嵌套的json不要显示其它信息以便于解析直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"}, {"type": "text", "text": f"请识别这张图片中的信息将你认为重要的数据转换为不包含嵌套的json不要显示其它信息以便于解析直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"},
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}, {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
], ],
}, },
], ],
@@ -715,6 +719,7 @@ def upload_page(request):
context = { context = {
"user_id": user_id_qs or session_user_id, "user_id": user_id_qs or session_user_id,
"username": me.get("username"), "username": me.get("username"),
"max_single_upload_count": MAX_SINGLE_UPLOAD_COUNT,
} }
return render(request, "elastic/upload.html", context) return render(request, "elastic/upload.html", context)
@@ -738,6 +743,14 @@ def upload(request):
files = [one] files = [one]
if not files: if not files:
return JsonResponse({"status": "error", "message": "未选择文件"}, status=400) return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
if len(files) > MAX_SINGLE_UPLOAD_COUNT:
return JsonResponse(
{
"status": "error",
"message": f"单次最多上传 {MAX_SINGLE_UPLOAD_COUNT} 个文件,请分批上传",
},
status=400,
)
images_dir = os.path.join(settings.MEDIA_ROOT, "images") images_dir = os.path.join(settings.MEDIA_ROOT, "images")
os.makedirs(images_dir, exist_ok=True) os.makedirs(images_dir, exist_ok=True)
@@ -784,17 +797,20 @@ def upload(request):
abs_p, fname = img_info abs_p, fname = img_info
try: try:
data = ocr_and_extract_info(abs_p) data = ocr_and_extract_info(abs_p)
return data return data, None
except Exception: except Exception as e:
return None return None, f"{fname}: {str(e)}"
group_data_list = [] group_data_list = []
group_errors = []
with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
futures = [executor.submit(run_ocr, img_info) for img_info in group_images] futures = [executor.submit(run_ocr, img_info) for img_info in group_images]
for future in concurrent.futures.as_completed(futures): for future in concurrent.futures.as_completed(futures):
res = future.result() res, err = future.result()
if res: if res:
group_data_list.append(res) group_data_list.append(res)
elif err:
group_errors.append(err)
merged_group_data = {} merged_group_data = {}
for item in group_data_list: for item in group_data_list:
@@ -814,7 +830,12 @@ def upload(request):
merged_group_data[f"{base}_{idx}"] = v merged_group_data[f"{base}_{idx}"] = v
if not merged_group_data: if not merged_group_data:
merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"} merged_group_data = {
"文件名": f.name,
"提示": "未识别到具体内容" if not group_errors else "识别失败",
}
if group_errors:
merged_group_data["错误信息"] = "".join(group_errors[:3])
rel_paths = [f"images/{img[1]}" for img in group_images] rel_paths = [f"images/{img[1]}" for img in group_images]
image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths] image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]