diff --git a/elastic/templates/elastic/upload.html b/elastic/templates/elastic/upload.html index 0b59b79..18207d1 100644 --- a/elastic/templates/elastic/upload.html +++ b/elastic/templates/elastic/upload.html @@ -103,6 +103,7 @@

上传文件

点击下方按钮选择图片或PDF文件,或拖拽文件到此区域

+

单次最多上传 {{ max_single_upload_count|default:"3" }} 个文件。

{% csrf_token %} @@ -155,6 +156,7 @@ const dropArea = document.getElementById('dropArea'); const progressWrap = document.getElementById('progressWrap'); const progressBar = document.getElementById('progressBar'); const progressText = document.getElementById('progressText'); +const MAX_SINGLE_UPLOAD_COUNT = Number('{{ max_single_upload_count|default:"3" }}'); let currentItems = []; // 存储当前待处理的所有文件结果 let selectedFiles = []; @@ -277,13 +279,21 @@ function updateFileHint() { function addFiles(files) { const incoming = Array.from(files || []).filter(f => f && (f.type.startsWith('image/') || f.name.toLowerCase().endsWith('.pdf'))); const existingKeys = new Set(selectedFiles.map(f => `${f.name}|${f.size}|${f.lastModified}`)); + const rejected = []; incoming.forEach(f => { const key = `${f.name}|${f.size}|${f.lastModified}`; - if (!existingKeys.has(key)) { + if (!existingKeys.has(key) && selectedFiles.length < MAX_SINGLE_UPLOAD_COUNT) { existingKeys.add(key); selectedFiles.push(f); + } else if (!existingKeys.has(key) && selectedFiles.length >= MAX_SINGLE_UPLOAD_COUNT) { + rejected.push(f.name); } }); + if (rejected.length) { + uploadMsg.textContent = `单次最多上传 ${MAX_SINGLE_UPLOAD_COUNT} 个文件,以下文件未加入:${rejected.join('、')}`; + uploadMsg.className = 'status-message error'; + uploadMsg.style.display = 'block'; + } const urls = selectedFiles.map(f => { if (f.name.toLowerCase().endsWith('.pdf')) { return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+'; @@ -460,6 +470,12 @@ uploadForm.addEventListener('submit', async (e) => { uploadMsg.style.display = 'block'; return; } + if (selectedFiles.length > MAX_SINGLE_UPLOAD_COUNT) { + uploadMsg.textContent = `单次最多上传 ${MAX_SINGLE_UPLOAD_COUNT} 个文件,请分批上传`; + uploadMsg.className = 'status-message error'; + uploadMsg.style.display = 'block'; + return; + } showProgress(); setProgress(5, '预处理中'); diff --git a/elastic/views.py b/elastic/views.py index 86c686f..b6e8966 100644 --- a/elastic/views.py +++ b/elastic/views.py @@ -8,6 +8,7 @@ import base64 import json import csv import io +import mimetypes from datetime import datetime, timezone, timedelta import tempfile import concurrent.futures @@ -40,6 +41,8 @@ except ImportError as e: HAS_PDF_SUPPORT = False PDF_ERROR = str(e) +MAX_SINGLE_UPLOAD_COUNT = int(getattr(settings, "MAX_SINGLE_UPLOAD_COUNT", 3)) + def _filter_results_for_user(request, results): session_user_id = request.session.get("user_id") @@ -614,6 +617,7 @@ def ocr_and_extract_info(image_path: str): return base64.b64encode(f.read()).decode("utf-8") base64_image = encode_image(image_path) + mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg" # api_key = getattr(settings, "AISTUDIO_API_KEY", "188f57db3766e02ed2c7e18373996d84f4112272") # base_url = getattr(settings, "OPENAI_BASE_URL", "https://aistudio.baidu.com/llm/lmapi/v3") @@ -665,7 +669,7 @@ def ocr_and_extract_info(image_path: str): "role": "user", "content": [ {"type": "text", "text": f"请识别这张图片中的信息,将你认为重要的数据转换为不包含嵌套的json,不要显示其它信息以便于解析,直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型,除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定,请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"}, - {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}, + {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}}, ], }, ], @@ -715,6 +719,7 @@ def upload_page(request): context = { "user_id": user_id_qs or session_user_id, "username": me.get("username"), + "max_single_upload_count": MAX_SINGLE_UPLOAD_COUNT, } return render(request, "elastic/upload.html", context) @@ -738,6 +743,14 @@ def upload(request): files = [one] if not files: return JsonResponse({"status": "error", "message": "未选择文件"}, status=400) + if len(files) > MAX_SINGLE_UPLOAD_COUNT: + return JsonResponse( + { + "status": "error", + "message": f"单次最多上传 {MAX_SINGLE_UPLOAD_COUNT} 个文件,请分批上传", + }, + status=400, + ) images_dir = os.path.join(settings.MEDIA_ROOT, "images") os.makedirs(images_dir, exist_ok=True) @@ -784,17 +797,20 @@ def upload(request): abs_p, fname = img_info try: data = ocr_and_extract_info(abs_p) - return data - except Exception: - return None + return data, None + except Exception as e: + return None, f"{fname}: {str(e)}" group_data_list = [] + group_errors = [] with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor: futures = [executor.submit(run_ocr, img_info) for img_info in group_images] for future in concurrent.futures.as_completed(futures): - res = future.result() + res, err = future.result() if res: group_data_list.append(res) + elif err: + group_errors.append(err) merged_group_data = {} for item in group_data_list: @@ -814,7 +830,12 @@ def upload(request): merged_group_data[f"{base}_{idx}"] = v if not merged_group_data: - merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"} + merged_group_data = { + "文件名": f.name, + "提示": "未识别到具体内容" if not group_errors else "识别失败", + } + if group_errors: + merged_group_data["错误信息"] = ";".join(group_errors[:3]) rel_paths = [f"images/{img[1]}" for img in group_images] image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]