This commit is contained in:
@@ -103,6 +103,7 @@
|
|||||||
<div class="upload-section" id="dropArea">
|
<div class="upload-section" id="dropArea">
|
||||||
<h3>上传文件</h3>
|
<h3>上传文件</h3>
|
||||||
<p>点击下方按钮选择图片或PDF文件,或拖拽文件到此区域</p>
|
<p>点击下方按钮选择图片或PDF文件,或拖拽文件到此区域</p>
|
||||||
|
<p style="margin: 8px 0 0; font-size: 13px; color: #64748b;">单次最多上传 {{ max_single_upload_count|default:"3" }} 个文件。</p>
|
||||||
<form id="uploadForm" enctype="multipart/form-data">
|
<form id="uploadForm" enctype="multipart/form-data">
|
||||||
{% csrf_token %}
|
{% csrf_token %}
|
||||||
<input type="file" id="fileInput" name="file" accept="image/*,.pdf" multiple />
|
<input type="file" id="fileInput" name="file" accept="image/*,.pdf" multiple />
|
||||||
@@ -155,6 +156,7 @@ const dropArea = document.getElementById('dropArea');
|
|||||||
const progressWrap = document.getElementById('progressWrap');
|
const progressWrap = document.getElementById('progressWrap');
|
||||||
const progressBar = document.getElementById('progressBar');
|
const progressBar = document.getElementById('progressBar');
|
||||||
const progressText = document.getElementById('progressText');
|
const progressText = document.getElementById('progressText');
|
||||||
|
const MAX_SINGLE_UPLOAD_COUNT = Number('{{ max_single_upload_count|default:"3" }}');
|
||||||
|
|
||||||
let currentItems = []; // 存储当前待处理的所有文件结果
|
let currentItems = []; // 存储当前待处理的所有文件结果
|
||||||
let selectedFiles = [];
|
let selectedFiles = [];
|
||||||
@@ -277,13 +279,21 @@ function updateFileHint() {
|
|||||||
function addFiles(files) {
|
function addFiles(files) {
|
||||||
const incoming = Array.from(files || []).filter(f => f && (f.type.startsWith('image/') || f.name.toLowerCase().endsWith('.pdf')));
|
const incoming = Array.from(files || []).filter(f => f && (f.type.startsWith('image/') || f.name.toLowerCase().endsWith('.pdf')));
|
||||||
const existingKeys = new Set(selectedFiles.map(f => `${f.name}|${f.size}|${f.lastModified}`));
|
const existingKeys = new Set(selectedFiles.map(f => `${f.name}|${f.size}|${f.lastModified}`));
|
||||||
|
const rejected = [];
|
||||||
incoming.forEach(f => {
|
incoming.forEach(f => {
|
||||||
const key = `${f.name}|${f.size}|${f.lastModified}`;
|
const key = `${f.name}|${f.size}|${f.lastModified}`;
|
||||||
if (!existingKeys.has(key)) {
|
if (!existingKeys.has(key) && selectedFiles.length < MAX_SINGLE_UPLOAD_COUNT) {
|
||||||
existingKeys.add(key);
|
existingKeys.add(key);
|
||||||
selectedFiles.push(f);
|
selectedFiles.push(f);
|
||||||
|
} else if (!existingKeys.has(key) && selectedFiles.length >= MAX_SINGLE_UPLOAD_COUNT) {
|
||||||
|
rejected.push(f.name);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
if (rejected.length) {
|
||||||
|
uploadMsg.textContent = `单次最多上传 ${MAX_SINGLE_UPLOAD_COUNT} 个文件,以下文件未加入:${rejected.join('、')}`;
|
||||||
|
uploadMsg.className = 'status-message error';
|
||||||
|
uploadMsg.style.display = 'block';
|
||||||
|
}
|
||||||
const urls = selectedFiles.map(f => {
|
const urls = selectedFiles.map(f => {
|
||||||
if (f.name.toLowerCase().endsWith('.pdf')) {
|
if (f.name.toLowerCase().endsWith('.pdf')) {
|
||||||
return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+';
|
return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+';
|
||||||
@@ -460,6 +470,12 @@ uploadForm.addEventListener('submit', async (e) => {
|
|||||||
uploadMsg.style.display = 'block';
|
uploadMsg.style.display = 'block';
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (selectedFiles.length > MAX_SINGLE_UPLOAD_COUNT) {
|
||||||
|
uploadMsg.textContent = `单次最多上传 ${MAX_SINGLE_UPLOAD_COUNT} 个文件,请分批上传`;
|
||||||
|
uploadMsg.className = 'status-message error';
|
||||||
|
uploadMsg.style.display = 'block';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
showProgress();
|
showProgress();
|
||||||
setProgress(5, '预处理中');
|
setProgress(5, '预处理中');
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import base64
|
|||||||
import json
|
import json
|
||||||
import csv
|
import csv
|
||||||
import io
|
import io
|
||||||
|
import mimetypes
|
||||||
from datetime import datetime, timezone, timedelta
|
from datetime import datetime, timezone, timedelta
|
||||||
import tempfile
|
import tempfile
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
@@ -40,6 +41,8 @@ except ImportError as e:
|
|||||||
HAS_PDF_SUPPORT = False
|
HAS_PDF_SUPPORT = False
|
||||||
PDF_ERROR = str(e)
|
PDF_ERROR = str(e)
|
||||||
|
|
||||||
|
MAX_SINGLE_UPLOAD_COUNT = int(getattr(settings, "MAX_SINGLE_UPLOAD_COUNT", 3))
|
||||||
|
|
||||||
|
|
||||||
def _filter_results_for_user(request, results):
|
def _filter_results_for_user(request, results):
|
||||||
session_user_id = request.session.get("user_id")
|
session_user_id = request.session.get("user_id")
|
||||||
@@ -614,6 +617,7 @@ def ocr_and_extract_info(image_path: str):
|
|||||||
return base64.b64encode(f.read()).decode("utf-8")
|
return base64.b64encode(f.read()).decode("utf-8")
|
||||||
|
|
||||||
base64_image = encode_image(image_path)
|
base64_image = encode_image(image_path)
|
||||||
|
mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
|
||||||
|
|
||||||
# api_key = getattr(settings, "AISTUDIO_API_KEY", "188f57db3766e02ed2c7e18373996d84f4112272")
|
# api_key = getattr(settings, "AISTUDIO_API_KEY", "188f57db3766e02ed2c7e18373996d84f4112272")
|
||||||
# base_url = getattr(settings, "OPENAI_BASE_URL", "https://aistudio.baidu.com/llm/lmapi/v3")
|
# base_url = getattr(settings, "OPENAI_BASE_URL", "https://aistudio.baidu.com/llm/lmapi/v3")
|
||||||
@@ -665,7 +669,7 @@ def ocr_and_extract_info(image_path: str):
|
|||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": f"请识别这张图片中的信息,将你认为重要的数据转换为不包含嵌套的json,不要显示其它信息以便于解析,直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型,除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定,请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"},
|
{"type": "text", "text": f"请识别这张图片中的信息,将你认为重要的数据转换为不包含嵌套的json,不要显示其它信息以便于解析,直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型,除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定,请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"},
|
||||||
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}},
|
{"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
@@ -715,6 +719,7 @@ def upload_page(request):
|
|||||||
context = {
|
context = {
|
||||||
"user_id": user_id_qs or session_user_id,
|
"user_id": user_id_qs or session_user_id,
|
||||||
"username": me.get("username"),
|
"username": me.get("username"),
|
||||||
|
"max_single_upload_count": MAX_SINGLE_UPLOAD_COUNT,
|
||||||
}
|
}
|
||||||
return render(request, "elastic/upload.html", context)
|
return render(request, "elastic/upload.html", context)
|
||||||
|
|
||||||
@@ -738,6 +743,14 @@ def upload(request):
|
|||||||
files = [one]
|
files = [one]
|
||||||
if not files:
|
if not files:
|
||||||
return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
|
return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
|
||||||
|
if len(files) > MAX_SINGLE_UPLOAD_COUNT:
|
||||||
|
return JsonResponse(
|
||||||
|
{
|
||||||
|
"status": "error",
|
||||||
|
"message": f"单次最多上传 {MAX_SINGLE_UPLOAD_COUNT} 个文件,请分批上传",
|
||||||
|
},
|
||||||
|
status=400,
|
||||||
|
)
|
||||||
|
|
||||||
images_dir = os.path.join(settings.MEDIA_ROOT, "images")
|
images_dir = os.path.join(settings.MEDIA_ROOT, "images")
|
||||||
os.makedirs(images_dir, exist_ok=True)
|
os.makedirs(images_dir, exist_ok=True)
|
||||||
@@ -784,17 +797,20 @@ def upload(request):
|
|||||||
abs_p, fname = img_info
|
abs_p, fname = img_info
|
||||||
try:
|
try:
|
||||||
data = ocr_and_extract_info(abs_p)
|
data = ocr_and_extract_info(abs_p)
|
||||||
return data
|
return data, None
|
||||||
except Exception:
|
except Exception as e:
|
||||||
return None
|
return None, f"{fname}: {str(e)}"
|
||||||
|
|
||||||
group_data_list = []
|
group_data_list = []
|
||||||
|
group_errors = []
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
|
||||||
futures = [executor.submit(run_ocr, img_info) for img_info in group_images]
|
futures = [executor.submit(run_ocr, img_info) for img_info in group_images]
|
||||||
for future in concurrent.futures.as_completed(futures):
|
for future in concurrent.futures.as_completed(futures):
|
||||||
res = future.result()
|
res, err = future.result()
|
||||||
if res:
|
if res:
|
||||||
group_data_list.append(res)
|
group_data_list.append(res)
|
||||||
|
elif err:
|
||||||
|
group_errors.append(err)
|
||||||
|
|
||||||
merged_group_data = {}
|
merged_group_data = {}
|
||||||
for item in group_data_list:
|
for item in group_data_list:
|
||||||
@@ -814,7 +830,12 @@ def upload(request):
|
|||||||
merged_group_data[f"{base}_{idx}"] = v
|
merged_group_data[f"{base}_{idx}"] = v
|
||||||
|
|
||||||
if not merged_group_data:
|
if not merged_group_data:
|
||||||
merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"}
|
merged_group_data = {
|
||||||
|
"文件名": f.name,
|
||||||
|
"提示": "未识别到具体内容" if not group_errors else "识别失败",
|
||||||
|
}
|
||||||
|
if group_errors:
|
||||||
|
merged_group_data["错误信息"] = ";".join(group_errors[:3])
|
||||||
|
|
||||||
rel_paths = [f"images/{img[1]}" for img in group_images]
|
rel_paths = [f"images/{img[1]}" for img in group_images]
|
||||||
image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]
|
image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]
|
||||||
|
|||||||
Reference in New Issue
Block a user