4 Commits

Author SHA1 Message Date
DSQ
0404c7e274 修BUG[0.2.8.16][ci]
All checks were successful
CI / docker-ci (push) Successful in 21s
2026-05-31 15:17:28 +08:00
DSQ
69c5747867 增加一键导出excel的功能[0.2.8.15][ci]
All checks were successful
CI / docker-ci (push) Successful in 23s
2026-05-25 18:39:34 +08:00
DSQ
d4de99971a 修复了数据储存bug[0.2.8.15][ci]
All checks were successful
CI / docker-ci (push) Successful in 24s
2026-05-25 13:16:27 +08:00
DSQ
27f8a64fdb 部分修改[0.2.8.14][ci]
All checks were successful
CI / docker-ci (push) Successful in 3m28s
2026-05-25 10:57:23 +08:00
5 changed files with 288 additions and 13 deletions

View File

@@ -199,7 +199,8 @@ def get_registration_code(code: str):
def list_registration_codes():
try:
search = RegistrationCodeDocument.search()
# 增加 size=1000 以支持返回更多注册码
search = RegistrationCodeDocument.search()[:1000]
body = {
"sort": [{"created_at": {"order": "desc"}}],
"query": {"exists": {"field": "code"}}
@@ -297,7 +298,8 @@ def search_data(query):
"""
try:
# 使用Django-elasticsearch-dsl进行搜索
search = AchievementDocument.search()
# 增加 size=10000 以支持返回更多结果ES默认限制为10000如需更多需分页
search = AchievementDocument.search()[:10000]
search = search.query("multi_match", query=query, fields=['*'])
response = search.execute()
@@ -319,7 +321,8 @@ def search_data(query):
def search_all():
"""获取所有文档"""
try:
search = AchievementDocument.search()
# 增加 size=10000 以支持返回更多结果ES默认限制为10000如需更多需分页
search = AchievementDocument.search()[:10000]
search = search.query("match_all")
response = search.execute()
@@ -421,7 +424,8 @@ def search_by_any_field(keyword):
list: 包含搜索结果的列表
"""
try:
search = AchievementDocument.search()
# 增加 size=10000 以支持返回更多结果ES默认限制为10000如需更多需分页
search = AchievementDocument.search()[:10000]
# 使用multi_match查询在所有字段中搜索
search = search.query("multi_match",
@@ -988,7 +992,7 @@ def list_registration_code_manage_requests(status: str = None, limit: int = 200)
if status:
must.append({"term": {"status": str(status)}})
body = {
"size": max(1, min(int(limit or 200), 500)),
"size": max(1, min(int(limit or 200), 2000)),
"query": {"bool": {"must": must}},
"sort": [{"created_at": {"order": "desc"}}],
}

View File

@@ -86,6 +86,7 @@
<button class="btn btn-primary" onclick="performSearch('exact')">关键词搜索</button>
<button class="btn btn-secondary" onclick="performSearch('fuzzy')">模糊搜索</button>
<button class="btn" onclick="loadAllData()">显示全部</button>
<button class="btn btn-primary" onclick="exportAllData()">一键导出Excel</button>
<button class="btn" onclick="clearSearch()">清空结果</button>
</div>
@@ -507,6 +508,10 @@ function downloadReportCsv() {
window.location.href = `/elastic/report/csv/?${params.toString()}`;
}
// One-click export: point the browser at the achievements export endpoint.
// The server answers with an attachment, so the current page stays in place
// while the file downloads.
function exportAllData() {
    const exportEndpoint = "/elastic/export_achievements_csv/";
    window.location.href = exportEndpoint;
}
// 渲染表格
function renderTable(data) {
tableBody.innerHTML = '';

View File

@@ -103,6 +103,7 @@
<div class="upload-section" id="dropArea">
<h3>上传文件</h3>
<p>点击下方按钮选择图片或PDF文件或拖拽文件到此区域</p>
<p style="margin: 8px 0 0; font-size: 13px; color: #64748b;">单次最多上传 {{ max_single_upload_count|default:"3" }} 个文件。</p>
<form id="uploadForm" enctype="multipart/form-data">
{% csrf_token %}
<input type="file" id="fileInput" name="file" accept="image/*,.pdf" multiple />
@@ -155,6 +156,7 @@ const dropArea = document.getElementById('dropArea');
const progressWrap = document.getElementById('progressWrap');
const progressBar = document.getElementById('progressBar');
const progressText = document.getElementById('progressText');
const MAX_SINGLE_UPLOAD_COUNT = Number('{{ max_single_upload_count|default:"3" }}');
let currentItems = []; // 存储当前待处理的所有文件结果
let selectedFiles = [];
@@ -277,13 +279,21 @@ function updateFileHint() {
function addFiles(files) {
const incoming = Array.from(files || []).filter(f => f && (f.type.startsWith('image/') || f.name.toLowerCase().endsWith('.pdf')));
const existingKeys = new Set(selectedFiles.map(f => `${f.name}|${f.size}|${f.lastModified}`));
const rejected = [];
incoming.forEach(f => {
const key = `${f.name}|${f.size}|${f.lastModified}`;
if (!existingKeys.has(key)) {
if (!existingKeys.has(key) && selectedFiles.length < MAX_SINGLE_UPLOAD_COUNT) {
existingKeys.add(key);
selectedFiles.push(f);
} else if (!existingKeys.has(key) && selectedFiles.length >= MAX_SINGLE_UPLOAD_COUNT) {
rejected.push(f.name);
}
});
if (rejected.length) {
uploadMsg.textContent = `单次最多上传 ${MAX_SINGLE_UPLOAD_COUNT} 个文件,以下文件未加入:${rejected.join('、')}`;
uploadMsg.className = 'status-message error';
uploadMsg.style.display = 'block';
}
const urls = selectedFiles.map(f => {
if (f.name.toLowerCase().endsWith('.pdf')) {
return 'data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0OCIgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9IiNlZjQ0NDQiIHN0cm9rZS13aWR0aD0iMiIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIiBzdHJva2UtbGluZWpvaW49InJvdW5kIj48cGF0aCBkPSJNMTQgMmgyYTIgMiAwIDAgMSAyIDJ2MTZhMiAyIDAgMCAxLTIgMmgtMTJhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJoMiIvPjxwYXRoIGQ9Ik0xNCAydjRjMCAxLjEgLjkgMiAyIDJoNCIvPjxwYXRoIGQ9Ik03IDloNSIvPjxwYXRoIGQ9Ik03IDEzaDUiLz48cGF0aCBkPSJNNyAxN2g4Ii8+PC9zdmc+';
@@ -460,6 +470,12 @@ uploadForm.addEventListener('submit', async (e) => {
uploadMsg.style.display = 'block';
return;
}
if (selectedFiles.length > MAX_SINGLE_UPLOAD_COUNT) {
uploadMsg.textContent = `单次最多上传 ${MAX_SINGLE_UPLOAD_COUNT} 个文件,请分批上传`;
uploadMsg.className = 'status-message error';
uploadMsg.style.display = 'block';
return;
}
showProgress();
setProgress(5, '预处理中');

View File

@@ -23,6 +23,7 @@ urlpatterns = [
path('filter/', views.filter_view, name='filter'),
path('report/', views.report_view, name='report'),
path('report/csv/', views.report_csv_view, name='report_csv'),
path('export_achievements_csv/', views.export_achievements_csv, name='export_achievements_csv'),
# 用户管理
path('users/', views.get_users, name='get_users'),

View File

@@ -8,6 +8,14 @@ import base64
import json
import csv
import io
import mimetypes
try:
import openpyxl
from openpyxl.utils import get_column_letter
from openpyxl.drawing.image import Image as XLImage
HAS_OPENPYXL = True
except ImportError:
HAS_OPENPYXL = False
from datetime import datetime, timezone, timedelta
import tempfile
import concurrent.futures
@@ -40,6 +48,8 @@ except ImportError as e:
HAS_PDF_SUPPORT = False
PDF_ERROR = str(e)
MAX_SINGLE_UPLOAD_COUNT = int(getattr(settings, "MAX_SINGLE_UPLOAD_COUNT", 3))
def _filter_results_for_user(request, results):
session_user_id = request.session.get("user_id")
@@ -614,6 +624,7 @@ def ocr_and_extract_info(image_path: str):
return base64.b64encode(f.read()).decode("utf-8")
base64_image = encode_image(image_path)
mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
# api_key = getattr(settings, "AISTUDIO_API_KEY", "<REDACTED>")  # SECURITY(review): a real-looking key was committed here as a default; rotate it and read it from settings/env only
# base_url = getattr(settings, "OPENAI_BASE_URL", "https://aistudio.baidu.com/llm/lmapi/v3")
@@ -665,7 +676,7 @@ def ocr_and_extract_info(image_path: str):
"role": "user",
"content": [
{"type": "text", "text": f"请识别这张图片中的信息将你认为重要的数据转换为不包含嵌套的json不要显示其它信息以便于解析直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"},
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}},
{"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}},
],
},
],
@@ -715,6 +726,7 @@ def upload_page(request):
context = {
"user_id": user_id_qs or session_user_id,
"username": me.get("username"),
"max_single_upload_count": MAX_SINGLE_UPLOAD_COUNT,
}
return render(request, "elastic/upload.html", context)
@@ -738,6 +750,14 @@ def upload(request):
files = [one]
if not files:
return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
if len(files) > MAX_SINGLE_UPLOAD_COUNT:
return JsonResponse(
{
"status": "error",
"message": f"单次最多上传 {MAX_SINGLE_UPLOAD_COUNT} 个文件,请分批上传",
},
status=400,
)
images_dir = os.path.join(settings.MEDIA_ROOT, "images")
os.makedirs(images_dir, exist_ok=True)
@@ -784,17 +804,20 @@ def upload(request):
abs_p, fname = img_info
try:
data = ocr_and_extract_info(abs_p)
return data
except Exception:
return None
return data, None
except Exception as e:
return None, f"{fname}: {str(e)}"
group_data_list = []
group_errors = []
with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(group_images), 8)) as executor:
futures = [executor.submit(run_ocr, img_info) for img_info in group_images]
for future in concurrent.futures.as_completed(futures):
res = future.result()
res, err = future.result()
if res:
group_data_list.append(res)
elif err:
group_errors.append(err)
merged_group_data = {}
for item in group_data_list:
@@ -814,10 +837,34 @@ def upload(request):
merged_group_data[f"{base}_{idx}"] = v
if not merged_group_data:
merged_group_data = {"文件名": f.name, "提示": "未识别到具体内容"}
merged_group_data = {
"文件名": f.name,
"提示": "未识别到具体内容" if not group_errors else "识别失败",
}
if group_errors:
merged_group_data["错误信息"] = "".join(group_errors[:3])
rel_paths = [f"images/{img[1]}" for img in group_images]
image_urls = [request.build_absolute_uri(settings.MEDIA_URL + rp) for rp in rel_paths]
# 改进:如果配置了 MinIO则在上传阶段就同步到 MinIO确保在线版本待处理列表能显示图片
image_urls = []
from minio_storage.minio_connect import is_minio_configured, upload_file, presigned_get_url
minio_enabled = is_minio_configured()
for rp in rel_paths:
abs_p = os.path.join(settings.MEDIA_ROOT, rp)
if minio_enabled:
try:
# 上传到 MinIO
upload_file(abs_p, rp)
# 生成预签名 URL
url = presigned_get_url(rp)
image_urls.append(url)
except Exception as e:
print(f"上传临时图片到 MinIO 失败: {e}")
image_urls.append(request.build_absolute_uri(settings.MEDIA_URL + rp))
else:
image_urls.append(request.build_absolute_uri(settings.MEDIA_URL + rp))
file_results.append({
"name": f.name,
@@ -1729,6 +1776,208 @@ def report_csv_view(request):
out["Content-Disposition"] = 'attachment; filename="report.csv"'
return out
# Fixed leading columns of every export, in display order.
_EXPORT_BASE_HEADERS = ("ID", "录入人", "时间")
# Header of the trailing picture column (Excel output only).
_EXPORT_IMAGE_HEADER = "成果图片"
# ASCII control characters that openpyxl refuses to write (IllegalCharacterError).
# Tab is legal; CR/LF are handled separately when cleaning text.
_EXPORT_CONTROL_CHARS = {code: None for code in range(32) if code not in (9, 10, 13)}


def _export_cell_value(value):
    """Coerce one parsed field into something both csv and openpyxl can write.

    Scalars (None/bool/int/float) pass through unchanged. Containers are
    serialized to JSON text because openpyxl raises ValueError for lists and
    dicts. Text has line breaks flattened and illegal control characters removed.
    """
    if value is None or isinstance(value, (bool, int, float)):
        return value
    if isinstance(value, (dict, list, tuple, set)):
        try:
            value = json.dumps(
                list(value) if isinstance(value, set) else value,
                ensure_ascii=False,
                default=str,
            )
        except (TypeError, ValueError):
            value = str(value)
    elif not isinstance(value, str):
        value = str(value)
    return value.replace('\r', '').replace('\n', ' ').translate(_EXPORT_CONTROL_CHARS)


def _export_flatten_record(item):
    """Flatten one search hit into ``(row, image_refs)``.

    ``row`` maps column header -> cell value. ``image_refs`` is kept outside the
    row so that no OCR field name can overwrite it.
    """
    raw_data = item.get("data", "{}")
    try:
        parsed = json.loads(raw_data) if isinstance(raw_data, str) else raw_data
    except (ValueError, RecursionError):
        parsed = {"原始数据": str(raw_data)}
    if not isinstance(parsed, dict):
        parsed = {"数据内容": str(parsed)}

    row = {
        "ID": item.get("_id", ""),
        "录入人": item.get("writer_name") or item.get("writer_id", ""),
        "时间": format_datetime_for_export(item.get("time")),
    }
    reserved = set(_EXPORT_BASE_HEADERS) | {_EXPORT_IMAGE_HEADER}
    for key, value in parsed.items():
        # Keys become headers: force text (sorting mixed key types would fail)
        # and apply the same cleaning as cell text.
        name = _export_cell_value(str(key))
        if name in reserved:
            # An OCR field named like a built-in column must not replace the
            # real document ID / writer / time, nor duplicate a header.
            name = f"{name}(数据)"
        row[name] = _export_cell_value(value)

    return row, _parse_image_refs(item.get("image", ""))


def _export_write_cell(ws, row_num, col_num, value):
    """Write *value* to a worksheet cell, always storing text as plain text."""
    cell = ws.cell(row=row_num, column=col_num, value=value)
    if isinstance(value, str) and cell.data_type == "f":
        # openpyxl treats any string starting with "=" as a formula; exported
        # OCR text must never be evaluated by the spreadsheet application.
        cell.data_type = "s"
    return cell


def _export_build_workbook(headers, rows, image_refs_per_row):
    """Build the workbook; the last header is the picture column."""
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "成果数据"

    for col_num, header in enumerate(headers, 1):
        cell = _export_write_cell(ws, 1, col_num, header)
        cell.font = openpyxl.styles.Font(bold=True)
        cell.alignment = openpyxl.styles.Alignment(horizontal='center', vertical='center')

    img_col_index = len(headers)
    for row_num, (row_data, refs) in enumerate(zip(rows, image_refs_per_row), 2):
        for col_num, header in enumerate(headers[:-1], 1):
            _export_write_cell(ws, row_num, col_num, row_data.get(header, ""))

        if not refs:
            continue
        # Only the first picture of a record is embedded.
        img_stream = _get_image_bytes(refs[0])
        if img_stream is None:
            continue
        try:
            img = XLImage(img_stream)
            # Scale to a fixed 80px height, preserving the aspect ratio.
            aspect_ratio = img.width / img.height
            img.height = 80
            img.width = 80 * aspect_ratio
            ws.add_image(img, f"{get_column_letter(img_col_index)}{row_num}")
            # 80 px is roughly 60 pt; leave a little padding.
            ws.row_dimensions[row_num].height = 65
        except Exception as e:
            # A single broken picture must not abort the whole export.
            ws.cell(row=row_num, column=img_col_index, value=f"图片加载失败: {str(e)}")

    # Size columns to their longest value (capped), fixed width for pictures.
    for i, column_cells in enumerate(ws.columns, 1):
        letter = get_column_letter(i)
        if i == img_col_index:
            ws.column_dimensions[letter].width = 20
            continue
        length = max(len(str(cell.value or "")) for cell in column_cells)
        ws.column_dimensions[letter].width = min(length + 2, 50)
    return wb


@require_http_methods(["GET"])
def export_achievements_csv(request):
    """Export every achievement visible to the current user as a download.

    Requires a logged-in session (``user_id``), otherwise answers 401. Records
    come from ``search_all()``, are filtered by ``_filter_results_for_user`` and
    enriched by ``_attach_writer_names``; an empty result answers 404.

    Each record's JSON ``data`` is flattened into columns: the fixed columns
    ID / 录入人 / 时间 followed by the union of all data keys, sorted. When
    openpyxl is available the response is an ``.xlsx`` workbook with an extra
    picture column; otherwise it is a UTF-8 (BOM) ``.csv`` without pictures.

    Any unexpected failure is printed with its traceback and answered with 500.
    """
    try:
        session_user_id = request.session.get("user_id")
        if session_user_id is None:
            return HttpResponse("Unauthorized", status=401)

        results = search_all()
        results = _filter_results_for_user(request, results)
        results = _attach_writer_names(results)
        if not results:
            return HttpResponse("No data to export", status=404)

        rows = []
        image_refs_per_row = []
        dynamic_keys = set()
        for item in results:
            row, refs = _export_flatten_record(item)
            rows.append(row)
            image_refs_per_row.append(refs)
            dynamic_keys.update(k for k in row if k not in _EXPORT_BASE_HEADERS)

        headers = list(_EXPORT_BASE_HEADERS) + sorted(dynamic_keys)
        filename_base = f"achievements_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        if HAS_OPENPYXL:
            wb = _export_build_workbook(
                headers + [_EXPORT_IMAGE_HEADER], rows, image_refs_per_row
            )
            response = HttpResponse(
                content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            )
            response['Content-Disposition'] = f'attachment; filename="{filename_base}.xlsx"'
            wb.save(response)
            return response

        # CSV fallback when openpyxl is not installed.
        # NOTE(review): text is written verbatim here; spreadsheet applications
        # may still interpret a leading "=" as a formula when opening the CSV.
        output = io.StringIO()
        output.write('\ufeff')  # UTF-8 BOM so Excel detects the encoding
        writer = csv.DictWriter(output, fieldnames=headers, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(rows)

        response = HttpResponse(output.getvalue(), content_type='text/csv; charset=utf-8')
        response['Content-Disposition'] = f'attachment; filename="{filename_base}.csv"'
        return response
    except Exception as e:
        import traceback
        traceback.print_exc()
        return HttpResponse(f"导出失败: {str(e)}", status=500)
def format_datetime_for_export(t):
    """Render a timestamp as ``YYYY-MM-DD HH:MM:SS`` for export.

    Accepts a ``datetime`` or anything whose string form is ISO-8601 (a
    trailing ``Z`` is read as ``+00:00``). Falsy input yields an empty string;
    values that cannot be parsed are returned as ``str(t)`` unchanged.
    """
    if not t:
        return ""
    try:
        moment = (
            t
            if isinstance(t, datetime)
            else datetime.fromisoformat(str(t).replace('Z', '+00:00'))
        )
        return moment.strftime("%Y-%m-%d %H:%M:%S")
    except Exception:
        return str(t)
def _get_image_bytes(image_ref):
"""根据 image_ref 获取图片字节流,并确保转换为 Excel 支持的格式 (如 JPEG/PNG)"""
s = str(image_ref or '').strip()
if not s:
return None
img_raw_bytes = None
if s.startswith('minio:'):
object_name = s[len('minio:'):].lstrip('/')
try:
from minio_storage.minio_connect import get_minio_client, get_bucket_name
client = get_minio_client()
bucket = get_bucket_name()
if client:
response = client.get_object(bucket, object_name)
img_raw_bytes = response.read()
except Exception:
pass
elif s.startswith('local:'):
rel_path = s[len('local:'):].lstrip('/')
abs_path = os.path.join(settings.MEDIA_ROOT, rel_path)
if os.path.isfile(abs_path):
try:
with open(abs_path, 'rb') as f:
img_raw_bytes = f.read()
except Exception:
pass
if not img_raw_bytes:
return None
# 处理 WebP 或其他 openpyxl 可能不支持的格式
try:
from PIL import Image as PILImage
img_io = io.BytesIO(img_raw_bytes)
with PILImage.open(img_io) as pil_img:
# 如果是 WebP 或带有透明通道的图片,转换为 RGB 格式并存为 JPEG 或 PNG
# Excel 对 PNG 支持较好
output_io = io.BytesIO()
if pil_img.format == 'WEBP' or pil_img.mode in ('RGBA', 'LA', 'P'):
rgb_img = pil_img.convert('RGB')
rgb_img.save(output_io, format='JPEG', quality=85)
else:
pil_img.save(output_io, format=pil_img.format or 'JPEG')
output_io.seek(0)
return output_io
except Exception as e:
print(f"图片转换失败: {str(e)}")
return io.BytesIO(img_raw_bytes) # 尝试直接返回原始数据作为最后手段
@require_http_methods(["POST"])
@csrf_protect
def revoke_registration_code_view(request):