版本更新:
1、已实现多图识别并入库;2、增加图片上传时删除图片的功能;3、改用模型 glm-4.6v(预计 5 月份到期);4、已对环境 txt 做更改
This commit is contained in:
219
elastic/views.py
219
elastic/views.py
@@ -42,6 +42,29 @@ def _image_ref_to_url(request, image_ref: str) -> str:
|
||||
return ''
|
||||
|
||||
|
||||
def _parse_image_refs(image_ref):
|
||||
if not image_ref:
|
||||
return []
|
||||
if isinstance(image_ref, (list, tuple)):
|
||||
return [str(x) for x in image_ref if str(x).strip()]
|
||||
if isinstance(image_ref, str):
|
||||
s = image_ref.strip()
|
||||
if not s:
|
||||
return []
|
||||
parsed = None
|
||||
if s[:1] in ('[', '"'):
|
||||
try:
|
||||
parsed = json.loads(s)
|
||||
except Exception:
|
||||
parsed = None
|
||||
if isinstance(parsed, list):
|
||||
return [str(x) for x in parsed if str(x).strip()]
|
||||
if isinstance(parsed, str):
|
||||
s = parsed.strip()
|
||||
return [s] if s else []
|
||||
return []
|
||||
|
||||
|
||||
def _attach_image_urls(request, items):
|
||||
out = []
|
||||
for it in list(items or []):
|
||||
@@ -49,7 +72,11 @@ def _attach_image_urls(request, items):
|
||||
d = dict(it or {})
|
||||
except Exception:
|
||||
continue
|
||||
d['image_url'] = _image_ref_to_url(request, d.get('image', ''))
|
||||
refs = _parse_image_refs(d.get('image', ''))
|
||||
urls = [_image_ref_to_url(request, r) for r in refs if str(r).strip()]
|
||||
urls = [u for u in urls if u]
|
||||
d['image_urls'] = urls
|
||||
d['image_url'] = urls[0] if urls else _image_ref_to_url(request, d.get('image', ''))
|
||||
out.append(d)
|
||||
return out
|
||||
|
||||
@@ -180,7 +207,11 @@ def update_data(request, doc_id):
|
||||
if "writer_id" in payload:
|
||||
updated["writer_id"] = payload["writer_id"]
|
||||
if "image" in payload:
|
||||
updated["image"] = payload["image"]
|
||||
img_val = payload["image"]
|
||||
if isinstance(img_val, list):
|
||||
updated["image"] = json_to_string(img_val)
|
||||
else:
|
||||
updated["image"] = img_val
|
||||
if "data" in payload:
|
||||
v = payload["data"]
|
||||
if isinstance(v, dict):
|
||||
@@ -359,7 +390,7 @@ def string_to_json(s):
|
||||
|
||||
# 移植自 a.py 的核心:调用大模型进行 OCR/信息抽取
|
||||
def ocr_and_extract_info(image_path: str):
|
||||
from openai import OpenAI
|
||||
# from openai import OpenAI
|
||||
def encode_image(path: str) -> str:
|
||||
with open(path, "rb") as f:
|
||||
return base64.b64encode(f.read()).decode("utf-8")
|
||||
@@ -372,12 +403,42 @@ def ocr_and_extract_info(image_path: str):
|
||||
# raise RuntimeError("缺少 AISTUDIO_API_KEY,请在环境变量或 settings 中配置")
|
||||
|
||||
|
||||
api_key = getattr(settings, "AISTUDIO_API_KEY", "")
|
||||
base_url = getattr(settings, "OPENAI_BASE_URL", "")
|
||||
if not api_key or not base_url:
|
||||
raise RuntimeError("缺少模型服务配置,请设置 AISTUDIO_API_KEY 与 OPENAI_BASE_URL")
|
||||
client = OpenAI(api_key=api_key, base_url=base_url)
|
||||
# api_key = getattr(settings, "AISTUDIO_API_KEY", "")
|
||||
# base_url = getattr(settings, "OPENAI_BASE_URL", "")
|
||||
# if not api_key or not base_url:
|
||||
# raise RuntimeError("缺少模型服务配置,请设置 AISTUDIO_API_KEY 与 OPENAI_BASE_URL")
|
||||
# client = OpenAI(api_key=api_key, base_url=base_url)
|
||||
# types = get_type_list()
|
||||
# chat_completion = client.chat.completions.create(
|
||||
# messages=[
|
||||
# {"role": "system", "content": "你是一个能理解图片和文本的助手,请根据用户提供的信息进行回答。"},
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": [
|
||||
# {"type": "text", "text": f"请识别这张图片中的信息,将你认为重要的数据转换为不包含嵌套的json,不要显示其它信息以便于解析,直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型,除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定,请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"},
|
||||
# {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}},
|
||||
# ],
|
||||
# },
|
||||
# ],
|
||||
# model="glm-5",
|
||||
# )
|
||||
# response_text = chat_completion.choices[0].message.content
|
||||
|
||||
from zai import ZhipuAiClient
|
||||
import httpx
|
||||
# api_key = (
|
||||
# getattr(settings, "ZHIPU_API_KEY", "")
|
||||
# or getattr(settings, "ZAI_API_KEY", "")
|
||||
# or getattr(settings, "AISTUDIO_API_KEY", "")
|
||||
# )
|
||||
# if not api_key:
|
||||
# raise RuntimeError("缺少模型服务配置,请设置 ZHIPU_API_KEY")
|
||||
# base_url = (
|
||||
# getattr(settings, "ZHIPU_BASE_URL", "")
|
||||
# or getattr(settings, "ZAI_BASE_URL", "")
|
||||
# or "https://open.bigmodel.cn/api/paas/v4/"
|
||||
# )
|
||||
client = ZhipuAiClient(api_key="fb83a3f91e8c4e45af811236548765a2.cX4kUhigHm7VNowf")
|
||||
types = get_type_list()
|
||||
chat_completion = client.chat.completions.create(
|
||||
messages=[
|
||||
@@ -390,9 +451,8 @@ def ocr_and_extract_info(image_path: str):
|
||||
],
|
||||
},
|
||||
],
|
||||
model=getattr(settings, "OPENAI_MODEL_NAME", "ernie-4.5-turbo-vl-32k"),
|
||||
model="glm-4.6v",
|
||||
)
|
||||
|
||||
response_text = chat_completion.choices[0].message.content
|
||||
|
||||
def parse_response(text: str):
|
||||
@@ -448,35 +508,66 @@ def upload(request):
|
||||
else:
|
||||
return JsonResponse({"status": "error", "message": "未登录"}, status=401)
|
||||
|
||||
file = request.FILES.get("file")
|
||||
if not file:
|
||||
files = request.FILES.getlist("file")
|
||||
if not files:
|
||||
one = request.FILES.get("file")
|
||||
if one:
|
||||
files = [one]
|
||||
if not files:
|
||||
return JsonResponse({"status": "error", "message": "未选择文件"}, status=400)
|
||||
|
||||
images_dir = os.path.join(settings.MEDIA_ROOT, "images")
|
||||
os.makedirs(images_dir, exist_ok=True)
|
||||
filename = f"{uuid.uuid4()}_{file.name}"
|
||||
abs_path = os.path.join(images_dir, filename)
|
||||
|
||||
with open(abs_path, "wb") as dst:
|
||||
for chunk in file.chunks():
|
||||
dst.write(chunk)
|
||||
|
||||
try:
|
||||
data = ocr_and_extract_info(abs_path)
|
||||
if not data:
|
||||
return JsonResponse({"status": "error", "message": "无法识别图片内容"}, status=400)
|
||||
|
||||
rel_paths = []
|
||||
image_urls = []
|
||||
data_list = []
|
||||
for file in files:
|
||||
filename = f"{uuid.uuid4()}_{file.name}"
|
||||
abs_path = os.path.join(images_dir, filename)
|
||||
with open(abs_path, "wb") as dst:
|
||||
for chunk in file.chunks():
|
||||
dst.write(chunk)
|
||||
try:
|
||||
data = ocr_and_extract_info(abs_path)
|
||||
except Exception as e:
|
||||
return JsonResponse({"status": "error", "message": str(e)}, status=500)
|
||||
if data:
|
||||
data_list.append(data)
|
||||
rel_path = f"images/{filename}"
|
||||
image_url = request.build_absolute_uri(settings.MEDIA_URL + rel_path)
|
||||
return JsonResponse({
|
||||
"status": "success",
|
||||
"message": "识别成功,请确认数据后点击录入",
|
||||
"data": data,
|
||||
"image": rel_path,
|
||||
"image_url": image_url,
|
||||
})
|
||||
except Exception as e:
|
||||
return JsonResponse({"status": "error", "message": str(e)}, status=500)
|
||||
rel_paths.append(rel_path)
|
||||
image_urls.append(request.build_absolute_uri(settings.MEDIA_URL + rel_path))
|
||||
|
||||
if not data_list:
|
||||
return JsonResponse({"status": "error", "message": "无法识别图片内容"}, status=400)
|
||||
|
||||
merged = {}
|
||||
for item in data_list:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
for k, v in item.items():
|
||||
key = str(k).strip()
|
||||
if not key:
|
||||
continue
|
||||
if key not in merged or merged.get(key) in (None, ''):
|
||||
merged[key] = v
|
||||
continue
|
||||
if merged.get(key) == v:
|
||||
continue
|
||||
base = key
|
||||
idx = 2
|
||||
while f"{base}_{idx}" in merged:
|
||||
idx += 1
|
||||
merged[f"{base}_{idx}"] = v
|
||||
|
||||
return JsonResponse({
|
||||
"status": "success",
|
||||
"message": "识别成功,请确认数据后点击录入",
|
||||
"data": merged,
|
||||
"images": rel_paths,
|
||||
"image_urls": image_urls,
|
||||
"image": rel_paths[0] if rel_paths else "",
|
||||
"image_url": image_urls[0] if image_urls else "",
|
||||
})
|
||||
|
||||
|
||||
# 确认并入库
|
||||
@@ -508,38 +599,44 @@ def confirm(request):
|
||||
ensure_type_in_list(edited.get("数据类型"))
|
||||
image_ref_to_store = ""
|
||||
temp_files_to_delete = []
|
||||
if image_rel:
|
||||
image_rels = _parse_image_refs(image_rel)
|
||||
if image_rels:
|
||||
images_dir = os.path.join(settings.MEDIA_ROOT, "images")
|
||||
os.makedirs(images_dir, exist_ok=True)
|
||||
src_abs = os.path.join(settings.MEDIA_ROOT, image_rel)
|
||||
if not os.path.isfile(src_abs):
|
||||
return JsonResponse({"status": "error", "message": "图片文件不存在"}, status=400)
|
||||
|
||||
webp_name = f"{uuid.uuid4().hex}.webp"
|
||||
webp_abs = os.path.join(images_dir, webp_name)
|
||||
try:
|
||||
with Image.open(src_abs) as im:
|
||||
if im.mode in ("RGBA", "LA", "P"):
|
||||
im = im.convert("RGBA")
|
||||
else:
|
||||
im = im.convert("RGB")
|
||||
im.save(webp_abs, format="WEBP", quality=80)
|
||||
except Exception:
|
||||
image_refs = []
|
||||
for rel in image_rels:
|
||||
src_abs = os.path.join(settings.MEDIA_ROOT, rel)
|
||||
if not os.path.isfile(src_abs):
|
||||
return JsonResponse({"status": "error", "message": "图片文件不存在"}, status=400)
|
||||
webp_name = f"{uuid.uuid4().hex}.webp"
|
||||
webp_abs = os.path.join(images_dir, webp_name)
|
||||
try:
|
||||
if os.path.isfile(webp_abs):
|
||||
os.remove(webp_abs)
|
||||
with Image.open(src_abs) as im:
|
||||
if im.mode in ("RGBA", "LA", "P"):
|
||||
im = im.convert("RGBA")
|
||||
else:
|
||||
im = im.convert("RGB")
|
||||
im.save(webp_abs, format="WEBP", quality=80)
|
||||
except Exception:
|
||||
pass
|
||||
return JsonResponse({"status": "error", "message": "图片转换WEBP失败"}, status=500)
|
||||
try:
|
||||
if os.path.isfile(webp_abs):
|
||||
os.remove(webp_abs)
|
||||
except Exception:
|
||||
pass
|
||||
return JsonResponse({"status": "error", "message": "图片转换WEBP失败"}, status=500)
|
||||
|
||||
try:
|
||||
object_name = f"images/{webp_name}"
|
||||
from minio_storage.minio_connect import upload_file
|
||||
upload_file(webp_abs, object_name, content_type="image/webp")
|
||||
image_ref_to_store = f"minio:{object_name}"
|
||||
temp_files_to_delete.extend([src_abs, webp_abs])
|
||||
except Exception as e:
|
||||
return JsonResponse({"status": "error", "message": f"上传到MinIO失败: {e}"}, status=500)
|
||||
try:
|
||||
object_name = f"images/{webp_name}"
|
||||
from minio_storage.minio_connect import upload_file
|
||||
upload_file(webp_abs, object_name, content_type="image/webp")
|
||||
image_refs.append(f"minio:{object_name}")
|
||||
temp_files_to_delete.extend([src_abs, webp_abs])
|
||||
except Exception as e:
|
||||
return JsonResponse({"status": "error", "message": f"上传到MinIO失败: {e}"}, status=500)
|
||||
if len(image_refs) == 1:
|
||||
image_ref_to_store = image_refs[0]
|
||||
elif len(image_refs) > 1:
|
||||
image_ref_to_store = json_to_string(image_refs)
|
||||
|
||||
to_store = {
|
||||
"writer_id": str(request.session.get("user_id")),
|
||||
|
||||
Reference in New Issue
Block a user