From ee46e4cebb2e63efeeccc1492812823caa3be47a Mon Sep 17 00:00:00 2001 From: spdis Date: Fri, 14 Nov 2025 21:15:02 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8A=A8=E6=80=81=E7=B1=BB=E5=9E=8B=E5=88=97?= =?UTF-8?q?=E8=A1=A8=E4=B8=8A=E7=BA=BF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- elastic/apps.py | 4 +++- elastic/documents.py | 9 +++++++ elastic/es_connect.py | 55 ++++++++++++++++++++++++++++++++++++++++--- elastic/indexes.py | 5 ++-- elastic/views.py | 4 +++- 5 files changed, 70 insertions(+), 7 deletions(-) diff --git a/elastic/apps.py b/elastic/apps.py index d9cf102..b9cce81 100644 --- a/elastic/apps.py +++ b/elastic/apps.py @@ -17,8 +17,10 @@ class ElasticConfig(AppConfig): return # 延迟导入,避免循环导入或过早加载 - from .es_connect import create_index_with_mapping + from .es_connect import create_index_with_mapping, get_type_list try: create_index_with_mapping() + types = get_type_list() + print(f"🔎 启动时 type_list: {types}") except Exception as e: print(f"❌ ES 初始化失败: {e}") \ No newline at end of file diff --git a/elastic/documents.py b/elastic/documents.py index f552640..111bf49 100644 --- a/elastic/documents.py +++ b/elastic/documents.py @@ -6,6 +6,8 @@ ACHIEVEMENT_INDEX = Index(ACHIEVEMENT_INDEX_NAME) ACHIEVEMENT_INDEX.settings(number_of_shards=1, number_of_replicas=0) USER_INDEX = Index(USER_INDEX_NAME) USER_INDEX.settings(number_of_shards=1, number_of_replicas=0) +GLOBAL_INDEX = Index(GLOBAL_INDEX_NAME) +GLOBAL_INDEX.settings(number_of_shards=1, number_of_replicas=0) @@ -37,3 +39,10 @@ class UserDocument(Document): model = User # fields列表应该只包含需要特殊处理的字段,或者可以完全省略 # 因为我们已经显式定义了所有字段 + +@GLOBAL_INDEX.doc_type +class GlobalDocument(Document): + type_list = fields.KeywordField() + + class Django: + model = ElasticNews diff --git a/elastic/es_connect.py b/elastic/es_connect.py index 913bd8d..47afdf5 100644 --- a/elastic/es_connect.py +++ b/elastic/es_connect.py @@ -4,8 +4,8 @@ Django版本的ES连接和操作模块 """ from elasticsearch import Elasticsearch from elasticsearch_dsl import connections -from .documents import AchievementDocument, UserDocument -from .indexes import ACHIEVEMENT_INDEX_NAME, USER_INDEX_NAME +from .documents import AchievementDocument, UserDocument, GlobalDocument +from .indexes import ACHIEVEMENT_INDEX_NAME, USER_INDEX_NAME, GLOBAL_INDEX_NAME import hashlib import time @@ -17,6 +17,7 @@ es = connections.get_connection() DATA_INDEX_NAME = ACHIEVEMENT_INDEX_NAME USERS_INDEX_NAME = USER_INDEX_NAME +GLOBAL_TYPES_INDEX_NAME = GLOBAL_INDEX_NAME def create_index_with_mapping(): """创建索引和映射配置(仅当索引不存在时)""" @@ -36,7 +37,25 @@ def create_index_with_mapping(): else: print(f"ℹ️ 索引 {USERS_INDEX_NAME} 已存在,跳过创建") - # --- 3. 创建默认管理员用户(可选:也可检查用户是否已存在)--- + # --- 3. 处理全局类型索引 --- + if not es.indices.exists(index=GLOBAL_TYPES_INDEX_NAME): + GlobalDocument.init() + default_types = ['软著', '专利', '奖状'] + doc = GlobalDocument(type_list=default_types) + doc.meta.id = 'types' + doc.save() + print(f"✅ 创建索引 {GLOBAL_TYPES_INDEX_NAME} 并写入默认类型") + else: + try: + GlobalDocument.get(id='types') + except Exception: + default_types = ['软著', '专利', '奖状'] + doc = GlobalDocument(type_list=default_types) + doc.meta.id = 'types' + doc.save() + print("ℹ️ 全局类型文档缺失,已补充默认类型") + + # --- 4. 创建默认管理员用户(可选:也可检查用户是否已存在)--- # 这里简单处理:每次初始化都写入(可能重复),建议加唯一性判断 admin_user = { "user_id": 0, @@ -57,6 +76,36 @@ def create_index_with_mapping(): print(f"❌ 创建索引失败: {str(e)}") # raise # 可选:在 AppConfig 中捕获,这里可以 re-raise 便于调试 +def get_type_list(): + try: + doc = GlobalDocument.get(id='types') + lst = [str(t).strip().strip(';') for t in (doc.type_list or [])] + return lst + except Exception: + return ['软著', '专利', '奖状'] + +def ensure_type_in_list(type_name: str): + if not type_name: + return False + norm = str(type_name).strip().strip(';') + try: + try: + doc = GlobalDocument.get(id='types') + cur = list(doc.type_list or []) + except Exception: + cur = ['软著', '专利', '奖状'] + doc = GlobalDocument(type_list=cur) + doc.meta.id = 'types' + cur_sanitized = {str(t).strip().strip(';') for t in cur} + if norm not in cur_sanitized: + cur.append(norm) + doc.type_list = cur + doc.save() + return True + return False + except Exception: + return False + def get_doc_id(data): """ 根据数据内容生成唯一ID(用于去重) diff --git a/elastic/indexes.py b/elastic/indexes.py index 356b835..8deb548 100644 --- a/elastic/indexes.py +++ b/elastic/indexes.py @@ -1,4 +1,5 @@ -INDEX_NAME = "wordsearch266666" -USER_NAME = "users" +INDEX_NAME = "wordsearch266666789" +USER_NAME = "users_123" ACHIEVEMENT_INDEX_NAME = INDEX_NAME USER_INDEX_NAME = USER_NAME +GLOBAL_INDEX_NAME = "global11111" diff --git a/elastic/views.py b/elastic/views.py index 7a8b13d..45d94f7 100644 --- a/elastic/views.py +++ b/elastic/views.py @@ -244,13 +244,14 @@ def ocr_and_extract_info(image_path: str): base_url="https://aistudio.baidu.com/llm/lmapi/v3" client = OpenAI(api_key=api_key, base_url=base_url) + types = get_type_list() chat_completion = client.chat.completions.create( messages=[ {"role": "system", "content": "你是一个能理解图片和文本的助手,请根据用户提供的信息进行回答。"}, { "role": "user", "content": [ - {"type": "text", "text": "请识别这张图片中的信息,将你认为重要的数据转换为不包含嵌套的json,不要显示其它信息以便于解析直接输出json结果即可你可以自行决定使用哪些json字段"}, + {"type": "text", "text": f"请识别这张图片中的信息,将你认为重要的数据转换为不包含嵌套的json,不要显示其它信息以便于解析,直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型,除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定,请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型,确定不包含后你才可以填入你觉得合适的大致分类。"}, {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}, ], }, @@ -370,6 +371,7 @@ def confirm(request): if not isinstance(edited, dict) or not edited: return JsonResponse({"status": "error", "message": "数据不能为空"}, status=400) + ensure_type_in_list(edited.get("数据类型")) to_store = { "writer_id": str(request.session.get("user_id")), "data": json_to_string(edited),