From ee46e4cebb2e63efeeccc1492812823caa3be47a Mon Sep 17 00:00:00 2001
From: spdis <q17721073823@outlook.com>
Date: Fri, 14 Nov 2025 21:15:02 +0800
Subject: [PATCH] =?UTF-8?q?=E5=8A=A8=E6=80=81=E7=B1=BB=E5=9E=8B=E5=88=97?=
 =?UTF-8?q?=E8=A1=A8=E4=B8=8A=E7=BA=BF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 elastic/apps.py       |  4 +++-
 elastic/documents.py  |  9 +++++++
 elastic/es_connect.py | 55 ++++++++++++++++++++++++++++++++++++++++---
 elastic/indexes.py    |  5 ++--
 elastic/views.py      |  4 +++-
 5 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/elastic/apps.py b/elastic/apps.py
index d9cf102..b9cce81 100644
--- a/elastic/apps.py
+++ b/elastic/apps.py
@@ -17,8 +17,10 @@ class ElasticConfig(AppConfig):
             return
 
         # 延迟导入，避免循环导入或过早加载
-        from .es_connect import create_index_with_mapping
+        from .es_connect import create_index_with_mapping, get_type_list
         try:
             create_index_with_mapping()
+            types = get_type_list()
+            print(f"🔎 启动时 type_list: {types}")
         except Exception as e:
             print(f"❌ ES 初始化失败: {e}")
\ No newline at end of file
diff --git a/elastic/documents.py b/elastic/documents.py
index f552640..111bf49 100644
--- a/elastic/documents.py
+++ b/elastic/documents.py
@@ -6,6 +6,8 @@ ACHIEVEMENT_INDEX = Index(ACHIEVEMENT_INDEX_NAME)
 ACHIEVEMENT_INDEX.settings(number_of_shards=1, number_of_replicas=0)
 USER_INDEX = Index(USER_INDEX_NAME)
 USER_INDEX.settings(number_of_shards=1, number_of_replicas=0)
+GLOBAL_INDEX = Index(GLOBAL_INDEX_NAME)  # NOTE(review): confirm GLOBAL_INDEX_NAME is imported in this module
+GLOBAL_INDEX.settings(number_of_shards=1, number_of_replicas=0)  # same shard/replica settings as the two indexes above
 
 
 
@@ -37,3 +39,10 @@ class UserDocument(Document):
         model = User
         # fields列表应该只包含需要特殊处理的字段，或者可以完全省略
         # 因为我们已经显式定义了所有字段
+
+@GLOBAL_INDEX.doc_type
+class GlobalDocument(Document):  # document type of GLOBAL_INDEX; holds the shared list of type names
+    type_list = fields.KeywordField()  # es_connect assigns a list of type-name strings to this field
+
+    class Django:
+        model = ElasticNews  # NOTE(review): es_connect builds this document directly, not from ElasticNews rows — confirm this binding
diff --git a/elastic/es_connect.py b/elastic/es_connect.py
index 913bd8d..47afdf5 100644
--- a/elastic/es_connect.py
+++ b/elastic/es_connect.py
@@ -4,8 +4,8 @@ Django版本的ES连接和操作模块
 """
 from elasticsearch import Elasticsearch
 from elasticsearch_dsl import connections
-from .documents import AchievementDocument, UserDocument
-from .indexes import ACHIEVEMENT_INDEX_NAME, USER_INDEX_NAME
+from .documents import AchievementDocument, UserDocument, GlobalDocument
+from .indexes import ACHIEVEMENT_INDEX_NAME, USER_INDEX_NAME, GLOBAL_INDEX_NAME
 import hashlib
 import time
 
@@ -17,6 +17,7 @@ es = connections.get_connection()
 
 DATA_INDEX_NAME = ACHIEVEMENT_INDEX_NAME
 USERS_INDEX_NAME = USER_INDEX_NAME
+GLOBAL_TYPES_INDEX_NAME = GLOBAL_INDEX_NAME  # local alias; create_index_with_mapping checks this index for the type list
 
 def create_index_with_mapping():
     """创建索引和映射配置（仅当索引不存在时）"""
@@ -36,7 +37,25 @@ def create_index_with_mapping():
         else:
             print(f"ℹ️ 索引 {USERS_INDEX_NAME} 已存在，跳过创建")
 
-        # --- 3. 创建默认管理员用户（可选：也可检查用户是否已存在）---
+        # --- 3. 处理全局类型索引 ---
+        if not es.indices.exists(index=GLOBAL_TYPES_INDEX_NAME):
+            GlobalDocument.init()
+            default_types = ['软著', '专利', '奖状']
+            doc = GlobalDocument(type_list=default_types)
+            doc.meta.id = 'types'
+            doc.save()
+            print(f"✅ 创建索引 {GLOBAL_TYPES_INDEX_NAME} 并写入默认类型")
+        else:
+            try:
+                GlobalDocument.get(id='types')
+            except Exception:
+                default_types = ['软著', '专利', '奖状']
+                doc = GlobalDocument(type_list=default_types)
+                doc.meta.id = 'types'
+                doc.save()
+                print("ℹ️ 全局类型文档缺失，已补充默认类型")
+
+        # --- 4. 创建默认管理员用户（可选：也可检查用户是否已存在）---
         # 这里简单处理：每次初始化都写入（可能重复），建议加唯一性判断
         admin_user = {
             "user_id": 0,
@@ -57,6 +76,36 @@ def create_index_with_mapping():
         print(f"❌ 创建索引失败: {str(e)}")
         # raise  # 可选：在 AppConfig 中捕获，这里可以 re-raise 便于调试
 
+def get_type_list():
+    """Return stored type names with whitespace/';' stripped; defaults on any error."""
+    try:
+        stored = GlobalDocument.get(id='types').type_list or []
+        return [str(item).strip().strip(';') for item in stored]
+    except Exception:  # best-effort read: fall back to the built-in default types
+        return ['软著', '专利', '奖状']
+
+def ensure_type_in_list(type_name: str):
+    """Add type_name to the stored type list; return True only if it was appended."""
+    from elasticsearch import NotFoundError
+    norm = str(type_name or '').strip().strip(';')
+    if not norm:  # None/empty, or nothing left once whitespace and ';' are stripped
+        return False
+    try:
+        try:
+            doc = GlobalDocument.get(id='types')
+            cur = list(doc.type_list or [])
+        except NotFoundError:  # re-seed only if missing; other errors must not overwrite
+            cur = ['软著', '专利', '奖状']
+            doc = GlobalDocument(type_list=cur)
+            doc.meta.id = 'types'
+        if norm in {str(t).strip().strip(';') for t in cur}:
+            return False
+        doc.type_list = [*cur, norm]
+        doc.save()
+        return True
+    except Exception:  # best-effort: any storage failure is reported as "not added"
+        return False
+
 def get_doc_id(data):
     """
     根据数据内容生成唯一ID（用于去重）
diff --git a/elastic/indexes.py b/elastic/indexes.py
index 356b835..8deb548 100644
--- a/elastic/indexes.py
+++ b/elastic/indexes.py
@@ -1,4 +1,5 @@
-INDEX_NAME = "wordsearch266666"
-USER_NAME = "users"
+INDEX_NAME = "wordsearch266666789"  # NOTE(review): renamed index — presumably leaves data under the old name behind; confirm intended
+USER_NAME = "users_123"  # NOTE(review): renamed as well — confirm existing users do not need migrating
 ACHIEVEMENT_INDEX_NAME = INDEX_NAME
 USER_INDEX_NAME = USER_NAME
+GLOBAL_INDEX_NAME = "global11111"  # index holding the shared type-list document (see GlobalDocument)
diff --git a/elastic/views.py b/elastic/views.py
index 7a8b13d..45d94f7 100644
--- a/elastic/views.py
+++ b/elastic/views.py
@@ -244,13 +244,14 @@ def ocr_and_extract_info(image_path: str):
     base_url="https://aistudio.baidu.com/llm/lmapi/v3"
     client = OpenAI(api_key=api_key, base_url=base_url)
 
+    types = get_type_list()
     chat_completion = client.chat.completions.create(
         messages=[
             {"role": "system", "content": "你是一个能理解图片和文本的助手，请根据用户提供的信息进行回答。"},
             {
                 "role": "user",
                 "content": [
-                    {"type": "text", "text": "请识别这张图片中的信息，将你认为重要的数据转换为不包含嵌套的json，不要显示其它信息以便于解析直接输出json结果即可你可以自行决定使用哪些json字段"},
+                    {"type": "text", "text": f"请识别这张图片中的信息，将你认为重要的数据转换为不包含嵌套的json，不要显示其它信息以便于解析，直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型，除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定，请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型，确定不包含后你才可以填入你觉得合适的大致分类。"},
                     {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}},
                 ],
             },
@@ -370,6 +371,7 @@ def confirm(request):
     if not isinstance(edited, dict) or not edited:
         return JsonResponse({"status": "error", "message": "数据不能为空"}, status=400)
 
+    ensure_type_in_list(edited.get("数据类型"))
     to_store = {
         "writer_id": str(request.session.get("user_id")),
         "data": json_to_string(edited),