动态类型列表上线

2025-11-14 21:15:02 +08:00
parent e2c93d6933
commit ee46e4cebb
5 changed files with 70 additions and 7 deletions
--- a/elastic/apps.py
+++ b/elastic/apps.py
@@ -17,8 +17,10 @@ class ElasticConfig(AppConfig):
            return
        # 延迟导入，避免循环导入或过早加载
-        from .es_connect import create_index_with_mapping
+        from .es_connect import create_index_with_mapping, get_type_list
        try:
            create_index_with_mapping()
            types = get_type_list()
            print(f"🔎 启动时 type_list: {types}")
        except Exception as e:
            print(f"❌ ES 初始化失败: {e}")
--- a/elastic/documents.py
+++ b/elastic/documents.py
@@ -6,6 +6,8 @@ ACHIEVEMENT_INDEX = Index(ACHIEVEMENT_INDEX_NAME)
 ACHIEVEMENT_INDEX.settings(number_of_shards=1, number_of_replicas=0)
 USER_INDEX = Index(USER_INDEX_NAME)
 USER_INDEX.settings(number_of_shards=1, number_of_replicas=0)
 GLOBAL_INDEX = Index(GLOBAL_INDEX_NAME)
 GLOBAL_INDEX.settings(number_of_shards=1, number_of_replicas=0)
@@ -37,3 +39,10 @@ class UserDocument(Document):
        model = User
        # fields列表应该只包含需要特殊处理的字段，或者可以完全省略
        # 因为我们已经显式定义了所有字段
# Elasticsearch document registered on GLOBAL_INDEX; it carries the shared
# list of type names in its single `type_list` field.
@GLOBAL_INDEX.doc_type
 class GlobalDocument(Document):
    # Keyword field holding the type names (other code in this change
    # assigns a list of strings to it).
    type_list = fields.KeywordField()
    class Django:
        # Django model this document class is bound to.
        model = ElasticNews
--- a/elastic/es_connect.py
+++ b/elastic/es_connect.py
@@ -4,8 +4,8 @@ Django版本的ES连接和操作模块
 """
 from elasticsearch import Elasticsearch
 from elasticsearch_dsl import connections
-from .documents import AchievementDocument, UserDocument
+from .documents import AchievementDocument, UserDocument, GlobalDocument
-from .indexes import ACHIEVEMENT_INDEX_NAME, USER_INDEX_NAME
+from .indexes import ACHIEVEMENT_INDEX_NAME, USER_INDEX_NAME, GLOBAL_INDEX_NAME
 import hashlib
 import time
@@ -17,6 +17,7 @@ es = connections.get_connection()
 DATA_INDEX_NAME = ACHIEVEMENT_INDEX_NAME
 USERS_INDEX_NAME = USER_INDEX_NAME
 GLOBAL_TYPES_INDEX_NAME = GLOBAL_INDEX_NAME
 def create_index_with_mapping():
    """创建索引和映射配置（仅当索引不存在时）"""
@@ -36,7 +37,25 @@ def create_index_with_mapping():
        else:
            print(f"ℹ️ 索引 {USERS_INDEX_NAME} 已存在，跳过创建")
-        # --- 3. 创建默认管理员用户（可选：也可检查用户是否已存在）---
+        # --- 3. 处理全局类型索引 ---
        if not es.indices.exists(index=GLOBAL_TYPES_INDEX_NAME):
            GlobalDocument.init()
            default_types = ['软著', '专利', '奖状']
            doc = GlobalDocument(type_list=default_types)
            doc.meta.id = 'types'
            doc.save()
            print(f"✅ 创建索引 {GLOBAL_TYPES_INDEX_NAME} 并写入默认类型")
        else:
            try:
                GlobalDocument.get(id='types')
            except Exception:
                default_types = ['软著', '专利', '奖状']
                doc = GlobalDocument(type_list=default_types)
                doc.meta.id = 'types'
                doc.save()
                print("ℹ️ 全局类型文档缺失，已补充默认类型")
        # --- 4. 创建默认管理员用户（可选：也可检查用户是否已存在）---
        # 这里简单处理：每次初始化都写入（可能重复），建议加唯一性判断
        admin_user = {
            "user_id": 0,
@@ -57,6 +76,36 @@ def create_index_with_mapping():
        print(f"❌ 创建索引失败: {str(e)}")
        # raise  # 可选：在 AppConfig 中捕获，这里可以 re-raise 便于调试
 def get_type_list():
    """Return the normalized type names stored in the global types document.

    Reads the ``GlobalDocument`` with id ``'types'``, converts every entry to
    a string and strips surrounding whitespace and ``;`` characters. Entries
    that are empty after normalization are dropped so callers never receive
    blank type names.

    Returns:
        list[str]: The cleaned type names, or the built-in default list when
        the document cannot be read for any reason.
    """
    try:
        doc = GlobalDocument.get(id='types')
        cleaned = (str(t).strip().strip(';') for t in (doc.type_list or []))
        # Skip values such as "" or ";" that normalize to nothing.
        return [name for name in cleaned if name]
    except Exception:
        # Best-effort read: any failure (missing document, connection
        # problem, ...) falls back to the default types instead of raising.
        return ['软著', '专利', '奖状']
 def ensure_type_in_list(type_name: str) -> bool:
    """Add ``type_name`` to the stored global type list if it is missing.

    The name is converted to a string and stripped of surrounding whitespace
    and ``;`` characters before it is compared with the (equally normalized)
    entries of the ``GlobalDocument`` with id ``'types'``.

    Args:
        type_name: Candidate type name; falsy values are ignored.

    Returns:
        bool: ``True`` when the name was appended and the document saved,
        ``False`` when the name is empty, already present, or the update
        failed.
    """
    if not type_name:
        return False
    norm = str(type_name).strip().strip(';')
    if not norm:
        # Input such as " ; " normalizes to nothing; never store a blank type.
        return False
    try:
        # Local import keeps this function self-contained; the module already
        # depends on the elasticsearch package.
        from elasticsearch import NotFoundError
        try:
            doc = GlobalDocument.get(id='types')
        except NotFoundError:
            doc = None
        if doc is None:
            # Only a genuinely missing document is recreated from defaults.
            # Other read errors (timeouts, connection failures) must not lead
            # to overwriting an existing list with the defaults.
            cur = ['软著', '专利', '奖状']
            doc = GlobalDocument(type_list=cur)
            doc.meta.id = 'types'
        else:
            cur = list(doc.type_list or [])
        cur_sanitized = {str(t).strip().strip(';') for t in cur}
        if norm in cur_sanitized:
            return False
        cur.append(norm)
        doc.type_list = cur
        doc.save()
        return True
    except Exception as e:
        # Stay best-effort for callers, but report the failure like the other
        # helpers in this module do instead of hiding it.
        print(f"❌ 更新类型列表失败: {e}")
        return False
 def get_doc_id(data):
    """
    根据数据内容生成唯一ID（用于去重）
--- a/elastic/indexes.py
+++ b/elastic/indexes.py
@@ -1,4 +1,5 @@
-INDEX_NAME = "wordsearch266666"
+INDEX_NAME = "wordsearch266666789"
-USER_NAME = "users"
+USER_NAME = "users_123"
 ACHIEVEMENT_INDEX_NAME = INDEX_NAME
 USER_INDEX_NAME = USER_NAME
 GLOBAL_INDEX_NAME = "global11111"
--- a/elastic/views.py
+++ b/elastic/views.py
@@ -244,13 +244,14 @@ def ocr_and_extract_info(image_path: str):
    base_url="https://aistudio.baidu.com/llm/lmapi/v3"
    client = OpenAI(api_key=api_key, base_url=base_url)
    types = get_type_list()
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "你是一个能理解图片和文本的助手，请根据用户提供的信息进行回答。"},
            {
                "role": "user",
                "content": [
-                    {"type": "text", "text": "请识别这张图片中的信息，将你认为重要的数据转换为不包含嵌套的json，不要显示其它信息以便于解析直接输出json结果即可你可以自行决定使用哪些json字段"},
+                    {"type": "text", "text": f"请识别这张图片中的信息，将你认为重要的数据转换为不包含嵌套的json，不要显示其它信息以便于解析，直接输出json结果即可。使用“数据类型”字段表示这个东西的大致类型，除此之外你可以自行决定使用哪些json字段。“数据类型”的内容有严格规定，请查看{json.dumps(types, ensure_ascii=False)}中是否包含你所需要的类型，确定不包含后你才可以填入你觉得合适的大致分类。"},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}},
                ],
            },
@@ -370,6 +371,7 @@ def confirm(request):
    if not isinstance(edited, dict) or not edited:
        return JsonResponse({"status": "error", "message": "数据不能为空"}, status=400)
    ensure_type_in_list(edited.get("数据类型"))
    to_store = {
        "writer_id": str(request.session.get("user_id")),
        "data": json_to_string(edited),