Upload files to /
This commit is contained in:
230
ESConnect.py
Normal file
@@ -0,0 +1,230 @@
from elasticsearch import Elasticsearch
import os
import json
import hashlib
import requests

# Elasticsearch connection settings
ES_URL = "http://localhost:9200"
AUTH = None  # set to ("username", "password") if authentication is required

# document = os.open('results/output.json', os.O_RDONLY)

# Create an Elasticsearch client instance connected to the local Elasticsearch service
es = Elasticsearch([ES_URL])

# Define the index name used by all helpers below
index_name = "wordsearch2"

def create_index_with_mapping():
    """Create the index with the corrected mapping configuration."""
    # Corrected mapping structure (removes the invalid parameters from keyword fields)
    mapping = {
        "mappings": {
            "properties": {
                "id": {
                    "type": "text",  # text type so the field is analyzed
                    "analyzer": "ik_max_word",
                    "search_analyzer": "ik_smart"
                },
                "name": {
                    "type": "text",
                    "analyzer": "ik_max_word",
                    "search_analyzer": "ik_smart"
                },
                "students": {"type": "keyword"},  # keyword fields take only "type"
                "teacher": {"type": "keyword"},
                "timestamp": {
                    "type": "date",
                    "format": "strict_date_optional_time||epoch_millis"
                }
            }
        }
    }

    # Create the index only if it does not already exist
    if not es.indices.exists(index=index_name):
        es.indices.create(index=index_name, body=mapping)
        print(f"Created index {index_name} with mapping")
    else:
        print(f"Index {index_name} already exists")

def get_doc_id(data):
    """
    Generate a unique ID from the key fields (used for deduplication).
    Adjust the field combination as needed.

    Args:
        data (dict): the document data

    Returns:
        str: an MD5 hash of the data content, used as the unique ID
    """
    # Combine the key fields into one unique string
    unique_str = f"{data['id']}{data['name']}{data['students']}{data['teacher']}"
    # Hash it with MD5 to produce the unique ID
    return hashlib.md5(unique_str.encode('utf-8')).hexdigest()

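# A minimal sketch, not part of the original flow, showing how get_doc_id
# could make writes idempotent: indexing with an explicit _id overwrites a
# duplicate instead of creating a second copy. Uses the 7.x-style client
# call (body=) to match the rest of this file.
def insert_with_dedup(data):
    return es.index(index=index_name, id=get_doc_id(data), body=data)
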
def insert_data(data):
    """
    Insert a document into Elasticsearch.

    Args:
        data (dict): the data to insert

    Returns:
        bool: True on success, False on failure
    """
    # Delegates to batch_write_data, which lets Elasticsearch auto-generate
    # the document ID (get_doc_id is available when deduplication is needed)
    return batch_write_data(data)

def search_data(query):
    """
    Search Elasticsearch for matching documents.

    Args:
        query (str): the search keyword

    Returns:
        list: the _source data of each matching document
    """
    # Run a multi-field match query across all fields
    result = es.search(index=index_name, body={"query": {"multi_match": {"query": query, "fields": ["*"]}}})
    # Return only the _source part of each hit
    return [hit["_source"] for hit in result['hits']['hits']]

def search_all():
    """
    Fetch all documents in the index.

    Returns:
        list: one dict per document, containing the document ID and its _source data
    """
    # Run a match_all query (note: without an explicit "size",
    # Elasticsearch returns at most 10 hits)
    result = es.search(index=index_name, body={"query": {"match_all": {}}})
    # Merge the document ID into each _source dict
    return [{
        "_id": hit["_id"],
        **hit["_source"]
    } for hit in result['hits']['hits']]

def delete_by_id(doc_id):
    """
    Delete a document by its doc_id.

    Args:
        doc_id (str): the ID of the document to delete

    Returns:
        bool: True on success, False on failure
    """
    try:
        # Perform the delete
        es.delete(index=index_name, id=doc_id)
        return True
    except Exception as e:
        print("Delete failed:", str(e))
        return False

def search_by_any_field(keyword):
    """Fuzzy search across all fields (tolerant of misspellings)."""
    try:
        # update_mapping()
        response = requests.post(
            f"{ES_URL}/{index_name}/_search",
            auth=AUTH,
            json={
                "query": {
                    "multi_match": {
                        "query": keyword,
                        "fields": ["*"],     # match every field
                        "fuzziness": "AUTO"  # enable fuzzy matching
                    }
                },
                # Request highlighting so the snippet extraction below has data
                # (the original query never asked for it, so doc['highlight']
                # was always absent)
                "highlight": {"fields": {"*": {}}}
            }
        )
        response.raise_for_status()
        results = response.json()["hits"]["hits"]
        print(f"\nFuzzy search for '{keyword}' found {len(results)} result(s):")

        for doc in results:
            print(f"\nDocument ID: {doc['_id']}")
            if '_source' in doc:
                max_key_len = max((len(k) for k in doc['_source']), default=0)
                for key, value in doc['_source'].items():
                    # Prefer the highlighted fragment when one exists
                    highlight = doc.get('highlight', {}).get(key, [value])[0]
                    print(f"{key:>{max_key_len + 2}} : {highlight}")
            else:
                print("No _source data")

        return results
    except requests.exceptions.HTTPError as e:
        print(f"Search failed: {e.response.text}")
        return []

def batch_write_data(data):
    """Write one award-record document (despite the name, this posts a single document per call)."""
    try:
        response = requests.post(
            f"{ES_URL}/{index_name}/_doc",
            json=data,
            auth=AUTH,
            headers={"Content-Type": "application/json"}
        )
        response.raise_for_status()
        doc_id = response.json()["_id"]
        print(f"Document written, ID: {doc_id}, content: {data}")
        return True
    except requests.exceptions.HTTPError as e:
        print(f"Document write failed: {e.response.text}, data: {data}")
        return False

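# The function above issues one HTTP request per document. A true batch write
# would use the _bulk endpoint instead; below is a minimal sketch, not part of
# the original module, assuming the same ES_URL, index_name, and AUTH. _bulk
# takes an NDJSON payload of alternating action and source lines.
def bulk_write_data(docs):
    """Write many documents in one request via the _bulk API."""
    lines = []
    for doc in docs:
        lines.append(json.dumps({"index": {"_index": index_name}}))  # action line
        lines.append(json.dumps(doc, ensure_ascii=False))            # source line
    payload = "\n".join(lines) + "\n"  # _bulk requires a trailing newline
    resp = requests.post(
        f"{ES_URL}/_bulk",
        data=payload.encode("utf-8"),
        auth=AUTH,
        headers={"Content-Type": "application/x-ndjson"}
    )
    resp.raise_for_status()
    # The response sets "errors": true if any individual action failed
    return not resp.json().get("errors", True)
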
def update_mapping():
    # New mapping definition
    # Note: the _mapping API can only *add* fields; changing the type of an
    # existing field requires reindexing, so this call fails if these fields
    # already exist with different types
    new_mapping = {
        "properties": {
            "id": {
                "type": "text",
                "analyzer": "ik_max_word",
                "search_analyzer": "ik_smart"
            },
            "name": {
                "type": "text",
                "analyzer": "ik_max_word"
            },
            "students": {
                "type": "keyword"
            },
            "teacher": {
                "type": "keyword"
            }
        }
    }

    # Apply the mapping with a PUT request
    try:
        response = requests.put(
            f"{ES_URL}/{index_name}/_mapping",
            auth=AUTH,
            json=new_mapping,
            headers={"Content-Type": "application/json"}
        )
        response.raise_for_status()
        print("Index mapping updated")
        print(response.json())

        # Verify the mapping update
        verify = requests.get(
            f"{ES_URL}/{index_name}/_mapping",
            auth=AUTH
        )
        print("\nVerification result:")
        print(verify.json())
    except requests.exceptions.HTTPError as e:
        print(f"Request failed: {e.response.text}")

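# Hypothetical smoke test, not part of the original commit. Assumes an
# Elasticsearch instance is reachable at ES_URL and has the IK analysis
# plugin installed (required by the ik_max_word / ik_smart analyzers above);
# the sample field values are placeholders.
if __name__ == "__main__":
    create_index_with_mapping()
    insert_data({"id": "1", "name": "demo project", "students": "s1",
                 "teacher": "t1", "timestamp": "2024-01-01T00:00:00"})
    print(search_data("demo"))
    print(search_all())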