import io
|
import json
|
|
import fitz
|
|
from Log import logger
|
from app.config.agent_base_url import RG_CHAT_DIALOG, DF_CHAT_AGENT, DF_CHAT_PARAMETERS, RG_CHAT_SESSIONS, \
|
DF_CHAT_WORKFLOW, DF_UPLOAD_FILE
|
from app.config.config import settings
|
from app.config.const import *
|
from app.models import DialogModel, ApiTokenModel, UserTokenModel
|
from app.models.v2.session_model import ChatSessionDao, ChatData
|
from app.service.v2.app_driver.chat_agent import ChatAgent
|
from app.service.v2.app_driver.chat_data import ChatBaseApply
|
from app.service.v2.app_driver.chat_dialog import ChatDialog
|
from app.service.v2.app_driver.chat_workflow import ChatWorkflow
|
from docx import Document
|
from dashscope import get_tokenizer # dashscope版本 >= 1.14.0
|
|
|
async def update_session_log(db, session_id: str, message: dict, conversation_id: str):
    """Store a message on an existing chat session.

    Delegates to ``ChatSessionDao.update_session_by_id`` and always passes
    ``session=None``.

    Args:
        db: Database handle handed to ``ChatSessionDao``.
        session_id: Identifier of the session to update.
        message: Message payload to record for the session.
        conversation_id: Conversation identifier forwarded to the DAO.
    """
    dao = ChatSessionDao(db)
    update_kwargs = {
        "session_id": session_id,
        "session": None,
        "message": message,
        "conversation_id": conversation_id,
    }
    await dao.update_session_by_id(**update_kwargs)
|
|
|
async def add_session_log(db, session_id: str, question: str, chat_id: str, user_id, event_type: str,
                          conversation_id: str):
    """Create or update the session record for an incoming user question.

    The question (truncated to 255 characters) is used as the session name and
    is also stored as a ``{"role": "user", ...}`` message.

    Args:
        db: Database handle handed to ``ChatSessionDao``.
        session_id: Identifier of the session to create or update.
        question: Text of the user's question.
        chat_id: Stored as the session's ``agent_id``.
        user_id: Stored as the session's ``tenant_id``.
        event_type: Forwarded to the DAO unchanged.
        conversation_id: Forwarded to the DAO unchanged.

    Returns:
        Whatever ``ChatSessionDao.update_or_insert_by_id`` returns, or ``None``
        when any exception is raised (the exception is logged, not re-raised).
    """
    try:
        dao = ChatSessionDao(db)
        session_name = question[:255]
        user_message = {"role": "user", "content": question}
        return await dao.update_or_insert_by_id(
            session_id=session_id,
            name=session_name,
            agent_id=chat_id,
            agent_type=1,
            tenant_id=user_id,
            message=user_message,
            conversation_id=conversation_id,
            event_type=event_type,
        )
    except Exception as e:
        logger.error(e)
    return None
|
|
async def get_app_token(db, app_id):
    """Return the access token of the ``UserTokenModel`` row whose id is ``app_id``.

    Returns:
        The row's ``access_token``, or ``""`` when no row matches.
    """
    record = db.query(UserTokenModel).filter_by(id=app_id).first()
    return record.access_token if record else ""
|
|
|
|
async def get_chat_token(db, app_id):
    """Return the API token of the ``ApiTokenModel`` row registered for ``app_id``.

    Returns:
        The row's ``token``, or ``""`` when no row matches.
    """
    token_row = db.query(ApiTokenModel).filter_by(app_id=app_id).first()
    if not token_row:
        return ""
    return token_row.token
|
|
|
async def add_chat_token(db, data):
    """Insert a new ``ApiTokenModel`` row built from ``data`` and commit it.

    Failures are logged and swallowed (best effort); nothing is returned.

    Args:
        db: Database session; presumably a SQLAlchemy session (it is used via
            ``add``/``commit``/``rollback``) -- TODO confirm.
        data: Mapping of keyword arguments for the ``ApiTokenModel`` constructor.
    """
    try:
        api_token = ApiTokenModel(**data)
        db.add(api_token)
        db.commit()
    except Exception as e:
        logger.error(e)
        # A failed flush/commit leaves the session in a failed transaction
        # state; roll back so later queries on the same session still work.
        try:
            db.rollback()
        except Exception as rollback_error:
            # Keep the original best-effort contract: never raise from here.
            logger.error(rollback_error)
|
|
|
|
async def get_chat_info(db, chat_id: str):
    """Fetch the ``DialogModel`` row with id ``chat_id`` whose status is ``Dialog_STATSU_ON``.

    Returns:
        The first matching row, or ``None`` when the query finds nothing.
    """
    matching_dialogs = db.query(DialogModel).filter_by(id=chat_id, status=Dialog_STATSU_ON)
    return matching_dialogs.first()
|
|
|
async def get_chat_object(mode):
    """Pick the chat driver and endpoint URL for ``mode``.

    Returns:
        ``(ChatWorkflow(), <workflow url>)`` when ``mode`` equals
        ``workflow_chat``; otherwise ``(ChatAgent(), <agent url>)``. Both URLs
        are built on ``settings.dify_base_url``.
    """
    if mode != workflow_chat:
        agent_url = settings.dify_base_url + DF_CHAT_AGENT
        return ChatAgent(), agent_url
    workflow_url = settings.dify_base_url + DF_CHAT_WORKFLOW
    return ChatWorkflow(), workflow_url
|
|
|
async def service_chat_dialog(db, chat_id: str, question: str, session_id: str, user_id, mode: str):
    """Stream a dialog answer as server-sent-event (``data: ...``) text.

    Records the question via ``add_session_log``, relays every chunk produced by
    ``ChatDialog.chat_completions`` to the caller, and finally stores the last
    answer payload via ``update_session_log``.

    Args:
        db: Database handle passed to the token lookup and session log helpers.
        chat_id: Dialog id, formatted into the ``RG_CHAT_DIALOG`` URL template.
        question: The user's question.
        session_id: Session id; echoed in every event and also passed to
            ``add_session_log`` as the conversation id.
        user_id: Recorded with the session log.
        mode: Recorded with the session log as the event type.

    Yields:
        str: Pieces of at most ``max_chunk_size`` characters of a
        ``"data: <json>\\n\\n"`` line. On an unexpected exception a single
        error line with status ``http_500`` is yielded instead.
    """
    conversation_id = ""
    token = await get_chat_token(db, rg_api_token)
    url = settings.fwr_base_url + RG_CHAT_DIALOG.format(chat_id)
    chat = ChatDialog()
    session = await add_session_log(db, session_id, question, chat_id, user_id, mode, session_id)
    if session:
        conversation_id = session.conversation_id
    # Last answer payload seen; this is what gets persisted in ``finally``.
    message = {"role": "assistant", "answer": "", "reference": {}}
    try:
        async for ans in chat.chat_completions(url, await chat.request_data(question, conversation_id),
                                               await chat.get_headers(token)):
            data = {}
            error = ""
            status = http_200
            if ans.get("code", None) == 102:
                # Code 102 is reported to the client as an error event.
                error = ans.get("message", "error!")
                status = http_400
                event = smart_message_error
            else:
                if isinstance(ans.get("data"), bool) and ans.get("data") is True:
                    # A literal ``True`` payload is treated as the end of the stream.
                    event = smart_message_end
                else:
                    data = ans.get("data", {})
                    # The upstream session_id is stripped from the payload; the
                    # caller's own session_id is sent in the envelope instead.
                    if "session_id" in data:
                        del data["session_id"]
                    message = data
                    event = smart_message_cover
            message_str = "data: " + json.dumps(
                {"event": event, "data": data, "error": error, "status": status, "session_id": session_id},
                ensure_ascii=False) + "\n\n"
            # Send the serialized event in bounded-size chunks.
            for i in range(0, len(message_str), max_chunk_size):
                chunk = message_str[i:i + max_chunk_size]
                yield chunk
    except Exception as e:
        logger.error(e)
        try:
            yield "data: " + json.dumps({"message": smart_message_error,
                                         "error": "\n**ERROR**: " + str(e), "status": http_500},
                                        ensure_ascii=False) + "\n\n"
        except Exception as report_error:
            # Reporting the failure is best effort, but only ordinary errors are
            # suppressed: GeneratorExit / KeyboardInterrupt / SystemExit must
            # propagate so closing the generator and shutdown behave normally.
            logger.error(report_error)
    finally:
        await update_session_log(db, session_id, message, conversation_id)
|
|
|
async def service_chat_workflow(db, chat_id: str, chat_data: ChatData, session_id: str, user_id, mode: str):
    """Stream agent / workflow events as server-sent-event (``data: ...``) text.

    Records the query via ``add_session_log``, translates each upstream event
    into the corresponding ``smart_*`` event for the client, and finally stores
    the accumulated answer, node events, task id, message id and error via
    ``update_session_log``.

    Args:
        db: Database handle passed to the token lookup and session log helpers.
        chat_id: Application id used to look up the API token.
        chat_data: Request payload; its ``query`` attribute is used when
            present, otherwise the query defaults to ``"start new workflow"``.
        session_id: Session id echoed in every event.
        user_id: Recorded with the session log and sent upstream as a string.
        mode: Selects the driver (see ``get_chat_object``) and is recorded as
            the session's event type.

    Yields:
        str: One ``"data: <json>\\n\\n"`` line per recognised upstream event;
        unrecognised events are skipped. On an unexpected exception a single
        error line with status ``http_500`` is yielded instead.
    """
    conversation_id = ""
    answer_event = ""
    answer_agent = ""
    message_id = ""
    task_id = ""
    error = ""
    files = []
    node_list = []
    # Upstream node-level events and the client events they map to (same order).
    node_events = (workflow_started, node_started, node_finished)
    smart_node_events = (smart_workflow_started, smart_node_started, smart_node_finished)
    token = await get_chat_token(db, chat_id)
    chat, url = await get_chat_object(mode)
    if hasattr(chat_data, "query"):
        query = chat_data.query
    else:
        query = "start new workflow"
    session = await add_session_log(db, session_id, query, chat_id, user_id, mode, conversation_id)
    if session:
        conversation_id = session.conversation_id
    try:
        async for ans in chat.chat_completions(url,
                                               await chat.request_data(query, conversation_id, str(user_id),
                                                                       chat_data),
                                               await chat.get_headers(token)):
            data = {}
            status = http_200
            # NOTE(review): an event lacking these keys overwrites the previously
            # known ids with None; confirm that is intended before relying on
            # the values persisted in ``finally``.
            conversation_id = ans.get("conversation_id")
            task_id = ans.get("task_id")
            ans_event = ans.get("event")
            if ans_event == message_error:
                # ``error`` is not reset per event, so it is repeated in every
                # later event and in the persisted message.
                error = ans.get("message", "参数异常!")
                status = http_400
                event = smart_message_error
            elif ans_event == message_agent:
                data = {"answer": ans.get("answer", ""), "id": ans.get("message_id", "")}
                answer_agent += ans.get("answer", "")
                message_id = ans.get("message_id", "")
                event = smart_message_stream
            elif ans_event == message_event:
                data = {"answer": ans.get("answer", ""), "id": ans.get("message_id", "")}
                answer_event += ans.get("answer", "")
                message_id = ans.get("message_id", "")
                event = smart_message_stream
            elif ans_event == message_file:
                data = {"url": ans.get("url", ""), "id": ans.get("id", ""),
                        "type": ans.get("type", "")}
                files.append(data)
                event = smart_message_file
            elif ans_event in node_events:
                data = ans.get("data", {})
                # Blank the bulky fields in place; this also affects the copy of
                # the event kept in ``node_list``.
                data["inputs"] = []
                data["outputs"] = []
                data["process_data"] = ""
                node_list.append(ans)
                event = smart_node_events[node_events.index(ans_event)]
            elif ans_event == workflow_finished:
                data = ans.get("data", {})
                event = smart_workflow_finished
                node_list.append(ans)
            elif ans_event == message_end:
                event = smart_message_end
            else:
                # Unknown event types are not forwarded to the client.
                continue

            yield "data: " + json.dumps(
                {"event": event, "data": data, "error": error, "status": status, "task_id": task_id,
                 "session_id": session_id},
                ensure_ascii=False) + "\n\n"

    except Exception as e:
        logger.error(e)
        try:
            yield "data: " + json.dumps({"message": smart_message_error,
                                         "error": "\n**ERROR**: " + str(e), "status": http_500},
                                        ensure_ascii=False) + "\n\n"
        except Exception as report_error:
            # Reporting the failure is best effort, but only ordinary errors are
            # suppressed: GeneratorExit / KeyboardInterrupt / SystemExit must
            # propagate so closing the generator and shutdown behave normally.
            logger.error(report_error)
    finally:
        # The event-stream answer takes precedence over the agent answer.
        await update_session_log(db, session_id, {"role": "assistant", "answer": answer_event or answer_agent,
                                                  "node_list": node_list, "task_id": task_id, "id": message_id,
                                                  "error": error}, conversation_id)
|
|
|
async def service_chat_basic(db, chat_id: str, chat_data: ChatData, session_id: str, user_id, mode: str):
    """Placeholder for the basic chat mode; currently does nothing.

    Accepts the same parameters as the other ``service_chat_*`` entry points
    and returns ``None`` without using any of them.
    """
    return None
|
|
|
async def service_chat_parameters(db, chat_id, user_id):
    """Return the stored ``parameters`` of the dialog with id ``chat_id``.

    ``user_id`` is accepted but not used by the current implementation.

    Returns:
        The ``parameters`` attribute of the matching ``DialogModel`` row, or an
        empty dict when no row matches.
    """
    chat_info = db.query(DialogModel).filter_by(id=chat_id).first()
    return chat_info.parameters if chat_info else {}
|
# if chat_info.dialog_type == RG_TYPE:
|
# return {"retriever_resource":
|
# {
|
# "enabled": True
|
# }
|
# }
|
# elif chat_info.dialog_type == BASIC_TYPE:
|
# ...
|
# elif chat_info.dialog_type == DF_TYPE:
|
# token = await get_chat_token(db, chat_id)
|
# if not token:
|
# return {}
|
# url = settings.dify_base_url + DF_CHAT_PARAMETERS
|
# chat = ChatBaseApply()
|
# return await chat.chat_get(url, {"user": str(user_id)}, await chat.get_headers(token))
|
|
|
async def service_chat_sessions(db, chat_id, name):
    """Call ``ChatDialog.chat_sessions`` for ``chat_id`` with the given session name.

    Returns:
        The result of ``ChatDialog.chat_sessions``, or an empty dict when no API
        token is registered under ``rg_api_token``.
    """
    token = await get_chat_token(db, rg_api_token)
    if token:
        sessions_url = settings.fwr_base_url + RG_CHAT_SESSIONS.format(chat_id)
        dialog = ChatDialog()
        payload = {"name": name}
        return await dialog.chat_sessions(sessions_url, payload, await dialog.get_headers(token))
    return {}
|
|
|
async def service_chat_upload(db, chat_id, file, user_id):
    """Upload each file via ``ChatBaseApply.chat_upload`` and attach a token count.

    Every file is handled independently: a failure while reading or uploading
    one file is logged and that file is skipped. Token counting is best effort;
    when it fails, the upload result is still returned, just without a
    ``"tokens"`` entry.

    Args:
        db: Database handle used to look up the application's API token.
        chat_id: Application id whose API token authorises the upload.
        file: Iterable of upload objects exposing an awaitable ``read()`` plus
            ``filename`` and ``content_type`` attributes.
        user_id: Sent with the upload as the ``user`` field (as a string).

    Returns:
        An empty list when no token is registered for ``chat_id``; otherwise a
        JSON string of the upload results, or ``""`` when nothing was uploaded.
        NOTE(review): the list-vs-string return types are inconsistent but are
        kept as-is because callers may depend on them.
    """
    files = []
    token = await get_chat_token(db, chat_id)
    if not token:
        return files
    url = settings.dify_base_url + DF_UPLOAD_FILE
    chat = ChatBaseApply()
    for f in file:
        try:
            file_content = await f.read()
            file_upload = await chat.chat_upload(url, {"file": (f.filename, file_content)}, {"user": str(user_id)},
                                                 {'Authorization': f'Bearer {token}'})
            try:
                file_upload["tokens"] = await read_file(file_content, f.filename, f.content_type)
            except Exception as token_error:
                # Token counting must not fail the upload, but it should not
                # fail silently either (and must not swallow BaseException).
                logger.error(token_error)
            files.append(file_upload)
        except Exception as e:
            logger.error(e)
    return json.dumps(files) if files else ""
|
|
|
async def get_str_token(input_str):
    """Count the tokens of ``input_str`` using the ``qwen-turbo`` tokenizer.

    The tokenizer comes from ``dashscope.get_tokenizer``, which currently only
    supports the Tongyi Qianwen (Qwen) model family.

    Returns:
        int: Number of token ids produced by encoding ``input_str``.
    """
    # Split the string into tokens and convert them to token ids; only the
    # count is needed here (``tokenizer.decode`` maps an id back to its text).
    token_ids = get_tokenizer('qwen-turbo').encode(input_str)
    return len(token_ids)
|
|
async def read_pdf(pdf_stream):
    """Extract the text of every page of a PDF supplied as an in-memory stream.

    Args:
        pdf_stream: PDF content passed to ``fitz.open`` as ``stream``.

    Returns:
        str: The page texts concatenated in page order with no separator.
    """
    with fitz.open(stream=pdf_stream, filetype="pdf") as pdf_document:
        page_texts = [page.get_text() for page in pdf_document]
    return "".join(page_texts)
|
|
|
async def read_word(word_stream):
    """Extract the paragraph text of a Word document supplied as bytes.

    Args:
        word_stream: Raw document bytes; wrapped in ``io.BytesIO`` for python-docx.

    Returns:
        str: The text of all paragraphs concatenated with no separator.
    """
    # Open the Word file stream with python-docx.
    document = Document(io.BytesIO(word_stream))
    # Collect the text of each paragraph.
    return "".join(paragraph.text for paragraph in document.paragraphs)
|
|
async def read_file(file, filename, content_type):
    """Count the tokens of the text contained in an uploaded PDF or .docx file.

    The file type is recognised by MIME type or by file extension. Any other
    type is treated as having no text, so the token count of the empty string
    is returned.

    Args:
        file: Raw file content handed to ``read_pdf`` / ``read_word``.
        filename: Original file name; may be ``None`` or empty.
        content_type: MIME type reported for the upload.

    Returns:
        The result of ``get_str_token`` for the extracted text.
    """
    text = ""
    # A missing filename must not raise, and extensions such as ".PDF" or
    # ".DOCX" should be recognised, so normalise before matching.
    normalized_name = (filename or "").lower()
    if content_type == "application/pdf" or normalized_name.endswith('.pdf'):
        # Extract the PDF content.
        text = await read_pdf(file)
    elif (content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
          or normalized_name.endswith('.docx')):
        text = await read_word(file)
    return await get_str_token(text)
|