From a7edbb743cc26d1daafbb0c48ce584b1964a5c5f Mon Sep 17 00:00:00 2001 From: zhaoqingang <zhaoqg0118@163.com> Date: 星期二, 18 二月 2025 09:43:15 +0800 Subject: [PATCH] tmp --- app/service/v2/chat.py | 80 +++++++++++++++++++++++++++++++++++++++- 1 files changed, 78 insertions(+), 2 deletions(-) diff --git a/app/service/v2/chat.py b/app/service/v2/chat.py index 7af7071..246b3f1 100644 --- a/app/service/v2/chat.py +++ b/app/service/v2/chat.py @@ -1,8 +1,11 @@ +import io import json + +import fitz from Log import logger from app.config.agent_base_url import RG_CHAT_DIALOG, DF_CHAT_AGENT, DF_CHAT_PARAMETERS, RG_CHAT_SESSIONS, \ - DF_CHAT_WORKFLOW + DF_CHAT_WORKFLOW, DF_UPLOAD_FILE from app.config.config import settings from app.config.const import * from app.models import DialogModel, ApiTokenModel, UserTokenModel @@ -11,6 +14,8 @@ from app.service.v2.app_driver.chat_data import ChatBaseApply from app.service.v2.app_driver.chat_dialog import ChatDialog from app.service.v2.app_driver.chat_workflow import ChatWorkflow +from docx import Document +from dashscope import get_tokenizer # dashscope version >= 1.14.0 async def update_session_log(db, session_id: str, message: dict, conversation_id: str): @@ -185,6 +190,7 @@ data = ans.get("data", {}) event = smart_workflow_finished node_list.append(ans) + elif ans.get("event") == message_end: event = smart_message_end else: @@ -209,7 +215,7 @@ "error": error}, conversation_id) -async def service_chat_basic(db, chat_id: str, question: str, session_id: str, user_id): +async def service_chat_basic(db, chat_id: str, chat_data: ChatData, session_id: str, user_id, mode: str): ... 
# Patch context for hunk "@@ -242,3 +248,73 @@" (tail of the preceding
# function, whose header is outside this hunk; kept verbatim, not modified):
#     url = settings.fwr_base_url + RG_CHAT_SESSIONS.format(chat_id)
#     chat = ChatDialog()
#     return await chat.chat_sessions(url, {"name": name}, await chat.get_headers(token))


async def service_chat_upload(db, chat_id, file, user_id):
    """Upload each file to the Dify upload endpoint and attach a token count.

    Args:
        db: Database handle passed through to ``get_chat_token``.
        chat_id: Chat whose token is used as the ``Bearer`` credential.
        file: Iterable of upload objects exposing an awaitable ``read()`` and
            the attributes ``filename`` and ``content_type``.
        user_id: Sent to the upload endpoint as the ``user`` field.

    Returns:
        An empty list when no token is found for ``chat_id``; otherwise a JSON
        string of the collected upload results, or ``""`` when none succeeded.
        NOTE(review): the two return types differ (list vs. str); kept as-is
        so existing callers are not affected.
    """
    files = []
    token = await get_chat_token(db, chat_id)
    if not token:
        return files
    url = settings.dify_base_url + DF_UPLOAD_FILE
    chat = ChatBaseApply()
    for f in file:
        try:
            file_content = await f.read()
            file_upload = await chat.chat_upload(url, {"file": (f.filename, file_content)}, {"user": str(user_id)},
                                                 {'Authorization': f'Bearer {token}'})
        except Exception as e:
            # A failed read/upload skips this file but not the remaining ones.
            logger.error(e)
            continue
        try:
            file_upload["tokens"] = await read_file(file_content, f.filename, f.content_type)
        except Exception as e:
            # Token counting is best-effort: the upload result is still
            # returned without "tokens". Catch Exception (not a bare except)
            # so task cancellation is not swallowed, and log the cause.
            logger.error(e)
        files.append(file_upload)
    return json.dumps(files) if files else ""


async def get_str_token(input_str):
    """Return how many tokens ``input_str`` encodes to with the qwen-turbo tokenizer."""
    # The tokenizer currently only supports the Tongyi Qianwen (Qwen) model family.
    tokenizer = get_tokenizer('qwen-turbo')
    # encode() splits the string into tokens and returns their token ids.
    return len(tokenizer.encode(input_str))


async def read_pdf(pdf_stream):
    """Return the text of all pages of a PDF, concatenated in page order.

    Args:
        pdf_stream: In-memory PDF content handed to ``fitz.open(stream=...)``.
    """
    with fitz.open(stream=pdf_stream, filetype="pdf") as pdf_document:
        return "".join(page.get_text() for page in pdf_document)


async def read_word(word_stream):
    """Return the paragraph text of a .docx document.

    Args:
        word_stream: Raw bytes of the document; wrapped in ``io.BytesIO`` so
            python-docx can open it as a file-like object.
    """
    doc = Document(io.BytesIO(word_stream))
    # Join with newlines so the last word of one paragraph is not fused with
    # the first word of the next, which would distort the token count.
    return "\n".join(para.text for para in doc.paragraphs)


async def read_file(file, filename, content_type):
    """Return the token count of a PDF or .docx file's text, or 0 otherwise.

    Args:
        file: Raw file content.
        filename: Original file name; may be ``None``. The extension is
            matched case-insensitively.
        content_type: MIME type reported for the file; may be ``None``.

    Returns:
        Number of tokens in the extracted text; 0 when the type is neither
        PDF nor .docx, or when no text could be extracted.
    """
    name = (filename or "").lower()
    if content_type == "application/pdf" or name.endswith('.pdf'):
        text = await read_pdf(file)
    elif content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" or name.endswith(
            '.docx'):
        text = await read_word(file)
    else:
        text = ""

    if not text:
        # Nothing to tokenize: skip the tokenizer for an empty string.
        # Assumes the tokenizer encodes "" to zero tokens; confirm for qwen-turbo.
        return 0
    return await get_str_token(text)

# Patch footer: -- Gitblit v1.8.0