| | |
| | | for i in session.log_to_json().get("message", []): |
| | | if i.get("role") == "user": |
| | | tmp_data["question"]=i.get("content") |
| | | if "upload_filenames" in i: |
| | | tmp_data["upload_filenames"] = i.get("upload_filenames") |
| | | elif i.get("role") == "assistant": |
| | | |
| | | if isinstance(i.get("content"), dict): |
| | |
| | | import random |
| | | import string |
| | | |
| | | from fastapi import APIRouter, File, UploadFile, Form, BackgroundTasks, Depends, Request |
| | | from fastapi import APIRouter, File, UploadFile, Form, BackgroundTasks, Depends, Request, WebSocket |
| | | from fastapi.responses import JSONResponse, FileResponse |
| | | from sqlalchemy.orm import Session |
| | | from starlette.websockets import WebSocket |
| | | # from starlette.websockets import WebSocket |
| | | |
| | | from app.api import get_current_user, get_current_user_websocket, Response |
| | | from app.models import UserModel, AgentType |
| | |
| | | return prefix + random_part |
| | | |
| | | |
| | | def db_create_session(db: Session, user_id: str): |
| | | def db_create_session(db: Session, user_id: str, message:str, upload_filenames: list): |
| | | db_id = generate_db_id() |
| | | session = SessionService(db).create_session( |
| | | db_id, |
| | | "合并Excel", |
| | | message, |
| | | "basic_excel_merge", |
| | | AgentType.BASIC, |
| | | int(user_id) |
| | | int(user_id), |
| | | {"role": "user", "content": message, "upload_filenames": upload_filenames} |
| | | ) |
| | | return session |
| | | |
| | |
| | | user_excel = EXCEL_FILES_PATH |
| | | create_dir_if_not_exists(user_source) |
| | | create_dir_if_not_exists(user_excel) |
| | | |
| | | while True: |
| | | data = await websocket.receive_text() |
| | | # data = await websocket.receive_text()git |
| | | receive_message = await websocket.receive_json() |
| | | try: |
| | | if data == "\"合并Excel\"": |
| | | if receive_message.get("message") == "合并Excel": |
| | | upload_filenames = receive_message.get('upload_filenames', []) |
| | | merge_file = run_conformity(user_source, user_excel) |
| | | if merge_file is not None: |
| | | |
| | |
| | | "type": "close", |
| | | }) |
| | | # 创建会话记录 |
| | | session = db_create_session(db, user_id) |
| | | session = db_create_session(db, user_id, receive_message.get("message"), upload_filenames) |
| | | # 更新会话记录 |
| | | if session: |
| | | session_id = session.id |
| | |
| | | await websocket.send_json({"error": "合并失败", "type": "stream", "files": []}) |
| | | await websocket.close() |
| | | else: |
| | | print(f"Received data: {data}") |
| | | await websocket.send_json({"error": "未知指令", "data": str(data)}) |
| | | print(f"Received data: {receive_message.get('message')}") |
| | | await websocket.send_json({"error": "未知指令", "data": str(receive_message.get('message'))}) |
| | | await websocket.close() |
| | | except Exception as e: |
| | | await websocket.send_json({"error": str(e)}) |
| | |
| | | from starlette.responses import StreamingResponse |
| | | from werkzeug.utils import send_file |
| | | |
| | | from Log import logger |
| | | from app.api import Response, get_current_user, ResponseList |
| | | from app.config.config import settings |
| | | from app.config.const import DOCUMENT_TO_REPORT, IMAGE_TO_TEXT, DOCUMENT_TO_REPORT_TITLE, DOCUMENT_IA_QUESTIONS, \ |
| | |
| | | from app.models.user_model import UserModel |
| | | from app.service.basic import BasicService |
| | | from app.service.bisheng import BishengService |
| | | from app.service.files import read_file |
| | | from app.service.v2.api_token import DfTokenDao |
| | | from app.service.difyService import DifyService |
| | | from app.service.ragflow import RagflowService |
| | |
| | | return Response(code=400, msg=str(e)) |
| | | try: |
| | | file_upload = await dify_service.upload(token, f.filename, file_content, current_user.id) |
| | | try: |
| | | tokens = await read_file(file_content, f.filename, f.content_type) |
| | | file_upload["tokens"] = tokens |
| | | except Exception as e: |
| | | logger.error(e) |
| | | result.append(file_upload) |
| | | except Exception as e: |
| | | raise HTTPException(status_code=500, detail=str(e)) |
New file |
| | |
| | | import fitz |
| | | import io |
| | | from docx import Document |
| | | from dashscope import get_tokenizer # dashscope版本 >= 1.14.0 |
| | | |
| | | from app.service.auth import decode_access_token |
| | | |
| | | |
| | | async def get_str_token(input_str): |
| | | # 获取tokenizer对象,目前只支持通义千问系列模型 |
| | | tokenizer = get_tokenizer('qwen-turbo') |
| | | # 将字符串切分成token并转换为token id |
| | | tokens = tokenizer.encode(input_str) |
| | | # print(f"经过切分后的token id为:{tokens}。") |
| | | # # 经过切分后的token id为: [31935, 64559, 99320, 56007, 100629, 104795, 99788, 1773] |
| | | # print(f"经过切分后共有{len(tokens)}个token") |
| | | # # 经过切分后共有8个token |
| | | # |
| | | # # 将token id转化为字符串并打印出来 |
| | | # for i in range(len(tokens)): |
| | | # print(f"token id为{tokens[i]}对应的字符串为:{tokenizer.decode(tokens[i])}") |
| | | return len(tokens) |
| | | |
| | | |
| | | async def read_pdf(pdf_stream): |
| | | text = "" |
| | | with fitz.open(stream=pdf_stream, filetype="pdf") as pdf_document: |
| | | for page in pdf_document: |
| | | text += page.get_text() |
| | | return text |
| | | |
| | | |
| | | async def read_word(word_stream): |
| | | # 使用 python-docx 打开 Word 文件流 |
| | | doc = Document(io.BytesIO(word_stream)) |
| | | |
| | | # 提取每个段落的文本 |
| | | text = "" |
| | | for para in doc.paragraphs: |
| | | text += para.text |
| | | |
| | | return text |
| | | |
| | | |
| | | async def read_file(file, filename, content_type): |
| | | text = "" |
| | | if content_type == "application/pdf" or filename.endswith('.pdf'): |
| | | |
| | | # 提取 PDF 内容 |
| | | text = await read_pdf(file) |
| | | elif content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" or filename.endswith( |
| | | '.docx'): |
| | | text = await read_word(file) |
| | | |
| | | return await get_str_token(text) |