From 6202db458678153934fb4a31a041c58764a69138 Mon Sep 17 00:00:00 2001
From: zhangqian <zhangqian@123.com>
Date: 星期五, 18 十月 2024 22:59:51 +0800
Subject: [PATCH] 增加文件下载转发接口,把毕昇返回的文件地址改成我们的下载地址

---
 pip_install.sh      |    3 +
 app/api/report.py   |    3 +
 app/api/__init__.py |   46 ++++++++++++++++++++++
 app/api/files.py    |   49 ++++++++++++++++++++++++
 4 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/app/api/__init__.py b/app/api/__init__.py
index 8bd4579..a96baa1 100644
--- a/app/api/__init__.py
+++ b/app/api/__init__.py
@@ -1,3 +1,6 @@
+import urllib
+from urllib.parse import urlencode
+
 import jwt
 from fastapi import FastAPI, Depends, HTTPException
 from fastapi.security import OAuth2PasswordBearer
@@ -72,4 +75,45 @@
     except jwt.PyJWTError as e:
         print(e)
         await websocket.close(code=1008)
-        raise WebSocketDisconnect(code=status.WS_1008_POLICY_VIOLATION)
\ No newline at end of file
+        raise WebSocketDisconnect(code=status.WS_1008_POLICY_VIOLATION)
+
+
+def format_file_url(agent_id: str, file_url: str, doc_id: str = None, doc_name: str = None) -> str:
+    if file_url:
+        # 瀵� file_url 杩涜 URL 缂栫爜
+        encoded_file_url = urllib.parse.quote(file_url, safe=':/')
+        return f"./api/files/download/?url={encoded_file_url}&agent_id={agent_id}"
+
+    if doc_id:
+        # 瀵� doc_id 鍜� doc_name 杩涜 URL 缂栫爜
+        encoded_doc_id = urllib.parse.quote(doc_id, safe='')
+        encoded_doc_name = urllib.parse.quote(doc_name, safe='')
+        return f"./api/files/download/?doc_id={encoded_doc_id}&doc_name={encoded_doc_name}&agent_id={agent_id}"
+
+    return file_url
+
+
+def process_files(files, agent_id):
+    """
+    澶勭悊鏂囦欢鍒楄〃锛屾牸寮忓寲姣忎釜鏂囦欢鐨� URL銆�
+
+    :param files: 鏂囦欢鍒楄〃锛屾瘡涓枃浠舵槸涓�涓瓧鍏�
+    :param agent_id: 浠g悊 ID
+    """
+    if not files:
+        return  # 濡傛灉鏂囦欢鍒楄〃涓虹┖锛岀洿鎺ヨ繑鍥�
+
+    for file in files:
+        if "file_url" in file and file["file_url"]:
+            try:
+                file["file_url"] = format_file_url(agent_id, file["file_url"])
+            except Exception as e:
+                # 璁板綍寮傚父淇℃伅锛屼絾缁х画澶勭悊鍏朵粬鏂囦欢
+                print(f"Error processing file URL: {e}")
+if __name__=="__main__":
+
+    files1 = [{"file_url": "aaa.com"}, {"file_url":"bbb.com"}]
+    print(files1)
+
+    process_files(files1,11111)
+    print(files1)
\ No newline at end of file
diff --git a/app/api/files.py b/app/api/files.py
index 5da2cc4..12c55b0 100644
--- a/app/api/files.py
+++ b/app/api/files.py
@@ -1,5 +1,10 @@
-from fastapi import Depends, APIRouter, HTTPException, UploadFile, File, requests, Query
+from typing import Optional
+
+import requests
+from fastapi import Depends, APIRouter, HTTPException, UploadFile, File, Query
+from pydantic import BaseModel
 from sqlalchemy.orm import Session
+from starlette.responses import StreamingResponse
 
 from app.api import Response, get_current_user, ResponseList
 from app.config.config import settings
@@ -9,6 +14,7 @@
 from app.service.bisheng import BishengService
 from app.service.ragflow import RagflowService
 from app.service.token import get_ragflow_token, get_bisheng_token
+import urllib.parse
 
 router = APIRouter()
 
@@ -55,3 +61,44 @@
 
     else:
         return Response(code=200, msg="Unsupported agent type")
+
+
+@router.get("/download/", response_model=Response)
+async def download_file(
+        url: Optional[str] = Query(None, description="URL of the file to download for bisheng"),
+        agent_id: str = Query(..., description="Agent ID"),
+        doc_id: Optional[str] = Query(None, description="Optional doc id for ragflow agents"),
+        doc_name: Optional[str] = Query(None, description="Optional doc name for ragflow agents"),
+        db: Session = Depends(get_db)
+):
+    agent = db.query(AgentModel).filter(AgentModel.id == agent_id).first()
+    if not agent:
+        return Response(code=404, msg="Agent not found")
+
+    if agent.agent_type == AgentType.BISHENG:
+        url = urllib.parse.unquote(url)
+        # 浠� URL 涓彁鍙栨枃浠跺悕
+        parsed_url = urllib.parse.urlparse(url)
+        filename = urllib.parse.unquote(parsed_url.path.split('/')[-1])
+        url = url.replace("http://minio:9000", settings.bisheng_base_url)
+    elif agent.agent_type == AgentType.RAGFLOW:
+        if not doc_id:
+            return Response(code=400, msg="doc_id is required")
+        url = f"{settings.ragflow_base_url}/v1/document/get/{doc_id}"
+        filename = doc_name
+    else:
+        return Response(code=400, msg="Unsupported agent type")
+
+    try:
+        # 鍙戦�丟ET璇锋眰鑾峰彇鏂囦欢鍐呭
+        response = requests.get(url, stream=True)
+        response.raise_for_status()  # 妫�鏌ヨ姹傛槸鍚︽垚鍔�
+
+        # 杩斿洖娴佸紡鍝嶅簲
+        return StreamingResponse(
+            response.iter_content(chunk_size=1024),
+            media_type="application/octet-stream",
+            headers={"Content-Disposition": f"attachment; filename*=utf-8''{urllib.parse.quote(filename)}"}
+        )
+    except Exception as e:
+        raise HTTPException(status_code=400, detail=f"Error downloading file: {e}")
diff --git a/app/api/report.py b/app/api/report.py
index 386dcf1..516ead9 100644
--- a/app/api/report.py
+++ b/app/api/report.py
@@ -4,7 +4,7 @@
 import asyncio
 import websockets
 from sqlalchemy.orm import Session
-from app.api import get_current_user_websocket, ResponseList, get_current_user
+from app.api import get_current_user_websocket, ResponseList, get_current_user, format_file_url, process_files
 from app.config.config import settings
 from app.models.agent_model import AgentModel, AgentType
 from app.models.base_model import get_db
@@ -68,6 +68,7 @@
                             t = "close"
                         else:
                             t = "stream"
+                        process_files(files, agent_id)
                         result = {"step_message": steps, "type": t, "files": files}
                         await websocket.send_json(result)
                         print(f"Forwarded to client, {chat_id}: {result}")
diff --git a/pip_install.sh b/pip_install.sh
index c4519b8..d25c9b7 100644
--- a/pip_install.sh
+++ b/pip_install.sh
@@ -1,4 +1,5 @@
 pip install PyMySQL & pip install fastapi & pip install sqlalchemy & pip install PyJWT & pip install rsa & pip install httpx & pip install uvicorn & pip install bcrypt & pip install PyYAML & pip install pycryptodomex & pip install passlib
 pip install werkzeug
 pip install xlwings
-pip install python-multipart
\ No newline at end of file
+pip install python-multipart
+pip install requests
\ No newline at end of file

--
Gitblit v1.8.0