From 52d7d14795d40dc6c07baa1637c84304eadbe3f0 Mon Sep 17 00:00:00 2001 From: laowang Date: Wed, 11 Mar 2026 16:49:00 +0800 Subject: [PATCH] first --- .dockerignore | 10 + .env | 36 ++ .env.example | 28 + README-ToolHub.md | 47 ++ README.md | 152 +++++ agent_runtime/__init__.py | 7 + .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 391 bytes .../__pycache__/code_tool.cpython-310.pyc | Bin 0 -> 2372 bytes .../image_source_map.cpython-310.pyc | Bin 0 -> 1233 bytes .../image_zoom_tool.cpython-310.pyc | Bin 0 -> 6365 bytes .../__pycache__/memory_tools.cpython-310.pyc | Bin 0 -> 2805 bytes .../readonly_tools.cpython-310.pyc | Bin 0 -> 4489 bytes .../__pycache__/search_tools.cpython-310.pyc | Bin 0 -> 3894 bytes .../__pycache__/system_tools.cpython-310.pyc | Bin 0 -> 5102 bytes .../web_fetch_tool.cpython-310.pyc | Bin 0 -> 4176 bytes .../workflow_tools.cpython-310.pyc | Bin 0 -> 5166 bytes .../__pycache__/write_tools.cpython-310.pyc | Bin 0 -> 2629 bytes agent_runtime/code_tool.py | 74 +++ agent_runtime/image_source_map.py | 32 + agent_runtime/image_zoom_tool.py | 185 ++++++ agent_runtime/memory_tools.py | 74 +++ agent_runtime/readonly_tools.py | 107 ++++ agent_runtime/search_tools.py | 135 ++++ agent_runtime/system_tools.py | 159 +++++ agent_runtime/web_fetch_tool.py | 104 +++ agent_runtime/workflow_tools.py | 170 +++++ agent_runtime/write_tools.py | 43 ++ bootstrap.bat | 13 + bootstrap_q8.bat | 13 + compose.yml | 56 ++ docker/backend/Dockerfile | 15 + docker/backend/entrypoint.sh | 176 ++++++ docker/backend/entrypoint_helpers.sh | 156 +++++ docker/gateway/Dockerfile | 14 + docs/DOCKER_COMPOSE.md | 84 +++ docs/QUICKSTART.md | 137 ++++ docs/RELEASE_NOTES.md | 21 + docs/TROUBLESHOOTING.md | 116 ++++ env_config.ps1 | 120 ++++ install.cmd | 5 + install.ps1 | 49 ++ install.sh | 96 +++ install.win.ps1 | 438 +++++++++++++ install_q8.cmd | 5 + install_q8.ps1 | 63 ++ requirements.txt | 10 + run_8080_toolhub_gateway.py | 593 ++++++++++++++++++ start_8080_toolhub_stack.cmd | 5 + 
start_8080_toolhub_stack.ps1 | 292 +++++++++ start_8080_toolhub_stack.sh | 105 ++++ switch_qwen35_webui.ps1 | 499 +++++++++++++++ switch_qwen35_webui.sh | 101 +++ toolhub_gateway_agent.py | 446 +++++++++++++ 53 files changed, 4991 insertions(+) create mode 100644 .dockerignore create mode 100644 .env create mode 100644 .env.example create mode 100644 README-ToolHub.md create mode 100644 README.md create mode 100644 agent_runtime/__init__.py create mode 100644 agent_runtime/__pycache__/__init__.cpython-310.pyc create mode 100644 agent_runtime/__pycache__/code_tool.cpython-310.pyc create mode 100644 agent_runtime/__pycache__/image_source_map.cpython-310.pyc create mode 100644 agent_runtime/__pycache__/image_zoom_tool.cpython-310.pyc create mode 100644 agent_runtime/__pycache__/memory_tools.cpython-310.pyc create mode 100644 agent_runtime/__pycache__/readonly_tools.cpython-310.pyc create mode 100644 agent_runtime/__pycache__/search_tools.cpython-310.pyc create mode 100644 agent_runtime/__pycache__/system_tools.cpython-310.pyc create mode 100644 agent_runtime/__pycache__/web_fetch_tool.cpython-310.pyc create mode 100644 agent_runtime/__pycache__/workflow_tools.cpython-310.pyc create mode 100644 agent_runtime/__pycache__/write_tools.cpython-310.pyc create mode 100644 agent_runtime/code_tool.py create mode 100644 agent_runtime/image_source_map.py create mode 100644 agent_runtime/image_zoom_tool.py create mode 100644 agent_runtime/memory_tools.py create mode 100644 agent_runtime/readonly_tools.py create mode 100644 agent_runtime/search_tools.py create mode 100644 agent_runtime/system_tools.py create mode 100644 agent_runtime/web_fetch_tool.py create mode 100644 agent_runtime/workflow_tools.py create mode 100644 agent_runtime/write_tools.py create mode 100644 bootstrap.bat create mode 100644 bootstrap_q8.bat create mode 100644 compose.yml create mode 100644 docker/backend/Dockerfile create mode 100644 docker/backend/entrypoint.sh create mode 100644 
docker/backend/entrypoint_helpers.sh create mode 100644 docker/gateway/Dockerfile create mode 100644 docs/DOCKER_COMPOSE.md create mode 100644 docs/QUICKSTART.md create mode 100644 docs/RELEASE_NOTES.md create mode 100644 docs/TROUBLESHOOTING.md create mode 100644 env_config.ps1 create mode 100644 install.cmd create mode 100644 install.ps1 create mode 100644 install.sh create mode 100644 install.win.ps1 create mode 100644 install_q8.cmd create mode 100644 install_q8.ps1 create mode 100644 requirements.txt create mode 100644 run_8080_toolhub_gateway.py create mode 100644 start_8080_toolhub_stack.cmd create mode 100644 start_8080_toolhub_stack.ps1 create mode 100644 start_8080_toolhub_stack.sh create mode 100644 switch_qwen35_webui.ps1 create mode 100644 switch_qwen35_webui.sh create mode 100644 toolhub_gateway_agent.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b290a1e --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +.git +.tmp +.venv* +__pycache__ +*.pyc +*.pyo +*.pyd +*.log +*.pid +README.draft.md diff --git a/.env b/.env new file mode 100644 index 0000000..83e3c9a --- /dev/null +++ b/.env @@ -0,0 +1,36 @@ +# ===== 网关与后端端口 ===== +GATEWAY_HOST=127.0.0.1 +GATEWAY_PORT=8080 +BACKEND_HOST=127.0.0.1 +BACKEND_PORT=8081 + +# ===== 推理参数 ===== +THINK_MODE=think-on +CTX_SIZE=16384 +IMAGE_MIN_TOKENS=256 +IMAGE_MAX_TOKENS=2048 +MMPROJ_OFFLOAD=on + +# ===== 文件系统只读范围 ===== +READONLY_FS_ROOTS=C:\;D:\;E:\ # 留空时默认只允许读取项目目录;多个目录用分号分隔,例如 D:\docs;D:\projects +READONLY_FS_MAX_READ_BYTES=524288 # 单次读取上限,默认 512KB 524288 + +# ===== 文件系统操作权限 ===== +ENABLE_FILE_WRITE=True # 总开关:是否允许大模型调用写入工具 关闭 False +REQUIRE_HUMAN_CONFIRM=True # 高危开关:写入前是否强制终端弹出确认提示 (y/n) 关闭 False +WRITEABLE_FS_ROOTS=E:\AI_Workspace;E:\Temp\Output # 安全红线:仅允许写入的根目录列表,多个用分号隔开。留空则禁止一切写入。 + +# 记忆文件的存放路径,建议使用相对路径或灵活的绝对路径 +MEMORY_FILE_PATH=./.tmp/super_agent_data/memory.json + +# ===== 9B 模型路径(可不改,使用默认目录) ===== 
+MODEL_PATH=.tmp/models/crossrepo/lmstudio-community__Qwen3.5-9B-GGUF/Qwen3.5-9B-Q4_K_M.gguf +MMPROJ_PATH=.tmp/models/crossrepo/lmstudio-community__Qwen3.5-9B-GGUF/mmproj-Qwen3.5-9B-BF16.gguf +# 如果要一键切到 Q8,可执行 .\install_q8.cmd,它会自动把下面两项改成 Q8 + +# ===== 安装阶段下载源(可选覆盖) ===== +# LLAMA_WIN_CUDA_URL= +# MODEL_GGUF_URL= +# MODEL_MMPROJ_URL= +# MODEL_GGUF_SHA256= +# MODEL_MMPROJ_SHA256= diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b0e45f8 --- /dev/null +++ b/.env.example @@ -0,0 +1,28 @@ +# ===== 网关与后端端口 ===== +GATEWAY_HOST=127.0.0.1 +GATEWAY_PORT=8080 +BACKEND_HOST=127.0.0.1 +BACKEND_PORT=8081 + +# ===== 推理参数 ===== +THINK_MODE=think-on +CTX_SIZE=16384 +IMAGE_MIN_TOKENS=256 +IMAGE_MAX_TOKENS=1024 +MMPROJ_OFFLOAD=off + +# ===== 文件系统只读范围 ===== +READONLY_FS_ROOTS= # 留空时默认只允许读取项目目录;多个目录用分号分隔,例如 D:\docs;D:\projects +READONLY_FS_MAX_READ_BYTES=524288 # 单次读取上限,默认 512KB + +# ===== 9B 模型路径(可不改,使用默认目录) ===== +MODEL_PATH=.tmp/models/crossrepo/lmstudio-community__Qwen3.5-9B-GGUF/Qwen3.5-9B-Q4_K_M.gguf +MMPROJ_PATH=.tmp/models/crossrepo/lmstudio-community__Qwen3.5-9B-GGUF/mmproj-Qwen3.5-9B-BF16.gguf +# 如果要一键切到 Q8,可执行 .\install_q8.cmd,它会自动把下面两项改成 Q8 + +# ===== 安装阶段下载源(可选覆盖) ===== +# LLAMA_WIN_CUDA_URL= +# MODEL_GGUF_URL= +# MODEL_MMPROJ_URL= +# MODEL_GGUF_SHA256= +# MODEL_MMPROJ_SHA256= diff --git a/README-ToolHub.md b/README-ToolHub.md new file mode 100644 index 0000000..c875c82 --- /dev/null +++ b/README-ToolHub.md @@ -0,0 +1,47 @@ +# Qwen3.5-9B ToolHub + +基于 Qwen3.5-9B 多模态模型 + 可调用工具的本地一体化部署方案 + +✅联网搜索、看图、读文件 + +模型推理在本机 GPU 完成,可通过 API 接口使用 + +需要 Windows 10/11、NVIDIA 显卡(≥ 8 GB 显存)、Python 3.10+ + +## 启动 + +``` +1. 双击 bootstrap.bat ← 首次安装,下载约 6 GB 模型 +2. .\start_8080_toolhub_stack.cmd start +3. 
浏览器打开 http://127.0.0.1:8080 +停止:.\start_8080_toolhub_stack.cmd stop +``` + +每次启动需要 30–60 秒加载模型。 + +## 其他路线 + +上面是 Windows 默认主线。如果你的情况不同,可以选择: + +- **Docker Compose** — 已装好 Docker 且 GPU 容器可用的环境。`docker compose up --build` 即可。→ [详细说明](docs/DOCKER_COMPOSE.md) +- **WSL** — 已有 WSL 环境的用户。`./install.sh` + `./start_8080_toolhub_stack.sh start`,底层复用 Windows 主链路。 +- **Q8 量化(约占用10.2 GB)** — 如果你的显存 ≥ 12 GB ,双击 `bootstrap_q8.bat`,脚本自动切换模型并下载。 + +## 能做什么 + +- 联网搜索,抓取网页,提炼摘要并附来源 +- 上传图片直接提问,支持局部放大和以图搜图 +- 只读浏览本机文件,让 AI 帮你看文档和日志 +- 内置思维链,复杂问题可展开推理过程 +- OpenAI 兼容 API(`http://127.0.0.1:8080/v1`),可对接任意兼容客户端 + +## 文档 + +- [详细介绍](docs/QUICKSTART.md) — 安装、启动、配置、服务管理 +- [常见问题](docs/TROUBLESHOOTING.md) — 排障指引 +- [Docker Compose](docs/DOCKER_COMPOSE.md) — 容器化部署 + +## 致谢 + +- [Qwen3.5](https://github.com/QwenLM/Qwen3) — 通义千问多模态大模型 +- [llama.cpp](https://github.com/ggml-org/llama.cpp) — 高性能 GGUF 推理引擎 diff --git a/README.md b/README.md new file mode 100644 index 0000000..4f54849 --- /dev/null +++ b/README.md @@ -0,0 +1,152 @@ + +# Qwen3.5-9B ToolHub Enhanced Version + +> **版本标识**:**原版基础功能 + 二开增强模块** | **Qwen3.5 多模态工具链本地一体化部署方案** + +基于 Qwen3.5-9B 多模态模型 + 可调用工具的本地一体化部署方案。本项目是在原版基础上深度二次开发,实现了 AI 从"只能看"到"**能写、能记、能感知**"的质变。 + +--- + +## 📌 项目定位与声明 + +### 基础定位 +- ✅联网搜索、看图、读文件(原版能力) +- **模型推理在本机 GPU 完成,可通过 API 接口使用** +- 需要 Windows 10/11、NVIDIA 显卡(≥8GB 显存)、Python 3.10+ + +### 声明 +本版本由 **老王 (Lao Wang)** 及 AI 协作伙伴共同完成,旨在探索本地小规模参数模型在实际办公场景中的生产力极限。 +开源致谢:[Qwen3.5](https://github.com/QwenLM/Qwen3) | [llama.cpp](https://github.com/ggml-org/llama.cpp) + +--- + +## 🚀 核心增强功能(二开版独有) + +### 1. ⚡ 原子化物理写入引擎 (Atomic Write Engine) +- **功能突破**:新增 `write_tools.py` 模块,赋予模型真正的"物理写权限" +- **静默落盘**:通过 `.env`环境变量配置 `WRITEABLE_FS_ROOTS`白名单,实现安全、极速的自动化文件保存 +- **沙盒保护**:严格限制写入目录,确保系统核心文件的安全 + +### 2. 
🧠 "睁眼即知"的持久化记忆热注入 (Persistent Memory Injection) +- **长期记忆库**:独立开发 `memory_tools.py`,支持基于 JSON 的偏好、身份和习惯存储 +- **热加载技术**:重构 `toolhub_gateway_agent.py`,在每一轮对话初始化时,将 `memory.json`内容动态注入 System Prompt +- **零开销感知**:AI 无需主动翻阅本子,即可毫秒级感知用户昵称(如"老王")、特定排版偏好(如 Markdown)等重要信息 + +### 3. 🛡️ 反侦察网页抓取增强 (Robust Web Fetcher) +- **HTTP 429 修复**:解决了原版抓取 GitHub 等站点时频发的 HTTP 429 错误 +- **技术细节**:集成主流浏览器 User-Agent伪装,引入指数补偿重试逻辑 + +### 4. ⏰ 实时环境感知系统 +- **时效性补全**:动态注入系统实时时间、星期及运行环境上下文,显著提升处理时间敏感型指令的准确度 + +--- + +## ✅ 完整功能清单(合并版) + +| 能力类别 | 基础功能 (原版) | 增强功能 (二开) | +|---------|-----------------|---------------| +| **联网搜索** | ✅网页抓取、摘要提炼、附来源链接 | ✅反爬优化,429错误自动重试 | +| **图片处理** | ✅看图提问、局部放大、以图搜图 | - | +| **文件操作** | ✅只读浏览本机文件/日志 | ⭐原子化写入(白名单沙盒) | +| **记忆管理** | 无长期记忆 | ⭐JSON持久化偏好库 | +| **环境感知** | 基础上下文 | ⭐实时时间/星期动态注入 | +| **API接口** | ✅OpenAI兼容 API (v1) | - | + +--- + +## 🛠️ 安装与配置(完整流程) + +### 主线部署:Windows 默认方式(推荐新手) + +#### 首次安装(约6GB模型) +```bash +# 方法一:标准启动脚本(双击运行) +bootstrap.bat + +# 方法二:Q8量化版(显存≥12GB,占用约10.2GB) +bootstrap_q8.bat +``` + +#### 启动服务 +```bash +.\start_8080_toolhub_stack.cmd start +# 浏览器访问 http://127.0.0.1:8080 + +停止:.\start_8080_toolhub_stack.cmd stop +``` + +> ⚠️ **每次启动需要30–60秒加载模型** + +--- + +### 🔧 增强配置(二开版专属) + +在 `.env`文件中添加以下配置以启用增强功能: + +```env +# ========================================= +# 【二开版】增强功能开关与路径配置 +# ========================================= + +# ✅ 开启文件写入功能 +ENABLE_FILE_WRITE=True + +# 📂 写入权限白名单(沙盒保护) +WRITEABLE_FS_ROOTS=E:\AI_Workspace + +# 💾 记忆文件存储路径 +MEMORY_FILE_PATH=./.tmp/super_agent_data/memory.json +``` + +--- + +### 其他部署路线 + +- **🧊 WSL 模式** — 已有WSL环境的用户 + ```bash + ./install.sh + ./start_8080_toolhub_stack.sh start + ``` + 底层复用Windows主链路,适合双系统开发场景。 + +--- + +## 📖 文档导航 + +| 章节 | 说明 | 路径 | +|------|------|------| +| [详细介绍](docs/QUICKSTART.md) | 安装、启动、配置、服务管理 | docs/QUICKSTART.md | +| [常见问题](docs/TROUBLESHOOTING.md) | 排障指引,含HTTP 429处理 | docs/TROUBLESHOOTING.md | +| [Docker Compose](docs/DOCKER_COMPOSE.md) | 容器化部署指南 | docs/DOCKER_COMPOSE.md | + +--- + +## 📊 
系统实时状态(环境上下文) + +```text +当前时间:🕒 2026-03-11 星期三 16:29:03 CST +运行模式:本地 GPU 推理 + OpenAI API兼容层 +API端点:http://127.0.0.1:8080/v1 +``` + +--- + +## ⚙️ 启动命令速查表 + +| 操作 | Windows命令 | +|------|------------| +| **首次安装** | `bootstrap.bat` | +| **Q8量化版** | `bootstrap_q8.bat` (≥12GB显存) | +| **启动服务** | `.\start_8080_toolhub_stack.cmd start` | +| **停止服务** | `.\start_8080_toolhub_stack.cmd stop` | + +--- + +## 🤝 开源致谢 + +- **[Qwen3.5](https://github.com/QwenLM/Qwen3)** — 通义千问多模态大模型 +- **[llama.cpp](https://github.com/ggml-org/llama.cpp)** — 高性能GGUF推理引擎 + +--- + +祝你调试愉快!🚀 + diff --git a/agent_runtime/__init__.py b/agent_runtime/__init__.py new file mode 100644 index 0000000..273a4bf --- /dev/null +++ b/agent_runtime/__init__.py @@ -0,0 +1,7 @@ +from . import code_tool # noqa: F401 +from . import image_zoom_tool # noqa: F401 +from . import search_tools # noqa: F401 +from . import system_tools # noqa: F401 +from . import web_fetch_tool # noqa: F401 +from . import workflow_tools # noqa: F401 +from . import memory_tools # noqa: F401 \ No newline at end of file diff --git a/agent_runtime/__pycache__/__init__.cpython-310.pyc b/agent_runtime/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b84c819332e80eb1c6ba7ea7d2efe595add5759 GIT binary patch literal 391 zcmYk2%Sr<=6o!+zSEkhm=mR*5)`hrGL_`o=38D)L1EKAt8k|fd$uNv-U#3f6XSS|< z1y`PvS~MXa-=Cb2iyn{n2(?5Ee6|IS)j8gOfJYa$M5Ikg| z_Xs>>U(HERe3ObBdx)R)JT|?NHYVjoao7QSKm0Phe zt7jiAl;pOzPHdMkPdc&WYvF!N{ERj)>sohzdz4Vx^xJ$}WCJnyN)sX?M2tufDIx=G zh{2EY_&&NmEAKkd9M4Z?r&qHFxb986Ea9?^8>BpAK*7$QUCw| literal 0 HcmV?d00001 diff --git a/agent_runtime/__pycache__/code_tool.cpython-310.pyc b/agent_runtime/__pycache__/code_tool.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a9ab0907950d9f92ed6f68a582f2310fa1e11fc GIT binary patch literal 2372 zcmZ`*|8E?{6`$Fi{c?L}-^FPoLRw@*OLe3%MO9Qn5rPm00@Q)TA?hm9YJD^3>}79v 
zGcy~<)>%s9Ahl{NBvceBSjZMo)T&Tg6(R)?|HnxDM9ztS>JR;muYGUNh(OEk&D)us zH*ep3KJUG`d~7Tra1|fFlMHKwe1e19F9!yXLQ_{kaKdRy!qJn4v>IEXRXy#{hNqRX zz7skWb?nsbd!bkLS?c#|VXYs8K{a;LdcP4iC=rerTcPXY8>H9d?rsvcM2mZS2=}4Y z_K_3dZty4lMJjCQh=NVd(Aja^MxosC**=mwCJEyw_h#{9)+g5AOt}O0X_=Q zS={D~J8KqqxhEL+*KA&cQ7}(=ea#LX-VmdiM_ zYTI%greza^@wn*5WvbC0GGXdE=?m~KQlcX#KxJFq1cl6jOPvU1d@0VBi4@$pMJ(gK zGA?je%B*9-imxxs2p`(h(9}Oc49Gee&<(n54{T0%lVzsqz+ShwrR@!Cg)Tdq^&B7& zC|tED(QeP<4tF)<9`|`|)p@bC><-*@f50{vl)3)Kz`QzGrkT{z>OS%~GlGWC(e z5l=e$8%m7TE#yy759&(4K!`bVw3y~G{|k|i!PKX}n11Nga|7WRJhC1XCdwr}?{%V!jC_w$y! z^yBZV5L0OJTiLh1w-2>|+IZjTJEMP?2J!kRfUuQr@7a)%Y6tWAlZr$4;>Hye5uj=u6OeDBkxz2D2p|K`I?yKej_ z!XSV3Z*o&NooiyxY*tpHJlOGpA#O_LHA6|7XNjc4qvvfedk z$2F;J3CZw=#8-$P`^M)Pi6@@;3Pq@#S-T+$vzptPGiT5I=091zUIi}wKYov2dH{dA z$>rkngY)J$S=cOsM{1{qgEp?!Sj(7`nMafN2 zrB+q>I#6Y{N)@Qe&$6|ks%%+sR)rm3)l{9WwM%}BYKbj%g-vKI|HWCregmz16qys( z@5F6mSHkAjm%HKPgYe++{#W<+xAtNP0PI4ay~2#b0marQZ3dLK(Z!e-!95=vwz=jX zax;Q+n8FxGc#aCENGl^WLg54z{K7{Wqw$O!@!TEs+@He4B_{TtmpgHy@7=o9DBDtQ zDDK;8ZXz=IFg7RFOLS^Iwv+p%t?D29QK|;HCR-+*XUQ|&5L9w&9!0|!K#M$n`O)sZ zqi=>fy}R+j&5ysh`8dmxuLs9RQCFuXq(N%po<53uJP-41Ky4lNqW(tz%r1p#M!hJB zpX<;;aJQT@5(u!0%eacP?EY2Bqt491)-#H`aT+Csf7_+`x;PQIy#=LTd0?2+1jmAV zCNcI5{&ojp)b7SSHKFb?EtvIJ^&M7E3M8|onc7egog6x^5PLd6+$9sx9nKesp*LBiej;ARs zwiox{V&?1I1Q?*twT1$h|EbA!9*bA+{Y`K4;{QZ=fp?Qv=XjB^E+Q}@Z^VL}iYFj0 z!E6HKc=IX_y3PR_vH(J>JT6dJudB^mVC1s9dX%-f2>G$hRZyJf2D{E>8PP?}3^p(WNGiCectj|_*ugc23tbO`l7UWdO(hQ?C%}iwCEX~7^-eBu2 rUx%jn!=?1xJD>u0u-VJhAkm+?7o59$t3r+W24WQ@Uh~$ literal 0 HcmV?d00001 diff --git a/agent_runtime/__pycache__/image_zoom_tool.cpython-310.pyc b/agent_runtime/__pycache__/image_zoom_tool.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fe2b7db09d6a8ba0f4f10b18fc84aba97abf6c0 GIT binary patch literal 6365 zcmZ`-|8o>acAsxMJK9~Xgph>zVtb8k>^);4WQ=W&vkh3rx`5yij(vN$rdHd%(n_;C zOV2ElsL{oTOn?ON<<73V1YE3~O68oY7*ch0mE#D1NLQsQRp%8@`5~$M?VQBU=k=@v 
zQu%gkdS1VN-P7Ig)9*|Bot-HSzxu!5sx-H1+V83I=&uKrH}C`*L(`bXbWbb9Z@r+a zx=}FH+bo!P8=h576cRegn4Vot7Ltm#Jg3@G=%}U&scO2ARy^Iy6f%%Yc%9X3A**<{ z*H!H_vA-fCZ=Pt`lT{_3W}CeSIC_6Dk(3!8O~Z{d1#E6eb0 ztaDLcG8Xm1V{KlRb*cK}?Rq!cpz2SsoS9mjB)Z!pT@(m&t?kArD56fwYbn!>PO}ts|8nl!=p#y2oYWom$1Op*1mD zElzR}=|g@k_dJjhdt}{K|tIK3CYFI)4B7 z$YJ*f?;JQXdh&=mZwHbh0*f`<)J zE78nXemHTOiRTcWU@wYDYidi{f*$Ejo#{)u*bwQV!SqF(#PiVUf*zWo6`|ftENM-9 zNn0dM8DnRYW$fj?UAyv0nFvBrsmoLl7DX7$VxOhy@tQQj7xX!q;&b(4jWq%;WU{@5 zc|-J3n<gs;rX*%hyl=_>*7hg)N11536kh>D?yADm%5XF zo&rB0ee~$!X}s3VB>@ImQKhG85NA^~8>Ta3$v9;!=nKZXT5W@+h5GzfU0X19?Q9~{ z8O~Ba!Yy?wZDuc19OF(4JV-&q1S2D>9b!1uIN6?JyZxuxxtMVYXUh za!l$wMJGPmApOvMbMKrO%`G?YwC;TL;N!o&fAi+q#M|Ww*-DJna-aU+$e8xqR z_?BPiH9>nMlO?})hKn$tQ2Q(GDPDw_O!6AZGima=MooEdKkVReCkv2|HQO76fRunc@^bkKZNCpSS z9Q!Ic6^3;jn6Dfn0y;XT(!2tW$YMxApjbco0e8jbtm2jnESnEQI3Scbx`ioQx}%Fd zs7hzn7c(xah!XAH%8xK*;Zd0S8Hn}*oKTM^iK#1`2bel2TEugqIc-H+m|*4=W5I|F zvH-dkCYw&!5gF4dg`70A2svSf##u1K&gpEkD>Bi4A4w9`T#=!+?g(F8GJrp7McF*ZU~Q9h-u*@>c2Ipoyj!FhwPn%~{B(l{VIa*(9?I^wPo6yPo*X?oa_pUx z?)b{7Q`YF#1t~^a z9xsQ|4E?%Hz{O03I30c%`c;{zR43ip_%&6sA-wY20D=yT((E5Vgs>D5pf{i`8Vk0n znUSt)$;b}%nS^*cO2+RPq9|eGQ6j=Y2|H4@pi$V0$c~bW##U{?0p;`}9_!T>I-=w% z4a*c6n__+%nl;;W2H!P9Bea@{C>?bSXiNA4=F|BIKNF=^`B{9+Qt~#*Z(5P>0zc8_ zLw=Rt9d$<8s4MEGndYsrd+~Ps_v@1}@gJ`XI2vgi9kS|wibfV4N!Q;bmY78Qr7c$T8LVPi|O3h2B#%Eon;v`X|<_;oaRi}uI)x91_-7c>M4Z&SZ*`1j1=f01L z1mxkeP8JB-1`K8N_&;C_==9I%;zu;yB+0wOtYo6L^6LVz3YuG}kaPJEmh4R&;#1aw zxnM0M6nIP+2&k1=TeV2<1*D+Fq*5Y*%u;-SLh+q8Uyl-Nd5w1= zD3MK4Hs~@4SOtKEd56bO!Jt z<|M7bN5Cc|AhR<}L;#>_O@shZ1go%U0-*3Bp#y0f5JZo5U0^^rWD6SP?w5bj zT>j)@9HO*t-EQ6d*L^vu&j~tE`3~_MUb1_ABrlE;+X_yLP-#$RA4-rtQ5pqvO(ZXl z(vZoAJ61^7D|1k5$ww$#=u&3GotSqOHz;I0zlK~Rbb|`xh0fJxs)~~sYduWUZY&cZ z?mdlmrMcL)Z?t923vsbw#sl4 zkx@2g4AI|?df+OEADTtpWBjX2-9zQbTw$l}O(>kqZ6-zQf zrVrkYUQ_GW*-s%5#r7rP*WF6lt#OW^?=NV{6ab~*OtDNn;tZ;?CytaZtn07q{tTAz+DA@Sms($3YMuYpgI`{3eQ@XAozIs)zI*@f&s!I6|Net>()K5&c?lVGQHbKa zw8Hs1mv*gDo#5gD+06#UC+!a{{Uyea1OD8P=63I#-@S8gc<21^`}EmQf4bbfaR2gR 
z>)Nk#L%G()Te;msLqkw$gaH6DWZ;2x;HM}TC#pCXQ)$M75;DaQxx2I}X{=2)deG_K zwJ$Ja>*jCo|H~4Hd;feF$svs<&Y@`F7D%Cup*1GcvCh_I(uF|*fe9-#GzlKpI z8;V%0IKe%cq&TU<=D=1HO4R`xqz)C1xlqJd5y%(pdQlXsfuMM7jUl7NhcHEUI@k}Z zfnXS>2|2-af!@t>QkhUkW5FZ>E0;K(qSLIoVj8l ztT$2bq*=WSFN)-{fBsV>ml@2dKit$D2scL=3iwHqWuwf)RlEz^|3h5REo6S28E?NS z{mgfbLI3SLl*Tq}sY?6y<(9Ai)AFUezzzZQ{6ECn#Syhax`jCp!PsC^sNU|-y#QG*o{hg-klEo8s!ui$1RG< zKxAmTuTbGoK~`UEreZ$|!v73MG8U-ZL1kzv)Q`O>nQcU!fT%KE;(Jt2!lzRTB3@Um zjYfsZ1eJSWU5jV9%&fT->KK_TadoOhhw46C6g8YJu?4k!C;D)&EG|-$J%hH%K-$Ve z2JucmiIzet@EfAUVQI^1Bcw$kP9aHCt8-SMYa476lW8Pm^+u>}Si~-LU2m-|M8n=^ zXb1*T(9MfQfvM4pbf?=ekoGvJ_rpOs`gXkswwwNsQ~v$+*)88{5C>_sEc%@QBX1F6lFFQ8q)S)cNL!zXL&WiP{Tku}UcvWbI;A-5}~=bfhQ>k;FWT z3wQzw2OP~Z@H0_F(x~|D zl#>+eRVLz#F&k!>G^M6Q&N!az%hbg@K6)g+2t}Eih@IFy0Td^p9jt>V#E%C7+ zeSDjuq_aOp!D|%X1>^@Dm0uIYGRU$izs{%}TR{-fP#h@q0F*suPqNqQNe?(iH?;Z< jbo@2+`+tUI{4X^7KZb35l})DpxMR!Xo&DLA)35(Ox$OcF-ct#H7+HXa2Ev$q}GHe!X&oE|1m!X@7ew(^w%O)?fdT9l%VWt=VRxc zulIT0XU6aDwg^1S#T)KW7a@1B^5COGbETY084qpeKRmenSeE|%kkL4}zte@3fxGb^oiu>#b3xXHR% z59^&(eo0s#+jfSqZ9F%rPSQ#*qy40`eFIiQ4wtCZ-gLruAlHBAxxQCYr1^>y@FTuo zm)(Nb+#uw_4zb#I=-{E>ymice?Tud_wBP*sk=Mt&%g;4|_+EuCcpH+Cv}l_IMve57@h3tuq%g{qS=v(DWP-#b*ij=#p)J%}dQ94Mf`&#+ z!`!4Io{TBBWbGOW&9O2i_|}L)jzP-^DXG!VV3K5dYGd+h^6pA!?(@6LA14cElc}r8 zd#&}AmGyns20%znCh3(Du$+gCfSl_vM9 zK6AZVDKEPo*uOORD7Yq&dBGiKhkP`YdJqb?DYeuWdfj(eAT2j=y&!bFDwkRvddU32 z(JF6-nXhwI-wU}HN{u<8bB&0{VDa}}Jh*S*w^BgR1V4bpR5Ys4KB`fJTGUbu$P3h@%74X9XqIMb@A9BZ zp}O@TXvpdmv_UA0?*yP|3KfE-P=Ikr4YeVFh^b6t`mD+fmcy2#gys7Qz+;TPK*B0# zu>IlJHgLhvRN!6^32r+<)pe!edJGH&(of_eiO?g-^o3;VO2&fD-`-tax|w`-WBto3 z`-+E4ra;F@Bb!0$jbY{rX^jXsxCsn?7|x5DS}z@IQ}Wv zfpdy)!h*XCdLU7hK3b$b^heY@4N&|TExQ|z2M}+X9)XbTb?SA0#P&~dkuKpofdyTF z^_$xA78E!^J*hxARaB;AsLW$-FdcG0AkFjhDzj>obbL;+|xtwQ=FIwJ$$Smgh>)R~r6_AzlrofidLO zq9ox zLJgHkGNHwQ2M}1uvzMX0fhTjulw<84ypj{{@+ZnI8w~1lu0473oa9uo@#a7>P)5ob|k5C?*2+1iE~>kVLM_KqaQf+=;73RQUz2ppOO@c zjgKy@U%!rGog%k-Yj%Bd1$24s+Sfpk2!-*R`Ke-O=7Z$D%OC|1#9(~q^j{(sn(mxF 
zC$>W#JqgL$#f9YTZ0C!1Xa4N!($eb6+=H~m(fYt@lP9U{!$_8-)^|h@hee?J;R}k8 z)BvjEg#l%PPACi@gU!)cfo*9YDkpJk%02q~apib14SyhyQhr`iSJKAAqG0VuCRhDN z^T5kEI`X4P#-ILwu$8M2`{n(0AM)S;6t?W8k&#q92Ym#JcDkAS-pd$Wj8MLdo%5~E z+zf6Qwdh}$&wqIL*12M`_(c|foBl&$gS)f%UHkY<7I*uKr7pSC2JLYN$80>)>_J~N zoX~beEs$`&jc}12B5I9?0%c7aPP559mI<0R_N+m@DXH+&Y=v0Sk2laW);S_Do6vF->kgey@X zkRMV7y@JErt%l+as6VhyB&u+4e6~A*|d-vh^5oenRj|EEGEySgYXlWYKBuz|d!3nXUumWAJcgD^&cORL1 z*T&XbA~NmW$^NBD0)JkoCXRhsdld!8hcV_N9 z&Ybf*zjLPE)s->un_T{LaI)JlzM#hDpNqx;JkdWHE%v<@H_aOrwx9m@aC*JR@%nduu;70I!^gC?5sIQ!$`Z6Af3h4lMJYp#1PG;UD8|c`X}!zMvTf;< zVH*iZxXLjyFO?bnhNqAlC=Z_E)6qLL+Tkw!7`LHGV_lQL_)dI~yHEgY_fh>OJ#}x$ z5|<@Td=JKGeD^a+W>Rj_uWUM8g&b`CM|BU6_SbKqGUJiQk|CM#b9n495E--SJ_ds_ z<2jJ{urWnGJ2D80%SsWJ<6u&NF;wOm(%=y(>#}Id`>~j$6>pU`i2SJ_J`vQkW@%tV zCo-Wj7pila`9E1ihjO%KIs_&V5C+LtqMxFOAuRmDjM-UZOnb{|uVoUo%rV(Z2;|m+ z2@fFZ(fx~T)`~6ew5)UB{a1~1#szzhy~2vA+Y`|q+KOzT;qDRKU!_gW8z9#vf^|sh zi>4$X=(AsBi5|4Uh=wE~`8#;l(7fH4`KB*wQB#V|I=LODXUqo+9pE9`X*JMt#3ow4 z6v;;_xmCIX#>fY#=md}!>bvM)6Dn^sA4IGD#-G|}POV%Y0Q`uTfHe?Pap z@VAw7m)f7b)BeSomEWCSy>hwzkJEb#O$rTPetK^Cmv60HyRf$a!Xy+bt6ZB__C!6X zsqSM!R)Z))a7-{_C%XAI#+ACtVOXCMJn`sKZoL_cm*PZPk|?2t{@W>RH$F$iBp$fc zX8rifhtO0R3Xjn=qHlEsfudWl?AIrSoRZ-C3dVH&2wVs7L=6vq&afk)m{j=Y)#{BuPm;q$KIJo+Ks7Z0hObTak2Rc|T&| zHkCh)Abb=-c<_eY2-eLXK^$0Q_42jlch4?e`()*>*H*6mzJ29`_L(bRe){_I+n1ML zKUJWJRcOEUdVxZDVd?7oD<94;|9XDuv-4lP{EBkxqZG@POVP46-rR~cm#&@JG`B_b z;MjIN-4H6vMN~mtjbXDUVFspOUO1Z=9i!6q29h57Dk)IQN2s7$(URZ8OJ$HfU`QN@ z2n;ArHUlZRa*?2vtC37$NQZM%bR-A=86K_wbuI#?5%?$rO0pped9zjvHko8{XKZuS z;l?JpF$KO^J9hO9n`VF>oH*5TTK1gD49s#~H02(w?X{dmFdS;>q*io?^da_>Z-FkM z3}ur}o8;Mv`d2=FcjcWA+E@P4e)ICm+aE#qD}Ow-{P7)H z#2n?mIHgzA1|WypL_6BobqZu7tqC8)*N8$*1}@^UEzC80K)e*MGHhnoacpoa`UYsG zUbDkq6gkL_Bk)<%WW zgBdXhBFU|>b~&m9fwFiYi$;5LUn%S)ArRJ& zeElvr8luMK#`Xo0;i~|@tf=9E4KlI$lk>MuYfRY)jpPy~N{~@9vabIC{Iu;RVVfB+U_yEFgzwz$U z)z_}S_hwB@umc#h{$$ 
z27xPk6ti_wsJUEAa=YUwZ(pLY{R|7}ORtASCPzMG+U6E3Z|B{2nmv6~L!wW@l$-53Pha1QZbtIW`HhF(!#+Y))|`Lg+^J+piX17>8zeyLF23yN?M_$nYKX5N*1OoXe(VgsqRGIysO)>ka%M3l1(v|!wJ!CW$+n`HCRuHANT*RGyD2kf4%{Rdy{9t{k+ zn}^)w%7&WWUHiIxC!rP#<(wha7jK>M;4y!9eE!4NQL9|O?)g^r>|53Gt6;pj zaCdV4_W1nmH@(H1fADQ|g|?N;KV8-?j@O#k%AlhPlvOUg>s78yd6kRTJQyOeNd!)n z86Ob_XKtAn(gf$CcB$~x;obwcGXTO*P(syN6`mT^@u>MmO6L2h!wVWY}@?o?#&nn2g(D-QW*#ko0ki2HqVYZEXC8uWmGsFr4bRg3q_t!<+7v9 zmh%lcrZ_PWCRtA`5+iYt5yIEtkb%>wT$w$#6u2z79C9hvgW^FBo>aJj5e&L|Qs)t> zq7IX&buVFqr3eW5)(OIK(xmQ-)nJO4ozA85C7Uy+oO2!Q*TIiAiB9m=3CA*F4Y2{+ z|E503ho)36R~)io7Woj*x-1Pok6pL~JK=T?fnW*}BB@f6)ToN#LIcw(HE59O)ST96 z2rMf@Xn0yn1|ZuF7G&n2+NMB`2kTiMqH*}LX3=t}m`>%6vVmSX$2kAxtuRdW!t}xi z*XHlMy?Ex*!bcY?AO3#vkC!S_?^S2cEnT~EfBekSJ0DkncD?fU`Rc_hi|?PoSTc$O zCs^7Q2F6GsQ*NsFQ+e%oOP6je-kh8RD!3yhCc>0CX^12j*dT&g078cGPc}e{+BjEd ztCw#Q&Q6uHoKazvQaqJ+co_OhV>m2dQBbh(`*1sd1%delxdvJ2Lbg)Urc97A z&C{G_SEcF%d6l@Dt3w_KsOCc6P7tWq-Qa}s){3!!YkGkbAy4yk*K)&DpVXn&I0|6b z;WwcXrC@GRoM(mzQSkRN+@9g=qybNxQm#T6W|Xm@8}STW=1Fz0^2+98>XoR(THbGK`?k0$e0%f+2loF9AFDkP931}nI;d2gW=IF zXYmtEFPUfU^S37#CqA0LeP?0r4*v!e&*{=d>PWo|tY^}4QiIG7@bWrvs!so=`m1q> zCs@OUyYE!rn}&sz9X`+3VkZQ-lH#ygxLG&HI7W`pj{{VMK2W4156O=ns;{(<8diVL zq0aVl2E_9kr zyBz7&@Z$0lkemEVATXaLF-=8R;nLv6|EUJ>)%Abqf78RpUpB<0r-)?puszfO`o*w3 z3_bo|unaM)O{oJjTmxpSoU4QQ3Ju7_X|JAtqs}=WpOvYP7v4BmXD67|pP%y+0C>sU zp%LE*qP!A>?_6uR%uD_R`g{e&S5bToMFNG4oo}LsA@zBNVG5y-WLOXEU;i7!M%({O zifwD8*ioghPFLK>$V&tWMwHPeHwsh>BGsrdNiyW9VgT`ix_y&Cf z`o5BbrW*qe0`nvXw*v=5z%Xu$zjooNCNhNT^;+YxC;>vj~l7n0mV`o)ocwAqUy z1IJ>XITLrIUJS=XB6<=2eyuD*Tl}(9Cp;7QxWeZne-c*!X~;1S%B>)3tLank*S}r- z;Cg2}92<6E{b!Nr`#z3D{xo(cvtGh5AcJ0LWuD`h=={6bljNY^yiUcd*Q<9f$ie!- zCDF{EMRz<2@lF(Y1mVx2*p33zRsxT|0Gcr5I1b;5#k;U1)HR%Da-Zh-w}tOU5l68H zM52jzp)P}PA8K`|@%?BX03kFsoW`S{JooeOVL8U7{5}!#^BKM@X_hdv@f&fX8ekzg zfdI+C?d%2dDdbeuL`r-XbK|kl24aq@k94i20eI^HdOF4qhj zhx9D$!z<89exr;Z88tF~5ZUrA8`h`}bYX7yH;NbV*zDlEnQ#b>lZYBoVfNLC+@N61 R9<>BoEX!;m)|0EqzX0q&KMw!^ literal 0 HcmV?d00001 diff --git 
a/agent_runtime/__pycache__/system_tools.cpython-310.pyc b/agent_runtime/__pycache__/system_tools.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..38ab22f865104bcd0efa1731cfe3c824b8dc168e GIT binary patch literal 5102 zcma)A`)?e@9pBmK-THh!JBgcw0H#!JuWHiLl(s}E4Uhy-!G+=!ae{6yH)H26_sZ-p zPOOWR;Dn?o7L;dc+;WznG*U}YTO|-T;oqo6>QDF_`zQKKg_Np~&v*9xB&fZ;`OeID zW@o4bG}YA*oq`>AqPob!`WBr^xVdOV0f@)(p&y@XG zF*=0WIZ!V*>UmS8?k|R7q|vCVUMa@PVI*V_p}V*nZz#Ko>B^NNie?1y0LTLuBRU#7xkSx?%saS{o9GqhvI`z7ss$xAme&eu8F6^V<5Yx$qsik zkEv`R>R~KJkSyPeg+ij$Wdls+&`@5fQL7c}e9e*Qc*{o`LMJllFo#Xq!%c2+yJd{o zPe5wiMa#m`ykTY5qls*@F)n0yNK~sk#8ENfZy#_`ohbFA; z)e=r_HbQ0Bi#3sFvL9RN(0hb0ZQq4{g)ZsLQ$X4YZ9 zz&8WiyAHdBZv{5H4topV25cX{6_UA4W%sY}1zHn7j^ob1er9g=()B;hUY~udbNQXl z>C2z|<=p(M7w6BNyng<@&ehjCXWy9n;M&J0PA*)W?L7ZxG9TT}+4G&*E8RISpX;1$ zFTVNq$0wdsZsTA@lp^JZku2B8Chn|4xm!{cc}uguRctmzowu}t17Cw+U=4P3tSM9v z7hy@3n-NSgCauQ?^X)SWmoIj%or=E(1Ze1-ey8*N>CSr}g4DU$(?fdMq4~3~4<$wJ%XLc+pv;GU9jXfw3XkKG&d>>*rOCj;<6N6TVt^n z$`q!Q$iA0;q%vfgKST*5fR@oVT5N`qy>qt3XtNokZO$0St;mY(iqm$1hjPrYN!Ip& zhv`&OC}|WQr3WR0(#x%B_qfxtTF$df?u@c9GV8qYYjcu4$6CN|FDw6clS%F#?wZo&~s22W0lm>rXEn)_g?E9GCE8d~PkuAzLN+8n@A%A?1EO4z6e zqYYUrMnO@Ig~}n3HkAW&5>gUBSl6NwAhiLAu@RMRS7W<@ZNIGsPs-yBw+S@%mP6TRhC^te| zL@IR%)_W4W1dp%-?}b|de3oOInQhu^>2YtQQdyQ|6C2hV%no`_ei!eQEP$mS zDF<;t`~COOXtB02V}v~sG@-?oWyeS#c#u0BDtD$J*)ixx zl)}vPj4{df8qeGgjd0pN-oJ~xb!gIY7kyf9H)^a`@qjgsdo8=&({i9mZp-7|v^B$+ zaU@R~$ldN<2&;N0xUVU4**(MmAb*?K81$m^hhKK4f46H$*(9obVjzi(?XWx{_QR)U zy5Xc4mda&iQK+)yMy#T(N$y5#&5I8FZY^9SmAPQb@1s&~xz1sV`CjGf_!p{vV)t+( z+TV!lTuWJbc9 zw0uh5O>GjORvWpWT0a0#PU#S0i%Juj0eK>QgmsD|NoVdw36T#ODQHLv&o<~z4mvc5 zXB(T?vQB4K3}B7YkOEQBN<)4vQ5s1?hwBLYB1E9o%2UzNXo_+ddpIV6C9Zgtz-~Bk zG`jtJ7Z@^%da1$Tee<@Y9ppX&djV8h8(}1lMsR)W_(A!_O2n1lK=%<89bj`fZssP( zTej&n$D^cSIdRy9PRnc~MTCE}T1WpnvH%7KRA>w~pz|arIXsT*4$IrI zLOcY}b@i`I&0U@Dd~|woW_JF?m*%dWA6jzkk71Iez!uYCl3+1^_NC4XZ^5_O%7SB% z-=;Y_>G2aUMBD(U)^US2U9SP(1ddh@DK`09{-g&!o7lQu?kf&%4QD8ACXb;7&(Zk* 
zI756P-|%8DB8S~ZOd!jC68WT!XVPLuJ$086esvsJdY(c-Ql`BRc<6O`cn3D@2Fdx) zWujt5xt5O$A6@Od&|c=FBM)GmReZD&LF)3$K<1sBu~C_$&GoFj3$vd>2}u%5tjs~j zw*AL9CvI8K&Q&2|4L`|U|p$HfB^;hRHA43Q5L*5>6> zB=4cV&L+sgYPTcF&?KpdB<4$^aFtZFqoP}ZB#BcAzYdTxY!g4rWcI&JX9e1yx>@({ zTQ>~m2D5|CU}`YE)z)todT?P{x=WO>!kQli^*tyd>BRrI7kE4+Y@SA7^-+4p7|-Bd z;84~tJdx7(f-LR_Ht*v(dCgy>1X)BXV_G?L?_49k~^s(jc8|XDEd_?nb?pY}D7n*MSyp za@=XzxY2o2E>b3olBG%xr5|Mg>5+3h6>X?&Y^5+~B~8)?iZ_DL?(K63!FOTu>f8rc zu3xw!$%3Q+7}*amNDW+S5Lr~VRBYmFfe65uW*jN6%OuJ=MCmXl69bbs{O*T^2B(xJ z;&?^Z$hYMlnspyQev`~ooAL(jg_Q-tX{W6E_U_w%-+@O)f{}+G-S?vdBRWG@F5({K zqpdD6HBxR#MkBWaC_ju3;tEs}ArdPXg}zD4!!%wcP$TdlfrkL{>BP(5Nr>+d&?g`c z6g=z`$l`c%o-|n1titY*nKg^Ys*NIFWyjit$^6ftXcMM;yp6b;!-omV%3WqIKkf}P z4WrhN`_G&=v0<(8==^**@5^V1qMs572#~tU27#XuSav_MNL}rHRx+Oz*Fqk@i4uMh z;O4Fc)cni-)b4XXxoxAQ`T2o_N%~q;074!Cprc2Js^x>4^|+uMYa%z(gs4qN@GU3M zUvGBMSEg_WK8grQAx%=QkPuEWG7;XFsI`dzxjnt!wI(mq>!@G VlfjcSZF9gHbbNP!`A-hA{{lXNQThM? literal 0 HcmV?d00001 diff --git a/agent_runtime/__pycache__/web_fetch_tool.cpython-310.pyc b/agent_runtime/__pycache__/web_fetch_tool.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..97b7f782c12c1046e8f76a8ed0b8982da6ef8845 GIT binary patch literal 4176 zcmZ`+>vJ5%5#QO@?X7fr`E9^Hp7DWnGJX)S9m>L%aDpuoOT^*I)Ma%ucUoz0_cXg_ zNmqL*2iX`W*oI&rU_m5fh)pntf)E}ie92$2U-6x!Pp%Zj7gCj^d-o&@1FPEBOixd5 zPfz!+`s5SWXWSd=nSQO?LD z;wu>i_>!d*)r=bQWGhzGGFma7iHCE=(u+-*rf{rU&Ba6}QEbVyz&w^|r5cS>oi@>C znwVBHZM228(^l4=muVYqKPG27=-qTB@K&)EbTz!KOky%qK|)+7u$4&j8eK!z9#b-_ z=sJ*G-;ipLWb!J#h29F9R3#FR8eGHYP%j-|Cb>0PjP zS7U80B?Ch0o(1$%e@YCL!E(uBf%1fHI(CW#+Ck=)9NT5ErVR}2@8#U#LEAw#R%Wi( zJCS20FOp0=!i=(K=F8TAQ!b(SBZkWc9mfh1oE1#hV?67@)NK2SAXJNG&1nKRJv z?g`}FH*qt!Xzd*P+2-^^oo3M}ux{gsnQsio*-&YbTe1tC_jcbKiauCbQyK2s)8APc zZfs?CW1+oJF-x6E%JP=svCiZWujr$UmAVYeJi<~+MSsF&ysM|cY%frIa_H1bYxKWs zzhM{3V8arl*UmZAv_rUAk3xpCX9us_F zMwk`$3R9#?s1$NYV1HvT%!bWjD zZk8^IftcPL$d+j{-U9L=`F4Bnj>EqiXZF_g_O6E>>B11%R~|YHXSBU6FWa73WQW1c 
z**x=d!(q&%OOt`V%Yq=vImOb$Ye3i6c(9d1<<6|_@FIA!!m{Xu^e)TE8J7Dn3>M|T zcM5dK51fLFK8a3o!(0wf5*Z%xR^Sv5zZM6pfCNN`aMxW3Di4q92J(nw+9{DE;si>_ zTc!{-1g$rjy&9OV3Z#Ry5S{S#FkIMx1kV{rWo`vA^o*9O0H<_Emb1Xi%8J!$n zbRUF(C-@>IYv79a3QxUOl`7kPxad)tulLA|NF`62ks%nQS<#nP3TuQ}LIh74QGLOS z`Gm?;IVD$RUmnq@Ix6z5zT}Ie0#6?j##EX(B!~h5C;IYCd=^H&OtrkY$dv_8pR5qz z6QQztnO1F4>(VY6cd5dY4Sle)X>yVXRRwmgpmDF+SEk50;dL?6kCIfcD!)^1?1|>% zWL$vViOHG9?pqob|MAe$9xqkJvxpVK;XHLw0@xt8SUR`+gdHBw?&z_mT@{jrl zPcNK*Vea&aL|FHh@iEE#K3|sojx5Kvp#*@+au6=U0cHzXr(UbL;KdsL`Tf6YV{6}x?NcNX8 zzR`xyryIv=tfa(p2Q1a6-kGIc@k!-T9tfhhG0&OHTmL*$TuP#krw~Y$g_eC zd0Uk|1y@xj6Q>Mu*>T~%Jpe%l zp8ZA>BX#>zU!h1qJOZ8(AEN(R{%Nfr)kagqS^jHo=zVqPS?1uO2{Xm$2 z?O+D5GXr=D#SxHjTBItCF~GQKiN;~37YJ>dmY_jwhK4Q10Q=SnG{Iuf+_lhFXvyNV z4Q`-4Xol9M4=WiKx-wSaC~+Xa7H0omzx*boXYJFE=f6B#JNHKI%oWIA;6VOD!Un1{ zGy)}_?}BaL;8CMp3orh&e)=t_ud+8;VnGvyT7cJztw7AQJyu}+8>~%{*823(+J(Q? zF26GO?%M!Qmn8mkSmVC{62zb_G0K*ANsKZFuO%EcqyoLfof6|7)SD=4P0nbbVh9vn zo&fRhggN%$8oZCe>rMc{P6PW1Y!fJ1Ctzq-B@asN47k^mXGI8I3}xt54~tc$s(Pxg zc(EywCw*l=@HAf;iTm=D2&=KG=BvDuN{|3@HBJ>jc21zGF9D7D@sMgzEHrSv)_>zf z>T6WAQ%R2*fOMa$O~rW`2g-$N5@08A+Ci<18trQH2I)*XR}3zmMt7NXR~8v!-`hIVB&{BD+CON27|A=TmnXTx&u zzJBI;EYzo7y7tLC%X1LYjrYNNm}~q|9PLAbfy~io`~Z^2k%V{gOB~@9eLo2h=kX1b z09ydW`Vquv!1fUc{X8+DJG($mW1CI-Q&Mz5DVHyVK1#{_Nz@#w&%h`X3%g;)mw7LA z9D0<&>*Duxv4d1r-F)om9{E!sux$4Y-{bc1Yv@sqGpOe1DvqY7*73V>48PhS7y)k( z*Rilgmz%;7emk>bv81Qj;_pH?tkhv;4x}M>TUaiFRb$ZoXTyF!jlT%p^bq`15Z(PwX!ypHbLg{ zP97UIAU9Y$+ko0`*0EO0nwXBBovaIz%{;|6vQ4ae&Uj2?n_16EjrH*4tU0Tvdm!7g zD!YUALe^WCZGr60Raqaq3$nZFvO6IA%BpNDyBo5*>#|;^AJ7KwSs_gi59rcmnUIIM z0!P_;IujllFr~S(I3=xJxojxyXNoz$IABQkkxalxd_OOn1Ru)Od@X`8|*5OQWWZE3eXR$xS52RJhkRC)5Q(e?V8(yD2u>0YI zKb+*n`-dJFeCUxu(!k^8!v~{1igJ-ucW}}d6Qg;5QfVX@Dox1_?{GPnXP#PvSKlNJ z7&yohdP+~|Vgrg)tyn1|a2kPh`EeYY=TA%Bi&s)M|OI5lB%wNF(f_wL@iZ~yb&o+qB#EgdyfAQO`!7jiG;$3vy7hO8ZHmu(&|24%q& z6g&(VY(#ZHR^O~Q>#p9eTi3;%_`F34Wc%vWH=z(}vl=$9R}E~{Qrp+(3B~5H0C8x7 
zh8?igNYWyxNjvXnSRl4QAmf5(*ypb@{&6;_@W_Gb-iSQY%V9AT#@03ZrO@jy`U9|K4j8nG=gqXml3-RRVnDE^1Fmag&EPthwcylRW%B{7b+2Lu zvZwO5Fva543$;JIv3TXg(uMhz*RL*}|JCw)mupu~|MjCYOJ`o`o8VJ@%fCB)?X7dQ z({I*J{q~a+Kb4Muc$|ZyoFEiHa5;rxqE0Jk->bbg2TfbyREf(Z<3U#BN+GDMp;hhl zsoHxVz(`DtF70EPe3=Vci?|=zMN{01mrRs|4^|6v97ZJg(Q;03&=u?|QwRjjnWbaB z@sQq3q$iPrJ;*9rRjUB6GkOK4+pSggd2Pn5m_b`;Fk@E#sUDiM+KdIM*{xNqin&b- zt%^0P!|fasr~tnks>2KJlr>6 z%S|2*IX61xk!5(Jz9?iuFC)eRX_MkgGVv_!e76w3P>@%MK}Rd#`BCv*3@e?9N$R1N zAf3ooZib+h1*V9!0$L+coCOl~bN~ zw4BLD5^*1`A)!>xaq$F#%ukwFKFkx>l-Q2%hV_st1aXk~2C}aq1)Gt%nq}ZI4c+>$ zVN-5><|Led^tPyWGO3s%+TG_H#14#fJA+`(c_XKfX)LCUz-=&sxH5tSODZEs;W(RQ z^M24$gAqK4VV5s0)LuPzGyKn9S~~M$B&yB5zIf&L${LnlK36;S?#k@DFaQMLB+o6A?v*4}zM>J;Hae3SZpn=(4)+oPEftp4Y(%$mc^tR!{rpzj-a-Xr>H;(L&R!AS=Z>zZRc zRJ11sBGLCqq!zi2O5dkU*{VA0wF4(496$=lG82Gb0x;aHw*r9c6&?Xl{D1~p8)I#t z#~|2E1JM;zM~-3W3Om;g$N5)pQc#<8g4`PK_AE*`-GLPBM|L~VfH@r&VCn=j9L_oB0tk*?a<-|*))RLpQDRdQE3n6i5PNmVWIZ3`K2qTfSILp zFD)*-zX}wE5zVDLC0PR_G8-vRK7J?q0BZv>BVlqqan!%osCf9DFCJ@1ev@tm0H-ELn+>bRI-| z^g9)M+-0e%U2!lnt}EqBwN&gn{k9HIH}yZ?U+8CE|Kt68`wvKMolihl0MULxsSnwU z*wdjTk$zR1*F>VC7qQ}J;m90?!Qki&se9yr$8rI9u3SKcF?6pqvPT-=NE{~P+u%_1 z<0Y}ZOru-Kj1#@ubpN`eS9l)UmiIxd!S^=ox#2z z^&RSydScE0D3d?c2p`-k5vFjT#3LgmuT9XVrCIbRr3)$^C07{!m!B&Nf}wOWr4lbP z<-!cOV!Z`RBMj1sATtUFlF*ZmD>Et~0)h9IE8@1b2xK=YLoxkPj2?6#Bl~XF`}7WY zYbm4-x@g*ORmg!?ZrbCDG*ky|-OZTeuVmUv9Y_zKo&Y zaFT3Uhc$ntCvaR6o?e#qzrX` zOS|ZYJW#QdcmZ9$=vs+GP8^`Fl_eBzZ~r%({km(Ug9Q-f%p0&NgnU%OU2B0Kl>0<= zU<&T zln^p=&o76iGD7w25uTlZI8hI!ea6W*W{|5l7lOj{YZgcM(;X-8;UGwo8S zfPl#tWEbh?dhqH+Jsa-p_M50cJcIT!o(ufQ$rR);$V5;+3>P}fgFu=Hm{rs(I7DKU zvN6?ZWaoh=qv+Ctw~YW!-KT3FfY^m*AfKB^#OB8+bUH!4y3JKfmD+#{CIR(1&b2 zp)~lN|FoK&zx8eq)Ox^Bj@R(OozZ^j-W3y+DT2jL!Xx?IVL|tcbP&-X*b#IK2)aWf zU%2xq{-cQgc^L{3Qw2l#bAyW*H6TkPH=F+cfNLGYLHc>b1jL}y-M??&2*voKo7z&u ztk_!=s6<+qRv!NxM8~FRe=A8yeadTAPLQHTwV4KeuliEb-4;3a+vy*jfWj(Qa}gG# i@axc9jdp{A1p0NE38yQz*-qH)dSVMw7t&o_`u_mDi!IIo literal 0 HcmV?d00001 diff --git 
a/agent_runtime/__pycache__/write_tools.cpython-310.pyc b/agent_runtime/__pycache__/write_tools.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9058d6900b28ccb8af72f6ff87ab515cc1ef1ff0 GIT binary patch literal 2629 zcmZuz?Qa~#8K0SbxxMq9ZPMTd6j7J9LWhV&iz-2}q7n$S5;8P4b$be0U2f*gI(xh4 z*|}V7&Q*f##6a9YB_hCydX}1~L24pO)Ih__|1lp+)^oRshTFL*-6%2eAgSJi9qbY6*qI4U#k9jGQ^B922{l42@I zl#sr{rX{Ow?Mk6qGOAHr-@EYLhVm?wWH(fta<)i1qzj+3MxP@KXwn|LYZ2=$8plO@ zp_+mPuNLNMhjz&tkt}<@%VyX(xlGW*M9H9enQ1oixV8f&8q$Mxrn>cugTFfbyS9j) z82RPCrw;5x|9{gua=1DrBIV0gq=JSx+?KG)d`vRZoYjTV5I(OKH=6r#qXb>rOXi0C zq!|XvM+?cwXc*V3VX_}ywkF@APaAh|R+&n7%ZbdL5;S#j`yK#^Z1dez-Usg^HK=Z1 zcSY}E=vJglF@kGso+!pGZZ8sLbs&luNfS$Ftg>tJ&~GOWK6Bv3gZ}g5{-Kv%ni$t) zM}!f136`HibrK_7nx~2=I?cLPhpxePU3-oEo=`HwIDtshv9cpE5}L({wwr(sT@c5c z)rhweA$3lQBo2=WiIBgO0BJruKML`N$#Q=YjUp)6iHFJDBYtm{pQS*?vDzNmtyN`P zB7U?xC6tJcX-Bo1p)f&%i1(Rg{;_J<68Bo({e2eDMdL(~4pE69(}m1|dJu{U*m9s6 zhH=}E;d{)$U73OTB@WQsiki|Ka;L}lAgV~v3hO&&ll6>)%3={*O$+2 zUOKh0dM?`ycF=BoWLngec9BETRBIR5+E~4^-aWl><3f7%vutxgikTGRi62VrsB!XENnlf$*sREH` zPl}mVAO+WMvns2NL?Sh6h_j@AMEXLuj8ze3m`Rf zk1mtxzUd;*!(CWml&I1&;e9BmE7sZ4yxVcR{T6)ddhz}v z6yCo~aL-Agl}*&Z?CX@w=O~#g@d5BRw?MN|7iM!9`BQGZ_iWQP;A8AgFeT<|2f2}6 zPhgxLgvyZHo2SdYc-^72`~eID*8775ND@|6a12EdQinm-HnQq6)vl^apxqr2lbHA) zL+SB&51ISfU5Fh4VGOqkB5X*)--+caIhUlB0uH3^I@p&uffa-fPD_|2=E1n zisK;jetcBZ_MFpF^?gr5=sXeCVvfWa221}rzcjqQ{7(ATUtx8|gY^18Z+>+l{m1*8 zr#~5P4Zx%xhyV8R=CzA34d}kd2bBrxv;K4vM}9q)jjHk?=82J8BYauLhhVvU467eP zr7g^g5ihG6&1!o(4kAM!a9BHyqdbt>298DAF&d`x2anf;;cnT}b|S*Md;~XgkG73D zq^EX_nChGnKwu=TYuZ!QN71y=a~m^^=t?l&1-h3GQr8-yyJ>+kHVCaE-TPgU_0tEb zcY=L4w`(Ur#>#o^`F>Pw2;bKQ-*3dc72h3!G3GIk+TXgid+VXR#PRY#cCy$Qd`!1`Eewui2b~L= z2e~x{_5CyO7vY=#h7tVjNk)#q|BytkNjwaC9^ub`IhQ1c$E3&D`924=H=2F(emv`Y SUV0F^A_Q=V4Os str: + params_dict = self._parse_code_params(params) + code = params_dict['code'] + timeout_sec = int(params_dict.get('timeout_sec', DEFAULT_TIMEOUT)) + RUN_DIR.mkdir(parents=True, exist_ok=True) + + with 
tempfile.NamedTemporaryFile(mode='w', suffix='.py', dir=RUN_DIR, delete=False, encoding='utf-8') as fp: + fp.write(code) + script_path = fp.name + + completed = subprocess.run( + [sys.executable, script_path], + text=True, + capture_output=True, + timeout=timeout_sec, + check=False, + ) + payload = { + 'script_path': script_path, + 'returncode': completed.returncode, + 'stdout': completed.stdout, + 'stderr': completed.stderr, + } + return json.dumps(payload, ensure_ascii=False, indent=2) + + def _parse_code_params(self, params: Union[str, dict]) -> dict: + if isinstance(params, dict): + if 'code' not in params: + raise ValueError('code 字段缺失') + return params + try: + parsed = json.loads(params) + if isinstance(parsed, dict) and 'code' in parsed: + return parsed + except json.JSONDecodeError: + pass + code = extract_code(params) + if not code.strip(): + raise ValueError('未检测到可执行代码') + return {'code': code} diff --git a/agent_runtime/image_source_map.py b/agent_runtime/image_source_map.py new file mode 100644 index 0000000..670a83e --- /dev/null +++ b/agent_runtime/image_source_map.py @@ -0,0 +1,32 @@ +import threading +from pathlib import Path +from typing import Dict + +_MAP_LOCK = threading.Lock() +_SAFE_TO_ORIGINAL: Dict[str, str] = {} +MAX_RECORDS = 2048 + + +def _normalize_path(path_or_uri: str) -> str: + raw = path_or_uri.strip() + if raw.startswith('file://'): + raw = raw[len('file://'):] + return str(Path(raw).expanduser().resolve()) + + +def register_safe_image(safe_path: str, original_path: str) -> None: + safe_abs = _normalize_path(safe_path) + original_abs = _normalize_path(original_path) + with _MAP_LOCK: + _SAFE_TO_ORIGINAL[safe_abs] = original_abs + if len(_SAFE_TO_ORIGINAL) <= MAX_RECORDS: + return + overflow = len(_SAFE_TO_ORIGINAL) - MAX_RECORDS + for key in list(_SAFE_TO_ORIGINAL.keys())[:overflow]: + del _SAFE_TO_ORIGINAL[key] + + +def resolve_original_image(path_or_uri: str) -> str: + safe_abs = _normalize_path(path_or_uri) + with _MAP_LOCK: + 
return _SAFE_TO_ORIGINAL.get(safe_abs, safe_abs) diff --git a/agent_runtime/image_zoom_tool.py b/agent_runtime/image_zoom_tool.py new file mode 100644 index 0000000..59c007a --- /dev/null +++ b/agent_runtime/image_zoom_tool.py @@ -0,0 +1,185 @@ +import math +import os +import uuid +import base64 +from io import BytesIO +from pathlib import Path +from typing import List, Tuple, Union + +import requests +from PIL import Image + +from qwen_agent.llm.schema import ContentItem +from qwen_agent.log import logger +from qwen_agent.tools.base import BaseToolWithFileAccess, register_tool +from qwen_agent.utils.utils import extract_images_from_messages + +from .image_source_map import resolve_original_image + +MAX_IMAGE_PIXELS = int(os.getenv('SAFE_MAX_IMAGE_PIXELS', str(4 * 1024 * 1024))) +MAX_IMAGE_SIDE = int(os.getenv('SAFE_MAX_IMAGE_SIDE', '3072')) +MIN_IMAGE_SIDE = int(os.getenv('SAFE_MIN_IMAGE_SIDE', '28')) +MIN_BBOX_SIDE = 32 +JPEG_QUALITY = int(os.getenv('SAFE_JPEG_QUALITY', '90')) +RESAMPLE_LANCZOS = getattr(getattr(Image, 'Resampling', Image), 'LANCZOS') +HTTP_TIMEOUT_SEC = 30 + + +def _normalize_local_path(path_or_uri: str) -> str: + raw = path_or_uri.strip() + if raw.startswith('file://'): + raw = raw[len('file://'):] + return str(Path(raw).expanduser().resolve()) + + +def _is_image_data_uri(image_ref: str) -> bool: + return image_ref.strip().lower().startswith('data:image') + + +def _load_data_uri_image(image_ref: str) -> Image.Image: + try: + header, encoded = image_ref.split(',', 1) + except ValueError as exc: + raise ValueError('data URI 格式错误') from exc + if ';base64' not in header.lower(): + raise ValueError('仅支持 base64 图片 data URI') + decoded = base64.b64decode(encoded) + return Image.open(BytesIO(decoded)).convert('RGB') + + +def _resolve_image_reference(image_ref: str) -> str: + if _is_image_data_uri(image_ref): + return image_ref + if image_ref.startswith('http://') or image_ref.startswith('https://'): + return image_ref + return 
resolve_original_image(image_ref) + + +def _load_image(image_ref: str, work_dir: str) -> Image.Image: + if _is_image_data_uri(image_ref): + return _load_data_uri_image(image_ref) + if image_ref.startswith('http://') or image_ref.startswith('https://'): + response = requests.get(image_ref, timeout=HTTP_TIMEOUT_SEC) + response.raise_for_status() + return Image.open(BytesIO(response.content)).convert('RGB') + + local = _normalize_local_path(image_ref) + if os.path.exists(local): + return Image.open(local).convert('RGB') + + fallback = os.path.join(work_dir, image_ref) + return Image.open(fallback).convert('RGB') + + +def _ensure_min_bbox( + left: float, + top: float, + right: float, + bottom: float, + img_w: int, + img_h: int, +) -> Tuple[int, int, int, int]: + width = max(1.0, right - left) + height = max(1.0, bottom - top) + if width >= MIN_BBOX_SIDE and height >= MIN_BBOX_SIDE: + return int(left), int(top), int(right), int(bottom) + + scale = MIN_BBOX_SIDE / min(width, height) + half_w = width * scale * 0.5 + half_h = height * scale * 0.5 + center_x = (left + right) * 0.5 + center_y = (top + bottom) * 0.5 + + new_left = max(0, int(math.floor(center_x - half_w))) + new_top = max(0, int(math.floor(center_y - half_h))) + new_right = min(img_w, int(math.ceil(center_x + half_w))) + new_bottom = min(img_h, int(math.ceil(center_y + half_h))) + return new_left, new_top, new_right, new_bottom + + +def _relative_bbox_to_absolute(bbox_2d: list, img_w: int, img_h: int) -> Tuple[int, int, int, int]: + rel_x1, rel_y1, rel_x2, rel_y2 = [float(v) for v in bbox_2d] + abs_x1 = max(0.0, min(img_w, rel_x1 / 1000.0 * img_w)) + abs_y1 = max(0.0, min(img_h, rel_y1 / 1000.0 * img_h)) + abs_x2 = max(0.0, min(img_w, rel_x2 / 1000.0 * img_w)) + abs_y2 = max(0.0, min(img_h, rel_y2 / 1000.0 * img_h)) + left = min(abs_x1, abs_x2) + top = min(abs_y1, abs_y2) + right = max(abs_x1, abs_x2) + bottom = max(abs_y1, abs_y2) + return _ensure_min_bbox(left, top, right, bottom, img_w, img_h) + + +def 
_scale_size(width: int, height: int) -> Tuple[int, int]: + pixel_count = width * height + if pixel_count <= 0: + raise ValueError(f'无效图片尺寸: {width}x{height}') + scale_by_pixels = math.sqrt(MAX_IMAGE_PIXELS / pixel_count) if pixel_count > MAX_IMAGE_PIXELS else 1.0 + longest_side = max(width, height) + scale_by_side = MAX_IMAGE_SIDE / longest_side if longest_side > MAX_IMAGE_SIDE else 1.0 + scale = min(1.0, scale_by_pixels, scale_by_side) + return ( + max(MIN_IMAGE_SIDE, int(width * scale)), + max(MIN_IMAGE_SIDE, int(height * scale)), + ) + + +def _resize_crop_if_needed(image: Image.Image) -> Image.Image: + width, height = image.size + new_w, new_h = _scale_size(width, height) + if (new_w, new_h) == (width, height): + return image + return image.resize((new_w, new_h), RESAMPLE_LANCZOS) + + +@register_tool('image_zoom_in_tool', allow_overwrite=True) +class OriginalImageZoomTool(BaseToolWithFileAccess): + description = '基于原图裁切指定区域,并在裁切后按安全阈值缩放输出。' + parameters = { + 'type': 'object', + 'properties': { + 'bbox_2d': { + 'type': 'array', + 'items': { + 'type': 'number' + }, + 'minItems': 4, + 'maxItems': 4, + 'description': '裁切框,格式 [x1,y1,x2,y2],坐标范围 0 到 1000' + }, + 'label': { + 'type': 'string', + 'description': '目标对象标签' + }, + 'img_idx': { + 'type': 'number', + 'description': '图片索引,从 0 开始' + } + }, + 'required': ['bbox_2d', 'label', 'img_idx'] + } + + def call(self, params: Union[str, dict], **kwargs) -> List[ContentItem]: + params = self._verify_json_format_args(params) + images = extract_images_from_messages(kwargs.get('messages', [])) + if not images: + return [ContentItem(text='Error: 未找到输入图片')] + + img_idx = int(params['img_idx']) + if img_idx < 0 or img_idx >= len(images): + return [ContentItem(text=f'Error: img_idx 越界,当前图片数量 {len(images)}')] + + os.makedirs(self.work_dir, exist_ok=True) + try: + image_ref = images[img_idx] + source_ref = _resolve_image_reference(image_ref) + image = _load_image(source_ref, self.work_dir) + bbox = 
_relative_bbox_to_absolute(params['bbox_2d'], *image.size) + cropped = image.crop(bbox) + resized = _resize_crop_if_needed(cropped) + output_path = os.path.abspath(os.path.join(self.work_dir, f'{uuid.uuid4()}.jpg')) + resized.save(output_path, format='JPEG', quality=JPEG_QUALITY, optimize=True) + return [ContentItem(image=output_path)] + except Exception as exc: + logger.warning(str(exc)) + return [ContentItem(text=f'Tool Execution Error {exc}')] diff --git a/agent_runtime/memory_tools.py b/agent_runtime/memory_tools.py new file mode 100644 index 0000000..fde7248 --- /dev/null +++ b/agent_runtime/memory_tools.py @@ -0,0 +1,74 @@ +import json +import os +from pathlib import Path +from typing import Union +from qwen_agent.tools.base import BaseTool, register_tool + +# 从环境变量读取,如果读不到则默认为当前目录下的 memory.json +# 使用 .resolve() 自动处理相对路径转绝对路径的逻辑 +MEMORY_FILE = Path(os.getenv('MEMORY_FILE_PATH', './memory.json')).resolve() + +def _load_memory() -> list: + """内部函数:安全加载记忆并强制转换为列表格式""" + if not MEMORY_FILE.exists(): + return [] + try: + content = MEMORY_FILE.read_text(encoding='utf-8').strip() + if not content: + return [] + data = json.loads(content) + # 核心修复:如果读到的是字典或其他格式,强制转为列表 + if isinstance(data, list): + return data + return [] + except Exception: + return [] + +def _save_memory(memories: list): + """内部函数:安全保存""" + try: + MEMORY_FILE.parent.mkdir(parents=True, exist_ok=True) + MEMORY_FILE.write_text(json.dumps(memories, ensure_ascii=False, indent=2), encoding='utf-8') + except Exception as e: + print(f"写入记忆文件失败: {e}") + +@register_tool('manage_memory', allow_overwrite=True) +class MemoryTool(BaseTool): + description = '长期记忆管理工具。支持 add (添加), list (查看), delete (删除索引)。' + parameters = { + 'type': 'object', + 'properties': { + 'operation': {'type': 'string', 'description': '操作类型: add|list|delete'}, + 'content': {'type': 'string', 'description': '记忆内容(仅add模式)'}, + 'index': {'type': 'integer', 'description': '索引号(仅delete模式)'} + }, + 'required': ['operation'], + } + + def 
call(self, params: Union[str, dict], **kwargs) -> str: + params = self._verify_json_format_args(params) + op = params['operation'].lower() + memories = _load_memory() + + if op == 'add': + content = params.get('content', '').strip() + if not content: + return "错误:内容不能为空。" + memories.append(content) + _save_memory(memories) + return f"✅ 成功存入:『{content}』" + + elif op == 'list': + if not memories: + return "目前没有任何长期记忆。" + return "记忆列表:\n" + "\n".join([f"[{i}] {m}" for i, m in enumerate(memories)]) + + elif op == 'delete': + idx = params.get('index') + if idx is None or not (0 <= idx < len(memories)): + return f"错误:索引 {idx} 无效。" + removed = memories.pop(idx) + _save_memory(memories) + return f"🗑️ 已删除:『{removed}』" + + return f"不支持的操作: {op}" \ No newline at end of file diff --git a/agent_runtime/readonly_tools.py b/agent_runtime/readonly_tools.py new file mode 100644 index 0000000..15e68b7 --- /dev/null +++ b/agent_runtime/readonly_tools.py @@ -0,0 +1,107 @@ +import json +import os +from pathlib import Path +from typing import Iterable, Union + +from qwen_agent.tools.base import BaseTool, register_tool + +DEFAULT_MAX_READ_BYTES = 512 * 1024 + + +def _project_root() -> Path: + return Path(__file__).resolve().parents[1] + + +def _split_root_items(raw: str) -> list[str]: + if not raw.strip(): + return [] + return [item.strip() for item in raw.split(os.pathsep) if item.strip()] + + +def _resolve_roots() -> tuple[Path, ...]: + roots_value = os.getenv('READONLY_FS_ROOTS', '') + root_items = _split_root_items(roots_value) + if not root_items: + legacy_root = os.getenv('READONLY_FS_ROOT', '') + if legacy_root.strip(): + root_items = [legacy_root.strip()] + if not root_items: + root_items = [str(_project_root())] + return tuple(Path(os.path.expanduser(item)).resolve() for item in root_items) + + +def _resolve_target(raw_path: str) -> Path: + return Path(os.path.expanduser(raw_path)).resolve() + + +def _is_within_root(target: Path, root: Path) -> bool: + try: + 
target.relative_to(root) + return True + except ValueError: + return False + + +def _ensure_within_roots(target: Path, roots: Iterable[Path]) -> None: + allowed_roots = tuple(roots) + if any(_is_within_root(target, root) for root in allowed_roots): + return + allowed_text = ', '.join(str(root) for root in allowed_roots) + raise PermissionError(f'只允许访问这些根目录内的路径: {allowed_text};拒绝: {target}') + + +@register_tool('filesystem', allow_overwrite=True) +class ReadOnlyFilesystemTool(BaseTool): + description = '只读文件系统工具,支持 list 和 read 两种操作。' + parameters = { + 'type': 'object', + 'properties': { + 'operation': { + 'type': 'string', + 'description': '仅支持 list|read' + }, + 'path': { + 'type': 'string', + 'description': '目标路径' + }, + }, + 'required': ['operation', 'path'], + } + + def call(self, params: Union[str, dict], **kwargs) -> str: + params = self._verify_json_format_args(params) + operation = str(params['operation']).strip().lower() + if operation not in {'list', 'read'}: + raise PermissionError(f'只读策略已启用,禁止 operation={operation}') + + roots = _resolve_roots() + target = _resolve_target(str(params['path'])) + _ensure_within_roots(target, roots) + if operation == 'list': + return self._list_path(target) + return self._read_file(target) + + def _list_path(self, target: Path) -> str: + if not target.exists(): + raise FileNotFoundError(f'路径不存在: {target}') + if target.is_file(): + stat = target.stat() + payload = {'type': 'file', 'path': str(target), 'size': stat.st_size} + return json.dumps(payload, ensure_ascii=False) + + items = [] + for child in sorted(target.iterdir()): + item_type = 'dir' if child.is_dir() else 'file' + size = child.stat().st_size if child.is_file() else None + items.append({'name': child.name, 'type': item_type, 'size': size}) + payload = {'type': 'dir', 'path': str(target), 'items': items} + return json.dumps(payload, ensure_ascii=False, indent=2) + + def _read_file(self, target: Path) -> str: + if not target.exists() or not target.is_file(): + 
raise FileNotFoundError(f'文件不存在: {target}') + limit = int(os.getenv('READONLY_FS_MAX_READ_BYTES', str(DEFAULT_MAX_READ_BYTES))) + size = target.stat().st_size + if size > limit: + raise ValueError(f'文件过大: {size} bytes,超过读取上限 {limit} bytes') + return target.read_text(encoding='utf-8') diff --git a/agent_runtime/search_tools.py b/agent_runtime/search_tools.py new file mode 100644 index 0000000..1d5d756 --- /dev/null +++ b/agent_runtime/search_tools.py @@ -0,0 +1,135 @@ +import os +import re +from typing import List, Union + +from ddgs import DDGS + +from qwen_agent.llm.schema import ContentItem +from qwen_agent.tools.base import BaseTool, register_tool + +DEFAULT_RESULTS = 6 +DEFAULT_REGION = os.getenv('WEB_SEARCH_REGION', 'wt-wt') +DEFAULT_SAFESEARCH = os.getenv('WEB_SEARCH_SAFESEARCH', 'on') +QUERY_SUFFIX_PATTERN = re.compile( + r'(是谁|是什么|是啥|什么意思|介绍一下|请介绍|是谁啊|是谁呀|是啥啊|是啥呀|吗|嘛|呢|么)$' +) + + +def _normalize_query(query: str) -> str: + compact = query.strip() + compact = compact.replace('?', '?').replace('!', '!').replace('。', '.') + compact = compact.strip(' ?!.,;:,。?!;:') + compact = compact.removeprefix('请问').strip() + compact = QUERY_SUFFIX_PATTERN.sub('', compact).strip() + compact = compact.strip(' ?!.,;:,。?!;:') + return compact or query.strip() + + +def _clamp_results(value: int) -> int: + if value < 1: + return 1 + if value > 12: + return 12 + return value + + +@register_tool('web_search', allow_overwrite=True) +class LocalWebSearchTool(BaseTool): + description = '搜索互联网并返回标题、链接和摘要。' + parameters = { + 'type': 'object', + 'properties': { + 'query': { + 'type': 'string', + 'description': '搜索关键词' + }, + 'max_results': { + 'type': 'integer', + 'description': '返回条数,建议 1 到 12', + 'default': DEFAULT_RESULTS + } + }, + 'required': ['query'], + } + + def call(self, params: Union[str, dict], **kwargs) -> str: + params = self._verify_json_format_args(params) + query = _normalize_query(params['query']) + if not query: + raise ValueError('query 不能为空') + max_results = 
_clamp_results(int(params.get('max_results', DEFAULT_RESULTS))) + + with DDGS() as ddgs: + results = list( + ddgs.text( + query=query, + max_results=max_results, + region=DEFAULT_REGION, + safesearch=DEFAULT_SAFESEARCH, + ) + ) + + if not results: + return f'未检索到结果,query={query}' + + lines = [] + for idx, item in enumerate(results, start=1): + title = item.get('title', '').strip() + href = item.get('href', '').strip() + body = item.get('body', '').strip() + lines.append(f'[{idx}] {title}\nURL: {href}\n摘要: {body}') + return '\n\n'.join(lines) + + +@register_tool('image_search', allow_overwrite=True) +class LocalImageSearchTool(BaseTool): + description = '按关键词搜索图片并返回图文结果。' + parameters = { + 'type': 'object', + 'properties': { + 'query': { + 'type': 'string', + 'description': '图片搜索关键词' + }, + 'max_results': { + 'type': 'integer', + 'description': '返回条数,建议 1 到 12', + 'default': DEFAULT_RESULTS + } + }, + 'required': ['query'], + } + + def call(self, params: Union[str, dict], **kwargs) -> List[ContentItem]: + params = self._verify_json_format_args(params) + query = _normalize_query(params['query']) + if not query: + raise ValueError('query 不能为空') + max_results = _clamp_results(int(params.get('max_results', DEFAULT_RESULTS))) + + try: + with DDGS() as ddgs: + results = list( + ddgs.images( + query=query, + max_results=max_results, + region=DEFAULT_REGION, + safesearch=DEFAULT_SAFESEARCH, + ) + ) + except Exception as exc: + return [ContentItem(text=f'图片检索失败: {exc}')] + + if not results: + return [ContentItem(text=f'未检索到图片,query={query}')] + + content: List[ContentItem] = [] + for idx, item in enumerate(results, start=1): + title = item.get('title', '').strip() + image_url = item.get('image', '').strip() + page_url = item.get('url', '').strip() + text = f'[{idx}] {title}\n图片: {image_url}\n来源: {page_url}' + content.append(ContentItem(text=text)) + if image_url: + content.append(ContentItem(image=image_url)) + return content diff --git a/agent_runtime/system_tools.py 
b/agent_runtime/system_tools.py new file mode 100644 index 0000000..ab13232 --- /dev/null +++ b/agent_runtime/system_tools.py @@ -0,0 +1,159 @@ +import json +import os +import shutil +import subprocess +from pathlib import Path +from typing import Union + +from qwen_agent.tools.base import BaseTool, register_tool + +DEFAULT_TIMEOUT = 60 + + +def _ensure_parent(path: Path) -> None: + parent = path.parent + parent.mkdir(parents=True, exist_ok=True) + + +def _build_shell_command(command: str) -> list[str]: + if os.name == 'nt': + return ['powershell.exe', '-NoProfile', '-Command', command] + return ['bash', '-lc', command] + + +@register_tool('filesystem', allow_overwrite=True) +class FilesystemTool(BaseTool): + description = '文件系统工具,支持目录列举、读写文件、创建目录和删除。' + parameters = { + 'type': 'object', + 'properties': { + 'operation': { + 'type': 'string', + 'description': 'list|read|write|append|mkdir|remove' + }, + 'path': { + 'type': 'string', + 'description': '目标路径' + }, + 'content': { + 'type': 'string', + 'description': '写入内容,仅 write 或 append 需要' + } + }, + 'required': ['operation', 'path'], + } + + def call(self, params: Union[str, dict], **kwargs) -> str: + params = self._verify_json_format_args(params) + operation = params['operation'].strip().lower() + target = Path(os.path.expanduser(params['path'])).resolve() + handlers = { + 'list': self._list_path, + 'read': self._read_file, + 'write': self._write_file, + 'append': self._append_file, + 'mkdir': self._mkdir_path, + 'remove': self._remove_path, + } + if operation not in handlers: + raise ValueError(f'不支持的 operation: {operation}') + return handlers[operation](target, params) + + def _list_path(self, target: Path, params: dict) -> str: + if not target.exists(): + raise FileNotFoundError(f'路径不存在: {target}') + if target.is_file(): + stat = target.stat() + return json.dumps({'type': 'file', 'path': str(target), 'size': stat.st_size}, ensure_ascii=False) + + items = [] + for child in sorted(target.iterdir()): + item_type = 
'dir' if child.is_dir() else 'file' + size = child.stat().st_size if child.is_file() else None + items.append({'name': child.name, 'type': item_type, 'size': size}) + return json.dumps({'type': 'dir', 'path': str(target), 'items': items}, ensure_ascii=False, indent=2) + + def _read_file(self, target: Path, params: dict) -> str: + if not target.exists() or not target.is_file(): + raise FileNotFoundError(f'文件不存在: {target}') + return target.read_text(encoding='utf-8') + + def _write_file(self, target: Path, params: dict) -> str: + content = params.get('content') + if content is None: + raise ValueError('write 操作必须提供 content') + _ensure_parent(target) + target.write_text(content, encoding='utf-8') + return f'写入成功: {target}' + + def _append_file(self, target: Path, params: dict) -> str: + content = params.get('content') + if content is None: + raise ValueError('append 操作必须提供 content') + _ensure_parent(target) + with target.open('a', encoding='utf-8') as fp: + fp.write(content) + return f'追加成功: {target}' + + def _mkdir_path(self, target: Path, params: dict) -> str: + target.mkdir(parents=True, exist_ok=True) + return f'目录已创建: {target}' + + def _remove_path(self, target: Path, params: dict) -> str: + if not target.exists(): + raise FileNotFoundError(f'路径不存在: {target}') + if target.is_dir(): + shutil.rmtree(target) + else: + target.unlink() + return f'删除成功: {target}' + + +@register_tool('run_command', allow_overwrite=True) +class RunCommandTool(BaseTool): + description = '执行本机命令并返回退出码、标准输出和标准错误。' + parameters = { + 'type': 'object', + 'properties': { + 'command': { + 'type': 'string', + 'description': '待执行命令' + }, + 'cwd': { + 'type': 'string', + 'description': '执行目录' + }, + 'timeout_sec': { + 'type': 'integer', + 'description': '超时时间秒数', + 'default': DEFAULT_TIMEOUT + } + }, + 'required': ['command'], + } + + def call(self, params: Union[str, dict], **kwargs) -> str: + params = self._verify_json_format_args(params) + command = params['command'].strip() + if not command: + 
raise ValueError('command 不能为空') + timeout_sec = int(params.get('timeout_sec', DEFAULT_TIMEOUT)) + cwd_raw = params.get('cwd') or os.getcwd() + cwd = str(Path(os.path.expanduser(cwd_raw)).resolve()) + + completed = subprocess.run( + _build_shell_command(command), + cwd=cwd, + text=True, + capture_output=True, + timeout=timeout_sec, + check=False, + ) + payload = { + 'command': command, + 'cwd': cwd, + 'returncode': completed.returncode, + 'stdout': completed.stdout, + 'stderr': completed.stderr, + } + return json.dumps(payload, ensure_ascii=False, indent=2) diff --git a/agent_runtime/web_fetch_tool.py b/agent_runtime/web_fetch_tool.py new file mode 100644 index 0000000..56f85ae --- /dev/null +++ b/agent_runtime/web_fetch_tool.py @@ -0,0 +1,104 @@ +import time +import random +from typing import Tuple, Union +import requests +from requests import Response +from requests.exceptions import SSLError, RequestException +from bs4 import BeautifulSoup + +from qwen_agent.tools.base import BaseTool, register_tool + +DEFAULT_MAX_CHARS = 10000 + +# 模拟真实浏览器请求头,防止 GitHub 等网站返回 429 +COMMON_HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', + 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', + 'Accept-Encoding': 'gzip, deflate, br', + 'Connection': 'keep-alive' +} + +def _normalize_text(text: str) -> str: + lines = [line.strip() for line in text.splitlines()] + lines = [line for line in lines if line] + return '\n'.join(lines) + +def _fetch_page(url: str, timeout: int = 30, retries: int = 2) -> Tuple[Union[Response, str], bool]: + """带有重试机制和伪装头的抓取函数""" + for i in range(retries + 1): + try: + if i > 0: + time.sleep(2 + random.uniform(1, 2) * i) + + response = requests.get(url, headers=COMMON_HEADERS, timeout=timeout, verify=True) + + if response.status_code == 429: + if i < retries: 
continue + return f"错误:目标网站限制了请求频率 (429)。请稍后再试,禁止读取本地无关文件。", False + + response.raise_for_status() + return response, False + + except SSLError: + try: + response = requests.get(url, headers=COMMON_HEADERS, timeout=timeout, verify=False) + response.raise_for_status() + return response, True + except Exception as e: + return f"SSL 错误且备选方案失败: {str(e)}", False + except RequestException as e: + if i < retries: continue + return f"网络抓取失败: {str(e)}", False + + return "未知抓取错误", False + +def _extract_page_text(html: str, max_chars: int) -> Tuple[str, str]: + soup = BeautifulSoup(html, 'html.parser') + for tag in soup(['script', 'style', 'noscript', 'header', 'footer', 'nav']): + tag.decompose() + title = soup.title.string.strip() if soup.title and soup.title.string else '无标题' + body_text = _normalize_text(soup.get_text(separator='\n')) + return title, body_text[:max_chars] + +@register_tool('web_fetch', allow_overwrite=True) +class WebFetchTool(BaseTool): + description = '抓取网页正文并返回可读文本。' + parameters = { + 'type': 'object', + 'properties': { + 'url': {'type': 'string', 'description': '网页链接'}, + 'max_chars': {'type': 'integer', 'description': '返回最大字符数', 'default': DEFAULT_MAX_CHARS} + }, + 'required': ['url'], + } + + def call(self, params: Union[str, dict], **kwargs) -> str: + params = self._verify_json_format_args(params) + url = params['url'].strip() + max_chars = int(params.get('max_chars', DEFAULT_MAX_CHARS)) + + result, insecure = _fetch_page(url) + if isinstance(result, str): + return result + + title, body_text = _extract_page_text(result.text, max_chars) + insecure_note = '(注意:使用了非安全连接)\n' if insecure else '' + return f'标题: {title}\n链接: {url}\n{insecure_note}\n{body_text}' + +@register_tool('web_extractor', allow_overwrite=True) +class WebExtractorTool(BaseTool): + description = '提取单个网页正文。' + parameters = { + 'type': 'object', + 'properties': { + 'url': {'type': 'string', 'description': '网页链接'}, + 'max_chars': {'type': 'integer', 'description': '返回最大字符数', 
'default': DEFAULT_MAX_CHARS} + }, + 'required': ['url'], + } + + def call(self, params: Union[str, dict], **kwargs) -> str: + # 复用 WebFetchTool 的逻辑,但作为独立的类注册 + fetcher = WebFetchTool(self.cfg) + return fetcher.call(params, **kwargs) \ No newline at end of file diff --git a/agent_runtime/workflow_tools.py b/agent_runtime/workflow_tools.py new file mode 100644 index 0000000..6c5f40f --- /dev/null +++ b/agent_runtime/workflow_tools.py @@ -0,0 +1,170 @@ +import json +import os +import subprocess +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, Union + +from qwen_agent.tools.base import BaseTool, register_tool + +ROOT_DIR = Path(__file__).resolve().parents[1] +DATA_DIR = ROOT_DIR / '.tmp' / 'super_agent_data' +MEMORY_FILE = DATA_DIR / 'memory.json' +TODO_DIR = DATA_DIR / 'todos' +TASK_FILE = DATA_DIR / 'tasks.jsonl' + + +def _build_shell_command(command: str) -> list[str]: + if os.name == 'nt': + return ['powershell.exe', '-NoProfile', '-Command', command] + return ['bash', '-lc', command] + + +def _ensure_data_dirs() -> None: + DATA_DIR.mkdir(parents=True, exist_ok=True) + TODO_DIR.mkdir(parents=True, exist_ok=True) + if not MEMORY_FILE.exists(): + MEMORY_FILE.write_text('{}', encoding='utf-8') + + +def _load_memory() -> Dict[str, Any]: + _ensure_data_dirs() + return json.loads(MEMORY_FILE.read_text(encoding='utf-8')) + + +def _save_memory(data: Dict[str, Any]) -> None: + _ensure_data_dirs() + MEMORY_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding='utf-8') + + +@register_tool('save_memory', allow_overwrite=True) +class SaveMemoryTool(BaseTool): + description = '保存一条长期记忆,按 key 覆盖写入。' + parameters = { + 'type': 'object', + 'properties': { + 'key': { + 'type': 'string', + 'description': '记忆键名' + }, + 'value': { + 'type': 'string', + 'description': '记忆内容' + } + }, + 'required': ['key', 'value'], + } + + def call(self, params: Union[str, dict], **kwargs) -> str: + params = self._verify_json_format_args(params) 
+ key = params['key'].strip() + if not key: + raise ValueError('key 不能为空') + memory = _load_memory() + memory[key] = params['value'] + _save_memory(memory) + return f'已保存记忆: {key}' + + +@register_tool('read_memory', allow_overwrite=True) +class ReadMemoryTool(BaseTool): + description = '读取长期记忆,支持读取单个 key 或全部。' + parameters = { + 'type': 'object', + 'properties': { + 'key': { + 'type': 'string', + 'description': '可选,不传则返回全部记忆' + } + }, + 'required': [], + } + + def call(self, params: Union[str, dict], **kwargs) -> str: + params = self._verify_json_format_args(params) + memory = _load_memory() + key = params.get('key') + if key: + return json.dumps({key: memory.get(key)}, ensure_ascii=False, indent=2) + return json.dumps(memory, ensure_ascii=False, indent=2) + + +@register_tool('todo_write', allow_overwrite=True) +class TodoWriteTool(BaseTool): + description = '写入任务清单文件。' + parameters = { + 'type': 'object', + 'properties': { + 'title': { + 'type': 'string', + 'description': '清单标题' + }, + 'items': { + 'type': 'array', + 'items': { + 'type': 'string' + }, + 'description': '任务项数组' + } + }, + 'required': ['title', 'items'], + } + + def call(self, params: Union[str, dict], **kwargs) -> str: + params = self._verify_json_format_args(params) + _ensure_data_dirs() + ts = datetime.now().strftime('%Y%m%d_%H%M%S') + safe_title = ''.join(ch if ch.isalnum() else '_' for ch in params['title'])[:40] + todo_path = TODO_DIR / f'{ts}_{safe_title}.md' + lines = [f'# {params["title"]}', ''] + for item in params['items']: + lines.append(f'- [ ] {item}') + todo_path.write_text('\n'.join(lines), encoding='utf-8') + return f'任务清单已写入: {todo_path}' + + +@register_tool('task', allow_overwrite=True) +class TaskTool(BaseTool): + description = '登记任务并可选执行命令,返回执行结果。' + parameters = { + 'type': 'object', + 'properties': { + 'task_name': { + 'type': 'string', + 'description': '任务名称' + }, + 'notes': { + 'type': 'string', + 'description': '任务说明' + }, + 'command': { + 'type': 'string', + 'description': 
'可选,执行命令' + } + }, + 'required': ['task_name'], + } + + def call(self, params: Union[str, dict], **kwargs) -> str: + params = self._verify_json_format_args(params) + _ensure_data_dirs() + event = { + 'time': datetime.now().isoformat(timespec='seconds'), + 'task_name': params['task_name'], + 'notes': params.get('notes', ''), + 'command': params.get('command', ''), + } + result = None + command = params.get('command') + if command: + run = subprocess.run(_build_shell_command(command), text=True, capture_output=True, check=False) + result = { + 'returncode': run.returncode, + 'stdout': run.stdout, + 'stderr': run.stderr, + } + event['result'] = result + with TASK_FILE.open('a', encoding='utf-8') as fp: + fp.write(json.dumps(event, ensure_ascii=False) + '\n') + payload = {'saved_to': str(TASK_FILE), 'task': event, 'command_result': result} + return json.dumps(payload, ensure_ascii=False, indent=2) diff --git a/agent_runtime/write_tools.py b/agent_runtime/write_tools.py new file mode 100644 index 0000000..7e8b50b --- /dev/null +++ b/agent_runtime/write_tools.py @@ -0,0 +1,43 @@ +import os +import json +from pathlib import Path +from typing import Union +from qwen_agent.tools.base import BaseTool, register_tool + +def _split_items(raw: str) -> list[str]: + return [item.strip() for item in raw.split(';') if item.strip()] + +def _resolve_write_roots() -> tuple[Path, ...]: + roots_value = os.getenv('WRITEABLE_FS_ROOTS', '') + return tuple(Path(os.path.expanduser(item)).resolve() for item in _split_items(roots_value)) + +@register_tool('write_file', allow_overwrite=True) +class WriteFileTool(BaseTool): + description = '文件写入工具。只要路径在白名单内,即可直接创建或覆盖文件。' + parameters = { + 'type': 'object', + 'properties': { + 'path': {'type': 'string', 'description': '目标绝对路径'}, + 'content': {'type': 'string', 'description': '要写入的完整内容'} + }, + 'required': ['path', 'content'], + } + + def call(self, params: Union[str, dict], **kwargs) -> str: + params = self._verify_json_format_args(params) + 
target = Path(os.path.expanduser(str(params['path']))).resolve() + content = str(params.get('content', '')) + + # 核心防线:检查是否在白名单内 + roots = _resolve_write_roots() + if not any(target.is_relative_to(root) for root in roots): + allowed = ", ".join(str(r) for r in roots) + return f"拒绝写入:路径不在白名单内。允许范围:{allowed}" + + try: + target.parent.mkdir(parents=True, exist_ok=True) + with open(target, 'w', encoding='utf-8') as f: + f.write(content) + return f"✅ 成功:内容已保存至 {target}" + except Exception as e: + return f"写入失败:{str(e)}" \ No newline at end of file diff --git a/bootstrap.bat b/bootstrap.bat new file mode 100644 index 0000000..4b08a1d --- /dev/null +++ b/bootstrap.bat @@ -0,0 +1,13 @@ +@echo off +setlocal +set SCRIPT_DIR=%~dp0 +powershell.exe -NoProfile -ExecutionPolicy Bypass -File "%SCRIPT_DIR%install.ps1" %* +if errorlevel 1 ( + echo. + echo [bootstrap] Install failed. + exit /b 1 +) +echo. +echo [bootstrap] Install completed. +echo [bootstrap] Start command: .\start_8080_toolhub_stack.cmd start +exit /b 0 diff --git a/bootstrap_q8.bat b/bootstrap_q8.bat new file mode 100644 index 0000000..581fc9e --- /dev/null +++ b/bootstrap_q8.bat @@ -0,0 +1,13 @@ +@echo off +setlocal +set SCRIPT_DIR=%~dp0 +powershell.exe -NoProfile -ExecutionPolicy Bypass -File "%SCRIPT_DIR%install_q8.ps1" %* +if errorlevel 1 ( + echo. + echo [bootstrap_q8] Q8 install failed. + exit /b 1 +) +echo. +echo [bootstrap_q8] Q8 install started or completed. +echo [bootstrap_q8] Start command: .\start_8080_toolhub_stack.cmd start +exit /b 0 diff --git a/compose.yml b/compose.yml new file mode 100644 index 0000000..55b52f7 --- /dev/null +++ b/compose.yml @@ -0,0 +1,56 @@ +services: + gateway: + build: + context: . 
+ dockerfile: docker/gateway/Dockerfile + restart: unless-stopped + environment: + GATEWAY_HOST: 0.0.0.0 + GATEWAY_PORT: 8080 + BACKEND_BASE: http://backend:8081 + MODEL_SERVER: http://backend:8081/v1 + BACKEND_WAIT_HINT: docker compose logs -f backend + ACCESS_URLS: http://127.0.0.1:${GATEWAY_PORT:-8080} + READONLY_FS_ROOTS: /workspace + ports: + - "${GATEWAY_PORT:-8080}:8080" + volumes: + - .:/workspace:ro + depends_on: + - backend + healthcheck: + test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8080/gateway/health >/dev/null || exit 1"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 10s + + backend: + build: + context: . + dockerfile: docker/backend/Dockerfile + restart: unless-stopped + environment: + HOST: 0.0.0.0 + PORT: 8081 + THINK_MODE: ${THINK_MODE:-think-on} + CTX_SIZE: ${CTX_SIZE:-16384} + IMAGE_MIN_TOKENS: ${IMAGE_MIN_TOKENS:-256} + IMAGE_MAX_TOKENS: ${IMAGE_MAX_TOKENS:-1024} + MMPROJ_OFFLOAD: ${MMPROJ_OFFLOAD:-off} + MODEL_PATH: /models/model.gguf + MMPROJ_PATH: /models/mmproj.gguf + MODEL_GGUF_URL: ${MODEL_GGUF_URL:-https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf} + MODEL_MMPROJ_URL: ${MODEL_MMPROJ_URL:-https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/mmproj-Qwen3.5-9B-BF16.gguf} + MODEL_GGUF_SHA256: ${MODEL_GGUF_SHA256:-} + MODEL_MMPROJ_SHA256: ${MODEL_MMPROJ_SHA256:-} + expose: + - "8081" + volumes: + - toolhub-models:/models + gpus: all + healthcheck: + test: ["NONE"] + +volumes: + toolhub-models: diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile new file mode 100644 index 0000000..2da122a --- /dev/null +++ b/docker/backend/Dockerfile @@ -0,0 +1,15 @@ +FROM ghcr.io/ggml-org/llama.cpp:server-cuda + +USER root +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y --no-install-recommends curl ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY docker/backend/entrypoint.sh /usr/local/bin/toolhub-backend-entrypoint.sh +COPY 
docker/backend/entrypoint_helpers.sh /usr/local/bin/toolhub-backend-helpers.sh + +RUN chmod +x /usr/local/bin/toolhub-backend-entrypoint.sh /usr/local/bin/toolhub-backend-helpers.sh + +ENTRYPOINT ["/usr/local/bin/toolhub-backend-entrypoint.sh"] diff --git a/docker/backend/entrypoint.sh b/docker/backend/entrypoint.sh new file mode 100644 index 0000000..042d6ac --- /dev/null +++ b/docker/backend/entrypoint.sh @@ -0,0 +1,176 @@ +#!/usr/bin/env bash +set -euo pipefail + +DEFAULT_GGUF_URL="https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf" +DEFAULT_MMPROJ_URL="https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/mmproj-Qwen3.5-9B-BF16.gguf" +BACKEND_READY_TIMEOUT_SEC=180 +RECENT_LOG_LINE_COUNT=80 + +. /usr/local/bin/toolhub-backend-helpers.sh + +log_step() { + printf '[toolhub-backend] %s\n' "$1" +} + +log_stage() { + log_step "$1" +} + +resolve_llama_server_bin() { + local candidate="" + if candidate="$(command -v llama-server 2>/dev/null)"; then + printf '%s\n' "$candidate" + return + fi + + candidate="/app/llama-server" + if [[ -x "$candidate" ]]; then + printf '%s\n' "$candidate" + return + fi + + printf '未找到 llama-server,可执行文件既不在 PATH 中,也不在 /app/llama-server\n' >&2 + exit 1 +} + +require_positive_integer() { + local key="$1" + local value="$2" + if [[ ! 
"$value" =~ ^[0-9]+$ ]] || [[ "$value" -le 0 ]]; then + printf '%s 必须是正整数,收到: %s\n' "$key" "$value" >&2 + exit 1 + fi +} + +verify_sha256() { + local path="$1" + local expected="$2" + if [[ -z "$expected" ]]; then + return + fi + + local actual + actual="$(sha256sum "$path" | awk '{print $1}')" + if [[ "${actual,,}" != "${expected,,}" ]]; then + printf 'SHA256 校验失败: %s\n' "$path" >&2 + printf '期望: %s\n' "$expected" >&2 + printf '实际: %s\n' "$actual" >&2 + exit 1 + fi +} + +resolve_runtime_profile() { + case "${THINK_MODE:-think-on}" in + think-on) + REASONING_BUDGET="-1" + MAX_TOKENS="-1" + ;; + think-off) + REASONING_BUDGET="0" + MAX_TOKENS="2048" + ;; + *) + printf '不支持的 THINK_MODE: %s\n' "${THINK_MODE:-}" >&2 + exit 1 + ;; + esac +} + +main() { + local host_addr="${HOST:-0.0.0.0}" + local port_num="${PORT:-8081}" + local model_path="${MODEL_PATH:-/models/model.gguf}" + local mmproj_path="${MMPROJ_PATH:-/models/mmproj.gguf}" + local gguf_url="${MODEL_GGUF_URL:-$DEFAULT_GGUF_URL}" + local mmproj_url="${MODEL_MMPROJ_URL:-$DEFAULT_MMPROJ_URL}" + local ctx_size="${CTX_SIZE:-16384}" + local image_min_tokens="${IMAGE_MIN_TOKENS:-256}" + local image_max_tokens="${IMAGE_MAX_TOKENS:-1024}" + local mmproj_offload="${MMPROJ_OFFLOAD:-off}" + local backend_ready_timeout_sec="$BACKEND_READY_TIMEOUT_SEC" + local llama_server_bin + local runtime_dir="/tmp/toolhub-backend" + local stdout_log="${runtime_dir}/llama-server.stdout.log" + local stderr_log="${runtime_dir}/llama-server.stderr.log" + local llama_pid + + log_stage '阶段 1/6: 检查运行参数' + require_positive_integer "PORT" "$port_num" + require_positive_integer "CTX_SIZE" "$ctx_size" + require_positive_integer "IMAGE_MIN_TOKENS" "$image_min_tokens" + require_positive_integer "IMAGE_MAX_TOKENS" "$image_max_tokens" + require_positive_integer "BACKEND_READY_TIMEOUT_SEC" "$backend_ready_timeout_sec" + + if (( image_min_tokens > image_max_tokens )); then + printf 'IMAGE_MIN_TOKENS 不能大于 IMAGE_MAX_TOKENS\n' >&2 + exit 1 + fi + + if [[ 
"$mmproj_offload" != "on" && "$mmproj_offload" != "off" ]]; then + printf 'MMPROJ_OFFLOAD 仅支持 on 或 off,收到: %s\n' "$mmproj_offload" >&2 + exit 1 + fi + + resolve_runtime_profile + llama_server_bin="$(resolve_llama_server_bin)" + mkdir -p "$runtime_dir" + : > "$stdout_log" + : > "$stderr_log" + + log_stage '阶段 2/6: 检查或下载主模型' + download_if_missing "$model_path" "$gguf_url" "主模型" + log_stage '阶段 3/6: 检查或下载视觉模型' + download_if_missing "$mmproj_path" "$mmproj_url" "视觉模型" + + log_stage '阶段 4/6: 校验模型文件' + verify_sha256 "$model_path" "${MODEL_GGUF_SHA256:-}" + verify_sha256 "$mmproj_path" "${MODEL_MMPROJ_SHA256:-}" + + local args=( + -m "$model_path" + -mm "$mmproj_path" + --n-gpu-layers all + --flash-attn on + --fit on + --fit-target 256 + --temp 1.0 + --top-p 0.95 + --top-k 20 + --min-p 0.1 + --presence-penalty 1.5 + --repeat-penalty 1.05 + -n "$MAX_TOKENS" + --reasoning-budget "$REASONING_BUDGET" + -c "$ctx_size" + --image-min-tokens "$image_min_tokens" + --image-max-tokens "$image_max_tokens" + --host "$host_addr" + --port "$port_num" + --webui + ) + + if [[ "$mmproj_offload" == "off" ]]; then + args+=(--no-mmproj-offload) + else + args+=(--mmproj-offload) + fi + + log_stage '阶段 5/6: 启动 llama-server' + log_step "启动参数: host=$host_addr port=$port_num think=${THINK_MODE:-think-on}" + "$llama_server_bin" "${args[@]}" >"$stdout_log" 2>"$stderr_log" & + llama_pid=$! + log_step "llama-server 已启动: PID ${llama_pid}" + + log_stage '阶段 6/6: 等待模型加载到 GPU' + if ! 
wait_for_backend_ready "$port_num" "$backend_ready_timeout_sec" "$llama_pid" "$stdout_log" "$stderr_log"; then + if kill -0 "$llama_pid" 2>/dev/null; then + kill "$llama_pid" 2>/dev/null || true + wait "$llama_pid" 2>/dev/null || true + fi + exit 1 + fi + + wait "$llama_pid" +} + +main "$@" diff --git a/docker/backend/entrypoint_helpers.sh b/docker/backend/entrypoint_helpers.sh new file mode 100644 index 0000000..5048c4b --- /dev/null +++ b/docker/backend/entrypoint_helpers.sh @@ -0,0 +1,156 @@ +#!/usr/bin/env bash + +show_recent_server_logs() { + local stdout_log="$1" + local stderr_log="$2" + + log_step '后端启动失败,最近日志如下' + if [[ -s "$stdout_log" ]]; then + log_step '=== 最近标准输出 ===' + tail -n "$RECENT_LOG_LINE_COUNT" "$stdout_log" + fi + if [[ -s "$stderr_log" ]]; then + log_step '=== 最近标准错误 ===' + tail -n "$RECENT_LOG_LINE_COUNT" "$stderr_log" >&2 + fi +} + +probe_backend_ready() { + local port_num="$1" + curl -fsS "http://127.0.0.1:${port_num}/health" >/dev/null 2>&1 +} + +wait_for_backend_ready() { + local port_num="$1" + local timeout_sec="$2" + local llama_pid="$3" + local stdout_log="$4" + local stderr_log="$5" + local elapsed_sec=0 + + while (( elapsed_sec < timeout_sec )); do + if ! kill -0 "$llama_pid" 2>/dev/null; then + log_step '后端启动失败: llama-server 进程已提前退出' + show_recent_server_logs "$stdout_log" "$stderr_log" + return 1 + fi + if probe_backend_ready "$port_num"; then + log_step '后端健康检查已通过,网关会继续完成预热' + return 0 + fi + log_step "等待模型加载到 GPU... 
${elapsed_sec}/${timeout_sec} 秒" + sleep 1 + elapsed_sec=$((elapsed_sec + 1)) + done + + log_step "后端在 ${timeout_sec} 秒内未就绪" + show_recent_server_logs "$stdout_log" "$stderr_log" + return 1 +} + +format_bytes() { + local bytes="$1" + awk -v bytes="$bytes" ' + BEGIN { + split("B KiB MiB GiB TiB", units, " ") + value = bytes + 0 + idx = 1 + while (value >= 1024 && idx < 5) { + value /= 1024 + idx++ + } + printf "%.1f %s", value, units[idx] + } + ' +} + +resolve_content_length() { + local url="$1" + curl -fsSLI "$url" \ + | tr -d '\r' \ + | awk 'tolower($1) == "content-length:" { print $2 }' \ + | tail -n 1 +} + +read_file_size() { + local path="$1" + if [[ -f "$path" ]]; then + stat -c '%s' "$path" + return + fi + printf '0\n' +} + +render_progress_message() { + local label="$1" + local current_bytes="$2" + local total_bytes="$3" + local speed_bytes="$4" + local current_text + local total_text + local speed_text + + current_text="$(format_bytes "$current_bytes")" + speed_text="$(format_bytes "$speed_bytes")" + total_text="$(format_bytes "${total_bytes:-0}")" + + if [[ -n "$total_bytes" && "$total_bytes" =~ ^[0-9]+$ && "$total_bytes" -gt 0 ]]; then + awk -v label="$label" -v current="$current_bytes" -v total="$total_bytes" \ + -v current_text="$current_text" -v total_text="$total_text" -v speed_text="$speed_text" ' + BEGIN { + pct = (current / total) * 100 + printf "下载%s: %.1f%% %s / %s %s/s\n", + label, pct, current_text, total_text, speed_text + } + ' + return + fi + + printf '下载%s: 已下载 %s %s/s\n' "$label" "$current_text" "$speed_text" +} + +download_if_missing() { + local path="$1" + local url="$2" + local label="$3" + local temp_path="${path}.part" + local total_bytes="" + local previous_bytes=0 + local current_bytes=0 + local speed_bytes=0 + local curl_pid + + mkdir -p "$(dirname "$path")" + if [[ -f "$path" ]]; then + log_step "检测到现有${label},跳过下载" + return + fi + + log_step "下载${label}: $url" + total_bytes="$(resolve_content_length "$url" || true)" + 
previous_bytes="$(read_file_size "$temp_path")" + + curl --fail --location --retry 5 --retry-delay 2 --retry-connrefused \ + --continue-at - --output "$temp_path" --silent --show-error "$url" & + curl_pid=$! + + while kill -0 "$curl_pid" 2>/dev/null; do + sleep 2 + current_bytes="$(read_file_size "$temp_path")" + speed_bytes=$(( (current_bytes - previous_bytes) / 2 )) + if (( speed_bytes < 0 )); then + speed_bytes=0 + fi + log_step "$(render_progress_message "$label" "$current_bytes" "$total_bytes" "$speed_bytes")" + previous_bytes="$current_bytes" + done + + if ! wait "$curl_pid"; then + printf '下载失败: %s\n' "$url" >&2 + exit 1 + fi + + current_bytes="$(read_file_size "$temp_path")" + log_step "下载${label}完成: $(format_bytes "$current_bytes")" + mv "$temp_path" "$path" +} diff --git a/docker/gateway/Dockerfile b/docker/gateway/Dockerfile new file mode 100644 index 0000000..511eb97 --- /dev/null +++ b/docker/gateway/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.11-slim + +ENV PYTHONUNBUFFERED=1 + +WORKDIR /app + +COPY requirements.txt /app/requirements.txt + +RUN python -m pip install --no-cache-dir --upgrade pip wheel \ + && python -m pip install --no-cache-dir -r /app/requirements.txt + +COPY . 
/app
+
+CMD ["python", "run_8080_toolhub_gateway.py", "--host", "0.0.0.0", "--port", "8080"]
diff --git a/docs/DOCKER_COMPOSE.md b/docs/DOCKER_COMPOSE.md
new file mode 100644
index 0000000..c1ee24a
--- /dev/null
+++ b/docs/DOCKER_COMPOSE.md
@@ -0,0 +1,84 @@
+# Docker Compose
+
+ToolHub 提供 Docker Compose 入口,适合 Linux 主机部署,或不想在 Windows 宿主机安装 Python 的用户。这是一条可选路线,不替代 Windows 原生脚本主线。
+
+---
+
+## 前提条件
+
+- Docker 和 Docker Compose 已安装
+- NVIDIA GPU 驱动已安装,且 NVIDIA Container Toolkit 可用
+
+验证 GPU 容器环境:
+
+```bash
+docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi
+```
+
+---
+
+## 启动与停止
+
+```bash
+docker compose up --build # 前台启动
+docker compose up --build -d # 后台启动
+docker compose down # 停止
+```
+
+首次启动时后端容器会自动下载模型文件,之后缓存在 Docker 命名卷 `toolhub-models` 中。
+
+启动后浏览器访问 [http://127.0.0.1:8080](http://127.0.0.1:8080)。
+
+如果后端还在下载模型或加载模型到 GPU,浏览器会先显示准备中页面。此时直接查看:
+
+```bash
+docker compose logs -f backend
+```
+
+确认下载和加载进度即可。
+
+---
+
+## 容器结构
+
+Compose 启动两个服务:
+
+| 服务 | 镜像基础 | 职责 |
+| --- | --- | --- |
+| `gateway` | `python:3.11-slim` | 网关层,提供网页入口和 OpenAI 兼容 API(端口 8080) |
+| `backend` | `ghcr.io/ggml-org/llama.cpp:server-cuda` | 模型后端,GPU 推理(端口 8081) |
+
+架构与 Windows 原生路线一致:浏览器访问网关,网关将推理请求转发给后端。网关容器通过只读方式挂载项目目录(`/workspace`),文件系统访问行为与 Windows 路线保持一致。
+
+---
+
+## 模型管理
+
+模型不会打进镜像,由后端容器首次启动时从 Hugging Face 下载,缓存在命名卷 `toolhub-models` 中。默认下载 Q4_K_M 量化。
+
+如需切换到 Q8,在 `.env` 中将 `MODEL_GGUF_URL` 改为 Q8 下载地址,也可以先在宿主机执行 `.\install_q8.cmd` 让它自动修改,然后重启容器:
+
+```bash
+docker compose down
+docker compose up --build -d
+```
+
+> 容器内模型缓存(命名卷)和 Windows 路线的本地缓存(`.tmp/models/`)是两套独立缓存,互不影响。
+
+---
+
+## 配置
+
+Compose 通过 `.env` 文件读取配置。以下变量会影响容器行为:
+
+| 变量 | 默认值 | 说明 |
+| --- | --- | --- |
+| `GATEWAY_PORT` | `8080` | 网关对外端口 |
+| `BACKEND_PORT` | `8081` | 后端端口(注意:当前 compose.yml 未读取此变量,后端在容器网络内固定使用 8081) |
+| `THINK_MODE` | `think-on` | 思考模式 |
+| `CTX_SIZE` | `16384` | 上下文窗口大小 |
+| `IMAGE_MIN_TOKENS` | `256` | 图像最小 token 数 |
+| `IMAGE_MAX_TOKENS` | `1024` | 图像最大 token 数 |
+| `MMPROJ_OFFLOAD` | `off` | 视觉投影卸载开关 |
+
+修改 `.env` 后重启容器生效。 diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md new file mode 100644 index 0000000..5d65545 --- /dev/null +++ b/docs/QUICKSTART.md @@ -0,0 +1,137 @@ +# 快速开始 + +从零到能用的完整说明。默认路线为 Windows 原生,WSL 和 Docker Compose 见末尾。 + +--- + +## 系统要求 + +| 项目 | 要求 | +| --- | --- | +| 操作系统 | Windows 10 / 11 | +| GPU | NVIDIA,驱动 ≥ 525,建议 ≥ 8 GB 显存 | +| Python | 3.10+,已加入 PATH | +| 磁盘 | ≥ 20 GB 可用空间 | + +> Q4_K_M 量化下模型加上视觉投影约占 6.1 GB 显存。8 GB 显存可正常运行。 + +Docker Compose 路线不需要在宿主机安装 Python,系统要求见 [Docker Compose 文档](DOCKER_COMPOSE.md)。 + +--- + +## 1. 安装 + +双击 `bootstrap.bat`,或在命令行执行: + +```powershell +.\install.cmd +``` + +安装脚本会自动完成: + +- 创建 Python 虚拟环境并安装依赖 +- 下载 llama.cpp CUDA 运行时 +- 下载 Qwen3.5-9B Q4_K_M 主模型与 mmproj 视觉投影模型 + +首次安装需要下载约 6 GB 模型文件,请确保网络通畅。 + +--- + +## 2. 启动 + +```powershell +.\start_8080_toolhub_stack.cmd start +``` + +首次启动需要 30–60 秒加载模型到 GPU。看到"栈已启动"即表示就绪。 + +--- + +## 3. 打开网页 + +浏览器访问 [http://127.0.0.1:8080](http://127.0.0.1:8080)。 + +--- + +## 4. 服务管理 + +```powershell +.\start_8080_toolhub_stack.cmd start # 启动 +.\start_8080_toolhub_stack.cmd stop # 停止 +.\start_8080_toolhub_stack.cmd restart # 重启 +.\start_8080_toolhub_stack.cmd status # 查看状态 +.\start_8080_toolhub_stack.cmd logs # 查看日志 +``` + +--- + +## 5. 可选:升级到 Q8 量化 + +显存 ≥ 12 GB 时,可以切换到 Q8 获得更高推理精度。 + +双击 `bootstrap_q8.bat`,或执行 `.\install_q8.cmd`。脚本会自动修改 `.env` 中的模型路径和下载地址,然后开始下载。视觉模型 mmproj 不需要更换。 + +下载完成后执行 `.\start_8080_toolhub_stack.cmd restart` 切换。 + +--- + +## 6. 配置 + +复制 `.env.example` 为 `.env`,按需修改,启动脚本会自动加载。 + +常见调整: + +**切换思考模式:** + +```powershell +$env:THINK_MODE = 'think-off'; .\start_8080_toolhub_stack.cmd restart +``` + +**缩小上下文以节省显存:** + +```powershell +$env:CTX_SIZE = '8192'; .\start_8080_toolhub_stack.cmd restart +``` + +**扩大文件系统可读范围:** 修改 `.env` 中的 `READONLY_FS_ROOTS`,多个目录用分号分隔。留空时默认只读项目目录。 + +修改后执行 `.\start_8080_toolhub_stack.cmd restart` 生效。 + +--- + +## 7. 
API 调用 + +网关兼容 OpenAI API 格式: + +```bash +curl http://127.0.0.1:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Qwen3.5-9B-Q4_K_M", + "stream": true, + "messages": [ + {"role": "user", "content": "今天有什么科技新闻?"} + ] + }' +``` + +支持 OpenAI API 的客户端可将 Base URL 设为 `http://127.0.0.1:8080/v1`。 + +--- + +## 其他入口 + +### WSL + +WSL 入口复用 Windows 主链路,不会创建独立的 Linux 虚拟环境。 + +```bash +./install.sh # 安装 +./start_8080_toolhub_stack.sh start # 启动 +``` + +服务管理命令与 Windows 一致,把 `.cmd` 换成 `.sh` 即可。 + +### Docker Compose + +不需要在宿主机安装 Python 或手动下载模型。详见 [Docker Compose 文档](DOCKER_COMPOSE.md)。 diff --git a/docs/RELEASE_NOTES.md b/docs/RELEASE_NOTES.md new file mode 100644 index 0000000..86bebdc --- /dev/null +++ b/docs/RELEASE_NOTES.md @@ -0,0 +1,21 @@ +# RELEASE NOTES + +## v1.0.0 + +发布日期:2026-03-04 + +主要内容: + +- 交付范围固定为 Qwen3.5-9B +- 入口固定为 8080 网页 +- 工具能力集成到 8080 网关 +- 支持流式输出与思维链输出 +- 回答下方显示性能统计 +- 思维链 token 计入统计 +- 输入栏上方实时统计隐藏 +- 提供安装脚本、启动脚本和文档 + +限制: + +- 仅支持 9B 模型 +- 仅支持本机部署 diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md new file mode 100644 index 0000000..9b09bff --- /dev/null +++ b/docs/TROUBLESHOOTING.md @@ -0,0 +1,116 @@ +# 常见问题与排障 + +--- + +## 1. PowerShell 报脚本执行策略错误 + +看到 `PSSecurityException` 或 `about_Execution_Policies`,改用 `.cmd` 入口即可: + +```powershell +.\install.cmd +.\start_8080_toolhub_stack.cmd start +``` + +如果一定要直接调用 `.ps1`: + +```powershell +powershell -NoProfile -ExecutionPolicy Bypass -File .\install.ps1 +``` + +--- + +## 2. 提示 llama-server.exe 不存在 + +重新执行安装脚本: + +```powershell +.\install.cmd +``` + +完成后确认文件存在:`.tmp\llama_win_cuda\llama-server.exe`。 + +--- + +## 3. 提示模型文件不完整 + +检查以下两个文件是否存在且大小正常: + +- `.env` 里 `MODEL_PATH` 指向的主模型文件,默认为 `Qwen3.5-9B-Q4_K_M.gguf`,执行过 Q8 安装则为 `Qwen3.5-9B-Q8_0.gguf` +- `.tmp\models\crossrepo\lmstudio-community__Qwen3.5-9B-GGUF\mmproj-Qwen3.5-9B-BF16.gguf` + +文件残缺或为 0 字节时,删除后重新执行 `.\install.cmd`。 + +--- + +## 4. 
启动后模型未就绪 + +```powershell +.\start_8080_toolhub_stack.cmd status +.\start_8080_toolhub_stack.cmd logs +``` + +首次启动需要 30–60 秒加载模型,刚启动不久的话稍等片刻。 + +--- + +## 5. 页面报内容编码错误 + +```powershell +.\start_8080_toolhub_stack.cmd restart +``` + +如果仍然出现,清浏览器缓存后刷新。 + +--- + +## 6. 显存不足 + +Q4_K_M 量化下模型加上视觉投影约占 6.1 GB 显存。如果显存紧张: + +**缩小上下文窗口:** + +```powershell +$env:CTX_SIZE = '8192'; .\start_8080_toolhub_stack.cmd restart +``` + +**降低图像 token 上限:** + +```powershell +$env:IMAGE_MAX_TOKENS = '512'; .\start_8080_toolhub_stack.cmd restart +``` + +也可以直接修改 `.env` 里对应的值,然后重启。 + +--- + +## 7. 看不到回答下方的性能统计 + +重启服务后发一条新消息即可看到。旧消息不会回填统计数据。 + +--- + +## 8. WSL 相关 + +WSL 入口复用 Windows 主链路。如果 WSL 中找不到 `powershell.exe`,检查 WSL 配置中 `interop` 是否被禁用。 + +--- + +## 9. Docker Compose 相关 + +### 容器启动失败 + +确认 GPU 容器环境可用: + +```bash +docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi +``` + +如果无法正常输出显卡信息,先解决 GPU 容器环境问题。 + +### 模型下载失败 + +容器首次启动时自动下载模型。下载失败时可通过 `.env` 覆盖 `MODEL_GGUF_URL` 和 `MODEL_MMPROJ_URL` 指向更快的源,再执行 `docker compose up --build`。 + +### 端口冲突 + +修改 `.env` 中的 `GATEWAY_PORT` 和 `BACKEND_PORT`,再重启容器。 diff --git a/env_config.ps1 b/env_config.ps1 new file mode 100644 index 0000000..69d5279 --- /dev/null +++ b/env_config.ps1 @@ -0,0 +1,120 @@ +function Normalize-EnvValue { + param([string]$Value) + + $trimmed = $Value.Trim() + if (-not $trimmed) { + return '' + } + + if ($trimmed.StartsWith('#')) { + return '' + } + + $hashIndex = $trimmed.IndexOf(' #') + if ($hashIndex -ge 0) { + $trimmed = $trimmed.Substring(0, $hashIndex).TrimEnd() + } + + $hasQuotes = ( + ($trimmed.StartsWith('"') -and $trimmed.EndsWith('"')) -or + ($trimmed.StartsWith("'") -and $trimmed.EndsWith("'")) + ) + if ($hasQuotes -and $trimmed.Length -ge 2) { + return $trimmed.Substring(1, $trimmed.Length - 2) + } + return $trimmed +} + +function Import-EnvFile { + param([string]$Path) + + if (-not (Test-Path $Path)) { + return + } + + foreach ($line in Get-Content -Path $Path -Encoding UTF8) { + $trimmed = 
$line.Trim() + if (-not $trimmed -or $trimmed.StartsWith('#')) { + continue + } + + $delimiter = $trimmed.IndexOf('=') + if ($delimiter -lt 1) { + continue + } + + $key = $trimmed.Substring(0, $delimiter).Trim() + $value = Normalize-EnvValue -Value ($trimmed.Substring($delimiter + 1)) + if (-not $key -or (Test-Path "Env:$key")) { + continue + } + [Environment]::SetEnvironmentVariable($key, $value, 'Process') + } +} + +function Resolve-ManagedPath { + param( + [string]$BaseDir, + [string]$Value, + [string]$DefaultRelativePath + ) + + $effective = if ([string]::IsNullOrWhiteSpace($Value)) { $DefaultRelativePath } else { $Value.Trim() } + if ([string]::IsNullOrWhiteSpace($effective)) { + return '' + } + if ([System.IO.Path]::IsPathRooted($effective)) { + return $effective + } + return [System.IO.Path]::GetFullPath((Join-Path $BaseDir $effective)) +} + +function Ensure-EnvFile { + param( + [string]$Path, + [string]$TemplatePath + ) + + if (Test-Path $Path) { + return + } + if (Test-Path $TemplatePath) { + Copy-Item -Path $TemplatePath -Destination $Path -Force + return + } + Set-Content -Path $Path -Value @() -Encoding UTF8 +} + +function Set-EnvFileValue { + param( + [string]$Path, + [string]$Key, + [string]$Value + ) + + $lines = [System.Collections.Generic.List[string]]::new() + if (Test-Path $Path) { + foreach ($line in Get-Content -Path $Path -Encoding UTF8) { + $lines.Add([string]$line) + } + } + + $replacement = "$Key=$Value" + $pattern = '^\s*' + [regex]::Escape($Key) + '\s*=' + $updated = $false + for ($i = 0; $i -lt $lines.Count; $i++) { + if ($lines[$i] -match $pattern) { + $lines[$i] = $replacement + $updated = $true + break + } + } + + if (-not $updated) { + if ($lines.Count -gt 0 -and -not [string]::IsNullOrWhiteSpace($lines[$lines.Count - 1])) { + $lines.Add('') + } + $lines.Add($replacement) + } + Set-Content -Path $Path -Value $lines -Encoding UTF8 +} diff --git a/install.cmd b/install.cmd new file mode 100644 index 0000000..9008265 --- /dev/null +++ 
b/install.cmd @@ -0,0 +1,5 @@ +@echo off +setlocal +set SCRIPT_DIR=%~dp0 +powershell.exe -NoProfile -ExecutionPolicy Bypass -File "%SCRIPT_DIR%install.ps1" %* +exit /b %ERRORLEVEL% diff --git a/install.ps1 b/install.ps1 new file mode 100644 index 0000000..4b59f03 --- /dev/null +++ b/install.ps1 @@ -0,0 +1,49 @@ +param( + [switch]$Wsl +) + +$ErrorActionPreference = 'Stop' +$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$WinInstaller = Join-Path $ScriptDir 'install.win.ps1' + +function Write-Step { + param([string]$Message) + Write-Host "[install] $Message" +} + +function Invoke-WslInstaller { + if (-not (Get-Command wsl.exe -ErrorAction SilentlyContinue)) { + throw 'wsl.exe not found. Please install WSL first.' + } + Write-Step "Run install.sh inside WSL" + $WslDir = (& wsl.exe wslpath -a "$ScriptDir").Trim() + if ([string]::IsNullOrWhiteSpace($WslDir)) { + throw 'Cannot convert current directory to a WSL path.' + } + $Cmd = "cd '$WslDir' && ./install.sh" + & wsl.exe bash -lc $Cmd + if ($LASTEXITCODE -ne 0) { + throw "Install failed, exit code: $LASTEXITCODE" + } + Write-Step 'Install completed (WSL)' + Write-Step 'Start command: ./start_8080_toolhub_stack.sh start' +} + +function Invoke-WinInstaller { + if (-not (Test-Path $WinInstaller)) { + throw "Windows installer not found: $WinInstaller" + } + Write-Step 'Run install.win.ps1' + & powershell.exe -NoProfile -ExecutionPolicy Bypass -File $WinInstaller + if ($LASTEXITCODE -ne 0) { + throw "Windows install failed, exit code: $LASTEXITCODE" + } + Write-Step 'Install completed (Windows)' + Write-Step 'Start command: .\start_8080_toolhub_stack.cmd start' +} + +if ($Wsl) { + Invoke-WslInstaller +} else { + Invoke-WinInstaller +} diff --git a/install.sh b/install.sh new file mode 100644 index 0000000..011c2fc --- /dev/null +++ b/install.sh @@ -0,0 +1,96 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +WIN_INSTALLER_PS1="$ROOT_DIR/install.win.ps1" + 
+print_usage() { + cat <<'USAGE' +用法: + ./install.sh + +说明: + 这是 WSL 兼容入口。 + 它会直接复用 Windows 安装主脚本,和 cmd / PowerShell 的安装结果保持一致。 +USAGE +} + +to_win_path_if_needed() { + local raw="$1" + if [[ -z "$raw" ]]; then + printf '' + return + fi + if [[ "$raw" == /* ]]; then + wslpath -w "$raw" + return + fi + printf '%s' "$raw" +} + +ps_escape_single_quotes() { + printf "%s" "$1" | sed "s/'/''/g" +} + +require_windows_power_shell() { + if ! command -v powershell.exe >/dev/null 2>&1; then + echo "未找到 powershell.exe,WSL 兼容入口无法调用 Windows 安装器。" + exit 1 + fi + if [[ ! -f "$WIN_INSTALLER_PS1" ]]; then + echo "缺少安装脚本: $WIN_INSTALLER_PS1" + exit 1 + fi +} + +build_env_overrides() { + local -n out_ref=$1 + out_ref=() + + for key in PYTHON_BIN LLAMA_WIN_CUDA_URL LLAMA_WIN_CUDART_URL MODEL_GGUF_URL MODEL_MMPROJ_URL MODEL_GGUF_SHA256 MODEL_MMPROJ_SHA256; do + if [[ -z "${!key-}" ]]; then + continue + fi + local value="${!key}" + if [[ "$key" == "PYTHON_BIN" ]]; then + value="$(to_win_path_if_needed "$value")" + fi + out_ref+=("$key=$value") + done +} + +build_ps_env_setup() { + local -n env_ref=$1 + local lines=() + local item key value escaped_value + for item in "${env_ref[@]}"; do + key="${item%%=*}" + value="${item#*=}" + escaped_value="$(ps_escape_single_quotes "$value")" + lines+=("[Environment]::SetEnvironmentVariable('$key', '$escaped_value', 'Process')") + done + printf '%s; ' "${lines[@]}" +} + +main() { + if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then + print_usage + exit 0 + fi + + require_windows_power_shell + + local installer_win + installer_win="$(wslpath -w "$WIN_INSTALLER_PS1")" + + local env_overrides=() + build_env_overrides env_overrides + + local ps_command + local ps_env_setup + ps_env_setup="$(build_ps_env_setup env_overrides)" + ps_command="[Console]::InputEncoding = [System.Text.UTF8Encoding]::new(\$false); [Console]::OutputEncoding = [System.Text.UTF8Encoding]::new(\$false); chcp 65001 > \$null; ${ps_env_setup}& '$installer_win'" + powershell.exe 
-NoProfile -ExecutionPolicy Bypass -Command "$ps_command" +} + +main "$@" diff --git a/install.win.ps1 b/install.win.ps1 new file mode 100644 index 0000000..db057e5 --- /dev/null +++ b/install.win.ps1 @@ -0,0 +1,438 @@ +param() + +$ErrorActionPreference = 'Stop' +$ProgressPreference = 'SilentlyContinue' + +$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$RootDir = (Resolve-Path $ScriptDir).Path +$EnvConfig = Join-Path $RootDir 'env_config.ps1' +if (Test-Path $EnvConfig) { + . $EnvConfig + Import-EnvFile -Path (Join-Path $RootDir '.env') +} +$VenvDir = Join-Path $RootDir '.venv-qwen35' +$VenvPython = Join-Path $VenvDir 'Scripts\python.exe' +$LlamaDir = Join-Path $RootDir '.tmp\llama_win_cuda' +$ModelRelativeDir = '.tmp\models\crossrepo\lmstudio-community__Qwen3.5-9B-GGUF' +$DefaultGgufRelativePath = Join-Path $ModelRelativeDir 'Qwen3.5-9B-Q4_K_M.gguf' +$DefaultMmprojRelativePath = Join-Path $ModelRelativeDir 'mmproj-Qwen3.5-9B-BF16.gguf' +$GgufPath = Resolve-ManagedPath -BaseDir $RootDir -Value $env:MODEL_PATH -DefaultRelativePath $DefaultGgufRelativePath +$MmprojPath = Resolve-ManagedPath -BaseDir $RootDir -Value $env:MMPROJ_PATH -DefaultRelativePath $DefaultMmprojRelativePath + +$DefaultGgufUrl = 'https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf' +$DefaultMmprojUrl = 'https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/mmproj-Qwen3.5-9B-BF16.gguf' +$LlamaReleaseApiUrl = 'https://api.github.com/repos/ggml-org/llama.cpp/releases/latest' +$LlamaReleasePageUrl = 'https://github.com/ggml-org/llama.cpp/releases/latest' +$LlamaReleaseDownloadPrefix = 'https://github.com/ggml-org/llama.cpp/releases/latest/download/' +$PreferredCudaBinAssetRegexes = @( + '^llama-.*-bin-win-cuda-12\.4-x64\.zip$', + '^llama-.*-bin-win-cuda-13\.1-x64\.zip$', + '^llama-.*-bin-win-cuda-.*-x64\.zip$' +) +$PreferredCudaRuntimeAssetRegexes = @( + '^cudart-llama-bin-win-cuda-12\.4-x64\.zip$', + 
'^cudart-llama-bin-win-cuda-13\.1-x64\.zip$', + '^cudart-llama-bin-win-cuda-.*-x64\.zip$' +) + +function Write-Step { + param([string]$Message) + Write-Host "[install] $Message" +} + +function New-PythonCandidate { + param( + [string]$Label, + [string]$Command, + [string[]]$Args = @() + ) + return [PSCustomObject]@{ + Label = $Label + Command = $Command + Args = $Args + } +} + +function Get-PythonCandidates { + $candidates = @() + if ($env:PYTHON_BIN) { + $candidates += New-PythonCandidate -Label "PYTHON_BIN=$($env:PYTHON_BIN)" -Command $env:PYTHON_BIN + } + $candidates += New-PythonCandidate -Label 'py -3' -Command 'py' -Args @('-3') + $candidates += New-PythonCandidate -Label 'python' -Command 'python' + $candidates += New-PythonCandidate -Label 'python3' -Command 'python3' + return $candidates +} + +function Test-PythonCandidate { + param([object]$PythonSpec) + $probeCode = 'import sys, venv; raise SystemExit(0 if sys.version_info >= (3, 10) else 3)' + try { + & $PythonSpec.Command @($PythonSpec.Args + @('-c', $probeCode)) *> $null + } catch { + Write-Step "跳过 Python 候选 $($PythonSpec.Label): $($_.Exception.Message)" + return $false + } + if ($LASTEXITCODE -eq 0) { + return $true + } + if ($LASTEXITCODE -eq 3) { + Write-Step "跳过 Python 候选 $($PythonSpec.Label): Python 版本低于 3.10" + return $false + } + Write-Step "跳过 Python 候选 $($PythonSpec.Label): 解释器不可用或缺少 venv 模块,exit code: $LASTEXITCODE" + return $false +} + +function Resolve-PythonSpec { + foreach ($candidate in Get-PythonCandidates) { + if (Test-PythonCandidate -PythonSpec $candidate) { + Write-Step "使用 Python: $($candidate.Label)" + return $candidate + } + } + throw '未找到可用 Python,请安装 Python 3.10+ 并确保 venv 模块可用。' +} + +function Invoke-CommandChecked { + param( + [string]$Command, + [string[]]$CommandArgs, + [string]$Action, + [string]$DisplayName = $Command + ) + try { + & $Command @CommandArgs + } catch { + throw "$Action 失败。命令: $DisplayName。错误: $($_.Exception.Message)" + } + if ($LASTEXITCODE -ne 0) { + 
throw "$Action 失败。命令: $DisplayName。exit code: $LASTEXITCODE" + } +} + +function Invoke-Python { + param( + [object]$PythonSpec, + [string[]]$PythonArgs, + [string]$Action + ) + Invoke-CommandChecked -Command $PythonSpec.Command -CommandArgs ($PythonSpec.Args + $PythonArgs) -Action $Action -DisplayName $PythonSpec.Label +} + +function Test-VenvPython { + param([string]$Path) + if (-not (Test-Path $Path)) { + return $false + } + try { + & $Path '-c' 'import sys' *> $null + } catch { + return $false + } + return $LASTEXITCODE -eq 0 +} + +function Ensure-Dir { + param([string]$Path) + if (-not (Test-Path $Path)) { + New-Item -Path $Path -ItemType Directory -Force | Out-Null + } +} + +function Resolve-CurlPath { + $curl = Get-Command curl.exe -ErrorAction SilentlyContinue + if (-not $curl) { + throw '未找到 curl.exe,无法执行带进度显示的下载。' + } + return $curl.Source +} + +function Download-File { + param( + [string]$Url, + [string]$OutFile + ) + Write-Step "下载: $Url" + $targetDir = Split-Path -Parent $OutFile + if (-not [string]::IsNullOrWhiteSpace($targetDir)) { + Ensure-Dir $targetDir + } + $tempFile = "$OutFile.part" + $curlPath = Resolve-CurlPath + $curlArgs = @( + '--fail', + '--location', + '--retry', '5', + '--retry-delay', '2', + '--output', $tempFile + ) + if (Test-Path $tempFile) { + Write-Step '检测到未完成下载,继续传输' + $curlArgs += @('--continue-at', '-') + } + $curlArgs += $Url + + try { + & $curlPath @curlArgs + } catch { + throw "下载失败。命令: curl.exe。错误: $($_.Exception.Message)" + } + if ($LASTEXITCODE -ne 0) { + throw "下载失败。命令: curl.exe。exit code: $LASTEXITCODE" + } + + if (Test-Path $OutFile) { + Remove-Item -Path $OutFile -Force -ErrorAction SilentlyContinue + } + Move-Item -Path $tempFile -Destination $OutFile -Force +} + +function Verify-Sha256 { + param( + [string]$Path, + [string]$Expected + ) + if ([string]::IsNullOrWhiteSpace($Expected)) { + return + } + $actual = (Get-FileHash -Path $Path -Algorithm SHA256).Hash.ToLowerInvariant() + $exp = $Expected.ToLowerInvariant() + 
if ($actual -ne $exp) { + throw "SHA256 校验失败: $Path" + } +} + +function Get-LlamaReleaseAssetsFromApi { + try { + $release = Invoke-RestMethod -Uri $LlamaReleaseApiUrl -Method Get + return @($release.assets | ForEach-Object { + [PSCustomObject]@{ + Name = [string]$_.name + Url = [string]$_.browser_download_url + } + }) + } catch { + Write-Step "GitHub API 不可用,改用页面解析。原因: $($_.Exception.Message)" + return @() + } +} + +function Get-LlamaReleaseAssetsFromHtml { + try { + $response = Invoke-WebRequest -Uri $LlamaReleasePageUrl -UseBasicParsing + } catch { + throw "获取 llama.cpp release 页面失败: $($_.Exception.Message)" + } + $content = [string]$response.Content + $regex = '(?:cudart-)?llama-[^"''<> ]*bin-win-cuda-[0-9.]+-x64\.zip' + $matches = [regex]::Matches($content, $regex, [System.Text.RegularExpressions.RegexOptions]::IgnoreCase) + $seen = @{} + $assets = @() + foreach ($match in $matches) { + $name = [string]$match.Value + $key = $name.ToLowerInvariant() + if ($seen.ContainsKey($key)) { + continue + } + $seen[$key] = $true + $assets += [PSCustomObject]@{ + Name = $name + Url = "$LlamaReleaseDownloadPrefix$name" + } + } + return $assets +} + +function Select-LlamaAsset { + param( + [object[]]$Assets, + [string[]]$Regexes + ) + foreach ($regex in $Regexes) { + $candidate = $Assets | Where-Object { $_.Name -match $regex } | Select-Object -First 1 + if ($candidate) { + return $candidate + } + } + return $null +} + +function Resolve-LlamaCudaAssets { + if ($env:LLAMA_WIN_CUDA_URL) { + $binName = Split-Path -Path $env:LLAMA_WIN_CUDA_URL -Leaf + $runtimeUrl = if ($env:LLAMA_WIN_CUDART_URL) { [string]$env:LLAMA_WIN_CUDART_URL } else { '' } + $runtimeName = if ([string]::IsNullOrWhiteSpace($runtimeUrl)) { '' } else { (Split-Path -Path $runtimeUrl -Leaf) } + Write-Step "使用自定义 llama.cpp 主包: $binName" + if (-not [string]::IsNullOrWhiteSpace($runtimeName)) { + Write-Step "使用自定义 CUDA 运行时包: $runtimeName" + } + return @{ + BinUrl = [string]$env:LLAMA_WIN_CUDA_URL + RuntimeUrl = 
$runtimeUrl + } + } + + $assets = Get-LlamaReleaseAssetsFromApi + if ($assets.Count -eq 0) { + $assets = Get-LlamaReleaseAssetsFromHtml + } + if ($assets.Count -eq 0) { + throw '自动解析 llama.cpp CUDA 资源失败,未读取到任何 win-cuda 包。' + } + + $bin = Select-LlamaAsset -Assets $assets -Regexes $PreferredCudaBinAssetRegexes + if (-not $bin) { + $preview = (@($assets | Select-Object -ExpandProperty Name | Select-Object -First 12)) -join ', ' + throw "自动解析失败:未找到完整 CUDA 主包。可用资源: $preview" + } + $runtime = Select-LlamaAsset -Assets $assets -Regexes $PreferredCudaRuntimeAssetRegexes + Write-Step "使用 llama.cpp 主包: $($bin.Name)" + if ($runtime) { + Write-Step "可选 CUDA 运行时包: $($runtime.Name)" + } + return @{ + BinUrl = [string]$bin.Url + RuntimeUrl = if ($runtime) { [string]$runtime.Url } else { '' } + } +} + +function Get-LlamaRuntimeStatus { + param([string]$BaseDir) + $missing = @() + $llamaExe = Test-Path (Join-Path $BaseDir 'llama-server.exe') + if (-not $llamaExe) { + $missing += 'llama-server.exe' + } + $cudaBackendDll = @(Get-ChildItem -Path $BaseDir -Filter 'ggml-cuda*.dll' -File -ErrorAction SilentlyContinue | Select-Object -First 1) + if ($cudaBackendDll.Count -eq 0) { + $missing += 'ggml-cuda*.dll' + } + $cudartDll = @(Get-ChildItem -Path $BaseDir -Filter 'cudart64_*.dll' -File -ErrorAction SilentlyContinue | Select-Object -First 1) + if ($cudartDll.Count -eq 0) { + $missing += 'cudart64_*.dll' + } + $cublasDll = @(Get-ChildItem -Path $BaseDir -Filter 'cublas64_*.dll' -File -ErrorAction SilentlyContinue | Select-Object -First 1) + if ($cublasDll.Count -eq 0) { + $missing += 'cublas64_*.dll' + } + return @{ + Ready = ($missing.Count -eq 0) + Missing = $missing + } +} + +function Clear-LlamaRuntimeDirectory { + if (-not (Test-Path $LlamaDir)) { + Ensure-Dir $LlamaDir + return + } + try { + Get-ChildItem -Path $LlamaDir -Force -ErrorAction Stop | Remove-Item -Recurse -Force -ErrorAction Stop + } catch { + throw "清理 CUDA 运行时目录失败,请先停止服务后重试。目录: $LlamaDir。错误: 
$($_.Exception.Message)" + } +} + +function Ensure-PythonEnv { + $python = Resolve-PythonSpec + $venvExists = Test-Path $VenvDir + $venvReady = Test-VenvPython -Path $VenvPython + if ($venvExists -and -not $venvReady) { + Write-Step "检测到不完整或非 Windows 虚拟环境,重建: $VenvDir" + Remove-Item -Path $VenvDir -Recurse -Force -ErrorAction SilentlyContinue + if (Test-Path $VenvDir) { + Write-Step '目录无法直接删除,尝试 venv --clear 重建' + Invoke-Python -PythonSpec $python -PythonArgs @('-m', 'venv', '--clear', $VenvDir) -Action '清空并重建虚拟环境' + } + } + if (-not (Test-Path $VenvDir)) { + Write-Step "创建虚拟环境: $VenvDir" + Invoke-Python -PythonSpec $python -PythonArgs @('-m', 'venv', $VenvDir) -Action '创建虚拟环境' + } + if (-not (Test-VenvPython -Path $VenvPython)) { + throw "虚拟环境未就绪: $VenvPython。请检查上面的 Python 或权限报错。" + } + Write-Step '安装 Python 依赖' + Invoke-CommandChecked -Command $VenvPython -CommandArgs @('-m', 'pip', 'install', '--upgrade', 'pip', 'wheel') -Action '升级 pip 和 wheel' + Invoke-CommandChecked -Command $VenvPython -CommandArgs @('-m', 'pip', 'install', '-r', (Join-Path $RootDir 'requirements.txt')) -Action '安装 requirements.txt 依赖' +} + +function Ensure-LlamaRuntime { + Ensure-Dir $LlamaDir + $status = Get-LlamaRuntimeStatus -BaseDir $LlamaDir + if ($status.Ready) { + Write-Step '检测到完整 CUDA 运行时,跳过下载' + return + } + Write-Step '检测到不完整 CUDA 运行时,清理后重装' + Clear-LlamaRuntimeDirectory + + $assets = Resolve-LlamaCudaAssets + $binZipPath = Join-Path $LlamaDir 'llama-win-cuda-bin.zip' + Download-File -Url $assets.BinUrl -OutFile $binZipPath + Write-Step '解压 llama.cpp CUDA 主包' + Expand-Archive -Path $binZipPath -DestinationPath $LlamaDir -Force + + $foundServer = Get-ChildItem -Path $LlamaDir -Filter 'llama-server.exe' -Recurse -File | Select-Object -First 1 + if (-not $foundServer) { + throw 'llama-server.exe 下载或解压失败,未在主包中找到可执行文件。' + } + $srcDir = Split-Path -Parent $foundServer.FullName + $srcDirResolved = (Resolve-Path $srcDir).Path + $llamaDirResolved = (Resolve-Path $LlamaDir).Path + if 
($srcDirResolved -ne $llamaDirResolved) { + Copy-Item -Path (Join-Path $srcDir '*') -Destination $LlamaDir -Recurse -Force + } + + $status = Get-LlamaRuntimeStatus -BaseDir $LlamaDir + $needRuntime = ($status.Missing | Where-Object { $_ -match '^cudart64_|^cublas64_' }).Count -gt 0 + if ($needRuntime -and -not [string]::IsNullOrWhiteSpace([string]$assets.RuntimeUrl)) { + $runtimeZipPath = Join-Path $LlamaDir 'llama-win-cuda-runtime.zip' + Download-File -Url $assets.RuntimeUrl -OutFile $runtimeZipPath + Write-Step '解压 CUDA 运行时补充包' + Expand-Archive -Path $runtimeZipPath -DestinationPath $LlamaDir -Force + } + + $status = Get-LlamaRuntimeStatus -BaseDir $LlamaDir + if (-not $status.Ready) { + $missingText = ($status.Missing -join ', ') + throw "CUDA 运行时不完整,缺失: $missingText" + } +} + +function Ensure-ModelFiles { + Ensure-Dir (Split-Path -Parent $GgufPath) + Ensure-Dir (Split-Path -Parent $MmprojPath) + + $ggufUrl = if ($env:MODEL_GGUF_URL) { $env:MODEL_GGUF_URL } else { $DefaultGgufUrl } + $mmprojUrl = if ($env:MODEL_MMPROJ_URL) { $env:MODEL_MMPROJ_URL } else { $DefaultMmprojUrl } + Write-Step "主模型路径: $GgufPath" + Write-Step "视觉模型路径: $MmprojPath" + + if (-not (Test-Path $GgufPath)) { + Download-File -Url $ggufUrl -OutFile $GgufPath + } else { + Write-Step '检测到现有 9B 主模型,跳过下载' + } + + if (-not (Test-Path $MmprojPath)) { + Download-File -Url $mmprojUrl -OutFile $MmprojPath + } else { + Write-Step '检测到现有 mmproj,跳过下载' + } + + Verify-Sha256 -Path $GgufPath -Expected $env:MODEL_GGUF_SHA256 + Verify-Sha256 -Path $MmprojPath -Expected $env:MODEL_MMPROJ_SHA256 +} + +function Main { + Ensure-PythonEnv + Ensure-LlamaRuntime + Ensure-ModelFiles + Write-Step '安装完成' + Write-Step '启动命令: .\\start_8080_toolhub_stack.cmd start' + Write-Step '停止命令: .\\start_8080_toolhub_stack.cmd stop' +} + +Main diff --git a/install_q8.cmd b/install_q8.cmd new file mode 100644 index 0000000..a0590a2 --- /dev/null +++ b/install_q8.cmd @@ -0,0 +1,5 @@ +@echo off +setlocal +set SCRIPT_DIR=%~dp0 
+powershell.exe -NoProfile -ExecutionPolicy Bypass -File "%SCRIPT_DIR%install_q8.ps1" %* +exit /b %ERRORLEVEL% diff --git a/install_q8.ps1 b/install_q8.ps1 new file mode 100644 index 0000000..4cd85ec --- /dev/null +++ b/install_q8.ps1 @@ -0,0 +1,63 @@ +param() + +$ErrorActionPreference = 'Stop' + +$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$RootDir = (Resolve-Path $ScriptDir).Path +$EnvConfig = Join-Path $RootDir 'env_config.ps1' +if (-not (Test-Path $EnvConfig)) { + throw "未找到 env_config.ps1: $EnvConfig" +} +. $EnvConfig + +$EnvFile = Join-Path $RootDir '.env' +$EnvExample = Join-Path $RootDir '.env.example' +$InstallScript = Join-Path $RootDir 'install.win.ps1' +$Q8RelativePath = '.tmp/models/crossrepo/lmstudio-community__Qwen3.5-9B-GGUF/Qwen3.5-9B-Q8_0.gguf' +$MmprojRelativePath = '.tmp/models/crossrepo/lmstudio-community__Qwen3.5-9B-GGUF/mmproj-Qwen3.5-9B-BF16.gguf' +$Q8Url = 'https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q8_0.gguf' +$MmprojUrl = 'https://huggingface.co/lmstudio-community/Qwen3.5-9B-GGUF/resolve/main/mmproj-Qwen3.5-9B-BF16.gguf' + +function Write-Step { + param([string]$Message) + Write-Host "[install_q8] $Message" +} + +function Set-ProcessEnvValue { + param( + [string]$Key, + [string]$Value + ) + [Environment]::SetEnvironmentVariable($Key, $Value, 'Process') +} + +function Update-Q8Env { + Ensure-EnvFile -Path $EnvFile -TemplatePath $EnvExample + Set-EnvFileValue -Path $EnvFile -Key 'MODEL_PATH' -Value $Q8RelativePath + Set-EnvFileValue -Path $EnvFile -Key 'MMPROJ_PATH' -Value $MmprojRelativePath + Set-EnvFileValue -Path $EnvFile -Key 'MODEL_GGUF_URL' -Value $Q8Url + Set-EnvFileValue -Path $EnvFile -Key 'MODEL_MMPROJ_URL' -Value $MmprojUrl + Set-EnvFileValue -Path $EnvFile -Key 'MODEL_GGUF_SHA256' -Value '' + Set-EnvFileValue -Path $EnvFile -Key 'MODEL_MMPROJ_SHA256' -Value '' +} + +function Main { + if (-not (Test-Path $InstallScript)) { + throw "未找到安装脚本: $InstallScript" + } + 
Update-Q8Env + Set-ProcessEnvValue -Key 'MODEL_PATH' -Value $Q8RelativePath + Set-ProcessEnvValue -Key 'MMPROJ_PATH' -Value $MmprojRelativePath + Set-ProcessEnvValue -Key 'MODEL_GGUF_URL' -Value $Q8Url + Set-ProcessEnvValue -Key 'MODEL_MMPROJ_URL' -Value $MmprojUrl + Set-ProcessEnvValue -Key 'MODEL_GGUF_SHA256' -Value '' + Set-ProcessEnvValue -Key 'MODEL_MMPROJ_SHA256' -Value '' + Write-Step "已写入 .env: MODEL_PATH=$Q8RelativePath" + Write-Step '已切换到 Q8 量化下载源,开始执行 install.win.ps1' + & powershell.exe -NoProfile -ExecutionPolicy Bypass -File $InstallScript + if ($LASTEXITCODE -ne 0) { + throw "Q8 安装失败,exit code: $LASTEXITCODE" + } +} + +Main diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0df57f8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +fastapi==0.135.1 +uvicorn==0.41.0 +requests==2.32.5 +qwen-agent==0.0.34 +ddgs==9.11.1 +beautifulsoup4==4.14.3 +Pillow==11.3.0 +numpy==2.3.3 +soundfile==0.13.1 +python-dateutil==2.9.0.post0 diff --git a/run_8080_toolhub_gateway.py b/run_8080_toolhub_gateway.py new file mode 100644 index 0000000..895ea0f --- /dev/null +++ b/run_8080_toolhub_gateway.py @@ -0,0 +1,593 @@ +#!/usr/bin/env python3 +import argparse +import os +import threading +import time +from contextlib import asynccontextmanager +from dataclasses import dataclass +from typing import Any, Dict, List, Set, Tuple + +import requests +import uvicorn +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, Response, StreamingResponse +from starlette.concurrency import run_in_threadpool + +from toolhub_gateway_agent import ( + build_non_stream_response, + run_chat_completion, + stream_chat_completion, +) + +DEFAULT_GATEWAY_HOST = '127.0.0.1' +DEFAULT_GATEWAY_PORT = 8080 +DEFAULT_BACKEND_BASE = 'http://127.0.0.1:8081' +DEFAULT_MODEL_SERVER = 'http://127.0.0.1:8081/v1' +DEFAULT_TIMEOUT_SEC = 180 +DEFAULT_BACKEND_WAIT_HINT = '' +DEFAULT_ACCESS_URLS = 'http://127.0.0.1:8080,http://localhost:8080' 
+READY_ANNOUNCE_INTERVAL_SEC = 2 +WAIT_LOG_INTERVAL_SEC = 10 +WARMUP_MESSAGE = '请只回复一个字:好' +WARMUP_PARSE_ERROR_MARKER = 'Failed to parse input' +STREAM_CHUNK_BYTES = 8192 +SUPPORTED_PROXY_METHODS = ['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'OPTIONS', 'HEAD'] +HOP_HEADERS = { + 'connection', + 'keep-alive', + 'proxy-authenticate', + 'proxy-authorization', + 'te', + 'trailers', + 'transfer-encoding', + 'upgrade', +} +LOCAL_CONFIG_KEY = 'LlamaCppWebui.config' +LOCAL_OVERRIDES_KEY = 'LlamaCppWebui.userOverrides' +WEBUI_SETTINGS_PATCH = f""" + + +""".strip() +BACKEND_LOADING_HTML = """ + + + + + + ToolHub 正在准备中 + + + +
+
+ +

ToolHub 正在准备中

+
+

网关已经启动,但模型后端暂时还没有就绪。

+

如果这是第一次启动,程序可能正在下载模型文件,或者正在把模型加载到 GPU。

+

页面会停留在这个等待界面里,并自动检查后端状态。准备完成后会自动进入聊天界面,不再整页反复刷新。

+

正在检查后端状态...

+

已等待 0 秒

+
+

如果你是刚在终端里执行了启动命令,最直接的进度信息通常就在那个终端窗口里。

+ __HINT_BLOCK__ +
+
+ 查看技术详情 +
__DETAIL__
+
+
+ + + +""".strip() + + +@dataclass(frozen=True) +class GatewayConfig: + backend_base: str + model_server: str + gateway_host: str + gateway_port: int + timeout_sec: int = DEFAULT_TIMEOUT_SEC + backend_wait_hint: str = DEFAULT_BACKEND_WAIT_HINT + access_urls: Tuple[str, ...] = () + + +@dataclass +class GatewayState: + ready_event: threading.Event + + +def parse_args() -> GatewayConfig: + parser = argparse.ArgumentParser(description='Run 8080 toolhub gateway with 8081 llama-server backend.') + parser.add_argument('--host', default=os.getenv('GATEWAY_HOST', DEFAULT_GATEWAY_HOST)) + parser.add_argument('--port', type=int, default=int(os.getenv('GATEWAY_PORT', str(DEFAULT_GATEWAY_PORT)))) + parser.add_argument('--backend-base', default=os.getenv('BACKEND_BASE', DEFAULT_BACKEND_BASE)) + parser.add_argument('--model-server', default=os.getenv('MODEL_SERVER', DEFAULT_MODEL_SERVER)) + parser.add_argument('--timeout-sec', type=int, default=int(os.getenv('GATEWAY_TIMEOUT_SEC', str(DEFAULT_TIMEOUT_SEC)))) + parser.add_argument('--backend-wait-hint', default=os.getenv('BACKEND_WAIT_HINT', DEFAULT_BACKEND_WAIT_HINT)) + parser.add_argument('--access-urls', default=os.getenv('ACCESS_URLS', DEFAULT_ACCESS_URLS)) + args = parser.parse_args() + return GatewayConfig( + backend_base=args.backend_base.rstrip('/'), + model_server=args.model_server.rstrip('/'), + gateway_host=args.host, + gateway_port=args.port, + timeout_sec=args.timeout_sec, + backend_wait_hint=args.backend_wait_hint.strip(), + access_urls=parse_access_urls(args.access_urls), + ) + + +def parse_access_urls(raw: str) -> Tuple[str, ...]: + urls = [item.strip() for item in raw.split(',') if item.strip()] + return tuple(dict.fromkeys(urls)) + + +def filtered_headers(headers: Dict[str, str]) -> Dict[str, str]: + blocked = HOP_HEADERS | {'host', 'content-length', 'proxy-connection'} + return {key: value for key, value in headers.items() if key.lower() not in blocked} + + +def drop_headers_ci(headers: Dict[str, str], names: 
Set[str]) -> Dict[str, str]: + lowered = {name.lower() for name in names} + return {key: value for key, value in headers.items() if key.lower() not in lowered} + + +def build_backend_url(base: str, path: str, query: str) -> str: + if not query: + return f'{base}{path}' + return f'{base}{path}?{query}' + + +def stream_upstream(upstream: requests.Response): + try: + for chunk in upstream.iter_content(chunk_size=STREAM_CHUNK_BYTES): + if chunk: + yield chunk + finally: + upstream.close() + + +def inject_webui_settings(html: str) -> str: + if WEBUI_SETTINGS_PATCH in html: + return html + if '<head>' in html: + return html.replace('<head>', f'<head>\n{WEBUI_SETTINGS_PATCH}\n', 1) + if '<body>' in html: + return html.replace('<body>', f'<body>\n{WEBUI_SETTINGS_PATCH}\n', 1) + return f'{WEBUI_SETTINGS_PATCH}\n{html}' + + +def build_backend_loading_response(detail: str, wait_hint: str) -> Response: + safe_detail = detail.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + hint_block = '' + if wait_hint: + safe_hint = wait_hint.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + hint_block = f'

如果你想单独查看后端准备进度,可以执行:
{safe_hint}

' + html = BACKEND_LOADING_HTML.replace('__DETAIL__', safe_detail).replace('__HINT_BLOCK__', hint_block) + return Response( + content=html, + status_code=200, + media_type='text/html; charset=utf-8', + headers={'Cache-Control': 'no-store, max-age=0'}, + ) + + +def is_root_request(request: Request, path: str) -> bool: + return request.method == 'GET' and path in {'/', '/index.html'} + + +def is_backend_wait_status(status_code: int) -> bool: + return status_code in {502, 503, 504} + + +def format_access_urls(access_urls: Tuple[str, ...]) -> str: + return ' '.join(access_urls) + + +def check_backend_ready(cfg: GatewayConfig) -> bool: + try: + response = requests.get(f'{cfg.backend_base}/health', timeout=cfg.timeout_sec) + response.raise_for_status() + except Exception: # noqa: BLE001 + return False + return True + + +def announce_access_urls(cfg: GatewayConfig) -> None: + if not cfg.access_urls: + return + print( + f'[toolhub-gateway] 网页入口已经开放,正在加载模型,完成后可访问: {format_access_urls(cfg.access_urls)}', + flush=True, + ) + + +def announce_backend_ready(cfg: GatewayConfig) -> None: + if not cfg.access_urls: + return + print( + f'[toolhub-gateway] 模型已完成加载和预热,可以打开: {format_access_urls(cfg.access_urls)}', + flush=True, + ) + + +def is_gateway_ready(state: GatewayState) -> bool: + return state.ready_event.is_set() + + +def warmup_model(cfg: GatewayConfig) -> Tuple[bool, str]: + payload = { + 'messages': [{'role': 'user', 'content': WARMUP_MESSAGE}], + 'max_tokens': 1, + 'stream': False, + 'temperature': 0, + } + try: + response = requests.post( + f'{cfg.model_server}/chat/completions', + json=payload, + timeout=cfg.timeout_sec, + ) + except Exception as exc: # noqa: BLE001 + return False, f'模型预热请求失败: {exc}' + if response.ok: + return True, '模型预热已完成' + body = response.text.strip() + if response.status_code == 500 and WARMUP_PARSE_ERROR_MARKER in body: + return True, '模型首轮预热已经完成' + return False, f'模型预热暂未完成: HTTP {response.status_code} {body[:200]}' + + +def 
run_ready_announcer(cfg: GatewayConfig, state: GatewayState) -> None: + last_wait_detail = '' + last_wait_log_at = 0.0 + announce_access_urls(cfg) + while True: + if check_backend_ready(cfg): + ready, wait_detail = warmup_model(cfg) + else: + ready, wait_detail = False, '后端健康检查尚未通过' + if ready: + state.ready_event.set() + announce_backend_ready(cfg) + return + now = time.monotonic() + if wait_detail != last_wait_detail or (now - last_wait_log_at) >= WAIT_LOG_INTERVAL_SEC: + print(f'[toolhub-gateway] 后端仍在准备中: {wait_detail}', flush=True) + last_wait_detail = wait_detail + last_wait_log_at = now + time.sleep(READY_ANNOUNCE_INTERVAL_SEC) + + +async def handle_gateway_health(cfg: GatewayConfig, state: GatewayState) -> Dict[str, Any]: + status = 'ok' if is_gateway_ready(state) else 'warming' + backend_error = '' + try: + health = requests.get(f'{cfg.backend_base}/health', timeout=cfg.timeout_sec) + health.raise_for_status() + except Exception as exc: # noqa: BLE001 + status = 'degraded' + backend_error = str(exc) + return {'status': status, 'backend_base': cfg.backend_base, 'backend_error': backend_error} + + +async def handle_chat_completions(request: Request, cfg: GatewayConfig) -> Response: + payload = await request.json() + stream = bool(payload.get('stream', False)) + if stream: + try: + iterator = stream_chat_completion(payload, cfg.model_server, cfg.timeout_sec) + except Exception as exc: # noqa: BLE001 + error = {'error': {'code': 500, 'type': 'gateway_error', 'message': str(exc)}} + return JSONResponse(status_code=500, content=error) + return StreamingResponse(iterator, media_type='text/event-stream') + + try: + result = await run_in_threadpool(run_chat_completion, payload, cfg.model_server, cfg.timeout_sec) + except Exception as exc: # noqa: BLE001 + error = {'error': {'code': 500, 'type': 'gateway_error', 'message': str(exc)}} + return JSONResponse(status_code=500, content=error) + + answer = result['answer'] + model = result['model'] + reasoning = 
result.get('reasoning', '') + return JSONResponse(content=build_non_stream_response(answer, model, reasoning)) + + +async def handle_proxy(request: Request, full_path: str, cfg: GatewayConfig, state: GatewayState) -> Response: + path = '/' + full_path + if is_root_request(request, path) and not is_gateway_ready(state): + return build_backend_loading_response('模型正在加载或预热,完成后会自动进入聊天界面。', cfg.backend_wait_hint) + url = build_backend_url(cfg.backend_base, path, request.url.query) + headers = filtered_headers(dict(request.headers)) + body = await request.body() + + try: + upstream = requests.request( + method=request.method, + url=url, + headers=headers, + data=body, + stream=True, + timeout=cfg.timeout_sec, + allow_redirects=False, + ) + except Exception as exc: # noqa: BLE001 + if is_root_request(request, path): + return build_backend_loading_response(str(exc), cfg.backend_wait_hint) + if request.method == 'GET' and path == '/favicon.ico': + return Response(status_code=204) + error = {'error': {'type': 'proxy_error', 'message': str(exc)}} + return JSONResponse(status_code=502, content=error) + + response_headers = filtered_headers(dict(upstream.headers)) + content_type = upstream.headers.get('content-type', '') + if is_root_request(request, path) and is_backend_wait_status(upstream.status_code): + detail = upstream.text.strip() or f'backend returned {upstream.status_code}' + upstream.close() + return build_backend_loading_response(detail, cfg.backend_wait_hint) + if request.method == 'GET' and path == '/favicon.ico' and is_backend_wait_status(upstream.status_code): + upstream.close() + return Response(status_code=204) + if 'text/event-stream' in content_type: + return StreamingResponse( + stream_upstream(upstream), + status_code=upstream.status_code, + headers=response_headers, + media_type='text/event-stream', + ) + + is_webui_html = ( + request.method == 'GET' + and path in {'/', '/index.html'} + and upstream.status_code == 200 + and 'text/html' in content_type + ) + 
if is_webui_html: + encoding = upstream.encoding or 'utf-8' + html = upstream.content.decode(encoding, errors='replace') + injected = inject_webui_settings(html) + upstream.close() + clean_headers = drop_headers_ci(response_headers, {'content-encoding', 'content-length', 'etag'}) + return Response( + content=injected.encode('utf-8'), + status_code=200, + headers=clean_headers, + media_type='text/html; charset=utf-8', + ) + + upstream.raw.decode_content = False + data = upstream.raw.read(decode_content=False) + upstream.close() + return Response(content=data, status_code=upstream.status_code, headers=response_headers) + + +def create_app(cfg: GatewayConfig, state: GatewayState) -> FastAPI: + @asynccontextmanager + async def lifespan(_: FastAPI): + threading.Thread(target=run_ready_announcer, args=(cfg, state), daemon=True).start() + yield + + app = FastAPI(title='Qwen3.5 ToolHub Gateway 8080', lifespan=lifespan) + + @app.get('/gateway/health') + async def gateway_health() -> Dict[str, Any]: + return await handle_gateway_health(cfg, state) + + @app.post('/v1/chat/completions') + async def chat_completions(request: Request) -> Response: + return await handle_chat_completions(request, cfg) + + @app.api_route('/{full_path:path}', methods=SUPPORTED_PROXY_METHODS) + async def proxy_all(request: Request, full_path: str) -> Response: + return await handle_proxy(request, full_path, cfg, state) + + return app + + +def main() -> None: + cfg = parse_args() + state = GatewayState(ready_event=threading.Event()) + app = create_app(cfg, state) + uvicorn.run(app, host=cfg.gateway_host, port=cfg.gateway_port, log_level='info') + + +if __name__ == '__main__': + main() diff --git a/start_8080_toolhub_stack.cmd b/start_8080_toolhub_stack.cmd new file mode 100644 index 0000000..f62cee9 --- /dev/null +++ b/start_8080_toolhub_stack.cmd @@ -0,0 +1,5 @@ +@echo off +setlocal +set SCRIPT_DIR=%~dp0 +powershell.exe -NoProfile -ExecutionPolicy Bypass -File 
"%SCRIPT_DIR%start_8080_toolhub_stack.ps1" %* +exit /b %ERRORLEVEL% diff --git a/start_8080_toolhub_stack.ps1 b/start_8080_toolhub_stack.ps1 new file mode 100644 index 0000000..fb88bda --- /dev/null +++ b/start_8080_toolhub_stack.ps1 @@ -0,0 +1,292 @@ +param( + [string]$Command = 'status' +) + +$ErrorActionPreference = 'Stop' +$ProgressPreference = 'SilentlyContinue' + +$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$RootDir = (Resolve-Path $ScriptDir).Path +$EnvConfig = Join-Path $RootDir 'env_config.ps1' +if (Test-Path $EnvConfig) { + . $EnvConfig + Import-EnvFile -Path (Join-Path $RootDir '.env') +} +$PythonBin = Join-Path $RootDir '.venv-qwen35\Scripts\python.exe' +$GatewayRun = Join-Path $RootDir 'run_8080_toolhub_gateway.py' +$RuntimeDir = Join-Path $RootDir '.tmp\toolhub_gateway' +$PidFile = Join-Path $RuntimeDir 'gateway.pid' +$LogFile = Join-Path $RuntimeDir 'gateway.log' +$ErrLogFile = Join-Path $RuntimeDir 'gateway.err.log' +$ModelSwitch = Join-Path $RootDir 'switch_qwen35_webui.ps1' + +$GatewayHost = if ($env:GATEWAY_HOST) { $env:GATEWAY_HOST } else { '127.0.0.1' } +$GatewayPort = if ($env:GATEWAY_PORT) { $env:GATEWAY_PORT } else { '8080' } +$BackendHost = if ($env:BACKEND_HOST) { $env:BACKEND_HOST } else { '127.0.0.1' } +$BackendPort = if ($env:BACKEND_PORT) { $env:BACKEND_PORT } else { '8081' } +$ThinkMode = if ($env:THINK_MODE) { $env:THINK_MODE } else { 'think-on' } +$BackendWaitHint = '.\start_8080_toolhub_stack.cmd logs' +$SpinnerFrameIntervalMs = 120 +$SpinnerProbeIntervalMs = 1000 + +function Ensure-Dir { + param([string]$Path) + if (-not (Test-Path $Path)) { + New-Item -Path $Path -ItemType Directory -Force | Out-Null + } +} + +function Test-GatewayRunning { + if (-not (Test-Path $PidFile)) { + return $false + } + $raw = Get-Content -Path $PidFile -ErrorAction SilentlyContinue | Select-Object -First 1 + $gatewayPid = 0 + if (-not [int]::TryParse([string]$raw, [ref]$gatewayPid)) { + return $false + } + $proc = Get-Process -Id 
$gatewayPid -ErrorAction SilentlyContinue + return $null -ne $proc +} + +function Test-GatewayReady { + try { + $null = Invoke-RestMethod -Uri "http://$GatewayHost`:$GatewayPort/gateway/health" -Method Get -TimeoutSec 2 + return $true + } catch { + return $false + } +} + +function Show-GatewayFailureLogs { + Write-Host '网关启动失败,最近日志如下:' + if (Test-Path $LogFile) { + Write-Host '=== 网关标准输出 ===' + Get-Content -Path $LogFile -Tail 120 -ErrorAction SilentlyContinue + } + if (Test-Path $ErrLogFile) { + Write-Host '=== 网关标准错误 ===' + Get-Content -Path $ErrLogFile -Tail 120 -ErrorAction SilentlyContinue + } +} + +function Write-SpinnerLine { + param( + [string]$Label, + [double]$Current, + [int]$Total, + [int]$Tick + ) + $frames = @('|', '/', '-', '\') + $frame = $frames[$Tick % $frames.Count] + $currentText = [string][int][Math]::Floor($Current) + Write-Host -NoNewline "`r$Label $frame $currentText/$Total 秒" +} + +function Complete-SpinnerLine { + Write-Host '' +} + +function Stop-OrphanGatewayProcesses { + try { + $rootPattern = [regex]::Escape($RootDir) + $targets = Get-CimInstance Win32_Process -Filter "Name='python.exe'" -ErrorAction SilentlyContinue | Where-Object { + $cmd = [string]$_.CommandLine + $cmd -match 'run_8080_toolhub_gateway\.py' -and $cmd -match $rootPattern + } + foreach ($proc in $targets) { + if ($proc.ProcessId) { + Stop-Process -Id ([int]$proc.ProcessId) -Force -ErrorAction SilentlyContinue + } + } + } catch {} +} + +function Start-Backend { + if ($env:MODEL_KEY -and $env:MODEL_KEY -ne '9b') { + throw "当前交付包仅支持 MODEL_KEY=9b,收到: $($env:MODEL_KEY)" + } + $oldHost = $env:HOST + $oldPort = $env:PORT + try { + $env:HOST = $BackendHost + $env:PORT = $BackendPort + & powershell.exe -NoProfile -ExecutionPolicy Bypass -File $ModelSwitch '9b' $ThinkMode + if ($LASTEXITCODE -ne 0) { + throw '后端启动失败,请先查看上面的直接原因' + } + } finally { + $env:HOST = $oldHost + $env:PORT = $oldPort + } +} + +function Start-Gateway { + Ensure-Dir $RuntimeDir + 
Stop-OrphanGatewayProcesses + if (Test-GatewayRunning) { + Write-Host '网关状态: 已运行' + Write-Host "PID: $(Get-Content -Path $PidFile)" + return + } + if (-not (Test-Path $PythonBin)) { + throw "Python 环境不存在: $PythonBin" + } + + $args = @( + $GatewayRun, + '--host', $GatewayHost, + '--port', $GatewayPort, + '--backend-base', "http://$BackendHost`:$BackendPort", + '--model-server', "http://$BackendHost`:$BackendPort/v1" + ) + if (Test-Path $ErrLogFile) { + Remove-Item -Path $ErrLogFile -Force -ErrorAction SilentlyContinue + } + $oldWaitHint = $env:BACKEND_WAIT_HINT + try { + $env:BACKEND_WAIT_HINT = $BackendWaitHint + $proc = Start-Process -FilePath $PythonBin -ArgumentList $args -WindowStyle Hidden -RedirectStandardOutput $LogFile -RedirectStandardError $ErrLogFile -PassThru + } finally { + $env:BACKEND_WAIT_HINT = $oldWaitHint + } + Set-Content -Path $PidFile -Value $proc.Id -Encoding ascii + + $timeoutSec = 60 + $stopwatch = [System.Diagnostics.Stopwatch]::StartNew() + $nextProbeMs = 0 + $tick = 0 + while ($stopwatch.Elapsed.TotalSeconds -lt $timeoutSec) { + Write-SpinnerLine -Label '网关启动中...' 
-Current $stopwatch.Elapsed.TotalSeconds -Total $timeoutSec -Tick $tick + if ($stopwatch.ElapsedMilliseconds -ge $nextProbeMs) { + if (-not (Test-GatewayRunning)) { + break + } + if (Test-GatewayReady) { + Complete-SpinnerLine + return + } + $nextProbeMs += $SpinnerProbeIntervalMs + } + Start-Sleep -Milliseconds $SpinnerFrameIntervalMs + $tick++ + } + Complete-SpinnerLine + + Show-GatewayFailureLogs + throw '网关启动失败。' +} + +function Stop-Gateway { + Stop-OrphanGatewayProcesses + if (-not (Test-GatewayRunning)) { + if (Test-Path $PidFile) { + Remove-Item -Path $PidFile -Force -ErrorAction SilentlyContinue + } + Write-Host '网关状态: 未运行' + return + } + + $gatewayPid = [int](Get-Content -Path $PidFile | Select-Object -First 1) + Stop-Process -Id $gatewayPid -Force -ErrorAction SilentlyContinue + Start-Sleep -Seconds 1 + if (Test-Path $PidFile) { + Remove-Item -Path $PidFile -Force -ErrorAction SilentlyContinue + } + Write-Host '网关状态: 已停止' +} + +function Show-Status { + Write-Host '=== 网关 ===' + if (Test-GatewayRunning) { + $state = if (Test-GatewayReady) { '可访问' } else { '初始化中' } + Write-Host '状态: 运行中' + Write-Host "PID: $(Get-Content -Path $PidFile)" + Write-Host "地址: http://$GatewayHost`:$GatewayPort" + Write-Host "健康: $state" + Write-Host "日志: $LogFile" + Write-Host "错误日志: $ErrLogFile" + } else { + Write-Host '状态: 未运行' + } + + Write-Host '' + Write-Host '=== 模型后端 ===' + $oldHost = $env:HOST + $oldPort = $env:PORT + try { + $env:HOST = $BackendHost + $env:PORT = $BackendPort + & powershell.exe -NoProfile -ExecutionPolicy Bypass -File $ModelSwitch 'status' + } finally { + $env:HOST = $oldHost + $env:PORT = $oldPort + } +} + +function Show-Logs { + Write-Host '=== 网关日志 ===' + if (Test-Path $LogFile) { + Get-Content -Path $LogFile -Tail 120 + } + if (Test-Path $ErrLogFile) { + Write-Host '=== 网关错误日志 ===' + Get-Content -Path $ErrLogFile -Tail 120 + return + } + Write-Host '暂无日志' +} + +function Stop-Backend { + $oldHost = $env:HOST + $oldPort = $env:PORT + try { + $env:HOST 
= $BackendHost + $env:PORT = $BackendPort + & powershell.exe -NoProfile -ExecutionPolicy Bypass -File $ModelSwitch 'stop' + } finally { + $env:HOST = $oldHost + $env:PORT = $oldPort + } +} + +function Start-Stack { + try { + Write-Host '步骤 1/2: 启动模型后端' + Start-Backend + Write-Host '步骤 2/2: 启动网关服务' + Start-Gateway + Write-Host '栈已启动' + Write-Host "网页入口: http://$GatewayHost`:$GatewayPort" + Write-Host '可用状态检查命令: .\start_8080_toolhub_stack.cmd status' + Write-Host '停止命令: .\start_8080_toolhub_stack.cmd stop' + } catch { + Write-Host $_.Exception.Message + exit 1 + } +} + +function Stop-Stack { + Stop-Gateway + Stop-Backend +} + +switch ($Command) { + 'start' { Start-Stack; break } + 'stop' { Stop-Stack; break } + 'restart' { Stop-Stack; Start-Stack; break } + 'status' { Show-Status; break } + 'logs' { Show-Logs; break } + default { + Write-Host '用法:' + Write-Host ' .\\start_8080_toolhub_stack.cmd {start|stop|restart|status|logs}' + Write-Host '' + Write-Host '可选环境变量:' + Write-Host ' GATEWAY_HOST=127.0.0.1' + Write-Host ' GATEWAY_PORT=8080' + Write-Host ' BACKEND_HOST=127.0.0.1' + Write-Host ' BACKEND_PORT=8081' + Write-Host ' THINK_MODE=think-on' + exit 1 + } +} diff --git a/start_8080_toolhub_stack.sh b/start_8080_toolhub_stack.sh new file mode 100644 index 0000000..34912c4 --- /dev/null +++ b/start_8080_toolhub_stack.sh @@ -0,0 +1,105 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PS1_PATH="$ROOT_DIR/start_8080_toolhub_stack.ps1" + +print_usage() { + cat <<'USAGE' +用法: + ./start_8080_toolhub_stack.sh {start|stop|restart|status|logs} + +说明: + WSL 入口会直接复用 Windows 主脚本的完整启动链路。 + 包括后端 GPU 强校验与网关管理,行为与 cmd / PowerShell 保持一致。 +USAGE +} + +to_win_path_if_needed() { + local raw="$1" + if [[ -z "$raw" ]]; then + printf '' + return + fi + if [[ "$raw" == /* ]]; then + wslpath -w "$raw" + return + fi + printf '%s' "$raw" +} + +ps_escape_single_quotes() { + printf "%s" "$1" | sed "s/'/''/g" +} + 
+require_windows_power_shell() { + if ! command -v powershell.exe >/dev/null 2>&1; then + echo "未找到 powershell.exe,无法从 WSL 调用 Windows 栈脚本。" + exit 1 + fi + if [[ ! -f "$PS1_PATH" ]]; then + echo "缺少栈脚本: $PS1_PATH" + exit 1 + fi +} + +build_env_overrides() { + local -n out_ref=$1 + out_ref=() + + for key in GATEWAY_HOST GATEWAY_PORT BACKEND_HOST BACKEND_PORT THINK_MODE HOST PORT CTX_SIZE IMAGE_MIN_TOKENS IMAGE_MAX_TOKENS MMPROJ_OFFLOAD GPU_MEMORY_DELTA_MIN_MIB; do + if [[ -n "${!key-}" ]]; then + out_ref+=("$key=${!key}") + fi + done + + if [[ -n "${BIN_PATH-}" ]]; then + out_ref+=("BIN_PATH=$(to_win_path_if_needed "$BIN_PATH")") + fi + if [[ -n "${MODEL_PATH-}" ]]; then + out_ref+=("MODEL_PATH=$(to_win_path_if_needed "$MODEL_PATH")") + fi + if [[ -n "${MMPROJ_PATH-}" ]]; then + out_ref+=("MMPROJ_PATH=$(to_win_path_if_needed "$MMPROJ_PATH")") + fi +} + +build_ps_env_setup() { + local -n env_ref=$1 + local lines=() + local item key value escaped_value + for item in "${env_ref[@]}"; do + key="${item%%=*}" + value="${item#*=}" + escaped_value="$(ps_escape_single_quotes "$value")" + lines+=("[Environment]::SetEnvironmentVariable('$key', '$escaped_value', 'Process')") + done + printf '%s; ' "${lines[@]}" +} + +main() { + local command="${1:-status}" + case "$command" in + start|stop|restart|status|logs) ;; + *) + print_usage + exit 1 + ;; + esac + + require_windows_power_shell + + local ps1_win + ps1_win="$(wslpath -w "$PS1_PATH")" + + local env_overrides=() + build_env_overrides env_overrides + + local ps_command + local ps_env_setup + ps_env_setup="$(build_ps_env_setup env_overrides)" + ps_command="[Console]::InputEncoding = [System.Text.UTF8Encoding]::new(\$false); [Console]::OutputEncoding = [System.Text.UTF8Encoding]::new(\$false); chcp 65001 > \$null; ${ps_env_setup}& '$ps1_win' '$command'" + powershell.exe -NoProfile -ExecutionPolicy Bypass -Command "$ps_command" +} + +main "${1:-status}" diff --git a/switch_qwen35_webui.ps1 b/switch_qwen35_webui.ps1 new file mode 
100644 index 0000000..283ba75 --- /dev/null +++ b/switch_qwen35_webui.ps1 @@ -0,0 +1,499 @@ +param( + [string]$Command = 'status', + [string]$ThinkMode = 'think-on' +) + +$ErrorActionPreference = 'Stop' +$ProgressPreference = 'SilentlyContinue' + +$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$RootDir = (Resolve-Path $ScriptDir).Path +$EnvConfig = Join-Path $RootDir 'env_config.ps1' +if (Test-Path $EnvConfig) { + . $EnvConfig + Import-EnvFile -Path (Join-Path $RootDir '.env') +} +$BinPath = if ($env:BIN_PATH) { $env:BIN_PATH } else { Join-Path $RootDir '.tmp\llama_win_cuda\llama-server.exe' } +$HostAddr = if ($env:HOST) { $env:HOST } else { '127.0.0.1' } +$PortNum = if ($env:PORT) { $env:PORT } else { '8081' } +$CtxSize = if ($env:CTX_SIZE) { $env:CTX_SIZE } else { '16384' } +$ImageMinTokens = if ($env:IMAGE_MIN_TOKENS) { $env:IMAGE_MIN_TOKENS } else { '256' } +$ImageMaxTokens = if ($env:IMAGE_MAX_TOKENS) { $env:IMAGE_MAX_TOKENS } else { '1024' } +$MmprojOffload = if ($env:MMPROJ_OFFLOAD) { $env:MMPROJ_OFFLOAD } else { 'off' } +$ModelPath = Resolve-ManagedPath -BaseDir $RootDir -Value $env:MODEL_PATH -DefaultRelativePath '.tmp\models\crossrepo\lmstudio-community__Qwen3.5-9B-GGUF\Qwen3.5-9B-Q4_K_M.gguf' +$MmprojPath = Resolve-ManagedPath -BaseDir $RootDir -Value $env:MMPROJ_PATH -DefaultRelativePath '.tmp\models\crossrepo\lmstudio-community__Qwen3.5-9B-GGUF\mmproj-Qwen3.5-9B-BF16.gguf' +$WebuiDir = Join-Path $RootDir '.tmp\webui' +$PidFile = Join-Path $WebuiDir 'llama_server.pid' +$CurrentLogFile = Join-Path $WebuiDir 'current.log' +$CurrentErrLogFile = Join-Path $WebuiDir 'current.err.log' +$GpuMemoryDeltaMinMiB = if ($env:GPU_MEMORY_DELTA_MIN_MIB) { $env:GPU_MEMORY_DELTA_MIN_MIB } else { '1024' } +$BackendReadyTimeoutSec = if ($env:BACKEND_READY_TIMEOUT_SEC) { $env:BACKEND_READY_TIMEOUT_SEC } else { '180' } +$GpuVerifyTimeoutSec = if ($env:GPU_VERIFY_TIMEOUT_SEC) { $env:GPU_VERIFY_TIMEOUT_SEC } else { '180' } +$SpinnerFrameIntervalMs = 120 
+$SpinnerProbeIntervalMs = 1000 + +function Ensure-Dir { + param([string]$Path) + if (-not (Test-Path $Path)) { + New-Item -Path $Path -ItemType Directory -Force | Out-Null + } +} + +function Test-Health { + try { + $null = Invoke-RestMethod -Uri "http://$HostAddr`:$PortNum/health" -Method Get -TimeoutSec 2 + return $true + } catch { + return $false + } +} + +function Get-ModelId { + try { + $models = Invoke-RestMethod -Uri "http://$HostAddr`:$PortNum/v1/models" -Method Get -TimeoutSec 3 + if ($models.data -and $models.data.Count -gt 0) { + return [string]$models.data[0].id + } + return '' + } catch { + return '' + } +} + +function Write-SpinnerLine { + param( + [string]$Label, + [double]$Current, + [int]$Total, + [int]$Tick + ) + $frames = @('|', '/', '-', '\') + $frame = $frames[$Tick % $frames.Count] + $currentText = [string][int][Math]::Floor($Current) + Write-Host -NoNewline "`r$Label $frame $currentText/$Total 秒" +} + +function Complete-SpinnerLine { + Write-Host '' +} + +function Test-ProcessRunning { + param([int]$ProcessId) + try { + $null = Get-Process -Id $ProcessId -ErrorAction Stop + return $true + } catch { + return $false + } +} + +function Wait-Ready { + param([int]$ProcessId) + $timeoutSec = [int]$BackendReadyTimeoutSec + $stopwatch = [System.Diagnostics.Stopwatch]::StartNew() + $nextProbeMs = 0 + $tick = 0 + while ($stopwatch.Elapsed.TotalSeconds -lt $timeoutSec) { + Write-SpinnerLine -Label '后端加载中...' 
-Current $stopwatch.Elapsed.TotalSeconds -Total $timeoutSec -Tick $tick + if ($stopwatch.ElapsedMilliseconds -ge $nextProbeMs) { + if (-not (Test-ProcessRunning -ProcessId $ProcessId)) { + Complete-SpinnerLine + return @{ Ready = $false; Reason = 'llama-server 进程已提前退出' } + } + if (Test-Health) { + $modelId = Get-ModelId + if (-not [string]::IsNullOrWhiteSpace($modelId)) { + Complete-SpinnerLine + return @{ Ready = $true; Reason = "模型已就绪: $modelId" } + } + } + $nextProbeMs += $SpinnerProbeIntervalMs + } + Start-Sleep -Milliseconds $SpinnerFrameIntervalMs + $tick++ + } + Complete-SpinnerLine + return @{ Ready = $false; Reason = "后端在 $timeoutSec 秒内未就绪" } +} + +function Read-LogText { + param([string]$Path) + if (-not (Test-Path $Path)) { + return '' + } + try { + $lines = Get-Content -Path $Path -Tail 400 -ErrorAction SilentlyContinue + if ($null -eq $lines) { + return '' + } + return ($lines -join "`n") + } catch { + return '' + } +} + +function Show-RecentServerLogs { + param( + [string]$OutLogPath, + [string]$ErrLogPath + ) + Write-Host '后端启动失败,最近日志如下:' + if (Test-Path $OutLogPath) { + Write-Host '=== 标准输出 ===' + Get-Content -Path $OutLogPath -Tail 120 -ErrorAction SilentlyContinue + } + if (Test-Path $ErrLogPath) { + Write-Host '=== 标准错误 ===' + Get-Content -Path $ErrLogPath -Tail 120 -ErrorAction SilentlyContinue + } +} + +function Test-GpuReadyFromLogs { + param( + [string]$OutLogPath, + [string]$ErrLogPath + ) + $content = (Read-LogText -Path $OutLogPath) + "`n" + (Read-LogText -Path $ErrLogPath) + if ([string]::IsNullOrWhiteSpace($content)) { + return @{ Ready = $false; Reason = '日志为空' } + } + + $match = [regex]::Match($content, 'offloaded\s+(\d+)\/(\d+)\s+layers\s+to\s+GPU', [System.Text.RegularExpressions.RegexOptions]::IgnoreCase) + if ($match.Success) { + $offloaded = [int]$match.Groups[1].Value + $total = [int]$match.Groups[2].Value + if ($offloaded -gt 0) { + return @{ Ready = $true; Reason = "offloaded $offloaded/$total" } + } + return @{ Ready = $false; 
 Reason = "offloaded 0/$total" } + } + + $cpuFallbackPattern = 'cuda[^\r\n]*failed|no cuda-capable device|unable to initialize cuda|using cpu' + if ($content -match $cpuFallbackPattern) { + return @{ Ready = $false; Reason = '检测到 CUDA 初始化失败或 CPU 回退' } + } + + return @{ Ready = $false; Reason = '未检测到 GPU 卸载证据' } +} + +function Ensure-GpuOffload { + param( + [int]$ProcessId, + [int]$BaselineMemoryMiB, + [string]$OutLogPath, + [string]$ErrLogPath + ) + $moduleResult = @{ Ready = $false; Reason = '未执行检查' } + $result = @{ Ready = $false; Reason = '未知原因' } + $nvidiaResult = @{ Ready = $false; Reason = '未执行检查' } + $timeoutSec = [int]$GpuVerifyTimeoutSec + $stopwatch = [System.Diagnostics.Stopwatch]::StartNew() + $nextProbeMs = 0 + $tick = 0 + while ($stopwatch.Elapsed.TotalSeconds -lt $timeoutSec) { + Write-SpinnerLine -Label 'GPU 校验中...' -Current $stopwatch.Elapsed.TotalSeconds -Total $timeoutSec -Tick $tick + if ($stopwatch.ElapsedMilliseconds -ge $nextProbeMs) { + if (-not (Test-ProcessRunning -ProcessId $ProcessId)) { + Complete-SpinnerLine + throw 'llama-server 在 GPU 校验期间提前退出' + } + $moduleResult = Test-CudaBackendLoaded -ProcessId $ProcessId + $result = Test-GpuReadyFromLogs -OutLogPath $OutLogPath -ErrLogPath $ErrLogPath + $nvidiaResult = Test-GpuReadyByNvidiaSmi -BaselineMemoryMiB $BaselineMemoryMiB + if ($moduleResult.Ready -and ($result.Ready -or $nvidiaResult.Ready)) { + Complete-SpinnerLine + if ($result.Ready) { + return "$($moduleResult.Reason);$($result.Reason)" + } + return "$($moduleResult.Reason);$($nvidiaResult.Reason)" + } + $nextProbeMs += $SpinnerProbeIntervalMs + } + Start-Sleep -Milliseconds $SpinnerFrameIntervalMs + $tick++ + } + Complete-SpinnerLine + throw "已禁止 CPU 回退,但未检测到 GPU 卸载。模块检查: $($moduleResult.Reason);nvidia-smi: $($nvidiaResult.Reason);日志检查: $($result.Reason)" +} + +function Test-CudaBackendLoaded { + param([int]$ProcessId) + try { + $mods = Get-Process -Id $ProcessId -Module -ErrorAction Stop + $cuda = $mods | Where-Object { 
$_.ModuleName -match '^ggml-cuda.*\.dll$' } | Select-Object -First 1 + if ($null -ne $cuda) { + return @{ Ready = $true; Reason = "检测到 $($cuda.ModuleName) 已加载" } + } + return @{ Ready = $false; Reason = '未检测到 ggml-cuda*.dll' } + } catch { + return @{ Ready = $false; Reason = '无法读取 llama-server 进程模块' } + } +} + +function Test-GpuReadyByNvidiaSmi { + param([int]$BaselineMemoryMiB) + $snapshot = Get-GpuMemoryUsedMiB + if (-not $snapshot.Ok) { + return @{ Ready = $false; Reason = $snapshot.Reason } + } + $delta = $snapshot.UsedMiB - $BaselineMemoryMiB + if ($snapshot.UsedMiB -gt 0 -and $delta -ge [int]$GpuMemoryDeltaMinMiB) { + return @{ Ready = $true; Reason = "nvidia-smi 显存占用 $($snapshot.UsedMiB)MiB,较基线增加 ${delta}MiB" } + } + return @{ Ready = $false; Reason = "显存占用 $($snapshot.UsedMiB)MiB,较基线增加 ${delta}MiB,阈值 ${GpuMemoryDeltaMinMiB}MiB" } +} + +function Get-GpuMemoryUsedMiB { + $nvidia = Get-Command nvidia-smi.exe -ErrorAction SilentlyContinue + if (-not $nvidia) { + $nvidia = Get-Command nvidia-smi -ErrorAction SilentlyContinue + } + if (-not $nvidia) { + return @{ Ok = $false; UsedMiB = 0; Reason = 'nvidia-smi 不可用' } + } + + $output = & $nvidia.Source '--query-gpu=memory.used' '--format=csv,noheader,nounits' 2>&1 + if ($LASTEXITCODE -ne 0) { + return @{ Ok = $false; UsedMiB = 0; Reason = 'nvidia-smi 执行失败' } + } + + $rows = @($output | ForEach-Object { "$_".Trim() } | Where-Object { $_ -match '^[0-9]+$' }) + if ($rows.Count -eq 0) { + return @{ Ok = $false; UsedMiB = 0; Reason = 'nvidia-smi 未返回显存数据' } + } + $maxUsed = 0 + foreach ($row in $rows) { + $memValue = 0 + if ([int]::TryParse($row, [ref]$memValue)) { + if ($memValue -gt $maxUsed) { + $maxUsed = $memValue + } + } + } + return @{ Ok = $true; UsedMiB = $maxUsed; Reason = 'ok' } +} + +function Get-StartupFailureReason { + param( + [string]$OutLogPath, + [string]$ErrLogPath + ) + $content = (Read-LogText -Path $OutLogPath) + "`n" + (Read-LogText -Path $ErrLogPath) + if ([string]::IsNullOrWhiteSpace($content)) { + 
return '' + } + $bindMatch = [regex]::Match($content, "couldn't bind HTTP server socket, hostname:\s*([^,]+), port:\s*([0-9]+)", [System.Text.RegularExpressions.RegexOptions]::IgnoreCase) + if ($bindMatch.Success) { + $busyPort = $bindMatch.Groups[2].Value + return "端口 $busyPort 已被占用,请先关闭占用该端口的服务,再重新启动" + } + return '' +} + +function Get-PortOwnerSummary { + param([string]$Port) + try { + $listeners = Get-NetTCPConnection -LocalPort ([int]$Port) -State Listen -ErrorAction SilentlyContinue + if (-not $listeners) { + return '' + } + $owners = @() + foreach ($listener in @($listeners | Select-Object -ExpandProperty OwningProcess -Unique)) { + $proc = Get-Process -Id $listener -ErrorAction SilentlyContinue + if ($proc) { + $owners += ('{0} (PID {1})' -f $proc.ProcessName, $proc.Id) + } else { + $owners += ('PID {0}' -f $listener) + } + } + return ($owners -join ', ') + } catch { + return '' + } +} + +function Stop-Server { + if (Test-Path $PidFile) { + $raw = Get-Content -Path $PidFile -ErrorAction SilentlyContinue | Select-Object -First 1 + $serverPid = 0 + if ([int]::TryParse([string]$raw, [ref]$serverPid) -and $serverPid -gt 0) { + try { + Stop-Process -Id $serverPid -Force -ErrorAction SilentlyContinue + } catch {} + } + } + + $procs = Get-Process -Name 'llama-server' -ErrorAction SilentlyContinue + if ($procs) { + $procs | Stop-Process -Force -ErrorAction SilentlyContinue + } + + if (Test-Path $PidFile) { + Remove-Item -Path $PidFile -Force -ErrorAction SilentlyContinue + } + if (Test-Path $CurrentErrLogFile) { + Remove-Item -Path $CurrentErrLogFile -Force -ErrorAction SilentlyContinue + } +} + +function Show-Status { + if (Test-Health) { + $modelId = Get-ModelId + if ([string]::IsNullOrWhiteSpace($modelId)) { + $modelId = 'loading' + } + Write-Host '状态: 运行中' + Write-Host "地址: http://$HostAddr`:$PortNum" + Write-Host "模型: $modelId" + if (Test-Path $CurrentLogFile) { + $p = Get-Content -Path $CurrentLogFile -ErrorAction SilentlyContinue | Select-Object -First 1 + 
if ($p) { + Write-Host "日志: $p" + } + } + if (Test-Path $CurrentErrLogFile) { + $ep = Get-Content -Path $CurrentErrLogFile -ErrorAction SilentlyContinue | Select-Object -First 1 + if ($ep) { + Write-Host "错误日志: $ep" + } + } + return + } + Write-Host '状态: 未运行' +} + +function Resolve-RuntimeProfile { + switch ($ThinkMode) { + 'think-on' { return @{ ReasoningBudget = '-1'; MaxTokens = '-1' } } + 'think-off' { return @{ ReasoningBudget = '0'; MaxTokens = '2048' } } + default { throw "不支持的思考模式: $ThinkMode" } + } +} + +function Validate-Limits { + if (($CtxSize -notmatch '^[0-9]+$') -or ($ImageMinTokens -notmatch '^[0-9]+$') -or ($ImageMaxTokens -notmatch '^[0-9]+$')) { + throw 'CTX_SIZE / IMAGE_MIN_TOKENS / IMAGE_MAX_TOKENS 必须是正整数' + } + if ([int]$CtxSize -le 0 -or [int]$ImageMinTokens -le 0 -or [int]$ImageMaxTokens -le 0) { + throw 'CTX_SIZE / IMAGE_MIN_TOKENS / IMAGE_MAX_TOKENS 必须大于 0' + } + if ([int]$ImageMinTokens -gt [int]$ImageMaxTokens) { + throw 'IMAGE_MIN_TOKENS 不能大于 IMAGE_MAX_TOKENS' + } + if ($MmprojOffload -ne 'on' -and $MmprojOffload -ne 'off') { + throw 'MMPROJ_OFFLOAD 仅支持 on 或 off' + } + if (($GpuMemoryDeltaMinMiB -notmatch '^[0-9]+$') -or [int]$GpuMemoryDeltaMinMiB -le 0) { + throw 'GPU_MEMORY_DELTA_MIN_MIB 必须是正整数' + } + if (($BackendReadyTimeoutSec -notmatch '^[0-9]+$') -or [int]$BackendReadyTimeoutSec -le 0) { + throw 'BACKEND_READY_TIMEOUT_SEC 必须是正整数' + } + if (($GpuVerifyTimeoutSec -notmatch '^[0-9]+$') -or [int]$GpuVerifyTimeoutSec -le 0) { + throw 'GPU_VERIFY_TIMEOUT_SEC 必须是正整数' + } +} + +function Start-Server { + if (-not (Test-Path $BinPath)) { + throw "llama-server.exe 不存在: $BinPath" + } + if (-not (Test-Path $ModelPath) -or -not (Test-Path $MmprojPath)) { + throw "模型文件不完整。`nMODEL_PATH=$ModelPath`nMMPROJ_PATH=$MmprojPath" + } + + Ensure-Dir $WebuiDir + Validate-Limits + $profile = Resolve-RuntimeProfile + Stop-Server + $portOwner = Get-PortOwnerSummary -Port $PortNum + if ($portOwner) { + throw "端口 $PortNum 已被占用: $portOwner" + } + + $args = @( + 
'-m', $ModelPath, + '-mm', $MmprojPath, + '--n-gpu-layers', 'all', + '--flash-attn', 'on', + '--fit', 'on', + '--fit-target', '256', + '--temp', '1.0', + '--top-p', '0.95', + '--top-k', '20', + '--min-p', '0.1', + '--presence-penalty', '1.5', + '--repeat-penalty', '1.05', + '-n', $profile.MaxTokens, + '--reasoning-budget', $profile.ReasoningBudget, + '-c', $CtxSize, + '--image-min-tokens', $ImageMinTokens, + '--image-max-tokens', $ImageMaxTokens, + '--host', $HostAddr, + '--port', $PortNum, + '--webui' + ) + + if ($MmprojOffload -eq 'off') { + $args += '--no-mmproj-offload' + } else { + $args += '--mmproj-offload' + } + + $logPath = Join-Path $WebuiDir ("llama_server_9b_{0}.log" -f (Get-Date -Format 'yyyyMMdd_HHmmss')) + $errLogPath = Join-Path $WebuiDir ("llama_server_9b_{0}.err.log" -f (Get-Date -Format 'yyyyMMdd_HHmmss')) + if (Test-Path $logPath) { + Remove-Item -Path $logPath -Force -ErrorAction SilentlyContinue + } + if (Test-Path $errLogPath) { + Remove-Item -Path $errLogPath -Force -ErrorAction SilentlyContinue + } + $baselineGpuMemoryMiB = 0 + $gpuBaseline = Get-GpuMemoryUsedMiB + if ($gpuBaseline.Ok) { + $baselineGpuMemoryMiB = [int]$gpuBaseline.UsedMiB + } + Write-Host '后端进程启动中,正在装载模型到 GPU...' 
+ $proc = Start-Process -FilePath $BinPath -ArgumentList $args -WindowStyle Hidden -RedirectStandardOutput $logPath -RedirectStandardError $errLogPath -PassThru + Set-Content -Path $PidFile -Value $proc.Id -Encoding ascii + Set-Content -Path $CurrentLogFile -Value $logPath -Encoding utf8 + Set-Content -Path $CurrentErrLogFile -Value $errLogPath -Encoding utf8 + + $startupReady = $false + try { + $readyResult = Wait-Ready -ProcessId $proc.Id + if (-not $readyResult.Ready) { + $startupFailureReason = Get-StartupFailureReason -OutLogPath $logPath -ErrLogPath $errLogPath + if ($startupFailureReason) { + throw "服务启动失败: $startupFailureReason" + } + throw "服务启动失败: $($readyResult.Reason)" + } + $gpuInfo = Ensure-GpuOffload -ProcessId $proc.Id -BaselineMemoryMiB $baselineGpuMemoryMiB -OutLogPath $logPath -ErrLogPath $errLogPath + Write-Host "GPU 校验通过: $gpuInfo" + $startupReady = $true + } finally { + if (-not $startupReady) { + Show-RecentServerLogs -OutLogPath $logPath -ErrLogPath $errLogPath + Stop-Server + } + } + + Write-Host "已切换到 9b,思考模式: $ThinkMode" + Write-Host "地址: http://$HostAddr`:$PortNum" + Write-Host "视觉限制: image tokens $ImageMinTokens-$ImageMaxTokens, mmproj offload=$MmprojOffload, ctx=$CtxSize" + Show-Status +} + +switch ($Command) { + 'status' { Show-Status; break } + 'stop' { Stop-Server; Write-Host '服务已停止'; break } + '9b' { Start-Server; break } + default { + Write-Host '用法:' + Write-Host ' .\\switch_qwen35_webui.ps1 status' + Write-Host ' .\\switch_qwen35_webui.ps1 stop' + Write-Host ' .\\switch_qwen35_webui.ps1 9b [think-on|think-off]' + exit 1 + } +} diff --git a/switch_qwen35_webui.sh b/switch_qwen35_webui.sh new file mode 100644 index 0000000..4eb3906 --- /dev/null +++ b/switch_qwen35_webui.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PS1_PATH="$ROOT_DIR/switch_qwen35_webui.ps1" + +print_usage() { + cat <<'USAGE' +用法: + ./switch_qwen35_webui.sh status + 
./switch_qwen35_webui.sh stop + ./switch_qwen35_webui.sh 9b [think-on|think-off] + +说明: + WSL 入口会直接复用 Windows 主脚本的 GPU 强校验逻辑。 + 若未成功加载到 GPU,脚本会直接失败,不会回退 CPU。 +USAGE +} + +to_win_path_if_needed() { + local raw="$1" + if [[ -z "$raw" ]]; then + printf '' + return + fi + if [[ "$raw" == /* ]]; then + wslpath -w "$raw" + return + fi + printf '%s' "$raw" +} + +require_windows_power_shell() { + if ! command -v powershell.exe >/dev/null 2>&1; then + echo "未找到 powershell.exe,WSL 模式无法调用 Windows 后端脚本。" + exit 1 + fi + if [[ ! -f "$PS1_PATH" ]]; then + echo "缺少后端脚本: $PS1_PATH" + exit 1 + fi +} + +build_env_overrides() { + local -n out_ref=$1 + out_ref=() + for key in HOST PORT CTX_SIZE IMAGE_MIN_TOKENS IMAGE_MAX_TOKENS MMPROJ_OFFLOAD GPU_MEMORY_DELTA_MIN_MIB; do + if [[ -n "${!key-}" ]]; then + out_ref+=("$key=${!key}") + fi + done + + if [[ -n "${BIN_PATH-}" ]]; then + out_ref+=("BIN_PATH=$(to_win_path_if_needed "$BIN_PATH")") + fi + if [[ -n "${MODEL_PATH-}" ]]; then + out_ref+=("MODEL_PATH=$(to_win_path_if_needed "$MODEL_PATH")") + fi + if [[ -n "${MMPROJ_PATH-}" ]]; then + out_ref+=("MMPROJ_PATH=$(to_win_path_if_needed "$MMPROJ_PATH")") + fi +} + +main() { + local command="${1:-status}" + local think_mode="${2:-think-on}" + + case "$command" in + status|stop) ;; + 9b) + case "$think_mode" in + think-on|think-off) ;; + *) + echo "不支持的思考模式: $think_mode" + exit 1 + ;; + esac + ;; + *) + print_usage + exit 1 + ;; + esac + + require_windows_power_shell + + local ps1_win + ps1_win="$(wslpath -w "$PS1_PATH")" + + local env_overrides=() + build_env_overrides env_overrides + + if [[ "$command" == "9b" ]]; then + env "${env_overrides[@]}" powershell.exe -NoProfile -ExecutionPolicy Bypass -File "$ps1_win" "$command" "$think_mode" + return + fi + + env "${env_overrides[@]}" powershell.exe -NoProfile -ExecutionPolicy Bypass -File "$ps1_win" "$command" +} + +main "${1:-status}" "${2:-think-on}" diff --git a/toolhub_gateway_agent.py b/toolhub_gateway_agent.py new file mode 100644 index 
0000000..cde8ed2 --- /dev/null +++ b/toolhub_gateway_agent.py @@ -0,0 +1,446 @@ +import json +import os +import time +import datetime +import uuid +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Sequence, Union + +import requests +from qwen_agent.agents import Assistant +from qwen_agent.llm.schema import ContentItem, Message + +import agent_runtime # noqa: F401 +from agent_runtime import readonly_tools # noqa: F401 + +DEFAULT_SYSTEM_PROMPT = ( + '你是 Qwen3.5,本地部署的多模态中文助手。\n' + '默认中文回答。\n' + '当用户只是打招呼或闲聊时,自然回应即可,不要主动枚举全部工具。\n' + '你的目标是先使用可用工具获得可验证信息,再给出结论。\n' + '规则:\n' + '1. 对最新信息先用 web_search,再按需用 web_fetch 或 web_extractor 抓取正文。\n' + '2. 对人名、作品名、小众概念等不确定知识先 web_search,若结果歧义则改写关键词再检索一次。\n' + '3. 核心规则:你已具备 filesystem 的读写能力。你可以读取文件,如果用户有需求,你也可以调用 write_file 工具进行写入。\n' + '4. 图片问题先看整图,细节再用 image_zoom_in_tool,使用相对坐标。\n' + '5. 工具失败时必须明确说明原因,不得伪造结果。\n' + '6. 联网任务要控制上下文预算,优先少量高质量来源。\n' + '7. 严禁在未获授权的情况下使用 filesystem 工具查看助手自身的源代码或运行目录。\n' + '8. 联网任务要控制上下文预算,优先少量高质量来源,避免搬运大段无关正文。\n' + '9. 
长期记忆(主动意识):你拥有 manage_memory 工具。当你从对话中识别出以下内容时,必须【主动】调用 add 操作:\n - 用户的明确偏好(如:喜欢 MD 格式、不喜欢繁琐说明)。\n - 重要的个人事实(如:职业、项目代号、系统配置路径)。\n - 约定的工作习惯(如:每段代码都要加注释)。\n 执行后,在回复中自然地告知用户“我已记下此习惯/信息”。当用户问“你了解我什么”或要求修改时,配合 list 和 delete 操作。\n' +) + +DEFAULT_FUNCTION_LIST = [ + 'web_search', + 'web_fetch', + 'web_extractor', + 'image_search', + 'image_zoom_in_tool', + 'filesystem', + 'manage_memory', # 👈 检查这里,确保引号和逗号都齐了 +] +TIMINGS_EMIT_INTERVAL_SEC = 0.8 +MAX_FALLBACK_PART_TEXT_CHARS = 512 + +# --- 注入逻辑开始 --- +def get_injected_memory() -> str: + """从环境变量指定的路径动态加载记忆""" + path_str = os.getenv('MEMORY_FILE_PATH', './memory.json') + memory_path = Path(path_str).resolve() + + if not memory_path.exists(): + return "" + try: + with open(memory_path, 'r', encoding='utf-8') as f: + memories = json.load(f) + if not isinstance(memories, list) or not memories: + return "" + memory_str = "\n".join([f"- {m}" for m in memories]) + return f"\n【长期记忆库(已自动加载)】:\n{memory_str}\n" + except Exception as e: + print(f"注入记忆失败: {e}") + return "" + +def fetch_model_id(model_server: str, timeout_sec: int) -> str: + response = requests.get(f'{model_server}/models', timeout=timeout_sec) + response.raise_for_status() + return response.json()['data'][0]['id'] + + +def _extract_image_uri(part: Dict[str, Any]) -> Optional[str]: + keys = ('image_url', 'image', 'url', 'input_image', 'image_uri') + for key in keys: + value = part.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + if isinstance(value, dict): + nested = value.get('url') or value.get('image_url') or value.get('image') + if isinstance(nested, str) and nested.strip(): + return nested.strip() + return None + + +def _build_compact_part_text(part: Dict[str, Any], part_type: Any) -> str: + part_keys = sorted(str(k) for k in part.keys()) + payload = {'type': str(part_type or 'unknown'), 'keys': part_keys[:12]} + text = part.get('text') + if isinstance(text, str) and text.strip(): + payload['text'] = 
text.strip()[:MAX_FALLBACK_PART_TEXT_CHARS] + return json.dumps(payload, ensure_ascii=False) + + +def extract_generate_cfg(payload: Dict[str, Any]) -> Dict[str, Any]: + cfg: Dict[str, Any] = {} + keys = ('temperature', 'top_p', 'top_k', 'presence_penalty', 'frequency_penalty') + for key in keys: + value = payload.get(key) + if value is not None: + cfg[key] = value + repeat_penalty = payload.get('repeat_penalty') + if repeat_penalty is not None: + cfg['repetition_penalty'] = repeat_penalty + extra_body = payload.get('extra_body') + if not isinstance(extra_body, dict): + extra_body = {} + chat_template_kwargs = extra_body.get('chat_template_kwargs') + if not isinstance(chat_template_kwargs, dict): + chat_template_kwargs = {} + # 默认开启思考,若上层显式传入 false 则保持用户值。 + chat_template_kwargs.setdefault('enable_thinking', True) + extra_body['chat_template_kwargs'] = chat_template_kwargs + + requested_reasoning_format = payload.get('reasoning_format') + if isinstance(requested_reasoning_format, str) and requested_reasoning_format.strip(): + extra_body.setdefault('reasoning_format', requested_reasoning_format.strip()) + else: + extra_body.setdefault('reasoning_format', 'deepseek') + extra_body.setdefault('reasoning_budget', -1) + cfg['extra_body'] = extra_body + max_tokens = payload.get('max_tokens') + if isinstance(max_tokens, int) and max_tokens > 0: + cfg['max_tokens'] = max_tokens + if not cfg: + cfg = {'temperature': 0.7, 'top_p': 0.9, 'max_tokens': 512} + return cfg + + +def build_agent( + model_server: str, + timeout_sec: int, + generate_cfg: Dict[str, Any], + model_id: Optional[str] = None, + system_prompt: str = DEFAULT_SYSTEM_PROMPT, +) -> Assistant: + if not model_id: + model_id = fetch_model_id(model_server, timeout_sec) + llm_cfg = { + 'model': model_id, + 'model_server': model_server, + 'api_key': os.getenv('OPENAI_API_KEY', 'EMPTY'), + 'model_type': 'qwenvl_oai', + 'generate_cfg': generate_cfg, + } + + # === 核心改造 1:动态组装功能列表 === + actual_function_list = 
list(DEFAULT_FUNCTION_LIST) + if os.getenv('ENABLE_FILE_WRITE', 'False').lower() == 'true': + import agent_runtime.write_tools # 触发 register_tool 注册 + actual_function_list.append('write_file') + + # === 核心改造 2:动态注入记忆与实时时间 === + # 1. 先从物理文件加载记忆 + persistent_memory = get_injected_memory() + + # 2. 获取实时时间 + now = datetime.datetime.now() + current_time = now.strftime("%Y年%m月%d日 %H:%M:%S") + weekdays = ["一", "二", "三", "四", "五", "六", "日"] + dynamic_context = f"【系统实时状态】\n当前时间:{current_time},星期{weekdays[now.weekday()]}。\n" + + # 3. 按照:时间 -> 长期记忆 -> 原始指令 的顺序拼接 + actual_system_prompt = dynamic_context + persistent_memory + system_prompt + + return Assistant( + name='Qwen3.5-9B-ToolHub-8080', + description='8080 网页工具代理', + llm=llm_cfg, + function_list=actual_function_list, + system_message=actual_system_prompt, + ) + +def to_content_items(content: Any) -> Union[str, List[ContentItem]]: + if isinstance(content, str): + return content + if not isinstance(content, list): + return str(content) + + items: List[ContentItem] = [] + for part in content: + if not isinstance(part, dict): + items.append(ContentItem(text=str(part))) + continue + part_type = part.get('type') + if part_type in (None, 'text', 'input_text'): + text = part.get('text', '') + if text: + items.append(ContentItem(text=str(text))) + continue + image_uri = _extract_image_uri(part) + if image_uri: + items.append(ContentItem(image=image_uri)) + continue + items.append(ContentItem(text=_build_compact_part_text(part, part_type))) + return items if items else '' + + +def to_qwen_messages(openai_messages: Sequence[Dict[str, Any]]) -> List[Message]: + qwen_messages: List[Message] = [] + for item in openai_messages: + role = str(item.get('role', '')).strip() + if role not in {'system', 'user', 'assistant'}: + continue + qwen_messages.append(Message(role=role, content=to_content_items(item.get('content', '')))) + if not qwen_messages: + raise ValueError('messages 为空或不包含可用角色') + return qwen_messages + + +def 
content_to_text(content: Any) -> str: + if isinstance(content, str): + return content + if not isinstance(content, list): + return str(content) + + texts: List[str] = [] + for item in content: + if isinstance(item, str): + texts.append(item) + continue + if isinstance(item, dict) and item.get('text'): + texts.append(str(item['text'])) + continue + text = getattr(item, 'text', None) + if text: + texts.append(str(text)) + return '\n'.join(texts).strip() + + +def extract_answer_and_reasoning(messages: Sequence[Message]) -> Dict[str, str]: + answer = '' + reasoning_parts: List[str] = [] + for message in messages: + if getattr(message, 'role', '') != 'assistant': + continue + content_text = content_to_text(message.get('content', '')) + if content_text: + answer = content_text + reasoning_text = content_to_text(message.get('reasoning_content', '')) + if reasoning_text: + reasoning_parts.append(reasoning_text) + return {'answer': answer, 'reasoning': '\n'.join(reasoning_parts).strip()} + + +def run_chat_completion(payload: Dict[str, Any], model_server: str, timeout_sec: int) -> Dict[str, str]: + openai_messages = payload.get('messages') + if not isinstance(openai_messages, list): + raise ValueError('messages 必须是数组') + + model_id = fetch_model_id(model_server, timeout_sec) + agent = build_agent(model_server, timeout_sec, extract_generate_cfg(payload), model_id=model_id) + qwen_messages = to_qwen_messages(openai_messages) + final_batch = None + for batch in agent.run(qwen_messages): + final_batch = batch + if not final_batch: + raise RuntimeError('未收到模型输出') + + texts = extract_answer_and_reasoning(final_batch) + answer = texts['answer'] + reasoning = texts['reasoning'] + return {'model': model_id, 'answer': answer, 'reasoning': reasoning} + + +def build_sse_chunk( + chat_id: str, + created: int, + model: str, + delta: Dict[str, Any], + finish_reason: Optional[str] = None, + timings: Optional[Dict[str, Any]] = None, +) -> bytes: + chunk = { + 'id': chat_id, + 'object': 
'chat.completion.chunk', + 'created': created, + 'model': model, + 'choices': [{'index': 0, 'delta': delta, 'finish_reason': finish_reason}], + } + if timings: + chunk['timings'] = timings + return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n".encode('utf-8') + + +def text_delta(previous: str, current: str) -> str: + if not current: + return '' + if current.startswith(previous): + return current[len(previous):] + return current + + +def model_base_url(model_server: str) -> str: + if model_server.endswith('/v1'): + return model_server[:-3] + return model_server.rstrip('/') + + +def count_text_tokens(model_server: str, timeout_sec: int, text: str) -> int: + if not text: + return 0 + url = f'{model_base_url(model_server)}/tokenize' + response = requests.post(url, json={'content': text}, timeout=timeout_sec) + response.raise_for_status() + data = response.json() + tokens = data.get('tokens') + if not isinstance(tokens, list): + raise ValueError('tokenize 返回格式异常') + return len(tokens) + + +def build_live_timings(token_count: int, elapsed_sec: float) -> Dict[str, Any]: + safe_elapsed = elapsed_sec if elapsed_sec > 0 else 1e-6 + return { + 'prompt_n': 0, + 'prompt_ms': 0, + 'predicted_n': token_count, + 'predicted_ms': safe_elapsed * 1000.0, + 'predicted_per_second': token_count / safe_elapsed, + 'cache_n': 0, + } + + +def merge_generated_text(reasoning: str, answer: str) -> str: + if reasoning and answer: + return f'{reasoning}\n{answer}' + return reasoning or answer + + +def stream_chat_completion(payload: Dict[str, Any], model_server: str, timeout_sec: int) -> Iterable[bytes]: + openai_messages = payload.get('messages') + if not isinstance(openai_messages, list): + raise ValueError('messages 必须是数组') + + model_id = fetch_model_id(model_server, timeout_sec) + agent = build_agent(model_server, timeout_sec, extract_generate_cfg(payload), model_id=model_id) + qwen_messages = to_qwen_messages(openai_messages) + + now = int(time.time()) + chat_id = 
f'chatcmpl-{uuid.uuid4().hex}' + yield build_sse_chunk(chat_id, now, model_id, {'role': 'assistant'}) + + previous_answer = '' + previous_reasoning = '' + started_at = time.perf_counter() + last_timing_at = started_at + last_reported_tokens = -1 + last_counted_text = '' + for batch in agent.run(qwen_messages): + texts = extract_answer_and_reasoning(batch) + answer = texts['answer'] + reasoning = texts['reasoning'] + + reasoning_inc = text_delta(previous_reasoning, reasoning) + if reasoning_inc: + yield build_sse_chunk(chat_id, now, model_id, {'reasoning_content': reasoning_inc}) + + answer_inc = text_delta(previous_answer, answer) + if answer_inc: + yield build_sse_chunk(chat_id, now, model_id, {'content': answer_inc}) + + generated_text = merge_generated_text(reasoning, answer) + current_time = time.perf_counter() + should_emit_timing = ( + generated_text + and generated_text != last_counted_text + and (current_time - last_timing_at) >= TIMINGS_EMIT_INTERVAL_SEC + ) + if should_emit_timing: + token_count = count_text_tokens(model_server, timeout_sec, generated_text) + if token_count != last_reported_tokens: + timings = build_live_timings(token_count, current_time - started_at) + yield build_sse_chunk(chat_id, now, model_id, {}, timings=timings) + last_reported_tokens = token_count + last_counted_text = generated_text + last_timing_at = current_time + + previous_reasoning = reasoning + previous_answer = answer + + final_generated_text = merge_generated_text(previous_reasoning, previous_answer) + if final_generated_text and final_generated_text != last_counted_text: + final_time = time.perf_counter() + token_count = count_text_tokens(model_server, timeout_sec, final_generated_text) + if token_count != last_reported_tokens: + timings = build_live_timings(token_count, final_time - started_at) + yield build_sse_chunk(chat_id, now, model_id, {}, timings=timings) + + yield build_sse_chunk(chat_id, now, model_id, {}, 'stop') + yield b'data: [DONE]\n\n' + + +def 
build_non_stream_response(answer: str, model: str, reasoning: str = '') -> Dict[str, Any]: + now = int(time.time()) + message = {'role': 'assistant', 'content': answer} + if reasoning: + message['reasoning_content'] = reasoning + return { + 'id': f'chatcmpl-{uuid.uuid4().hex}', + 'object': 'chat.completion', + 'created': now, + 'model': model, + 'choices': [{ + 'index': 0, + 'message': message, + 'finish_reason': 'stop', + }], + 'usage': {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}, + } + + +def sse_lines(answer: str, model: str, reasoning: str = '') -> Iterable[bytes]: + now = int(time.time()) + chat_id = f'chatcmpl-{uuid.uuid4().hex}' + chunks = [ + { + 'id': chat_id, + 'object': 'chat.completion.chunk', + 'created': now, + 'model': model, + 'choices': [{'index': 0, 'delta': {'role': 'assistant'}, 'finish_reason': None}], + }, + ] + if reasoning: + chunks.append({ + 'id': chat_id, + 'object': 'chat.completion.chunk', + 'created': now, + 'model': model, + 'choices': [{'index': 0, 'delta': {'reasoning_content': reasoning}, 'finish_reason': None}], + }) + chunks.append({ + 'id': chat_id, + 'object': 'chat.completion.chunk', + 'created': now, + 'model': model, + 'choices': [{'index': 0, 'delta': {'content': answer}, 'finish_reason': None}], + }) + chunks.append({ + 'id': chat_id, + 'object': 'chat.completion.chunk', + 'created': now, + 'model': model, + 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}], + }) + for chunk in chunks: + yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n".encode('utf-8') + yield b'data: [DONE]\n\n'