Files
Qwen3.5-9B-ToolHub-Enhanced…/switch_qwen35_webui.ps1
2026-03-11 16:49:00 +08:00

500 lines
18 KiB
PowerShell
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Script parameters.
#   -Command   : action to perform; the dispatch switch at the end of the script
#                handles 'status' (default), 'stop' and '9b'.
#   -ThinkMode : runtime profile used by Start-Server; Resolve-RuntimeProfile
#                accepts 'think-on' (default) and 'think-off'.
param(
[string]$Command = 'status',
[string]$ThinkMode = 'think-on'
)
# Turn every error into a terminating one and silence progress bars.
$ErrorActionPreference = 'Stop'
$ProgressPreference = 'SilentlyContinue'
# Directory that contains this script; all relative paths below hang off it.
$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
$RootDir = (Resolve-Path $ScriptDir).Path
# Optional helper script. Import-EnvFile and Resolve-ManagedPath are not defined
# in this file - presumably env_config.ps1 provides them (TODO confirm). Note that
# Resolve-ManagedPath is called below even when env_config.ps1 is absent.
$EnvConfig = Join-Path $RootDir 'env_config.ps1'
if (Test-Path $EnvConfig) {
. $EnvConfig
Import-EnvFile -Path (Join-Path $RootDir '.env')
}
# Settings: each value comes from an environment variable when set, otherwise
# from the literal default on the same line. All values are kept as strings and
# are validated later by Validate-Limits.
$BinPath = if ($env:BIN_PATH) { $env:BIN_PATH } else { Join-Path $RootDir '.tmp\llama_win_cuda\llama-server.exe' }
$HostAddr = if ($env:HOST) { $env:HOST } else { '127.0.0.1' }
$PortNum = if ($env:PORT) { $env:PORT } else { '8081' }
$CtxSize = if ($env:CTX_SIZE) { $env:CTX_SIZE } else { '16384' }
$ImageMinTokens = if ($env:IMAGE_MIN_TOKENS) { $env:IMAGE_MIN_TOKENS } else { '256' }
$ImageMaxTokens = if ($env:IMAGE_MAX_TOKENS) { $env:IMAGE_MAX_TOKENS } else { '1024' }
$MmprojOffload = if ($env:MMPROJ_OFFLOAD) { $env:MMPROJ_OFFLOAD } else { 'off' }
# Model and multimodal projector files (MODEL_PATH / MMPROJ_PATH override the defaults).
$ModelPath = Resolve-ManagedPath -BaseDir $RootDir -Value $env:MODEL_PATH -DefaultRelativePath '.tmp\models\crossrepo\lmstudio-community__Qwen3.5-9B-GGUF\Qwen3.5-9B-Q4_K_M.gguf'
$MmprojPath = Resolve-ManagedPath -BaseDir $RootDir -Value $env:MMPROJ_PATH -DefaultRelativePath '.tmp\models\crossrepo\lmstudio-community__Qwen3.5-9B-GGUF\mmproj-Qwen3.5-9B-BF16.gguf'
# Runtime state directory: PID file plus two pointer files whose first line is
# the path of the current stdout / stderr log (see Start-Server and Show-Status).
$WebuiDir = Join-Path $RootDir '.tmp\webui'
$PidFile = Join-Path $WebuiDir 'llama_server.pid'
$CurrentLogFile = Join-Path $WebuiDir 'current.log'
$CurrentErrLogFile = Join-Path $WebuiDir 'current.err.log'
# Minimum growth in used GPU memory (MiB) that Test-GpuReadyByNvidiaSmi accepts as evidence of offload.
$GpuMemoryDeltaMinMiB = if ($env:GPU_MEMORY_DELTA_MIN_MIB) { $env:GPU_MEMORY_DELTA_MIN_MIB } else { '1024' }
# Timeouts (seconds) for Wait-Ready and Ensure-GpuOffload respectively.
$BackendReadyTimeoutSec = if ($env:BACKEND_READY_TIMEOUT_SEC) { $env:BACKEND_READY_TIMEOUT_SEC } else { '180' }
$GpuVerifyTimeoutSec = if ($env:GPU_VERIFY_TIMEOUT_SEC) { $env:GPU_VERIFY_TIMEOUT_SEC } else { '180' }
# Spinner timing: redraw interval and the interval between backend probes (milliseconds).
$SpinnerFrameIntervalMs = 120
$SpinnerProbeIntervalMs = 1000
# Creates the directory $Path when nothing exists at that location yet.
# Produces no pipeline output.
function Ensure-Dir {
    param([string]$Path)
    if (Test-Path $Path) {
        return
    }
    $null = New-Item -Path $Path -ItemType Directory -Force
}
# Returns $true when GET http://<HostAddr>:<PortNum>/health completes without
# an error within 2 seconds, otherwise $false.
function Test-Health {
    $healthUri = "http://$HostAddr`:$PortNum/health"
    try {
        Invoke-RestMethod -Uri $healthUri -Method Get -TimeoutSec 2 | Out-Null
    } catch {
        return $false
    }
    return $true
}
# Queries /v1/models on the backend and returns the id of the first listed
# model as a string. Returns '' when the list is empty or the request fails.
function Get-ModelId {
    $modelsUri = "http://$HostAddr`:$PortNum/v1/models"
    try {
        $response = Invoke-RestMethod -Uri $modelsUri -Method Get -TimeoutSec 3
        if (-not ($response.data -and $response.data.Count -gt 0)) {
            return ''
        }
        return [string]$response.data[0].id
    } catch {
        return ''
    }
}
# Redraws a single-line progress indicator in place (carriage return, no newline).
#   Label   - text shown before the spinner glyph
#   Current - elapsed amount; shown rounded down to a whole number
#   Total   - upper bound shown after the slash
#   Tick    - frame counter used to pick one of the four spinner glyphs
function Write-SpinnerLine {
    param(
        [string]$Label,
        [double]$Current,
        [int]$Total,
        [int]$Tick
    )
    $glyphs = '|', '/', '-', '\'
    $glyph = $glyphs[$Tick % $glyphs.Count]
    $elapsedText = [string][int][Math]::Floor($Current)
    $line = "`r" + $Label + ' ' + $glyph + ' ' + $elapsedText + '/' + [string]$Total
    Write-Host -NoNewline $line
}
# Ends the in-place spinner line by writing an empty host line (i.e. a newline).
function Complete-SpinnerLine {
    Write-Host -Object ''
}
# Returns $true when Get-Process can find a process with the given id,
# $false when the lookup raises an error.
function Test-ProcessRunning {
    param([int]$ProcessId)
    try {
        Get-Process -Id $ProcessId -ErrorAction Stop | Out-Null
    } catch {
        return $false
    }
    return $true
}
# Waits until the backend reports a loaded model, the process dies, or the
# timeout ($BackendReadyTimeoutSec seconds) elapses.
# The spinner is redrawn every $SpinnerFrameIntervalMs; the backend is probed
# when the elapsed time reaches the next scheduled probe, which advances by
# $SpinnerProbeIntervalMs after each probe.
# Returns a hashtable: Ready (bool) and Reason (string).
function Wait-Ready {
    param([int]$ProcessId)
    $limitSec = [int]$BackendReadyTimeoutSec
    $timer = [System.Diagnostics.Stopwatch]::StartNew()
    $probeDueMs = 0
    $frameNo = 0
    while ($timer.Elapsed.TotalSeconds -lt $limitSec) {
        Write-SpinnerLine -Label '后端加载中...' -Current $timer.Elapsed.TotalSeconds -Total $limitSec -Tick $frameNo
        $frameNo++
        if ($timer.ElapsedMilliseconds -ge $probeDueMs) {
            $alive = Test-ProcessRunning -ProcessId $ProcessId
            if (-not $alive) {
                Complete-SpinnerLine
                return @{ Ready = $false; Reason = 'llama-server 进程已提前退出' }
            }
            # Only ask for the model id once the health endpoint answers.
            $loadedModel = if (Test-Health) { Get-ModelId } else { '' }
            if (-not [string]::IsNullOrWhiteSpace($loadedModel)) {
                Complete-SpinnerLine
                return @{ Ready = $true; Reason = "模型已就绪: $loadedModel" }
            }
            $probeDueMs += $SpinnerProbeIntervalMs
        }
        Start-Sleep -Milliseconds $SpinnerFrameIntervalMs
    }
    Complete-SpinnerLine
    return @{ Ready = $false; Reason = "后端在 $limitSec 秒内未就绪" }
}
# Returns the last 400 lines of the file at $Path joined with "`n".
# Returns '' when the file does not exist, yields no lines, or cannot be read.
function Read-LogText {
    param([string]$Path)
    if (Test-Path $Path) {
        try {
            $tail = Get-Content -Path $Path -Tail 400 -ErrorAction SilentlyContinue
            if ($null -ne $tail) {
                return ($tail -join "`n")
            }
        } catch {
        }
    }
    return ''
}
# Prints a failure banner followed by the last 120 lines of the stdout log and
# of the stderr log. A log that does not exist is skipped together with its heading.
# Headings go to the host; the log lines themselves are emitted on the pipeline.
function Show-RecentServerLogs {
    param(
        [string]$OutLogPath,
        [string]$ErrLogPath
    )
    Write-Host '后端启动失败,最近日志如下:'
    $sections = @(
        @{ Heading = '=== 标准输出 ==='; File = $OutLogPath },
        @{ Heading = '=== 标准错误 ==='; File = $ErrLogPath }
    )
    foreach ($section in $sections) {
        if (-not (Test-Path $section.File)) {
            continue
        }
        Write-Host $section.Heading
        Get-Content -Path $section.File -Tail 120 -ErrorAction SilentlyContinue
    }
}
# Looks for evidence of GPU offload in the tails of the server's stdout and
# stderr logs (as returned by Read-LogText).
# Returns a hashtable: Ready (bool) and Reason (string).
#   - "offloaded X/Y layers to GPU" with X > 0  -> Ready
#   - the same line with X = 0                  -> not ready
#   - a CUDA failure / CPU fallback line        -> not ready, dedicated reason
#   - nothing recognisable                      -> not ready
function Test-GpuReadyFromLogs {
    param(
        [string]$OutLogPath,
        [string]$ErrLogPath
    )
    $content = (Read-LogText -Path $OutLogPath) + "`n" + (Read-LogText -Path $ErrLogPath)
    if ([string]::IsNullOrWhiteSpace($content)) {
        return @{ Ready = $false; Reason = '日志为空' }
    }
    $match = [regex]::Match($content, 'offloaded\s+(\d+)\/(\d+)\s+layers\s+to\s+GPU', [System.Text.RegularExpressions.RegexOptions]::IgnoreCase)
    if ($match.Success) {
        $offloaded = [int]$match.Groups[1].Value
        $total = [int]$match.Groups[2].Value
        if ($offloaded -gt 0) {
            return @{ Ready = $true; Reason = "offloaded $offloaded/$total" }
        }
        return @{ Ready = $false; Reason = "offloaded 0/$total" }
    }
    # The pattern is single-quoted, so PowerShell escapes such as `n are NOT
    # expanded; the regex escapes \r\n must be used to mean "same line only".
    $cpuFallbackPattern = 'cuda[^\r\n]*failed|no cuda-capable device|unable to initialize cuda|using cpu'
    if ($content -match $cpuFallbackPattern) {
        return @{ Ready = $false; Reason = '检测到 CUDA 初始化失败或 CPU 回退' }
    }
    return @{ Ready = $false; Reason = '未检测到 GPU 卸载证据' }
}
# Polls until the llama-server process shows evidence of running on the GPU.
# Success requires the CUDA backend module to be loaded (Test-CudaBackendLoaded)
# AND at least one of: an offload line in the logs (Test-GpuReadyFromLogs) or a
# sufficient rise in used GPU memory (Test-GpuReadyByNvidiaSmi).
#   ProcessId         - id of the llama-server process
#   BaselineMemoryMiB - used GPU memory measured before the server was started
#   OutLogPath / ErrLogPath - server stdout / stderr log files
# Returns a string "<module reason>;<log or nvidia-smi reason>".
# Throws when the process exits during verification or when no evidence is
# found within $GpuVerifyTimeoutSec seconds.
function Ensure-GpuOffload {
    param(
        [int]$ProcessId,
        [int]$BaselineMemoryMiB,
        [string]$OutLogPath,
        [string]$ErrLogPath
    )
    # Placeholders so the timeout message is meaningful even if no probe ran.
    $moduleResult = @{ Ready = $false; Reason = '未执行检查' }
    $result = @{ Ready = $false; Reason = '未知原因' }
    $nvidiaResult = @{ Ready = $false; Reason = '未执行检查' }
    $timeoutSec = [int]$GpuVerifyTimeoutSec
    $stopwatch = [System.Diagnostics.Stopwatch]::StartNew()
    $nextProbeMs = 0
    $tick = 0
    while ($stopwatch.Elapsed.TotalSeconds -lt $timeoutSec) {
        Write-SpinnerLine -Label 'GPU 校验中...' -Current $stopwatch.Elapsed.TotalSeconds -Total $timeoutSec -Tick $tick
        if ($stopwatch.ElapsedMilliseconds -ge $nextProbeMs) {
            if (-not (Test-ProcessRunning -ProcessId $ProcessId)) {
                Complete-SpinnerLine
                throw 'llama-server 在 GPU 校验期间提前退出'
            }
            $moduleResult = Test-CudaBackendLoaded -ProcessId $ProcessId
            $result = Test-GpuReadyFromLogs -OutLogPath $OutLogPath -ErrLogPath $ErrLogPath
            $nvidiaResult = Test-GpuReadyByNvidiaSmi -BaselineMemoryMiB $BaselineMemoryMiB
            if ($moduleResult.Ready -and ($result.Ready -or $nvidiaResult.Ready)) {
                Complete-SpinnerLine
                # Prefer the log evidence when both are available.
                $evidence = if ($result.Ready) { $result.Reason } else { $nvidiaResult.Reason }
                # Separate the two reasons; previously they were glued together.
                return "$($moduleResult.Reason);$evidence"
            }
            $nextProbeMs += $SpinnerProbeIntervalMs
        }
        Start-Sleep -Milliseconds $SpinnerFrameIntervalMs
        $tick++
    }
    Complete-SpinnerLine
    throw "已禁止 CPU 回退,但未检测到 GPU 卸载。模块检查: $($moduleResult.Reason);nvidia-smi: $($nvidiaResult.Reason);日志检查: $($result.Reason)"
}
# Checks whether the given process has a module named ggml-cuda*.dll loaded.
# Returns a hashtable: Ready (bool) and Reason (string). When the module list
# cannot be read, Ready is $false with a dedicated reason.
function Test-CudaBackendLoaded {
    param([int]$ProcessId)
    try {
        $loadedModules = Get-Process -Id $ProcessId -Module -ErrorAction Stop
        $cudaModule = $null
        foreach ($module in $loadedModules) {
            if ($module.ModuleName -match '^ggml-cuda.*\.dll$') {
                $cudaModule = $module
                break
            }
        }
        if ($null -eq $cudaModule) {
            return @{ Ready = $false; Reason = '未检测到 ggml-cuda*.dll' }
        }
        return @{ Ready = $true; Reason = "检测到 $($cudaModule.ModuleName) 已加载" }
    } catch {
        return @{ Ready = $false; Reason = '无法读取 llama-server 进程模块' }
    }
}
# Decides GPU readiness from the growth in used GPU memory since the baseline.
#   BaselineMemoryMiB - used memory (MiB) measured before the server started
# Ready when current usage is > 0 and has grown by at least
# $GpuMemoryDeltaMinMiB. When the snapshot itself failed, its reason is passed on.
# Returns a hashtable: Ready (bool) and Reason (string).
function Test-GpuReadyByNvidiaSmi {
    param([int]$BaselineMemoryMiB)
    $snapshot = Get-GpuMemoryUsedMiB
    if (-not $snapshot.Ok) {
        return @{ Ready = $false; Reason = $snapshot.Reason }
    }
    # Copy the property into a plain variable: "${snapshot.UsedMiB}" inside a
    # string names a variable literally called 'snapshot.UsedMiB' (always empty),
    # it does not read the property.
    $usedMiB = $snapshot.UsedMiB
    $delta = $usedMiB - $BaselineMemoryMiB
    if ($usedMiB -gt 0 -and $delta -ge [int]$GpuMemoryDeltaMinMiB) {
        return @{ Ready = $true; Reason = "nvidia-smi 显存占用 ${usedMiB}MiB, 较基线增加 ${delta}MiB" }
    }
    return @{ Ready = $false; Reason = "显存占用 ${usedMiB}MiB, 较基线增加 ${delta}MiB, 阈值 ${GpuMemoryDeltaMinMiB}MiB" }
}
# Reads the used GPU memory via nvidia-smi.
# Returns a hashtable: Ok (bool), UsedMiB (int) and Reason (string).
# UsedMiB is the largest value among all rows nvidia-smi reports.
# Never throws: a missing tool, a launch failure, a non-zero exit code or
# unparsable output all yield Ok = $false with an explanatory Reason.
function Get-GpuMemoryUsedMiB {
    $nvidia = Get-Command nvidia-smi.exe -ErrorAction SilentlyContinue
    if (-not $nvidia) {
        $nvidia = Get-Command nvidia-smi -ErrorAction SilentlyContinue
    }
    if (-not $nvidia) {
        return @{ Ok = $false; UsedMiB = 0; Reason = 'nvidia-smi 不可用' }
    }
    # Function-local override. With the script-wide 'Stop' preference, Windows
    # PowerShell turns any stderr line of a native command redirected with 2>&1
    # into a terminating error, which would abort the caller instead of being
    # reported through the Ok/Reason result.
    $ErrorActionPreference = 'Continue'
    try {
        $output = & $nvidia.Source '--query-gpu=memory.used' '--format=csv,noheader,nounits' 2>&1
    } catch {
        return @{ Ok = $false; UsedMiB = 0; Reason = 'nvidia-smi 执行失败' }
    }
    if ($LASTEXITCODE -ne 0) {
        return @{ Ok = $false; UsedMiB = 0; Reason = 'nvidia-smi 执行失败' }
    }
    # Keep only rows that consist purely of digits; stderr noise is dropped here.
    $rows = @($output | ForEach-Object { "$_".Trim() } | Where-Object { $_ -match '^[0-9]+$' })
    if ($rows.Count -eq 0) {
        return @{ Ok = $false; UsedMiB = 0; Reason = 'nvidia-smi 未返回显存数据' }
    }
    $maxUsed = 0
    foreach ($row in $rows) {
        $memValue = 0
        if ([int]::TryParse($row, [ref]$memValue) -and $memValue -gt $maxUsed) {
            $maxUsed = $memValue
        }
    }
    return @{ Ok = $true; UsedMiB = $maxUsed; Reason = 'ok' }
}
# Scans the tails of the stdout and stderr logs for a known startup failure.
# Currently recognises one case: the server could not bind its HTTP socket.
# Returns a user-facing message naming the busy port, or '' when nothing is recognised.
function Get-StartupFailureReason {
    param(
        [string]$OutLogPath,
        [string]$ErrLogPath
    )
    $stdoutText = Read-LogText -Path $OutLogPath
    $stderrText = Read-LogText -Path $ErrLogPath
    $combined = $stdoutText + "`n" + $stderrText
    if ([string]::IsNullOrWhiteSpace($combined)) {
        return ''
    }
    $bindPattern = "couldn't bind HTTP server socket, hostname:\s*([^,]+), port:\s*([0-9]+)"
    $hit = [regex]::Match($combined, $bindPattern, [System.Text.RegularExpressions.RegexOptions]::IgnoreCase)
    if (-not $hit.Success) {
        return ''
    }
    # Group 2 is the port number; group 1 (hostname) is not used in the message.
    $port = $hit.Groups[2].Value
    return "端口 $port 已被占用,请先关闭占用该端口的服务,再重新启动"
}
# Describes which processes are listening on the given local TCP port.
# Returns a comma-separated list such as "name (PID 123), PID 456" (the bare
# form is used when the process can no longer be looked up), or '' when nobody
# listens on the port or the lookup fails.
function Get-PortOwnerSummary {
    param([string]$Port)
    try {
        $listening = Get-NetTCPConnection -LocalPort ([int]$Port) -State Listen -ErrorAction SilentlyContinue
        if (-not $listening) {
            return ''
        }
        $ownerIds = @($listening | Select-Object -ExpandProperty OwningProcess -Unique)
        $labels = foreach ($ownerId in $ownerIds) {
            $ownerProcess = Get-Process -Id $ownerId -ErrorAction SilentlyContinue
            if ($ownerProcess) {
                '{0} (PID {1})' -f $ownerProcess.ProcessName, $ownerProcess.Id
            } else {
                'PID {0}' -f $ownerId
            }
        }
        return (@($labels) -join ', ')
    } catch {
        return ''
    }
}
# Stops the backend: kills the process recorded in $PidFile (if any) and every
# process named 'llama-server', waits briefly for them to exit, then removes
# the PID file and the stderr pointer file. Best effort: failures are ignored.
# NOTE(review): the PID file is trusted as-is; a stale file whose PID has been
# reused would target an unrelated process - confirm whether that matters here.
function Stop-Server {
    $stoppedIds = @()
    if (Test-Path $PidFile) {
        $raw = Get-Content -Path $PidFile -ErrorAction SilentlyContinue | Select-Object -First 1
        $serverPid = 0
        if ([int]::TryParse([string]$raw, [ref]$serverPid) -and $serverPid -gt 0) {
            try {
                Stop-Process -Id $serverPid -Force -ErrorAction SilentlyContinue
            } catch {}
            $stoppedIds += $serverPid
        }
    }
    $procs = Get-Process -Name 'llama-server' -ErrorAction SilentlyContinue
    if ($procs) {
        $procs | Stop-Process -Force -ErrorAction SilentlyContinue
        $stoppedIds += @($procs | ForEach-Object { $_.Id })
    }
    # Stop-Process returns before the process has actually gone away. Wait (at
    # most 5 s each) so the listening port is released before the caller checks
    # it; Start-Server runs Get-PortOwnerSummary right after this function.
    foreach ($stoppedId in ($stoppedIds | Select-Object -Unique)) {
        try {
            Wait-Process -Id $stoppedId -Timeout 5 -ErrorAction SilentlyContinue
        } catch {}
    }
    if (Test-Path $PidFile) {
        Remove-Item -Path $PidFile -Force -ErrorAction SilentlyContinue
    }
    if (Test-Path $CurrentErrLogFile) {
        Remove-Item -Path $CurrentErrLogFile -Force -ErrorAction SilentlyContinue
    }
}
# Prints the backend status to the host.
# When the health endpoint answers: state, address, model id ('loading' while
# no id is reported) and, if the pointer files exist and are non-empty, the
# paths of the current stdout and stderr logs. Otherwise prints "not running".
function Show-Status {
    if (-not (Test-Health)) {
        Write-Host '状态: 未运行'
        return
    }
    $activeModel = Get-ModelId
    if ([string]::IsNullOrWhiteSpace($activeModel)) {
        $activeModel = 'loading'
    }
    Write-Host '状态: 运行中'
    Write-Host "地址: http://$HostAddr`:$PortNum"
    Write-Host "模型: $activeModel"
    # Each pointer file stores the path of the real log on its first line.
    $pointers = @(
        @{ Caption = '日志'; File = $CurrentLogFile },
        @{ Caption = '错误日志'; File = $CurrentErrLogFile }
    )
    foreach ($pointer in $pointers) {
        if (-not (Test-Path $pointer.File)) {
            continue
        }
        $target = Get-Content -Path $pointer.File -ErrorAction SilentlyContinue | Select-Object -First 1
        if ($target) {
            Write-Host "$($pointer.Caption): $target"
        }
    }
}
# Maps the script-level $ThinkMode to the generation limits passed to the server.
# Returns a hashtable with ReasoningBudget and MaxTokens (both strings).
# Throws for any mode other than 'think-on' or 'think-off'.
function Resolve-RuntimeProfile {
    if ($ThinkMode -eq 'think-on') {
        return @{ ReasoningBudget = '-1'; MaxTokens = '-1' }
    }
    if ($ThinkMode -eq 'think-off') {
        return @{ ReasoningBudget = '0'; MaxTokens = '2048' }
    }
    throw "不支持的思考模式: $ThinkMode"
}
# Validates the script-level settings and throws on the first violation.
# Checks, in order: the three size settings are digit-only, they are > 0,
# image min <= image max, MMPROJ_OFFLOAD is 'on' or 'off', and the GPU memory
# delta and both timeouts are positive integers.
function Validate-Limits {
    $digitsOnly = '^[0-9]+$'
    $sizeSettings = @($CtxSize, $ImageMinTokens, $ImageMaxTokens)
    foreach ($size in $sizeSettings) {
        if ($size -notmatch $digitsOnly) {
            throw 'CTX_SIZE / IMAGE_MIN_TOKENS / IMAGE_MAX_TOKENS 必须是正整数'
        }
    }
    foreach ($size in $sizeSettings) {
        if ([int]$size -le 0) {
            throw 'CTX_SIZE / IMAGE_MIN_TOKENS / IMAGE_MAX_TOKENS 必须大于 0'
        }
    }
    if ([int]$ImageMinTokens -gt [int]$ImageMaxTokens) {
        throw 'IMAGE_MIN_TOKENS 不能大于 IMAGE_MAX_TOKENS'
    }
    if (@('on', 'off') -notcontains $MmprojOffload) {
        throw 'MMPROJ_OFFLOAD 仅支持 on 或 off'
    }
    # Settings that only need to be a positive integer, each with its own message.
    $positiveIntSettings = @(
        @{ Value = $GpuMemoryDeltaMinMiB; Message = 'GPU_MEMORY_DELTA_MIN_MIB 必须是正整数' },
        @{ Value = $BackendReadyTimeoutSec; Message = 'BACKEND_READY_TIMEOUT_SEC 必须是正整数' },
        @{ Value = $GpuVerifyTimeoutSec; Message = 'GPU_VERIFY_TIMEOUT_SEC 必须是正整数' }
    )
    foreach ($setting in $positiveIntSettings) {
        if (($setting.Value -notmatch $digitsOnly) -or ([int]$setting.Value -le 0)) {
            throw $setting.Message
        }
    }
}
# Starts llama-server with the configured model and waits until it is usable.
# Steps: check that the binary and model files exist, validate settings, stop
# any previous server, make sure the port is free, launch the process with
# stdout/stderr redirected to timestamped logs, record PID and log pointers,
# wait for readiness (Wait-Ready) and verify GPU offload (Ensure-GpuOffload).
# On any failure after launch the recent logs are shown and the server is
# stopped again before the error propagates.
# Throws on missing files, invalid settings, a busy port, startup failure or
# missing GPU offload.
function Start-Server {
    if (-not (Test-Path $BinPath)) {
        throw "llama-server.exe 不存在: $BinPath"
    }
    if (-not (Test-Path $ModelPath) -or -not (Test-Path $MmprojPath)) {
        throw "模型文件不完整。`nMODEL_PATH=$ModelPath`nMMPROJ_PATH=$MmprojPath"
    }
    Ensure-Dir $WebuiDir
    Validate-Limits
    # Named to avoid shadowing the automatic variables $profile and $args.
    $runtimeProfile = Resolve-RuntimeProfile
    Stop-Server
    $portOwner = Get-PortOwnerSummary -Port $PortNum
    if ($portOwner) {
        throw "端口 $PortNum 已被占用: $portOwner"
    }
    $serverArgs = @(
        '-m', $ModelPath,
        '-mm', $MmprojPath,
        '--n-gpu-layers', 'all',
        '--flash-attn', 'on',
        '--fit', 'on',
        '--fit-target', '256',
        '--temp', '1.0',
        '--top-p', '0.95',
        '--top-k', '20',
        '--min-p', '0.1',
        '--presence-penalty', '1.5',
        '--repeat-penalty', '1.05',
        '-n', $runtimeProfile.MaxTokens,
        '--reasoning-budget', $runtimeProfile.ReasoningBudget,
        '-c', $CtxSize,
        '--image-min-tokens', $ImageMinTokens,
        '--image-max-tokens', $ImageMaxTokens,
        '--host', $HostAddr,
        '--port', $PortNum,
        '--webui'
    )
    if ($MmprojOffload -eq 'off') {
        $serverArgs += '--no-mmproj-offload'
    } else {
        $serverArgs += '--mmproj-offload'
    }
    # Start-Process joins the -ArgumentList elements with single spaces and adds
    # no quoting of its own, so a value containing whitespace (for example a
    # model path under "C:\Users\First Last\") must be quoted explicitly.
    $quotedArgs = @($serverArgs | ForEach-Object {
        $argText = [string]$_
        if ($argText -match '\s') { '"{0}"' -f $argText } else { $argText }
    })
    # One timestamp for both files so the two log names always match.
    $stamp = Get-Date -Format 'yyyyMMdd_HHmmss'
    $logPath = Join-Path $WebuiDir ("llama_server_9b_{0}.log" -f $stamp)
    $errLogPath = Join-Path $WebuiDir ("llama_server_9b_{0}.err.log" -f $stamp)
    if (Test-Path $logPath) {
        Remove-Item -Path $logPath -Force -ErrorAction SilentlyContinue
    }
    if (Test-Path $errLogPath) {
        Remove-Item -Path $errLogPath -Force -ErrorAction SilentlyContinue
    }
    # Baseline for the nvidia-smi memory-growth check; stays 0 when unavailable.
    $baselineGpuMemoryMiB = 0
    $gpuBaseline = Get-GpuMemoryUsedMiB
    if ($gpuBaseline.Ok) {
        $baselineGpuMemoryMiB = [int]$gpuBaseline.UsedMiB
    }
    Write-Host '后端进程启动中,正在装载模型到 GPU...'
    $proc = Start-Process -FilePath $BinPath -ArgumentList $quotedArgs -WindowStyle Hidden -RedirectStandardOutput $logPath -RedirectStandardError $errLogPath -PassThru
    # Record the PID and the current log locations for Stop-Server / Show-Status.
    Set-Content -Path $PidFile -Value $proc.Id -Encoding ascii
    Set-Content -Path $CurrentLogFile -Value $logPath -Encoding utf8
    Set-Content -Path $CurrentErrLogFile -Value $errLogPath -Encoding utf8
    $startupReady = $false
    try {
        $readyResult = Wait-Ready -ProcessId $proc.Id
        if (-not $readyResult.Ready) {
            # Prefer a specific diagnosis from the logs over the generic reason.
            $startupFailureReason = Get-StartupFailureReason -OutLogPath $logPath -ErrLogPath $errLogPath
            if ($startupFailureReason) {
                throw "服务启动失败: $startupFailureReason"
            }
            throw "服务启动失败: $($readyResult.Reason)"
        }
        $gpuInfo = Ensure-GpuOffload -ProcessId $proc.Id -BaselineMemoryMiB $baselineGpuMemoryMiB -OutLogPath $logPath -ErrLogPath $errLogPath
        Write-Host "GPU 校验通过: $gpuInfo"
        $startupReady = $true
    } finally {
        # Runs on every exit path; cleans up only when startup did not complete.
        if (-not $startupReady) {
            Show-RecentServerLogs -OutLogPath $logPath -ErrLogPath $errLogPath
            Stop-Server
        }
    }
    Write-Host "已切换到 9b思考模式: $ThinkMode"
    Write-Host "地址: http://$HostAddr`:$PortNum"
    Write-Host "视觉限制: image tokens $ImageMinTokens-$ImageMaxTokens, mmproj offload=$MmprojOffload, ctx=$CtxSize"
    Show-Status
}
# Entry point: run the action selected by -Command.
#   status -> print the backend status
#   stop   -> stop the backend
#   9b     -> (re)start the backend with the 9B model, using -ThinkMode
# Any other value prints the usage text and exits with code 1.
switch ($Command) {
    'status' { Show-Status; break }
    'stop' { Stop-Server; Write-Host '服务已停止'; break }
    '9b' { Start-Server; break }
    default {
        # Single-quoted strings have no escape sequences, so a single backslash
        # is written as-is ('.\\' would print two backslashes).
        Write-Host '用法:'
        Write-Host ' .\switch_qwen35_webui.ps1 status'
        Write-Host ' .\switch_qwen35_webui.ps1 stop'
        Write-Host ' .\switch_qwen35_webui.ps1 9b [think-on|think-off]'
        exit 1
    }
}