594 lines
20 KiB
Python
594 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
import argparse
|
|
import os
|
|
import threading
|
|
import time
|
|
from contextlib import asynccontextmanager
|
|
from dataclasses import dataclass
|
|
from typing import Any, Dict, List, Set, Tuple
|
|
|
|
import requests
|
|
import uvicorn
|
|
from fastapi import FastAPI, Request
|
|
from fastapi.responses import JSONResponse, Response, StreamingResponse
|
|
from starlette.concurrency import run_in_threadpool
|
|
|
|
from toolhub_gateway_agent import (
|
|
build_non_stream_response,
|
|
run_chat_completion,
|
|
stream_chat_completion,
|
|
)
|
|
|
|
# --- Defaults used by parse_args() when neither a flag nor an env var is given ---
DEFAULT_GATEWAY_HOST = '127.0.0.1'
DEFAULT_GATEWAY_PORT = 8080
DEFAULT_BACKEND_BASE = 'http://127.0.0.1:8081'
DEFAULT_MODEL_SERVER = 'http://127.0.0.1:8081/v1'
DEFAULT_TIMEOUT_SEC = 180
DEFAULT_BACKEND_WAIT_HINT = ''
# Comma-separated list; split and de-duplicated by parse_access_urls().
DEFAULT_ACCESS_URLS = 'http://127.0.0.1:8080,http://localhost:8080'

# --- Readiness polling (see run_ready_announcer) ---
# Sleep between two readiness probes.
READY_ANNOUNCE_INTERVAL_SEC = 2
# An unchanged "still waiting" message is re-printed at most this often.
WAIT_LOG_INTERVAL_SEC = 10

# --- Model warm-up (see warmup_model) ---
# User message sent as the single warm-up chat turn.
WARMUP_MESSAGE = '请只回复一个字:好'
# An HTTP 500 whose body contains this marker is still counted as warmed up.
WARMUP_PARSE_ERROR_MARKER = 'Failed to parse input'

# --- Proxying ---
# Chunk size passed to iter_content() when relaying a streamed body.
STREAM_CHUNK_BYTES = 8192
# HTTP methods accepted by the catch-all proxy route.
SUPPORTED_PROXY_METHODS = [
    'GET',
    'POST',
    'PUT',
    'PATCH',
    'DELETE',
    'OPTIONS',
    'HEAD',
]
# Header names (lower-case) that filtered_headers() strips in both directions.
HOP_HEADERS = {
    'connection',
    'keep-alive',
    'proxy-authenticate',
    'proxy-authorization',
    'te',
    'trailers',
    'transfer-encoding',
    'upgrade',
}

# --- localStorage keys written by the injected web UI settings patch ---
LOCAL_CONFIG_KEY = 'LlamaCppWebui.config'
LOCAL_OVERRIDES_KEY = 'LlamaCppWebui.userOverrides'
|
|
WEBUI_SETTINGS_PATCH = f"""
|
|
<script>
|
|
(function () {{
|
|
try {{
|
|
var cfgKey = '{LOCAL_CONFIG_KEY}';
|
|
var ovKey = '{LOCAL_OVERRIDES_KEY}';
|
|
var cfg = JSON.parse(localStorage.getItem(cfgKey) || '{{}}');
|
|
cfg.showMessageStats = true;
|
|
cfg.keepStatsVisible = false;
|
|
cfg.showThoughtInProgress = true;
|
|
cfg.disableReasoningParsing = false;
|
|
localStorage.setItem(cfgKey, JSON.stringify(cfg));
|
|
|
|
var overrides = JSON.parse(localStorage.getItem(ovKey) || '[]');
|
|
var set = new Set(Array.isArray(overrides) ? overrides : []);
|
|
['showMessageStats', 'keepStatsVisible', 'showThoughtInProgress', 'disableReasoningParsing']
|
|
.forEach(function (k) {{ set.add(k); }});
|
|
localStorage.setItem(ovKey, JSON.stringify(Array.from(set)));
|
|
}} catch (e) {{
|
|
console.error('webui settings patch failed', e);
|
|
}}
|
|
}})();
|
|
</script>
|
|
<style>
|
|
.chat-processing-info-container {{
|
|
display: none !important;
|
|
}}
|
|
</style>
|
|
""".strip()
|
|
BACKEND_LOADING_HTML = """
|
|
<!doctype html>
|
|
<html lang="zh-CN">
|
|
<head>
|
|
<meta charset="utf-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<title>ToolHub 正在准备中</title>
|
|
<style>
|
|
:root {
|
|
color-scheme: light;
|
|
font-family: "Segoe UI", "PingFang SC", "Microsoft YaHei", sans-serif;
|
|
background: #0f172a;
|
|
color: #e2e8f0;
|
|
}
|
|
body {
|
|
margin: 0;
|
|
min-height: 100vh;
|
|
display: grid;
|
|
place-items: center;
|
|
background:
|
|
radial-gradient(circle at top, rgba(59, 130, 246, 0.18), transparent 45%),
|
|
linear-gradient(180deg, #111827, #020617);
|
|
}
|
|
main {
|
|
width: min(680px, calc(100vw - 32px));
|
|
padding: 28px;
|
|
border-radius: 20px;
|
|
background: rgba(15, 23, 42, 0.88);
|
|
border: 1px solid rgba(148, 163, 184, 0.24);
|
|
box-shadow: 0 24px 80px rgba(15, 23, 42, 0.45);
|
|
}
|
|
h1 {
|
|
margin: 0 0 12px;
|
|
font-size: 28px;
|
|
}
|
|
.status {
|
|
display: flex;
|
|
align-items: center;
|
|
gap: 16px;
|
|
margin-bottom: 18px;
|
|
}
|
|
.spinner-shell {
|
|
position: relative;
|
|
width: 40px;
|
|
height: 40px;
|
|
flex: 0 0 auto;
|
|
}
|
|
.spinner-ring {
|
|
position: absolute;
|
|
inset: 0;
|
|
border-radius: 999px;
|
|
border: 3px solid rgba(148, 163, 184, 0.16);
|
|
border-top-color: #93c5fd;
|
|
border-right-color: rgba(96, 165, 250, 0.92);
|
|
animation: spin 12s steps(12, end) infinite;
|
|
will-change: transform;
|
|
transform: translateZ(0);
|
|
}
|
|
.spinner-ring::after {
|
|
content: "";
|
|
position: absolute;
|
|
top: 3px;
|
|
left: 50%;
|
|
width: 7px;
|
|
height: 7px;
|
|
margin-left: -3.5px;
|
|
border-radius: 999px;
|
|
background: #e0f2fe;
|
|
box-shadow: 0 0 12px rgba(96, 165, 250, 0.78);
|
|
}
|
|
.spinner-core {
|
|
position: absolute;
|
|
inset: 9px;
|
|
border-radius: 999px;
|
|
background:
|
|
radial-gradient(circle, rgba(191, 219, 254, 0.96) 0, rgba(147, 197, 253, 0.82) 34%, rgba(59, 130, 246, 0.18) 65%, transparent 72%);
|
|
}
|
|
p {
|
|
margin: 10px 0;
|
|
line-height: 1.7;
|
|
color: #cbd5e1;
|
|
}
|
|
.state-line {
|
|
margin-top: 14px;
|
|
color: #93c5fd;
|
|
}
|
|
.elapsed-line {
|
|
margin-top: 8px;
|
|
color: #cbd5e1;
|
|
font-variant-numeric: tabular-nums;
|
|
}
|
|
.hint-box {
|
|
margin-top: 16px;
|
|
padding: 14px 16px;
|
|
border-radius: 14px;
|
|
background: rgba(15, 23, 42, 0.72);
|
|
border: 1px solid rgba(148, 163, 184, 0.18);
|
|
}
|
|
details {
|
|
margin-top: 16px;
|
|
color: #94a3b8;
|
|
}
|
|
summary {
|
|
cursor: pointer;
|
|
}
|
|
pre {
|
|
margin: 10px 0 0;
|
|
padding: 12px;
|
|
border-radius: 12px;
|
|
background: rgba(2, 6, 23, 0.86);
|
|
border: 1px solid rgba(148, 163, 184, 0.16);
|
|
color: #cbd5e1;
|
|
white-space: pre-wrap;
|
|
word-break: break-word;
|
|
font-family: "Cascadia Code", "Consolas", monospace;
|
|
font-size: 13px;
|
|
line-height: 1.6;
|
|
}
|
|
code {
|
|
font-family: "Cascadia Code", "Consolas", monospace;
|
|
color: #f8fafc;
|
|
}
|
|
@keyframes spin {
|
|
from { transform: rotate(0deg); }
|
|
to { transform: rotate(360deg); }
|
|
}
|
|
@media (prefers-reduced-motion: reduce) {
|
|
.spinner-ring {
|
|
animation-duration: 12s;
|
|
}
|
|
}
|
|
</style>
|
|
</head>
|
|
<body>
|
|
<main>
|
|
<div class="status">
|
|
<div class="spinner-shell" aria-hidden="true">
|
|
<div class="spinner-ring"></div>
|
|
<div class="spinner-core"></div>
|
|
</div>
|
|
<h1>ToolHub 正在准备中</h1>
|
|
</div>
|
|
<p>网关已经启动,但模型后端暂时还没有就绪。</p>
|
|
<p>如果这是第一次启动,程序可能正在下载模型文件,或者正在把模型加载到 GPU。</p>
|
|
<p>页面会停留在这个等待界面里,并自动检查后端状态。准备完成后会自动进入聊天界面,不再整页反复刷新。</p>
|
|
<p class="state-line" id="state-line">正在检查后端状态...</p>
|
|
<p class="elapsed-line" id="elapsed-line">已等待 0 秒</p>
|
|
<div class="hint-box">
|
|
<p>如果你是刚在终端里执行了启动命令,最直接的进度信息通常就在那个终端窗口里。</p>
|
|
__HINT_BLOCK__
|
|
</div>
|
|
<details>
|
|
<summary>查看技术详情</summary>
|
|
<pre>__DETAIL__</pre>
|
|
</details>
|
|
</main>
|
|
<script>
|
|
(function () {
|
|
var stateLine = document.getElementById('state-line');
|
|
var elapsedLine = document.getElementById('elapsed-line');
|
|
var healthUrl = '/gateway/health';
|
|
var startedAt = Date.now();
|
|
|
|
function updateState(message) {
|
|
if (stateLine) {
|
|
stateLine.textContent = message;
|
|
}
|
|
}
|
|
|
|
function updateElapsed() {
|
|
if (!elapsedLine) {
|
|
return;
|
|
}
|
|
var elapsedSec = Math.floor((Date.now() - startedAt) / 1000);
|
|
elapsedLine.textContent = '已等待 ' + elapsedSec + ' 秒';
|
|
}
|
|
|
|
async function pollHealth() {
|
|
try {
|
|
var response = await fetch(healthUrl, { cache: 'no-store' });
|
|
var payload = await response.json();
|
|
if (payload.status === 'ok') {
|
|
updateState('后端已经就绪,正在进入聊天界面...');
|
|
updateElapsed();
|
|
window.location.reload();
|
|
return;
|
|
}
|
|
updateState('模型仍在准备中,页面会自动继续等待。');
|
|
} catch (error) {
|
|
updateState('暂时还连不上后端,继续等待即可。');
|
|
}
|
|
window.setTimeout(pollHealth, 4000);
|
|
}
|
|
|
|
updateElapsed();
|
|
window.setInterval(updateElapsed, 1000);
|
|
window.setTimeout(pollHealth, 1200);
|
|
})();
|
|
</script>
|
|
</body>
|
|
</html>
|
|
""".strip()
|
|
|
|
|
|
@dataclass(frozen=True)
class GatewayConfig:
    """Immutable settings for one gateway process, as produced by parse_args()."""

    # Base URL that proxied requests and the /health probe are sent to.
    backend_base: str
    # Base URL used for chat completions and the warm-up request.
    model_server: str
    # Interface and port handed to uvicorn.
    gateway_host: str
    gateway_port: int
    # Timeout (seconds) applied to every outgoing HTTP call made by this module.
    timeout_sec: int = DEFAULT_TIMEOUT_SEC
    # Optional command shown on the loading page; empty means "show nothing".
    backend_wait_hint: str = DEFAULT_BACKEND_WAIT_HINT
    # URLs printed in the startup / ready announcements; empty disables them.
    access_urls: Tuple[str, ...] = ()
|
|
|
|
|
|
@dataclass
class GatewayState:
    """Mutable state shared between the readiness thread and request handlers."""

    # Set by run_ready_announcer() once warm-up succeeded; read via is_gateway_ready().
    ready_event: threading.Event
|
|
|
|
|
|
def parse_args() -> GatewayConfig:
    """Parse command-line flags into a GatewayConfig.

    Each flag defaults to an environment variable of the same meaning, which
    in turn falls back to the module-level DEFAULT_* constant.

    Returns:
        The frozen configuration; base URLs have trailing slashes removed and
        the access-URL list is split and de-duplicated.
    """
    env = os.getenv
    parser = argparse.ArgumentParser(description='Run 8080 toolhub gateway with 8081 llama-server backend.')

    parser.add_argument('--host', default=env('GATEWAY_HOST', DEFAULT_GATEWAY_HOST))
    port_default = int(env('GATEWAY_PORT', str(DEFAULT_GATEWAY_PORT)))
    parser.add_argument('--port', type=int, default=port_default)
    parser.add_argument('--backend-base', default=env('BACKEND_BASE', DEFAULT_BACKEND_BASE))
    parser.add_argument('--model-server', default=env('MODEL_SERVER', DEFAULT_MODEL_SERVER))
    timeout_default = int(env('GATEWAY_TIMEOUT_SEC', str(DEFAULT_TIMEOUT_SEC)))
    parser.add_argument('--timeout-sec', type=int, default=timeout_default)
    parser.add_argument('--backend-wait-hint', default=env('BACKEND_WAIT_HINT', DEFAULT_BACKEND_WAIT_HINT))
    parser.add_argument('--access-urls', default=env('ACCESS_URLS', DEFAULT_ACCESS_URLS))

    options = parser.parse_args()
    return GatewayConfig(
        backend_base=options.backend_base.rstrip('/'),
        model_server=options.model_server.rstrip('/'),
        gateway_host=options.host,
        gateway_port=options.port,
        timeout_sec=options.timeout_sec,
        backend_wait_hint=options.backend_wait_hint.strip(),
        access_urls=parse_access_urls(options.access_urls),
    )
|
|
|
|
|
|
def parse_access_urls(raw: str) -> Tuple[str, ...]:
    """Split a comma-separated URL list into a tuple of unique entries.

    Whitespace around each entry is stripped, empty entries are skipped and
    duplicates are dropped while keeping first-seen order.
    """
    seen: Dict[str, None] = {}
    for piece in raw.split(','):
        candidate = piece.strip()
        if candidate:
            # setdefault keeps the position of the first occurrence.
            seen.setdefault(candidate, None)
    return tuple(seen)
|
|
|
|
|
|
def filtered_headers(headers: Dict[str, str]) -> Dict[str, str]:
    """Return a copy of *headers* without the entries the proxy must not relay.

    Dropped (case-insensitively): everything in HOP_HEADERS plus ``host``,
    ``content-length`` and ``proxy-connection``.
    """
    blocked = HOP_HEADERS | {'host', 'content-length', 'proxy-connection'}
    kept: Dict[str, str] = {}
    for name, value in headers.items():
        if name.lower() in blocked:
            continue
        kept[name] = value
    return kept
|
|
|
|
|
|
def drop_headers_ci(headers: Dict[str, str], names: Set[str]) -> Dict[str, str]:
    """Return a copy of *headers* without the given names, compared case-insensitively."""
    unwanted = {name.lower() for name in names}
    remaining: Dict[str, str] = {}
    for key, value in headers.items():
        if key.lower() in unwanted:
            continue
        remaining[key] = value
    return remaining
|
|
|
|
|
|
def build_backend_url(base: str, path: str, query: str) -> str:
    """Concatenate *base* and *path*, appending ``?query`` only when *query* is non-empty."""
    target = f'{base}{path}'
    return f'{target}?{query}' if query else target
|
|
|
|
|
|
def stream_upstream(upstream: requests.Response):
    """Yield the non-empty body chunks of *upstream*, closing it when iteration ends.

    The response is closed in every case: normal exhaustion, an error while
    reading, or the consumer closing this generator early.
    """
    try:
        # filter(None, ...) skips empty chunks.
        yield from filter(None, upstream.iter_content(chunk_size=STREAM_CHUNK_BYTES))
    finally:
        upstream.close()
|
|
|
|
|
|
def inject_webui_settings(html: str) -> str:
    """Insert WEBUI_SETTINGS_PATCH into an HTML document exactly once.

    The patch goes right after the first ``<head>`` tag, else right after the
    first ``<body>`` tag, else in front of the whole document. A document that
    already contains the patch is returned unchanged.
    """
    if WEBUI_SETTINGS_PATCH in html:
        return html
    for anchor in ('<head>', '<body>'):
        if anchor in html:
            return html.replace(anchor, f'{anchor}\n{WEBUI_SETTINGS_PATCH}\n', 1)
    return f'{WEBUI_SETTINGS_PATCH}\n{html}'
|
|
|
|
|
|
def build_backend_loading_response(detail: str, wait_hint: str) -> Response:
    """Render the "backend is still starting" page.

    Args:
        detail: Technical detail shown inside the page's ``<pre>`` element.
            This may be a backend response body or an exception message, so it
            is treated as untrusted text and HTML-escaped.
        wait_hint: Optional command the user can run to watch backend
            progress. Escaped the same way; an empty value omits the hint.

    Returns:
        A non-cacheable HTTP 200 HTML response built from BACKEND_LOADING_HTML.
    """
    # Function-scope imports keep this block self-contained.
    import re
    from html import escape

    # quote=False: both values land in element text, never in an attribute.
    safe_detail = escape(detail, quote=False)
    hint_block = ''
    if wait_hint:
        safe_hint = escape(wait_hint, quote=False)
        hint_block = f'<p>如果你想单独查看后端准备进度,可以执行:<br><code>{safe_hint}</code></p>'

    # Substitute both placeholders in one pass so that inserted text is never
    # re-scanned for the other placeholder.
    substitutions = {'__DETAIL__': safe_detail, '__HINT_BLOCK__': hint_block}
    page = re.sub(
        '__DETAIL__|__HINT_BLOCK__',
        lambda match: substitutions[match.group(0)],
        BACKEND_LOADING_HTML,
    )
    return Response(
        content=page,
        status_code=200,
        media_type='text/html; charset=utf-8',
        headers={'Cache-Control': 'no-store, max-age=0'},
    )
|
|
|
|
|
|
def is_root_request(request: Request, path: str) -> bool:
    """Tell whether this is a GET for the web UI entry page (``/`` or ``/index.html``)."""
    if request.method != 'GET':
        return False
    return path in ('/', '/index.html')
|
|
|
|
|
|
def is_backend_wait_status(status_code: int) -> bool:
    """Tell whether *status_code* is one of 502, 503 or 504."""
    return status_code in (502, 503, 504)
|
|
|
|
|
|
def format_access_urls(access_urls: Tuple[str, ...]) -> str:
    """Join the access URLs into one space-separated string for log output."""
    separator = ' '
    return separator.join(access_urls)
|
|
|
|
|
|
def check_backend_ready(cfg: GatewayConfig) -> bool:
    """Probe ``<backend_base>/health`` once.

    Returns:
        True when the request succeeds with a non-error status; False on any
        exception (connection failure, timeout, HTTP error status, ...).
    """
    try:
        requests.get(f'{cfg.backend_base}/health', timeout=cfg.timeout_sec).raise_for_status()
    except Exception:  # noqa: BLE001
        # Deliberately broad: any failure simply means "not ready yet".
        return False
    else:
        return True
|
|
|
|
|
|
def announce_access_urls(cfg: GatewayConfig) -> None:
    """Print the configured access URLs with a "model still loading" notice.

    Prints nothing when no access URLs are configured.
    """
    if cfg.access_urls:
        urls = format_access_urls(cfg.access_urls)
        print(f'[toolhub-gateway] 网页入口已经开放,正在加载模型,完成后可访问: {urls}', flush=True)
|
|
|
|
|
|
def announce_backend_ready(cfg: GatewayConfig) -> None:
    """Print the configured access URLs with a "model is loaded and warmed up" notice.

    Prints nothing when no access URLs are configured.
    """
    if cfg.access_urls:
        urls = format_access_urls(cfg.access_urls)
        print(f'[toolhub-gateway] 模型已完成加载和预热,可以打开: {urls}', flush=True)
|
|
|
|
|
|
def is_gateway_ready(state: GatewayState) -> bool:
    """Report whether the readiness event in *state* has been set."""
    ready_flag = state.ready_event
    return ready_flag.is_set()
|
|
|
|
|
|
def warmup_model(cfg: GatewayConfig) -> Tuple[bool, str]:
    """Send one tiny non-streaming chat completion to the model server.

    Returns:
        ``(ready, message)``. ``ready`` is True for any successful HTTP
        status, and also for an HTTP 500 whose body contains
        WARMUP_PARSE_ERROR_MARKER. ``message`` is a human-readable status line.
    """
    request_body = {
        'messages': [{'role': 'user', 'content': WARMUP_MESSAGE}],
        'max_tokens': 1,
        'stream': False,
        'temperature': 0,
    }
    try:
        reply = requests.post(
            f'{cfg.model_server}/chat/completions',
            json=request_body,
            timeout=cfg.timeout_sec,
        )
    except Exception as exc:  # noqa: BLE001
        return False, f'模型预热请求失败: {exc}'

    if reply.ok:
        return True, '模型预热已完成'

    text = reply.text.strip()
    parse_error_seen = reply.status_code == 500 and WARMUP_PARSE_ERROR_MARKER in text
    if parse_error_seen:
        return True, '模型首轮预热已经完成'
    # Only the first 200 characters of the body are echoed into the message.
    return False, f'模型预热暂未完成: HTTP {reply.status_code} {text[:200]}'
|
|
|
|
|
|
def run_ready_announcer(cfg: GatewayConfig, state: GatewayState) -> None:
    """Poll the backend until it is healthy and warmed up, then mark the gateway ready.

    Runs until success. Each round probes backend health and, if healthy,
    sends the warm-up request. On success the ready event is set and the
    ready announcement is printed. While waiting, the current reason is
    printed whenever it changes, or at least every WAIT_LOG_INTERVAL_SEC
    seconds.
    """
    previous_detail = ''
    previous_log_at = 0.0
    announce_access_urls(cfg)
    while True:
        if check_backend_ready(cfg):
            outcome = warmup_model(cfg)
        else:
            outcome = (False, '后端健康检查尚未通过')
        ready, detail = outcome

        if ready:
            state.ready_event.set()
            announce_backend_ready(cfg)
            return

        now = time.monotonic()
        detail_changed = detail != previous_detail
        log_interval_elapsed = (now - previous_log_at) >= WAIT_LOG_INTERVAL_SEC
        if detail_changed or log_interval_elapsed:
            print(f'[toolhub-gateway] 后端仍在准备中: {detail}', flush=True)
            previous_detail = detail
            previous_log_at = now
        time.sleep(READY_ANNOUNCE_INTERVAL_SEC)
|
|
|
|
|
|
async def handle_gateway_health(cfg: GatewayConfig, state: GatewayState) -> Dict[str, Any]:
    """Build the payload for ``GET /gateway/health``.

    Returns:
        A dict with ``status``, ``backend_base`` and ``backend_error``:
        ``status`` is ``'degraded'`` when the backend health probe fails
        (``backend_error`` then carries the exception text), otherwise
        ``'ok'`` once the gateway is marked ready and ``'warming'`` before that.
    """
    status = 'ok' if is_gateway_ready(state) else 'warming'
    backend_error = ''
    try:
        # requests is blocking; run it in the thread pool so a slow or hung
        # backend cannot stall the event loop for up to timeout_sec seconds.
        health = await run_in_threadpool(
            requests.get,
            f'{cfg.backend_base}/health',
            timeout=cfg.timeout_sec,
        )
        health.raise_for_status()
    except Exception as exc:  # noqa: BLE001
        # Deliberately broad: any probe failure is reported, never raised.
        status = 'degraded'
        backend_error = str(exc)
    return {'status': status, 'backend_base': cfg.backend_base, 'backend_error': backend_error}
|
|
|
|
|
|
def _gateway_error_response(status_code: int, error_type: str, message: str) -> JSONResponse:
    """Build the JSON error envelope used by the chat-completions handler."""
    body = {'error': {'code': status_code, 'type': error_type, 'message': message}}
    return JSONResponse(status_code=status_code, content=body)


async def handle_chat_completions(request: Request, cfg: GatewayConfig) -> Response:
    """Serve ``POST /v1/chat/completions`` through the toolhub agent.

    With ``"stream": true`` the agent's iterator is relayed as
    ``text/event-stream``; otherwise the agent runs in the thread pool and its
    result is wrapped by ``build_non_stream_response``.

    Returns:
        The streaming or JSON response. A body that is not valid JSON, or not
        a JSON object, yields HTTP 400; a failure raised by the agent call
        yields HTTP 500 with ``type`` set to ``gateway_error``.
    """
    try:
        payload = await request.json()
    except ValueError as exc:
        # Covers json.JSONDecodeError and undecodable bytes; previously this
        # surfaced as an unhandled exception.
        return _gateway_error_response(400, 'invalid_request_error', f'request body is not valid JSON: {exc}')
    if not isinstance(payload, dict):
        # payload.get() below requires a mapping.
        return _gateway_error_response(400, 'invalid_request_error', 'request body must be a JSON object')

    if bool(payload.get('stream', False)):
        try:
            iterator = stream_chat_completion(payload, cfg.model_server, cfg.timeout_sec)
        except Exception as exc:  # noqa: BLE001
            return _gateway_error_response(500, 'gateway_error', str(exc))
        return StreamingResponse(iterator, media_type='text/event-stream')

    try:
        result = await run_in_threadpool(run_chat_completion, payload, cfg.model_server, cfg.timeout_sec)
    except Exception as exc:  # noqa: BLE001
        return _gateway_error_response(500, 'gateway_error', str(exc))

    answer = result['answer']
    model = result['model']
    reasoning = result.get('reasoning', '')
    return JSONResponse(content=build_non_stream_response(answer, model, reasoning))
|
|
|
|
|
|
def _read_raw_body(upstream: requests.Response) -> bytes:
    """Read the upstream body without content decoding.

    The bytes stay exactly as the backend sent them, so a forwarded
    Content-Encoding header still describes the payload.
    """
    upstream.raw.decode_content = False
    return upstream.raw.read(decode_content=False)


async def handle_proxy(request: Request, full_path: str, cfg: GatewayConfig, state: GatewayState) -> Response:
    """Forward a request to the backend and relay its response.

    Special cases:
      * GET ``/`` or ``/index.html`` before the gateway is ready, when the
        backend is unreachable, or when it answers 502/503/504: the loading
        page is returned instead.
      * GET ``/favicon.ico`` under the same failure conditions: empty 204.
      * ``text/event-stream`` responses are streamed chunk by chunk.
      * The web UI entry page (200, ``text/html``) gets the settings patch
        injected and is re-encoded as UTF-8.
      * Everything else is relayed byte-for-byte.

    All blocking ``requests`` work runs in the thread pool so the event loop
    stays responsive while the backend is slow.

    Returns:
        The response to send to the client; an unreachable backend on any
        other path yields HTTP 502 with a ``proxy_error`` JSON body.
    """
    path = '/' + full_path
    root_request = is_root_request(request, path)
    favicon_request = request.method == 'GET' and path == '/favicon.ico'

    if root_request and not is_gateway_ready(state):
        return build_backend_loading_response('模型正在加载或预热,完成后会自动进入聊天界面。', cfg.backend_wait_hint)

    url = build_backend_url(cfg.backend_base, path, request.url.query)
    headers = filtered_headers(dict(request.headers))
    body = await request.body()

    try:
        upstream = await run_in_threadpool(
            requests.request,
            method=request.method,
            url=url,
            headers=headers,
            data=body,
            stream=True,
            timeout=cfg.timeout_sec,
            allow_redirects=False,
        )
    except Exception as exc:  # noqa: BLE001
        if root_request:
            return build_backend_loading_response(str(exc), cfg.backend_wait_hint)
        if favicon_request:
            return Response(status_code=204)
        error = {'error': {'type': 'proxy_error', 'message': str(exc)}}
        return JSONResponse(status_code=502, content=error)

    response_headers = filtered_headers(dict(upstream.headers))
    content_type = upstream.headers.get('content-type', '')
    backend_waiting = is_backend_wait_status(upstream.status_code)

    # Set once ownership of `upstream` passes to the streaming generator,
    # which closes it itself; every other path closes it in `finally`.
    handed_off = False
    try:
        if root_request and backend_waiting:
            text = await run_in_threadpool(lambda: upstream.text)
            detail = text.strip() or f'backend returned {upstream.status_code}'
            return build_backend_loading_response(detail, cfg.backend_wait_hint)

        if favicon_request and backend_waiting:
            return Response(status_code=204)

        if 'text/event-stream' in content_type:
            # iter_content() yields content-decoded bytes, so the upstream
            # Content-Encoding header must not be forwarded with them.
            streaming = StreamingResponse(
                stream_upstream(upstream),
                status_code=upstream.status_code,
                headers=drop_headers_ci(response_headers, {'content-encoding'}),
                media_type='text/event-stream',
            )
            handed_off = True
            return streaming

        if root_request and upstream.status_code == 200 and 'text/html' in content_type:
            raw_html = await run_in_threadpool(lambda: upstream.content)
            encoding = upstream.encoding or 'utf-8'
            html = raw_html.decode(encoding, errors='replace')
            injected = inject_webui_settings(html)
            # The body was decoded and modified, so these headers no longer
            # describe it.
            clean_headers = drop_headers_ci(response_headers, {'content-encoding', 'content-length', 'etag'})
            return Response(
                content=injected.encode('utf-8'),
                status_code=200,
                headers=clean_headers,
                media_type='text/html; charset=utf-8',
            )

        data = await run_in_threadpool(_read_raw_body, upstream)
        return Response(content=data, status_code=upstream.status_code, headers=response_headers)
    finally:
        if not handed_off:
            upstream.close()
|
|
|
|
|
|
def create_app(cfg: GatewayConfig, state: GatewayState) -> FastAPI:
    """Assemble the FastAPI application for the gateway.

    Routes, in registration order:
      * ``GET /gateway/health`` -> handle_gateway_health
      * ``POST /v1/chat/completions`` -> handle_chat_completions
      * every other path, for SUPPORTED_PROXY_METHODS -> handle_proxy

    The readiness poller is started as a daemon thread when the application
    starts up.
    """

    @asynccontextmanager
    async def lifespan(_: FastAPI):
        announcer = threading.Thread(
            target=run_ready_announcer,
            args=(cfg, state),
            daemon=True,
        )
        announcer.start()
        yield

    app = FastAPI(title='Qwen3.5 ToolHub Gateway 8080', lifespan=lifespan)

    @app.get('/gateway/health')
    async def gateway_health() -> Dict[str, Any]:
        return await handle_gateway_health(cfg, state)

    @app.post('/v1/chat/completions')
    async def chat_completions(request: Request) -> Response:
        return await handle_chat_completions(request, cfg)

    # Catch-all route: registered last so the specific routes above match first.
    @app.api_route('/{full_path:path}', methods=SUPPORTED_PROXY_METHODS)
    async def proxy_all(request: Request, full_path: str) -> Response:
        return await handle_proxy(request, full_path, cfg, state)

    return app
|
|
|
|
|
|
def main() -> None:
    """Parse the configuration, build the app and serve it with uvicorn."""
    cfg = parse_args()
    ready_flag = threading.Event()
    app = create_app(cfg, GatewayState(ready_event=ready_flag))
    uvicorn.run(
        app,
        host=cfg.gateway_host,
        port=cfg.gateway_port,
        log_level='info',
    )


if __name__ == '__main__':
    main()
|