first
This commit is contained in:
446
toolhub_gateway_agent.py
Normal file
446
toolhub_gateway_agent.py
Normal file
@@ -0,0 +1,446 @@
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Union
|
||||
|
||||
import requests
|
||||
from qwen_agent.agents import Assistant
|
||||
from qwen_agent.llm.schema import ContentItem, Message
|
||||
|
||||
import agent_runtime # noqa: F401
|
||||
from agent_runtime import readonly_tools # noqa: F401
|
||||
|
||||
# System prompt for the assistant (Chinese, runtime string — kept verbatim):
# defines tool-use rules, the filesystem access policy, and the proactive
# long-term-memory behavior (rule 9).
# NOTE(review): rules 6 and 8 are near-duplicates — consider deduplicating.
DEFAULT_SYSTEM_PROMPT = (
    '你是 Qwen3.5,本地部署的多模态中文助手。\n'
    '默认中文回答。\n'
    '当用户只是打招呼或闲聊时,自然回应即可,不要主动枚举全部工具。\n'
    '你的目标是先使用可用工具获得可验证信息,再给出结论。\n'
    '规则:\n'
    '1. 对最新信息先用 web_search,再按需用 web_fetch 或 web_extractor 抓取正文。\n'
    '2. 对人名、作品名、小众概念等不确定知识先 web_search,若结果歧义则改写关键词再检索一次。\n'
    '3. 核心规则:你已具备 filesystem 的读写能力。你可以读取文件,如果用户有需求,你也可以调用 write_file 工具进行写入。\n'
    '4. 图片问题先看整图,细节再用 image_zoom_in_tool,使用相对坐标。\n'
    '5. 工具失败时必须明确说明原因,不得伪造结果。\n'
    '6. 联网任务要控制上下文预算,优先少量高质量来源。\n'
    '7. 严禁在未获授权的情况下使用 filesystem 工具查看助手自身的源代码或运行目录。\n'
    '8. 联网任务要控制上下文预算,优先少量高质量来源,避免搬运大段无关正文。\n'
    '9. 长期记忆(主动意识):你拥有 manage_memory 工具。当你从对话中识别出以下内容时,必须【主动】调用 add 操作:\n - 用户的明确偏好(如:喜欢 MD 格式、不喜欢繁琐说明)。\n - 重要的个人事实(如:职业、项目代号、系统配置路径)。\n - 约定的工作习惯(如:每段代码都要加注释)。\n 执行后,在回复中自然地告知用户“我已记下此习惯/信息”。当用户问“你了解我什么”或要求修改时,配合 list 和 delete 操作。\n'
)

# Default tool names registered with the Assistant. 'write_file' is NOT
# listed here; build_agent appends it at runtime when ENABLE_FILE_WRITE=true.
DEFAULT_FUNCTION_LIST = [
    'web_search',
    'web_fetch',
    'web_extractor',
    'image_search',
    'image_zoom_in_tool',
    'filesystem',
    'manage_memory',  # long-term memory tool — check quotes and comma are in place
]
# Minimum wall-clock seconds between live 'timings' SSE chunks while streaming.
TIMINGS_EMIT_INTERVAL_SEC = 0.8
# Max characters of a part's 'text' carried into the fallback JSON summary.
MAX_FALLBACK_PART_TEXT_CHARS = 512
|
||||
|
||||
# --- 注入逻辑开始 ---
|
||||
def get_injected_memory() -> str:
    """Load long-term memories from the file named by MEMORY_FILE_PATH.

    Returns a pre-formatted prompt section listing each memory as a
    bullet line, or '' when the file is missing, empty, malformed, or
    not a JSON list. Failures are best-effort: logged to stdout, never
    raised to the caller.
    """
    memory_file = Path(os.getenv('MEMORY_FILE_PATH', './memory.json')).resolve()
    if not memory_file.exists():
        return ""

    try:
        entries = json.loads(memory_file.read_text(encoding='utf-8'))
        if not isinstance(entries, list) or not entries:
            return ""
        bullet_lines = "\n".join(f"- {entry}" for entry in entries)
        return f"\n【长期记忆库(已自动加载)】:\n{bullet_lines}\n"
    except Exception as exc:
        print(f"注入记忆失败: {exc}")
        return ""
|
||||
|
||||
def fetch_model_id(model_server: str, timeout_sec: int) -> str:
    """Query the OpenAI-compatible /models endpoint and return the first model id.

    Raises requests.HTTPError on a non-2xx response.
    """
    resp = requests.get(f'{model_server}/models', timeout=timeout_sec)
    resp.raise_for_status()
    listing = resp.json()
    return listing['data'][0]['id']
|
||||
|
||||
|
||||
def _extract_image_uri(part: Dict[str, Any]) -> Optional[str]:
|
||||
keys = ('image_url', 'image', 'url', 'input_image', 'image_uri')
|
||||
for key in keys:
|
||||
value = part.get(key)
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value.strip()
|
||||
if isinstance(value, dict):
|
||||
nested = value.get('url') or value.get('image_url') or value.get('image')
|
||||
if isinstance(nested, str) and nested.strip():
|
||||
return nested.strip()
|
||||
return None
|
||||
|
||||
|
||||
def _build_compact_part_text(part: Dict[str, Any], part_type: Any) -> str:
    """Serialize an unrecognized content part into a compact JSON summary.

    Keeps the part type, up to 12 sorted key names, and a truncated
    'text' field when one is present, so downstream consumers see the
    structure without the full (possibly huge) payload.
    """
    summary: Dict[str, Any] = {
        'type': str(part_type or 'unknown'),
        'keys': sorted(str(key) for key in part)[:12],
    }
    raw_text = part.get('text')
    if isinstance(raw_text, str) and raw_text.strip():
        summary['text'] = raw_text.strip()[:MAX_FALLBACK_PART_TEXT_CHARS]
    return json.dumps(summary, ensure_ascii=False)
|
||||
|
||||
|
||||
def extract_generate_cfg(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Translate an OpenAI-style request payload into qwen-agent generate_cfg.

    Copies the standard sampling keys when present, maps the llama.cpp
    name ``repeat_penalty`` to ``repetition_penalty``, and always emits
    an ``extra_body`` with thinking enabled plus a reasoning format and
    budget (user-supplied values win over the defaults).

    Fixes vs. the previous version: the trailing ``if not cfg`` default
    fallback was unreachable (cfg always contains 'extra_body' by then)
    and has been removed; extra_body / chat_template_kwargs are copied
    so the caller's payload dict is never mutated.
    """
    cfg: Dict[str, Any] = {}
    for key in ('temperature', 'top_p', 'top_k', 'presence_penalty', 'frequency_penalty'):
        value = payload.get(key)
        if value is not None:
            cfg[key] = value

    repeat_penalty = payload.get('repeat_penalty')
    if repeat_penalty is not None:
        # llama.cpp-style name -> transformers-style name.
        cfg['repetition_penalty'] = repeat_penalty

    raw_extra = payload.get('extra_body')
    # Defensive copies: never mutate dicts owned by the caller.
    extra_body = dict(raw_extra) if isinstance(raw_extra, dict) else {}
    raw_kwargs = extra_body.get('chat_template_kwargs')
    chat_template_kwargs = dict(raw_kwargs) if isinstance(raw_kwargs, dict) else {}
    # Thinking is on by default; an explicit false from the caller is kept.
    chat_template_kwargs.setdefault('enable_thinking', True)
    extra_body['chat_template_kwargs'] = chat_template_kwargs

    requested_reasoning_format = payload.get('reasoning_format')
    if isinstance(requested_reasoning_format, str) and requested_reasoning_format.strip():
        extra_body.setdefault('reasoning_format', requested_reasoning_format.strip())
    else:
        extra_body.setdefault('reasoning_format', 'deepseek')
    extra_body.setdefault('reasoning_budget', -1)  # -1: unlimited reasoning tokens
    cfg['extra_body'] = extra_body

    max_tokens = payload.get('max_tokens')
    if isinstance(max_tokens, int) and max_tokens > 0:
        cfg['max_tokens'] = max_tokens
    return cfg
|
||||
|
||||
|
||||
def build_agent(
    model_server: str,
    timeout_sec: int,
    generate_cfg: Dict[str, Any],
    model_id: Optional[str] = None,
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
) -> Assistant:
    """Construct the tool-enabled Assistant backed by the local OpenAI server.

    Resolves the model id when not supplied, opts in the file-write tool
    when ENABLE_FILE_WRITE=true, and prefixes the system prompt with the
    current wall-clock time plus any persisted long-term memory.
    """
    resolved_model = model_id or fetch_model_id(model_server, timeout_sec)
    llm_cfg = {
        'model': resolved_model,
        'model_server': model_server,
        'api_key': os.getenv('OPENAI_API_KEY', 'EMPTY'),
        'model_type': 'qwenvl_oai',
        'generate_cfg': generate_cfg,
    }

    # Tool list is assembled dynamically: write access is opt-in via env var.
    tools = list(DEFAULT_FUNCTION_LIST)
    if os.getenv('ENABLE_FILE_WRITE', 'False').lower() == 'true':
        import agent_runtime.write_tools  # noqa: F401  side effect: register_tool registration
        tools.append('write_file')

    # System prompt order: realtime clock -> long-term memory -> base rules.
    persistent_memory = get_injected_memory()
    now = datetime.datetime.now()
    current_time = now.strftime("%Y年%m月%d日 %H:%M:%S")
    weekdays = ["一", "二", "三", "四", "五", "六", "日"]
    dynamic_context = f"【系统实时状态】\n当前时间:{current_time},星期{weekdays[now.weekday()]}。\n"
    full_prompt = dynamic_context + persistent_memory + system_prompt

    return Assistant(
        name='Qwen3.5-9B-ToolHub-8080',
        description='8080 网页工具代理',
        llm=llm_cfg,
        function_list=tools,
        system_message=full_prompt,
    )
|
||||
|
||||
def to_content_items(content: Any) -> Union[str, List[ContentItem]]:
    """Convert OpenAI message content into qwen-agent ContentItem objects.

    Plain strings pass through unchanged; list parts become text or
    image items, with unknown part shapes summarized as compact JSON.
    Returns '' when a list yields no items at all.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return str(content)

    converted: List[ContentItem] = []
    for part in content:
        if not isinstance(part, dict):
            converted.append(ContentItem(text=str(part)))
            continue
        part_type = part.get('type')
        if part_type in (None, 'text', 'input_text'):
            text_value = part.get('text', '')
            if text_value:
                converted.append(ContentItem(text=str(text_value)))
            continue
        uri = _extract_image_uri(part)
        if uri:
            converted.append(ContentItem(image=uri))
            continue
        # Unrecognized shape: keep a compact JSON summary instead of dropping it.
        converted.append(ContentItem(text=_build_compact_part_text(part, part_type)))
    return converted or ''
|
||||
|
||||
|
||||
def to_qwen_messages(openai_messages: Sequence[Dict[str, Any]]) -> List[Message]:
    """Map OpenAI-style message dicts onto qwen-agent Message objects.

    Entries whose role is not system/user/assistant are silently dropped.
    Raises ValueError when no usable message remains.
    """
    allowed_roles = {'system', 'user', 'assistant'}
    converted = [
        Message(role=str(entry.get('role', '')).strip(),
                content=to_content_items(entry.get('content', '')))
        for entry in openai_messages
        if str(entry.get('role', '')).strip() in allowed_roles
    ]
    if not converted:
        raise ValueError('messages 为空或不包含可用角色')
    return converted
|
||||
|
||||
|
||||
def content_to_text(content: Any) -> str:
    """Flatten message content (string, list of parts, or anything else) to text.

    List elements contribute their string value, their truthy 'text'
    dict entry, or their truthy .text attribute; other elements are
    skipped. Pieces are newline-joined and the result stripped.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return str(content)

    pieces: List[str] = []
    for element in content:
        if isinstance(element, str):
            pieces.append(element)
            continue
        if isinstance(element, dict) and element.get('text'):
            pieces.append(str(element['text']))
            continue
        attr_text = getattr(element, 'text', None)
        if attr_text:
            pieces.append(str(attr_text))
    return '\n'.join(pieces).strip()
|
||||
|
||||
|
||||
def extract_answer_and_reasoning(messages: Sequence[Message]) -> Dict[str, str]:
    """Collect the final assistant answer and accumulated reasoning text.

    The answer is the content of the last assistant message that has
    any; reasoning_content from every assistant message is concatenated
    with newlines. Non-assistant messages are ignored.
    """
    latest_answer = ''
    reasoning_chunks: List[str] = []
    for msg in messages:
        if getattr(msg, 'role', '') != 'assistant':
            continue
        body = content_to_text(msg.get('content', ''))
        if body:
            latest_answer = body
        thoughts = content_to_text(msg.get('reasoning_content', ''))
        if thoughts:
            reasoning_chunks.append(thoughts)
    return {'answer': latest_answer, 'reasoning': '\n'.join(reasoning_chunks).strip()}
|
||||
|
||||
|
||||
def run_chat_completion(payload: Dict[str, Any], model_server: str, timeout_sec: int) -> Dict[str, str]:
    """Run one non-streaming chat completion through the tool agent.

    Returns {'model', 'answer', 'reasoning'}. Raises ValueError on a
    malformed payload and RuntimeError when the agent yields no output.
    """
    openai_messages = payload.get('messages')
    if not isinstance(openai_messages, list):
        raise ValueError('messages 必须是数组')

    model_id = fetch_model_id(model_server, timeout_sec)
    agent = build_agent(model_server, timeout_sec, extract_generate_cfg(payload), model_id=model_id)

    # agent.run streams cumulative batches; only the last one matters here.
    last_batch = None
    for batch in agent.run(to_qwen_messages(openai_messages)):
        last_batch = batch
    if not last_batch:
        raise RuntimeError('未收到模型输出')

    extracted = extract_answer_and_reasoning(last_batch)
    return {'model': model_id, 'answer': extracted['answer'], 'reasoning': extracted['reasoning']}
|
||||
|
||||
|
||||
def build_sse_chunk(
    chat_id: str,
    created: int,
    model: str,
    delta: Dict[str, Any],
    finish_reason: Optional[str] = None,
    timings: Optional[Dict[str, Any]] = None,
) -> bytes:
    """Encode one OpenAI-compatible chat.completion.chunk as an SSE frame.

    The optional llama.cpp-style 'timings' dict is attached only when
    truthy. Output is ``data: <json>\\n\\n`` encoded as UTF-8 bytes.
    """
    body: Dict[str, Any] = {
        'id': chat_id,
        'object': 'chat.completion.chunk',
        'created': created,
        'model': model,
        'choices': [{'index': 0, 'delta': delta, 'finish_reason': finish_reason}],
    }
    if timings:
        body['timings'] = timings
    serialized = json.dumps(body, ensure_ascii=False)
    return f"data: {serialized}\n\n".encode('utf-8')
|
||||
|
||||
|
||||
def text_delta(previous: str, current: str) -> str:
    """Return the newly-appended suffix of *current* relative to *previous*.

    Falls back to the whole of *current* when it no longer extends
    *previous* (e.g. the upstream text was rewritten from scratch).
    """
    if not current:
        return ''
    return current[len(previous):] if current.startswith(previous) else current
|
||||
|
||||
|
||||
def model_base_url(model_server: str) -> str:
    """Derive the server root from an OpenAI-style base URL.

    Strips a trailing '/v1' when present; otherwise strips trailing
    slashes. Needed because /tokenize lives outside the /v1 prefix.
    """
    suffix = '/v1'
    if model_server.endswith(suffix):
        return model_server[:-len(suffix)]
    return model_server.rstrip('/')
|
||||
|
||||
|
||||
def count_text_tokens(model_server: str, timeout_sec: int, text: str) -> int:
    """Count tokens in *text* via the llama.cpp-style /tokenize endpoint.

    Empty text short-circuits to 0 without a network call. Raises
    ValueError when the endpoint does not return a token list.
    """
    if not text:
        return 0
    response = requests.post(
        f'{model_base_url(model_server)}/tokenize',
        json={'content': text},
        timeout=timeout_sec,
    )
    response.raise_for_status()
    tokens = response.json().get('tokens')
    if not isinstance(tokens, list):
        raise ValueError('tokenize 返回格式异常')
    return len(tokens)
|
||||
|
||||
|
||||
def build_live_timings(token_count: int, elapsed_sec: float) -> Dict[str, Any]:
    """Build a llama.cpp-style 'timings' payload for live token-rate display.

    Prompt and cache fields are zeroed (not tracked here); elapsed_sec
    is clamped to a tiny positive value to avoid division by zero.
    """
    duration = elapsed_sec if elapsed_sec > 0 else 1e-6
    return {
        'prompt_n': 0,
        'prompt_ms': 0,
        'predicted_n': token_count,
        'predicted_ms': duration * 1000.0,
        'predicted_per_second': token_count / duration,
        'cache_n': 0,
    }
|
||||
|
||||
|
||||
def merge_generated_text(reasoning: str, answer: str) -> str:
    """Combine reasoning and answer into one string for token counting.

    Joins with a newline when both exist; otherwise returns whichever
    is non-empty (or '' when neither is).
    """
    if reasoning and answer:
        return '\n'.join((reasoning, answer))
    return reasoning or answer
|
||||
|
||||
|
||||
def stream_chat_completion(payload: Dict[str, Any], model_server: str, timeout_sec: int) -> Iterable[bytes]:
    """Stream a chat completion as OpenAI-compatible SSE byte frames.

    Yields a role chunk, then incremental reasoning_content / content
    deltas as the agent runs, periodic live 'timings' chunks (throttled
    by TIMINGS_EMIT_INTERVAL_SEC, token counts fetched from /tokenize),
    a final 'stop' chunk, and the ``data: [DONE]`` sentinel.

    Raises ValueError when payload['messages'] is not a list.
    """
    openai_messages = payload.get('messages')
    if not isinstance(openai_messages, list):
        raise ValueError('messages 必须是数组')

    model_id = fetch_model_id(model_server, timeout_sec)
    agent = build_agent(model_server, timeout_sec, extract_generate_cfg(payload), model_id=model_id)
    qwen_messages = to_qwen_messages(openai_messages)

    now = int(time.time())
    chat_id = f'chatcmpl-{uuid.uuid4().hex}'
    # Opening chunk announces the assistant role, per the OpenAI SSE contract.
    yield build_sse_chunk(chat_id, now, model_id, {'role': 'assistant'})

    previous_answer = ''
    previous_reasoning = ''
    started_at = time.perf_counter()
    last_timing_at = started_at
    last_reported_tokens = -1  # -1 guarantees the first timing chunk is sent
    last_counted_text = ''
    # agent.run yields cumulative snapshots; diff against the previous
    # snapshot so only newly generated suffixes are emitted downstream.
    for batch in agent.run(qwen_messages):
        texts = extract_answer_and_reasoning(batch)
        answer = texts['answer']
        reasoning = texts['reasoning']

        reasoning_inc = text_delta(previous_reasoning, reasoning)
        if reasoning_inc:
            yield build_sse_chunk(chat_id, now, model_id, {'reasoning_content': reasoning_inc})

        answer_inc = text_delta(previous_answer, answer)
        if answer_inc:
            yield build_sse_chunk(chat_id, now, model_id, {'content': answer_inc})

        # Live timings are throttled: only when the text actually grew and
        # enough wall time has passed since the previous timing emission.
        generated_text = merge_generated_text(reasoning, answer)
        current_time = time.perf_counter()
        should_emit_timing = (
            generated_text
            and generated_text != last_counted_text
            and (current_time - last_timing_at) >= TIMINGS_EMIT_INTERVAL_SEC
        )
        if should_emit_timing:
            # Token counting costs an HTTP round-trip — hence the throttle.
            token_count = count_text_tokens(model_server, timeout_sec, generated_text)
            if token_count != last_reported_tokens:
                timings = build_live_timings(token_count, current_time - started_at)
                yield build_sse_chunk(chat_id, now, model_id, {}, timings=timings)
                last_reported_tokens = token_count
            last_counted_text = generated_text
            last_timing_at = current_time

        previous_reasoning = reasoning
        previous_answer = answer

    # Final timing pass so the reported totals cover the complete output,
    # even when the last batch fell inside the throttle window.
    final_generated_text = merge_generated_text(previous_reasoning, previous_answer)
    if final_generated_text and final_generated_text != last_counted_text:
        final_time = time.perf_counter()
        token_count = count_text_tokens(model_server, timeout_sec, final_generated_text)
        if token_count != last_reported_tokens:
            timings = build_live_timings(token_count, final_time - started_at)
            yield build_sse_chunk(chat_id, now, model_id, {}, timings=timings)

    yield build_sse_chunk(chat_id, now, model_id, {}, 'stop')
    yield b'data: [DONE]\n\n'
|
||||
|
||||
|
||||
def build_non_stream_response(answer: str, model: str, reasoning: str = '') -> Dict[str, Any]:
    """Wrap a finished answer in an OpenAI chat.completion response dict.

    reasoning_content is attached only when reasoning text exists; token
    usage is not tracked here and is reported as zeros.
    """
    assistant_message: Dict[str, Any] = {'role': 'assistant', 'content': answer}
    if reasoning:
        assistant_message['reasoning_content'] = reasoning
    return {
        'id': f'chatcmpl-{uuid.uuid4().hex}',
        'object': 'chat.completion',
        'created': int(time.time()),
        'model': model,
        'choices': [{
            'index': 0,
            'message': assistant_message,
            'finish_reason': 'stop',
        }],
        'usage': {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0},
    }
|
||||
|
||||
|
||||
def sse_lines(answer: str, model: str, reasoning: str = '') -> Iterable[bytes]:
    """Replay an already-complete answer as a minimal SSE chunk sequence.

    Emits: role chunk, optional reasoning_content chunk, the full answer
    as one content chunk, a stop chunk, then the [DONE] sentinel.
    """
    created = int(time.time())
    chat_id = f'chatcmpl-{uuid.uuid4().hex}'

    def _chunk(delta: Dict[str, Any], finish: Optional[str]) -> Dict[str, Any]:
        # Shared skeleton for every chunk in the sequence.
        return {
            'id': chat_id,
            'object': 'chat.completion.chunk',
            'created': created,
            'model': model,
            'choices': [{'index': 0, 'delta': delta, 'finish_reason': finish}],
        }

    sequence = [_chunk({'role': 'assistant'}, None)]
    if reasoning:
        sequence.append(_chunk({'reasoning_content': reasoning}, None))
    sequence.append(_chunk({'content': answer}, None))
    sequence.append(_chunk({}, 'stop'))

    for entry in sequence:
        yield f"data: {json.dumps(entry, ensure_ascii=False)}\n\n".encode('utf-8')
    yield b'data: [DONE]\n\n'
|
||||
Reference in New Issue
Block a user