AI 工具开发实战(8):开发一个可复用的 AI 聊天机器人框架——30 行代码部署一个对话服务

做了这么多工具,这篇做一个框架级的东西——一个可复用的 AI 聊天机器人框架。

不是再做一个聊天 UI,而是提供一个后端服务框架,支持多模型切换、流式输出、对话管理,可以嵌入任何项目中。

框架做什么

from aichat import ChatBot

bot = ChatBot(model="deepseek")
bot.chat("你好")  # synchronous call: returns the complete reply as a string

# chat_stream is a generator function: no request is made until it is
# iterated, so the tokens have to be consumed in a loop.
for token in bot.chat_stream("写一首诗"):
    print(token, end="", flush=True)

# Deploy as an HTTP API
bot.serve(port=8000)
# → POST /chat → {"reply": "..."}
# → POST /chat/stream → SSE token stream

项目结构

aichat/
├── aichat/
│   ├── __init__.py
│   ├── bot.py           # 核心类
│   ├── models.py        # 模型配置
│   └── server.py        # FastAPI 部署
├── setup.py
├── requirements.txt
└── .env

核心实现

# aichat/bot.py
from openai import OpenAI
from typing import Generator, List, Dict

class ChatBot:
    """Reusable chat bot on top of an OpenAI-compatible chat-completions client.

    The whole conversation is kept in ``self.history`` as a list of
    ``{"role": ..., "content": ...}`` dicts; every request sends a trimmed
    view of that list (see ``_trim_history``).
    """

    def __init__(self, model="deepseek", system_prompt=None, temperature=0.7):
        """Create a bot bound to one entry of ``MODELS``.

        Args:
            model: Key into ``MODELS``. An unknown key falls back to the
                ``"deepseek"`` entry.
            system_prompt: Optional system message; when given it becomes the
                first history entry.
            temperature: Sampling temperature forwarded with every request.
        """
        # Local relative import (same pattern as ``serve``): without it this
        # module has no ``MODELS`` name in scope.
        from .models import MODELS

        self.model_name = model
        self.temperature = temperature
        self.history: List[Dict] = []

        cfg = MODELS.get(model, MODELS["deepseek"])
        self.client = OpenAI(
            api_key=cfg["api_key"],
            base_url=cfg["base_url"],
        )
        self.model_id = cfg["model_id"]

        if system_prompt:
            self.history.append({"role": "system", "content": system_prompt})

    def chat(self, message: str) -> str:
        """Send ``message`` and return the complete reply.

        Both the user message and the reply are appended to the history.
        """
        self.history.append({"role": "user", "content": message})

        response = self.client.chat.completions.create(
            model=self.model_id,
            messages=self._trim_history(),
            temperature=self.temperature,
        )
        # The API may return no text content (None); normalise to "" so the
        # declared ``str`` return type holds and history stays measurable.
        reply = response.choices[0].message.content or ""

        self.history.append({"role": "assistant", "content": reply})
        return reply

    def chat_stream(self, message: str) -> Generator[str, None, None]:
        """Send ``message`` and yield the reply piece by piece.

        This is a generator: the request is only made once iteration starts,
        and the assembled reply is appended to the history after the stream
        has been fully consumed.
        """
        self.history.append({"role": "user", "content": message})

        response = self.client.chat.completions.create(
            model=self.model_id,
            messages=self._trim_history(),
            temperature=self.temperature,
            stream=True,
        )

        pieces = []
        for chunk in response:
            # Some chunks may carry an empty ``choices`` list (metadata-only
            # chunks); indexing [0] on those would raise IndexError.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token:
                pieces.append(token)
                yield token

        self.history.append({"role": "assistant", "content": "".join(pieces)})

    def clear(self):
        """Drop the conversation but keep every system message."""
        self.history = [m for m in self.history if m["role"] == "system"]

    @staticmethod
    def _estimate_tokens(message) -> int:
        """Rough token estimate: half the character count of the content."""
        return len(message.get("content") or "") // 2

    def _trim_history(self, max_tokens=4000) -> list:
        """Return the messages to send, limited to roughly ``max_tokens``.

        System messages are always kept and placed first; the remaining
        budget is filled with the most recent non-system messages, in their
        original order. The newest message is kept even when it alone exceeds
        the budget, so the request is never sent with an empty message list.

        Args:
            max_tokens: Approximate budget, measured with the
                characters-divided-by-two estimate.

        Returns:
            A new list; ``self.history`` itself is not modified.
        """
        system = [m for m in self.history if m["role"] == "system"]
        dialogue = [m for m in self.history if m["role"] != "system"]

        # The system prompt counts against the budget but is never dropped.
        total = sum(self._estimate_tokens(m) for m in system)

        kept = []
        for m in reversed(dialogue):
            tokens = self._estimate_tokens(m)
            # ``kept`` being empty means this is the newest message: keep it
            # unconditionally.
            if kept and total + tokens > max_tokens:
                break
            total += tokens
            kept.append(m)
        kept.reverse()
        return system + kept

    def serve(self, host="0.0.0.0", port=8000):
        """Serve this bot over HTTP with FastAPI/uvicorn (blocks until stopped)."""
        from .server import create_app
        import uvicorn
        app = create_app(self)
        uvicorn.run(app, host=host, port=port)
# aichat/models.py
import os

# Connection settings per provider, keyed by the short provider name.
# Each API key is read from its environment variable when this module is
# imported; an unset variable yields an empty string.
MODELS = {
    provider: {
        "api_key": os.environ.get(key_var, ""),
        "base_url": endpoint,
        "model_id": model_id,
    }
    for provider, key_var, endpoint, model_id in (
        (
            "deepseek",
            "DEEPSEEK_API_KEY",
            "https://api.deepseek.com/v1",
            "deepseek-chat",
        ),
        (
            "doubao",
            "DOUBAO_API_KEY",
            "https://ark.cn-beijing.volces.com/api/v3",
            "doubao-2.1-pro",
        ),
        (
            "openai",
            "OPENAI_API_KEY",
            "https://api.openai.com/v1",
            "gpt-5.5",
        ),
    )
}
# aichat/server.py
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

# JSON request body for the endpoints that take a user message.
class ChatRequest(BaseModel):
    # Text of the user's turn; the handlers pass it to the bot unchanged.
    message: str

def create_app(bot):
    """Build the FastAPI application that exposes ``bot`` over HTTP.

    Routes:
        POST /chat         -> ``{"reply": "..."}`` with the complete answer.
        POST /chat/stream  -> ``text/event-stream`` body produced by
                              ``_stream_response``.
        POST /clear        -> calls ``bot.clear()``, returns ``{"status": "ok"}``.

    Every route closes over the single ``bot`` passed in, so all clients
    share one conversation history.

    Args:
        bot: Object providing ``chat``, ``chat_stream`` and ``clear``.

    Returns:
        The configured ``FastAPI`` instance.
    """
    import threading

    app = FastAPI(title="AI Chat")
    # ``bot.chat`` is a blocking call. The handlers below are plain ``def`` so
    # FastAPI runs them in its worker thread pool instead of stalling the
    # event loop; the lock keeps those threads from using the shared bot at
    # the same time. The streaming body is generated after its handler has
    # returned and is not covered by this lock.
    bot_lock = threading.Lock()

    @app.post("/chat")
    def chat(req: ChatRequest):
        with bot_lock:
            reply = bot.chat(req.message)
        return {"reply": reply}

    @app.post("/chat/stream")
    async def chat_stream(req: ChatRequest):
        # Only builds the response object; tokens are pulled lazily while the
        # body is being sent.
        return StreamingResponse(
            _stream_response(bot, req.message),
            media_type="text/event-stream",
        )

    @app.post("/clear")
    def clear():
        with bot_lock:
            bot.clear()
        return {"status": "ok"}

    return app

async def _stream_response(bot, message):
    for token in bot.chat_stream(message):
        yield f"data: {token}\n\n"
    yield "data: [DONE]\n\n"

使用方式

from aichat import ChatBot

# Configure the bot once: provider, persona and sampling temperature.
bot = ChatBot(
    temperature=0.5,
    system_prompt="你是一个 Python 编程助手。回答简洁,给出代码示例。",
    model="deepseek",
)

# Blocking call: the whole answer comes back as one string.
reply = bot.chat("Python 中怎么合并两个字典?")
print(reply)

# Streaming call: print each piece as soon as it arrives.
for token in bot.chat_stream("写一个快速排序"):
    print(token, end="", flush=True)

# Multi-turn conversation: earlier turns stay in the bot's history,
# so the second question is answered with the first one as context.
bot.chat("我是一名后端工程师")
bot.chat("刚才说的技术栈适合什么项目?")

# Expose the same bot as an HTTP API (blocks while the server runs).
bot.serve(port=8000)

总结

AI 聊天框架核心就 4 个能力:
1. 多模型切换(DeepSeek / 豆包 / OpenAI)
2. 同步 + 流式调用
3. 对话上下文管理(自动截断)
4. 一键部署为 API

封装好之后,任何项目都能嵌入 AI 对话能力。


本文是《AI 开发者工具链实战》系列的第 8 篇。
本文由 Zyentor(智元界)原创发布