AI 工具开发实战（8）：开发一个可复用的 AI 聊天机器人框架——30 行代码部署一个对话服务

做了这么多工具，这篇做一个框架级的东西——一个可复用的 AI 聊天机器人框架。

不是再做一个聊天 UI，而是提供一个后端服务框架，支持多模型切换、流式输出、对话管理，可以嵌入任何项目中。

框架做什么

from aichat import ChatBot

bot = ChatBot(model="deepseek")
bot.chat("你好")  # blocking call, returns the complete reply

# chat_stream() is a generator: no request is made until it is iterated.
for token in bot.chat_stream("写一首诗"):
    print(token, end="", flush=True)

# Deploy as an API
bot.serve(port=8000)
# → POST /chat → {"reply": "..."}
# → POST /chat/stream → SSE streaming output

项目结构

aichat/
├── aichat/
│   ├── __init__.py
│   ├── bot.py           # 核心类
│   ├── models.py        # 模型配置
│   └── server.py        # FastAPI 部署
├── setup.py
├── requirements.txt
└── .env

核心实现

# aichat/bot.py
from openai import OpenAI
from typing import Generator, List, Dict

class ChatBot:
    """Reusable chat bot built on an OpenAI-compatible chat-completions client.

    The running conversation lives in ``self.history`` as a list of
    ``{"role": ..., "content": ...}`` dicts; a trimmed copy of it is sent
    with every request.
    """

    def __init__(self, model="deepseek", system_prompt=None, temperature=0.7):
        """Create the API client for *model* and seed the history.

        Args:
            model: Key into ``MODELS``; an unknown key falls back to the
                "deepseek" entry.
            system_prompt: Optional system message stored as the first
                history entry.
            temperature: Sampling temperature passed on every request.
        """
        # MODELS lives in the sibling models module and is not imported at the
        # top of this module; import it here, like serve() does for .server.
        from .models import MODELS

        self.model_name = model
        self.temperature = temperature
        self.history: List[Dict] = []

        cfg = MODELS.get(model, MODELS["deepseek"])
        self.client = OpenAI(
            api_key=cfg["api_key"],
            base_url=cfg["base_url"],
        )
        self.model_id = cfg["model_id"]

        if system_prompt:
            self.history.append({"role": "system", "content": system_prompt})

    def chat(self, message: str) -> str:
        """Send *message* and return the complete reply.

        The user message and the reply are both appended to the history.
        If the request raises, the user message is removed again and the
        exception propagates.
        """
        response = self._request(message)
        # The SDK types ``content`` as optional; normalise to "" so the
        # declared ``str`` return type holds and history never stores None.
        reply = response.choices[0].message.content or ""

        self.history.append({"role": "assistant", "content": reply})
        return reply

    def chat_stream(self, message: str) -> Generator[str, None, None]:
        """Send *message* and yield the reply piece by piece.

        This is a generator: nothing is sent until it is iterated. Whatever
        part of the reply was received is appended to the history when the
        generator finishes, fails, or is closed early.
        """
        response = self._request(message, stream=True)

        pieces = []
        try:
            for chunk in response:
                # Chunks may arrive with an empty ``choices`` list (for
                # example a trailing usage-only chunk); skip those.
                if not chunk.choices:
                    continue
                token = chunk.choices[0].delta.content
                if token:
                    pieces.append(token)
                    yield token
        finally:
            # Runs on normal completion, on error, and when the consumer stops
            # iterating early, so the history never ends on an unanswered
            # user turn.
            self.history.append(
                {"role": "assistant", "content": "".join(pieces)}
            )

    def clear(self):
        """Drop the conversation but keep any system messages."""
        self.history = [m for m in self.history if m["role"] == "system"]

    def _request(self, message: str, stream: bool = False):
        """Append the user turn and call the chat-completions endpoint.

        Returns the SDK response (an iterable of chunks when *stream* is
        true). On failure the user turn is rolled back so that a retry does
        not leave two consecutive user messages in the history.
        """
        user_turn = {"role": "user", "content": message}
        self.history.append(user_turn)
        # Only pass ``stream`` when streaming, so the non-streaming call is
        # made with exactly the same arguments as before.
        extra = {"stream": True} if stream else {}
        try:
            return self.client.chat.completions.create(
                model=self.model_id,
                messages=self._trim_history(),
                temperature=self.temperature,
                **extra,
            )
        except Exception:
            if self.history and self.history[-1] is user_turn:
                self.history.pop()
            raise

    @staticmethod
    def _estimate_tokens(message: Dict) -> int:
        """Rough token estimate: half the character count of the content."""
        return len(message.get("content") or "") // 2

    def _trim_history(self, max_tokens=4000) -> list:
        """Return the messages to send, limited to roughly *max_tokens*.

        System messages are always kept and placed first. The remaining
        budget is filled with the most recent messages, newest first, and the
        result is returned in chronological order. The newest message is
        kept even if it alone exceeds the budget, so the request is never
        empty.
        """
        system = [m for m in self.history if m["role"] == "system"]
        dialogue = [m for m in self.history if m["role"] != "system"]

        budget = max_tokens - sum(self._estimate_tokens(m) for m in system)
        kept = []
        for m in reversed(dialogue):
            cost = self._estimate_tokens(m)
            if kept and cost > budget:
                break
            budget -= cost
            kept.append(m)
        kept.reverse()
        return system + kept

    def serve(self, host="0.0.0.0", port=8000):
        """Serve this bot over HTTP with uvicorn (blocks until shutdown)."""
        from .server import create_app
        import uvicorn
        app = create_app(self)
        uvicorn.run(app, host=host, port=port)

# aichat/models.py
import os

# One row per provider: (name, API-key environment variable, base URL, model id).
_PROVIDERS = (
    ("deepseek", "DEEPSEEK_API_KEY", "https://api.deepseek.com/v1", "deepseek-chat"),
    ("doubao", "DOUBAO_API_KEY", "https://ark.cn-beijing.volces.com/api/v3", "doubao-2.1-pro"),
    ("openai", "OPENAI_API_KEY", "https://api.openai.com/v1", "gpt-5.5"),
)

# Connection settings keyed by provider name. API keys are read from the
# environment once, when this module is imported; a missing variable yields "".
MODELS = {
    name: {
        "api_key": os.getenv(env_var, ""),
        "base_url": base_url,
        "model_id": model_id,
    }
    for name, env_var, base_url, model_id in _PROVIDERS
}

# aichat/server.py
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

class ChatRequest(BaseModel):
    """JSON request body for the /chat and /chat/stream endpoints."""

    # The user's message text for this turn.
    message: str

def create_app(bot):
    """Build a FastAPI app that exposes *bot* over HTTP.

    Routes:
        POST /chat         -> {"reply": "..."}
        POST /chat/stream  -> Server-Sent Events stream of the reply
        POST /clear        -> {"status": "ok"}

    Every request talks to the same *bot* instance, so all clients share a
    single conversation history.
    """
    import threading

    app = FastAPI(title="AI Chat")
    # bot.chat() appends to the shared history before and after the upstream
    # call. Holding a lock for the whole turn keeps concurrent /chat requests
    # from interleaving their user/assistant messages.
    turn_lock = threading.Lock()

    @app.post("/chat")
    def chat(req: ChatRequest):
        # Deliberately a plain ``def``: bot.chat() blocks on the network, and
        # FastAPI runs non-async handlers in a worker thread rather than on
        # the event loop, so other requests keep being served meanwhile.
        with turn_lock:
            reply = bot.chat(req.message)
        return {"reply": reply}

    @app.post("/chat/stream")
    async def chat_stream(req: ChatRequest):
        return StreamingResponse(
            _stream_response(bot, req.message),
            media_type="text/event-stream",
        )

    @app.post("/clear")
    async def clear():
        bot.clear()
        return {"status": "ok"}

    return app

async def _stream_response(bot, message):
    for token in bot.chat_stream(message):
        yield f"data: {token}\n\n"
    yield "data: [DONE]\n\n"

使用方式

from aichat import ChatBot

# Build a bot with a custom persona and a lower temperature
bot = ChatBot(
    temperature=0.5,
    system_prompt="你是一个 Python 编程助手。回答简洁，给出代码示例。",
    model="deepseek",
)

# Blocking call: the whole reply comes back at once
reply = bot.chat("Python 中怎么合并两个字典？")
print(reply)

# Streaming call: print each piece as soon as it arrives
for piece in bot.chat_stream("写一个快速排序"):
    print(piece, end="", flush=True)

# Multi-turn conversation: earlier turns stay in the bot's history
for question in ("我是一名后端工程师", "刚才说的技术栈适合什么项目？"):
    bot.chat(question)

# Expose the same bot as an HTTP API
bot.serve(port=8000)

总结

AI 聊天框架核心就 4 个能力：
1. 多模型切换（DeepSeek / 豆包 / OpenAI）
2. 同步 + 流式调用
3. 对话上下文管理（自动截断）
4. 一键部署为 API

封装好之后，任何项目都能嵌入 AI 对话能力。

本文是《AI 开发者工具链实战》系列的第 8 篇。
本文由 Zyentor（智元界）原创发布

AI 工具开发实战（8）：开发一个可复用的 AI 聊天机器人框架——30 行代码部署一个对话服务

AI 工具开发实战（8）：开发一个可复用的 AI 聊天机器人框架——30 行代码部署一个对话服务

框架做什么

项目结构

核心实现

使用方式

总结

相关推荐

2026 AI 开发者生存指南（5）：AI Agent 框架对比——LangChain、LangGraph、CrewAI、Dify 怎么选？

2026 AI 开发者生存指南（1）：AI 开发生态全景图——模型、工具、框架全梳理

AI 全栈开发实战（6）：向量检索与 RAG 问答 —— Qdrant 检索、Re-rank、流式输出

📖 更多原创