faiss server

This commit is contained in:
2026-04-10 11:55:00 +00:00
parent bc82e3e708
commit 8e39e609cc
30 changed files with 1271 additions and 1048 deletions

14
faiss/.dockerignore Normal file
View File

@@ -0,0 +1,14 @@
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
env/
venv/
.venv/
.env
.git/
.gitignore
docker-compose.yml
faiss_index.bin
__pycache__

3
faiss/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
faiss_index.bin
.vscode
__pycache__

48
faiss/Dockerfile Normal file
View File

@@ -0,0 +1,48 @@
# Dockerfile
# Two-stage build: the "builder" stage installs the Python dependencies, the
# final stage copies the installed packages over and runs the API with uvicorn.
FROM python:3.12-slim AS builder
# Environment variables: do not write .pyc files, keep stdout/stderr unbuffered
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1
WORKDIR /app
# Install system dependencies (needed to compile faiss-cpu)
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
g++ \
libopenblas-dev \
libomp-dev \
&& rm -rf /var/lib/apt/lists/*
# Copy the dependency list and install it
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r requirements.txt
# Final image
FROM python:3.12-slim
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1
WORKDIR /app
# System libraries needed at runtime
# NOTE(review): these are the -dev packages; the runtime stage may only need
# the corresponding shared libraries - confirm before slimming the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
libopenblas-dev \
libomp-dev \
&& rm -rf /var/lib/apt/lists/*
# Copy the installed Python packages (and their console scripts) from the builder
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin
# Copy the project code
COPY . .
# Expose the service port
EXPOSE 8000
# Start command (uses the "api:app" import-string form, which is also compatible with reload)
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]

61
faiss/README.md Normal file
View File

@@ -0,0 +1,61 @@
```shell
conda create -n faiss python=3.10 -y
conda activate faiss
conda install -c pytorch faiss-cpu
```
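API 解释(以下示例假设 `d`、`xb`、`xq`、`k`、`ids`、`M`、`nlist`、`nbits`、`nprobe`、`ef_search` 等变量已预先定义;`safe_train` 是示例中使用的训练辅助函数,本片段未给出其实现)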
```py
import numpy as np
import faiss
# FLATL2
index = faiss.IndexFlatL2(d)
index.add(xb)
D, I = index.search(xq, k)
# FlatIP
index = faiss.IndexFlatIP(d)
index.add(xb2)
D, I = index.search(xq2, k)
# IDMap
base = faiss.IndexFlatL2(d)
index = faiss.IndexIDMap(base)
index.add_with_ids(xb, ids)
D, I = index.search(xq, k)
# HNSWFlat
index = faiss.IndexHNSWFlat(d, M)
index.hnsw.efSearch = ef_search
index.add(xb)
D, I = index.search(xq, k)
# IVFFlat
quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
safe_train(index, xb, "IndexIVFFlat")
index.add(xb)
index.nprobe = nprobe
D, I = index.search(xq, k)
# PQ
index = faiss.IndexPQ(d, M, nbits)
safe_train(index, xb, f"IndexPQ(M={M}, nbits={nbits})")
index.add(xb)
D, I = index.search(xq, k)
# IVFPQ
quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFPQ(quantizer, d, nlist, M, nbits)
safe_train(index, xb, f"IndexIVFPQ(nlist={nlist}, M={M}, nbits={nbits})")
index.add(xb)
index.nprobe = nprobe
D, I = index.search(xq, k)
# LSH
index = faiss.IndexLSH(d, nbits)
index.add(xb)
D, I = index.search(xq, k)
```

44
faiss/api.py Normal file
View File

@@ -0,0 +1,44 @@
# api.py
from fastapi import FastAPI, Depends, HTTPException
from models import EmbeddingInput, SearchInput
from faiss_manager import faiss_manager
from config import get_settings
# Resolve the (cached) settings object once at import time.
settings = get_settings()

# ASGI application object; the route handlers below register themselves on it.
app = FastAPI(
    title="FAISS 服务",
    description="向量插入 + 相似搜索 + 持久化",
    version="1.0.0",
)
def log_business(message: str):
    """Print *message* to stdout (flushed) when ``ENABLE_REQUEST_LOGS`` is on.

    Does nothing when the setting is disabled.
    """
    if not settings.ENABLE_REQUEST_LOGS:
        return
    print(message, flush=True)
@app.post("/insert")
async def insert(data: EmbeddingInput):
    """Add one embedding to the index and return the id assigned to it.

    Args:
        data: Request body carrying the embedding vector.

    Returns:
        ``{"id": <int>}`` with the value returned by ``faiss_manager.insert``.

    Raises:
        HTTPException: 400 when ``faiss_manager.insert`` rejects the input with
            a ``ValueError`` (e.g. wrong embedding length).
    """
    try:
        vector_id = faiss_manager.insert(data.embedding)
    except ValueError as e:
        # Only input-validation failures are the client's fault. Anything else
        # is left to propagate so it is not misreported as a bad request and
        # its traceback is not swallowed.
        raise HTTPException(status_code=400, detail=str(e)) from e
    log_business(f"[faiss] insert id={vector_id}")
    return {"id": vector_id}
@app.post("/search")
async def search(data: SearchInput):
    """Return the ``k`` nearest stored vectors for the query embedding.

    Args:
        data: Request body with the query embedding and the number of
            neighbours ``k``.

    Returns:
        The dict produced by ``faiss_manager.search`` (it is logged via its
        ``ids`` and ``similarity_scores`` keys).

    Raises:
        HTTPException: 400 when ``k`` is not positive or when
            ``faiss_manager.search`` rejects the input with a ``ValueError``.
    """
    # Reject a non-positive k up front with a clear message instead of relying
    # on whatever error the index raises for it.
    if data.k < 1:
        raise HTTPException(
            status_code=400,
            detail=f"k 必须为正整数,当前为 {data.k}",
        )
    try:
        result = faiss_manager.search(data.embedding, data.k)
    except ValueError as e:
        # Only input-validation failures map to a client error; other
        # exceptions propagate instead of being reported as a bad request.
        raise HTTPException(status_code=400, detail=str(e)) from e
    log_business(
        f"[faiss] search ids={result['ids']} similarity_scores={result['similarity_scores']}",
    )
    return result
@app.post("/persist")
async def persist():
    """Write the current index to disk via ``faiss_manager.persist``.

    Returns:
        A fixed success payload once the manager call has returned.
    """
    faiss_manager.persist()
    response = {"status": "success", "message": "索引已持久化"}
    return response

34
faiss/config.py Normal file
View File

@@ -0,0 +1,34 @@
# config.py
from pydantic_settings import BaseSettings
from functools import lru_cache
class Settings(BaseSettings):
    """Service configuration for the FAISS server and its FastAPI app.

    Every field has a default declared here; the class is a pydantic-settings
    ``BaseSettings`` configured to read a UTF-8 ``.env`` file and to ignore
    unknown keys.
    """

    # pydantic v2-style configuration. This replaces the inner ``class Config``,
    # which is the deprecated v1 spelling of the same three options.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "extra": "ignore",
    }

    # --- FAISS (tuned for an LLM similar-question cache) ---
    # Must match the embedding model, e.g. bge-large=1024,
    # text-embedding-3-large=3072.
    FAISS_DIM: int = 1024
    FAISS_INDEX_PATH: str = "faiss_index.bin"
    # Index type; defaults to HNSW (the recommended choice).
    FAISS_INDEX_TYPE: str = "HNSW"

    # --- HNSW-only parameters (speed / accuracy trade-off) ---
    # Connections per layer (16-64); larger is more accurate but uses more memory.
    HNSW_M: int = 32
    # Build quality (100-400).
    HNSW_EF_CONSTRUCTION: int = 200
    # Query accuracy (32-128); larger is more accurate but slightly slower.
    HNSW_EF_SEARCH: int = 64

    # Use cosine similarity (recommended for text embeddings):
    # True = normalise vectors automatically and use an inner-product index.
    USE_COSINE_SIMILARITY: bool = True
    # Whether to print insert/search business logs.
    ENABLE_REQUEST_LOGS: bool = True

    # --- FastAPI ---
    APP_HOST: str = "0.0.0.0"
    APP_PORT: int = 8000
    APP_TITLE: str = "FAISS 相似问题缓存服务"
    APP_DESCRIPTION: str = "LLM 对话语义缓存 - 减少 token 消耗"
@lru_cache()
def get_settings() -> Settings:
    """Return the ``Settings`` instance, memoised by ``lru_cache``.

    The function takes no arguments, so after the first call the cached
    object is returned instead of building a new one.
    """
    instance = Settings()
    return instance

13
faiss/docker-compose.yml Normal file
View File

@@ -0,0 +1,13 @@
services:
  faiss:
    build: .
    container_name: faiss-service
    restart: unless-stopped
    ports:
      - "8451:8000"
    environment:
      - FAISS_DIM=1024
      - APP_PORT=8000
    volumes:
      # Persist the index file on the host.
      # NOTE(review): if ./faiss_index.bin does not exist on the host yet, a
      # bind mount may create a directory at that path - confirm how the first
      # start is expected to work.
      - ./faiss_index.bin:/app/faiss_index.bin
      # Optional: mount the configuration file.
      - ./.env:/app/.env

87
faiss/faiss_manager.py Normal file
View File

@@ -0,0 +1,87 @@
# faiss_manager.py
import os
import numpy as np
import faiss
from config import get_settings
# Settings object resolved once at import time; FaissManager reads its
# FAISS_* / HNSW_* / USE_COSINE_SIMILARITY values from here.
settings = get_settings()
class FaissManager:
    """Owns a single FAISS index: load or create it, insert, search, persist.

    Configuration comes from the module-level ``settings`` object. When cosine
    similarity is enabled, vectors are L2-normalised before they are added or
    searched.
    """

    def __init__(self):
        """Read the configuration and load (or create) the index."""
        self.dim = settings.FAISS_DIM
        self.index_path = settings.FAISS_INDEX_PATH
        self.use_cosine = settings.USE_COSINE_SIMILARITY
        self.index = None
        self._load_or_create_index()

    def _load_or_create_index(self):
        """Load the index from ``index_path`` if present, else build a new one.

        A loaded index wins over the configured dimension: if they disagree,
        ``self.dim`` is updated to the index's dimension so the length checks
        in ``insert``/``search`` match what the index actually accepts.
        """
        if os.path.exists(self.index_path):
            self.index = faiss.read_index(self.index_path)
            print(f"✅ 加载已有索引:{self.index.ntotal} 个向量,维度={self.index.d}")
            if self.index.d != self.dim:
                # Without this, validation would use the configured dimension
                # while the index expects a different one.
                print(
                    f"⚠️ 索引维度 {self.index.d} 与配置 FAISS_DIM={self.dim} 不一致,以索引维度为准"
                )
                self.dim = self.index.d
            # NOTE(review): the metric of a loaded index is not compared with
            # USE_COSINE_SIMILARITY - confirm they are kept consistent.
            return

        # Create a new index.
        if settings.FAISS_INDEX_TYPE == "HNSW":
            if self.use_cosine:
                self.index = faiss.IndexHNSWFlat(
                    self.dim, settings.HNSW_M, faiss.METRIC_INNER_PRODUCT
                )
                print("✅ 创建 HNSWIP 索引(余弦相似度)")
            else:
                self.index = faiss.IndexHNSWFlat(self.dim, settings.HNSW_M)
                print("✅ 创建 HNSWFlat 索引(L2 距离)")
            # Apply the HNSW build/search parameters.
            self.index.hnsw.efConstruction = settings.HNSW_EF_CONSTRUCTION
            self.index.hnsw.efSearch = settings.HNSW_EF_SEARCH
            print(f" HNSW 参数: M={settings.HNSW_M}, efConstruction={settings.HNSW_EF_CONSTRUCTION}, efSearch={settings.HNSW_EF_SEARCH}")
        elif settings.FAISS_INDEX_TYPE == "FlatIP" and self.use_cosine:
            self.index = faiss.IndexFlatIP(self.dim)
            print("✅ 创建 FlatIP 索引(精确余弦)")
        else:
            # Fallback: exact L2 (kept for compatibility with older configs).
            self.index = faiss.IndexFlatL2(self.dim)
            print("✅ 创建 FlatL2 索引(精确欧式)")

    def _normalize(self, embedding: list[float]) -> np.ndarray:
        """Return *embedding* as a float32 array scaled to unit L2 norm.

        A zero vector is returned unchanged (no division by zero).
        """
        vec = np.array(embedding, dtype=np.float32)
        norm = np.linalg.norm(vec)
        return vec / norm if norm > 0 else vec

    def _to_row(self, embedding: list[float]) -> np.ndarray:
        """Convert *embedding* to the single-row float32 matrix given to the index."""
        if self.use_cosine:
            vec = self._normalize(embedding)
        else:
            vec = np.array(embedding, dtype=np.float32)
        return vec.reshape(1, -1)

    def insert(self, embedding: list[float]) -> int:
        """Add one vector to the index and return its id.

        The id is the index's ``ntotal`` read just before the vector is added.

        Raises:
            ValueError: if ``len(embedding)`` differs from ``self.dim``.
        """
        if len(embedding) != self.dim:
            raise ValueError(f"Embedding 维度错误,应为 {self.dim}")
        row = self._to_row(embedding)
        idx = self.index.ntotal
        self.index.add(row)
        return idx

    def search(self, embedding: list[float], k: int = 5):
        """Search for the ``k`` nearest vectors.

        Returns:
            A dict with ``ids``, ``distances`` and ``similarity_scores`` lists
            taken from the first result row. With cosine enabled the scores
            are the raw index values (larger is more similar, 1.0 = identical);
            otherwise each score is ``1 - distance``.
            NOTE(review): FAISS is expected to pad with id -1 when fewer than
            ``k`` vectors are available - callers should confirm and handle it.

        Raises:
            ValueError: if ``k`` is smaller than 1 or the embedding length
                differs from ``self.dim``.
        """
        if k < 1:
            # Fail with a clear message before the index is touched.
            raise ValueError(f"k 必须为正整数,当前为 {k}")
        if len(embedding) != self.dim:
            raise ValueError(f"Embedding 维度错误,应为 {self.dim}")
        row = self._to_row(embedding)
        distances, indices = self.index.search(row, k)
        dists = distances[0].tolist()
        if self.use_cosine:
            scores = list(dists)
        else:
            scores = [1 - d for d in dists]
        return {
            "ids": indices[0].tolist(),
            "distances": dists,
            "similarity_scores": scores,
        }

    def persist(self):
        """Write the index to ``index_path`` and return ``True``."""
        faiss.write_index(self.index, self.index_path)
        print(f"💾 索引已保存 → {self.index_path}(共 {self.index.ntotal} 个向量)")
        return True
# Singleton: one FaissManager is constructed when this module is imported.
faiss_manager = FaissManager()

19
faiss/main.py Normal file
View File

@@ -0,0 +1,19 @@
# main.py
import uvicorn
from config import get_settings
settings = get_settings()

# Single source of truth for auto-reload: both the startup banner and the
# uvicorn call read this flag, so the banner cannot disagree with the server.
RELOAD_ENABLED = True

if __name__ == "__main__":
    # Development entry point: print a short banner, then serve "api:app".
    print("🚀 启动 FAISS 服务...")
    print(f" 地址: http://{settings.APP_HOST}:{settings.APP_PORT}")
    print(f" 重载模式: {'已开启' if RELOAD_ENABLED else '已关闭'}")
    uvicorn.run(
        "api:app",
        host=settings.APP_HOST,
        port=settings.APP_PORT,
        reload=RELOAD_ENABLED,
        reload_dirs=["."],
        log_level="info",
    )

10
faiss/models.py Normal file
View File

@@ -0,0 +1,10 @@
# models.py
from pydantic import BaseModel
from typing import List
class EmbeddingInput(BaseModel):
    # Request body for an insert: a single embedding vector.
    embedding: list[float]
class SearchInput(BaseModel):
    # Request body for a search: the query embedding and the number of
    # neighbours to return (5 when omitted).
    embedding: list[float]
    k: int = 5

6
faiss/requirements.txt Normal file
View File

@@ -0,0 +1,6 @@
fastapi>=0.115.0
uvicorn[standard]>=0.30.0
pydantic-settings>=2.0.0
numpy>=1.26.0
faiss-cpu>=1.10.0
python-dotenv>=1.0.0