faiss server
This commit is contained in:
14
faiss/.dockerignore
Normal file
14
faiss/.dockerignore
Normal file
@@ -0,0 +1,14 @@
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
.Python
|
||||
env/
|
||||
venv/
|
||||
.venv/
|
||||
.env
|
||||
.git/
|
||||
.gitignore
|
||||
docker-compose.yml
|
||||
faiss_index.bin
|
||||
__pycache__
|
||||
3
faiss/.gitignore
vendored
Normal file
3
faiss/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
faiss_index.bin
|
||||
.vscode
|
||||
__pycache__
|
||||
48
faiss/Dockerfile
Normal file
48
faiss/Dockerfile
Normal file
@@ -0,0 +1,48 @@
|
||||
# Dockerfile
|
||||
FROM python:3.12-slim AS builder
|
||||
|
||||
# 设置环境变量
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# 安装系统依赖(faiss-cpu 编译需要)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
g++ \
|
||||
libopenblas-dev \
|
||||
libomp-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# 复制依赖文件并安装
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# 最终镜像
|
||||
FROM python:3.12-slim
|
||||
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# 运行时需要的系统库
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libopenblas-dev \
|
||||
libomp-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# 从 builder 复制已安装的 Python 包
|
||||
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
|
||||
COPY --from=builder /usr/local/bin /usr/local/bin
|
||||
|
||||
# 复制项目代码
|
||||
COPY . .
|
||||
|
||||
# 暴露端口
|
||||
EXPOSE 8000
|
||||
|
||||
# Start command: the app is referenced by its import string "api:app"; auto-reload is NOT enabled in the container
|
||||
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
61
faiss/README.md
Normal file
61
faiss/README.md
Normal file
@@ -0,0 +1,61 @@
|
||||
```shell
|
||||
conda create -n faiss python=3.10 -y
|
||||
conda activate faiss
|
||||
conda install -c pytorch faiss-cpu
|
||||
```
|
||||
|
||||
|
||||
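API 解释(以下为示意代码:`d`、`xb`、`xq`、`ids`、`k`、`M`、`nbits`、`nlist`、`nprobe` 等变量需自行定义;`safe_train` 是自定义的训练辅助函数,并非 FAISS 自带 API,可用 `index.train(xb)` 代替)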
|
||||
```py
|
||||
import numpy as np
|
||||
import faiss
|
||||
|
||||
# FLATL2
|
||||
index = faiss.IndexFlatL2(d)
|
||||
index.add(xb)
|
||||
D, I = index.search(xq, k)
|
||||
|
||||
# FlatIP
|
||||
index = faiss.IndexFlatIP(d)
|
||||
index.add(xb2)
|
||||
D, I = index.search(xq2, k)
|
||||
|
||||
# IDMap
|
||||
base = faiss.IndexFlatL2(d)
|
||||
index = faiss.IndexIDMap(base)
|
||||
index.add_with_ids(xb, ids)
|
||||
D, I = index.search(xq, k)
|
||||
|
||||
# HNSWFlat
|
||||
index = faiss.IndexHNSWFlat(d, M)
|
||||
index.hnsw.efSearch = ef_search
|
||||
index.add(xb)
|
||||
D, I = index.search(xq, k)
|
||||
|
||||
# IVFFlat
|
||||
quantizer = faiss.IndexFlatL2(d)
|
||||
index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
|
||||
safe_train(index, xb, "IndexIVFFlat")
|
||||
index.add(xb)
|
||||
index.nprobe = nprobe
|
||||
D, I = index.search(xq, k)
|
||||
|
||||
# PQ
|
||||
index = faiss.IndexPQ(d, M, nbits)
|
||||
safe_train(index, xb, f"IndexPQ(M={M}, nbits={nbits})")
|
||||
index.add(xb)
|
||||
D, I = index.search(xq, k)
|
||||
|
||||
# IVFPQ
|
||||
quantizer = faiss.IndexFlatL2(d)
|
||||
index = faiss.IndexIVFPQ(quantizer, d, nlist, M, nbits)
|
||||
safe_train(index, xb, f"IndexIVFPQ(nlist={nlist}, M={M}, nbits={nbits})")
|
||||
index.add(xb)
|
||||
index.nprobe = nprobe
|
||||
D, I = index.search(xq, k)
|
||||
|
||||
# LSH
|
||||
index = faiss.IndexLSH(d, nbits)
|
||||
index.add(xb)
|
||||
D, I = index.search(xq, k)
|
||||
```
|
||||
44
faiss/api.py
Normal file
44
faiss/api.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# api.py
|
||||
from fastapi import FastAPI, Depends, HTTPException
|
||||
from models import EmbeddingInput, SearchInput
|
||||
from faiss_manager import faiss_manager
|
||||
from config import get_settings
|
||||
|
||||
settings = get_settings()

# Build the application from the shared settings so that APP_TITLE and
# APP_DESCRIPTION (declared in config.Settings and overridable through the
# environment or .env) are actually honoured instead of being dead config.
app = FastAPI(
    title=settings.APP_TITLE,
    description=settings.APP_DESCRIPTION,
    version="1.0.0",
)
|
||||
|
||||
def log_business(message: str):
    """Print *message* immediately, but only when request logging is enabled.

    The switch is ``settings.ENABLE_REQUEST_LOGS``; output is flushed so the
    line shows up right away.
    """
    if not settings.ENABLE_REQUEST_LOGS:
        return
    print(message, flush=True)
|
||||
|
||||
@app.post("/insert")
async def insert(data: EmbeddingInput):
    """Add one embedding to the index and return the id it was stored under.

    Any exception raised while inserting or logging is reported to the client
    as HTTP 400, with the exception text as the response detail.
    """
    try:
        new_id = faiss_manager.insert(data.embedding)
        log_business(f"[faiss] insert id={new_id}")
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return {"id": new_id}
|
||||
|
||||
|
||||
@app.post("/search")
async def search(data: SearchInput):
    """Run a nearest-neighbour query and return the manager's result dict.

    Any exception raised while searching or logging is reported to the client
    as HTTP 400, with the exception text as the response detail.
    """
    try:
        hits = faiss_manager.search(data.embedding, data.k)
        found_ids = hits["ids"]
        scores = hits["similarity_scores"]
        log_business(f"[faiss] search ids={found_ids} similarity_scores={scores}")
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return hits
|
||||
|
||||
|
||||
@app.post("/persist")
async def persist():
    """Ask the manager to write the index to disk, then acknowledge success."""
    faiss_manager.persist()
    acknowledgement = {"status": "success", "message": "索引已持久化"}
    return acknowledgement
|
||||
34
faiss/config.py
Normal file
34
faiss/config.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# config.py
|
||||
from pydantic_settings import BaseSettings
|
||||
from functools import lru_cache
|
||||
|
||||
class Settings(BaseSettings):
    """Service configuration, read from environment variables and ``.env``.

    Every field below can be overridden by an environment variable of the
    same name; unknown variables are ignored.
    """

    # Pydantic v2 style configuration. This replaces the inner ``class Config``,
    # which is the deprecated v1 spelling and triggers a deprecation warning.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "extra": "ignore",
    }

    # --- FAISS (tuned for an LLM similar-question cache) ---
    # Must match the embedding model (e.g. bge-large=1024, text-embedding-3-large=3072).
    FAISS_DIM: int = 1024
    FAISS_INDEX_PATH: str = "faiss_index.bin"
    # "HNSW" (default, recommended) or "FlatIP"; any other value makes the
    # manager fall back to an exact FlatL2 index.
    FAISS_INDEX_TYPE: str = "HNSW"

    # --- HNSW-only parameters (speed / accuracy trade-off) ---
    HNSW_M: int = 32  # links per node (suggested 16-64); larger = more accurate, slightly more memory
    HNSW_EF_CONSTRUCTION: int = 200  # build quality (suggested 100-400)
    HNSW_EF_SEARCH: int = 64  # query accuracy (suggested 32-128); larger = more accurate, slightly slower

    # Cosine similarity (strongly recommended for text embeddings):
    # True = vectors are L2-normalized and an inner-product index is used.
    USE_COSINE_SIMILARITY: bool = True
    ENABLE_REQUEST_LOGS: bool = True  # print insert/search business logs

    # --- FastAPI ---
    APP_HOST: str = "0.0.0.0"
    APP_PORT: int = 8000
    APP_TITLE: str = "FAISS 相似问题缓存服务"
    APP_DESCRIPTION: str = "LLM 对话语义缓存 - 减少 token 消耗"
|
||||
|
||||
|
||||
@lru_cache
def get_settings() -> Settings:
    """Return the shared Settings object, constructing it on the first call only."""
    instance = Settings()
    return instance
|
||||
13
faiss/docker-compose.yml
Normal file
13
faiss/docker-compose.yml
Normal file
@@ -0,0 +1,13 @@
|
||||
services:
|
||||
faiss:
|
||||
build: .
|
||||
container_name: faiss-service
|
||||
ports:
|
||||
- "8451:8000"
|
||||
volumes:
|
||||
- ./faiss_index.bin:/app/faiss_index.bin # 持久化索引文件
|
||||
- ./.env:/app/.env # 可选:挂载配置
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- FAISS_DIM=1024
|
||||
- APP_PORT=8000
|
||||
87
faiss/faiss_manager.py
Normal file
87
faiss/faiss_manager.py
Normal file
@@ -0,0 +1,87 @@
|
||||
# faiss_manager.py
|
||||
import os
|
||||
import numpy as np
|
||||
import faiss
|
||||
from config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
class FaissManager:
    """Owns one FAISS index: load-or-create, insert, search and persist.

    Configuration is read from the module-level ``settings`` object. When
    ``use_cosine`` is true, every vector is L2-normalized before it is added
    or used as a query.
    """

    def __init__(self):
        self.dim = settings.FAISS_DIM
        self.index_path = settings.FAISS_INDEX_PATH
        self.use_cosine = settings.USE_COSINE_SIMILARITY
        self.index = None
        self._load_or_create_index()

    def _load_or_create_index(self):
        """Load the index stored at ``index_path`` or build a fresh one."""
        path = self.index_path
        # A zero-byte file (for example a placeholder created so the path can
        # be bind-mounted into a container) is not a readable index; treat it
        # as "nothing saved yet" instead of failing inside read_index.
        is_empty_file = os.path.isfile(path) and os.path.getsize(path) == 0
        if os.path.exists(path) and not is_empty_file:
            self.index = faiss.read_index(path)
            if self.index.d != self.dim:
                # Validate later requests against the index that is really in
                # use; otherwise a wrong-sized vector passes the length check
                # and only fails inside FAISS.
                print(f"⚠️ 索引维度 {self.index.d} 与配置 FAISS_DIM={self.dim} 不一致,以索引维度为准")
                self.dim = self.index.d
            print(f"✅ 加载已有索引:{self.index.ntotal} 个向量,维度={self.index.d}")
            return

        # Create a new index.
        if settings.FAISS_INDEX_TYPE == "HNSW":
            if self.use_cosine:
                self.index = faiss.IndexHNSWFlat(self.dim, settings.HNSW_M, faiss.METRIC_INNER_PRODUCT)
                print("✅ 创建 HNSWIP 索引(余弦相似度)")
            else:
                self.index = faiss.IndexHNSWFlat(self.dim, settings.HNSW_M)
                print("✅ 创建 HNSWFlat 索引(L2 距离)")

            # Apply the HNSW tuning parameters.
            self.index.hnsw.efConstruction = settings.HNSW_EF_CONSTRUCTION
            self.index.hnsw.efSearch = settings.HNSW_EF_SEARCH
            print(f" HNSW 参数: M={settings.HNSW_M}, efConstruction={settings.HNSW_EF_CONSTRUCTION}, efSearch={settings.HNSW_EF_SEARCH}")

        elif settings.FAISS_INDEX_TYPE == "FlatIP" and self.use_cosine:
            self.index = faiss.IndexFlatIP(self.dim)
            print("✅ 创建 FlatIP 索引(精确余弦)")
        else:
            # Default: exact L2 (kept for compatibility with older configs).
            # NOTE(review): this branch is also reached when use_cosine is
            # true but the index type is not recognised; search() then
            # reports raw L2 distances as similarity scores — confirm intent.
            self.index = faiss.IndexFlatL2(self.dim)
            print("✅ 创建 FlatL2 索引(精确欧式)")

    def _normalize(self, embedding: list[float]) -> np.ndarray:
        """Return *embedding* as a float32 array scaled to unit L2 norm.

        An all-zero vector is returned unchanged to avoid dividing by zero.
        """
        vec = np.array(embedding, dtype=np.float32)
        norm = np.linalg.norm(vec)
        return vec / norm if norm > 0 else vec

    def _to_matrix(self, embedding: list[float]) -> np.ndarray:
        """Validate *embedding* and return it as the (1, dim) float32 array FAISS expects.

        Raises:
            ValueError: if the vector length differs from ``self.dim``.
        """
        if len(embedding) != self.dim:
            raise ValueError(f"Embedding 维度错误,应为 {self.dim}")
        if self.use_cosine:
            vec = self._normalize(embedding)
        else:
            vec = np.array(embedding, dtype=np.float32)
        return vec.reshape(1, -1)

    def insert(self, embedding: list[float]) -> int:
        """Add one vector to the index and return its id.

        The id is the index size before the add, i.e. the position the new
        vector occupies.

        Raises:
            ValueError: if the vector length differs from ``self.dim``.
        """
        vec = self._to_matrix(embedding)
        idx = self.index.ntotal
        self.index.add(vec)
        return idx

    def search(self, embedding: list[float], k: int = 5):
        """Return up to *k* nearest neighbours of *embedding*.

        Returns a dict with three parallel lists: ``ids``, ``distances`` (the
        raw values reported by FAISS) and ``similarity_scores`` (the raw value
        in cosine mode, otherwise ``1 - distance``). The lists are shorter
        than *k* when the index holds fewer than *k* reachable vectors.

        NOTE(review): FAISS L2 indexes report squared distances, so in
        non-cosine mode the score is not confined to [0, 1] — confirm this is
        what consumers expect.

        Raises:
            ValueError: if *k* is smaller than 1 or the vector length differs
                from ``self.dim``.
        """
        if k < 1:
            raise ValueError(f"k 必须为正整数,当前为 {k}")
        query = self._to_matrix(embedding)

        distances, indices = self.index.search(query, k)

        # FAISS pads the result with id -1 (and a sentinel distance) when it
        # finds fewer than k neighbours; drop those slots so callers never
        # see ids that do not exist.
        hits = [
            (int(i), float(d))
            for i, d in zip(indices[0], distances[0])
            if i >= 0
        ]
        ids = [i for i, _ in hits]
        dists = [d for _, d in hits]
        scores = list(dists) if self.use_cosine else [1 - d for d in dists]

        return {
            "ids": ids,
            "distances": dists,  # cosine mode: larger means more similar (1.0 = identical)
            "similarity_scores": scores,
        }

    def persist(self):
        """Write the index to ``index_path`` and return True."""
        faiss.write_index(self.index, self.index_path)
        print(f"💾 索引已保存 → {self.index_path}(共 {self.index.ntotal} 个向量)")
        return True
|
||||
|
||||
|
||||
# Module-level singleton: built once when this module is first imported and shared by the API handlers
|
||||
faiss_manager = FaissManager()
|
||||
19
faiss/main.py
Normal file
19
faiss/main.py
Normal file
@@ -0,0 +1,19 @@
|
||||
# main.py
|
||||
import uvicorn
|
||||
from config import get_settings
|
||||
|
||||
settings = get_settings()

if __name__ == "__main__":
    # Development entry point: serve "api:app" with auto-reload watching the
    # current directory. One flag drives both the banner and the server option.
    reload_enabled = True
    reload_label = "已开启" if reload_enabled else "已关闭"

    print("🚀 启动 FAISS 服务...")
    print(f" 地址: http://{settings.APP_HOST}:{settings.APP_PORT}")
    print(f" 重载模式: {reload_label}")

    server_options = {
        "host": settings.APP_HOST,
        "port": settings.APP_PORT,
        "reload": reload_enabled,
        "reload_dirs": ["."],
        "log_level": "info",
    }
    uvicorn.run("api:app", **server_options)
|
||||
10
faiss/models.py
Normal file
10
faiss/models.py
Normal file
@@ -0,0 +1,10 @@
|
||||
# models.py
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
|
||||
class EmbeddingInput(BaseModel):
    """Request body carrying a single embedding vector."""

    embedding: list[float]
|
||||
|
||||
class SearchInput(BaseModel):
    """Request body carrying a query embedding and a result count ``k``."""

    embedding: list[float]
    k: int = 5  # defaults to 5 when the client omits it
|
||||
6
faiss/requirements.txt
Normal file
6
faiss/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
fastapi>=0.115.0
|
||||
uvicorn[standard]>=0.30.0
|
||||
pydantic-settings>=2.0.0
|
||||
numpy>=1.26.0
|
||||
faiss-cpu>=1.10.0
|
||||
python-dotenv>=1.0.0
|
||||
Reference in New Issue
Block a user