redis缓存替换+pgvector向量替换

This commit is contained in:
1iaan
2026-04-04 22:39:16 +08:00
parent e993eb6c5c
commit 9d7c416737
124 changed files with 5460 additions and 141 deletions

View File

@@ -10,7 +10,6 @@ import (
"ai-chat-service/pkg/config"
"ai-chat-service/pkg/db/mysql"
"ai-chat-service/pkg/db/redis"
"ai-chat-service/pkg/db/vector"
"ai-chat-service/pkg/log"
"ai-chat-service/proto"
"flag"
@@ -57,17 +56,19 @@ func main() {
mysql.InitMysql(cnf)
// 初始化redis
redis.InitRedisPool(cnf)
// 初始化向量数据库
vector.InitDB(cnf)
recordsData := data.NewChatRecordsData(mysql.GetDB())
vectorRecordsData, err := vector_data.NewChatRecordsData(cnf)
if err != nil {
log.Fatal(err)
}
lis, err := net.Listen("tcp", fmt.Sprintf("%s:%d", cnf.Server.IP, cnf.Server.Port))
if err != nil {
log.Fatal(err)
}
s := grpc.NewServer(grpc.UnaryInterceptor(interceptor.UnaryAuthInterceptor), grpc.StreamInterceptor(metrics_app.NewStreamMiddleware(registry).WrapHandler()))
service := server.NewChatService(recordsData, vector_data.NewChatRecordsData(cnf, vector.GetVdb()), cnf, logger, busMetrics)
service := server.NewChatService(recordsData, vectorRecordsData, cnf, logger, busMetrics)
proto.RegisterChatServer(s, service)
healthCheckSrv := health.NewServer()

View File

@@ -59,18 +59,10 @@ func (s *chatService) newApp(in *proto.ChatCompletionRequest, contextCache chat_
if in.ChatParam.Model != "" {
conf.Model = in.ChatParam.Model
}
if in.ChatParam.TopP != 0 {
conf.TopP = in.ChatParam.TopP
}
if in.ChatParam.FrequencyPenalty != 0 {
conf.FrequencyPenalty = in.ChatParam.FrequencyPenalty
}
if in.ChatParam.PresencePenalty != 0 {
conf.PresencePenalty = in.ChatParam.PresencePenalty
}
if in.ChatParam.Temperature != 0 {
conf.Temperature = in.ChatParam.Temperature
}
conf.TopP = in.ChatParam.TopP
conf.FrequencyPenalty = in.ChatParam.FrequencyPenalty
conf.PresencePenalty = in.ChatParam.PresencePenalty
conf.Temperature = in.ChatParam.Temperature
if in.ChatParam.BotDesc != "" {
conf.BotDesc = in.ChatParam.BotDesc
}

View File

@@ -62,7 +62,7 @@ func (s *chatService) ChatCompletion(ctx context.Context, in *proto.ChatCompleti
idStr, score, err := s.vectorData.QueryData(context.Background(), map[string][]string{"keywords": {strings.Join(keywords, ",")}})
if err != nil {
s.log.Error(err)
} else if score > 0.99 {
} else if score > s.config.Vector.Threshold {
id, err := strconv.ParseInt(idStr, 10, 64)
if err != nil {
s.log.Error(err)
@@ -200,7 +200,7 @@ func (s *chatService) ChatCompletionStream(in *proto.ChatCompletionRequest, stre
idStr, score, err := s.vectorData.QueryData(context.Background(), map[string][]string{"keywords": {strings.Join(keywords, ",")}})
if err != nil {
s.log.Error(err)
} else if score > 0.99 {
} else if score > s.config.Vector.Threshold {
id, err := strconv.ParseInt(idStr, 10, 64)
if err != nil {
s.log.Error(err)

View File

@@ -3,7 +3,7 @@ package vector_data
import (
"ai-chat-service/pkg/config"
"context"
"github.com/tencent/vectordatabase-sdk-go/tcvectordb"
"fmt"
)
// CHAT_RECORDS is the name of the vector collection that holds chat records.
const CHAT_RECORDS = "chat_records"
@@ -17,53 +17,13 @@ type IChatRecordsData interface {
QueryData(ctx context.Context, text map[string][]string) (id string, score float32, err error)
}
type chatRecordsData struct {
config *config.Config
vectorDB *tcvectordb.Client
}
func NewChatRecordsData(config *config.Config, vectorDB *tcvectordb.Client) IChatRecordsData {
return &chatRecordsData{
config: config,
vectorDB: vectorDB,
func NewChatRecordsData(config *config.Config) (IChatRecordsData, error) {
switch config.Vector.Provider {
case "tencent", "":
return newTencentChatRecordsData(config)
case "pgvector":
return newPgvectorChatRecordsData(config)
default:
return nil, fmt.Errorf("unsupported vector provider: %s", config.Vector.Provider)
}
}
func (data *chatRecordsData) UpsertData(ctx context.Context, list []*ChatRecord) error {
database := data.config.VectorDB.Database
collection := CHAT_RECORDS
coll := data.vectorDB.Database(database).Collection(collection)
documentList := make([]tcvectordb.Document, 0, len(list))
for _, l := range list {
doc := tcvectordb.Document{
Id: l.ID,
}
doc.Fields = make(map[string]tcvectordb.Field, len(l.KVs))
for k, v := range l.KVs {
doc.Fields[k] = tcvectordb.Field{Val: v}
}
documentList = append(documentList, doc)
}
_, err := coll.Upsert(ctx, documentList)
if err != nil {
return err
}
return nil
}
func (data *chatRecordsData) QueryData(ctx context.Context, text map[string][]string) (id string, score float32, err error) {
database := data.config.VectorDB.Database
collection := CHAT_RECORDS
coll := data.vectorDB.Database(database).Collection(collection)
result, err := coll.SearchByText(ctx, text, &tcvectordb.SearchDocumentParams{
Params: &tcvectordb.SearchDocParams{Ef: 100},
Limit: 1,
})
if err != nil {
return "", 0, err
}
if len(result.Documents) > 0 && len(result.Documents[0]) > 0 {
doc := result.Documents[0][0]
return doc.Id, doc.Score, nil
}
return "", 0, nil
}

View File

@@ -0,0 +1,121 @@
package vector_data
import (
"ai-chat-service/pkg/config"
"ai-chat-service/services/embedding"
"context"
"fmt"
"strconv"
"strings"
"time"
"github.com/jackc/pgx"
)
// pgvectorChatRecordsData is the chat-record vector store backed by a
// PostgreSQL table that is queried with pgvector operators.
type pgvectorChatRecordsData struct {
// config supplies the pgvector settings (table name) read on every call.
config *config.Config
// pool is the pgx connection pool every statement runs on.
pool *pgx.ConnPool
// embedder turns keyword text into the vector that is stored and searched.
embedder embedding.Embedder
}
// newPgvectorChatRecordsData builds the pgvector-backed record store.
//
// It parses the configured DSN, opens a pgx connection pool limited to
// Vector.Pgvector.MaxOpenConn connections and creates the embedder used to
// vectorise keyword text. If the embedder cannot be created the freshly opened
// pool is closed again before the error is returned.
func newPgvectorChatRecordsData(config *config.Config) (IChatRecordsData, error) {
	pgSettings := config.Vector.Pgvector

	parsed, err := pgx.ParseConnectionString(pgSettings.DSN)
	if err != nil {
		return nil, err
	}

	poolConfig := pgx.ConnPoolConfig{
		ConnConfig:     parsed,
		MaxConnections: pgSettings.MaxOpenConn,
	}
	connPool, err := pgx.NewConnPool(poolConfig)
	if err != nil {
		return nil, err
	}

	textEmbedder, err := embedding.NewEmbedder(config)
	if err != nil {
		// Do not leak the pool when construction fails half-way.
		connPool.Close()
		return nil, err
	}

	store := &pgvectorChatRecordsData{
		config:   config,
		pool:     connPool,
		embedder: textEmbedder,
	}
	return store, nil
}
// UpsertData embeds the "keywords" text of every record in list and upserts it
// into the pgvector table, keyed by record_id.
//
// Each record ID must be a base-10 int64. Records whose keyword text is empty
// after normalisation are skipped. Processing stops at the first failure and
// returns that error; records written before the failure stay written.
//
// ctx bounds both the embedding request and the database statement, so
// cancelling it aborts the remaining work.
func (data *pgvectorChatRecordsData) UpsertData(ctx context.Context, list []*ChatRecord) error {
	table := data.config.Vector.Pgvector.Table
	if table == "" {
		table = "chat_record_vectors"
	}
	// The statement only depends on the table name, so build it once instead of
	// once per record. The table name comes from service configuration and is
	// interpolated verbatim; it must be a trusted identifier.
	upsertSQL := fmt.Sprintf(
		"INSERT INTO %s (record_id, keywords_text, embedding, created_at) VALUES ($1, $2, $3::vector, $4) ON CONFLICT (record_id) DO UPDATE SET keywords_text = EXCLUDED.keywords_text, embedding = EXCLUDED.embedding, created_at = EXCLUDED.created_at",
		table,
	)

	for _, item := range list {
		recordID, err := strconv.ParseInt(item.ID, 10, 64)
		if err != nil {
			return err
		}
		keywordsText := embedding.BuildText(item.KVs["keywords"])
		if keywordsText == "" {
			continue
		}
		vector, err := data.embedder.Embed(ctx, keywordsText)
		if err != nil {
			return err
		}
		// ExecEx (rather than Exec) passes ctx to the driver, matching the
		// ctx-aware QueryRowEx call used by QueryData.
		commandTag, err := data.pool.ExecEx(
			ctx,
			upsertSQL,
			nil,
			recordID,
			keywordsText,
			vectorLiteral(vector),
			time.Now().Unix(),
		)
		if err != nil {
			return err
		}
		if commandTag.RowsAffected() == 0 {
			return fmt.Errorf("pgvector upsert affected 0 rows for record_id=%d", recordID)
		}
	}
	return nil
}
// QueryData returns the stored record closest to the "keywords" entries of
// text, together with its similarity score (1 minus the pgvector `<=>`
// distance, cast to real).
//
// It returns an empty id and a zero score, without an error, when the keyword
// text is empty after normalisation or when the table holds no rows.
func (data *pgvectorChatRecordsData) QueryData(ctx context.Context, text map[string][]string) (id string, score float32, err error) {
	queryText := embedding.BuildText(text["keywords"]...)
	if queryText == "" {
		return "", 0, nil
	}

	queryVector, embedErr := data.embedder.Embed(ctx, queryText)
	if embedErr != nil {
		return "", 0, embedErr
	}

	tableName := data.config.Vector.Pgvector.Table
	if tableName == "" {
		tableName = "chat_record_vectors"
	}
	nearestSQL := fmt.Sprintf(
		"SELECT record_id, CAST(1 - (embedding <=> $1::vector) AS real) AS score FROM %s ORDER BY embedding <=> $1::vector LIMIT 1",
		tableName,
	)

	var (
		matchedID  int64
		similarity float32
	)
	row := data.pool.QueryRowEx(ctx, nearestSQL, nil, vectorLiteral(queryVector))
	switch scanErr := row.Scan(&matchedID, &similarity); {
	case scanErr == pgx.ErrNoRows:
		// An empty table is a miss, not a failure.
		return "", 0, nil
	case scanErr != nil:
		return "", 0, scanErr
	}
	return strconv.FormatInt(matchedID, 10), similarity, nil
}
// vectorLiteral renders values as a bracketed, comma-separated list such as
// "[1,0.5,-2.25]", which the SQL statements cast with `::vector`. Every
// component is written with the shortest decimal form that round-trips as a
// float32. A nil or empty slice yields "[]".
func vectorLiteral(values []float32) string {
	var out strings.Builder
	out.WriteByte('[')
	for i, component := range values {
		if i > 0 {
			out.WriteByte(',')
		}
		out.WriteString(strconv.FormatFloat(float64(component), 'f', -1, 32))
	}
	out.WriteByte(']')
	return out.String()
}

View File

@@ -0,0 +1,66 @@
package vector_data
import (
"ai-chat-service/pkg/config"
"context"
"time"
"github.com/tencent/vectordatabase-sdk-go/tcvectordb"
)
// tencentChatRecordsData is the chat-record vector store backed by the Tencent
// Cloud vector database SDK client.
type tencentChatRecordsData struct {
// config supplies the Tencent database name read on every call.
config *config.Config
// vectorDB is the SDK client all collection operations go through.
vectorDB *tcvectordb.Client
}
// newTencentChatRecordsData creates the Tencent-backed record store. The SDK
// client is configured from Vector.Tencent; Timeout and IdleConnTimeout are
// interpreted as seconds.
func newTencentChatRecordsData(config *config.Config) (IChatRecordsData, error) {
	settings := config.Vector.Tencent

	clientOption := tcvectordb.ClientOption{
		Timeout:            time.Duration(settings.Timeout) * time.Second,
		MaxIdldConnPerHost: settings.MaxIdleConnPerHost,
		IdleConnTimeout:    time.Duration(settings.IdleConnTimeout) * time.Second,
		ReadConsistency:    tcvectordb.ReadConsistency(settings.ReadConsistency),
	}

	sdkClient, err := tcvectordb.NewClient(settings.Url, settings.Username, settings.Pwd, &clientOption)
	if err != nil {
		return nil, err
	}

	store := &tencentChatRecordsData{
		config:   config,
		vectorDB: sdkClient,
	}
	return store, nil
}
// UpsertData converts every record in list into an SDK document, copying each
// key/value pair into a document field, and upserts the whole batch into the
// CHAT_RECORDS collection of the configured database in a single call.
func (data *tencentChatRecordsData) UpsertData(ctx context.Context, list []*ChatRecord) error {
	docs := make([]tcvectordb.Document, 0, len(list))
	for _, record := range list {
		fields := make(map[string]tcvectordb.Field, len(record.KVs))
		for name, value := range record.KVs {
			fields[name] = tcvectordb.Field{Val: value}
		}
		docs = append(docs, tcvectordb.Document{Id: record.ID, Fields: fields})
	}

	target := data.vectorDB.Database(data.config.Vector.Tencent.Database).Collection(CHAT_RECORDS)
	_, err := target.Upsert(ctx, docs)
	return err
}
// QueryData runs a text search (limit 1, Ef 100) against the CHAT_RECORDS
// collection and returns the id and score of the best hit. When the search
// yields no document it returns an empty id and a zero score without an error.
func (data *tencentChatRecordsData) QueryData(ctx context.Context, text map[string][]string) (id string, score float32, err error) {
	searchParams := &tcvectordb.SearchDocumentParams{
		Params: &tcvectordb.SearchDocParams{Ef: 100},
		Limit:  1,
	}
	target := data.vectorDB.Database(data.config.Vector.Tencent.Database).Collection(CHAT_RECORDS)

	found, searchErr := target.SearchByText(ctx, text, searchParams)
	if searchErr != nil {
		return "", 0, searchErr
	}
	// Guard clause: nothing matched.
	if len(found.Documents) == 0 || len(found.Documents[0]) == 0 {
		return "", 0, nil
	}

	best := found.Documents[0][0]
	return best.Id, best.Score, nil
}

View File

@@ -53,7 +53,37 @@ dependOn:
address: "localhost:50054"
accessToken: "ang1chubdev1ozhome256487d22sapguuv1ozhom"
tokenizer:
address: "http://192.168.239.161:3002"
address: "http://127.0.0.1:3002"
vector:
# Vector backend: tencent / pgvector
provider: "pgvector"
# 历史问答命中阈值
threshold: 0.99
tencent:
url: "http://lb-4u4r1fk4-1ys6gv3rpmdan420.clb.ap-guangzhou.tencentclb.com:60000"
username: "root"
pwd: "YaUfVueWZJ20e4ghyLlBT8Dou5OapwpFTUq50oft"
database: "ai-chat"
timeout: 5
maxIdleConnPerHost: 2
readConsistency: "eventualConsistency"
idleConnTimeout: 60
pgvector:
dsn: "postgres://postgres:postgres@127.0.0.1:15432/ai_chat?sslmode=disable"
table: "chat_record_vectors"
dimensions: 1024
maxLifeTime: 3600
maxOpenConn: 10
maxIdleConn: 10
embedding:
provider: "openai-compatible"
# 智谱 OpenAI 兼容网关;可被项目根目录 .env 覆盖
base_url: "https://open.bigmodel.cn/api/paas/v4"
# 默认故意设成错误值,真实 key 请放到项目根目录 .env
api_key: "__INVALID_SET_AI_CHAT_EMBEDDING_API_KEY__"
# embedding-2 固定 1024 维,和当前 pgvector 表结构一致
model: "embedding-2"
timeout: 10
vectorDB:
# 访问地址
url: "http://lb-4u4r1fk4-1ys6gv3rpmdan420.clb.ap-guangzhou.tencentclb.com:60000"
@@ -69,4 +99,4 @@ vectorDB:
# 读一致性: strongConsistency(强一致性)eventualConsistency(最终一致性)
readConsistency: "eventualConsistency"
# 空闲连接超时时长s
idleConnTimeout: 60
idleConnTimeout: 60

View File

@@ -1,13 +1,12 @@
module ai-chat-service
go 1.21
toolchain go1.24.2
go 1.25.0
require (
github.com/go-sql-driver/mysql v1.8.1
github.com/golang/protobuf v1.5.4
github.com/google/uuid v1.6.0
github.com/jackc/pgx v3.6.2+incompatible
github.com/prometheus/client_golang v1.20.4
github.com/redis/go-redis/v9 v9.6.1
github.com/sashabaranov/go-openai v1.9.4
@@ -24,11 +23,15 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/clbanning/mxj v1.8.4 // indirect
github.com/cockroachdb/apd v1.1.0 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/gofrs/uuid v4.4.0+incompatible // indirect
github.com/google/go-querystring v1.0.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/lib/pq v1.12.3 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/mozillazg/go-httpheader v0.2.1 // indirect
@@ -40,18 +43,21 @@ require (
github.com/prometheus/procfs v0.15.1 // indirect
github.com/sagikazarmark/locafero v0.4.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/shopspring/decimal v1.4.0 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/spf13/afero v1.11.0 // indirect
github.com/spf13/cast v1.6.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.11.1 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
github.com/tencentyun/cos-go-sdk-v5 v0.7.54 // indirect
go.uber.org/atomic v1.9.0 // indirect
go.uber.org/multierr v1.9.0 // indirect
golang.org/x/crypto v0.24.0 // indirect
golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/sys v0.22.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/text v0.29.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240604185151-ef581f913117 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect

View File

@@ -1386,6 +1386,8 @@ github.com/cncf/xds/go v0.0.0-20231109132714-523115ebc101/go.mod h1:eXthEFrGJvWH
github.com/cncf/xds/go v0.0.0-20231128003011-0fa0005c9caa/go.mod h1:x/1Gn8zydmfq8dk6e9PdstVsDgu9RuyIIJqAaF//0IM=
github.com/cncf/xds/go v0.0.0-20240318125728-8a4994d93e50/go.mod h1:5e1+Vvlzido69INQaVO6d87Qn543Xr6nooe9Kz7oBFM=
github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I=
github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -1457,6 +1459,8 @@ github.com/goccy/go-json v0.9.11/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MG
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/goccy/go-yaml v1.9.8/go.mod h1:JubOolP3gh0HpiBc4BLRD4YmjEjHAmIIB2aaXKkTfoE=
github.com/goccy/go-yaml v1.11.0/go.mod h1:H+mJrWtjPTJAHvRbV09MCK9xYwODM+wRTVFFTWckfng=
github.com/gofrs/uuid v4.4.0+incompatible h1:3qXRTX8/NbyulANqlc0lchS1gqAVxRgsuW1YrTJupqA=
github.com/gofrs/uuid v4.4.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4=
@@ -1601,6 +1605,10 @@ github.com/iancoleman/strcase v0.3.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733 h1:vr3AYkKovP8uR8AvSGGUK1IDqRa5lAAvEkZG1LKaCRc=
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733/go.mod h1:WrMFNQdiFJ80sQsxDoMokWK1W5TQtxBFNpzWTD84ibQ=
github.com/jackc/pgx v3.6.2+incompatible h1:2zP5OD7kiyR3xzRYMhOcXVvkDZsImVXfj+yIyTQf3/o=
github.com/jackc/pgx v3.6.2+incompatible/go.mod h1:0ZGrqGqkRlliWnWB4zKnWtjbSWbGkVEFm4TeybAXq+I=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
@@ -1629,6 +1637,8 @@ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII=
github.com/lib/pq v1.12.3 h1:tTWxr2YLKwIvK90ZXEw8GP7UFHtcbTtty8zsI+YjrfQ=
github.com/lib/pq v1.12.3/go.mod h1:/p+8NSbOcwzAEI7wiMXFlgydTwcgTr3OSKMsD2BitpA=
github.com/lyft/protoc-gen-star v0.6.0/go.mod h1:TGAoBVkt8w7MPG72TrKIu85MIdXwDuzJYeZuUPFPNwA=
github.com/lyft/protoc-gen-star v0.6.1/go.mod h1:TGAoBVkt8w7MPG72TrKIu85MIdXwDuzJYeZuUPFPNwA=
github.com/lyft/protoc-gen-star/v2 v2.0.1/go.mod h1:RcCdONR2ScXaYnQC5tUzxzlpA3WVYF7/opLeUgcQs/o=
@@ -1704,6 +1714,8 @@ github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6g
github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM=
github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
@@ -1738,8 +1750,9 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
github.com/substrait-io/substrait-go v0.4.2/go.mod h1:qhpnLmrcvAnlZsUyPXZRqldiHapPTXC3t7xFgDi3aQg=
@@ -1827,6 +1840,8 @@ golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1m
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@@ -2166,8 +2181,8 @@ golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=

View File

@@ -1,7 +1,11 @@
package config
import (
"bufio"
"log"
"os"
"path/filepath"
"strings"
"github.com/spf13/viper"
)
@@ -54,6 +58,35 @@ type Config struct {
Address string
}
}
Vector struct {
Provider string
Threshold float32
Tencent struct {
Url string
Username string
Pwd string
Database string
Timeout int
MaxIdleConnPerHost int
ReadConsistency string
IdleConnTimeout int
}
Pgvector struct {
DSN string `mapstructure:"dsn"`
Table string `mapstructure:"table"`
Dimensions int `mapstructure:"dimensions"`
MaxLifeTime int `mapstructure:"maxLifeTime"`
MaxOpenConn int `mapstructure:"maxOpenConn"`
MaxIdleConn int `mapstructure:"maxIdleConn"`
}
}
Embedding struct {
Provider string
BaseUrl string `mapstructure:"base_url"`
ApiKey string `mapstructure:"api_key"`
Model string `mapstructure:"model"`
Timeout int
}
VectorDB struct {
Url string
Username string
@@ -69,6 +102,7 @@ type Config struct {
var conf *Config
func InitConfig(filePath string, typ ...string) {
loadProjectDotEnv(filePath)
v := viper.New()
v.SetConfigFile(filePath)
if len(typ) > 0 {
@@ -83,9 +117,137 @@ func InitConfig(filePath string, typ ...string) {
if err != nil {
log.Fatal(err)
}
normalizeConfig(conf)
}
// GetConfig returns the package-level configuration pointer. It is nil until
// the package variable has been assigned.
func GetConfig() *Config {
return conf
}
// normalizeConfig fills in defaults and derived values after the configuration
// file has been decoded.
//
// In order, it:
//   - defaults Vector.Provider to "tencent" and Vector.Threshold to 0.99 (a zero
//     threshold is treated as "not set");
//   - backfills every unset Vector.Tencent field from the legacy VectorDB block;
//   - defaults Embedding.Provider to "openai-compatible";
//   - applies the chat environment overrides;
//   - lets an unset Embedding.BaseUrl / Embedding.ApiKey fall back to the chat
//     values and defaults Embedding.Timeout to 10;
//   - applies the embedding environment overrides.
//
// The chat overrides run before the embedding fallback on purpose: the fallback
// must copy the chat settings that are actually in effect, not file values that
// an environment variable has replaced.
func normalizeConfig(conf *Config) {
	if conf.Vector.Provider == "" {
		conf.Vector.Provider = "tencent"
	}
	if conf.Vector.Threshold == 0 {
		conf.Vector.Threshold = 0.99
	}

	// Backfill the new vector.tencent block from the legacy vectorDB config.
	if conf.Vector.Tencent.Url == "" {
		conf.Vector.Tencent.Url = conf.VectorDB.Url
	}
	if conf.Vector.Tencent.Username == "" {
		conf.Vector.Tencent.Username = conf.VectorDB.Username
	}
	if conf.Vector.Tencent.Pwd == "" {
		conf.Vector.Tencent.Pwd = conf.VectorDB.Pwd
	}
	if conf.Vector.Tencent.Database == "" {
		conf.Vector.Tencent.Database = conf.VectorDB.Database
	}
	if conf.Vector.Tencent.Timeout == 0 {
		conf.Vector.Tencent.Timeout = conf.VectorDB.Timeout
	}
	if conf.Vector.Tencent.MaxIdleConnPerHost == 0 {
		conf.Vector.Tencent.MaxIdleConnPerHost = conf.VectorDB.MaxIdleConnPerHost
	}
	if conf.Vector.Tencent.ReadConsistency == "" {
		conf.Vector.Tencent.ReadConsistency = conf.VectorDB.ReadConsistency
	}
	if conf.Vector.Tencent.IdleConnTimeout == 0 {
		conf.Vector.Tencent.IdleConnTimeout = conf.VectorDB.IdleConnTimeout
	}

	if conf.Embedding.Provider == "" {
		conf.Embedding.Provider = "openai-compatible"
	}

	// Resolve the effective chat settings first so the fallback below sees them.
	overrideChatFromEnv(conf)

	if conf.Embedding.BaseUrl == "" {
		conf.Embedding.BaseUrl = conf.Chat.BaseUrl
	}
	if conf.Embedding.ApiKey == "" {
		conf.Embedding.ApiKey = conf.Chat.ApiKey
	}
	if conf.Embedding.Timeout == 0 {
		conf.Embedding.Timeout = 10
	}

	// Embedding-specific environment variables win over any fallback value.
	overrideEmbeddingFromEnv(conf)
}
// overrideChatFromEnv replaces chat settings with values taken from the
// environment. For each setting the variables are tried in the listed order and
// the first non-empty one wins; when none is set the setting is left untouched.
//
//	BaseUrl: AI_CHAT_OPENAI_BASE_URL, OPENAI_BASE_URL
//	Model:   AI_CHAT_OPENAI_MODEL, OPENAI_MODEL
//	ApiKey:  AI_CHAT_OPENAI_API_KEY, OPENAI_API_KEY, MOONSHOT_API_KEY
func overrideChatFromEnv(conf *Config) {
	// firstSet returns the value of the first non-empty variable in names.
	firstSet := func(names ...string) string {
		for _, name := range names {
			if found := os.Getenv(name); found != "" {
				return found
			}
		}
		return ""
	}

	if baseURL := firstSet("AI_CHAT_OPENAI_BASE_URL", "OPENAI_BASE_URL"); baseURL != "" {
		conf.Chat.BaseUrl = baseURL
	}
	if model := firstSet("AI_CHAT_OPENAI_MODEL", "OPENAI_MODEL"); model != "" {
		conf.Chat.Model = model
	}
	if apiKey := firstSet("AI_CHAT_OPENAI_API_KEY", "OPENAI_API_KEY", "MOONSHOT_API_KEY"); apiKey != "" {
		conf.Chat.ApiKey = apiKey
	}
}
// overrideEmbeddingFromEnv replaces embedding settings with non-empty
// environment values: AI_CHAT_EMBEDDING_BASE_URL for the base URL,
// AI_CHAT_EMBEDDING_MODEL for the model, and AI_CHAT_EMBEDDING_API_KEY (or,
// failing that, ZAI_API_KEY) for the API key. Unset or empty variables leave
// the corresponding setting unchanged.
func overrideEmbeddingFromEnv(conf *Config) {
	if baseURL := os.Getenv("AI_CHAT_EMBEDDING_BASE_URL"); baseURL != "" {
		conf.Embedding.BaseUrl = baseURL
	}
	if model := os.Getenv("AI_CHAT_EMBEDDING_MODEL"); model != "" {
		conf.Embedding.Model = model
	}

	// The dedicated variable takes precedence over the provider-specific one.
	for _, name := range []string{"AI_CHAT_EMBEDDING_API_KEY", "ZAI_API_KEY"} {
		if apiKey := os.Getenv(name); apiKey != "" {
			conf.Embedding.ApiKey = apiKey
			break
		}
	}
}
// loadProjectDotEnv loads the ".env" file located two directory levels above
// configFilePath (the parent of the directory containing the config file).
func loadProjectDotEnv(configFilePath string) {
	configDir := filepath.Dir(configFilePath)
	dotEnvPath := filepath.Join(filepath.Dir(configDir), ".env")
	loadDotEnvFile(dotEnvPath)
}
// loadDotEnvFile reads KEY=VALUE pairs from the file at path and exports them
// into the process environment.
//
// The file is optional: if it cannot be opened the function returns silently.
// Blank lines, lines starting with '#', lines without '=' and lines with an
// empty key are ignored. Keys and values are trimmed of surrounding whitespace,
// and a value wrapped in one matching pair of single or double quotes has that
// pair removed. Quotes that are unmatched or only appear on one side are part
// of the value and are kept.
//
// Variables that already exist in the environment are never overwritten, so
// the real environment always takes precedence over the file.
//
// Read errors (for example an over-long line) and failures to set a variable
// are logged rather than dropped; loading stays best-effort and never aborts
// start-up.
func loadDotEnvFile(path string) {
	file, err := os.Open(path)
	if err != nil {
		return
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		key, value, ok := strings.Cut(line, "=")
		if !ok {
			continue
		}
		key = strings.TrimSpace(key)
		if key == "" {
			continue
		}
		value = strings.TrimSpace(value)
		// Strip exactly one pair of matching quotes; trimming a quote cutset
		// would also eat quotes that belong to the value itself.
		if n := len(value); n >= 2 && (value[0] == '"' || value[0] == '\'') && value[n-1] == value[0] {
			value = value[1 : n-1]
		}
		if _, exists := os.LookupEnv(key); exists {
			continue
		}
		if err := os.Setenv(key, value); err != nil {
			log.Printf("setting %q from dotenv file %q: %v", key, path, err)
		}
	}
	// Scan stops silently on I/O errors and on lines longer than the scanner
	// buffer; report that so a partially applied file is noticeable.
	if err := scanner.Err(); err != nil {
		log.Printf("reading dotenv file %q: %v", path, err)
	}
}

View File

@@ -7,10 +7,11 @@
package proto
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
)
const (

View File

@@ -1 +1,7 @@
time="2024-09-20T14:44:21+08:00" level=error msg="grpc: no transport security set (use grpc.WithTransportCredentials(insecure.NewCredentials()) explicitly or set credentials)" file="E:/Work/Code/go/5.0/2404/ai-chat/ai-chat-service/services/grpc-client/grpc_client_pool.go:25" func=ai-chat-service/services/grpc-client.NewPool.func1
time="2026-04-04T22:15:52+08:00" level=fatal msg="dial tcp 127.0.0.1:15432: socket: operation not permitted" file="/home/lian/share/aichat/ai-chat-service/chat-server/main.go:63" func=main.main
time="2026-04-04T22:15:52+08:00" level=fatal msg="listen tcp 0.0.0.0:50055: socket: operation not permitted" file="/home/lian/share/aichat/ai-chat-service/chat-server/main.go:68" func=main.main
time="2026-04-04T22:17:59+08:00" level=error msg="error, status code: 401, message: Invalid Authentication" file="/home/lian/share/aichat/ai-chat-service/chat-server/server/server.go:244" func="ai-chat-service/chat-server/server.(*chatService).ChatCompletionStream"
time="2026-04-04T22:26:04+08:00" level=error msg="error, status code: 400, message: invalid presence_penalty: only 0 is allowed for this model" file="/home/lian/share/aichat/ai-chat-service/chat-server/server/server.go:244" func="ai-chat-service/chat-server/server.(*chatService).ChatCompletionStream"
time="2026-04-04T22:27:56+08:00" level=error msg="dial tcp 127.0.0.1:8888: connect: connection refused" file="/home/lian/share/aichat/ai-chat-service/chat-server/server/app.go:243" func="ai-chat-service/chat-server/server.(*app).saveContext"
time="2026-04-04T22:27:56+08:00" level=error msg="dial tcp 127.0.0.1:8888: connect: connection refused" file="/home/lian/share/aichat/ai-chat-service/chat-server/server/server.go:304" func="ai-chat-service/chat-server/server.(*chatService).ChatCompletionStream.func1"

View File

@@ -0,0 +1,115 @@
package embedding
import (
"ai-chat-service/pkg/config"
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
)
// Embedder converts a piece of text into its embedding vector.
type Embedder interface {
// Embed returns the embedding vector for text, or an error if none could be
// produced.
Embed(ctx context.Context, text string) ([]float32, error)
}
// openAICompatibleEmbedder is an Embedder that posts to the "/embeddings"
// endpoint below baseURL using bearer-token authentication.
type openAICompatibleEmbedder struct {
// baseURL is the API root; "/embeddings" is appended to it per request.
baseURL string
// apiKey is sent as the bearer token in the Authorization header.
apiKey string
// model is the embedding model name sent in the request body.
model string
// dimensions is the expected vector length; values <= 0 disable the check.
dimensions int
// httpClient performs the requests.
httpClient *http.Client
}
// embeddingRequest is the JSON body sent to the embeddings endpoint.
type embeddingRequest struct {
// Input is the list of texts to embed.
Input []string `json:"input"`
// Model is the embedding model name.
Model string `json:"model"`
}
// embeddingResponse is the subset of the embeddings endpoint's JSON reply that
// this package decodes.
type embeddingResponse struct {
// Data holds the returned entries; only the first entry's vector is used.
Data []struct {
Embedding []float32 `json:"embedding"`
} `json:"data"`
}
// NewEmbedder returns the Embedder selected by cnf.Embedding.Provider.
//
// "openai-compatible" (or an empty provider) yields an HTTP-based embedder
// whose base URL has trailing slashes removed, whose expected vector length is
// taken from Vector.Pgvector.Dimensions and whose HTTP timeout is
// Embedding.Timeout seconds. Any other provider name is rejected with an error.
func NewEmbedder(cnf *config.Config) (Embedder, error) {
	provider := cnf.Embedding.Provider
	if provider != "openai-compatible" && provider != "" {
		return nil, fmt.Errorf("unsupported embedding provider: %s", provider)
	}

	requestTimeout := time.Duration(cnf.Embedding.Timeout) * time.Second
	embedder := &openAICompatibleEmbedder{
		baseURL:    strings.TrimRight(cnf.Embedding.BaseUrl, "/"),
		apiKey:     cnf.Embedding.ApiKey,
		model:      cnf.Embedding.Model,
		dimensions: cnf.Vector.Pgvector.Dimensions,
		httpClient: &http.Client{Timeout: requestTimeout},
	}
	return embedder, nil
}
// BuildText normalises parts into a single comma-separated string. In every
// part newlines are replaced by spaces and surrounding whitespace is trimmed;
// parts that end up empty are dropped. It returns "" when nothing remains.
func BuildText(parts ...string) string {
	var joined strings.Builder
	for _, raw := range parts {
		piece := strings.TrimSpace(strings.ReplaceAll(raw, "\n", " "))
		if piece == "" {
			continue
		}
		// Pieces are never empty, so a non-empty builder means a separator is due.
		if joined.Len() > 0 {
			joined.WriteByte(',')
		}
		joined.WriteString(piece)
	}
	return joined.String()
}
// Embed requests the embedding vector for text from the configured endpoint.
//
// The text is normalised with BuildText first. An error is returned when the
// normalised text, base URL, API key or model is empty, when the request
// fails, when the HTTP status is outside 2xx, when the reply carries no
// vector, or when the vector length differs from a positive expected
// dimension.
func (e *openAICompatibleEmbedder) Embed(ctx context.Context, text string) ([]float32, error) {
	text = BuildText(text)
	switch {
	case text == "":
		return nil, fmt.Errorf("embedding text is empty")
	case e.baseURL == "":
		return nil, fmt.Errorf("embedding base_url is empty")
	case e.apiKey == "":
		return nil, fmt.Errorf("embedding api_key is empty")
	case e.model == "":
		return nil, fmt.Errorf("embedding model is empty")
	}

	payload, err := json.Marshal(&embeddingRequest{
		Input: []string{text},
		Model: e.model,
	})
	if err != nil {
		return nil, err
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, e.baseURL+"/embeddings", bytes.NewReader(payload))
	if err != nil {
		return nil, err
	}
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)

	httpResp, err := e.httpClient.Do(httpReq)
	if err != nil {
		return nil, err
	}
	defer httpResp.Body.Close()

	if status := httpResp.StatusCode; status < 200 || status >= 300 {
		return nil, fmt.Errorf("embedding request failed: status=%d", status)
	}

	var decoded embeddingResponse
	if err = json.NewDecoder(httpResp.Body).Decode(&decoded); err != nil {
		return nil, err
	}
	if len(decoded.Data) == 0 || len(decoded.Data[0].Embedding) == 0 {
		return nil, fmt.Errorf("embedding response is empty")
	}

	// Only the first entry is relevant because exactly one input was sent.
	vector := decoded.Data[0].Embedding
	if e.dimensions > 0 && len(vector) != e.dimensions {
		return nil, fmt.Errorf("embedding dimension mismatch: got=%d want=%d", len(vector), e.dimensions)
	}
	return vector, nil
}