faiss server

This commit is contained in:
2026-04-10 11:55:00 +00:00
parent bc82e3e708
commit 8e39e609cc
30 changed files with 1271 additions and 1048 deletions

1
.gitignore vendored
View File

@@ -3,4 +3,3 @@ docs
.env
.vscode
faiss

View File

@@ -45,6 +45,18 @@ type ChatMessageRequestOptions struct {
ParentMessageId string `json:"parentMessageId"`
}
// TokenUsage holds the token counters attached to a chat reply. It is filled
// from the proto Usage message by toTokenUsage and serialized with snake_case
// JSON keys; no field is omitted when zero.
type TokenUsage struct {
	// PromptTokens is written under the "prompt_tokens" key.
	PromptTokens int32 `json:"prompt_tokens"`
	// CompletionTokens is written under the "completion_tokens" key.
	CompletionTokens int32 `json:"completion_tokens"`
	// TotalTokens is written under the "total_tokens" key.
	TotalTokens int32 `json:"total_tokens"`
}
// MessageMeta bundles the optional metadata of a chat message. Every field is
// tagged omitempty, so an empty string or a nil pointer is left out of the
// JSON output.
type MessageMeta struct {
	// Source is written under the "source" key when non-empty.
	Source string `json:"source,omitempty"`
	// TokenUsed is written under the "tokenUsed" key when non-nil; a pointer
	// is used so an explicit false is still emitted.
	TokenUsed *bool `json:"tokenUsed,omitempty"`
	// Usage is written under the "usage" key when non-nil.
	Usage *TokenUsage `json:"usage,omitempty"`
}
type ChatMessage struct {
ID string `json:"id"`
Text string `json:"text"`
@@ -52,6 +64,10 @@ type ChatMessage struct {
Name string `json:"name"`
Delta string `json:"delta"`
Detail *ai_chat_service_proto.ChatCompletionStreamResponse `json:"detail"`
Usage *TokenUsage `json:"usage,omitempty"`
Source string `json:"source,omitempty"`
TokenUsed *bool `json:"tokenUsed,omitempty"`
Meta *MessageMeta `json:"meta,omitempty"`
TokenCount int `json:"tokenCount"`
ParentMessageId string `json:"parentMessageId"`
}
@@ -153,6 +169,23 @@ func (chat *ChatService) ChatProcess(ctx *gin.Context) {
}
result.Detail = rsp
}
if usage := toTokenUsage(rsp.GetUsage()); usage != nil {
tokenUsed := usage.TotalTokens > 0
result.Usage = usage
result.TokenUsed = &tokenUsed
if result.Meta == nil {
result.Meta = &MessageMeta{}
}
result.Meta.Usage = usage
result.Meta.TokenUsed = &tokenUsed
}
if rsp.GetSource() != "" {
result.Source = rsp.GetSource()
if result.Meta == nil {
result.Meta = &MessageMeta{}
}
result.Meta.Source = rsp.GetSource()
}
bts, err := json.Marshal(result)
if err != nil {
@@ -180,6 +213,17 @@ func (chat *ChatService) ChatProcess(ctx *gin.Context) {
}
}
// toTokenUsage copies the three counters of a proto Usage message into a
// freshly allocated TokenUsage. A nil usage produces nil, which lets the
// caller distinguish "no usage reported" from an all-zero report.
func toTokenUsage(usage *ai_chat_service_proto.Usage) *TokenUsage {
	if usage == nil {
		return nil
	}
	converted := new(TokenUsage)
	converted.PromptTokens = usage.GetPromptTokens()
	converted.CompletionTokens = usage.GetCompletionTokens()
	converted.TotalTokens = usage.GetTotalTokens()
	return converted
}
func (chat *ChatService) topP() float32 {
model := strings.ToLower(chat.config.Chat.Model)
if strings.HasPrefix(model, "kimi-") || strings.HasPrefix(model, "moonshot-") {

View File

@@ -1,7 +1,7 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.28.1
// protoc v4.22.0
// protoc-gen-go v1.36.6
// protoc v3.6.1
// source: proto/chat.proto
package proto
@@ -11,6 +11,7 @@ import (
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
const (
@@ -21,25 +22,22 @@ const (
)
// ChatCompletionRequest is the generated Go type for the
// ai_chat_service.zvoice.com.ChatCompletionRequest protobuf message.
//
// The unexported state, unknownFields and sizeCache fields are protoimpl
// runtime bookkeeping. Each is declared exactly once, in the layout that goes
// with the `protogen:"open.v1"` tag: state first, the other two last.
type ChatCompletionRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Message       string                 `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"`
	Id            string                 `protobuf:"bytes,2,opt,name=id,proto3" json:"id,omitempty"`
	Pid           string                 `protobuf:"bytes,3,opt,name=pid,json=p_id,proto3" json:"pid,omitempty"`
	EnableContext bool                   `protobuf:"varint,4,opt,name=enableContext,json=enable_context,proto3" json:"enableContext,omitempty"`
	ChatParam     *ChatParam             `protobuf:"bytes,5,opt,name=chatParam,json=chat_param,proto3" json:"chatParam,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}
// Reset overwrites *x with a zero-valued ChatCompletionRequest and then stores
// the message info at file_proto_chat_proto_msgTypes[0] in its message state.
//
// The message info is stored unconditionally (no protoimpl.UnsafeEnabled
// guard), which is the form protoc-gen-go v1.36.6 generates.
func (x *ChatCompletionRequest) Reset() {
	*x = ChatCompletionRequest{}
	mi := &file_proto_chat_proto_msgTypes[0]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
func (x *ChatCompletionRequest) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -49,7 +47,7 @@ func (*ChatCompletionRequest) ProtoMessage() {}
func (x *ChatCompletionRequest) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[0]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -100,10 +98,7 @@ func (x *ChatCompletionRequest) GetChatParam() *ChatParam {
}
type ChatParam struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
state protoimpl.MessageState `protogen:"open.v1"`
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
MaxTokens int32 `protobuf:"varint,2,opt,name=maxTokens,json=max_tokens,proto3" json:"maxTokens,omitempty"`
Temperature float32 `protobuf:"fixed32,3,opt,name=temperature,proto3" json:"temperature,omitempty"`
@@ -114,16 +109,16 @@ type ChatParam struct {
MinResponseTokens int32 `protobuf:"varint,8,opt,name=minResponseTokens,json=min_response_tokens,proto3" json:"minResponseTokens,omitempty"`
ContextTTL int32 `protobuf:"varint,9,opt,name=contextTTL,json=context_ttl,proto3" json:"contextTTL,omitempty"`
ContextLen int32 `protobuf:"varint,10,opt,name=contextLen,json=context_len,proto3" json:"contextLen,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with a zero-valued ChatParam and then stores the message
// info at file_proto_chat_proto_msgTypes[1] in its message state.
//
// The message info is stored unconditionally (no protoimpl.UnsafeEnabled
// guard), which is the form protoc-gen-go v1.36.6 generates.
func (x *ChatParam) Reset() {
	*x = ChatParam{}
	mi := &file_proto_chat_proto_msgTypes[1]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
func (x *ChatParam) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -133,7 +128,7 @@ func (*ChatParam) ProtoMessage() {}
func (x *ChatParam) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[1]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -220,26 +215,24 @@ func (x *ChatParam) GetContextLen() int32 {
// Service response message, non-streaming response.
//
// ChatCompletionResponse is the generated Go type for the
// ai_chat_service.zvoice.com.ChatCompletionResponse protobuf message.
//
// The unexported state, unknownFields and sizeCache fields are protoimpl
// runtime bookkeeping. Each is declared exactly once, in the layout that goes
// with the `protogen:"open.v1"` tag: state first, the other two last.
type ChatCompletionResponse struct {
	state         protoimpl.MessageState  `protogen:"open.v1"`
	Id            string                  `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
	Object        string                  `protobuf:"bytes,2,opt,name=object,proto3" json:"object,omitempty"`
	Created       int64                   `protobuf:"varint,3,opt,name=created,proto3" json:"created,omitempty"`
	Model         string                  `protobuf:"bytes,4,opt,name=model,proto3" json:"model,omitempty"`
	Choices       []*ChatCompletionChoice `protobuf:"bytes,5,rep,name=choices,proto3" json:"choices,omitempty"`
	Usage         *Usage                  `protobuf:"bytes,6,opt,name=usage,proto3" json:"usage,omitempty"`
	Source        string                  `protobuf:"bytes,7,opt,name=source,proto3" json:"source,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}
// Reset overwrites *x with a zero-valued ChatCompletionResponse and then
// stores the message info at file_proto_chat_proto_msgTypes[2] in its message
// state.
//
// The message info is stored unconditionally (no protoimpl.UnsafeEnabled
// guard), which is the form protoc-gen-go v1.36.6 generates.
func (x *ChatCompletionResponse) Reset() {
	*x = ChatCompletionResponse{}
	mi := &file_proto_chat_proto_msgTypes[2]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
func (x *ChatCompletionResponse) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -249,7 +242,7 @@ func (*ChatCompletionResponse) ProtoMessage() {}
func (x *ChatCompletionResponse) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[2]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -306,24 +299,28 @@ func (x *ChatCompletionResponse) GetUsage() *Usage {
return nil
}
type ChatCompletionChoice struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
func (x *ChatCompletionResponse) GetSource() string {
if x != nil {
return x.Source
}
return ""
}
type ChatCompletionChoice struct {
state protoimpl.MessageState `protogen:"open.v1"`
Index int32 `protobuf:"varint,1,opt,name=index,proto3" json:"index,omitempty"`
Message *ChatCompletionMessage `protobuf:"bytes,2,opt,name=message,proto3" json:"message,omitempty"`
FinishReason string `protobuf:"bytes,3,opt,name=finishReason,json=finish_reason,proto3" json:"finishReason,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with a zero-valued ChatCompletionChoice and then stores
// the message info at file_proto_chat_proto_msgTypes[3] in its message state.
//
// The message info is stored unconditionally (no protoimpl.UnsafeEnabled
// guard), which is the form protoc-gen-go v1.36.6 generates.
func (x *ChatCompletionChoice) Reset() {
	*x = ChatCompletionChoice{}
	mi := &file_proto_chat_proto_msgTypes[3]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
func (x *ChatCompletionChoice) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -333,7 +330,7 @@ func (*ChatCompletionChoice) ProtoMessage() {}
func (x *ChatCompletionChoice) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[3]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -370,23 +367,20 @@ func (x *ChatCompletionChoice) GetFinishReason() string {
}
// ChatCompletionMessage is the generated Go type for the
// ai_chat_service.zvoice.com.ChatCompletionMessage protobuf message.
//
// The unexported state, unknownFields and sizeCache fields are protoimpl
// runtime bookkeeping. Each is declared exactly once, in the layout that goes
// with the `protogen:"open.v1"` tag: state first, the other two last.
type ChatCompletionMessage struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Role          string                 `protobuf:"bytes,1,opt,name=role,proto3" json:"role,omitempty"`
	Content       string                 `protobuf:"bytes,2,opt,name=content,proto3" json:"content,omitempty"`
	Name          string                 `protobuf:"bytes,3,opt,name=name,proto3" json:"name,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}
// Reset overwrites *x with a zero-valued ChatCompletionMessage and then stores
// the message info at file_proto_chat_proto_msgTypes[4] in its message state.
//
// The message info is stored unconditionally (no protoimpl.UnsafeEnabled
// guard), which is the form protoc-gen-go v1.36.6 generates.
func (x *ChatCompletionMessage) Reset() {
	*x = ChatCompletionMessage{}
	mi := &file_proto_chat_proto_msgTypes[4]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
func (x *ChatCompletionMessage) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -396,7 +390,7 @@ func (*ChatCompletionMessage) ProtoMessage() {}
func (x *ChatCompletionMessage) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[4]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -433,23 +427,20 @@ func (x *ChatCompletionMessage) GetName() string {
}
// Usage is the generated Go type for the ai_chat_service.zvoice.com.Usage
// protobuf message; it carries the prompt, completion and total token
// counters.
//
// The unexported state, unknownFields and sizeCache fields are protoimpl
// runtime bookkeeping. Each is declared exactly once, in the layout that goes
// with the `protogen:"open.v1"` tag: state first, the other two last.
type Usage struct {
	state            protoimpl.MessageState `protogen:"open.v1"`
	PromptTokens     int32                  `protobuf:"varint,1,opt,name=promptTokens,json=prompt_tokens,proto3" json:"promptTokens,omitempty"`
	CompletionTokens int32                  `protobuf:"varint,2,opt,name=completionTokens,json=completion_tokens,proto3" json:"completionTokens,omitempty"`
	TotalTokens      int32                  `protobuf:"varint,3,opt,name=totalTokens,json=total_tokens,proto3" json:"totalTokens,omitempty"`
	unknownFields    protoimpl.UnknownFields
	sizeCache        protoimpl.SizeCache
}
// Reset overwrites *x with a zero-valued Usage and then stores the message
// info at file_proto_chat_proto_msgTypes[5] in its message state.
//
// The message info is stored unconditionally (no protoimpl.UnsafeEnabled
// guard), which is the form protoc-gen-go v1.36.6 generates.
func (x *Usage) Reset() {
	*x = Usage{}
	mi := &file_proto_chat_proto_msgTypes[5]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
func (x *Usage) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -459,7 +450,7 @@ func (*Usage) ProtoMessage() {}
func (x *Usage) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[5]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -497,25 +488,24 @@ func (x *Usage) GetTotalTokens() int32 {
// Service response message, streaming response.
//
// ChatCompletionStreamResponse is the generated Go type for the
// ai_chat_service.zvoice.com.ChatCompletionStreamResponse protobuf message.
//
// The unexported state, unknownFields and sizeCache fields are protoimpl
// runtime bookkeeping. Each is declared exactly once, in the layout that goes
// with the `protogen:"open.v1"` tag: state first, the other two last.
type ChatCompletionStreamResponse struct {
	state         protoimpl.MessageState        `protogen:"open.v1"`
	Id            string                        `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
	Object        string                        `protobuf:"bytes,2,opt,name=object,proto3" json:"object,omitempty"`
	Created       int64                         `protobuf:"varint,3,opt,name=created,proto3" json:"created,omitempty"`
	Model         string                        `protobuf:"bytes,4,opt,name=model,proto3" json:"model,omitempty"`
	Choices       []*ChatCompletionStreamChoice `protobuf:"bytes,5,rep,name=choices,proto3" json:"choices,omitempty"`
	Usage         *Usage                        `protobuf:"bytes,6,opt,name=usage,proto3" json:"usage,omitempty"`
	Source        string                        `protobuf:"bytes,7,opt,name=source,proto3" json:"source,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}
// Reset overwrites *x with a zero-valued ChatCompletionStreamResponse and then
// stores the message info at file_proto_chat_proto_msgTypes[6] in its message
// state.
//
// The message info is stored unconditionally (no protoimpl.UnsafeEnabled
// guard), which is the form protoc-gen-go v1.36.6 generates.
func (x *ChatCompletionStreamResponse) Reset() {
	*x = ChatCompletionStreamResponse{}
	mi := &file_proto_chat_proto_msgTypes[6]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
func (x *ChatCompletionStreamResponse) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -525,7 +515,7 @@ func (*ChatCompletionStreamResponse) ProtoMessage() {}
func (x *ChatCompletionStreamResponse) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[6]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -575,24 +565,35 @@ func (x *ChatCompletionStreamResponse) GetChoices() []*ChatCompletionStreamChoic
return nil
}
type ChatCompletionStreamChoice struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
func (x *ChatCompletionStreamResponse) GetUsage() *Usage {
if x != nil {
return x.Usage
}
return nil
}
func (x *ChatCompletionStreamResponse) GetSource() string {
if x != nil {
return x.Source
}
return ""
}
type ChatCompletionStreamChoice struct {
state protoimpl.MessageState `protogen:"open.v1"`
Index int32 `protobuf:"varint,1,opt,name=index,proto3" json:"index,omitempty"`
Delta *ChatCompletionStreamChoiceDelta `protobuf:"bytes,2,opt,name=delta,proto3" json:"delta,omitempty"`
FinishReason string `protobuf:"bytes,3,opt,name=finishReason,json=finish_reason,proto3" json:"finishReason,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with a zero-valued ChatCompletionStreamChoice and then
// stores the message info at file_proto_chat_proto_msgTypes[7] in its message
// state.
//
// The message info is stored unconditionally (no protoimpl.UnsafeEnabled
// guard), which is the form protoc-gen-go v1.36.6 generates.
func (x *ChatCompletionStreamChoice) Reset() {
	*x = ChatCompletionStreamChoice{}
	mi := &file_proto_chat_proto_msgTypes[7]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
func (x *ChatCompletionStreamChoice) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -602,7 +603,7 @@ func (*ChatCompletionStreamChoice) ProtoMessage() {}
func (x *ChatCompletionStreamChoice) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[7]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -639,22 +640,19 @@ func (x *ChatCompletionStreamChoice) GetFinishReason() string {
}
// ChatCompletionStreamChoiceDelta is the generated Go type for the
// ai_chat_service.zvoice.com.ChatCompletionStreamChoiceDelta protobuf message.
//
// The unexported state, unknownFields and sizeCache fields are protoimpl
// runtime bookkeeping. Each is declared exactly once, in the layout that goes
// with the `protogen:"open.v1"` tag: state first, the other two last.
type ChatCompletionStreamChoiceDelta struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Content       string                 `protobuf:"bytes,1,opt,name=content,proto3" json:"content,omitempty"`
	Role          string                 `protobuf:"bytes,2,opt,name=role,proto3" json:"role,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}
// Reset overwrites *x with a zero-valued ChatCompletionStreamChoiceDelta and
// then stores the message info at file_proto_chat_proto_msgTypes[8] in its
// message state.
//
// The message info is stored unconditionally (no protoimpl.UnsafeEnabled
// guard), which is the form protoc-gen-go v1.36.6 generates.
func (x *ChatCompletionStreamChoiceDelta) Reset() {
	*x = ChatCompletionStreamChoiceDelta{}
	mi := &file_proto_chat_proto_msgTypes[8]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
func (x *ChatCompletionStreamChoiceDelta) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -664,7 +662,7 @@ func (*ChatCompletionStreamChoiceDelta) ProtoMessage() {}
func (x *ChatCompletionStreamChoiceDelta) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[8]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -695,147 +693,84 @@ func (x *ChatCompletionStreamChoiceDelta) GetRole() string {
var File_proto_chat_proto protoreflect.FileDescriptor
var file_proto_chat_proto_rawDesc = []byte{
0x0a, 0x10, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x63, 0x68, 0x61, 0x74, 0x2e, 0x70, 0x72, 0x6f,
0x74, 0x6f, 0x12, 0x1a, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76,
0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x22, 0xc1,
0x01, 0x0a, 0x15, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f,
0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73,
0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61,
0x67, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02,
0x69, 0x64, 0x12, 0x11, 0x0a, 0x03, 0x70, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52,
0x04, 0x70, 0x5f, 0x69, 0x64, 0x12, 0x25, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x43,
0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x65, 0x6e,
0x61, 0x62, 0x6c, 0x65, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x12, 0x44, 0x0a, 0x09,
0x63, 0x68, 0x61, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32,
0x25, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63,
0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61,
0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x52, 0x0a, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x70, 0x61, 0x72,
0x61, 0x6d, 0x22, 0xdc, 0x02, 0x0a, 0x09, 0x43, 0x68, 0x61, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d,
0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52,
0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1d, 0x0a, 0x09, 0x6d, 0x61, 0x78, 0x54, 0x6f, 0x6b,
0x65, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x6d, 0x61, 0x78, 0x5f, 0x74,
0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x74, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61,
0x74, 0x75, 0x72, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x74, 0x65, 0x6d, 0x70,
0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, 0x13, 0x0a, 0x04, 0x74, 0x6f, 0x70, 0x50, 0x18,
0x04, 0x20, 0x01, 0x28, 0x02, 0x52, 0x05, 0x74, 0x6f, 0x70, 0x5f, 0x70, 0x12, 0x29, 0x0a, 0x0f,
0x70, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18,
0x05, 0x20, 0x01, 0x28, 0x02, 0x52, 0x10, 0x70, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x5f,
0x70, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x2b, 0x0a, 0x10, 0x66, 0x72, 0x65, 0x71, 0x75,
0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x06, 0x20, 0x01, 0x28,
0x02, 0x52, 0x11, 0x66, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x5f, 0x70, 0x65, 0x6e,
0x61, 0x6c, 0x74, 0x79, 0x12, 0x19, 0x0a, 0x07, 0x62, 0x6f, 0x74, 0x44, 0x65, 0x73, 0x63, 0x18,
0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x62, 0x6f, 0x74, 0x5f, 0x64, 0x65, 0x73, 0x63, 0x12,
0x2e, 0x0a, 0x11, 0x6d, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x54, 0x6f,
0x6b, 0x65, 0x6e, 0x73, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, 0x13, 0x6d, 0x69, 0x6e, 0x5f,
0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12,
0x1f, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x54, 0x4c, 0x18, 0x09, 0x20,
0x01, 0x28, 0x05, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, 0x74, 0x6c,
0x12, 0x1f, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x4c, 0x65, 0x6e, 0x18, 0x0a,
0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x6c, 0x65,
0x6e, 0x22, 0xf5, 0x01, 0x0a, 0x16, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65,
0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x0e, 0x0a, 0x02,
0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x16, 0x0a, 0x06,
0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x62,
0x6a, 0x65, 0x63, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x18,
0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x12, 0x14,
0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d,
0x6f, 0x64, 0x65, 0x6c, 0x12, 0x4a, 0x0a, 0x07, 0x63, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x73, 0x18,
0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x30, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f,
0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63,
0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f,
0x6e, 0x43, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x52, 0x07, 0x63, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x73,
0x12, 0x37, 0x0a, 0x05, 0x75, 0x73, 0x61, 0x67, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32,
0x21, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63,
0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x55, 0x73, 0x61,
0x67, 0x65, 0x52, 0x05, 0x75, 0x73, 0x61, 0x67, 0x65, 0x22, 0x9e, 0x01, 0x0a, 0x14, 0x43, 0x68,
0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x68, 0x6f, 0x69,
0x63, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x01, 0x20, 0x01, 0x28,
0x05, 0x52, 0x05, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x4b, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73,
0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x31, 0x2e, 0x61, 0x69, 0x5f, 0x63,
0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69,
0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c,
0x65, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x07, 0x6d, 0x65,
0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x23, 0x0a, 0x0c, 0x66, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x52,
0x65, 0x61, 0x73, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x66, 0x69, 0x6e,
0x69, 0x73, 0x68, 0x5f, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x22, 0x59, 0x0a, 0x15, 0x43, 0x68,
0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, 0x73, 0x73,
0x61, 0x67, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28,
0x09, 0x52, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65,
0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
0x74, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52,
0x04, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x7c, 0x0a, 0x05, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x23,
0x0a, 0x0c, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x01,
0x20, 0x01, 0x28, 0x05, 0x52, 0x0d, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x5f, 0x74, 0x6f, 0x6b,
0x65, 0x6e, 0x73, 0x12, 0x2b, 0x0a, 0x10, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f,
0x6e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x63,
0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73,
0x12, 0x21, 0x0a, 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18,
0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x74, 0x6f, 0x6b,
0x65, 0x6e, 0x73, 0x22, 0xc8, 0x01, 0x0a, 0x1c, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70,
0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x52, 0x65, 0x73, 0x70,
0x6f, 0x6e, 0x73, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09,
0x52, 0x02, 0x69, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x18, 0x02,
0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x12, 0x18, 0x0a, 0x07,
0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x63,
0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18,
0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x50, 0x0a, 0x07,
0x63, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x36, 0x2e,
0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e,
0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43,
0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x43,
0x68, 0x6f, 0x69, 0x63, 0x65, 0x52, 0x07, 0x63, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x73, 0x22, 0xaa,
0x01, 0x0a, 0x1a, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f,
0x6e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x43, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x12, 0x14, 0x0a,
0x05, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x69, 0x6e,
0x64, 0x65, 0x78, 0x12, 0x51, 0x0a, 0x05, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01,
0x28, 0x0b, 0x32, 0x3b, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72,
0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e,
0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74,
0x72, 0x65, 0x61, 0x6d, 0x43, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x52,
0x05, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x12, 0x23, 0x0a, 0x0c, 0x66, 0x69, 0x6e, 0x69, 0x73, 0x68,
0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x66, 0x69,
0x6e, 0x69, 0x73, 0x68, 0x5f, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x22, 0x4f, 0x0a, 0x1f, 0x43,
0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x72,
0x65, 0x61, 0x6d, 0x43, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x12, 0x18,
0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52,
0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x6f, 0x6c, 0x65,
0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x32, 0x87, 0x02, 0x0a,
0x04, 0x43, 0x68, 0x61, 0x74, 0x12, 0x77, 0x0a, 0x0e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d,
0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x31, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61,
0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65,
0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74,
0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x32, 0x2e, 0x61, 0x69, 0x5f,
0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f,
0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70,
0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x85,
0x01, 0x0a, 0x14, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f,
0x6e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x31, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61,
0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65,
0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74,
0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x38, 0x2e, 0x61, 0x69, 0x5f,
0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f,
0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70,
0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x52, 0x65, 0x73, 0x70,
0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x42, 0x17, 0x5a, 0x15, 0x61, 0x69, 0x2d, 0x63, 0x68, 0x61,
0x74, 0x2d, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62,
0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
const file_proto_chat_proto_rawDesc = "" +
"\n" +
"\x10proto/chat.proto\x12\x1aai_chat_service.zvoice.com\"\xc1\x01\n" +
"\x15ChatCompletionRequest\x12\x18\n" +
"\amessage\x18\x01 \x01(\tR\amessage\x12\x0e\n" +
"\x02id\x18\x02 \x01(\tR\x02id\x12\x11\n" +
"\x03pid\x18\x03 \x01(\tR\x04p_id\x12%\n" +
"\renableContext\x18\x04 \x01(\bR\x0eenable_context\x12D\n" +
"\tchatParam\x18\x05 \x01(\v2%.ai_chat_service.zvoice.com.ChatParamR\n" +
"chat_param\"\xdc\x02\n" +
"\tChatParam\x12\x14\n" +
"\x05model\x18\x01 \x01(\tR\x05model\x12\x1d\n" +
"\tmaxTokens\x18\x02 \x01(\x05R\n" +
"max_tokens\x12 \n" +
"\vtemperature\x18\x03 \x01(\x02R\vtemperature\x12\x13\n" +
"\x04topP\x18\x04 \x01(\x02R\x05top_p\x12)\n" +
"\x0fpresencePenalty\x18\x05 \x01(\x02R\x10presence_penalty\x12+\n" +
"\x10frequencyPenalty\x18\x06 \x01(\x02R\x11frequency_penalty\x12\x19\n" +
"\abotDesc\x18\a \x01(\tR\bbot_desc\x12.\n" +
"\x11minResponseTokens\x18\b \x01(\x05R\x13min_response_tokens\x12\x1f\n" +
"\n" +
"contextTTL\x18\t \x01(\x05R\vcontext_ttl\x12\x1f\n" +
"\n" +
"contextLen\x18\n" +
" \x01(\x05R\vcontext_len\"\x8d\x02\n" +
"\x16ChatCompletionResponse\x12\x0e\n" +
"\x02id\x18\x01 \x01(\tR\x02id\x12\x16\n" +
"\x06object\x18\x02 \x01(\tR\x06object\x12\x18\n" +
"\acreated\x18\x03 \x01(\x03R\acreated\x12\x14\n" +
"\x05model\x18\x04 \x01(\tR\x05model\x12J\n" +
"\achoices\x18\x05 \x03(\v20.ai_chat_service.zvoice.com.ChatCompletionChoiceR\achoices\x127\n" +
"\x05usage\x18\x06 \x01(\v2!.ai_chat_service.zvoice.com.UsageR\x05usage\x12\x16\n" +
"\x06source\x18\a \x01(\tR\x06source\"\x9e\x01\n" +
"\x14ChatCompletionChoice\x12\x14\n" +
"\x05index\x18\x01 \x01(\x05R\x05index\x12K\n" +
"\amessage\x18\x02 \x01(\v21.ai_chat_service.zvoice.com.ChatCompletionMessageR\amessage\x12#\n" +
"\ffinishReason\x18\x03 \x01(\tR\rfinish_reason\"Y\n" +
"\x15ChatCompletionMessage\x12\x12\n" +
"\x04role\x18\x01 \x01(\tR\x04role\x12\x18\n" +
"\acontent\x18\x02 \x01(\tR\acontent\x12\x12\n" +
"\x04name\x18\x03 \x01(\tR\x04name\"|\n" +
"\x05Usage\x12#\n" +
"\fpromptTokens\x18\x01 \x01(\x05R\rprompt_tokens\x12+\n" +
"\x10completionTokens\x18\x02 \x01(\x05R\x11completion_tokens\x12!\n" +
"\vtotalTokens\x18\x03 \x01(\x05R\ftotal_tokens\"\x99\x02\n" +
"\x1cChatCompletionStreamResponse\x12\x0e\n" +
"\x02id\x18\x01 \x01(\tR\x02id\x12\x16\n" +
"\x06object\x18\x02 \x01(\tR\x06object\x12\x18\n" +
"\acreated\x18\x03 \x01(\x03R\acreated\x12\x14\n" +
"\x05model\x18\x04 \x01(\tR\x05model\x12P\n" +
"\achoices\x18\x05 \x03(\v26.ai_chat_service.zvoice.com.ChatCompletionStreamChoiceR\achoices\x127\n" +
"\x05usage\x18\x06 \x01(\v2!.ai_chat_service.zvoice.com.UsageR\x05usage\x12\x16\n" +
"\x06source\x18\a \x01(\tR\x06source\"\xaa\x01\n" +
"\x1aChatCompletionStreamChoice\x12\x14\n" +
"\x05index\x18\x01 \x01(\x05R\x05index\x12Q\n" +
"\x05delta\x18\x02 \x01(\v2;.ai_chat_service.zvoice.com.ChatCompletionStreamChoiceDeltaR\x05delta\x12#\n" +
"\ffinishReason\x18\x03 \x01(\tR\rfinish_reason\"O\n" +
"\x1fChatCompletionStreamChoiceDelta\x12\x18\n" +
"\acontent\x18\x01 \x01(\tR\acontent\x12\x12\n" +
"\x04role\x18\x02 \x01(\tR\x04role2\x87\x02\n" +
"\x04Chat\x12w\n" +
"\x0eChatCompletion\x121.ai_chat_service.zvoice.com.ChatCompletionRequest\x1a2.ai_chat_service.zvoice.com.ChatCompletionResponse\x12\x85\x01\n" +
"\x14ChatCompletionStream\x121.ai_chat_service.zvoice.com.ChatCompletionRequest\x1a8.ai_chat_service.zvoice.com.ChatCompletionStreamResponse0\x01B\x17Z\x15ai-chat-service/protob\x06proto3"
var (
file_proto_chat_proto_rawDescOnce sync.Once
file_proto_chat_proto_rawDescData = file_proto_chat_proto_rawDesc
file_proto_chat_proto_rawDescData []byte
)
func file_proto_chat_proto_rawDescGZIP() []byte {
file_proto_chat_proto_rawDescOnce.Do(func() {
file_proto_chat_proto_rawDescData = protoimpl.X.CompressGZIP(file_proto_chat_proto_rawDescData)
file_proto_chat_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_proto_chat_proto_rawDesc), len(file_proto_chat_proto_rawDesc)))
})
return file_proto_chat_proto_rawDescData
}
var file_proto_chat_proto_msgTypes = make([]protoimpl.MessageInfo, 9)
var file_proto_chat_proto_goTypes = []interface{}{
var file_proto_chat_proto_goTypes = []any{
(*ChatCompletionRequest)(nil), // 0: ai_chat_service.zvoice.com.ChatCompletionRequest
(*ChatParam)(nil), // 1: ai_chat_service.zvoice.com.ChatParam
(*ChatCompletionResponse)(nil), // 2: ai_chat_service.zvoice.com.ChatCompletionResponse
@@ -852,16 +787,17 @@ var file_proto_chat_proto_depIdxs = []int32{
5, // 2: ai_chat_service.zvoice.com.ChatCompletionResponse.usage:type_name -> ai_chat_service.zvoice.com.Usage
4, // 3: ai_chat_service.zvoice.com.ChatCompletionChoice.message:type_name -> ai_chat_service.zvoice.com.ChatCompletionMessage
7, // 4: ai_chat_service.zvoice.com.ChatCompletionStreamResponse.choices:type_name -> ai_chat_service.zvoice.com.ChatCompletionStreamChoice
8, // 5: ai_chat_service.zvoice.com.ChatCompletionStreamChoice.delta:type_name -> ai_chat_service.zvoice.com.ChatCompletionStreamChoiceDelta
0, // 6: ai_chat_service.zvoice.com.Chat.ChatCompletion:input_type -> ai_chat_service.zvoice.com.ChatCompletionRequest
0, // 7: ai_chat_service.zvoice.com.Chat.ChatCompletionStream:input_type -> ai_chat_service.zvoice.com.ChatCompletionRequest
2, // 8: ai_chat_service.zvoice.com.Chat.ChatCompletion:output_type -> ai_chat_service.zvoice.com.ChatCompletionResponse
6, // 9: ai_chat_service.zvoice.com.Chat.ChatCompletionStream:output_type -> ai_chat_service.zvoice.com.ChatCompletionStreamResponse
8, // [8:10] is the sub-list for method output_type
6, // [6:8] is the sub-list for method input_type
6, // [6:6] is the sub-list for extension type_name
6, // [6:6] is the sub-list for extension extendee
0, // [0:6] is the sub-list for field type_name
5, // 5: ai_chat_service.zvoice.com.ChatCompletionStreamResponse.usage:type_name -> ai_chat_service.zvoice.com.Usage
8, // 6: ai_chat_service.zvoice.com.ChatCompletionStreamChoice.delta:type_name -> ai_chat_service.zvoice.com.ChatCompletionStreamChoiceDelta
0, // 7: ai_chat_service.zvoice.com.Chat.ChatCompletion:input_type -> ai_chat_service.zvoice.com.ChatCompletionRequest
0, // 8: ai_chat_service.zvoice.com.Chat.ChatCompletionStream:input_type -> ai_chat_service.zvoice.com.ChatCompletionRequest
2, // 9: ai_chat_service.zvoice.com.Chat.ChatCompletion:output_type -> ai_chat_service.zvoice.com.ChatCompletionResponse
6, // 10: ai_chat_service.zvoice.com.Chat.ChatCompletionStream:output_type -> ai_chat_service.zvoice.com.ChatCompletionStreamResponse
9, // [9:11] is the sub-list for method output_type
7, // [7:9] is the sub-list for method input_type
7, // [7:7] is the sub-list for extension type_name
7, // [7:7] is the sub-list for extension extendee
0, // [0:7] is the sub-list for field type_name
}
// init calls file_proto_chat_proto_init when the package is loaded so the
// descriptor and message type tables for proto/chat.proto are built once.
func init() { file_proto_chat_proto_init() }
@@ -869,121 +805,11 @@ func file_proto_chat_proto_init() {
if File_proto_chat_proto != nil {
return
}
if !protoimpl.UnsafeEnabled {
file_proto_chat_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionRequest); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatParam); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionResponse); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionChoice); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionMessage); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*Usage); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionStreamResponse); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionStreamChoice); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionStreamChoiceDelta); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: file_proto_chat_proto_rawDesc,
RawDescriptor: unsafe.Slice(unsafe.StringData(file_proto_chat_proto_rawDesc), len(file_proto_chat_proto_rawDesc)),
NumEnums: 0,
NumMessages: 9,
NumExtensions: 0,
@@ -994,7 +820,6 @@ func file_proto_chat_proto_init() {
MessageInfos: file_proto_chat_proto_msgTypes,
}.Build()
File_proto_chat_proto = out.File
file_proto_chat_proto_rawDesc = nil
file_proto_chat_proto_goTypes = nil
file_proto_chat_proto_depIdxs = nil
}

View File

@@ -31,6 +31,7 @@ message ChatCompletionResponse {
string model = 4 [json_name = "model"];
repeated ChatCompletionChoice choices = 5 [json_name = "choices"];
Usage usage = 6[json_name = "usage"];
string source = 7 [json_name = "source"];
}
message ChatCompletionChoice {
int32 index = 1[json_name = "index"];
@@ -55,6 +56,8 @@ message ChatCompletionStreamResponse {
int64 created = 3 [json_name = "created"];
string model = 4 [json_name = "model"];
repeated ChatCompletionStreamChoice choices = 5 [json_name = "choices"];
Usage usage = 6[json_name = "usage"];
string source = 7 [json_name = "source"];
}
message ChatCompletionStreamChoice {
int32 index = 1[json_name = "index"];

View File

@@ -1,7 +1,7 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.2.0
// - protoc v4.22.0
// - protoc-gen-go-grpc v1.5.1
// - protoc v3.6.1
// source: proto/chat.proto
package proto
@@ -15,15 +15,20 @@ import (
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.32.0 or later.
const _ = grpc.SupportPackageIsVersion7
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
const (
Chat_ChatCompletion_FullMethodName = "/ai_chat_service.zvoice.com.Chat/ChatCompletion"
Chat_ChatCompletionStream_FullMethodName = "/ai_chat_service.zvoice.com.Chat/ChatCompletionStream"
)
// ChatClient is the client API for Chat service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type ChatClient interface {
ChatCompletion(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (*ChatCompletionResponse, error)
ChatCompletionStream(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (Chat_ChatCompletionStreamClient, error)
ChatCompletionStream(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[ChatCompletionStreamResponse], error)
}
type chatClient struct {
@@ -35,20 +40,22 @@ func NewChatClient(cc grpc.ClientConnInterface) ChatClient {
}
func (c *chatClient) ChatCompletion(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (*ChatCompletionResponse, error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
out := new(ChatCompletionResponse)
err := c.cc.Invoke(ctx, "/ai_chat_service.zvoice.com.Chat/ChatCompletion", in, out, opts...)
err := c.cc.Invoke(ctx, Chat_ChatCompletion_FullMethodName, in, out, cOpts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *chatClient) ChatCompletionStream(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (Chat_ChatCompletionStreamClient, error) {
stream, err := c.cc.NewStream(ctx, &Chat_ServiceDesc.Streams[0], "/ai_chat_service.zvoice.com.Chat/ChatCompletionStream", opts...)
func (c *chatClient) ChatCompletionStream(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[ChatCompletionStreamResponse], error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
stream, err := c.cc.NewStream(ctx, &Chat_ServiceDesc.Streams[0], Chat_ChatCompletionStream_FullMethodName, cOpts...)
if err != nil {
return nil, err
}
x := &chatChatCompletionStreamClient{stream}
x := &grpc.GenericClientStream[ChatCompletionRequest, ChatCompletionStreamResponse]{ClientStream: stream}
if err := x.ClientStream.SendMsg(in); err != nil {
return nil, err
}
@@ -58,43 +65,33 @@ func (c *chatClient) ChatCompletionStream(ctx context.Context, in *ChatCompletio
return x, nil
}
type Chat_ChatCompletionStreamClient interface {
Recv() (*ChatCompletionStreamResponse, error)
grpc.ClientStream
}
type chatChatCompletionStreamClient struct {
grpc.ClientStream
}
func (x *chatChatCompletionStreamClient) Recv() (*ChatCompletionStreamResponse, error) {
m := new(ChatCompletionStreamResponse)
if err := x.ClientStream.RecvMsg(m); err != nil {
return nil, err
}
return m, nil
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type Chat_ChatCompletionStreamClient = grpc.ServerStreamingClient[ChatCompletionStreamResponse]
// ChatServer is the server API for Chat service.
// All implementations must embed UnimplementedChatServer
// for forward compatibility
// for forward compatibility.
type ChatServer interface {
ChatCompletion(context.Context, *ChatCompletionRequest) (*ChatCompletionResponse, error)
ChatCompletionStream(*ChatCompletionRequest, Chat_ChatCompletionStreamServer) error
ChatCompletionStream(*ChatCompletionRequest, grpc.ServerStreamingServer[ChatCompletionStreamResponse]) error
mustEmbedUnimplementedChatServer()
}
// UnimplementedChatServer must be embedded to have forward compatible implementations.
type UnimplementedChatServer struct {
}
// UnimplementedChatServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedChatServer struct{}
// ChatCompletion is the fallback unary handler used when an embedding server
// does not provide its own implementation. It ignores its arguments and always
// reports codes.Unimplemented.
func (UnimplementedChatServer) ChatCompletion(context.Context, *ChatCompletionRequest) (*ChatCompletionResponse, error) {
	// The message has no format verbs, so status.Error yields the same status
	// as the formatted variant.
	err := status.Error(codes.Unimplemented, "method ChatCompletion not implemented")
	return nil, err
}
func (UnimplementedChatServer) ChatCompletionStream(*ChatCompletionRequest, Chat_ChatCompletionStreamServer) error {
func (UnimplementedChatServer) ChatCompletionStream(*ChatCompletionRequest, grpc.ServerStreamingServer[ChatCompletionStreamResponse]) error {
return status.Errorf(codes.Unimplemented, "method ChatCompletionStream not implemented")
}
// mustEmbedUnimplementedChatServer is a no-op that satisfies the unexported
// method required by the ChatServer interface.
func (UnimplementedChatServer) mustEmbedUnimplementedChatServer() {}
// testEmbeddedByValue is a no-op. RegisterChatServer invokes it at
// registration time so that a nil, pointer-embedded UnimplementedChatServer
// panics immediately instead of later while serving a request.
func (UnimplementedChatServer) testEmbeddedByValue() {}
// UnsafeChatServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to ChatServer will
@@ -104,6 +101,13 @@ type UnsafeChatServer interface {
}
// RegisterChatServer registers srv with s as the implementation of the Chat
// service described by Chat_ServiceDesc.
func RegisterChatServer(s grpc.ServiceRegistrar, srv ChatServer) {
// If the following call panics, it indicates UnimplementedChatServer was
// embedded by pointer and is nil. This will cause panics if an
// unimplemented method is ever invoked, so we test this at initialization
// time to prevent it from happening at runtime later due to I/O.
if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
t.testEmbeddedByValue()
}
s.RegisterService(&Chat_ServiceDesc, srv)
}
@@ -117,7 +121,7 @@ func _Chat_ChatCompletion_Handler(srv interface{}, ctx context.Context, dec func
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/ai_chat_service.zvoice.com.Chat/ChatCompletion",
FullMethod: Chat_ChatCompletion_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(ChatServer).ChatCompletion(ctx, req.(*ChatCompletionRequest))
@@ -130,21 +134,11 @@ func _Chat_ChatCompletionStream_Handler(srv interface{}, stream grpc.ServerStrea
if err := stream.RecvMsg(m); err != nil {
return err
}
return srv.(ChatServer).ChatCompletionStream(m, &chatChatCompletionStreamServer{stream})
return srv.(ChatServer).ChatCompletionStream(m, &grpc.GenericServerStream[ChatCompletionRequest, ChatCompletionStreamResponse]{ServerStream: stream})
}
type Chat_ChatCompletionStreamServer interface {
Send(*ChatCompletionStreamResponse) error
grpc.ServerStream
}
type chatChatCompletionStreamServer struct {
grpc.ServerStream
}
func (x *chatChatCompletionStreamServer) Send(m *ChatCompletionStreamResponse) error {
return x.ServerStream.SendMsg(m)
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type Chat_ChatCompletionStreamServer = grpc.ServerStreamingServer[ChatCompletionStreamResponse]
// Chat_ServiceDesc is the grpc.ServiceDesc for Chat service.
// It's only intended for direct use with grpc.RegisterService,

View File

@@ -149,6 +149,30 @@ func (a *app) buildChatCompletionResponse(msg string) *proto.ChatCompletionRespo
}
}
// countMessageTokens wraps role and content in a single chat message and
// returns the token count that tokenizer.GetTokens reports for it under the
// model configured in a.openaiConf. Any tokenizer error is returned unchanged.
func (a *app) countMessageTokens(role, content string) (int, error) {
	msg := &openai.ChatCompletionMessage{Role: role, Content: content}
	return tokenizer.GetTokens(msg, a.openaiConf.Model)
}
// buildUsage assembles the usage record for one reply.
//
// promptTokens is the already-computed prompt size; answer is the reply text,
// which is tokenized here as an assistant message. An empty answer counts as
// zero completion tokens and skips the tokenizer. On a tokenizer error no
// usage is returned.
func (a *app) buildUsage(promptTokens int, answer string) (*proto.Usage, error) {
	var completion int
	if len(answer) > 0 {
		n, err := a.countMessageTokens(openai.ChatMessageRoleAssistant, answer)
		if err != nil {
			return nil, err
		}
		completion = n
	}

	usage := &proto.Usage{
		PromptTokens:     int32(promptTokens),
		CompletionTokens: int32(completion),
	}
	// Sum in int first, then narrow, so the total is computed exactly as the
	// two parts are.
	usage.TotalTokens = int32(promptTokens + completion)
	return usage, nil
}
func (a *app) buildChatCompletionStreamResponse(id, delta, finishReason string) *proto.ChatCompletionStreamResponse {
return &proto.ChatCompletionStreamResponse{
Id: id,

View File

@@ -8,7 +8,6 @@ import (
"ai-chat-service/proto"
"ai-chat-service/services/embedding"
"ai-chat-service/services/faiss"
"ai-chat-service/services/tokenizer"
"context"
"encoding/json"
"io"
@@ -19,6 +18,11 @@ import (
"github.com/sashabaranov/go-openai"
)
// Values written to the Source field of chat responses so callers can tell
// where a reply came from.
const (
// replySourceSemanticMatch marks replies served from a cached record
// returned by searchCachedAnswer.
replySourceSemanticMatch = "semantic_match"
// replySourceLLM marks replies produced by the OpenAI chat completion call.
replySourceLLM = "llm"
)
type chatService struct {
proto.UnimplementedChatServer
config *config.Config
@@ -50,7 +54,19 @@ func (s *chatService) ChatCompletion(ctx context.Context, in *proto.ChatCompleti
}
if !ok {
s.busMetrics.SensitiveQuestionsTotalCounter.Inc()
return app.buildChatCompletionResponse(msg), nil
res := app.buildChatCompletionResponse(msg)
promptTokens, tokenErr := app.countMessageTokens(openai.ChatMessageRoleUser, in.Message)
if tokenErr != nil {
s.log.Error(tokenErr)
return res, nil
}
usage, tokenErr := app.buildUsage(promptTokens, msg)
if tokenErr != nil {
s.log.Error(tokenErr)
return res, nil
}
res.Usage = usage
return res, nil
}
keywords := app.keywords(in)
@@ -58,7 +74,7 @@ func (s *chatService) ChatCompletion(ctx context.Context, in *proto.ChatCompleti
s.busMetrics.KeywordsQuestionsTotalCounter.Inc()
}
req, _, _, _, err := app.buildChatCompletionRequest(in, false)
req, _, currTokens, _, err := app.buildChatCompletionRequest(in, false)
if err != nil {
s.busMetrics.ErrQuestionsTotalCounter.Inc()
return nil, err
@@ -66,7 +82,15 @@ func (s *chatService) ChatCompletion(ctx context.Context, in *proto.ChatCompleti
questionEmbedding, cachedRecord := s.searchCachedAnswer(ctx, in.Message)
if cachedRecord != nil {
return app.buildChatCompletionResponse(cachedRecord.Answer), nil
res := app.buildChatCompletionResponse(cachedRecord.Answer)
usage, tokenErr := app.buildUsage(currTokens, cachedRecord.Answer)
if tokenErr != nil {
s.log.Error(tokenErr)
} else {
res.Usage = usage
}
res.Source = replySourceSemanticMatch
return res, nil
}
client := app.getOpenaiClient()
@@ -88,8 +112,20 @@ func (s *chatService) ChatCompletion(ctx context.Context, in *proto.ChatCompleti
return nil, err
}
answer := ""
if len(resp.Choices) > 0 {
if err = s.persistQA(ctx, questionEmbedding, in.Message, resp.Choices[0].Message.Content); err != nil {
answer = resp.Choices[0].Message.Content
}
usage, tokenErr := app.buildUsage(currTokens, answer)
if tokenErr != nil {
s.log.Error(tokenErr)
} else {
res.Usage = usage
}
res.Source = replySourceLLM
if len(resp.Choices) > 0 {
if err = s.persistQA(ctx, questionEmbedding, in.Message, answer); err != nil {
s.log.Error(err)
} else {
s.busMetrics.QuestionsTotalCounter.Inc()
@@ -109,7 +145,8 @@ func (s *chatService) ChatCompletionStream(in *proto.ChatCompletionRequest, stre
if !ok {
s.busMetrics.SensitiveQuestionsTotalCounter.Inc()
resID := uuid.New().String()
if err = stream.Send(app.buildChatCompletionStreamResponse(resID, "", "")); err != nil {
start := app.buildChatCompletionStreamResponse(resID, "", "")
if err = stream.Send(start); err != nil {
return err
}
for _, res := range app.buildChatCompletionStreamResponseList(resID, msg) {
@@ -117,7 +154,19 @@ func (s *chatService) ChatCompletionStream(in *proto.ChatCompletionRequest, stre
return err
}
}
return stream.Send(app.buildChatCompletionStreamResponse(resID, "", "stop"))
final := app.buildChatCompletionStreamResponse(resID, "", "stop")
promptTokens, tokenErr := app.countMessageTokens(openai.ChatMessageRoleUser, in.Message)
if tokenErr != nil {
s.log.Error(tokenErr)
} else {
usage, tokenErr := app.buildUsage(promptTokens, msg)
if tokenErr != nil {
s.log.Error(tokenErr)
} else {
final.Usage = usage
}
}
return stream.Send(final)
}
keywords := app.keywords(in)
@@ -125,7 +174,7 @@ func (s *chatService) ChatCompletionStream(in *proto.ChatCompletionRequest, stre
s.busMetrics.KeywordsQuestionsTotalCounter.Inc()
}
req, _, _, _, err := app.buildChatCompletionRequest(in, true)
req, _, currTokens, _, err := app.buildChatCompletionRequest(in, true)
if err != nil {
s.busMetrics.ErrQuestionsTotalCounter.Inc()
return err
@@ -133,7 +182,9 @@ func (s *chatService) ChatCompletionStream(in *proto.ChatCompletionRequest, stre
questionEmbedding, cachedRecord := s.searchCachedAnswer(stream.Context(), in.Message)
if cachedRecord != nil {
if err = stream.Send(app.buildChatCompletionStreamResponse(cachedRecord.ID, "", "")); err != nil {
start := app.buildChatCompletionStreamResponse(cachedRecord.ID, "", "")
start.Source = replySourceSemanticMatch
if err = stream.Send(start); err != nil {
return err
}
for _, res := range app.buildChatCompletionStreamResponseList(cachedRecord.ID, cachedRecord.Answer) {
@@ -141,7 +192,15 @@ func (s *chatService) ChatCompletionStream(in *proto.ChatCompletionRequest, stre
return err
}
}
return stream.Send(app.buildChatCompletionStreamResponse(cachedRecord.ID, "", "stop"))
final := app.buildChatCompletionStreamResponse(cachedRecord.ID, "", "stop")
final.Source = replySourceSemanticMatch
usage, tokenErr := app.buildUsage(currTokens, cachedRecord.Answer)
if tokenErr != nil {
s.log.Error(tokenErr)
} else {
final.Usage = usage
}
return stream.Send(final)
}
client := app.getOpenaiClient()
@@ -154,6 +213,7 @@ func (s *chatService) ChatCompletionStream(in *proto.ChatCompletionRequest, stre
defer chatStream.Close()
completionContent := ""
responseID := ""
for {
resp, err := chatStream.Recv()
if err != nil && err != io.EOF {
@@ -164,6 +224,9 @@ func (s *chatService) ChatCompletionStream(in *proto.ChatCompletionRequest, stre
if err == io.EOF {
break
}
if resp.ID != "" {
responseID = resp.ID
}
completionContent += resp.Choices[0].Delta.Content
res := &proto.ChatCompletionStreamResponse{}
@@ -176,21 +239,24 @@ func (s *chatService) ChatCompletionStream(in *proto.ChatCompletionRequest, stre
s.busMetrics.ErrQuestionsTotalCounter.Inc()
return err
}
res.Source = replySourceLLM
if err = stream.Send(res); err != nil {
return err
}
}
model := s.config.Chat.Model
if in.ChatParam != nil && in.ChatParam.Model != "" {
model = in.ChatParam.Model
}
resultMessage := openai.ChatCompletionMessage{
Role: openai.ChatMessageRoleAssistant,
Content: completionContent,
}
if _, err = tokenizer.GetTokens(&resultMessage, model); err != nil {
usage, tokenErr := app.buildUsage(currTokens, completionContent)
if tokenErr != nil {
s.busMetrics.ErrQuestionsTotalCounter.Inc()
return tokenErr
}
if responseID == "" {
responseID = uuid.New().String()
}
final := app.buildChatCompletionStreamResponse(responseID, "", "stop")
final.Usage = usage
final.Source = replySourceLLM
if err = stream.Send(final); err != nil {
return err
}

View File

@@ -1,17 +1,17 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.28.1
// protoc v4.22.0
// protoc-gen-go v1.36.6
// protoc v3.6.1
// source: proto/chat.proto
package proto
import (
reflect "reflect"
sync "sync"
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
const (
@@ -22,25 +22,22 @@ const (
)
type ChatCompletionRequest struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
state protoimpl.MessageState `protogen:"open.v1"`
Message string `protobuf:"bytes,1,opt,name=message,proto3" json:"message,omitempty"`
Id string `protobuf:"bytes,2,opt,name=id,proto3" json:"id,omitempty"`
Pid string `protobuf:"bytes,3,opt,name=pid,json=p_id,proto3" json:"pid,omitempty"`
EnableContext bool `protobuf:"varint,4,opt,name=enableContext,json=enable_context,proto3" json:"enableContext,omitempty"`
ChatParam *ChatParam `protobuf:"bytes,5,opt,name=chatParam,json=chat_param,proto3" json:"chatParam,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ChatCompletionRequest) Reset() {
*x = ChatCompletionRequest{}
if protoimpl.UnsafeEnabled {
mi := &file_proto_chat_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *ChatCompletionRequest) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -50,7 +47,7 @@ func (*ChatCompletionRequest) ProtoMessage() {}
func (x *ChatCompletionRequest) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[0]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -101,10 +98,7 @@ func (x *ChatCompletionRequest) GetChatParam() *ChatParam {
}
type ChatParam struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
state protoimpl.MessageState `protogen:"open.v1"`
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
MaxTokens int32 `protobuf:"varint,2,opt,name=maxTokens,json=max_tokens,proto3" json:"maxTokens,omitempty"`
Temperature float32 `protobuf:"fixed32,3,opt,name=temperature,proto3" json:"temperature,omitempty"`
@@ -115,16 +109,16 @@ type ChatParam struct {
MinResponseTokens int32 `protobuf:"varint,8,opt,name=minResponseTokens,json=min_response_tokens,proto3" json:"minResponseTokens,omitempty"`
ContextTTL int32 `protobuf:"varint,9,opt,name=contextTTL,json=context_ttl,proto3" json:"contextTTL,omitempty"`
ContextLen int32 `protobuf:"varint,10,opt,name=contextLen,json=context_len,proto3" json:"contextLen,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ChatParam) Reset() {
*x = ChatParam{}
if protoimpl.UnsafeEnabled {
mi := &file_proto_chat_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *ChatParam) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -134,7 +128,7 @@ func (*ChatParam) ProtoMessage() {}
func (x *ChatParam) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[1]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -221,26 +215,24 @@ func (x *ChatParam) GetContextLen() int32 {
// 服务响应消息,非流式响应
type ChatCompletionResponse struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
state protoimpl.MessageState `protogen:"open.v1"`
Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
Object string `protobuf:"bytes,2,opt,name=object,proto3" json:"object,omitempty"`
Created int64 `protobuf:"varint,3,opt,name=created,proto3" json:"created,omitempty"`
Model string `protobuf:"bytes,4,opt,name=model,proto3" json:"model,omitempty"`
Choices []*ChatCompletionChoice `protobuf:"bytes,5,rep,name=choices,proto3" json:"choices,omitempty"`
Usage *Usage `protobuf:"bytes,6,opt,name=usage,proto3" json:"usage,omitempty"`
Source string `protobuf:"bytes,7,opt,name=source,proto3" json:"source,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ChatCompletionResponse) Reset() {
*x = ChatCompletionResponse{}
if protoimpl.UnsafeEnabled {
mi := &file_proto_chat_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *ChatCompletionResponse) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -250,7 +242,7 @@ func (*ChatCompletionResponse) ProtoMessage() {}
func (x *ChatCompletionResponse) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[2]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -307,24 +299,28 @@ func (x *ChatCompletionResponse) GetUsage() *Usage {
return nil
}
type ChatCompletionChoice struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
// GetSource returns the Source field, or the empty string when x is nil.
func (x *ChatCompletionResponse) GetSource() string {
	if x == nil {
		return ""
	}
	return x.Source
}
type ChatCompletionChoice struct {
state protoimpl.MessageState `protogen:"open.v1"`
Index int32 `protobuf:"varint,1,opt,name=index,proto3" json:"index,omitempty"`
Message *ChatCompletionMessage `protobuf:"bytes,2,opt,name=message,proto3" json:"message,omitempty"`
FinishReason string `protobuf:"bytes,3,opt,name=finishReason,json=finish_reason,proto3" json:"finishReason,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ChatCompletionChoice) Reset() {
*x = ChatCompletionChoice{}
if protoimpl.UnsafeEnabled {
mi := &file_proto_chat_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *ChatCompletionChoice) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -334,7 +330,7 @@ func (*ChatCompletionChoice) ProtoMessage() {}
func (x *ChatCompletionChoice) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[3]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -371,23 +367,20 @@ func (x *ChatCompletionChoice) GetFinishReason() string {
}
type ChatCompletionMessage struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
state protoimpl.MessageState `protogen:"open.v1"`
Role string `protobuf:"bytes,1,opt,name=role,proto3" json:"role,omitempty"`
Content string `protobuf:"bytes,2,opt,name=content,proto3" json:"content,omitempty"`
Name string `protobuf:"bytes,3,opt,name=name,proto3" json:"name,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ChatCompletionMessage) Reset() {
*x = ChatCompletionMessage{}
if protoimpl.UnsafeEnabled {
mi := &file_proto_chat_proto_msgTypes[4]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *ChatCompletionMessage) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -397,7 +390,7 @@ func (*ChatCompletionMessage) ProtoMessage() {}
func (x *ChatCompletionMessage) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[4]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -434,23 +427,20 @@ func (x *ChatCompletionMessage) GetName() string {
}
type Usage struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
state protoimpl.MessageState `protogen:"open.v1"`
PromptTokens int32 `protobuf:"varint,1,opt,name=promptTokens,json=prompt_tokens,proto3" json:"promptTokens,omitempty"`
CompletionTokens int32 `protobuf:"varint,2,opt,name=completionTokens,json=completion_tokens,proto3" json:"completionTokens,omitempty"`
TotalTokens int32 `protobuf:"varint,3,opt,name=totalTokens,json=total_tokens,proto3" json:"totalTokens,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *Usage) Reset() {
*x = Usage{}
if protoimpl.UnsafeEnabled {
mi := &file_proto_chat_proto_msgTypes[5]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *Usage) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -460,7 +450,7 @@ func (*Usage) ProtoMessage() {}
func (x *Usage) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[5]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -498,25 +488,24 @@ func (x *Usage) GetTotalTokens() int32 {
// 服务响应消息,流式响应
type ChatCompletionStreamResponse struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
state protoimpl.MessageState `protogen:"open.v1"`
Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
Object string `protobuf:"bytes,2,opt,name=object,proto3" json:"object,omitempty"`
Created int64 `protobuf:"varint,3,opt,name=created,proto3" json:"created,omitempty"`
Model string `protobuf:"bytes,4,opt,name=model,proto3" json:"model,omitempty"`
Choices []*ChatCompletionStreamChoice `protobuf:"bytes,5,rep,name=choices,proto3" json:"choices,omitempty"`
Usage *Usage `protobuf:"bytes,6,opt,name=usage,proto3" json:"usage,omitempty"`
Source string `protobuf:"bytes,7,opt,name=source,proto3" json:"source,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ChatCompletionStreamResponse) Reset() {
*x = ChatCompletionStreamResponse{}
if protoimpl.UnsafeEnabled {
mi := &file_proto_chat_proto_msgTypes[6]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *ChatCompletionStreamResponse) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -526,7 +515,7 @@ func (*ChatCompletionStreamResponse) ProtoMessage() {}
func (x *ChatCompletionStreamResponse) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[6]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -576,24 +565,35 @@ func (x *ChatCompletionStreamResponse) GetChoices() []*ChatCompletionStreamChoic
return nil
}
type ChatCompletionStreamChoice struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
// GetUsage returns the Usage field, or nil when x is nil.
func (x *ChatCompletionStreamResponse) GetUsage() *Usage {
	if x == nil {
		return nil
	}
	return x.Usage
}
// GetSource returns the Source field, or the empty string when x is nil.
func (x *ChatCompletionStreamResponse) GetSource() string {
	if x == nil {
		return ""
	}
	return x.Source
}
type ChatCompletionStreamChoice struct {
state protoimpl.MessageState `protogen:"open.v1"`
Index int32 `protobuf:"varint,1,opt,name=index,proto3" json:"index,omitempty"`
Delta *ChatCompletionStreamChoiceDelta `protobuf:"bytes,2,opt,name=delta,proto3" json:"delta,omitempty"`
FinishReason string `protobuf:"bytes,3,opt,name=finishReason,json=finish_reason,proto3" json:"finishReason,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ChatCompletionStreamChoice) Reset() {
*x = ChatCompletionStreamChoice{}
if protoimpl.UnsafeEnabled {
mi := &file_proto_chat_proto_msgTypes[7]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *ChatCompletionStreamChoice) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -603,7 +603,7 @@ func (*ChatCompletionStreamChoice) ProtoMessage() {}
func (x *ChatCompletionStreamChoice) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[7]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -640,22 +640,19 @@ func (x *ChatCompletionStreamChoice) GetFinishReason() string {
}
type ChatCompletionStreamChoiceDelta struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
unknownFields protoimpl.UnknownFields
state protoimpl.MessageState `protogen:"open.v1"`
Content string `protobuf:"bytes,1,opt,name=content,proto3" json:"content,omitempty"`
Role string `protobuf:"bytes,2,opt,name=role,proto3" json:"role,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ChatCompletionStreamChoiceDelta) Reset() {
*x = ChatCompletionStreamChoiceDelta{}
if protoimpl.UnsafeEnabled {
mi := &file_proto_chat_proto_msgTypes[8]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
}
func (x *ChatCompletionStreamChoiceDelta) String() string {
return protoimpl.X.MessageStringOf(x)
@@ -665,7 +662,7 @@ func (*ChatCompletionStreamChoiceDelta) ProtoMessage() {}
func (x *ChatCompletionStreamChoiceDelta) ProtoReflect() protoreflect.Message {
mi := &file_proto_chat_proto_msgTypes[8]
if protoimpl.UnsafeEnabled && x != nil {
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
@@ -696,147 +693,84 @@ func (x *ChatCompletionStreamChoiceDelta) GetRole() string {
var File_proto_chat_proto protoreflect.FileDescriptor
var file_proto_chat_proto_rawDesc = []byte{
0x0a, 0x10, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x63, 0x68, 0x61, 0x74, 0x2e, 0x70, 0x72, 0x6f,
0x74, 0x6f, 0x12, 0x1a, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76,
0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x22, 0xc1,
0x01, 0x0a, 0x15, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f,
0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73,
0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61,
0x67, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02,
0x69, 0x64, 0x12, 0x11, 0x0a, 0x03, 0x70, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52,
0x04, 0x70, 0x5f, 0x69, 0x64, 0x12, 0x25, 0x0a, 0x0d, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x43,
0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x65, 0x6e,
0x61, 0x62, 0x6c, 0x65, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x12, 0x44, 0x0a, 0x09,
0x63, 0x68, 0x61, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32,
0x25, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63,
0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61,
0x74, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x52, 0x0a, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x70, 0x61, 0x72,
0x61, 0x6d, 0x22, 0xdc, 0x02, 0x0a, 0x09, 0x43, 0x68, 0x61, 0x74, 0x50, 0x61, 0x72, 0x61, 0x6d,
0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52,
0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x1d, 0x0a, 0x09, 0x6d, 0x61, 0x78, 0x54, 0x6f, 0x6b,
0x65, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x6d, 0x61, 0x78, 0x5f, 0x74,
0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12, 0x20, 0x0a, 0x0b, 0x74, 0x65, 0x6d, 0x70, 0x65, 0x72, 0x61,
0x74, 0x75, 0x72, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0b, 0x74, 0x65, 0x6d, 0x70,
0x65, 0x72, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, 0x13, 0x0a, 0x04, 0x74, 0x6f, 0x70, 0x50, 0x18,
0x04, 0x20, 0x01, 0x28, 0x02, 0x52, 0x05, 0x74, 0x6f, 0x70, 0x5f, 0x70, 0x12, 0x29, 0x0a, 0x0f,
0x70, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18,
0x05, 0x20, 0x01, 0x28, 0x02, 0x52, 0x10, 0x70, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x5f,
0x70, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x12, 0x2b, 0x0a, 0x10, 0x66, 0x72, 0x65, 0x71, 0x75,
0x65, 0x6e, 0x63, 0x79, 0x50, 0x65, 0x6e, 0x61, 0x6c, 0x74, 0x79, 0x18, 0x06, 0x20, 0x01, 0x28,
0x02, 0x52, 0x11, 0x66, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79, 0x5f, 0x70, 0x65, 0x6e,
0x61, 0x6c, 0x74, 0x79, 0x12, 0x19, 0x0a, 0x07, 0x62, 0x6f, 0x74, 0x44, 0x65, 0x73, 0x63, 0x18,
0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x62, 0x6f, 0x74, 0x5f, 0x64, 0x65, 0x73, 0x63, 0x12,
0x2e, 0x0a, 0x11, 0x6d, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x54, 0x6f,
0x6b, 0x65, 0x6e, 0x73, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, 0x13, 0x6d, 0x69, 0x6e, 0x5f,
0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x12,
0x1f, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x54, 0x4c, 0x18, 0x09, 0x20,
0x01, 0x28, 0x05, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, 0x74, 0x6c,
0x12, 0x1f, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x4c, 0x65, 0x6e, 0x18, 0x0a,
0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x6c, 0x65,
0x6e, 0x22, 0xf5, 0x01, 0x0a, 0x16, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65,
0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x0e, 0x0a, 0x02,
0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x16, 0x0a, 0x06,
0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x62,
0x6a, 0x65, 0x63, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x18,
0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x12, 0x14,
0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d,
0x6f, 0x64, 0x65, 0x6c, 0x12, 0x4a, 0x0a, 0x07, 0x63, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x73, 0x18,
0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x30, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f,
0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63,
0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f,
0x6e, 0x43, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x52, 0x07, 0x63, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x73,
0x12, 0x37, 0x0a, 0x05, 0x75, 0x73, 0x61, 0x67, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32,
0x21, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63,
0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x55, 0x73, 0x61,
0x67, 0x65, 0x52, 0x05, 0x75, 0x73, 0x61, 0x67, 0x65, 0x22, 0x9e, 0x01, 0x0a, 0x14, 0x43, 0x68,
0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x68, 0x6f, 0x69,
0x63, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x01, 0x20, 0x01, 0x28,
0x05, 0x52, 0x05, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x4b, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73,
0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x31, 0x2e, 0x61, 0x69, 0x5f, 0x63,
0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69,
0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c,
0x65, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x07, 0x6d, 0x65,
0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x23, 0x0a, 0x0c, 0x66, 0x69, 0x6e, 0x69, 0x73, 0x68, 0x52,
0x65, 0x61, 0x73, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x66, 0x69, 0x6e,
0x69, 0x73, 0x68, 0x5f, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x22, 0x59, 0x0a, 0x15, 0x43, 0x68,
0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, 0x73, 0x73,
0x61, 0x67, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28,
0x09, 0x52, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65,
0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
0x74, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52,
0x04, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0x7c, 0x0a, 0x05, 0x55, 0x73, 0x61, 0x67, 0x65, 0x12, 0x23,
0x0a, 0x0c, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x01,
0x20, 0x01, 0x28, 0x05, 0x52, 0x0d, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x5f, 0x74, 0x6f, 0x6b,
0x65, 0x6e, 0x73, 0x12, 0x2b, 0x0a, 0x10, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f,
0x6e, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x63,
0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73,
0x12, 0x21, 0x0a, 0x0b, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18,
0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0c, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x74, 0x6f, 0x6b,
0x65, 0x6e, 0x73, 0x22, 0xc8, 0x01, 0x0a, 0x1c, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70,
0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x52, 0x65, 0x73, 0x70,
0x6f, 0x6e, 0x73, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09,
0x52, 0x02, 0x69, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x18, 0x02,
0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x12, 0x18, 0x0a, 0x07,
0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x63,
0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18,
0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x50, 0x0a, 0x07,
0x63, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x36, 0x2e,
0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e,
0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43,
0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x43,
0x68, 0x6f, 0x69, 0x63, 0x65, 0x52, 0x07, 0x63, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x73, 0x22, 0xaa,
0x01, 0x0a, 0x1a, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f,
0x6e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x43, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x12, 0x14, 0x0a,
0x05, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x69, 0x6e,
0x64, 0x65, 0x78, 0x12, 0x51, 0x0a, 0x05, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01,
0x28, 0x0b, 0x32, 0x3b, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72,
0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e,
0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74,
0x72, 0x65, 0x61, 0x6d, 0x43, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x52,
0x05, 0x64, 0x65, 0x6c, 0x74, 0x61, 0x12, 0x23, 0x0a, 0x0c, 0x66, 0x69, 0x6e, 0x69, 0x73, 0x68,
0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x66, 0x69,
0x6e, 0x69, 0x73, 0x68, 0x5f, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x22, 0x4f, 0x0a, 0x1f, 0x43,
0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x72,
0x65, 0x61, 0x6d, 0x43, 0x68, 0x6f, 0x69, 0x63, 0x65, 0x44, 0x65, 0x6c, 0x74, 0x61, 0x12, 0x18,
0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52,
0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x6f, 0x6c, 0x65,
0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x72, 0x6f, 0x6c, 0x65, 0x32, 0x87, 0x02, 0x0a,
0x04, 0x43, 0x68, 0x61, 0x74, 0x12, 0x77, 0x0a, 0x0e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d,
0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x31, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61,
0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65,
0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74,
0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x32, 0x2e, 0x61, 0x69, 0x5f,
0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f,
0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70,
0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x85,
0x01, 0x0a, 0x14, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f,
0x6e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x31, 0x2e, 0x61, 0x69, 0x5f, 0x63, 0x68, 0x61,
0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f, 0x69, 0x63, 0x65,
0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74,
0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x38, 0x2e, 0x61, 0x69, 0x5f,
0x63, 0x68, 0x61, 0x74, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x7a, 0x76, 0x6f,
0x69, 0x63, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0x2e, 0x43, 0x68, 0x61, 0x74, 0x43, 0x6f, 0x6d, 0x70,
0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x52, 0x65, 0x73, 0x70,
0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x42, 0x17, 0x5a, 0x15, 0x61, 0x69, 0x2d, 0x63, 0x68, 0x61,
0x74, 0x2d, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62,
0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
const file_proto_chat_proto_rawDesc = "" +
"\n" +
"\x10proto/chat.proto\x12\x1aai_chat_service.zvoice.com\"\xc1\x01\n" +
"\x15ChatCompletionRequest\x12\x18\n" +
"\amessage\x18\x01 \x01(\tR\amessage\x12\x0e\n" +
"\x02id\x18\x02 \x01(\tR\x02id\x12\x11\n" +
"\x03pid\x18\x03 \x01(\tR\x04p_id\x12%\n" +
"\renableContext\x18\x04 \x01(\bR\x0eenable_context\x12D\n" +
"\tchatParam\x18\x05 \x01(\v2%.ai_chat_service.zvoice.com.ChatParamR\n" +
"chat_param\"\xdc\x02\n" +
"\tChatParam\x12\x14\n" +
"\x05model\x18\x01 \x01(\tR\x05model\x12\x1d\n" +
"\tmaxTokens\x18\x02 \x01(\x05R\n" +
"max_tokens\x12 \n" +
"\vtemperature\x18\x03 \x01(\x02R\vtemperature\x12\x13\n" +
"\x04topP\x18\x04 \x01(\x02R\x05top_p\x12)\n" +
"\x0fpresencePenalty\x18\x05 \x01(\x02R\x10presence_penalty\x12+\n" +
"\x10frequencyPenalty\x18\x06 \x01(\x02R\x11frequency_penalty\x12\x19\n" +
"\abotDesc\x18\a \x01(\tR\bbot_desc\x12.\n" +
"\x11minResponseTokens\x18\b \x01(\x05R\x13min_response_tokens\x12\x1f\n" +
"\n" +
"contextTTL\x18\t \x01(\x05R\vcontext_ttl\x12\x1f\n" +
"\n" +
"contextLen\x18\n" +
" \x01(\x05R\vcontext_len\"\x8d\x02\n" +
"\x16ChatCompletionResponse\x12\x0e\n" +
"\x02id\x18\x01 \x01(\tR\x02id\x12\x16\n" +
"\x06object\x18\x02 \x01(\tR\x06object\x12\x18\n" +
"\acreated\x18\x03 \x01(\x03R\acreated\x12\x14\n" +
"\x05model\x18\x04 \x01(\tR\x05model\x12J\n" +
"\achoices\x18\x05 \x03(\v20.ai_chat_service.zvoice.com.ChatCompletionChoiceR\achoices\x127\n" +
"\x05usage\x18\x06 \x01(\v2!.ai_chat_service.zvoice.com.UsageR\x05usage\x12\x16\n" +
"\x06source\x18\a \x01(\tR\x06source\"\x9e\x01\n" +
"\x14ChatCompletionChoice\x12\x14\n" +
"\x05index\x18\x01 \x01(\x05R\x05index\x12K\n" +
"\amessage\x18\x02 \x01(\v21.ai_chat_service.zvoice.com.ChatCompletionMessageR\amessage\x12#\n" +
"\ffinishReason\x18\x03 \x01(\tR\rfinish_reason\"Y\n" +
"\x15ChatCompletionMessage\x12\x12\n" +
"\x04role\x18\x01 \x01(\tR\x04role\x12\x18\n" +
"\acontent\x18\x02 \x01(\tR\acontent\x12\x12\n" +
"\x04name\x18\x03 \x01(\tR\x04name\"|\n" +
"\x05Usage\x12#\n" +
"\fpromptTokens\x18\x01 \x01(\x05R\rprompt_tokens\x12+\n" +
"\x10completionTokens\x18\x02 \x01(\x05R\x11completion_tokens\x12!\n" +
"\vtotalTokens\x18\x03 \x01(\x05R\ftotal_tokens\"\x99\x02\n" +
"\x1cChatCompletionStreamResponse\x12\x0e\n" +
"\x02id\x18\x01 \x01(\tR\x02id\x12\x16\n" +
"\x06object\x18\x02 \x01(\tR\x06object\x12\x18\n" +
"\acreated\x18\x03 \x01(\x03R\acreated\x12\x14\n" +
"\x05model\x18\x04 \x01(\tR\x05model\x12P\n" +
"\achoices\x18\x05 \x03(\v26.ai_chat_service.zvoice.com.ChatCompletionStreamChoiceR\achoices\x127\n" +
"\x05usage\x18\x06 \x01(\v2!.ai_chat_service.zvoice.com.UsageR\x05usage\x12\x16\n" +
"\x06source\x18\a \x01(\tR\x06source\"\xaa\x01\n" +
"\x1aChatCompletionStreamChoice\x12\x14\n" +
"\x05index\x18\x01 \x01(\x05R\x05index\x12Q\n" +
"\x05delta\x18\x02 \x01(\v2;.ai_chat_service.zvoice.com.ChatCompletionStreamChoiceDeltaR\x05delta\x12#\n" +
"\ffinishReason\x18\x03 \x01(\tR\rfinish_reason\"O\n" +
"\x1fChatCompletionStreamChoiceDelta\x12\x18\n" +
"\acontent\x18\x01 \x01(\tR\acontent\x12\x12\n" +
"\x04role\x18\x02 \x01(\tR\x04role2\x87\x02\n" +
"\x04Chat\x12w\n" +
"\x0eChatCompletion\x121.ai_chat_service.zvoice.com.ChatCompletionRequest\x1a2.ai_chat_service.zvoice.com.ChatCompletionResponse\x12\x85\x01\n" +
"\x14ChatCompletionStream\x121.ai_chat_service.zvoice.com.ChatCompletionRequest\x1a8.ai_chat_service.zvoice.com.ChatCompletionStreamResponse0\x01B\x17Z\x15ai-chat-service/protob\x06proto3"
var (
file_proto_chat_proto_rawDescOnce sync.Once
file_proto_chat_proto_rawDescData = file_proto_chat_proto_rawDesc
file_proto_chat_proto_rawDescData []byte
)
func file_proto_chat_proto_rawDescGZIP() []byte {
file_proto_chat_proto_rawDescOnce.Do(func() {
file_proto_chat_proto_rawDescData = protoimpl.X.CompressGZIP(file_proto_chat_proto_rawDescData)
file_proto_chat_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_proto_chat_proto_rawDesc), len(file_proto_chat_proto_rawDesc)))
})
return file_proto_chat_proto_rawDescData
}
var file_proto_chat_proto_msgTypes = make([]protoimpl.MessageInfo, 9)
var file_proto_chat_proto_goTypes = []interface{}{
var file_proto_chat_proto_goTypes = []any{
(*ChatCompletionRequest)(nil), // 0: ai_chat_service.zvoice.com.ChatCompletionRequest
(*ChatParam)(nil), // 1: ai_chat_service.zvoice.com.ChatParam
(*ChatCompletionResponse)(nil), // 2: ai_chat_service.zvoice.com.ChatCompletionResponse
@@ -853,16 +787,17 @@ var file_proto_chat_proto_depIdxs = []int32{
5, // 2: ai_chat_service.zvoice.com.ChatCompletionResponse.usage:type_name -> ai_chat_service.zvoice.com.Usage
4, // 3: ai_chat_service.zvoice.com.ChatCompletionChoice.message:type_name -> ai_chat_service.zvoice.com.ChatCompletionMessage
7, // 4: ai_chat_service.zvoice.com.ChatCompletionStreamResponse.choices:type_name -> ai_chat_service.zvoice.com.ChatCompletionStreamChoice
8, // 5: ai_chat_service.zvoice.com.ChatCompletionStreamChoice.delta:type_name -> ai_chat_service.zvoice.com.ChatCompletionStreamChoiceDelta
0, // 6: ai_chat_service.zvoice.com.Chat.ChatCompletion:input_type -> ai_chat_service.zvoice.com.ChatCompletionRequest
0, // 7: ai_chat_service.zvoice.com.Chat.ChatCompletionStream:input_type -> ai_chat_service.zvoice.com.ChatCompletionRequest
2, // 8: ai_chat_service.zvoice.com.Chat.ChatCompletion:output_type -> ai_chat_service.zvoice.com.ChatCompletionResponse
6, // 9: ai_chat_service.zvoice.com.Chat.ChatCompletionStream:output_type -> ai_chat_service.zvoice.com.ChatCompletionStreamResponse
8, // [8:10] is the sub-list for method output_type
6, // [6:8] is the sub-list for method input_type
6, // [6:6] is the sub-list for extension type_name
6, // [6:6] is the sub-list for extension extendee
0, // [0:6] is the sub-list for field type_name
5, // 5: ai_chat_service.zvoice.com.ChatCompletionStreamResponse.usage:type_name -> ai_chat_service.zvoice.com.Usage
8, // 6: ai_chat_service.zvoice.com.ChatCompletionStreamChoice.delta:type_name -> ai_chat_service.zvoice.com.ChatCompletionStreamChoiceDelta
0, // 7: ai_chat_service.zvoice.com.Chat.ChatCompletion:input_type -> ai_chat_service.zvoice.com.ChatCompletionRequest
0, // 8: ai_chat_service.zvoice.com.Chat.ChatCompletionStream:input_type -> ai_chat_service.zvoice.com.ChatCompletionRequest
2, // 9: ai_chat_service.zvoice.com.Chat.ChatCompletion:output_type -> ai_chat_service.zvoice.com.ChatCompletionResponse
6, // 10: ai_chat_service.zvoice.com.Chat.ChatCompletionStream:output_type -> ai_chat_service.zvoice.com.ChatCompletionStreamResponse
9, // [9:11] is the sub-list for method output_type
7, // [7:9] is the sub-list for method input_type
7, // [7:7] is the sub-list for extension type_name
7, // [7:7] is the sub-list for extension extendee
0, // [0:7] is the sub-list for field type_name
}
func init() { file_proto_chat_proto_init() }
@@ -870,121 +805,11 @@ func file_proto_chat_proto_init() {
if File_proto_chat_proto != nil {
return
}
if !protoimpl.UnsafeEnabled {
file_proto_chat_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionRequest); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatParam); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionResponse); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionChoice); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionMessage); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*Usage); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionStreamResponse); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionStreamChoice); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
file_proto_chat_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} {
switch v := v.(*ChatCompletionStreamChoiceDelta); i {
case 0:
return &v.state
case 1:
return &v.sizeCache
case 2:
return &v.unknownFields
default:
return nil
}
}
}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: file_proto_chat_proto_rawDesc,
RawDescriptor: unsafe.Slice(unsafe.StringData(file_proto_chat_proto_rawDesc), len(file_proto_chat_proto_rawDesc)),
NumEnums: 0,
NumMessages: 9,
NumExtensions: 0,
@@ -995,7 +820,6 @@ func file_proto_chat_proto_init() {
MessageInfos: file_proto_chat_proto_msgTypes,
}.Build()
File_proto_chat_proto = out.File
file_proto_chat_proto_rawDesc = nil
file_proto_chat_proto_goTypes = nil
file_proto_chat_proto_depIdxs = nil
}

View File

@@ -31,6 +31,7 @@ message ChatCompletionResponse {
string model = 4 [json_name = "model"];
repeated ChatCompletionChoice choices = 5 [json_name = "choices"];
Usage usage = 6[json_name = "usage"];
string source = 7 [json_name = "source"];
}
message ChatCompletionChoice {
int32 index = 1[json_name = "index"];
@@ -55,6 +56,8 @@ message ChatCompletionStreamResponse {
int64 created = 3 [json_name = "created"];
string model = 4 [json_name = "model"];
repeated ChatCompletionStreamChoice choices = 5 [json_name = "choices"];
Usage usage = 6[json_name = "usage"];
string source = 7 [json_name = "source"];
}
message ChatCompletionStreamChoice {
int32 index = 1[json_name = "index"];

View File

@@ -1,7 +1,7 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.2.0
// - protoc v4.22.0
// - protoc-gen-go-grpc v1.5.1
// - protoc v3.6.1
// source: proto/chat.proto
package proto
@@ -15,15 +15,20 @@ import (
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.32.0 or later.
const _ = grpc.SupportPackageIsVersion7
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
const (
Chat_ChatCompletion_FullMethodName = "/ai_chat_service.zvoice.com.Chat/ChatCompletion"
Chat_ChatCompletionStream_FullMethodName = "/ai_chat_service.zvoice.com.Chat/ChatCompletionStream"
)
// ChatClient is the client API for Chat service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type ChatClient interface {
ChatCompletion(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (*ChatCompletionResponse, error)
ChatCompletionStream(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (Chat_ChatCompletionStreamClient, error)
ChatCompletionStream(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[ChatCompletionStreamResponse], error)
}
type chatClient struct {
@@ -35,20 +40,22 @@ func NewChatClient(cc grpc.ClientConnInterface) ChatClient {
}
func (c *chatClient) ChatCompletion(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (*ChatCompletionResponse, error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
out := new(ChatCompletionResponse)
err := c.cc.Invoke(ctx, "/ai_chat_service.zvoice.com.Chat/ChatCompletion", in, out, opts...)
err := c.cc.Invoke(ctx, Chat_ChatCompletion_FullMethodName, in, out, cOpts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *chatClient) ChatCompletionStream(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (Chat_ChatCompletionStreamClient, error) {
stream, err := c.cc.NewStream(ctx, &Chat_ServiceDesc.Streams[0], "/ai_chat_service.zvoice.com.Chat/ChatCompletionStream", opts...)
func (c *chatClient) ChatCompletionStream(ctx context.Context, in *ChatCompletionRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[ChatCompletionStreamResponse], error) {
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
stream, err := c.cc.NewStream(ctx, &Chat_ServiceDesc.Streams[0], Chat_ChatCompletionStream_FullMethodName, cOpts...)
if err != nil {
return nil, err
}
x := &chatChatCompletionStreamClient{stream}
x := &grpc.GenericClientStream[ChatCompletionRequest, ChatCompletionStreamResponse]{ClientStream: stream}
if err := x.ClientStream.SendMsg(in); err != nil {
return nil, err
}
@@ -58,43 +65,33 @@ func (c *chatClient) ChatCompletionStream(ctx context.Context, in *ChatCompletio
return x, nil
}
type Chat_ChatCompletionStreamClient interface {
Recv() (*ChatCompletionStreamResponse, error)
grpc.ClientStream
}
type chatChatCompletionStreamClient struct {
grpc.ClientStream
}
func (x *chatChatCompletionStreamClient) Recv() (*ChatCompletionStreamResponse, error) {
m := new(ChatCompletionStreamResponse)
if err := x.ClientStream.RecvMsg(m); err != nil {
return nil, err
}
return m, nil
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type Chat_ChatCompletionStreamClient = grpc.ServerStreamingClient[ChatCompletionStreamResponse]
// ChatServer is the server API for Chat service.
// All implementations must embed UnimplementedChatServer
// for forward compatibility
// for forward compatibility.
type ChatServer interface {
ChatCompletion(context.Context, *ChatCompletionRequest) (*ChatCompletionResponse, error)
ChatCompletionStream(*ChatCompletionRequest, Chat_ChatCompletionStreamServer) error
ChatCompletionStream(*ChatCompletionRequest, grpc.ServerStreamingServer[ChatCompletionStreamResponse]) error
mustEmbedUnimplementedChatServer()
}
// UnimplementedChatServer must be embedded to have forward compatible implementations.
type UnimplementedChatServer struct {
}
// UnimplementedChatServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedChatServer struct{}
// ChatCompletion is the default stub for the unary RPC: it ignores its
// arguments and returns a nil response together with a codes.Unimplemented
// status error.
func (UnimplementedChatServer) ChatCompletion(context.Context, *ChatCompletionRequest) (*ChatCompletionResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method ChatCompletion not implemented")
}
func (UnimplementedChatServer) ChatCompletionStream(*ChatCompletionRequest, Chat_ChatCompletionStreamServer) error {
// ChatCompletionStream is the default stub for the server-streaming RPC: it
// ignores the request and the stream and returns a codes.Unimplemented status
// error without sending anything.
func (UnimplementedChatServer) ChatCompletionStream(*ChatCompletionRequest, grpc.ServerStreamingServer[ChatCompletionStreamResponse]) error {
return status.Errorf(codes.Unimplemented, "method ChatCompletionStream not implemented")
}
// mustEmbedUnimplementedChatServer is a no-op marker method; ChatServer
// declares it, so a type gains it by embedding UnimplementedChatServer.
func (UnimplementedChatServer) mustEmbedUnimplementedChatServer() {}
// testEmbeddedByValue is a no-op that RegisterChatServer calls, when the
// server value provides it, as an initialization-time embedding check.
func (UnimplementedChatServer) testEmbeddedByValue() {}
// UnsafeChatServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to ChatServer will
@@ -104,6 +101,13 @@ type UnsafeChatServer interface {
}
// RegisterChatServer registers srv on s as the implementation of the Chat
// service described by Chat_ServiceDesc.
func RegisterChatServer(s grpc.ServiceRegistrar, srv ChatServer) {
// If the following call panics, it indicates UnimplementedChatServer was
// embedded by pointer and is nil. This will cause panics if an
// unimplemented method is ever invoked, so we test this at initialization
// time to prevent it from happening at runtime later due to I/O.
if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
t.testEmbeddedByValue()
}
s.RegisterService(&Chat_ServiceDesc, srv)
}
@@ -117,7 +121,7 @@ func _Chat_ChatCompletion_Handler(srv interface{}, ctx context.Context, dec func
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/ai_chat_service.zvoice.com.Chat/ChatCompletion",
FullMethod: Chat_ChatCompletion_FullMethodName,
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(ChatServer).ChatCompletion(ctx, req.(*ChatCompletionRequest))
@@ -130,21 +134,11 @@ func _Chat_ChatCompletionStream_Handler(srv interface{}, stream grpc.ServerStrea
if err := stream.RecvMsg(m); err != nil {
return err
}
return srv.(ChatServer).ChatCompletionStream(m, &chatChatCompletionStreamServer{stream})
return srv.(ChatServer).ChatCompletionStream(m, &grpc.GenericServerStream[ChatCompletionRequest, ChatCompletionStreamResponse]{ServerStream: stream})
}
type Chat_ChatCompletionStreamServer interface {
Send(*ChatCompletionStreamResponse) error
grpc.ServerStream
}
type chatChatCompletionStreamServer struct {
grpc.ServerStream
}
func (x *chatChatCompletionStreamServer) Send(m *ChatCompletionStreamResponse) error {
return x.ServerStream.SendMsg(m)
}
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type Chat_ChatCompletionStreamServer = grpc.ServerStreamingServer[ChatCompletionStreamResponse]
// Chat_ServiceDesc is the grpc.ServiceDesc for Chat service.
// It's only intended for direct use with grpc.RegisterService,

View File

@@ -0,0 +1,89 @@
package faiss
import (
"ai-chat-service/pkg/config"
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
)
// Client is the set of operations this package exposes against the FAISS
// HTTP service.
type Client interface {
	// Insert posts embedding to the service's /insert endpoint and returns
	// the numeric ID from the response formatted as a decimal string.
	Insert(ctx context.Context, embedding []float32) (string, error)
	// Search posts embedding and k to the service's /search endpoint and
	// returns the decoded response.
	Search(ctx context.Context, embedding []float32, k int) (*SearchResponse, error)
}
// client is the HTTP-backed implementation of Client.
type client struct {
	baseURL    string       // service root URL; request paths are appended to it
	httpClient *http.Client // client used for every request
}
// SearchResponse is the JSON body decoded from the /search endpoint.
// NOTE(review): the three slices are presumably parallel (entry i of each
// describes the same hit) — confirm against the server implementation.
type SearchResponse struct {
	IDs              []int64   `json:"ids"`
	Distances        []float32 `json:"distances"`
	SimilarityScores []float32 `json:"similarity_scores"`
}
// insertRequest is the JSON payload sent to the /insert endpoint.
type insertRequest struct {
	Embedding []float32 `json:"embedding"`
}
// insertResponse is the JSON body decoded from the /insert endpoint.
type insertResponse struct {
	ID int64 `json:"id"`
}
// searchRequest is the JSON payload sent to the /search endpoint.
type searchRequest struct {
	Embedding []float32 `json:"embedding"`
	K         int       `json:"k"` // value of the k argument passed to Search
}
// NewClient builds a Client from the Faiss section of cnf.
//
// Trailing slashes are stripped from Faiss.BaseUrl so request paths can be
// appended directly. Faiss.Timeout is interpreted as a number of seconds and
// becomes the http.Client timeout; per net/http semantics a zero Timeout
// means the client itself imposes no time limit.
func NewClient(cnf *config.Config) Client {
	timeout := time.Duration(cnf.Faiss.Timeout) * time.Second
	root := strings.TrimRight(cnf.Faiss.BaseUrl, "/")

	c := new(client)
	c.baseURL = root
	c.httpClient = &http.Client{Timeout: timeout}
	return c
}
// Insert posts embedding to the /insert endpoint and returns the ID from the
// response rendered as a decimal string. On failure it returns an empty
// string together with the error from postJSON.
func (c *client) Insert(ctx context.Context, embedding []float32) (string, error) {
	var out insertResponse
	in := insertRequest{Embedding: embedding}

	err := c.postJSON(ctx, "/insert", &in, &out)
	if err != nil {
		return "", err
	}
	return fmt.Sprintf("%d", out.ID), nil
}
// Search posts embedding and k to the /search endpoint and returns the
// decoded response. On failure it returns nil together with the error from
// postJSON.
func (c *client) Search(ctx context.Context, embedding []float32, k int) (*SearchResponse, error) {
	in := searchRequest{Embedding: embedding, K: k}
	out := new(SearchResponse)

	err := c.postJSON(ctx, "/search", &in, out)
	if err != nil {
		return nil, err
	}
	return out, nil
}
// postJSON marshals requestData as JSON, POSTs it to baseURL+path, and, on a
// 2xx response, decodes the JSON response body into responseData.
//
// Every failure is wrapped with the step that produced it (encode, build,
// send, decode) so callers can tell where a request broke while still being
// able to unwrap the underlying error. For a non-2xx response the error
// keeps the "faiss request failed: status=N" prefix and, when the body is a
// JSON object carrying a non-null "detail" member (the shape FastAPI uses for
// HTTPException), appends that detail so the server's explanation is not lost.
func (c *client) postJSON(ctx context.Context, path string, requestData any, responseData any) error {
	body, err := json.Marshal(requestData)
	if err != nil {
		return fmt.Errorf("faiss %s: encode request: %w", path, err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+path, bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("faiss %s: build request: %w", path, err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return fmt.Errorf("faiss %s: send request: %w", path, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best effort: a body that is not JSON, or has no detail, simply
		// falls back to the status-only message.
		var apiErr struct {
			Detail json.RawMessage `json:"detail"`
		}
		decErr := json.NewDecoder(resp.Body).Decode(&apiErr)
		if decErr == nil && len(apiErr.Detail) > 0 && string(apiErr.Detail) != "null" {
			return fmt.Errorf("faiss request failed: status=%d detail=%s", resp.StatusCode, apiErr.Detail)
		}
		return fmt.Errorf("faiss request failed: status=%d", resp.StatusCode)
	}

	if err := json.NewDecoder(resp.Body).Decode(responseData); err != nil {
		return fmt.Errorf("faiss %s: decode response: %w", path, err)
	}
	return nil
}

View File

@@ -1,4 +1,17 @@
services:
# Redis instance for the chat service.
redis:
  image: redis:7-alpine
  container_name: ai-chat-redis
  # Run with persistence switched off: an empty --save disables RDB
  # snapshots and --appendonly no disables the append-only file.
  command:
    - redis-server
    - --save
    - ""
    - --appendonly
    - "no"
  ports:
    # host port 8888 -> container port 6379
    - "8888:6379"
  restart: unless-stopped
tokenizer:
build:
context: ../tokenizer
@@ -52,6 +65,7 @@ services:
ports:
- "50055:50055"
depends_on:
  # depends_on takes Compose service names, not container names: the Redis
  # service is declared as "redis" (ai-chat-redis is only its container_name).
  - redis
  - tokenizer
  - sensitive-filter
  - keywords-filter

View File

@@ -17,9 +17,9 @@ chat:
bot_desc: "你是一个AI助手我需要你模拟一名资深的软件工程师来回答我的问题"
min_response_tokens: 600
redis:
host: "host.docker.internal"
port: 8888
pwd: "123456"
host: "redis"
port: 6379
pwd: ""
dependOn:
sensitive:
address: "sensitive-filter:50053"

14
faiss/.dockerignore Normal file
View File

@@ -0,0 +1,14 @@
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
env/
venv/
.venv/
.env
.git/
.gitignore
docker-compose.yml
faiss_index.bin
__pycache__

3
faiss/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
faiss_index.bin
.vscode
__pycache__

48
faiss/Dockerfile Normal file
View File

@@ -0,0 +1,48 @@
# Dockerfile
# Two-stage build: the builder stage installs the Python dependencies, the
# final stage copies the installed packages and the project code.
FROM python:3.12-slim AS builder
# Environment: do not write .pyc files, keep stdout/stderr unbuffered
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
WORKDIR /app
# Install system dependencies (needed to build faiss-cpu)
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    libopenblas-dev \
    libomp-dev \
    && rm -rf /var/lib/apt/lists/*
# Copy the dependency list and install it
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
# Final image
FROM python:3.12-slim
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
WORKDIR /app
# System libraries needed at runtime
RUN apt-get update && apt-get install -y --no-install-recommends \
    libopenblas-dev \
    libomp-dev \
    && rm -rf /var/lib/apt/lists/*
# Copy the installed Python packages (and their console scripts) from the builder
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin
# Copy the project code
COPY . .
# Expose the service port
EXPOSE 8000
# Start command (uses the "module:attribute" import string, which is also compatible with reload)
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]

61
faiss/README.md Normal file
View File

@@ -0,0 +1,61 @@
```shell
conda create -n faiss python=3.10 -y
conda activate faiss
conda install -c pytorch faiss-cpu
```
API 解释
```py
import numpy as np
import faiss
# FLATL2
index = faiss.IndexFlatL2(d)
index.add(xb)
D, I = index.search(xq, k)
# FlatIP
index = faiss.IndexFlatIP(d)
index.add(xb2)
D, I = index.search(xq2, k)
# IDMap
base = faiss.IndexFlatL2(d)
index = faiss.IndexIDMap(base)
index.add_with_ids(xb, ids)
D, I = index.search(xq, k)
# HNSWFlat
index = faiss.IndexHNSWFlat(d, M)
index.hnsw.efSearch = ef_search
index.add(xb)
D, I = index.search(xq, k)
# IVFFlat
quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
safe_train(index, xb, "IndexIVFFlat")
index.add(xb)
index.nprobe = nprobe
D, I = index.search(xq, k)
# PQ
index = faiss.IndexPQ(d, M, nbits)
safe_train(index, xb, f"IndexPQ(M={M}, nbits={nbits})")
index.add(xb)
D, I = index.search(xq, k)
# IVFPQ
quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFPQ(quantizer, d, nlist, M, nbits)
safe_train(index, xb, f"IndexIVFPQ(nlist={nlist}, M={M}, nbits={nbits})")
index.add(xb)
index.nprobe = nprobe
D, I = index.search(xq, k)
# LSH
index = faiss.IndexLSH(d, nbits)
index.add(xb)
D, I = index.search(xq, k)
```

44
faiss/api.py Normal file
View File

@@ -0,0 +1,44 @@
# api.py
from fastapi import FastAPI, Depends, HTTPException
from models import EmbeddingInput, SearchInput
from faiss_manager import faiss_manager
from config import get_settings
settings = get_settings()
app = FastAPI(
title="FAISS 服务",
description="向量插入 + 相似搜索 + 持久化",
version="1.0.0"
)
def log_business(message: str):
    """Print ``message`` (flushed immediately) when request logging is enabled.

    Does nothing when ``settings.ENABLE_REQUEST_LOGS`` is falsy.
    """
    if not settings.ENABLE_REQUEST_LOGS:
        return
    print(message, flush=True)
@app.post("/insert")
async def insert(data: EmbeddingInput):
    """Insert one embedding into the index and return its ID as ``{"id": ...}``.

    Raises:
        HTTPException(400): the manager rejected the input with ``ValueError``
            (e.g. wrong embedding dimension).
        HTTPException(500): any other failure. Internal errors are no longer
            reported as a client error.
    """
    try:
        vector_id = faiss_manager.insert(data.embedding)
    except ValueError as e:
        # Input validation failure: the caller sent something unusable.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Anything else is a server-side problem, not the caller's fault.
        raise HTTPException(status_code=500, detail=str(e)) from e
    log_business(f"[faiss] insert id={vector_id}")
    return {"id": vector_id}
@app.post("/search")
async def search(data: SearchInput):
    """Search the index for the ``k`` nearest neighbours of an embedding.

    Returns the dictionary produced by ``faiss_manager.search``.

    Raises:
        HTTPException(400): ``k`` is not positive, or the manager rejected
            the input with ``ValueError`` (e.g. wrong embedding dimension).
        HTTPException(500): any other failure. Internal errors are no longer
            reported as a client error.
    """
    # Reject a non-positive k up front so it is reported as a client error
    # with a clear message rather than surfacing from inside the index.
    if data.k <= 0:
        raise HTTPException(
            status_code=400,
            detail=f"k must be a positive integer, got {data.k}",
        )
    try:
        result = faiss_manager.search(data.embedding, data.k)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    log_business(
        f"[faiss] search ids={result['ids']} similarity_scores={result['similarity_scores']}",
    )
    return result
@app.post("/persist")
async def persist():
    """Ask the manager to persist the index, then report success."""
    faiss_manager.persist()
    response = {"status": "success", "message": "索引已持久化"}
    return response

34
faiss/config.py Normal file
View File

@@ -0,0 +1,34 @@
# config.py
from pydantic_settings import BaseSettings
from functools import lru_cache
# Service configuration. Values come from the defaults below and can be
# overridden through the ".env" file named in the inner Config class.
class Settings(BaseSettings):
    # FAISS settings (tuned for the LLM similar-question cache use case)
    FAISS_DIM: int = 1024 # change to match your embedding model, e.g. bge-large=1024, text-embedding-3-large=3072
    FAISS_INDEX_PATH: str = "faiss_index.bin"
    FAISS_INDEX_TYPE: str = "HNSW" # default is HNSW (the recommended choice)
    # HNSW-specific parameters (speed / accuracy trade-off)
    HNSW_M: int = 32 # connections per layer, 16-64; larger is more accurate but uses a bit more memory
    HNSW_EF_CONSTRUCTION: int = 200 # build quality, 100-400
    HNSW_EF_SEARCH: int = 64 # query accuracy, 32-128; larger is more accurate but slightly slower
    # Whether to use cosine similarity (strongly recommended for text embeddings)
    USE_COSINE_SIMILARITY: bool = True # True = automatic normalisation + Index*IP
    ENABLE_REQUEST_LOGS: bool = True # whether to print insert/search business logs
    # FastAPI settings
    APP_HOST: str = "0.0.0.0"
    APP_PORT: int = 8000
    APP_TITLE: str = "FAISS 相似问题缓存服务"
    APP_DESCRIPTION: str = "LLM 对话语义缓存 - 减少 token 消耗"
    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        extra = "ignore" # settings with extra="ignore": unknown entries are not an error
@lru_cache()
def get_settings() -> Settings:
    """Return the shared Settings object.

    The result is memoised by ``lru_cache``, so ``Settings()`` is constructed
    on the first call and the same instance is returned afterwards.
    """
    loaded = Settings()
    return loaded

13
faiss/docker-compose.yml Normal file
View File

@@ -0,0 +1,13 @@
services:
  faiss:
    build: .
    container_name: faiss-service
    ports:
      - "8451:8000"
    volumes:
      # Persist the index through a directory mount. Bind-mounting the index
      # file itself cannot bootstrap a fresh deployment: when the host file
      # does not exist yet Docker creates a directory at that path, and the
      # service then tries to load it as an index on startup.
      # Migration: move an existing ./faiss_index.bin to ./data/faiss_index.bin.
      - ./data:/app/data
      - ./.env:/app/.env # optional: mount configuration
    restart: unless-stopped
    environment:
      - FAISS_DIM=1024
      - APP_PORT=8000
      # Point the service at the index inside the mounted directory.
      - FAISS_INDEX_PATH=/app/data/faiss_index.bin

87
faiss/faiss_manager.py Normal file
View File

@@ -0,0 +1,87 @@
# faiss_manager.py
import os
import numpy as np
import faiss
from config import get_settings
settings = get_settings()
class FaissManager:
    """Owns the FAISS index: load or create it, insert, search and persist.

    Configuration is read from the module-level ``settings`` object.
    """

    def __init__(self):
        self.dim = settings.FAISS_DIM
        self.index_path = settings.FAISS_INDEX_PATH
        self.use_cosine = settings.USE_COSINE_SIMILARITY
        self.index = None
        self._load_or_create_index()

    def _load_or_create_index(self):
        """Load the index from ``index_path`` if that path exists, else create one.

        When an existing index is loaded, its dimension wins over the
        configured ``FAISS_DIM``: ``self.dim`` is updated so that the
        dimension checks in ``insert``/``search`` validate against what the
        index actually accepts.
        """
        if os.path.exists(self.index_path):
            self.index = faiss.read_index(self.index_path)
            print(f"✅ 加载已有索引:{self.index.ntotal} 个向量,维度={self.index.d}")
            if self.index.d != self.dim:
                # Keep validation consistent with the loaded index instead of
                # letting mismatched vectors fail later inside FAISS.
                print(f"⚠️ 索引维度 {self.index.d} 与配置 FAISS_DIM={self.dim} 不一致,以索引维度为准")
                self.dim = self.index.d
            return

        # Create a new index
        if settings.FAISS_INDEX_TYPE == "HNSW":
            if self.use_cosine:
                self.index = faiss.IndexHNSWFlat(self.dim, settings.HNSW_M, faiss.METRIC_INNER_PRODUCT)
                print("✅ 创建 HNSWIP 索引(余弦相似度)")
            else:
                self.index = faiss.IndexHNSWFlat(self.dim, settings.HNSW_M)
                print("✅ 创建 HNSWFlat 索引L2 距离)")
            # Apply the HNSW parameters
            self.index.hnsw.efConstruction = settings.HNSW_EF_CONSTRUCTION
            self.index.hnsw.efSearch = settings.HNSW_EF_SEARCH
            print(f" HNSW 参数: M={settings.HNSW_M}, efConstruction={settings.HNSW_EF_CONSTRUCTION}, efSearch={settings.HNSW_EF_SEARCH}")
        elif settings.FAISS_INDEX_TYPE == "FlatIP" and self.use_cosine:
            self.index = faiss.IndexFlatIP(self.dim)
            print("✅ 创建 FlatIP 索引(精确余弦)")
        else:
            # Default: exact L2 (compatible with older configurations)
            self.index = faiss.IndexFlatL2(self.dim)
            print("✅ 创建 FlatL2 索引(精确欧式)")

    def _normalize(self, embedding: list[float]) -> np.ndarray:
        """L2-normalise the embedding (required for cosine similarity).

        A zero vector is returned unchanged to avoid dividing by zero.
        """
        vec = np.array(embedding, dtype=np.float32)
        norm = np.linalg.norm(vec)
        return vec / norm if norm > 0 else vec

    def _prepare(self, embedding: list[float]) -> np.ndarray:
        """Validate the dimension and return the embedding as a (1, dim) float32 row."""
        if len(embedding) != self.dim:
            raise ValueError(f"Embedding 维度错误,应为 {self.dim}")
        vec = self._normalize(embedding) if self.use_cosine else np.array(embedding, dtype=np.float32)
        return vec.reshape(1, -1)

    def insert(self, embedding: list[float]) -> int:
        """Insert a vector and return its ID.

        The ID is the index's ``ntotal`` read just before the vector is added.

        Raises:
            ValueError: the embedding length differs from ``self.dim``.
        """
        vec = self._prepare(embedding)
        idx = self.index.ntotal
        self.index.add(vec)
        return idx

    def search(self, embedding: list[float], k: int = 5):
        """Search for similar vectors; returns ids, distances and similarity scores.

        NOTE(review): per the FAISS documentation, when fewer than ``k``
        results are available the missing slots are reported with id -1 —
        callers should be prepared to skip such entries.

        Raises:
            ValueError: the embedding length differs from ``self.dim`` or
                ``k`` is not positive.
        """
        if k <= 0:
            raise ValueError(f"k must be a positive integer, got {k}")
        vec = self._prepare(embedding)
        distances, indices = self.index.search(vec, k)
        dist = distances[0].tolist()
        return {
            "ids": indices[0].tolist(),
            "distances": dist,  # with cosine, larger means more similar (1.0 = identical)
            "similarity_scores": [1 - d for d in dist] if not self.use_cosine else list(dist),
        }

    def persist(self):
        """Write the index to ``index_path`` and return True."""
        faiss.write_index(self.index, self.index_path)
        print(f"💾 索引已保存 → {self.index_path}(共 {self.index.ntotal} 个向量)")
        return True
# Singleton: created once at import time, which loads or creates the index.
faiss_manager = FaissManager()

19
faiss/main.py Normal file
View File

@@ -0,0 +1,19 @@
# main.py
import uvicorn
from config import get_settings
settings = get_settings()
if __name__ == "__main__":
    # Entry point for running the service directly; auto-reload is always on here.
    reload_enabled = True
    bind_host = settings.APP_HOST
    bind_port = settings.APP_PORT

    print("🚀 启动 FAISS 服务...")
    print(f" 地址: http://{bind_host}:{bind_port}")
    print(f" 重载模式: {'已开启' if reload_enabled else '已关闭'}")

    # The app is passed as an import string ("module:attribute").
    uvicorn.run(
        "api:app",
        host=bind_host,
        port=bind_port,
        reload=reload_enabled,
        reload_dirs=["."],
        log_level="info",
    )

10
faiss/models.py Normal file
View File

@@ -0,0 +1,10 @@
# models.py
from pydantic import BaseModel
from typing import List
# Request body model carrying a single embedding vector.
class EmbeddingInput(BaseModel):
    embedding: List[float]
# Request body model carrying a query embedding plus a result count.
class SearchInput(BaseModel):
    embedding: List[float]
    k: int = 5 # defaults to 5 when omitted

6
faiss/requirements.txt Normal file
View File

@@ -0,0 +1,6 @@
fastapi>=0.115.0
uvicorn[standard]>=0.30.0
pydantic-settings>=2.0.0
numpy>=1.26.0
faiss-cpu>=1.10.0
python-dotenv>=1.0.0

View File

@@ -1,16 +0,0 @@
create database ai_chat default charset utf8mb4;
use ai_chat;
CREATE TABLE `chat_records` (
`id` bigint NOT NULL AUTO_INCREMENT,
`user_msg` text,
`user_msg_tokens` int NOT NULL DEFAULT '0',
`user_msg_keywords` varchar(1024) NOT NULL DEFAULT '',
`ai_msg` text,
`ai_msg_tokens` int NOT NULL DEFAULT '0',
`req_tokens` int NOT NULL DEFAULT '0',
`create_at` bigint NOT NULL DEFAULT '0',
PRIMARY KEY (`id`),
KEY `index_create_at` (`create_at` DESC)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci

View File

@@ -1,12 +0,0 @@
CREATE EXTENSION IF NOT EXISTS vector;
CREATE TABLE IF NOT EXISTS chat_record_vectors (
id BIGSERIAL PRIMARY KEY,
record_id BIGINT NOT NULL,
keywords_text TEXT NOT NULL,
embedding vector(1024) NOT NULL,
created_at BIGINT NOT NULL
);
CREATE UNIQUE INDEX IF NOT EXISTS uq_chat_record_vectors_record_id
ON chat_record_vectors(record_id);

View File

@@ -1,7 +0,0 @@
echo "[1/2] MYSQL"
docker exec ai-chat-mysql mysql -uroot -proot -D ai_chat -e "TRUNCATE TABLE chat_records;"
echo "[2/2] PG"
docker exec ai-chat-pgvector psql -U postgres -d ai_chat -c "TRUNCATE TABLE chat_record_vectors;"

178
scripts/inspect_ai_redis.sh Executable file
View File

@@ -0,0 +1,178 @@
#!/usr/bin/env bash
set -euo pipefail
# Connection defaults; each can be preset through the environment variable of
# the same name.
REDIS_HOST="${REDIS_HOST:-127.0.0.1}"
REDIS_PORT="${REDIS_PORT:-8888}"
REDIS_DB="${REDIS_DB:-0}"
REDIS_PASSWORD="${REDIS_PASSWORD:-}"
# Key patterns to scan; starts with the single built-in default.
PATTERNS=("ai_chat_service_*")
# usage prints the help text to stdout. The heredoc delimiter is quoted, so
# the text is emitted literally with no expansion.
usage() {
  cat <<'EOF'
Usage:
inspect_ai_redis.sh [options]
Options:
--host <host> Redis host, default: 127.0.0.1
--port <port> Redis port, default: 8888
--db <db> Redis db index, default: 0
--password <pwd> Redis password
--pattern <pattern> Key pattern, can be repeated. Default: ai_chat_service_*
--help Show this help
Environment:
REDIS_HOST
REDIS_PORT
REDIS_DB
REDIS_PASSWORD
Examples:
./scripts/inspect_ai_redis.sh
./scripts/inspect_ai_redis.sh --pattern 'ai_chat_service_*' --pattern 'foo_*'
REDIS_PORT=6379 ./scripts/inspect_ai_redis.sh --host 10.0.0.8
EOF
}
# Parse command-line options; they override the REDIS_* / PATTERNS values
# already set.
#
# Whether --pattern has been given is tracked with an explicit flag.
# Comparing PATTERNS against the default value cannot tell "still the
# default" apart from "the user explicitly asked for ai_chat_service_*", so
# an explicit first pattern equal to the default used to be discarded when a
# second --pattern followed.
patterns_from_cli=0

# require_value <option> [value...] — abort with a clear message when an
# option that takes a value is the last argument (otherwise "$2" would trip
# `set -u` with an "unbound variable" error).
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Missing value for $1" >&2
    usage >&2
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --host)
      require_value "$@"
      REDIS_HOST="$2"
      shift 2
      ;;
    --port)
      require_value "$@"
      REDIS_PORT="$2"
      shift 2
      ;;
    --db)
      require_value "$@"
      REDIS_DB="$2"
      shift 2
      ;;
    --password)
      require_value "$@"
      REDIS_PASSWORD="$2"
      shift 2
      ;;
    --pattern)
      require_value "$@"
      # The first --pattern replaces the built-in default; later ones append.
      if [[ "$patterns_from_cli" -eq 0 ]]; then
        PATTERNS=()
        patterns_from_cli=1
      fi
      PATTERNS+=("$2")
      shift 2
      ;;
    --help|-h)
      usage
      exit 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage >&2
      exit 1
      ;;
  esac
done
# The script drives everything through redis-cli; stop early if it is missing.
command -v redis-cli >/dev/null 2>&1 || {
  echo "redis-cli not found in PATH" >&2
  exit 1
}
# redis_cmd <args...> — run redis-cli against the configured host/port/db in
# --raw mode, forwarding all arguments.
#
# When a password is configured it is handed over through the REDISCLI_AUTH
# environment variable (scoped to this one invocation) instead of `-a`, so it
# does not appear on the command line, where other local users could read it,
# and redis-cli does not emit its "-a is insecure" warning.
redis_cmd() {
  if [[ -n "$REDIS_PASSWORD" ]]; then
    REDISCLI_AUTH="$REDIS_PASSWORD" redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" -n "$REDIS_DB" --raw "$@"
  else
    redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" -n "$REDIS_DB" --raw "$@"
  fi
}
# print_string <key> — print the value of a string key via GET.
print_string() {
  redis_cmd GET "$1"
}
# print_hash <key> — print a hash as "field: value" lines. HGETALL output is
# consumed line by line: odd lines are treated as fields, even lines as values.
print_hash() {
  redis_cmd HGETALL "$1" |
    awk '{ if (NR % 2 == 1) printf "%s: ", $0; else print $0 }'
}
# print_list <key> — print every element of a list via LRANGE 0 -1.
print_list() {
  redis_cmd LRANGE "$1" 0 -1
}
# print_set <key> — print every member of a set via SMEMBERS.
print_set() {
  redis_cmd SMEMBERS "$1"
}
# print_zset <key> — print a sorted set as "member => score" lines. The
# ZRANGE ... WITHSCORES output is consumed line by line: odd lines are treated
# as members, even lines as scores.
print_zset() {
  redis_cmd ZRANGE "$1" 0 -1 WITHSCORES |
    awk '{ if (NR % 2 == 1) printf "%s => ", $0; else print $0 }'
}
# print_stream <key> — print every entry of a stream via XRANGE - +.
print_stream() {
  redis_cmd XRANGE "$1" - +
}
# Stop early when PING does not succeed.
redis_cmd PING >/dev/null 2>&1 || {
  echo "Failed to connect to Redis at ${REDIS_HOST}:${REDIS_PORT}, db=${REDIS_DB}" >&2
  exit 1
}

# Collect the keys matching every pattern, in first-seen order; seen_keys
# drops keys that match more than one pattern.
declare -A seen_keys=()
keys=()
for pattern in "${PATTERNS[@]}"; do
  while IFS= read -r key; do
    # Skip blank lines and keys that were already recorded.
    if [[ -n "$key" && -z "${seen_keys[$key]:-}" ]]; then
      seen_keys["$key"]=1
      keys+=("$key")
    fi
  done < <(redis_cmd --scan --pattern "$pattern")
done

# Nothing matched: say so and finish successfully.
if (( ${#keys[@]} == 0 )); then
  echo "No keys matched patterns: ${PATTERNS[*]}"
  exit 0
fi

# Summary header.
printf 'Redis: %s:%s db=%s\n' "$REDIS_HOST" "$REDIS_PORT" "$REDIS_DB"
printf 'Patterns: %s\n' "${PATTERNS[*]}"
printf 'Matched keys: %s\n\n' "${#keys[@]}"
# Dump each collected key: its name, type, TTL and value. Carriage returns are
# stripped from the TYPE/TTL replies before they are used.
for key in "${keys[@]}"; do
  key_type="$(redis_cmd TYPE "$key" | tr -d '\r')"
  ttl="$(redis_cmd TTL "$key" | tr -d '\r')"

  printf 'KEY: %s\n' "$key"
  printf 'TYPE: %s\n' "$key_type"
  printf 'TTL: %s\n' "$ttl"
  printf 'VALUE:\n'

  # Each supported type has a print_<type> helper; anything else is only reported.
  case "$key_type" in
    string|hash|list|set|zset|stream)
      "print_${key_type}" "$key"
      ;;
    *)
      echo "(unsupported type: $key_type)"
      ;;
  esac
  echo
done

View File

View File

@@ -1,138 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
if [[ $# -lt 1 ]]; then
echo "用法: bash scripts/verify-via-backends.sh \"你的消息\""
exit 1
fi
PROMPT="$1"
BACKEND_URL="${BACKEND_URL:-http://127.0.0.1:7080}"
MYSQL_CONTAINER="${MYSQL_CONTAINER:-ai-chat-mysql}"
PG_CONTAINER="${PG_CONTAINER:-ai-chat-pgvector}"
REDIS_HOST="${REDIS_HOST:-127.0.0.1}"
REDIS_PORT="${REDIS_PORT:-8888}"
REDIS_PASSWORD="${REDIS_PASSWORD:-123456}"
REQUEST_TIMEOUT="${REQUEST_TIMEOUT:-120}"
tmp_response="$(mktemp)"
tmp_stderr="$(mktemp)"
json_payload="$(python3 - "${PROMPT}" <<'PY'
import json, sys
print(json.dumps({"prompt": sys.argv[1], "options": {}}, ensure_ascii=False))
PY
)"
echo "[1/4] 通过 backend 发送消息"
if ! curl -sS -N \
-H 'Content-Type: application/json' \
--max-time "${REQUEST_TIMEOUT}" \
-X POST "${BACKEND_URL}/api/chat-process" \
-d "${json_payload}" \
> "${tmp_response}" 2> "${tmp_stderr}"; then
echo "backend 请求失败"
cat "${tmp_stderr}"
echo
echo "请求地址: ${BACKEND_URL}/api/chat-process"
echo "请求体: ${json_payload}"
exit 1
fi
first_line="$(head -n1 "${tmp_response}")"
if [[ -z "${first_line}" ]]; then
echo "未收到任何流式响应"
echo "响应文件: ${tmp_response}"
echo "响应内容:"
cat "${tmp_response}"
exit 1
fi
echo "响应首行:"
echo "${first_line}"
parsed_meta="$(python3 - "${tmp_response}" <<'PY'
import json, sys
path = sys.argv[1]
first = None
last = None
with open(path, 'r', encoding='utf-8', errors='ignore') as f:
for line in f:
line = line.strip()
if not line:
continue
try:
obj = json.loads(line)
except Exception:
continue
if first is None:
first = obj
last = obj
if first is None:
print("")
sys.exit(0)
status = first.get("status", "")
error_message = first.get("message", "")
assistant_id = first.get("id", "")
parent_message_id = first.get("parentMessageId", "")
reply_text = ""
if last is not None:
reply_text = last.get("text", "")
print(status)
print(error_message)
print(assistant_id)
print(parent_message_id)
print(reply_text)
PY
)"
response_status="$(printf '%s\n' "${parsed_meta}" | sed -n '1p')"
response_error_message="$(printf '%s\n' "${parsed_meta}" | sed -n '2p')"
assistant_id="$(printf '%s\n' "${parsed_meta}" | sed -n '3p')"
parent_message_id="$(printf '%s\n' "${parsed_meta}" | sed -n '4p')"
reply_text="$(printf '%s\n' "${parsed_meta}" | sed -n '5,$p')"
if [[ "${response_status}" == "Fail" ]]; then
echo "backend 返回错误:"
echo "${response_error_message}"
echo
echo "响应文件: ${tmp_response}"
exit 1
fi
if [[ -z "${parent_message_id}" || -z "${assistant_id}" ]]; then
echo "未能解析 assistant_id 或 parentMessageId"
echo "响应文件: ${tmp_response}"
exit 1
fi
echo "assistant_id=${assistant_id}"
echo "request_parent_message_id=${parent_message_id}"
echo
echo "回复内容:"
echo "${reply_text}"
echo
echo "[2/4] 查询 MySQL chat_records"
docker exec "${MYSQL_CONTAINER}" mysql -t -uroot -proot -D ai_chat -e \
"select id,user_msg,user_msg_tokens,user_msg_keywords,create_at from chat_records order by id desc limit 5;"
echo
echo "[3/4] 查询 PostgreSQL chat_record_vectors"
docker exec "${PG_CONTAINER}" psql -U postgres -d ai_chat -c \
"select record_id, keywords_text, created_at from chat_record_vectors order by created_at desc limit 5;"
echo
echo "[4/4] 查询 Redis 上下文"
redis_response="$(printf 'AUTH %s\r\nSCANPREFIX ai_chat_service_\r\n' \
"${REDIS_PASSWORD}" | nc -w 1 "${REDIS_HOST}" "${REDIS_PORT}")"
echo "${redis_response}"
echo
echo "用户消息 Redis Key: ai_chat_service_${parent_message_id}"
echo "AI回复 Redis Key: ai_chat_service_${assistant_id}"
echo
echo "原始响应保存在: ${tmp_response}"