Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: 新增图片推理模式来支持gpt4v #265

Merged
merged 13 commits into from
Nov 20, 2023
6 changes: 3 additions & 3 deletions code/handlers/card_common_action.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ import (
"context"
"encoding/json"
"fmt"
"start-feishubot/logger"

larkcard "github.com/larksuite/oapi-sdk-go/v3/card"
)

Expand All @@ -20,11 +18,13 @@ func NewCardHandler(m MessageHandler) CardHandlerFunc {
handlers := []CardHandlerMeta{
NewClearCardHandler,
NewPicResolutionHandler,
NewVisionResolutionHandler,
NewPicTextMoreHandler,
NewPicModeChangeHandler,
NewRoleTagCardHandler,
NewRoleCardHandler,
NewAIModeCardHandler,
NewVisionModeChangeHandler,
}

return func(ctx context.Context, cardAction *larkcard.CardAction) (interface{}, error) {
Expand All @@ -35,7 +35,7 @@ func NewCardHandler(m MessageHandler) CardHandlerFunc {
return nil, err
}
//pp.Println(cardMsg)
logger.Debug("cardMsg ", cardMsg)
//logger.Debug("cardMsg ", cardMsg)
for _, handler := range handlers {
h := handler(cardMsg, m)
i, err := h(ctx, cardAction)
Expand Down
74 changes: 74 additions & 0 deletions code/handlers/card_vision_action.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package handlers

import (
"context"
"fmt"
larkcard "github.com/larksuite/oapi-sdk-go/v3/card"
larkcore "github.com/larksuite/oapi-sdk-go/v3/core"
"start-feishubot/services"
)

// NewVisionResolutionHandler builds the card handler for vision-detail
// selection cards. When the card's Kind is VisionStyleKind it applies the
// selected option via CommonProcessVisionStyle and returns (nil, nil); for any
// other kind it returns ErrNextHandler so another handler can take the card.
func NewVisionResolutionHandler(cardMsg CardMsg,
	m MessageHandler) CardHandlerFunc {
	return func(ctx context.Context, cardAction *larkcard.CardAction) (interface{}, error) {
		// Not our card: defer to the next handler.
		if cardMsg.Kind != VisionStyleKind {
			return nil, ErrNextHandler
		}
		CommonProcessVisionStyle(cardMsg, cardAction, m.sessionCache)
		return nil, nil
	}
}
// NewVisionModeChangeHandler builds the card handler for the "switch to vision
// mode?" confirmation card. Cards whose Kind is not VisionModeChangeKind are
// passed on with ErrNextHandler. For matching cards the result of
// CommonProcessVisionModeChange is returned when it reports the card as
// handled; otherwise the handler returns (nil, nil).
func NewVisionModeChangeHandler(cardMsg CardMsg,
	m MessageHandler) CardHandlerFunc {
	return func(ctx context.Context, cardAction *larkcard.CardAction) (interface{}, error) {
		// Not our card: defer to the next handler.
		if cardMsg.Kind != VisionModeChangeKind {
			return nil, ErrNextHandler
		}
		card, err, handled := CommonProcessVisionModeChange(cardMsg, m.sessionCache)
		if !handled {
			return nil, nil
		}
		return card, err
	}
}

// CommonProcessVisionStyle stores the vision detail level chosen on a card and
// confirms the change to the user.
//
// It reads the selected option from cardAction, dumps the card message to
// stdout, records the option as the session's vision detail in cache, and then
// replies to the message identified by msg.MsgId with a confirmation text.
func CommonProcessVisionStyle(msg CardMsg,
	cardAction *larkcard.CardAction,
	cache services.SessionServiceCacheInterface) {
	chosen := cardAction.Action.Option
	fmt.Println(larkcore.Prettify(msg))

	detail := services.VisionDetail(chosen)
	cache.SetVisionDetail(msg.SessionId, detail)

	// Tell the user which detail level is now active.
	notice := "图片解析度调整为:" + chosen
	replyMsg(context.Background(), notice, &msg.MsgId)
}

// CommonProcessVisionModeChange reacts to the user's answer on the "switch to
// vision mode?" card.
//
//   - cardMsg.Value == "1": the session is cleared, switched to
//     services.ModeVision with services.VisionDetailLow, and a card announcing
//     vision mode (with the detail-level selector) is built.
//   - cardMsg.Value == "0": the session is left untouched and a card saying the
//     topic context is kept is built.
//   - any other value: nothing is done.
//
// Returns the card to send back, an error, and a flag telling the caller
// whether the value was recognised. A failure to build the card is reported
// through the error (with a nil card) instead of being silently dropped.
func CommonProcessVisionModeChange(cardMsg CardMsg,
	session services.SessionServiceCacheInterface) (
	interface{}, error, bool) {
	switch cardMsg.Value {
	case "1":
		sessionId := cardMsg.SessionId
		session.Clear(sessionId)
		session.SetMode(sessionId,
			services.ModeVision)
		session.SetVisionDetail(sessionId,
			services.VisionDetailLow)

		newCard, err := newSendCard(
			withHeader("🕵️️ 已进入图片推理模式", larkcard.TemplateBlue),
			withVisionDetailLevelBtn(&sessionId),
			withNote("提醒:回复图片,让LLM和你一起推理图片的内容。"))
		if err != nil {
			// The value was recognised, so report the failure to the caller.
			return nil, err, true
		}
		return newCard, nil, true
	case "0":
		newCard, err := newSendCard(
			withHeader("️🎒 机器人提醒", larkcard.TemplateGreen),
			withMainMd("依旧保留此话题的上下文信息"),
			withNote("我们可以继续探讨这个话题,期待和您聊天。如果您有其他问题或者想要讨论的话题,请告诉我哦"),
		)
		if err != nil {
			return nil, err, true
		}
		return newCard, nil, true
	}
	return nil, nil, false
}
17 changes: 17 additions & 0 deletions code/handlers/event_msg_action.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,23 @@ func setDefaultPrompt(msg []openai.Messages) []openai.Messages {
return msg
}

//func setDefaultVisionPrompt(msg []openai.VisionMessages) []openai.VisionMessages {
// if !hasSystemRole(msg) {
// msg = append(msg, openai.VisionMessages{
// Role: "system", Content: []openai.ContentType{
// {Type: "text", Text: "You are ChatGPT4V, " +
// "You are ChatGPT4V, " +
// "a large language and picture model trained by" +
// " OpenAI. " +
// "Answer in user's language as concisely as" +
// " possible. Knowledge cutoff: 20230601 " +
// "Current date" + time.Now().Format("20060102"),
// }},
// })
// }
// return msg
//}

// MessageAction is the stateless action type for message handling.
type MessageAction struct{}

Expand Down
127 changes: 127 additions & 0 deletions code/handlers/event_vision_action.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package handlers

import (
"context"
"fmt"
"os"
"start-feishubot/initialization"
"start-feishubot/logger"
"start-feishubot/services"
"start-feishubot/services/openai"
"start-feishubot/utils"

larkim "github.com/larksuite/oapi-sdk-go/v3/service/im/v1"
)

// VisionAction is the stateless action type for image reasoning (vision).
type VisionAction struct{}

// Execute handles the image-reasoning (vision) step of the message pipeline.
//
// Behaviour, in order:
//  1. If AzureModeCheck(a) reports false, the action does nothing and
//     returns true.
//  2. If the parsed text equals "/vision" or "图片推理", the session is
//     cleared, switched to services.ModeVision with
//     services.VisionDetailHigh, and the instruction card is sent.
//  3. If an image arrives while the session is not in vision mode, a card
//     asking whether to switch modes is sent.
//  4. If an image arrives in vision mode, the image is downloaded, sent to the
//     vision model with a fixed question, and the answer is sent back as a
//     card. Any failure is reported to the user as a reply.
//
// It returns false whenever it handled the message and true otherwise.
// NOTE(review): presumably the caller stops the action chain on false —
// confirm against the chain implementation.
func (*VisionAction) Execute(a *ActionInfo) bool {
	if !AzureModeCheck(a) {
		return true
	}

	// Explicit command: enter image-reasoning mode.
	if _, foundPic := utils.EitherTrimEqual(a.info.qParsed,
		"/vision", "图片推理"); foundPic {
		a.handler.sessionCache.Clear(*a.info.sessionId)
		a.handler.sessionCache.SetMode(*a.info.sessionId,
			services.ModeVision)
		a.handler.sessionCache.SetVisionDetail(*a.info.sessionId,
			services.VisionDetailHigh)
		sendVisionInstructionCard(*a.ctx, a.info.sessionId,
			a.info.msgId)
		return false
	}

	mode := a.handler.sessionCache.GetMode(*a.info.sessionId)
	logger.Debug("a.info.msgType: ", a.info.msgType)
	logger.Debug("MODE:", mode)

	// Everything below only concerns image messages.
	if a.info.msgType != "image" {
		return true
	}

	// Image received outside vision mode: ask whether to switch.
	if mode != services.ModeVision {
		sendVisionModeCheckCard(*a.ctx, a.info.sessionId, a.info.msgId)
		return false
	}

	encoded, err := downloadVisionImageBase64(a.info.msgId, a.info.imageKey)
	if err != nil {
		replyMsg(*a.ctx, fmt.Sprintf("🤖️:图片下载失败,请稍后再试~\n 错误信息: %v", err),
			a.info.msgId)
		return false
	}

	detail := a.handler.sessionCache.GetVisionDetail(*a.info.sessionId)
	// No user prompt accompanies the image, so a fixed question is asked.
	msg := []openai.VisionMessages{{
		Role: "user",
		Content: []openai.ContentType{
			{Type: "text", Text: "图片里面有什么", ImageURL: nil},
			{Type: "image_url", ImageURL: &openai.ImageURL{
				URL:    "data:image/jpeg;base64," + encoded,
				Detail: detail,
			}},
		},
	}}

	completions, err := a.handler.gpt.GetVisionInfo(msg)
	if err != nil {
		replyMsg(*a.ctx, fmt.Sprintf(
			"🤖️:消息机器人摆烂了,请稍后再试~\n错误信息: %v", err), a.info.msgId)
		return false
	}
	sendOldTopicCard(*a.ctx, a.info.sessionId, a.info.msgId,
		completions.Content)
	return false
}

// downloadVisionImageBase64 fetches the image resource imageKey attached to
// message msgId from Lark, writes it to a temporary "<imageKey>.png" file in
// the working directory, and returns the file's base64 encoding. The temporary
// file is removed before returning, whether or not encoding succeeded.
func downloadVisionImageBase64(msgId *string, imageKey string) (string, error) {
	req := larkim.NewGetMessageResourceReqBuilder().
		MessageId(*msgId).
		FileKey(imageKey).
		Type("image").
		Build()
	resp, err := initialization.GetLarkClient().Im.MessageResource.Get(context.Background(), req)
	if err != nil {
		return "", err
	}
	// A non-success API response carries no file body; writing it would read
	// from a nil reader.
	if !resp.Success() {
		return "", fmt.Errorf("lark message resource request failed: code=%d msg=%s",
			resp.Code, resp.Msg)
	}

	f := fmt.Sprintf("%s.png", imageKey)
	// Registered before writing so a partially written file is cleaned up too.
	defer os.Remove(f)
	if err := resp.WriteFile(f); err != nil {
		return "", err
	}
	return openai.GetBase64FromImage(f)
}
4 changes: 2 additions & 2 deletions code/handlers/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,17 +94,17 @@ func (m MessageHandler) msgReceivedHandler(ctx context.Context, event *larkim.P2
&ProcessedUniqueAction{}, //避免重复处理
&ProcessMentionAction{}, //判断机器人是否应该被调用
&AudioAction{}, //语音处理
&EmptyAction{}, //空消息处理
&ClearAction{}, //清除消息处理
&VisionAction{}, //图片推理处理
&PicAction{}, //图片处理
&AIModeAction{}, //模式切换处理
&RoleListAction{}, //角色列表处理
&HelpAction{}, //帮助处理
&BalanceAction{}, //余额处理
&RolePlayAction{}, //角色扮演处理
&MessageAction{}, //消息处理
&EmptyAction{}, //空消息处理
&StreamMessageAction{}, //流式消息处理

}
chain(data, actions...)
return nil
Expand Down
Loading
Loading