Adding LLM Requests Total metric (#202)
jespino authored Jun 7, 2024
1 parent 6b3374e commit 561c677
Showing 9 changed files with 99 additions and 54 deletions.
25 changes: 17 additions & 8 deletions server/ai/anthropic/anthropic.go
@@ -4,23 +4,28 @@ import (
 	"fmt"

 	"github.com/mattermost/mattermost-plugin-ai/server/ai"
+	"github.com/mattermost/mattermost-plugin-ai/server/metrics"
 )

 const DefaultMaxTokens = 4096

 type Anthropic struct {
-	client       *Client
-	defaultModel string
-	tokenLimit   int
+	client         *Client
+	defaultModel   string
+	tokenLimit     int
+	metricsService metrics.Metrics
+	name           string
 }

-func New(llmService ai.ServiceConfig) *Anthropic {
-	client := NewClient(llmService.APIKey)
+func New(botConfig ai.BotConfig, metricsService metrics.Metrics) *Anthropic {
+	client := NewClient(botConfig.Service.APIKey)

 	return &Anthropic{
-		client:       client,
-		defaultModel: llmService.DefaultModel,
-		tokenLimit:   llmService.TokenLimit,
+		client:         client,
+		defaultModel:   botConfig.Service.DefaultModel,
+		tokenLimit:     botConfig.Service.TokenLimit,
+		metricsService: metricsService,
+		name:           botConfig.Name,
 	}
 }

@@ -79,6 +84,8 @@ func (a *Anthropic) createCompletionRequest(conversation ai.BotConversation, opt
 }

 func (a *Anthropic) ChatCompletion(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (*ai.TextStreamResult, error) {
+	a.metricsService.IncrementLLMRequests(a.name)
+
 	request := a.createCompletionRequest(conversation, opts)
 	request.Stream = true
 	result, err := a.client.MessageCompletion(request)
@@ -90,6 +97,8 @@
 }

 func (a *Anthropic) ChatCompletionNoStream(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (string, error) {
+	a.metricsService.IncrementLLMRequests(a.name)
+
 	request := a.createCompletionRequest(conversation, opts)
 	request.Stream = false
 	result, err := a.client.MessageCompletionNoStream(request)
25 changes: 16 additions & 9 deletions server/ai/asksage/asksage.go
@@ -4,24 +4,29 @@ import (
 	"strings"

 	"github.com/mattermost/mattermost-plugin-ai/server/ai"
+	"github.com/mattermost/mattermost-plugin-ai/server/metrics"
 )

 type AskSage struct {
-	client       *Client
-	defaultModel string
-	maxTokens    int
+	client         *Client
+	defaultModel   string
+	maxTokens      int
+	metricsService metrics.Metrics
+	name           string
 }

-func New(llmService ai.ServiceConfig) *AskSage {
+func New(botConfig ai.BotConfig, metricsService metrics.Metrics) *AskSage {
 	client := NewClient("")
 	client.Login(GetTokenParams{
-		Email:    llmService.Username,
-		Password: llmService.Password,
+		Email:    botConfig.Service.Username,
+		Password: botConfig.Service.Password,
 	})
 	return &AskSage{
-		client:       client,
-		defaultModel: llmService.DefaultModel,
-		maxTokens:    llmService.TokenLimit,
+		client:         client,
+		defaultModel:   botConfig.Service.DefaultModel,
+		maxTokens:      botConfig.Service.TokenLimit,
+		metricsService: metricsService,
+		name:           botConfig.Name,
 	}
 }

@@ -75,6 +80,8 @@ func (s *AskSage) ChatCompletion(conversation ai.BotConversation, opts ...ai.Lan
 }

 func (s *AskSage) ChatCompletionNoStream(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (string, error) {
+	s.metricsService.IncrementLLMRequests(s.name)
+
 	params := s.queryParamsFromConfig(s.createConfig(opts))
 	params.Message = conversationToMessagesList(conversation)
 	params.SystemPrompt = conversation.ExtractSystemMessage()
37 changes: 23 additions & 14 deletions server/ai/openai/openai.go
@@ -18,6 +18,7 @@ import (
 	"github.com/invopop/jsonschema"
 	"github.com/mattermost/mattermost-plugin-ai/server/ai"
 	"github.com/mattermost/mattermost-plugin-ai/server/ai/subtitles"
+	"github.com/mattermost/mattermost-plugin-ai/server/metrics"
 	openaiClient "github.com/sashabaranov/go-openai"
 )

@@ -26,6 +27,8 @@ type OpenAI struct {
 	defaultModel     string
 	tokenLimit       int
 	streamingTimeout time.Duration
+	metricsService   metrics.Metrics
+	name             string
 }

 const StreamingTimeoutDefault = 10 * time.Second
@@ -36,10 +39,10 @@ const OpenAIMaxImageSize = 20 * 1024 * 1024 // 20 MB

 var ErrStreamingTimeout = errors.New("timeout streaming")

-func NewCompatible(llmService ai.ServiceConfig) *OpenAI {
-	apiKey := llmService.APIKey
-	endpointURL := strings.TrimSuffix(llmService.APIURL, "/")
-	defaultModel := llmService.DefaultModel
+func NewCompatible(botConfig ai.BotConfig, metricsService metrics.Metrics) *OpenAI {
+	apiKey := botConfig.Service.APIKey
+	endpointURL := strings.TrimSuffix(botConfig.Service.APIURL, "/")
+	defaultModel := botConfig.Service.DefaultModel
 	config := openaiClient.DefaultConfig(apiKey)
 	config.BaseURL = endpointURL
@@ -50,35 +53,39 @@ func NewCompatible(llmService ai.ServiceConfig) *OpenAI {
 	}

 	streamingTimeout := StreamingTimeoutDefault
-	if llmService.StreamingTimeoutSeconds > 0 {
-		streamingTimeout = time.Duration(llmService.StreamingTimeoutSeconds) * time.Second
+	if botConfig.Service.StreamingTimeoutSeconds > 0 {
+		streamingTimeout = time.Duration(botConfig.Service.StreamingTimeoutSeconds) * time.Second
 	}
 	return &OpenAI{
 		client:           openaiClient.NewClientWithConfig(config),
 		defaultModel:     defaultModel,
-		tokenLimit:       llmService.TokenLimit,
+		tokenLimit:       botConfig.Service.TokenLimit,
 		streamingTimeout: streamingTimeout,
+		metricsService:   metricsService,
+		name:             botConfig.Name,
 	}
 }

-func New(llmService ai.ServiceConfig) *OpenAI {
-	defaultModel := llmService.DefaultModel
+func New(botConfig ai.BotConfig, metricsService metrics.Metrics) *OpenAI {
+	defaultModel := botConfig.Service.DefaultModel
 	if defaultModel == "" {
 		defaultModel = openaiClient.GPT3Dot5Turbo
 	}
-	config := openaiClient.DefaultConfig(llmService.APIKey)
-	config.OrgID = llmService.OrgID
+	config := openaiClient.DefaultConfig(botConfig.Service.APIKey)
+	config.OrgID = botConfig.Service.OrgID

 	streamingTimeout := StreamingTimeoutDefault
-	if llmService.StreamingTimeoutSeconds > 0 {
-		streamingTimeout = time.Duration(llmService.StreamingTimeoutSeconds) * time.Second
+	if botConfig.Service.StreamingTimeoutSeconds > 0 {
+		streamingTimeout = time.Duration(botConfig.Service.StreamingTimeoutSeconds) * time.Second
 	}

 	return &OpenAI{
 		client:           openaiClient.NewClientWithConfig(config),
 		defaultModel:     defaultModel,
-		tokenLimit:       llmService.TokenLimit,
+		tokenLimit:       botConfig.Service.TokenLimit,
 		streamingTimeout: streamingTimeout,
+		metricsService:   metricsService,
+		name:             botConfig.Name,
 	}
 }

@@ -344,6 +351,8 @@ func (s *OpenAI) completionRequestFromConfig(cfg ai.LLMConfig) openaiClient.Chat
 }

 func (s *OpenAI) ChatCompletion(conversation ai.BotConversation, opts ...ai.LanguageModelOption) (*ai.TextStreamResult, error) {
+	s.metricsService.IncrementLLMRequests(s.name)
+
 	request := s.completionRequestFromConfig(s.createConfig(opts))
 	request = modifyCompletionRequestWithConversation(request, conversation)
 	request.Stream = true
2 changes: 1 addition & 1 deletion server/api_channel.go
@@ -109,7 +109,7 @@ func (p *Plugin) handleSince(c *gin.Context) {
 		return
 	}

-	resultStream, err := p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+	resultStream, err := p.getLLM(bot.cfg).ChatCompletion(prompt)
 	if err != nil {
 		c.AbortWithError(http.StatusInternalServerError, err)
 		return
2 changes: 1 addition & 1 deletion server/api_post.go
@@ -66,7 +66,7 @@ func (p *Plugin) handleReact(c *gin.Context) {
 		return
 	}

-	emojiName, err := p.getLLM(bot.cfg.Service).ChatCompletionNoStream(prompt, ai.WithMaxGeneratedTokens(25))
+	emojiName, err := p.getLLM(bot.cfg).ChatCompletionNoStream(prompt, ai.WithMaxGeneratedTokens(25))
 	if err != nil {
 		c.AbortWithError(http.StatusInternalServerError, err)
 		return
10 changes: 5 additions & 5 deletions server/meeting_summarization.go
@@ -259,8 +259,8 @@ func (p *Plugin) summarizeCallRecording(bot *Bot, rootID string, requestingUser

 func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subtitles, context ai.ConversationContext) (*ai.TextStreamResult, error) {
 	llmFormattedTranscription := transcription.FormatForLLM()
-	tokens := p.getLLM(bot.cfg.Service).CountTokens(llmFormattedTranscription)
-	tokenLimitWithMargin := int(float64(p.getLLM(bot.cfg.Service).TokenLimit())*0.75) - ContextTokenMargin
+	tokens := p.getLLM(bot.cfg).CountTokens(llmFormattedTranscription)
+	tokenLimitWithMargin := int(float64(p.getLLM(bot.cfg).TokenLimit())*0.75) - ContextTokenMargin
 	if tokenLimitWithMargin < 0 {
 		tokenLimitWithMargin = ContextTokenMargin / 2
 	}
@@ -277,7 +277,7 @@ func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subti
 			return nil, fmt.Errorf("unable to get summarize chunk prompt: %w", err)
 		}

-		summarizedChunk, err := p.getLLM(bot.cfg.Service).ChatCompletionNoStream(summarizeChunkPrompt)
+		summarizedChunk, err := p.getLLM(bot.cfg).ChatCompletionNoStream(summarizeChunkPrompt)
 		if err != nil {
 			return nil, fmt.Errorf("unable to get summarized chunk: %w", err)
 		}
@@ -287,7 +287,7 @@ func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subti

 		llmFormattedTranscription = strings.Join(summarizedChunks, "\n\n")
 		isChunked = true
-		p.pluginAPI.Log.Debug("Completed chunk summarization", "chunks", len(summarizedChunks), "tokens", p.getLLM(bot.cfg.Service).CountTokens(llmFormattedTranscription))
+		p.pluginAPI.Log.Debug("Completed chunk summarization", "chunks", len(summarizedChunks), "tokens", p.getLLM(bot.cfg).CountTokens(llmFormattedTranscription))
 	}

 	context.PromptParameters = map[string]string{"Transcription": llmFormattedTranscription, "IsChunked": fmt.Sprintf("%t", isChunked)}
@@ -296,7 +296,7 @@ func (p *Plugin) summarizeTranscription(bot *Bot, transcription *subtitles.Subti
 		return nil, fmt.Errorf("unable to get meeting summary prompt: %w", err)
 	}

-	summaryStream, err := p.getLLM(bot.cfg.Service).ChatCompletion(summaryPrompt)
+	summaryStream, err := p.getLLM(bot.cfg).ChatCompletion(summaryPrompt)
 	if err != nil {
 		return nil, fmt.Errorf("unable to get meeting summary: %w", err)
 	}
20 changes: 20 additions & 0 deletions server/metrics/metrics.go
@@ -11,6 +11,7 @@ const (
 	MetricsSubsystemSystem = "system"
 	MetricsSubsystemHTTP   = "http"
 	MetricsSubsystemAPI    = "api"
+	MetricsSubsystemLLM    = "llm"

 	MetricsCloudInstallationLabel = "installationId"
 	MetricsVersionLabel           = "version"
@@ -23,6 +24,8 @@ type Metrics interface {

 	IncrementHTTPRequests()
 	IncrementHTTPErrors()
+
+	IncrementLLMRequests(llmName string)
 }

 type InstanceInfo struct {
@@ -42,6 +45,8 @@ type metrics struct {

 	httpRequestsTotal prometheus.Counter
 	httpErrorsTotal   prometheus.Counter
+
+	llmRequestsTotal *prometheus.CounterVec
 }

 // NewMetrics Factory method to create a new metrics collector.
@@ -113,6 +118,15 @@ func NewMetrics(info InstanceInfo) Metrics {
 	})
 	m.registry.MustRegister(m.httpErrorsTotal)

+	m.llmRequestsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace:   MetricsNamespace,
+		Subsystem:   MetricsSubsystemLLM,
+		Name:        "requests_total",
+		Help:        "The total number of LLM requests.",
+		ConstLabels: additionalLabels,
+	}, []string{"llm_name"})
+	m.registry.MustRegister(m.llmRequestsTotal)
+
 	return m
 }

@@ -137,3 +151,9 @@ func (m *metrics) IncrementHTTPErrors() {
 		m.httpErrorsTotal.Inc()
 	}
 }
+
+func (m *metrics) IncrementLLMRequests(llmName string) {
+	if m != nil {
+		m.llmRequestsTotal.With(prometheus.Labels{"llm_name": llmName}).Inc()
+	}
+}
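Illustrative sketch (not part of the commit): the standalone program below mirrors the new counter wiring so you can see what each provider's IncrementLLMRequests call produces — one time series per bot name. The plugin's MetricsNamespace constant and the additionalLabels const-labels are defined elsewhere and not shown in this diff, so a placeholder namespace is used and const-labels are omitted here.

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// Standalone registry, mirroring how NewMetrics registers the new counter.
	registry := prometheus.NewRegistry()

	llmRequestsTotal := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "example_namespace", // the plugin uses MetricsNamespace, not shown in this diff
		Subsystem: "llm",
		Name:      "requests_total",
		Help:      "The total number of LLM requests.",
	}, []string{"llm_name"})
	registry.MustRegister(llmRequestsTotal)

	// Each ChatCompletion/ChatCompletionNoStream call increments the counter
	// labeled with the requesting bot's configured name.
	llmRequestsTotal.With(prometheus.Labels{"llm_name": "ai"}).Inc()
	llmRequestsTotal.With(prometheus.Labels{"llm_name": "ai"}).Inc()
	llmRequestsTotal.With(prometheus.Labels{"llm_name": "summarizer"}).Inc()

	// Gather and print what a Prometheus scrape of /metrics would report.
	families, err := registry.Gather()
	if err != nil {
		panic(err)
	}
	for _, family := range families {
		for _, metric := range family.GetMetric() {
			fmt.Println(family.GetName(), metric.GetLabel(), metric.GetCounter().GetValue())
		}
	}
}

In Prometheus, a query such as sum by (llm_name) (rate(<namespace>_llm_requests_total[5m])) would then give per-bot request rates, with the actual metric name depending on the plugin's namespace constant.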
22 changes: 11 additions & 11 deletions server/plugin.go
@@ -127,17 +127,17 @@ func (p *Plugin) OnActivate() error {
 	return nil
 }

-func (p *Plugin) getLLM(llmServiceConfig ai.ServiceConfig) ai.LanguageModel {
+func (p *Plugin) getLLM(llmBotConfig ai.BotConfig) ai.LanguageModel {
 	var llm ai.LanguageModel
-	switch llmServiceConfig.Type {
+	switch llmBotConfig.Service.Type {
 	case "openai":
-		llm = openai.New(llmServiceConfig)
+		llm = openai.New(llmBotConfig, p.metricsService)
 	case "openaicompatible":
-		llm = openai.NewCompatible(llmServiceConfig)
+		llm = openai.NewCompatible(llmBotConfig, p.metricsService)
 	case "anthropic":
-		llm = anthropic.New(llmServiceConfig)
+		llm = anthropic.New(llmBotConfig, p.metricsService)
 	case "asksage":
-		llm = asksage.New(llmServiceConfig)
+		llm = asksage.New(llmBotConfig, p.metricsService)
 	}

 	cfg := p.getConfiguration()
@@ -152,18 +152,18 @@ func (p *Plugin) getLLM(llmServiceConfig ai.ServiceConfig) ai.LanguageModel {

 func (p *Plugin) getTranscribe() ai.Transcriber {
 	cfg := p.getConfiguration()
-	var transcriptionService ai.ServiceConfig
+	var botConfig ai.BotConfig
 	for _, bot := range cfg.Bots {
 		if bot.Name == cfg.TranscriptGenerator {
-			transcriptionService = bot.Service
+			botConfig = bot
 			break
 		}
 	}
-	switch transcriptionService.Type {
+	switch botConfig.Service.Type {
 	case "openai":
-		return openai.New(transcriptionService)
+		return openai.New(botConfig, p.metricsService)
 	case "openaicompatible":
-		return openai.NewCompatible(transcriptionService)
+		return openai.NewCompatible(botConfig, p.metricsService)
 	}
 	return nil
 }
10 changes: 5 additions & 5 deletions server/service.go
@@ -53,7 +53,7 @@ func (p *Plugin) newConversation(bot *Bot, context ai.ConversationContext) error
 	}
 	conversation.AddPost(p.PostToAIPost(bot, context.Post))

-	result, err := p.getLLM(bot.cfg.Service).ChatCompletion(conversation)
+	result, err := p.getLLM(bot.cfg).ChatCompletion(conversation)
 	if err != nil {
 		return err
 	}
@@ -81,7 +81,7 @@ func (p *Plugin) generateTitle(bot *Bot, request string, threadRootID string) er
 	titleRequest := ai.BotConversation{
 		Posts: []ai.Post{{Role: ai.PostRoleUser, Message: request}},
 	}
-	conversationTitle, err := p.getLLM(bot.cfg.Service).ChatCompletionNoStream(titleRequest, ai.WithMaxGeneratedTokens(25))
+	conversationTitle, err := p.getLLM(bot.cfg).ChatCompletionNoStream(titleRequest, ai.WithMaxGeneratedTokens(25))
 	if err != nil {
 		return fmt.Errorf("failed to get title: %w", err)
 	}
@@ -134,7 +134,7 @@ func (p *Plugin) continueConversation(bot *Bot, threadData *ThreadData, context
 	}
 	prompt.AppendConversation(p.ThreadToBotConversation(bot, threadData.Posts))

-	result, err = p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+	result, err = p.getLLM(bot.cfg).ChatCompletion(prompt)
 	if err != nil {
 		return nil, err
 	}
@@ -157,7 +157,7 @@ func (p *Plugin) continueThreadConversation(bot *Bot, questionThreadData *Thread
 	}
 	prompt.AppendConversation(p.ThreadToBotConversation(bot, questionThreadData.Posts))

-	result, err := p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+	result, err := p.getLLM(bot.cfg).ChatCompletion(prompt)
 	if err != nil {
 		return nil, err
 	}
@@ -181,7 +181,7 @@ func (p *Plugin) summarizePost(bot *Bot, postIDToSummarize string, context ai.Co
 	if err != nil {
 		return nil, err
 	}
-	summaryStream, err := p.getLLM(bot.cfg.Service).ChatCompletion(prompt)
+	summaryStream, err := p.getLLM(bot.cfg).ChatCompletion(prompt)
 	if err != nil {
 		return nil, err
 	}
