Matt Low
dfe43179c0
Calculate the tokens/chunk for Gemini responses, fixing the tok/s meter for Gemini models. Further, only consider the first candidate of streamed Gemini responses.
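For context, here is a minimal sketch of what the first-candidate rule and per-chunk token accounting described above might look like. The geminiPart, geminiCandidate, and geminiStreamResponse types are hypothetical stand-ins (the real SDK response types differ); Chunk is the type defined in the interface file below.

package api

import "strings"

// Hypothetical stand-ins for the shape of a streamed Gemini response;
// the real SDK types differ. Only the fields needed to illustrate the
// change are modeled here.
type geminiPart struct{ Text string }

type geminiCandidate struct {
	Parts      []geminiPart
	TokenCount uint // assumed per-candidate token count
}

type geminiStreamResponse struct {
	Candidates []geminiCandidate
}

// chunkFromResponse folds one streamed response into a Chunk, reading
// only the first candidate and carrying its token count so the tok/s
// meter has real per-chunk data.
func chunkFromResponse(resp geminiStreamResponse) (Chunk, bool) {
	if len(resp.Candidates) == 0 {
		return Chunk{}, false
	}
	first := resp.Candidates[0] // later candidates are ignored
	var sb strings.Builder
	for _, part := range first.Parts {
		sb.WriteString(part.Text)
	}
	return Chunk{Content: sb.String(), TokenCount: first.TokenCount}, true
}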
37 lines · 855 B · Go
package api

import (
	"context"

	"git.mlow.ca/mlow/lmcli/pkg/lmcli/model"
)

type ReplyCallback func(model.Message)

type Chunk struct {
	Content    string
	TokenCount uint
}

type ChatCompletionClient interface {
	// CreateChatCompletion requests a response to the provided messages.
	// Each reply is passed to the given callback, and the complete
	// user-facing response is returned as a string.
	CreateChatCompletion(
		ctx context.Context,
		params model.RequestParameters,
		messages []model.Message,
		callback ReplyCallback,
	) (string, error)

	// Like CreateChatCompletion, except the response is streamed via
	// the output channel as it's received.
	CreateChatCompletionStream(
		ctx context.Context,
		params model.RequestParameters,
		messages []model.Message,
		callback ReplyCallback,
		output chan<- Chunk,
	) (string, error)
}
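A sketch of how a caller might consume the streaming variant and derive a tok/s figure from the per-chunk token counts. The streamWithMeter helper is an illustration, not part of the package; it assumes the caller owns the output channel's lifecycle, which the interface itself does not specify.

package api

import (
	"context"
	"fmt"
	"time"

	"git.mlow.ca/mlow/lmcli/pkg/lmcli/model"
)

// streamWithMeter drains the chunk channel while the request runs, then
// reports an overall tokens/second figure from the per-chunk counts.
func streamWithMeter(
	ctx context.Context,
	client ChatCompletionClient,
	params model.RequestParameters,
	messages []model.Message,
) (string, error) {
	output := make(chan Chunk)
	done := make(chan struct{})
	start := time.Now()
	var tokens uint

	go func() {
		defer close(done)
		for chunk := range output {
			fmt.Print(chunk.Content)
			tokens += chunk.TokenCount
		}
	}()

	reply, err := client.CreateChatCompletionStream(
		ctx, params, messages,
		func(model.Message) {}, // no-op reply callback
		output,
	)
	close(output) // assumes the caller owns the channel lifecycle
	<-done        // wait for the drain goroutine; the token total is now final

	if secs := time.Since(start).Seconds(); secs > 0 {
		fmt.Printf("\n(%.1f tok/s)\n", float64(tokens)/secs)
	}
	return reply, err
}

Dividing the summed Chunk.TokenCount values by elapsed wall time is what makes the meter accurate for Gemini: without per-chunk counts, a consumer would have to estimate tokens from the text itself.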