lmcli/pkg/api/api.go
Matt Low dfe43179c0 Include token count in api.Chunk
And calculate the tokens/chunk for gemini responses, fixing the tok/s
meter for gemini models.

Further, only consider the first candidate of streamed gemini responses.
2024-06-09 20:49:18 +00:00

37 lines
855 B
Go

package api
import (
"context"
"git.mlow.ca/mlow/lmcli/pkg/lmcli/model"
)
// ReplyCallback is invoked with each complete reply message produced
// while servicing a chat completion request.
type ReplyCallback func(model.Message)
// Chunk is a piece of streamed chat completion output.
type Chunk struct {
	// Content is the text fragment received in this chunk.
	Content string
	// TokenCount is the number of tokens represented by Content
	// (used by callers to compute tokens-per-second throughput).
	TokenCount uint
}
// ChatCompletionClient is implemented by model providers that can
// service chat completion requests.
type ChatCompletionClient interface {
	// CreateChatCompletion requests a response to the provided messages.
	// Each reply message is passed to the given callback, and the
	// complete user-facing response is returned as a string.
	CreateChatCompletion(
		ctx context.Context,
		params model.RequestParameters,
		messages []model.Message,
		callback ReplyCallback,
	) (string, error)

	// CreateChatCompletionStream is like CreateChatCompletion, except the
	// response is streamed via the output channel as it's received.
	CreateChatCompletionStream(
		ctx context.Context,
		params model.RequestParameters,
		messages []model.Message,
		callback ReplyCallback,
		output chan<- Chunk,
	) (string, error)
}