Include token count in api.Chunk

Also calculate the tokens per chunk for Gemini responses, which fixes the tok/s meter for Gemini models. Additionally, only consider the first candidate of streamed Gemini responses.
2024-06-09 20:45:18 +00:00
parent 42c3297e54
commit dfe43179c0
6 changed files with 26 additions and 16 deletions
--- a/pkg/api/api.go
+++ b/pkg/api/api.go
@@ -9,7 +9,8 @@ import (
 type ReplyCallback func(model.Message)

 type Chunk struct {
-	Content string
+	Content    string
+	TokenCount uint
 }

 type ChatCompletionClient interface {