Include token count in api.Chunk

Also calculate the tokens per chunk for Gemini responses, which fixes the tok/s meter for Gemini models. Additionally, only the first candidate of each streamed Gemini response is now considered.
2024-06-09 20:45:18 +00:00
parent 42c3297e54
commit dfe43179c0
6 changed files with 26 additions and 16 deletions
--- a/pkg/api/provider/ollama/ollama.go
+++ b/pkg/api/provider/ollama/ollama.go
@@ -182,7 +182,8 @@ func (c *OllamaClient) CreateChatCompletionStream(

 		if len(streamResp.Message.Content) > 0 {
 			output <- api.Chunk{
-				Content: streamResp.Message.Content,
+				Content:    streamResp.Message.Content,
+				TokenCount: 1,
 			}
 			content.WriteString(streamResp.Message.Content)
 		}