Include token count in api.Chunk
Also calculate the tokens per chunk for Gemini responses, which fixes the tok/s meter for Gemini models. Additionally, only consider the first candidate of streamed Gemini responses.
This commit is contained in:
@@ -182,7 +182,8 @@ func (c *OllamaClient) CreateChatCompletionStream(
|
||||
|
||||
if len(streamResp.Message.Content) > 0 {
|
||||
output <- api.Chunk{
|
||||
Content: streamResp.Message.Content,
|
||||
Content: streamResp.Message.Content,
|
||||
TokenCount: 1,
|
||||
}
|
||||
content.WriteString(streamResp.Message.Content)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user