Include token count in api.Chunk

Also calculate the number of tokens per chunk for Gemini responses, which fixes the tok/s meter for Gemini models. Additionally, only consider the first candidate of streamed Gemini responses.
2024-06-09 20:45:18 +00:00
parent 42c3297e54
commit dfe43179c0
6 changed files with 26 additions and 16 deletions
--- a/pkg/api/provider/anthropic/anthropic.go
+++ b/pkg/api/provider/anthropic/anthropic.go
@@ -244,6 +244,7 @@ func (c *AnthropicClient) CreateChatCompletionStream(
 				sb.WriteString(text)
 				output <- api.Chunk{
 					Content: text,
+					TokenCount: 1,
 				}
 			case "content_block_stop":
 				// ignore?
@@ -266,6 +267,7 @@ func (c *AnthropicClient) CreateChatCompletionStream(
 						sb.WriteString(FUNCTION_STOP_SEQUENCE)
 						output <- api.Chunk{
 							Content: FUNCTION_STOP_SEQUENCE,
+							TokenCount: 1,
 						}

 						funcCallXml := content[start:] + FUNCTION_STOP_SEQUENCE