Include token count in api.Chunk
Also calculate the number of tokens per chunk for Gemini responses, which fixes the tok/s meter for Gemini models. Additionally, only the first candidate of a streamed Gemini response is now considered.
This commit is contained in:
@@ -244,6 +244,7 @@ func (c *AnthropicClient) CreateChatCompletionStream(
|
||||
sb.WriteString(text)
|
||||
output <- api.Chunk{
|
||||
Content: text,
|
||||
TokenCount: 1,
|
||||
}
|
||||
case "content_block_stop":
|
||||
// ignore?
|
||||
@@ -266,6 +267,7 @@ func (c *AnthropicClient) CreateChatCompletionStream(
|
||||
sb.WriteString(FUNCTION_STOP_SEQUENCE)
|
||||
output <- api.Chunk{
|
||||
Content: FUNCTION_STOP_SEQUENCE,
|
||||
TokenCount: 1,
|
||||
}
|
||||
|
||||
funcCallXml := content[start:] + FUNCTION_STOP_SEQUENCE
|
||||
|
||||
Reference in New Issue
Block a user