Private
Public Access
1
0

Include token count in api.Chunk

Also calculate the tokens per chunk for Gemini responses, fixing the
tok/s meter for Gemini models.

Further, only consider the first candidate of streamed Gemini responses.
This commit is contained in:
2024-06-09 20:45:18 +00:00
parent 42c3297e54
commit dfe43179c0
6 changed files with 26 additions and 16 deletions

View File

@@ -366,6 +366,8 @@ func (c *Client) CreateChatCompletionStream(
var toolCalls []FunctionCall
reader := bufio.NewReader(resp.Body)
lastTokenCount := 0
for {
line, err := reader.ReadBytes('\n')
if err != nil {
@@ -382,22 +384,25 @@ func (c *Client) CreateChatCompletionStream(
line = bytes.TrimPrefix(line, []byte("data: "))
var streamResp GenerateContentResponse
err = json.Unmarshal(line, &streamResp)
var resp GenerateContentResponse
err = json.Unmarshal(line, &resp)
if err != nil {
return "", err
}
for _, candidate := range streamResp.Candidates {
for _, part := range candidate.Content.Parts {
if part.FunctionCall != nil {
toolCalls = append(toolCalls, *part.FunctionCall)
} else if part.Text != "" {
output <- api.Chunk {
Content: part.Text,
}
content.WriteString(part.Text)
tokens := resp.UsageMetadata.CandidatesTokenCount - lastTokenCount
lastTokenCount += tokens
choice := resp.Candidates[0]
for _, part := range choice.Content.Parts {
if part.FunctionCall != nil {
toolCalls = append(toolCalls, *part.FunctionCall)
} else if part.Text != "" {
output <- api.Chunk{
Content: part.Text,
TokenCount: uint(tokens),
}
content.WriteString(part.Text)
}
}
}