Add support for openrouter reasoning + refactor
Started work to make it possible to pass in per-model reasoning config. Cleaned up how we instantiate RequestParameters (TBD: remove RequestParameters?).
This commit is contained in:
@@ -23,7 +23,8 @@ type OpenAIClient struct {
|
||||
type ChatCompletionMessage struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content,omitempty"`
|
||||
ReasoningContent string `json:"reasoning_content,omitempty"`
|
||||
Reasoning string `json:"reasoning,omitempty"` // OpenRouter
|
||||
ReasoningContent string `json:"reasoning_content,omitempty"` // Deepseek, llama-server
|
||||
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
||||
ToolCallID string `json:"tool_call_id,omitempty"`
|
||||
}
|
||||
@@ -59,6 +60,13 @@ type Tool struct {
|
||||
Function FunctionDefinition `json:"function"`
|
||||
}
|
||||
|
||||
type OpenRouterReasoning struct {
|
||||
Effort provider.ReasoningEffort `json:"effort,omitempty"` // "high", "medium", "low"
|
||||
MaxTokens int `json:"max_tokens,omitempty"` // Specific token limit
|
||||
Exclude bool `json:"exclude,omitempty"` // Exclude reasoning tokens from response
|
||||
Enabled bool `json:"enabled,omitempty"` // Enable reasoning (default: inferred)
|
||||
}
|
||||
|
||||
type ChatCompletionRequest struct {
|
||||
Model string `json:"model"`
|
||||
MaxTokens int `json:"max_tokens,omitempty"`
|
||||
@@ -68,6 +76,8 @@ type ChatCompletionRequest struct {
|
||||
Tools []Tool `json:"tools,omitempty"`
|
||||
ToolChoice string `json:"tool_choice,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
// Reasoning config. TBD: handle multiple providers using the same field
|
||||
Reasoning *OpenRouterReasoning `json:"reasoning,omitempty"`
|
||||
}
|
||||
|
||||
type ChatCompletionChoice struct {
|
||||
@@ -185,6 +195,16 @@ func createChatCompletionRequest(
|
||||
request.ToolChoice = "auto"
|
||||
}
|
||||
|
||||
// Add OpenRouter reasoning config
|
||||
if params.Provider.Kind == provider.OpenRouter {
|
||||
request.Reasoning = &OpenRouterReasoning{
|
||||
Effort: params.Reasoning.Effort,
|
||||
MaxTokens: params.Reasoning.MaxTokens,
|
||||
Exclude: params.Reasoning.Exclude,
|
||||
Enabled: params.Reasoning.Enabled,
|
||||
}
|
||||
}
|
||||
|
||||
return request
|
||||
}
|
||||
|
||||
@@ -243,13 +263,20 @@ func (c *OpenAIClient) CreateChatCompletion(
|
||||
}
|
||||
|
||||
choice := completionResp.Choices[0]
|
||||
lastMessage := messages[len(messages)-1]
|
||||
|
||||
var content string
|
||||
lastMessage := messages[len(messages)-1]
|
||||
var reasoning string
|
||||
// Check if last message was a pre-fill
|
||||
if lastMessage.Role.IsAssistant() {
|
||||
// Append new contents to previous last message
|
||||
content = lastMessage.Content + choice.Message.Content
|
||||
// TBD: reasoning
|
||||
} else {
|
||||
content = choice.Message.Content
|
||||
if len(choice.Message.Reasoning) > 0 {
|
||||
reasoning = choice.Message.Reasoning
|
||||
}
|
||||
}
|
||||
|
||||
toolCalls := choice.Message.ToolCalls
|
||||
@@ -257,7 +284,7 @@ func (c *OpenAIClient) CreateChatCompletion(
|
||||
return api.NewMessageWithToolCalls(content, convertToolCallToAPI(toolCalls)), nil
|
||||
}
|
||||
|
||||
return api.NewMessageWithAssistant(content, ""), nil
|
||||
return api.NewMessageWithAssistant(content, reasoning), nil
|
||||
}
|
||||
|
||||
func (c *OpenAIClient) CreateChatCompletionStream(
|
||||
@@ -284,7 +311,9 @@ func (c *OpenAIClient) CreateChatCompletionStream(
|
||||
toolCalls := []ToolCall{}
|
||||
|
||||
lastMessage := messages[len(messages)-1]
|
||||
// Check if this was a prefill
|
||||
if lastMessage.Role.IsAssistant() {
|
||||
// Append the last message's contents to the buffer
|
||||
content.WriteString(lastMessage.Content)
|
||||
}
|
||||
|
||||
@@ -342,10 +371,23 @@ func (c *OpenAIClient) CreateChatCompletionStream(
|
||||
}
|
||||
reasoning.WriteString(delta.ReasoningContent)
|
||||
}
|
||||
// Handle reasoning field in stream response
|
||||
if len(delta.Reasoning) > 0 {
|
||||
output <- provider.Chunk{
|
||||
ReasoningContent: delta.Reasoning,
|
||||
TokenCount: 1,
|
||||
}
|
||||
reasoning.WriteString(delta.Reasoning)
|
||||
}
|
||||
}
|
||||
|
||||
if len(toolCalls) > 0 {
|
||||
return api.NewMessageWithToolCalls(content.String(), convertToolCallToAPI(toolCalls)), nil
|
||||
msg := api.NewMessageWithToolCalls(content.String(), convertToolCallToAPI(toolCalls))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
msg.ReasoningContent = reasoning.String()
|
||||
return msg, nil
|
||||
}
|
||||
|
||||
return api.NewMessageWithAssistant(content.String(), reasoning.String()), nil
|
||||
|
||||
@@ -12,8 +12,61 @@ type Chunk struct {
|
||||
TokenCount uint
|
||||
}
|
||||
|
||||
type ModelConfig struct {
|
||||
Provider string
|
||||
Client ChatCompletionProvider
|
||||
Model string
|
||||
MaxTokens int
|
||||
Temperature float32
|
||||
Reasoning bool
|
||||
}
|
||||
|
||||
func NewRequestParameters(modelConfig ModelConfig) RequestParameters {
|
||||
params := RequestParameters{
|
||||
Model: modelConfig.Model,
|
||||
MaxTokens: modelConfig.MaxTokens,
|
||||
Temperature: modelConfig.Temperature,
|
||||
Reasoning: ReasoningConfig{
|
||||
Enabled: modelConfig.Reasoning,
|
||||
},
|
||||
}
|
||||
return params
|
||||
}
|
||||
|
||||
// ReasoningEffort is a string-typed reasoning effort level.
type ReasoningEffort string

// Reasoning effort levels.
const (
	// High is the "high" effort level.
	High = ReasoningEffort("high")
	// Medium is the "medium" effort level.
	Medium = ReasoningEffort("medium")
	// Low is the "low" effort level.
	Low = ReasoningEffort("low")
)
|
||||
|
||||
// ProviderKind tells the ChatCompletionProvider which provider it is talking
// to, so that it knows how to format requests, etc. This is a somewhat leaky
// abstraction.
type ProviderKind string

// Known provider kinds.
const (
	// OpenRouter is the "openrouter" provider kind.
	OpenRouter = ProviderKind("openrouter")
	// OpenAI is the "openai" provider kind.
	OpenAI = ProviderKind("openai")
	// Anthropic is the "anthropic" provider kind.
	Anthropic = ProviderKind("anthropic")
)
|
||||
|
||||
type ProviderConfig struct {
|
||||
Kind ProviderKind
|
||||
SupportPrefill bool
|
||||
}
|
||||
|
||||
type ReasoningConfig struct {
|
||||
Effort ReasoningEffort
|
||||
MaxTokens int
|
||||
Exclude bool
|
||||
Enabled bool
|
||||
}
|
||||
|
||||
type RequestParameters struct {
|
||||
Model string
|
||||
Provider ProviderConfig
|
||||
Model string
|
||||
Reasoning ReasoningConfig
|
||||
|
||||
MaxTokens int
|
||||
Temperature float32
|
||||
|
||||
Reference in New Issue
Block a user