Add support for OpenRouter reasoning + refactor

Started work to make it possible to pass in per-model reasoning config.
Cleaned up how we instantiate RequestParameters (TBD: remove RequestParameters?).
2025-07-29 00:55:28 +00:00
parent 54da088dee
commit 5335b5c28f
6 changed files with 125 additions and 41 deletions

File: OpenAI client

@@ -23,7 +23,8 @@ type OpenAIClient struct {
 type ChatCompletionMessage struct {
 	Role             string     `json:"role"`
 	Content          string     `json:"content,omitempty"`
-	ReasoningContent string     `json:"reasoning_content,omitempty"`
+	Reasoning        string     `json:"reasoning,omitempty"`          // OpenRouter
+	ReasoningContent string     `json:"reasoning_content,omitempty"` // DeepSeek, llama-server
 	ToolCalls        []ToolCall `json:"tool_calls,omitempty"`
 	ToolCallID       string     `json:"tool_call_id,omitempty"`
 }
@@ -59,6 +60,13 @@ type Tool struct {
 	Function FunctionDefinition `json:"function"`
 }
 
+type OpenRouterReasoning struct {
+	Effort    provider.ReasoningEffort `json:"effort,omitempty"`     // "high", "medium", "low"
+	MaxTokens int                      `json:"max_tokens,omitempty"` // Specific token limit
+	Exclude   bool                     `json:"exclude,omitempty"`    // Exclude reasoning tokens from response
+	Enabled   bool                     `json:"enabled,omitempty"`    // Enable reasoning (default: inferred)
+}
+
 type ChatCompletionRequest struct {
 	Model     string `json:"model"`
 	MaxTokens int    `json:"max_tokens,omitempty"`
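
A note on the omitempty tags above (standard encoding/json behavior, not something this commit changes): zero values are dropped from the payload, so Enabled: false or MaxTokens: 0 are never serialized, and this struct cannot send an explicit "enabled": false. A minimal standalone sketch, using a local copy of the struct:

package main

import (
	"encoding/json"
	"fmt"
)

// Local copy of OpenRouterReasoning with the same tags; Effort is a plain
// string here because provider.ReasoningEffort is itself a string type.
type OpenRouterReasoning struct {
	Effort    string `json:"effort,omitempty"`
	MaxTokens int    `json:"max_tokens,omitempty"`
	Exclude   bool   `json:"exclude,omitempty"`
	Enabled   bool   `json:"enabled,omitempty"`
}

func main() {
	b, _ := json.Marshal(OpenRouterReasoning{Effort: "high", Enabled: false})
	fmt.Println(string(b)) // prints {"effort":"high"}; Enabled: false was dropped
}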
@@ -68,6 +76,8 @@ type ChatCompletionRequest struct {
 	Tools      []Tool `json:"tools,omitempty"`
 	ToolChoice string `json:"tool_choice,omitempty"`
 	Stream     bool   `json:"stream,omitempty"`
+	// Reasoning config. TBD: handle multiple providers using the same field
+	Reasoning *OpenRouterReasoning `json:"reasoning,omitempty"`
 }
 
 type ChatCompletionChoice struct {
@@ -185,6 +195,16 @@ func createChatCompletionRequest(
 		request.ToolChoice = "auto"
 	}
 
+	// Add OpenRouter reasoning config
+	if params.Provider.Kind == provider.OpenRouter {
+		request.Reasoning = &OpenRouterReasoning{
+			Effort:    params.Reasoning.Effort,
+			MaxTokens: params.Reasoning.MaxTokens,
+			Exclude:   params.Reasoning.Exclude,
+			Enabled:   params.Reasoning.Enabled,
+		}
+	}
+
 	return request
 }
@@ -243,13 +263,20 @@ func (c *OpenAIClient) CreateChatCompletion(
 	}
 	choice := completionResp.Choices[0]
+	lastMessage := messages[len(messages)-1]
 	var content string
-	lastMessage := messages[len(messages)-1]
+	var reasoning string
+	// Check if last message was a pre-fill
 	if lastMessage.Role.IsAssistant() {
 		// Append new contents to previous last message
 		content = lastMessage.Content + choice.Message.Content
+		// TBD: reasoning
 	} else {
 		content = choice.Message.Content
+		if len(choice.Message.Reasoning) > 0 {
+			reasoning = choice.Message.Reasoning
+		}
 	}
 
 	toolCalls := choice.Message.ToolCalls
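
For context on the pre-fill branch above: a caller can end the conversation with a partial assistant message, and the model's completion is then appended to that prefix. A rough sketch of the pattern (the Message type is illustrative; the real one lives in this repo's api package):

package main

import "fmt"

// Illustrative stand-in for the api package's message type.
type Message struct {
	Role    string // "user", "assistant", ...
	Content string
}

// continuePrefill glues the new completion onto a trailing assistant
// message, mirroring content = lastMessage.Content + choice.Message.Content.
func continuePrefill(last Message, completion string) string {
	if last.Role == "assistant" {
		return last.Content + completion
	}
	return completion
}

func main() {
	last := Message{Role: "assistant", Content: "The capital of France is"}
	fmt.Println(continuePrefill(last, " Paris.")) // The capital of France is Paris.
}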
@@ -257,7 +284,7 @@ func (c *OpenAIClient) CreateChatCompletion(
 		return api.NewMessageWithToolCalls(content, convertToolCallToAPI(toolCalls)), nil
 	}
 
-	return api.NewMessageWithAssistant(content, ""), nil
+	return api.NewMessageWithAssistant(content, reasoning), nil
 }
 
 func (c *OpenAIClient) CreateChatCompletionStream(
@@ -284,7 +311,9 @@ func (c *OpenAIClient) CreateChatCompletionStream(
 	toolCalls := []ToolCall{}
 
 	lastMessage := messages[len(messages)-1]
+	// Check if this was a prefill
 	if lastMessage.Role.IsAssistant() {
+		// Append the last message's contents to the buffer
 		content.WriteString(lastMessage.Content)
 	}
@@ -342,10 +371,23 @@ func (c *OpenAIClient) CreateChatCompletionStream(
 			}
 			reasoning.WriteString(delta.ReasoningContent)
 		}
+		// Handle reasoning field in stream response
+		if len(delta.Reasoning) > 0 {
+			output <- provider.Chunk{
+				ReasoningContent: delta.Reasoning,
+				TokenCount:       1,
+			}
+			reasoning.WriteString(delta.Reasoning)
+		}
 	}
 
 	if len(toolCalls) > 0 {
-		return api.NewMessageWithToolCalls(content.String(), convertToolCallToAPI(toolCalls)), nil
+		msg := api.NewMessageWithToolCalls(content.String(), convertToolCallToAPI(toolCalls))
+		if err != nil {
+			return nil, err
+		}
+		msg.ReasoningContent = reasoning.String()
+		return msg, nil
 	}
 
 	return api.NewMessageWithAssistant(content.String(), reasoning.String()), nil
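
To make the streaming side concrete: each reasoning delta is forwarded as its own Chunk with TokenCount: 1, interleaved with regular content chunks. A sketch of a consumer draining the channel (the Content field is an assumption; only ReasoningContent and TokenCount appear in this diff):

package main

import "fmt"

// Local stand-in for provider.Chunk; Content is assumed, the other two
// fields are the ones this diff writes to.
type Chunk struct {
	Content          string
	ReasoningContent string
	TokenCount       uint
}

func main() {
	output := make(chan Chunk, 3)
	output <- Chunk{ReasoningContent: "Let me think...", TokenCount: 1}
	output <- Chunk{Content: "Hello", TokenCount: 1}
	output <- Chunk{Content: ", world.", TokenCount: 1}
	close(output)

	// Reasoning arrives interleaved with content, so a consumer can
	// render it separately (e.g. dimmed in a TUI).
	for chunk := range output {
		if chunk.ReasoningContent != "" {
			fmt.Printf("[reasoning] %s\n", chunk.ReasoningContent)
			continue
		}
		fmt.Print(chunk.Content)
	}
	fmt.Println()
}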

File: provider package

@@ -12,8 +12,61 @@ type Chunk struct {
 	TokenCount uint
 }
 
+type ModelConfig struct {
+	Provider    string
+	Client      ChatCompletionProvider
+	Model       string
+	MaxTokens   int
+	Temperature float32
+	Reasoning   bool
+}
+
+func NewRequestParameters(modelConfig ModelConfig) RequestParameters {
+	params := RequestParameters{
+		Model:       modelConfig.Model,
+		MaxTokens:   modelConfig.MaxTokens,
+		Temperature: modelConfig.Temperature,
+		Reasoning: ReasoningConfig{
+			Enabled: modelConfig.Reasoning,
+		},
+	}
+	return params
+}
+
+type ReasoningEffort string
+
+const (
+	High   ReasoningEffort = "high"
+	Medium ReasoningEffort = "medium"
+	Low    ReasoningEffort = "low"
+)
+
+// ProviderKind is a bit leaky: it informs the ChatCompletionProvider what
+// provider we're on so we know how to format requests, etc.
+type ProviderKind string
+
+const (
+	OpenRouter ProviderKind = "openrouter"
+	OpenAI     ProviderKind = "openai"
+	Anthropic  ProviderKind = "anthropic"
+)
+
+type ProviderConfig struct {
+	Kind           ProviderKind
+	SupportPrefill bool
+}
+
+type ReasoningConfig struct {
+	Effort    ReasoningEffort
+	MaxTokens int
+	Exclude   bool
+	Enabled   bool
+}
+
 type RequestParameters struct {
-	Model       string
+	Provider    ProviderConfig
+	Model       string
+	Reasoning   ReasoningConfig
 	MaxTokens   int
 	Temperature float32
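
Putting the two files together, building parameters for an OpenRouter call might look like the sketch below. The import path and model slug are placeholders; note that NewRequestParameters only carries Enabled over from ModelConfig, so Provider and the finer-grained reasoning knobs have to be set afterwards:

package example

import "example.com/yourapp/provider" // placeholder import path

func openRouterParams() provider.RequestParameters {
	cfg := provider.ModelConfig{
		Model:       "deepseek/deepseek-r1", // illustrative model slug
		MaxTokens:   4096,
		Temperature: 0.7,
		Reasoning:   true, // becomes params.Reasoning.Enabled
	}

	params := provider.NewRequestParameters(cfg)
	params.Provider = provider.ProviderConfig{Kind: provider.OpenRouter}
	params.Reasoning.Effort = provider.High // not set by the constructor
	return params
}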