mirror of
https://github.com/ollama/ollama.git
synced 2026-04-18 09:03:35 -04:00
app: use the same client for inference and other requests (#15204)
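Previously the app accidentally used different HTTP clients (and therefore different User-Agent headers) depending on whether a request was an inference call or any other call. This change routes both kinds of request through the same User-Agent-setting client; the only remaining difference is the timeout: 10 seconds for regular requests and no timeout for inference, so long-running requests aren't truncated.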
This commit is contained in:
19
app/ui/ui.go
19
app/ui/ui.go
@@ -342,8 +342,18 @@ func (t *userAgentTransport) RoundTrip(req *http.Request) (*http.Response, error
|
|||||||
|
|
||||||
// httpClient returns an HTTP client that automatically adds the User-Agent header
|
// httpClient returns an HTTP client that automatically adds the User-Agent header
|
||||||
func (s *Server) httpClient() *http.Client {
|
func (s *Server) httpClient() *http.Client {
|
||||||
|
return userAgentHTTPClient(10 * time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
|
// inferenceClient uses almost the same HTTP client, but without a timeout so
|
||||||
|
// long requests aren't truncated
|
||||||
|
func (s *Server) inferenceClient() *api.Client {
|
||||||
|
return api.NewClient(envconfig.Host(), userAgentHTTPClient(0))
|
||||||
|
}
|
||||||
|
|
||||||
|
func userAgentHTTPClient(timeout time.Duration) *http.Client {
|
||||||
return &http.Client{
|
return &http.Client{
|
||||||
Timeout: 10 * time.Second,
|
Timeout: timeout,
|
||||||
Transport: &userAgentTransport{
|
Transport: &userAgentTransport{
|
||||||
base: http.DefaultTransport,
|
base: http.DefaultTransport,
|
||||||
},
|
},
|
||||||
@@ -721,11 +731,7 @@ func (s *Server) chat(w http.ResponseWriter, r *http.Request) error {
|
|||||||
_, cancelLoading := context.WithCancel(ctx)
|
_, cancelLoading := context.WithCancel(ctx)
|
||||||
loading := false
|
loading := false
|
||||||
|
|
||||||
c, err := api.ClientFromEnvironment()
|
c := s.inferenceClient()
|
||||||
if err != nil {
|
|
||||||
cancelLoading()
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if the model exists locally by trying to show it
|
// Check if the model exists locally by trying to show it
|
||||||
// TODO (jmorganca): skip this round trip and instead just act
|
// TODO (jmorganca): skip this round trip and instead just act
|
||||||
@@ -1682,7 +1688,6 @@ func supportsBrowserTools(model string) bool {
|
|||||||
return strings.HasPrefix(strings.ToLower(model), "gpt-oss")
|
return strings.HasPrefix(strings.ToLower(model), "gpt-oss")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// buildChatRequest converts store.Chat to api.ChatRequest
|
// buildChatRequest converts store.Chat to api.ChatRequest
|
||||||
func (s *Server) buildChatRequest(chat *store.Chat, model string, think any, availableTools []map[string]any) (*api.ChatRequest, error) {
|
func (s *Server) buildChatRequest(chat *store.Chat, model string, think any, availableTools []map[string]any) (*api.ChatRequest, error) {
|
||||||
var msgs []api.Message
|
var msgs []api.Message
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import (
|
|||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ollama/ollama/api"
|
||||||
"github.com/ollama/ollama/app/store"
|
"github.com/ollama/ollama/app/store"
|
||||||
"github.com/ollama/ollama/app/updater"
|
"github.com/ollama/ollama/app/updater"
|
||||||
)
|
)
|
||||||
@@ -526,6 +527,33 @@ func TestUserAgentTransport(t *testing.T) {
|
|||||||
t.Logf("User-Agent transport successfully set: %s", receivedUA)
|
t.Logf("User-Agent transport successfully set: %s", receivedUA)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestInferenceClientUsesUserAgent(t *testing.T) {
|
||||||
|
var gotUserAgent atomic.Value
|
||||||
|
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
gotUserAgent.Store(r.Header.Get("User-Agent"))
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.Write([]byte(`{}`))
|
||||||
|
}))
|
||||||
|
defer ts.Close()
|
||||||
|
|
||||||
|
t.Setenv("OLLAMA_HOST", ts.URL)
|
||||||
|
|
||||||
|
server := &Server{}
|
||||||
|
client := server.inferenceClient()
|
||||||
|
|
||||||
|
_, err := client.Show(context.Background(), &api.ShowRequest{Model: "test"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("show request failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
receivedUA, _ := gotUserAgent.Load().(string)
|
||||||
|
expectedUA := userAgent()
|
||||||
|
|
||||||
|
if receivedUA != expectedUA {
|
||||||
|
t.Errorf("User-Agent mismatch\nExpected: %s\nReceived: %s", expectedUA, receivedUA)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestSupportsBrowserTools(t *testing.T) {
|
func TestSupportsBrowserTools(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
model string
|
model string
|
||||||
|
|||||||
Reference in New Issue
Block a user