launch: fetch recommended models from server endpoint

Add /api/x/launch-models endpoint that serves recommended models from
the registry (ollama.com) with a 24h cache, merged with built-in local
defaults. The client fetches from this endpoint instead of using a
hardcoded list, with a client-side fallback for older servers.
This commit is contained in:
Bruce MacDonald
2026-04-13 20:27:18 -07:00
parent 40a1317dfd
commit 8ddbd9bf60
7 changed files with 221 additions and 44 deletions

View File

@@ -469,6 +469,15 @@ func (c *Client) Disconnect(ctx context.Context, encodedKey string) error {
return c.do(ctx, http.MethodDelete, fmt.Sprintf("/api/user/keys/%s", encodedKey), nil, nil)
}
// LaunchModels returns the recommended models for ollama launch.
func (c *Client) LaunchModels(ctx context.Context) (*LaunchModelsResponse, error) {
	resp := &LaunchModelsResponse{}
	err := c.do(ctx, http.MethodGet, "/api/x/launch-models", nil, resp)
	if err != nil {
		return nil, err
	}
	return resp, nil
}
func (c *Client) Whoami(ctx context.Context) (*UserResponse, error) {
var resp UserResponse
if err := c.do(ctx, http.MethodPost, "/api/me", nil, &resp); err != nil {

View File

@@ -923,6 +923,20 @@ type UserResponse struct {
Plan string `json:"plan,omitempty"`
}
// LaunchModel describes a recommended model returned by the launch-models endpoint.
type LaunchModel struct {
	// Model is the model reference, e.g. "qwen3.5" or "kimi-k2.5:cloud".
	Model string `json:"model"`
	// Description is a short human-readable summary shown in pickers.
	Description string `json:"description"`
	// ContextLength is the context window in tokens; zero when unknown
	// (local models in this payload leave it unset).
	ContextLength int `json:"context_length"`
	// MaxOutputTokens is the output token limit; zero when unknown.
	MaxOutputTokens int `json:"max_output_tokens"`
	// VRAM is a rough local memory requirement such as "~16GB";
	// empty for cloud models.
	VRAM string `json:"vram"`
}

// LaunchModelsResponse is the response from the launch-models endpoint.
type LaunchModelsResponse struct {
	// Models lists the recommended models, cloud entries first by convention.
	Models []LaunchModel `json:"models"`
}
// Tensor describes the metadata for a given tensor.
type Tensor struct {
Name string `json:"name"`

View File

@@ -299,6 +299,28 @@ func TestIsCloudModel(t *testing.T) {
})
}
// testLaunchModelsJSON mirrors the /api/x/launch-models response body used by
// test servers; it matches the entries of testRecommendedModels.
const testLaunchModelsJSON = `{"models":[
{"model":"kimi-k2.5:cloud","description":"Multimodal reasoning with subagents","context_length":262144,"max_output_tokens":262144,"vram":""},
{"model":"qwen3.5:cloud","description":"Reasoning, coding, and agentic tool use with vision","context_length":262144,"max_output_tokens":32768,"vram":""},
{"model":"glm-5.1:cloud","description":"Reasoning and code generation","context_length":202752,"max_output_tokens":131072,"vram":""},
{"model":"minimax-m2.7:cloud","description":"Fast, efficient coding and real-world productivity","context_length":204800,"max_output_tokens":128000,"vram":""},
{"model":"gemma4","description":"Reasoning and code generation locally","context_length":0,"max_output_tokens":0,"vram":"~16GB"},
{"model":"qwen3.5","description":"Reasoning, coding, and visual understanding locally","context_length":0,"max_output_tokens":0,"vram":"~11GB"}
]}`
// testRecommendedModels returns the full cloud+local recommended models list
// for use in tests.
func testRecommendedModels() []ModelItem {
	cloud := []ModelItem{
		{Name: "kimi-k2.5:cloud", Description: "Multimodal reasoning with subagents", Recommended: true},
		{Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true},
		{Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true},
		{Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true},
	}
	local := []ModelItem{
		{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAM: "~16GB"},
		{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAM: "~11GB"},
	}
	return append(cloud, local...)
}
func names(items []ModelItem) []string {
var out []string
for _, item := range items {
@@ -308,7 +330,7 @@ func names(items []ModelItem) []string {
}
func TestBuildModelList_NoExistingModels(t *testing.T) {
items, _, _, _ := buildModelList(nil, nil, "")
items, _, _, _ := buildModelList(nil, testRecommendedModels(), nil, "")
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5"}
if diff := cmp.Diff(want, names(items)); diff != "" {
@@ -334,7 +356,7 @@ func TestBuildModelList_OnlyLocalModels_CloudRecsAtBottom(t *testing.T) {
{Name: "qwen2.5:latest", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// Recommended pinned at top (local recs first, then cloud recs when only-local), then installed non-recs
@@ -350,7 +372,7 @@ func TestBuildModelList_BothCloudAndLocal_RegularSort(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// All recs pinned at top (cloud before local in mixed case), then non-recs
@@ -366,7 +388,7 @@ func TestBuildModelList_PreCheckedFirst(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, []string{"llama3.2"}, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), []string{"llama3.2"}, "")
got := names(items)
if got[0] != "llama3.2" {
@@ -383,7 +405,7 @@ func TestBuildModelList_CurrentDefaultFirstAmongCheckedNonRec(t *testing.T) {
// "zebra" is the current/default; all three are checked, none are recommended.
// Expected non-rec order: zebra (default), alpha, middle (alphabetical).
items, _, _, _ := buildModelList(existing, []string{"zebra", "alpha", "middle"}, "zebra")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), []string{"zebra", "alpha", "middle"}, "zebra")
got := names(items)
// Skip recommended items to find the non-rec portion.
@@ -413,7 +435,7 @@ func TestBuildModelList_ExistingRecommendedMarked(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
for _, item := range items {
switch item.Name {
@@ -439,7 +461,7 @@ func TestBuildModelList_ExistingCloudModelsNotPushedToBottom(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// gemma4 and glm-5.1:cloud are installed so they sort normally;
@@ -457,7 +479,7 @@ func TestBuildModelList_HasRecommendedCloudModel_OnlyNonInstalledAtBottom(t *tes
{Name: "kimi-k2.5:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// kimi-k2.5:cloud is installed so it sorts normally;
@@ -489,7 +511,7 @@ func TestBuildModelList_LatestTagStripped(t *testing.T) {
{Name: "llama3.2:latest", Remote: false},
}
items, _, existingModels, _ := buildModelList(existing, nil, "")
items, _, existingModels, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// :latest should be stripped from display names
@@ -522,7 +544,7 @@ func TestBuildModelList_ReturnsExistingAndCloudMaps(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
_, _, existingModels, cloudModels := buildModelList(existing, nil, "")
_, _, existingModels, cloudModels := buildModelList(existing, testRecommendedModels(), nil, "")
if !existingModels["llama3.2"] {
t.Error("llama3.2 should be in existingModels")
@@ -554,7 +576,7 @@ func TestBuildModelList_RecommendedFieldSet(t *testing.T) {
{Name: "llama3.2:latest", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
for _, item := range items {
switch item.Name {
@@ -576,7 +598,7 @@ func TestBuildModelList_MixedCase_CloudRecsFirst(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// Cloud recs should sort before local recs in mixed case
@@ -592,7 +614,7 @@ func TestBuildModelList_OnlyLocal_LocalRecsFirst(t *testing.T) {
{Name: "llama3.2:latest", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// Local recs should sort before cloud recs in only-local case
@@ -609,7 +631,7 @@ func TestBuildModelList_RecsAboveNonRecs(t *testing.T) {
{Name: "custom-model", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// All recommended models should appear before non-recommended installed models
@@ -635,7 +657,7 @@ func TestBuildModelList_CheckedBeforeRecs(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, []string{"llama3.2"}, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), []string{"llama3.2"}, "")
got := names(items)
if got[0] != "llama3.2" {
@@ -649,7 +671,7 @@ func TestBuildModelList_CurrentPrefersExactLocalOverCloudPrefix(t *testing.T) {
{Name: "qwen3.5", Remote: false},
}
_, orderedChecked, _, _ := buildModelList(existing, []string{"qwen3.5", "qwen3.5:cloud"}, "qwen3.5")
_, orderedChecked, _, _ := buildModelList(existing, testRecommendedModels(), []string{"qwen3.5", "qwen3.5:cloud"}, "qwen3.5")
if len(orderedChecked) < 2 {
t.Fatalf("expected orderedChecked to preserve both selections, got %v", orderedChecked)
}
@@ -664,7 +686,7 @@ func TestBuildModelList_CurrentPrefersExactCloudOverLocalPrefix(t *testing.T) {
{Name: "qwen3.5:cloud", Remote: true},
}
_, orderedChecked, _, _ := buildModelList(existing, []string{"qwen3.5:cloud", "qwen3.5"}, "qwen3.5:cloud")
_, orderedChecked, _, _ := buildModelList(existing, testRecommendedModels(), []string{"qwen3.5:cloud", "qwen3.5"}, "qwen3.5:cloud")
if len(orderedChecked) < 2 {
t.Fatalf("expected orderedChecked to preserve both selections, got %v", orderedChecked)
}
@@ -1510,7 +1532,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
existing := []modelInfo{
{Name: "qwen3.5", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
for _, item := range items {
if item.Name == "qwen3.5" {
@@ -1527,7 +1549,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
})
t.Run("not-installed local rec has VRAM in description", func(t *testing.T) {
items, _, _, _ := buildModelList(nil, nil, "")
items, _, _, _ := buildModelList(nil, testRecommendedModels(), nil, "")
for _, item := range items {
if item.Name == "qwen3.5" {
@@ -1544,7 +1566,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
existing := []modelInfo{
{Name: "qwen3.5", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
for _, item := range items {
if item.Name == "qwen3.5" {

View File

@@ -154,6 +154,7 @@ type ModelItem struct {
Name string
Description string
Recommended bool
VRAM string
}
// LaunchCmd returns the cobra command for launching integrations.
@@ -560,8 +561,9 @@ func (c *launcherClient) loadSelectableModels(ctx context.Context, preChecked []
return nil, nil, err
}
recs := fetchRecommendedModels(ctx, c.apiClient)
cloudDisabled, _ := cloudStatusDisabled(ctx, c.apiClient)
items, orderedChecked, _, _ := buildModelList(c.modelInventory, preChecked, current)
items, orderedChecked, _, _ := buildModelList(c.modelInventory, recs, preChecked, current)
if cloudDisabled {
items = filterCloudItems(items)
orderedChecked = c.filterDisabledCloudModels(ctx, orderedChecked)

View File

@@ -580,6 +580,8 @@ func TestResolveRunModel_UsesSignInHookForCloudModel(t *testing.T) {
case "/api/me":
w.WriteHeader(http.StatusUnauthorized)
fmt.Fprint(w, `{"error":"unauthorized","signin_url":"https://example.com/signin"}`)
case "/api/x/launch-models":
fmt.Fprint(w, testLaunchModelsJSON)
default:
http.NotFound(w, r)
}
@@ -708,6 +710,8 @@ func TestLaunchIntegration_EditorForceConfigure_FloatsCheckedModelsInPicker(t *t
fmt.Fprintf(w, `{"model":%q}`, req.Model)
case "/api/me":
fmt.Fprint(w, `{"name":"test-user"}`)
case "/api/x/launch-models":
fmt.Fprint(w, testLaunchModelsJSON)
default:
http.NotFound(w, r)
}
@@ -1126,6 +1130,8 @@ func TestLaunchIntegration_EditorConfigureMultiAllFailuresKeepsExistingAndSkipsL
default:
http.NotFound(w, r)
}
case "/api/x/launch-models":
fmt.Fprint(w, testLaunchModelsJSON)
default:
http.NotFound(w, r)
}

View File

@@ -4,12 +4,14 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"os"
"os/exec"
"runtime"
"slices"
"strings"
"sync"
"time"
"github.com/ollama/ollama/api"
@@ -20,26 +22,15 @@ import (
"github.com/ollama/ollama/progress"
)
var recommendedModels = []ModelItem{
{Name: "kimi-k2.5:cloud", Description: "Multimodal reasoning with subagents", Recommended: true},
{Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true},
{Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true},
{Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true},
{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true},
{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true},
}
var recommendedVRAM = map[string]string{
"gemma4": "~16GB",
"qwen3.5": "~11GB",
}
// cloudModelLimit holds context and output token limits for a cloud model.
type cloudModelLimit struct {
	// Context is the context window size in tokens.
	Context int
	// Output is the maximum number of output tokens.
	Output int
}
// cloudModelLimitsMu guards concurrent access to cloudModelLimits.
var cloudModelLimitsMu sync.RWMutex
// cloudModelLimits maps cloud model base names to their token limits.
// TODO(parthsareen): grab context/output limits from model info instead of hardcoding
var cloudModelLimits = map[string]cloudModelLimit{
@@ -69,13 +60,53 @@ var cloudModelLimits = map[string]cloudModelLimit{
func lookupCloudModelLimit(name string) (cloudModelLimit, bool) {
base, stripped := modelref.StripCloudSourceTag(name)
if stripped {
if l, ok := cloudModelLimits[base]; ok {
cloudModelLimitsMu.RLock()
l, ok := cloudModelLimits[base]
cloudModelLimitsMu.RUnlock()
if ok {
return l, true
}
}
return cloudModelLimit{}, false
}
// defaultRecommendedModels is the client-side fallback used when the server
// endpoint is unreachable (e.g. older server, network error). It lists only
// local models; cloud recommendations come exclusively from the server.
var defaultRecommendedModels = []ModelItem{
	{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAM: "~16GB"},
	{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAM: "~11GB"},
}
// fetchRecommendedModels fetches the recommended models from the local Ollama
// server's launch-models endpoint. The server caches models from the remote
// registry and merges them with built-in local defaults, so this call is fast.
// Falls back to a built-in default list if the server is unavailable.
//
// As a side effect, any entry carrying token limits is recorded in the
// package-level cloudModelLimits map for later lookup.
func fetchRecommendedModels(ctx context.Context, client *api.Client) []ModelItem {
	resp, err := client.LaunchModels(ctx)
	if err != nil {
		slog.Warn("failed to load launch models from server, using defaults", "error", err)
		return defaultRecommendedModels
	}

	// Build the picker items outside the lock; only the map writes below
	// need cloudModelLimitsMu.
	items := make([]ModelItem, 0, len(resp.Models))
	for _, m := range resp.Models {
		items = append(items, ModelItem{
			Name:        m.Model,
			Description: m.Description,
			Recommended: true,
			VRAM:        m.VRAM,
		})
	}

	cloudModelLimitsMu.Lock()
	for _, m := range resp.Models {
		if m.ContextLength > 0 || m.MaxOutputTokens > 0 {
			// NOTE(review): lookupCloudModelLimit strips the cloud source tag
			// and indexes this map by the base name, so an entry keyed here
			// with a ":cloud" suffix would never be found. Confirm the server
			// returns base names in Model, or strip the tag before storing.
			cloudModelLimits[m.Model] = cloudModelLimit{
				Context: m.ContextLength,
				Output:  m.MaxOutputTokens,
			}
		}
	}
	cloudModelLimitsMu.Unlock()

	return items
}
// missingModelPolicy controls how model-not-found errors should be handled.
type missingModelPolicy int
@@ -260,16 +291,19 @@ func confirmEditorEdit(runner Runner, editor Editor) (bool, error) {
}
// buildModelList merges existing models with recommendations for selection UIs.
func buildModelList(existing []modelInfo, preChecked []string, current string) (items []ModelItem, orderedChecked []string, existingModels, cloudModels map[string]bool) {
func buildModelList(existing []modelInfo, recs []ModelItem, preChecked []string, current string) (items []ModelItem, orderedChecked []string, existingModels, cloudModels map[string]bool) {
existingModels = make(map[string]bool)
cloudModels = make(map[string]bool)
recommended := make(map[string]bool)
var hasLocalModel, hasCloudModel bool
recDesc := make(map[string]string)
for _, rec := range recommendedModels {
recVRAM := make(map[string]string)
for _, rec := range recs {
recommended[rec.Name] = true
recDesc[rec.Name] = rec.Description
if rec.VRAM != "" {
recVRAM[rec.Name] = rec.VRAM
}
}
for _, m := range existing {
@@ -282,11 +316,11 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
}
displayName := strings.TrimSuffix(m.Name, ":latest")
existingModels[displayName] = true
item := ModelItem{Name: displayName, Recommended: recommended[displayName], Description: recDesc[displayName]}
item := ModelItem{Name: displayName, Recommended: recommended[displayName], Description: recDesc[displayName], VRAM: recVRAM[displayName]}
items = append(items, item)
}
for _, rec := range recommendedModels {
for _, rec := range recs {
if existingModels[rec.Name] || existingModels[rec.Name+":latest"] {
continue
}
@@ -332,8 +366,8 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
if items[i].Description != "" {
parts = append(parts, items[i].Description)
}
if vram := recommendedVRAM[items[i].Name]; vram != "" {
parts = append(parts, vram)
if items[i].VRAM != "" {
parts = append(parts, items[i].VRAM)
}
parts = append(parts, "(not downloaded)")
items[i].Description = strings.Join(parts, ", ")
@@ -341,7 +375,7 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
}
recRank := make(map[string]int)
for i, rec := range recommendedModels {
for i, rec := range recs {
recRank[rec.Name] = i + 1
}

View File

@@ -102,6 +102,7 @@ type Server struct {
sched *Scheduler
defaultNumCtx int
requestLogger *inferenceRequestLogger
launchModels atomic.Pointer[api.LaunchModelsResponse]
}
func init() {
@@ -1706,6 +1707,7 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
r.POST("/api/copy", s.CopyHandler)
r.POST("/api/experimental/web_search", s.WebSearchExperimentalHandler)
r.POST("/api/experimental/web_fetch", s.WebFetchExperimentalHandler)
r.GET("/api/x/launch-models", s.LaunchModelsHandler)
// Inference
r.GET("/api/ps", s.PsHandler)
@@ -1803,6 +1805,11 @@ func Serve(ln net.Listener) error {
http.Handle("/", h)
ctx, done := context.WithCancel(context.Background())
if !envconfig.NoCloud() {
go s.refreshLaunchModels(ctx)
}
schedCtx, schedDone := context.WithCancel(ctx)
sched := InitScheduler(schedCtx)
s.sched = sched
@@ -2778,3 +2785,86 @@ func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, mo
c.JSON(http.StatusOK, finalResponse)
}
}
// launchModelsURL is the remote registry endpoint that publishes the current
// recommended launch models.
const launchModelsURL = "https://ollama.com/api/x/launch-models"

// defaultLaunchModels are local models that are always recommended. They are
// served as-is until the first successful remote fetch, and merged into every
// remote response thereafter.
var defaultLaunchModels = api.LaunchModelsResponse{
	Models: []api.LaunchModel{
		{Model: "gemma4", Description: "Reasoning and code generation locally", VRAM: "~16GB"},
		{Model: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", VRAM: "~11GB"},
	},
}
// LaunchModelsHandler returns the cached recommended launch models, serving
// the built-in local defaults until the first remote fetch has completed.
func (s *Server) LaunchModelsHandler(c *gin.Context) {
	if models := s.launchModels.Load(); models != nil {
		c.JSON(http.StatusOK, models)
		return
	}
	c.JSON(http.StatusOK, &defaultLaunchModels)
}
// refreshLaunchModels fetches the recommended launch models from the remote
// server on startup and then refreshes every 24 hours. It merges remote models
// with the built-in local defaults before caching. It runs until ctx is
// cancelled and is intended to be started as a goroutine from Serve.
func (s *Server) refreshLaunchModels(ctx context.Context) {
	// Prime the cache immediately so the first /api/x/launch-models request
	// doesn't have to wait for the first tick.
	s.fetchLaunchModels(ctx)
	ticker := time.NewTicker(24 * time.Hour)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			// Server shutdown: stop refreshing.
			return
		case <-ticker.C:
			s.fetchLaunchModels(ctx)
		}
	}
}
// fetchLaunchModels retrieves the recommended launch models from the remote
// registry, appends any built-in local default not already present, and
// stores the merged result for LaunchModelsHandler to serve. Failures are
// logged and leave the previously cached value untouched.
func (s *Server) fetchLaunchModels(ctx context.Context) {
	// Bound the remote call so a slow registry can't stall the refresh loop.
	reqCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, launchModelsURL, nil)
	if err != nil {
		slog.Warn("failed to create launch models request", "error", err)
		return
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		slog.Warn("failed to fetch launch models", "error", err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		slog.Warn("unexpected status fetching launch models", "status", resp.StatusCode)
		return
	}

	var merged api.LaunchModelsResponse
	if err := json.NewDecoder(resp.Body).Decode(&merged); err != nil {
		slog.Warn("failed to decode launch models", "error", err)
		return
	}

	// Remote models keep their order; built-in local defaults are appended
	// only when the remote response doesn't already list them.
	seen := make(map[string]bool, len(merged.Models))
	for _, m := range merged.Models {
		seen[m.Model] = true
	}
	for _, d := range defaultLaunchModels.Models {
		if !seen[d.Model] {
			merged.Models = append(merged.Models, d)
		}
	}

	s.launchModels.Store(&merged)
	slog.Info("cached launch models", "count", len(merged.Models))
}