launch: fetch recommended models from server endpoint

Add /api/x/launch-models endpoint that serves recommended models from
the registry (ollama.com) with a 24h cache, merged with built-in local
defaults. The client fetches from this endpoint instead of using a
hardcoded list, with a client-side fallback for older servers.
This commit is contained in:
Bruce MacDonald
2026-04-13 20:27:18 -07:00
parent 40a1317dfd
commit 8ddbd9bf60
7 changed files with 221 additions and 44 deletions

View File

@@ -469,6 +469,15 @@ func (c *Client) Disconnect(ctx context.Context, encodedKey string) error {
return c.do(ctx, http.MethodDelete, fmt.Sprintf("/api/user/keys/%s", encodedKey), nil, nil)
}
// LaunchModels returns the recommended models for ollama launch.
func (c *Client) LaunchModels(ctx context.Context) (*LaunchModelsResponse, error) {
	resp := &LaunchModelsResponse{}
	err := c.do(ctx, http.MethodGet, "/api/x/launch-models", nil, resp)
	if err != nil {
		return nil, err
	}
	return resp, nil
}
func (c *Client) Whoami(ctx context.Context) (*UserResponse, error) {
var resp UserResponse
if err := c.do(ctx, http.MethodPost, "/api/me", nil, &resp); err != nil {

View File

@@ -923,6 +923,20 @@ type UserResponse struct {
Plan string `json:"plan,omitempty"`
}
// LaunchModel describes a recommended model returned by the launch-models endpoint.
type LaunchModel struct {
	// Model is the model reference, e.g. "qwen3.5" or "kimi-k2.5:cloud".
	Model string `json:"model"`
	// Description is a short human-readable summary shown in pickers.
	Description string `json:"description"`
	// ContextLength is the context window in tokens; zero when unknown
	// (local models in this payload leave it unset).
	ContextLength int `json:"context_length"`
	// MaxOutputTokens is the output token limit; zero when unknown.
	MaxOutputTokens int `json:"max_output_tokens"`
	// VRAM is a rough local memory requirement such as "~16GB";
	// empty for cloud models.
	VRAM string `json:"vram"`
}

// LaunchModelsResponse is the response from the launch-models endpoint.
type LaunchModelsResponse struct {
	// Models lists the recommended models, cloud entries first by convention.
	Models []LaunchModel `json:"models"`
}
// Tensor describes the metadata for a given tensor.
type Tensor struct {
Name string `json:"name"`

View File

@@ -299,6 +299,28 @@ func TestIsCloudModel(t *testing.T) {
})
}
// testLaunchModelsJSON mirrors the /api/x/launch-models response body used by
// test servers; it matches the entries of testRecommendedModels.
const testLaunchModelsJSON = `{"models":[
{"model":"kimi-k2.5:cloud","description":"Multimodal reasoning with subagents","context_length":262144,"max_output_tokens":262144,"vram":""},
{"model":"qwen3.5:cloud","description":"Reasoning, coding, and agentic tool use with vision","context_length":262144,"max_output_tokens":32768,"vram":""},
{"model":"glm-5.1:cloud","description":"Reasoning and code generation","context_length":202752,"max_output_tokens":131072,"vram":""},
{"model":"minimax-m2.7:cloud","description":"Fast, efficient coding and real-world productivity","context_length":204800,"max_output_tokens":128000,"vram":""},
{"model":"gemma4","description":"Reasoning and code generation locally","context_length":0,"max_output_tokens":0,"vram":"~16GB"},
{"model":"qwen3.5","description":"Reasoning, coding, and visual understanding locally","context_length":0,"max_output_tokens":0,"vram":"~11GB"}
]}`
// testRecommendedModels returns the full cloud+local recommended models list
// for use in tests.
func testRecommendedModels() []ModelItem {
	cloud := []ModelItem{
		{Name: "kimi-k2.5:cloud", Description: "Multimodal reasoning with subagents", Recommended: true},
		{Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true},
		{Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true},
		{Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true},
	}
	local := []ModelItem{
		{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAM: "~16GB"},
		{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAM: "~11GB"},
	}
	return append(cloud, local...)
}
func names(items []ModelItem) []string {
var out []string
for _, item := range items {
@@ -308,7 +330,7 @@ func names(items []ModelItem) []string {
}
func TestBuildModelList_NoExistingModels(t *testing.T) {
items, _, _, _ := buildModelList(nil, nil, "")
items, _, _, _ := buildModelList(nil, testRecommendedModels(), nil, "")
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5"}
if diff := cmp.Diff(want, names(items)); diff != "" {
@@ -334,7 +356,7 @@ func TestBuildModelList_OnlyLocalModels_CloudRecsAtBottom(t *testing.T) {
{Name: "qwen2.5:latest", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// Recommended pinned at top (local recs first, then cloud recs when only-local), then installed non-recs
@@ -350,7 +372,7 @@ func TestBuildModelList_BothCloudAndLocal_RegularSort(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// All recs pinned at top (cloud before local in mixed case), then non-recs
@@ -366,7 +388,7 @@ func TestBuildModelList_PreCheckedFirst(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, []string{"llama3.2"}, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), []string{"llama3.2"}, "")
got := names(items)
if got[0] != "llama3.2" {
@@ -383,7 +405,7 @@ func TestBuildModelList_CurrentDefaultFirstAmongCheckedNonRec(t *testing.T) {
// "zebra" is the current/default; all three are checked, none are recommended.
// Expected non-rec order: zebra (default), alpha, middle (alphabetical).
items, _, _, _ := buildModelList(existing, []string{"zebra", "alpha", "middle"}, "zebra")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), []string{"zebra", "alpha", "middle"}, "zebra")
got := names(items)
// Skip recommended items to find the non-rec portion.
@@ -413,7 +435,7 @@ func TestBuildModelList_ExistingRecommendedMarked(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
for _, item := range items {
switch item.Name {
@@ -439,7 +461,7 @@ func TestBuildModelList_ExistingCloudModelsNotPushedToBottom(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// gemma4 and glm-5.1:cloud are installed so they sort normally;
@@ -457,7 +479,7 @@ func TestBuildModelList_HasRecommendedCloudModel_OnlyNonInstalledAtBottom(t *tes
{Name: "kimi-k2.5:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// kimi-k2.5:cloud is installed so it sorts normally;
@@ -489,7 +511,7 @@ func TestBuildModelList_LatestTagStripped(t *testing.T) {
{Name: "llama3.2:latest", Remote: false},
}
items, _, existingModels, _ := buildModelList(existing, nil, "")
items, _, existingModels, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// :latest should be stripped from display names
@@ -522,7 +544,7 @@ func TestBuildModelList_ReturnsExistingAndCloudMaps(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
_, _, existingModels, cloudModels := buildModelList(existing, nil, "")
_, _, existingModels, cloudModels := buildModelList(existing, testRecommendedModels(), nil, "")
if !existingModels["llama3.2"] {
t.Error("llama3.2 should be in existingModels")
@@ -554,7 +576,7 @@ func TestBuildModelList_RecommendedFieldSet(t *testing.T) {
{Name: "llama3.2:latest", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
for _, item := range items {
switch item.Name {
@@ -576,7 +598,7 @@ func TestBuildModelList_MixedCase_CloudRecsFirst(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// Cloud recs should sort before local recs in mixed case
@@ -592,7 +614,7 @@ func TestBuildModelList_OnlyLocal_LocalRecsFirst(t *testing.T) {
{Name: "llama3.2:latest", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// Local recs should sort before cloud recs in only-local case
@@ -609,7 +631,7 @@ func TestBuildModelList_RecsAboveNonRecs(t *testing.T) {
{Name: "custom-model", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
got := names(items)
// All recommended models should appear before non-recommended installed models
@@ -635,7 +657,7 @@ func TestBuildModelList_CheckedBeforeRecs(t *testing.T) {
{Name: "glm-5.1:cloud", Remote: true},
}
items, _, _, _ := buildModelList(existing, []string{"llama3.2"}, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), []string{"llama3.2"}, "")
got := names(items)
if got[0] != "llama3.2" {
@@ -649,7 +671,7 @@ func TestBuildModelList_CurrentPrefersExactLocalOverCloudPrefix(t *testing.T) {
{Name: "qwen3.5", Remote: false},
}
_, orderedChecked, _, _ := buildModelList(existing, []string{"qwen3.5", "qwen3.5:cloud"}, "qwen3.5")
_, orderedChecked, _, _ := buildModelList(existing, testRecommendedModels(), []string{"qwen3.5", "qwen3.5:cloud"}, "qwen3.5")
if len(orderedChecked) < 2 {
t.Fatalf("expected orderedChecked to preserve both selections, got %v", orderedChecked)
}
@@ -664,7 +686,7 @@ func TestBuildModelList_CurrentPrefersExactCloudOverLocalPrefix(t *testing.T) {
{Name: "qwen3.5:cloud", Remote: true},
}
_, orderedChecked, _, _ := buildModelList(existing, []string{"qwen3.5:cloud", "qwen3.5"}, "qwen3.5:cloud")
_, orderedChecked, _, _ := buildModelList(existing, testRecommendedModels(), []string{"qwen3.5:cloud", "qwen3.5"}, "qwen3.5:cloud")
if len(orderedChecked) < 2 {
t.Fatalf("expected orderedChecked to preserve both selections, got %v", orderedChecked)
}
@@ -1510,7 +1532,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
existing := []modelInfo{
{Name: "qwen3.5", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
for _, item := range items {
if item.Name == "qwen3.5" {
@@ -1527,7 +1549,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
})
t.Run("not-installed local rec has VRAM in description", func(t *testing.T) {
items, _, _, _ := buildModelList(nil, nil, "")
items, _, _, _ := buildModelList(nil, testRecommendedModels(), nil, "")
for _, item := range items {
if item.Name == "qwen3.5" {
@@ -1544,7 +1566,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
existing := []modelInfo{
{Name: "qwen3.5", Remote: false},
}
items, _, _, _ := buildModelList(existing, nil, "")
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
for _, item := range items {
if item.Name == "qwen3.5" {

View File

@@ -154,6 +154,7 @@ type ModelItem struct {
Name string
Description string
Recommended bool
VRAM string
}
// LaunchCmd returns the cobra command for launching integrations.
@@ -560,8 +561,9 @@ func (c *launcherClient) loadSelectableModels(ctx context.Context, preChecked []
return nil, nil, err
}
recs := fetchRecommendedModels(ctx, c.apiClient)
cloudDisabled, _ := cloudStatusDisabled(ctx, c.apiClient)
items, orderedChecked, _, _ := buildModelList(c.modelInventory, preChecked, current)
items, orderedChecked, _, _ := buildModelList(c.modelInventory, recs, preChecked, current)
if cloudDisabled {
items = filterCloudItems(items)
orderedChecked = c.filterDisabledCloudModels(ctx, orderedChecked)

View File

@@ -580,6 +580,8 @@ func TestResolveRunModel_UsesSignInHookForCloudModel(t *testing.T) {
case "/api/me":
w.WriteHeader(http.StatusUnauthorized)
fmt.Fprint(w, `{"error":"unauthorized","signin_url":"https://example.com/signin"}`)
case "/api/x/launch-models":
fmt.Fprint(w, testLaunchModelsJSON)
default:
http.NotFound(w, r)
}
@@ -708,6 +710,8 @@ func TestLaunchIntegration_EditorForceConfigure_FloatsCheckedModelsInPicker(t *t
fmt.Fprintf(w, `{"model":%q}`, req.Model)
case "/api/me":
fmt.Fprint(w, `{"name":"test-user"}`)
case "/api/x/launch-models":
fmt.Fprint(w, testLaunchModelsJSON)
default:
http.NotFound(w, r)
}
@@ -1126,6 +1130,8 @@ func TestLaunchIntegration_EditorConfigureMultiAllFailuresKeepsExistingAndSkipsL
default:
http.NotFound(w, r)
}
case "/api/x/launch-models":
fmt.Fprint(w, testLaunchModelsJSON)
default:
http.NotFound(w, r)
}

View File

@@ -4,12 +4,14 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"os"
"os/exec"
"runtime"
"slices"
"strings"
"sync"
"time"
"github.com/ollama/ollama/api"
@@ -20,26 +22,15 @@ import (
"github.com/ollama/ollama/progress"
)
var recommendedModels = []ModelItem{
{Name: "kimi-k2.5:cloud", Description: "Multimodal reasoning with subagents", Recommended: true},
{Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true},
{Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true},
{Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true},
{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true},
{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true},
}
var recommendedVRAM = map[string]string{
"gemma4": "~16GB",
"qwen3.5": "~11GB",
}
// cloudModelLimit holds context and output token limits for a cloud model.
type cloudModelLimit struct {
	// Context is the context window size in tokens.
	Context int
	// Output is the maximum number of output tokens.
	Output int
}
// cloudModelLimitsMu guards concurrent access to cloudModelLimits.
var cloudModelLimitsMu sync.RWMutex
// cloudModelLimits maps cloud model base names to their token limits.
// TODO(parthsareen): grab context/output limits from model info instead of hardcoding
var cloudModelLimits = map[string]cloudModelLimit{
@@ -69,13 +60,53 @@ var cloudModelLimits = map[string]cloudModelLimit{
func lookupCloudModelLimit(name string) (cloudModelLimit, bool) {
base, stripped := modelref.StripCloudSourceTag(name)
if stripped {
if l, ok := cloudModelLimits[base]; ok {
cloudModelLimitsMu.RLock()
l, ok := cloudModelLimits[base]
cloudModelLimitsMu.RUnlock()
if ok {
return l, true
}
}
return cloudModelLimit{}, false
}
// defaultRecommendedModels is the client-side fallback used when the server
// endpoint is unreachable (e.g. older server, network error). It lists only
// local models; cloud recommendations come exclusively from the server.
var defaultRecommendedModels = []ModelItem{
	{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAM: "~16GB"},
	{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAM: "~11GB"},
}
// fetchRecommendedModels fetches the recommended models from the local Ollama
// server's launch-models endpoint. The server caches models from the remote
// registry and merges them with built-in local defaults, so this call is fast.
// Falls back to a built-in default list if the server is unavailable.
//
// As a side effect, any entry carrying token limits is recorded in the
// package-level cloudModelLimits map for later lookup.
func fetchRecommendedModels(ctx context.Context, client *api.Client) []ModelItem {
	resp, err := client.LaunchModels(ctx)
	if err != nil {
		slog.Warn("failed to load launch models from server, using defaults", "error", err)
		return defaultRecommendedModels
	}

	// Build the picker items outside the lock; only the map writes below
	// need cloudModelLimitsMu.
	items := make([]ModelItem, 0, len(resp.Models))
	for _, m := range resp.Models {
		items = append(items, ModelItem{
			Name:        m.Model,
			Description: m.Description,
			Recommended: true,
			VRAM:        m.VRAM,
		})
	}

	cloudModelLimitsMu.Lock()
	for _, m := range resp.Models {
		if m.ContextLength > 0 || m.MaxOutputTokens > 0 {
			// NOTE(review): lookupCloudModelLimit strips the cloud source tag
			// and indexes this map by the base name, so an entry keyed here
			// with a ":cloud" suffix would never be found. Confirm the server
			// returns base names in Model, or strip the tag before storing.
			cloudModelLimits[m.Model] = cloudModelLimit{
				Context: m.ContextLength,
				Output:  m.MaxOutputTokens,
			}
		}
	}
	cloudModelLimitsMu.Unlock()

	return items
}
// missingModelPolicy controls how model-not-found errors should be handled.
type missingModelPolicy int
@@ -260,16 +291,19 @@ func confirmEditorEdit(runner Runner, editor Editor) (bool, error) {
}
// buildModelList merges existing models with recommendations for selection UIs.
func buildModelList(existing []modelInfo, preChecked []string, current string) (items []ModelItem, orderedChecked []string, existingModels, cloudModels map[string]bool) {
func buildModelList(existing []modelInfo, recs []ModelItem, preChecked []string, current string) (items []ModelItem, orderedChecked []string, existingModels, cloudModels map[string]bool) {
existingModels = make(map[string]bool)
cloudModels = make(map[string]bool)
recommended := make(map[string]bool)
var hasLocalModel, hasCloudModel bool
recDesc := make(map[string]string)
for _, rec := range recommendedModels {
recVRAM := make(map[string]string)
for _, rec := range recs {
recommended[rec.Name] = true
recDesc[rec.Name] = rec.Description
if rec.VRAM != "" {
recVRAM[rec.Name] = rec.VRAM
}
}
for _, m := range existing {
@@ -282,11 +316,11 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
}
displayName := strings.TrimSuffix(m.Name, ":latest")
existingModels[displayName] = true
item := ModelItem{Name: displayName, Recommended: recommended[displayName], Description: recDesc[displayName]}
item := ModelItem{Name: displayName, Recommended: recommended[displayName], Description: recDesc[displayName], VRAM: recVRAM[displayName]}
items = append(items, item)
}
for _, rec := range recommendedModels {
for _, rec := range recs {
if existingModels[rec.Name] || existingModels[rec.Name+":latest"] {
continue
}
@@ -332,8 +366,8 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
if items[i].Description != "" {
parts = append(parts, items[i].Description)
}
if vram := recommendedVRAM[items[i].Name]; vram != "" {
parts = append(parts, vram)
if items[i].VRAM != "" {
parts = append(parts, items[i].VRAM)
}
parts = append(parts, "(not downloaded)")
items[i].Description = strings.Join(parts, ", ")
@@ -341,7 +375,7 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
}
recRank := make(map[string]int)
for i, rec := range recommendedModels {
for i, rec := range recs {
recRank[rec.Name] = i + 1
}

View File

@@ -102,6 +102,7 @@ type Server struct {
sched *Scheduler
defaultNumCtx int
requestLogger *inferenceRequestLogger
launchModels atomic.Pointer[api.LaunchModelsResponse]
}
func init() {
@@ -1706,6 +1707,7 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
r.POST("/api/copy", s.CopyHandler)
r.POST("/api/experimental/web_search", s.WebSearchExperimentalHandler)
r.POST("/api/experimental/web_fetch", s.WebFetchExperimentalHandler)
r.GET("/api/x/launch-models", s.LaunchModelsHandler)
// Inference
r.GET("/api/ps", s.PsHandler)
@@ -1803,6 +1805,11 @@ func Serve(ln net.Listener) error {
http.Handle("/", h)
ctx, done := context.WithCancel(context.Background())
if !envconfig.NoCloud() {
go s.refreshLaunchModels(ctx)
}
schedCtx, schedDone := context.WithCancel(ctx)
sched := InitScheduler(schedCtx)
s.sched = sched
@@ -2778,3 +2785,86 @@ func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, mo
c.JSON(http.StatusOK, finalResponse)
}
}
// launchModelsURL is the remote registry endpoint that publishes the current
// recommended launch models.
const launchModelsURL = "https://ollama.com/api/x/launch-models"

// defaultLaunchModels are local models that are always recommended. They are
// served as-is until the first successful remote fetch, and merged into every
// remote response thereafter.
var defaultLaunchModels = api.LaunchModelsResponse{
	Models: []api.LaunchModel{
		{Model: "gemma4", Description: "Reasoning and code generation locally", VRAM: "~16GB"},
		{Model: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", VRAM: "~11GB"},
	},
}
// LaunchModelsHandler returns the cached recommended launch models, serving
// the built-in local defaults until the first remote fetch has completed.
func (s *Server) LaunchModelsHandler(c *gin.Context) {
	if models := s.launchModels.Load(); models != nil {
		c.JSON(http.StatusOK, models)
		return
	}
	c.JSON(http.StatusOK, &defaultLaunchModels)
}
// refreshLaunchModels fetches the recommended launch models from the remote
// server on startup and then refreshes every 24 hours. It merges remote models
// with the built-in local defaults before caching. It runs until ctx is
// cancelled and is intended to be started as a goroutine from Serve.
func (s *Server) refreshLaunchModels(ctx context.Context) {
	// Prime the cache immediately so the first /api/x/launch-models request
	// doesn't have to wait for the first tick.
	s.fetchLaunchModels(ctx)
	ticker := time.NewTicker(24 * time.Hour)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			// Server shutdown: stop refreshing.
			return
		case <-ticker.C:
			s.fetchLaunchModels(ctx)
		}
	}
}
// fetchLaunchModels retrieves the recommended launch models from the remote
// registry, appends any built-in local default not already present, and
// stores the merged result for LaunchModelsHandler to serve. Failures are
// logged and leave the previously cached value untouched.
func (s *Server) fetchLaunchModels(ctx context.Context) {
	// Bound the remote call so a slow registry can't stall the refresh loop.
	reqCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, launchModelsURL, nil)
	if err != nil {
		slog.Warn("failed to create launch models request", "error", err)
		return
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		slog.Warn("failed to fetch launch models", "error", err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		slog.Warn("unexpected status fetching launch models", "status", resp.StatusCode)
		return
	}

	var merged api.LaunchModelsResponse
	if err := json.NewDecoder(resp.Body).Decode(&merged); err != nil {
		slog.Warn("failed to decode launch models", "error", err)
		return
	}

	// Remote models keep their order; built-in local defaults are appended
	// only when the remote response doesn't already list them.
	seen := make(map[string]bool, len(merged.Models))
	for _, m := range merged.Models {
		seen[m.Model] = true
	}
	for _, d := range defaultLaunchModels.Models {
		if !seen[d.Model] {
			merged.Models = append(merged.Models, d)
		}
	}

	s.launchModels.Store(&merged)
	slog.Info("cached launch models", "count", len(merged.Models))
}