mirror of
https://github.com/ollama/ollama.git
synced 2026-04-18 09:03:35 -04:00
launch: fetch recommended models from server endpoint
Add /api/x/launch-models endpoint that serves recommended models from the registry (ollama.com) with a 24h cache, merged with built-in local defaults. The client fetches from this endpoint instead of using a hardcoded list, with a client-side fallback for older servers.
This commit is contained in:
@@ -469,6 +469,15 @@ func (c *Client) Disconnect(ctx context.Context, encodedKey string) error {
|
||||
return c.do(ctx, http.MethodDelete, fmt.Sprintf("/api/user/keys/%s", encodedKey), nil, nil)
|
||||
}
|
||||
|
||||
// LaunchModels returns the recommended models for ollama launch.
|
||||
func (c *Client) LaunchModels(ctx context.Context) (*LaunchModelsResponse, error) {
|
||||
var resp LaunchModelsResponse
|
||||
if err := c.do(ctx, http.MethodGet, "/api/x/launch-models", nil, &resp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &resp, nil
|
||||
}
|
||||
|
||||
func (c *Client) Whoami(ctx context.Context) (*UserResponse, error) {
|
||||
var resp UserResponse
|
||||
if err := c.do(ctx, http.MethodPost, "/api/me", nil, &resp); err != nil {
|
||||
|
||||
14
api/types.go
14
api/types.go
@@ -923,6 +923,20 @@ type UserResponse struct {
|
||||
Plan string `json:"plan,omitempty"`
|
||||
}
|
||||
|
||||
// LaunchModel describes a recommended model returned by the
// launch-models endpoint.
type LaunchModel struct {
	// Model is the model name, e.g. "qwen3.5" or "kimi-k2.5:cloud".
	Model string `json:"model"`
	// Description is a short human-readable summary of the model.
	Description string `json:"description"`
	// ContextLength is the context window in tokens; 0 when not reported.
	ContextLength int `json:"context_length"`
	// MaxOutputTokens is the output token limit; 0 when not reported.
	MaxOutputTokens int `json:"max_output_tokens"`
	// VRAM is a rough VRAM requirement for local models, e.g. "~16GB";
	// empty for cloud models.
	VRAM string `json:"vram"`
}
|
||||
|
||||
// LaunchModelsResponse is the response from the launch-models endpoint.
|
||||
type LaunchModelsResponse struct {
|
||||
Models []LaunchModel `json:"models"`
|
||||
}
|
||||
|
||||
// Tensor describes the metadata for a given tensor.
|
||||
type Tensor struct {
|
||||
Name string `json:"name"`
|
||||
|
||||
@@ -299,6 +299,28 @@ func TestIsCloudModel(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
// testLaunchModelsJSON mirrors the /api/x/launch-models payload served by
// the fake HTTP servers in these tests.
const testLaunchModelsJSON = `{"models":[
{"model":"kimi-k2.5:cloud","description":"Multimodal reasoning with subagents","context_length":262144,"max_output_tokens":262144,"vram":""},
{"model":"qwen3.5:cloud","description":"Reasoning, coding, and agentic tool use with vision","context_length":262144,"max_output_tokens":32768,"vram":""},
{"model":"glm-5.1:cloud","description":"Reasoning and code generation","context_length":202752,"max_output_tokens":131072,"vram":""},
{"model":"minimax-m2.7:cloud","description":"Fast, efficient coding and real-world productivity","context_length":204800,"max_output_tokens":128000,"vram":""},
{"model":"gemma4","description":"Reasoning and code generation locally","context_length":0,"max_output_tokens":0,"vram":"~16GB"},
{"model":"qwen3.5","description":"Reasoning, coding, and visual understanding locally","context_length":0,"max_output_tokens":0,"vram":"~11GB"}
]}`
|
||||
|
||||
// testRecommendedModels returns the full cloud+local recommended models list
|
||||
// for use in tests.
|
||||
func testRecommendedModels() []ModelItem {
|
||||
return []ModelItem{
|
||||
{Name: "kimi-k2.5:cloud", Description: "Multimodal reasoning with subagents", Recommended: true},
|
||||
{Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true},
|
||||
{Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true},
|
||||
{Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true},
|
||||
{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAM: "~16GB"},
|
||||
{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAM: "~11GB"},
|
||||
}
|
||||
}
|
||||
|
||||
func names(items []ModelItem) []string {
|
||||
var out []string
|
||||
for _, item := range items {
|
||||
@@ -308,7 +330,7 @@ func names(items []ModelItem) []string {
|
||||
}
|
||||
|
||||
func TestBuildModelList_NoExistingModels(t *testing.T) {
|
||||
items, _, _, _ := buildModelList(nil, nil, "")
|
||||
items, _, _, _ := buildModelList(nil, testRecommendedModels(), nil, "")
|
||||
|
||||
want := []string{"kimi-k2.5:cloud", "qwen3.5:cloud", "glm-5.1:cloud", "minimax-m2.7:cloud", "gemma4", "qwen3.5"}
|
||||
if diff := cmp.Diff(want, names(items)); diff != "" {
|
||||
@@ -334,7 +356,7 @@ func TestBuildModelList_OnlyLocalModels_CloudRecsAtBottom(t *testing.T) {
|
||||
{Name: "qwen2.5:latest", Remote: false},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
got := names(items)
|
||||
|
||||
// Recommended pinned at top (local recs first, then cloud recs when only-local), then installed non-recs
|
||||
@@ -350,7 +372,7 @@ func TestBuildModelList_BothCloudAndLocal_RegularSort(t *testing.T) {
|
||||
{Name: "glm-5.1:cloud", Remote: true},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
got := names(items)
|
||||
|
||||
// All recs pinned at top (cloud before local in mixed case), then non-recs
|
||||
@@ -366,7 +388,7 @@ func TestBuildModelList_PreCheckedFirst(t *testing.T) {
|
||||
{Name: "glm-5.1:cloud", Remote: true},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, []string{"llama3.2"}, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), []string{"llama3.2"}, "")
|
||||
got := names(items)
|
||||
|
||||
if got[0] != "llama3.2" {
|
||||
@@ -383,7 +405,7 @@ func TestBuildModelList_CurrentDefaultFirstAmongCheckedNonRec(t *testing.T) {
|
||||
|
||||
// "zebra" is the current/default; all three are checked, none are recommended.
|
||||
// Expected non-rec order: zebra (default), alpha, middle (alphabetical).
|
||||
items, _, _, _ := buildModelList(existing, []string{"zebra", "alpha", "middle"}, "zebra")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), []string{"zebra", "alpha", "middle"}, "zebra")
|
||||
got := names(items)
|
||||
|
||||
// Skip recommended items to find the non-rec portion.
|
||||
@@ -413,7 +435,7 @@ func TestBuildModelList_ExistingRecommendedMarked(t *testing.T) {
|
||||
{Name: "glm-5.1:cloud", Remote: true},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
|
||||
for _, item := range items {
|
||||
switch item.Name {
|
||||
@@ -439,7 +461,7 @@ func TestBuildModelList_ExistingCloudModelsNotPushedToBottom(t *testing.T) {
|
||||
{Name: "glm-5.1:cloud", Remote: true},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
got := names(items)
|
||||
|
||||
// gemma4 and glm-5.1:cloud are installed so they sort normally;
|
||||
@@ -457,7 +479,7 @@ func TestBuildModelList_HasRecommendedCloudModel_OnlyNonInstalledAtBottom(t *tes
|
||||
{Name: "kimi-k2.5:cloud", Remote: true},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
got := names(items)
|
||||
|
||||
// kimi-k2.5:cloud is installed so it sorts normally;
|
||||
@@ -489,7 +511,7 @@ func TestBuildModelList_LatestTagStripped(t *testing.T) {
|
||||
{Name: "llama3.2:latest", Remote: false},
|
||||
}
|
||||
|
||||
items, _, existingModels, _ := buildModelList(existing, nil, "")
|
||||
items, _, existingModels, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
got := names(items)
|
||||
|
||||
// :latest should be stripped from display names
|
||||
@@ -522,7 +544,7 @@ func TestBuildModelList_ReturnsExistingAndCloudMaps(t *testing.T) {
|
||||
{Name: "glm-5.1:cloud", Remote: true},
|
||||
}
|
||||
|
||||
_, _, existingModels, cloudModels := buildModelList(existing, nil, "")
|
||||
_, _, existingModels, cloudModels := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
|
||||
if !existingModels["llama3.2"] {
|
||||
t.Error("llama3.2 should be in existingModels")
|
||||
@@ -554,7 +576,7 @@ func TestBuildModelList_RecommendedFieldSet(t *testing.T) {
|
||||
{Name: "llama3.2:latest", Remote: false},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
|
||||
for _, item := range items {
|
||||
switch item.Name {
|
||||
@@ -576,7 +598,7 @@ func TestBuildModelList_MixedCase_CloudRecsFirst(t *testing.T) {
|
||||
{Name: "glm-5.1:cloud", Remote: true},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
got := names(items)
|
||||
|
||||
// Cloud recs should sort before local recs in mixed case
|
||||
@@ -592,7 +614,7 @@ func TestBuildModelList_OnlyLocal_LocalRecsFirst(t *testing.T) {
|
||||
{Name: "llama3.2:latest", Remote: false},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
got := names(items)
|
||||
|
||||
// Local recs should sort before cloud recs in only-local case
|
||||
@@ -609,7 +631,7 @@ func TestBuildModelList_RecsAboveNonRecs(t *testing.T) {
|
||||
{Name: "custom-model", Remote: false},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
got := names(items)
|
||||
|
||||
// All recommended models should appear before non-recommended installed models
|
||||
@@ -635,7 +657,7 @@ func TestBuildModelList_CheckedBeforeRecs(t *testing.T) {
|
||||
{Name: "glm-5.1:cloud", Remote: true},
|
||||
}
|
||||
|
||||
items, _, _, _ := buildModelList(existing, []string{"llama3.2"}, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), []string{"llama3.2"}, "")
|
||||
got := names(items)
|
||||
|
||||
if got[0] != "llama3.2" {
|
||||
@@ -649,7 +671,7 @@ func TestBuildModelList_CurrentPrefersExactLocalOverCloudPrefix(t *testing.T) {
|
||||
{Name: "qwen3.5", Remote: false},
|
||||
}
|
||||
|
||||
_, orderedChecked, _, _ := buildModelList(existing, []string{"qwen3.5", "qwen3.5:cloud"}, "qwen3.5")
|
||||
_, orderedChecked, _, _ := buildModelList(existing, testRecommendedModels(), []string{"qwen3.5", "qwen3.5:cloud"}, "qwen3.5")
|
||||
if len(orderedChecked) < 2 {
|
||||
t.Fatalf("expected orderedChecked to preserve both selections, got %v", orderedChecked)
|
||||
}
|
||||
@@ -664,7 +686,7 @@ func TestBuildModelList_CurrentPrefersExactCloudOverLocalPrefix(t *testing.T) {
|
||||
{Name: "qwen3.5:cloud", Remote: true},
|
||||
}
|
||||
|
||||
_, orderedChecked, _, _ := buildModelList(existing, []string{"qwen3.5:cloud", "qwen3.5"}, "qwen3.5:cloud")
|
||||
_, orderedChecked, _, _ := buildModelList(existing, testRecommendedModels(), []string{"qwen3.5:cloud", "qwen3.5"}, "qwen3.5:cloud")
|
||||
if len(orderedChecked) < 2 {
|
||||
t.Fatalf("expected orderedChecked to preserve both selections, got %v", orderedChecked)
|
||||
}
|
||||
@@ -1510,7 +1532,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
|
||||
existing := []modelInfo{
|
||||
{Name: "qwen3.5", Remote: false},
|
||||
}
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
|
||||
for _, item := range items {
|
||||
if item.Name == "qwen3.5" {
|
||||
@@ -1527,7 +1549,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("not-installed local rec has VRAM in description", func(t *testing.T) {
|
||||
items, _, _, _ := buildModelList(nil, nil, "")
|
||||
items, _, _, _ := buildModelList(nil, testRecommendedModels(), nil, "")
|
||||
|
||||
for _, item := range items {
|
||||
if item.Name == "qwen3.5" {
|
||||
@@ -1544,7 +1566,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
|
||||
existing := []modelInfo{
|
||||
{Name: "qwen3.5", Remote: false},
|
||||
}
|
||||
items, _, _, _ := buildModelList(existing, nil, "")
|
||||
items, _, _, _ := buildModelList(existing, testRecommendedModels(), nil, "")
|
||||
|
||||
for _, item := range items {
|
||||
if item.Name == "qwen3.5" {
|
||||
|
||||
@@ -154,6 +154,7 @@ type ModelItem struct {
|
||||
Name string
|
||||
Description string
|
||||
Recommended bool
|
||||
VRAM string
|
||||
}
|
||||
|
||||
// LaunchCmd returns the cobra command for launching integrations.
|
||||
@@ -560,8 +561,9 @@ func (c *launcherClient) loadSelectableModels(ctx context.Context, preChecked []
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
recs := fetchRecommendedModels(ctx, c.apiClient)
|
||||
cloudDisabled, _ := cloudStatusDisabled(ctx, c.apiClient)
|
||||
items, orderedChecked, _, _ := buildModelList(c.modelInventory, preChecked, current)
|
||||
items, orderedChecked, _, _ := buildModelList(c.modelInventory, recs, preChecked, current)
|
||||
if cloudDisabled {
|
||||
items = filterCloudItems(items)
|
||||
orderedChecked = c.filterDisabledCloudModels(ctx, orderedChecked)
|
||||
|
||||
@@ -580,6 +580,8 @@ func TestResolveRunModel_UsesSignInHookForCloudModel(t *testing.T) {
|
||||
case "/api/me":
|
||||
w.WriteHeader(http.StatusUnauthorized)
|
||||
fmt.Fprint(w, `{"error":"unauthorized","signin_url":"https://example.com/signin"}`)
|
||||
case "/api/x/launch-models":
|
||||
fmt.Fprint(w, testLaunchModelsJSON)
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
@@ -708,6 +710,8 @@ func TestLaunchIntegration_EditorForceConfigure_FloatsCheckedModelsInPicker(t *t
|
||||
fmt.Fprintf(w, `{"model":%q}`, req.Model)
|
||||
case "/api/me":
|
||||
fmt.Fprint(w, `{"name":"test-user"}`)
|
||||
case "/api/x/launch-models":
|
||||
fmt.Fprint(w, testLaunchModelsJSON)
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
@@ -1126,6 +1130,8 @@ func TestLaunchIntegration_EditorConfigureMultiAllFailuresKeepsExistingAndSkipsL
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
case "/api/x/launch-models":
|
||||
fmt.Fprint(w, testLaunchModelsJSON)
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
|
||||
@@ -4,12 +4,14 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
@@ -20,26 +22,15 @@ import (
|
||||
"github.com/ollama/ollama/progress"
|
||||
)
|
||||
|
||||
var recommendedModels = []ModelItem{
|
||||
{Name: "kimi-k2.5:cloud", Description: "Multimodal reasoning with subagents", Recommended: true},
|
||||
{Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true},
|
||||
{Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true},
|
||||
{Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true},
|
||||
{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true},
|
||||
{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true},
|
||||
}
|
||||
|
||||
var recommendedVRAM = map[string]string{
|
||||
"gemma4": "~16GB",
|
||||
"qwen3.5": "~11GB",
|
||||
}
|
||||
|
||||
// cloudModelLimit holds context and output token limits for a cloud model.
type cloudModelLimit struct {
	Context int // maximum context window in tokens
	Output  int // maximum output tokens
}
|
||||
|
||||
// cloudModelLimitsMu guards concurrent access to cloudModelLimits.
|
||||
var cloudModelLimitsMu sync.RWMutex
|
||||
|
||||
// cloudModelLimits maps cloud model base names to their token limits.
|
||||
// TODO(parthsareen): grab context/output limits from model info instead of hardcoding
|
||||
var cloudModelLimits = map[string]cloudModelLimit{
|
||||
@@ -69,13 +60,53 @@ var cloudModelLimits = map[string]cloudModelLimit{
|
||||
func lookupCloudModelLimit(name string) (cloudModelLimit, bool) {
|
||||
base, stripped := modelref.StripCloudSourceTag(name)
|
||||
if stripped {
|
||||
if l, ok := cloudModelLimits[base]; ok {
|
||||
cloudModelLimitsMu.RLock()
|
||||
l, ok := cloudModelLimits[base]
|
||||
cloudModelLimitsMu.RUnlock()
|
||||
if ok {
|
||||
return l, true
|
||||
}
|
||||
}
|
||||
return cloudModelLimit{}, false
|
||||
}
|
||||
|
||||
// defaultRecommendedModels is the client-side fallback used when the server
|
||||
// endpoint is unreachable (e.g. older server, network error).
|
||||
var defaultRecommendedModels = []ModelItem{
|
||||
{Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAM: "~16GB"},
|
||||
{Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAM: "~11GB"},
|
||||
}
|
||||
|
||||
// fetchRecommendedModels fetches the recommended models from the local Ollama
|
||||
// server's launch-models endpoint. The server caches models from the remote
|
||||
// registry and merges them with built-in local defaults, so this call is fast.
|
||||
// Falls back to a built-in default list if the server is unavailable.
|
||||
func fetchRecommendedModels(ctx context.Context, client *api.Client) []ModelItem {
|
||||
resp, err := client.LaunchModels(ctx)
|
||||
if err != nil {
|
||||
slog.Warn("failed to load launch models from server, using defaults", "error", err)
|
||||
return defaultRecommendedModels
|
||||
}
|
||||
var items []ModelItem
|
||||
cloudModelLimitsMu.Lock()
|
||||
for _, m := range resp.Models {
|
||||
if m.ContextLength > 0 || m.MaxOutputTokens > 0 {
|
||||
cloudModelLimits[m.Model] = cloudModelLimit{
|
||||
Context: m.ContextLength,
|
||||
Output: m.MaxOutputTokens,
|
||||
}
|
||||
}
|
||||
items = append(items, ModelItem{
|
||||
Name: m.Model,
|
||||
Description: m.Description,
|
||||
Recommended: true,
|
||||
VRAM: m.VRAM,
|
||||
})
|
||||
}
|
||||
cloudModelLimitsMu.Unlock()
|
||||
return items
|
||||
}
|
||||
|
||||
// missingModelPolicy controls how model-not-found errors should be handled.
|
||||
type missingModelPolicy int
|
||||
|
||||
@@ -260,16 +291,19 @@ func confirmEditorEdit(runner Runner, editor Editor) (bool, error) {
|
||||
}
|
||||
|
||||
// buildModelList merges existing models with recommendations for selection UIs.
|
||||
func buildModelList(existing []modelInfo, preChecked []string, current string) (items []ModelItem, orderedChecked []string, existingModels, cloudModels map[string]bool) {
|
||||
func buildModelList(existing []modelInfo, recs []ModelItem, preChecked []string, current string) (items []ModelItem, orderedChecked []string, existingModels, cloudModels map[string]bool) {
|
||||
existingModels = make(map[string]bool)
|
||||
cloudModels = make(map[string]bool)
|
||||
recommended := make(map[string]bool)
|
||||
var hasLocalModel, hasCloudModel bool
|
||||
|
||||
recDesc := make(map[string]string)
|
||||
for _, rec := range recommendedModels {
|
||||
recVRAM := make(map[string]string)
|
||||
for _, rec := range recs {
|
||||
recommended[rec.Name] = true
|
||||
recDesc[rec.Name] = rec.Description
|
||||
if rec.VRAM != "" {
|
||||
recVRAM[rec.Name] = rec.VRAM
|
||||
}
|
||||
}
|
||||
|
||||
for _, m := range existing {
|
||||
@@ -282,11 +316,11 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
|
||||
}
|
||||
displayName := strings.TrimSuffix(m.Name, ":latest")
|
||||
existingModels[displayName] = true
|
||||
item := ModelItem{Name: displayName, Recommended: recommended[displayName], Description: recDesc[displayName]}
|
||||
item := ModelItem{Name: displayName, Recommended: recommended[displayName], Description: recDesc[displayName], VRAM: recVRAM[displayName]}
|
||||
items = append(items, item)
|
||||
}
|
||||
|
||||
for _, rec := range recommendedModels {
|
||||
for _, rec := range recs {
|
||||
if existingModels[rec.Name] || existingModels[rec.Name+":latest"] {
|
||||
continue
|
||||
}
|
||||
@@ -332,8 +366,8 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
|
||||
if items[i].Description != "" {
|
||||
parts = append(parts, items[i].Description)
|
||||
}
|
||||
if vram := recommendedVRAM[items[i].Name]; vram != "" {
|
||||
parts = append(parts, vram)
|
||||
if items[i].VRAM != "" {
|
||||
parts = append(parts, items[i].VRAM)
|
||||
}
|
||||
parts = append(parts, "(not downloaded)")
|
||||
items[i].Description = strings.Join(parts, ", ")
|
||||
@@ -341,7 +375,7 @@ func buildModelList(existing []modelInfo, preChecked []string, current string) (
|
||||
}
|
||||
|
||||
recRank := make(map[string]int)
|
||||
for i, rec := range recommendedModels {
|
||||
for i, rec := range recs {
|
||||
recRank[rec.Name] = i + 1
|
||||
}
|
||||
|
||||
|
||||
@@ -102,6 +102,7 @@ type Server struct {
|
||||
sched *Scheduler
|
||||
defaultNumCtx int
|
||||
requestLogger *inferenceRequestLogger
|
||||
launchModels atomic.Pointer[api.LaunchModelsResponse]
|
||||
}
|
||||
|
||||
func init() {
|
||||
@@ -1706,6 +1707,7 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
|
||||
r.POST("/api/copy", s.CopyHandler)
|
||||
r.POST("/api/experimental/web_search", s.WebSearchExperimentalHandler)
|
||||
r.POST("/api/experimental/web_fetch", s.WebFetchExperimentalHandler)
|
||||
r.GET("/api/x/launch-models", s.LaunchModelsHandler)
|
||||
|
||||
// Inference
|
||||
r.GET("/api/ps", s.PsHandler)
|
||||
@@ -1803,6 +1805,11 @@ func Serve(ln net.Listener) error {
|
||||
http.Handle("/", h)
|
||||
|
||||
ctx, done := context.WithCancel(context.Background())
|
||||
|
||||
if !envconfig.NoCloud() {
|
||||
go s.refreshLaunchModels(ctx)
|
||||
}
|
||||
|
||||
schedCtx, schedDone := context.WithCancel(ctx)
|
||||
sched := InitScheduler(schedCtx)
|
||||
s.sched = sched
|
||||
@@ -2778,3 +2785,86 @@ func (s *Server) handleImageGenerate(c *gin.Context, req api.GenerateRequest, mo
|
||||
c.JSON(http.StatusOK, finalResponse)
|
||||
}
|
||||
}
|
||||
|
||||
const launchModelsURL = "https://ollama.com/api/x/launch-models"
|
||||
|
||||
// defaultLaunchModels are local models that are always recommended.
|
||||
var defaultLaunchModels = api.LaunchModelsResponse{
|
||||
Models: []api.LaunchModel{
|
||||
{Model: "gemma4", Description: "Reasoning and code generation locally", VRAM: "~16GB"},
|
||||
{Model: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", VRAM: "~11GB"},
|
||||
},
|
||||
}
|
||||
|
||||
// LaunchModelsHandler returns the cached recommended launch models.
|
||||
func (s *Server) LaunchModelsHandler(c *gin.Context) {
|
||||
models := s.launchModels.Load()
|
||||
if models == nil {
|
||||
c.JSON(http.StatusOK, &defaultLaunchModels)
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, models)
|
||||
}
|
||||
|
||||
// refreshLaunchModels fetches the recommended launch models from the remote
|
||||
// server on startup and then refreshes every 24 hours. It merges remote models
|
||||
// with the built-in local defaults before caching.
|
||||
func (s *Server) refreshLaunchModels(ctx context.Context) {
|
||||
s.fetchLaunchModels(ctx)
|
||||
|
||||
ticker := time.NewTicker(24 * time.Hour)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
s.fetchLaunchModels(ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) fetchLaunchModels(ctx context.Context) {
|
||||
fetchCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(fetchCtx, http.MethodGet, launchModelsURL, nil)
|
||||
if err != nil {
|
||||
slog.Warn("failed to create launch models request", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
slog.Warn("failed to fetch launch models", "error", err)
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
slog.Warn("unexpected status fetching launch models", "status", resp.StatusCode)
|
||||
return
|
||||
}
|
||||
|
||||
var remote api.LaunchModelsResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&remote); err != nil {
|
||||
slog.Warn("failed to decode launch models", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Merge: remote models first, then append any default local models
|
||||
// not already present in the remote response.
|
||||
remoteNames := make(map[string]bool, len(remote.Models))
|
||||
for _, m := range remote.Models {
|
||||
remoteNames[m.Model] = true
|
||||
}
|
||||
for _, m := range defaultLaunchModels.Models {
|
||||
if !remoteNames[m.Model] {
|
||||
remote.Models = append(remote.Models, m)
|
||||
}
|
||||
}
|
||||
|
||||
s.launchModels.Store(&remote)
|
||||
slog.Info("cached launch models", "count", len(remote.Models))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user