Update vendor ggml code to a5bb8ba4 (#13832)

Co-authored-by: Daniel Hiltgen <daniel@ollama.com>
Co-authored-by: Gabe Goodhart <ghart@us.ibm.com>
Co-authored-by: Shalini Salomi Bodapati <Shalini.Salomi.Bodapati@ibm.com>
This commit is contained in:
Jeffrey Morgan
2026-02-02 17:31:59 -08:00
committed by GitHub
parent 8f4a008139
commit ef00199fb4
241 changed files with 21271 additions and 5074 deletions

View File

@@ -26,10 +26,16 @@ llm_build_smallthinker<iswa>::llm_build_smallthinker(const llama_model & model,
ggml_tensor * inp_out_ids = build_inp_out_ids();
for (int il = 0; il < n_layer; ++il) {
ggml_tensor * inpSA = inpL;
ggml_tensor * probs = nullptr;
const float freq_base_l = model.get_rope_freq_base (cparams, il);
const float freq_scale_l = model.get_rope_freq_scale(cparams, il);
probs = build_lora_mm(model.layers[il].ffn_gate_inp, inpL); // [n_expert, n_tokens]
ggml_tensor * inpSA = inpL;
// This overlaps with SWA layers in current models, so get_rope_freq_base/scale may be superfluous
const bool use_rope = hparams.n_no_rope_layer_step == n_layer ||
il % hparams.n_no_rope_layer_step != 0;
ggml_tensor * probs = build_lora_mm(model.layers[il].ffn_gate_inp, inpL); // [n_expert, n_tokens]
cb(probs, "ffn_moe_logits", il);
// norm
@@ -52,11 +58,11 @@ llm_build_smallthinker<iswa>::llm_build_smallthinker(const llama_model & model,
Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
if (hparams.n_no_rope_layer_step == n_layer || il % hparams.n_no_rope_layer_step != 0) {
Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
if (use_rope) {
Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l,
ext_factor, attn_factor, beta_fast, beta_slow);
Kcur = ggml_rope_ext(ctx0, Kcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
Kcur = ggml_rope_ext(ctx0, Kcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l,
ext_factor, attn_factor, beta_fast, beta_slow);
}
cb(Qcur, "Qcur", il);