mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 14:26:30 +00:00
feat: llama.cpp bump (17f7f4) for SSM performance improvements (#13408)
* feat: Bump llama.cpp to the latest master (17f7f4b) This brings in significant improvements to prefill performance for all models using the SSM_CONV and SSM_SCAN ops (granite4, jamba, falcon-h, nemotron-h, Qwen3 Next) on Apple Metal. See https://github.com/ggml-org/llama.cpp/pull/17876 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * feat: Update patches 1-4 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * fix: Update patches 5-12 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * feat: Update patches 13-18 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * feat: Update patch 20 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * feat: Update patches 21-31 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * feat: Sync vendored code The two files I'm not sure about here are the swap from gemma3-iswa.cpp to gemma3.cpp (I chose to include this because I think it's required), and the inclusion of `ggml-zendnn.h` which I chose to omit. Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> --------- Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
This commit is contained in:
6
llama/llama.cpp/tools/mtmd/mtmd.cpp
vendored
6
llama/llama.cpp/tools/mtmd/mtmd.cpp
vendored
@@ -118,6 +118,7 @@ mtmd_context_params mtmd_context_params_default() {
|
||||
/* image_marker */ MTMD_DEFAULT_IMAGE_MARKER,
|
||||
/* media_marker */ mtmd_default_marker(),
|
||||
/* flash_attn_type */ LLAMA_FLASH_ATTN_TYPE_AUTO,
|
||||
/* warmup */ true,
|
||||
/* image_min_tokens */ -1,
|
||||
/* image_max_tokens */ -1,
|
||||
};
|
||||
@@ -187,6 +188,7 @@ struct mtmd_context {
|
||||
/* flash_attn_type */ CLIP_FLASH_ATTN_TYPE_AUTO,
|
||||
/* image_min_tokens */ ctx_params.image_min_tokens,
|
||||
/* image_max_tokens */ ctx_params.image_max_tokens,
|
||||
/* warmup */ ctx_params.warmup,
|
||||
};
|
||||
|
||||
auto res = clip_init(mmproj_fname, ctx_clip_params);
|
||||
@@ -314,6 +316,10 @@ struct mtmd_context {
|
||||
img_beg = "<|im_start|>";
|
||||
img_end = "<|im_end|>";
|
||||
|
||||
} else if (proj == PROJECTOR_TYPE_LFM2) {
|
||||
img_beg = "<|image_start|>";
|
||||
img_end = "<|image_end|>";
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user