mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 14:26:30 +00:00
* feat: Bump llama.cpp to the latest master (17f7f4b) This brings in significant improvements to prefill performance for all models using the SSM_CONV and SSM_SCAN ops (granite4, jamba, falcon-h, nemotron-h, Qwen3 Next) on Apple Metal. See https://github.com/ggml-org/llama.cpp/pull/17876 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * feat: Update patches 1-4 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * fix: Update patches 5-12 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * feat: Update patches 13-18 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * feat: Update patch 20 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * feat: Update patches 21-31 Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> * feat: Sync vendored code The two files I'm not sure about here are the swap from gemma3-iswa.cpp to gemma3.cpp (I chose to include this because I think it's required), and the inclusion of `ggml-zendnn.h` which I chose to omit. Branch: LlamaCPPMetalSSMImprovements Signed-off-by: Gabe Goodhart <ghart@us.ibm.com> --------- Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
47 lines
1.5 KiB
Diff
47 lines
1.5 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Gabe Goodhart <ghart@us.ibm.com>
|
|
Date: Tue, 24 Jun 2025 16:55:31 -0600
|
|
Subject: [PATCH] add C API for mtmd_input_text
|
|
|
|
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
|
|
---
|
|
tools/mtmd/mtmd.cpp | 10 ++++++++++
|
|
tools/mtmd/mtmd.h | 3 +++
|
|
2 files changed, 13 insertions(+)
|
|
|
|
diff --git a/tools/mtmd/mtmd.cpp b/tools/mtmd/mtmd.cpp
|
|
index d06fa42e6..0f5712e21 100644
|
|
--- a/tools/mtmd/mtmd.cpp
|
|
+++ b/tools/mtmd/mtmd.cpp
|
|
@@ -87,6 +87,16 @@ enum mtmd_slice_tmpl {
|
|
MTMD_SLICE_TMPL_IDEFICS3,
|
|
};
|
|
|
|
+mtmd_input_text* mtmd_input_text_init(const char * text, bool add_special, bool parse_special) { // Allocates a mtmd_input_text with new and returns the raw pointer; mtmd_input_text_free deletes it.
|
|
+ return new mtmd_input_text{text, add_special, parse_special}; // NOTE(review): if the struct's text member is a plain const char *, the string is not copied and must outlive the object — confirm.
|
|
+}
|
|
+
|
|
+void mtmd_input_text_free(mtmd_input_text* input_text) { // Deletes an object created by mtmd_input_text_init; a null pointer is ignored.
|
|
+ if (input_text) { // delete on a null pointer is already a no-op, so this guard is redundant but harmless
|
|
+ delete input_text;
|
|
+ }
|
|
+}
|
|
+
|
|
const char * mtmd_default_marker() {
|
|
return "<__media__>";
|
|
}
|
|
diff --git a/tools/mtmd/mtmd.h b/tools/mtmd/mtmd.h
|
|
index b3df24c29..a6a1af3b8 100644
|
|
--- a/tools/mtmd/mtmd.h
|
|
+++ b/tools/mtmd/mtmd.h
|
|
@@ -75,6 +75,9 @@ typedef struct mtmd_input_chunk mtmd_input_chunk;
|
|
typedef struct mtmd_input_chunks mtmd_input_chunks;
|
|
typedef struct mtmd_input_text mtmd_input_text;
|
|
|
|
+MTMD_API mtmd_input_text* mtmd_input_text_init(const char * text, bool add_special, bool parse_special); // returns a heap-allocated object; release it with mtmd_input_text_free
|
|
+MTMD_API void mtmd_input_text_free(mtmd_input_text* input_text); // deletes input_text; passing a null pointer does nothing
|
|
+
|
|
struct mtmd_context_params {
|
|
bool use_gpu;
|
|
bool print_timings;
|