diff --git a/llama/patches/0018-BF16-macos-version-guard.patch b/llama/patches/0018-BF16-macos-version-guard.patch deleted file mode 100644 index f209c802..00000000 --- a/llama/patches/0018-BF16-macos-version-guard.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Daniel Hiltgen -Date: Wed, 30 Jul 2025 08:43:46 -0700 -Subject: [PATCH] BF16 macos version guard - -Only enable BF16 on supported MacOS versions (v14+) ---- - ggml/src/ggml-metal/ggml-metal-context.m | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/ggml/src/ggml-metal/ggml-metal-context.m b/ggml/src/ggml-metal/ggml-metal-context.m -index 052efb7ac..b47dc7879 100644 ---- a/ggml/src/ggml-metal/ggml-metal-context.m -+++ b/ggml/src/ggml-metal/ggml-metal-context.m -@@ -125,7 +125,12 @@ ggml_metal_t ggml_metal_init(ggml_metal_device_t dev) { - - res->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); - -- res->use_bfloat = props_dev->has_bfloat; -+ if (@available(macOS 14.0, *)) { -+ res->use_bfloat = props_dev->has_bfloat; -+ } else { -+ res->use_bfloat = false; -+ } -+ - res->use_fusion = getenv("GGML_METAL_FUSION_DISABLE") == nil; - res->use_concurrency = getenv("GGML_METAL_CONCURRENCY_DISABLE") == nil; - diff --git a/llama/patches/0019-ggml-Add-batch-size-hint.patch b/llama/patches/0018-ggml-Add-batch-size-hint.patch similarity index 100% rename from llama/patches/0019-ggml-Add-batch-size-hint.patch rename to llama/patches/0018-ggml-Add-batch-size-hint.patch diff --git a/llama/patches/0021-fix-mtmd-audio.cpp-build-on-windows.patch b/llama/patches/0019-fix-mtmd-audio.cpp-build-on-windows.patch similarity index 100% rename from llama/patches/0021-fix-mtmd-audio.cpp-build-on-windows.patch rename to llama/patches/0019-fix-mtmd-audio.cpp-build-on-windows.patch diff --git a/llama/patches/0020-Disable-ggml-blas-on-macos-v13-and-older.patch b/llama/patches/0020-Disable-ggml-blas-on-macos-v13-and-older.patch deleted file mode 100644 index 9fbc0b60..00000000 --- a/llama/patches/0020-Disable-ggml-blas-on-macos-v13-and-older.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Daniel Hiltgen -Date: Sun, 3 Aug 2025 10:00:20 -0700 -Subject: [PATCH] Disable ggml-blas on macos v13 and older - ---- - ggml/src/ggml-blas/ggml-blas.cpp | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/ggml/src/ggml-blas/ggml-blas.cpp b/ggml/src/ggml-blas/ggml-blas.cpp -index 88d088952..6a38a51a2 100644 ---- a/ggml/src/ggml-blas/ggml-blas.cpp -+++ b/ggml/src/ggml-blas/ggml-blas.cpp -@@ -507,6 +507,11 @@ static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = { - }; - - ggml_backend_reg_t ggml_backend_blas_reg(void) { -+ // MacOS prior to v14 does not include cblas_sgemm - disable this backend if it isn't available -+ if (&cblas_sgemm == NULL) { -+ GGML_LOG_INFO("Disabling ggml-blas backend on old MacOS version\n"); -+ return NULL; -+ } - static struct ggml_backend_reg ggml_backend_blas_reg = { - /* .api_version = */ GGML_BACKEND_API_VERSION, - /* .iface = */ ggml_backend_blas_reg_i, diff --git a/llama/patches/0022-ggml-No-alloc-mode.patch b/llama/patches/0020-ggml-No-alloc-mode.patch similarity index 100% rename from llama/patches/0022-ggml-No-alloc-mode.patch rename to llama/patches/0020-ggml-No-alloc-mode.patch diff --git a/llama/patches/0023-decode-disable-output_all.patch b/llama/patches/0021-decode-disable-output_all.patch similarity index 100% rename from llama/patches/0023-decode-disable-output_all.patch rename to llama/patches/0021-decode-disable-output_all.patch diff --git a/llama/patches/0024-ggml-Enable-resetting-backend-devices.patch b/llama/patches/0022-ggml-Enable-resetting-backend-devices.patch similarity index 100% rename from llama/patches/0024-ggml-Enable-resetting-backend-devices.patch rename to llama/patches/0022-ggml-Enable-resetting-backend-devices.patch diff --git a/llama/patches/0025-harden-uncaught-exception-registration.patch b/llama/patches/0023-harden-uncaught-exception-registration.patch similarity index 100% rename from llama/patches/0025-harden-uncaught-exception-registration.patch rename to llama/patches/0023-harden-uncaught-exception-registration.patch diff --git a/llama/patches/0026-GPU-discovery-enhancements.patch b/llama/patches/0024-GPU-discovery-enhancements.patch similarity index 100% rename from llama/patches/0026-GPU-discovery-enhancements.patch rename to llama/patches/0024-GPU-discovery-enhancements.patch diff --git a/llama/patches/0027-NVML-fallback-for-unified-memory-GPUs.patch b/llama/patches/0025-NVML-fallback-for-unified-memory-GPUs.patch similarity index 100% rename from llama/patches/0027-NVML-fallback-for-unified-memory-GPUs.patch rename to llama/patches/0025-NVML-fallback-for-unified-memory-GPUs.patch diff --git a/llama/patches/0028-report-LoadLibrary-failures.patch b/llama/patches/0026-report-LoadLibrary-failures.patch similarity index 100% rename from llama/patches/0028-report-LoadLibrary-failures.patch rename to llama/patches/0026-report-LoadLibrary-failures.patch diff --git a/llama/patches/0029-interleave-multi-rope.patch b/llama/patches/0027-interleave-multi-rope.patch similarity index 100% rename from llama/patches/0029-interleave-multi-rope.patch rename to llama/patches/0027-interleave-multi-rope.patch diff --git a/llama/patches/0030-Add-memory-detection-using-DXGI-PDH.patch b/llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch similarity index 100% rename from llama/patches/0030-Add-memory-detection-using-DXGI-PDH.patch rename to llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch diff --git a/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp b/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp index 6a38a51a..88d08895 100644 --- a/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp +++ b/ml/backend/ggml/ggml/src/ggml-blas/ggml-blas.cpp @@ -507,11 +507,6 @@ static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = { }; ggml_backend_reg_t ggml_backend_blas_reg(void) { - // MacOS prior to v14 does not include cblas_sgemm - disable this backend if it isn't available - if (&cblas_sgemm == NULL) { - GGML_LOG_INFO("Disabling ggml-blas backend on old MacOS version\n"); - return NULL; - } static struct ggml_backend_reg ggml_backend_blas_reg = { /* .api_version = */ GGML_BACKEND_API_VERSION, /* .iface = */ ggml_backend_blas_reg_i, diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m index b47dc787..052efb7a 100644 --- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m +++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal-context.m @@ -125,12 +125,7 @@ ggml_metal_t ggml_metal_init(ggml_metal_device_t dev) { res->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); - if (@available(macOS 14.0, *)) { - res->use_bfloat = props_dev->has_bfloat; - } else { - res->use_bfloat = false; - } - + res->use_bfloat = props_dev->has_bfloat; res->use_fusion = getenv("GGML_METAL_FUSION_DISABLE") == nil; res->use_concurrency = getenv("GGML_METAL_CONCURRENCY_DISABLE") == nil;