mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 14:26:30 +00:00
Callers can set a backend buffer type to be no-alloc, meaning that it does not allocate memory for tensors or operations. This can be used for calculating memory requirements. Tensors and graphs must be recreated with allocation re-enabled (no-alloc set to false) before loading data. No-alloc defaults to false for newly created backend buffer types.
100 lines
3.7 KiB
Diff
100 lines
3.7 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Jesse Gross <jesse@ollama.com>
|
|
Date: Wed, 23 Jul 2025 11:58:49 -0700
|
|
Subject: [PATCH] ggml: No-alloc mode
|
|
|
|
Callers can set a backend buffer type to be no-alloc, meaning that
|
|
it does not allocate memory for tensors or operations. This can
|
|
be used for calculating memory requirements. Tensors and graphs
|
|
must be recreated with no-alloc set to false before loading data.
|
|
|
|
Defaults to false for newly created backend buffer types.
|
|
---
|
|
ggml/include/ggml-backend.h | 1 +
|
|
ggml/src/ggml-backend-impl.h | 2 ++
|
|
ggml/src/ggml-backend.cpp | 19 ++++++++++++++++++-
|
|
3 files changed, 21 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
|
|
index 48839339..3903c3cb 100644
|
|
--- a/ggml/include/ggml-backend.h
|
|
+++ b/ggml/include/ggml-backend.h
|
|
@@ -35,6 +35,7 @@ extern "C" {
|
|
//
|
|
|
|
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
|
|
+ GGML_API void ggml_backend_buft_set_alloc (ggml_backend_buffer_type_t buft, bool alloc);
|
|
GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
|
|
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
|
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
|
|
diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h
|
|
index c36c12d6..81749a5a 100644
|
|
--- a/ggml/src/ggml-backend-impl.h
|
|
+++ b/ggml/src/ggml-backend-impl.h
|
|
@@ -32,6 +32,7 @@ extern "C" {
|
|
struct ggml_backend_buffer_type_i iface;
|
|
ggml_backend_dev_t device;
|
|
void * context;
|
|
+ bool no_alloc;
|
|
};
|
|
|
|
//
|
|
@@ -63,6 +64,7 @@ extern "C" {
|
|
void * context;
|
|
size_t size;
|
|
enum ggml_backend_buffer_usage usage;
|
|
+ bool no_alloc;
|
|
};
|
|
|
|
GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
|
|
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
|
|
index be335e8c..84928bc3 100644
|
|
--- a/ggml/src/ggml-backend.cpp
|
|
+++ b/ggml/src/ggml-backend.cpp
|
|
@@ -35,12 +35,22 @@ const char * ggml_backend_buft_name(ggml_backend_buffer_type_t buft) {
|
|
return buft->iface.get_name(buft);
|
|
}
|
|
|
|
+void ggml_backend_buft_set_alloc(ggml_backend_buffer_type_t buft, bool alloc) {
|
|
+ buft->no_alloc = !alloc;
|
|
+}
|
|
+
|
|
ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
|
|
if (size == 0) {
|
|
// return a dummy buffer for zero-sized allocations
|
|
return ggml_backend_buffer_init(buft, {}, NULL, 0);
|
|
}
|
|
|
|
+ if (buft->no_alloc) {
|
|
+ ggml_backend_buffer_t buf = ggml_backend_buffer_init(buft, {}, NULL, size);
|
|
+ buf->no_alloc = true;
|
|
+ return buf;
|
|
+ }
|
|
+
|
|
return buft->iface.alloc_buffer(buft, size);
|
|
}
|
|
|
|
@@ -89,7 +99,8 @@ ggml_backend_buffer_t ggml_backend_buffer_init(
|
|
/* .buft = */ buft,
|
|
/* .context = */ context,
|
|
/* .size = */ size,
|
|
- /* .usage = */ GGML_BACKEND_BUFFER_USAGE_ANY
|
|
+ /* .usage = */ GGML_BACKEND_BUFFER_USAGE_ANY,
|
|
+ /* .no_alloc = */ false
|
|
};
|
|
|
|
return buffer;
|
|
@@ -119,6 +130,12 @@ void * ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) {
|
|
return NULL;
|
|
}
|
|
|
|
+ // If we aren't allocating memory, return a placeholder non-NULL pointer
|
|
+ // that meets alignment requirements
|
|
+ if (buffer->no_alloc) {
|
|
+ return (void *)ggml_backend_buffer_get_alignment(buffer);
|
|
+ }
|
|
+
|
|
void * base = buffer->iface.get_base(buffer);
|
|
|
|
GGML_ASSERT(base != NULL && "backend buffer base cannot be NULL");
|