mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-22 06:43:57 +00:00
GGML update to ec98e2002 (#13451)
* Revert "add support for NVIDIA Nemotron 3 Nano"
  This reverts commit e7d2ae9d69421012e9a8765c06a3fdf0e45b12f3.
* GGML update to 380b4c984
  Remove MaskBatchPadding as GGML_KQ_MASK_PAD is no longer present (no padding required)
* update to c45f89d55
* ec98e2002
  solar pro needed more adjusting - needs verification
* review comments
This commit is contained in:
@@ -11,10 +11,10 @@ Subject: [PATCH] graph memory reporting on failure
|
||||
4 files changed, 40 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/ggml/include/ggml-alloc.h b/ggml/include/ggml-alloc.h
|
||||
index 2cb150fd2..7ab3f0192 100644
|
||||
index 78aa059dd..7fa8403b3 100644
|
||||
--- a/ggml/include/ggml-alloc.h
|
||||
+++ b/ggml/include/ggml-alloc.h
|
||||
@@ -65,6 +65,7 @@ GGML_API bool ggml_gallocr_reserve_n(
|
||||
@@ -72,6 +72,7 @@ GGML_API bool ggml_gallocr_reserve_n(
|
||||
GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
|
||||
|
||||
GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
|
||||
@@ -23,10 +23,10 @@ index 2cb150fd2..7ab3f0192 100644
|
||||
// Utils
|
||||
// Create a buffer and allocate all the tensors in a ggml_context
|
||||
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
|
||||
index f1b740785..c54ff98bf 100644
|
||||
index 4ed5f3577..a7ebe5dcd 100644
|
||||
--- a/ggml/include/ggml-backend.h
|
||||
+++ b/ggml/include/ggml-backend.h
|
||||
@@ -318,6 +318,7 @@ extern "C" {
|
||||
@@ -319,6 +319,7 @@ extern "C" {
|
||||
|
||||
GGML_API ggml_backend_buffer_type_t ggml_backend_sched_get_buffer_type(ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||
@@ -35,10 +35,10 @@ index f1b740785..c54ff98bf 100644
|
||||
GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
||||
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
||||
diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
|
||||
index a5995fdc2..dbfd8b5b2 100644
|
||||
index 41419b617..73b39bfea 100644
|
||||
--- a/ggml/src/ggml-alloc.c
|
||||
+++ b/ggml/src/ggml-alloc.c
|
||||
@@ -494,6 +494,7 @@ struct node_alloc {
|
||||
@@ -485,6 +485,7 @@ struct node_alloc {
|
||||
struct ggml_gallocr {
|
||||
ggml_backend_buffer_type_t * bufts; // [n_buffers]
|
||||
struct vbuffer ** buffers; // [n_buffers]
|
||||
@@ -46,7 +46,7 @@ index a5995fdc2..dbfd8b5b2 100644
|
||||
struct ggml_dyn_tallocr ** buf_tallocs; // [n_buffers]
|
||||
int n_buffers;
|
||||
|
||||
@@ -517,6 +518,9 @@ ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs
|
||||
@@ -508,6 +509,9 @@ ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs
|
||||
galloc->buffers = calloc(n_bufs, sizeof(struct vbuffer *));
|
||||
GGML_ASSERT(galloc->buffers != NULL);
|
||||
|
||||
@@ -56,7 +56,7 @@ index a5995fdc2..dbfd8b5b2 100644
|
||||
galloc->buf_tallocs = calloc(n_bufs, sizeof(struct ggml_dyn_tallocr *));
|
||||
GGML_ASSERT(galloc->buf_tallocs != NULL);
|
||||
|
||||
@@ -584,6 +588,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
|
||||
@@ -575,6 +579,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
|
||||
ggml_hash_set_free(&galloc->hash_set);
|
||||
free(galloc->hash_values);
|
||||
free(galloc->bufts);
|
||||
@@ -64,7 +64,7 @@ index a5995fdc2..dbfd8b5b2 100644
|
||||
free(galloc->buffers);
|
||||
free(galloc->buf_tallocs);
|
||||
free(galloc->node_allocs);
|
||||
@@ -899,6 +904,8 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||
@@ -904,6 +909,8 @@ static bool ggml_gallocr_reserve_n_impl(
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,18 +73,19 @@ index a5995fdc2..dbfd8b5b2 100644
|
||||
// reallocate buffers if needed
|
||||
for (int i = 0; i < galloc->n_buffers; i++) {
|
||||
// if the buffer type is used multiple times, we reuse the same buffer
|
||||
@@ -933,14 +940,19 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||
#endif
|
||||
ggml_vbuffer_free(galloc->buffers[i]);
|
||||
galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
|
||||
- if (galloc->buffers[i] == NULL) {
|
||||
+ if (galloc->buffers[i]) {
|
||||
+ galloc->buffer_sizes[i] = ggml_vbuffer_size(galloc->buffers[i]);
|
||||
+ } else {
|
||||
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
|
||||
- return false;
|
||||
+ galloc->buffer_sizes[i] = new_size;
|
||||
+ success = false;
|
||||
@@ -940,15 +947,20 @@ static bool ggml_gallocr_reserve_n_impl(
|
||||
galloc->buffers[i] = NULL;
|
||||
} else {
|
||||
galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
|
||||
- if (galloc->buffers[i] == NULL) {
|
||||
+ if (galloc->buffers[i]) {
|
||||
+ galloc->buffer_sizes[i] = ggml_vbuffer_size(galloc->buffers[i]);
|
||||
+ } else {
|
||||
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
|
||||
- return false;
|
||||
+ galloc->buffer_sizes[i] = new_size;
|
||||
+ success = false;
|
||||
}
|
||||
}
|
||||
+ } else {
|
||||
+ galloc->buffer_sizes[i] = ggml_vbuffer_size(galloc->buffers[i]);
|
||||
@@ -95,8 +96,8 @@ index a5995fdc2..dbfd8b5b2 100644
|
||||
+ return success;
|
||||
}
|
||||
|
||||
bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
|
||||
@@ -1095,6 +1107,22 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
|
||||
void ggml_gallocr_reserve_n_size(
|
||||
@@ -1118,6 +1130,22 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
|
||||
return ggml_vbuffer_size(galloc->buffers[buffer_id]);
|
||||
}
|
||||
|
||||
@@ -120,10 +121,10 @@ index a5995fdc2..dbfd8b5b2 100644
|
||||
|
||||
static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
|
||||
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
|
||||
index afde2f0b7..dbf8486a0 100644
|
||||
index 9f37ca70c..1459d16dd 100644
|
||||
--- a/ggml/src/ggml-backend.cpp
|
||||
+++ b/ggml/src/ggml-backend.cpp
|
||||
@@ -1840,6 +1840,13 @@ size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backe
|
||||
@@ -1859,6 +1859,13 @@ size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backe
|
||||
return ggml_gallocr_get_buffer_size(sched->galloc, backend_index);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user