mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-23 23:18:26 +00:00
runner.go: Better abstract vision model integration
- Update mllama to take the cross attention state as embeddings in a batch, more similar to how Llava handles it. This improves integration with the input cache.
- Pass locations in a prompt for embeddings using tags similar to Llava.
- Abstract interface to vision models so the main runner accesses Clip and Mllama similarly.

Co-authored-by: Michael Yang <mxyng@pm.me>
This commit is contained in:
3
llama/llama.h
vendored
3
llama/llama.h
vendored
@@ -266,6 +266,7 @@ extern "C" {
|
||||
|
||||
llama_token * token;
|
||||
float * embd;
|
||||
int32_t n_embd;
|
||||
llama_pos * pos;
|
||||
int32_t * n_seq_id;
|
||||
llama_seq_id ** seq_id;
|
||||
@@ -451,7 +452,7 @@ extern "C" {
|
||||
|
||||
// TODO (jmorganca): this should most likely be passed in as part of a batch
|
||||
// and not set on the context for all batches.
|
||||
LLAMA_API void llama_set_cross_attn_state(struct llama_context * ctx, float * cross_attn_state);
|
||||
LLAMA_API void llama_set_cross_attention(struct llama_context * ctx, bool cross_attn_state);
|
||||
|
||||
// Frees all allocated memory
|
||||
LLAMA_API void llama_free(struct llama_context * ctx);
|
||||
|
||||
Reference in New Issue
Block a user