From fc2a0715dfe2dd370fa9aa0e55e449130cf3e5d7 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Thu, 7 Nov 2024 09:20:40 -0800
Subject: [PATCH 1/8] Be explicit for gpu library link dir (#7560)

On Linux, nvcc isn't automatically linking against the same CUDA
version, so pass the library directory explicitly.
---
 llama/make/gpu.make | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama/make/gpu.make b/llama/make/gpu.make
index 939fa41a..fbd8dbca 100644
--- a/llama/make/gpu.make
+++ b/llama/make/gpu.make
@@ -85,7 +85,7 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(RUNNERS
 	GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner
 $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(DIST_GPU_RUNNER_LIB_DEPS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
 	@-mkdir -p $(dir $@)
-	$(CCACHE) $(GPU_COMPILER) --shared $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
+	$(CCACHE) $(GPU_COMPILER) --shared -L$(GPU_LIB_DIR) $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
 
 # Distribution targets
 $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%

From 9e83e550e1b0022e39de9bf2b84961cec3a0071c Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Thu, 7 Nov 2024 10:20:50 -0800
Subject: [PATCH 2/8] Align rocm compiler flags (#7467)

Bring consistency with the old generate script's behavior.
---
 llama/make/Makefile.rocm | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llama/make/Makefile.rocm b/llama/make/Makefile.rocm
index 947c43a6..136ce015 100644
--- a/llama/make/Makefile.rocm
+++ b/llama/make/Makefile.rocm
@@ -58,6 +58,8 @@ endif
 GPU_COMPILER_CUFLAGS = \
 	$(GPU_COMPILER_FPIC) \
 	$(addprefix -m,$(GPU_RUNNER_CPU_FLAGS)) \
+	-mf16c \
+	-mfma \
 	-parallel-jobs=2 \
 	-c \
 	-O3 \
@@ -77,6 +79,9 @@ GPU_COMPILER_CUFLAGS = \
 	-D_CRT_SECURE_NO_WARNINGS \
 	-D_GNU_SOURCE \
 	-D_XOPEN_SOURCE=600 \
+	-DUSE_PROF_API=1 \
+	-std=gnu++14 \
+	-x hip \
 	-mllvm=-amdgpu-early-inline-all=true \
 	-mllvm=-amdgpu-function-calls=false \
 	-Wno-expansion-to-defined \

From b111aa5a91769e5af0edf7259773b20514f9883f Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Thu, 7 Nov 2024 14:25:53 -0800
Subject: [PATCH 3/8] Debug logging for nvcuda init (#7532)

Some users are reporting crashes during nvcuda.dll initialization on
Windows. This added logging should help narrow down where
initialization goes wrong.
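The LOG calls added below are all gated on the verbose flag carried in
the response handle, so the extra tracing is silent in normal runs. As
a minimal sketch of that pattern (assuming a variadic macro along the
lines of the one declared in discover/gpu_info.h; the exact definition
may differ):

    #include <stdio.h>

    /* Verbose-gated logging: expands to a no-op unless verbose is set.
     * Sketch only; the real macro lives in discover/gpu_info.h. */
    #define LOG(verbose, ...)           \
      do {                              \
        if (verbose) {                  \
          fprintf(stderr, __VA_ARGS__); \
        }                               \
      } while (0)

With that shape, each added LOG(resp->ch.verbose, ...) call costs one
branch when verbose is off and writes to stderr only when diagnostics
are requested.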
---
 discover/gpu_info_nvcuda.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/discover/gpu_info_nvcuda.c b/discover/gpu_info_nvcuda.c
index a1a38bfc..466e1ac2 100644
--- a/discover/gpu_info_nvcuda.c
+++ b/discover/gpu_info_nvcuda.c
@@ -4,6 +4,7 @@
 #include "gpu_info_nvcuda.h"
 
 void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
+  LOG(resp->ch.verbose, "initializing %s\n", nvcuda_lib_path);
   CUresult ret;
   resp->err = NULL;
   resp->num_devices = 0;
@@ -57,8 +58,10 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
       resp->cudaErr = -1;
       return;
     }
+    LOG(resp->ch.verbose, "dlsym: %s - %p\n", l[i].s, *l[i].p);
   }
 
+  LOG(resp->ch.verbose, "calling cuInit\n");
   ret = (*resp->ch.cuInit)(0);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuInit err: %d\n", ret);
@@ -75,15 +78,18 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
   resp->ch.driver_minor = 0;
 
   // Report driver version if we're in verbose mode, ignore errors
+  LOG(resp->ch.verbose, "calling cuDriverGetVersion\n");
   ret = (*resp->ch.cuDriverGetVersion)(&version);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret);
   } else {
+    LOG(resp->ch.verbose, "raw version 0x%x\n", version);
     resp->ch.driver_major = version / 1000;
     resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
     LOG(resp->ch.verbose, "CUDA driver version: %d.%d\n", resp->ch.driver_major, resp->ch.driver_minor);
   }
 
+  LOG(resp->ch.verbose, "calling cuDeviceGetCount\n");
   ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuDeviceGetCount err: %d\n", ret);
@@ -94,6 +100,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
     resp->cudaErr = ret;
     return;
   }
+  LOG(resp->ch.verbose, "device count %d\n", resp->num_devices);
 }
 
 const int buflen = 256;

From 1618700c5a042ecedb3fe3a93d7c90d3b4bd3001 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Thu, 7 Nov 2024 14:26:31 -0800
Subject: [PATCH 4/8] Workaround buggy P2P ROCm copy on Windows (#7466)

This enables the workaround code only on Windows, which should help
Windows users with multiple AMD GPUs.
---
 llama/make/Makefile.rocm | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llama/make/Makefile.rocm b/llama/make/Makefile.rocm
index 136ce015..4ab176b4 100644
--- a/llama/make/Makefile.rocm
+++ b/llama/make/Makefile.rocm
@@ -92,6 +92,12 @@ GPU_COMPILER_CUFLAGS = \
 	-Wno-unused-result \
 	-I.
 
+# Workaround buggy P2P copy on some windows multi-GPU setups
+# This workaround breaks linux systems with small system RAM, so only enable on windows
+ifeq ($(OS),windows)
+	GPU_COMPILER_CUFLAGS += -DGGML_CUDA_NO_PEER_COPY=1
+endif
+
 include make/gpu.make
 
 # Adjust the rules from gpu.make to handle the ROCm dependencies properly

From 3d25e7bf8c32391a719336e5d990be9dee263f02 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Thu, 7 Nov 2024 14:26:47 -0800
Subject: [PATCH 5/8] win: remove preview title from installer (#7529)

This should have been in #7347 but was overlooked.
---
 app/ollama.iss | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/ollama.iss b/app/ollama.iss
index 7362eeeb..d5940be6 100644
--- a/app/ollama.iss
+++ b/app/ollama.iss
@@ -136,7 +136,7 @@ Type: filesandordirs; Name: "{%TEMP}\ollama*"
 Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
 
 [Messages]
-WizardReady=Ollama Windows Preview
+WizardReady=Ollama
 ReadyLabel1=%nLet's get you up and running with your own large language models.
 SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or finish the other installer, then click OK to continue with this install, or Cancel to exit.

From 3a5239e6bf46986c8579eabcdff5cf5891a58b9a Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Fri, 8 Nov 2024 09:27:04 -0800
Subject: [PATCH 6/8] Set macOS min version for all architectures (#7579)

---
 scripts/build_darwin.sh | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/scripts/build_darwin.sh b/scripts/build_darwin.sh
index f00cbe84..fd370f48 100755
--- a/scripts/build_darwin.sh
+++ b/scripts/build_darwin.sh
@@ -6,17 +6,18 @@ set -e
 
 mkdir -p dist
 
+# These require Xcode v13 or older to target MacOS v11
+# If installed to an alternate location use the following to enable
+# export SDKROOT=/Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
+# export DEVELOPER_DIR=/Applications/Xcode_12.5.1.app/Contents/Developer
+export CGO_CFLAGS=-mmacosx-version-min=11.3
+export CGO_CXXFLAGS=-mmacosx-version-min=11.3
+export CGO_LDFLAGS=-mmacosx-version-min=11.3
+
 for TARGETARCH in arm64 amd64; do
     echo "Building Go runner darwin $TARGETARCH"
     rm -rf llama/build
     GOOS=darwin ARCH=$TARGETARCH GOARCH=$TARGETARCH make -C llama -j 8
-    # These require Xcode v13 or older to target MacOS v11
-    # If installed to an alternate location use the following to enable
-    # export SDKROOT=/Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
-    # export DEVELOPER_DIR=/Applications/Xcode_12.5.1.app/Contents/Developer
-    export CGO_CFLAGS=-mmacosx-version-min=11.3
-    export CGO_CXXFLAGS=-mmacosx-version-min=11.3
-    export CGO_LDFLAGS=-mmacosx-version-min=11.3
     CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -o dist/ollama-darwin-$TARGETARCH
     CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -cover -o dist/ollama-darwin-$TARGETARCH-cov
 done

From 771fab1dd8ca1040377400731bffa728845e6cb2 Mon Sep 17 00:00:00 2001
From: "Edward J. Schwartz"
Date: Fri, 8 Nov 2024 12:36:17 -0500
Subject: [PATCH 7/8] docs: update langchainpy.md with proper model name (#7527)

---
 docs/tutorials/langchainpy.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tutorials/langchainpy.md b/docs/tutorials/langchainpy.md
index 06543a07..359d3cbd 100644
--- a/docs/tutorials/langchainpy.md
+++ b/docs/tutorials/langchainpy.md
@@ -10,7 +10,7 @@ This sounds like a typical censored response, but even llama2-uncensored gives a
 
 So let's figure out how we can use **LangChain** with Ollama to ask our question to the actual document, the Odyssey by Homer, using Python.
 
-Let's start by asking a simple question that we can get an answer to from the **Llama2** model using **Ollama**. First, we need to install the **LangChain** package:
+Let's start by asking a simple question that we can get an answer to from the **Llama3** model using **Ollama**. First, we need to install the **LangChain** package:
 
 `pip install langchain_community`

From c2e8cbaa140986b6a27f2c795e2fb9b38e74f094 Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Wed, 6 Nov 2024 13:14:18 -0800
Subject: [PATCH 8/8] runner.go: Check for zero-length images

If we get a request with a zero-length image, it will result in an
out-of-bounds error when we pass the data to the image encoder.
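For context on the failure this guards against: the runner hands the
image bytes to the image encoder, which reads the leading bytes of the
buffer without a length check of its own. A rough C sketch of that
failure mode, with hypothetical names (not the actual encoder API):

    #include <stddef.h>

    /* Hypothetical stand-in for the image encoder entry point; name and
     * signature are illustrative only. It sniffs the first byte of the
     * payload but never validates len, so a zero-length buffer means an
     * out-of-bounds read. */
    static int encode_image(const unsigned char *data, size_t len) {
      (void)len;                     /* accepted, but never checked */
      unsigned char magic = data[0]; /* out of bounds when len == 0 */
      return magic == 0xFF ? 0 : -1; /* e.g. look for a JPEG marker */
    }

Rejecting an empty payload on the Go side before the data reaches the
encoder, as the change below does, avoids that read entirely.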
---
 llama/runner/image.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llama/runner/image.go b/llama/runner/image.go
index 1cb898d3..70058290 100644
--- a/llama/runner/image.go
+++ b/llama/runner/image.go
@@ -68,6 +68,10 @@ func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspect
 		return nil, nil
 	}
 
+	if len(data) <= 0 {
+		return nil, errors.New("received zero length image")
+	}
+
 	hash := c.hashImage(data)
 
 	c.mu.Lock()
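A usage note on the guard above: in Go, len of a nil slice is 0, so the
single length check rejects both an omitted image payload and an
explicitly empty one before the data crosses into the encoder. Since
len never returns a negative value, len(data) == 0 would express the
same condition; the <= 0 spelling is simply a more defensive variant.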