From fc2a0715dfe2dd370fa9aa0e55e449130cf3e5d7 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Thu, 7 Nov 2024 09:20:40 -0800
Subject: [PATCH 1/8] Be explicit for gpu library link dir (#7560)

On Linux, nvcc isn't automatically linking against the same CUDA
version, so pass the library directory explicitly.
---
 llama/make/gpu.make | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama/make/gpu.make b/llama/make/gpu.make
index 939fa41a..fbd8dbca 100644
--- a/llama/make/gpu.make
+++ b/llama/make/gpu.make
@@ -85,7 +85,7 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(RUNNERS
 	GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner
 $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(DIST_GPU_RUNNER_LIB_DEPS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
 	@-mkdir -p $(dir $@)
-	$(CCACHE) $(GPU_COMPILER) --shared $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
+	$(CCACHE) $(GPU_COMPILER) --shared -L$(GPU_LIB_DIR) $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
 
 # Distribution targets
 $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%

From 9e83e550e1b0022e39de9bf2b84961cec3a0071c Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Thu, 7 Nov 2024 10:20:50 -0800
Subject: [PATCH 2/8] Align rocm compiler flags (#7467)

Bring consistency with the old generate script's behavior.
---
 llama/make/Makefile.rocm | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llama/make/Makefile.rocm b/llama/make/Makefile.rocm
index 947c43a6..136ce015 100644
--- a/llama/make/Makefile.rocm
+++ b/llama/make/Makefile.rocm
@@ -58,6 +58,8 @@ endif
 GPU_COMPILER_CUFLAGS = \
 	$(GPU_COMPILER_FPIC) \
 	$(addprefix -m,$(GPU_RUNNER_CPU_FLAGS)) \
+	-mf16c \
+	-mfma \
 	-parallel-jobs=2 \
 	-c \
 	-O3 \
@@ -77,6 +79,9 @@ GPU_COMPILER_CUFLAGS = \
 	-D_CRT_SECURE_NO_WARNINGS \
 	-D_GNU_SOURCE \
 	-D_XOPEN_SOURCE=600 \
+	-DUSE_PROF_API=1 \
+	-std=gnu++14 \
+	-x hip \
 	-mllvm=-amdgpu-early-inline-all=true \
 	-mllvm=-amdgpu-function-calls=false \
 	-Wno-expansion-to-defined \

From b111aa5a91769e5af0edf7259773b20514f9883f Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Thu, 7 Nov 2024 14:25:53 -0800
Subject: [PATCH 3/8] Debug logging for nvcuda init (#7532)

Some users are reporting crashes during nvcuda.dll initialization on
Windows. This added logging should help narrow down where
initialization goes wrong.
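The LOG calls added below are all gated on the verbose flag carried in
the response handle, so the extra tracing is silent in normal runs. As
a minimal sketch of that pattern (assuming a variadic macro along the
lines of the one declared in discover/gpu_info.h; the exact definition
may differ):

    #include <stdio.h>

    /* Verbose-gated logging: expands to a no-op unless verbose is set.
     * Sketch only; the real macro lives in discover/gpu_info.h. */
    #define LOG(verbose, ...)           \
      do {                              \
        if (verbose) {                  \
          fprintf(stderr, __VA_ARGS__); \
        }                               \
      } while (0)

With that shape, each added LOG(resp->ch.verbose, ...) call costs one
branch when verbose is off and writes to stderr only when diagnostics
are requested.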
---
 discover/gpu_info_nvcuda.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/discover/gpu_info_nvcuda.c b/discover/gpu_info_nvcuda.c
index a1a38bfc..466e1ac2 100644
--- a/discover/gpu_info_nvcuda.c
+++ b/discover/gpu_info_nvcuda.c
@@ -4,6 +4,7 @@
 #include "gpu_info_nvcuda.h"
 
 void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
+  LOG(resp->ch.verbose, "initializing %s\n", nvcuda_lib_path);
   CUresult ret;
   resp->err = NULL;
   resp->num_devices = 0;
@@ -57,8 +58,10 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
       resp->cudaErr = -1;
       return;
     }
+    LOG(resp->ch.verbose, "dlsym: %s - %p\n", l[i].s, *l[i].p);
   }
 
+  LOG(resp->ch.verbose, "calling cuInit\n");
   ret = (*resp->ch.cuInit)(0);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuInit err: %d\n", ret);
@@ -75,15 +78,18 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
   resp->ch.driver_minor = 0;
 
   // Report driver version if we're in verbose mode, ignore errors
+  LOG(resp->ch.verbose, "calling cuDriverGetVersion\n");
   ret = (*resp->ch.cuDriverGetVersion)(&version);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret);
   } else {
+    LOG(resp->ch.verbose, "raw version 0x%x\n", version);
     resp->ch.driver_major = version / 1000;
     resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
     LOG(resp->ch.verbose, "CUDA driver version: %d.%d\n", resp->ch.driver_major, resp->ch.driver_minor);
   }
 
+  LOG(resp->ch.verbose, "calling cuDeviceGetCount\n");
   ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuDeviceGetCount err: %d\n", ret);
@@ -94,6 +100,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
     resp->cudaErr = ret;
     return;
   }
+  LOG(resp->ch.verbose, "device count %d\n", resp->num_devices);
 }
 
 const int buflen = 256;

From 1618700c5a042ecedb3fe3a93d7c90d3b4bd3001 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Thu, 7 Nov 2024 14:26:31 -0800
Subject: [PATCH 4/8] Workaround buggy P2P ROCm copy on Windows (#7466)

This enables the workaround code only on Windows, which should help
Windows users with multiple AMD GPUs.
---
 llama/make/Makefile.rocm | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llama/make/Makefile.rocm b/llama/make/Makefile.rocm
index 136ce015..4ab176b4 100644
--- a/llama/make/Makefile.rocm
+++ b/llama/make/Makefile.rocm
@@ -92,6 +92,12 @@ GPU_COMPILER_CUFLAGS = \
 	-Wno-unused-result \
 	-I.
 
+# Workaround buggy P2P copy on some windows multi-GPU setups
+# This workaround breaks linux systems with small system RAM, so only enable on windows
+ifeq ($(OS),windows)
+	GPU_COMPILER_CUFLAGS += -DGGML_CUDA_NO_PEER_COPY=1
+endif
+
 include make/gpu.make
 
 # Adjust the rules from gpu.make to handle the ROCm dependencies properly

From 3d25e7bf8c32391a719336e5d990be9dee263f02 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Thu, 7 Nov 2024 14:26:47 -0800
Subject: [PATCH 5/8] win: remove preview title from installer (#7529)

This should have been in #7347 but was overlooked.
---
 app/ollama.iss | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/ollama.iss b/app/ollama.iss
index 7362eeeb..d5940be6 100644
--- a/app/ollama.iss
+++ b/app/ollama.iss
@@ -136,7 +136,7 @@ Type: filesandordirs; Name: "{%TEMP}\ollama*"
 Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
 
 [Messages]
-WizardReady=Ollama Windows Preview
+WizardReady=Ollama
 ReadyLabel1=%nLet's get you up and running with your own large language models.
 SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or finish the other installer, then click OK to continue with this install, or Cancel to exit.

From 3a5239e6bf46986c8579eabcdff5cf5891a58b9a Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Fri, 8 Nov 2024 09:27:04 -0800
Subject: [PATCH 6/8] Set macOS min version for all architectures (#7579)

---
 scripts/build_darwin.sh | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/scripts/build_darwin.sh b/scripts/build_darwin.sh
index f00cbe84..fd370f48 100755
--- a/scripts/build_darwin.sh
+++ b/scripts/build_darwin.sh
@@ -6,17 +6,18 @@ set -e
 
 mkdir -p dist
 
+# These require Xcode v13 or older to target MacOS v11
+# If installed to an alternate location use the following to enable
+# export SDKROOT=/Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
+# export DEVELOPER_DIR=/Applications/Xcode_12.5.1.app/Contents/Developer
+export CGO_CFLAGS=-mmacosx-version-min=11.3
+export CGO_CXXFLAGS=-mmacosx-version-min=11.3
+export CGO_LDFLAGS=-mmacosx-version-min=11.3
+
 for TARGETARCH in arm64 amd64; do
     echo "Building Go runner darwin $TARGETARCH"
     rm -rf llama/build
     GOOS=darwin ARCH=$TARGETARCH GOARCH=$TARGETARCH make -C llama -j 8
-    # These require Xcode v13 or older to target MacOS v11
-    # If installed to an alternate location use the following to enable
-    # export SDKROOT=/Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
-    # export DEVELOPER_DIR=/Applications/Xcode_12.5.1.app/Contents/Developer
-    export CGO_CFLAGS=-mmacosx-version-min=11.3
-    export CGO_CXXFLAGS=-mmacosx-version-min=11.3
-    export CGO_LDFLAGS=-mmacosx-version-min=11.3
     CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -o dist/ollama-darwin-$TARGETARCH
     CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -cover -o dist/ollama-darwin-$TARGETARCH-cov
 done

From 771fab1dd8ca1040377400731bffa728845e6cb2 Mon Sep 17 00:00:00 2001
From: "Edward J. Schwartz"
Date: Fri, 8 Nov 2024 12:36:17 -0500
Subject: [PATCH 7/8] docs: update langchainpy.md with proper model name (#7527)

---
 docs/tutorials/langchainpy.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tutorials/langchainpy.md b/docs/tutorials/langchainpy.md
index 06543a07..359d3cbd 100644
--- a/docs/tutorials/langchainpy.md
+++ b/docs/tutorials/langchainpy.md
@@ -10,7 +10,7 @@ This sounds like a typical censored response, but even llama2-uncensored gives a
 
 So let's figure out how we can use **LangChain** with Ollama to ask our question to the actual document, the Odyssey by Homer, using Python.
 
-Let's start by asking a simple question that we can get an answer to from the **Llama2** model using **Ollama**. First, we need to install the **LangChain** package:
+Let's start by asking a simple question that we can get an answer to from the **Llama3** model using **Ollama**. First, we need to install the **LangChain** package:
 
 `pip install langchain_community`

From c2e8cbaa140986b6a27f2c795e2fb9b38e74f094 Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Wed, 6 Nov 2024 13:14:18 -0800
Subject: [PATCH 8/8] runner.go: Check for zero-length images

If we get a request with a zero-length image, it will result in an
out-of-bounds error when we pass the data to the image encoder.
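For context on the failure this guards against: the runner hands the
image bytes to the image encoder, which reads the leading bytes of the
buffer without a length check of its own. A rough C sketch of that
failure mode, with hypothetical names (not the actual encoder API):

    #include <stddef.h>

    /* Hypothetical stand-in for the image encoder entry point; name and
     * signature are illustrative only. It sniffs the first byte of the
     * payload but never validates len, so a zero-length buffer means an
     * out-of-bounds read. */
    static int encode_image(const unsigned char *data, size_t len) {
      (void)len;                     /* accepted, but never checked */
      unsigned char magic = data[0]; /* out of bounds when len == 0 */
      return magic == 0xFF ? 0 : -1; /* e.g. look for a JPEG marker */
    }

Rejecting an empty payload on the Go side before the data reaches the
encoder, as the change below does, avoids that read entirely.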
---
 llama/runner/image.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llama/runner/image.go b/llama/runner/image.go
index 1cb898d3..70058290 100644
--- a/llama/runner/image.go
+++ b/llama/runner/image.go
@@ -68,6 +68,10 @@ func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspect
 		return nil, nil
 	}
 
+	if len(data) <= 0 {
+		return nil, errors.New("received zero length image")
+	}
+
 	hash := c.hashImage(data)
 
 	c.mu.Lock()
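A usage note on the guard above: in Go, len of a nil slice is 0, so the
single length check rejects both an omitted image payload and an
explicitly empty one before the data crosses into the encoder. Since
len never returns a negative value, len(data) == 0 would express the
same condition; the <= 0 spelling is simply a more defensive variant.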