Merge branch 'ollama:main' into main

Author: likelovewant
Date: 2024-11-09 12:09:06 +08:00
Committed by: GitHub
7 changed files with 33 additions and 10 deletions


@@ -136,7 +136,7 @@ Type: filesandordirs; Name: "{%TEMP}\ollama*"
 Type: filesandordirs; Name: "{%LOCALAPPDATA}\Programs\Ollama"
 [Messages]
-WizardReady=Ollama Windows Preview
+WizardReady=Ollama
 ReadyLabel1=%nLet's get you up and running with your own large language models.
 SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or finish the other installer, then click OK to continue with this install, or Cancel to exit.


@@ -4,6 +4,7 @@
#include "gpu_info_nvcuda.h" #include "gpu_info_nvcuda.h"
void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) { void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
LOG(resp->ch.verbose, "initializing %s\n", nvcuda_lib_path);
CUresult ret; CUresult ret;
resp->err = NULL; resp->err = NULL;
resp->num_devices = 0; resp->num_devices = 0;
@@ -57,8 +58,10 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
       resp->cudaErr = -1;
       return;
     }
+    LOG(resp->ch.verbose, "dlsym: %s - %p\n", l[i].s, *l[i].p);
   }
+  LOG(resp->ch.verbose, "calling cuInit\n");
   ret = (*resp->ch.cuInit)(0);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuInit err: %d\n", ret);
@@ -75,15 +78,18 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
   resp->ch.driver_minor = 0;
   // Report driver version if we're in verbose mode, ignore errors
+  LOG(resp->ch.verbose, "calling cuDriverGetVersion\n");
   ret = (*resp->ch.cuDriverGetVersion)(&version);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret);
   } else {
+    LOG(resp->ch.verbose, "raw version 0x%x\n", version);
     resp->ch.driver_major = version / 1000;
     resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
     LOG(resp->ch.verbose, "CUDA driver version: %d.%d\n", resp->ch.driver_major, resp->ch.driver_minor);
   }
+  LOG(resp->ch.verbose, "calling cuDeviceGetCount\n");
   ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices);
   if (ret != CUDA_SUCCESS) {
     LOG(resp->ch.verbose, "cuDeviceGetCount err: %d\n", ret);
@@ -94,6 +100,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
     resp->cudaErr = ret;
     return;
   }
+  LOG(resp->ch.verbose, "device count %d\n", resp->num_devices);
 }
   const int buflen = 256;
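For context on the version arithmetic in this hunk: cuDriverGetVersion reports the driver version as a single integer encoding major * 1000 + minor * 10, so a raw value of 12040 corresponds to CUDA 12.4. A minimal sketch of the same decoding, with a hypothetical helper name:

```python
# A minimal sketch mirroring the C arithmetic above;
# decode_cuda_driver_version is a hypothetical helper, not part of the diff.
def decode_cuda_driver_version(version: int) -> tuple[int, int]:
    major = version // 1000
    minor = (version - major * 1000) // 10
    return major, minor

# e.g. a raw value of 12040 decodes to CUDA 12.4
assert decode_cuda_driver_version(12040) == (12, 4)
```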


@@ -10,7 +10,7 @@ This sounds like a typical censored response, but even llama2-uncensored gives a
 So let's figure out how we can use **LangChain** with Ollama to ask our question to the actual document, the Odyssey by Homer, using Python.
-Let's start by asking a simple question that we can get an answer to from the **Llama2** model using **Ollama**. First, we need to install the **LangChain** package:
+Let's start by asking a simple question that we can get an answer to from the **Llama3** model using **Ollama**. First, we need to install the **LangChain** package:
 `pip install langchain_community`
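As a rough sketch of the first step this doc change describes, once the package is installed, asking the model a question through langchain_community looks like this (assuming a local Ollama server with the llama3 model pulled; the prompt is illustrative):

```python
# A minimal sketch, assuming `ollama serve` is running locally and
# `ollama pull llama3` has been done; the prompt is illustrative.
from langchain_community.llms import Ollama

llm = Ollama(model="llama3")
print(llm.invoke("Why is the sky blue?"))
```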


@@ -58,6 +58,8 @@ endif
 GPU_COMPILER_CUFLAGS = \
 	$(GPU_COMPILER_FPIC) \
 	$(addprefix -m,$(GPU_RUNNER_CPU_FLAGS)) \
+	-mf16c \
+	-mfma \
 	-parallel-jobs=2 \
 	-c \
 	-O3 \
@@ -77,6 +79,9 @@ GPU_COMPILER_CUFLAGS = \
 	-D_CRT_SECURE_NO_WARNINGS \
 	-D_GNU_SOURCE \
 	-D_XOPEN_SOURCE=600 \
+	-DUSE_PROF_API=1 \
+	-std=gnu++14 \
+	-x hip \
 	-mllvm=-amdgpu-early-inline-all=true \
 	-mllvm=-amdgpu-function-calls=false \
 	-Wno-expansion-to-defined \
@@ -87,6 +92,12 @@ GPU_COMPILER_CUFLAGS = \
 	-Wno-unused-result \
 	-I.
+# Workaround buggy P2P copy on some windows multi-GPU setups
+# This workaround breaks linux systems with small system RAM, so only enable on windows
+ifeq ($(OS),windows)
+	GPU_COMPILER_CUFLAGS += -DGGML_CUDA_NO_PEER_COPY=1
+endif
 include make/gpu.make
 # Adjust the rules from gpu.make to handle the ROCm dependencies properly


@@ -85,7 +85,7 @@ $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/ollama_llama_server$(EXE_EXT): $(RUNNERS
 	GOARCH=$(ARCH) CGO_LDFLAGS="$(TARGET_CGO_LDFLAGS)" go build -buildmode=pie $(GPU_GOFLAGS) -trimpath -tags $(subst $(space),$(comma),$(GPU_RUNNER_CPU_FLAGS) $(GPU_RUNNER_GO_TAGS)) -o $@ ./runner
 $(RUNNERS_BUILD_DIR)/$(GPU_RUNNER_NAME)/$(SHARED_PREFIX)ggml_$(GPU_RUNNER_NAME).$(SHARED_EXT): $(GPU_RUNNER_OBJS) $(DIST_GPU_RUNNER_LIB_DEPS) $(COMMON_HDRS) $(GPU_RUNNER_HDRS)
 	@-mkdir -p $(dir $@)
-	$(CCACHE) $(GPU_COMPILER) --shared $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
+	$(CCACHE) $(GPU_COMPILER) --shared -L$(GPU_LIB_DIR) $(GPU_RUNNER_DRIVER_LIB_LINK) -L${DIST_GPU_RUNNER_DEPS_DIR} $(foreach lib, $(GPU_RUNNER_LIBS_SHORT), -l$(lib)) $(GPU_RUNNER_OBJS) -o $@
 # Distribution targets
 $(RUNNERS_DIST_DIR)/%: $(RUNNERS_BUILD_DIR)/%


@@ -68,6 +68,10 @@ func (c *ImageContext) NewEmbed(llamaContext *llama.Context, data []byte, aspect
 		return nil, nil
 	}
+	if len(data) <= 0 {
+		return nil, errors.New("received zero length image")
+	}
 	hash := c.hashImage(data)
 	c.mu.Lock()


@@ -6,17 +6,18 @@ set -e
 mkdir -p dist
-# These require Xcode v13 or older to target MacOS v11
-# If installed to an alternate location use the following to enable
-# export SDKROOT=/Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
-# export DEVELOPER_DIR=/Applications/Xcode_12.5.1.app/Contents/Developer
-export CGO_CFLAGS=-mmacosx-version-min=11.3
-export CGO_CXXFLAGS=-mmacosx-version-min=11.3
-export CGO_LDFLAGS=-mmacosx-version-min=11.3
 for TARGETARCH in arm64 amd64; do
     echo "Building Go runner darwin $TARGETARCH"
     rm -rf llama/build
     GOOS=darwin ARCH=$TARGETARCH GOARCH=$TARGETARCH make -C llama -j 8
+    # These require Xcode v13 or older to target MacOS v11
+    # If installed to an alternate location use the following to enable
+    # export SDKROOT=/Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
+    # export DEVELOPER_DIR=/Applications/Xcode_12.5.1.app/Contents/Developer
+    export CGO_CFLAGS=-mmacosx-version-min=11.3
+    export CGO_CXXFLAGS=-mmacosx-version-min=11.3
+    export CGO_LDFLAGS=-mmacosx-version-min=11.3
     CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -o dist/ollama-darwin-$TARGETARCH
     CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -trimpath -cover -o dist/ollama-darwin-$TARGETARCH-cov
 done