diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index acae0050..37ac7e45 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -329,7 +329,9 @@ jobs: done working-directory: dist/${{ matrix.os }}-${{ matrix.arch }} - run: | - for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz); done + for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do + tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz); + done - uses: actions/upload-artifact@v4 with: name: dist-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 365ad749..e989b127 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,7 @@ set(GGML_LLAMAFILE ON) set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128) set(GGML_CUDA_GRAPHS ON) -if((NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64") +if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+")) set(GGML_CPU_ALL_VARIANTS ON) endif() diff --git a/README.md b/README.md index 46bda842..ba8ca634 100644 --- a/README.md +++ b/README.md @@ -403,6 +403,7 @@ See the [API documentation](./docs/api.md) for all endpoints. 
- [ChibiChat](https://github.com/CosmicEventHorizon/ChibiChat) (Kotlin-based Android app to chat with Ollama and Koboldcpp API endpoints) - [LocalLLM](https://github.com/qusaismael/localllm) (Minimal Web-App to run ollama models on it with a GUI) - [Ollamazing](https://github.com/buiducnhat/ollamazing) (Web extension to run Ollama models) +- [OpenDeepResearcher-via-searxng](https://github.com/benhaotang/OpenDeepResearcher-via-searxng) (A Deep Research equivalent endpoint with Ollama support for running locally) ### Cloud @@ -570,6 +571,7 @@ See the [API documentation](./docs/api.md) for all endpoints. - [Alfred Ollama](https://github.com/zeitlings/alfred-ollama) (Alfred Workflow) - [TextLLaMA](https://github.com/adarshM84/TextLLaMA) A Chrome Extension that helps you write emails, correct grammar, and translate into any language - [Simple-Discord-AI](https://github.com/zyphixor/simple-discord-ai) +- [LLM Telegram Bot](https://github.com/innightwolfsleep/llm_telegram_bot) (telegram bot, primarily for RP. Oobabooga-like buttons, [A1111](https://github.com/AUTOMATIC1111/stable-diffusion-webui) API integration, etc.) ### Supported backends diff --git a/docs/windows.md b/docs/windows.md index 2a0d08d9..018cc41d 100644 --- a/docs/windows.md +++ b/docs/windows.md @@ -55,7 +55,7 @@ Here's a quick example showing API access from `powershell` ## Troubleshooting Ollama on Windows stores files in a few different locations. 
You can view them in -the explorer window by hitting `+R` and type in: +the explorer window by hitting `+R` and type in: - `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates - *app.log* contains most resent logs from the GUI application - *server.log* contains the most recent server logs diff --git a/format/format_test.go b/format/format_test.go index bff32780..f6aff2dc 100644 --- a/format/format_test.go +++ b/format/format_test.go @@ -12,6 +12,9 @@ func TestHumanNumber(t *testing.T) { testCases := []testCase{ {0, "0"}, + {999, "999"}, + {1000, "1K"}, + {1001, "1K"}, {1000000, "1M"}, {125000000, "125M"}, {500500000, "500.50M"}, diff --git a/kvcache/causal_test.go b/kvcache/causal_test.go index 0b614df0..874e4743 100644 --- a/kvcache/causal_test.go +++ b/kvcache/causal_test.go @@ -305,6 +305,10 @@ func (b *testBackend) NewContext() ml.Context { return &testContext{} } +func (b *testBackend) SystemInfo() string { + return "not implemented" +} + type testContext struct{} func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor { diff --git a/llama/patches/0018-use-std-filesystem-path-instead-of-wstring.patch b/llama/patches/0018-use-std-filesystem-path-instead-of-wstring.patch new file mode 100644 index 00000000..749cfbba --- /dev/null +++ b/llama/patches/0018-use-std-filesystem-path-instead-of-wstring.patch @@ -0,0 +1,285 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: jmorganca +Date: Sun, 16 Feb 2025 20:00:22 -0500 +Subject: [PATCH] use std::filesystem::path instead of wstring + +--- + ggml/src/ggml-backend-reg.cpp | 116 ++++++++++++---------------------- + 1 file changed, 40 insertions(+), 76 deletions(-) + +diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp +index 84b21dd8..de78feae 100644 +--- a/ggml/src/ggml-backend-reg.cpp ++++ b/ggml/src/ggml-backend-reg.cpp +@@ -72,16 +72,6 @@ + # pragma clang diagnostic ignored "-Wdeprecated-declarations" + #endif + +-static 
std::wstring utf8_to_utf16(const std::string & str) { +- std::wstring_convert> converter; +- return converter.from_bytes(str); +-} +- +-static std::string utf16_to_utf8(const std::wstring & str) { +- std::wstring_convert> converter; +- return converter.to_bytes(str); +-} +- + #if defined(__clang__) + # pragma clang diagnostic pop + #endif +@@ -96,12 +86,12 @@ struct dl_handle_deleter { + } + }; + +-static dl_handle * dl_load_library(const std::wstring & path) { ++static dl_handle * dl_load_library(const std::filesystem::path & path) { + // suppress error dialogs for missing DLLs + DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); + SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); + +- HMODULE handle = LoadLibraryW(path.c_str()); ++ HMODULE handle = LoadLibraryW(path.wstring().c_str()); + + SetErrorMode(old_mode); + +@@ -129,8 +119,8 @@ struct dl_handle_deleter { + } + }; + +-static void * dl_load_library(const std::wstring & path) { +- dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL); ++static void * dl_load_library(const std::filesystem::path & path) { ++ dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL); + + return handle; + } +@@ -222,11 +212,11 @@ struct ggml_backend_registry { + ); + } + +- ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) { ++ ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) { + dl_handle_ptr handle { dl_load_library(path) }; + if (!handle) { + if (!silent) { +- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str()); ++ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str()); + } + return nullptr; + } +@@ -234,7 +224,7 @@ struct ggml_backend_registry { + auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score"); + if (score_fn && score_fn() == 0) { + if (!silent) { +- GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, 
utf16_to_utf8(path).c_str()); ++ GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str()); + } + return nullptr; + } +@@ -242,7 +232,7 @@ struct ggml_backend_registry { + auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init"); + if (!backend_init_fn) { + if (!silent) { +- GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str()); ++ GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str()); + } + return nullptr; + } +@@ -251,16 +241,16 @@ struct ggml_backend_registry { + if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) { + if (!silent) { + if (!reg) { +- GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str()); ++ GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str()); + } else { + GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n", +- __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION); ++ __func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION); + } + } + return nullptr; + } + +- GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str()); ++ GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str()); + + register_backend(reg, score_fn ? 
score_fn() : -1, std::move(handle)); + +@@ -396,14 +386,14 @@ ggml_backend_t ggml_backend_init_best(void) { + + // Dynamic loading + ggml_backend_reg_t ggml_backend_load(const char * path) { +- return get_reg().load_backend(utf8_to_utf16(path), false); ++ return get_reg().load_backend(path, false); + } + + void ggml_backend_unload(ggml_backend_reg_t reg) { + get_reg().unload_backend(reg, true); + } + +-static std::wstring get_executable_path() { ++static std::filesystem::path get_executable_path() { + #if defined(__APPLE__) + // get executable path + std::vector path; +@@ -415,15 +405,9 @@ static std::wstring get_executable_path() { + } + path.resize(size); + } +- std::string base_path(path.data(), size); +- // remove executable name +- auto last_slash = base_path.find_last_of('/'); +- if (last_slash != std::string::npos) { +- base_path = base_path.substr(0, last_slash); +- } +- return utf8_to_utf16(base_path + "/"); ++ ++ return std::filesystem::path(path.data()).parent_path(); + #elif defined(__linux__) || defined(__FreeBSD__) +- std::string base_path = "."; + std::vector path(1024); + while (true) { + // get executable path +@@ -436,76 +420,56 @@ static std::wstring get_executable_path() { + break; + } + if (len < (ssize_t) path.size()) { +- base_path = std::string(path.data(), len); +- // remove executable name +- auto last_slash = base_path.find_last_of('/'); +- if (last_slash != std::string::npos) { +- base_path = base_path.substr(0, last_slash); +- } +- break; ++ return std::filesystem::path(path.data()).parent_path(); + } + path.resize(path.size() * 2); + } +- +- return utf8_to_utf16(base_path + "/"); + #elif defined(_WIN32) + std::vector path(MAX_PATH); + DWORD len = GetModuleFileNameW(NULL, path.data(), path.size()); + if (len == 0) { + return {}; + } +- std::wstring base_path(path.data(), len); +- // remove executable name +- auto last_slash = base_path.find_last_of('\\'); +- if (last_slash != std::string::npos) { +- base_path = base_path.substr(0, 
last_slash); +- } +- return base_path + L"\\"; +-#else +- return {}; +-#endif +-} + +-static std::wstring backend_filename_prefix() { +-#ifdef _WIN32 +- return L"ggml-"; ++ return std::filesystem::path(path.data()).parent_path(); + #else +- return L"libggml-"; ++ return {}; + #endif + } + +-static std::wstring backend_filename_suffix() { ++static std::string backend_filename_prefix() { + #ifdef _WIN32 +- return L".dll"; ++ return "ggml-"; + #else +- return L".so"; ++ return "libggml-"; + #endif + } + +-static std::wstring path_separator() { ++static std::string backend_filename_suffix() { + #ifdef _WIN32 +- return L"\\"; ++ return ".dll"; + #else +- return L"/"; ++ return ".so"; + #endif + } + + static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) { + // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths + // TODO: search system paths +- std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-"; +- std::vector search_paths; ++ namespace fs = std::filesystem; ++ std::string file_prefix = backend_filename_prefix() + name + "-"; ++ std::vector search_paths; ++ + if (user_search_path == nullptr) { +- search_paths.push_back(L"." 
+ path_separator()); ++ search_paths.push_back(fs::current_path()); + search_paths.push_back(get_executable_path()); + } else { +- search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator()); ++ search_paths.push_back(fs::u8path(user_search_path)); + } + + int best_score = 0; +- std::wstring best_path; ++ fs::path best_path; + +- namespace fs = std::filesystem; + for (const auto & search_path : search_paths) { + if (!fs::exists(search_path)) { + continue; +@@ -514,31 +478,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, + for (const auto & entry : dir_it) { + try { + if (entry.is_regular_file()) { +- std::wstring filename = entry.path().filename().wstring(); +- std::wstring ext = entry.path().extension().wstring(); ++ std::string filename = entry.path().filename().string(); ++ std::string ext = entry.path().extension().string(); + if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) { +- dl_handle_ptr handle { dl_load_library(entry.path().wstring()) }; ++ dl_handle_ptr handle { dl_load_library(entry.path()) }; + if (!handle) { +- GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str()); ++ GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str()); + continue; + } + + auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score"); + if (!score_fn) { +- GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str()); ++ GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str()); + continue; + } + + int s = score_fn(); +- GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s); ++ GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s); + if (s > best_score) { + best_score = s; +- best_path = entry.path().wstring(); ++ best_path = 
entry.path(); + } + } + } + } catch (const std::exception & e) { +- GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what()); ++ GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what()); + } + } + } +@@ -546,7 +510,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, + if (best_score == 0) { + // try to load the base backend + for (const auto & search_path : search_paths) { +- std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix(); ++ fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix()); + if (fs::exists(path)) { + return get_reg().load_backend(path, silent); + } diff --git a/ml/backend.go b/ml/backend.go index 0e99ab5a..aebf86f7 100644 --- a/ml/backend.go +++ b/ml/backend.go @@ -23,6 +23,7 @@ type Backend interface { Config() Config Get(name string) Tensor NewContext() Context + SystemInfo() string } var backends = make(map[string]func(*os.File) (Backend, error)) diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index 6a727a60..5ba36361 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -1,11 +1,27 @@ package ggml -// #cgo CPPFLAGS: -I${SRCDIR}/ggml/include -// #include -// #include -// #include "ggml.h" -// #include "ggml-cpu.h" -// #include "ggml-backend.h" +/* +#cgo CPPFLAGS: -I${SRCDIR}/ggml/include +#include +#include +#include "ggml.h" +#include "ggml-cpu.h" +#include "ggml-backend.h" +static struct ggml_backend_feature * getBackendFeatures(void *fp, ggml_backend_reg_t reg) {return ((ggml_backend_get_features_t)(fp))(reg);} +static struct ggml_backend_feature * getNextBackendFeatures(struct ggml_backend_feature * feature) { return &feature[1];} + +typedef enum {COMP_UNKNOWN,COMP_GCC,COMP_CLANG} COMPILER; +COMPILER inline get_compiler() { +#if defined(__clang__) + return COMP_CLANG; +#elif 
defined(__GNUC__) + return COMP_GCC; +#else + return COMP_UNKNOWN; +#endif +} + +*/ import "C" import ( @@ -626,3 +642,36 @@ func (t *Tensor) Conv2D(ctx ml.Context, t2 ml.Tensor, s0, s1, p0, p1, d0, d1 int t: C.ggml_conv_2d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.int(s0), C.int(s1), C.int(p0), C.int(p1), C.int(d0), C.int(d1)), } } + +// SystemInfo lists, for each registered ggml backend exposing ggml_backend_get_features, its name and feature name/value pairs, followed by the compiler cgo was built with. +func (b *Backend) SystemInfo() string { + var compiler string + switch C.get_compiler() { + case C.COMP_UNKNOWN: + compiler = "cgo(unknown_compiler)" + case C.COMP_GCC: + compiler = "cgo(gcc)" + case C.COMP_CLANG: + compiler = "cgo(clang)" + } + + var s string + // allocate the symbol name once; a defer inside the loop would hold one C string per backend until return + fName := C.CString("ggml_backend_get_features") + defer C.free(unsafe.Pointer(fName)) + for i := range C.ggml_backend_reg_count() { + reg := C.ggml_backend_reg_get(i) + get_features_fn := C.ggml_backend_reg_get_proc_address(reg, fName) + if get_features_fn != nil { + s += C.GoString(C.ggml_backend_reg_name(reg)) + s += " : " + for features := C.getBackendFeatures(get_features_fn, reg); features.name != nil; features = C.getNextBackendFeatures(features) { + s += C.GoString(features.name) + s += " = " + s += C.GoString(features.value) + s += " | " + } + } + } + return s + compiler +} diff --git a/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp b/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp index 84b21dd8..de78feae 100644 --- a/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp +++ b/ml/backend/ggml/ggml/src/ggml-backend-reg.cpp @@ -72,16 +72,6 @@ # pragma clang diagnostic ignored "-Wdeprecated-declarations" #endif -static std::wstring utf8_to_utf16(const std::string & str) { - std::wstring_convert> converter; - return converter.from_bytes(str); -} - -static std::string utf16_to_utf8(const std::wstring & str) { - std::wstring_convert> converter; - return converter.to_bytes(str); -} - #if defined(__clang__) # pragma clang diagnostic pop #endif @@ -96,12 +86,12 @@ struct dl_handle_deleter { } }; -static dl_handle * 
dl_load_library(const std::filesystem::path & path) { // suppress error dialogs for missing DLLs DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); - HMODULE handle = LoadLibraryW(path.c_str()); + HMODULE handle = LoadLibraryW(path.wstring().c_str()); SetErrorMode(old_mode); @@ -129,8 +119,8 @@ struct dl_handle_deleter { } }; -static void * dl_load_library(const std::wstring & path) { - dl_handle * handle = dlopen(utf16_to_utf8(path).c_str(), RTLD_NOW | RTLD_LOCAL); +static void * dl_load_library(const std::filesystem::path & path) { + dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL); return handle; } @@ -222,11 +212,11 @@ struct ggml_backend_registry { ); } - ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) { + ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) { dl_handle_ptr handle { dl_load_library(path) }; if (!handle) { if (!silent) { - GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(path).c_str()); + GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path.string().c_str()); } return nullptr; } @@ -234,7 +224,7 @@ struct ggml_backend_registry { auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score"); if (score_fn && score_fn() == 0) { if (!silent) { - GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, utf16_to_utf8(path).c_str()); + GGML_LOG_INFO("%s: backend %s is not supported on this system\n", __func__, path.string().c_str()); } return nullptr; } @@ -242,7 +232,7 @@ struct ggml_backend_registry { auto backend_init_fn = (ggml_backend_init_t) dl_get_sym(handle.get(), "ggml_backend_init"); if (!backend_init_fn) { if (!silent) { - GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, utf16_to_utf8(path).c_str()); + GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s\n", __func__, path.string().c_str()); } return nullptr; } @@ 
-251,16 +241,16 @@ struct ggml_backend_registry { if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) { if (!silent) { if (!reg) { - GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, utf16_to_utf8(path).c_str()); + GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path.string().c_str()); } else { GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n", - __func__, utf16_to_utf8(path).c_str(), reg->api_version, GGML_BACKEND_API_VERSION); + __func__, path.string().c_str(), reg->api_version, GGML_BACKEND_API_VERSION); } } return nullptr; } - GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str()); + GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path.string().c_str()); register_backend(reg, score_fn ? score_fn() : -1, std::move(handle)); @@ -396,14 +386,14 @@ ggml_backend_t ggml_backend_init_best(void) { // Dynamic loading ggml_backend_reg_t ggml_backend_load(const char * path) { - return get_reg().load_backend(utf8_to_utf16(path), false); + return get_reg().load_backend(path, false); } void ggml_backend_unload(ggml_backend_reg_t reg) { get_reg().unload_backend(reg, true); } -static std::wstring get_executable_path() { +static std::filesystem::path get_executable_path() { #if defined(__APPLE__) // get executable path std::vector path; @@ -415,15 +405,9 @@ static std::wstring get_executable_path() { } path.resize(size); } - std::string base_path(path.data(), size); - // remove executable name - auto last_slash = base_path.find_last_of('/'); - if (last_slash != std::string::npos) { - base_path = base_path.substr(0, last_slash); - } - return utf8_to_utf16(base_path + "/"); + + return std::filesystem::path(path.data()).parent_path(); #elif defined(__linux__) || defined(__FreeBSD__) - std::string base_path 
= "."; std::vector path(1024); while (true) { // get executable path @@ -436,76 +420,56 @@ static std::wstring get_executable_path() { break; } if (len < (ssize_t) path.size()) { - base_path = std::string(path.data(), len); - // remove executable name - auto last_slash = base_path.find_last_of('/'); - if (last_slash != std::string::npos) { - base_path = base_path.substr(0, last_slash); - } - break; + return std::filesystem::path(path.data()).parent_path(); } path.resize(path.size() * 2); } - - return utf8_to_utf16(base_path + "/"); #elif defined(_WIN32) std::vector path(MAX_PATH); DWORD len = GetModuleFileNameW(NULL, path.data(), path.size()); if (len == 0) { return {}; } - std::wstring base_path(path.data(), len); - // remove executable name - auto last_slash = base_path.find_last_of('\\'); - if (last_slash != std::string::npos) { - base_path = base_path.substr(0, last_slash); - } - return base_path + L"\\"; + + return std::filesystem::path(path.data()).parent_path(); #else return {}; #endif } -static std::wstring backend_filename_prefix() { +static std::string backend_filename_prefix() { #ifdef _WIN32 - return L"ggml-"; + return "ggml-"; #else - return L"libggml-"; + return "libggml-"; #endif } -static std::wstring backend_filename_suffix() { +static std::string backend_filename_suffix() { #ifdef _WIN32 - return L".dll"; + return ".dll"; #else - return L".so"; -#endif -} - -static std::wstring path_separator() { -#ifdef _WIN32 - return L"\\"; -#else - return L"/"; + return ".so"; #endif } static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) { // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths // TODO: search system paths - std::wstring file_prefix = backend_filename_prefix() + utf8_to_utf16(name) + L"-"; - std::vector search_paths; + namespace fs = std::filesystem; + std::string file_prefix = backend_filename_prefix() + name + "-"; + std::vector search_paths; + if 
(user_search_path == nullptr) { - search_paths.push_back(L"." + path_separator()); + search_paths.push_back(fs::current_path()); search_paths.push_back(get_executable_path()); } else { - search_paths.push_back(utf8_to_utf16(user_search_path) + path_separator()); + search_paths.push_back(fs::u8path(user_search_path)); } int best_score = 0; - std::wstring best_path; + fs::path best_path; - namespace fs = std::filesystem; for (const auto & search_path : search_paths) { if (!fs::exists(search_path)) { continue; @@ -514,31 +478,31 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, for (const auto & entry : dir_it) { try { if (entry.is_regular_file()) { - std::wstring filename = entry.path().filename().wstring(); - std::wstring ext = entry.path().extension().wstring(); + std::string filename = entry.path().filename().string(); + std::string ext = entry.path().extension().string(); if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) { - dl_handle_ptr handle { dl_load_library(entry.path().wstring()) }; + dl_handle_ptr handle { dl_load_library(entry.path()) }; if (!handle) { - GGML_LOG_ERROR("%s: failed to load %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str()); + GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str()); continue; } auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score"); if (!score_fn) { - GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str()); + GGML_LOG_DEBUG("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str()); continue; } int s = score_fn(); - GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), s); + GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s); if (s > best_score) { best_score = s; - best_path = entry.path().wstring(); + best_path = entry.path(); } } } 
} catch (const std::exception & e) { - GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, utf16_to_utf8(entry.path().wstring()).c_str(), e.what()); + GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, entry.path().string().c_str(), e.what()); } } } @@ -546,7 +510,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, if (best_score == 0) { // try to load the base backend for (const auto & search_path : search_paths) { - std::wstring path = search_path + backend_filename_prefix() + utf8_to_utf16(name) + backend_filename_suffix(); + fs::path path = fs::path(search_path) / (backend_filename_prefix() + name + backend_filename_suffix()); if (fs::exists(path)) { return get_reg().load_backend(path, silent); } diff --git a/model/model.go b/model/model.go index 8a8c9b29..5eedc9bd 100644 --- a/model/model.go +++ b/model/model.go @@ -21,6 +21,7 @@ import ( _ "github.com/ollama/ollama/ml/backend" ) +// Options contains the inputs for a model forward pass type Options struct { Inputs []int32 Positions []int32 @@ -34,11 +35,13 @@ type config struct { Cache kvcache.Cache } +// Base implements the common fields and methods for all models type Base struct { b ml.Backend config } +// Backend returns the underlying backend that will run the model func (m *Base) Backend() ml.Backend { return m.b } @@ -47,6 +50,7 @@ func (m *Base) Config() config { return m.config } +// Model implements a specific model architecture, defining the forward pass and any model-specific configuration type Model interface { Forward(ml.Context, Options) (ml.Tensor, error) @@ -56,6 +60,7 @@ type Model interface { var models = make(map[string]func(ml.Config) (Model, error)) +// Register registers a model constructor for the given architecture func Register(name string, f func(ml.Config) (Model, error)) { if _, ok := models[name]; ok { panic("model: model already registered") @@ -64,8 +69,9 @@ func Register(name string, f func(ml.Config) (Model, error)) { models[name] = f 
} -func New(s string) (Model, error) { - r, err := os.Open(s) +// New initializes a new model instance with the provided configuration based on the metadata in the model file +func New(modelPath string) (Model, error) { + r, err := os.Open(modelPath) if err != nil { return nil, err } diff --git a/progress/progress.go b/progress/progress.go index 102830a8..0cd0ea1f 100644 --- a/progress/progress.go +++ b/progress/progress.go @@ -1,6 +1,7 @@ package progress import ( + "bufio" "fmt" "io" "sync" @@ -13,7 +14,8 @@ type State interface { type Progress struct { mu sync.Mutex - w io.Writer + // buffer output to minimize flickering on all terminals + w *bufio.Writer pos int @@ -22,7 +24,7 @@ type Progress struct { } func NewProgress(w io.Writer) *Progress { - p := &Progress{w: w} + p := &Progress{w: bufio.NewWriter(w)} go p.start() return p } @@ -48,11 +50,14 @@ func (p *Progress) Stop() bool { stopped := p.stop() if stopped { fmt.Fprint(p.w, "\n") + p.w.Flush() } return stopped } func (p *Progress) StopAndClear() bool { + defer p.w.Flush() + fmt.Fprint(p.w, "\033[?25l") defer fmt.Fprint(p.w, "\033[?25h") @@ -81,20 +86,24 @@ func (p *Progress) render() { p.mu.Lock() defer p.mu.Unlock() + defer p.w.Flush() + + // eliminate flickering on terminals that support synchronized output + fmt.Fprint(p.w, "\033[?2026h") + defer fmt.Fprint(p.w, "\033[?2026l") + fmt.Fprint(p.w, "\033[?25l") defer fmt.Fprint(p.w, "\033[?25h") - // clear already rendered progress lines - for i := range p.pos { - if i > 0 { - fmt.Fprint(p.w, "\033[A") - } - fmt.Fprint(p.w, "\033[2K\033[1G") + // move the cursor back to the beginning + for range p.pos - 1 { + fmt.Fprint(p.w, "\033[A") } + fmt.Fprint(p.w, "\033[1G") // render progress lines for i, state := range p.states { - fmt.Fprint(p.w, state.String()) + fmt.Fprint(p.w, state.String(), "\033[K") if i < len(p.states)-1 { fmt.Fprint(p.w, "\n") } diff --git a/runner/llamarunner/runner.go b/runner/llamarunner/runner.go index 93d6bfab..72873ec4 100644 --- 
a/runner/llamarunner/runner.go +++ b/runner/llamarunner/runner.go @@ -845,8 +845,6 @@ func (s *Server) loadModel( threads int, multiUserCache bool, ) { - llama.BackendInit() - var err error s.model, err = llama.LoadModelFromFile(mpath, params) if err != nil { @@ -932,6 +930,8 @@ func Execute(args []string) error { }) slog.SetDefault(slog.New(handler)) slog.Info("starting go runner") + + llama.BackendInit() slog.Info("system", "info", llama.PrintSystemInfo(), "threads", *threads) server := &Server{ diff --git a/runner/ollamarunner/runner.go b/runner/ollamarunner/runner.go index d5a3b340..6d45050c 100644 --- a/runner/ollamarunner/runner.go +++ b/runner/ollamarunner/runner.go @@ -813,6 +813,8 @@ func (s *Server) loadModel( panic(err) } + slog.Info("system", "info", s.model.Backend().SystemInfo() /* "threads", *threads */) + // TODO(jessegross): LoRA loading if lpath.String() != "" { panic("loras are not yet implemented") @@ -881,7 +883,6 @@ func Execute(args []string) error { }) slog.SetDefault(slog.New(handler)) slog.Info("starting ollama engine") - // TODO(jessegross): Some system info would be useful server := &Server{ batchSize: *batchSize,