From fd5971be0bb11d1b5903fc6778c329b4fd93d569 Mon Sep 17 00:00:00 2001 From: "Wang,Zhe" Date: Fri, 24 May 2024 11:18:27 +0800 Subject: [PATCH 01/13] support ollama run on Intel GPUs --- gpu/gpu.go | 126 +++++++++++++++------ gpu/gpu_info.h | 1 + gpu/gpu_info_oneapi.c | 214 +++++++++++++++++++++++++++++++++++ gpu/gpu_info_oneapi.h | 211 ++++++++++++++++++++++++++++++++++ gpu/gpu_oneapi.go | 21 ++++ llm/generate/gen_linux.sh | 30 +++++ llm/generate/gen_windows.ps1 | 44 +++++++ 7 files changed, 615 insertions(+), 32 deletions(-) create mode 100644 gpu/gpu_info_oneapi.c create mode 100644 gpu/gpu_info_oneapi.h create mode 100644 gpu/gpu_oneapi.go diff --git a/gpu/gpu.go b/gpu/gpu.go index 781e23df..ecf135dd 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -16,6 +16,7 @@ import ( "os" "path/filepath" "runtime" + "strconv" "strings" "sync" "unsafe" @@ -28,6 +29,7 @@ type handles struct { deviceCount int cudart *C.cudart_handle_t nvcuda *C.nvcuda_handle_t + oneapi *C.oneapi_handle_t } const ( @@ -80,6 +82,15 @@ var NvcudaWindowsGlobs = []string{ "c:\\windows\\system*\\nvcuda.dll", } +var OneapiWindowsGlobs = []string{ + "c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll", +} + +var OneapiLinuxGlobs = []string{ + "/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*", + "/usr/lib*/libze_intel_gpu.so*", +} + // Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed. // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices. 
var CudaTegra string = os.Getenv("JETSON_JETPACK") @@ -94,6 +105,8 @@ func initGPUHandles() *handles { var cudartMgmtPatterns []string var nvcudaMgmtName string var nvcudaMgmtPatterns []string + var oneapiMgmtName string + var oneapiMgmtPatterns []string tmpDir, _ := PayloadsDir() switch runtime.GOOS { @@ -105,6 +118,8 @@ func initGPUHandles() *handles { // Aligned with driver, we can't carry as payloads nvcudaMgmtName = "nvcuda.dll" nvcudaMgmtPatterns = NvcudaWindowsGlobs + oneapiMgmtName = "ze_intel_gpu64.dll" + oneapiMgmtPatterns = OneapiWindowsGlobs case "linux": cudartMgmtName = "libcudart.so*" if tmpDir != "" { @@ -115,6 +130,8 @@ func initGPUHandles() *handles { // Aligned with driver, we can't carry as payloads nvcudaMgmtName = "libcuda.so*" nvcudaMgmtPatterns = NvcudaLinuxGlobs + oneapiMgmtName = "libze_intel_gpu.so" + oneapiMgmtPatterns = OneapiLinuxGlobs default: return gpuHandles } @@ -141,6 +158,18 @@ func initGPUHandles() *handles { return gpuHandles } } + + oneapiLibPaths := FindGPULibs(oneapiMgmtName, oneapiMgmtPatterns) + if len(oneapiLibPaths) > 0 { + deviceCount, oneapi, libPath := LoadOneapiMgmt(oneapiLibPaths) + if oneapi != nil { + slog.Debug("detected Intel GPUs", "library", libPath, "count", deviceCount) + gpuHandles.oneapi = oneapi + gpuHandles.deviceCount = deviceCount + return gpuHandles + } + } + return gpuHandles } @@ -181,39 +210,53 @@ func GetGPUInfo() GpuInfoList { if cpuVariant == "" && runtime.GOARCH == "amd64" { continue } - gpuInfo := GpuInfo{ - Library: "cuda", - } - var driverMajor int - var driverMinor int - if gpuHandles.cudart != nil { - C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo) - } else { - C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo) - driverMajor = int(gpuHandles.nvcuda.driver_major) - driverMinor = int(gpuHandles.nvcuda.driver_minor) - } - if memInfo.err != nil { - slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err)) - C.free(unsafe.Pointer(memInfo.err)) - 
continue - } - if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) { - slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor)) - continue - } - gpuInfo.TotalMemory = uint64(memInfo.total) - gpuInfo.FreeMemory = uint64(memInfo.free) - gpuInfo.ID = C.GoString(&memInfo.gpu_id[0]) - gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor) - gpuInfo.MinimumMemory = cudaMinimumMemory - gpuInfo.DependencyPath = depPath - gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) - gpuInfo.DriverMajor = int(driverMajor) - gpuInfo.DriverMinor = int(driverMinor) + if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil { + gpuInfo := GpuInfo{ + Library: "cuda", + } + var driverMajor int + var driverMinor int + if gpuHandles.cudart != nil { + C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo) + } else { + C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo) + driverMajor = int(gpuHandles.nvcuda.driver_major) + driverMinor = int(gpuHandles.nvcuda.driver_minor) + } + if memInfo.err != nil { + slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err)) + C.free(unsafe.Pointer(memInfo.err)) + continue + } + if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) { + slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. 
Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor)) + continue + } + gpuInfo.TotalMemory = uint64(memInfo.total) + gpuInfo.FreeMemory = uint64(memInfo.free) + gpuInfo.ID = C.GoString(&memInfo.gpu_id[0]) + gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor) + gpuInfo.MinimumMemory = cudaMinimumMemory + gpuInfo.DependencyPath = depPath + gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) + gpuInfo.DriverMajor = int(driverMajor) + gpuInfo.DriverMinor = int(driverMinor) - // TODO potentially sort on our own algorithm instead of what the underlying GPU library does... - resp = append(resp, gpuInfo) + // TODO potentially sort on our own algorithm instead of what the underlying GPU library does... + resp = append(resp, gpuInfo) + } + if gpuHandles.oneapi != nil { + gpuInfo := GpuInfo{ + Library: "oneapi", + } + C.oneapi_check_vram(*gpuHandles.oneapi, &memInfo) + var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend. 
+ memInfo.free = C.uint64_t(totalFreeMem) + gpuInfo.TotalMemory = uint64(memInfo.total) + gpuInfo.FreeMemory = uint64(memInfo.free) + gpuInfo.ID = strconv.Itoa(i) + resp = append(resp, gpuInfo) + } } // Then AMD @@ -348,6 +391,23 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) { return 0, nil, "" } +func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) { + var resp C.oneapi_init_resp_t + resp.oh.verbose = getVerboseState() + for _, libPath := range oneapiLibPaths { + lib := C.CString(libPath) + defer C.free(unsafe.Pointer(lib)) + C.oneapi_init(lib, &resp) + if resp.err != nil { + slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err)) + C.free(unsafe.Pointer(resp.err)) + } else { + return int(resp.num_devices), &resp.oh, libPath + } + } + return 0, nil, "" +} + func getVerboseState() C.uint16_t { if envconfig.Debug { return C.uint16_t(1) @@ -368,6 +428,8 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) { return cudaGetVisibleDevicesEnv(l) case "rocm": return rocmGetVisibleDevicesEnv(l) + case "oneapi": + return oneapiGetVisibleDevicesEnv(l) default: slog.Debug("no filter required for library " + l[0].Library) return "", "" diff --git a/gpu/gpu_info.h b/gpu/gpu_info.h index 2fa86f8d..482b81a6 100644 --- a/gpu/gpu_info.h +++ b/gpu/gpu_info.h @@ -62,6 +62,7 @@ void cpu_check_ram(mem_info_t *resp); #include "gpu_info_cudart.h" #include "gpu_info_nvcuda.h" +#include "gpu_info_oneapi.h" #endif // __GPU_INFO_H__ #endif // __APPLE__ \ No newline at end of file diff --git a/gpu/gpu_info_oneapi.c b/gpu/gpu_info_oneapi.c new file mode 100644 index 00000000..4be90e80 --- /dev/null +++ b/gpu/gpu_info_oneapi.c @@ -0,0 +1,214 @@ +#ifndef __APPLE__ + +#include "gpu_info_oneapi.h" + +#include + +void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp) +{ + ze_result_t ret; + resp->err = NULL; + const int buflen = 256; + char buf[buflen + 1]; + int i; 
+ struct lookup + { + char *s; + void **p; + } l[] = { + {"zesInit", (void *)&resp->oh.zesInit}, + {"zesDriverGet", (void *)&resp->oh.zesDriverGet}, + {"zesDeviceGet", (void *)&resp->oh.zesDeviceGet}, + {"zesDeviceGetProperties", (void *)&resp->oh.zesDeviceGetProperties}, + {"zesDeviceEnumMemoryModules", + (void *)&resp->oh.zesDeviceEnumMemoryModules}, + {"zesMemoryGetProperties", (void *)&resp->oh.zesMemoryGetProperties}, + {"zesMemoryGetState", (void *)&resp->oh.zesMemoryGetState}, + {NULL, NULL}, + }; + + resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY); + if (!resp->oh.handle) + { + char *msg = LOAD_ERR(); + snprintf(buf, buflen, + "Unable to load %s library to query for Intel GPUs: %s\n", + oneapi_lib_path, msg); + free(msg); + resp->err = strdup(buf); + return; + } + + // TODO once we've squashed the remaining corner cases remove this log + LOG(resp->oh.verbose, + "wiring Level-Zero management library functions in %s\n", + oneapi_lib_path); + + for (i = 0; l[i].s != NULL; i++) + { + // TODO once we've squashed the remaining corner cases remove this log + LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s); + + *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s); + if (!l[i].p) + { + resp->oh.handle = NULL; + char *msg = LOAD_ERR(); + LOG(resp->oh.verbose, "dlerr: %s\n", msg); + UNLOAD_LIBRARY(resp->oh.handle); + snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg); + free(msg); + resp->err = strdup(buf); + return; + } + } + + ret = (*resp->oh.zesInit)(0); + if (ret != ZE_RESULT_SUCCESS) + { + LOG(resp->oh.verbose, "zesInit err: %d\n", ret); + UNLOAD_LIBRARY(resp->oh.handle); + resp->oh.handle = NULL; + snprintf(buf, buflen, "oneapi vram init failure: %d", ret); + resp->err = strdup(buf); + } + + (*resp->oh.zesDriverGet)(&resp->num_devices, NULL); + + return; +} + +void oneapi_check_vram(oneapi_handle_t h, mem_info_t *resp) +{ + ze_result_t ret; + resp->err = NULL; + uint64_t totalMem = 0; + uint64_t usedMem = 0; + const int buflen = 256; + char 
buf[buflen + 1]; + int i, d, m; + + if (h.handle == NULL) + { + resp->err = strdup("Level-Zero handle not initialized"); + return; + } + + uint32_t driversCount = 0; + ret = (*h.zesDriverGet)(&driversCount, NULL); + if (ret != ZE_RESULT_SUCCESS) + { + snprintf(buf, buflen, "unable to get driver count: %d", ret); + resp->err = strdup(buf); + return; + } + LOG(h.verbose, "discovered %d Level-Zero drivers\n", driversCount); + + zes_driver_handle_t *allDrivers = + malloc(driversCount * sizeof(zes_driver_handle_t)); + (*h.zesDriverGet)(&driversCount, allDrivers); + + resp->total = 0; + resp->free = 0; + + for (d = 0; d < driversCount; d++) + { + uint32_t deviceCount = 0; + ret = (*h.zesDeviceGet)(allDrivers[d], &deviceCount, NULL); + if (ret != ZE_RESULT_SUCCESS) + { + snprintf(buf, buflen, "unable to get device count: %d", ret); + resp->err = strdup(buf); + free(allDrivers); + return; + } + + LOG(h.verbose, "discovered %d Level-Zero devices\n", deviceCount); + + zes_device_handle_t *devices = + malloc(deviceCount * sizeof(zes_device_handle_t)); + (*h.zesDeviceGet)(allDrivers[d], &deviceCount, devices); + + for (i = 0; i < deviceCount; i++) + { + zes_device_ext_properties_t ext_props; + ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES; + ext_props.pNext = NULL; + + zes_device_properties_t props; + props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES; + props.pNext = &ext_props; + + ret = (*h.zesDeviceGetProperties)(devices[i], &props); + if (ret != ZE_RESULT_SUCCESS) + { + snprintf(buf, buflen, "unable to get device properties: %d", ret); + resp->err = strdup(buf); + free(allDrivers); + free(devices); + return; + } + + if (h.verbose) + { + // When in verbose mode, report more information about + // the card we discover. 
+ LOG(h.verbose, "[%d] oneAPI device name: %s\n", i, + props.modelName); + LOG(h.verbose, "[%d] oneAPI brand: %s\n", i, + props.brandName); + LOG(h.verbose, "[%d] oneAPI vendor: %s\n", i, + props.vendorName); + LOG(h.verbose, "[%d] oneAPI S/N: %s\n", i, + props.serialNumber); + LOG(h.verbose, "[%d] oneAPI board number: %s\n", i, + props.boardNumber); + } + + uint32_t memCount = 0; + ret = (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, NULL); + if (ret != ZE_RESULT_SUCCESS) + { + snprintf(buf, buflen, + "unable to enumerate Level-Zero memory modules: %d", ret); + resp->err = strdup(buf); + free(allDrivers); + free(devices); + return; + } + + LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount); + + zes_mem_handle_t *mems = malloc(memCount * sizeof(zes_mem_handle_t)); + (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, mems); + + for (m = 0; m < memCount; m++) + { + zes_mem_state_t state; + state.stype = ZES_STRUCTURE_TYPE_MEM_STATE; + state.pNext = NULL; + ret = (*h.zesMemoryGetState)(mems[m], &state); + if (ret != ZE_RESULT_SUCCESS) + { + snprintf(buf, buflen, "unable to get memory state: %d", ret); + resp->err = strdup(buf); + free(allDrivers); + free(devices); + free(mems); + return; + } + + resp->total += state.size; + resp->free += state.free; + } + + free(mems); + } + + free(devices); + } + + free(allDrivers); +} + +#endif // __APPLE__ diff --git a/gpu/gpu_info_oneapi.h b/gpu/gpu_info_oneapi.h new file mode 100644 index 00000000..9db9fae0 --- /dev/null +++ b/gpu/gpu_info_oneapi.h @@ -0,0 +1,211 @@ +#ifndef __APPLE__ +#ifndef __GPU_INFO_ONEAPI_H__ +#define __GPU_INFO_ONEAPI_H__ +#include "gpu_info.h" + +#define ZE_MAX_DEVICE_NAME 256 +#define ZE_MAX_DEVICE_UUID_SIZE 16 +#define ZES_STRING_PROPERTY_SIZE 64 +#define ZE_BIT(_i) (1 << _i) + +// Just enough typedef's to dlopen/dlsym for memory information +typedef enum ze_result_t +{ + ZE_RESULT_SUCCESS = 0, + // Other values omitted for now... 
+} ze_result_t; + +typedef uint8_t ze_bool_t; +typedef struct _zes_driver_handle_t *zes_driver_handle_t; +typedef struct _zes_device_handle_t *zes_device_handle_t; +typedef struct _zes_mem_handle_t *zes_mem_handle_t; + +typedef enum _ze_structure_type_t +{ + ZE_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff +} ze_structure_type_t; + +typedef enum _zes_structure_type_t +{ + ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x1, + ZES_STRUCTURE_TYPE_MEM_PROPERTIES = 0xb, + ZES_STRUCTURE_TYPE_MEM_STATE = 0x1e, + ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES = 0x2d, + ZES_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff +} zes_structure_type_t; + +typedef enum _zes_mem_type_t +{ + ZES_MEM_TYPE_FORCE_UINT32 = 0x7fffffff +} zes_mem_type_t; + +typedef enum _zes_mem_loc_t +{ + ZES_MEM_LOC_SYSTEM = 0, + ZES_MEM_LOC_DEVICE = 1, + ZES_MEM_LOC_FORCE_UINT32 = 0x7fffffff +} zes_mem_loc_t; + +typedef enum _zes_mem_health_t +{ + ZES_MEM_HEALTH_FORCE_UINT32 = 0x7fffffff +} zes_mem_health_t; + +typedef struct _ze_device_uuid_t +{ + uint8_t id[ZE_MAX_DEVICE_UUID_SIZE]; +} ze_device_uuid_t; + +typedef struct _zes_uuid_t +{ + uint8_t id[ZE_MAX_DEVICE_UUID_SIZE]; +} zes_uuid_t; + +typedef enum _ze_device_type_t +{ + ZE_DEVICE_TYPE_GPU = 1, + ZE_DEVICE_TYPE_CPU = 2, + ZE_DEVICE_TYPE_FPGA = 3, + ZE_DEVICE_TYPE_MCA = 4, + ZE_DEVICE_TYPE_VPU = 5, + ZE_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff +} ze_device_type_t; + +typedef enum _zes_device_type_t +{ + ZES_DEVICE_TYPE_GPU = 1, + ZES_DEVICE_TYPE_CPU = 2, + ZES_DEVICE_TYPE_FPGA = 3, + ZES_DEVICE_TYPE_MCA = 4, + ZES_DEVICE_TYPE_VPU = 5, + ZES_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff +} zes_device_type_t; + +typedef uint32_t ze_device_property_flags_t; +typedef enum _ze_device_property_flag_t +{ + ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0), + ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1), + ZE_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2), + ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = ZE_BIT(3), + ZE_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff +} ze_device_property_flag_t; + +typedef 
uint32_t zes_device_property_flags_t; +typedef enum _zes_device_property_flag_t +{ + ZES_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0), + ZES_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1), + ZES_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2), + ZES_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = ZE_BIT(3), + ZES_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff +} zes_device_property_flag_t; + +typedef struct _ze_device_properties_t +{ + ze_structure_type_t stype; + void *pNext; + ze_device_type_t type; + uint32_t vendorId; + uint32_t deviceId; + ze_device_property_flags_t flags; + uint32_t subdeviceId; + uint32_t coreClockRate; + uint64_t maxMemAllocSize; + uint32_t maxHardwareContexts; + uint32_t maxCommandQueuePriority; + uint32_t numThreadsPerEU; + uint32_t physicalEUSimdWidth; + uint32_t numEUsPerSubslice; + uint32_t numSubslicesPerSlice; + uint32_t numSlices; + uint64_t timerResolution; + uint32_t timestampValidBits; + uint32_t kernelTimestampValidBits; + ze_device_uuid_t uuid; + char name[ZE_MAX_DEVICE_NAME]; +} ze_device_properties_t; + +typedef struct _zes_device_properties_t +{ + zes_structure_type_t stype; + void *pNext; + ze_device_properties_t core; + uint32_t numSubdevices; + char serialNumber[ZES_STRING_PROPERTY_SIZE]; + char boardNumber[ZES_STRING_PROPERTY_SIZE]; + char brandName[ZES_STRING_PROPERTY_SIZE]; + char modelName[ZES_STRING_PROPERTY_SIZE]; + char vendorName[ZES_STRING_PROPERTY_SIZE]; + char driverVersion[ZES_STRING_PROPERTY_SIZE]; +} zes_device_properties_t; + +typedef struct _zes_device_ext_properties_t +{ + zes_structure_type_t stype; + void *pNext; + zes_uuid_t uuid; + zes_device_type_t type; + zes_device_property_flags_t flags; +} zes_device_ext_properties_t; + +typedef struct _zes_mem_properties_t +{ + zes_structure_type_t stype; + void *pNext; + zes_mem_type_t type; + ze_bool_t onSubdevice; + uint32_t subdeviceId; + zes_mem_loc_t location; + uint64_t physicalSize; + int32_t busWidth; + int32_t numChannels; +} zes_mem_properties_t; + +typedef struct 
_zes_mem_state_t +{ + zes_structure_type_t stype; + const void *pNext; + zes_mem_health_t health; + uint64_t free; + uint64_t size; +} zes_mem_state_t; + +typedef struct oneapi_handle +{ + void *handle; + uint16_t verbose; + ze_result_t (*zesInit)(int); + ze_result_t (*zesDriverGet)(uint32_t *pCount, zes_driver_handle_t *phDrivers); + ze_result_t (*zesDeviceGet)(zes_driver_handle_t hDriver, uint32_t *pCount, + zes_device_handle_t *phDevices); + ze_result_t (*zesDeviceGetProperties)(zes_device_handle_t hDevice, + zes_device_properties_t *pProperties); + ze_result_t (*zesDeviceEnumMemoryModules)(zes_device_handle_t hDevice, + uint32_t *pCount, + zes_mem_handle_t *phMemory); + ze_result_t (*zesMemoryGetProperties)(zes_mem_handle_t hMemory, + zes_mem_properties_t *pProperties); + ze_result_t (*zesMemoryGetState)(zes_mem_handle_t hMemory, + zes_mem_state_t *pState); + +} oneapi_handle_t; + +typedef struct oneapi_init_resp +{ + char *err; // If err is non-null handle is invalid + int num_devices; + oneapi_handle_t oh; +} oneapi_init_resp_t; + +typedef struct oneapi_version_resp +{ + ze_result_t status; + char *str; // Contains version or error string if status != 0 +} oneapi_version_resp_t; + +void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp); +void oneapi_check_vram(oneapi_handle_t rh, mem_info_t *resp); + +#endif // __GPU_INFO_INTEL_H__ +#endif // __APPLE__ diff --git a/gpu/gpu_oneapi.go b/gpu/gpu_oneapi.go new file mode 100644 index 00000000..9864bde5 --- /dev/null +++ b/gpu/gpu_oneapi.go @@ -0,0 +1,21 @@ +//go:build linux || windows + +package gpu + +import ( + "log/slog" + "strings" +) + +func oneapiGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) { + ids := []string{} + for _, info := range gpuInfo { + if info.Library != "oneapi" { + // TODO shouldn't happen if things are wired correctly... 
+ slog.Debug("oneapiGetVisibleDevicesEnv skipping over non-sycl device", "library", info.Library) + continue + } + ids = append(ids, info.ID) + } + return "ONEAPI_DEVICE_SELECTOR", "level_zero:" + strings.Join(ids, ",") +} diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index 63668bd2..37343a93 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -206,6 +206,36 @@ if [ -d "${CUDA_LIB_DIR}" ]; then fi +if [ -z "${ONEAPI_ROOT}" ]; then + # Try the default location in case it exists + ONEAPI_ROOT=/opt/intel/oneapi +fi + +if [ -d "${ONEAPI_ROOT}" ]; then + echo "OneAPI libraries detected - building dynamic OneAPI library" + init_vars + source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI + CC=icx + CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL=ON -DLLAMA_SYCL_F16=OFF" + BUILD_DIR="../build/linux/${ARCH}/oneapi" + EXTRA_LIBS="-fsycl -Wl,-rpath,${ONEAPI_ROOT}/compiler/latest/lib,-rpath,${ONEAPI_ROOT}/mkl/latest/lib,-rpath,${ONEAPI_ROOT}/tbb/latest/lib,-rpath,${ONEAPI_ROOT}/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb" + DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it + build + + # copy oneAPI dependencies + for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do + cp "${dep}" "${BUILD_DIR}/bin/" + done + cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${BUILD_DIR}/bin/" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${BUILD_DIR}/bin/" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${BUILD_DIR}/bin/" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${BUILD_DIR}/bin/" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${BUILD_DIR}/bin/" + cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${BUILD_DIR}/bin/" + cp 
"${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${BUILD_DIR}/bin/" + compress +fi + if [ -z "${ROCM_PATH}" ]; then # Try the default location in case it exists ROCM_PATH=/opt/rocm diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index 9bdfb9d3..553c358b 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -289,6 +289,49 @@ function build_cuda() { } } +function build_oneapi() { + if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${env:ONEAPI_ROOT}")) { + # Get oneAPI version + $script:ONEAPI_VERSION = icpx --version + $script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?\d+\.\d+\.\d+)').Value + if ($null -ne $script:ONEAPI_VERSION) { + $script:ONEAPI_VARIANT = "_v" + $script:ONEAPI_VERSION + } + init_vars + $script:buildDir = "../build/windows/${script:ARCH}/oneapi$script:ONEAPI_VARIANT" + $script:distDir ="$script:DIST_BASE\oneapi$script:ONEAPI_VARIANT" + $script:cmakeDefs += @( + "-G", "MinGW Makefiles", + "-DLLAMA_SYCL=ON", + "-DCMAKE_C_COMPILER=icx", + "-DCMAKE_CXX_COMPILER=icx", + "-DCMAKE_BUILD_TYPE=Release" + ) + + Write-Host "Building oneAPI" + build + # Ninja doesn't prefix with config name + if ($null -ne $script:DUMPBIN) { + & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | Select-String ".dll" + } + sign + install + + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:distDir}" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:distDir}" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:distDir}" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:distDir}" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:distDir}" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:distDir}" + cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:distDir}" + cp 
"${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:distDir}" + cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:distDir}" + cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:distDir}" + } else { + Write-Host "Skipping oneAPI generation step" + } +} + function build_rocm() { if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) { $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename @@ -356,6 +399,7 @@ if ($($args.count) -eq 0) { build_cpu_avx build_cpu_avx2 build_cuda + build_oneapi build_rocm } From 485016bfbb30325c610a2e5071d282fab640ec28 Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Sun, 26 May 2024 11:46:00 -0700 Subject: [PATCH 02/13] Update install.sh --- scripts/install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/install.sh b/scripts/install.sh index 20b0db60..9281947a 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -295,6 +295,7 @@ if ! lsmod | grep -q nvidia; then fi $SUDO modprobe nvidia + $SUDO modprobe nvidia_uvm fi From c79f8c9c3934ee8c9507ab89d4ec31c16bbd0fd8 Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Sun, 26 May 2024 14:57:17 -0700 Subject: [PATCH 03/13] Ensure `nvidia` and `nvidia_uvm` kernel modules are loaded in `install.sh` script and at startup (#4652) * ensure kernel modules are loaded in `install.sh` script and at startup * indentation * use `SUDO` variable * restart if nouveau is detected * consistent success message for AMD --- scripts/install.sh | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/scripts/install.sh b/scripts/install.sh index 9281947a..7d79c9ed 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -72,7 +72,7 @@ status "Installing ollama to $BINDIR..." $SUDO install -o0 -g0 -m755 -d $BINDIR $SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama -install_success() { +install_success() { status 'The Ollama API is now available at 127.0.0.1:11434.' 
status 'Install complete. Run "ollama" from the command line.' } @@ -139,12 +139,12 @@ fi check_gpu() { # Look for devices based on vendor ID for NVIDIA and AMD case $1 in - lspci) + lspci) case $2 in nvidia) available lspci && lspci -d '10de:' | grep -q 'NVIDIA' || return 1 ;; amdgpu) available lspci && lspci -d '1002:' | grep -q 'AMD' || return 1 ;; esac ;; - lshw) + lshw) case $2 in nvidia) available lshw && $SUDO lshw -c display -numeric | grep -q 'vendor: .* \[10DE\]' || return 1 ;; amdgpu) available lshw && $SUDO lshw -c display -numeric | grep -q 'vendor: .* \[1002\]' || return 1 ;; @@ -153,11 +153,6 @@ check_gpu() { esac } -if check_gpu nvidia-smi; then - status "NVIDIA GPU installed." - exit 0 -fi - if ! check_gpu lspci nvidia && ! check_gpu lshw nvidia && ! check_gpu lspci amdgpu && ! check_gpu lshw amdgpu; then install_success warning "No NVIDIA/AMD GPU detected. Ollama will run in CPU-only mode." @@ -181,7 +176,7 @@ if check_gpu lspci amdgpu || check_gpu lshw amdgpu; then curl --fail --show-error --location --progress-bar "https://ollama.com/download/ollama-linux-amd64-rocm.tgz${VER_PARAM}" \ | $SUDO tar zx --owner ollama --group ollama -C /usr/share/ollama/lib/rocm . install_success - status "AMD GPU dependencies installed." + status "AMD GPU ready." exit 0 fi @@ -288,15 +283,25 @@ if ! lsmod | grep -q nvidia; then if [ -n "$NVIDIA_CUDA_VERSION" ]; then $SUDO dkms install $NVIDIA_CUDA_VERSION fi - - if lsmod | grep -q nouveau; then - status 'Reboot to complete NVIDIA CUDA driver install.' - exit 0 - fi - - $SUDO modprobe nvidia - $SUDO modprobe nvidia_uvm fi +$SUDO modprobe nvidia +$SUDO modprobe nvidia_uvm -status "NVIDIA CUDA drivers installed." +# make sure the NVIDIA modules are loaded on boot with nvidia-persistenced +if command -v nvidia-persistenced > /dev/null 2>&1; then + $SUDO touch /etc/modules-load.d/nvidia.conf + MODULES="nvidia nvidia-uvm" + for MODULE in $MODULES; do + if ! 
grep -qxF "$MODULE" /etc/modules-load.d/nvidia.conf; then + echo "$MODULE" | sudo tee -a /etc/modules-load.d/nvidia.conf > /dev/null + fi + done +fi + +if lsmod | grep -q nouveau; then + status 'Reboot to complete NVIDIA CUDA driver install.' + exit 0 +fi + +status "NVIDIA GPU ready." From 8a8e7afa968f9c241a6bf85e2e9f711e8be41c7c Mon Sep 17 00:00:00 2001 From: Rayan Mostovoi <135296650+Bubullzz@users.noreply.github.com> Date: Tue, 28 May 2024 02:19:20 +0200 Subject: [PATCH 04/13] small fix on examples/python-simplechat/client.py to actually get a streamed response and get tokens printed as we receive it (#4671) --- examples/python-simplechat/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/python-simplechat/client.py b/examples/python-simplechat/client.py index 9ae99fb7..f82a16b3 100644 --- a/examples/python-simplechat/client.py +++ b/examples/python-simplechat/client.py @@ -9,6 +9,7 @@ def chat(messages): r = requests.post( "http://0.0.0.0:11434/api/chat", json={"model": model, "messages": messages, "stream": True}, + stream=True ) r.raise_for_status() output = "" From 7487229c343e5177356a40ddc5061a9fc6dfaded Mon Sep 17 00:00:00 2001 From: Lei Jitang Date: Tue, 28 May 2024 08:21:10 +0800 Subject: [PATCH 05/13] llm/server.go: Fix 2 minor typos (#4661) Signed-off-by: Lei Jitang --- llm/server.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llm/server.go b/llm/server.go index f670931f..cf75de90 100644 --- a/llm/server.go +++ b/llm/server.go @@ -24,9 +24,9 @@ import ( "golang.org/x/sync/semaphore" "github.com/ollama/ollama/api" + "github.com/ollama/ollama/envconfig" "github.com/ollama/ollama/format" "github.com/ollama/ollama/gpu" - "github.com/ollama/ollama/envconfig" ) type LlamaServer interface { @@ -243,7 +243,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr gpuCount = 0 } - // Find an availableServers port, retry on each iterration in case the failure was a port conflict race + // 
Find an availableServers port, retry on each iteration in case the failure was a port conflict race port := 0 if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil { var l *net.TCPListener @@ -756,7 +756,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu var c completion if err := json.Unmarshal(evt, &c); err != nil { - return fmt.Errorf("error unmarshaling llm prediction response: %v", err) + return fmt.Errorf("error unmarshalling llm prediction response: %v", err) } switch { From 6f43898b17463382c23081a581ea6a7275386132 Mon Sep 17 00:00:00 2001 From: Orfeo Ciano Date: Tue, 28 May 2024 01:22:01 +0100 Subject: [PATCH 06/13] Adds olpaka flutter client (#4647) * Adds olpaka flutter client * Update README.md --------- Co-authored-by: Jeffrey Morgan --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 76ade550..40c53399 100644 --- a/README.md +++ b/README.md @@ -283,6 +283,7 @@ See the [API documentation](./docs/api.md) for all endpoints. 
- [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG) - [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation) - [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends) +- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama) ### Terminal From 9db0996ed458e64ee6814d3c276bd5cb57c208ca Mon Sep 17 00:00:00 2001 From: Tai Date: Tue, 28 May 2024 10:58:26 +0800 Subject: [PATCH 07/13] Add OllamaSpring Project to Readme (#4672) * Add OllamaSpring Project to Readme * Update README.md --------- Co-authored-by: Jeffrey Morgan --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 40c53399..19c8adb1 100644 --- a/README.md +++ b/README.md @@ -284,6 +284,7 @@ See the [API documentation](./docs/api.md) for all endpoints. - [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation) - [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends) - [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama) +- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS) ### Terminal From 92c81e8117c5b2b81467798fe0d1187927d9002e Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 28 May 2024 08:56:18 -0700 Subject: [PATCH 08/13] Give the final model loading more time On some systems, 1 minute isn't sufficient to finish the load after it hits 100% This creates 2 distinct timers, although they're both set to the same value for now so we can refine the timeouts further. 
--- llm/server.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/llm/server.go b/llm/server.go index cf75de90..462f8484 100644 --- a/llm/server.go +++ b/llm/server.go @@ -519,11 +519,13 @@ func (s *llmServer) Ping(ctx context.Context) error { func (s *llmServer) WaitUntilRunning(ctx context.Context) error { start := time.Now() - stallDuration := 60 * time.Second - stallTimer := time.Now().Add(stallDuration) // give up if we stall for + stallDuration := 5 * time.Minute // If no progress happens + finalLoadDuration := 5 * time.Minute // After we hit 100%, give the runner more time to come online + stallTimer := time.Now().Add(stallDuration) // give up if we stall slog.Info("waiting for llama runner to start responding") var lastStatus ServerStatus = -1 + fullyLoaded := false for { select { @@ -572,6 +574,10 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error { if priorProgress != s.loadProgress { slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress)) stallTimer = time.Now().Add(stallDuration) + } else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 { + slog.Debug("model load completed, waiting for server to become available", "status", status.ToString()) + stallTimer = time.Now().Add(finalLoadDuration) + fullyLoaded = true } time.Sleep(time.Millisecond * 250) continue From b7d316d98d6f6393c58eb8235c07d896d7a479ea Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Tue, 28 May 2024 09:59:36 -0700 Subject: [PATCH 09/13] fix nvidia detection in install script (#4683) --- scripts/install.sh | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/scripts/install.sh b/scripts/install.sh index 7d79c9ed..04b18796 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -153,6 +153,11 @@ check_gpu() { esac } +if check_gpu nvidia-smi; then + status "NVIDIA GPU installed." + exit 0 +fi + if ! check_gpu lspci nvidia && ! check_gpu lshw nvidia && ! check_gpu lspci amdgpu && ! 
check_gpu lshw amdgpu; then install_success warning "No NVIDIA/AMD GPU detected. Ollama will run in CPU-only mode." @@ -269,7 +274,7 @@ if ! check_gpu nvidia-smi || [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\ esac fi -if ! lsmod | grep -q nvidia; then +if ! lsmod | grep -q nvidia || ! lsmod | grep -q nvidia_uvm; then KERNEL_RELEASE="$(uname -r)" case $OS_NAME in rocky) $SUDO $PACKAGE_MANAGER -y install kernel-devel kernel-headers ;; @@ -283,10 +288,15 @@ if ! lsmod | grep -q nvidia; then if [ -n "$NVIDIA_CUDA_VERSION" ]; then $SUDO dkms install $NVIDIA_CUDA_VERSION fi -fi -$SUDO modprobe nvidia -$SUDO modprobe nvidia_uvm + if lsmod | grep -q nouveau; then + status 'Reboot to complete NVIDIA CUDA driver install.' + exit 0 + fi + + $SUDO modprobe nvidia + $SUDO modprobe nvidia_uvm +fi # make sure the NVIDIA modules are loaded on boot with nvidia-persistenced if command -v nvidia-persistenced > /dev/null 2>&1; then @@ -299,9 +309,4 @@ if command -v nvidia-persistenced > /dev/null 2>&1; then done fi -if lsmod | grep -q nouveau; then - status 'Reboot to complete NVIDIA CUDA driver install.' - exit 0 -fi - status "NVIDIA GPU ready." 
From ad897080a299bf86aee16b498edb5ddb250edd35 Mon Sep 17 00:00:00 2001 From: Josh <76125168+joshyan1@users.noreply.github.com> Date: Tue, 28 May 2024 12:04:03 -0700 Subject: [PATCH 10/13] working on integration of multi-byte and multi-width runes (#4549) * integrated runewidth for display management - fixed cursor movement for multi-width char * updated input and deletion of multi-byte chars * fixed line history with some exceptions * improved insert and add * fixed issues with moving across lines * end of line extra space tracking * saved changes * fixed end of line issues with empty spaces * worked some more * worked on end of line * fixed failed test * fixed minor inserting bug * fixed movement hotkeys * adjusted hotkeys * removed comments * Update readline/buffer.go Co-authored-by: Bruce MacDonald * Update readline/buffer.go Co-authored-by: Bruce MacDonald * Update readline/buffer.go Co-authored-by: Bruce MacDonald * Update readline/buffer.go Co-authored-by: Bruce MacDonald * Update readline/buffer.go Co-authored-by: Bruce MacDonald * Update readline/buffer.go Co-authored-by: Bruce MacDonald * Update readline/buffer.go Co-authored-by: Bruce MacDonald * Update readline/buffer.go Co-authored-by: Bruce MacDonald * Update readline/buffer.go Co-authored-by: Bruce MacDonald * Update readline/buffer.go Co-authored-by: Bruce MacDonald * Update readline/buffer.go Co-authored-by: Bruce MacDonald * Update readline/buffer.go Co-authored-by: Bruce MacDonald * deleted comments and duplicate code * removed duplicate code * added comments, refactored add function to use addChar * added helper to retrieve lineSpacing, renamed lineFlags for clarity * fixed remove() --------- Co-authored-by: Bruce MacDonald --- readline/buffer.go | 387 +++++++++++++++++++++++++++++++++---------- readline/readline.go | 4 +- 2 files changed, 305 insertions(+), 86 deletions(-) diff --git a/readline/buffer.go b/readline/buffer.go index 52e8a56c..2c3bfec9 100644 --- a/readline/buffer.go +++
b/readline/buffer.go @@ -5,16 +5,20 @@ import ( "os" "github.com/emirpasic/gods/lists/arraylist" + "github.com/mattn/go-runewidth" "golang.org/x/term" ) type Buffer struct { - Pos int - Buf *arraylist.List - Prompt *Prompt - LineWidth int - Width int - Height int + DisplayPos int + Pos int + Buf *arraylist.List + //LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end + LineHasSpace *arraylist.List + Prompt *Prompt + LineWidth int + Width int + Height int } func NewBuffer(prompt *Prompt) (*Buffer, error) { @@ -27,25 +31,57 @@ func NewBuffer(prompt *Prompt) (*Buffer, error) { lwidth := width - len(prompt.prompt()) b := &Buffer{ - Pos: 0, - Buf: arraylist.New(), - Prompt: prompt, - Width: width, - Height: height, - LineWidth: lwidth, + DisplayPos: 0, + Pos: 0, + Buf: arraylist.New(), + LineHasSpace: arraylist.New(), + Prompt: prompt, + Width: width, + Height: height, + LineWidth: lwidth, } return b, nil } +func (b *Buffer) GetLineSpacing(line int) bool { + hasSpace, _ := b.LineHasSpace.Get(line) + + if hasSpace == nil { + return false + } + + return hasSpace.(bool) + +} + func (b *Buffer) MoveLeft() { if b.Pos > 0 { - if b.Pos%b.LineWidth == 0 { - fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width)) - } else { - fmt.Print(CursorLeft) + //asserts that we retrieve a rune + if e, ok := b.Buf.Get(b.Pos - 1); ok { + if r, ok := e.(rune); ok { + rLength := runewidth.RuneWidth(r) + + if b.DisplayPos%b.LineWidth == 0 { + fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width)) + if rLength == 2 { + fmt.Print(CursorLeft) + } + + line := b.DisplayPos/b.LineWidth - 1 + hasSpace := b.GetLineSpacing(line) + if hasSpace { + b.DisplayPos -= 1 + fmt.Print(CursorLeft) + } + } else { + fmt.Print(cursorLeftN(rLength)) + } + + b.Pos -= 1 + b.DisplayPos -= rLength + } } - b.Pos -= 1 } } @@ -71,18 +107,35 @@ func (b *Buffer) MoveLeftWord() { } func (b *Buffer) MoveRight() { - if b.Pos < b.Size() { - b.Pos += 1 - if b.Pos%b.LineWidth == 0 { - 
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt()))) - } else { - fmt.Print(CursorRight) + if b.Pos < b.Buf.Size() { + if e, ok := b.Buf.Get(b.Pos); ok { + if r, ok := e.(rune); ok { + rLength := runewidth.RuneWidth(r) + b.Pos += 1 + hasSpace := b.GetLineSpacing(b.DisplayPos / b.LineWidth) + b.DisplayPos += rLength + + if b.DisplayPos%b.LineWidth == 0 { + fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt()))) + + } else if (b.DisplayPos-rLength)%b.LineWidth == b.LineWidth-1 && hasSpace { + fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())+rLength)) + b.DisplayPos += 1 + + } else if b.LineHasSpace.Size() > 0 && b.DisplayPos%b.LineWidth == b.LineWidth-1 && hasSpace { + fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt()))) + b.DisplayPos += 1 + + } else { + fmt.Print(cursorRightN(rLength)) + } + } } } } func (b *Buffer) MoveRightWord() { - if b.Pos < b.Size() { + if b.Pos < b.Buf.Size() { for { b.MoveRight() v, _ := b.Buf.Get(b.Pos) @@ -90,7 +143,7 @@ func (b *Buffer) MoveRightWord() { break } - if b.Pos == b.Size() { + if b.Pos == b.Buf.Size() { break } } @@ -99,7 +152,7 @@ func (b *Buffer) MoveRightWord() { func (b *Buffer) MoveToStart() { if b.Pos > 0 { - currLine := b.Pos / b.LineWidth + currLine := b.DisplayPos / b.LineWidth if currLine > 0 { for cnt := 0; cnt < currLine; cnt++ { fmt.Print(CursorUp) @@ -107,81 +160,195 @@ func (b *Buffer) MoveToStart() { } fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt()))) b.Pos = 0 + b.DisplayPos = 0 } } func (b *Buffer) MoveToEnd() { - if b.Pos < b.Size() { - currLine := b.Pos / b.LineWidth - totalLines := b.Size() / b.LineWidth + if b.Pos < b.Buf.Size() { + currLine := b.DisplayPos / b.LineWidth + totalLines := b.DisplaySize() / b.LineWidth if currLine < totalLines { for cnt := 0; cnt < totalLines-currLine; cnt++ { fmt.Print(CursorDown) } - remainder := b.Size() % b.LineWidth + remainder := b.DisplaySize() % b.LineWidth 
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())+remainder)) } else { - fmt.Print(cursorRightN(b.Size() - b.Pos)) + fmt.Print(cursorRightN(b.DisplaySize() - b.DisplayPos)) } - b.Pos = b.Size() + b.Pos = b.Buf.Size() + b.DisplayPos = b.DisplaySize() } } -func (b *Buffer) Size() int { - return b.Buf.Size() +func (b *Buffer) DisplaySize() int { + sum := 0 + for i := 0; i < b.Buf.Size(); i++ { + if e, ok := b.Buf.Get(i); ok { + if r, ok := e.(rune); ok { + sum += runewidth.RuneWidth(r) + } + } + } + + return sum } func (b *Buffer) Add(r rune) { + if b.Pos == b.Buf.Size() { - fmt.Printf("%c", r) - b.Buf.Add(r) - b.Pos += 1 - if b.Pos > 0 && b.Pos%b.LineWidth == 0 { + b.AddChar(r, false) + } else { + b.AddChar(r, true) + } +} + +func (b *Buffer) AddChar(r rune, insert bool) { + rLength := runewidth.RuneWidth(r) + b.DisplayPos += rLength + + if b.Pos > 0 { + + if b.DisplayPos%b.LineWidth == 0 { + fmt.Printf("%c", r) fmt.Printf("\n%s", b.Prompt.AltPrompt) + + if insert { + b.LineHasSpace.Set(b.DisplayPos/b.LineWidth-1, false) + } else { + b.LineHasSpace.Add(false) + } + + // this case occurs when a double-width rune crosses the line boundary + } else if b.DisplayPos%b.LineWidth < (b.DisplayPos-rLength)%b.LineWidth { + if insert { + fmt.Print(ClearToEOL) + } + fmt.Printf("\n%s", b.Prompt.AltPrompt) + b.DisplayPos += 1 + fmt.Printf("%c", r) + + if insert { + b.LineHasSpace.Set(b.DisplayPos/b.LineWidth-1, true) + } else { + b.LineHasSpace.Add(true) + } + + } else { + fmt.Printf("%c", r) } } else { fmt.Printf("%c", r) + } + + if insert { b.Buf.Insert(b.Pos, r) - b.Pos += 1 - if b.Pos > 0 && b.Pos%b.LineWidth == 0 { - fmt.Printf("\n%s", b.Prompt.AltPrompt) - } + } else { + b.Buf.Add(r) + } + + b.Pos += 1 + + if insert { b.drawRemaining() } } +func (b *Buffer) countRemainingLineWidth(place int) int { + var sum int + counter := -1 + var prevLen int + + for place <= b.LineWidth { + counter += 1 + sum += prevLen + if e, ok := b.Buf.Get(b.Pos + counter); ok { + if r, ok := 
e.(rune); ok { + place += runewidth.RuneWidth(r) + prevLen = len(string(r)) + } + } else { + break + } + } + + return sum +} + func (b *Buffer) drawRemaining() { var place int remainingText := b.StringN(b.Pos) if b.Pos > 0 { - place = b.Pos % b.LineWidth + place = b.DisplayPos % b.LineWidth } fmt.Print(CursorHide) // render the rest of the current line - currLine := remainingText[:min(b.LineWidth-place, len(remainingText))] + currLineLength := b.countRemainingLineWidth(place) + + currLine := remainingText[:min(currLineLength, len(remainingText))] + currLineSpace := runewidth.StringWidth(currLine) + remLength := runewidth.StringWidth(remainingText) + if len(currLine) > 0 { fmt.Printf(ClearToEOL + currLine) - fmt.Print(cursorLeftN(len(currLine))) + fmt.Print(cursorLeftN(currLineSpace)) } else { fmt.Print(ClearToEOL) } + if currLineSpace != b.LineWidth-place && currLineSpace != remLength { + b.LineHasSpace.Set(b.DisplayPos/b.LineWidth, true) + } else if currLineSpace != b.LineWidth-place { + b.LineHasSpace.Remove(b.DisplayPos / b.LineWidth) + } else { + b.LineHasSpace.Set(b.DisplayPos/b.LineWidth, false) + } + + if (b.DisplayPos+currLineSpace)%b.LineWidth == 0 && currLine == remainingText { + fmt.Print(cursorRightN(currLineSpace)) + fmt.Printf("\n%s", b.Prompt.AltPrompt) + fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width-currLineSpace)) + } + // render the other lines - if len(remainingText) > len(currLine) { - remaining := []rune(remainingText[len(currLine):]) + if remLength > currLineSpace { + remaining := (remainingText[len(currLine):]) var totalLines int - for i, c := range remaining { - if i%b.LineWidth == 0 { + var displayLength int + var lineLength int = currLineSpace + + for _, c := range remaining { + if displayLength == 0 || (displayLength+runewidth.RuneWidth(c))%b.LineWidth < displayLength%b.LineWidth { fmt.Printf("\n%s", b.Prompt.AltPrompt) totalLines += 1 + + if displayLength != 0 { + if lineLength == b.LineWidth { + 
b.LineHasSpace.Set(b.DisplayPos/b.LineWidth+totalLines-1, false) + } else { + b.LineHasSpace.Set(b.DisplayPos/b.LineWidth+totalLines-1, true) + } + } + + lineLength = 0 } + + displayLength += runewidth.RuneWidth(c) + lineLength += runewidth.RuneWidth(c) fmt.Printf("%c", c) } fmt.Print(ClearToEOL) fmt.Print(cursorUpN(totalLines)) - fmt.Printf(CursorBOL + cursorRightN(b.Width-len(currLine))) + fmt.Printf(CursorBOL + cursorRightN(b.Width-currLineSpace)) + + hasSpace := b.GetLineSpacing(b.DisplayPos / b.LineWidth) + + if hasSpace && b.DisplayPos%b.LineWidth != b.LineWidth-1 { + fmt.Print(CursorLeft) + } } fmt.Print(CursorShow) @@ -189,46 +356,84 @@ func (b *Buffer) drawRemaining() { func (b *Buffer) Remove() { if b.Buf.Size() > 0 && b.Pos > 0 { - if b.Pos%b.LineWidth == 0 { - // if the user backspaces over the word boundary, do this magic to clear the line - // and move to the end of the previous line - fmt.Printf(CursorBOL + ClearToEOL) - fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width) + " " + CursorLeft) - } else { - fmt.Printf(CursorLeft + " " + CursorLeft) - } - var eraseExtraLine bool - if (b.Size()-1)%b.LineWidth == 0 { - eraseExtraLine = true - } + if e, ok := b.Buf.Get(b.Pos - 1); ok { + if r, ok := e.(rune); ok { + rLength := runewidth.RuneWidth(r) + hasSpace := b.GetLineSpacing(b.DisplayPos/b.LineWidth - 1) - b.Pos -= 1 - b.Buf.Remove(b.Pos) + if b.DisplayPos%b.LineWidth == 0 { + // if the user backspaces over the word boundary, do this magic to clear the line + // and move to the end of the previous line + fmt.Printf(CursorBOL + ClearToEOL) + fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width)) - if b.Pos < b.Size() { - b.drawRemaining() - // this erases a line which is left over when backspacing in the middle of a line and there - // are trailing characters which go over the line width boundary - if eraseExtraLine { - remainingLines := (b.Size() - b.Pos) / b.LineWidth - fmt.Printf(cursorDownN(remainingLines+1) + CursorBOL + ClearToEOL) - place 
:= b.Pos % b.LineWidth - fmt.Printf(cursorUpN(remainingLines+1) + cursorRightN(place+len(b.Prompt.prompt()))) + if b.DisplaySize()%b.LineWidth < (b.DisplaySize()-rLength)%b.LineWidth { + b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1) + } + + if hasSpace { + b.DisplayPos -= 1 + fmt.Print(CursorLeft) + } + + if rLength == 2 { + fmt.Print(CursorLeft + " " + cursorLeftN(2)) + } else { + fmt.Print(" " + CursorLeft) + } + + } else if (b.DisplayPos-rLength)%b.LineWidth == 0 && hasSpace { + fmt.Printf(CursorBOL + ClearToEOL) + fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width)) + + if b.Pos == b.Buf.Size() { + b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1) + } + b.DisplayPos -= 1 + + } else { + fmt.Print(cursorLeftN(rLength)) + for i := 0; i < rLength; i++ { + fmt.Print(" ") + } + fmt.Print(cursorLeftN(rLength)) + } + + var eraseExtraLine bool + if (b.DisplaySize()-1)%b.LineWidth == 0 || (rLength == 2 && ((b.DisplaySize()-2)%b.LineWidth == 0)) || b.DisplaySize()%b.LineWidth == 0 { + eraseExtraLine = true + } + + b.Pos -= 1 + b.DisplayPos -= rLength + b.Buf.Remove(b.Pos) + + if b.Pos < b.Buf.Size() { + b.drawRemaining() + // this erases a line which is left over when backspacing in the middle of a line and there + // are trailing characters which go over the line width boundary + if eraseExtraLine { + remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth + fmt.Printf(cursorDownN(remainingLines+1) + CursorBOL + ClearToEOL) + place := b.DisplayPos % b.LineWidth + fmt.Printf(cursorUpN(remainingLines+1) + cursorRightN(place+len(b.Prompt.prompt()))) + } + } } } } } func (b *Buffer) Delete() { - if b.Size() > 0 && b.Pos < b.Size() { + if b.Buf.Size() > 0 && b.Pos < b.Buf.Size() { b.Buf.Remove(b.Pos) b.drawRemaining() - if b.Size()%b.LineWidth == 0 { - if b.Pos != b.Size() { - remainingLines := (b.Size() - b.Pos) / b.LineWidth + if b.DisplaySize()%b.LineWidth == 0 { + if b.DisplayPos != b.DisplaySize() { + remainingLines := (b.DisplaySize() - 
b.DisplayPos) / b.LineWidth fmt.Printf(cursorDownN(remainingLines) + CursorBOL + ClearToEOL) - place := b.Pos % b.LineWidth + place := b.DisplayPos % b.LineWidth fmt.Printf(cursorUpN(remainingLines) + cursorRightN(place+len(b.Prompt.prompt()))) } } @@ -244,8 +449,8 @@ func (b *Buffer) DeleteBefore() { } func (b *Buffer) DeleteRemaining() { - if b.Size() > 0 && b.Pos < b.Size() { - charsToDel := b.Size() - b.Pos + if b.DisplaySize() > 0 && b.Pos < b.DisplaySize() { + charsToDel := b.Buf.Size() - b.Pos for cnt := 0; cnt < charsToDel; cnt++ { b.Delete() } @@ -281,8 +486,10 @@ func (b *Buffer) ClearScreen() { ph := b.Prompt.placeholder() fmt.Printf(ColorGrey + ph + cursorLeftN(len(ph)) + ColorDefault) } else { - currPos := b.Pos + currPos := b.DisplayPos + currIndex := b.Pos b.Pos = 0 + b.DisplayPos = 0 b.drawRemaining() fmt.Printf(CursorReset + cursorRightN(len(b.Prompt.prompt()))) if currPos > 0 { @@ -300,7 +507,8 @@ func (b *Buffer) ClearScreen() { fmt.Printf(CursorBOL + b.Prompt.AltPrompt) } } - b.Pos = currPos + b.Pos = currIndex + b.DisplayPos = currPos } } @@ -309,9 +517,20 @@ func (b *Buffer) IsEmpty() bool { } func (b *Buffer) Replace(r []rune) { + b.DisplayPos = 0 b.Pos = 0 + lineNums := b.DisplaySize() / b.LineWidth + b.Buf.Clear() - fmt.Printf(ClearLine + CursorBOL + b.Prompt.prompt()) + + fmt.Printf(CursorBOL + ClearToEOL) + + for i := 0; i < lineNums; i++ { + fmt.Print(CursorUp + CursorBOL + ClearToEOL) + } + + fmt.Printf(CursorBOL + b.Prompt.prompt()) + for _, c := range r { b.Add(c) } @@ -328,7 +547,7 @@ func (b *Buffer) StringN(n int) string { func (b *Buffer) StringNM(n, m int) string { var s string if m == 0 { - m = b.Size() + m = b.Buf.Size() } for cnt := n; cnt < m; cnt++ { c, _ := b.Buf.Get(cnt) diff --git a/readline/readline.go b/readline/readline.go index 6fa45391..ee461ae4 100644 --- a/readline/readline.go +++ b/readline/readline.go @@ -150,7 +150,7 @@ func (i *Instance) Readline() (string, error) { i.Pasting = false } case KeyDel: - if 
buf.Size() > 0 { + if buf.DisplaySize() > 0 { buf.Delete() } metaDel = true @@ -202,7 +202,7 @@ func (i *Instance) Readline() (string, error) { buf.Add(' ') } case CharDelete: - if buf.Size() > 0 { + if buf.DisplaySize() > 0 { buf.Delete() } else { return "", io.EOF From 6d423b383b340b4fd5e99ab03c1e22ad8a59c56d Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Tue, 28 May 2024 14:41:50 -0700 Subject: [PATCH 11/13] Improve install experience on WSL2 and Linux (#4653) --- scripts/install.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/scripts/install.sh b/scripts/install.sh index 04b18796..c30de483 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -35,7 +35,7 @@ esac KERN=$(uname -r) case "$KERN" in - *icrosoft*WSL2 | *icrosoft*wsl2) ;; + *icrosoft*WSL2 | *icrosoft*wsl2) IS_WSL2=true;; *icrosoft) error "Microsoft WSL1 is not currently supported. Please upgrade to WSL2 with 'wsl --set-version 2'" ;; *) ;; esac @@ -131,6 +131,17 @@ if available systemctl; then configure_systemd fi +# WSL2 only supports GPUs via nvidia passthrough +# so check for nvidia-smi to determine if GPU is available +if [ "$IS_WSL2" = true ]; then + if available nvidia-smi && [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then + status "Nvidia GPU detected." + fi + install_success + exit 0 +fi + +# Install GPU dependencies on Linux if ! available lspci && ! available lshw; then warning "Unable to detect NVIDIA/AMD GPU. Install lspci or lshw to automatically detect and install GPU dependencies." exit 0 @@ -310,3 +321,4 @@ if command -v nvidia-persistenced > /dev/null 2>&1; then fi status "NVIDIA GPU ready." 
+install_success From 45cbfc5aee6af8e53b99b38e6aa49152df10db81 Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Tue, 28 May 2024 14:49:46 -0700 Subject: [PATCH 12/13] fix wsl2 status check for nvidia cards (#4689) --- scripts/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install.sh b/scripts/install.sh index c30de483..6ce288ac 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -134,7 +134,7 @@ fi # WSL2 only supports GPUs via nvidia passthrough # so check for nvidia-smi to determine if GPU is available if [ "$IS_WSL2" = true ]; then - if available nvidia-smi && [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then + if available nvidia-smi && [ -n "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then status "Nvidia GPU detected." fi install_success From 1f5008544bad78089e55f6f2694e579bb7ba12ab Mon Sep 17 00:00:00 2001 From: Jeffrey Morgan Date: Tue, 28 May 2024 15:01:22 -0700 Subject: [PATCH 13/13] Update install.sh --- scripts/install.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/install.sh b/scripts/install.sh index 6ce288ac..a71d921d 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -33,6 +33,8 @@ case "$ARCH" in *) error "Unsupported architecture: $ARCH" ;; esac +IS_WSL2=false + KERN=$(uname -r) case "$KERN" in *icrosoft*WSL2 | *icrosoft*wsl2) IS_WSL2=true;;