Mirror of https://github.com/likelovewant/ollama-for-amd.git
Compare commits: v0.2.3-alp...v0.2.5 (6 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | c7e2f8889d |  |
|  | cb02c084eb |  |
|  | 706449c10d |  |
|  | f7ee012300 |  |
|  | 90807b2ad0 |  |
|  | 1ed0aa8fea |  |
@@ -23,7 +23,7 @@ import (
)

var (
-	UpdateCheckURLBase  = "https://ollama.com/api/update"
+	UpdateCheckURLBase  = "https://api.github.com/repos/likelovewant/ollama-for-amd/releases/latest"
	UpdateDownloaded    = false
	UpdateCheckInterval = 60 * 60 * time.Second
)
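The fork repoints the update check from ollama.com to the GitHub Releases API for likelovewant/ollama-for-amd. Below is a minimal sketch of what polling that endpoint could look like; the `tag_name` field is part of GitHub's public REST API, but the helper name and version handling are illustrative assumptions, not the fork's actual updater.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// latestReleaseTag queries GitHub's "latest release" endpoint and returns the
// release tag (e.g. "v0.2.5"). Only the URL and the tag_name field come from
// the diff above / the public GitHub API; the rest is illustrative.
func latestReleaseTag(url string) (string, error) {
	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("update check failed: %s", resp.Status)
	}
	var release struct {
		TagName string `json:"tag_name"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&release); err != nil {
		return "", err
	}
	return release.TagName, nil
}

func main() {
	// Same URL as UpdateCheckURLBase in the hunk above.
	tag, err := latestReleaseTag("https://api.github.com/repos/likelovewant/ollama-for-amd/releases/latest")
	if err != nil {
		fmt.Println("update check error:", err)
		return
	}
	fmt.Println("latest release:", tag)
}
```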
@@ -23,7 +23,7 @@ const (
var (
	// Used to validate if the given ROCm lib is usable
	ROCmLibGlobs          = []string{"hipblas.dll", "rocblas"}                 // This is not sufficient to discern v5 vs v6
-	RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\6.1\\bin"} // TODO glob?
+	RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\5.7\\bin"} // TODO glob?
)

func AMDGetGPUInfo() []RocmGPUInfo {
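ROCmLibGlobs is the set of file patterns used to decide whether a candidate directory contains a usable ROCm runtime (and, as the comment notes, it cannot distinguish ROCm v5 from v6). A rough Go sketch of that kind of glob check, assuming the same two patterns; the function name and return convention are illustrative, not the actual gpu package code.

```go
package main

import (
	"fmt"
	"path/filepath"
)

// Patterns taken from the hunk above; "rocblas" matches the rocblas kernel
// library directory next to hipblas.dll. This check alone cannot tell ROCm v5
// from ROCm v6.
var rocmLibGlobs = []string{"hipblas.dll", "rocblas"}

// looksLikeROCmLibDir reports whether dir contains a match for every expected
// ROCm library pattern. Illustrative only.
func looksLikeROCmLibDir(dir string) bool {
	for _, pattern := range rocmLibGlobs {
		matches, err := filepath.Glob(filepath.Join(dir, pattern))
		if err != nil || len(matches) == 0 {
			return false
		}
	}
	return true
}

func main() {
	dir := `C:\Program Files\AMD\ROCm\5.7\bin` // standard location from the hunk above
	fmt.Println(dir, "usable:", looksLikeROCmLibDir(dir))
}
```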
@@ -6,11 +6,12 @@ function amdGPUs {
    if ($env:AMDGPU_TARGETS) {
        return $env:AMDGPU_TARGETS
    }
    # Current supported rocblas list from ROCm v6.1.2 on windows
    # TODO - load from some common data file for linux + windows build consistency
    $GPU_LIST = @(
        "gfx803"
        "gfx900"
        "gfx902"
        "gfx904"
        "gfx90c:xnack-"
        "gfx906:xnack-"
        "gfx908:xnack-"
@@ -19,7 +20,7 @@ function amdGPUs {
        "gfx940"
        "gfx941"
        "gfx942"
        "gfx1010:xnack-"
        "gfx1010"
        "gfx1011"
        "gfx1012:xnack-"
        "gfx1030"
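$GPU_LIST enumerates the gfx targets the bundled rocblas kernels are built for, unless $env:AMDGPU_TARGETS overrides it; a detected GPU whose gfx version is not on (or mapped onto) this list has no matching kernels. A small Go sketch of that kind of membership check, ignoring the :xnack feature suffixes; the function and comparison rule are illustrative assumptions, not ollama's actual lookup logic.

```go
package main

import (
	"fmt"
	"strings"
)

// Supported targets taken (abbreviated) from the build script above.
var supportedTargets = []string{
	"gfx803", "gfx900", "gfx902", "gfx904", "gfx90c:xnack-",
	"gfx906:xnack-", "gfx908:xnack-", "gfx1010", "gfx1011",
	"gfx1012:xnack-", "gfx1030",
}

// isSupported reports whether a detected gfx version (e.g. "gfx1030") has a
// matching rocblas target, treating ":xnack-" / ":xnack+" suffixes as
// irrelevant for the comparison. Illustrative only.
func isSupported(detected string) bool {
	for _, t := range supportedTargets {
		base, _, _ := strings.Cut(t, ":")
		if base == detected {
			return true
		}
	}
	return false
}

func main() {
	for _, gfx := range []string{"gfx1030", "gfx90c", "gfx1035"} {
		fmt.Printf("%s supported: %v\n", gfx, isSupported(gfx))
	}
}
```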
@@ -50,8 +51,8 @@ function init_vars {
    }
    $script:cmakeDefs = @(
        "-DBUILD_SHARED_LIBS=on",
-        "-DGGML_NATIVE=off",
-        "-DGGML_OPENMP=off"
+        "-DLLAMA_NATIVE=off",
+        "-DLLAMA_OPENMP=off"
        )
    $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
    $script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
@@ -193,9 +194,9 @@ function cleanup {
}


-# -DGGML_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
-# -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
-# -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
+# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
+# -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
+# -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver


function build_static() {
@@ -215,13 +216,13 @@ function build_static() {
            "-DCMAKE_C_COMPILER=gcc.exe",
            "-DCMAKE_CXX_COMPILER=g++.exe",
            "-DBUILD_SHARED_LIBS=off",
-            "-DGGML_NATIVE=off",
-            "-DGGML_AVX=off",
-            "-DGGML_AVX2=off",
-            "-DGGML_AVX512=off",
-            "-DGGML_F16C=off",
-            "-DGGML_FMA=off",
-            "-DGGML_OPENMP=off")
+            "-DLLAMA_NATIVE=off",
+            "-DLLAMA_AVX=off",
+            "-DLLAMA_AVX2=off",
+            "-DLLAMA_AVX512=off",
+            "-DLLAMA_F16C=off",
+            "-DLLAMA_FMA=off",
+            "-DLLAMA_OPENMP=off")
        $script:buildDir="../build/windows/${script:ARCH}_static"
        write-host "Building static library"
        build
@@ -235,7 +236,7 @@ function build_cpu($gen_arch) {
    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
        # remaining llama.cpp builds use MSVC
        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", $gen_arch, "-DGGML_AVX=off", "-DGGML_AVX2=off", "-DGGML_AVX512=off", "-DGGML_FMA=off", "-DGGML_F16C=off") + $script:cmakeDefs
+        $script:cmakeDefs = $script:commonCpuDefs + @("-A", $gen_arch, "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
        $script:buildDir="../build/windows/${script:ARCH}/cpu"
        $script:distDir="$script:DIST_BASE\cpu"
        write-host "Building LCD CPU"
@@ -250,7 +251,7 @@ function build_cpu($gen_arch) {
function build_cpu_avx() {
    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx"))) {
        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DGGML_AVX=on", "-DGGML_AVX2=off", "-DGGML_AVX512=off", "-DGGML_FMA=off", "-DGGML_F16C=off") + $script:cmakeDefs
+        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
        $script:distDir="$script:DIST_BASE\cpu_avx"
        write-host "Building AVX CPU"
@@ -265,7 +266,7 @@ function build_cpu_avx() {
function build_cpu_avx2() {
    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx2"))) {
        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DGGML_AVX=on", "-DGGML_AVX2=on", "-DGGML_AVX512=off", "-DGGML_FMA=on", "-DGGML_F16C=on") + $script:cmakeDefs
+        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
        $script:distDir="$script:DIST_BASE\cpu_avx2"
        write-host "Building AVX2 CPU"
@@ -290,9 +291,9 @@ function build_cuda() {
        $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
        $script:cmakeDefs += @(
            "-A", "x64",
-            "-DGGML_CUDA=ON",
-            "-DGGML_AVX=on",
-            "-DGGML_AVX2=off",
+            "-DLLAMA_CUDA=ON",
+            "-DLLAMA_AVX=on",
+            "-DLLAMA_AVX2=off",
            "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR",
            "-DCMAKE_CUDA_FLAGS=-t8",
            "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}"
@@ -330,7 +331,7 @@ function build_oneapi() {
    $script:distDir ="$script:DIST_BASE\oneapi$script:ONEAPI_VARIANT"
    $script:cmakeDefs += @(
        "-G", "MinGW Makefiles",
-        "-DGGML_SYCL=ON",
+        "-DLLAMA_SYCL=ON",
        "-DCMAKE_C_COMPILER=icx",
        "-DCMAKE_CXX_COMPILER=icx",
        "-DCMAKE_BUILD_TYPE=Release"
@@ -376,11 +377,10 @@ function build_rocm() {
            "-G", "Ninja",
            "-DCMAKE_C_COMPILER=clang.exe",
            "-DCMAKE_CXX_COMPILER=clang++.exe",
-            "-DGGML_HIPBLAS=on",
-            "-DLLAMA_CUDA_NO_PEER_COPY=on",
+            "-DLLAMA_HIPBLAS=on",
            "-DHIP_PLATFORM=amd",
-            "-DGGML_AVX=on",
-            "-DGGML_AVX2=off",
+            "-DLLAMA_AVX=on",
+            "-DLLAMA_AVX2=off",
            "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
            "-DAMDGPU_TARGETS=$(amdGPUs)",
            "-DGPU_TARGETS=$(amdGPUs)"
@@ -406,6 +406,7 @@ function build_rocm() {
        sign
        install

+        # Assumes v5.7, may need adjustments for v6
        rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
        md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
        cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
@@ -102,6 +102,7 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capabil
}

func (s *Server) GenerateHandler(c *gin.Context) {
+	checkpointStart := time.Now()
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -129,6 +130,8 @@ func (s *Server) GenerateHandler(c *gin.Context) {
		return
	}

+	checkpointLoaded := time.Now()
+
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.GenerateResponse{
			Model: req.Model,
@@ -191,26 +194,48 @@ func (s *Server) GenerateHandler(c *gin.Context) {

	ch := make(chan any)
	go func() {
+		// TODO (jmorganca): avoid building the response twice both here and below
+		var sb strings.Builder
		defer close(ch)
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
			Options: opts,
-		}, func(r llm.CompletionResponse) {
-			ch <- api.GenerateResponse{
+		}, func(cr llm.CompletionResponse) {
+			res := api.GenerateResponse{
				Model:     req.Model,
				CreatedAt: time.Now().UTC(),
-				Response:   r.Content,
-				Done:       r.Done,
-				DoneReason: r.DoneReason,
+				Response:   cr.Content,
+				Done:       cr.Done,
+				DoneReason: cr.DoneReason,
				Metrics: api.Metrics{
-					PromptEvalCount:    r.PromptEvalCount,
-					PromptEvalDuration: r.PromptEvalDuration,
-					EvalCount:          r.EvalCount,
-					EvalDuration:       r.EvalDuration,
+					PromptEvalCount:    cr.PromptEvalCount,
+					PromptEvalDuration: cr.PromptEvalDuration,
+					EvalCount:          cr.EvalCount,
+					EvalDuration:       cr.EvalDuration,
				},
			}
+
+			if _, err := sb.WriteString(cr.Content); err != nil {
+				ch <- gin.H{"error": err.Error()}
+			}
+
+			if cr.Done {
+				res.TotalDuration = time.Since(checkpointStart)
+				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
+
+				if !req.Raw {
+					tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
+					if err != nil {
+						ch <- gin.H{"error": err.Error()}
+						return
+					}
+					res.Context = append(req.Context, tokens...)
+				}
+			}
+
+			ch <- res
		}); err != nil {
			ch <- gin.H{"error": err.Error()}
		}
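The v0.2.5 side of this hunk builds each streamed chunk into a single `res` value, accumulates the generated text in a strings.Builder, and fills in the duration and context fields only on the final chunk. Below is a condensed, self-contained sketch of that pattern with the llm/api types stubbed out; the real handler sends every `res` over a channel to the HTTP response writer.

```go
package main

import (
	"fmt"
	"strings"
	"time"
)

// Minimal stand-ins for the llm.CompletionResponse and api.GenerateResponse
// fields used above; the real types live in ollama's llm and api packages.
type completionChunk struct {
	Content string
	Done    bool
}

type generateResponse struct {
	Response      string
	Done          bool
	TotalDuration time.Duration
	Context       []int
}

// streamCompletion mimics the handler's callback: every chunk becomes a
// response value, the full text is accumulated in a strings.Builder, and the
// final chunk also gets the total duration plus a context built from
// prompt+output tokens. tokenize stands in for r.Tokenize; send stands in for
// `ch <- res`. Illustrative only.
func streamCompletion(prompt string, chunks []completionChunk,
	tokenize func(string) []int, send func(generateResponse)) {

	start := time.Now()
	var sb strings.Builder

	for _, cr := range chunks {
		res := generateResponse{Response: cr.Content, Done: cr.Done}
		sb.WriteString(cr.Content)

		if cr.Done {
			res.TotalDuration = time.Since(start)
			res.Context = tokenize(prompt + sb.String())
		}
		send(res)
	}
}

func main() {
	// Toy tokenizer: one "token" per word. Illustrative only.
	tokenize := func(s string) []int { return make([]int, len(strings.Fields(s))) }

	chunks := []completionChunk{{Content: "Hello, "}, {Content: "world."}, {Done: true}}
	streamCompletion("Say hi:", chunks, tokenize, func(res generateResponse) {
		fmt.Printf("chunk done=%v context_len=%d\n", res.Done, len(res.Context))
	})
}
```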
@@ -1122,6 +1147,8 @@ func (s *Server) ProcessHandler(c *gin.Context) {
}

func (s *Server) ChatHandler(c *gin.Context) {
+	checkpointStart := time.Now()
+
	var req api.ChatRequest
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
@@ -1141,6 +1168,8 @@ func (s *Server) ChatHandler(c *gin.Context) {
		return
	}

+	checkpointLoaded := time.Now()
+
	if len(req.Messages) == 0 {
		c.JSON(http.StatusOK, api.ChatResponse{
			Model: req.Model,
@@ -1152,6 +1181,10 @@ func (s *Server) ChatHandler(c *gin.Context) {
		return
	}

+	if req.Messages[0].Role != "system" {
+		req.Messages = append([]api.Message{{Role: "system", Content: m.System}}, req.Messages...)
+	}
+
	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, req.Messages)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
@@ -1169,7 +1202,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
			Format:  req.Format,
			Options: opts,
		}, func(r llm.CompletionResponse) {
-			ch <- api.ChatResponse{
+			res := api.ChatResponse{
				Model:     req.Model,
				CreatedAt: time.Now().UTC(),
				Message:   api.Message{Role: "assistant", Content: r.Content},
@@ -1182,6 +1215,13 @@ func (s *Server) ChatHandler(c *gin.Context) {
					EvalDuration: r.EvalDuration,
				},
			}
+
+			if r.Done {
+				res.TotalDuration = time.Since(checkpointStart)
+				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
+			}
+
+			ch <- res
		}); err != nil {
			ch <- gin.H{"error": err.Error()}
		}