mirror of https://github.com/likelovewant/ollama-for-amd.git
@@ -17,6 +17,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
+	"slices"
 	"strconv"
 	"strings"
 	"sync"
@@ -30,9 +31,37 @@ import (
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/fs/ggml"
 	"github.com/ollama/ollama/llama"
+	"github.com/ollama/ollama/logutil"
 	"github.com/ollama/ollama/model"
 )
+
+type filteredEnv []string
+
+func (e filteredEnv) LogValue() slog.Value {
+	var attrs []slog.Attr
+	for _, env := range e {
+		if key, value, ok := strings.Cut(env, "="); ok {
+			switch {
+			case strings.HasPrefix(key, "OLLAMA_"),
+				strings.HasPrefix(key, "CUDA_"),
+				strings.HasPrefix(key, "ROCR_"),
+				strings.HasPrefix(key, "ROCM_"),
+				strings.HasPrefix(key, "HIP_"),
+				strings.HasPrefix(key, "GPU_"),
+				strings.HasPrefix(key, "HSA_"),
+				strings.HasPrefix(key, "GGML_"),
+				slices.Contains([]string{
+					"PATH",
+					"LD_LIBRARY_PATH",
+					"DYLD_LIBRARY_PATH",
+				}, key):
+				attrs = append(attrs, slog.String(key, value))
+			}
+		}
+	}
+	return slog.GroupValue(attrs...)
+}
 
 type LlamaServer interface {
 	Ping(ctx context.Context) error
 	WaitUntilRunning(ctx context.Context) error
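The new filteredEnv type moves the environment allowlist into a slog.LogValuer implementation, so call sites can pass the raw environment and let the logging pipeline do the filtering. A minimal, self-contained sketch (not part of this commit; the allowlist is shortened for brevity) of how the type behaves:

package main

import (
	"log/slog"
	"os"
	"slices"
	"strings"
)

// filteredEnv mirrors the type added in this commit: a slice of
// KEY=VALUE pairs whose LogValue method keeps only allowlisted keys.
type filteredEnv []string

func (e filteredEnv) LogValue() slog.Value {
	var attrs []slog.Attr
	for _, env := range e {
		if key, value, ok := strings.Cut(env, "="); ok {
			if strings.HasPrefix(key, "OLLAMA_") ||
				slices.Contains([]string{"PATH", "LD_LIBRARY_PATH"}, key) {
				attrs = append(attrs, slog.String(key, value))
			}
		}
	}
	return slog.GroupValue(attrs...)
}

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
	env := filteredEnv{"OLLAMA_DEBUG=1", "SECRET_TOKEN=hunter2", "PATH=/usr/bin"}
	// SECRET_TOKEN is dropped; only allowlisted keys appear in the output.
	logger.Info("subprocess", "env", env)
}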
@@ -148,10 +177,6 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 		params = append(params, "--n-gpu-layers", strconv.Itoa(opts.NumGPU))
 	}
 
-	if envconfig.Debug() {
-		params = append(params, "--verbose")
-	}
-
 	if opts.MainGPU > 0 {
 		params = append(params, "--main-gpu", strconv.Itoa(opts.MainGPU))
 	}
@@ -404,26 +429,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 	}
 
 	slog.Info("starting llama server", "cmd", s.cmd)
-	if envconfig.Debug() {
-		filteredEnv := []string{}
-		for _, ev := range s.cmd.Env {
-			if strings.HasPrefix(ev, "OLLAMA_") ||
-				strings.HasPrefix(ev, "CUDA_") ||
-				strings.HasPrefix(ev, "ROCR_") ||
-				strings.HasPrefix(ev, "ROCM_") ||
-				strings.HasPrefix(ev, "HIP_") ||
-				strings.HasPrefix(ev, "GPU_") ||
-				strings.HasPrefix(ev, "HSA_") ||
-				strings.HasPrefix(ev, "GGML_") ||
-				strings.HasPrefix(ev, "PATH=") ||
-				strings.HasPrefix(ev, "LD_LIBRARY_PATH=") ||
-				strings.HasPrefix(ev, "DYLD_LIBRARY_PATH=") {
-				filteredEnv = append(filteredEnv, ev)
-			}
-		}
-		// Log at debug as the environment is inherited and might contain sensitive information
-		slog.Debug("subprocess", "environment", filteredEnv)
-	}
+	slog.Debug("subprocess", "", filteredEnv(s.cmd.Env))
 
 	if err = s.cmd.Start(); err != nil {
 		var msg string
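Two properties of this one-line replacement are worth noting. First, slog inlines a group whose key is empty, so the allowlisted variables still appear as top-level attributes of the record. Second, because filteredEnv is a slog.LogValuer, the filtering is deferred: the old code filtered eagerly behind an envconfig.Debug() guard, while the new code is a free slice conversion whose LogValue only runs if a handler actually accepts the Debug record. A small sketch (illustrative only; the expensive type is not from the commit) of that deferred evaluation:

package main

import (
	"log/slog"
	"os"
)

type expensive struct{}

func (expensive) LogValue() slog.Value {
	// With the handler level set to Info, this never runs for Debug records.
	println("LogValue evaluated")
	return slog.StringValue("computed")
}

func main() {
	h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})
	logger := slog.New(h)
	logger.Debug("dropped", "v", expensive{}) // level disabled: LogValue not called
	logger.Info("kept", "v", expensive{})     // level enabled: LogValue called once
}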
@@ -721,6 +727,9 @@ type CompletionResponse struct {
 }
 
 func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn func(CompletionResponse)) error {
+	slog.Debug("completion request", "images", len(req.Images), "prompt", len(req.Prompt), "format", string(req.Format))
+	slog.Log(ctx, logutil.LevelTrace, "completion request", "prompt", req.Prompt)
+
 	if len(req.Format) > 0 {
 		switch string(req.Format) {
 		case `null`, `""`:
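The slog.Log call with logutil.LevelTrace works because slog levels are plain integers, so a package can define its own level below LevelDebug and handlers configured for it will emit those records. A hedged sketch of the pattern; the LevelTrace constant and its value of -8 are assumptions for illustration, not necessarily how ollama's logutil package defines it:

package main

import (
	"context"
	"log/slog"
	"os"
)

// Assumed trace level, one step below slog.LevelDebug (-4).
const LevelTrace = slog.Level(-8)

func main() {
	h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: LevelTrace})
	logger := slog.New(h)
	// Mirrors slog.Log(ctx, logutil.LevelTrace, ...) in the diff.
	logger.Log(context.Background(), LevelTrace, "completion request", "prompt", "hello")
}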
@@ -884,6 +893,8 @@ type EmbeddingResponse struct {
 }
 
 func (s *llmServer) Embedding(ctx context.Context, input string) ([]float32, error) {
+	slog.Log(ctx, logutil.LevelTrace, "embedding request", "input", input)
+
 	if err := s.sem.Acquire(ctx, 1); err != nil {
 		if errors.Is(err, context.Canceled) {
 			slog.Info("aborting embedding request due to client closing the connection")
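For context, the surrounding lines show the request gate: Embedding first acquires a slot from s.sem and treats a context.Canceled error as the client hanging up. Assuming s.sem is a *semaphore.Weighted from golang.org/x/sync/semaphore (an assumption; the diff does not show its type), the pattern looks like this:

package main

import (
	"context"
	"errors"
	"fmt"

	"golang.org/x/sync/semaphore"
)

func main() {
	// Assumed setup: one in-flight request at a time.
	sem := semaphore.NewWeighted(1)

	ctx, cancel := context.WithCancel(context.Background())
	_ = sem.Acquire(ctx, 1) // first acquire succeeds immediately

	cancel() // simulate the client closing the connection
	if err := sem.Acquire(ctx, 1); err != nil {
		if errors.Is(err, context.Canceled) {
			fmt.Println("aborting request due to client closing the connection")
		}
	}
	sem.Release(1) // release the slot taken by the first acquire
}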