Mirror of https://github.com/likelovewant/ollama-for-amd.git
Merge branch 'ollama:main' into main
@@ -125,9 +125,9 @@ type Tensor struct {
 func (t Tensor) blockSize() uint64 {
 	switch t.Kind {
-	case 0, 1, 24, 25, 26, 27, 28, 31: // F32, F16, I8, I16, I32, I64, F64, BF16
+	case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16
 		return 1
-	case 2, 3, 8, 9, 20: // Q4_0, Q4_1, Q8_0, Q8_1, IQ4_NL
+	case 2, 3, 4, 5, 6, 7, 8, 9, 20: // Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, Q8_1, IQ4_NL
 		return 32
 	default: // All others
 		return 256
 	}
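For orientation: this hunk corrects the kind id used for BF16 (30, not 31, in GGML's type enum) and adds the remaining 32-element-block quantizations to the second case. blockSize reports how many elements one block of a tensor kind packs; callers combine it with a per-block byte size to compute a tensor's storage footprint. A minimal, self-contained sketch of that arithmetic follows; bytesPerBlock is a hypothetical stand-in for the real GGML type tables, not code from this repository:

```go
package main

import "fmt"

// blockSize mirrors the patched switch: elements per block
// for a given GGML tensor kind.
func blockSize(kind uint32) uint64 {
	switch kind {
	case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16
		return 1
	case 2, 3, 4, 5, 6, 7, 8, 9, 20: // Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, Q8_1, IQ4_NL
		return 32
	default: // K-quants and other 256-element block types
		return 256
	}
}

// bytesPerBlock is illustrative only; the real per-block sizes
// live in GGML's type traits.
func bytesPerBlock(kind uint32) uint64 {
	switch kind {
	case 0: // F32: one 4-byte element per "block" of 1
		return 4
	case 2: // Q4_0: 32 4-bit weights (16 bytes) + one f16 scale (2 bytes)
		return 18
	default:
		return 0 // not covered in this sketch
	}
}

func main() {
	const kind = uint32(2)             // Q4_0
	const elements = uint64(4096 * 4096) // e.g. one weight matrix
	blocks := elements / blockSize(kind)
	fmt.Printf("%d blocks, %d bytes\n", blocks, blocks*bytesPerBlock(kind))
}
```

With Q4_0's 18-byte, 32-element blocks this works out to 0.5625 bytes per weight, which is why getting the 32-element case list right matters for memory estimation.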
@@ -7,7 +7,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/gpu"
-	"github.com/ollama/ollama/server/envconfig"
+	"github.com/ollama/ollama/envconfig"
 )
 
 // This algorithm looks for a complete fit to determine if we need to unload other models
@@ -26,7 +26,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/gpu"
-	"github.com/ollama/ollama/server/envconfig"
+	"github.com/ollama/ollama/envconfig"
 )
 
 type LlamaServer interface {
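Both import hunks track the same refactor: the envconfig package moved from server/envconfig to the repository root. Because the package name itself is still envconfig, only the import path changes; call sites such as envconfig.Debug (a hypothetical identifier, not shown in this diff) compile unchanged.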
@@ -528,7 +528,7 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
 	for {
 		select {
 		case <-ctx.Done():
-			slog.Info("context expired before server started")
+			slog.Warn("client connection closed before server finished loading, aborting load")
 			return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
 		case err := <-s.done:
 			msg := ""
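The hunk swaps an Info log for a clearer Warn but keeps the control flow: a select loop that aborts when the caller's context ends or when the runner process exits via the done channel. A minimal sketch of that shape, under stated assumptions: the polling ticker and ready callback are invented here for illustration, and only the log and timeout-error texts come from the diff, so this is not the ollama implementation itself.

```go
package main

import (
	"context"
	"fmt"
	"log/slog"
	"time"
)

// waitUntilRunning sketches the select loop shown above: abort on
// context cancellation, surface a crash reported on done, or keep
// polling until the server says it is ready.
func waitUntilRunning(ctx context.Context, done <-chan error, ready func() bool) error {
	ticker := time.NewTicker(50 * time.Millisecond) // polling cadence is an assumption
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			slog.Warn("client connection closed before server finished loading, aborting load")
			return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
		case err := <-done:
			// the original builds a diagnostic msg here (msg := "" in the hunk)
			return fmt.Errorf("runner exited before becoming ready: %w", err)
		case <-ticker.C:
			if ready() {
				return nil
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	done := make(chan error) // would be fed by a process watcher in real code
	start := time.Now()
	err := waitUntilRunning(ctx, done, func() bool {
		return time.Since(start) > 200*time.Millisecond // pretend readiness
	})
	fmt.Println("result:", err)
}
```

The design point the log change reflects: ctx.Done() here usually means the client hung up, not that the server is broken, so a Warn with an explicit "aborting load" reads better in the server logs than a generic Info.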