chore: update models to use slice/chunk/chunksections (#12934)

* use slice/chunks * bert * llama4 * gemma3n * gptoss * mistral3 * qwen3vl * qwen25vl * deepseek2 * remove unused ops
2025-12-21 14:26:30 +00:00 · 2025-11-13 15:20:12 -08:00
parent c114987523
commit 333203d871
13 changed files with 59 additions and 135 deletions
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -146,7 +146,6 @@ type Tensor interface {
 	FromFloats([]float32)
 	FromInts([]int32)

-	Neg(ctx Context) Tensor
 	Add(ctx Context, t2 Tensor) Tensor
 	Sub(ctx Context, t2 Tensor) Tensor
 	Mul(ctx Context, t2 Tensor) Tensor
@@ -185,7 +184,6 @@ type Tensor interface {
 	View(ctx Context, offset int, shape ...int) Tensor
 	Permute(ctx Context, shape ...int) Tensor
 	Contiguous(ctx Context, shape ...int) Tensor
-	Set(ctx Context, t2 Tensor, offset int, strides ...int) Tensor

 	Pad(ctx Context, shape ...int) Tensor

@@ -209,7 +207,6 @@ type Tensor interface {
 	Stddev(ctx Context) Tensor
 	Sqr(ctx Context) Tensor
 	Sqrt(ctx Context) Tensor
-	Clamp(ctx Context, min, max float32) Tensor
 }

 // ScaledDotProductAttention implements a fused attention
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -1137,13 +1137,6 @@ func (t *Tensor) Cast(ctx ml.Context, dtype ml.DType) ml.Tensor {
 	}
 }

-func (t *Tensor) Neg(ctx ml.Context) ml.Tensor {
-	return &Tensor{
-		b: t.b,
-		t: C.ggml_neg(ctx.(*Context).ctx, t.t),
-	}
-}
-
 func (t *Tensor) Add(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
 	return &Tensor{
 		b: t.b,
@@ -1632,20 +1625,6 @@ func (t *Tensor) AvgPool2D(ctx ml.Context, k, s int, p float32) ml.Tensor {
 	}
 }

-func (t *Tensor) Set(ctx ml.Context, t2 ml.Tensor, offset int, strides ...int) ml.Tensor {
-	var tt *C.struct_ggml_tensor
-	switch len(strides) {
-	case 0:
-		tt = C.ggml_set_1d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.size_t(offset))
-	case 1:
-		tt = C.ggml_set_2d(ctx.(*Context).ctx, t.t, t2.(*Tensor).t, C.size_t(offset), C.size_t(strides[0]))
-	default:
-		panic("unsupported number of dimensions")
-	}
-
-	return &Tensor{b: t.b, t: tt}
-}
-
 func (t *Tensor) ScaledDotProductAttention(ctx ml.Context, key, value, mask, sinks ml.Tensor, scale float64) ml.Tensor {
 	var kqMask *C.struct_ggml_tensor
 	if mask != nil {
@@ -1732,13 +1711,6 @@ func (t *Tensor) Sqrt(ctx ml.Context) ml.Tensor {
 	}
 }

-func (t *Tensor) Clamp(ctx ml.Context, min, max float32) ml.Tensor {
-	return &Tensor{
-		b: t.b,
-		t: C.ggml_clamp(ctx.(*Context).ctx, t.t, C.float(min), C.float(max)),
-	}
-}
-
 // Slice returns a view of the tensor sliced along dim from low to high in step steps.
 // Slice panics if the dimension is invalid or the slice parameters are out of range.
 // If dim=0 and step>1, the tensor is a copy rather than a view to ensure proper shape.
--- a/ml/nn/pooling/pooling.go
+++ b/ml/nn/pooling/pooling.go
@@ -32,10 +32,9 @@ func (t Type) Forward(ctx ml.Context, hiddenStates ml.Tensor) ml.Tensor {
 		hiddenStates = hiddenStates.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx).Mean(ctx)
 		return hiddenStates.Permute(ctx, 1, 0, 2, 3).Contiguous(ctx)
 	case TypeCLS:
-		return hiddenStates.View(ctx, 0, hiddenStates.Dim(0))
+		return hiddenStates.Slice(ctx, 1, 0, 1, 1)
 	case TypeLast:
-		hiddenStates = hiddenStates.View(ctx, (hiddenStates.Dim(1)-1)*hiddenStates.Stride(1), hiddenStates.Dim(0))
-		return hiddenStates
+		return hiddenStates.Slice(ctx, 1, hiddenStates.Dim(1)-1, hiddenStates.Dim(1), 1)
 	default:
 		panic("unknown pooling type")
 	}