mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 22:33:56 +00:00
GGML update to ec98e2002 (#13451)
* Revert "add support for NVIDIA Nemotron 3 Nano" This reverts commit e7d2ae9d69421012e9a8765c06a3fdf0e45b12f3. * GGML update to 380b4c984 Remove MaskBatchPadding as GGML_KQ_MASK_PAD is no longer present (no padding required) * update to c45f89d55 * ec98e2002 solar pro needed more adjusting - needs verification * review comments
This commit is contained in:
@@ -140,10 +140,6 @@ func (c *Causal) Init(backend ml.Backend, dtype ml.DType, maxSequences, capacity
|
||||
c.config.CachePadding = 1
|
||||
}
|
||||
|
||||
if c.config.MaskBatchPadding == 0 {
|
||||
c.config.MaskBatchPadding = 1
|
||||
}
|
||||
|
||||
if c.config.MaskDType == ml.DTypeOther {
|
||||
c.config.MaskDType = ml.DTypeF32
|
||||
}
|
||||
@@ -364,15 +360,12 @@ func roundUp(length, pad int) int {
|
||||
// token in the history should apply. This is based on both the sequence and causality (the
|
||||
// position of the history is not ahead of the token in the batch).
|
||||
func (c *Causal) buildMask(ctx ml.Context) ml.Tensor {
|
||||
// Align and pad the two dimensions as required by the backend
|
||||
batchSize := roundUp(c.curBatchSize, c.config.MaskBatchPadding)
|
||||
|
||||
c.curCellRange.min = roundDown(c.curCellRange.min, c.config.CachePadding)
|
||||
c.curCellRange.max = roundUp(c.curCellRange.max+1, c.config.CachePadding) - 1
|
||||
|
||||
length := c.curCellRange.max - c.curCellRange.min + 1
|
||||
|
||||
mask := make([]float32, batchSize*length)
|
||||
mask := make([]float32, c.curBatchSize*length)
|
||||
|
||||
for i := range c.curBatchSize {
|
||||
enabled := !slices.Contains(c.opts.Except, i)
|
||||
@@ -386,13 +379,7 @@ func (c *Causal) buildMask(ctx ml.Context) ml.Tensor {
|
||||
}
|
||||
}
|
||||
|
||||
// Mask out any padding tokens we added. For padding that we added to the cache history, this
|
||||
// has already been masked out because the sequence doesn't match.
|
||||
for i := c.curBatchSize * length; i < len(mask); i++ {
|
||||
mask[i] = float32(math.Inf(-1))
|
||||
}
|
||||
|
||||
maskTensor := ctx.Input().FromFloats(mask, length, batchSize)
|
||||
maskTensor := ctx.Input().FromFloats(mask, length, c.curBatchSize)
|
||||
|
||||
if c.config.MaskDType != ml.DTypeF32 {
|
||||
maskTensor = maskTensor.Cast(ctx, c.config.MaskDType)
|
||||
|
||||
Reference in New Issue
Block a user