mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 22:33:56 +00:00
The quantization PR didn't block all unsupported file types, which this PR fixes. It also updates the API docs to reflect the now-reduced set of supported types.
This commit is contained in:
@@ -70,23 +70,7 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
|
||||
newType = fsggml.TensorTypeQ6_K
|
||||
}
|
||||
} else if strings.Contains(name, "attn_v.weight") {
|
||||
if ftype == fsggml.FileTypeQ2_K {
|
||||
if kv.GQA() >= 4 {
|
||||
newType = fsggml.TensorTypeQ4_K
|
||||
} else {
|
||||
newType = fsggml.TensorTypeQ3_K
|
||||
}
|
||||
} else if ftype == fsggml.FileTypeQ2_K_S && kv.GQA() >= 4 {
|
||||
newType = fsggml.TensorTypeQ4_K
|
||||
} else if ftype == fsggml.FileTypeQ3_K_M {
|
||||
if qs.iAttnV < 2 {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
} else {
|
||||
newType = fsggml.TensorTypeQ4_K
|
||||
}
|
||||
} else if ftype == fsggml.FileTypeQ3_K_L {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
} else if (ftype == fsggml.FileTypeQ4_K_M || ftype == fsggml.FileTypeQ5_K_M) &&
|
||||
if (ftype == fsggml.FileTypeQ4_K_M) &&
|
||||
useMoreBits(qs.iAttnV, qs.nAttnV) {
|
||||
newType = fsggml.TensorTypeQ6_K
|
||||
} else if ftype == fsggml.FileTypeQ4_K_S && qs.iAttnV < 4 {
|
||||
@@ -114,54 +98,23 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
|
||||
} else if strings.Contains(name, "ffn_down") {
|
||||
iLayer := qs.iFfnDown
|
||||
n_layer := qs.nFfnDown
|
||||
if ftype == fsggml.FileTypeQ2_K {
|
||||
newType = fsggml.TensorTypeQ3_K
|
||||
} else if ftype == fsggml.FileTypeQ2_K_S {
|
||||
if iLayer < n_layer/8 {
|
||||
newType = fsggml.TensorTypeQ4_K
|
||||
}
|
||||
} else if ftype == fsggml.FileTypeQ3_K_M {
|
||||
if iLayer < n_layer/16 {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
} else if useMoreBits(iLayer, n_layer) {
|
||||
newType = fsggml.TensorTypeQ4_K
|
||||
} else {
|
||||
newType = fsggml.TensorTypeQ3_K
|
||||
}
|
||||
} else if ftype == fsggml.FileTypeQ3_K_L {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
} else if ftype == fsggml.FileTypeQ4_K_M {
|
||||
if ftype == fsggml.FileTypeQ4_K_M {
|
||||
if useMoreBits(iLayer, n_layer) {
|
||||
newType = fsggml.TensorTypeQ6_K
|
||||
}
|
||||
} else if ftype == fsggml.FileTypeQ5_K_M && useMoreBits(iLayer, n_layer) {
|
||||
newType = fsggml.TensorTypeQ6_K
|
||||
} else if ftype == fsggml.FileTypeQ4_K_S && iLayer < n_layer/8 {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
}
|
||||
qs.iFfnDown++
|
||||
} else if strings.Contains(name, "attn_output.weight") {
|
||||
if nExperts == 8 {
|
||||
if ftype == fsggml.FileTypeQ2_K || ftype == fsggml.FileTypeQ3_K_S || ftype == fsggml.FileTypeQ3_K_M ||
|
||||
ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
}
|
||||
} else {
|
||||
if ftype == fsggml.FileTypeQ2_K {
|
||||
newType = fsggml.TensorTypeQ3_K
|
||||
} else if ftype == fsggml.FileTypeQ3_K_M {
|
||||
newType = fsggml.TensorTypeQ4_K
|
||||
} else if ftype == fsggml.FileTypeQ3_K_L {
|
||||
if ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
}
|
||||
}
|
||||
} else if strings.Contains(name, "attn_qkv.weight") {
|
||||
if ftype == fsggml.FileTypeQ3_K_M || ftype == fsggml.FileTypeQ3_K_L {
|
||||
newType = fsggml.TensorTypeQ4_K
|
||||
} else if ftype == fsggml.FileTypeQ4_K_M {
|
||||
if ftype == fsggml.FileTypeQ4_K_M {
|
||||
newType = fsggml.TensorTypeQ5_K
|
||||
} else if ftype == fsggml.FileTypeQ5_K_M {
|
||||
newType = fsggml.TensorTypeQ6_K
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -42,71 +42,6 @@ func TestGetTensorNewType(t *testing.T) {
|
||||
ftype: fsggml.FileTypeF32,
|
||||
expected: fsggml.TensorTypeQ6_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q4_k",
|
||||
kv: map[string]any{
|
||||
"general.architecture": "foo",
|
||||
"foo.attention.head_count": uint32(4),
|
||||
"foo.attention.head_count_kv": uint32(1),
|
||||
},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q3_k",
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K,
|
||||
expected: fsggml.TensorTypeQ3_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q2_k_s_q4_k",
|
||||
kv: map[string]any{
|
||||
"general.architecture": "foo",
|
||||
"foo.attention.head_count": uint32(4),
|
||||
"foo.attention.head_count_kv": uint32(1),
|
||||
},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K_S,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q3_k_m",
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q3_k_m_i",
|
||||
qs: quantizeState{
|
||||
iAttnV: 2,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q3_k_l",
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_v.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_L,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_v.weight_q4_k_m",
|
||||
qs: quantizeState{
|
||||
@@ -156,88 +91,6 @@ func TestGetTensorNewType(t *testing.T) {
|
||||
ftype: fsggml.FileTypeF32,
|
||||
expected: fsggml.TensorTypeQ8_0,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q2_k",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K,
|
||||
expected: fsggml.TensorTypeQ3_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q2_k_s",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K_S,
|
||||
expected: fsggml.TensorTypeQ4_0,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q2_k_s_layers",
|
||||
qs: quantizeState{
|
||||
iFfnDown: 2,
|
||||
nFfnDown: 3 * 8,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K_S,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q3_k_m_base",
|
||||
qs: quantizeState{
|
||||
iFfnDown: 1,
|
||||
nFfnDown: 8,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ3_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q3_k_m_16",
|
||||
qs: quantizeState{
|
||||
iFfnDown: 2,
|
||||
nFfnDown: 3 * 16,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q3_k_m_8",
|
||||
qs: quantizeState{
|
||||
iFfnDown: 2,
|
||||
nFfnDown: 3 * 8,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q3_k_l",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_L,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q4_k_m",
|
||||
qs: quantizeState{
|
||||
@@ -264,19 +117,6 @@ func TestGetTensorNewType(t *testing.T) {
|
||||
ftype: fsggml.FileTypeQ4_K_M,
|
||||
expected: fsggml.TensorTypeQ6_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q5_k_m",
|
||||
qs: quantizeState{
|
||||
iFfnDown: 2,
|
||||
nFfnDown: 3 * 8,
|
||||
},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "ffn_down",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ5_K_M,
|
||||
expected: fsggml.TensorTypeQ6_K,
|
||||
},
|
||||
{
|
||||
name: "ffn_down_q4_k_s",
|
||||
qs: quantizeState{
|
||||
@@ -290,59 +130,6 @@ func TestGetTensorNewType(t *testing.T) {
|
||||
ftype: fsggml.FileTypeQ4_K_S,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_output.weight_8_expert",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{
|
||||
"general.architecture": "foo",
|
||||
"foo.expert_count": uint32(8),
|
||||
},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_output.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_output.weight_q2",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_output.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ2_K,
|
||||
expected: fsggml.TensorTypeQ3_K,
|
||||
},
|
||||
{
|
||||
name: "attn_output.weight_q3_k_m",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_output.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "attn_output.weight_q3_k_l",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_output.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_L,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_qkv.weight_q3_k_m",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_qkv.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ3_K_M,
|
||||
expected: fsggml.TensorTypeQ4_K,
|
||||
},
|
||||
{
|
||||
name: "attn_qkv.weight_q4_k_m",
|
||||
qs: quantizeState{},
|
||||
@@ -353,16 +140,6 @@ func TestGetTensorNewType(t *testing.T) {
|
||||
ftype: fsggml.FileTypeQ4_K_M,
|
||||
expected: fsggml.TensorTypeQ5_K,
|
||||
},
|
||||
{
|
||||
name: "attn_qkv.weight_q5_k_m",
|
||||
qs: quantizeState{},
|
||||
kv: map[string]any{},
|
||||
newType: fsggml.TensorTypeQ4_0,
|
||||
tensor_name: "blk.0.attn_qkv.weight",
|
||||
shape: []uint64{256},
|
||||
ftype: fsggml.FileTypeQ5_K_M,
|
||||
expected: fsggml.TensorTypeQ6_K,
|
||||
},
|
||||
}
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user