The quantization PR didn't block all unsupported file types,
which this PR fixes. It also updates the API docs to reflect
the now-reduced set of supported types.
This commit is contained in:
Daniel Hiltgen
2025-05-12 15:23:31 -07:00
committed by GitHub
parent 0cefd46f23
commit 9d6df90805
4 changed files with 88 additions and 382 deletions

View File

@@ -70,23 +70,7 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
newType = fsggml.TensorTypeQ6_K
}
} else if strings.Contains(name, "attn_v.weight") {
if ftype == fsggml.FileTypeQ2_K {
if kv.GQA() >= 4 {
newType = fsggml.TensorTypeQ4_K
} else {
newType = fsggml.TensorTypeQ3_K
}
} else if ftype == fsggml.FileTypeQ2_K_S && kv.GQA() >= 4 {
newType = fsggml.TensorTypeQ4_K
} else if ftype == fsggml.FileTypeQ3_K_M {
if qs.iAttnV < 2 {
newType = fsggml.TensorTypeQ5_K
} else {
newType = fsggml.TensorTypeQ4_K
}
} else if ftype == fsggml.FileTypeQ3_K_L {
newType = fsggml.TensorTypeQ5_K
} else if (ftype == fsggml.FileTypeQ4_K_M || ftype == fsggml.FileTypeQ5_K_M) &&
if (ftype == fsggml.FileTypeQ4_K_M) &&
useMoreBits(qs.iAttnV, qs.nAttnV) {
newType = fsggml.TensorTypeQ6_K
} else if ftype == fsggml.FileTypeQ4_K_S && qs.iAttnV < 4 {
@@ -114,54 +98,23 @@ func getTensorNewType(kv fsggml.KV, qs *quantizeState, newType fsggml.TensorType
} else if strings.Contains(name, "ffn_down") {
iLayer := qs.iFfnDown
n_layer := qs.nFfnDown
if ftype == fsggml.FileTypeQ2_K {
newType = fsggml.TensorTypeQ3_K
} else if ftype == fsggml.FileTypeQ2_K_S {
if iLayer < n_layer/8 {
newType = fsggml.TensorTypeQ4_K
}
} else if ftype == fsggml.FileTypeQ3_K_M {
if iLayer < n_layer/16 {
newType = fsggml.TensorTypeQ5_K
} else if useMoreBits(iLayer, n_layer) {
newType = fsggml.TensorTypeQ4_K
} else {
newType = fsggml.TensorTypeQ3_K
}
} else if ftype == fsggml.FileTypeQ3_K_L {
newType = fsggml.TensorTypeQ5_K
} else if ftype == fsggml.FileTypeQ4_K_M {
if ftype == fsggml.FileTypeQ4_K_M {
if useMoreBits(iLayer, n_layer) {
newType = fsggml.TensorTypeQ6_K
}
} else if ftype == fsggml.FileTypeQ5_K_M && useMoreBits(iLayer, n_layer) {
newType = fsggml.TensorTypeQ6_K
} else if ftype == fsggml.FileTypeQ4_K_S && iLayer < n_layer/8 {
newType = fsggml.TensorTypeQ5_K
}
qs.iFfnDown++
} else if strings.Contains(name, "attn_output.weight") {
if nExperts == 8 {
if ftype == fsggml.FileTypeQ2_K || ftype == fsggml.FileTypeQ3_K_S || ftype == fsggml.FileTypeQ3_K_M ||
ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
newType = fsggml.TensorTypeQ5_K
}
} else {
if ftype == fsggml.FileTypeQ2_K {
newType = fsggml.TensorTypeQ3_K
} else if ftype == fsggml.FileTypeQ3_K_M {
newType = fsggml.TensorTypeQ4_K
} else if ftype == fsggml.FileTypeQ3_K_L {
if ftype == fsggml.FileTypeQ4_K_S || ftype == fsggml.FileTypeQ4_K_M {
newType = fsggml.TensorTypeQ5_K
}
}
} else if strings.Contains(name, "attn_qkv.weight") {
if ftype == fsggml.FileTypeQ3_K_M || ftype == fsggml.FileTypeQ3_K_L {
newType = fsggml.TensorTypeQ4_K
} else if ftype == fsggml.FileTypeQ4_K_M {
if ftype == fsggml.FileTypeQ4_K_M {
newType = fsggml.TensorTypeQ5_K
} else if ftype == fsggml.FileTypeQ5_K_M {
newType = fsggml.TensorTypeQ6_K
}
}

View File

@@ -42,71 +42,6 @@ func TestGetTensorNewType(t *testing.T) {
ftype: fsggml.FileTypeF32,
expected: fsggml.TensorTypeQ6_K,
},
{
name: "attn_v.weight_q4_k",
kv: map[string]any{
"general.architecture": "foo",
"foo.attention.head_count": uint32(4),
"foo.attention.head_count_kv": uint32(1),
},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "attn_v.weight_q3_k",
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K,
expected: fsggml.TensorTypeQ3_K,
},
{
name: "attn_v.weight_q2_k_s_q4_k",
kv: map[string]any{
"general.architecture": "foo",
"foo.attention.head_count": uint32(4),
"foo.attention.head_count_kv": uint32(1),
},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K_S,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "attn_v.weight_q3_k_m",
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_v.weight_q3_k_m_i",
qs: quantizeState{
iAttnV: 2,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "attn_v.weight_q3_k_l",
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_v.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_L,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_v.weight_q4_k_m",
qs: quantizeState{
@@ -156,88 +91,6 @@ func TestGetTensorNewType(t *testing.T) {
ftype: fsggml.FileTypeF32,
expected: fsggml.TensorTypeQ8_0,
},
{
name: "ffn_down_q2_k",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K,
expected: fsggml.TensorTypeQ3_K,
},
{
name: "ffn_down_q2_k_s",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K_S,
expected: fsggml.TensorTypeQ4_0,
},
{
name: "ffn_down_q2_k_s_layers",
qs: quantizeState{
iFfnDown: 2,
nFfnDown: 3 * 8,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K_S,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "ffn_down_q3_k_m_base",
qs: quantizeState{
iFfnDown: 1,
nFfnDown: 8,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ3_K,
},
{
name: "ffn_down_q3_k_m_16",
qs: quantizeState{
iFfnDown: 2,
nFfnDown: 3 * 16,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "ffn_down_q3_k_m_8",
qs: quantizeState{
iFfnDown: 2,
nFfnDown: 3 * 8,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "ffn_down_q3_k_l",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_L,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "ffn_down_q4_k_m",
qs: quantizeState{
@@ -264,19 +117,6 @@ func TestGetTensorNewType(t *testing.T) {
ftype: fsggml.FileTypeQ4_K_M,
expected: fsggml.TensorTypeQ6_K,
},
{
name: "ffn_down_q5_k_m",
qs: quantizeState{
iFfnDown: 2,
nFfnDown: 3 * 8,
},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "ffn_down",
shape: []uint64{256},
ftype: fsggml.FileTypeQ5_K_M,
expected: fsggml.TensorTypeQ6_K,
},
{
name: "ffn_down_q4_k_s",
qs: quantizeState{
@@ -290,59 +130,6 @@ func TestGetTensorNewType(t *testing.T) {
ftype: fsggml.FileTypeQ4_K_S,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_output.weight_8_expert",
qs: quantizeState{},
kv: map[string]any{
"general.architecture": "foo",
"foo.expert_count": uint32(8),
},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_output.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_output.weight_q2",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_output.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ2_K,
expected: fsggml.TensorTypeQ3_K,
},
{
name: "attn_output.weight_q3_k_m",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_output.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "attn_output.weight_q3_k_l",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_output.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_L,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_qkv.weight_q3_k_m",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_qkv.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ3_K_M,
expected: fsggml.TensorTypeQ4_K,
},
{
name: "attn_qkv.weight_q4_k_m",
qs: quantizeState{},
@@ -353,16 +140,6 @@ func TestGetTensorNewType(t *testing.T) {
ftype: fsggml.FileTypeQ4_K_M,
expected: fsggml.TensorTypeQ5_K,
},
{
name: "attn_qkv.weight_q5_k_m",
qs: quantizeState{},
kv: map[string]any{},
newType: fsggml.TensorTypeQ4_0,
tensor_name: "blk.0.attn_qkv.weight",
shape: []uint64{256},
ftype: fsggml.FileTypeQ5_K_M,
expected: fsggml.TensorTypeQ6_K,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {