@@ -1165,6 +1165,23 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
1165
1165
.nrows = 1,
1166
1166
.row_meta_size = 4,
1167
1167
},
1168
+ [GGML_TYPE_IQ4_KS_R4] = { // type-traits table entry for the "iq4_ks_r4" type
1169
+ .type_name = "iq4_ks_r4",
1170
+ .blck_size = QK_K,
1171
+ .type_size = sizeof(block_iq4_ks), // sized with block_iq4_ks, not a dedicated *_r4 struct -- NOTE(review): presumably the block layout is shared; confirm
1172
+ .is_quantized = true,
1173
+ .to_float = (ggml_to_float_t) dequantize_row_iq4_ks_r4,
1174
+ .from_float = quantize_row_iq4_ks_r4,
1175
+ .from_float_ref = (ggml_from_float_t)quantize_row_iq4_ks_r4_ref,
1176
+ .vec_dot = vec_dot_iq4_ks_r4_q8_k,
1177
+ #if defined __AVX2__
1178
+ .vec_dot_type = GGML_TYPE_Q8_K32, // chosen at compile time when __AVX2__ is defined
1179
+ #else
1180
+ .vec_dot_type = GGML_TYPE_Q8_K, // fallback when __AVX2__ is not defined
1181
+ #endif
1182
+ .nrows = 1,
1183
+ .row_meta_size = 4, // same nrows / row_meta_size values as the entry immediately preceding this one
1184
+ },
1168
1185
[GGML_TYPE_IQ4_KSS] = {
1169
1186
.type_name = "iq4_kss",
1170
1187
.blck_size = QK_K,
@@ -4197,6 +4214,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
4197
4214
case GGML_FTYPE_MOSTLY_Q8_0_R4: wtype = GGML_TYPE_Q8_0_R4; break;
4198
4215
case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
4199
4216
case GGML_FTYPE_MOSTLY_IQ4_KS: wtype = GGML_TYPE_IQ4_KS; break;
4217
+ case GGML_FTYPE_MOSTLY_IQ4_KS_R4: wtype = GGML_TYPE_IQ4_KS_R4;break;
4200
4218
case GGML_FTYPE_MOSTLY_IQ4_KSS: wtype = GGML_TYPE_IQ4_KSS; break;
4201
4219
case GGML_FTYPE_MOSTLY_IQ2_K: wtype = GGML_TYPE_IQ2_K; break;
4202
4220
case GGML_FTYPE_MOSTLY_IQ2_K_R4: wtype = GGML_TYPE_IQ2_K_R4; break;
@@ -10737,6 +10755,7 @@ static void ggml_compute_forward_add(
10737
10755
case GGML_TYPE_Q8_0_R4:
10738
10756
case GGML_TYPE_IQ4_XS:
10739
10757
case GGML_TYPE_IQ4_KS:
10758
+ case GGML_TYPE_IQ4_KS_R4:
10740
10759
case GGML_TYPE_IQ4_KSS:
10741
10760
case GGML_TYPE_IQ2_K:
10742
10761
case GGML_TYPE_IQ2_K_R4:
@@ -11196,6 +11215,7 @@ static void ggml_compute_forward_add1(
11196
11215
case GGML_TYPE_Q8_0_R4:
11197
11216
case GGML_TYPE_IQ4_XS:
11198
11217
case GGML_TYPE_IQ4_KS:
11218
+ case GGML_TYPE_IQ4_KS_R4:
11199
11219
case GGML_TYPE_IQ4_KSS:
11200
11220
case GGML_TYPE_IQ2_K:
11201
11221
case GGML_TYPE_IQ2_K_R4:
@@ -11352,6 +11372,7 @@ static void ggml_compute_forward_acc(
11352
11372
case GGML_TYPE_Q8_0_R4:
11353
11373
case GGML_TYPE_IQ4_XS:
11354
11374
case GGML_TYPE_IQ4_KS:
11375
+ case GGML_TYPE_IQ4_KS_R4:
11355
11376
case GGML_TYPE_IQ4_KSS:
11356
11377
case GGML_TYPE_IQ2_K:
11357
11378
case GGML_TYPE_IQ2_K_R4:
@@ -14554,6 +14575,7 @@ static void ggml_compute_forward_out_prod(
14554
14575
case GGML_TYPE_Q8_0_R4:
14555
14576
case GGML_TYPE_IQ4_XS:
14556
14577
case GGML_TYPE_IQ4_KS:
14578
+ case GGML_TYPE_IQ4_KS_R4:
14557
14579
case GGML_TYPE_IQ4_KSS:
14558
14580
case GGML_TYPE_IQ2_K:
14559
14581
case GGML_TYPE_IQ2_K_R4:
@@ -14950,6 +14972,7 @@ static void ggml_compute_forward_set(
14950
14972
case GGML_TYPE_Q8_0_R4:
14951
14973
case GGML_TYPE_IQ4_XS:
14952
14974
case GGML_TYPE_IQ4_KS:
14975
+ case GGML_TYPE_IQ4_KS_R4:
14953
14976
case GGML_TYPE_IQ4_KSS:
14954
14977
case GGML_TYPE_IQ2_K:
14955
14978
case GGML_TYPE_IQ2_K_R4:
@@ -15240,6 +15263,7 @@ static void ggml_compute_forward_get_rows(
15240
15263
case GGML_TYPE_Q8_0_R4:
15241
15264
case GGML_TYPE_IQ4_XS:
15242
15265
case GGML_TYPE_IQ4_KS:
15266
+ case GGML_TYPE_IQ4_KS_R4:
15243
15267
case GGML_TYPE_IQ4_KSS:
15244
15268
case GGML_TYPE_IQ2_K:
15245
15269
case GGML_TYPE_IQ2_K_R4:
@@ -15859,6 +15883,7 @@ static void ggml_compute_forward_clamp(
15859
15883
case GGML_TYPE_Q8_0_R4:
15860
15884
case GGML_TYPE_IQ4_XS:
15861
15885
case GGML_TYPE_IQ4_KS:
15886
+ case GGML_TYPE_IQ4_KS_R4:
15862
15887
case GGML_TYPE_IQ4_KSS:
15863
15888
case GGML_TYPE_IQ2_K:
15864
15889
case GGML_TYPE_IQ2_K_R4:
@@ -22706,6 +22731,7 @@ size_t ggml_quantize_chunk(
22706
22731
case GGML_TYPE_Q8_0_R4: result = quantize_q8_0_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
22707
22732
case GGML_TYPE_IQ4_XS: result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
22708
22733
case GGML_TYPE_IQ4_KS: result = quantize_iq4_ks (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
22734
+ case GGML_TYPE_IQ4_KS_R4:result = quantize_iq4_ks_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
22709
22735
case GGML_TYPE_IQ4_KSS: result = quantize_iq4_kss(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
22710
22736
case GGML_TYPE_IQ2_K: result = quantize_iq2_k (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
22711
22737
case GGML_TYPE_IQ2_K_R4:result = quantize_iq2_k_r4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
0 commit comments