Skip to content

Commit

Permalink
MMQ dedup 3
Browse files Browse the repository at this point in the history
q4_0

q4_1

q3_k

q8_1_q8_1 opt

q8_0 opt

remove guards

WIP q4_K

util function for q45_k scales

q4_K

WIP dp4a

q4_k dp4a

fixup

fixup

WIP q5_K

q5_K

fixup

q6_K opt

iq2_xxs works

qi2_xs works

extern templates

iq2_s

iq3_xxs

iq3_s

extern templates

iq1_s

fixup

fixup
  • Loading branch information
JohannesGaessler authored and Nexesenex committed Jul 15, 2024
1 parent 51c0baf commit 07d49f7
Show file tree
Hide file tree
Showing 9 changed files with 867 additions and 760 deletions.
24 changes: 24 additions & 0 deletions ggml/src/ggml-cuda/mmq.cu
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,24 @@ void ggml_cuda_op_mul_mat_q(
case GGML_TYPE_Q6_K:
mul_mat_q_case<GGML_TYPE_Q6_K>(ctx, args, stream);
break;
case GGML_TYPE_IQ2_XXS:
mul_mat_q_case<GGML_TYPE_IQ2_XXS>(ctx, args, stream);
break;
case GGML_TYPE_IQ2_XS:
mul_mat_q_case<GGML_TYPE_IQ2_XS>(ctx, args, stream);
break;
case GGML_TYPE_IQ2_S:
mul_mat_q_case<GGML_TYPE_IQ2_S>(ctx, args, stream);
break;
case GGML_TYPE_IQ3_XXS:
mul_mat_q_case<GGML_TYPE_IQ3_XXS>(ctx, args, stream);
break;
case GGML_TYPE_IQ3_S:
mul_mat_q_case<GGML_TYPE_IQ3_S>(ctx, args, stream);
break;
case GGML_TYPE_IQ1_S:
mul_mat_q_case<GGML_TYPE_IQ1_S>(ctx, args, stream);
break;
case GGML_TYPE_IQ4_XS:
mul_mat_q_case<GGML_TYPE_IQ4_XS>(ctx, args, stream);
break;
Expand Down Expand Up @@ -95,6 +113,12 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
case GGML_TYPE_Q4_K:
case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K:
case GGML_TYPE_IQ2_XXS:
case GGML_TYPE_IQ2_XS:
case GGML_TYPE_IQ2_S:
case GGML_TYPE_IQ3_XXS:
case GGML_TYPE_IQ3_S:
case GGML_TYPE_IQ1_S:
case GGML_TYPE_IQ4_XS:
case GGML_TYPE_IQ4_NL:
mmq_supported = true;
Expand Down
1,570 changes: 811 additions & 759 deletions ggml/src/ggml-cuda/mmq.cuh

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion ggml/src/ggml-cuda/template-instances/generate_cu_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
TYPES_MMQ = [
"GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0",
"GGML_TYPE_Q2_K", "GGML_TYPE_Q3_K", "GGML_TYPE_Q4_K", "GGML_TYPE_Q5_K", "GGML_TYPE_Q6_K",
"GGML_TYPE_IQ4_NL", "GGML_TYPE_IQ4_XS"
"GGML_TYPE_IQ2_XXS", "GGML_TYPE_IQ2_XS", "GGML_TYPE_IQ2_S", "GGML_TYPE_IQ3_XXS", "GGML_TYPE_IQ3_S",
"GGML_TYPE_IQ1_S", "GGML_TYPE_IQ4_NL", "GGML_TYPE_IQ4_XS"
]

SOURCE_MMQ = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
Expand Down
5 changes: 5 additions & 0 deletions ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../mmq.cuh"

DECL_MMQ_CASE(GGML_TYPE_IQ1_S);
5 changes: 5 additions & 0 deletions ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../mmq.cuh"

DECL_MMQ_CASE(GGML_TYPE_IQ2_S);
5 changes: 5 additions & 0 deletions ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../mmq.cuh"

DECL_MMQ_CASE(GGML_TYPE_IQ2_XS);
5 changes: 5 additions & 0 deletions ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../mmq.cuh"

DECL_MMQ_CASE(GGML_TYPE_IQ2_XXS);
5 changes: 5 additions & 0 deletions ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../mmq.cuh"

DECL_MMQ_CASE(GGML_TYPE_IQ3_S);
5 changes: 5 additions & 0 deletions ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// This file has been autogenerated by generate_cu_files.py, do not edit manually.

#include "../mmq.cuh"

DECL_MMQ_CASE(GGML_TYPE_IQ3_XXS);

0 comments on commit 07d49f7

Please sign in to comment.