Write Gemm int8 quantization scales in ModelWriter::save

Tencent · Sep 20, 2024 · 37208ab · 37208ab
1 parent 7f2d1da
commit 37208ab
Showing 1 changed file with 18 additions and 0 deletions.
diff --git a/tools/modelwriter.h b/tools/modelwriter.h
@@ -1773,6 +1773,7 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             fprintf_param_value(" 12=%d", output_elempack)
             fprintf_param_value(" 13=%d", output_elemtype)
             fprintf_param_value(" 14=%d", output_transpose)
+            fprintf_param_value(" 18=%d", int8_scale_term)
             fprintf_param_value(" 20=%d", constant_TILE_M)
             fprintf_param_value(" 21=%d", constant_TILE_N)
             fprintf_param_value(" 22=%d", constant_TILE_K)
@@ -1789,6 +1790,23 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             {
                 fwrite_weight_tag_data(op->C_data, bp);
             }
+
+#if NCNN_INT8
+            // write int8_scale data
+            if (op->int8_scale_term)
+            {
+                if (op->constantA == 1)
+                {
+                    fwrite_weight_data(op->A_data_int8_scales, bp, 90, 100);
+                }
+                if (op->constantB == 1)
+                {
+                    ncnn::Mat B_data_int8_scales(1);
+                    B_data_int8_scales[0] = op->B_data_int8_scale;
+                    fwrite_weight_data(B_data_int8_scales, bp, 90, 100);
+                }
+            }
+#endif // NCNN_INT8
         }
         else if (layer->type == "GLU")
         {