fixup! riscv requantize packing

Tencent · Mar 1, 2024 · 830052a · 830052a
1 parent 5dde273
commit 830052a
Showing 1 changed file with 16 additions and 4 deletions.
diff --git a/src/layer/riscv/requantize_riscv.cpp b/src/layer/riscv/requantize_riscv.cpp
@@ -83,7 +83,10 @@ int Requantize_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Optio
                         signed char* ptr = (signed char*)top_blob + i * packn;
 
                         vfloat32m4_t _v = vfcvt_f_x_v_f32m4(vle32_v_i32m4(intptr, vl), vl);
-                        _v = activation_ps(vfmacc_vf_f32m4(_bias, scale_in, _v, vl), activation_type, activation_params, vl);
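+                        // replaces the fused form `_v = activation_ps(vfmacc_vf_f32m4(_bias, scale_in, _v, vl), activation_type, activation_params, vl);` with a separate multiply and add -- NOTE(review): presumably so the product is rounded before the bias is added; confirm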
+                        _v = vfmul_vf_f32m4(_v, scale_in, vl);
+                        _v = vfadd_vv_f32m4(_v, _bias, vl);
+                        _v = activation_ps(_v, activation_type, activation_params, vl);
                         vint8m1_t _out = float2int8(vfmul_vf_f32m4(_v, scale_out, vl), vl);
                         vse8_v_i8m1(ptr, _out, vl);
                     }
@@ -98,7 +101,10 @@ int Requantize_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Optio
 
                         vfloat32m4_t _bias = vle32_v_f32m4((const float*)bias_data + i * packn, vl);
                         vfloat32m4_t _v = vfcvt_f_x_v_f32m4(vle32_v_i32m4(intptr, vl), vl);
-                        _v = activation_ps(vfmacc_vf_f32m4(_bias, scale_in, _v, vl), activation_type, activation_params, vl);
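+                        // replaces the fused form `_v = activation_ps(vfmacc_vf_f32m4(_bias, scale_in, _v, vl), activation_type, activation_params, vl);` with a separate multiply and add -- NOTE(review): presumably so the product is rounded before the bias is added; confirm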
+                        _v = vfmul_vf_f32m4(_v, scale_in, vl);
+                        _v = vfadd_vv_f32m4(_v, _bias, vl);
+                        _v = activation_ps(_v, activation_type, activation_params, vl);
                         vint8m1_t _out = float2int8(vfmul_vf_f32m4(_v, scale_out, vl), vl);
                         vse8_v_i8m1(ptr, _out, vl);
                     }
@@ -137,7 +143,10 @@ int Requantize_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Optio
                         vfloat32m4_t _scale_out = vle32_v_f32m4((const float*)scale_out_data + i * packn, vl);
 
                         vfloat32m4_t _v = vfcvt_f_x_v_f32m4(vle32_v_i32m4(intptr, vl), vl);
-                        _v = activation_ps(vfmacc_vf_f32m4(_bias, scale_in, _v, vl), activation_type, activation_params, vl);
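+                        // replaces the fused form `_v = activation_ps(vfmacc_vf_f32m4(_bias, scale_in, _v, vl), activation_type, activation_params, vl);` with a separate multiply and add -- NOTE(review): presumably so the product is rounded before the bias is added; confirm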
+                        _v = vfmul_vf_f32m4(_v, scale_in, vl);
+                        _v = vfadd_vv_f32m4(_v, _bias, vl);
+                        _v = activation_ps(_v, activation_type, activation_params, vl);
                         vint8m1_t _out = float2int8(vfmul_vv_f32m4(_v, _scale_out, vl), vl);
                         vse8_v_i8m1(ptr, _out, vl);
                     }
@@ -154,7 +163,10 @@ int Requantize_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Optio
                         vfloat32m4_t _bias = vle32_v_f32m4((const float*)bias_data + i * packn, vl);
 
                         vfloat32m4_t _v = vfcvt_f_x_v_f32m4(vle32_v_i32m4(intptr, vl), vl);
-                        _v = activation_ps(vfmacc_vf_f32m4(_bias, scale_in, _v, vl), activation_type, activation_params, vl);
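+                        // replaces the fused form `_v = activation_ps(vfmacc_vf_f32m4(_bias, scale_in, _v, vl), activation_type, activation_params, vl);` with a separate multiply and add -- NOTE(review): presumably so the product is rounded before the bias is added; confirm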
+                        _v = vfmul_vf_f32m4(_v, scale_in, vl);
+                        _v = vfadd_vv_f32m4(_v, _bias, vl);
+                        _v = activation_ps(_v, activation_type, activation_params, vl);
                         vint8m1_t _out = float2int8(vfmul_vv_f32m4(_v, _scale_out, vl), vl);
                         vse8_v_i8m1(ptr, _out, vl);
                     }