diff --git a/Bender.yml b/Bender.yml index 74318e60..dfa19f31 100644 --- a/Bender.yml +++ b/Bender.yml @@ -3,8 +3,8 @@ package: authors: ["Stefan Mach "] dependencies: - common_cells: {git: "https://github.com/pulp-platform/common_cells.git", version: v1.13.1} - fpu_div_sqrt_mvp: {git: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", version: v1.0.1} + common_cells: {git: "https://github.com/pulp-platform/common_cells.git", version: 1.13.1} + fpu_div_sqrt_mvp: {git: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", version: 1.0.3} sources: - src/fpnew_pkg.sv @@ -17,9 +17,5 @@ sources: - src/fpnew_opgroup_block.sv - src/fpnew_opgroup_fmt_slice.sv - src/fpnew_opgroup_multifmt_slice.sv - - src/fpnew_pipe_in.sv - - src/fpnew_pipe_out.sv - - src/fpnew_pipe_inside_fma.sv - - src/fpnew_pipe_inside_cast.sv - src/fpnew_rounding.sv - src/fpnew_top.sv diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index ddd51fca..48e9d93f 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -14,6 +14,25 @@ Versions of the IP in the same major relase are "pin-compatible" with each other ### Changed ### Fixed +## [0.6.0] - 2019-07-04 + +### Changed +- Pipelines are generated in the datapath modules instead of separate instances + +### Fixed +- Don't care assignments to structs have been expanded for better tool support [(#14)](https://github.com/pulp-platform/fpnew/pull/14) +- Undriven busy signal in output pipeline bypass +- Typo in the documentation about the multiply operation +- Generation of merged slices when the first package format is disabled +- Potential simulation/synthesis mismatch of the UF flag +- Various linter warnings +- Documentation to reflect the updated pipeline distribution order +- [fpu_div_sqrt_mvp] Bumped to fix linter warnings +- [Bender] Fixed dependencies for Bender [(#15)](https://github.com/pulp-platform/fpnew/pull/15) + +### Removed +- Currently unused modules: `fpnew_pipe*`, `fpnew_{f2i,f2f,i2f}_cast` + ## [0.5.6] - 2019-06-12 @@ -28,7
+47,7 @@ Versions of the IP in the same major relase are "pin-compatible" with each other ## [0.5.5] - 2019-06-02 ### Fixed -- UF flag handling according to IEEE754-2008 (#11) +- UF flag handling according to IEEE754-2008 [(#11)](https://github.com/pulp-platform/fpnew/issues/11) ## [0.5.4] - 2019-06-02 diff --git a/docs/README.md b/docs/README.md index 7c071e30..54322ddc 100644 --- a/docs/README.md +++ b/docs/README.md @@ -100,7 +100,7 @@ Unless noted otherwise, the first operand `op[0]` is used for the operation. | `FNMSUB` | `1` | Negated fused multiply-add (`-(op[0] * op[1]) - op[2]`) | | `ADD` | `0` | Addition (`op[1] + op[2]`) *note the operand indices* | | `ADD` | `1` | Subtraction (`op[1] - op[2]`) *note the operand indices* | -| `MUL` | `0` | Multiplication (`op[0] - op[1]`) | +| `MUL` | `0` | Multiplication (`op[0] * op[1]`) | | `DIV` | `0` | Division (`op[0] / op[1]`) | | `SQRT` | `0` | Square root | | `SGNJ` | `0` | Sign injection, operation encoded in rounding mode
`RNE`: `op[0]` with `sign(op[1])`
`RTZ`: `op[0]` with `~sign(op[1])`
`RDN`: `op[0]` with `sign(op[0]) ^ sign(op[1])`
`RUP`: `op[0]` (passthrough) | @@ -336,12 +336,12 @@ For best results, we *strongly* encourage the use of automatic retiming options The configuration `pipe_config_t` is an enumeration of type `logic [1:0]` holding the following implementation options for the pipelines in operational units: -| Enumerator | Description | -|---------------|-----------------------------------------------------------------------------------------------------| -| `BEFORE` | All pipeline registers are inserted at the inputs of the operational unit | -| `AFTER` | All pipeline registers are inserted at the outputs of the operational unit | -| `INSIDE` | All registers are inserted at roughly the middle of the operational unit (if not possible, `AFTER`) | -| `DISTRIBUTED` | Registers are evenly distributed to `INSIDE`, `AFTER`, and `BEFORE` (if no `INSIDE`, all `AFTER`) | +| Enumerator | Description | +|---------------|------------------------------------------------------------------------------------------------------| +| `BEFORE` | All pipeline registers are inserted at the inputs of the operational unit | +| `AFTER` | All pipeline registers are inserted at the outputs of the operational unit | +| `INSIDE` | All registers are inserted at roughly the middle of the operational unit (if not possible, `BEFORE`) | +| `DISTRIBUTED` | Registers are evenly distributed to `INSIDE`, `BEFORE`, and `AFTER` (if no `INSIDE`, all `BEFORE`) | diff --git a/ips_list.yml b/ips_list.yml index 5918cd43..a1f78506 100644 --- a/ips_list.yml +++ b/ips_list.yml @@ -23,5 +23,5 @@ common_cells: domain: [soc, cluster] fpu_div_sqrt_mvp: - commit: v1.0.1 + commit: v1.0.3 domain: [cluster,soc] diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv index dae8bd51..e21cc368 100644 --- a/src/fpnew_cast_multi.sv +++ b/src/fpnew_cast_multi.sv @@ -11,6 +11,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_cast_multi #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, parameter 
fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, @@ -62,7 +64,6 @@ module fpnew_cast_multi #( localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(FpFmtConfig); - localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits; localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits; localparam int unsigned SUPER_BIAS = 2**(SUPER_EXP_BITS - 1) - 1; @@ -75,92 +76,102 @@ module fpnew_cast_multi #( // or the number of bits in an integer localparam int unsigned INT_EXP_WIDTH = fpnew_pkg::maximum($clog2(MAX_INT_WIDTH), fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SUPER_BIAS + SUPER_MAN_BITS))) + 1; + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise // --------------- // Input pipeline // --------------- - // Pipelined input signals + // Selected pipeline output signals as non-arrays logic [WIDTH-1:0] operands_q; logic [NUM_FORMATS-1:0] is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - fpnew_pkg::operation_e op_q; logic op_mod_q; fpnew_pkg::fp_format_e src_fmt_q; fpnew_pkg::fp_format_e dst_fmt_q; fpnew_pkg::int_format_e int_fmt_q; - TagType tag_q; - AuxType aux_q; - logic out_valid_input; - logic in_ready_inside; // written by inside pipeline - logic busy_input; - - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : input_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
(NumPipeRegs / 3) // Last to get regs - : NumPipeRegs; - fpnew_pipe_in #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .NumOperands ( 1 ), - .NumFormats ( NUM_FORMATS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i, - .op_mod_i, - .src_fmt_i, - .dst_fmt_i, - .int_fmt_i, - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( op_q ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( src_fmt_q ), - .dst_fmt_o ( dst_fmt_q ), - .int_fmt_o ( int_fmt_q ), - .tag_o ( tag_q ), - .aux_o ( aux_q ), - .out_valid_o ( out_valid_input ), - .out_ready_i ( in_ready_inside ), - .busy_o ( busy_input ) - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign in_ready_o = in_ready_inside; - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_q = op_i; - assign op_mod_q = op_mod_i; - assign src_fmt_q = src_fmt_i; - assign dst_fmt_q = dst_fmt_i; - assign int_fmt_q = int_fmt_i; - assign tag_q = tag_i; - assign aux_q = aux_i; - assign out_valid_input = in_valid_i; - assign busy_input = 1'b0; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][NUM_FORMATS-1:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + 
logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_src_fmt_q[0] = src_fmt_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_int_fmt_q[0] = int_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_dst_fmt_q[i+1],
inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign is_boxed_q = inp_pipe_is_boxed_q[NUM_INP_REGS]; + assign op_mod_q = inp_pipe_op_mod_q[NUM_INP_REGS]; + assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + assign int_fmt_q = inp_pipe_int_fmt_q[NUM_INP_REGS]; // ----------------- // Input processing // ----------------- logic src_is_int, dst_is_int; // if 0, it's a float - assign src_is_int = (op_q == fpnew_pkg::I2F); - assign dst_is_int = (op_q == fpnew_pkg::F2I); + assign src_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::I2F); + assign dst_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::F2I); logic [INT_MAN_WIDTH-1:0] encoded_mant; // input mantissa with implicit bit @@ -169,7 +180,7 @@ module fpnew_cast_multi #( logic [NUM_FORMATS-1:0][INT_MAN_WIDTH-1:0] fmt_mantissa; logic signed [NUM_FORMATS-1:0][INT_EXP_WIDTH-1:0] fmt_shift_compensation; // for LZC - fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info_q; + fpnew_pkg::fp_info_t [NUM_FORMATS-1:0] info; logic [NUM_INT_FORMATS-1:0][INT_MAN_WIDTH-1:0] ifmt_input_val; logic int_sign; @@ -190,16 +201,16 @@ module fpnew_cast_multi #( ) i_fpnew_classifier ( .operands_i ( operands_q[FP_WIDTH-1:0] ), .is_boxed_i ( is_boxed_q[fmt] ), - .info_o ( info_q[fmt] ) + .info_o ( info[fmt] ) ); assign fmt_sign[fmt] = operands_q[FP_WIDTH-1]; assign fmt_exponent[fmt] = signed'({1'b0, operands_q[MAN_BITS+:EXP_BITS]}); - assign fmt_mantissa[fmt] = {info_q[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad + assign fmt_mantissa[fmt] = {info[fmt].is_normal, operands_q[MAN_BITS-1:0]}; // zero pad 
// Compensation for the difference in mantissa widths used for leading-zero count assign fmt_shift_compensation[fmt] = signed'(INT_MAN_WIDTH - 1 - MAN_BITS); end else begin : inactive_format - assign info_q[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign info[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled @@ -241,7 +252,7 @@ module fpnew_cast_multi #( assign src_bias = signed'(fpnew_pkg::bias(src_fmt_q)); assign src_exp = fmt_exponent[src_fmt_q]; - assign src_subnormal = signed'({1'b0, info_q[src_fmt_q].is_subnormal}); + assign src_subnormal = signed'({1'b0, info[src_fmt_q].is_subnormal}); assign src_offset = fmt_shift_compensation[src_fmt_q]; logic input_sign; // input sign @@ -278,112 +289,118 @@ module fpnew_cast_multi #( assign input_exp = src_is_int ? 
int_input_exp : fp_input_exp; - logic signed [INT_EXP_WIDTH-1:0] destination_exp_d, destination_exp_q; // re-biased exponent for destination - logic signed [INT_EXP_WIDTH-1:0] dst_bias; // dst format bias - assign dst_bias = signed'(fpnew_pkg::bias(dst_fmt_q)); + logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination // Rebias the exponent - assign destination_exp_d = input_exp + dst_bias; + assign destination_exp = input_exp + signed'(fpnew_pkg::bias(dst_fmt_q)); // --------------- // Internal pipeline // --------------- - // Pipelined internal signals + // Pipeline output signals as non-arrays logic input_sign_q; logic signed [INT_EXP_WIDTH-1:0] input_exp_q; - logic [INT_MAN_WIDTH-1:0] input_mant_q; + logic [INT_MAN_WIDTH-1:0] input_mant_q; + logic signed [INT_EXP_WIDTH-1:0] destination_exp_q; logic src_is_int_q; logic dst_is_int_q; - fpnew_pkg::fp_info_t info_q2; + fpnew_pkg::fp_info_t info_q; logic mant_is_zero_q; logic op_mod_q2; - fpnew_pkg::roundmode_e rnd_mode_q2; + fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::fp_format_e src_fmt_q2; fpnew_pkg::fp_format_e dst_fmt_q2; fpnew_pkg::int_format_e int_fmt_q2; - TagType tag_q2; - AuxType aux_q2; - logic out_valid_inside; - logic in_ready_output; // written by output pipeline - logic busy_inside; - - // Generate pipeline between mul and add if needed - if (PipeConfig==fpnew_pkg::INSIDE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : inside_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
((NumPipeRegs + 2) / 3) // First to get regs - : NumPipeRegs; - fpnew_pipe_inside_cast #( - .IntExpWidth ( INT_EXP_WIDTH ), - .IntManWidth ( INT_MAN_WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_inside_pipe ( - .clk_i, - .rst_ni, - .input_sign_i ( input_sign ), - .input_exp_i ( input_exp ), - .destination_exp_i ( destination_exp_d ), - .input_mant_i ( input_mant ), - .src_is_int_i ( src_is_int ), - .dst_is_int_i ( dst_is_int ), - .info_i ( info_q[src_fmt_q] ), - .mant_is_zero_i ( mant_is_zero ), - .op_mod_i ( op_mod_q ), - .rnd_mode_i ( rnd_mode_q ), - .src_fmt_i ( src_fmt_q ), - .dst_fmt_i ( dst_fmt_q ), - .int_fmt_i ( int_fmt_q ), - .tag_i ( tag_q ), - .aux_i ( aux_q ), - .in_valid_i ( out_valid_input ), - .in_ready_o ( in_ready_inside ), - .flush_i, - .input_sign_o ( input_sign_q ), - .input_exp_o ( input_exp_q ), - .destination_exp_o ( destination_exp_q ), - .input_mant_o ( input_mant_q ), - .src_is_int_o ( src_is_int_q ), - .dst_is_int_o ( dst_is_int_q ), - .info_o ( info_q2 ), - .mant_is_zero_o ( mant_is_zero_q ), - .op_mod_o ( op_mod_q2 ), - .rnd_mode_o ( rnd_mode_q2 ), - .src_fmt_o ( src_fmt_q2 ), - .dst_fmt_o ( dst_fmt_q2 ), - .int_fmt_o ( int_fmt_q2 ), - .tag_o ( tag_q2 ), - .aux_o ( aux_q2 ), - .out_valid_o ( out_valid_inside ), - .out_ready_i ( in_ready_output ), - .busy_o ( busy_inside ) - ); - // Otherwise pass through inputs - end else begin : no_inside_pipeline - assign in_ready_inside = in_ready_output; - assign input_sign_q = input_sign; - assign input_exp_q = input_exp; - assign destination_exp_q = destination_exp_d; - assign input_mant_q = input_mant; - assign src_is_int_q = src_is_int; - assign dst_is_int_q = dst_is_int; - assign info_q2 = info_q[src_fmt_q]; - assign mant_is_zero_q = mant_is_zero; - assign op_mod_q2 = op_mod_q; - assign rnd_mode_q2 = rnd_mode_q; - assign src_fmt_q2 = src_fmt_q; - assign dst_fmt_q2 = dst_fmt_q; - assign int_fmt_q2 = int_fmt_q; - assign tag_q2 = tag_q; - assign aux_q2 
= aux_q; - assign out_valid_inside = out_valid_input; - assign busy_inside = 1'b0; + // Internal pipeline signals, index i holds signal after i register stages + + + logic [0:NUM_MID_REGS] mid_pipe_input_sign_q; + logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q; + logic [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q; + logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_dest_exp_q; + logic [0:NUM_MID_REGS] mid_pipe_src_is_int_q; + logic [0:NUM_MID_REGS] mid_pipe_dst_is_int_q; + fpnew_pkg::fp_info_t [0:NUM_MID_REGS] mid_pipe_info_q; + logic [0:NUM_MID_REGS] mid_pipe_mant_zero_q; + logic [0:NUM_MID_REGS] mid_pipe_op_mod_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; + fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_input_sign_q[0] = input_sign; + assign mid_pipe_input_exp_q[0] = input_exp; + assign mid_pipe_input_mant_q[0] = input_mant; + assign mid_pipe_dest_exp_q[0] = destination_exp; + assign mid_pipe_src_is_int_q[0] = src_is_int; + assign mid_pipe_dst_is_int_q[0] = dst_is_int; + assign mid_pipe_info_q[0] = info[src_fmt_q]; + assign mid_pipe_mant_zero_q[0] = mant_is_zero; + assign mid_pipe_op_mod_q[0] = op_mod_q; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_src_fmt_q[0] = src_fmt_q; + assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; + assign mid_pipe_int_fmt_q[0] = int_fmt_q; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = 
inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_input_mant_q[i+1], mid_pipe_input_mant_q[i], reg_ena, '0) + `FFL(mid_pipe_dest_exp_q[i+1], mid_pipe_dest_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_src_is_int_q[i+1], mid_pipe_src_is_int_q[i], reg_ena, '0) + `FFL(mid_pipe_dst_is_int_q[i+1], mid_pipe_dst_is_int_q[i], reg_ena, '0) + `FFL(mid_pipe_info_q[i+1], mid_pipe_info_q[i], reg_ena, '0) + `FFL(mid_pipe_mant_zero_q[i+1], mid_pipe_mant_zero_q[i], reg_ena, '0) + `FFL(mid_pipe_op_mod_q[i+1], mid_pipe_op_mod_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) +
`FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS]; + assign input_exp_q = mid_pipe_input_exp_q[NUM_MID_REGS]; + assign input_mant_q = mid_pipe_input_mant_q[NUM_MID_REGS]; + assign destination_exp_q = mid_pipe_dest_exp_q[NUM_MID_REGS]; + assign src_is_int_q = mid_pipe_src_is_int_q[NUM_MID_REGS]; + assign dst_is_int_q = mid_pipe_dst_is_int_q[NUM_MID_REGS]; + assign info_q = mid_pipe_info_q[NUM_MID_REGS]; + assign mant_is_zero_q = mid_pipe_mant_zero_q[NUM_MID_REGS]; + assign op_mod_q2 = mid_pipe_op_mod_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign src_fmt_q2 = mid_pipe_src_fmt_q[NUM_MID_REGS]; + assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS]; + assign int_fmt_q2 = mid_pipe_int_fmt_q[NUM_MID_REGS]; // -------- // Casting // -------- - logic [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments + logic [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments logic [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift logic [2*INT_MAN_WIDTH:0] destination_mant; // mantissa from shifter, with rnd bit @@ -425,7 +442,7 @@ module fpnew_cast_multi #( end else begin // Overflow or infinities (for proper rounding) if ((destination_exp_q >= 2**fpnew_pkg::exp_bits(dst_fmt_q2)-1) || - (~src_is_int_q && info_q2.is_inf)) begin + (~src_is_int_q && info_q.is_inf)) begin final_exp = unsigned'(2**fpnew_pkg::exp_bits(dst_fmt_q2)-2); // largest normal value preshift_mant = '1; // largest normal value and RS bits set of_before_round = 1'b1; @@ -521,7 +538,7 @@ module fpnew_cast_multi #( .abs_value_i ( pre_round_abs ), .sign_i ( input_sign_q ), // source format .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q2 ), + .rnd_mode_i ( rnd_mode_q ), .effective_subtraction_i ( 1'b0 ), // no 
operation happened .abs_rounded_o ( rounded_abs ), .sign_o ( rounded_sign ), @@ -586,8 +603,7 @@ module fpnew_cast_multi #( if (FpFmtConfig[fmt]) begin : active_format always_comb begin : special_results logic [FP_WIDTH-1:0] special_res; - - special_res = info_q2.is_zero + special_res = info_q.is_zero ? input_sign_q << FP_WIDTH-1 // signed zero : {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN @@ -601,12 +617,12 @@ module fpnew_cast_multi #( end // Detect special case from source format, I2F casts don't produce a special result - assign fp_result_is_special = ~src_is_int_q & (info_q2.is_zero | - info_q2.is_nan | - ~info_q2.is_boxed); + assign fp_result_is_special = ~src_is_int_q & (info_q.is_zero | + info_q.is_nan | + ~info_q.is_boxed); // Signalling input NaNs raise invalid flag, otherwise no flags set - assign fp_special_status = '{NV: info_q2.is_signalling, default: 1'b0}; + assign fp_special_status = '{NV: info_q.is_signalling, default: 1'b0}; // Assemble result according to destination format assign fp_special_result = fmt_special_result[dst_fmt_q2]; // destination format @@ -634,7 +650,7 @@ module fpnew_cast_multi #( special_res[INT_WIDTH-1] = op_mod_q2; // for unsigned casts yields 2**INT_WIDTH-1 // Negative special case (except for nans) tie to -max or 0 - if (input_sign_q && !info_q2.is_nan) + if (input_sign_q && !info_q.is_nan) special_res = ~special_res; // Initialize special result with sign-extension @@ -647,8 +663,8 @@ module fpnew_cast_multi #( end // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) - assign int_result_is_special = info_q2.is_nan | info_q2.is_inf | - of_before_round | ~info_q2.is_boxed | + assign int_result_is_special = info_q.is_nan | info_q.is_inf | + of_before_round | ~info_q.is_boxed | (input_sign_q & op_mod_q2 & ~rounded_int_res_zero); // All integer special cases are invalid @@ -665,14 +681,12 @@ module fpnew_cast_multi #( logic [WIDTH-1:0] fp_result, int_result; fpnew_pkg::status_t 
fp_status, int_status; - assign fp_regular_status = '{ - NV: src_is_int_q & (of_before_round | of_after_round), // overflow is invalid for I2F casts - DZ: 1'b0, // no divisions - OF: ~src_is_int_q & (~info_q2.is_inf & (of_before_round | of_after_round)), // inf casts no OF - UF: uf_after_round & fp_regular_status.NX, - NX: src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f - : (| fp_round_sticky_bits) | (~info_q2.is_inf & (of_before_round | of_after_round)) - }; + assign fp_regular_status.NV = src_is_int_q & (of_before_round | of_after_round); // overflow is invalid for I2F casts + assign fp_regular_status.DZ = 1'b0; // no divisions + assign fp_regular_status.OF = ~src_is_int_q & (~info_q.is_inf & (of_before_round | of_after_round)); // inf casts no OF + assign fp_regular_status.UF = uf_after_round & fp_regular_status.NX; + assign fp_regular_status.NX = src_is_int_q ? (| fp_round_sticky_bits) // overflow is invalid in i2f + : (| fp_round_sticky_bits) | (~info_q.is_inf & (of_before_round | of_after_round)); assign int_regular_status = '{NX: (| int_round_sticky_bits), default: 1'b0}; assign fp_result = fp_result_is_special ? fp_special_result : fmt_result[dst_fmt_q2]; @@ -684,7 +698,6 @@ module fpnew_cast_multi #( logic [WIDTH-1:0] result_d; fpnew_pkg::status_t status_d; logic extension_bit; - logic busy_output; // Select output depending on special case detection assign result_d = dst_is_int_q ? int_result : fp_result; @@ -696,51 +709,52 @@ module fpnew_cast_multi #( // ---------------- // Output Pipeline // ---------------- - // Generate pipeline at output if needed - if (PipeConfig==fpnew_pkg::AFTER || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : output_pipline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
((NumPipeRegs + 1) / 3) // Second to get regs - : NumPipeRegs; - fpnew_pipe_out #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( extension_bit ), - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i ( tag_q2 ), - .aux_i ( aux_q2 ), - .in_valid_i ( out_valid_inside ), - .in_ready_o ( in_ready_output ), - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o ( busy_output ) - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign in_ready_output = out_ready_i; - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = extension_bit; - assign tag_o = tag_q2; - assign aux_o = aux_q2; - assign out_valid_o = out_valid_inside; + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_ext_bit_q[0] = extension_bit; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the 
register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - - assign busy_o = busy_input | busy_inside | busy_output; - + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS]; + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index 382b1a1e..1aed3a55 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -51,72 +51,78 @@
module fpnew_divsqrt_multi #( output logic busy_o ); + // ---------- + // Constants + // ---------- + // Pipelines + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + // --------------- // Input pipeline // --------------- - // Pipelined input signals - logic [1:0][WIDTH-1:0] operands_q; - logic [NUM_FORMATS-1:0][1:0] is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - fpnew_pkg::operation_e op_q; - logic op_mod_q; - fpnew_pkg::fp_format_e dst_fmt_q; - TagType tag_q; - AuxType aux_q; - logic in_valid_q, in_ready_q; - logic pipe_busy; + // Selected pipeline output signals as non-arrays + logic [1:0][WIDTH-1:0] operands_q; + fpnew_pkg::roundmode_e rnd_mode_q; + fpnew_pkg::operation_e op_q; + fpnew_pkg::fp_format_e dst_fmt_q; + logic in_valid_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE) begin : input_pipeline - fpnew_pipe_in #( - .Width ( WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .NumOperands ( 2 ), - .NumFormats ( NUM_FORMATS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_input_pipe ( - 
.clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i, - .op_mod_i ( 1'b0 ), // unused - .src_fmt_i ( fpnew_pkg::FP32 ), // unused - .dst_fmt_i, - .int_fmt_i ( fpnew_pkg::INT8 ), // unused, - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( op_q ), - .op_mod_o ( /* unused */ ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( dst_fmt_q ), - .int_fmt_o ( /* unused */ ), - .tag_o ( tag_q ), - .aux_o ( aux_q ), - .out_valid_o ( in_valid_q ), - .out_ready_i ( in_ready_q ), - .busy_o ( pipe_busy ) - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_q = op_i; - assign dst_fmt_q = dst_fmt_i; - assign tag_q = tag_i; - assign aux_q = aux_i; - assign in_valid_q = in_valid_i; - assign in_ready_o = in_ready_q; + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to updtream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign op_q = inp_pipe_op_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -158,7 +164,7 @@ module fpnew_divsqrt_multi #( fsm_state_e state_q, state_d; // Upstream ready comes from sanitization FSM - assign in_ready_q = in_ready; + assign inp_pipe_ready[NUM_INP_REGS] = in_ready; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. 
assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; @@ -237,9 +243,9 @@ module fpnew_divsqrt_multi #( AuxType result_aux_q; // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_is_fp8_q, input_is_fp8, in_valid_q, '0) - `FFL(result_tag_q, tag_q, in_valid_q, '0) - `FFL(result_aux_q, aux_q, in_valid_q, '0) + `FFL(result_is_fp8_q, input_is_fp8, in_valid_q, '0) + `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], in_valid_q, '0) + `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], in_valid_q, '0) // ----------------- // DIVSQRT instance @@ -284,48 +290,49 @@ module fpnew_divsqrt_multi #( // ---------------- // Output Pipeline // ---------------- - // Generate pipeline at output if needed - if (PipeConfig!=fpnew_pkg::BEFORE) begin : output_pipline - fpnew_pipe_out #( - .Width ( WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( 1'b1 ), // always NaN-box - .tag_i ( result_tag_q ), - .aux_i ( result_aux_q ), - .in_valid_i ( out_valid ), - .in_ready_o ( out_ready ), - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o ( pipe_busy ) - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = 1'b1; // always NaN-box - assign tag_o = result_tag_q; - assign aux_o = result_aux_q; - assign out_valid_o = out_valid; - assign out_ready = out_ready_i; - end + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + 
TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; - // Busy flag - assign busy_o = in_valid_q | unit_busy | pipe_busy; + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = result_tag_q; + assign out_pipe_aux_q[0] = result_aux_q; + assign out_pipe_valid_q[0] = out_valid; + // Input stage: Propagate pipeline ready signal to inside pipe + assign out_ready = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = 
out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_f2fcast.sv b/src/fpnew_f2fcast.sv deleted file mode 100644 index 975b23de..00000000 --- a/src/fpnew_f2fcast.sv +++ /dev/null @@ -1,374 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -// Author: Stefan Mach - -module fpnew_f2fcast #( - parameter fpnew_pkg::fp_format_e SrcFpFormat = fpnew_pkg::fp_format_e'(0), - parameter fpnew_pkg::fp_format_e DstFpFormat = fpnew_pkg::fp_format_e'(0), - parameter int unsigned NumPipeRegs = 0, - parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, - // Do not change - localparam int unsigned SRC_WIDTH = fpnew_pkg::fp_width(SrcFpFormat), - localparam int unsigned DST_WIDTH = fpnew_pkg::fp_width(DstFpFormat) -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic [SRC_WIDTH-1:0] operands_i, // 1 operand - input logic is_boxed_i, // 1 operand - input fpnew_pkg::roundmode_e rnd_mode_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic [DST_WIDTH-1:0] result_o, - output fpnew_pkg::status_t status_o, - output logic extension_bit_o, - output TagType tag_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o -); - - // ---------- - // Constants - // ---------- - localparam int SRC_EXP_BITS = fpnew_pkg::exp_bits(SrcFpFormat); - localparam int SRC_MAN_BITS = fpnew_pkg::man_bits(SrcFpFormat); - localparam int SRC_BIAS = fpnew_pkg::bias(SrcFpFormat); - localparam int DST_EXP_BITS = fpnew_pkg::exp_bits(DstFpFormat); - localparam int DST_MAN_BITS = fpnew_pkg::man_bits(DstFpFormat); - localparam int DST_BIAS = fpnew_pkg::bias(DstFpFormat); - - // If needed, there will be a LZC for renormalization - localparam int unsigned LZC_RESULT_WIDTH = $clog2(SRC_MAN_BITS + 1); - // The wider of both exponent widths - localparam int unsigned SUPER_EXP_BITS = fpnew_pkg::maximum(SRC_EXP_BITS, DST_EXP_BITS); - // The internal exponent must be able to represent the smallest denormal input value as 
signed - localparam int unsigned INT_EXP_WIDTH = - fpnew_pkg::maximum(SUPER_EXP_BITS, $clog2(SRC_BIAS + SRC_MAN_BITS)) + 1; // +1 for signed - // The wider of both mantissa widhts, includes normal bit - localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(SRC_MAN_BITS, DST_MAN_BITS) + 1; - - // ---------------- - // Type definition - // ---------------- - typedef struct packed { - logic sign; - logic [SRC_EXP_BITS-1:0] exponent; - logic [SRC_MAN_BITS-1:0] mantissa; - } src_fp_t; - - typedef struct packed { - logic sign; - logic [DST_EXP_BITS-1:0] exponent; - logic [DST_MAN_BITS-1:0] mantissa; - } dst_fp_t; - - // --------------- - // Input pipeline - // --------------- - // Pipelined input signals - logic [SRC_WIDTH-1:0] operands_q; - logic is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE) begin : input_pipeline - fpnew_pipe_in #( - .Width ( SRC_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .NumOperands ( 1 ), - .TagType ( TagType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i ( fpnew_pkg::FMADD ), // unused - .op_mod_i ( 1'b0 ), // unused - .src_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .int_fmt_i ( fpnew_pkg::int_format_e'(0) ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( /* unused */ ), - .op_mod_o ( /* unused */ ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( /* unused */ ), - .int_fmt_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - end - - // ----------------- - // Input processing - // ----------------- - src_fp_t operand_a; - 
fpnew_pkg::fp_info_t info_a; - - logic signed [SRC_EXP_BITS:0] encoded_exp; // biased encoded exponent - logic signed [INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent - logic [SRC_MAN_BITS:0] encoded_mant; // as encoded, includes normal bit - logic [INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa - - assign operand_a = operands_q; - - // Classify input - fpnew_classifier #( - .FpFormat ( SrcFpFormat ), - .NumOperands ( 1 ) - ) i_class_a ( - .operands_i ( operands_q ), - .is_boxed_i ( is_boxed_q ), - .info_o ( info_a ) - ); - - assign encoded_mant = {info_a.is_normal, operand_a.mantissa}; - assign encoded_exp = signed'({1'b0, operand_a.exponent}); - - // In case of growing exponent size, denormal values need to be normalized - if (DST_EXP_BITS > SRC_EXP_BITS) begin : renormalize_mantissa - logic [LZC_RESULT_WIDTH-1:0] renorm_shamt; // renormalization shift amount - logic [LZC_RESULT_WIDTH:0] renorm_shamt_sgn; // signed form for calculations - - // Leading-zero counter is needed for renormalization - lzc #( - .WIDTH ( SRC_MAN_BITS + 1 ), - .MODE ( 1 ) // MODE = 1 counts leading zeroes - ) i_lzc ( - .in_i ( encoded_mant ), - .cnt_o ( renorm_shamt ), - .empty_o ( /* unused */ ) - ); - - assign renorm_shamt_sgn = signed'({1'b0, renorm_shamt}); - - // Realign input mantissa, append zeroes if destination is wider - assign input_mant = {>> {encoded_mant << renorm_shamt, '0}}; - // Unbias exponent and compensate for shift - assign input_exp = signed'(encoded_exp - renorm_shamt_sgn + info_a.is_subnormal - SRC_BIAS); - // Otherwise we leave the denormals be because they cannot become normal - end else begin : normalized_mantissa - assign input_mant = encoded_mant; - assign input_exp = signed'(encoded_exp + info_a.is_subnormal - SRC_BIAS); - end - - // ---------------------- - // Special case handling - // ---------------------- - dst_fp_t special_result; - fpnew_pkg::status_t special_status; - logic result_is_special; - - - // We handle zero and NaN inputs 
separately - assign result_is_special = info_a.is_zero | info_a.is_nan | ~info_a.is_boxed; - - // Signalling NaNs raise invalid flag, otherwise no flags set - assign special_status = '{NV: info_a.is_signalling, default: 1'b0}; - - // Assemble result according to special case - assign special_result = info_a.is_zero - ? '{sign: operand_a.sign, exponent: '0, mantissa: '0} // signed zero - : '{sign: 1'b0, exponent: '1, mantissa: 2**(DST_MAN_BITS-1)}; // qNaN - - // -------- - // Casting - // -------- - logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination - logic [DST_EXP_BITS-1:0] final_exp; // after eventual adjustments - - logic [INT_MAN_WIDTH+DST_MAN_BITS:0] preshift_mant; // mantissa before final shift - logic [INT_MAN_WIDTH+DST_MAN_BITS:0] destination_mant; // mantissa from shifter, with rnd bit - logic [DST_MAN_BITS-1:0] final_mant; // mantissa after adjustments - - logic [$clog2(DST_MAN_BITS+1)-1:0] denorm_shamt; // shift amount for denormalization - - logic [1:0] round_sticky_bits; - logic of_before_round, uf_before_round; - - // Rebias the exponent - assign destination_exp = signed'(input_exp + DST_BIAS); - - // Perform adjustments to mantissa and exponent - always_comb begin : cast_value - // Default assignment - final_exp = unsigned'(destination_exp); // take exponent as is, only look at lower bits - preshift_mant = '0; // initialize mantissa container with zeroes - denorm_shamt = '0; - of_before_round = 1'b0; - uf_before_round = 1'b0; - - // Place mantissa to the left of the shifter - preshift_mant = {>> {input_mant, '0}}; - - // Handle overflows or infinities (for proper rounding) - if ((destination_exp >= 2**DST_EXP_BITS-1) || info_a.is_inf) begin - final_exp = unsigned'(2**DST_EXP_BITS-2); // largest normal value - preshift_mant = '1; // largest normal value and RS bits set - of_before_round = 1'b1; - // In case the destination exponent is smaller, we need to denormalize the underflow - end else if (DST_EXP_BITS < 
SRC_EXP_BITS) begin // STATIC - // Denormalize underflowing values - if (destination_exp < 1 && destination_exp >= -DST_MAN_BITS) begin - final_exp = '0; // denormal result - denorm_shamt = unsigned'(1 - destination_exp); // adjust mantissa by right shifting - uf_before_round = 1'b1; - // Limit the shift to retain sticky bits - end else if (destination_exp < -signed'(DST_MAN_BITS)) begin - final_exp = '0; // denormal result - denorm_shamt = unsigned'(1 + DST_MAN_BITS); // shift mantissa into sticky bits - uf_before_round = 1'b1; - end - // Otherwise, only previously denormal values can be denormal with exponent 0 - end else if (destination_exp == 0) begin - denorm_shamt = 1; // the shifter becomes a trivial 2-input mux - uf_before_round = 1'b1; - end - end - - // Mantissa adjustment shift - assign destination_mant = preshift_mant >> denorm_shamt; - // Extract final mantissa and round bit, discard the normal bit - assign {final_mant, round_sticky_bits[1]} = - destination_mant[INT_MAN_WIDTH+DST_MAN_BITS-1:INT_MAN_WIDTH-1]; - // Collapse sticky bits - assign round_sticky_bits[0] = (| {destination_mant[INT_MAN_WIDTH-2:0]}); // unused bits are sticky - - // ---------------------------- - // Rounding and classification - // ---------------------------- - logic [DST_EXP_BITS+DST_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding - - logic of_after_round; // overflow - logic uf_after_round; // underflow - - logic rounded_sign; - logic [DST_EXP_BITS+DST_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding - - assign pre_round_abs = {final_exp, final_mant}; - - // Rounding is only needed if exponent size or mantissa became smaller - if ((DST_MAN_BITS < SRC_MAN_BITS) || (DST_EXP_BITS < SRC_EXP_BITS)) begin : gen_rounding - - fpnew_rounding #( - .AbsWidth ( DST_WIDTH - 1 ) - ) i_fpnew_rounding ( - .abs_value_i ( pre_round_abs ), - .sign_i ( operand_a.sign ), - .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q ), - 
.effective_subtraction_i ( 1'b0 ), // no operation happened - .abs_rounded_o ( rounded_abs ), - .sign_o ( rounded_sign ), - .exact_zero_o ( /* unused */ ) - ); - - end else begin : no_rounding - assign rounded_abs = pre_round_abs; - assign rounded_sign = operand_a.sign; - end - - // Classification after rounding - assign uf_after_round = rounded_abs[DST_EXP_BITS+DST_MAN_BITS-1:DST_MAN_BITS] == '0; // denormal - assign of_after_round = rounded_abs[DST_EXP_BITS+DST_MAN_BITS-1:DST_MAN_BITS] == '1; // inf exp. - - // ----------------- - // Result selection - // ----------------- - logic [DST_WIDTH-1:0] regular_result; - fpnew_pkg::status_t regular_status; - - // Assemble regular result - assign regular_result = {rounded_sign, rounded_abs}; - assign regular_status = '{ - NV: 1'b0, // only valid cases are handled in regular path - DZ: 1'b0, // no divisions - OF: ~info_a.is_inf & (of_before_round | of_after_round), // rounding can introduce new overflow - UF: uf_after_round & regular_status.NX, // only inexact results raise UF - NX: (| round_sticky_bits) | (~info_a.is_inf & (of_before_round | of_after_round)) - }; - - // Final results for output pipeline - logic [DST_WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - - // Select output depending on special case detection - assign result_d = result_is_special ? special_result : regular_result; - assign status_d = result_is_special ? 
special_status : regular_status; - - // ---------------- - // Output Pipeline - // ---------------- - // Generate pipeline at output if needed - if (PipeConfig!=fpnew_pkg::BEFORE) begin : output_pipline - fpnew_pipe_out #( - .Width ( DST_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .TagType ( TagType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( 1'b1 ), // always NaN-Box result - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = 1'b1; // always NaN-Box result - end - -endmodule diff --git a/src/fpnew_f2icast.sv b/src/fpnew_f2icast.sv deleted file mode 100644 index 23b6a0a1..00000000 --- a/src/fpnew_f2icast.sv +++ /dev/null @@ -1,356 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -// Author: Stefan Mach - -module fpnew_f2icast #( - parameter fpnew_pkg::fp_format_e SrcFpFormat = fpnew_pkg::fp_format_e'(0), - parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '{default: 1'b1}, - parameter int unsigned NumPipeRegs = 0, - parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, - // Do not change - localparam int unsigned SRC_WIDTH = fpnew_pkg::fp_width(SrcFpFormat), - localparam int unsigned DST_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig) -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic [SRC_WIDTH-1:0] operands_i, // 1 operand - input logic is_boxed_i, // 1 operand - input fpnew_pkg::roundmode_e rnd_mode_i, - input logic op_mod_i, - input fpnew_pkg::int_format_e int_fmt_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic [DST_WIDTH-1:0] result_o, - output fpnew_pkg::status_t status_o, - output logic extension_bit_o, - output TagType tag_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o -); - - // ---------- - // Constants - // ---------- - localparam int SRC_EXP_BITS = fpnew_pkg::exp_bits(SrcFpFormat); - localparam int SRC_MAN_BITS = fpnew_pkg::man_bits(SrcFpFormat); - localparam int SRC_BIAS = fpnew_pkg::bias(SrcFpFormat); - - // The internal exponent is signed - localparam int unsigned INT_EXP_WIDTH = SRC_EXP_BITS + 1; // +1 for signed - // The internal mantissa contains the normal bit - localparam int unsigned INT_MAN_WIDTH = SRC_MAN_BITS + 1; - - // ---------------- - // Type definition - // ---------------- - typedef struct packed { - logic sign; - logic [SRC_EXP_BITS-1:0] exponent; - logic [SRC_MAN_BITS-1:0] mantissa; - } src_fp_t; - - // --------------- - // Input pipeline - // 
--------------- - // Pipelined input signals - logic [SRC_WIDTH-1:0] operands_q; - logic is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - logic op_mod_q; - fpnew_pkg::int_format_e int_fmt_q; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE) begin : input_pipeline - fpnew_pipe_in #( - .Width ( SRC_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .NumOperands ( 1 ), - .TagType ( TagType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i ( fpnew_pkg::FMADD ), // unused - .op_mod_i, - .src_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .int_fmt_i, - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( /* unused */ ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( /* unused */ ), - .int_fmt_o ( int_fmt_q ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_mod_q = op_mod_i; - assign int_fmt_q = int_fmt_i; - end - - // ----------------- - // Input processing - // ----------------- - src_fp_t operand_a; - fpnew_pkg::fp_info_t info_a; - - logic signed [SRC_EXP_BITS:0] encoded_exp; // biased encoded exponent - logic signed [INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent - logic [SRC_MAN_BITS:0] encoded_mant; // as encoded, includes normal bit - - assign operand_a = operands_q; - - // Classify input - fpnew_classifier #( - .FpFormat ( SrcFpFormat ), - .NumOperands ( 1 ) - ) i_class_a ( - .operands_i ( operands_q ), - .is_boxed_i ( is_boxed_q ), - .info_o ( info_a ) - ); - - assign encoded_mant = {1'b1, operand_a.mantissa}; // we don't care about denormals - assign encoded_exp = signed'({1'b0, 
operand_a.exponent}); - - assign input_exp = signed'(encoded_exp - SRC_BIAS); // Only handle normal cases - - // -------- - // Casting - // -------- - logic [INT_MAN_WIDTH+DST_WIDTH:0] preshift_mant; // mantissa before final shift with rnd bit - logic [INT_MAN_WIDTH+DST_WIDTH:0] destination_mant; // mantissa from shifter with rnd bit - logic [DST_WIDTH-1:0] final_mant; // final integer value after adjustments - - logic [$clog2(DST_WIDTH+1)-1:0] mant_shamt; // shift amount for mantissa - - logic [1:0] round_sticky_bits; - logic of_before_round, uf_before_round; - - // Perform adjustments to mantissa and exponent - always_comb begin : cast_value - // Default assignment - preshift_mant = '0; // initialize mantissa container with zeroes - // Mantissa with implicit bit can be right shifted to represent integer value - mant_shamt = unsigned'(DST_WIDTH - 1 - input_exp); - of_before_round = 1'b0; - uf_before_round = 1'b0; - - // Place mantissa to the left of the shifter - preshift_mant[INT_MAN_WIDTH+DST_WIDTH:DST_WIDTH+1] = encoded_mant; - - // Detect overflows. Range for conversions to unsigned is larger by one. - if (input_exp >= signed'(fpnew_pkg::int_width(int_fmt_q) - 1 + op_mod_q)) begin - mant_shamt = '0; // prevent shifting - of_before_round = 1'b1; - // Handle underflows: all bits to the sticky. 
- end else if (input_exp < -1) begin - mant_shamt = DST_WIDTH + 1; // Limit shift range to - uf_before_round = 1'b1; - end - end - - // Mantissa shift - assign destination_mant = preshift_mant >> mant_shamt; - - // Extract final mantissa and round/sticky bits - always_comb begin : assemble_result - logic [DST_WIDTH-1:0] fmt_mant[fpnew_pkg::NUM_INT_FORMATS]; // integer value for each format - - // Assemble result for each format - for (int unsigned i = 0; i < fpnew_pkg::NUM_INT_FORMATS; i++) begin - if (IntFmtConfig[i]) begin // only active formats - logic [DST_WIDTH-1:0] sign_ext_mask; // mask for the locations of sign extension bits - logic sign_ext_val; // the sign extension bits for this format - logic [DST_WIDTH-1:0] sign_ext_vector; // full of sign extension bits - - // Default assignment: the result mantissa - fmt_mant[i] = destination_mant[INT_MAN_WIDTH+DST_WIDTH:INT_MAN_WIDTH+1]; - // Set up sign extension mask and value - sign_ext_mask = '1 << fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i)); - sign_ext_val = fmt_mant[i][fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i))-1]; - sign_ext_vector = '{default: sign_ext_val}; - - // Combine result and sign extension vectors - fmt_mant[i] = (sign_ext_mask & sign_ext_vector) | (~sign_ext_mask & fmt_mant[i]); - end - end - // Select result according to format chosen - final_mant = fmt_mant[int_fmt_q]; - end - - assign round_sticky_bits[1] = destination_mant[INT_MAN_WIDTH]; // rnd bit - assign round_sticky_bits[0] = (| {destination_mant[INT_MAN_WIDTH-1:0]}); // unused bits are sticky - - // ---------------------------- - // Rounding and classification - // ---------------------------- - logic [DST_WIDTH-1:0] pre_round_abs; // absolute value of result before rounding - - logic rounded_sign; - logic [DST_WIDTH-1:0] rounded_abs; // absolute value of result after rounding - logic res_zero; - - assign pre_round_abs = final_mant; - - fpnew_rounding #( - .AbsWidth ( DST_WIDTH ) - ) i_fpnew_rounding ( - .abs_value_i ( 
pre_round_abs ), - .sign_i ( operand_a.sign ), - .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q ), - .effective_subtraction_i ( 1'b0 ), // no operation happened - .abs_rounded_o ( rounded_abs ), - .sign_o ( rounded_sign ), - .exact_zero_o ( res_zero ) - ); - - // ---------------------- - // Special case handling - // ---------------------- - logic [DST_WIDTH-1:0] special_result; - fpnew_pkg::status_t special_status; - logic result_is_special; - - // We handle Inf, NaN, overflows and negative unsigned values separately - assign result_is_special = info_a.is_nan | info_a.is_inf | of_before_round | ~info_a.is_boxed | - (operand_a.sign & op_mod_q & ~res_zero); - - // All special cases are invalid - assign special_status = '{NV: 1'b1, default: 1'b0}; - - // Assemble result according to special case - always_comb begin : special_cases - logic [DST_WIDTH-1:0] fmt_special_result[fpnew_pkg::NUM_INT_FORMATS]; - - // Determine special result for each format separately - for (int unsigned i = 0; i < fpnew_pkg::NUM_INT_FORMATS; i++) begin - if (IntFmtConfig[i]) begin // only active formats - logic [DST_WIDTH-1:0] sign_ext_mask; // mask for the locations of sign extension bits - logic [DST_WIDTH-1:0] sign_ext_vector; // full of sign extension bits - // Default assignment: set all ones - fmt_special_result[i] = '1; - // MSB of special result depends on signed/unsigned - fmt_special_result[i][fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i))-1] = op_mod_q; - - // In case of a negative result, flip the bits to procude -max or 0 - if (!info_a.is_nan && operand_a.sign) - fmt_special_result[i] = ~fmt_special_result[i]; - - // Set up sign extension mask and value - sign_ext_mask = '1 << fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i)); - sign_ext_vector = '{default: op_mod_q ^ (~info_a.is_nan & operand_a.sign)}; - - // Combine result and sign extension bits - fmt_special_result[i] = (sign_ext_mask & sign_ext_vector) | - (~sign_ext_mask & fmt_special_result[i]); 
- - end - end - // Select result according to format - special_result = fmt_special_result[int_fmt_q]; - - end - - // ----------------- - // Result selection - // ----------------- - logic [DST_WIDTH-1:0] regular_result; - fpnew_pkg::status_t regular_status; - - // Invert regular result depending on sign - assign regular_result = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs; - assign regular_status = '{NX: (| round_sticky_bits), default: 1'b0}; // only NX can be raised - - // Final results for output pipeline - logic [DST_WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - logic extension_bit; - - // Select output depending on special case detection - assign result_d = result_is_special ? special_result : regular_result; - assign status_d = result_is_special ? special_status : regular_status; - - // MSB of result decides extension - assign extension_bit = result_d[DST_WIDTH-1]; - - // ---------------- - // Output Pipeline - // ---------------- - // Generate pipeline at output if needed - if (PipeConfig!=fpnew_pkg::BEFORE) begin : output_pipline - fpnew_pipe_out #( - .Width ( DST_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .TagType ( TagType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( extension_bit ), - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = extension_bit; - end - -endmodule diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index 256d7f36..f9fa813b 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -11,6 +11,8 @@ // Author: Stefan Mach +`include 
"common_cells/registers.svh" + module fpnew_fma #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, @@ -61,9 +63,25 @@ module fpnew_fma #( // Internal exponent width of FMA must accomodate all meaningful exponent values in order to avoid // datapath leakage. This is either given by the exponent bits or the width of the LZC result. // In most reasonable FP formats the internal exponent will be wider than the LZC result. - localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH); + localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH)); // Shift amount width: maximum internal mantissa size is 3p+3 bits localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3); + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
(NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise // ---------------- // Type definition @@ -77,71 +95,49 @@ module fpnew_fma #( // --------------- // Input pipeline // --------------- - // Pipelined input signals - logic [2:0][WIDTH-1:0] operands_q; - logic [2:0] is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - fpnew_pkg::operation_e op_q; - logic op_mod_q; - TagType tag_q; - AuxType aux_q; - logic out_valid_input; - logic in_ready_inside; // written by inside pipeline - logic busy_input; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : input_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? (NumPipeRegs / 3) // Last to get regs - : NumPipeRegs; - fpnew_pipe_in #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .NumOperands ( 3 ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i, - .op_mod_i, - .src_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .int_fmt_i ( fpnew_pkg::int_format_e'(0) ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( op_q ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( /* unused */ ), - .int_fmt_o ( /* unused */ ), - .tag_o ( tag_q ), - .aux_o ( aux_q ), - .out_valid_o ( out_valid_input ), - .out_ready_i ( in_ready_inside ), - .busy_o ( busy_input ) - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign in_ready_o = in_ready_inside; - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_q = op_i; - assign op_mod_q = op_mod_i; - assign tag_q = tag_i; - assign aux_q = aux_i; - assign out_valid_input = in_valid_i; - assign 
busy_input = 1'b0; + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][2:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // ----------------- @@ -154,9 +150,9 @@ module fpnew_fma #( .FpFormat ( FpFormat ), .NumOperands ( 3 ) ) i_class_inputs ( - .operands_i ( operands_q ), - .is_boxed_i ( is_boxed_q ), - .info_o ( info_q ) + .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ), + .info_o ( info_q ) ); fp_t operand_a, operand_b, operand_c; @@ -177,17 +173,17 @@ module fpnew_fma #( always_comb begin : op_select // Default assignments - packing-order-agnostic - operand_a = operands_q[0]; - operand_b = operands_q[1]; - operand_c = operands_q[2]; + operand_a = inp_pipe_operands_q[NUM_INP_REGS][0]; + operand_b = inp_pipe_operands_q[NUM_INP_REGS][1]; + operand_c = inp_pipe_operands_q[NUM_INP_REGS][2]; info_a = info_q[0]; info_b = info_q[1]; info_c = info_q[2]; // op_mod_q inverts sign of operand C - operand_c.sign = operand_c.sign ^ op_mod_q; + operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; - unique case 
(op_q) + unique case (inp_pipe_op_q[NUM_INP_REGS]) fpnew_pkg::FMADD: ; // do nothing fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product fpnew_pkg::ADD: begin // Set multiplicand to +1 @@ -357,121 +353,121 @@ module fpnew_fma #( assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift; assign inject_carry_in = effective_subtraction & ~sticky_before_add; - // --------------- - // Internal pipeline - // --------------- - // Pipelined internal signals - logic effective_subtraction_q; - logic signed [EXP_WIDTH-1:0] exponent_product_q; - logic signed [EXP_WIDTH-1:0] exponent_difference_q; - logic signed [EXP_WIDTH-1:0] tentative_exponent_q; - logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; - logic sticky_before_add_q; - fpnew_pkg::roundmode_e rnd_mode_q2; - fp_t special_result_q; - fpnew_pkg::status_t special_status_q; - logic result_is_special_q; - TagType tag_q2; - AuxType aux_q2; - logic out_valid_inside; - logic in_ready_output; // written by output pipeline - logic busy_inside; - // ------ // Adder // ------ logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry logic sum_carry; // observe carry bit from sum for sign fixing - logic [3*PRECISION_BITS+3:0] sum_d, sum_q; // discard carry as sum won't overflow - logic final_sign_d, final_sign_q; + logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow + logic final_sign; //Mantissa adder (ab+c). In normal addition, it cannot overflow. assign sum_raw = product_shifted + addend_shifted + inject_carry_in; assign sum_carry = sum_raw[3*PRECISION_BITS+4]; - // Complement negative sum (can only happen in subtraction -> overflows for positive results) - assign sum_d = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; + assign sum = (effective_subtraction && ~sum_carry) ? 
-sum_raw : sum_raw; // In case of a mispredicted subtraction result, do a sign flip - assign final_sign_d = (effective_subtraction && (sum_carry == tentative_sign)) + assign final_sign = (effective_subtraction && (sum_carry == tentative_sign)) ? 1'b1 : (effective_subtraction ? 1'b0 : tentative_sign); - // Generate pipeline between mul and add if needed - if (PipeConfig==fpnew_pkg::INSIDE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : inside_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? ((NumPipeRegs + 2) / 3) // First to get regs - : NumPipeRegs; - - fpnew_pipe_inside_fma #( - .ExpWidth ( EXP_WIDTH ), - .PrecBits ( PRECISION_BITS ), - .NumPipeRegs ( NUM_REGS ), - .FpType ( fp_t ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_inside_pipe ( - .clk_i, - .rst_ni, - .effective_subtraction_i ( effective_subtraction ), - .final_sign_i ( final_sign_d ), - .exponent_product_i ( exponent_product ), - .exponent_difference_i ( exponent_difference ), - .tentative_exponent_i ( tentative_exponent ), - .addend_shamt_i ( addend_shamt ), - .sticky_before_add_i ( sticky_before_add ), - .sum_i ( sum_d ), - .rnd_mode_i ( rnd_mode_q ), - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .result_is_special_i ( result_is_special ), - .special_result_i ( special_result ), - .special_status_i ( special_status ), - .tag_i ( tag_q ), - .aux_i ( aux_q ), - .in_valid_i ( out_valid_input ), - .in_ready_o ( in_ready_inside ), - .flush_i, - .effective_subtraction_o ( effective_subtraction_q ), - .final_sign_o ( final_sign_q ), - .exponent_product_o ( exponent_product_q ), - .exponent_difference_o ( exponent_difference_q ), - .tentative_exponent_o ( tentative_exponent_q ), - .addend_shamt_o ( addend_shamt_q ), - .sticky_before_add_o ( sticky_before_add_q ), - .sum_o ( sum_q ), - .rnd_mode_o ( rnd_mode_q2 ), - .dst_fmt_o ( /* unused */ ), - .result_is_special_o ( result_is_special_q ), - .special_result_o ( special_result_q ), - .special_status_o ( 
special_status_q ), - .tag_o ( tag_q2 ), - .aux_o ( aux_q2 ), - .out_valid_o ( out_valid_inside ), - .out_ready_i ( in_ready_output ), - .busy_o ( busy_inside ) - ); - - // Otherwise pass through inputs - end else begin : no_inside_pipeline - assign in_ready_inside = in_ready_output; - assign effective_subtraction_q = effective_subtraction; - assign final_sign_q = final_sign_d; - assign exponent_product_q = exponent_product; - assign exponent_difference_q = exponent_difference; - assign tentative_exponent_q = tentative_exponent; - assign addend_shamt_q = addend_shamt; - assign sticky_before_add_q = sticky_before_add; - assign rnd_mode_q2 = rnd_mode_q; - assign result_is_special_q = result_is_special; - assign special_result_q = special_result; - assign special_status_q = special_status; - assign tag_q2 = tag_q; - assign aux_q2 = aux_q; - assign out_valid_inside = out_valid_input; - assign busy_inside = 1'b0; - assign sum_q = sum_d; + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic effective_subtraction_q; + logic signed [EXP_WIDTH-1:0] exponent_product_q; + logic signed [EXP_WIDTH-1:0] exponent_difference_q; + logic signed [EXP_WIDTH-1:0] tentative_exponent_q; + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; + logic sticky_before_add_q; + logic [3*PRECISION_BITS+3:0] sum_q; + logic final_sign_q; + fpnew_pkg::roundmode_e rnd_mode_q; + logic result_is_special_q; + fp_t special_result_q; + fpnew_pkg::status_t special_status_q; + // Internal pipeline signals, index i holds signal after i register stages + logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q; + logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q; + logic [0:NUM_MID_REGS] mid_pipe_sticky_q; + logic 
[0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q; + logic [0:NUM_MID_REGS] mid_pipe_final_sign_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; + fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; + fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_eff_sub_q[0] = effective_subtraction; + assign mid_pipe_exp_prod_q[0] = exponent_product; + assign mid_pipe_exp_diff_q[0] = exponent_difference; + assign mid_pipe_tent_exp_q[0] = tentative_exponent; + assign mid_pipe_add_shamt_q[0] = addend_shamt; + assign mid_pipe_sticky_q[0] = sticky_before_add; + assign mid_pipe_sum_q[0] = sum; + assign mid_pipe_final_sign_q[0] = final_sign; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_res_is_spec_q[0] = result_is_special; + assign mid_pipe_spec_res_q[0] = special_result; + assign mid_pipe_spec_stat_q[0] = special_status; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0) + `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0) + `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0) + `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0) + `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; + assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; + assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS]; + assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS]; + assign addend_shamt_q = mid_pipe_add_shamt_q[NUM_MID_REGS]; + assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS]; + 
assign sum_q = mid_pipe_sum_q[NUM_MID_REGS]; + assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS]; + assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS]; + assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS]; // -------------- // Normalization @@ -517,7 +513,7 @@ module fpnew_fma #( // Subnormal result end else begin // Cap the shift distance to align mantissa with minimum exponent - norm_shamt = unsigned'(signed'(PRECISION_BITS + 2 + exponent_product_q)); + norm_shamt = unsigned'(signed'(PRECISION_BITS) + 2 + exponent_product_q); normalized_exponent = 0; // subnormals encoded as 0 end // Addend-anchored case @@ -579,7 +575,7 @@ module fpnew_fma #( // Assemble result before rounding. In case of overflow, the largest normal value is set. assign pre_round_sign = final_sign_q; - assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0]; + assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : unsigned'(final_exponent[EXP_BITS-1:0]); assign pre_round_mantissa = (of_before_round) ? 
'1 : final_mantissa[MAN_BITS:1]; // bit 0 is R bit assign pre_round_abs = {pre_round_exponent, pre_round_mantissa}; @@ -593,7 +589,7 @@ module fpnew_fma #( .abs_value_i ( pre_round_abs ), .sign_i ( pre_round_sign ), .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q2 ), + .rnd_mode_i ( rnd_mode_q ), .effective_subtraction_i ( effective_subtraction_q ), .abs_rounded_o ( rounded_abs ), .sign_o ( rounded_sign ), @@ -611,19 +607,16 @@ module fpnew_fma #( fpnew_pkg::status_t regular_status; // Assemble regular result - assign regular_result = {rounded_sign, rounded_abs}; - assign regular_status = '{ - NV: 1'b0, // only valid cases are handled in regular path - DZ: 1'b0, // no divisions - OF: of_before_round | of_after_round, // rounding can introduce new overflow - UF: uf_after_round & regular_status.NX, // only inexact results raise UF - NX: (| round_sticky_bits) | of_before_round | of_after_round // RS bits mean loss in precision - }; + assign regular_result = {rounded_sign, rounded_abs}; + assign regular_status.NV = 1'b0; // only valid cases are handled in regular path + assign regular_status.DZ = 1'b0; // no divisions + assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow + assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF + assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round; // Final results for output pipeline fp_t result_d; fpnew_pkg::status_t status_d; - logic busy_output; // Select output depending on special case detection assign result_d = result_is_special_q ? special_result_q : regular_result; @@ -632,51 +625,49 @@ module fpnew_fma #( // ---------------- // Output Pipeline // ---------------- - // Generate pipeline at output if needed - if (PipeConfig==fpnew_pkg::AFTER || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : output_pipline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
((NumPipeRegs + 1) / 3) // Second to get regs - : NumPipeRegs; - fpnew_pipe_out #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( 1'b1 ), // always NaN-Box result - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i ( tag_q2 ), - .aux_i ( aux_q2 ), - .in_valid_i ( out_valid_inside ), - .in_ready_o ( in_ready_output ), - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o ( busy_output ) - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign in_ready_output = out_ready_i; - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = tag_q2; - assign aux_o = aux_q2; - assign out_valid_o = out_valid_inside; + // Output pipeline signals, index i holds signal after i register stages + fp_t [0:NUM_OUT_REGS] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : 
gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - - assign busy_o = busy_input | busy_inside | busy_output; - + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index 26817f4a..6b52237f 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -11,6 +11,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_fma_multi #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, parameter int 
unsigned NumPipeRegs = 0, @@ -70,6 +72,22 @@ module fpnew_fma_multi #( localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH); // Shift amount width: maximum internal mantissa size is 3p+3 bits localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3); + // Pipelines + localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 3) // Second to get distributed regs + : 0); // no regs here otherwise + localparam NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 3) // Last to get distributed regs + : 0); // no regs here otherwise // ---------------- // Type definition @@ -83,77 +101,65 @@ module fpnew_fma_multi #( // --------------- // Input pipeline // --------------- - // Pipelined input signals - logic [2:0][WIDTH-1:0] operands_q; - logic [NUM_FORMATS-1:0][2:0] is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - fpnew_pkg::operation_e op_q; - logic op_mod_q; - fpnew_pkg::fp_format_e src_fmt_q; - fpnew_pkg::fp_format_e dst_fmt_q; - TagType tag_q; - AuxType aux_q; - logic out_valid_input; - logic in_ready_inside; // written by inside pipeline - logic busy_input; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : input_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
(NumPipeRegs / 3) // Last to get regs - : NumPipeRegs; - fpnew_pipe_in #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .NumOperands ( 3 ), - .NumFormats ( NUM_FORMATS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i, - .op_mod_i, - .src_fmt_i, - .dst_fmt_i, - .int_fmt_i ( fpnew_pkg::int_format_e'(0) ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( op_q ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( src_fmt_q ), - .dst_fmt_o ( dst_fmt_q ), - .int_fmt_o ( /* unused */ ), - .tag_o ( tag_q ), - .aux_o ( aux_q ), - .out_valid_o ( out_valid_input ), - .out_ready_i ( in_ready_inside ), - .busy_o ( busy_input ) - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign in_ready_o = in_ready_inside; - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_q = op_i; - assign op_mod_q = op_mod_i; - assign src_fmt_q = src_fmt_i; - assign dst_fmt_q = dst_fmt_i; - assign tag_q = tag_i; - assign aux_q = aux_i; - assign out_valid_input = in_valid_i; - assign busy_input = 1'b0; + // Selected pipeline output signals as non-arrays + logic [2:0][WIDTH-1:0] operands_q; + fpnew_pkg::fp_format_e src_fmt_q; + fpnew_pkg::fp_format_e dst_fmt_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][2:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][NUM_FORMATS-1:0][2:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType 
[0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_src_fmt_q[0] = src_fmt_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -162,9 +168,6 @@ module fpnew_fma_multi #( logic signed [NUM_FORMATS-1:0][2:0][SUPER_EXP_BITS-1:0] fmt_exponent; logic [NUM_FORMATS-1:0][2:0][SUPER_MAN_BITS-1:0] fmt_mantissa; - logic [2:0][WIDTH-1:0] input_operands; - - fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2:0] info_q; // FP Input initialization @@ -182,9 +185,9 @@ module fpnew_fma_multi #( .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), .NumOperands ( 3 ) ) i_fpnew_classifier ( - .operands_i ( trimmed_ops ), - .is_boxed_i ( is_boxed_q[fmt] 
), - .info_o ( info_q[fmt] ) + .operands_i ( trimmed_ops ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS][fmt] ), + .info_o ( info_q[fmt] ) ); for (genvar op = 0; op < 3; op++) begin : gen_operands assign trimmed_ops[op] = operands_q[op][FP_WIDTH-1:0]; @@ -227,9 +230,9 @@ module fpnew_fma_multi #( info_c = info_q[dst_fmt_q][2]; // op_mod_q inverts sign of operand C - operand_c.sign = operand_c.sign ^ op_mod_q; + operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; - unique case (op_q) + unique case (inp_pipe_op_q[NUM_INP_REGS]) fpnew_pkg::FMADD: ; // do nothing fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product fpnew_pkg::ADD: begin // Set multiplicand to +1 @@ -431,127 +434,126 @@ module fpnew_fma_multi #( assign addend_shifted = (effective_subtraction) ? ~addend_after_shift : addend_after_shift; assign inject_carry_in = effective_subtraction & ~sticky_before_add; - // --------------- - // Internal pipeline - // --------------- - // Pipelined internal signals - logic effective_subtraction_q; - logic signed [EXP_WIDTH-1:0] exponent_product_q; - logic signed [EXP_WIDTH-1:0] exponent_difference_q; - logic signed [EXP_WIDTH-1:0] tentative_exponent_q; - logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; - logic sticky_before_add_q; - logic [3*PRECISION_BITS+3:0] product_shifted_q; - logic [3*PRECISION_BITS+3:0] addend_shifted_q; - logic inject_carry_in_q; - fpnew_pkg::roundmode_e rnd_mode_q2; - fpnew_pkg::fp_format_e dst_fmt_q2; - fp_t special_result_q; - fpnew_pkg::status_t special_status_q; - logic result_is_special_q; - TagType tag_q2; - AuxType aux_q2; - logic out_valid_inside; - logic in_ready_output; // written by output pipeline - logic busy_inside; - // ------ // Adder // ------ logic [3*PRECISION_BITS+4:0] sum_raw; // added one bit for the carry logic sum_carry; // observe carry bit from sum for sign fixing - logic [3*PRECISION_BITS+3:0] sum_d, sum_q; // discard carry as sum won't overflow - logic final_sign_d, 
final_sign_q; + logic [3*PRECISION_BITS+3:0] sum; // discard carry as sum won't overflow + logic final_sign; //Mantissa adder (ab+c). In normal addition, it cannot overflow. assign sum_raw = product_shifted + addend_shifted + inject_carry_in; assign sum_carry = sum_raw[3*PRECISION_BITS+4]; - // Complement negative sum (can only happen in subtraction -> overflows for positive results) - assign sum_d = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; + assign sum = (effective_subtraction && ~sum_carry) ? -sum_raw : sum_raw; // In case of a mispredicted subtraction result, do a sign flip - assign final_sign_d = (effective_subtraction && (sum_carry == tentative_sign)) + assign final_sign = (effective_subtraction && (sum_carry == tentative_sign)) ? 1'b1 : (effective_subtraction ? 1'b0 : tentative_sign); - - // Generate pipeline between mul and add if needed - if (PipeConfig==fpnew_pkg::INSIDE || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : inside_pipeline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? 
((NumPipeRegs + 2) / 3) // First to get regs - : NumPipeRegs; - - fpnew_pipe_inside_fma #( - .ExpWidth ( EXP_WIDTH ), - .PrecBits ( PRECISION_BITS ), - .NumPipeRegs ( NUM_REGS ), - .FpType ( fp_t ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_inside_pipe ( - .clk_i, - .rst_ni, - .effective_subtraction_i ( effective_subtraction ), - .final_sign_i ( final_sign_d ), - .exponent_product_i ( exponent_product ), - .exponent_difference_i ( exponent_difference ), - .tentative_exponent_i ( tentative_exponent ), - .addend_shamt_i ( addend_shamt ), - .sticky_before_add_i ( sticky_before_add ), - .sum_i ( sum_d ), - .rnd_mode_i ( rnd_mode_q ), - .dst_fmt_i ( dst_fmt_q ), // unused - .result_is_special_i ( result_is_special ), - .special_result_i ( special_result ), - .special_status_i ( special_status ), - .tag_i ( tag_q ), - .aux_i ( aux_q ), - .in_valid_i ( out_valid_input ), - .in_ready_o ( in_ready_inside ), - .flush_i, - .effective_subtraction_o ( effective_subtraction_q ), - .final_sign_o ( final_sign_q ), - .exponent_product_o ( exponent_product_q ), - .exponent_difference_o ( exponent_difference_q ), - .tentative_exponent_o ( tentative_exponent_q ), - .addend_shamt_o ( addend_shamt_q ), - .sticky_before_add_o ( sticky_before_add_q ), - .sum_o ( sum_q ), - .rnd_mode_o ( rnd_mode_q2 ), - .dst_fmt_o ( dst_fmt_q2 ), - .result_is_special_o ( result_is_special_q ), - .special_result_o ( special_result_q ), - .special_status_o ( special_status_q ), - .tag_o ( tag_q2 ), - .aux_o ( aux_q2 ), - .out_valid_o ( out_valid_inside ), - .out_ready_i ( in_ready_output ), - .busy_o ( busy_inside ) - ); - - // Otherwise pass through inputs - end else begin : no_inside_pipeline - assign in_ready_inside = in_ready_output; - assign effective_subtraction_q = effective_subtraction; - assign final_sign_q = final_sign_d; - assign exponent_product_q = exponent_product; - assign exponent_difference_q = exponent_difference; - assign tentative_exponent_q = tentative_exponent; - assign 
addend_shamt_q = addend_shamt; - assign sticky_before_add_q = sticky_before_add; - assign rnd_mode_q2 = rnd_mode_q; - assign dst_fmt_q2 = dst_fmt_q; - assign result_is_special_q = result_is_special; - assign special_result_q = special_result; - assign special_status_q = special_status; - assign tag_q2 = tag_q; - assign aux_q2 = aux_q; - assign out_valid_inside = out_valid_input; - assign busy_inside = 1'b0; - assign sum_q = sum_d; + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic effective_subtraction_q; + logic signed [EXP_WIDTH-1:0] exponent_product_q; + logic signed [EXP_WIDTH-1:0] exponent_difference_q; + logic signed [EXP_WIDTH-1:0] tentative_exponent_q; + logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; + logic sticky_before_add_q; + logic [3*PRECISION_BITS+3:0] sum_q; + logic final_sign_q; + fpnew_pkg::fp_format_e dst_fmt_q2; + fpnew_pkg::roundmode_e rnd_mode_q; + logic result_is_special_q; + fp_t special_result_q; + fpnew_pkg::status_t special_status_q; + // Internal pipeline signals, index i holds signal after i register stages + logic [0:NUM_MID_REGS] mid_pipe_eff_sub_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_prod_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_exp_diff_q; + logic signed [0:NUM_MID_REGS][EXP_WIDTH-1:0] mid_pipe_tent_exp_q; + logic [0:NUM_MID_REGS][SHIFT_AMOUNT_WIDTH-1:0] mid_pipe_add_shamt_q; + logic [0:NUM_MID_REGS] mid_pipe_sticky_q; + logic [0:NUM_MID_REGS][3*PRECISION_BITS+3:0] mid_pipe_sum_q; + logic [0:NUM_MID_REGS] mid_pipe_final_sign_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; + logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; + fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; + fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] 
mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_eff_sub_q[0] = effective_subtraction; + assign mid_pipe_exp_prod_q[0] = exponent_product; + assign mid_pipe_exp_diff_q[0] = exponent_difference; + assign mid_pipe_tent_exp_q[0] = tentative_exponent; + assign mid_pipe_add_shamt_q[0] = addend_shamt; + assign mid_pipe_sticky_q[0] = sticky_before_add; + assign mid_pipe_sum_q[0] = sum; + assign mid_pipe_final_sign_q[0] = final_sign; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; + assign mid_pipe_res_is_spec_q[0] = result_is_special; + assign mid_pipe_spec_res_q[0] = special_result; + assign mid_pipe_spec_stat_q[0] = special_status; + assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) + `FFL(mid_pipe_exp_diff_q[i+1], mid_pipe_exp_diff_q[i], reg_ena, '0) + `FFL(mid_pipe_tent_exp_q[i+1], mid_pipe_tent_exp_q[i], reg_ena, '0) + `FFL(mid_pipe_add_shamt_q[i+1], mid_pipe_add_shamt_q[i], reg_ena, '0) + `FFL(mid_pipe_sticky_q[i+1], mid_pipe_sticky_q[i], reg_ena, '0) + `FFL(mid_pipe_sum_q[i+1], mid_pipe_sum_q[i], reg_ena, '0) + `FFL(mid_pipe_final_sign_q[i+1], mid_pipe_final_sign_q[i], reg_ena, '0) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use + assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; + assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; + assign exponent_difference_q = mid_pipe_exp_diff_q[NUM_MID_REGS]; + assign tentative_exponent_q = mid_pipe_tent_exp_q[NUM_MID_REGS]; + assign addend_shamt_q = 
mid_pipe_add_shamt_q[NUM_MID_REGS]; + assign sticky_before_add_q = mid_pipe_sticky_q[NUM_MID_REGS]; + assign sum_q = mid_pipe_sum_q[NUM_MID_REGS]; + assign final_sign_q = mid_pipe_final_sign_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS]; + assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS]; + assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS]; + assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS]; // -------------- // Normalization @@ -708,7 +710,7 @@ module fpnew_fma_multi #( .abs_value_i ( pre_round_abs ), .sign_i ( pre_round_sign ), .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q2 ), + .rnd_mode_i ( rnd_mode_q ), .effective_subtraction_i ( effective_subtraction_q ), .abs_rounded_o ( rounded_abs ), .sign_o ( rounded_sign ), @@ -753,18 +755,15 @@ module fpnew_fma_multi #( // Assemble regular result assign regular_result = fmt_result[dst_fmt_q2]; - assign regular_status = '{ - NV: 1'b0, // only valid cases are handled in regular path - DZ: 1'b0, // no divisions - OF: of_before_round | of_after_round, // rounding can introduce new overflow - UF: uf_after_round & regular_status.NX, // only inexact results raise UF - NX: (| round_sticky_bits) | of_before_round | of_after_round // RS bits mean loss in precision - }; + assign regular_status.NV = 1'b0; // only valid cases are handled in regular path + assign regular_status.DZ = 1'b0; // no divisions + assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow + assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF + assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round; // Final results for output pipeline logic [WIDTH-1:0] result_d; fpnew_pkg::status_t status_d; - logic busy_output; // Select output depending on special case detection assign result_d = result_is_special_q ? 
special_result_q : regular_result; @@ -773,51 +772,49 @@ module fpnew_fma_multi #( // ---------------- // Output Pipeline // ---------------- - // Generate pipeline at output if needed - if (PipeConfig==fpnew_pkg::AFTER || PipeConfig==fpnew_pkg::DISTRIBUTED) begin : output_pipline - localparam NUM_REGS = PipeConfig==fpnew_pkg::DISTRIBUTED - ? ((NumPipeRegs + 1) / 3) // Second to get regs - : NumPipeRegs; - fpnew_pipe_out #( - .Width ( WIDTH ), - .NumPipeRegs ( NUM_REGS ), - .TagType ( TagType ), - .AuxType ( AuxType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( 1'b1 ), // always NaN-Box result - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i ( tag_q2 ), - .aux_i ( aux_q2 ), - .in_valid_i ( out_valid_inside ), - .in_ready_o ( in_ready_output ), - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o ( busy_output ) - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign in_ready_output = out_ready_i; - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = tag_q2; - assign aux_o = aux_q2; - assign out_valid_o = out_valid_inside; + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = 
mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - - assign busy_o = busy_input | busy_inside | busy_output; - + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, 
mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_i2fcast.sv b/src/fpnew_i2fcast.sv deleted file mode 100644 index 56176cdd..00000000 --- a/src/fpnew_i2fcast.sv +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// Author: Stefan Mach - -module fpnew_i2fcast #( - parameter fpnew_pkg::fp_format_e DstFpFormat = fpnew_pkg::fp_format_e'(0), - parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, - parameter int unsigned NumPipeRegs = 0, - parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, - - localparam int unsigned SRC_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig), // do not change - localparam int unsigned DST_WIDTH = fpnew_pkg::fp_width(DstFpFormat) // do not change -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic [SRC_WIDTH-1:0] operands_i, // 1 operand - input fpnew_pkg::roundmode_e rnd_mode_i, - input logic op_mod_i, - input fpnew_pkg::int_format_e int_fmt_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic [DST_WIDTH-1:0] result_o, - output fpnew_pkg::status_t status_o, - output logic extension_bit_o, - output TagType tag_o, - output AuxType aux_o, - 
// Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o -); - - // ---------- - // Constants - // ---------- - localparam int DST_EXP_BITS = fpnew_pkg::exp_bits(DstFpFormat); - localparam int DST_MAN_BITS = fpnew_pkg::man_bits(DstFpFormat); - localparam int DST_BIAS = fpnew_pkg::bias(DstFpFormat); - - // The internal mantissa contains the normal and RS bits and must also be able to hold the integer - localparam int unsigned INT_MAN_WIDTH = fpnew_pkg::maximum(DST_MAN_BITS + 3, SRC_WIDTH); - - // There is a LZC for normalization - localparam int unsigned LZC_RESULT_WIDTH = $clog2(SRC_WIDTH); - - // The internal exponent is signed and must be wide enough to hold readjustment shift - localparam int unsigned INT_EXP_WIDTH = - fpnew_pkg::maximum(DST_EXP_BITS, LZC_RESULT_WIDTH) + 1; // +1 for signed - - // ---------------- - // Type definition - // ---------------- - typedef struct packed { - logic sign; - logic [DST_EXP_BITS-1:0] exponent; - logic [DST_MAN_BITS-1:0] mantissa; - } dst_fp_t; - - // --------------- - // Input pipeline - // --------------- - // Pipelined input signals - logic [SRC_WIDTH-1:0] operands_q; - fpnew_pkg::roundmode_e rnd_mode_q; - logic op_mod_q; - fpnew_pkg::int_format_e int_fmt_q; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE) begin : input_pipeline - fpnew_pipe_in #( - .Width ( SRC_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .NumOperands ( 1 ), - .TagType ( TagType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i ( '{default: fpnew_pkg::DONT_CARE} ), // unused - .rnd_mode_i, - .op_i ( fpnew_pkg::FMADD ), // unused - .op_mod_i, - .src_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .int_fmt_i, - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( /* unused */ ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( 
/* unused */ ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( /* unused */ ), - .int_fmt_o ( int_fmt_q ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign operands_q = operands_i; - assign rnd_mode_q = rnd_mode_i; - assign op_mod_q = op_mod_i; - assign int_fmt_q = int_fmt_i; - end - - // ----------------- - // Input processing - // ----------------- - logic [SRC_WIDTH-1:0] operand_a; - logic [SRC_WIDTH-1:0] input_val; - logic input_sign; - logic [SRC_WIDTH-1:0] input_mag; - - assign operand_a = operands_q; - - // Sign-extend input value - always_comb begin : sign_ext_input - logic [SRC_WIDTH-1:0] fmt_input_val[fpnew_pkg::NUM_INT_FORMATS]; // per-format input value - - // sign-extend input for each format - for (int unsigned i = 0; i < fpnew_pkg::NUM_INT_FORMATS; i++) begin - if (IntFmtConfig[i]) begin // only active formats - logic [SRC_WIDTH-1:0] sign_ext_mask; // mask for the locations of sign extension bits - logic sign_ext_val; // the sign extension bits for this format - logic [SRC_WIDTH-1:0] sign_ext_vector; // full of sign extension bits - - // Set up sign extension mask and value - sign_ext_mask = '1 << fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i)); - sign_ext_val = operand_a[fpnew_pkg::int_width(fpnew_pkg::int_format_e'(i))-1]; - sign_ext_vector = '{default: sign_ext_val & ~op_mod_q}; // only for signed casts - - // Combine result and sign extension vectors - fmt_input_val[i] = (sign_ext_mask & sign_ext_vector) | (~sign_ext_mask & operand_a); - end - end - - // Select input according to format chosen - input_val = fmt_input_val[int_fmt_q]; - end - - // Get the sign for signed casts - assign input_sign = input_val[SRC_WIDTH-1] & ~op_mod_q; - // Obtain the input's magnitude - assign input_mag = input_sign ? 
unsigned'(-input_val) : input_val; - - // -------- - // Casting - // -------- - logic signed [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination - logic [DST_EXP_BITS-1:0] final_exp; // after eventual adjustments - - logic [INT_MAN_WIDTH-1:0] preshift_mant; // mantissa before normalization shift - logic [INT_MAN_WIDTH-1:0] destination_mant; // mantissa from shifter - logic [DST_MAN_BITS-1:0] final_mant; // mantissa after adjustments - - logic [LZC_RESULT_WIDTH-1:0] norm_shamt; // shift amount for denormalization - logic signed [LZC_RESULT_WIDTH:0] norm_shamt_sgn; // shift amount in signed form - - logic result_zero; - - logic [1:0] round_sticky_bits; - logic of_before_round; - - // Leading-zero counter is needed for normalization - lzc #( - .WIDTH ( SRC_WIDTH ), - .MODE ( 1 ) // MODE = 1 counts leading zeroes - ) i_lzc ( - .in_i ( input_mag ), - .cnt_o ( norm_shamt ), - .empty_o ( result_zero ) - ); - assign norm_shamt_sgn = signed'({1'b0, norm_shamt}); - - // Place integer to the left of the shifter space (only matters if DST_MAN_BITS > SRC_WIDTH) - assign preshift_mant = input_mag << (INT_MAN_WIDTH - SRC_WIDTH); // constant shift - // Perform the Normalization shift - assign destination_mant = preshift_mant << norm_shamt; - - // Exponent is calculated from source width and the leading zeroes, bias is added - assign destination_exp = signed'(SRC_WIDTH - 1 - norm_shamt_sgn + DST_BIAS); - - // Handle the only special case we have: OF - always_comb begin : detect_overflow - // Default assignment - final_exp = unsigned'(destination_exp); // take exponent as is - final_mant = destination_mant[INT_MAN_WIDTH-2 -: DST_MAN_BITS]; - round_sticky_bits[1] = destination_mant[INT_MAN_WIDTH-DST_MAN_BITS-2]; - round_sticky_bits[0] = (| {destination_mant[INT_MAN_WIDTH-DST_MAN_BITS-3:0]}); // reduce sticky - of_before_round = 1'b0; - - // Properly set the exponent for zeroes - if (result_zero) begin - final_exp = '0; - // Handle overflows or infinities (for 
proper rounding) - end else if (destination_exp >= 2**DST_EXP_BITS-1) begin - final_exp = unsigned'(2**DST_EXP_BITS-2); // largest normal value - final_mant = '1; // largest normal value and RS bits set - round_sticky_bits = '1; // RS set - of_before_round = 1'b1; - end - end - - // ---------------------------- - // Rounding and classification - // ---------------------------- - logic [DST_EXP_BITS+DST_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding - - logic of_after_round; // overflow - - logic [DST_EXP_BITS+DST_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding - logic rounded_sign; - - assign pre_round_abs = {final_exp, final_mant}; - - // Rounding is only needed if mantissa smaller than integer or integer has larger range - if ((DST_MAN_BITS + 1 < SRC_WIDTH) || (SRC_WIDTH > DST_BIAS)) begin : gen_rounding - - fpnew_rounding #( - .AbsWidth ( DST_WIDTH - 1 ) - ) i_fpnew_rounding ( - .abs_value_i ( pre_round_abs ), - .sign_i ( input_sign ), - .round_sticky_bits_i ( round_sticky_bits ), - .rnd_mode_i ( rnd_mode_q ), - .effective_subtraction_i ( 1'b0 ), // no operation happened - .abs_rounded_o ( rounded_abs ), - .sign_o ( rounded_sign ), - .exact_zero_o ( /* unused */ ) - ); - - end else begin : no_rounding - assign rounded_abs = pre_round_abs; - assign rounded_sign = input_sign; - end - - // Classification after rounding - assign of_after_round = rounded_abs[DST_EXP_BITS+DST_MAN_BITS-1:DST_MAN_BITS] == '1; // inf exp. 
- - // ----------------- - // Result selection - // ----------------- - // Final results for output pipeline - logic [DST_WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - - // Assemble final result - assign result_d = {rounded_sign, rounded_abs}; - assign status_d = '{ - NV: of_before_round | of_after_round, // Overflowing values are invalid for casts - DZ: 1'b0, // no divisions - OF: 1'b0, // no overflow per se => it's invalid to cast too large integers - UF: 1'b0, // no underflow - NX: (| round_sticky_bits) // RS bits mean loss in precision - }; - - // ---------------- - // Output Pipeline - // ---------------- - // Generate pipeline at output if needed - if (PipeConfig!=fpnew_pkg::BEFORE) begin : output_pipline - fpnew_pipe_out #( - .Width ( DST_WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .TagType ( TagType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( 1'b1 ), // always NaN-Box result - .class_mask_i ( fpnew_pkg::QNAN ), // unused - .is_class_i ( 1'b0 ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = 1'b1; // always NaN-Box result - end - -endmodule diff --git a/src/fpnew_noncomp.sv b/src/fpnew_noncomp.sv index af23b8c8..9e485f9e 100644 --- a/src/fpnew_noncomp.sv +++ b/src/fpnew_noncomp.sv @@ -11,6 +11,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_noncomp #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, @@ -54,6 +56,17 @@ module fpnew_noncomp #( // ---------- localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(FpFormat); 
localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(FpFormat); + // Pipelines + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // no regs here otherwise // ---------------- // Type definition @@ -67,57 +80,49 @@ module fpnew_noncomp #( // --------------- // Input pipeline // --------------- - // Pipelined input signals - logic [1:0][WIDTH-1:0] operands_q; - logic [1:0] is_boxed_q; - fpnew_pkg::roundmode_e rnd_mode_q; - fpnew_pkg::operation_e op_q; - logic op_mod_q; - - // Generate pipeline at input if needed - if (PipeConfig==fpnew_pkg::BEFORE) begin : input_pipeline - fpnew_pipe_in #( - .Width ( WIDTH ), - .NumPipeRegs ( NumPipeRegs ), - .NumOperands ( 2 ), - .TagType ( TagType ) - ) i_input_pipe ( - .clk_i, - .rst_ni, - .operands_i, - .is_boxed_i, - .rnd_mode_i, - .op_i, - .op_mod_i, - .src_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .dst_fmt_i ( fpnew_pkg::fp_format_e'(0) ), // unused - .int_fmt_i ( fpnew_pkg::int_format_e'(0) ), // unused - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .operands_o ( operands_q ), - .is_boxed_o ( is_boxed_q ), - .rnd_mode_o ( rnd_mode_q ), - .op_o ( op_q ), - .op_mod_o ( op_mod_q ), - .src_fmt_o ( /* unused */ ), - .dst_fmt_o ( /* unused */ ), - .int_fmt_o ( /* unused */ ), - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through inputs - end else begin : no_input_pipeline - assign operands_q = operands_i; - assign is_boxed_q = is_boxed_i; - assign rnd_mode_q = rnd_mode_i; - assign op_q = op_i; - assign op_mod_q = op_mod_i; + // Input pipeline signals, index i holds signal after i register 
stages + logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; + logic [0:NUM_INP_REGS][1:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // --------------------- @@ -130,17 +135,17 @@ module fpnew_noncomp #( .FpFormat ( FpFormat ), .NumOperands ( 2 ) ) i_class_a ( - .operands_i ( operands_q ), - .is_boxed_i ( is_boxed_q ), - .info_o ( info_q ) + .operands_i ( inp_pipe_operands_q[NUM_INP_REGS] ), + .is_boxed_i ( inp_pipe_is_boxed_q[NUM_INP_REGS] ), + .info_o ( info_q ) ); fp_t operand_a, operand_b; fpnew_pkg::fp_info_t info_a, info_b; - // Packing-order-agnostic assignments - assign operand_a = operands_q[0]; - assign operand_b = operands_q[1]; + // Packing-order-agnostic assignments + assign operand_a = inp_pipe_operands_q[NUM_INP_REGS][0]; + assign operand_b = inp_pipe_operands_q[NUM_INP_REGS][1]; assign info_a = info_q[0]; assign info_b = info_q[1]; @@ -182,7 +187,7 @@ module fpnew_noncomp #( sign_b = operand_b.sign & info_b.is_boxed; // Do the sign injection based on rm field - unique case (rnd_mode_q) + unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) fpnew_pkg::RNE: 
sgnj_result.sign = sign_b; // SGNJ fpnew_pkg::RTZ: sgnj_result.sign = ~sign_b; // SGNJN fpnew_pkg::RDN: sgnj_result.sign = sign_a ^ sign_b; // SGNJX @@ -193,8 +198,8 @@ module fpnew_noncomp #( assign sgnj_status = '0; // sign injections never raise exceptions - // op_mod_q enables sign-extension of result (for storing to integer regfile) - assign sgnj_extension_bit = op_mod_q ? sgnj_result.sign : 1'b1; // NaN-box regular float results + // op_mod_q enables integer sign-extension of result (for storing to integer regfile) + assign sgnj_extension_bit = inp_pipe_op_mod_q[NUM_INP_REGS] ? sgnj_result.sign : 1'b1; // ------------------ // Minimum / Maximum @@ -220,7 +225,7 @@ module fpnew_noncomp #( else if (info_b.is_nan) minmax_result = operand_a; // Otherwise decide according to the operation else begin - unique case (rnd_mode_q) + unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) fpnew_pkg::RNE: minmax_result = operand_a_smaller ? operand_a : operand_b; // MIN fpnew_pkg::RTZ: minmax_result = operand_a_smaller ? 
operand_b : operand_a; // MAX default: minmax_result = '{default: fpnew_pkg::DONT_CARE}; // don't care @@ -249,18 +254,18 @@ module fpnew_noncomp #( if (signalling_nan) cmp_status.NV = 1'b1; // invalid operation // Otherwise do comparisons else begin - unique case (rnd_mode_q) + unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) fpnew_pkg::RNE: begin // Less than or equal if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid - else cmp_result = (operand_a_smaller | operands_equal) ^ op_mod_q; + else cmp_result = (operand_a_smaller | operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS]; end fpnew_pkg::RTZ: begin // Less than if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid - else cmp_result = (operand_a_smaller & ~operands_equal) ^ op_mod_q; // -0 = +0, not less + else cmp_result = (operand_a_smaller & ~operands_equal) ^ inp_pipe_op_mod_q[NUM_INP_REGS]; end fpnew_pkg::RDN: begin // Equal - if (any_operand_nan) cmp_result = op_mod_q; // NaNs are valid, always campare as not equal - else cmp_result = operands_equal ^ op_mod_q; + if (any_operand_nan) cmp_result = inp_pipe_op_mod_q[NUM_INP_REGS]; // NaN always not equal + else cmp_result = operands_equal ^ inp_pipe_op_mod_q[NUM_INP_REGS]; end default: cmp_result = '{default: fpnew_pkg::DONT_CARE}; // don't care endcase @@ -306,7 +311,7 @@ module fpnew_noncomp #( // Select result always_comb begin : select_result - unique case (op_q) + unique case (inp_pipe_op_q[NUM_INP_REGS]) fpnew_pkg::SGNJ: begin result_d = sgnj_result; status_d = sgnj_status; @@ -335,48 +340,65 @@ module fpnew_noncomp #( endcase end - assign is_class_d = (op_q == fpnew_pkg::CLASSIFY); + assign is_class_d = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::CLASSIFY); // ---------------- // Output Pipeline // ---------------- - // Generate pipeline at output if needed - if (PipeConfig!=fpnew_pkg::BEFORE) begin : output_pipline - fpnew_pipe_out #( - .Width ( WIDTH ), - .NumPipeRegs ( NumPipeRegs ), 
- .TagType ( TagType ) - ) i_output_pipe ( - .clk_i, - .rst_ni, - .result_i ( result_d ), - .status_i ( status_d ), - .extension_bit_i ( extension_bit_d ), - .class_mask_i ( class_mask_d ), - .is_class_i ( is_class_d ), - .tag_i, - .aux_i, - .in_valid_i, - .in_ready_o, - .flush_i, - .result_o, - .status_o, - .extension_bit_o, - .class_mask_o, - .is_class_o, - .tag_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o - ); - // Otherwise pass through outputs - end else begin : no_output_pipeline - assign result_o = result_d; - assign status_o = status_d; - assign extension_bit_o = extension_bit_d; - assign class_mask_o = class_mask_d; - assign is_class_o = is_class_d; + // Output pipeline signals, index i holds signal after i register stages + fp_t [0:NUM_OUT_REGS] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + logic [0:NUM_OUT_REGS] out_pipe_extension_bit_q; + fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q; + logic [0:NUM_OUT_REGS] out_pipe_is_class_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_extension_bit_q[0] = extension_bit_d; + assign out_pipe_class_mask_q[0] = class_mask_d; + assign out_pipe_is_class_q[0] = is_class_d; + assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + 
logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0) + `FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN) + `FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS]; + assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS]; + assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS]; + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index d5f71a69..b9daeeb7 100644 --- 
a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -79,9 +79,9 @@ module fpnew_opgroup_block #( // ------------------------- for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_parallel_slices // Some constants for this format - localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes); + localparam logic ANY_MERGED = fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask); localparam logic IS_FIRST_MERGED = - fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes); + fpnew_pkg::is_first_enabled_multi(fpnew_pkg::fp_format_e'(fmt), FmtUnitTypes, FpFmtMask); // Generate slice only if format enabled if (FpFmtMask[fmt] && (FmtUnitTypes[fmt] == fpnew_pkg::PARALLEL)) begin : active_format @@ -91,13 +91,13 @@ module fpnew_opgroup_block #( assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format fpnew_opgroup_fmt_slice #( - .OpGroup ( OpGroup ), - .FpFormat ( fmt ), - .Width ( Width ), - .EnableVectors ( EnableVectors ), - .NumPipeRegs ( FmtPipeRegs[fmt] ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ) + .OpGroup ( OpGroup ), + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .Width ( Width ), + .EnableVectors ( EnableVectors ), + .NumPipeRegs ( FmtPipeRegs[fmt] ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ) ) i_fmt_slice ( .clk_i, .rst_ni, @@ -123,12 +123,16 @@ module fpnew_opgroup_block #( end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused // Ready is split up into formats - assign fmt_in_ready[fmt] = fmt_in_ready[fpnew_pkg::get_first_enabled_multi(FmtUnitTypes)]; + assign fmt_in_ready[fmt] = fmt_in_ready[fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, + FpFmtMask)]; assign fmt_out_valid[fmt] = 1'b0; // don't emit values assign fmt_busy[fmt] = 1'b0; // never busy // Outputs are don't care - assign fmt_outputs[fmt] = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE}; + assign 
fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE; + assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE); // Tie off disabled formats end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt @@ -136,16 +140,19 @@ module fpnew_opgroup_block #( assign fmt_out_valid[fmt] = 1'b0; // don't emit values assign fmt_busy[fmt] = 1'b0; // never busy // Outputs are don't care - assign fmt_outputs[fmt] = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].result = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE; + assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE); end end // ---------------------- // Generate Merged Slice // ---------------------- - if (fpnew_pkg::any_enabled_multi(FmtUnitTypes)) begin : gen_merged_slice + if (fpnew_pkg::any_enabled_multi(FmtUnitTypes, FpFmtMask)) begin : gen_merged_slice - localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes); + localparam FMT = fpnew_pkg::get_first_enabled_multi(FmtUnitTypes, FpFmtMask); logic in_valid; diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index 80ddb0db..fda2a57f 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -126,7 +126,8 @@ module fpnew_opgroup_fmt_slice #( .out_ready_i ( out_ready ), .busy_o ( lane_busy[lane] ) ); - assign lane_is_class[lane] = 1'b0; + assign lane_is_class[lane] = 1'b0; + assign lane_class_mask[lane] = fpnew_pkg::NEGINF; end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance // fpnew_divsqrt #( // .FpFormat (FpFormat), @@ -245,11 +246,14 @@ module fpnew_opgroup_fmt_slice #( localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 
8 * (Width / 8) : NUM_LANES*8; - localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1; + // Pad out unused vec_class bits + if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class + assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0; + end + + // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1; - assign slice_class_result = result_is_vector - ? slice_vec_class_result & CLASS_VEC_MASK - : lane_class_mask[0]; // Scalar classification block + assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0]; // Select the proper result assign result_o = result_is_class ? slice_class_result : slice_regular_result; diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index 597e9dc0..14c1b453 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -11,6 +11,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module fpnew_opgroup_multifmt_slice #( parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV, parameter int unsigned Width = 64, @@ -338,45 +340,49 @@ module fpnew_opgroup_multifmt_slice #( assign fmt_slice_result[fmt][Width-1:NUM_LANES*FP_WIDTH] = '{default: lane_ext_bit[0]}; end + // Mute int results if unused + for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : int_results_disabled + if (OpGroup != fpnew_pkg::CONV) begin : mute_int_result + assign ifmt_slice_result[ifmt] = '0; + end + end + // Bypass lanes with target operand for vectorial casts if (OpGroup == fpnew_pkg::CONV) begin : target_regs - logic in_valid, out_ready; - - assign in_valid = in_valid_i & vectorial_op; - // instantiate pipe - fpnew_pipe_out #( - .Width ( Width ), - .NumPipeRegs ( NumPipeRegs ), - .TagType ( logic ), - .AuxType ( logic [2:0] ) - ) target_pipe ( - .clk_i, - .rst_ni, - .result_i ( conv_target_d ), - .status_i ( '0 ), // unused - .extension_bit_i ( 1'b0 ), // unused - .class_mask_i ( fpnew_pkg::QNAN ), // unused 
- .is_class_i ( 1'b0 ), // unused - .tag_i ( 1'b0 ), // unused - .aux_i ( target_aux_d ), - .in_valid_i ( in_valid ), - .in_ready_o ( /* unused */ ), - .flush_i, - .result_o ( conv_target_q ), - .status_o ( /* unused */ ), - .extension_bit_o ( /* unused */ ), - .class_mask_o ( /* unused */ ), - .is_class_o ( /* unused */ ), - .tag_o ( /* unused */ ), - .aux_o ( target_aux_q ), - .out_valid_o ( /* unused */ ), - .out_ready_i ( out_ready ), - .busy_o ( /* unused */ ) - ); - assign out_ready = out_ready_i & result_is_vector; + // Bypass pipeline signals, index i holds signal after i register stages + logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q; + logic [0:NumPipeRegs][2:0] byp_pipe_aux_q; + logic [0:NumPipeRegs] byp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NumPipeRegs] byp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign byp_pipe_target_q[0] = conv_target_d; + assign byp_pipe_aux_q[0] = target_aux_d; + assign byp_pipe_valid_q[0] = in_valid_i & vectorial_op; + // Generate the register stages + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipleine ready and a valid data item is present + assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) + `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector; + // Output stage: assign module outputs + assign conv_target_q = byp_pipe_target_q[NumPipeRegs]; // decode the aux data - assign {result_vec_op, result_is_cpk} = target_aux_q; + assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs]; end else begin : no_conv assign {result_vec_op, result_is_cpk} = '0; end diff --git a/src/fpnew_pipe_in.sv b/src/fpnew_pipe_in.sv deleted file mode 100644 index 818f5eb5..00000000 --- a/src/fpnew_pipe_in.sv +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -// Author: Stefan Mach - -// Generate pipeline stages as given by NumPipeRegs. When NumPipeRegs is 0, no registers are -// generated. -// +---------|---------|---------|----------|-------------------+ -// | _d[0] | _d[1] | _d[2] | _d[..] | _d[NumPipeRegs] | -// | | _q[0] | _q[1] | _q[..-1] | _q[NumPipeRegs-1] | -// inputs_i >=========|=========|=========|====~~====|===================> inputs_o -// in_valid_i >---------|---------|---------|----~~----|-------------------> out_valid_o -// in_ready_o <---------+---------+---------+----~~----+-------------------< out_ready_i -// | | | | | | -// stage # +----0----|----1----|----2----|----..----|----NumPipeRegs----+ -// -// NOTE: These registers must be retimed in synthesis for sensible pipelining. Make sure to -// optimize registers through the instantiating hierarchy. -// The ready signal is not a direct feed-through from destination to source but takes into account -// intermediate 'bubbles' in the pipeline. As such, downstream stalls can be hidden when the -// pipeline is not full. -// Enable signals on the registers will lead to clock-gated pipeline stages when this optimization -// is enabled during synthesis. Make sure to optimize clock gates through hierarchies. 
- -// Author: Stefan Mach - -`include "common_cells/registers.svh" - -module fpnew_pipe_in #( - parameter int unsigned Width = 32, - parameter int unsigned NumPipeRegs = 0, - parameter int unsigned NumOperands = 3, - parameter int unsigned NumFormats = 1, - parameter type TagType = logic, - parameter type AuxType = logic -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic [NumOperands-1:0][Width-1:0] operands_i, - input logic [NumFormats-1:0][NumOperands-1:0] is_boxed_i, - input fpnew_pkg::roundmode_e rnd_mode_i, - input fpnew_pkg::operation_e op_i, - input logic op_mod_i, - input fpnew_pkg::fp_format_e src_fmt_i, - input fpnew_pkg::fp_format_e dst_fmt_i, - input fpnew_pkg::int_format_e int_fmt_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic [NumOperands-1:0][Width-1:0] operands_o, - output logic [NumFormats-1:0][NumOperands-1:0] is_boxed_o, - output fpnew_pkg::roundmode_e rnd_mode_o, - output fpnew_pkg::operation_e op_o, - output logic op_mod_o, - output fpnew_pkg::fp_format_e src_fmt_o, - output fpnew_pkg::fp_format_e dst_fmt_o, - output fpnew_pkg::int_format_e int_fmt_o, - output TagType tag_o, - output AuxType aux_o, - // Output Handshake - output logic out_valid_o, - input logic out_ready_i, - // Status signal - output logic busy_o -); - - // Input signals for the next stage (= output signals of the previous stage) - logic [0:NumPipeRegs][NumOperands-1:0][Width-1:0] operands_d; - logic [0:NumPipeRegs][NumFormats-1:0][NumOperands-1:0] is_boxed_d; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_d; - fpnew_pkg::operation_e [0:NumPipeRegs] op_d; - logic [0:NumPipeRegs] op_mod_d; - fpnew_pkg::fp_format_e [0:NumPipeRegs] src_fmt_d; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_d; - fpnew_pkg::int_format_e [0:NumPipeRegs] int_fmt_d; - TagType [0:NumPipeRegs] tag_d; - AuxType [0:NumPipeRegs] aux_d; - 
logic [0:NumPipeRegs] valid_d; - // Ready signal is combinatorial for all stages - logic [0:NumPipeRegs] stage_ready; - - // Input stage: First element of pipeline is taken from inputs - assign operands_d[0] = operands_i; - assign is_boxed_d[0] = is_boxed_i; - assign rnd_mode_d[0] = rnd_mode_i; - assign op_d[0] = op_i; - assign op_mod_d[0] = op_mod_i; - assign src_fmt_d[0] = src_fmt_i; - assign dst_fmt_d[0] = dst_fmt_i; - assign int_fmt_d[0] = int_fmt_i; - assign tag_d[0] = tag_i; - assign aux_d[0] = aux_i; - assign valid_d[0] = in_valid_i; - - // Input stage: Propagate pipeline ready signal - assign in_ready_o = stage_ready[0]; - - // Generate the pipeline stages in case they are needed - if (NumPipeRegs > 0) begin : gen_pipeline - // Pipelined versions of signals for later stages - logic [0:NumPipeRegs][NumOperands-1:0][Width-1:0] operands_q; - logic [0:NumPipeRegs][NumFormats-1:0][NumOperands-1:0] is_boxed_q; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_q; - fpnew_pkg::operation_e [0:NumPipeRegs] op_q; - logic [0:NumPipeRegs] op_mod_q; - fpnew_pkg::fp_format_e [0:NumPipeRegs] src_fmt_q; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_q; - fpnew_pkg::int_format_e [0:NumPipeRegs] int_fmt_q; - TagType [0:NumPipeRegs] tag_q; - AuxType [0:NumPipeRegs] aux_q; - logic [0:NumPipeRegs] valid_q; - - for (genvar i = 0; i < int'(NumPipeRegs); i++) begin : pipeline_stages - // Internal register enable for this stage - logic reg_ena; - - // Next state from previous register to form a shift register - assign operands_d[i+1] = operands_q[i]; - assign is_boxed_d[i+1] = is_boxed_q[i]; - assign rnd_mode_d[i+1] = rnd_mode_q[i]; - assign op_d[i+1] = op_q[i]; - assign op_mod_d[i+1] = op_mod_q[i]; - assign src_fmt_d[i+1] = src_fmt_q[i]; - assign dst_fmt_d[i+1] = dst_fmt_q[i]; - assign int_fmt_d[i+1] = int_fmt_q[i]; - assign tag_d[i+1] = tag_q[i]; - assign aux_d[i+1] = aux_q[i]; - assign valid_d[i+1] = valid_q[i]; - - // Determine the ready signal of the current stage - advance 
the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign stage_ready[i] = stage_ready[i+1] | ~valid_q[i]; - - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(valid_q[i], valid_d[i], stage_ready[i], flush_i, 1'b0, clk_i, rst_ni) - - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = stage_ready[i] & valid_d[i]; - - // Generate the pipeline registers within the stages, use enable-registers - `FFL(operands_q[i], operands_d[i], reg_ena, '0) - `FFL(is_boxed_q[i], is_boxed_d[i], reg_ena, '0) - `FFL(rnd_mode_q[i], rnd_mode_d[i], reg_ena, fpnew_pkg::RNE) - `FFL(op_q[i], op_d[i], reg_ena, fpnew_pkg::FMADD) - `FFL(op_mod_q[i], op_mod_d[i], reg_ena, '0) - `FFL(src_fmt_q[i], src_fmt_d[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(dst_fmt_q[i], dst_fmt_d[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(int_fmt_q[i], int_fmt_d[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(tag_q[i], tag_d[i], reg_ena, '0) - `FFL(aux_q[i], aux_d[i], reg_ena, '0) - end - end - - // Output stage: bind last stage outputs to module output. Directly connects to input if no regs. 
- assign operands_o = operands_d[NumPipeRegs]; - assign is_boxed_o = is_boxed_d[NumPipeRegs]; - assign rnd_mode_o = rnd_mode_d[NumPipeRegs]; - assign op_o = op_d[NumPipeRegs]; - assign op_mod_o = op_mod_d[NumPipeRegs]; - assign dst_fmt_o = dst_fmt_d[NumPipeRegs]; - assign src_fmt_o = src_fmt_d[NumPipeRegs]; - assign int_fmt_o = int_fmt_d[NumPipeRegs]; - assign tag_o = tag_d[NumPipeRegs]; - assign aux_o = aux_d[NumPipeRegs]; - assign out_valid_o = valid_d[NumPipeRegs]; - - // Output stage: Ready travels backwards from output side - assign stage_ready[NumPipeRegs] = out_ready_i; - - // The pipeline is considered busy if any valid data is in flight - assign busy_o = (| valid_d); - -endmodule diff --git a/src/fpnew_pipe_inside_cast.sv b/src/fpnew_pipe_inside_cast.sv deleted file mode 100644 index bf699980..00000000 --- a/src/fpnew_pipe_inside_cast.sv +++ /dev/null @@ -1,226 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// Author: Stefan Mach - -// Generate pipeline stages as given by NumPipeRegs. When NumPipeRegs is 0, no registers are -// generated. -// +---------|---------|---------|----------|-------------------+ -// | _d[0] | _d[1] | _d[2] | _d[..] 
| _d[NumPipeRegs] | -// | | _q[0] | _q[1] | _q[..-1] | _q[NumPipeRegs-1] | -// inputs_i >=========|=========|=========|====~~====|===================> inputs_o -// in_valid_i >---------|---------|---------|----~~----|-------------------> out_valid_o -// in_ready_o <---------+---------+---------+----~~----+-------------------< out_ready_i -// | | | | | | -// stage # +----0----|----1----|----2----|----..----|----NumPipeRegs----+ -// -// NOTE: These registers must be retimed in synthesis for sensible pipelining. Make sure to -// optimize registers through the instantiating hierarchy. -// The ready signal is not a direct feed-through from destination to source but takes into account -// intermediate 'bubbles' in the pipeline. As such, downstream stalls can be hidden when the -// pipeline is not full. -// Enable signals on the registers will lead to clock-gated pipeline stages when this optimization -// is enabled during synthesis. Make sure to optimize clock gates through hierarchies. - -// Author: Stefan Mach - -`include "common_cells/registers.svh" - -module fpnew_pipe_inside_cast #( - parameter int unsigned IntExpWidth = 12, - parameter int unsigned IntManWidth = 64, - parameter int unsigned NumPipeRegs = 0, - parameter type TagType = logic, - parameter type AuxType = logic -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic input_sign_i, - input logic signed [IntExpWidth-1:0] input_exp_i, - input logic signed [IntExpWidth-1:0] destination_exp_i, - input logic [IntManWidth-1:0] input_mant_i, - input logic src_is_int_i, - input logic dst_is_int_i, - input fpnew_pkg::fp_info_t info_i, - input logic mant_is_zero_i, - input logic op_mod_i, - input fpnew_pkg::roundmode_e rnd_mode_i, - input fpnew_pkg::fp_format_e src_fmt_i, - input fpnew_pkg::fp_format_e dst_fmt_i, - input fpnew_pkg::int_format_e int_fmt_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic 
flush_i, - // Output signals - output logic input_sign_o, - output logic signed [IntExpWidth-1:0] input_exp_o, - output logic signed [IntExpWidth-1:0] destination_exp_o, - output logic [IntManWidth-1:0] input_mant_o, - output logic src_is_int_o, - output logic dst_is_int_o, - output fpnew_pkg::fp_info_t info_o, - output logic mant_is_zero_o, - output logic op_mod_o, - output fpnew_pkg::roundmode_e rnd_mode_o, - output fpnew_pkg::fp_format_e src_fmt_o, - output fpnew_pkg::fp_format_e dst_fmt_o, - output fpnew_pkg::int_format_e int_fmt_o, - output TagType tag_o, - output AuxType aux_o, - // Output Handshake - output logic out_valid_o, - input logic out_ready_i, - // Status signal - output logic busy_o -); - - // Input signals for the next stage (= output signals of the previous stage) - logic [0:NumPipeRegs] input_sign_d; - logic signed [0:NumPipeRegs][IntExpWidth-1:0] input_exp_d; - logic signed [0:NumPipeRegs][IntExpWidth-1:0] destination_exp_d; - logic [0:NumPipeRegs][IntManWidth-1:0] input_mant_d; - logic [0:NumPipeRegs] src_is_int_d; - logic [0:NumPipeRegs] dst_is_int_d; - fpnew_pkg::fp_info_t [0:NumPipeRegs] info_d; - logic [0:NumPipeRegs] mant_is_zero_d; - logic [0:NumPipeRegs] op_mod_d; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_d; - fpnew_pkg::fp_format_e [0:NumPipeRegs] src_fmt_d; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_d; - fpnew_pkg::int_format_e [0:NumPipeRegs] int_fmt_d; - TagType [0:NumPipeRegs] tag_d; - AuxType [0:NumPipeRegs] aux_d; - logic [0:NumPipeRegs] valid_d; - // Ready signal is combinatorial for all stages - logic [0:NumPipeRegs] stage_ready; - - // Input stage: First element of pipeline is taken from inputs - assign input_sign_d[0] = input_sign_i; - assign input_exp_d[0] = input_exp_i; - assign destination_exp_d[0] = destination_exp_i; - assign input_mant_d[0] = input_mant_i; - assign src_is_int_d[0] = src_is_int_i; - assign dst_is_int_d[0] = dst_is_int_i; - assign info_d[0] = info_i; - assign mant_is_zero_d[0] = 
mant_is_zero_i; - assign op_mod_d[0] = op_mod_i; - assign rnd_mode_d[0] = rnd_mode_i; - assign src_fmt_d[0] = src_fmt_i; - assign dst_fmt_d[0] = dst_fmt_i; - assign int_fmt_d[0] = int_fmt_i; - assign tag_d[0] = tag_i; - assign aux_d[0] = aux_i; - assign valid_d[0] = in_valid_i; - - // Input stage: Propagate pipeline ready signal - assign in_ready_o = stage_ready[0]; - - // Generate the pipeline stages in case they are needed - if (NumPipeRegs > 0) begin : gen_pipeline - // Pipelined versions of signals for later stages - logic [0:NumPipeRegs] input_sign_q; - logic signed [0:NumPipeRegs][IntExpWidth-1:0] input_exp_q; - logic signed [0:NumPipeRegs][IntExpWidth-1:0] destination_exp_q; - logic [0:NumPipeRegs][IntManWidth-1:0] input_mant_q; - logic [0:NumPipeRegs] src_is_int_q; - logic [0:NumPipeRegs] dst_is_int_q; - fpnew_pkg::fp_info_t [0:NumPipeRegs] info_q; - logic [0:NumPipeRegs] mant_is_zero_q; - logic [0:NumPipeRegs] op_mod_q; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_q; - fpnew_pkg::fp_format_e [0:NumPipeRegs] src_fmt_q; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_q; - fpnew_pkg::int_format_e [0:NumPipeRegs] int_fmt_q; - TagType [0:NumPipeRegs] tag_q; - AuxType [0:NumPipeRegs] aux_q; - logic [0:NumPipeRegs] valid_q; - - for (genvar i = 0; i < int'(NumPipeRegs); i++) begin : pipeline_stages - // Internal register enable for this stage - logic reg_ena; - - // Next state from previous register to form a shift register - assign input_sign_d[i+1] = input_sign_q[i]; - assign input_exp_d[i+1] = input_exp_q[i]; - assign destination_exp_d[i+1] = destination_exp_q[i]; - assign input_mant_d[i+1] = input_mant_q[i]; - assign src_is_int_d[i+1] = src_is_int_q[i]; - assign dst_is_int_d[i+1] = dst_is_int_q[i]; - assign info_d[i+1] = info_q[i]; - assign mant_is_zero_d[i+1] = mant_is_zero_q[i]; - assign op_mod_d[i+1] = op_mod_q[i]; - assign rnd_mode_d[i+1] = rnd_mode_q[i]; - assign src_fmt_d[i+1] = src_fmt_q[i]; - assign dst_fmt_d[i+1] = dst_fmt_q[i]; - assign 
int_fmt_d[i+1] = int_fmt_q[i]; - assign tag_d[i+1] = tag_q[i]; - assign aux_d[i+1] = aux_q[i]; - assign valid_d[i+1] = valid_q[i]; - - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign stage_ready[i] = stage_ready[i+1] | ~valid_q[i]; - - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(valid_q[i], valid_d[i], stage_ready[i], flush_i, 1'b0, clk_i, rst_ni) - - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = stage_ready[i] & valid_d[i]; - - // Generate the pipeline registers within the stages, use enable-registers - `FFL(input_sign_q[i], input_sign_d[i], reg_ena, '0) - `FFL(destination_exp_q[i], destination_exp_d[i], reg_ena, '0) - `FFL(input_exp_q[i], input_exp_d[i], reg_ena, '0) - `FFL(input_mant_q[i], input_mant_d[i], reg_ena, '0) - `FFL(src_is_int_q[i], src_is_int_d[i], reg_ena, '0) - `FFL(dst_is_int_q[i], dst_is_int_d[i], reg_ena, '0) - `FFL(info_q[i], info_d[i], reg_ena, '0) - `FFL(mant_is_zero_q[i], mant_is_zero_d[i], reg_ena, '0) - `FFL(op_mod_q[i], op_mod_d[i], reg_ena, '0) - `FFL(rnd_mode_q[i], rnd_mode_d[i], reg_ena, fpnew_pkg::RNE) - `FFL(src_fmt_q[i], src_fmt_d[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(dst_fmt_q[i], dst_fmt_d[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(int_fmt_q[i], int_fmt_d[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(tag_q[i], tag_d[i], reg_ena, '0) - `FFL(aux_q[i], aux_d[i], reg_ena, '0) - end - end - - // Output stage: bind last stage outputs to module output. Directly connects to input if no regs. 
- assign input_sign_o = input_sign_d[NumPipeRegs]; - assign input_exp_o = input_exp_d[NumPipeRegs]; - assign destination_exp_o = destination_exp_d[NumPipeRegs]; - assign input_mant_o = input_mant_d[NumPipeRegs]; - assign src_is_int_o = src_is_int_d[NumPipeRegs]; - assign dst_is_int_o = dst_is_int_d[NumPipeRegs]; - assign info_o = info_d[NumPipeRegs]; - assign mant_is_zero_o = mant_is_zero_d[NumPipeRegs]; - assign op_mod_o = op_mod_d[NumPipeRegs]; - assign rnd_mode_o = rnd_mode_d[NumPipeRegs]; - assign src_fmt_o = src_fmt_d[NumPipeRegs]; - assign dst_fmt_o = dst_fmt_d[NumPipeRegs]; - assign int_fmt_o = int_fmt_d[NumPipeRegs]; - assign tag_o = tag_d[NumPipeRegs]; - assign aux_o = aux_d[NumPipeRegs]; - assign out_valid_o = valid_d[NumPipeRegs]; - - // Output stage: Ready travels backwards from output side - assign stage_ready[NumPipeRegs] = out_ready_i; - - // The pipeline is considered busy if any valid data is in flight - assign busy_o = (| valid_d); - -endmodule diff --git a/src/fpnew_pipe_inside_fma.sv b/src/fpnew_pipe_inside_fma.sv deleted file mode 100644 index a9ddf0df..00000000 --- a/src/fpnew_pipe_inside_fma.sv +++ /dev/null @@ -1,233 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// Author: Stefan Mach - -// Generate pipeline stages as given by NumPipeRegs. When NumPipeRegs is 0, no registers are -// generated. 
-// +---------|---------|---------|----------|-------------------+ -// | _d[0] | _d[1] | _d[2] | _d[..] | _d[NumPipeRegs] | -// | | _q[0] | _q[1] | _q[..-1] | _q[NumPipeRegs-1] | -// inputs_i >=========|=========|=========|====~~====|===================> inputs_o -// in_valid_i >---------|---------|---------|----~~----|-------------------> out_valid_o -// in_ready_o <---------+---------+---------+----~~----+-------------------< out_ready_i -// | | | | | | -// stage # +----0----|----1----|----2----|----..----|----NumPipeRegs----+ -// -// NOTE: These registers must be retimed in synthesis for sensible pipelining. Make sure to -// optimize registers through the instantiating hierarchy. -// The ready signal is not a direct feed-through from destination to source but takes into account -// intermediate 'bubbles' in the pipeline. As such, downstream stalls can be hidden when the -// pipeline is not full. -// Enable signals on the registers will lead to clock-gated pipeline stages when this optimization -// is enabled during synthesis. Make sure to optimize clock gates through hierarchies. 
- -// Author: Stefan Mach - -`include "common_cells/registers.svh" - -module fpnew_pipe_inside_fma #( - parameter int unsigned ExpWidth = 10, - parameter int unsigned PrecBits = 24, - parameter int unsigned NumPipeRegs = 0, - parameter type FpType = logic, - parameter type TagType = logic, - parameter type AuxType = logic, - // Do not change - localparam int unsigned SUM_WIDTH = 3*PrecBits+3+1, - localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PrecBits + 3) -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic effective_subtraction_i, - input logic final_sign_i, - input logic signed [ExpWidth-1:0] exponent_product_i, - input logic signed [ExpWidth-1:0] exponent_difference_i, - input logic signed [ExpWidth-1:0] tentative_exponent_i, - input logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_i, - input logic sticky_before_add_i, - input logic [SUM_WIDTH-1:0] sum_i, - input fpnew_pkg::roundmode_e rnd_mode_i, - input fpnew_pkg::fp_format_e dst_fmt_i, - input logic result_is_special_i, - input FpType special_result_i, - input fpnew_pkg::status_t special_status_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic effective_subtraction_o, - output logic final_sign_o, - output logic signed [ExpWidth-1:0] exponent_product_o, - output logic signed [ExpWidth-1:0] exponent_difference_o, - output logic signed [ExpWidth-1:0] tentative_exponent_o, - output logic [SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_o, - output logic sticky_before_add_o, - output logic [SUM_WIDTH-1:0] sum_o, - output fpnew_pkg::roundmode_e rnd_mode_o, - output fpnew_pkg::fp_format_e dst_fmt_o, - output logic result_is_special_o, - output FpType special_result_o, - output fpnew_pkg::status_t special_status_o, - output TagType tag_o, - output AuxType aux_o, - // Output Handshake - output logic out_valid_o, - input logic out_ready_i, - // Status signal - output 
logic busy_o -); - - // Input signals for the next stage (= output signals of the previous stage) - logic [0:NumPipeRegs] effective_subtraction_d; - logic [0:NumPipeRegs] final_sign_d; - logic signed [0:NumPipeRegs][ExpWidth-1:0] exponent_product_d; - logic signed [0:NumPipeRegs][ExpWidth-1:0] exponent_difference_d; - logic signed [0:NumPipeRegs][ExpWidth-1:0] tentative_exponent_d; - logic [0:NumPipeRegs][SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_d; - logic [0:NumPipeRegs] sticky_before_add_d; - logic [0:NumPipeRegs][SUM_WIDTH-1:0] sum_d; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_d; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_d; - logic [0:NumPipeRegs] result_is_special_d; - FpType [0:NumPipeRegs] special_result_d; - fpnew_pkg::status_t [0:NumPipeRegs] special_status_d; - TagType [0:NumPipeRegs] tag_d; - AuxType [0:NumPipeRegs] aux_d; - logic [0:NumPipeRegs] valid_d; - // Ready signal is combinatorial for all stages - logic [0:NumPipeRegs] stage_ready; - - // Input stage: First element of pipeline is taken from inputs - assign effective_subtraction_d[0] = effective_subtraction_i; - assign final_sign_d[0] = final_sign_i; - assign exponent_product_d[0] = exponent_product_i; - assign exponent_difference_d[0] = exponent_difference_i; - assign tentative_exponent_d[0] = tentative_exponent_i; - assign addend_shamt_d[0] = addend_shamt_i; - assign sticky_before_add_d[0] = sticky_before_add_i; - assign sum_d[0] = sum_i; - assign rnd_mode_d[0] = rnd_mode_i; - assign dst_fmt_d[0] = dst_fmt_i; - assign result_is_special_d[0] = result_is_special_i; - assign special_result_d[0] = special_result_i; - assign special_status_d[0] = special_status_i; - assign tag_d[0] = tag_i; - assign aux_d[0] = aux_i; - assign valid_d[0] = in_valid_i; - - // Input stage: Propagate pipeline ready signal - assign in_ready_o = stage_ready[0]; - - // Generate the pipeline stages in case they are needed - if (NumPipeRegs > 0) begin : gen_pipeline - // Pipelined versions of signals for later 
stages - logic [0:NumPipeRegs] effective_subtraction_q; - logic [0:NumPipeRegs] final_sign_q; - logic signed [0:NumPipeRegs][ExpWidth-1:0] exponent_product_q; - logic signed [0:NumPipeRegs][ExpWidth-1:0] exponent_difference_q; - logic signed [0:NumPipeRegs][ExpWidth-1:0] tentative_exponent_q; - logic [0:NumPipeRegs][SHIFT_AMOUNT_WIDTH-1:0] addend_shamt_q; - logic [0:NumPipeRegs] sticky_before_add_q; - logic [0:NumPipeRegs][SUM_WIDTH-1:0] sum_q; - logic [0:NumPipeRegs][3*PrecBits+3:0] product_shifted_q; - logic [0:NumPipeRegs][3*PrecBits+3:0] addend_shifted_q; - logic [0:NumPipeRegs] inject_carry_in_q; - fpnew_pkg::roundmode_e [0:NumPipeRegs] rnd_mode_q; - fpnew_pkg::fp_format_e [0:NumPipeRegs] dst_fmt_q; - logic [0:NumPipeRegs] result_is_special_q; - FpType [0:NumPipeRegs] special_result_q; - fpnew_pkg::status_t [0:NumPipeRegs] special_status_q; - TagType [0:NumPipeRegs] tag_q; - AuxType [0:NumPipeRegs] aux_q; - logic [0:NumPipeRegs] valid_q; - - for (genvar i = 0; i < int'(NumPipeRegs); i++) begin : pipeline_stages - // Internal register enable for this stage - logic reg_ena; - - // Next state from previous register to form a shift register - assign effective_subtraction_d[i+1] = effective_subtraction_q[i]; - assign final_sign_d[i+1] = final_sign_q[i]; - assign exponent_product_d[i+1] = exponent_product_q[i]; - assign exponent_difference_d[i+1] = exponent_difference_q[i]; - assign tentative_exponent_d[i+1] = tentative_exponent_q[i]; - assign addend_shamt_d[i+1] = addend_shamt_q[i]; - assign sticky_before_add_d[i+1] = sticky_before_add_q[i]; - assign sum_d[i+1] = sum_q[i]; - assign rnd_mode_d[i+1] = rnd_mode_q[i]; - assign dst_fmt_d[i+1] = dst_fmt_q[i]; - assign result_is_special_d[i+1] = result_is_special_q[i]; - assign special_result_d[i+1] = special_result_q[i]; - assign special_status_d[i+1] = special_status_q[i]; - assign tag_d[i+1] = tag_q[i]; - assign aux_d[i+1] = aux_q[i]; - assign valid_d[i+1] = valid_q[i]; - - // Determine the ready signal of the current 
stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign stage_ready[i] = stage_ready[i+1] | ~valid_q[i]; - - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(valid_q[i], valid_d[i], stage_ready[i], flush_i, 1'b0, clk_i, rst_ni) - - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = stage_ready[i] & valid_d[i]; - - // Generate the pipeline registers within the stages, use enable-registers - `FFL(effective_subtraction_q[i], effective_subtraction_d[i], reg_ena, '0) - `FFL(final_sign_q[i], final_sign_d[i], reg_ena, '0) - `FFL(exponent_product_q[i], exponent_product_d[i], reg_ena, '0) - `FFL(exponent_difference_q[i], exponent_difference_d[i], reg_ena, '0) - `FFL(tentative_exponent_q[i], tentative_exponent_d[i], reg_ena, '0) - `FFL(addend_shamt_q[i], addend_shamt_d[i], reg_ena, '0) - `FFL(sticky_before_add_q[i], sticky_before_add_d[i], reg_ena, '0) - `FFL(sum_q[i], sum_d[i], reg_ena, '0) - `FFL(rnd_mode_q[i], rnd_mode_d[i], reg_ena, fpnew_pkg::RNE) - `FFL(dst_fmt_q[i], dst_fmt_d[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(result_is_special_q[i], result_is_special_d[i], reg_ena, '0) - `FFL(special_result_q[i], special_result_d[i], reg_ena, '0) - `FFL(special_status_q[i], special_status_d[i], reg_ena, '0) - `FFL(tag_q[i], tag_d[i], reg_ena, '0) - `FFL(aux_q[i], aux_d[i], reg_ena, '0) - end - end - - // Output stage: bind last stage outputs to module output. Directly connects to input if no regs. 
- assign effective_subtraction_o = effective_subtraction_d[NumPipeRegs]; - assign final_sign_o = final_sign_d[NumPipeRegs]; - assign exponent_product_o = exponent_product_d[NumPipeRegs]; - assign exponent_difference_o = exponent_difference_d[NumPipeRegs]; - assign tentative_exponent_o = tentative_exponent_d[NumPipeRegs]; - assign addend_shamt_o = addend_shamt_d[NumPipeRegs]; - assign sticky_before_add_o = sticky_before_add_d[NumPipeRegs]; - assign sum_o = sum_d[NumPipeRegs]; - assign rnd_mode_o = rnd_mode_d[NumPipeRegs]; - assign dst_fmt_o = dst_fmt_d[NumPipeRegs]; - assign result_is_special_o = result_is_special_d[NumPipeRegs]; - assign special_result_o = special_result_d[NumPipeRegs]; - assign special_status_o = special_status_d[NumPipeRegs]; - assign tag_o = tag_d[NumPipeRegs]; - assign aux_o = aux_d[NumPipeRegs]; - assign out_valid_o = valid_d[NumPipeRegs]; - - // Output stage: Ready travels backwards from output side - assign stage_ready[NumPipeRegs] = out_ready_i; - - // The pipeline is considered busy if any valid data is in flight - assign busy_o = (| valid_d); - -endmodule diff --git a/src/fpnew_pipe_out.sv b/src/fpnew_pipe_out.sv deleted file mode 100644 index 686af4bd..00000000 --- a/src/fpnew_pipe_out.sv +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2019 ETH Zurich and University of Bologna. -// -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. 
- -// Author: Stefan Mach - -// Generate pipeline stages as given by NumPipeRegs. When NumPipeRegs is 0, no registers are -// generated. -// +---------|---------|---------|----------|-------------------+ -// | _d[0] | _d[1] | _d[2] | _d[..] | _d[NumPipeRegs] | -// | | _q[0] | _q[1] | _q[..-1] | _q[NumPipeRegs-1] | -// inputs_i >=========|=========|=========|====~~====|===================> inputs_o -// in_valid_i >---------|---------|---------|----~~----|-------------------> out_valid_o -// in_ready_o <---------+---------+---------+----~~----+-------------------< out_ready_i -// | | | | | | -// stage # +----0----|----1----|----2----|----..----|----NumPipeRegs----+ -// -// NOTE: These registers must be retimed in synthesis for sensible pipelining. Make sure to -// optimize registers through the instantiating hierarchy. -// The ready signal is not a direct feed-through from destination to source but takes into account -// intermediate 'bubbles' in the pipeline. As such, downstream stalls can be hidden when the -// pipeline is not full. -// Enable signals on the registers will lead to clock-gated pipeline stages when this optimization -// is enabled during synthesis. Make sure to optimize clock gates through hierarchies. 
- -`include "common_cells/registers.svh" - -module fpnew_pipe_out #( - parameter int unsigned Width = 32, - parameter int unsigned NumPipeRegs = 0, - parameter type TagType = logic, - parameter type AuxType = logic -) ( - input logic clk_i, - input logic rst_ni, - // Input signals - input logic [Width-1:0] result_i, - input fpnew_pkg::status_t status_i, - input logic extension_bit_i, - input fpnew_pkg::classmask_e class_mask_i, - input logic is_class_i, - input TagType tag_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, - // Output signals - output logic [Width-1:0] result_o, - output fpnew_pkg::status_t status_o, - output logic extension_bit_o, - output fpnew_pkg::classmask_e class_mask_o, - output logic is_class_o, - output TagType tag_o, - output AuxType aux_o, - // Output Handshake - output logic out_valid_o, - input logic out_ready_i, - // Status signal - output logic busy_o -); - - // Input signals for the next stage (= output signals of the previous stage) - logic [NumPipeRegs:0][Width-1:0] result_d; - fpnew_pkg::status_t [NumPipeRegs:0] status_d; - logic [NumPipeRegs:0] extension_bit_d; - fpnew_pkg::classmask_e [NumPipeRegs:0] class_mask_d; - logic [NumPipeRegs:0] is_class_d; - TagType [NumPipeRegs:0] tag_d; - AuxType [NumPipeRegs:0] aux_d; - logic [NumPipeRegs:0] valid_d; - // Ready signal is combinatorial for all stages - logic [NumPipeRegs:0] stage_ready; - - // Input stage: First element of pipeline is taken from inputs - assign result_d[0] = result_i; - assign status_d[0] = status_i; - assign extension_bit_d[0] = extension_bit_i; - assign class_mask_d[0] = class_mask_i; - assign is_class_d[0] = is_class_i; - assign tag_d[0] = tag_i; - assign aux_d[0] = aux_i; - assign valid_d[0] = in_valid_i; - - // Input stage: Propagate ready signal from pipeline - assign in_ready_o = stage_ready[0]; - - // Generate the pipeline stages in case they are needed - if (NumPipeRegs > 0) begin : 
gen_pipeline - // Pipelined versions of signals for later stages - logic [NumPipeRegs-1:0][Width-1:0] result_q; - fpnew_pkg::status_t [NumPipeRegs-1:0] status_q; - logic [NumPipeRegs-1:0] extension_bit_q; - fpnew_pkg::classmask_e [NumPipeRegs-1:0] class_mask_q; - logic [NumPipeRegs-1:0] is_class_q; - TagType [NumPipeRegs-1:0] tag_q; - AuxType [NumPipeRegs-1:0] aux_q; - logic [NumPipeRegs-1:0] valid_q; - - for (genvar i = 0; i < NumPipeRegs; i++) begin : pipeline_stages - // Internal register enable for this stage -> creates gated registers if supported in synth - logic reg_ena; - - // Next state from previous register to form a shift register - assign result_d[i+1] = result_q[i]; - assign status_d[i+1] = status_q[i]; - assign extension_bit_d[i+1] = extension_bit_q[i]; - assign class_mask_d[i+1] = class_mask_q[i]; - assign is_class_d[i+1] = is_class_q[i]; - assign tag_d[i+1] = tag_q[i]; - assign aux_d[i+1] = aux_q[i]; - assign valid_d[i+1] = valid_q[i]; - - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage register only holds a bubble (not valid) -> we can pop it - assign stage_ready[i] = stage_ready[i+1] | ~valid_q[i]; - - // Valid registers: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(valid_q[i], valid_d[i], stage_ready[i], flush_i, 1'b0, clk_i, rst_ni) - - // Enable the payload registers if pipleine ready and a valid data item is present (gating) - assign reg_ena = stage_ready[i] & valid_d[i]; - - // Generate the pipeline registers within the stages, use enable-registers - `FFL(result_q[i], result_d[i], reg_ena, '0) - `FFL(status_q[i], status_d[i], reg_ena, '0) - `FFL(extension_bit_q[i], extension_bit_d[i], reg_ena, '0) - `FFL(class_mask_q[i], class_mask_d[i], reg_ena, fpnew_pkg::QNAN) - `FFL(is_class_q[i], is_class_d[i], reg_ena, '0) - `FFL(tag_q[i], tag_d[i], reg_ena, '0) - `FFL(aux_q[i], aux_d[i], reg_ena, '0) - end - end - - // Output stage: bind last stage outputs to module output. Directly connects to input if no regs. - assign result_o = result_d[NumPipeRegs]; - assign status_o = status_d[NumPipeRegs]; - assign extension_bit_o = extension_bit_d[NumPipeRegs]; - assign class_mask_o = class_mask_d[NumPipeRegs]; - assign is_class_o = is_class_d[NumPipeRegs]; - assign tag_o = tag_d[NumPipeRegs]; - assign aux_o = aux_d[NumPipeRegs]; - assign out_valid_o = valid_d[NumPipeRegs]; - - // Output stage: Ready travels backwards from output side - assign stage_ready[NumPipeRegs] = out_ready_i; - - // The pipeline is considered busy if any valid data is in flight - assign busy_o = (| valid_d); - -endmodule diff --git a/src/fpnew_pkg.sv b/src/fpnew_pkg.sv index 3d1152c3..734ca96b 100644 --- a/src/fpnew_pkg.sv +++ b/src/fpnew_pkg.sv @@ -444,25 +444,28 @@ package fpnew_pkg; return res; endfunction - - - function automatic logic any_enabled_multi(fmt_unit_types_t types); + // Return whether any active format is set as MERGED + function automatic logic any_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg); for (int 
unsigned i = 0; i < NUM_FP_FORMATS; i++) - if (types[i] == MERGED) + if (cfg[i] && types[i] == MERGED) return 1'b1; return 1'b0; endfunction - function automatic logic is_first_enabled_multi(fp_format_e fmt, fmt_unit_types_t types); + // Return whether the given format is the first active one set as MERGED + function automatic logic is_first_enabled_multi(fp_format_e fmt, + fmt_unit_types_t types, + fmt_logic_t cfg); for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin - if (types[i] == MERGED) return (fp_format_e'(i) == fmt); + if (cfg[i] && types[i] == MERGED) return (fp_format_e'(i) == fmt); end return 1'b0; endfunction - function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types); + // Returns the first format that is active and is set as MERGED + function automatic fp_format_e get_first_enabled_multi(fmt_unit_types_t types, fmt_logic_t cfg); for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) - if (types[i] == MERGED) + if (cfg[i] && types[i] == MERGED) return fp_format_e'(i); return fp_format_e'(0); endfunction diff --git a/src/fpu_div_sqrt_mvp b/src/fpu_div_sqrt_mvp index 08e70e1c..83a601f9 160000 --- a/src/fpu_div_sqrt_mvp +++ b/src/fpu_div_sqrt_mvp @@ -1 +1 @@ -Subproject commit 08e70e1c176837d791942cac8e0d36ba782a038b +Subproject commit 83a601f97934ed5e06d737b9c80d98b08867c5fa diff --git a/src_files.yml b/src_files.yml index e476c378..1931258f 100644 --- a/src_files.yml +++ b/src_files.yml @@ -13,10 +13,6 @@ fpnew: src/fpnew_opgroup_block.sv, src/fpnew_opgroup_fmt_slice.sv, src/fpnew_opgroup_multifmt_slice.sv, - src/fpnew_pipe_in.sv, - src/fpnew_pipe_out.sv, - src/fpnew_pipe_inside_fma.sv, - src/fpnew_pipe_inside_cast.sv, src/fpnew_rounding.sv, src/fpnew_top.sv, ]