diff --git a/Bender.yml b/Bender.yml index b635aa07..ab2bc73c 100644 --- a/Bender.yml +++ b/Bender.yml @@ -37,6 +37,8 @@ sources: - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v + - src/fpnew_aux.sv + - src/fpnew_aux_fsm.sv - src/fpnew_divsqrt_th_32.sv - src/fpnew_divsqrt_th_64_multi.sv - src/fpnew_divsqrt_multi.sv diff --git a/src/fpnew_aux.sv b/src/fpnew_aux.sv new file mode 100644 index 00000000..28059db5 --- /dev/null +++ b/src/fpnew_aux.sv @@ -0,0 +1,122 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Maurus Item +// +// Description Aux chain for FPNew, handles transmitting of shared handshake and aux data +// And enables the correct lanes so they always stay in sync. 
+
+`include "common_cells/registers.svh"
+
+module fpnew_aux #(
+  parameter int unsigned NumPipeRegs = 0,
+  parameter type         TagType     = logic,
+  parameter type         AuxType     = logic,
+  parameter int unsigned NumLanes    = 1
+) (
+  input  logic                                 clk_i,
+  input  logic                                 rst_ni,
+  // Input signals
+  input  TagType                               tag_i,
+  input  AuxType                               aux_i,
+  input  logic                                 is_vector_i,
+  input  logic [NumLanes-1:0]                  lane_active_i,
+  // Input Handshake
+  input  logic                                 in_valid_i,
+  output logic                                 in_ready_o,
+  input  logic                                 flush_i,
+  // Output signals
+  output TagType                               tag_o,
+  output AuxType                               aux_o,
+  output logic                                 is_vector_o,
+  output logic [NumLanes-1:0]                  lane_active_o,
+  // Output handshake
+  output logic                                 out_valid_o,
+  input  logic                                 out_ready_i,
+  // Register Enable for Lanes
+  output logic [NumPipeRegs-1:0]               reg_enable_o,
+  output logic [NumPipeRegs-1:0]               vector_reg_enable_o,
+  output logic [NumLanes-1:0][NumPipeRegs-1:0] lane_reg_enable_o,
+  // Indication of valid data in flight
+  output logic                                 busy_o
+);
+
+  // ---------------
+  // Pipeline chain
+  // ---------------
+  // Position k of each array carries the value seen after k register stages:
+  // index 0 is the module input, index NumPipeRegs is the module output.
+  logic   [0:NumPipeRegs]               valid;
+  logic   [0:NumPipeRegs]               ready; // combinatorial for all stages
+  TagType [0:NumPipeRegs]               tag;
+  AuxType [0:NumPipeRegs]               aux;
+  logic   [0:NumPipeRegs]               is_vector;
+  logic   [0:NumPipeRegs][NumLanes-1:0] lane_active;
+
+  // Head of the chain: taken straight from the input ports
+  assign valid      [0] = in_valid_i;
+  assign tag        [0] = tag_i;
+  assign aux        [0] = aux_i;
+  assign is_vector  [0] = is_vector_i;
+  assign lane_active[0] = lane_active_i;
+
+  // Tail of the chain: ready is driven by downstream, the rest drives the output ports
+  assign ready[NumPipeRegs] = out_ready_i;
+  assign out_valid_o        = valid      [NumPipeRegs];
+  assign tag_o              = tag        [NumPipeRegs];
+  assign aux_o              = aux        [NumPipeRegs];
+  assign is_vector_o        = is_vector  [NumPipeRegs];
+  assign lane_active_o      = lane_active[NumPipeRegs];
+
+  // Back-pressure towards the upstream circuitry
+  assign in_ready_o = ready[0];
+
+  // Busy if any position of the chain (the input position included) is valid
+  assign busy_o = |valid;
+
+  // ----------------
+  // Register stages
+  // ----------------
+  for (genvar s = 0; s < NumPipeRegs; s++) begin : gen_input_pipeline
+
+    // Load strobe shared by every register of this stage
+    logic stage_ena;
+
+    // A stage can take new data when its successor is ready for our data, or when
+    // the successor only holds a bubble (not valid) that can be overwritten
+    assign ready[s] = ~valid[s+1] | ready[s+1];
+
+    // Registers load only if the pipeline is ready and a valid data item is present
+    assign stage_ena = valid[s] & ready[s];
+
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(valid[s+1], valid[s], ready[s], flush_i, 1'b0, clk_i, rst_ni)
+
+    // Aux data registers of this stage, all loaded by the same strobe
+    `FFL(        tag[s+1],         tag[s], stage_ena, TagType'('0))
+    `FFL(        aux[s+1],         aux[s], stage_ena, AuxType'('0))
+    `FFL(  is_vector[s+1],   is_vector[s], stage_ena, '0          )
+    `FFL(lane_active[s+1], lane_active[s], stage_ena, '0          )
+
+    // Enable for external registers that load on every operation
+    assign reg_enable_o[s] = stage_ena;
+
+    // Enable for external vector registers: only if the operation is a vector
+    assign vector_reg_enable_o[s] = stage_ena & is_vector[s];
+
+    // Per-lane enables: only for lanes marked active at this stage
+    for (genvar lane = 0; lane < NumLanes; lane++) begin
+      assign lane_reg_enable_o[lane][s] = stage_ena & lane_active[s][lane];
+    end
+  end
+endmodule
diff --git a/src/fpnew_aux_fsm.sv b/src/fpnew_aux_fsm.sv
new file mode 100644
index 00000000..7ab7763f
--- /dev/null
+++ b/src/fpnew_aux_fsm.sv
@@ -0,0 +1,306 @@
+// Copyright 2024 ETH Zurich and University of Bologna.
+//
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Maurus Item
+//
+// Description Aux chain for FPNew, handles transmitting of shared handshake and aux data
+// And enables the correct lanes so they always stay in sync.
+// This version can be used for lanes that have some form of FSM in them and only eventually are ready
+
+`include "common_cells/registers.svh"
+
+module fpnew_aux_fsm #(
+  parameter int unsigned             NumPipeRegs = 0,
+  parameter fpnew_pkg::pipe_config_t PipeConfig  = fpnew_pkg::BEFORE,
+  parameter type                     TagType     = logic,
+  parameter type                     AuxType     = logic,
+  parameter int unsigned             NumLanes    = 1
+) (
+  input  logic                                 clk_i,
+  input  logic                                 rst_ni,
+  // Input signals
+  input  TagType                               tag_i,
+  input  AuxType                               aux_i,
+  input  logic                                 is_vector_i,
+  input  logic [NumLanes-1:0]                  lane_active_i,
+  // Input Handshake
+  input  logic                                 in_valid_i,
+  output logic                                 in_ready_o,
+  input  logic                                 flush_i,
+  // Output signals
+  output TagType                               tag_o,
+  output AuxType                               aux_o,
+  output logic                                 is_vector_o,
+  output logic [NumLanes-1:0]                  lane_active_o,
+  // Output handshake
+  output logic                                 out_valid_o,
+  input  logic                                 out_ready_i,
+  // Register Enable for Lanes
+  output logic [NumPipeRegs-1:0]               reg_enable_o,
+  output logic [NumPipeRegs-1:0]               vector_reg_enable_o,
+  output logic [NumLanes-1:0][NumPipeRegs-1:0] lane_reg_enable_o,
+  // Signals for the Lane FSMs
+  // Signal to start the FSM, will be asserted for one cycle (never during a flush)
+  output logic [NumLanes-1:0]                  lane_fsm_start_o,
+  // Signal that the FSM finished its operation, should be asserted continuously
+  input  logic [NumLanes-1:0]                  lane_fsm_ready_i,
+  // Indication of valid data in flight
+  output logic                                 busy_o
+);
+
+  // ----------
+  // Pipeline Distribution
+  // ----------
+  // This must match between this module and modules that use this module as reg enable input!
+  localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? (NumPipeRegs / 2) // Last to get distributed regs
+                               : 0); // Always have one reg to use for FSM Input
+  localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE)
+                            ? NumPipeRegs
+                            : (PipeConfig == fpnew_pkg::DISTRIBUTED
+                               ? ((NumPipeRegs + 1) / 2) // First to get distributed regs
+                               : 0); // no regs here otherwise
+
+  // ---------------
+  // Input pipeline
+  // ---------------
+  // Input pipeline signals, index i holds signal after i register stages
+  TagType [0:NUM_INP_REGS]               in_tag;
+  AuxType [0:NUM_INP_REGS]               in_aux;
+  logic   [0:NUM_INP_REGS]               in_is_vector;
+  logic   [0:NUM_INP_REGS][NumLanes-1:0] in_lane_active;
+  logic   [0:NUM_INP_REGS]               in_valid;
+
+  // Ready signal is combinatorial for all stages
+  logic   [0:NUM_INP_REGS]               in_ready;
+
+  // First element of pipeline is taken from inputs
+  assign in_tag        [0] = tag_i;
+  assign in_aux        [0] = aux_i;
+  assign in_is_vector  [0] = is_vector_i;
+  assign in_valid      [0] = in_valid_i;
+  assign in_lane_active[0] = lane_active_i;
+
+  // Propagate pipeline ready signal to upstream circuitry
+  assign in_ready_o = in_ready[0];
+
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline
+
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign in_ready[i] = in_ready[i+1] | ~in_valid[i+1];
+
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(in_valid[i+1], in_valid[i], in_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = in_ready[i] & in_valid[i];
+
+    // Drive external registers with reg enable
+    assign reg_enable_o[i] = reg_ena;
+
+    // Drive external vector registers with reg enable if operation is a vector
+    assign vector_reg_enable_o[i] = reg_ena & in_is_vector[i];
+    for (genvar l = 0; l < NumLanes; l++) begin
+      assign lane_reg_enable_o[l][i] = reg_ena & in_lane_active[i][l];
+    end
+
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(        in_tag[i+1],         in_tag[i], reg_ena, TagType'('0))
+    `FFL(        in_aux[i+1],         in_aux[i], reg_ena, AuxType'('0))
+    `FFL(  in_is_vector[i+1],   in_is_vector[i], reg_ena, '0          )
+    `FFL(in_lane_active[i+1], in_lane_active[i], reg_ena, '0          )
+  end
+
+  // ----------
+  // Global FSM
+  // ----------
+  // FSM states: IDLE = accepting input, BUSY = waiting for the lanes, HOLD = result stalled downstream
+  typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
+  fsm_state_e state_q, state_d;
+
+  // Input & Output Handshake
+  logic fsm_in_valid, fsm_in_ready;
+  logic fsm_out_valid, fsm_out_ready;
+
+  // Synchronization signals
+  logic fsm_start, fsm_ready, fsm_busy;
+
+  // Data holding signals
+  TagType              held_tag;
+  AuxType              held_aux;
+  logic                held_is_vector;
+  logic [NumLanes-1:0] held_lane_active;
+
+  // Upstream Handshake Connection
+  assign fsm_in_valid           = in_valid[NUM_INP_REGS];
+  assign in_ready[NUM_INP_REGS] = fsm_in_ready;
+
+  // Done when every lane reports ready (all lanes are ANDed, not only the active ones)
+  assign fsm_ready = &lane_fsm_ready_i;
+
+  // FSM to safely apply and receive data from DIVSQRT unit
+  always_comb begin : flag_fsm
+    // Default assignments
+    fsm_out_valid = 1'b0;
+    fsm_in_ready  = 1'b0;
+    fsm_start     = 1'b0;
+    fsm_busy      = 1'b0;
+    state_d       = state_q;
+
+    unique case (state_q)
+      IDLE: begin
+        fsm_in_ready = '1;
+        if (fsm_in_valid) begin
+          state_d   = BUSY;
+          fsm_start = 1'b1;
+        end
+      end
+      BUSY: begin
+        fsm_busy = 1'b1;
+        // Once every lane reports ready, offer the result to the output pipeline
+        if (fsm_ready) begin
+          fsm_out_valid = 1'b1;
+          if (fsm_out_ready) begin
+            fsm_in_ready = 1'b1;
+            if (fsm_in_valid) begin
+              state_d   = BUSY;
+              fsm_start = 1'b1;
+            end else begin
+              state_d = IDLE;
+            end
+          end else begin
+            state_d = HOLD;
+          end
+        end
+      end
+      HOLD: begin
+        // Exact same as BUSY, but outer condition is already given
+        fsm_out_valid = 1'b1;
+        if (fsm_out_ready) begin
+          fsm_in_ready = 1'b1;
+          if (fsm_in_valid) begin
+            state_d   = BUSY;
+            fsm_start = 1'b1;
+          end else begin
+            state_d = IDLE;
+          end
+        end else begin
+          state_d = HOLD;
+        end
+      end
+
+      // fall into idle state otherwise
+      default: state_d = IDLE;
+    endcase
+
+    // Flushing overrides the other actions: cancel the pending result and suppress the
+    // start strobe, so an input accepted during the flush cycle is dropped instead of
+    // launching a lane operation that this FSM (back in IDLE) would no longer track.
+    if (flush_i) begin
+      fsm_out_valid = 1'b0;
+      fsm_start     = 1'b0;
+      state_d       = IDLE;
+    end
+  end
+
+  `FF(state_q, state_d, IDLE)
+
+  // Start Lanes when FSM starts and lane is active
+  for (genvar l = 0; l < NumLanes; l++) begin
+    assign lane_fsm_start_o[l] = fsm_start && in_lane_active[NUM_INP_REGS][l];
+  end
+
+  // ----------------
+  // Data Holding FFs
+  // ----------------
+  `FFL(        held_tag,         in_tag[NUM_INP_REGS], fsm_start, TagType'('0))
+  `FFL(        held_aux,         in_aux[NUM_INP_REGS], fsm_start, AuxType'('0))
+  `FFL(  held_is_vector,   in_is_vector[NUM_INP_REGS], fsm_start, '0          )
+  `FFL(held_lane_active, in_lane_active[NUM_INP_REGS], fsm_start, '0          )
+
+  // ---------------
+  // Output pipeline
+  // ---------------
+  // Output pipeline signals, index i holds signal after i register stages
+  TagType [0:NUM_OUT_REGS]               out_tag;
+  AuxType [0:NUM_OUT_REGS]               out_aux;
+  logic   [0:NUM_OUT_REGS]               out_is_vector;
+  logic   [0:NUM_OUT_REGS][NumLanes-1:0] out_lane_active;
+  logic   [0:NUM_OUT_REGS]               out_valid;
+
+  // Ready signal is combinatorial for all stages
+  logic   [0:NUM_OUT_REGS]               out_ready;
+
+  // Connect to upstream Handshake
+  assign out_valid[0]  = fsm_out_valid;
+  assign fsm_out_ready = out_ready[0];
+
+  // Connect to Hold Register
+  assign out_tag        [0] = held_tag;
+  assign out_aux        [0] = held_aux;
+  assign out_is_vector  [0] = held_is_vector;
+  assign out_lane_active[0] = held_lane_active;
+
+  // Generate the register stages
+  for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline
+
+    // Internal register enable for this stage
+    logic reg_ena;
+    // Determine the ready signal of the current stage - advance the pipeline:
+    // 1. if the next stage is ready for our data
+    // 2. if the next stage only holds a bubble (not valid) -> we can pop it
+    assign out_ready[i] = out_ready[i+1] | ~out_valid[i+1];
+
+    // Valid: enabled by ready signal, synchronous clear with the flush signal
+    `FFLARNC(out_valid[i+1], out_valid[i], out_ready[i], flush_i, 1'b0, clk_i, rst_ni)
+
+    // Enable register if pipeline ready and a valid data item is present
+    assign reg_ena = out_ready[i] & out_valid[i];
+
+    // Drive external registers with reg enable
+    assign reg_enable_o[NUM_INP_REGS + i] = reg_ena;
+
+    // Drive external vector registers with reg enable if operation is a vector
+    assign vector_reg_enable_o[NUM_INP_REGS + i] = reg_ena & out_is_vector[i];
+    for (genvar l = 0; l < NumLanes; l++) begin
+      assign lane_reg_enable_o[l][NUM_INP_REGS + i] = reg_ena & out_lane_active[i][l];
+    end
+
+    // Generate the pipeline registers within the stages, use enable-registers
+    `FFL(        out_tag[i+1],         out_tag[i], reg_ena, TagType'('0))
+    `FFL(        out_aux[i+1],         out_aux[i], reg_ena, AuxType'('0))
+    `FFL(  out_is_vector[i+1],   out_is_vector[i], reg_ena, '0          )
+    `FFL(out_lane_active[i+1], out_lane_active[i], reg_ena, '0          )
+  end
+
+  // Ready travels backwards from output side, driven by downstream circuitry
+  assign out_ready[NUM_OUT_REGS] = out_ready_i;
+
+  // Assign module outputs
+  assign tag_o         = out_tag        [NUM_OUT_REGS];
+  assign aux_o         = out_aux        [NUM_OUT_REGS];
+  assign is_vector_o   = out_is_vector  [NUM_OUT_REGS];
+  assign out_valid_o   = out_valid      [NUM_OUT_REGS];
+  assign lane_active_o = out_lane_active[NUM_OUT_REGS];
+
+  // Assign output Flags: Busy if any element inside the pipe is valid
+  assign busy_o = |in_valid | |out_valid | fsm_busy;
+
+endmodule
diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv
index fca5f3b6..59827da4 100644
--- a/src/fpnew_cast_multi.sv
+++ b/src/fpnew_cast_multi.sv
@@ -21,8 +21,7 @@ module fpnew_cast_multi #(
   // FPU configuration
   parameter int unsigned NumPipeRegs = 0,
   parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
-  parameter type TagType = logic,
-  parameter type AuxType = logic,
+
   // Do not change
   localparam int unsigned WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig),
     fpnew_pkg::max_int_width(IntFmtConfig)),
@@ -39,25 +38,14 @@ module fpnew_cast_multi #(
   input fpnew_pkg::fp_format_e src_fmt_i,
   input fpnew_pkg::fp_format_e dst_fmt_i,
   input fpnew_pkg::int_format_e int_fmt_i,
-  input TagType tag_i,
   input logic mask_i,
-  input AuxType aux_i,
-  // Input Handshake
-  input logic in_valid_i,
-  output logic in_ready_o,
-  input logic flush_i,
   // Output signals
   output logic [WIDTH-1:0] result_o,
   output fpnew_pkg::status_t status_o,
   output logic extension_bit_o,
-  output TagType tag_o,
   output logic mask_o,
-  output AuxType aux_o,
-  // Output handshake
-  output logic out_valid_o,
-  input logic out_ready_i,
-  // Indication of valid data in flight
-  output logic busy_o
+  // External Register Control
+  input logic[NumPipeRegs-1:0] reg_enable_i
 );
 
   // ----------
@@ -117,12 +105,7 @@ module fpnew_cast_multi #(
   fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q;
   fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q;
   fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q;
-  TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
   logic [0:NUM_INP_REGS] inp_pipe_mask_q;
-  AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
-  logic [0:NUM_INP_REGS] inp_pipe_valid_q;
-  // Ready signal is combinatorial for all stages
-  logic [0:NUM_INP_REGS] inp_pipe_ready;
 
   //
Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -133,24 +116,14 @@ module fpnew_cast_multi #( assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; assign inp_pipe_int_fmt_q[0] = int_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -160,9 +133,7 @@ module fpnew_cast_multi #( `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], 
inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; @@ -318,9 +289,8 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e src_fmt_q2; fpnew_pkg::fp_format_e dst_fmt_q2; fpnew_pkg::int_format_e int_fmt_q2; - // Internal pipeline signals, index i holds signal after i register stages - + // Internal pipeline signals, index i holds signal after i register stages logic [0:NUM_MID_REGS] mid_pipe_input_sign_q; logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q; logic [0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q; @@ -334,12 +304,7 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_input_sign_q[0] = input_sign; @@ -355,25 +320,14 @@ module fpnew_cast_multi #( assign mid_pipe_src_fmt_q[0] = src_fmt_q; assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; assign mid_pipe_int_fmt_q[0] = int_fmt_q; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable 
for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0) `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0) @@ -388,9 +342,7 @@ module fpnew_cast_multi #( `FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS]; @@ -749,52 +701,30 @@ module fpnew_cast_multi #( logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - 
logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; assign out_pipe_ext_bit_q[0] = extension_bit; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by 
downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS]; - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index ac23c43e..71dfe5b7 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -20,8 +20,6 @@ module fpnew_divsqrt_multi #( // FPU configuration parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -34,30 +32,17 @@ module fpnew_divsqrt_multi #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input fpnew_pkg::fp_format_e dst_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - input logic vectorial_op_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - output logic divsqrt_done_o, - input logic simd_synch_done_i, - output logic divsqrt_ready_o, - input logic simd_synch_rdy_i, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input 
logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -83,61 +68,39 @@ module fpnew_divsqrt_multi #( fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; fpnew_pkg::fp_format_e dst_fmt_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_vec_op_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_vec_op_q[0] = vectorial_op_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to upstream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) - `FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = inp_pipe_op_q[NUM_INP_REGS]; assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -164,126 +127,19 @@ module fpnew_divsqrt_multi #( divsqrt_operands[1] = input_is_fp8 ? 
operands_q[1] << 8 : operands_q[1]; end - // ------------ - // Control FSM - // ------------ - - logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done, unit_done_q; // status signals from unit instance - logic op_starting; // high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic unit_busy; // valid data in flight - logic simd_synch_done; - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. - assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; - assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; - assign op_starting = div_valid | sqrt_valid; - - // Hold additional information while the operation is in progress - logic result_is_fp8_q; - TagType result_tag_q; - logic result_mask_q; - AuxType result_aux_q; - logic result_vec_op_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_is_fp8_q, input_is_fp8, op_starting, '0) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) - - // Wait for other lanes only if the operation is vectorial - assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q; - - // Valid synch with other lanes - // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes - // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); - // Tell the other units that this unit 
has finished now or in the past - assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; - - // Ready synch with other lanes - // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes - assign divsqrt_ready_o = in_ready; - // Upstream ready comes from sanitization FSM, and it is synched among all the lanes - assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? simd_synch_rdy_i : in_ready; - - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - in_ready = 1'b1; // we're ready - if (in_valid_q && unit_ready) begin // New work arrives - state_d = BUSY; // go into processing state - end - end - // Operation in progress - BUSY: begin - unit_busy = 1'b1; // data in flight - // If all the lanes are done with processing - if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - in_ready = 1'b1; // we acknowledge the instruction - if (in_valid_q && unit_ready) begin // ..unless new work comes in - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. 
- if (in_valid_q && unit_ready) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) + assign div_valid = (op_q == fpnew_pkg::DIV) & fsm_start_i; + assign sqrt_valid = (op_q != fpnew_pkg::DIV) & fsm_start_i; // ----------------- // DIVSQRT instance // ----------------- - logic [63:0] unit_result; - logic [WIDTH-1:0] adjusted_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; - logic hold_en; + logic [63:0] raw_unit_result; + logic [WIDTH-1:0] unit_result; + logic unit_done; + fpnew_pkg::status_t unit_status; div_sqrt_top_mvp i_divsqrt_lei ( .Clk_CI ( clk_i ), @@ -296,30 +152,28 @@ module fpnew_divsqrt_multi #( .Precision_ctl_SI ( '0 ), .Format_sel_SI ( divsqrt_fmt ), .Kill_SI ( flush_i ), - .Result_DO ( unit_result ), + .Result_DO ( raw_unit_result ), .Fflags_SO ( unit_status ), - .Ready_SO ( unit_ready ), + .Ready_SO ( fsm_ready_o ), .Done_SO ( unit_done ) ); // Adjust result width and fix FP8 - assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result; + assign unit_result = input_is_fp8 ? 
raw_unit_result >> 8 : raw_unit_result; - // Hold the result when one lane has finished execution, except when all the lanes finish together, - // or the operation is not vectorial, and the result can be accepted downstream - assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready); - // The Hold register (load, no reset) - `FFLNR(held_result_q, adjusted_result, hold_en, clk_i) - `FFLNR(held_status_q, unit_status, hold_en, clk_i) + // ---------------- + // Hold Result + // ---------------- + logic [WIDTH-1:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; + + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], fsm_start_i, '0); // Mask is stored on start -> Dont need a bypass mux - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d = unit_done_q ? held_result_q : adjusted_result; - assign status_d = unit_done_q ? held_status_q : unit_status; + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? 
unit_status : held_status; // ---------------- // Output Pipeline @@ -327,50 +181,28 @@ module fpnew_divsqrt_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_divsqrt_th_32.sv b/src/fpnew_divsqrt_th_32.sv index 71d23068..f4f6bb44 100644 --- a/src/fpnew_divsqrt_th_32.sv +++ b/src/fpnew_divsqrt_th_32.sv @@ -23,8 +23,6 @@ module fpnew_divsqrt_th_32 #( // FPU configuration parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not 
change localparam int unsigned WIDTH = 32, localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -36,25 +34,17 @@ module fpnew_divsqrt_th_32 #( input logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -79,73 +69,45 @@ module fpnew_divsqrt_th_32 #( logic [1:0][WIDTH-1:0] operands_q; fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate 
pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = inp_pipe_op_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; - // ------------ - // Control FSM - // ------------ - logic in_ready; // input handshake with upstream + // ----------------- + // Input processing + // ----------------- logic div_op, sqrt_op; // input signalling with unit - logic unit_ready_q, unit_done; // status signals from unit instance logic op_starting; 
// high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic hold_result; // whether to put result into hold register - logic data_is_held; // data in hold register is valid - logic unit_busy; // valid data in flight - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; // Operations are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. - assign div_op = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; //in_ready delete, valid independent of ready - assign sqrt_op = in_valid_q & (op_q == fpnew_pkg::SQRT) & in_ready & ~flush_i; + assign div_op = (op_q == fpnew_pkg::DIV) & fsm_start_i; //in_ready delete, valid independent of ready + assign sqrt_op = (op_q == fpnew_pkg::SQRT) & fsm_start_i; assign op_starting = div_op | sqrt_op; //start computing or handshake, modify tb handshake right logic fdsu_fpu_ex1_stall, fdsu_fpu_ex1_stall_q; @@ -159,92 +121,11 @@ module fpnew_divsqrt_th_32 #( `FFL(div_op_q, div_op_d, 1'b1, '0) `FFL(sqrt_op_q, sqrt_op_d, 1'b1, '0) - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - hold_result = 1'b0; - data_is_held = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - inp_pipe_ready[NUM_INP_REGS] = unit_ready_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - // in_ready = 1'b1; // we're ready - in_ready = unit_ready_q; //*** - if (in_valid_q && unit_ready_q) begin // New work arrives - inp_pipe_ready[NUM_INP_REGS] = unit_ready_q && !fdsu_fpu_ex1_stall; - state_d = BUSY; // go into processing state - end - end - // Operation in progress - BUSY: begin - inp_pipe_ready[NUM_INP_REGS] = fdsu_fpu_ex1_stall_q; - unit_busy = 1'b1; // data in flight - // If the unit is done with processing - if (unit_done) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream 
accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - if (in_valid_q && unit_ready_q) begin // ..unless new work comes in - in_ready = 1'b1; // we acknowledge the instruction - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - hold_result = 1'b1; // activate the hold register - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - data_is_held = 1'b1; // data in hold register is valid - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. - if (in_valid_q && unit_ready_q) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) - - // Hold additional information while the operation is in progress - TagType result_tag_q; - AuxType result_aux_q; - logic result_mask_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - // ----------------- // DIVSQRT instance // ----------------- - logic [WIDTH-1:0] unit_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; + logic [WIDTH-1:0] unit_result; + 
fpnew_pkg::status_t unit_status; // thead define fdsu module's input and output logic ctrl_fdsu_ex1_sel; @@ -276,7 +157,8 @@ module fpnew_divsqrt_th_32 #( logic [4:0] fpu_idu_fwd_fflags; logic fpu_idu_fwd_vld; - logic unit_ready_d; + logic unit_done; // status signals from unit instance + logic unit_ready_d, unit_ready_q; // unit_ready_q related to state machine, different under special and normal cases. always_comb begin @@ -295,6 +177,8 @@ module fpnew_divsqrt_th_32 #( `FFL(unit_ready_q, unit_ready_d, 1'b1, 1'b1) + assign fsm_ready_o = unit_ready_q && !fdsu_fpu_ex1_stall; + // determine input of time to select operands always_comb begin ctrl_fdsu_ex1_sel = 1'b0; @@ -408,18 +292,23 @@ module fpnew_divsqrt_th_32 #( unit_done = fpu_idu_fwd_vld; end + // ---------------- + // Hold Result + // ---------------- + + // Hold additional information while the operation is in progress + logic [WIDTH-1:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; + + // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) // The Hold register (load, no reset) - `FFLNR(held_result_q, unit_result, hold_result, clk_i) - `FFLNR(held_status_q, unit_status, hold_result, clk_i) - - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d = data_is_held ? held_result_q : unit_result; - assign status_d = data_is_held ? held_status_q : unit_status; + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], op_starting, '0) + + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? 
unit_status : held_status; // ---------------- // Output Pipeline @@ -427,50 +316,29 @@ module fpnew_divsqrt_th_32 #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); + endmodule diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index eff0620d..fd6f3fdb 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -13,7 +13,7 @@ // Authors: Stefan Mach // Roman Marquart - +// Maurus Item `include "common_cells/registers.svh" @@ -22,8 +22,6 @@ module fpnew_divsqrt_th_64_multi #( // FPU configuration parameter int unsigned NumPipeRegs = 0, 
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -36,30 +34,17 @@ module fpnew_divsqrt_th_64_multi #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input fpnew_pkg::fp_format_e dst_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - input logic vectorial_op_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - output logic divsqrt_done_o, - input logic simd_synch_done_i, - output logic divsqrt_ready_o, - input logic simd_synch_rdy_i, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -85,61 +70,39 @@ module fpnew_divsqrt_th_64_multi #( fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; fpnew_pkg::fp_format_e dst_fmt_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_vec_op_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is 
combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_vec_op_q[0] = vectorial_op_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to upstream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, 
AuxType'('0)) - `FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = inp_pipe_op_q[NUM_INP_REGS]; assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -175,124 +138,19 @@ module fpnew_divsqrt_th_64_multi #( $fatal(1, "DivSqrt THMULTI: Unsupported WIDTH (the supported width are 64, 32, 16)"); end - // ------------ - // Control FSM - // ------------ - - logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done, unit_done_q; // status signals from unit instance - logic op_starting; // high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic unit_busy; // valid data in flight - logic simd_synch_done; - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. 
- assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; - assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; - assign op_starting = div_valid | sqrt_valid; - - // Hold additional information while the operation is in progress - - TagType result_tag_q; - logic result_mask_q; - AuxType result_aux_q; - logic result_vec_op_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) - - // Wait for other lanes only if the operation is vectorial - assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q; - - // Valid synch with other lanes - // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes - // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); - // Tell the other units that this unit has finished now or in the past - assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; - - // Ready synch with other lanes - // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes - assign divsqrt_ready_o = in_ready; - // Upstream ready comes from sanitization FSM, and it is synched among all the lanes - assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? 
simd_synch_rdy_i : in_ready; - - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - in_ready = 1'b1; // we're ready - if (in_valid_q && unit_ready) begin // New work arrives - state_d = BUSY; // go into processing state - end - end - // Operation in progress - BUSY: begin - unit_busy = 1'b1; // data in flight - // If all the lanes are done with processing - if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - in_ready = 1'b1; // we acknowledge the instruction - if (in_valid_q && unit_ready) begin // ..unless new work comes in - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. 
- if (in_valid_q && unit_ready) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) + assign div_valid = (op_q == fpnew_pkg::DIV) & fsm_start_i; + assign sqrt_valid = (op_q != fpnew_pkg::DIV) & fsm_start_i; // ----------------- // DIVSQRT instance // ----------------- - logic [63:0] unit_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; - logic hold_en; + logic unit_done; // Unit output is valid and should be saved + + logic [63:0] unit_result; + fpnew_pkg::status_t unit_status; logic vfdsu_dp_fdiv_busy; @@ -305,11 +163,11 @@ module fpnew_divsqrt_th_64_multi #( logic [63:0] srcf0, srcf1; // Save operands in regs, C910 saves all the following information in its regs in the next cycle. 
- `FFL(rm_q, rnd_mode_q, op_starting, fpnew_pkg::RNE) - `FFL(divsqrt_fmt_q, divsqrt_fmt, op_starting, '0) - `FFL(divsqrt_op_q, op_q, op_starting, fpnew_pkg::DIV) - `FFL(srcf0_q, operands_q[0], op_starting, '0) - `FFL(srcf1_q, operands_q[1], op_starting, '0) + `FFL(rm_q, rnd_mode_q, fsm_start_i, fpnew_pkg::RNE) + `FFL(divsqrt_fmt_q, divsqrt_fmt, fsm_start_i, '0) + `FFL(divsqrt_op_q, op_q, fsm_start_i, fpnew_pkg::DIV) + `FFL(srcf0_q, operands_q[0], fsm_start_i, '0) + `FFL(srcf1_q, operands_q[1], fsm_start_i, '0) // NaN-box inputs with max WIDTH if(WIDTH == 64) begin : gen_fmt_64_bits @@ -370,7 +228,7 @@ module fpnew_divsqrt_th_64_multi #( // Select func 1 cycle after div issue logic func_sel; - `FFLARNC(func_sel, 1'b1, op_starting, func_sel, 1'b0, clk_i, rst_ni) + `FFLARNC(func_sel, 1'b1, fsm_start_i, func_sel, 1'b0, clk_i, rst_ni) // Select operands 2 cycles after div issue logic op_sel; @@ -388,7 +246,7 @@ module fpnew_divsqrt_th_64_multi #( .dp_vfdsu_ex1_pipex_srcf0 ( srcf0 ), // Input for operand 0 .dp_vfdsu_ex1_pipex_srcf1 ( srcf1 ), // Input for operand 1 .dp_vfdsu_fdiv_gateclk_issue ( 1'b1 ), // Local clock enable (same as above) - .dp_vfdsu_idu_fdiv_issue ( op_starting ), // 1. Issue fdiv (FSM in ctrl) + .dp_vfdsu_idu_fdiv_issue ( fsm_start_i ), // 1. Issue fdiv (FSM in ctrl) .forever_cpuclk ( clk_i ), // Clock input .idu_vfpu_rf_pipex_func ( {3'b0, divsqrt_fmt_q, 13'b0 ,sqrt_op, div_op} ), // Defines format (bits 16,15) and operation (bits 1,0) .idu_vfpu_rf_pipex_gateclk_sel ( func_sel ), // 2. 
Select func @@ -408,23 +266,21 @@ module fpnew_divsqrt_th_64_multi #( .vfdsu_ifu_debug_pipe_busy ( ) // Debug output ); - assign unit_ready = !vfdsu_dp_fdiv_busy; + assign fsm_ready_o = !vfdsu_dp_fdiv_busy; - // Hold the result when one lane has finished execution, except when all the lanes finish together, - // or the operation is not vectorial, and the result can be accepted downstream - assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready); - // The Hold register (load, no reset) - `FFLNR(held_result_q, unit_result, hold_en, clk_i) - `FFLNR(held_status_q, unit_status, hold_en, clk_i) + // ---------------- + // Hold Result + // ---------------- + logic [63:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d[WIDTH-1:0] = unit_done_q ? held_result_q[WIDTH-1:0] : unit_result[WIDTH-1:0]; - assign status_d = unit_done_q ? held_status_q : unit_status; + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], fsm_start_i, '0); // Mask is stored on start -> Dont need a bypass mux + + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? 
unit_status : held_status; // ---------------- // Output Pipeline @@ -432,51 +288,29 @@ module fpnew_divsqrt_th_64_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); -endmodule +endmodule diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index d725a5d1..c6ef899a 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -19,8 +19,6 @@ module fpnew_fma #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, 
localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change ) ( @@ -32,25 +30,14 @@ module fpnew_fma #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input logic op_mod_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -105,12 +92,7 @@ module fpnew_fma #( fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -118,33 +100,21 @@ module fpnew_fma #( assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current 
stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // ----------------- @@ -412,12 +382,7 @@ module fpnew_fma #( logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_eff_sub_q[0] = effective_subtraction; @@ -432,25 +397,14 @@ module fpnew_fma #( assign mid_pipe_res_is_spec_q[0] = result_is_special; assign mid_pipe_spec_res_q[0] = special_result; assign 
mid_pipe_spec_stat_q[0] = special_status; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -464,10 +418,9 @@ module fpnew_fma #( `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use assign 
effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; @@ -647,50 +600,28 @@ module fpnew_fma #( // Output pipeline signals, index i holds signal after i register stages fp_t [0:NUM_OUT_REGS] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index e2320846..77886424 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -19,8 +19,6 @@ module fpnew_fma_multi #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type 
AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -35,25 +33,14 @@ module fpnew_fma_multi #( input logic op_mod_i, input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -118,12 +105,7 @@ module fpnew_fma_multi #( logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -133,24 +115,14 @@ module fpnew_fma_multi #( assign inp_pipe_op_mod_q[0] = op_mod_i; assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the 
register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -159,9 +131,7 @@ module fpnew_fma_multi #( `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; @@ -497,12 +467,7 @@ module fpnew_fma_multi #( logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] 
mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_eff_sub_q[0] = effective_subtraction; @@ -518,25 +483,14 @@ module fpnew_fma_multi #( assign mid_pipe_res_is_spec_q[0] = result_is_special; assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -551,9 +505,7 @@ module fpnew_fma_multi #( `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; @@ -796,50 +748,28 @@ module fpnew_fma_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; - assign 
out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = 
out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_noncomp.sv b/src/fpnew_noncomp.sv index 8a182617..afd4721c 100644 --- a/src/fpnew_noncomp.sv +++ b/src/fpnew_noncomp.sv @@ -19,8 +19,6 @@ module fpnew_noncomp #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change ) ( @@ -32,27 +30,16 @@ module fpnew_noncomp #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input logic op_mod_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, output fpnew_pkg::classmask_e class_mask_o, output logic is_class_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -90,12 +77,7 @@ module fpnew_noncomp #( fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - 
logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -103,33 +85,21 @@ module fpnew_noncomp #( assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], 
reg_ena, AuxType'('0)) end // --------------------- @@ -358,12 +328,7 @@ module fpnew_noncomp #( logic [0:NUM_OUT_REGS] out_pipe_extension_bit_q; fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q; logic [0:NUM_OUT_REGS] out_pipe_is_class_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; @@ -371,45 +336,28 @@ module fpnew_noncomp #( assign out_pipe_extension_bit_q[0] = extension_bit_d; assign out_pipe_class_mask_q[0] = class_mask_d; assign out_pipe_is_class_q[0] = is_class_d; - assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign out_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0) `FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN) `FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS]; assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS]; assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS]; - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index 
60353f21..47c3384a 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -58,9 +58,7 @@ module fpnew_opgroup_fmt_slice #( localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat); localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES); - localparam int unsigned AUX_BITS = 2; - logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes logic vectorial_op, cmp_op; logic [NUM_LANES*FP_WIDTH-1:0] slice_result; @@ -70,10 +68,8 @@ module fpnew_opgroup_fmt_slice #( fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask; - TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used logic [NUM_LANES-1:0] lane_masks; - logic [NUM_LANES-1:0] lane_busy, lane_is_class; // dito - logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // dito + logic [NUM_LANES-1:0] lane_is_class; // only the first one is actually used logic result_is_vector, result_is_class, result_is_cmp; @@ -84,11 +80,43 @@ module fpnew_opgroup_fmt_slice #( // ----------- // RSR supported only on SDOTP module assign rnd_mode = (rnd_mode_i == fpnew_pkg::RSR) ? 
fpnew_pkg::RNE : rnd_mode_i; - - assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled assign cmp_op = (op_i == fpnew_pkg::CMP); + // --------------- + // Generate Aux Chain + // --------------- + // Signals to transmit reg enable to other modules + logic [NUM_LANES-1:0] in_lane_active, out_lane_active; + logic [NUM_LANES-1:0][NumPipeRegs-1:0] lane_reg_enable; + + fpnew_aux #( + .NumPipeRegs( NumPipeRegs ), + .TagType ( TagType ), + .AuxType ( logic ), + .NumLanes ( NUM_LANES ) + ) i_aux ( + .clk_i, + .rst_ni, + .tag_i, + .aux_i ( cmp_op ), + .is_vector_i ( vectorial_op ), + .lane_active_i ( in_lane_active ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( result_is_cmp ), + .is_vector_o ( result_is_vector ), + .lane_active_o ( out_lane_active ), + .out_valid_o, + .out_ready_i, + .busy_o, + .reg_enable_o ( /* Unused */ ), + .vector_reg_enable_o ( /* Unused */ ), + .lane_reg_enable_o ( lane_reg_enable ) + ); + // --------------- // Generate Lanes // --------------- @@ -98,15 +126,13 @@ module fpnew_opgroup_fmt_slice #( // Generate instances only if needed, lane 0 always generated if ((lane == 0) || EnableVectors) begin : active_lane - logic in_valid, out_valid, out_ready; // lane-local handshake logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands logic [FP_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; - logic [AUX_BITS-1:0] local_aux_data_input; - assign local_aux_data_input = {vectorial_op, cmp_op}; - assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors + assign in_lane_active[lane] = (lane == 0) | vectorial_op; // upper lanes only for vectors + // Slice out the operands for this lane always_comb begin : prepare_input for (int i = 0; i < int'(NUM_OPERANDS); i++) begin @@ -119,116 +145,58 @@ module fpnew_opgroup_fmt_slice #( fpnew_fma #( 
.FpFormat ( FpFormat ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fma ( .clk_i, .rst_ni, .operands_i ( local_operands ), .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), - .rnd_mode_i ( rnd_mode ), + .rnd_mode_i ( rnd_mode ), .op_i, .op_mod_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( local_aux_data_input ), // Remember whether operation was vectorial - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable[lane] ) ); assign lane_is_class[lane] = 1'b0; assign lane_class_mask[lane] = fpnew_pkg::NEGINF; - end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance - // fpnew_divsqrt #( - // .FpFormat (FpFormat), - // .NumPipeRegs(NumPipeRegs), - // .PipeConfig (PipeConfig), - // .TagType (TagType), - // .AuxType (logic) - // ) i_divsqrt ( - // .clk_i, - // .rst_ni, - // .operands_i ( local_operands ), - // .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), - // .rnd_mode_i ( rnd_mode ), - // .op_i, - // .op_mod_i, - // .tag_i, - // .aux_i ( vectorial_op ), // Remember whether operation was vectorial - // .in_valid_i ( in_valid ), - // .in_ready_o ( lane_in_ready[lane] ), - // .flush_i, - // .result_o ( op_result ), - // .status_o ( op_status ), - // .extension_bit_o ( lane_ext_bit[lane] ), - // .tag_o ( lane_tags[lane] ), - // .aux_o ( lane_aux[lane] ), - // .out_valid_o ( out_valid ), - // .out_ready_i ( out_ready ), - // .busy_o ( lane_busy[lane] ) - // ); - 
// assign lane_is_class[lane] = 1'b0; end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance fpnew_noncomp #( .FpFormat ( FpFormat ), .NumPipeRegs( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_noncomp ( .clk_i, .rst_ni, .operands_i ( local_operands ), .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), - .rnd_mode_i ( rnd_mode ), + .rnd_mode_i ( rnd_mode ), .op_i, .op_mod_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( local_aux_data_input ), // Remember whether operation was vectorial - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .class_mask_o ( lane_class_mask[lane] ), - .is_class_o ( lane_is_class[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .class_mask_o ( lane_class_mask[lane] ), + .is_class_o ( lane_is_class[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable[lane] ) ); end // ADD OTHER OPTIONS HERE - // Handshakes are only done if the lane is actually used - assign out_ready = out_ready_i & ((lane == 0) | result_is_vector); - assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); - // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; - assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; + assign local_result = out_lane_active[lane] ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = out_lane_active[lane] ? 
op_status : '0; // Otherwise generate constant sign-extension end else begin - assign lane_out_valid[lane] = 1'b0; // unused lane - assign lane_in_ready[lane] = 1'b0; // unused lane assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box assign lane_status[lane] = '0; - assign lane_busy[lane] = 1'b0; assign lane_is_class[lane] = 1'b0; + assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active end // Insert lane result into slice result @@ -267,8 +235,6 @@ module fpnew_opgroup_fmt_slice #( // ------------ // Output Side // ------------ - assign result_is_vector = lane_aux[0][1]; - assign result_is_cmp = lane_aux[0][0]; assign result_is_class = lane_is_class[0]; assign slice_regular_result = $signed({extension_bit_o, slice_result}); @@ -294,11 +260,7 @@ module fpnew_opgroup_fmt_slice #( assign result_o = result_is_class ? slice_class_result : slice_regular_result; end - assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused - assign tag_o = lane_tags[0]; // upper lanes unused - assign busy_o = (| lane_busy); - assign out_valid_o = lane_out_valid[0]; // upper lanes unused - + assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused // Collapse the lane status always_comb begin : output_processing diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index ff6f1a14..390b918c 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -91,12 +91,11 @@ or on 16b inputs producing 32b outputs"); // We will send the format information along with the data localparam int unsigned FMT_BITS = fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS)); - localparam int unsigned AUX_BITS = FMT_BITS + 4; // also add vectorial and integer flags + localparam int unsigned AUX_BITS = FMT_BITS + 3; // add integer flags - logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes logic vectorial_op; 
logic [FMT_BITS-1:0] dst_fmt; // destination format to pass along with operation - logic [AUX_BITS-1:0] aux_data; + logic [AUX_BITS-1:0] in_aux, out_aux; // aux signals to pass along with the operation // additional flags for CONV logic dst_fmt_is_int, dst_is_cpk; @@ -113,12 +112,9 @@ or on 16b inputs producing 32b outputs"); fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used - TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used logic [NUM_LANES-1:0] lane_masks; - logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used - logic [NUM_LANES-1:0] lane_busy; // dito - logic result_is_vector, result_is_vsum, op_is_vsum; + logic result_is_vsum, op_is_vsum; logic [FMT_BITS-1:0] result_fmt; logic result_fmt_is_int, result_is_cpk; logic [1:0] result_vec_op; // info for vectorial results (for packing) @@ -132,7 +128,6 @@ or on 16b inputs producing 32b outputs"); // RSR supported only on SDOTP module assign rnd_mode = (rnd_mode_i == fpnew_pkg::RSR) ? fpnew_pkg::RNE : rnd_mode_i; - assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled // Cast-and-Pack ops are encoded in operation and modifier @@ -149,7 +144,7 @@ or on 16b inputs producing 32b outputs"); assign dst_fmt = dst_fmt_is_int ? 
int_fmt_i : dst_fmt_i; // The data sent along consists of the vectorial flag and format bits - assign aux_data = {dst_is_cpk, dst_fmt_is_int, vectorial_op, dst_fmt, op_is_vsum}; + assign in_aux = {dst_is_cpk, dst_fmt_is_int, dst_fmt, op_is_vsum}; assign target_aux_d = dst_vec_op; // CONV passes one operand for assembly after the unit: opC for cpk, opB for others @@ -170,6 +165,74 @@ or on 16b inputs producing 32b outputs"); end end + // --------------- + // Generate Aux Chain + // --------------- + // Signals to transmit reg enable to other modules + logic [NumPipeRegs-1:0] vector_reg_enable; + + logic [NUM_LANES-1:0] in_lane_active, out_lane_active, lane_fsm_ready, lane_fsm_start; + logic [NUM_LANES-1:0][NumPipeRegs-1:0] lane_reg_enabe; + + if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_fsm_aux + fpnew_aux_fsm #( + .NumPipeRegs( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ), + .NumLanes ( NUM_LANES ) + ) i_aux_fsm ( + .clk_i, + .rst_ni, + .tag_i, + .aux_i ( in_aux ), + .is_vector_i ( vectorial_op ), + .lane_active_i ( in_lane_active ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( out_aux ), + .is_vector_o ( /* Unused */ ), + .lane_active_o ( out_lane_active ), + .out_valid_o, + .out_ready_i, + .busy_o, + .reg_enable_o ( /* Unused */ ), + .vector_reg_enable_o ( vector_reg_enable ), + .lane_reg_enable_o ( lane_reg_enabe ), + .lane_fsm_start_o ( lane_fsm_start ), + .lane_fsm_ready_i ( lane_fsm_ready ) + ); + end else begin: gen_direct_aux + fpnew_aux #( + .NumPipeRegs( NumPipeRegs ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ), + .NumLanes ( NUM_LANES ) + ) i_aux ( + .clk_i, + .rst_ni, + .tag_i, + .aux_i ( in_aux ), + .is_vector_i ( vectorial_op ), + .lane_active_i ( in_lane_active ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( out_aux ), + .is_vector_o ( /* Unused */ ), + .lane_active_o ( out_lane_active ), + .out_valid_o, + .out_ready_i, + .busy_o, + 
.reg_enable_o ( /* Unused */ ), + .vector_reg_enable_o ( vector_reg_enable ), + .lane_reg_enable_o ( lane_reg_enabe ) + ); + end + // --------------- // Generate Lanes // --------------- @@ -207,16 +270,17 @@ or on 16b inputs producing 32b outputs"); // Generate instances only if needed, lane 0 always generated if ((lane == 0) || (EnableVectors & (!(OpGroup == fpnew_pkg::DOTP && (lane >= NUM_DOTP_LANES)) && !(OpGroup == fpnew_pkg::DIVSQRT && (lane >= NUM_DIVSQRT_LANES))))) begin : active_lane - logic in_valid, out_valid, out_ready; // lane-local handshake logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local oprands logic [LANE_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; - logic lane_is_used; - assign lane_is_used = (LANE_FORMATS[src_fmt_i] & ~is_up_cast) | - (LANE_FORMATS[dst_fmt_i] & is_up_cast) | (OpGroup == fpnew_pkg::DIVSQRT); - assign in_valid = in_valid_i & ((lane == 0) | vectorial_op) & lane_is_used; // upper lanes only for vectors + // Figure out if lane is active i.e.
should be used + assign in_lane_active[lane] = ( + (LANE_FORMATS[src_fmt_i] & ~is_up_cast) | + (LANE_FORMATS[dst_fmt_i] & is_up_cast) | + (OpGroup == fpnew_pkg::DIVSQRT) + ) & ((lane == 0) | vectorial_op); // Slice out the operands for this lane, upper bits are ignored in the unit always_comb begin : prepare_input @@ -256,9 +320,7 @@ or on 16b inputs producing 32b outputs"); fpnew_fma_multi #( .FpFmtConfig ( LANE_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_fma_multi ( .clk_i, .rst_ni, @@ -269,30 +331,19 @@ or on 16b inputs producing 32b outputs"); .op_mod_i, .src_fmt_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ) ); end else if (OpGroup == fpnew_pkg::DOTP) begin : lane_instance fpnew_sdotp_multi_wrapper #( - .LaneWidth ( LANE_WIDTH ), - .FpFmtConfig ( LANE_FORMATS ), // fp64 and fp32 not supported - .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ), + .LaneWidth ( LANE_WIDTH ), + .FpFmtConfig ( LANE_FORMATS ), // fp64 and fp32 not supported + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), .StochasticRndImplementation ( StochasticRndImplementation ) ) i_fpnew_sdotp_multi_wrapper ( .clk_i, @@ -305,136 +356,91 @@ or on 16b inputs producing 32b outputs"); .op_mod_i, .src_fmt_i, 
.dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ) ); + end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance + end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance - if (DivSqrtSel == fpnew_pkg::TH32 && LANE_FORMATS[0] && (LANE_FORMATS[1:fpnew_pkg::NUM_FP_FORMATS-1] == '0)) begin : gen_th32_e906_divsqrt + if (DivSqrtSel == fpnew_pkg::TH32 && LANE_FORMATS[0] && (LANE_FORMATS[1:fpnew_pkg::NUM_FP_FORMATS-1] == '0)) begin : gen_th32_e906_divsqrt // The T-head-based DivSqrt unit is supported only in FP32-only configurations fpnew_divsqrt_th_32 #( .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_multi_th ( .clk_i, .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands - .rnd_mode_i ( rnd_mode ), + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i ( rnd_mode ), .op_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( 
out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ), + .fsm_start_i ( lane_fsm_start[lane] ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else if(DivSqrtSel == fpnew_pkg::THMULTI) begin : gen_thmulti_c910_divsqrt fpnew_divsqrt_th_64_multi #( .FpFmtConfig ( LANE_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_th_64_c910 ( - .clk_i, + .clk_i, .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands - .rnd_mode_i ( rnd_mode ), + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i ( rnd_mode ), .op_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .vectorial_op_i ( vectorial_op ), // synchronize only vectorial operations - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .divsqrt_done_o ( divsqrt_done[lane] ), - .simd_synch_done_i( simd_synch_done ), - .divsqrt_ready_o ( divsqrt_ready[lane] ), - .simd_synch_rdy_i ( simd_synch_rdy ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ), + .fsm_start_i ( lane_fsm_start[lane] ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else begin : gen_pulp_divsqrt fpnew_divsqrt_multi #( .FpFmtConfig ( LANE_FORMATS ), 
.NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_multi ( .clk_i, .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands - .rnd_mode_i ( rnd_mode ), + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i ( rnd_mode ), .op_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .vectorial_op_i ( vectorial_op ), // synchronize only vectorial operations - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .divsqrt_done_o ( divsqrt_done[lane] ), - .simd_synch_done_i( simd_synch_done ), - .divsqrt_ready_o ( divsqrt_ready[lane] ), - .simd_synch_rdy_i ( simd_synch_rdy ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ), + .fsm_start_i ( lane_fsm_start[lane] ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end - end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance - end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance fpnew_cast_multi #( .FpFmtConfig ( LANE_FORMATS ), .IntFmtConfig ( CONV_INT_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_cast_multi ( .clk_i, .rst_ni, @@ -446,45 +452,32 @@ or on 16b inputs producing 32b outputs"); .src_fmt_i, .dst_fmt_i, .int_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - 
.in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enabe[lane] ) ); end // ADD OTHER OPTIONS HERE - // Handshakes are only done if the lane is actually used - assign out_ready = out_ready_i & ((lane == 0) | result_is_vector); - assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); + // Guard against accidentally using the wrong aux module + if (OpGroup != fpnew_pkg::DIVSQRT) begin : lane_fsm_guard + assign lane_fsm_ready[lane] = 1'b0; // Lane does not have a FSM, it can not be ready! + end // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = lane_out_valid[lane] ? op_result : {(LANE_WIDTH){lane_ext_bit[0]}}; - assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; + assign local_result = out_lane_active[lane] ? op_result: '{default: lane_ext_bit[0]}; + assign lane_status[lane] = out_lane_active[lane] ? 
op_status : '0; // Otherwise generate constant sign-extension end else begin : inactive_lane - assign lane_out_valid[lane] = 1'b0; // unused lane - assign lane_in_ready[lane] = 1'b0; // unused lane - assign lane_aux[lane] = 1'b0; // unused lane assign lane_masks[lane] = 1'b1; // unused lane - assign lane_tags[lane] = 1'b0; // unused lane - assign divsqrt_done[lane] = 1'b0; // unused lane - assign divsqrt_ready[lane] = 1'b0; // unused lane assign lane_ext_bit[lane] = 1'b1; // NaN-box unused lane assign local_result = {(LANE_WIDTH){lane_ext_bit[0]}}; // sign-extend/nan box assign lane_status[lane] = '0; - assign lane_busy[lane] = 1'b0; + assign in_lane_active[lane] = 1'b0; // Lane does not exist, it can never be active + assign lane_fsm_ready[lane] = 1'b1; // Lane does not exist, it is always ready just in case erroneous data gets to the FSM in this slot end // Generate result packing depending on float format @@ -569,32 +562,22 @@ or on 16b inputs producing 32b outputs"); // Bypass pipeline signals, index i holds signal after i register stages logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q; logic [0:NumPipeRegs][1:0] byp_pipe_aux_q; - logic [0:NumPipeRegs] byp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NumPipeRegs] byp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign byp_pipe_target_q[0] = conv_target_d; assign byp_pipe_aux_q[0] = target_aux_d; - assign byp_pipe_valid_q[0] = in_valid_i & vectorial_op; + // Generate the register stages for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline - // Internal register enable for this stage - logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2.
if the next stage only holds a bubble (not valid) -> we can pop it - assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i]; + // Internal register enable for this stage + logic reg_ena; + // Enable register is set externally + assign reg_ena = vector_reg_enable[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector; + // Output stage: assign module outputs assign conv_target_q = byp_pipe_target_q[NumPipeRegs]; @@ -626,20 +609,10 @@ or on 16b inputs producing 32b outputs"); assign conv_target_q = '0; end - if ((DivSqrtSel != fpnew_pkg::TH32) && (OpGroup == fpnew_pkg::DIVSQRT)) begin - // Synch lanes if there is more than one - assign simd_synch_rdy = EnableVectors ? &divsqrt_ready[NUM_DIVSQRT_LANES-1:0] : divsqrt_ready[0]; - assign simd_synch_done = EnableVectors ? &divsqrt_done[NUM_DIVSQRT_LANES-1:0] : divsqrt_done[0]; - end else begin - // Unused (TH32 divider only supported for scalar FP32 divsqrt) - assign simd_synch_rdy = '0; - assign simd_synch_done = '0; - end - // ------------ // Output Side // ------------ - assign {result_is_cpk, result_fmt_is_int, result_is_vector, result_fmt, result_is_vsum} = lane_aux[0]; + assign {result_is_cpk, result_fmt_is_int, result_fmt, result_is_vsum} = out_aux; assign result_o = result_fmt_is_int ? ifmt_slice_result[result_fmt] : result_is_cpk ? 
fmt_conv_cpk_result[result_fmt][result_vec_op] : @@ -647,10 +620,6 @@ or on 16b inputs producing 32b outputs"); fmt_slice_result[result_fmt]; assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones - assign tag_o = lane_tags[0]; // don't care about upper ones - assign busy_o = (| lane_busy); - - assign out_valid_o = lane_out_valid[0]; // don't care about upper ones // Collapse the status always_comb begin : output_processing diff --git a/src/fpnew_sdotp_multi.sv b/src/fpnew_sdotp_multi.sv index a08419cc..c504edf1 100644 --- a/src/fpnew_sdotp_multi.sv +++ b/src/fpnew_sdotp_multi.sv @@ -49,8 +49,6 @@ module fpnew_sdotp_multi #( // Supported destination formats (FP16, FP16ALTt, FP32) parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, // Do not change localparam int unsigned SRC_WIDTH = fpnew_pkg::max_fp_width(SrcDotpFpFmtConfig), @@ -75,25 +73,14 @@ module fpnew_sdotp_multi #( input logic op_mod_i, input fpnew_pkg::fp_format_e src_fmt_i, // format of op_a, op_b, op_c, op_d input fpnew_pkg::fp_format_e dst_fmt_i, // format of the accumulator (op_e) and result - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [DST_WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -183,12 +170,7 @@ module fpnew_sdotp_multi #( logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; 
fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operand_a_q[0] = operand_a_i; @@ -202,24 +184,14 @@ module fpnew_sdotp_multi #( assign inp_pipe_op_mod_q[0] = op_mod_i; assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operand_a_q[i+1], inp_pipe_operand_a_q[i], reg_ena, '0) `FFL(inp_pipe_operand_b_q[i+1], inp_pipe_operand_b_q[i], reg_ena, '0) @@ -232,9 +204,7 @@ module fpnew_sdotp_multi #( `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::FP8) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::FP16) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operand_a_q = inp_pipe_operand_a_q[NUM_INP_REGS]; @@ -969,13 +939,8 @@ module fpnew_sdotp_multi #( logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; fp_dst_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; logic [0:NUM_MID_REGS] mid_pipe_sum_carry_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_eff_sub_q[0] = effective_subtraction_first; @@ -1001,26 
+966,15 @@ module fpnew_sdotp_multi #( assign mid_pipe_res_is_spec_q[0] = result_is_special; assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; assign mid_pipe_sum_carry_q[0] = sum_carry; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_final_sign_zero_q[i+1], mid_pipe_final_sign_zero_q[i], reg_ena, '0) @@ -1045,9 +999,7 @@ module fpnew_sdotp_multi #( `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) 
`FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) `FFL(mid_pipe_sum_carry_q[i+1], mid_pipe_sum_carry_q[i], reg_ena, '0) end // Output stage: assign selected pipe outputs to signals for later use @@ -1314,8 +1266,7 @@ module fpnew_sdotp_multi #( ? final_sign_zero_q : final_sign_z; logic enable_rsr; - assign enable_rsr = (rnd_mode_q == fpnew_pkg::RSR) && (mid_pipe_ready[NUM_MID_REGS] - && mid_pipe_valid_q[NUM_MID_REGS]); + assign enable_rsr = (rnd_mode_q == fpnew_pkg::RSR) && reg_enable_i[NUM_MID_REGS]; // Perform the rounding fpnew_rounding #( .AbsWidth ( SUPER_DST_EXP_BITS + SUPER_DST_MAN_BITS ), @@ -1395,50 +1346,28 @@ module fpnew_sdotp_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][DST_WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_sdotp_multi_wrapper.sv b/src/fpnew_sdotp_multi_wrapper.sv index d402b67a..108629b0 100644 --- a/src/fpnew_sdotp_multi_wrapper.sv +++ b/src/fpnew_sdotp_multi_wrapper.sv @@ -22,8 +22,6 @@ module fpnew_sdotp_multi_wrapper #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - 
parameter type TagType = logic, - parameter type AuxType = logic, parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, // Do not change localparam fpnew_pkg::fmt_logic_t FpSrcFmtConfig = FpFmtConfig[0] ? (FpFmtConfig & 6'b001111) : (FpFmtConfig & 6'b000101), @@ -44,25 +42,14 @@ module fpnew_sdotp_multi_wrapper #( input logic op_mod_i, input fpnew_pkg::fp_format_e src_fmt_i, input fpnew_pkg::fp_format_e dst_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [OPERAND_WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -147,8 +134,6 @@ module fpnew_sdotp_multi_wrapper #( .DstDotpFpFmtConfig ( FpDstFmtConfig ), // FP32, FP16, FP16ALT .NumPipeRegs ( NumPipeRegs ), .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( AuxType ), .StochasticRndImplementation ( StochasticRndImplementation ) ) i_fpnew_sdotp_multi ( .clk_i, @@ -165,21 +150,12 @@ module fpnew_sdotp_multi_wrapper #( .op_mod_i, .src_fmt_i, // format of the multiplicands .dst_fmt_i, // format of the addend and result - .tag_i, .mask_i, - .aux_i, - .in_valid_i, - .in_ready_o , - .flush_i, .result_o ( local_result[DST_WIDTH-1:0] ), .status_o, .extension_bit_o, - .tag_o, .mask_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o + .reg_enable_i ); if(OPERAND_WIDTH > DST_WIDTH) begin diff --git a/src_files.yml b/src_files.yml index 84348a98..90c34eb8 100644 --- a/src_files.yml +++ b/src_files.yml @@ -33,6 +33,8 @@ fpnew: 
vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v, vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v, vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v, + src/fpnew_aux.sv, + src/fpnew_aux_fsm.sv, src/fpnew_divsqrt_th_32.sv, src/fpnew_divsqrt_th_64_multi.sv, src/fpnew_divsqrt_multi.sv,