Skip to content

Commit

Permalink
Fixed synchronisation of divsqrt module.
Browse files Browse the repository at this point in the history
  • Loading branch information
Maurus Item committed Jun 12, 2024
1 parent 9063ca7 commit f813f4e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 113 deletions.
46 changes: 6 additions & 40 deletions src/fpnew_divsqrt_multi.sv
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ module fpnew_divsqrt_multi #(
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
output logic divsqrt_done_o,
input logic simd_synch_done_i,
output logic divsqrt_ready_o,
input logic simd_synch_rdy_i,
input logic flush_i,
// Output signals
output logic [WIDTH-1:0] result_o,
Expand Down Expand Up @@ -170,11 +166,10 @@ module fpnew_divsqrt_multi #(

logic in_ready; // input handshake with upstream
logic div_valid, sqrt_valid; // input signalling with unit
logic unit_ready, unit_done, unit_done_q; // status signals from unit instance
logic unit_ready, unit_done; // status signals from unit instance
logic op_starting; // high in the cycle a new operation starts
logic out_valid, out_ready; // output handshake with downstream
logic unit_busy; // valid data in flight
logic simd_synch_done;
// FSM states
typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
fsm_state_e state_q, state_d;
Expand All @@ -198,21 +193,8 @@ module fpnew_divsqrt_multi #(
`FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0)
`FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0)

// Wait for other lanes only if the operation is vectorial
assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q;

// Valid synch with other lanes
// When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes
// As soon as all the lanes are over, we can clear this FF and start with a new operation
`FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni);
// Tell the other units that this unit has finished now or in the past
assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q;

// Ready synch with other lanes
// Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes
assign divsqrt_ready_o = in_ready;
// Upstream ready comes from sanitization FSM, and it is synched among all the lanes
assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? simd_synch_rdy_i : in_ready;
// Upstream ready comes from FSM
assign inp_pipe_ready[NUM_INP_REGS] = in_ready;

// FSM to safely apply and receive data from DIVSQRT unit
always_comb begin : flag_fsm
Expand All @@ -234,7 +216,7 @@ module fpnew_divsqrt_multi #(
BUSY: begin
unit_busy = 1'b1; // data in flight
// If the unit is done with processing
if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin
if (unit_done) begin
out_valid = 1'b1; // try to commit result downstream
// If downstream accepts our result
if (out_ready) begin
Expand Down Expand Up @@ -305,22 +287,6 @@ module fpnew_divsqrt_multi #(
// Adjust result width and fix FP8
assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result;

// Hold the result when one lane has finished execution, except when all the lanes finish together,
// or the operation is not vectorial, and the result can be accepted downstream
assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready);
// The Hold register (load, no reset)
`FFLNR(held_result_q, adjusted_result, hold_en, clk_i)
`FFLNR(held_status_q, unit_status, hold_en, clk_i)

// --------------
// Output Select
// --------------
logic [WIDTH-1:0] result_d;
fpnew_pkg::status_t status_d;
// Prioritize hold register data
assign result_d = unit_done_q ? held_result_q : adjusted_result;
assign status_d = unit_done_q ? held_status_q : unit_status;

// ----------------
// Output Pipeline
// ----------------
Expand All @@ -335,8 +301,8 @@ module fpnew_divsqrt_multi #(
logic [0:NUM_OUT_REGS] out_pipe_ready;

// Input stage: First element of pipeline is taken from inputs
assign out_pipe_result_q[0] = result_d;
assign out_pipe_status_q[0] = status_d;
assign out_pipe_result_q[0] = adjusted_result;
assign out_pipe_status_q[0] = unit_status;
assign out_pipe_tag_q[0] = result_tag_q;
assign out_pipe_mask_q[0] = result_mask_q;
assign out_pipe_aux_q[0] = result_aux_q;
Expand Down
52 changes: 6 additions & 46 deletions src/fpnew_divsqrt_th_64_multi.sv
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,9 @@ module fpnew_divsqrt_th_64_multi #(
input TagType tag_i,
input logic mask_i,
input AuxType aux_i,
input logic vectorial_op_i,
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
output logic divsqrt_done_o,
input logic simd_synch_done_i,
output logic divsqrt_ready_o,
input logic simd_synch_rdy_i,
input logic flush_i,
// Output signals
output logic [WIDTH-1:0] result_o,
Expand Down Expand Up @@ -95,7 +90,6 @@ module fpnew_divsqrt_th_64_multi #(
TagType [0:NUM_INP_REGS] inp_pipe_tag_q;
logic [0:NUM_INP_REGS] inp_pipe_mask_q;
AuxType [0:NUM_INP_REGS] inp_pipe_aux_q;
logic [0:NUM_INP_REGS] inp_pipe_vec_op_q;
logic [0:NUM_INP_REGS] inp_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NUM_INP_REGS] inp_pipe_ready;
Expand All @@ -108,7 +102,6 @@ module fpnew_divsqrt_th_64_multi #(
assign inp_pipe_tag_q[0] = tag_i;
assign inp_pipe_mask_q[0] = mask_i;
assign inp_pipe_aux_q[0] = aux_i;
assign inp_pipe_vec_op_q[0] = vectorial_op_i;
assign inp_pipe_valid_q[0] = in_valid_i;
// Input stage: Propagate pipeline ready signal to upstream circuitry
assign in_ready_o = inp_pipe_ready[0];
Expand All @@ -132,7 +125,6 @@ module fpnew_divsqrt_th_64_multi #(
`FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0))
`FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0)
`FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0))
`FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, AuxType'('0))
end
// Output stage: assign selected pipe outputs to signals for later use
assign operands_q = inp_pipe_operands_q[NUM_INP_REGS];
Expand Down Expand Up @@ -181,11 +173,11 @@ module fpnew_divsqrt_th_64_multi #(

logic in_ready; // input handshake with upstream
logic div_valid, sqrt_valid; // input signalling with unit
logic unit_ready, unit_done, unit_done_q; // status signals from unit instance
logic unit_ready, unit_done; // status signals from unit instance
logic op_starting; // high in the cycle a new operation starts
logic out_valid, out_ready; // output handshake with downstream
logic unit_busy; // valid data in flight
logic simd_synch_done;

// FSM states
typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e;
fsm_state_e state_q, state_d;
Expand All @@ -200,29 +192,13 @@ module fpnew_divsqrt_th_64_multi #(
TagType result_tag_q;
logic result_mask_q;
AuxType result_aux_q;
logic result_vec_op_q;

// Fill the registers every time a valid operation arrives (load FF, active-low asynchronous reset)
`FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0)
`FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0)
`FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0)
`FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0)

// Wait for other lanes only if the operation is vectorial
assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q;

// Valid synch with other lanes
// When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes
// As soon as all the lanes are over, we can clear this FF and start with a new operation
`FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni);
// Tell the other units that this unit has finished now or in the past
assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q;

// Ready synch with other lanes
// Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes
assign divsqrt_ready_o = in_ready;
// Upstream ready comes from sanitization FSM, and it is synched among all the lanes
assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? simd_synch_rdy_i : in_ready;
assign inp_pipe_ready[NUM_INP_REGS] = in_ready;

// FSM to safely apply and receive data from DIVSQRT unit
always_comb begin : flag_fsm
Expand All @@ -244,7 +220,7 @@ module fpnew_divsqrt_th_64_multi #(
BUSY: begin
unit_busy = 1'b1; // data in flight
// If the unit is done with processing
if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin
if (unit_done) begin
out_valid = 1'b1; // try to commit result downstream
// If downstream accepts our result
if (out_ready) begin
Expand Down Expand Up @@ -410,22 +386,6 @@ module fpnew_divsqrt_th_64_multi #(

assign unit_ready = !vfdsu_dp_fdiv_busy;

// Hold the result when one lane has finished execution, except when all the lanes finish together,
// or the operation is not vectorial, and the result can be accepted downstream
assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready);
// The Hold register (load, no reset)
`FFLNR(held_result_q, unit_result, hold_en, clk_i)
`FFLNR(held_status_q, unit_status, hold_en, clk_i)

// --------------
// Output Select
// --------------
logic [WIDTH-1:0] result_d;
fpnew_pkg::status_t status_d;
// Prioritize hold register data
assign result_d[WIDTH-1:0] = unit_done_q ? held_result_q[WIDTH-1:0] : unit_result[WIDTH-1:0];
assign status_d = unit_done_q ? held_status_q : unit_status;

// ----------------
// Output Pipeline
// ----------------
Expand All @@ -440,8 +400,8 @@ module fpnew_divsqrt_th_64_multi #(
logic [0:NUM_OUT_REGS] out_pipe_ready;

// Input stage: First element of pipeline is taken from inputs
assign out_pipe_result_q[0] = result_d;
assign out_pipe_status_q[0] = status_d;
assign out_pipe_result_q[0] = unit_result;
assign out_pipe_status_q[0] = unit_status;
assign out_pipe_tag_q[0] = result_tag_q;
assign out_pipe_mask_q[0] = result_mask_q;
assign out_pipe_aux_q[0] = result_aux_q;
Expand Down
32 changes: 5 additions & 27 deletions src/fpnew_opgroup_multifmt_slice.sv
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ or on 16b inputs producing 32b outputs");
logic result_fmt_is_int, result_is_cpk;
logic [1:0] result_vec_op; // info for vectorial results (for packing)

logic simd_synch_rdy, simd_synch_done;
fpnew_pkg::roundmode_e rnd_mode;

// -----------
Expand Down Expand Up @@ -171,13 +170,13 @@ or on 16b inputs producing 32b outputs");
// ------------
if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_reduced_throughput_lanes
// Reduced throughput specific lane signals
logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes
logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes
TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used
logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used
logic [NUM_LANES-1:0] lane_busy; // ditto

// Input side
assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane
assign in_ready_o = vectorial_op ? &lane_in_ready : lane_in_ready[0]; // Upstream ready is given by all lanes if vectorial, by the first lane otherwise

// ---------------
// Generate Lanes
Expand Down Expand Up @@ -218,7 +217,7 @@ or on 16b inputs producing 32b outputs");
logic [LANE_WIDTH-1:0] op_result; // lane-local results
fpnew_pkg::status_t op_status;

assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
assign in_valid = in_valid_i & ((lane == 0) | vectorial_op) & in_ready_o; // upper lanes only for vectors

// Slice out the operands for this lane, upper bits are ignored in the unit
always_comb begin : prepare_input
Expand Down Expand Up @@ -277,13 +276,8 @@ or on 16b inputs producing 32b outputs");
.tag_i,
.mask_i ( simd_mask_i[lane] ),
.aux_i ( in_aux ),
.vectorial_op_i ( vectorial_op ), // synchronize only vectorial operations
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.divsqrt_done_o ( divsqrt_done[lane] ),
.simd_synch_done_i( simd_synch_done ),
.divsqrt_ready_o ( divsqrt_ready[lane] ),
.simd_synch_rdy_i ( simd_synch_rdy ),
.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
Expand Down Expand Up @@ -313,13 +307,9 @@ or on 16b inputs producing 32b outputs");
.tag_i,
.mask_i ( simd_mask_i[lane] ),
.aux_i ( in_aux ),
.vectorial_op_i ( vectorial_op ), // synchronize only vectorial operations
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.divsqrt_done_o ( divsqrt_done[lane] ),
.simd_synch_done_i( simd_synch_done ),
.divsqrt_ready_o ( divsqrt_ready[lane] ),
.simd_synch_rdy_i ( simd_synch_rdy ),

.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
Expand Down Expand Up @@ -349,8 +339,6 @@ or on 16b inputs producing 32b outputs");
assign lane_in_ready[lane] = 1'b0; // unused lane
assign lane_aux[lane] = 1'b0; // unused lane
assign lane_tags[lane] = 1'b0; // unused lane
assign divsqrt_done[lane] = 1'b0; // unused lane
assign divsqrt_ready[lane] = 1'b0; // unused lane
assign lane_busy[lane] = 1'b0;

// Signals in any kind of laned instance
Expand Down Expand Up @@ -390,21 +378,11 @@ or on 16b inputs producing 32b outputs");
assign ifmt_slice_result[ifmt] = '0;
end

if ((DivSqrtSel != fpnew_pkg::TH32) && (OpGroup == fpnew_pkg::DIVSQRT)) begin
// Synch lanes if there is more than one
assign simd_synch_rdy = EnableVectors ? &divsqrt_ready[NUM_DIVSQRT_LANES-1:0] : divsqrt_ready[0];
assign simd_synch_done = EnableVectors ? &divsqrt_done[NUM_DIVSQRT_LANES-1:0] : divsqrt_done[0];
end else begin
// Unused (TH32 divider only supported for scalar FP32 divsqrt)
assign simd_synch_rdy = '0;
assign simd_synch_done = '0;
end

// Group signals from all lanes
assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones
assign tag_o = lane_tags[0]; // don't care about upper ones
assign busy_o = lane_busy[0];
assign out_valid_o = lane_out_valid[0]; // don't care about upper ones
assign out_valid_o = result_is_vector ? &lane_out_valid : lane_out_valid[0]; // Only care about upper ones if vectorial
assign out_aux = lane_aux[0]; // don't care about upper ones

// Lane is always non_conv
Expand Down

0 comments on commit f813f4e

Please sign in to comment.