diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index a1459da..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.github/verilog_ci.yml b/.github/workflows/verilog_ci.yml similarity index 80% rename from .github/verilog_ci.yml rename to .github/workflows/verilog_ci.yml index 5f66dd5..1bdf3a0 100644 --- a/.github/verilog_ci.yml +++ b/.github/workflows/verilog_ci.yml @@ -12,9 +12,9 @@ jobs: - name: Setup Icarus Verilog run: sudo apt-get update && sudo apt-get install -y iverilog + - name: Install virtualenv + run: sudo apt-get install -y python3-virtualenv + - name: Test Verilog Code run: | make test - - - name: Run Tests - run: vvp output_name.vvp diff --git a/.gitignore b/.gitignore index 7f2684e..10ef8af 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ *.vcd *.vvp *.DS_Store +**/results.xml +**/sim_build/ +**/build/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/makefile b/Makefile similarity index 100% rename from makefile rename to Makefile diff --git a/build/block.vpp b/build/block.vpp deleted file mode 100755 index 2988285..0000000 --- a/build/block.vpp +++ /dev/null @@ -1,460 +0,0 @@ -#! /usr/local/Cellar/icarus-verilog/12.0/bin/vvp -:ivl_version "12.0 (stable)"; -:ivl_delay_selection "TYPICAL"; -:vpi_time_precision - 12; -:vpi_module "/usr/local/Cellar/icarus-verilog/12.0/lib/ivl/system.vpi"; -:vpi_module "/usr/local/Cellar/icarus-verilog/12.0/lib/ivl/vhdl_sys.vpi"; -:vpi_module "/usr/local/Cellar/icarus-verilog/12.0/lib/ivl/vhdl_textio.vpi"; -:vpi_module "/usr/local/Cellar/icarus-verilog/12.0/lib/ivl/v2005_math.vpi"; -:vpi_module "/usr/local/Cellar/icarus-verilog/12.0/lib/ivl/va_math.vpi"; -S_0x7fdb61b04160 .scope module, "block_tb" "block_tb" 2 4; - .timescale -9 -12; -v0x7fdb61914950_0 .var "clk", 0 0; -v0x7fdb61914a10_0 .var "compute", 0 0; -v0x7fdb61914ac0_0 .var "inp_north", 31 0; -v0x7fdb61914bb0_0 .var "inp_west", 31 0; -v0x7fdb61914c80_0 .net "outp_east", 31 0, v0x7fdb61914450_0; 1 drivers -v0x7fdb61914d50_0 .net "outp_south", 31 0, v0x7fdb61914500_0; 1 drivers -v0x7fdb61914de0_0 .var "rst", 0 0; -v0x7fdb61914e70_0 .var "weight_en", 0 0; -v0x7fdb61914f20_0 .var "weight_in", 31 0; -S_0x7fdb61b042e0 .scope module, "uut" "block" 2 11, 3 1 0, S_0x7fdb61b04160; - .timescale -9 -12; - .port_info 0 /INPUT 32 "inp_north"; - .port_info 1 /INPUT 32 "inp_west"; - .port_info 2 /INPUT 32 "weight_in"; - .port_info 3 /OUTPUT 32 "outp_south"; - .port_info 4 /OUTPUT 32 "outp_east"; - .port_info 5 /INPUT 1 "clk"; - .port_info 6 /INPUT 1 "rst"; - .port_info 7 /INPUT 1 "compute"; - .port_info 8 /INPUT 1 "weight_en"; -v0x7fdb61914000_0 .net "add_result", 31 0, L_0x7fdb61918740; 1 drivers -v0x7fdb619140b0_0 .net "clk", 0 0, v0x7fdb61914950_0; 1 drivers -v0x7fdb61914140_0 .net "compute", 0 0, v0x7fdb61914a10_0; 1 drivers -v0x7fdb619141f0_0 .net "inp_north", 31 0, v0x7fdb61914ac0_0; 1 drivers -v0x7fdb619142b0_0 .net "inp_west", 31 0, v0x7fdb61914bb0_0; 1 drivers -v0x7fdb61914380_0 .net "mul_result", 31 0, L_0x7fdb61915610; 1 drivers -v0x7fdb61914450_0 .var "outp_east", 31 0; -v0x7fdb61914500_0 .var "outp_south", 31 0; -v0x7fdb619145b0_0 .net "rst", 0 0, v0x7fdb61914de0_0; 1 drivers -v0x7fdb619146c0_0 .var "weight", 31 0; -v0x7fdb61914770_0 .net "weight_en", 0 0, v0x7fdb61914e70_0; 1 drivers -v0x7fdb61914800_0 .net "weight_in", 31 0, v0x7fdb61914f20_0; 1 drivers -E_0x7fdb61b045e0 .event posedge, v0x7fdb619140b0_0, v0x7fdb619145b0_0; -S_0x7fdb61b04640 .scope module, "add_instance" "fadd" 3 19, 4 4 0, S_0x7fdb61b042e0; - .timescale -9 -12; - .port_info 0 /INPUT 32 "a_operand"; - .port_info 1 /INPUT 32 "b_operand"; - .port_info 2 /OUTPUT 32 "result"; -L_0x7fdb61916690 .functor OR 1, L_0x7fdb61916430, L_0x7fdb619165f0, C4<0>, C4<0>; -L_0x7fdb61916b10 .functor XOR 1, L_0x7fdb61916930, L_0x7fdb619169d0, C4<0>, C4<0>; -L_0x7fdb61916b80 .functor NOT 1, L_0x7fdb61916b10, C4<0>, C4<0>, C4<0>; -v0x7fdb61b04860_0 .net "Exception", 0 0, L_0x7fdb61916690; 1 drivers -v0x7fdb619108b0_0 .net *"_ivl_100", 7 0, L_0x7fdb61918360; 1 drivers -v0x7fdb61910970_0 .net *"_ivl_103", 7 0, L_0x7fdb61918120; 1 drivers -v0x7fdb61910a10_0 .net *"_ivl_104", 7 0, L_0x7fdb619181c0; 1 drivers -L_0x7fdb61863248 .functor BUFT 1, C4<00000000000000000000000000000000>, C4<0>, C4<0>, C4<0>; -v0x7fdb61910ac0_0 .net/2u *"_ivl_106", 31 0, L_0x7fdb61863248; 1 drivers -v0x7fdb61910bb0_0 .net *"_ivl_108", 31 0, L_0x7fdb61918660; 1 drivers -v0x7fdb61910c60_0 .net *"_ivl_11", 63 0, L_0x7fdb61915d50; 1 drivers -v0x7fdb61910d10_0 .net *"_ivl_13", 63 0, L_0x7fdb61915e10; 1 drivers -v0x7fdb61910dc0_0 .net *"_ivl_16", 7 0, L_0x7fdb61915f70; 1 drivers -v0x7fdb61910ed0_0 .net *"_ivl_20", 7 0, L_0x7fdb61916140; 1 drivers -v0x7fdb61910f80_0 .net *"_ivl_24", 7 0, L_0x7fdb61916320; 1 drivers -v0x7fdb61911030_0 .net *"_ivl_26", 0 0, L_0x7fdb61916430; 1 drivers -v0x7fdb619110d0_0 .net *"_ivl_28", 7 0, L_0x7fdb619164d0; 1 drivers -v0x7fdb61911180_0 .net *"_ivl_30", 0 0, L_0x7fdb619165f0; 1 drivers -v0x7fdb61911220_0 .net *"_ivl_36", 0 0, L_0x7fdb61916930; 1 drivers -v0x7fdb619112d0_0 .net *"_ivl_38", 0 0, L_0x7fdb619169d0; 1 drivers -v0x7fdb61911380_0 .net *"_ivl_39", 0 0, L_0x7fdb61916b10; 1 drivers -v0x7fdb61911510_0 .net *"_ivl_4", 30 0, L_0x7fdb61915920; 1 drivers -L_0x7fdb61863098 .functor BUFT 1, C4<1>, C4<0>, C4<0>, C4<0>; -v0x7fdb619115a0_0 .net/2u *"_ivl_43", 0 0, L_0x7fdb61863098; 1 drivers -v0x7fdb61911650_0 .net *"_ivl_46", 22 0, L_0x7fdb61916bf0; 1 drivers -L_0x7fdb618630e0 .functor BUFT 1, C4<1>, C4<0>, C4<0>, C4<0>; -v0x7fdb61911700_0 .net/2u *"_ivl_49", 0 0, L_0x7fdb618630e0; 1 drivers -v0x7fdb619117b0_0 .net *"_ivl_52", 22 0, L_0x7fdb61916a70; 1 drivers -v0x7fdb61911860_0 .net *"_ivl_56", 7 0, L_0x7fdb619170e0; 1 drivers -v0x7fdb61911910_0 .net *"_ivl_58", 7 0, L_0x7fdb61916e10; 1 drivers -v0x7fdb619119c0_0 .net *"_ivl_6", 30 0, L_0x7fdb619159c0; 1 drivers -v0x7fdb61911a70_0 .net *"_ivl_64", 7 0, L_0x7fdb61917510; 1 drivers -v0x7fdb61911b20_0 .net *"_ivl_67", 24 0, L_0x7fdb61917700; 1 drivers -v0x7fdb61911bd0_0 .net *"_ivl_7", 0 0, L_0x7fdb61915b00; 1 drivers -L_0x7fdb61863128 .functor BUFT 1, C4<0>, C4<0>, C4<0>, C4<0>; -v0x7fdb61911c70_0 .net *"_ivl_70", 0 0, L_0x7fdb61863128; 1 drivers -v0x7fdb61911d20_0 .net *"_ivl_71", 24 0, L_0x7fdb619177a0; 1 drivers -L_0x7fdb61863170 .functor BUFT 1, C4<0>, C4<0>, C4<0>, C4<0>; -v0x7fdb61911dd0_0 .net *"_ivl_74", 0 0, L_0x7fdb61863170; 1 drivers -v0x7fdb61911e80_0 .net *"_ivl_75", 24 0, L_0x7fdb61917610; 1 drivers -L_0x7fdb618631b8 .functor BUFT 1, C4<0000000000000000000000000>, C4<0>, C4<0>, C4<0>; -v0x7fdb61911f30_0 .net/2u *"_ivl_77", 24 0, L_0x7fdb618631b8; 1 drivers -v0x7fdb61911430_0 .net *"_ivl_84", 0 0, L_0x7fdb61917bd0; 1 drivers -v0x7fdb619121c0_0 .net *"_ivl_86", 22 0, L_0x7fdb61917c70; 1 drivers -v0x7fdb61912250_0 .net *"_ivl_88", 22 0, L_0x7fdb61917ac0; 1 drivers -v0x7fdb619122f0_0 .net *"_ivl_89", 22 0, L_0x7fdb61917eb0; 1 drivers -v0x7fdb619123a0_0 .net *"_ivl_9", 63 0, L_0x7fdb61915c30; 1 drivers -v0x7fdb61912450_0 .net *"_ivl_95", 0 0, L_0x7fdb61918080; 1 drivers -L_0x7fdb61863200 .functor BUFT 1, C4<00000001>, C4<0>, C4<0>, C4<0>; -v0x7fdb61912500_0 .net/2u *"_ivl_96", 7 0, L_0x7fdb61863200; 1 drivers -v0x7fdb619125b0_0 .net *"_ivl_99", 7 0, L_0x7fdb61917f50; 1 drivers -v0x7fdb61912660_0 .net "a_operand", 31 0, v0x7fdb61914ac0_0; alias, 1 drivers -v0x7fdb61912710_0 .net "add_sum", 30 0, L_0x7fdb61917d50; 1 drivers -v0x7fdb619127c0_0 .net "b_operand", 31 0, L_0x7fdb61915610; alias, 1 drivers -v0x7fdb61912870_0 .net "exp_a", 0 0, L_0x7fdb619160a0; 1 drivers -v0x7fdb61912910_0 .net "exp_b", 0 0, L_0x7fdb61916280; 1 drivers -v0x7fdb619129b0_0 .net "exponent_b_add", 7 0, L_0x7fdb61917180; 1 drivers -v0x7fdb61912a60_0 .net "exponent_diff", 7 0, L_0x7fdb61917250; 1 drivers -v0x7fdb61912b10_0 .net "operand_a", 31 0, L_0x7fdb619157a0; 1 drivers -v0x7fdb61912bc0_0 .net "operand_b", 31 0, L_0x7fdb61915840; 1 drivers -v0x7fdb61912c70_0 .net "operation_sub_addBar", 0 0, L_0x7fdb61916b80; 1 drivers -v0x7fdb61912d10_0 .net "output_sign", 0 0, L_0x7fdb61916780; 1 drivers -v0x7fdb61912db0_0 .net "result", 31 0, L_0x7fdb61918740; alias, 1 drivers -v0x7fdb61912e60_0 .net "significand_a", 23 0, L_0x7fdb61916cb0; 1 drivers -v0x7fdb61912f10_0 .net "significand_add", 24 0, L_0x7fdb61917a20; 1 drivers -v0x7fdb61912fc0_0 .net "significand_b", 23 0, L_0x7fdb61916ec0; 1 drivers -v0x7fdb61913070_0 .net "significand_b_add", 23 0, L_0x7fdb61917350; 1 drivers -L_0x7fdb619157a0 .part L_0x7fdb61915e10, 32, 32; -L_0x7fdb61915840 .part L_0x7fdb61915e10, 0, 32; -L_0x7fdb61915920 .part v0x7fdb61914ac0_0, 0, 31; -L_0x7fdb619159c0 .part L_0x7fdb61915610, 0, 31; -L_0x7fdb61915b00 .cmp/gt 31, L_0x7fdb619159c0, L_0x7fdb61915920; -L_0x7fdb61915c30 .concat [ 32 32 0 0], v0x7fdb61914ac0_0, L_0x7fdb61915610; -L_0x7fdb61915d50 .concat [ 32 32 0 0], L_0x7fdb61915610, v0x7fdb61914ac0_0; -L_0x7fdb61915e10 .functor MUXZ 64, L_0x7fdb61915d50, L_0x7fdb61915c30, L_0x7fdb61915b00, C4<>; -L_0x7fdb61915f70 .part L_0x7fdb619157a0, 23, 8; -L_0x7fdb619160a0 .part L_0x7fdb61915f70, 0, 1; -L_0x7fdb61916140 .part L_0x7fdb61915840, 23, 8; -L_0x7fdb61916280 .part L_0x7fdb61916140, 0, 1; -L_0x7fdb61916320 .part L_0x7fdb619157a0, 23, 8; -L_0x7fdb61916430 .reduce/and L_0x7fdb61916320; -L_0x7fdb619164d0 .part L_0x7fdb61915840, 23, 8; -L_0x7fdb619165f0 .reduce/and L_0x7fdb619164d0; -L_0x7fdb61916780 .part L_0x7fdb619157a0, 31, 1; -L_0x7fdb61916930 .part L_0x7fdb619157a0, 31, 1; -L_0x7fdb619169d0 .part L_0x7fdb61915840, 31, 1; -L_0x7fdb61916bf0 .part L_0x7fdb619157a0, 0, 23; -L_0x7fdb61916cb0 .concat [ 23 1 0 0], L_0x7fdb61916bf0, L_0x7fdb61863098; -L_0x7fdb61916a70 .part L_0x7fdb61915840, 0, 23; -L_0x7fdb61916ec0 .concat [ 23 1 0 0], L_0x7fdb61916a70, L_0x7fdb618630e0; -L_0x7fdb619170e0 .part L_0x7fdb619157a0, 23, 8; -L_0x7fdb61916e10 .part L_0x7fdb61915840, 23, 8; -L_0x7fdb61917250 .arith/sub 8, L_0x7fdb619170e0, L_0x7fdb61916e10; -L_0x7fdb61917350 .shift/r 24, L_0x7fdb61916ec0, L_0x7fdb61917250; -L_0x7fdb61917510 .part L_0x7fdb61915840, 23, 8; -L_0x7fdb61917180 .arith/sum 8, L_0x7fdb61917510, L_0x7fdb61917250; -L_0x7fdb61917700 .concat [ 24 1 0 0], L_0x7fdb61916cb0, L_0x7fdb61863128; -L_0x7fdb619177a0 .concat [ 24 1 0 0], L_0x7fdb61917350, L_0x7fdb61863170; -L_0x7fdb61917610 .arith/sum 25, L_0x7fdb61917700, L_0x7fdb619177a0; -L_0x7fdb61917a20 .functor MUXZ 25, L_0x7fdb618631b8, L_0x7fdb61917610, L_0x7fdb61916b80, C4<>; -L_0x7fdb61917bd0 .part L_0x7fdb61917a20, 24, 1; -L_0x7fdb61917c70 .part L_0x7fdb61917a20, 1, 23; -L_0x7fdb61917ac0 .part L_0x7fdb61917a20, 0, 23; -L_0x7fdb61917eb0 .functor MUXZ 23, L_0x7fdb61917ac0, L_0x7fdb61917c70, L_0x7fdb61917bd0, C4<>; -L_0x7fdb61917d50 .concat8 [ 23 8 0 0], L_0x7fdb61917eb0, L_0x7fdb619181c0; -L_0x7fdb61918080 .part L_0x7fdb61917a20, 24, 1; -L_0x7fdb61917f50 .part L_0x7fdb619157a0, 23, 8; -L_0x7fdb61918360 .arith/sum 8, L_0x7fdb61863200, L_0x7fdb61917f50; -L_0x7fdb61918120 .part L_0x7fdb619157a0, 23, 8; -L_0x7fdb619181c0 .functor MUXZ 8, L_0x7fdb61918120, L_0x7fdb61918360, L_0x7fdb61918080, C4<>; -L_0x7fdb61918660 .concat [ 31 1 0 0], L_0x7fdb61917d50, L_0x7fdb61916780; -L_0x7fdb61918740 .functor MUXZ 32, L_0x7fdb61918660, L_0x7fdb61863248, L_0x7fdb61916690, C4<>; -S_0x7fdb61913170 .scope module, "mul_instance" "fmul" 3 13, 5 4 0, S_0x7fdb61b042e0; - .timescale -9 -12; - .port_info 0 /INPUT 32 "a_in"; - .port_info 1 /INPUT 32 "b_in"; - .port_info 2 /OUTPUT 32 "result"; -L_0x7fdb619152b0 .functor XOR 1, L_0x7fdb619150f0, L_0x7fdb619151b0, C4<0>, C4<0>; -L_0x7fdb61915560 .functor AND 23, L_0x7fdb61915400, v0x7fdb619134e0_0, C4<11111111111111111111111>, C4<11111111111111111111111>; -v0x7fdb619134e0_0 .var "M_result", 22 0; -L_0x7fdb61863050 .functor BUFT 1, C4<11111111111111111111111>, C4<0>, C4<0>, C4<0>; -v0x7fdb61913570_0 .net/2u *"_ivl_10", 22 0, L_0x7fdb61863050; 1 drivers -v0x7fdb61913600_0 .net *"_ivl_14", 22 0, L_0x7fdb61915560; 1 drivers -v0x7fdb61913690_0 .net *"_ivl_3", 0 0, L_0x7fdb619150f0; 1 drivers -v0x7fdb61913740_0 .net *"_ivl_5", 0 0, L_0x7fdb619151b0; 1 drivers -L_0x7fdb61863008 .functor BUFT 1, C4<00000000000000000000000>, C4<0>, C4<0>, C4<0>; -v0x7fdb61913830_0 .net/2u *"_ivl_8", 22 0, L_0x7fdb61863008; 1 drivers -v0x7fdb619138e0_0 .net "a_in", 31 0, v0x7fdb61914bb0_0; alias, 1 drivers -v0x7fdb61913990_0 .net "b_in", 31 0, v0x7fdb619146c0_0; 1 drivers -v0x7fdb61913a40_0 .net "e_result", 7 0, L_0x7fdb61915050; 1 drivers -v0x7fdb61913b50_0 .var "e_result0", 8 0; -v0x7fdb61913c00_0 .var "mul_fix_out", 47 0; -v0x7fdb61913cb0_0 .var "overflow", 0 0; -v0x7fdb61913d50_0 .net "overflow_mask", 22 0, L_0x7fdb61915400; 1 drivers -v0x7fdb61913e00_0 .net "result", 31 0, L_0x7fdb61915610; alias, 1 drivers -v0x7fdb61913ec0_0 .net "sign", 0 0, L_0x7fdb619152b0; 1 drivers -v0x7fdb61913f50_0 .var "zero_check", 0 0; -E_0x7fdb619133b0/0 .event anyedge, v0x7fdb61913f50_0, v0x7fdb619138e0_0, v0x7fdb61913990_0, v0x7fdb61913c00_0; -E_0x7fdb619133b0/1 .event anyedge, v0x7fdb61913cb0_0; -E_0x7fdb619133b0 .event/or E_0x7fdb619133b0/0, E_0x7fdb619133b0/1; -E_0x7fdb61913430 .event anyedge, v0x7fdb61913c00_0; -E_0x7fdb61913480 .event anyedge, v0x7fdb619138e0_0, v0x7fdb61913990_0; -L_0x7fdb61915050 .part v0x7fdb61913b50_0, 0, 8; -L_0x7fdb619150f0 .part v0x7fdb61914bb0_0, 31, 1; -L_0x7fdb619151b0 .part v0x7fdb619146c0_0, 31, 1; -L_0x7fdb61915400 .functor MUXZ 23, L_0x7fdb61863050, L_0x7fdb61863008, v0x7fdb61913cb0_0, C4<>; -L_0x7fdb61915610 .concat [ 23 8 1 0], L_0x7fdb61915560, L_0x7fdb61915050, L_0x7fdb619152b0; - .scope S_0x7fdb61913170; -T_0 ; - %wait E_0x7fdb61913480; - %pushi/vec4 1, 0, 1; - %load/vec4 v0x7fdb619138e0_0; - %parti/s 23, 0, 2; - %concat/vec4; draw_concat_vec4 - %pad/u 48; - %pushi/vec4 1, 0, 1; - %load/vec4 v0x7fdb61913990_0; - %parti/s 23, 0, 2; - %concat/vec4; draw_concat_vec4 - %pad/u 48; - %mul; - %store/vec4 v0x7fdb61913c00_0, 0, 48; - %jmp T_0; - .thread T_0, $push; - .scope S_0x7fdb61913170; -T_1 ; - %wait E_0x7fdb61913480; - %load/vec4 v0x7fdb619138e0_0; - %parti/s 8, 23, 6; - %pad/u 32; - %cmpi/e 0, 0, 32; - %jmp/1 T_1.2, 4; - %flag_mov 8, 4; - %load/vec4 v0x7fdb61913990_0; - %parti/s 8, 23, 6; - %pad/u 32; - %cmpi/e 0, 0, 32; - %flag_or 4, 8; -T_1.2; - %jmp/0xz T_1.0, 4; - %pushi/vec4 1, 0, 1; - %store/vec4 v0x7fdb61913f50_0, 0, 1; - %jmp T_1.1; -T_1.0 ; - %pushi/vec4 0, 0, 1; - %store/vec4 v0x7fdb61913f50_0, 0, 1; -T_1.1 ; - %jmp T_1; - .thread T_1, $push; - .scope S_0x7fdb61913170; -T_2 ; - %wait E_0x7fdb61913430; - %load/vec4 v0x7fdb61913c00_0; - %parti/s 2, 46, 7; - %dup/vec4; - %pushi/vec4 1, 0, 2; - %cmp/u; - %jmp/1 T_2.0, 6; - %dup/vec4; - %pushi/vec4 2, 0, 2; - %cmp/u; - %jmp/1 T_2.1, 6; - %dup/vec4; - %pushi/vec4 3, 0, 2; - %cmp/u; - %jmp/1 T_2.2, 6; - %load/vec4 v0x7fdb61913c00_0; - %parti/s 23, 24, 6; - %store/vec4 v0x7fdb619134e0_0, 0, 23; - %jmp T_2.4; -T_2.0 ; - %load/vec4 v0x7fdb61913c00_0; - %parti/s 23, 23, 6; - %store/vec4 v0x7fdb619134e0_0, 0, 23; - %jmp T_2.4; -T_2.1 ; - %load/vec4 v0x7fdb61913c00_0; - %parti/s 23, 24, 6; - %store/vec4 v0x7fdb619134e0_0, 0, 23; - %jmp T_2.4; -T_2.2 ; - %load/vec4 v0x7fdb61913c00_0; - %parti/s 23, 24, 6; - %store/vec4 v0x7fdb619134e0_0, 0, 23; - %jmp T_2.4; -T_2.4 ; - %pop/vec4 1; - %jmp T_2; - .thread T_2, $push; - .scope S_0x7fdb61913170; -T_3 ; - %wait E_0x7fdb619133b0; - %load/vec4 v0x7fdb61913f50_0; - %flag_set/vec4 8; - %jmp/1 T_3.1, 8; - %pushi/vec4 0, 0, 1; - %load/vec4 v0x7fdb619138e0_0; - %parti/s 8, 23, 6; - %concat/vec4; draw_concat_vec4 - %pushi/vec4 0, 0, 1; - %load/vec4 v0x7fdb61913990_0; - %parti/s 8, 23, 6; - %concat/vec4; draw_concat_vec4 - %add; - %pushi/vec4 0, 0, 8; - %load/vec4 v0x7fdb61913c00_0; - %parti/s 1, 47, 7; - %concat/vec4; draw_concat_vec4 - %add; - %cmpi/u 127, 0, 9; - %flag_or 8, 5; -T_3.1; - %flag_get/vec4 8; - %jmp/1 T_3.0, 8; - %pushi/vec4 381, 0, 32; - %pushi/vec4 0, 0, 1; - %load/vec4 v0x7fdb619138e0_0; - %parti/s 8, 23, 6; - %concat/vec4; draw_concat_vec4 - %pad/u 32; - %pushi/vec4 0, 0, 1; - %load/vec4 v0x7fdb61913990_0; - %parti/s 8, 23, 6; - %concat/vec4; draw_concat_vec4 - %pad/u 32; - %add; - %pushi/vec4 0, 0, 8; - %load/vec4 v0x7fdb61913c00_0; - %parti/s 1, 47, 7; - %concat/vec4; draw_concat_vec4 - %pad/u 32; - %add; - %cmp/u; - %flag_get/vec4 5; - %or; -T_3.0; - %store/vec4 v0x7fdb61913cb0_0, 0, 1; - %load/vec4 v0x7fdb61913f50_0; - %inv; - %flag_set/vec4 8; - %jmp/0xz T_3.2, 8; - %load/vec4 v0x7fdb61913cb0_0; - %flag_set/vec4 8; - %jmp/0xz T_3.4, 8; - %pushi/vec4 511, 0, 9; - %store/vec4 v0x7fdb61913b50_0, 0, 9; - %jmp T_3.5; -T_3.4 ; - %pushi/vec4 0, 0, 1; - %load/vec4 v0x7fdb619138e0_0; - %parti/s 8, 23, 6; - %concat/vec4; draw_concat_vec4 - %pushi/vec4 0, 0, 1; - %load/vec4 v0x7fdb61913990_0; - %parti/s 8, 23, 6; - %concat/vec4; draw_concat_vec4 - %add; - %pushi/vec4 0, 0, 8; - %load/vec4 v0x7fdb61913c00_0; - %parti/s 1, 47, 7; - %concat/vec4; draw_concat_vec4 - %add; - %subi 127, 0, 9; - %store/vec4 v0x7fdb61913b50_0, 0, 9; -T_3.5 ; - %jmp T_3.3; -T_3.2 ; - %pushi/vec4 0, 0, 9; - %store/vec4 v0x7fdb61913b50_0, 0, 9; -T_3.3 ; - %jmp T_3; - .thread T_3, $push; - .scope S_0x7fdb61b042e0; -T_4 ; - %wait E_0x7fdb61b045e0; - %load/vec4 v0x7fdb619145b0_0; - %flag_set/vec4 8; - %jmp/0xz T_4.0, 8; - %pushi/vec4 0, 0, 32; - %assign/vec4 v0x7fdb61914450_0, 0; - %pushi/vec4 0, 0, 32; - %assign/vec4 v0x7fdb61914500_0, 0; - %pushi/vec4 0, 0, 32; - %assign/vec4 v0x7fdb619146c0_0, 0; - %jmp T_4.1; -T_4.0 ; - %load/vec4 v0x7fdb61914770_0; - %flag_set/vec4 8; - %jmp/0xz T_4.2, 8; - %load/vec4 v0x7fdb61914800_0; - %assign/vec4 v0x7fdb619146c0_0, 0; -T_4.2 ; - %load/vec4 v0x7fdb61914140_0; - %flag_set/vec4 8; - %jmp/0xz T_4.4, 8; - %load/vec4 v0x7fdb619142b0_0; - %assign/vec4 v0x7fdb61914450_0, 0; - %load/vec4 v0x7fdb61914000_0; - %assign/vec4 v0x7fdb61914500_0, 0; -T_4.4 ; -T_4.1 ; - %jmp T_4; - .thread T_4; - .scope S_0x7fdb61b04160; -T_5 ; - %pushi/vec4 0, 0, 1; - %store/vec4 v0x7fdb61914950_0, 0, 1; -T_5.0 ; - %delay 5000, 0; - %load/vec4 v0x7fdb61914950_0; - %inv; - %store/vec4 v0x7fdb61914950_0, 0, 1; - %jmp T_5.0; - %end; - .thread T_5; - .scope S_0x7fdb61b04160; -T_6 ; - %pushi/vec4 1, 0, 1; - %store/vec4 v0x7fdb61914de0_0, 0, 1; - %pushi/vec4 0, 0, 1; - %store/vec4 v0x7fdb61914e70_0, 0, 1; - %pushi/vec4 0, 0, 1; - %store/vec4 v0x7fdb61914a10_0, 0, 1; - %pushi/vec4 0, 0, 32; - %store/vec4 v0x7fdb61914ac0_0, 0, 32; - %pushi/vec4 0, 0, 32; - %store/vec4 v0x7fdb61914bb0_0, 0, 32; - %pushi/vec4 0, 0, 32; - %store/vec4 v0x7fdb61914f20_0, 0, 32; - %delay 20000, 0; - %pushi/vec4 0, 0, 1; - %store/vec4 v0x7fdb61914de0_0, 0, 1; - %pushi/vec4 1, 0, 1; - %store/vec4 v0x7fdb61914e70_0, 0, 1; - %pushi/vec4 13, 0, 32; - %store/vec4 v0x7fdb61914f20_0, 0, 32; - %pushi/vec4 2, 0, 32; - %store/vec4 v0x7fdb61914bb0_0, 0, 32; - %delay 10000, 0; - %pushi/vec4 0, 0, 1; - %store/vec4 v0x7fdb61914e70_0, 0, 1; - %pushi/vec4 1, 0, 1; - %store/vec4 v0x7fdb61914a10_0, 0, 1; - %delay 10000, 0; - %pushi/vec4 3, 0, 32; - %store/vec4 v0x7fdb61914bb0_0, 0, 32; - %pushi/vec4 5, 0, 32; - %store/vec4 v0x7fdb61914ac0_0, 0, 32; - %delay 100000, 0; - %vpi_call 2 63 "$finish" {0 0 0}; - %end; - .thread T_6; - .scope S_0x7fdb61b04160; -T_7 ; - %vpi_call 2 68 "$monitor", "Time = %t, rst = %b, compute = %b, weight_en = %b, inp_north = %h, inp_west = %h, weight_in = %h, outp_south = %h, outp_east = %h", $time, v0x7fdb61914de0_0, v0x7fdb61914a10_0, v0x7fdb61914e70_0, v0x7fdb61914ac0_0, v0x7fdb61914bb0_0, v0x7fdb61914f20_0, v0x7fdb61914d50_0, v0x7fdb61914c80_0 {0 0 0}; - %end; - .thread T_7; - .scope S_0x7fdb61b04160; -T_8 ; - %vpi_call 2 73 "$dumpfile", "block_wave.vcd" {0 0 0}; - %vpi_call 2 74 "$dumpvars", 32'sb00000000000000000000000000000000, S_0x7fdb61b04160 {0 0 0}; - %end; - .thread T_8; -# The file index is used to find the file name in the following table. -:file_names 6; - "N/A"; - ""; - "tb/block_tb.v"; - "src/block.v"; - "src/fadd.v"; - "src/fmul.v"; diff --git a/src/block.v b/src/block.v index 8fa353f..72fde27 100644 --- a/src/block.v +++ b/src/block.v @@ -17,8 +17,8 @@ module block(inp_north, inp_west, weight_in, outp_south, outp_east, clk, rst, c ); wire [31:0] add_result; fadd add_instance ( - .a_operand(inp_north), - .b_operand(mul_result), + .a_in(inp_north), + .b_in(mul_result), .result(add_result) ); diff --git a/src/fadd.v b/src/fadd.v index 89d75dc..6592b36 100644 --- a/src/fadd.v +++ b/src/fadd.v @@ -2,7 +2,7 @@ module fadd( - input [`BIT_W-1:0] a_operand, b_operand, // Inputs in the format of IEEE-`EXP_W-154 Representation. + input [`BIT_W-1:0] a_in, b_in, // Inputs in the format of IEEE-`EXP_W-154 Representation. output [`BIT_W-1:0] result // Outputs in the format of IEEE-`EXP_W-154 Representation. ); @@ -20,38 +20,47 @@ wire [`EXP_W-1:0] exponent_b_add; wire [`M_W+1:0] significand_add; wire [`BIT_W-2:0] add_sum; +wire [`EXP_W-1:0] exp_a, exp_b; -//for operations always operand_a must not be less than b_operand -assign {operand_a,operand_b} = (a_operand[`BIT_W-2:0] < b_operand[`BIT_W-2:0]) ? {b_operand,a_operand} : {a_operand,b_operand}; -assign exp_a = operand_a[`BIT_W-2:`M_W]; -assign exp_b = operand_b[`BIT_W-2:`M_W]; +//for operations always operand_a must not be less than b_in +assign {operand_a,operand_b} = (a_in[`BIT_W-2:0] < b_in[`BIT_W-2:0]) ? {b_in,a_in} : {a_in,b_in}; + +assign exp_a = operand_a[`BIT_W-2:`M_W]; // extract exponent from operand_a +assign exp_b = operand_b[`BIT_W-2:`M_W]; // extract exponent from operand_b //Exception flag sets 1 if either one of the exponent is 255. assign Exception = (&operand_a[`BIT_W-2:`M_W]) | (&operand_b[`BIT_W-2:`M_W]); -assign output_sign = operand_a[`BIT_W-1] ; +assign output_sign = operand_a[`BIT_W-1] ; // since the operand_a is always greater than operand_b, the sign of the result will be same as operand_a. +//operation_sub_addBar is 1 if we are doing subtraction else 0. assign operation_sub_addBar = ~(operand_a[`BIT_W-1] ^ operand_b[`BIT_W-1]); //Assigining significand values according to Hidden Bit. -assign significand_a = {1'b1,operand_a[`M_W-1:0]}; -assign significand_b = {1'b1,operand_b[`M_W-1:0]}; +assign significand_a = {1'b1,operand_a[`M_W-1:0]}; // expand the mantissa by 1 bit before multiplication since its always implied +assign significand_b = {1'b1,operand_b[`M_W-1:0]}; // same as above //Evaluating Exponent Difference assign exponent_diff = operand_a[`BIT_W-2:`M_W] - operand_b[`BIT_W-2:`M_W]; -//Shifting significand_b according to exponent_diff +//Shifting significand_b to the right according to exponent_diff. Exapmle: if we have 1.0101 >> 2 = 0.0101 then exponent_diff = 2 and significand_b_add = significand_b >> exponent_diff assign significand_b_add = significand_b >> exponent_diff; +//Adding exponent_diff to exponent_b. Exapmle: if we have 1.0101 << 2 = 101.01 then exponent_diff = 2 and exponent_b_add = exponent_b + exponent_diff assign exponent_b_add = operand_b[`BIT_W-2:`M_W] + exponent_diff; //------------------------------------------------ADD BLOCK------------------------------------------// +//if we are adding(operation_sub_addBar=1) need to add significand_b_add to significand_a. +//Or sets the significand to zero if the signs are different(this means we are doing subtraction), effectively determining the core operation of the floating-point addition based on the sign of the operands. assign significand_add = ( operation_sub_addBar) ? (significand_a + significand_b_add) : {(`M_W+2){1'b0}}; -//Result will be equal to Most `M_W bits if carry generates else it will be Least `M_W-1 bits. +//Taking care of the resulting mantissa. +//If there is a carry, then the result is normalized by shifting the significand right by one bit(because its implied) and incrementing the exponent by one. +//If there is no carry, we just use the result of the addition, and we have `M_W-1:0 due to the fact that we are using the hidden bit(implied 1). assign add_sum[`M_W-1:0] = significand_add[`M_W+1] ? significand_add[`M_W:1] : significand_add[`M_W-1:0]; +// Taking care of the resulting exponent. //If carry generates in sum value then exponent must be added with 1 else feed as it is. assign add_sum[`BIT_W-2:`M_W] = significand_add[`M_W+1] ? (1'b1 + operand_a[`BIT_W-2:`M_W]) : operand_a[`BIT_W-2:`M_W]; diff --git a/src/fmul.v b/src/fmul.v index 1dc253f..dd91d24 100644 --- a/src/fmul.v +++ b/src/fmul.v @@ -17,7 +17,7 @@ module fmul( // Multiplication logic always @* begin - mul_fix_out = {1'b1, a_in[`M_W-1:0]} * {1'b1, b_in[`M_W-1:0]}; + mul_fix_out = {1'b1, a_in[`M_W-1:0]} * {1'b1, b_in[`M_W-1:0]}; //extend the mantissa by 1 bit before multiplication end // Zero check @@ -29,24 +29,34 @@ module fmul( end end - // Generate M + // Generate Mantissa. We are only considering the most significat bits of the product to generate the mantissa. always @* begin + //select two MSBs of the product case(mul_fix_out[`MULT_W-1:`MULT_W-2]) - 2'b01: M_result = mul_fix_out[`MULT_W-3:`M_W]; - 2'b10: M_result = mul_fix_out[`MULT_W-2:`M_W+1]; - 2'b11: M_result = mul_fix_out[`MULT_W-2:`M_W+1]; - default: M_result = mul_fix_out[`MULT_W-2:`M_W+1]; + //Example: If mul_fix_out is 8 bits wide and represents 01xxxxxx (binary), it extracts xxxxxx, assuming the MSBs are 01 + 2'b01: M_result = mul_fix_out[`MULT_W-3:`M_W]; //MSB is dropped(as it is always 1) + //In 2'b10 or 2'b11 case: 10yyyyyy → Shift → 0yyyyyy (Extract yyyyyy) + 2'b10: M_result = mul_fix_out[`MULT_W-2:`M_W+1]; // Between two and just under 4. product larger than normalized range, so we need to shift right + 2'b11: M_result = mul_fix_out[`MULT_W-2:`M_W+1]; // same as line above. + default: M_result = mul_fix_out[`MULT_W-2:`M_W+1]; // default same as two lines above endcase end // Overflow check always @* begin + //Different cases for overflow: + //1. If either of the inputs is zero, then the result is zero and there is no overflow. + //2. Underflow check: If the sum of the exponents is less than the minimum exponent, then the result is zero and there is no overflow. {2'b0,{(EXP_W-1){1'b1}}} is the minimum exponent(001111111 in case of 32bit float) + //3. Overflow check: If the sum of the exponents is greater than the maximum exponent, then the result is infinity and there is overflow. EXP_MAX is the maximum exponent. overflow = (zero_check || ({1'b0, a_in[`BIT_W-2:`M_W]} + {1'b0, b_in[`BIT_W-2:`M_W]} + {{`EXP_W{1'b0}}, mul_fix_out[`MULT_W-1]}) < {2'b0,{(`EXP_W-1){1'b1}}} || ({1'b0, a_in[`BIT_W-2:`M_W]} + {1'b0, b_in[`BIT_W-2:`M_W]} + {8'd0, mul_fix_out[`MULT_W-1]}) > `EXP_MAX); if (~zero_check) begin if (overflow) begin e_result0 = {(`EXP_W+1){1'b1}}; end else begin + //1. We extend the exponent by 1 bit because the result of addition of two exponents can be 1 bit larger than the exponent itself. + //2. We add the MSB of the mantissa multiplication(before normalization) to the exponent sum to account for the shifting of the mantissa. + //3. We subtract the bias from the exponent sum to get the final exponent because just adding two exponents would give us exp1 + exp2 + 2 x bias. e_result0 = ({1'b0, a_in[`BIT_W-2:`M_W]} + {1'b0, b_in[`BIT_W-2:`M_W]} + {{`EXP_W{1'b0}}, mul_fix_out[`MULT_W-1]}) - {2'b0,{(`EXP_W-1){1'b1}}}; end end else begin diff --git a/tb/block_tb.v b/tb/block_tb.v deleted file mode 100644 index 7bd07c2..0000000 --- a/tb/block_tb.v +++ /dev/null @@ -1,77 +0,0 @@ - -`timescale 1ns/1ps - -module block_tb; - - reg [31:0] inp_north, inp_west, weight_in; - reg clk, rst, compute, weight_en; - wire [31:0] outp_south, outp_east; - - // Instantiate the block module - block uut ( - .inp_north(inp_north), - .inp_west(inp_west), - .weight_in(weight_in), - .outp_south(outp_south), - .outp_east(outp_east), - .clk(clk), - .rst(rst), - .compute(compute), - .weight_en(weight_en) - ); - - // Clock generation - initial begin - clk = 0; - forever #5 clk = ~clk; // Generate a clock with 10ns period (100MHz) - end - - // Test stimulus - initial begin - // Initialize inputs - rst = 1; // Assert reset - weight_en = 0; - compute = 0; - inp_north = 0; - inp_west = 0; - weight_in = 0; - - // Wait for a few clock cycles - #(20); - - // Deassert reset - rst = 0; - weight_en = 1; - weight_in = 32'd13; // Load a sample weight - inp_west = 32'd2; // Load a sample activation - - // Wait for the weight to be loaded - #(10); - weight_en = 0; - - // Start computation - compute = 1; - #(10); - inp_west = 32'd3; - inp_north = 32'd5; // Load a sample partial sum - - // Continue the simulation for several cycles to observe behavior - // This should be expanded with specific cases and assertions as needed - #(100); - - // Finish the simulation - $finish; - end - - // Optional: Monitor outputs and important signal transitions - initial begin - $monitor("Time = %t, rst = %b, compute = %b, weight_en = %b, inp_north = %h, inp_west = %h, weight_in = %h, outp_south = %h, outp_east = %h", - $time, rst, compute, weight_en, inp_north, inp_west, weight_in, outp_south, outp_east); - end - - initial begin - $dumpfile("block_wave.vcd"); - $dumpvars(0, block_tb); - end - -endmodule diff --git a/tb/fmul_tb.v b/tb/fmul_tb.v deleted file mode 100644 index 345bca2..0000000 --- a/tb/fmul_tb.v +++ /dev/null @@ -1,52 +0,0 @@ -`timescale 1ns / 1ps - -module fmul_tb; - - `define BIT_W 32 - - // Inputs - reg [`BIT_W-1:0] a_in; - reg [`BIT_W-1:0] b_in; - - // Outputs - wire [`BIT_W-1:0] result; - - // Instantiate the Unit Under Test (UUT) - fmul uut ( - .a_in(a_in), - .b_in(b_in), - .result(result) - ); - - initial begin - // Initialize Inputs - a_in = 0; - b_in = 0; - - // Wait 100 ns for global reset to finish - #100; - - // Apply test cases - // Note: You will need to replace these with actual test values that are meaningful for your application. - // These are just placeholders to demonstrate the structure of the testbench. - - a_in = 32'b01000000110000000000000000000000; // Replace 'xxxxxxxx' with actual test data - b_in = 32'b01000000111000000000000000000000; // Replace 'xxxxxxxx' with actual test data - #10; // Wait for some time - - // a_in = 32'hxxxxxxxx; // Next test data - // b_in = 32'hxxxxxxxx; - // #10; - - // Add as many test cases as needed to thoroughly test your module - - // Finish the simulation - $finish; - end - - initial begin - $dumpfile("fmul_wave.vcd"); - $dumpvars(0, fmul_tb); - end - -endmodule diff --git a/tb/spi_slave_tb.v b/tb/spi_slave_tb.v deleted file mode 100644 index 7580b7d..0000000 --- a/tb/spi_slave_tb.v +++ /dev/null @@ -1,79 +0,0 @@ -`timescale 1ns / 1ps - -module spi_slave_tb; - - reg clk; - reg rst; - reg ss; - reg mosi; - wire miso; - reg sck; - wire done; - reg [7:0] din; - wire [7:0] dout; - - // Instantiate the Unit Under Test (UUT) - spi_slave uut ( - .clk(clk), - .rst(rst), - .ss(ss), - .mosi(mosi), - .miso(miso), - .sck(sck), - .done(done), - .din(din), - .dout(dout) - ); - - // Clock generation - always #5 clk = (clk === 1'b0); // 100MHz clock - - // Task to upload data - task upload_data; - input [7:0] data; - integer i; - begin - ss <= 1'b0; // Assert slave select - for (i=7; i>=0; i=i-1) begin - mosi <= data[i]; // Set MOSI to the current bit of data - #10; // Wait half period for stability - sck <= 1'b1; // Clock high - #10; // Complete the clock period - sck <= 1'b0; // Clock low - end - ss <= 1'b1; // Deassert slave select - #20; // Wait some time after data upload - end - endtask - - // Test sequence - initial begin - // Initialize inputs - clk <= 0; - rst <= 1; - ss <= 1; - mosi <= 0; - sck <= 0; - din <= 8'hAA; // Example data to load into the slave - - // Dump file - $dumpfile("spi_slave.vcd"); - $dumpvars(0, spi_slave_tb); - - // Reset the system - #15; - rst <= 0; - #10; - rst <= 1; - #20; - - // Upload data - upload_data(8'h55); // Send 0x55 as an example - upload_data(8'h3C); // Send another byte - - // Finish simulation - #100; - $finish; - end - -endmodule diff --git a/tb/systolic_array_tb.v b/tb/systolic_array_tb.v deleted file mode 100644 index 7b7077a..0000000 --- a/tb/systolic_array_tb.v +++ /dev/null @@ -1,92 +0,0 @@ -`timescale 1ns / 1ps - -module systolic_array_tb; - - reg [31:0] inp_west0, inp_west3, inp_west6; - reg [31:0] inp_weight0, inp_weight1, inp_weight2, inp_weight3, - inp_weight4, inp_weight5, inp_weight6, inp_weight7, inp_weight8; - reg clk, rst, compute, weight_en; - - // Instantiate the Unit Under Test (UUT) - systolic_array uut ( - .inp_west0(inp_west0), .inp_west3(inp_west3), .inp_west6(inp_west6), - .inp_weight0(inp_weight0), .inp_weight1(inp_weight1), .inp_weight2(inp_weight2), - .inp_weight3(inp_weight3), .inp_weight4(inp_weight4), .inp_weight5(inp_weight5), - .inp_weight6(inp_weight6), .inp_weight7(inp_weight7), .inp_weight8(inp_weight8), - .clk(clk), .rst(rst), .compute(compute), .weight_en(weight_en) - ); - - // Clock generation - initial begin - clk = 0; - forever #10 clk = ~clk; // 50 MHz clock - end - - // Test sequence - initial begin - // Initialize Inputs - rst = 1; - compute = 0; - weight_en = 0; - // Reset the system - #100; - rst = 0; - - // Load the weight matrix - weight_en = 1; - inp_weight0 = 32'd1; - inp_weight1 = 32'd2; - inp_weight2 = 32'd3; - inp_weight3 = 32'd4; - inp_weight4 = 32'd5; - inp_weight5 = 32'd6; - inp_weight6 = 32'd7; - inp_weight7 = 32'd8; - inp_weight8 = 32'd9; - #20; - weight_en = 0; // Disable weight loading - - // Input activation matrix - inp_west0 = 32'd1; - inp_west3 = 32'd0; - inp_west6 = 32'd0; - compute = 1; // Start computation - #20; - - inp_west0 = 32'd4; - inp_west3 = 32'd2; - inp_west6 = 32'd0; - #20; - inp_west0 = 32'd7; - inp_west3 = 32'd5; - inp_west6 = 32'd3; - #20; - inp_west0 = 32'd0; - inp_west3 = 32'd8; - inp_west6 = 32'd6; - #20; - inp_west0 = 32'd0; - inp_west3 = 32'd0; - inp_west6 = 32'd9; - // Observe the output for a few cycles - // Note: You would need additional logic to read out the final results from the array - #100; - - compute = 0; // Stop computation - #20; - - // Add your own checks to validate the outputs - // This will depend on how you decide to capture and observe the outputs - // from the systolic array. - - // Finish simulation - $finish; - end - - - initial begin - $dumpfile("systolic_array_wave.vcd"); - $dumpvars(0, systolic_array_tb); - end - -endmodule diff --git a/tests/Makefile b/tests/Makefile index 87acaa0..d7b01e3 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -10,10 +10,9 @@ VERILOG_SOURCES = ../src/define.v \ ../src/systolic_array.v # You will have to create separate test rules for each module -# For example, to test fmul.v TOPLEVEL_LANG = verilog -TOPLEVEL = fmul -MODULE = test_fmul +TOPLEVEL = systolic_array +MODULE = test_systolic_array include $(shell cocotb-config --makefiles)/Makefile.sim diff --git a/tests/test_block.py b/tests/test_block.py index e69de29..2b5ba52 100644 --- a/tests/test_block.py +++ b/tests/test_block.py @@ -0,0 +1,42 @@ +import cocotb +from cocotb.triggers import Timer, RisingEdge +from cocotb.clock import Clock +from cocotb.binary import BinaryValue + +@cocotb.test() +async def block_test(dut): + # Clock generation + clock = Clock(dut.clk, 10, units="ns") # 100 MHz clock + cocotb.fork(clock.start()) + + # Initialize inputs + dut.rst.value = 1 # Assert reset + dut.weight_en.value = 0 + dut.compute.value = 0 + dut.inp_north.value = 0 + dut.inp_west.value = 0 + dut.weight_in.value = 0 + + # Wait for a few clock cycles + await Timer(20, units="ns") + + # Deassert reset and set initial test values + dut.rst.value = 0 + dut.weight_en.value = 1 + dut.weight_in.value = BinaryValue("01000001010100000000000000000000") # Sample weight + dut.inp_west.value = BinaryValue("01000000000000000000000000000000") # Sample activation + + # Wait for the weight to be loaded + await Timer(10, units="ns") + dut.weight_en.value = 0 + + # Start computation + dut.compute.value = 1 + + await Timer(10, units="ns") + assert dut.outp_east.value == BinaryValue("01000000000000000000000000000000"), "Mismatch in outp_east" + assert dut.outp_south.value == BinaryValue("01000001110100000000000000000000"), "Mismatch in outp_south" + + + # Finish simulation + # (Cocotb automatically closes the simulation after the test completes) diff --git a/tests/test_fadd.py b/tests/test_fadd.py index e69de29..3849b3b 100644 --- a/tests/test_fadd.py +++ b/tests/test_fadd.py @@ -0,0 +1,51 @@ +import cocotb +from cocotb.triggers import Timer +from cocotb.binary import BinaryValue + +@cocotb.test() +async def fmul_tb(dut): + """ Test for floating point addition """ + + # Define the bit width + BIT_W = 32 + + # Initialize Inputs + dut.a_in.value = 0 + dut.b_in.value = 0 + + # Wait 100 ns for global reset to finish + await Timer(10, units='ns') + + # Test two positive. 6 +7 = 13 + dut.a_in.value = BinaryValue("01000000110000000000000000000000") + dut.b_in.value = BinaryValue("01000000111000000000000000000000") + await Timer(1, units='ns') + assert dut.result.value == BinaryValue("01000001010100000000000000000000") + await Timer(9, units='ns') + + # Test one positive, and one negative number. 16 -5 = 11 + dut.a_in.value = BinaryValue("01000001100000000000000000000000") + dut.b_in.value = BinaryValue("11000000101000000000000000000000") + await Timer(1, units='ns') + assert dut.result.value == BinaryValue("01000001001100000000000000000000") + await Timer(9, units='ns') + + # Test one positive, and one negative number. 0.25 + 0.3 = 0.55 + dut.a_in.value = BinaryValue("00111110100000000000000000000000") + dut.b_in.value = BinaryValue("00111110100110011001100110011010") + await Timer(1, units='ns') + assert dut.result.value == BinaryValue("00111111000011001100110011001101") + await Timer(9, units='ns') + + dut.a_in.value = BinaryValue("00000000000000000000000000000000") + dut.b_in.value = BinaryValue("00000000000000000000000000000000") + await Timer(1, units='ns') + assert dut.result.value == BinaryValue("00000000000000000000000000000000") + await Timer(9, units='ns') + + dut.a_in.value = BinaryValue("11111111111111111111111111111111") + dut.b_in.value = BinaryValue("11111111111111111111111111111111") + await Timer(10, units='ns') + + # Finish the simulation + dut._log.info("Test completed") diff --git a/tests/test_fmul.py b/tests/test_fmul.py index 395d7f0..977cfd6 100644 --- a/tests/test_fmul.py +++ b/tests/test_fmul.py @@ -14,7 +14,7 @@ async def fmul_tb(dut): dut.b_in.value = 0 # Wait 100 ns for global reset to finish - await Timer(100, units='ns') + await Timer(10, units='ns') # Apply test cases dut.a_in.value = BinaryValue("01000000110000000000000000000000") # Replace with actual test data diff --git a/tests/test_systolic_array.py b/tests/test_systolic_array.py index e69de29..2620d9d 100644 --- a/tests/test_systolic_array.py +++ b/tests/test_systolic_array.py @@ -0,0 +1,71 @@ +import cocotb +from cocotb.triggers import RisingEdge, Timer +from cocotb.clock import Clock +from cocotb.binary import BinaryValue + +@cocotb.test() +async def systolic_array_test(dut): + # Clock generation + clock = Clock(dut.clk, 20, units="ns") # 50 MHz clock + cocotb.fork(clock.start()) + + # Initialize Inputs + dut.rst.value = 1 + dut.compute.value = 0 + dut.weight_en.value = 0 + + # Reset the system + await Timer(100, units="ns") + dut.rst.value = 0 + + # Load the weight matrix + dut.weight_en.value = 1 + dut.inp_weight0.value = BinaryValue("00111111100000000000000000000000") # 1 + dut.inp_weight1.value = BinaryValue("01000000000000000000000000000000") # 2 + dut.inp_weight2.value = BinaryValue("01000000010000000000000000000000") # 3 + dut.inp_weight3.value = BinaryValue("01000000100000000000000000000000") # 4 + dut.inp_weight4.value = BinaryValue("01000000101000000000000000000000") # 5 + dut.inp_weight5.value = BinaryValue("01000000110000000000000000000000") # 6 + dut.inp_weight6.value = BinaryValue("01000000111000000000000000000000") # 7 + dut.inp_weight7.value = BinaryValue("01000001000000000000000000000000") # 8 + dut.inp_weight8.value = BinaryValue("01000001000100000000000000000000") # 9 + await Timer(20, units="ns") + dut.weight_en.value = 0 # Disable weight loading + + # Input activation matrix + dut.inp_west0.value = BinaryValue("00111111100000000000000000000000") # 1 + dut.inp_west3.value = BinaryValue("00000000000000000000000000000000") # 0 + dut.inp_west6.value = BinaryValue("00000000000000000000000000000000") # 0 + dut.compute.value = 1 # Start computation + await Timer(20, units="ns") + + # Continuing the input activation matrix sequence + dut.inp_west0.value = BinaryValue("01000000100000000000000000000000") + dut.inp_west3.value = BinaryValue("01000000000000000000000000000000") + dut.inp_west6.value = BinaryValue("00000000000000000000000000000000") + await Timer(20, units="ns") + + dut.inp_west0.value = BinaryValue("01000000111000000000000000000000") + dut.inp_west3.value = BinaryValue("01000000101000000000000000000000") + dut.inp_west6.value = BinaryValue("01000000010000000000000000000000") + await Timer(20, units="ns") + + dut.inp_west0.value = BinaryValue("00000000000000000000000000000000") + dut.inp_west3.value = BinaryValue("01000001000000000000000000000000") + dut.inp_west6.value = BinaryValue("01000000110000000000000000000000") + await Timer(20, units="ns") + + dut.inp_west0.value = BinaryValue("00000000000000000000000000000000") + dut.inp_west3.value = BinaryValue("00000000000000000000000000000000") + dut.inp_west6.value = BinaryValue("01000001000100000000000000000000") + await Timer(20, units="ns") + + # Observe the output for a few cycles + # Note: Need additional logic to read out the final results from the array + # This can involve checking the output signals of the systolic array + # For example: assert dut.output_signal.value == expected_value, "Mismatch in output" + await Timer(100, units="ns") + + # Stop computation + dut.compute.value = 0 + await Timer(20, units="ns") \ No newline at end of file