diff --git a/src/ariane.sv b/src/ariane.sv index 549e7ae65d4f0711f7fb1a1b3899dfb1698bc047..33752853c76c66d581a5a5b1908810ebeab6aee9 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -80,8 +80,14 @@ module ariane import ariane_pkg::*; #( // IF -> verifier // -------------- logic has_mem_access_if_verif; + logic has_ctrl_flow_if_icache; logic bsp_if_perf; + // -------------- + // IF <-> * + // -------------- + logic begin_spec_if_ic; + // -------------- // ID <-> ISSUE // -------------- @@ -94,11 +100,13 @@ module ariane import ariane_pkg::*; #( // ID -> verifier // -------------- logic has_mem_access_id_verif; + logic has_ctrl_flow_id_icache; // -------------- // ISSUE -> verifier // -------------- logic has_mem_access_is_verif; + logic has_ctrl_flow_is_icache; // -------------- // ISSUE <-> EX @@ -285,7 +293,9 @@ module ariane import ariane_pkg::*; #( .fetch_entry_valid_o ( fetch_valid_if_id ), .fetch_entry_ready_i ( fetch_ready_id_if ), .has_mem_access_o ( has_mem_access_if_verif ), + .has_cf_o ( has_ctrl_flow_if_icache ), .branch_speculation_o( bsp_if_perf ), + .begin_spec_o ( begin_spec_if_ic ), .* ); @@ -306,6 +316,7 @@ module ariane import ariane_pkg::*; #( .issue_entry_valid_o ( issue_entry_valid_id_issue ), .is_ctrl_flow_o ( is_ctrl_fow_id_issue ), .is_mem_instr_o ( has_mem_access_id_verif ), + .has_ctrl_flow_o ( has_ctrl_flow_id_icache ), .issue_instr_ack_i ( issue_instr_issue_id ), .priv_lvl_i ( priv_lvl ), @@ -378,6 +389,7 @@ module ariane import ariane_pkg::*; #( .commit_instr_o ( commit_instr_id_commit ), .commit_ack_i ( commit_ack ), .has_mem_access_o ( has_mem_access_is_verif ), + .has_control_flow_o ( has_ctrl_flow_is_icache ), .* ); @@ -660,12 +672,15 @@ module ariane import ariane_pkg::*; #( // IF .if_has_mem_access_i (has_mem_access_if_verif), + .if_has_cf_i (has_ctrl_flow_if_icache), // ID .id_has_mem_access_i (has_mem_access_id_verif), + .id_has_cf_i (has_ctrl_flow_id_icache), // IS .is_has_mem_access_i (has_mem_access_is_verif), + .is_has_cf_i 
(has_ctrl_flow_is_icache), // LSU .no_st_pending_commit_i (no_st_pending_ex), @@ -682,6 +697,11 @@ module ariane import ariane_pkg::*; #( // ------------------- `ifdef WT_DCACHE + logic icache_valid_spec, icache_bad_spec; + + assign icache_valid_spec = resolved_branch.valid && !(resolved_branch.is_mispredict) && (resolved_branch.cf_type != Jump); + assign icache_bad_spec = resolved_branch.valid && resolved_branch.is_mispredict; + // this is a cache subsystem that is compatible with OpenPiton wt_cache_subsystem #( .ArianeCfg ( ArianeCfg ) @@ -698,6 +718,9 @@ module ariane import ariane_pkg::*; #( .icache_dreq_i ( icache_dreq_if_cache ), .icache_dreq_o ( icache_dreq_cache_if ), .icache_stall_i ( icache_stall_ctrl ), + .icache_begin_spec_i ( begin_spec_if_ic ), + .icache_valid_spec_i ( icache_valid_spec ), + .icache_bad_spec_i ( icache_bad_spec ), // D$ .dcache_enable_i ( dcache_en_csr_nbdcache ), .dcache_flush_i ( dcache_flush_ctrl_cache ), diff --git a/src/branch_unit.sv b/src/branch_unit.sv index 2b4b3fce50bb8f7e676ddecbe7e59c0b8ced8cf0..1ef9b5389daa84a7d83fbcb8c61a7514d327c1a6 100644 --- a/src/branch_unit.sv +++ b/src/branch_unit.sv @@ -65,11 +65,11 @@ module branch_unit ( resolved_branch_o.target_address = (branch_comp_res_i) ? 
target_address : next_pc; resolved_branch_o.is_taken = branch_comp_res_i; // check the outcome of the branch speculation - if (ariane_pkg::op_is_branch(fu_data_i.operator)) begin - resolved_branch_o.cf_type = ariane_pkg::Branch; + if (ariane_pkg::op_is_branch(fu_data_i.operator) && branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch)) begin // we mis-predicted the outcome // if the outcome doesn't match we've got a mis-predict - resolved_branch_o.is_mispredict = branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch); + resolved_branch_o.is_mispredict = 1'b1; + resolved_branch_o.cf_type = ariane_pkg::Branch; end if (fu_data_i.operator == ariane_pkg::JALR // check if the address of the jump register is correct and that we actually predicted diff --git a/src/cache_subsystem/lru_4way.sv b/src/cache_subsystem/lru_4way.sv new file mode 100644 index 0000000000000000000000000000000000000000..2a3235ca8783759106ebdb398d1c81d23c2840c9 --- /dev/null +++ b/src/cache_subsystem/lru_4way.sv @@ -0,0 +1,77 @@ +module lru_4way ( // combinational LRU bookkeeping for one 4-way set: decode the 5-bit order code, apply one access, re-encode + input logic [1:0] hit_i, // index of the accessed way; has no effect on any output while miss_i is set + input logic miss_i, // when set, the current LRU way (ways[3]) is moved to the most-recent position + input logic [4:0] ages_i, // order code in: [1:0] = most recent way, [3:2] = 2nd most recent way, [4] = order of the two remaining ways + output logic [4:0] ages_o, // order code after the access, same layout as ages_i + output logic [1:0] evicted_o, // least recently used way according to ages_i + output logic updated_o // set on a miss, when hit_i is not the most recent way, or when the decoded 2nd way differs from ages_i[3:2] +); + + function automatic logic [1:0] ordered_2 (logic [1:0] fstway, logic [1:0] sndway); // smaller of the two way indices not named by the arguments; callers pass the pair sorted ascending + return (fstway != 2'b00) ? 2'b00 : ((sndway == 2'b01) ? 2'b10 : 2'b01); + endfunction + + function automatic logic [1:0] ordered_3 (logic [1:0] fstway, logic [1:0] sndway); // larger of the two way indices not named by the arguments; callers pass the pair sorted ascending + return (sndway != 2'b11) ? 2'b11 : ((fstway == 2'b10) ? 2'b01 : 2'b10); + endfunction + + logic [3:0][1:0] ways, ordered_ways; + logic [2:0][1:0] new_ways; + logic [1:0][1:0] new_ways_ordered; + logic [1:0] predicted_3rd_way; + + always_comb begin : decode // unpack ages_i into ways[0..3], most recent first + ways[0] = ages_i[1:0]; + ways[1] = (ways[0] == 2'b00 && ages_i[3:2] == 2'b00) ? 2'b01 : ages_i[3:2]; // both fields zero would name way 0 twice, so the 2nd way is read as way 1 + + ordered_ways[0] = (ways[0] < ways[1]) ? ways[0] : ways[1]; + ordered_ways[1] = (ways[0] < ways[1]) ? 
ways[1] : ways[0]; + + ordered_ways[2] = ordered_2(ordered_ways[0], ordered_ways[1]); + ordered_ways[3] = ordered_3(ordered_ways[0], ordered_ways[1]); + + ways[2] = (ages_i[4]) ? ordered_ways[3] : ordered_ways[2]; + ways[3] = (ages_i[4]) ? ordered_ways[2] : ordered_ways[3]; + end + + // assign ways[0] = ages_i[1:0]; + // assign ways[1] = (ages_i[1:0] == 0'b00 && ages_i[3:2] == 0'b00) ? 0'b01 : ages_i[3:2]; + + // assign ordered_ways[0] = (ways[0] < ways[1]) ? ways[0] : ways[1]; + // assign ordered_ways[1] = (ways[0] < ways[1]) ? ways[1] : ways[0]; + + // assign ordered_ways[2] = ordered_2(ordered_ways[0], ordered_ways[1]); + // assign ordered_ways[3] = ordered_3(ordered_ways[0], ordered_ways[1]); + + // assign ways[2] = (ages_i[4]) ? ordered_ways[3] : ordered_ways[2]; + // assign ways[3] = (ages_i[4]) ? ordered_ways[2] : ordered_ways[3]; + + assign evicted_o = ways[3]; // replacement victim: the last entry of the decoded order + assign updated_o = miss_i || (ways[0] != hit_i) || (ways[1] != ages_i[3:2]); + + always_comb begin : encode // move the accessed way to the front and re-pack the order into ages_o + ages_o = ages_i; + new_ways = ways[2:0]; + + if (miss_i || ways[0] != hit_i) begin + if (miss_i || ways[1] != hit_i) begin // miss, or access to neither of the two leading ways: new leader in front, old leaders shift back by one + new_ways[0] = (miss_i) ? ways[3] : hit_i; + new_ways[1] = ways[0]; + new_ways[2] = ways[1]; + end else begin // access to the 2nd way: swap the two leaders, 3rd way unchanged + new_ways[0] = ways[1]; + new_ways[1] = ways[0]; + end + end + + new_ways_ordered[0] = (new_ways[0] < new_ways[1]) ? new_ways[0] : new_ways[1]; + new_ways_ordered[1] = (new_ways[0] < new_ways[1]) ? 
new_ways[1] : new_ways[0]; + + predicted_3rd_way = ordered_2(new_ways_ordered[0], new_ways_ordered[1]); + if (new_ways[2] == predicted_3rd_way) begin + ages_o = {1'b0, new_ways[1], new_ways[0]}; + end else begin + ages_o = {1'b1, new_ways[1], new_ways[0]}; + end + end +endmodule diff --git a/src/cache_subsystem/wt_cache_subsystem.sv b/src/cache_subsystem/wt_cache_subsystem.sv index 36f7a2be0d361f1b26370863d2a319a8ad6de15d..73e4527773ab25e3bf27de778e0c4975e91b9e6a 100644 --- a/src/cache_subsystem/wt_cache_subsystem.sv +++ b/src/cache_subsystem/wt_cache_subsystem.sv @@ -35,6 +35,9 @@ module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #( input icache_dreq_i_t icache_dreq_i, // to/from frontend output icache_dreq_o_t icache_dreq_o, input logic icache_stall_i, + input logic icache_begin_spec_i, + input logic icache_valid_spec_i, + input logic icache_bad_spec_i, // D$ // Cache management input logic dcache_enable_i, // from CSR @@ -91,7 +94,10 @@ module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #( .mem_data_req_o ( icache_adapter_data_req ), .mem_data_ack_i ( adapter_icache_data_ack ), .mem_data_o ( icache_adapter ), - .stall_req_i ( icache_stall_i ) + .stall_req_i ( icache_stall_i ), + .begin_spec_i ( icache_begin_spec_i ), + .valid_spec_i ( icache_valid_spec_i ), + .bad_spec_i ( icache_bad_spec_i ) ); diff --git a/src/cache_subsystem/wt_icache.sv b/src/cache_subsystem/wt_icache.sv index 0b3cd9db77074fcb6f5ccd69abf6a46c53e2472c..36b78e2aef403f42509a1973b2302a52727a0a7c 100644 --- a/src/cache_subsystem/wt_icache.sv +++ b/src/cache_subsystem/wt_icache.sv @@ -27,7 +27,8 @@ module wt_icache import ariane_pkg::*; import wt_cache_pkg::*; #( parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 0, // ID to be used for read transactions - parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig // contains cacheable regions + parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig, // contains 
cacheable regions + parameter int SpecDepth = 16 ) ( input logic clk_i, input logic rst_ni, @@ -47,7 +48,10 @@ module wt_icache import ariane_pkg::*; import wt_cache_pkg::*; #( output logic mem_data_req_o, input logic mem_data_ack_i, output icache_req_t mem_data_o, - input logic stall_req_i + input logic stall_req_i, + input logic begin_spec_i, + input logic valid_spec_i, + input logic bad_spec_i ); // signals @@ -61,11 +65,10 @@ module wt_icache import ariane_pkg::*; import wt_cache_pkg::*; #( logic flush_d, flush_q; // used to register and signal pending flushes // replacement strategy - logic update_lfsr; // shift the LFSR + logic lru_miss; // evict the oldest cacheline in the set logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered - logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace - logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot) + logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way_d, repl_way_q; // way to replace (d/q) logic all_ways_valid; // we need to switch repl strategy since all are valid // invalidations / flushing @@ -88,6 +91,9 @@ module wt_icache import ariane_pkg::*; import wt_cache_pkg::*; #( logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit + // LRU + logic lru_lock_d, lru_lock_q; + // cpmtroller FSM typedef enum logic[2:0] {FLUSH, IDLE, READ, MISS, TLB_MISS, KILL_ATRANS, KILL_MISS} state_e; state_e state_d, state_q; @@ -229,7 +235,7 @@ end else begin : gen_piton_offset if (flush_d) begin state_d = IDLE; // we have a hit or an exception output valid result - end else if ((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) begin + end else if (!lru_lock_d && ((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid)) begin dreq_o.valid = ~dreq_i.kill_s2;// just don't output in this case state_d = IDLE; @@ -250,7 
+256,7 @@ end else begin : gen_piton_offset // we have a miss / NC transaction end else if (dreq_i.kill_s2) begin state_d = IDLE; - end else begin + end else if (!lru_lock_d && !stall_req_i) begin cmp_en_d = 1'b0; // only count this as a miss if the cache is enabled, and // the address is cacheable @@ -260,6 +266,8 @@ end else begin : gen_piton_offset miss_o = ~paddr_is_nc; state_d = MISS; end + end else if (lru_lock_d || stall_req_i) begin + state_d = READ; end // bail out if this request is being killed (and we missed on the TLB) end else if (dreq_i.kill_s2 || flush_d) begin @@ -365,26 +373,26 @@ end else begin : gen_piton_offset assign vld_req = (flush_en || cache_rden) ? '1 : (mem_rtrn_i.inv.all && inv_en) ? '1 : (mem_rtrn_i.inv.vld && inv_en) ? icache_way_bin2oh(mem_rtrn_i.inv.way) : - repl_way_oh_q; + icache_way_bin2oh(repl_way_q); assign vld_wdata = (cache_wren) ? '1 : '0; assign vld_we = (cache_wren | inv_en | flush_en); // assign vld_req = (vld_we | cache_rden); + logic [$clog2(ICACHE_SET_ASSOC)-1:0] evicted; - // chose random replacement if all are valid - assign update_lfsr = cache_wren & all_ways_valid; - assign repl_way = (all_ways_valid) ? rnd_way : inv_way; - assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q; + // chose LRU replacement if all are valid + assign lru_miss = cache_wren & all_ways_valid; + assign repl_way = (all_ways_valid) ? evicted : inv_way; + assign repl_way_d = (cmp_en_q) ? repl_way : repl_way_q; // enable signals for memory arrays assign cl_req = (cache_rden) ? '1 : - (cache_wren) ? repl_way_oh_q : + (cache_wren) ? 
icache_way_bin2oh(repl_way_q) : '0; assign cl_we = cache_wren; - // find invalid cache line lzc #( .WIDTH ( ICACHE_SET_ASSOC ) @@ -394,18 +402,6 @@ end else begin : gen_piton_offset .empty_o ( all_ways_valid ) ); - // generate random cacheline index - lfsr_8bit #( - .WIDTH (ICACHE_SET_ASSOC) - ) i_lfsr ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .en_i ( update_lfsr ), - .refill_way_oh ( ), - .refill_way_bin ( rnd_way ) - ); - - /////////////////////////////////////////////////////// // tag comparison, hit generation /////////////////////////////////////////////////////// @@ -417,7 +413,6 @@ end else begin : gen_piton_offset assign cl_sel[i] = cl_rdata[i][{cl_offset_q,3'b0} +: FETCH_WIDTH]; end - lzc #( .WIDTH ( ICACHE_SET_ASSOC ) ) i_lzc_hit ( @@ -430,11 +425,64 @@ end else begin : gen_piton_offset mem_rtrn_i.data[{cl_offset_q,3'b0} +: FETCH_WIDTH]; /////////////////////////////////////////////////////// -// memory arrays and regs +// LRU /////////////////////////////////////////////////////// - logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata [ICACHE_SET_ASSOC-1:0]; + logic [4:0] new_ages, lru_sram_read, lru_sram_rread; + logic [1:0] lru_hit; + logic ages_updated, lru_wren; + + logic [$clog2(SpecDepth)-1:0] ptr_spec_d, ptr_spec_q; + logic [$clog2(SpecDepth)-1:0] ptr_backup_d, ptr_backup_q; + logic [SpecDepth-1:0][ICACHE_NUM_WORDS-1:0][4:0] lru_sram_d, lru_sram_q; + + assign ptr_spec_d = (bad_spec_i) ? ptr_backup_q : (begin_spec_i) ? ptr_spec_q + 1'b1 : ptr_spec_q; + assign ptr_backup_d = (valid_spec_i) ? ptr_backup_q + 1'b1 : ptr_backup_q; + + assign lru_hit = (cache_wren) ? repl_way_q : hit_idx; + assign lru_wren = |vld_req & (cache_rden | cache_wren) & ages_updated & dreq_o.valid; + assign lru_sram_rread = (|vld_rdata) ? 
lru_sram_read : '0; + + always_comb begin // lru_lock: set when a new speculation starts and the next spec pointer equals the next backup pointer, otherwise cleared by a valid or bad resolution + lru_lock_d = lru_lock_q; + + if (begin_spec_i & ~bad_spec_i && (ptr_spec_d == ptr_backup_d)) begin + lru_lock_d = 1'b1; + end else if (valid_spec_i || bad_spec_i) begin + lru_lock_d = 1'b0; + end + end + + always_comb begin // next state of the per-speculation copies of the LRU order codes + lru_sram_d = lru_sram_q; + lru_sram_read = '0; + + if (!bad_spec_i && begin_spec_i) begin + lru_sram_d[ptr_spec_d] = lru_sram_q[ptr_spec_q]; // a new speculation starts from a copy of the current slot + end + + if (cache_rden) begin + lru_sram_read = lru_sram_d[ptr_spec_d][vld_addr]; + end + + if (lru_wren) begin + lru_sram_d[ptr_spec_d][vld_addr] = new_ages; // store the 5-bit order code from lru_4way.ages_o; ages_updated is only the 1-bit change flag and already gates lru_wren + end + end + + lru_4way lru ( + .hit_i ( lru_hit ), + .miss_i ( lru_miss ), + .ages_i ( lru_sram_rread ), + .ages_o ( new_ages ), + .evicted_o ( evicted ), + .updated_o ( ages_updated ) + ); + +/////////////////////////////////////////////////////// +// memory arrays and regs +/////////////////////////////////////////////////////// for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_sram // Tag RAM @@ -474,7 +522,6 @@ end else begin : gen_piton_offset ); end - always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs if(!rst_ni) begin cl_tag_q <= '0; @@ -485,7 +532,11 @@ end else begin : gen_piton_offset flush_q <= '0; state_q <= IDLE; cl_offset_q <= '0; - repl_way_oh_q <= '0; + repl_way_q <= '0; + ptr_spec_q <= '0; + ptr_backup_q <= '0; + lru_lock_q <= '0; + lru_sram_q <= '0; end else begin cl_tag_q <= cl_tag_d; flush_cnt_q <= flush_cnt_d; @@ -495,7 +546,11 @@ end else begin : gen_piton_offset flush_q <= flush_d; state_q <= state_d; cl_offset_q <= cl_offset_d; - repl_way_oh_q <= repl_way_oh_d; + repl_way_q <= repl_way_d; + ptr_spec_q <= ptr_spec_d; + ptr_backup_q <= ptr_backup_d; + lru_lock_q <= lru_lock_d; + lru_sram_q <= lru_sram_d; end end @@ -521,6 +576,10 @@ end else begin : gen_piton_offset @(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(cl_hit)) else $fatal(1,"[l1 icache] cl_hit signal must be hot1"); + valid_or_bad_spec: assert property ( + 
@(posedge clk_i) disable iff (!rst_ni) !(valid_spec_i) || (!bad_spec_i)) + else $fatal(1,"[l1 icache] valid and bad speculation at the same time"); + // this is only used for verification! logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0]; logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0]; @@ -548,11 +607,19 @@ end else begin : gen_piton_offset @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test)) else $fatal(1,"[l1 icache] cannot allocate a CL that is already present in the cache"); - initial begin // assert wrong parameterizations assert (ICACHE_INDEX_WIDTH<=12) else $fatal(1,"[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages"); + + assert (ICACHE_SET_ASSOC==4) + else $fatal(1,"[l1 icache] cache must be 4-associative"); + + assert (2 ** $clog2(SpecDepth) == SpecDepth) + else $fatal(1,"[l1 icache] SpecDepth is not a power of 2"); + + assert (SpecDepth >= 2) + else $fatal(1,"[l1 icache] SpecDepth is lower than 2"); end `endif //pragma translate_on diff --git a/src/commit_stage.sv b/src/commit_stage.sv index 7ae9e2ab654b1411a71dc4d4c9724bf07b2edb2a..42e65b8e793a2b304759f67706433827674cfe9e 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -92,20 +92,22 @@ module commit_stage import ariane_pkg::*; #( always_comb begin : commit // default assignments commit_ack_o[0] = 1'b0; + commit_ack_o[1] = 1'b0; - // amo_valid_commit_o = 1'b0; + amo_valid_commit_o = 1'b0; we_gpr_o[0] = 1'b0; we_gpr_o[1] = 1'b0; we_fpr_o = '{default: 1'b0}; - // commit_lsu_o = 1'b0; - // commit_csr_o = 1'b0; + commit_lsu_o = 1'b0; + commit_csr_o = 1'b0; // amos will commit on port 0 wdata_o[0] = (amo_resp_i.ack) ? 
amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result; - // csr_op_o = ADD; // this corresponds to a CSR NOP - // csr_wdata_o = {riscv::XLEN{1'b0}}; + wdata_o[1] = commit_instr_i[1].result; + csr_op_o = ADD; // this corresponds to a CSR NOP + csr_wdata_o = {riscv::XLEN{1'b0}}; fence_i_o = 1'b0; - // fence_o = 1'b0; + fence_o = 1'b0; sfence_vma_o = 1'b0; csr_write_fflags_o = 1'b0; flush_commit_o = 1'b0; @@ -126,7 +128,7 @@ module commit_stage import ariane_pkg::*; #( // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store) if (commit_lsu_ready_i) begin commit_ack_o[0] = 1'b1; - // commit_lsu_o = 1'b1; + commit_lsu_o = 1'b1; // stall in case the store buffer is not able to accept anymore instructions end else begin commit_ack_o[0] = 1'b0; @@ -137,7 +139,7 @@ module commit_stage import ariane_pkg::*; #( // --------- if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin // write the CSR with potential exception flags from retiring floating point instruction - // csr_wdata_o = {{riscv::XLEN-5{1'b0}}, commit_instr_i[0].ex.cause[4:0]}; + csr_wdata_o = {{riscv::XLEN-5{1'b0}}, commit_instr_i[0].ex.cause[4:0]}; csr_write_fflags_o = 1'b1; commit_ack_o[0] = 1'b1; end @@ -148,10 +150,10 @@ module commit_stage import ariane_pkg::*; #( // throw an exception if (commit_instr_i[0].fu == CSR) begin // write the CSR file - // csr_op_o = commit_instr_i[0].op; - // csr_wdata_o = commit_instr_i[0].result; + csr_op_o = commit_instr_i[0].op; + csr_wdata_o = commit_instr_i[0].result; if (!csr_exception_i.valid) begin - // commit_csr_o = 1'b1; + commit_csr_o = 1'b1; wdata_o[0] = csr_rdata_i; commit_ack_o[0] = 1'b1; end else begin @@ -191,7 +193,7 @@ module commit_stage import ariane_pkg::*; #( if (commit_instr_i[0].op == FENCE) begin commit_ack_o[0] = no_st_pending_i; // tell the controller to flush the D$ - // fence_o = no_st_pending_i; + fence_o = no_st_pending_i; end // ------------------ // AMO @@ -201,27 +203,49 @@ module commit_stage import 
ariane_pkg::*; #( commit_ack_o[0] = amo_resp_i.ack; // flush the pipeline flush_commit_o = amo_resp_i.ack; - // amo_valid_commit_o = 1'b1; + amo_valid_commit_o = 1'b1; we_gpr_o[0] = amo_resp_i.ack; end end - end - logic can_commit_instr; - assign can_commit_instr = commit_instr_i[0].valid && !commit_instr_i[0].ex.valid && !halt_i; + if (NR_COMMIT_PORTS > 1) begin + // ----------------- + // Commit Port 2 + // ----------------- + // check if the second instruction can be committed as well and the first wasn't a CSR instruction + // also if we are in single step mode don't retire the second instruction + if (commit_ack_o[0] && commit_instr_i[1].valid + && !halt_i + && !(commit_instr_i[0].fu inside {CSR}) + && !flush_dcache_i + && !instr_0_is_amo + && !single_step_i) begin + // only if the first instruction didn't throw an exception and this instruction won't throw an exception + // and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC + if (!exception_o.valid && !commit_instr_i[1].ex.valid + && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin - assign csr_op_o = (can_commit_instr && commit_instr_i[0].fu == CSR) ? commit_instr_i[0].op : ADD; - assign csr_wdata_o = (can_commit_instr && - commit_instr_i[0].fu inside {FPU, FPU_VEC}) ? {{riscv::XLEN-5{1'b0}}, commit_instr_i[0].ex.cause[4:0]} : - (can_commit_instr && commit_instr_i[0].fu == CSR) ? 
commit_instr_i[0].result : - {riscv::XLEN{1'b0}}; + if (is_rd_fpr(commit_instr_i[1].op)) + we_fpr_o[1] = 1'b1; + else + we_gpr_o[1] = 1'b1; - assign commit_csr_o = can_commit_instr && commit_instr_i[0].fu == CSR && !csr_exception_i.valid; + commit_ack_o[1] = 1'b1; - assign commit_lsu_o = can_commit_instr && commit_instr_i[0].fu == STORE && !instr_0_is_amo && commit_lsu_ready_i; - assign amo_valid_commit_o = can_commit_instr && RVA && instr_0_is_amo; + // additionally check if we are retiring an FPU instruction because we need to make sure that we write all + // exception flags + if (commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin + if (csr_write_fflags_o) + csr_wdata_o = {{riscv::XLEN-5{1'b0}}, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])}; + else + csr_wdata_o = {{riscv::XLEN-5{1'b0}}, commit_instr_i[1].ex.cause[4:0]}; - assign fence_o = can_commit_instr && commit_instr_i[0].op == FENCE; + csr_write_fflags_o = 1'b1; + end + end + end + end + end // ----------------------------- // Exception & Interrupt Logic diff --git a/src/controller.sv b/src/controller.sv index 6e8a1dc2d22a908a59525af4498bdd2ef74598aa..ef40190d3b59c1f172b611358f2c6db9ba4d620d 100644 --- a/src/controller.sv +++ b/src/controller.sv @@ -97,17 +97,13 @@ module controller import ariane_pkg::*; ( flush_id_o = 1'b1; flush_ex_o = 1'b1; flush_icache_o = 1'b1; -// this is not needed in the case since we -// have a write-through cache in this case -`ifndef WT_DCACHE + // Force D$ flushing to avoid issues on the FPGA. 
flush_dcache = 1'b1; fence_active_d = 1'b1; -`endif + + flush_bp_o = 1'b1; end -// this is not needed in the case since we -// have a write-through cache in this case -`ifndef WT_DCACHE // wait for the acknowledge here if (flush_dcache_ack_i && fence_active_q) begin fence_active_d = 1'b0; @@ -115,7 +111,7 @@ module controller import ariane_pkg::*; ( end else if (fence_active_q) begin flush_dcache = 1'b1; end -`endif + // --------------------------------- // SFENCE.VMA // --------------------------------- diff --git a/src/csr_buffer.sv b/src/csr_buffer.sv index 3937aa3b239f25d0650c4f8479832213890b8503..15108f780054bef057b3b715d67df4a447113dc2 100644 --- a/src/csr_buffer.sv +++ b/src/csr_buffer.sv @@ -28,37 +28,46 @@ module csr_buffer import ariane_pkg::*; ( // to CSR file output logic [11:0] csr_addr_o // CSR address to commit stage ); - // This is a single entry store buffer for the address of the CSR + // this is a single entry store buffer for the address of the CSR // which we are going to need in the commit stage - logic [11:0] csr_address_n, csr_address_q; - logic csr_valid_n, csr_valid_q; + struct packed { + logic [11:0] csr_address; + logic valid; + } csr_reg_n, csr_reg_q; - // Write logic - // Clear the buffer if we flushed or if there is a commit with no new valid instruction - assign csr_valid_n = (flush_i) ? 1'b0 : - (csr_commit_i && ~csr_valid_i) ? 1'b0 : - (csr_valid_i) ? 1'b1 : - csr_valid_q; + // control logic, scoreboard signals + assign csr_result_o = fu_data_i.operand_a; + assign csr_addr_o = csr_reg_q.csr_address; - // Store the CSR address if we got a valid from the scoreboard. - assign csr_address_n = (csr_valid_i) ? fu_data_i.operand_b[11:0] : csr_address_q; - - // If we have a valid uncomitted CSR req or are just getting one without a - // commit in, we are not ready. 
- assign csr_ready_o = ~((csr_valid_q || csr_valid_i) && ~csr_commit_i); - - // Control logic, scoreboard signals - assign csr_result_o = fu_data_i.operand_a; - assign csr_addr_o = csr_address_q; - - // Sequential process + // write logic + always_comb begin : write + csr_reg_n = csr_reg_q; + // by default we are ready + csr_ready_o = 1'b1; + // if we have a valid uncommitted csr req or are just getting one WITHOUT a commit in, we are not ready + if ((csr_reg_q.valid || csr_valid_i) && ~csr_commit_i) + csr_ready_o = 1'b0; + // if we got a valid from the scoreboard + // store the CSR address + if (csr_valid_i) begin + csr_reg_n.csr_address = fu_data_i.operand_b[11:0]; + csr_reg_n.valid = 1'b1; + end + // if we get a commit and no new valid instruction -> clear the valid bit + if (csr_commit_i && ~csr_valid_i) begin + csr_reg_n.valid = 1'b0; + end + // clear the buffer if we flushed + if (flush_i) + csr_reg_n.valid = 1'b0; + end + // sequential process always_ff @(posedge clk_i or negedge rst_ni) begin - if (~rst_ni) begin - csr_valid_q <= 1'b0; - csr_address_q <= '0; + if(~rst_ni) begin + csr_reg_q <= '{default: 0}; end else begin - csr_valid_q <= csr_valid_n; - csr_address_q <= csr_address_n; + csr_reg_q <= csr_reg_n; end end + endmodule diff --git a/src/decoder.sv b/src/decoder.sv index 4ea5de725a8582604c894313382488af6abcbbe7..e0c551e79008c34dca3810b3d9a0db2cd99958f4 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -80,10 +80,6 @@ module decoder import ariane_pkg::*; ( instruction_o.trans_id = '0; instruction_o.is_compressed = is_compressed_i; instruction_o.use_zimm = 1'b0; - instruction_o.use_rs1_fpr = 1'b0; - instruction_o.use_rs2_fpr = 1'b0; - instruction_o.use_imm_fpr = 1'b0; - instruction_o.use_rd_fpr = 1'b0; instruction_o.bp = branch_predict_i; ecall = 1'b0; ebreak = 1'b0; @@ -1009,11 +1005,6 @@ module decoder import ariane_pkg::*; ( default: illegal_instr = 1'b1; endcase - - instruction_o.use_rs1_fpr = is_rs1_fpr(instruction_o.op); - 
instruction_o.use_rs2_fpr = is_rs2_fpr(instruction_o.op); - instruction_o.use_imm_fpr = is_imm_fpr(instruction_o.op); - instruction_o.use_rd_fpr = is_rd_fpr(instruction_o.op); end end diff --git a/src/frontend/bht.sv b/src/frontend/bht.sv index e57c34bb7fa0de0fbcf2d4b8b22036f6a34b1e5f..9eae69d3559a21f07f8af295c91c3e5044e47c7b 100644 --- a/src/frontend/bht.sv +++ b/src/frontend/bht.sv @@ -34,6 +34,8 @@ module bht #( localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH); // number of bits we should use for prediction localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS; + // we are not interested in all bits of the address + unread i_unread (.d_i(|vpc_i)); struct packed { logic valid; diff --git a/src/frontend/btb.sv b/src/frontend/btb.sv index 23f365fd460c91e22542f154b2221ffabd955ba1..86eeadc0ac34e6079c9238da41a36b23708cf199 100644 --- a/src/frontend/btb.sv +++ b/src/frontend/btb.sv @@ -36,6 +36,8 @@ module btb #( localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS; // prevent aliasing to degrade performance localparam ANTIALIAS_BITS = 8; + // we are not interested in all bits of the address + unread i_unread (.d_i(|vpc_i)); // typedef for all branch target entries // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects diff --git a/src/frontend/frontend.sv b/src/frontend/frontend.sv index 813294fe48088af6b3ec22fe7e57a9cd9c0e20a7..d0f7b5ca4e9119c53da0d7442e4e0fb8610487a0 100644 --- a/src/frontend/frontend.sv +++ b/src/frontend/frontend.sv @@ -46,7 +46,9 @@ module frontend import ariane_pkg::*; #( input logic fetch_entry_ready_i, // ID acknowledged this instruction output logic has_mem_access_o, - output logic branch_speculation_o + output logic branch_speculation_o, + output logic begin_spec_o, + output logic has_cf_o ); // Instruction Cache Registers, from I$ logic [FETCH_WIDTH-1:0] icache_data_q; @@ -96,7 +98,7 @@ module frontend import ariane_pkg::*; #( ras_t 
ras_predict; // branch-predict update - logic is_mispredict; + logic is_correct_predict, is_mispredict; logic ras_push, ras_pop; logic [riscv::VLEN-1:0] ras_update; @@ -145,6 +147,7 @@ module frontend import ariane_pkg::*; #( logic [INSTR_PER_FETCH-1:0] is_jump; logic [INSTR_PER_FETCH-1:0] is_return; logic [INSTR_PER_FETCH-1:0] is_jalr; + logic [INSTR_PER_FETCH-1:0] is_cf; for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin // branch history table -> BHT @@ -156,9 +159,14 @@ module frontend import ariane_pkg::*; #( // unconditional jumps with known target -> immediately resolved assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]); // unconditional jumps with unknown target -> BTB - assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]); + assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & ~is_call[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]); + + // cf that needs a prediction + assign is_cf[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i] | rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]); end + assign begin_spec_o = (|is_cf) & (~replay); + // taken/not taken always_comb begin taken_rvi_cf = '0; @@ -173,13 +181,12 @@ module frontend import ariane_pkg::*; #( // lower most prediction gets precedence for (int i = INSTR_PER_FETCH - 1; i >= 0 ; i--) begin - ras_pop = 1'b0; - ras_push = 1'b0; - unique case ({is_branch[i], is_return[i], is_jump[i], is_jalr[i]}) 4'b0000:; // regular instruction e.g.: no branch // unconditional jump to register, we need the BTB to resolve this 4'b0001: begin + ras_pop = 1'b0; + ras_push = 1'b0; if (btb_prediction_shifted[i].valid) begin predict_address = btb_prediction_shifted[i].target_address; cf_type[i] = ariane_pkg::JumpR; @@ -187,6 +194,8 @@ module frontend import ariane_pkg::*; #( end // its an unconditional jump to an immediate 4'b0010: begin + ras_pop = 1'b0; + ras_push = 1'b0; taken_rvi_cf[i] = rvi_jump[i]; taken_rvc_cf[i] = rvc_jump[i]; cf_type[i] = 
ariane_pkg::Jump; @@ -195,11 +204,14 @@ module frontend import ariane_pkg::*; #( 4'b0100: begin // make sure to only alter the RAS if we actually consumed the instruction ras_pop = ras_predict.valid & instr_queue_consumed[i]; + ras_push = 1'b0; predict_address = ras_predict.ra; cf_type[i] = ariane_pkg::Return; end // branch prediction 4'b1000: begin + ras_pop = 1'b0; + ras_push = 1'b0; // if we have a valid dynamic prediction use it if (bht_prediction_shifted[i].valid) begin taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken; @@ -226,6 +238,12 @@ module frontend import ariane_pkg::*; #( predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]); end end + + if (is_mispredict) begin + ras_update = resolved_branch_i.pc + 4; // TODO fix for compressed instructions + ras_push = 1'b0; + ras_pop = resolved_branch_i.cf_type == Return; + end end // or reduce struct always_comb begin @@ -235,6 +253,8 @@ module frontend import ariane_pkg::*; #( // contains a valid prediction. 
for (int i = 0; i < INSTR_PER_FETCH; i++) bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid)); end + + assign is_correct_predict = resolved_branch_i.valid & !(resolved_branch_i.is_mispredict) & (resolved_branch_i.cf_type != Jump); assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict; // Cache interface @@ -355,16 +375,19 @@ module frontend import ariane_pkg::*; #( end end - ras #( - .DEPTH ( ArianeCfg.RASDepth ) + sras #( + .DEPTH ( ArianeCfg.RASDepth ) ) i_ras ( .clk_i, .rst_ni, - .flush_i( flush_bp_i ), - .push_i ( ras_push ), - .pop_i ( ras_pop ), - .data_i ( ras_update ), - .data_o ( ras_predict ) + .flush_i ( flush_bp_i ), + .push_i ( ras_push ), + .pop_i ( ras_pop ), + .data_i ( ras_update ), + .begin_spec_i ( begin_spec_o ), + .valid_spec_i ( is_correct_predict ), + .bad_spec_i ( is_mispredict ), + .data_o ( ras_predict ) ); btb #( @@ -418,6 +441,7 @@ module frontend import ariane_pkg::*; #( .flush_i ( flush_i ), .instr_i ( instr ), // from re-aligner .addr_i ( addr ), // from re-aligner + .is_cf_i ( is_cf ), .exception_i ( icache_ex_valid_q ), // from I$ .exception_addr_i ( icache_vaddr_q ), .predict_address_i ( predict_address ), @@ -430,7 +454,8 @@ module frontend import ariane_pkg::*; #( .fetch_entry_o ( fetch_entry_o ), // to back-end .fetch_entry_valid_o ( fetch_entry_valid_o ), // to back-end .fetch_entry_ready_i ( fetch_entry_ready_i ), // to back-end - .has_mem_access_o ( has_mem_access_o ) // to verifier + .has_mem_access_o ( has_mem_access_o ), // to verifier + .has_cf_o ( has_cf_o ) // to I$ ); // pragma translate_off @@ -438,6 +463,18 @@ module frontend import ariane_pkg::*; #( initial begin assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64) else $fatal("[frontend] fetch width != not supported"); end + + assert property ( // simulation-only check: while replay is set, replay_addr must equal icache_vaddr_q + @(posedge clk_i) disable iff (!rst_ni) replay |-> (replay_addr == icache_vaddr_q)) + else $warning("[frontend] replay_addr != icache_vaddr_q"); 
+ + assert property ( // simulation-only check: replay is only expected while the instruction queue is not ready + @(posedge clk_i) disable iff (!rst_ni) replay |-> ~instr_queue_ready) + else $warning("[frontend] replay & instr_queue_ready..."); + + assert property ( // simulation-only check: a resolved Jump is never reported as a mispredict + @(posedge clk_i) disable iff (!rst_ni) resolved_branch_i.cf_type == Jump |-> !resolved_branch_i.is_mispredict) + else $warning("[frontend] mispredicted jump"); `endif // pragma translate_on endmodule diff --git a/src/frontend/instr_queue.sv b/src/frontend/instr_queue.sv index 84df044d9528fc4532798cf5de4f9fdee6490956..51c612624865dd19ef3ae4201c6338f018fd0759 100644 --- a/src/frontend/instr_queue.sv +++ b/src/frontend/instr_queue.sv @@ -49,6 +49,7 @@ module instr_queue ( input logic flush_i, input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i, input logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i, + input logic [ariane_pkg::INSTR_PER_FETCH-1:0] is_cf_i, input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i, output logic ready_o, output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o, @@ -66,12 +67,14 @@ module instr_queue ( output logic fetch_entry_valid_o, input logic fetch_entry_ready_i, - output logic has_mem_access_o + output logic has_mem_access_o, + output logic has_cf_o ); typedef struct packed { logic [31:0] instr; // instruction word ariane_pkg::cf_t cf; // branch was taken + logic is_cf; // instruction is a cf that needs a prediction (ie. 
not a jump) ariane_pkg::frontend_exception_t ex; // exception happened logic [riscv::VLEN-1:0] ex_vaddr; // lower VLEN bits of tval for exception } instr_data_t; @@ -125,6 +128,11 @@ module instr_queue ( logic [ariane_pkg::INSTR_PER_FETCH-1:0] fifo_output_is_mem, fifo_has_no_mem; logic output_is_mem; + // cf count + logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] input_is_cf; + logic [ariane_pkg::INSTR_PER_FETCH-1:0] fifo_has_no_cf; + logic output_is_cf; + assign ready_o = ~(|instr_queue_full) & ~full_address; for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_unpack_taken @@ -199,12 +207,23 @@ module instr_queue ( assign has_mem_access_o = (|input_is_mem[ariane_pkg::INSTR_PER_FETCH-1:0]) | output_is_mem | ~(&fifo_has_no_mem); + // ---------------------- + // Control flow detector + // ---------------------- + assign output_is_cf = fetch_entry_valid_o & + (fetch_entry_o.instruction[6:0] inside {riscv::OpcodeJalr, riscv::OpcodeBranch}); + + assign has_cf_o = (|input_is_cf) | output_is_cf | + ~(&fifo_has_no_cf); + // duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0 for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input assign instr[i] = instr_i[i]; assign instr[i + ariane_pkg::INSTR_PER_FETCH] = instr_i[i]; assign cf[i] = cf_type_i[i]; assign cf[i + ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i]; + assign input_is_cf[i] = is_cf_i[i]; + assign input_is_cf[i + ariane_pkg::INSTR_PER_FETCH] = is_cf_i[i]; end // shift the inputs @@ -215,6 +234,7 @@ module instr_queue ( assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet assign instr_data_in[i].ex_vaddr = exception_addr_i; assign input_is_mem_in[i] = input_is_mem[i + idx_is_q]; + assign instr_data_in[i].is_cf = input_is_cf[i + idx_is_q]; /* verilator lint_on WIDTH */ end @@ -346,6 +366,23 @@ module instr_queue ( .data_o (), .pop_i (pop_instr[i] & fifo_output_is_mem[i]) ); + + fifo_v3 #( + .DEPTH (ariane_pkg::FETCH_FIFO_DEPTH), 
+ .dtype (logic) + ) i_fifo_cf ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (flush_i), + .testmode_i (1'b0), + .full_o (), + .empty_o (fifo_has_no_cf[i]), + .usage_o (), + .data_i (1'b1), + .push_i (push_instr_fifo[i] & instr_data_in[i].is_cf), + .data_o (), + .pop_i (pop_instr[i] & instr_data_out[i].is_cf) + ); end // or reduce and check whether we are retiring a taken branch (might be that the corresponding) // fifo is full. @@ -374,6 +411,12 @@ module instr_queue ( .pop_i ( pop_address ) ); + unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage})); + unread i_unread_branch_mask (.d_i(|branch_mask_extended)); + unread i_unread_lzc (.d_i(|{branch_empty})); + unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals + unread i_unread_instr_fifo (.d_i(|instr_queue_usage)); + always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin idx_ds_q <= 'b1; diff --git a/src/frontend/sras.sv b/src/frontend/sras.sv new file mode 100644 index 0000000000000000000000000000000000000000..6328a52c84402360ca6f29180441aebdd4d63c2b --- /dev/null +++ b/src/frontend/sras.sv @@ -0,0 +1,150 @@ +//Copyright (C) 2018 to present, +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 2.0 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 08.02.2018 +// Migrated: Luis Vitorio Cargnini, IEEE +// Date: 09.06.2018 + +// segmented return address stack +module sras #( + parameter int unsigned DEPTH = 2, + parameter int unsigned SpecDepth = 16 +)( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic push_i, + input logic pop_i, + input logic [riscv::VLEN-1:0] data_i, + input logic begin_spec_i, + input logic valid_spec_i, + input logic bad_spec_i, + output ariane_pkg::ras_t data_o +); + + logic [$clog2(SpecDepth)-1:0] ptr_spec_d, ptr_spec_q; + logic [$clog2(SpecDepth)-1:0] ptr_backup_d, ptr_backup_q; + logic [SpecDepth-1:0][$clog2(DEPTH)-1:0] tos_d, tos_q; + ariane_pkg::ras_t [SpecDepth-1:0][DEPTH-1:0] stack_d, stack_q; + + assign ptr_spec_d = (bad_spec_i) ? ptr_backup_q : (begin_spec_i) ? ptr_spec_q + 1'b1 : ptr_spec_q; + assign ptr_backup_d = (valid_spec_i) ? ptr_backup_q + 1'b1 : ptr_backup_q; + + logic [$clog2(SpecDepth)-1:0] previous_tos_addr; + logic [$clog2(DEPTH)-1:0] previous_tos, prev_plus_one, pp_plus_one, prev_minus_one; + + assign previous_tos_addr = (!bad_spec_i && begin_spec_i) ? 
ptr_spec_q : ptr_spec_d; + assign previous_tos = tos_q[previous_tos_addr]; + + assign prev_plus_one = previous_tos + 1'b1; + assign pp_plus_one = tos_q[ptr_spec_q] + 1'b1; + assign prev_minus_one = previous_tos - 1'b1; + + always_comb begin + tos_d = tos_q; + + if (flush_i) begin + tos_d = '0; + end else if (!bad_spec_i) begin + if (push_i && !pop_i) begin + tos_d[ptr_spec_d] = prev_plus_one; + if (begin_spec_i) begin + tos_d[ptr_spec_q] = pp_plus_one; + end + end else if (!push_i && pop_i) begin + tos_d[ptr_spec_d] = prev_minus_one; + end else if (begin_spec_i) begin + tos_d[ptr_spec_d] = tos_q[ptr_spec_q]; + end + end + end + + logic can_pop, can_push; + assign can_pop = pop_i && !bad_spec_i; + assign can_push = push_i && !bad_spec_i; + + assign data_o = stack_q[previous_tos_addr][previous_tos]; + + ariane_pkg::ras_t to_push; + assign to_push.ra = (push_i) ? data_i : 0; + assign to_push.valid = can_push; + + ariane_pkg::ras_t [DEPTH-1:0] new_stack, prev_stack; + + always_comb begin + new_stack = stack_q[ptr_spec_d]; + + if (!bad_spec_i && begin_spec_i) begin + new_stack = stack_q[ptr_spec_q]; + end + + if (can_pop) begin + new_stack[previous_tos] = to_push; + end else if (can_push) begin + new_stack[prev_plus_one] = to_push; + end + end + + always_comb begin + prev_stack = stack_q[ptr_spec_q]; + + if (can_push && begin_spec_i) begin + prev_stack[pp_plus_one] = to_push; + end + end + + for (genvar i = 0; i < SpecDepth; i++) begin + assign stack_d[i] = (flush_i) ? '0 : + (i == ptr_spec_d) ? new_stack : + (i == ptr_spec_q) ? 
prev_stack : + stack_q[i]; + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + stack_q <= '0; + ptr_spec_q <= '0; + ptr_backup_q <= '0; + tos_q <= '0; + end else begin + stack_q <= stack_d; + ptr_spec_q <= ptr_spec_d; + ptr_backup_q <= ptr_backup_d; + tos_q <= tos_d; + end + end + + // pragma translate_off + `ifndef VERILATOR + initial begin + assert (2 ** $clog2(SpecDepth) == SpecDepth) else $fatal(1,"[sras] SpecDepth is not a power of 2"); + assert (SpecDepth >= 2) else $fatal(1,"[sras] SpecDepth is lower than 2"); + assert (2 ** $clog2(DEPTH) == DEPTH) else $fatal(1,"[sras] DEPTH is not a power of 2"); + end + + // assert property ( + // @(posedge clk_i) disable iff (!rst_ni) push_i |-> begin_spec_i) + // else $warning (1,"[sras] push_i & ~begin_spec_i"); + + assert property ( + @(posedge clk_i) disable iff (!rst_ni) (begin_spec_i & !(bad_spec_i)) |-> (ptr_spec_d != ptr_backup_d)) + else $fatal (1,"[sras] speculation overflow"); + + assert property ( + @(posedge clk_i) disable iff (!rst_ni) valid_spec_i |-> ((ptr_backup_q == ptr_spec_q) |-> (ptr_backup_d == ptr_spec_d))) + else $fatal (1,"[sras] backup overtake"); + + assert property ( + @(posedge clk_i) disable iff (!rst_ni) can_push |-> ~can_pop && can_pop |-> ~can_push) + else $fatal (1,"[sras] push & pop at the same time"); + `endif + // pragma translate_on +endmodule diff --git a/src/id_stage.sv b/src/id_stage.sv index 7edc74a97b3aca367747b71ad1ad8ef408fdf3da..6dd3ff789d7d360f9b3e75c79311c71d63cf29c6 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -38,7 +38,8 @@ module id_stage ( input logic tvm_i, input logic tw_i, input logic tsr_i, - output logic is_mem_instr_o + output logic is_mem_instr_o, + output logic has_ctrl_flow_o // speculative CF (ie. 
no jals) ); // ID/ISSUE register stage struct packed { @@ -96,6 +97,7 @@ module id_stage ( assign issue_entry_o = issue_q.sbe; assign issue_entry_valid_o = issue_q.valid; assign is_ctrl_flow_o = issue_q.is_ctrl_flow; + assign has_ctrl_flow_o = issue_q.valid && issue_q.is_ctrl_flow && issue_q.sbe.op != ariane_pkg::ADD; assign is_mem_instr_o = is_mem_instr_q; diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 7e6a2b2708b275b23d6a2ae8f7a164276df2bd7a..1bdd15a8bfef214d8eb1277510f4ff19474f9b64 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -26,10 +26,13 @@ module issue_read_operands import ariane_pkg::*; #( input logic issue_instr_valid_i, output logic issue_ack_o, // lookup rd in scoreboard + output logic [REG_ADDR_SIZE-1:0] rs1_o, input riscv::xlen_t rs1_i, input logic rs1_valid_i, + output logic [REG_ADDR_SIZE-1:0] rs2_o, input riscv::xlen_t rs2_i, input logic rs2_valid_i, + output logic [REG_ADDR_SIZE-1:0] rs3_o, input logic [FLEN-1:0] rs3_i, input logic rs3_valid_i, // get clobber input @@ -146,36 +149,93 @@ module issue_read_operands import ariane_pkg::*; #( // --------------- // check that all operands are available, otherwise stall // forward corresponding register - logic is_instr_csr_or_fence_rs1, is_instr_csr_or_fence_rs2; - assign is_instr_csr_or_fence_rs1 = rs1_valid_i && (issue_instr_i.use_rs1_fpr || rd_clobber_gpr_i[issue_instr_i.rs1] != CSR || issue_instr_i.op == SFENCE_VMA); - assign is_instr_csr_or_fence_rs2 = rs2_valid_i && (issue_instr_i.use_rs2_fpr || rd_clobber_gpr_i[issue_instr_i.rs2] != CSR || issue_instr_i.op == SFENCE_VMA); - - logic should_forward_rs1, should_forward_rs2, should_forward_rs3; - assign should_forward_rs1 = !issue_instr_i.use_zimm && ((issue_instr_i.use_rs1_fpr && rd_clobber_fpr_i[issue_instr_i.rs1]) || rd_clobber_gpr_i[issue_instr_i.rs1] != NONE); - assign should_forward_rs2 = (issue_instr_i.use_rs2_fpr && rd_clobber_fpr_i[issue_instr_i.rs2]) || 
rd_clobber_gpr_i[issue_instr_i.rs2] != NONE; - assign should_forward_rs3 = issue_instr_i.use_imm_fpr && rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE; + always_comb begin : operands_available + stall = 1'b0; + // operand forwarding signals + forward_rs1 = 1'b0; + forward_rs2 = 1'b0; + forward_rs3 = 1'b0; // FPR only + // poll the scoreboard for those values + rs1_o = issue_instr_i.rs1; + rs2_o = issue_instr_i.rs2; + rs3_o = issue_instr_i.result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field + + // 0. check that we are not using the zimm type in RS1 + // as this is an immediate we do not have to wait on anything here + // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr) + // 2. poll the scoreboard + if (!issue_instr_i.use_zimm && (is_rs1_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs1] != NONE + : rd_clobber_gpr_i[issue_instr_i.rs1] != NONE)) begin + // check if the clobbering instruction is not a CSR instruction, CSR instructions can only + // be fetched through the register file since they can't be forwarded + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs1_valid_i && (is_rs1_fpr(issue_instr_i.op) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i.rs1] != CSR) || (issue_instr_i.op == SFENCE_VMA)))) begin + forward_rs1 = 1'b1; + end else begin // the operand is not available -> stall + stall = 1'b1; + end + end - assign stall = (should_forward_rs1 && !is_instr_csr_or_fence_rs1) || - (should_forward_rs2 && !is_instr_csr_or_fence_rs2) || - (should_forward_rs3 && !rs3_valid_i); + if (is_rs2_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs2] != NONE + : rd_clobber_gpr_i[issue_instr_i.rs2] != NONE) begin + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs2_valid_i && (is_rs2_fpr(issue_instr_i.op) ? 
1'b1 : ( (rd_clobber_gpr_i[issue_instr_i.rs2] != CSR) || (issue_instr_i.op == SFENCE_VMA)))) begin + forward_rs2 = 1'b1; + end else begin // the operand is not available -> stall + stall = 1'b1; + end + end - assign forward_rs1 = should_forward_rs1 && is_instr_csr_or_fence_rs1; - assign forward_rs2 = should_forward_rs2 && is_instr_csr_or_fence_rs2; - assign forward_rs3 = should_forward_rs3 && rs3_valid_i; + if (is_imm_fpr(issue_instr_i.op) && rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE) begin + // if the operand is available, forward it. CSRs don't write to/from FPR so no need to check + if (rs3_valid_i) begin + forward_rs3 = 1'b1; + end else begin // the operand is not available -> stall + stall = 1'b1; + end + end + end // Forwarding/Output MUX - assign operand_a_n = (issue_instr_i.use_zimm) ? {{riscv::XLEN-5{1'b0}}, issue_instr_i.rs1[4:0]} : - (issue_instr_i.use_pc) ? {{riscv::XLEN-riscv::VLEN{issue_instr_i.pc[riscv::VLEN-1]}}, issue_instr_i.pc} : - (forward_rs1) ? rs1_i : operand_a_regfile; - assign operand_b_n = (issue_instr_i.use_imm && !(issue_instr_i.fu inside {STORE, CTRL_FLOW}) && !issue_instr_i.use_rs2_fpr) ? issue_instr_i.result : - (forward_rs2) ? rs2_i : operand_b_regfile; - - assign imm_n = (forward_rs3) ? {{riscv::XLEN-FLEN{1'b0}}, rs3_i} : - issue_instr_i.use_imm_fpr ? {{riscv::XLEN-FLEN{1'b0}}, operand_c_regfile} : - issue_instr_i.result; - assign trans_id_n = issue_instr_i.trans_id; - assign fu_n = issue_instr_i.fu; - assign operator_n = issue_instr_i.op; + always_comb begin : forwarding_operand_select + // default is regfiles (gpr or fpr) + operand_a_n = operand_a_regfile; + operand_b_n = operand_b_regfile; + // immediates are the third operands in the store case + // for FP operations, the imm field can also be the third operand from the regfile + imm_n = is_imm_fpr(issue_instr_i.op) ? 
{{riscv::XLEN-FLEN{1'b0}}, operand_c_regfile} : issue_instr_i.result; + trans_id_n = issue_instr_i.trans_id; + fu_n = issue_instr_i.fu; + operator_n = issue_instr_i.op; + // or should we forward + if (forward_rs1) begin + operand_a_n = rs1_i; + end + + if (forward_rs2) begin + operand_b_n = rs2_i; + end + + if (forward_rs3) begin + imm_n = {{riscv::XLEN-FLEN{1'b0}}, rs3_i}; + end + + // use the PC as operand a + if (issue_instr_i.use_pc) begin + operand_a_n = {{riscv::XLEN-riscv::VLEN{issue_instr_i.pc[riscv::VLEN-1]}}, issue_instr_i.pc}; + end + + // use the zimm as operand a + if (issue_instr_i.use_zimm) begin + // zero extend operand a + operand_a_n = {{riscv::XLEN-5{1'b0}}, issue_instr_i.rs1[4:0]}; + end + // or is it an immediate (including PC), this is not the case for a store and control flow instructions + // also make sure operand B is not already used as an FP operand + if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && !is_rs2_fpr(issue_instr_i.op)) begin + operand_b_n = issue_instr_i.result; + end + end // FU select, assert the correct valid out signal (in the next cycle) // This needs to be like this to make verilator happy. I know its ugly. 
@@ -239,65 +299,47 @@ module issue_read_operands import ariane_pkg::*; #( end end - logic [NR_COMMIT_PORTS-1:0] waw_check; - logic rd_clobbered; - - assign rd_clobbered = (issue_instr_i.use_rd_fpr && rd_clobber_fpr_i[issue_instr_i.rd] == NONE) || - rd_clobber_gpr_i[issue_instr_i.rd] == NONE; - - for (genvar i = 0; i < NR_COMMIT_PORTS; i++) begin - assign waw_check[i] = (issue_instr_i.use_rd_fpr && we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd) || - we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd; - end - - assign issue_ack_o = issue_instr_valid_i && - ( - (!stall && !fu_busy && (rd_clobbered || (|waw_check))) || - (issue_instr_i.ex.valid) || - (issue_instr_i.fu == NONE) - ); - // We can issue an instruction if we do not detect that any other instruction is writing the same // destination register. // We also need to check if there is an unresolved branch in the scoreboard. - // always_comb begin : issue_scoreboard - // // default assignment - // issue_ack_o = 1'b0; - // // check that we didn't stall, that the instruction we got is valid - // // and that the functional unit we need is not busy - // if (issue_instr_valid_i) begin - // // check that the corresponding functional unit is not busy - // if (!stall && !fu_busy) begin - // // ----------------------------------------- - // // WAW - Write After Write Dependency Check - // // ----------------------------------------- - // // no other instruction has the same destination register -> issue the instruction - // if (is_rd_fpr(issue_instr_i.op) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE) - // : (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin - // issue_ack_o = 1'b1; - // end - // // or check that the target destination register will be written in this cycle by the - // // commit stage - // for (int unsigned i = 0; i < NR_COMMIT_PORTS; i++) - // if (is_rd_fpr(issue_instr_i.op) ? 
(we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd) - // : (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd)) begin - // issue_ack_o = 1'b1; - // end - // end - // // we can also issue the instruction under the following two circumstances: - // // we can do this even if we are stalled or no functional unit is ready (as we don't need one) - // // the decoder needs to make sure that the instruction is marked as valid when it does not - // // need any functional unit or if an exception occurred previous to the execute stage. - // // 1. we already got an exception - // if (issue_instr_i.ex.valid) begin - // issue_ack_o = 1'b1; - // end - // // 2. it is an instruction which does not need any functional unit - // if (issue_instr_i.fu == NONE) begin - // issue_ack_o = 1'b1; - // end - // end - // end + always_comb begin : issue_scoreboard + // default assignment + issue_ack_o = 1'b0; + // check that we didn't stall, that the instruction we got is valid + // and that the functional unit we need is not busy + if (issue_instr_valid_i) begin + // check that the corresponding functional unit is not busy + if (!stall && !fu_busy) begin + // ----------------------------------------- + // WAW - Write After Write Dependency Check + // ----------------------------------------- + // no other instruction has the same destination register -> issue the instruction + if (is_rd_fpr(issue_instr_i.op) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE) + : (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin + issue_ack_o = 1'b1; + end + // or check that the target destination register will be written in this cycle by the + // commit stage + for (int unsigned i = 0; i < NR_COMMIT_PORTS; i++) + if (is_rd_fpr(issue_instr_i.op) ? 
(we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd) + : (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd)) begin + issue_ack_o = 1'b1; + end + end + // we can also issue the instruction under the following two circumstances: + // we can do this even if we are stalled or no functional unit is ready (as we don't need one) + // the decoder needs to make sure that the instruction is marked as valid when it does not + // need any functional unit or if an exception occurred previous to the execute stage. + // 1. we already got an exception + if (issue_instr_i.ex.valid) begin + issue_ack_o = 1'b1; + end + // 2. it is an instruction which does not need any functional unit + if (issue_instr_i.fu == NONE) begin + issue_ack_o = 1'b1; + end + end + end // ---------------------- // Integer Register File @@ -366,8 +408,8 @@ module issue_read_operands import ariane_pkg::*; #( end endgenerate - assign operand_a_regfile = issue_instr_i.use_rs1_fpr ? {{riscv::XLEN-FLEN{1'b0}}, fprdata[0]} : rdata[0]; - assign operand_b_regfile = issue_instr_i.use_rs2_fpr ? {{riscv::XLEN-FLEN{1'b0}}, fprdata[1]} : rdata[1]; + assign operand_a_regfile = is_rs1_fpr(issue_instr_i.op) ? {{riscv::XLEN-FLEN{1'b0}}, fprdata[0]} : rdata[0]; + assign operand_b_regfile = is_rs2_fpr(issue_instr_i.op) ? 
{{riscv::XLEN-FLEN{1'b0}}, fprdata[1]} : rdata[1]; assign operand_c_regfile = fprdata[2]; // ---------------------- diff --git a/src/issue_stage.sv b/src/issue_stage.sv index 0bdde4010404ef0f2d68e2257b356f00bbc666a0..2492b203d7315a599daf98fdfa63f54bc45f6f47 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -74,7 +74,8 @@ module issue_stage import ariane_pkg::*; #( input logic [NR_COMMIT_PORTS-1:0] commit_ack_i, // to verifier - output has_mem_access_o + output logic has_control_flow_o, + output logic has_mem_access_o ); // --------------------------------------------------- // Scoreboard (SB) <-> Issue and Read Operands (IRO) @@ -82,12 +83,15 @@ module issue_stage import ariane_pkg::*; #( fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro; fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro; + logic [REG_ADDR_SIZE-1:0] rs1_iro_sb; riscv::xlen_t rs1_sb_iro; logic rs1_valid_sb_iro; + logic [REG_ADDR_SIZE-1:0] rs2_iro_sb; riscv::xlen_t rs2_sb_iro; logic rs2_valid_iro_sb; + logic [REG_ADDR_SIZE-1:0] rs3_iro_sb; logic [FLEN-1:0] rs3_sb_iro; logic rs3_valid_iro_sb; @@ -127,10 +131,13 @@ module issue_stage import ariane_pkg::*; #( .unresolved_branch_i ( 1'b0 ), .rd_clobber_gpr_o ( rd_clobber_gpr_sb_iro ), .rd_clobber_fpr_o ( rd_clobber_fpr_sb_iro ), + .rs1_i ( rs1_iro_sb ), .rs1_o ( rs1_sb_iro ), .rs1_valid_o ( rs1_valid_sb_iro ), + .rs2_i ( rs2_iro_sb ), .rs2_o ( rs2_sb_iro ), .rs2_valid_o ( rs2_valid_iro_sb ), + .rs3_i ( rs3_iro_sb ), .rs3_o ( rs3_sb_iro ), .rs3_valid_o ( rs3_valid_iro_sb ), @@ -160,10 +167,13 @@ module issue_stage import ariane_pkg::*; #( .issue_ack_o ( issue_ack_iro_sb ), .fu_data_o ( fu_data_o ), .flu_ready_i ( flu_ready_i ), + .rs1_o ( rs1_iro_sb ), .rs1_i ( rs1_sb_iro ), .rs1_valid_i ( rs1_valid_sb_iro ), + .rs2_o ( rs2_iro_sb ), .rs2_i ( rs2_sb_iro ), .rs2_valid_i ( rs2_valid_iro_sb ), + .rs3_o ( rs3_iro_sb ), .rs3_i ( rs3_sb_iro ), .rs3_valid_i ( rs3_valid_iro_sb ), .rd_clobber_gpr_i ( rd_clobber_gpr_sb_iro ), diff --git 
a/src/scoreboard.sv b/src/scoreboard.sv index 9871991048ce8f0fb904bf00cfa5e5a50df6dde1..eb54630651b244a5a0550e5347606ea2e30adab2 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -28,12 +28,15 @@ module scoreboard #( output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_fpr_o, // regfile like interface to operand read stage + input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i, output riscv::xlen_t rs1_o, output logic rs1_valid_o, + input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i, output riscv::xlen_t rs2_o, output logic rs2_valid_o, + input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i, output logic [ariane_pkg::FLEN-1:0] rs3_o, output logic rs3_valid_o, @@ -60,6 +63,7 @@ module scoreboard #( input logic [NR_WB_PORTS-1:0] wt_valid_i, // data in is valid // to verifier + output logic has_control_flow_o, output logic has_mem_access_o ); localparam int unsigned BITS_ENTRIES = $clog2(NR_ENTRIES); @@ -73,7 +77,7 @@ module scoreboard #( } mem_q [NR_ENTRIES-1:0], mem_n [NR_ENTRIES-1:0]; logic issue_full, issue_en; - logic [BITS_ENTRIES:0] issue_cnt_n, issue_cnt_q; + logic [BITS_ENTRIES-1:0] issue_cnt_n, issue_cnt_q; logic [BITS_ENTRIES-1:0] write_pointer_n, write_pointer_q; logic [BITS_ENTRIES-1:0] issue_pointer_n, issue_pointer_q; logic [NR_COMMIT_PORTS-1:0][BITS_ENTRIES-1:0] commit_pointer_n, commit_pointer_q; @@ -82,11 +86,11 @@ module scoreboard #( logic [NR_ENTRIES-1:0] flushed; logic [$clog2(NR_ENTRIES)-1:0] num_flush; - logic [NR_ENTRIES-1:0] has_mem_access_n, has_mem_access_q; + logic [NR_ENTRIES-1:0] is_cf, has_mem_access; // the issue queue is full don't issue any new instructions // works since aligned to power of 2 - assign issue_full = (issue_cnt_q[BITS_ENTRIES] == 1'b1); + assign issue_full = &issue_cnt_q; assign sb_full_o = issue_full; @@ -98,8 +102,18 @@ module scoreboard #( end end - // check instructions in the scoreboard for memory operations - assign has_mem_access_o = (|has_mem_access_q); + // check instructions in the 
scoreboard for memory operations and ctrl flow + for (genvar i = 0; i < NR_ENTRIES; i++) begin + assign is_cf[i] = mem_q[i].issued && ~mem_q[i].sbe.valid && + mem_q[i].sbe.fu == ariane_pkg::CTRL_FLOW && + mem_q[i].sbe.op != ariane_pkg::ADD; + assign has_mem_access[i] = (mem_q[i].issued && ~mem_q[i].sbe.valid && + mem_q[i].sbe.fu inside {ariane_pkg::LOAD, ariane_pkg::STORE}) || + mem_q[i].sbe.valid && mem_q[i].sbe.fu == ariane_pkg::STORE; + end + + assign has_control_flow_o = |is_cf; + assign has_mem_access_o = |has_mem_access; // maintain a FIFO with issued instructions // keep track of all issued instructions @@ -107,8 +121,6 @@ module scoreboard #( // default assignment mem_n = mem_q; issue_en = 1'b0; - has_mem_access_n = has_mem_access_q; - decoded_instr_ack_o = 1'b0; flushed = '0; decoded_instr_ack_o = decoded_instr_valid_i && ~issue_full; @@ -118,14 +130,11 @@ module scoreboard #( // the decoded instruction we put in there is valid (1st bit) // increase the issue counter and advance issue pointer issue_en = 1'b1; - // decoded_instr_ack_o = 1'b1; mem_n[write_pointer_q] = {1'b1, 1'b1, // valid bit - decoded_instr_i.use_rd_fpr, - // ariane_pkg::is_rd_fpr(decoded_instr_i.op), // whether rd goes to the fpr + ariane_pkg::is_rd_fpr(decoded_instr_i.op), // whether rd goes to the fpr decoded_instr_i // decoded instruction record }; - has_mem_access_n[write_pointer_q] = decoded_instr_i.fu inside {ariane_pkg::LOAD, ariane_pkg::STORE}; end issue_instr_o = mem_n[issue_pointer_q]; @@ -143,7 +152,6 @@ module scoreboard #( mem_n[i].issued = 1'b0; mem_n[i].sbe.valid = 1'b0; flushed[i] = 1'b1; - has_mem_access_n[i] = 1'b0; end end end else if (mem_q[issue_pointer_q].sbe.fu == ariane_pkg::NONE && @@ -173,9 +181,6 @@ module scoreboard #( // write the fflags back from the FPU (exception valid is never set), leave tval intact else if (mem_q[trans_id_i[i]].sbe.fu inside {ariane_pkg::FPU, ariane_pkg::FPU_VEC}) mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause; - - if 
(mem_n[trans_id_i[i]].sbe.fu != ariane_pkg::STORE) - has_mem_access_n[trans_id_i[i]] = 1'b0; end end @@ -189,7 +194,6 @@ module scoreboard #( mem_n[commit_pointer_q[i]].pending = 1'b0; mem_n[commit_pointer_q[i]].issued = 1'b0; mem_n[commit_pointer_q[i]].sbe.valid = 1'b0; - has_mem_access_n[commit_pointer_q[i]] = 1'b0; end end @@ -203,7 +207,6 @@ module scoreboard #( mem_n[i].issued = 1'b0; mem_n[i].sbe.valid = 1'b0; mem_n[i].sbe.ex.valid = 1'b0; - has_mem_access_n[i] = 1'b0; end end end @@ -287,22 +290,17 @@ module scoreboard #( logic [NR_ENTRIES+NR_WB_PORTS-1:0][riscv::XLEN-1:0] rs_data; logic rs1_valid, rs2_valid; - logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs1, rs2, rs3; - assign rs1 = issue_instr_o.rs1; - assign rs2 = issue_instr_o.rs2; - assign rs3 = issue_instr_o.result[ariane_pkg::REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field - // WB ports have higher prio than entries for (genvar k = 0; unsigned'(k) < NR_WB_PORTS; k++) begin : gen_rs_wb - assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == issue_instr_o.use_rs1_fpr); - assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == issue_instr_o.use_rs2_fpr); - assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == issue_instr_o.use_imm_fpr); + assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == ariane_pkg::is_rs1_fpr(issue_instr_o.op)); + assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == ariane_pkg::is_rs2_fpr(issue_instr_o.op)); + assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == 
ariane_pkg::is_imm_fpr(issue_instr_o.op)); assign rs_data[k] = wbdata_i[k]; end for (genvar k = 0; unsigned'(k) < NR_ENTRIES; k++) begin : gen_rs_entries - assign rs1_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs1) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == issue_instr_o.use_rs1_fpr); - assign rs2_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs2) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == issue_instr_o.use_rs2_fpr); - assign rs3_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs3) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == issue_instr_o.use_imm_fpr); + assign rs1_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs1_i) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == ariane_pkg::is_rs1_fpr(issue_instr_o.op)); + assign rs2_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs2_i) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == ariane_pkg::is_rs2_fpr(issue_instr_o.op)); + assign rs3_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs3_i) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == ariane_pkg::is_imm_fpr(issue_instr_o.op)); assign rs_data[k+NR_WB_PORTS] = mem_q[k].sbe.result; end @@ -342,8 +340,8 @@ module scoreboard #( assign rs3_o = (rs3_valid_o) ? rs_data[rs3_fwd_idx] : '0; // check whether we are accessing GPR[0], rs3 is only used with the FPR! 
- assign rs1_valid_o = rs1_valid & ((|rs1) | issue_instr_o.use_rs1_fpr); - assign rs2_valid_o = rs2_valid & ((|rs2) | issue_instr_o.use_rs2_fpr); + assign rs1_valid_o = rs1_valid & ((|rs1_i) | ariane_pkg::is_rs1_fpr(issue_instr_o.op)); + assign rs2_valid_o = rs2_valid & ((|rs2_i) | ariane_pkg::is_rs2_fpr(issue_instr_o.op)); // sequential process always_ff @(posedge clk_i or negedge rst_ni) begin : regs @@ -353,14 +351,12 @@ module scoreboard #( commit_pointer_q <= '0; issue_pointer_q <= '0; write_pointer_q <= '0; - has_mem_access_q <= '0; end else begin issue_cnt_q <= issue_cnt_n; issue_pointer_q <= issue_pointer_n; write_pointer_q <= write_pointer_n; mem_q <= mem_n; commit_pointer_q <= commit_pointer_n; - has_mem_access_q <= has_mem_access_n; end end diff --git a/src/verifier.sv b/src/verifier.sv index 9c3cb3cf35970eb8779f81cc386f34e3d751a2c1..ebe7f803f5d74531061cb6a627bb1205eb2d4142 100644 --- a/src/verifier.sv +++ b/src/verifier.sv @@ -11,12 +11,15 @@ module verifier #( // Frontend input logic if_has_mem_access_i, + input logic if_has_cf_i, // ID input logic id_has_mem_access_i, + input logic id_has_cf_i, // IS input logic is_has_mem_access_i, + input logic is_has_cf_i, // LSU input logic no_st_pending_commit_i, @@ -33,8 +36,9 @@ module verifier #( // Bus accesses (I$ misses and memory instructions in the pipeline) logic has_mem_access; assign has_mem_access = if_has_mem_access_i | id_has_mem_access_i | is_has_mem_access_i | (~no_st_pending_commit_i); + // assign should_lock_icache_o = has_mem_access & icache_miss_i; - assign should_lock_icache_o = has_mem_access; + assign should_lock_icache_o = has_mem_access | if_has_cf_i | id_has_cf_i | is_has_cf_i; //pragma translate off // CO