diff --git a/src/frontend/frontend.sv b/src/frontend/frontend.sv index 819a885f2ac365d873ee6bfbf4228d31d8853696..b9104b92b1e952984a893dcb4d8211b2a083263c 100644 --- a/src/frontend/frontend.sv +++ b/src/frontend/frontend.sv @@ -89,20 +89,10 @@ module frontend import ariane_pkg::*; #( // re-aligned instruction and address (coming from cache - combinationally) logic [INSTR_PER_FETCH-1:0][31:0] instr; logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr; - logic [INSTR_PER_FETCH-1:0] instruction_valid, instruction_really_valid; - logic [INSTR_PER_FETCH-1:0] instr_really_valid; - - // BHT, BTB and RAS prediction - bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction; - btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction; - bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted; - btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted; - ras_t ras_predict; + logic [INSTR_PER_FETCH-1:0] instruction_valid; // branch-predict update logic is_mispredict; - logic ras_push, ras_pop; - logic [riscv::VLEN-1:0] ras_update; // Instruction FIFO logic [riscv::VLEN-1:0] predict_address; @@ -111,7 +101,7 @@ module frontend import ariane_pkg::*; #( logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf; logic stalling_d, stalling_q; - logic previous_is_branch; + logic bp_valid; logic serving_unaligned; // Re-align instructions @@ -127,25 +117,8 @@ module frontend import ariane_pkg::*; #( .addr_o ( addr ), .instr_o ( instr ) ); - // -------------------- - // Branch Prediction - // -------------------- - // select the right branch prediction result - // in case we are serving an unaligned instruction in instr[0] we need to take - // the prediction we saved from the previous fetch - assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[0]; - assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[0]; - // for all other predictions we can use the generated address to index - // into the branch prediction data structures - for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address - assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]]; - assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]]; - end - // for the return address stack it doens't matter as we have the - // address of the call/return already - logic bp_valid; - assign branch_speculation_o = bp_valid; + assign branch_speculation_o = 1'b0; logic [INSTR_PER_FETCH-1:0] is_branch; logic [INSTR_PER_FETCH-1:0] is_call; @@ -166,121 +139,48 @@ module frontend import ariane_pkg::*; #( assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & ~is_call[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]); end + logic should_stall; + assign should_stall = (stalling_q | (|is_branch) | (|is_return) | (|is_jalr)) & (!resolved_branch_i.valid); + // taken/not taken always_comb begin + stalling_d = stalling_q; taken_rvi_cf = '0; taken_rvc_cf = '0; predict_address = '0; - - for (int i = 0; i < INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF; - - ras_push = 1'b0; - ras_pop = 1'b0; - ras_update = '0; - - // lower most prediction gets precedence - for (int unsigned i = INSTR_PER_FETCH - 1; i >= 0 ; i--) begin - // Direct jumps are always predicted correctly since we have their - // target address. - - unique case ({is_branch[i], is_return[i], is_jump[i], is_jalr[i]}) - 4'b0000:; // regular instruction e.g.: no branch - // unconditional jump to register, we need the BTB to resolve this - 4'b0001: begin - ras_pop = 1'b0; - ras_push = 1'b0; - if (btb_prediction_shifted[i].valid & enable_bp_i) begin - predict_address = btb_prediction_shifted[i].target_address; - cf_type[i] = ariane_pkg::JumpR; - end - end - // its an unconditional jump to an immediate - 4'b0010: begin - ras_pop = 1'b0; - ras_push = 1'b0; - taken_rvi_cf[i] = rvi_jump[i]; - taken_rvc_cf[i] = rvc_jump[i]; - cf_type[i] = ariane_pkg::Jump; - end - // return - 4'b0100: begin - // make sure to only alter the RAS if we actually consumed the instruction - ras_push = 1'b0; - ras_pop = ras_predict.valid & instr_queue_consumed[i]; - if (enable_bp_i) begin - predict_address = ras_predict.ra; - cf_type[i] = ariane_pkg::Return; - end - end - // branch prediction - 4'b1000: begin - ras_pop = 1'b0; - ras_push = 1'b0; - if (enable_bp_i) begin - // if we have a valid dynamic prediction use it - if (bht_prediction_shifted[i].valid) begin - taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken; - taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken; - // otherwise default to static prediction - end else begin - // set if immediate is negative - static prediction - taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][riscv::VLEN-1]; - taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][riscv::VLEN-1]; - end - if (taken_rvi_cf[i] || taken_rvc_cf[i]) cf_type[i] = ariane_pkg::Branch; - end - end - default:; - // default: $error("Decoded more than one control flow"); - endcase - // if this instruction, in addition, is a call, save the resulting address - // but only if we actually consumed the address - if (is_call[i]) begin - ras_push = instr_queue_consumed[i]; - ras_update = addr[i] + (rvc_call[i] ? 2 : 4); - end - // calculate the jump target address - if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin - if (rvi_jump[i] || rvc_jump[i] || enable_bp_i) begin - predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]); - end - end - end - end // always_comb - - // or reduce struct - always_comb begin bp_valid = 1'b0; - instruction_really_valid = '0; - previous_is_branch = 1'b0; - stalling_d = stalling_q; - - if (!stalling_q) begin - // BP cannot be valid if we have a return instruction and the RAS is not giving a valid address - // Check that we encountered a control flow and that for a return the RAS - // contains a valid prediction. - for (int unsigned i = 0; i < INSTR_PER_FETCH; i++) begin - bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid)); - if (enable_bp_i) begin - instruction_really_valid[i] = instruction_valid[i]; - end else begin - instruction_really_valid[i] = instruction_valid[i] && !(previous_is_branch); - end + if (!should_stall) begin + stalling_d = 1'b0; - previous_is_branch |= !(cf_type[i] inside {NoCF, Jump}); + for (int unsigned i = 0; i < INSTR_PER_FETCH; i++) begin + unique case ({is_branch[i] | is_return[i] | is_jalr[i], is_jump[i]}) + 2'b00:; + 2'b01: begin + if (!(|taken_rvi_cf) && !(|taken_rvc_cf)) + predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]); + + taken_rvi_cf[i] = rvi_jump[i]; + taken_rvc_cf[i] = rvc_jump[i]; + cf_type[i] = ariane_pkg::Jump; + bp_valid = 1'b1; + end + 2'b10: begin + taken_rvi_cf[i] = 1'b1; + taken_rvc_cf[i] = 1'b1; + bp_valid = 1'b1; + stalling_d = 1'b1; + end + default:; + endcase end - - stalling_d |= previous_is_branch; - end else if (resolved_branch_i.valid) begin - stalling_d = 1'b0; end end assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict; // Cache interface - assign icache_dreq_o.req = instr_queue_ready; + assign icache_dreq_o.req = instr_queue_ready & (!should_stall); assign if_ready = icache_dreq_i.ready & instr_queue_ready; // We need to flush the cache pipeline if: // 1. We mispredicted @@ -291,21 +191,6 @@ module frontend import ariane_pkg::*; #( // also if we killed the first stage we also need to kill the second stage (inclusive flush) assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid; - // Update Control Flow Predictions - bht_update_t bht_update; - btb_update_t btb_update; - - assign bht_update.valid = resolved_branch_i.valid - & (resolved_branch_i.cf_type == ariane_pkg::Branch); - assign bht_update.pc = resolved_branch_i.pc; - assign bht_update.taken = resolved_branch_i.is_taken; - // only update mispredicted branches e.g. no returns from the RAS - assign btb_update.valid = resolved_branch_i.valid - & resolved_branch_i.is_mispredict - & (resolved_branch_i.cf_type == ariane_pkg::JumpR); - assign btb_update.pc = resolved_branch_i.pc; - assign btb_update.target_address = resolved_branch_i.target_address; - // ------------------- // Next PC // ------------------- @@ -334,33 +219,36 @@ module frontend import ariane_pkg::*; #( // keep stable by default npc_d = npc_q; end - // 0. Branch Prediction - if (bp_valid) begin - fetch_address = predict_address; - npc_d = predict_address; + + if (!should_stall) begin + // 0. Branch Prediction + if (bp_valid) begin + fetch_address = predict_address; + npc_d = predict_address; + end + // 1. Default assignment + if (if_ready) npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0} + 'h4; + // 2. Replay instruction fetch + if (replay) npc_d = replay_addr; + // 3. Control flow change request + if (is_mispredict) npc_d = resolved_branch_i.target_address; + // 4. Return from environment call + if (eret_i) npc_d = epc_i; + // 5. Exception/Interrupt + if (ex_valid_i) npc_d = trap_vector_base_i; + // 6. Pipeline Flush because of CSR side effects + // On a pipeline flush start fetching from the next address + // of the instruction in the commit stage + // we came here from a flush request of a CSR instruction or AMO, + // as CSR or AMO instructions do not exist in a compressed form + // we can unconditionally do PC + 4 here + // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage + if (set_pc_commit_i) npc_d = pc_commit_i + {{riscv::VLEN-3{1'b0}}, 3'b100}; + // 7. Debug + // enter debug on a hard-coded base-address + if (set_debug_pc_i) npc_d = ArianeCfg.DmBaseAddress[riscv::VLEN-1:0] + dm::HaltAddress[riscv::VLEN-1:0]; + icache_dreq_o.vaddr = fetch_address; end - // 1. Default assignment - if (if_ready) npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0} + 'h4; - // 2. Replay instruction fetch - if (replay) npc_d = replay_addr; - // 3. Control flow change request - if (is_mispredict) npc_d = resolved_branch_i.target_address; - // 4. Return from environment call - if (eret_i) npc_d = epc_i; - // 5. Exception/Interrupt - if (ex_valid_i) npc_d = trap_vector_base_i; - // 6. Pipeline Flush because of CSR side effects - // On a pipeline flush start fetching from the next address - // of the instruction in the commit stage - // we came here from a flush request of a CSR instruction or AMO, - // as CSR or AMO instructions do not exist in a compressed form - // we can unconditionally do PC + 4 here - // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage - if (set_pc_commit_i) npc_d = pc_commit_i + {{riscv::VLEN-3{1'b0}}, 3'b100}; - // 7. Debug - // enter debug on a hard-coded base-address - if (set_debug_pc_i) npc_d = ArianeCfg.DmBaseAddress[riscv::VLEN-1:0] + dm::HaltAddress[riscv::VLEN-1:0]; - icache_dreq_o.vaddr = fetch_address; end logic [FETCH_WIDTH-1:0] icache_data; @@ -392,49 +280,11 @@ module frontend import ariane_pkg::*; #( icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT; end else icache_ex_valid_q <= ariane_pkg::FE_NONE; // save the uppermost prediction - btb_q <= btb_prediction[INSTR_PER_FETCH-1]; - bht_q <= bht_prediction[INSTR_PER_FETCH-1]; - stalling_q <= stalling_d; end + stalling_q <= stalling_d; end end - ras #( - .DEPTH ( ArianeCfg.RASDepth ) - ) i_ras ( - .clk_i, - .rst_ni, - .flush_i( flush_bp_i ), - .push_i ( ras_push ), - .pop_i ( ras_pop ), - .data_i ( ras_update ), - .data_o ( ras_predict ) - ); - - btb #( - .NR_ENTRIES ( ArianeCfg.BTBEntries ) - ) i_btb ( - .clk_i, - .rst_ni, - .flush_i ( flush_bp_i ), - .debug_mode_i, - .vpc_i ( icache_vaddr_q ), - .btb_update_i ( btb_update ), - .btb_prediction_o ( btb_prediction ) - ); - - bht #( - .NR_ENTRIES ( ArianeCfg.BHTEntries ) - ) i_bht ( - .clk_i, - .rst_ni, - .flush_i ( flush_bp_i ), - .debug_mode_i, - .vpc_i ( icache_vaddr_q ), - .bht_update_i ( bht_update ), - .bht_prediction_o ( bht_prediction ) - ); - // we need to inspect up to INSTR_PER_FETCH instructions for branches // and jumps for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan @@ -466,7 +316,7 @@ module frontend import ariane_pkg::*; #( .exception_addr_i ( icache_vaddr_q ), .predict_address_i ( predict_address ), .cf_type_i ( cf_type ), - .valid_i ( instruction_really_valid ), // from re-aligner + .valid_i ( instruction_valid ), // from re-aligner .consumed_o ( instr_queue_consumed ), .ready_o ( instr_queue_ready ), .replay_o ( replay ),