From 68232c03f0d0ffb39b40aa90cbf8d824b668a3a0 Mon Sep 17 00:00:00 2001
From: Alban Gruin <alban.gruin@irit.fr>
Date: Fri, 9 Apr 2021 16:16:17 +0200
Subject: [PATCH] fixup! frontend: disable branch prediction, mkII

---
 src/frontend/frontend.sv | 272 +++++++++------------------------------
 1 file changed, 61 insertions(+), 211 deletions(-)

diff --git a/src/frontend/frontend.sv b/src/frontend/frontend.sv
index 819a885f..b9104b92 100644
--- a/src/frontend/frontend.sv
+++ b/src/frontend/frontend.sv
@@ -89,20 +89,10 @@ module frontend import ariane_pkg::*; #(
     // re-aligned instruction and address (coming from cache - combinationally)
     logic [INSTR_PER_FETCH-1:0][31:0] instr;
     logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr;
-    logic [INSTR_PER_FETCH-1:0]       instruction_valid, instruction_really_valid;
-    logic [INSTR_PER_FETCH-1:0]       instr_really_valid;
-
-    // BHT, BTB and RAS prediction
-    bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction;
-    btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction;
-    bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted;
-    btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted;
-    ras_t            ras_predict;
+    logic [INSTR_PER_FETCH-1:0]       instruction_valid;
 
     // branch-predict update
     logic            is_mispredict;
-    logic            ras_push, ras_pop;
-    logic [riscv::VLEN-1:0]     ras_update;
 
     // Instruction FIFO
     logic [riscv::VLEN-1:0]                 predict_address;
@@ -111,7 +101,7 @@ module frontend import ariane_pkg::*; #(
     logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf;
 
     logic stalling_d, stalling_q;
-    logic previous_is_branch;
+    logic bp_valid;
 
     logic serving_unaligned;
     // Re-align instructions
@@ -127,25 +117,8 @@ module frontend import ariane_pkg::*; #(
       .addr_o              ( addr                  ),
       .instr_o             ( instr                 )
     );
-    // --------------------
-    // Branch Prediction
-    // --------------------
-    // select the right branch prediction result
-    // in case we are serving an unaligned instruction in instr[0] we need to take
-    // the prediction we saved from the previous fetch
-    assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[0];
-    assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[0];
-    // for all other predictions we can use the generated address to index
-    // into the branch prediction data structures
-    for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address
-      assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
-      assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
-    end
-    // for the return address stack it doens't matter as we have the
-    // address of the call/return already
-    logic bp_valid;
 
-    assign branch_speculation_o = bp_valid;
+    assign branch_speculation_o = 1'b0;
 
     logic [INSTR_PER_FETCH-1:0] is_branch;
     logic [INSTR_PER_FETCH-1:0] is_call;
@@ -166,121 +139,59 @@ module frontend import ariane_pkg::*; #(
       assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & ~is_call[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
     end
 
+    // Hold the fetch while the current fetch contains a branch, return or
+    // indirect jump (or a stall is already pending) and the backend does not
+    // report a resolved branch in this cycle.
+    logic should_stall;
+    assign should_stall = (stalling_q | (|is_branch) | (|is_return) | (|is_jalr)) & (!resolved_branch_i.valid);
+
     // taken/not taken
     always_comb begin
+      stalling_d = stalling_q;
       taken_rvi_cf = '0;
       taken_rvc_cf = '0;
       predict_address = '0;
 
+      // default: no control flow, keeps cf_type fully assigned (no inferred latch)
       for (int i = 0; i < INSTR_PER_FETCH; i++)  cf_type[i] = ariane_pkg::NoCF;
-
-      ras_push = 1'b0;
-      ras_pop = 1'b0;
-      ras_update = '0;
-
-      // lower most prediction gets precedence
-      for (int unsigned i = INSTR_PER_FETCH - 1; i >= 0 ; i--) begin
-        // Direct jumps are always predicted correctly since we have their
-        // target address.
-
-        unique case ({is_branch[i], is_return[i], is_jump[i], is_jalr[i]})
-          4'b0000:; // regular instruction e.g.: no branch
-          // unconditional jump to register, we need the BTB to resolve this
-          4'b0001: begin
-            ras_pop = 1'b0;
-            ras_push = 1'b0;
-            if (btb_prediction_shifted[i].valid & enable_bp_i) begin
-              predict_address = btb_prediction_shifted[i].target_address;
-              cf_type[i] = ariane_pkg::JumpR;
-            end
-          end
-          // its an unconditional jump to an immediate
-          4'b0010: begin
-            ras_pop = 1'b0;
-            ras_push = 1'b0;
-            taken_rvi_cf[i] = rvi_jump[i];
-            taken_rvc_cf[i] = rvc_jump[i];
-            cf_type[i] = ariane_pkg::Jump;
-          end
-          // return
-          4'b0100: begin
-            // make sure to only alter the RAS if we actually consumed the instruction
-            ras_push = 1'b0;
-            ras_pop = ras_predict.valid & instr_queue_consumed[i];
-            if (enable_bp_i) begin
-              predict_address = ras_predict.ra;
-              cf_type[i] = ariane_pkg::Return;
-            end
-          end
-          // branch prediction
-          4'b1000: begin
-            ras_pop = 1'b0;
-            ras_push = 1'b0;
-            if (enable_bp_i) begin
-              // if we have a valid dynamic prediction use it
-              if (bht_prediction_shifted[i].valid) begin
-                taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken;
-                taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken;
-              // otherwise default to static prediction
-              end else begin
-                // set if immediate is negative - static prediction
-                taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][riscv::VLEN-1];
-                taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][riscv::VLEN-1];
-              end
-              if (taken_rvi_cf[i] || taken_rvc_cf[i]) cf_type[i] = ariane_pkg::Branch;
-            end
-          end
-          default:;
-            // default: $error("Decoded more than one control flow");
-        endcase
-          // if this instruction, in addition, is a call, save the resulting address
-          // but only if we actually consumed the address
-          if (is_call[i]) begin
-            ras_push = instr_queue_consumed[i];
-            ras_update = addr[i] + (rvc_call[i] ? 2 : 4);
-          end
-          // calculate the jump target address
-          if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin
-            if (rvi_jump[i] || rvc_jump[i] || enable_bp_i) begin
-              predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]);
-             end
-          end
-      end
-    end // always_comb
-
-    // or reduce struct
-    always_comb begin
       bp_valid = 1'b0;
-      instruction_really_valid = '0;
-      previous_is_branch = 1'b0;
-      stalling_d = stalling_q;
-
-      if (!stalling_q) begin
-         // BP cannot be valid if we have a return instruction and the RAS is not giving a valid address
-         // Check that we encountered a control flow and that for a return the RAS
-         // contains a valid prediction.
-         for (int unsigned i = 0; i < INSTR_PER_FETCH; i++) begin
-            bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid));
 
-            if (enable_bp_i) begin
-               instruction_really_valid[i] = instruction_valid[i];
-            end else begin
-               instruction_really_valid[i] = instruction_valid[i] && !(previous_is_branch);
-            end
+      if (!should_stall) begin
+         stalling_d = 1'b0;
 
-            previous_is_branch |= !(cf_type[i] inside {NoCF, Jump});
+         for (int unsigned i = 0; i < INSTR_PER_FETCH; i++) begin
+            unique case ({is_branch[i] | is_return[i] | is_jalr[i], is_jump[i]})
+              // regular instruction, no control flow
+              2'b00:;
+              // direct jump: the target is known from the immediate
+              2'b01: begin
+                 // only the first control flow instruction sets the target;
+                 // pick the immediate by encoding (taken_rvc_cf[i] is still 0 here)
+                 if (!(|taken_rvi_cf) && !(|taken_rvc_cf))
+                   predict_address = addr[i] + (rvc_jump[i] ? rvc_imm[i] : rvi_imm[i]);
+
+                 taken_rvi_cf[i] = rvi_jump[i];
+                 taken_rvc_cf[i] = rvc_jump[i];
+                 cf_type[i] = ariane_pkg::Jump;
+                 bp_valid = 1'b1;
+              end
+              // branch, return or indirect jump: flag it and start stalling
+              2'b10: begin
+                 taken_rvi_cf[i] = 1'b1;
+                 taken_rvc_cf[i] = 1'b1;
+                 bp_valid = 1'b1;
+                 stalling_d = 1'b1;
+              end
+              default:;
+            endcase
          end
-
-         stalling_d |= previous_is_branch;
-      end else if (resolved_branch_i.valid) begin
-         stalling_d = 1'b0;
       end
     end
 
     assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
 
     // Cache interface
-    assign icache_dreq_o.req = instr_queue_ready;
+    assign icache_dreq_o.req = instr_queue_ready & (!should_stall);
     assign if_ready = icache_dreq_i.ready & instr_queue_ready;
     // We need to flush the cache pipeline if:
     // 1. We mispredicted
@@ -291,21 +191,6 @@ module frontend import ariane_pkg::*; #(
     // also if we killed the first stage we also need to kill the second stage (inclusive flush)
     assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;
 
-    // Update Control Flow Predictions
-    bht_update_t bht_update;
-    btb_update_t btb_update;
-
-    assign bht_update.valid = resolved_branch_i.valid
-                                & (resolved_branch_i.cf_type == ariane_pkg::Branch);
-    assign bht_update.pc    = resolved_branch_i.pc;
-    assign bht_update.taken = resolved_branch_i.is_taken;
-    // only update mispredicted branches e.g. no returns from the RAS
-    assign btb_update.valid = resolved_branch_i.valid
-                                & resolved_branch_i.is_mispredict
-                                & (resolved_branch_i.cf_type == ariane_pkg::JumpR);
-    assign btb_update.pc    = resolved_branch_i.pc;
-    assign btb_update.target_address = resolved_branch_i.target_address;
-
     // -------------------
     // Next PC
     // -------------------
@@ -334,33 +219,40 @@ module frontend import ariane_pkg::*; #(
         // keep stable by default
         npc_d            = npc_q;
       end
-      // 0. Branch Prediction
-      if (bp_valid) begin
-        fetch_address = predict_address;
-        npc_d = predict_address;
+
+      // NOTE(review): while should_stall is set none of the redirects below
+      // (replay, eret, exception, commit flush, debug) update npc_d - confirm
+      // that the backend cannot raise them during a stall.
+      if (!should_stall) begin
+         // 0. Branch Prediction
+         if (bp_valid) begin
+            fetch_address = predict_address;
+            npc_d = predict_address;
+         end
+         // 1. Default assignment
+         if (if_ready) npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0}  + 'h4;
+         // 2. Replay instruction fetch
+         if (replay) npc_d = replay_addr;
+         // 3. Control flow change request
+         if (is_mispredict) npc_d = resolved_branch_i.target_address;
+         // 4. Return from environment call
+         if (eret_i) npc_d = epc_i;
+         // 5. Exception/Interrupt
+         if (ex_valid_i) npc_d = trap_vector_base_i;
+         // 6. Pipeline Flush because of CSR side effects
+         // On a pipeline flush start fetching from the next address
+         // of the instruction in the commit stage
+         // we came here from a flush request of a CSR instruction or AMO,
+         // as CSR or AMO instructions do not exist in a compressed form
+         // we can unconditionally do PC + 4 here
+         // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
+         if (set_pc_commit_i) npc_d = pc_commit_i + {{riscv::VLEN-3{1'b0}}, 3'b100};
+         // 7. Debug
+         // enter debug on a hard-coded base-address
+         if (set_debug_pc_i) npc_d = ArianeCfg.DmBaseAddress[riscv::VLEN-1:0] + dm::HaltAddress[riscv::VLEN-1:0];
       end
-      // 1. Default assignment
-      if (if_ready) npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0}  + 'h4;
-      // 2. Replay instruction fetch
-      if (replay) npc_d = replay_addr;
-      // 3. Control flow change request
-      if (is_mispredict) npc_d = resolved_branch_i.target_address;
-      // 4. Return from environment call
-      if (eret_i) npc_d = epc_i;
-      // 5. Exception/Interrupt
-      if (ex_valid_i) npc_d = trap_vector_base_i;
-      // 6. Pipeline Flush because of CSR side effects
-      // On a pipeline flush start fetching from the next address
-      // of the instruction in the commit stage
-      // we came here from a flush request of a CSR instruction or AMO,
-      // as CSR or AMO instructions do not exist in a compressed form
-      // we can unconditionally do PC + 4 here
-      // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
-      if (set_pc_commit_i) npc_d = pc_commit_i + {{riscv::VLEN-3{1'b0}}, 3'b100};
-      // 7. Debug
-      // enter debug on a hard-coded base-address
-      if (set_debug_pc_i) npc_d = ArianeCfg.DmBaseAddress[riscv::VLEN-1:0] + dm::HaltAddress[riscv::VLEN-1:0];
+      // drive the address unconditionally so that no latch is inferred
       icache_dreq_o.vaddr = fetch_address;
     end
 
     logic [FETCH_WIDTH-1:0] icache_data;
@@ -392,49 +280,11 @@ module frontend import ariane_pkg::*; #(
             icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT;
           end else icache_ex_valid_q <= ariane_pkg::FE_NONE;
           // save the uppermost prediction
-          btb_q                <= btb_prediction[INSTR_PER_FETCH-1];
-          bht_q                <= bht_prediction[INSTR_PER_FETCH-1];
-          stalling_q           <= stalling_d;
         end
+        stalling_q           <= stalling_d;
       end
     end
 
-    ras #(
-      .DEPTH  ( ArianeCfg.RASDepth  )
-    ) i_ras (
-      .clk_i,
-      .rst_ni,
-      .flush_i( flush_bp_i  ),
-      .push_i ( ras_push    ),
-      .pop_i  ( ras_pop     ),
-      .data_i ( ras_update  ),
-      .data_o ( ras_predict )
-    );
-
-    btb #(
-      .NR_ENTRIES       ( ArianeCfg.BTBEntries   )
-    ) i_btb (
-      .clk_i,
-      .rst_ni,
-      .flush_i          ( flush_bp_i       ),
-      .debug_mode_i,
-      .vpc_i            ( icache_vaddr_q   ),
-      .btb_update_i     ( btb_update       ),
-      .btb_prediction_o ( btb_prediction   )
-    );
-
-    bht #(
-      .NR_ENTRIES       ( ArianeCfg.BHTEntries   )
-    ) i_bht (
-      .clk_i,
-      .rst_ni,
-      .flush_i          ( flush_bp_i       ),
-      .debug_mode_i,
-      .vpc_i            ( icache_vaddr_q   ),
-      .bht_update_i     ( bht_update       ),
-      .bht_prediction_o ( bht_prediction   )
-    );
-
     // we need to inspect up to INSTR_PER_FETCH instructions for branches
     // and jumps
     for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan
@@ -466,7 +316,7 @@ module frontend import ariane_pkg::*; #(
       .exception_addr_i    ( icache_vaddr_q       ),
       .predict_address_i   ( predict_address      ),
       .cf_type_i           ( cf_type              ),
-      .valid_i             ( instruction_really_valid ), // from re-aligner
+      .valid_i             ( instruction_valid    ), // from re-aligner
       .consumed_o          ( instr_queue_consumed ),
       .ready_o             ( instr_queue_ready    ),
       .replay_o            ( replay               ),
-- 
GitLab