diff --git a/src/ariane.sv b/src/ariane.sv
index 549e7ae65d4f0711f7fb1a1b3899dfb1698bc047..33752853c76c66d581a5a5b1908810ebeab6aee9 100644
--- a/src/ariane.sv
+++ b/src/ariane.sv
@@ -80,8 +80,14 @@ module ariane import ariane_pkg::*; #(
   // IF -> verifier
   // --------------
   logic                     has_mem_access_if_verif;
+  logic                     has_ctrl_flow_if_icache;
   logic                     bsp_if_perf;
 
+  // --------------
+  // IF <-> *
+  // --------------
+  logic                     begin_spec_if_ic;
+
   // --------------
   // ID <-> ISSUE
   // --------------
@@ -94,11 +100,13 @@ module ariane import ariane_pkg::*; #(
   // ID -> verifier
   // --------------
   logic                     has_mem_access_id_verif;
+  logic                     has_ctrl_flow_id_icache;
 
   // --------------
   // ISSUE -> verifier
   // --------------
   logic                     has_mem_access_is_verif;
+  logic                     has_ctrl_flow_is_icache;
 
   // --------------
   // ISSUE <-> EX
@@ -285,7 +293,9 @@ module ariane import ariane_pkg::*; #(
     .fetch_entry_valid_o ( fetch_valid_if_id             ),
     .fetch_entry_ready_i ( fetch_ready_id_if             ),
     .has_mem_access_o    ( has_mem_access_if_verif       ),
+    .has_cf_o            ( has_ctrl_flow_if_icache       ),
     .branch_speculation_o( bsp_if_perf                   ),
+    .begin_spec_o        ( begin_spec_if_ic              ),
     .*
   );
 
@@ -306,6 +316,7 @@ module ariane import ariane_pkg::*; #(
     .issue_entry_valid_o        ( issue_entry_valid_id_issue ),
     .is_ctrl_flow_o             ( is_ctrl_fow_id_issue       ),
     .is_mem_instr_o             ( has_mem_access_id_verif    ),
+    .has_ctrl_flow_o            ( has_ctrl_flow_id_icache    ),
     .issue_instr_ack_i          ( issue_instr_issue_id       ),
 
     .priv_lvl_i                 ( priv_lvl                   ),
@@ -378,6 +389,7 @@ module ariane import ariane_pkg::*; #(
     .commit_instr_o             ( commit_instr_id_commit       ),
     .commit_ack_i               ( commit_ack                   ),
     .has_mem_access_o           ( has_mem_access_is_verif      ),
+    .has_control_flow_o         ( has_ctrl_flow_is_icache      ),
     .*
   );
 
@@ -660,12 +672,15 @@ module ariane import ariane_pkg::*; #(
 
     // IF
     .if_has_mem_access_i (has_mem_access_if_verif),
+    .if_has_cf_i         (has_ctrl_flow_if_icache),
 
     // ID
     .id_has_mem_access_i (has_mem_access_id_verif),
+    .id_has_cf_i         (has_ctrl_flow_id_icache),
 
     // IS
     .is_has_mem_access_i (has_mem_access_is_verif),
+    .is_has_cf_i         (has_ctrl_flow_is_icache),
 
     // LSU
     .no_st_pending_commit_i (no_st_pending_ex),
@@ -682,6 +697,11 @@ module ariane import ariane_pkg::*; #(
   // -------------------
 
 `ifdef WT_DCACHE
+  logic icache_valid_spec, icache_bad_spec;
+
+  assign icache_valid_spec = resolved_branch.valid && !(resolved_branch.is_mispredict) && (resolved_branch.cf_type != Jump);
+  assign icache_bad_spec = resolved_branch.valid && resolved_branch.is_mispredict;
+
   // this is a cache subsystem that is compatible with OpenPiton
   wt_cache_subsystem #(
     .ArianeCfg            ( ArianeCfg     )
@@ -698,6 +718,9 @@ module ariane import ariane_pkg::*; #(
     .icache_dreq_i         ( icache_dreq_if_cache        ),
     .icache_dreq_o         ( icache_dreq_cache_if        ),
     .icache_stall_i        ( icache_stall_ctrl           ),
+    .icache_begin_spec_i   ( begin_spec_if_ic            ),
+    .icache_valid_spec_i   ( icache_valid_spec           ),
+    .icache_bad_spec_i     ( icache_bad_spec             ),
     // D$
     .dcache_enable_i       ( dcache_en_csr_nbdcache      ),
     .dcache_flush_i        ( dcache_flush_ctrl_cache     ),
diff --git a/src/branch_unit.sv b/src/branch_unit.sv
index 2b4b3fce50bb8f7e676ddecbe7e59c0b8ced8cf0..1ef9b5389daa84a7d83fbcb8c61a7514d327c1a6 100644
--- a/src/branch_unit.sv
+++ b/src/branch_unit.sv
@@ -65,11 +65,11 @@ module branch_unit (
             resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc;
             resolved_branch_o.is_taken = branch_comp_res_i;
             // check the outcome of the branch speculation
-            if (ariane_pkg::op_is_branch(fu_data_i.operator)) begin
-                resolved_branch_o.cf_type = ariane_pkg::Branch;
+            if (ariane_pkg::op_is_branch(fu_data_i.operator) && branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch)) begin
                 // we mis-predicted the outcome
                 // if the outcome doesn't match we've got a mis-predict
-                resolved_branch_o.is_mispredict = branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch);
+                resolved_branch_o.is_mispredict  = 1'b1;
+                resolved_branch_o.cf_type = ariane_pkg::Branch;
             end
             if (fu_data_i.operator == ariane_pkg::JALR
                 // check if the address of the jump register is correct and that we actually predicted
diff --git a/src/cache_subsystem/lru_4way.sv b/src/cache_subsystem/lru_4way.sv
new file mode 100644
index 0000000000000000000000000000000000000000..2a3235ca8783759106ebdb398d1c81d23c2840c9
--- /dev/null
+++ b/src/cache_subsystem/lru_4way.sv
@@ -0,0 +1,77 @@
+// 4-way LRU age tracker (purely combinational).
+// The recency order of one set is packed into 5 bits:
+//   ages[1:0] : most recently used way
+//   ages[3:2] : second most recently used way
+//   ages[4]   : order of the two remaining ways (0: the lower-numbered one
+//               is the more recent, 1: the higher-numbered one is)
+// The all-zero vector decodes to the order 0, 1, 2, 3.
+module lru_4way (
+  input logic [1:0] hit_i,       // way being accessed; not used when miss_i is set
+  input logic miss_i,            // when set, the least recently used way becomes the most recent one
+  input logic [4:0] ages_i,      // encoded order before the access
+  output logic [4:0] ages_o,     // encoded order after the access
+  output logic [1:0] evicted_o,  // least recently used way decoded from ages_i
+  output logic updated_o         // see the assign below for the exact condition
+);
+
+  // lowest-numbered way that is neither fstway nor sndway; callers pass fstway < sndway
+  function automatic logic [1:0] ordered_2 (logic [1:0] fstway, logic [1:0] sndway);
+    return (fstway != 2'b00) ? 2'b00 : ((sndway == 2'b01) ? 2'b10 : 2'b01);
+  endfunction
+
+  // highest-numbered way that is neither fstway nor sndway; callers pass fstway < sndway
+  function automatic logic [1:0] ordered_3 (logic [1:0] fstway, logic [1:0] sndway);
+    return (sndway != 2'b11) ? 2'b11 : ((fstway == 2'b10) ? 2'b01 : 2'b10);
+  endfunction
+
+  logic [3:0][1:0] ways, ordered_ways; // ways: index 0 = most recent .. index 3 = least recent
+  logic [2:0][1:0] new_ways;           // three most recent ways after the access
+  logic [1:0][1:0] new_ways_ordered;   // new_ways[1:0] sorted ascending
+  logic [1:0] predicted_3rd_way;       // third way that ages_o[4] == 0 would decode to
+
+  // unpack ages_i into ways[0] (most recent) .. ways[3] (least recent)
+  always_comb begin : decode
+    ways[0] = ages_i[1:0];
+    // both fields being zero cannot name two distinct ways; way 1 is used as the second entry
+    ways[1] = (ways[0] == 2'b00 && ages_i[3:2] == 2'b00) ? 2'b01 : ages_i[3:2];
+    // sort the two explicitly stored ways so the other two can be derived from them
+    ordered_ways[0] = (ways[0] < ways[1]) ? ways[0] : ways[1];
+    ordered_ways[1] = (ways[0] < ways[1]) ? ways[1] : ways[0];
+    // the two ways that are not stored explicitly, in ascending order
+    ordered_ways[2] = ordered_2(ordered_ways[0], ordered_ways[1]);
+    ordered_ways[3] = ordered_3(ordered_ways[0], ordered_ways[1]);
+    // ages_i[4] swaps the order of those two
+    ways[2] = (ages_i[4]) ? ordered_ways[3] : ordered_ways[2];
+    ways[3] = (ages_i[4]) ? ordered_ways[2] : ordered_ways[3];
+  end
+
+  assign evicted_o = ways[3];
+  // set on a miss, when hit_i is not already the most recent way, or when decode substituted ways[1]
+  assign updated_o = miss_i || (ways[0] != hit_i) || (ways[1] != ages_i[3:2]);
+
+  always_comb begin : encode
+    ages_o = ages_i; // default only; both branches of the final if overwrite it
+    new_ways = ways[2:0];
+    // move the accessed way (on a miss: the least recent way) to the front, shifting the others back
+    if (miss_i || ways[0] != hit_i) begin
+      if (miss_i || ways[1] != hit_i) begin
+        new_ways[0] = (miss_i) ? ways[3] : hit_i;
+        new_ways[1] = ways[0];
+        new_ways[2] = ways[1];
+      end else begin
+        new_ways[0] = ways[1];
+        new_ways[1] = ways[0];
+      end
+    end
+
+    new_ways_ordered[0] = (new_ways[0] < new_ways[1]) ? new_ways[0] : new_ways[1];
+    new_ways_ordered[1] = (new_ways[0] < new_ways[1]) ? new_ways[1] : new_ways[0];
+    // bit 4 is clear when the third way is the lower-numbered of the two ways not stored explicitly
+    predicted_3rd_way = ordered_2(new_ways_ordered[0], new_ways_ordered[1]);
+    if (new_ways[2] == predicted_3rd_way) begin
+      ages_o = {1'b0, new_ways[1], new_ways[0]};
+    end else begin
+      ages_o = {1'b1, new_ways[1], new_ways[0]};
+    end
+  end
+endmodule
diff --git a/src/cache_subsystem/wt_cache_subsystem.sv b/src/cache_subsystem/wt_cache_subsystem.sv
index 36f7a2be0d361f1b26370863d2a319a8ad6de15d..73e4527773ab25e3bf27de778e0c4975e91b9e6a 100644
--- a/src/cache_subsystem/wt_cache_subsystem.sv
+++ b/src/cache_subsystem/wt_cache_subsystem.sv
@@ -35,6 +35,9 @@ module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #(
   input  icache_dreq_i_t                 icache_dreq_i,          // to/from frontend
   output icache_dreq_o_t                 icache_dreq_o,
   input logic                            icache_stall_i,
+  input logic                            icache_begin_spec_i,
+  input logic                            icache_valid_spec_i,
+  input logic                            icache_bad_spec_i,
   // D$
   // Cache management
   input  logic                           dcache_enable_i,        // from CSR
@@ -91,7 +94,10 @@ module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #(
     .mem_data_req_o     ( icache_adapter_data_req ),
     .mem_data_ack_i     ( adapter_icache_data_ack ),
     .mem_data_o         ( icache_adapter          ),
-    .stall_req_i        ( icache_stall_i          )
+    .stall_req_i        ( icache_stall_i          ),
+    .begin_spec_i       ( icache_begin_spec_i     ),
+    .valid_spec_i       ( icache_valid_spec_i     ),
+    .bad_spec_i         ( icache_bad_spec_i       )
   );
 
 
diff --git a/src/cache_subsystem/wt_icache.sv b/src/cache_subsystem/wt_icache.sv
index 0b3cd9db77074fcb6f5ccd69abf6a46c53e2472c..36b78e2aef403f42509a1973b2302a52727a0a7c 100644
--- a/src/cache_subsystem/wt_icache.sv
+++ b/src/cache_subsystem/wt_icache.sv
@@ -27,7 +27,8 @@
 
 module wt_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
   parameter logic [CACHE_ID_WIDTH-1:0]  RdTxId             = 0,                                  // ID to be used for read transactions
-  parameter ariane_pkg::ariane_cfg_t    ArianeCfg          = ariane_pkg::ArianeDefaultConfig     // contains cacheable regions
+  parameter ariane_pkg::ariane_cfg_t    ArianeCfg          = ariane_pkg::ArianeDefaultConfig,    // contains cacheable regions
+  parameter int                         SpecDepth          = 16
 ) (
   input  logic                      clk_i,
   input  logic                      rst_ni,
@@ -47,7 +48,10 @@ module wt_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
   output logic                      mem_data_req_o,
   input  logic                      mem_data_ack_i,
   output icache_req_t               mem_data_o,
-  input  logic                      stall_req_i
+  input  logic                      stall_req_i,
+  input  logic                      begin_spec_i,
+  input  logic                      valid_spec_i,
+  input  logic                      bad_spec_i
 );
 
   // signals
@@ -61,11 +65,10 @@ module wt_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
   logic                                 flush_d, flush_q;             // used to register and signal pending flushes
 
   // replacement strategy
-  logic                                 update_lfsr;                  // shift the LFSR
+  logic                                 lru_miss;                     // evict the oldest cacheline in the set
   logic [$clog2(ICACHE_SET_ASSOC)-1:0]  inv_way;                      // first non-valid encountered
-  logic [$clog2(ICACHE_SET_ASSOC)-1:0]  rnd_way;                      // random index for replacement
   logic [$clog2(ICACHE_SET_ASSOC)-1:0]  repl_way;                     // way to replace
-  logic [ICACHE_SET_ASSOC-1:0]          repl_way_oh_d, repl_way_oh_q; // way to replace (onehot)
+  logic [$clog2(ICACHE_SET_ASSOC)-1:0]  repl_way_d, repl_way_q;       // way to replace (d/q)
   logic                                 all_ways_valid;               // we need to switch repl strategy since all are valid
 
   // invalidations / flushing
@@ -88,6 +91,9 @@ module wt_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
   logic [ICACHE_SET_ASSOC-1:0]          vld_rdata;                    // valid bits coming from valid regs
   logic [ICACHE_CL_IDX_WIDTH-1:0]       vld_addr;                     // valid bit
 
+  // LRU
+  logic lru_lock_d, lru_lock_q;
+
   // cpmtroller FSM
   typedef enum logic[2:0] {FLUSH, IDLE, READ, MISS, TLB_MISS, KILL_ATRANS, KILL_MISS} state_e;
   state_e state_d, state_q;
@@ -229,7 +235,7 @@ end else begin : gen_piton_offset
             if (flush_d) begin
               state_d  = IDLE;
             // we have a hit or an exception output valid result
-            end else if ((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) begin
+            end else if (!lru_lock_d && ((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid)) begin
               dreq_o.valid     = ~dreq_i.kill_s2;// just don't output in this case
               state_d          = IDLE;
 
@@ -250,7 +256,7 @@ end else begin : gen_piton_offset
             // we have a miss / NC transaction
             end else if (dreq_i.kill_s2) begin
               state_d = IDLE;
-            end else begin
+            end else if (!lru_lock_d && !stall_req_i) begin
               cmp_en_d = 1'b0;
               // only count this as a miss if the cache is enabled, and
               // the address is cacheable
@@ -260,6 +266,8 @@ end else begin : gen_piton_offset
                 miss_o         = ~paddr_is_nc;
                 state_d        = MISS;
               end
+            end else if (lru_lock_d || stall_req_i) begin
+              state_d = READ;
             end
           // bail out if this request is being killed (and we missed on the TLB)
           end else if (dreq_i.kill_s2 || flush_d) begin
@@ -365,26 +373,26 @@ end else begin : gen_piton_offset
   assign vld_req  = (flush_en || cache_rden)        ? '1                                    :
                     (mem_rtrn_i.inv.all && inv_en)  ? '1                                    :
                     (mem_rtrn_i.inv.vld && inv_en)  ? icache_way_bin2oh(mem_rtrn_i.inv.way) :
-                                                      repl_way_oh_q;
+                                                      icache_way_bin2oh(repl_way_q);
 
   assign vld_wdata = (cache_wren) ? '1 : '0;
 
   assign vld_we    = (cache_wren | inv_en | flush_en);
   // assign vld_req   = (vld_we | cache_rden);
 
+  logic [$clog2(ICACHE_SET_ASSOC)-1:0] evicted;
 
-  // chose random replacement if all are valid
-  assign update_lfsr   = cache_wren & all_ways_valid;
-  assign repl_way      = (all_ways_valid) ? rnd_way : inv_way;
-  assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q;
+  // choose LRU replacement if all are valid
+  assign lru_miss   = cache_wren & all_ways_valid;
+  assign repl_way   = (all_ways_valid) ? evicted : inv_way;
+  assign repl_way_d = (cmp_en_q) ? repl_way : repl_way_q;
 
   // enable signals for memory arrays
   assign cl_req   = (cache_rden) ? '1            :
-                    (cache_wren) ? repl_way_oh_q :
+                    (cache_wren) ? icache_way_bin2oh(repl_way_q) :
                                    '0;
   assign cl_we    = cache_wren;
 
-
   // find invalid cache line
   lzc #(
     .WIDTH ( ICACHE_SET_ASSOC )
@@ -394,18 +402,6 @@ end else begin : gen_piton_offset
     .empty_o ( all_ways_valid )
   );
 
-  // generate random cacheline index
-  lfsr_8bit #(
-    .WIDTH (ICACHE_SET_ASSOC)
-  ) i_lfsr (
-    .clk_i          ( clk_i       ),
-    .rst_ni         ( rst_ni      ),
-    .en_i           ( update_lfsr ),
-    .refill_way_oh  (             ),
-    .refill_way_bin ( rnd_way     )
-  );
-
-
 ///////////////////////////////////////////////////////
 // tag comparison, hit generation
 ///////////////////////////////////////////////////////
@@ -417,7 +413,6 @@ end else begin : gen_piton_offset
     assign cl_sel[i] = cl_rdata[i][{cl_offset_q,3'b0} +: FETCH_WIDTH];
   end
 
-
   lzc #(
     .WIDTH ( ICACHE_SET_ASSOC )
   ) i_lzc_hit (
@@ -430,11 +425,64 @@ end else begin : gen_piton_offset
                                     mem_rtrn_i.data[{cl_offset_q,3'b0} +: FETCH_WIDTH];
 
 ///////////////////////////////////////////////////////
-// memory arrays and regs
+// LRU
 ///////////////////////////////////////////////////////
 
-
   logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata [ICACHE_SET_ASSOC-1:0];
+  logic [4:0] new_ages, lru_sram_read, lru_sram_rread;
+  logic [1:0] lru_hit;
+  logic ages_updated, lru_wren;
+
+  logic [$clog2(SpecDepth)-1:0] ptr_spec_d, ptr_spec_q;
+  logic [$clog2(SpecDepth)-1:0] ptr_backup_d, ptr_backup_q;
+  logic [SpecDepth-1:0][ICACHE_NUM_WORDS-1:0][4:0] lru_sram_d, lru_sram_q;
+
+  assign ptr_spec_d = (bad_spec_i) ? ptr_backup_q : (begin_spec_i) ? ptr_spec_q + 1'b1 : ptr_spec_q;
+  assign ptr_backup_d = (valid_spec_i) ? ptr_backup_q + 1'b1 : ptr_backup_q;
+
+  assign lru_hit = (cache_wren) ? repl_way_q : hit_idx;
+  assign lru_wren = |vld_req & (cache_rden | cache_wren) & ages_updated & dreq_o.valid;
+  assign lru_sram_rread = (|vld_rdata) ? lru_sram_read : '0;
+
+  always_comb begin
+    lru_lock_d = lru_lock_q; // hold the current lock state by default
+    // NOTE(review): presumably equal pointers after the increment mean no free snapshot slot is left - confirm
+    if (begin_spec_i & ~bad_spec_i && (ptr_spec_d == ptr_backup_d)) begin
+      lru_lock_d = 1'b1; // the controller FSM holds off hit and miss handling while this is set
+    end else if (valid_spec_i || bad_spec_i) begin
+      lru_lock_d = 1'b0; // a resolved speculation (valid or bad) releases the lock
+    end
+  end
+
+  always_comb begin
+    lru_sram_d = lru_sram_q;
+    lru_sram_read = '0;
+    // a speculation that begins (and is not squashed) starts from a copy of the current snapshot
+    if (!bad_spec_i && begin_spec_i) begin
+      lru_sram_d[ptr_spec_d] = lru_sram_q[ptr_spec_q];
+    end
+    // look up the age vector of the entry addressed by vld_addr in the active snapshot
+    if (cache_rden) begin
+      lru_sram_read = lru_sram_d[ptr_spec_d][vld_addr];
+    end
+    // store the re-encoded 5-bit ages from lru_4way; ages_updated is only the 1-bit "changed" flag
+    if (lru_wren) begin
+      lru_sram_d[ptr_spec_d][vld_addr] = new_ages;
+    end
+  end
+
+  lru_4way lru (
+    .hit_i     ( lru_hit        ),
+    .miss_i    ( lru_miss       ),
+    .ages_i    ( lru_sram_rread ),
+    .ages_o    ( new_ages       ),
+    .evicted_o ( evicted        ),
+    .updated_o ( ages_updated   )
+  );
+
+///////////////////////////////////////////////////////
+// memory arrays and regs
+///////////////////////////////////////////////////////
 
   for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_sram
     // Tag RAM
@@ -474,7 +522,6 @@ end else begin : gen_piton_offset
     );
   end
 
-
   always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
     if(!rst_ni) begin
       cl_tag_q      <= '0;
@@ -485,7 +532,11 @@ end else begin : gen_piton_offset
       flush_q       <= '0;
       state_q       <= IDLE;
       cl_offset_q   <= '0;
-      repl_way_oh_q <= '0;
+      repl_way_q    <= '0;
+      ptr_spec_q    <= '0;
+      ptr_backup_q  <= '0;
+      lru_lock_q    <= '0;
+      lru_sram_q    <= '0;
     end else begin
       cl_tag_q      <= cl_tag_d;
       flush_cnt_q   <= flush_cnt_d;
@@ -495,7 +546,11 @@ end else begin : gen_piton_offset
       flush_q       <= flush_d;
       state_q       <= state_d;
       cl_offset_q   <= cl_offset_d;
-      repl_way_oh_q <= repl_way_oh_d;
+      repl_way_q    <= repl_way_d;
+      ptr_spec_q    <= ptr_spec_d;
+      ptr_backup_q  <= ptr_backup_d;
+      lru_lock_q    <= lru_lock_d;
+      lru_sram_q    <= lru_sram_d;
     end
   end
 
@@ -521,6 +576,10 @@ end else begin : gen_piton_offset
     @(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(cl_hit))
       else $fatal(1,"[l1 icache] cl_hit signal must be hot1");
 
+  valid_or_bad_spec: assert property (
+    @(posedge clk_i) disable iff (!rst_ni) !(valid_spec_i) || (!bad_spec_i))
+      else $fatal(1,"[l1 icache] valid and bad speculation at the same time");
+
   // this is only used for verification!
   logic                                    vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
   logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
@@ -548,11 +607,19 @@ end else begin : gen_piton_offset
     @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
       else $fatal(1,"[l1 icache] cannot allocate a CL that is already present in the cache");
 
-
  initial begin
   // assert wrong parameterizations
   assert (ICACHE_INDEX_WIDTH<=12)
     else $fatal(1,"[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages");
+
+  assert (ICACHE_SET_ASSOC==4)
+    else $fatal(1,"[l1 icache] cache must be 4-associative");
+
+  assert (2 ** $clog2(SpecDepth) == SpecDepth)
+    else $fatal(1,"[l1 icache] SpecDepth is not a power of 2");
+
+  assert (SpecDepth >= 2)
+    else $fatal(1,"[l1 icache] SpecDepth is lower than 2");
  end
 `endif
 //pragma translate_on
diff --git a/src/commit_stage.sv b/src/commit_stage.sv
index 7ae9e2ab654b1411a71dc4d4c9724bf07b2edb2a..42e65b8e793a2b304759f67706433827674cfe9e 100644
--- a/src/commit_stage.sv
+++ b/src/commit_stage.sv
@@ -92,20 +92,22 @@ module commit_stage import ariane_pkg::*; #(
     always_comb begin : commit
         // default assignments
         commit_ack_o[0]    = 1'b0;
+        commit_ack_o[1]    = 1'b0;
 
-        // amo_valid_commit_o = 1'b0;
+        amo_valid_commit_o = 1'b0;
 
         we_gpr_o[0]        = 1'b0;
         we_gpr_o[1]        = 1'b0;
         we_fpr_o           = '{default: 1'b0};
-        // commit_lsu_o       = 1'b0;
-        // commit_csr_o       = 1'b0;
+        commit_lsu_o       = 1'b0;
+        commit_csr_o       = 1'b0;
         // amos will commit on port 0
         wdata_o[0]      = (amo_resp_i.ack) ? amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result;
-        // csr_op_o        = ADD; // this corresponds to a CSR NOP
-        // csr_wdata_o        = {riscv::XLEN{1'b0}};
+        wdata_o[1]      = commit_instr_i[1].result;
+        csr_op_o        = ADD; // this corresponds to a CSR NOP
+        csr_wdata_o        = {riscv::XLEN{1'b0}};
         fence_i_o          = 1'b0;
-        // fence_o            = 1'b0;
+        fence_o            = 1'b0;
         sfence_vma_o       = 1'b0;
         csr_write_fflags_o = 1'b0;
         flush_commit_o  = 1'b0;
@@ -126,7 +128,7 @@ module commit_stage import ariane_pkg::*; #(
                 // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store)
                 if (commit_lsu_ready_i) begin
                     commit_ack_o[0] = 1'b1;
-                    // commit_lsu_o = 1'b1;
+                    commit_lsu_o = 1'b1;
                 // stall in case the store buffer is not able to accept anymore instructions
                 end else begin
                     commit_ack_o[0] = 1'b0;
@@ -137,7 +139,7 @@ module commit_stage import ariane_pkg::*; #(
             // ---------
             if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
                 // write the CSR with potential exception flags from retiring floating point instruction
-                // csr_wdata_o = {{riscv::XLEN-5{1'b0}}, commit_instr_i[0].ex.cause[4:0]};
+                csr_wdata_o = {{riscv::XLEN-5{1'b0}}, commit_instr_i[0].ex.cause[4:0]};
                 csr_write_fflags_o = 1'b1;
                 commit_ack_o[0] = 1'b1;
             end
@@ -148,10 +150,10 @@ module commit_stage import ariane_pkg::*; #(
             // throw an exception
             if (commit_instr_i[0].fu == CSR) begin
                 // write the CSR file
-                // csr_op_o     = commit_instr_i[0].op;
-                // csr_wdata_o  = commit_instr_i[0].result;
+                csr_op_o     = commit_instr_i[0].op;
+                csr_wdata_o  = commit_instr_i[0].result;
                 if (!csr_exception_i.valid) begin
-                  // commit_csr_o = 1'b1;
+                  commit_csr_o = 1'b1;
                   wdata_o[0]   = csr_rdata_i;
                   commit_ack_o[0] = 1'b1;
                 end else begin
@@ -191,7 +193,7 @@ module commit_stage import ariane_pkg::*; #(
             if (commit_instr_i[0].op == FENCE) begin
                 commit_ack_o[0] = no_st_pending_i;
                 // tell the controller to flush the D$
-                // fence_o = no_st_pending_i;
+                fence_o = no_st_pending_i;
             end
             // ------------------
             // AMO
@@ -201,27 +203,49 @@ module commit_stage import ariane_pkg::*; #(
                 commit_ack_o[0] = amo_resp_i.ack;
                 // flush the pipeline
                 flush_commit_o = amo_resp_i.ack;
-                // amo_valid_commit_o = 1'b1;
+                amo_valid_commit_o = 1'b1;
                 we_gpr_o[0] = amo_resp_i.ack;
             end
         end
-    end
 
-    logic can_commit_instr;
-    assign can_commit_instr = commit_instr_i[0].valid && !commit_instr_i[0].ex.valid && !halt_i;
+        if (NR_COMMIT_PORTS > 1) begin
+            // -----------------
+            // Commit Port 2
+            // -----------------
+            // check if the second instruction can be committed as well and the first wasn't a CSR instruction
+            // also if we are in single step mode don't retire the second instruction
+            if (commit_ack_o[0] && commit_instr_i[1].valid
+                                && !halt_i
+                                && !(commit_instr_i[0].fu inside {CSR})
+                                && !flush_dcache_i
+                                && !instr_0_is_amo
+                                && !single_step_i) begin
+                // only if the first instruction didn't throw an exception and this instruction won't throw an exception
+                // and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC
+                if (!exception_o.valid && !commit_instr_i[1].ex.valid
+                                       && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin
 
-    assign csr_op_o = (can_commit_instr && commit_instr_i[0].fu == CSR) ? commit_instr_i[0].op : ADD;
-    assign csr_wdata_o = (can_commit_instr &&
-                          commit_instr_i[0].fu inside {FPU, FPU_VEC}) ? {{riscv::XLEN-5{1'b0}}, commit_instr_i[0].ex.cause[4:0]} :
-                         (can_commit_instr && commit_instr_i[0].fu == CSR) ? commit_instr_i[0].result :
-                         {riscv::XLEN{1'b0}};
+                    if (is_rd_fpr(commit_instr_i[1].op))
+                        we_fpr_o[1] = 1'b1;
+                    else
+                        we_gpr_o[1] = 1'b1;
 
-    assign commit_csr_o = can_commit_instr && commit_instr_i[0].fu == CSR && !csr_exception_i.valid;
+                    commit_ack_o[1] = 1'b1;
 
-    assign commit_lsu_o = can_commit_instr && commit_instr_i[0].fu == STORE && !instr_0_is_amo && commit_lsu_ready_i;
-    assign amo_valid_commit_o = can_commit_instr && RVA && instr_0_is_amo;
+                    // additionally check if we are retiring an FPU instruction because we need to make sure that we write all
+                    // exception flags
+                    if (commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin
+                        if (csr_write_fflags_o)
+                            csr_wdata_o = {{riscv::XLEN-5{1'b0}}, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])};
+                        else
+                            csr_wdata_o = {{riscv::XLEN-5{1'b0}}, commit_instr_i[1].ex.cause[4:0]};
 
-    assign fence_o = can_commit_instr && commit_instr_i[0].op == FENCE;
+                        csr_write_fflags_o = 1'b1;
+                    end
+                end
+            end
+        end
+    end
 
     // -----------------------------
     // Exception & Interrupt Logic
diff --git a/src/controller.sv b/src/controller.sv
index 6e8a1dc2d22a908a59525af4498bdd2ef74598aa..ef40190d3b59c1f172b611358f2c6db9ba4d620d 100644
--- a/src/controller.sv
+++ b/src/controller.sv
@@ -97,17 +97,13 @@ module controller import ariane_pkg::*; (
             flush_id_o             = 1'b1;
             flush_ex_o             = 1'b1;
             flush_icache_o         = 1'b1;
-// this is not needed in the case since we
-// have a write-through cache in this case
-`ifndef WT_DCACHE
+            // Force D$ flushing to avoid issues on the FPGA.
             flush_dcache           = 1'b1;
             fence_active_d         = 1'b1;
-`endif
+
+            flush_bp_o             = 1'b1;
         end
 
-// this is not needed in the case since we
-// have a write-through cache in this case
-`ifndef WT_DCACHE
         // wait for the acknowledge here
         if (flush_dcache_ack_i && fence_active_q) begin
             fence_active_d = 1'b0;
@@ -115,7 +111,7 @@ module controller import ariane_pkg::*; (
         end else if (fence_active_q) begin
             flush_dcache = 1'b1;
         end
-`endif
+
         // ---------------------------------
         // SFENCE.VMA
         // ---------------------------------
diff --git a/src/csr_buffer.sv b/src/csr_buffer.sv
index 3937aa3b239f25d0650c4f8479832213890b8503..15108f780054bef057b3b715d67df4a447113dc2 100644
--- a/src/csr_buffer.sv
+++ b/src/csr_buffer.sv
@@ -28,37 +28,46 @@ module csr_buffer import ariane_pkg::*; (
     // to CSR file
     output logic  [11:0]             csr_addr_o      // CSR address to commit stage
 );
-    // This is a single entry store buffer for the address of the CSR
+    // this is a single entry store buffer for the address of the CSR
     // which we are going to need in the commit stage
-    logic [11:0] csr_address_n, csr_address_q;
-    logic        csr_valid_n, csr_valid_q;
+    struct packed {
+        logic [11:0] csr_address;
+        logic        valid;
+    } csr_reg_n, csr_reg_q;
 
-    // Write logic
-    // Clear the buffer if we flushed or if there is a commit with no new valid instruction
-    assign csr_valid_n = (flush_i) ? 1'b0 :
-                         (csr_commit_i && ~csr_valid_i) ? 1'b0 :
-                         (csr_valid_i) ? 1'b1 :
-                         csr_valid_q;
+    // control logic, scoreboard signals
+    assign csr_result_o   = fu_data_i.operand_a;
+    assign csr_addr_o     = csr_reg_q.csr_address;
 
-    // Store the CSR address if we got a valid from the scoreboard.
-    assign csr_address_n = (csr_valid_i) ? fu_data_i.operand_b[11:0] : csr_address_q;
-
-    // If we have a valid uncomitted CSR req or are just getting one without a
-    // commit in, we are not ready.
-    assign csr_ready_o = ~((csr_valid_q || csr_valid_i) && ~csr_commit_i);
-
-    // Control logic, scoreboard signals
-    assign csr_result_o = fu_data_i.operand_a;
-    assign csr_addr_o   = csr_address_q;
-
-    // Sequential process
+    // write logic: computes the next state of the single-entry buffer and the ready signal
+    always_comb begin : write
+        csr_reg_n  = csr_reg_q; // hold the stored address and valid bit by default
+        // by default we are ready
+        csr_ready_o = 1'b1;
+        // if we have a valid uncommitted CSR request or are just getting one WITHOUT a commit in, we are not ready
+        if ((csr_reg_q.valid || csr_valid_i) && ~csr_commit_i)
+            csr_ready_o = 1'b0;
+        // if we got a valid from the scoreboard
+        // store the CSR address (low 12 bits of operand_b) and mark the entry valid
+        if (csr_valid_i) begin
+            csr_reg_n.csr_address = fu_data_i.operand_b[11:0];
+            csr_reg_n.valid       = 1'b1;
+        end
+        // if we get a commit and no new valid instruction -> clear the valid bit
+        if (csr_commit_i && ~csr_valid_i) begin
+            csr_reg_n.valid       = 1'b0;
+        end
+        // clear the buffer if we flushed; assigned last, so a flush overrides a new valid request
+        if (flush_i)
+            csr_reg_n.valid       = 1'b0;
+    end
+    // sequential process
     always_ff @(posedge clk_i or negedge rst_ni) begin
-        if (~rst_ni) begin
-            csr_valid_q   <= 1'b0;
-            csr_address_q <= '0;
+        if(~rst_ni) begin
+            csr_reg_q <= '{default: 0};
         end else begin
-            csr_valid_q   <= csr_valid_n;
-            csr_address_q <= csr_address_n;
+            csr_reg_q <= csr_reg_n;
         end
     end
+
 endmodule
diff --git a/src/decoder.sv b/src/decoder.sv
index 4ea5de725a8582604c894313382488af6abcbbe7..e0c551e79008c34dca3810b3d9a0db2cd99958f4 100644
--- a/src/decoder.sv
+++ b/src/decoder.sv
@@ -80,10 +80,6 @@ module decoder import ariane_pkg::*; (
         instruction_o.trans_id      = '0;
         instruction_o.is_compressed = is_compressed_i;
         instruction_o.use_zimm      = 1'b0;
-        instruction_o.use_rs1_fpr   = 1'b0;
-        instruction_o.use_rs2_fpr   = 1'b0;
-        instruction_o.use_imm_fpr   = 1'b0;
-        instruction_o.use_rd_fpr    = 1'b0;
         instruction_o.bp            = branch_predict_i;
         ecall                       = 1'b0;
         ebreak                      = 1'b0;
@@ -1009,11 +1005,6 @@ module decoder import ariane_pkg::*; (
 
                 default: illegal_instr = 1'b1;
             endcase
-
-            instruction_o.use_rs1_fpr = is_rs1_fpr(instruction_o.op);
-            instruction_o.use_rs2_fpr = is_rs2_fpr(instruction_o.op);
-            instruction_o.use_imm_fpr = is_imm_fpr(instruction_o.op);
-            instruction_o.use_rd_fpr = is_rd_fpr(instruction_o.op);
         end
     end
 
diff --git a/src/frontend/bht.sv b/src/frontend/bht.sv
index e57c34bb7fa0de0fbcf2d4b8b22036f6a34b1e5f..9eae69d3559a21f07f8af295c91c3e5044e47c7b 100644
--- a/src/frontend/bht.sv
+++ b/src/frontend/bht.sv
@@ -34,6 +34,8 @@ module bht #(
     localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
     // number of bits we should use for prediction
     localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
+    // we are not interested in all bits of the address
+    unread i_unread (.d_i(|vpc_i));
 
     struct packed {
         logic       valid;
diff --git a/src/frontend/btb.sv b/src/frontend/btb.sv
index 23f365fd460c91e22542f154b2221ffabd955ba1..86eeadc0ac34e6079c9238da41a36b23708cf199 100644
--- a/src/frontend/btb.sv
+++ b/src/frontend/btb.sv
@@ -36,6 +36,8 @@ module btb #(
     localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
     // prevent aliasing to degrade performance
     localparam ANTIALIAS_BITS = 8;
+    // we are not interested in all bits of the address
+    unread i_unread (.d_i(|vpc_i));
 
     // typedef for all branch target entries
     // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
diff --git a/src/frontend/frontend.sv b/src/frontend/frontend.sv
index 813294fe48088af6b3ec22fe7e57a9cd9c0e20a7..d0f7b5ca4e9119c53da0d7442e4e0fb8610487a0 100644
--- a/src/frontend/frontend.sv
+++ b/src/frontend/frontend.sv
@@ -46,7 +46,9 @@ module frontend import ariane_pkg::*; #(
   input  logic               fetch_entry_ready_i, // ID acknowledged this instruction
 
   output logic               has_mem_access_o,
-  output logic               branch_speculation_o
+  output logic               branch_speculation_o,
+  output logic               begin_spec_o,
+  output logic               has_cf_o
 );
     // Instruction Cache Registers, from I$
     logic [FETCH_WIDTH-1:0] icache_data_q;
@@ -96,7 +98,7 @@ module frontend import ariane_pkg::*; #(
     ras_t            ras_predict;
 
     // branch-predict update
-    logic            is_mispredict;
+    logic            is_correct_predict, is_mispredict;
     logic            ras_push, ras_pop;
     logic [riscv::VLEN-1:0]     ras_update;
 
@@ -145,6 +147,7 @@ module frontend import ariane_pkg::*; #(
     logic [INSTR_PER_FETCH-1:0] is_jump;
     logic [INSTR_PER_FETCH-1:0] is_return;
     logic [INSTR_PER_FETCH-1:0] is_jalr;
+    logic [INSTR_PER_FETCH-1:0] is_cf;
 
     for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
       // branch history table -> BHT
@@ -156,9 +159,14 @@ module frontend import ariane_pkg::*; #(
       // unconditional jumps with known target -> immediately resolved
       assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]);
       // unconditional jumps with unknown target -> BTB
-      assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
+      assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & ~is_call[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
+
+      // cf that needs a prediction
+      assign is_cf[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i] | rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
     end
 
+    assign begin_spec_o = (|is_cf) & (~replay);
+
     // taken/not taken
     always_comb begin
       taken_rvi_cf = '0;
@@ -173,13 +181,12 @@ module frontend import ariane_pkg::*; #(
 
       // lower most prediction gets precedence
       for (int i = INSTR_PER_FETCH - 1; i >= 0 ; i--) begin
-        ras_pop = 1'b0;
-        ras_push = 1'b0;
-
         unique case ({is_branch[i], is_return[i], is_jump[i], is_jalr[i]})
           4'b0000:; // regular instruction e.g.: no branch
           // unconditional jump to register, we need the BTB to resolve this
           4'b0001: begin
+            ras_pop = 1'b0;
+            ras_push = 1'b0;
             if (btb_prediction_shifted[i].valid) begin
               predict_address = btb_prediction_shifted[i].target_address;
               cf_type[i] = ariane_pkg::JumpR;
@@ -187,6 +194,8 @@ module frontend import ariane_pkg::*; #(
           end
           // its an unconditional jump to an immediate
           4'b0010: begin
+            ras_pop = 1'b0;
+            ras_push = 1'b0;
             taken_rvi_cf[i] = rvi_jump[i];
             taken_rvc_cf[i] = rvc_jump[i];
             cf_type[i] = ariane_pkg::Jump;
@@ -195,11 +204,14 @@ module frontend import ariane_pkg::*; #(
           4'b0100: begin
             // make sure to only alter the RAS if we actually consumed the instruction
             ras_pop = ras_predict.valid & instr_queue_consumed[i];
+            ras_push = 1'b0;
             predict_address = ras_predict.ra;
             cf_type[i] = ariane_pkg::Return;
           end
           // branch prediction
           4'b1000: begin
+            ras_pop = 1'b0;
+            ras_push = 1'b0;
             // if we have a valid dynamic prediction use it
             if (bht_prediction_shifted[i].valid) begin
               taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken;
@@ -226,6 +238,12 @@ module frontend import ariane_pkg::*; #(
             predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]);
           end
       end
+
+      if (is_mispredict) begin
+        ras_update = resolved_branch_i.pc + 4;  // TODO fix for compressed instructions
+        ras_push = 1'b0;
+        ras_pop = resolved_branch_i.cf_type == Return;
+      end
     end
     // or reduce struct
     always_comb begin
@@ -235,6 +253,8 @@ module frontend import ariane_pkg::*; #(
       // contains a valid prediction.
       for (int i = 0; i < INSTR_PER_FETCH; i++) bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid));
     end
+
+    assign is_correct_predict = resolved_branch_i.valid & !(resolved_branch_i.is_mispredict) & (resolved_branch_i.cf_type != Jump);
     assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
 
     // Cache interface
@@ -355,16 +375,19 @@ module frontend import ariane_pkg::*; #(
       end
     end
 
-    ras #(
-      .DEPTH  ( ArianeCfg.RASDepth  )
+    sras #(
+      .DEPTH  ( ArianeCfg.RASDepth )
     ) i_ras (
       .clk_i,
       .rst_ni,
-      .flush_i( flush_bp_i  ),
-      .push_i ( ras_push    ),
-      .pop_i  ( ras_pop     ),
-      .data_i ( ras_update  ),
-      .data_o ( ras_predict )
+      .flush_i      ( flush_bp_i         ),
+      .push_i       ( ras_push           ),
+      .pop_i        ( ras_pop            ),
+      .data_i       ( ras_update         ),
+      .begin_spec_i ( begin_spec_o       ),
+      .valid_spec_i ( is_correct_predict ),
+      .bad_spec_i   ( is_mispredict      ),
+      .data_o       ( ras_predict        )
     );
 
     btb #(
@@ -418,6 +441,7 @@ module frontend import ariane_pkg::*; #(
       .flush_i             ( flush_i              ),
       .instr_i             ( instr                ), // from re-aligner
       .addr_i              ( addr                 ), // from re-aligner
+      .is_cf_i             ( is_cf                ),
       .exception_i         ( icache_ex_valid_q    ), // from I$
       .exception_addr_i    ( icache_vaddr_q       ),
       .predict_address_i   ( predict_address      ),
@@ -430,7 +454,8 @@ module frontend import ariane_pkg::*; #(
       .fetch_entry_o       ( fetch_entry_o        ), // to back-end
       .fetch_entry_valid_o ( fetch_entry_valid_o  ), // to back-end
       .fetch_entry_ready_i ( fetch_entry_ready_i  ), // to back-end
-      .has_mem_access_o    ( has_mem_access_o     )  // to verifier
+      .has_mem_access_o    ( has_mem_access_o     ), // to verifier
+      .has_cf_o            ( has_cf_o             )  // to I$
     );
 
     // pragma translate_off
@@ -438,6 +463,18 @@ module frontend import ariane_pkg::*; #(
       initial begin
         assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64) else $fatal("[frontend] fetch width != not supported");
       end
+
+      assert property ( // whenever replay is set, replay_addr must equal icache_vaddr_q
+        @(posedge clk_i) disable iff (!rst_ni) replay |-> (replay_addr == icache_vaddr_q))
+          else $warning("[frontend] replay_addr != icache_vaddr_q");
+
+      assert property ( // whenever replay is set, instr_queue_ready must be low
+        @(posedge clk_i) disable iff (!rst_ni) replay |-> ~instr_queue_ready)
+          else $warning("[frontend] replay & instr_queue_ready...");
+
+      assert property ( // a resolved branch of type Jump must never be flagged as mispredicted
+        @(posedge clk_i) disable iff (!rst_ni) resolved_branch_i.cf_type == Jump |-> !resolved_branch_i.is_mispredict)
+        else $warning("[frontend] mispredicted jump"); // only $fatal takes a leading finish number; $warning takes just the message
     `endif
     // pragma translate_on
 endmodule
diff --git a/src/frontend/instr_queue.sv b/src/frontend/instr_queue.sv
index 84df044d9528fc4532798cf5de4f9fdee6490956..51c612624865dd19ef3ae4201c6338f018fd0759 100644
--- a/src/frontend/instr_queue.sv
+++ b/src/frontend/instr_queue.sv
@@ -49,6 +49,7 @@ module instr_queue (
   input  logic                                               flush_i,
   input  logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0]       instr_i,
   input  logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i,
+  input  logic [ariane_pkg::INSTR_PER_FETCH-1:0]             is_cf_i,
   input  logic [ariane_pkg::INSTR_PER_FETCH-1:0]             valid_i,
   output logic                                               ready_o,
   output logic [ariane_pkg::INSTR_PER_FETCH-1:0]             consumed_o,
@@ -66,12 +67,14 @@ module instr_queue (
   output logic                                               fetch_entry_valid_o,
   input  logic                                               fetch_entry_ready_i,
 
-  output logic has_mem_access_o
+  output logic has_mem_access_o,
+  output logic has_cf_o
 );
 
   typedef struct packed {
     logic [31:0]     instr; // instruction word
     ariane_pkg::cf_t cf;    // branch was taken
+    logic            is_cf; // instruction is a cf that needs a prediction (ie. not a jump)
     ariane_pkg::frontend_exception_t ex;    // exception happened
     logic [riscv::VLEN-1:0] ex_vaddr;       // lower VLEN bits of tval for exception
   } instr_data_t;
@@ -125,6 +128,11 @@ module instr_queue (
   logic [ariane_pkg::INSTR_PER_FETCH-1:0]   fifo_output_is_mem, fifo_has_no_mem;
   logic                                     output_is_mem;
 
+  // cf count
+  logic [ariane_pkg::INSTR_PER_FETCH*2-1:0]  input_is_cf;
+  logic [ariane_pkg::INSTR_PER_FETCH-1:0] fifo_has_no_cf;
+  logic                                   output_is_cf;
+
   assign ready_o = ~(|instr_queue_full) & ~full_address;
 
   for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_unpack_taken
@@ -199,12 +207,23 @@ module instr_queue (
   assign has_mem_access_o = (|input_is_mem[ariane_pkg::INSTR_PER_FETCH-1:0]) | output_is_mem |
                             ~(&fifo_has_no_mem);
 
+  // ----------------------
+  // Control flow detector
+  // ----------------------
+  assign output_is_cf = fetch_entry_valid_o & // the entry currently offered at the output is a JALR or a conditional branch
+                        (fetch_entry_o.instruction[6:0] inside {riscv::OpcodeJalr, riscv::OpcodeBranch}); // NOTE(review): matches only the 32-bit major opcodes - confirm compressed cf cannot appear here
+
+  assign has_cf_o = (|input_is_cf) | output_is_cf | // set if the incoming fetch packet or the output entry holds such a cf ...
+                    ~(&fifo_has_no_cf);             // ... or if any per-lane cf FIFO is non-empty
+
   // duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0
   for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input
     assign instr[i] = instr_i[i];
     assign instr[i + ariane_pkg::INSTR_PER_FETCH] = instr_i[i];
     assign cf[i] = cf_type_i[i];
     assign cf[i + ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i];
+    assign input_is_cf[i] = is_cf_i[i];
+    assign input_is_cf[i + ariane_pkg::INSTR_PER_FETCH] = is_cf_i[i];
   end
 
   // shift the inputs
@@ -215,6 +234,7 @@ module instr_queue (
     assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet
     assign instr_data_in[i].ex_vaddr = exception_addr_i;
     assign input_is_mem_in[i] = input_is_mem[i + idx_is_q];
+    assign instr_data_in[i].is_cf = input_is_cf[i + idx_is_q];
     /* verilator lint_on WIDTH */
   end
 
@@ -346,6 +366,23 @@ module instr_queue (
       .data_o (),
       .pop_i (pop_instr[i] & fifo_output_is_mem[i])
     );
+
+    fifo_v3 #(
+      .DEPTH (ariane_pkg::FETCH_FIFO_DEPTH),
+      .dtype (logic)
+    ) i_fifo_cf (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .flush_i (flush_i),
+      .testmode_i (1'b0),
+      .full_o (),
+      .empty_o (fifo_has_no_cf[i]),
+      .usage_o (),
+      .data_i (1'b1),
+      .push_i (push_instr_fifo[i] & instr_data_in[i].is_cf),
+      .data_o (),
+      .pop_i (pop_instr[i] & instr_data_out[i].is_cf)
+    );
   end
   // or reduce and check whether we are retiring a taken branch (might be that the corresponding)
   // fifo is full.
@@ -374,6 +411,12 @@ module instr_queue (
     .pop_i      ( pop_address                  )
   );
 
+  unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage}));
+  unread i_unread_branch_mask (.d_i(|branch_mask_extended));
+  unread i_unread_lzc (.d_i(|{branch_empty}));
+  unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals
+  unread i_unread_instr_fifo (.d_i(|instr_queue_usage));
+
   always_ff @(posedge clk_i or negedge rst_ni) begin
     if (!rst_ni) begin
       idx_ds_q        <= 'b1;
diff --git a/src/frontend/sras.sv b/src/frontend/sras.sv
new file mode 100644
index 0000000000000000000000000000000000000000..6328a52c84402360ca6f29180441aebdd4d63c2b
--- /dev/null
+++ b/src/frontend/sras.sv
@@ -0,0 +1,150 @@
+//Copyright (C) 2018 to present,
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 2.0 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Date: 08.02.2018
+// Migrated: Luis Vitorio Cargnini, IEEE
+// Date: 09.06.2018
+
+// segmented return address stack: keeps SpecDepth copies of a DEPTH-entry return address stack; bad_spec_i switches back to the copy selected by the backup pointer
+module sras #(
+    parameter int unsigned DEPTH = 2,     // entries per stack copy (asserted below to be a power of 2)
+    parameter int unsigned SpecDepth = 16 // number of stack copies (asserted below to be a power of 2 and >= 2)
+)(
+    input  logic             clk_i,
+    input  logic             rst_ni,
+    input  logic             flush_i,      // clears every stack copy and every top-of-stack index (pointers are kept)
+    input  logic             push_i,       // push data_i; has no effect while bad_spec_i is set
+    input  logic             pop_i,        // pop the top entry; has no effect while bad_spec_i is set
+    input  logic [riscv::VLEN-1:0]      data_i, // return address stored on a push
+    input  logic             begin_spec_i, // advance the speculative pointer to the next copy
+    input  logic             valid_spec_i, // advance the backup pointer
+    input  logic             bad_spec_i,   // reload the speculative pointer from the backup pointer (takes priority over begin_spec_i)
+    output ariane_pkg::ras_t data_o        // entry at the top-of-stack index of the copy read this cycle
+);
+
+    logic [$clog2(SpecDepth)-1:0] ptr_spec_d, ptr_spec_q;     // index of the stack copy in speculative use
+    logic [$clog2(SpecDepth)-1:0] ptr_backup_d, ptr_backup_q; // index loaded into ptr_spec on bad_spec_i
+    logic [SpecDepth-1:0][$clog2(DEPTH)-1:0] tos_d, tos_q;    // top-of-stack index, one per copy
+    ariane_pkg::ras_t [SpecDepth-1:0][DEPTH-1:0] stack_d, stack_q; // the stack copies themselves
+
+    assign ptr_spec_d = (bad_spec_i) ? ptr_backup_q : (begin_spec_i) ? ptr_spec_q + 1'b1 : ptr_spec_q;
+    assign ptr_backup_d = (valid_spec_i) ? ptr_backup_q + 1'b1 : ptr_backup_q;
+
+    logic [$clog2(SpecDepth)-1:0] previous_tos_addr;
+    logic [$clog2(DEPTH)-1:0] previous_tos, prev_plus_one, pp_plus_one, prev_minus_one;
+
+    assign previous_tos_addr = (!bad_spec_i && begin_spec_i) ? ptr_spec_q : ptr_spec_d; // resolves to ptr_backup_q on bad_spec_i, ptr_spec_q otherwise
+    assign previous_tos = tos_q[previous_tos_addr];
+
+    assign prev_plus_one = previous_tos + 1'b1;
+    assign pp_plus_one = tos_q[ptr_spec_q] + 1'b1;
+    assign prev_minus_one = previous_tos - 1'b1;
+
+    always_comb begin // next top-of-stack indices
+        tos_d = tos_q;
+
+        if (flush_i) begin
+            tos_d = '0;
+        end else if (!bad_spec_i) begin
+            if (push_i && !pop_i) begin
+                tos_d[ptr_spec_d] = prev_plus_one;
+                if (begin_spec_i) begin
+                    tos_d[ptr_spec_q] = pp_plus_one; // a push coinciding with begin_spec_i is also applied to the copy being left
+                end
+            end else if (!push_i && pop_i) begin
+                    tos_d[ptr_spec_d] = prev_minus_one;
+            end else if (begin_spec_i) begin
+                tos_d[ptr_spec_d] = tos_q[ptr_spec_q]; // no net push/pop: the new copy inherits the current top-of-stack index
+            end
+        end
+    end
+
+    logic can_pop, can_push;
+    assign can_pop = pop_i && !bad_spec_i;
+    assign can_push = push_i && !bad_spec_i;
+
+    assign data_o = stack_q[previous_tos_addr][previous_tos];
+
+    ariane_pkg::ras_t to_push; // entry written on a push; on a pop without push it is an all-zero (invalid) entry
+    assign to_push.ra = (push_i) ? data_i : 0;
+    assign to_push.valid = can_push;
+
+    ariane_pkg::ras_t [DEPTH-1:0] new_stack, prev_stack;
+
+    always_comb begin // next contents of the copy selected by ptr_spec_d
+        new_stack = stack_q[ptr_spec_d];
+
+        if (!bad_spec_i && begin_spec_i) begin
+            new_stack = stack_q[ptr_spec_q]; // a newly entered copy starts from the contents of the current one
+        end
+
+        if (can_pop) begin
+            new_stack[previous_tos] = to_push; // overwrites the old top entry with to_push
+        end else if (can_push) begin
+            new_stack[prev_plus_one] = to_push;
+        end
+    end
+
+    always_comb begin // next contents of the copy selected by ptr_spec_q
+        prev_stack = stack_q[ptr_spec_q];
+
+        if (can_push && begin_spec_i) begin
+            prev_stack[pp_plus_one] = to_push;
+        end
+    end
+
+    for (genvar i = 0; i < SpecDepth; i++) begin
+        assign stack_d[i] = (flush_i) ? '0 :                  // flush has the highest priority
+                            (i == ptr_spec_d) ? new_stack :   // then the copy ptr_spec_d selects
+                            (i == ptr_spec_q) ? prev_stack :  // then the copy ptr_spec_q selects
+                            stack_q[i];                       // every other copy holds its value
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            stack_q      <= '0;
+            ptr_spec_q   <= '0;
+            ptr_backup_q <= '0;
+            tos_q        <= '0;
+        end else begin
+            stack_q      <= stack_d;
+            ptr_spec_q   <= ptr_spec_d;
+            ptr_backup_q <= ptr_backup_d;
+            tos_q        <= tos_d;
+        end
+    end
+
+    // pragma translate_off
+    `ifndef VERILATOR
+      initial begin
+         assert (2 ** $clog2(SpecDepth) == SpecDepth) else $fatal(1,"[sras] SpecDepth is not a power of 2");
+         assert (SpecDepth >= 2) else $fatal(1,"[sras] SpecDepth is lower than 2");
+         assert (2 ** $clog2(DEPTH) == DEPTH) else $fatal(1,"[sras] DEPTH is not a power of 2");
+      end
+
+      // assert property (
+      //   @(posedge clk_i) disable iff (!rst_ni) push_i |-> begin_spec_i)
+      //     else $warning (1,"[sras] push_i & ~begin_spec_i");
+
+      assert property ( // advancing the speculative pointer must not make it equal to the backup pointer
+        @(posedge clk_i) disable iff (!rst_ni) (begin_spec_i & !(bad_spec_i)) |-> (ptr_spec_d != ptr_backup_d))
+          else $fatal (1,"[sras] speculation overflow");
+
+      assert property ( // if both pointers are equal when valid_spec_i arrives, they must still be equal afterwards
+        @(posedge clk_i) disable iff (!rst_ni) valid_spec_i |-> ((ptr_backup_q == ptr_spec_q) |-> (ptr_backup_d == ptr_spec_d)))
+          else $fatal (1,"[sras] backup overtake");
+
+      assert property ( // plain boolean on purpose: |-> is right-associative and binds weaker than &&, so a chained implication here is vacuous
+        @(posedge clk_i) disable iff (!rst_ni) !(can_push && can_pop))
+          else $fatal (1,"[sras] push & pop at the same time");
+    `endif
+    // pragma translate_on
+endmodule
diff --git a/src/id_stage.sv b/src/id_stage.sv
index 7edc74a97b3aca367747b71ad1ad8ef408fdf3da..6dd3ff789d7d360f9b3e75c79311c71d63cf29c6 100644
--- a/src/id_stage.sv
+++ b/src/id_stage.sv
@@ -38,7 +38,8 @@ module id_stage (
     input  logic                          tvm_i,
     input  logic                          tw_i,
     input  logic                          tsr_i,
-    output logic                          is_mem_instr_o
+    output logic                          is_mem_instr_o,
+    output logic                          has_ctrl_flow_o      // speculative CF (ie. no jals)
 );
     // ID/ISSUE register stage
     struct packed {
@@ -96,6 +97,7 @@ module id_stage (
     assign issue_entry_o = issue_q.sbe;
     assign issue_entry_valid_o = issue_q.valid;
     assign is_ctrl_flow_o = issue_q.is_ctrl_flow;
+    assign has_ctrl_flow_o = issue_q.valid && issue_q.is_ctrl_flow && issue_q.sbe.op != ariane_pkg::ADD;
 
     assign is_mem_instr_o = is_mem_instr_q;
 
diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv
index 7e6a2b2708b275b23d6a2ae8f7a164276df2bd7a..1bdd15a8bfef214d8eb1277510f4ff19474f9b64 100644
--- a/src/issue_read_operands.sv
+++ b/src/issue_read_operands.sv
@@ -26,10 +26,13 @@ module issue_read_operands import ariane_pkg::*; #(
     input  logic                                   issue_instr_valid_i,
     output logic                                   issue_ack_o,
     // lookup rd in scoreboard
+    output logic [REG_ADDR_SIZE-1:0]               rs1_o,
     input  riscv::xlen_t                           rs1_i,
     input  logic                                   rs1_valid_i,
+    output logic [REG_ADDR_SIZE-1:0]               rs2_o,
     input  riscv::xlen_t                           rs2_i,
     input  logic                                   rs2_valid_i,
+    output logic [REG_ADDR_SIZE-1:0]               rs3_o,
     input  logic [FLEN-1:0]                        rs3_i,
     input  logic                                   rs3_valid_i,
     // get clobber input
@@ -146,36 +149,93 @@ module issue_read_operands import ariane_pkg::*; #(
     // ---------------
     // check that all operands are available, otherwise stall
     // forward corresponding register
-    logic is_instr_csr_or_fence_rs1, is_instr_csr_or_fence_rs2;
-    assign is_instr_csr_or_fence_rs1 = rs1_valid_i && (issue_instr_i.use_rs1_fpr || rd_clobber_gpr_i[issue_instr_i.rs1] != CSR || issue_instr_i.op == SFENCE_VMA);
-    assign is_instr_csr_or_fence_rs2 = rs2_valid_i && (issue_instr_i.use_rs2_fpr || rd_clobber_gpr_i[issue_instr_i.rs2] != CSR || issue_instr_i.op == SFENCE_VMA);
-
-    logic should_forward_rs1, should_forward_rs2, should_forward_rs3;
-    assign should_forward_rs1 = !issue_instr_i.use_zimm && ((issue_instr_i.use_rs1_fpr && rd_clobber_fpr_i[issue_instr_i.rs1]) || rd_clobber_gpr_i[issue_instr_i.rs1] != NONE);
-    assign should_forward_rs2 = (issue_instr_i.use_rs2_fpr && rd_clobber_fpr_i[issue_instr_i.rs2]) || rd_clobber_gpr_i[issue_instr_i.rs2] != NONE;
-    assign should_forward_rs3 = issue_instr_i.use_imm_fpr && rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE;
+    always_comb begin : operands_available
+        stall = 1'b0;
+        // operand forwarding signals
+        forward_rs1 = 1'b0;
+        forward_rs2 = 1'b0;
+        forward_rs3 = 1'b0; // FPR only
+        // poll the scoreboard for those values
+        rs1_o = issue_instr_i.rs1;
+        rs2_o = issue_instr_i.rs2;
+        rs3_o = issue_instr_i.result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field
+
+        // 0. check that we are not using the zimm type in RS1
+        //    as this is an immediate we do not have to wait on anything here
+        // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr)
+        // 2. poll the scoreboard
+        if (!issue_instr_i.use_zimm && (is_rs1_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs1] != NONE
+                                                                     : rd_clobber_gpr_i[issue_instr_i.rs1] != NONE)) begin
+            // check if the clobbering instruction is not a CSR instruction, CSR instructions can only
+            // be fetched through the register file since they can't be forwarded
+            // if the operand is available, forward it. CSRs don't write to/from FPR
+            if (rs1_valid_i && (is_rs1_fpr(issue_instr_i.op) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i.rs1] != CSR) || (issue_instr_i.op == SFENCE_VMA)))) begin
+                forward_rs1 = 1'b1;
+            end else begin // the operand is not available -> stall
+                stall = 1'b1;
+            end
+        end
 
-    assign stall = (should_forward_rs1 && !is_instr_csr_or_fence_rs1) ||
-                   (should_forward_rs2 && !is_instr_csr_or_fence_rs2) ||
-                   (should_forward_rs3 && !rs3_valid_i);
+        if (is_rs2_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs2] != NONE
+                                         : rd_clobber_gpr_i[issue_instr_i.rs2] != NONE) begin
+            // if the operand is available, forward it. CSRs don't write to/from FPR
+            if (rs2_valid_i && (is_rs2_fpr(issue_instr_i.op) ? 1'b1 : ( (rd_clobber_gpr_i[issue_instr_i.rs2] != CSR) || (issue_instr_i.op == SFENCE_VMA))))  begin
+                forward_rs2 = 1'b1;
+            end else begin // the operand is not available -> stall
+                stall = 1'b1;
+            end
+        end
 
-    assign forward_rs1 = should_forward_rs1 && is_instr_csr_or_fence_rs1;
-    assign forward_rs2 = should_forward_rs2 && is_instr_csr_or_fence_rs2;
-    assign forward_rs3 = should_forward_rs3 && rs3_valid_i;
+        if (is_imm_fpr(issue_instr_i.op) && rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE) begin
+            // if the operand is available, forward it. CSRs don't write to/from FPR so no need to check
+            if (rs3_valid_i) begin
+                forward_rs3 = 1'b1;
+            end else begin // the operand is not available -> stall
+                stall = 1'b1;
+            end
+        end
+    end
 
     // Forwarding/Output MUX
-    assign operand_a_n = (issue_instr_i.use_zimm) ? {{riscv::XLEN-5{1'b0}}, issue_instr_i.rs1[4:0]} :
-                         (issue_instr_i.use_pc) ? {{riscv::XLEN-riscv::VLEN{issue_instr_i.pc[riscv::VLEN-1]}}, issue_instr_i.pc} :
-                         (forward_rs1) ? rs1_i : operand_a_regfile;
-    assign operand_b_n = (issue_instr_i.use_imm && !(issue_instr_i.fu inside {STORE, CTRL_FLOW}) && !issue_instr_i.use_rs2_fpr) ? issue_instr_i.result :
-                         (forward_rs2) ? rs2_i : operand_b_regfile;
-
-    assign imm_n = (forward_rs3) ? {{riscv::XLEN-FLEN{1'b0}}, rs3_i} :
-                   issue_instr_i.use_imm_fpr ? {{riscv::XLEN-FLEN{1'b0}}, operand_c_regfile} :
-                   issue_instr_i.result;
-    assign trans_id_n = issue_instr_i.trans_id;
-    assign fu_n = issue_instr_i.fu;
-    assign operator_n = issue_instr_i.op;
+    always_comb begin : forwarding_operand_select
+        // default is regfiles (gpr or fpr)
+        operand_a_n = operand_a_regfile;
+        operand_b_n = operand_b_regfile;
+        // immediates are the third operands in the store case
+        // for FP operations, the imm field can also be the third operand from the regfile
+        imm_n      = is_imm_fpr(issue_instr_i.op) ? {{riscv::XLEN-FLEN{1'b0}}, operand_c_regfile} : issue_instr_i.result;
+        trans_id_n = issue_instr_i.trans_id;
+        fu_n       = issue_instr_i.fu;
+        operator_n = issue_instr_i.op;
+        // or should we forward
+        if (forward_rs1) begin
+            operand_a_n  = rs1_i;
+        end
+
+        if (forward_rs2) begin
+            operand_b_n  = rs2_i;
+        end
+
+        if (forward_rs3) begin
+            imm_n  = {{riscv::XLEN-FLEN{1'b0}}, rs3_i};
+        end
+
+        // use the PC as operand a
+        if (issue_instr_i.use_pc) begin
+            operand_a_n = {{riscv::XLEN-riscv::VLEN{issue_instr_i.pc[riscv::VLEN-1]}}, issue_instr_i.pc};
+        end
+
+        // use the zimm as operand a
+        if (issue_instr_i.use_zimm) begin
+            // zero extend operand a
+            operand_a_n = {{riscv::XLEN-5{1'b0}}, issue_instr_i.rs1[4:0]};
+        end
+        // or is it an immediate (including PC), this is not the case for a store and control flow instructions
+        // also make sure operand B is not already used as an FP operand
+        if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && !is_rs2_fpr(issue_instr_i.op)) begin
+            operand_b_n = issue_instr_i.result;
+        end
+    end
 
     // FU select, assert the correct valid out signal (in the next cycle)
     // This needs to be like this to make verilator happy. I know its ugly.
@@ -239,65 +299,47 @@ module issue_read_operands import ariane_pkg::*; #(
       end
     end
 
-    logic [NR_COMMIT_PORTS-1:0] waw_check;
-    logic                       rd_clobbered;
-
-    assign rd_clobbered = (issue_instr_i.use_rd_fpr && rd_clobber_fpr_i[issue_instr_i.rd] == NONE) ||
-                          rd_clobber_gpr_i[issue_instr_i.rd] == NONE;
-
-    for (genvar i = 0; i < NR_COMMIT_PORTS; i++) begin
-        assign waw_check[i] = (issue_instr_i.use_rd_fpr && we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd) ||
-                              we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd;
-    end
-
-    assign issue_ack_o = issue_instr_valid_i &&
-                         (
-                          (!stall && !fu_busy && (rd_clobbered || (|waw_check))) ||
-                          (issue_instr_i.ex.valid) ||
-                          (issue_instr_i.fu == NONE)
-                         );
-
     // We can issue an instruction if we do not detect that any other instruction is writing the same
     // destination register.
     // We also need to check if there is an unresolved branch in the scoreboard.
-    // always_comb begin : issue_scoreboard
-    //     // default assignment
-    //     issue_ack_o = 1'b0;
-    //     // check that we didn't stall, that the instruction we got is valid
-    //     // and that the functional unit we need is not busy
-    //     if (issue_instr_valid_i) begin
-    //         // check that the corresponding functional unit is not busy
-    //         if (!stall && !fu_busy) begin
-    //             // -----------------------------------------
-    //             // WAW - Write After Write Dependency Check
-    //             // -----------------------------------------
-    //             // no other instruction has the same destination register -> issue the instruction
-    //             if (is_rd_fpr(issue_instr_i.op) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE)
-    //                                             : (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin
-    //                 issue_ack_o = 1'b1;
-    //             end
-    //             // or check that the target destination register will be written in this cycle by the
-    //             // commit stage
-    //             for (int unsigned i = 0; i < NR_COMMIT_PORTS; i++)
-    //                 if (is_rd_fpr(issue_instr_i.op) ? (we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd)
-    //                                                 : (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd)) begin
-    //                     issue_ack_o = 1'b1;
-    //                 end
-    //         end
-    //         // we can also issue the instruction under the following two circumstances:
-    //         // we can do this even if we are stalled or no functional unit is ready (as we don't need one)
-    //         // the decoder needs to make sure that the instruction is marked as valid when it does not
-    //         // need any functional unit or if an exception occurred previous to the execute stage.
-    //         // 1. we already got an exception
-    //         if (issue_instr_i.ex.valid) begin
-    //             issue_ack_o = 1'b1;
-    //         end
-    //         // 2. it is an instruction which does not need any functional unit
-    //         if (issue_instr_i.fu == NONE) begin
-    //             issue_ack_o = 1'b1;
-    //         end
-    //     end
-    // end
+    always_comb begin : issue_scoreboard
+        // default assignment
+        issue_ack_o = 1'b0;
+        // check that we didn't stall, that the instruction we got is valid
+        // and that the functional unit we need is not busy
+        if (issue_instr_valid_i) begin
+            // check that the corresponding functional unit is not busy
+            if (!stall && !fu_busy) begin
+                // -----------------------------------------
+                // WAW - Write After Write Dependency Check
+                // -----------------------------------------
+                // no other instruction has the same destination register -> issue the instruction
+                if (is_rd_fpr(issue_instr_i.op) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE)
+                                                : (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin
+                    issue_ack_o = 1'b1;
+                end
+                // or check that the target destination register will be written in this cycle by the
+                // commit stage
+                for (int unsigned i = 0; i < NR_COMMIT_PORTS; i++)
+                    if (is_rd_fpr(issue_instr_i.op) ? (we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd)
+                                                    : (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd)) begin
+                        issue_ack_o = 1'b1;
+                    end
+            end
+            // we can also issue the instruction under the following two circumstances:
+            // we can do this even if we are stalled or no functional unit is ready (as we don't need one)
+            // the decoder needs to make sure that the instruction is marked as valid when it does not
+            // need any functional unit or if an exception occurred previous to the execute stage.
+            // 1. we already got an exception
+            if (issue_instr_i.ex.valid) begin
+                issue_ack_o = 1'b1;
+            end
+            // 2. it is an instruction which does not need any functional unit
+            if (issue_instr_i.fu == NONE) begin
+                issue_ack_o = 1'b1;
+            end
+        end
+    end
 
     // ----------------------
     // Integer Register File
@@ -366,8 +408,8 @@ module issue_read_operands import ariane_pkg::*; #(
         end
     endgenerate
 
-    assign operand_a_regfile = issue_instr_i.use_rs1_fpr ? {{riscv::XLEN-FLEN{1'b0}}, fprdata[0]} : rdata[0];
-    assign operand_b_regfile = issue_instr_i.use_rs2_fpr ? {{riscv::XLEN-FLEN{1'b0}}, fprdata[1]} : rdata[1];
+    assign operand_a_regfile = is_rs1_fpr(issue_instr_i.op) ? {{riscv::XLEN-FLEN{1'b0}}, fprdata[0]} : rdata[0];
+    assign operand_b_regfile = is_rs2_fpr(issue_instr_i.op) ? {{riscv::XLEN-FLEN{1'b0}}, fprdata[1]} : rdata[1];
     assign operand_c_regfile = fprdata[2];
 
     // ----------------------
diff --git a/src/issue_stage.sv b/src/issue_stage.sv
index 0bdde4010404ef0f2d68e2257b356f00bbc666a0..2492b203d7315a599daf98fdfa63f54bc45f6f47 100644
--- a/src/issue_stage.sv
+++ b/src/issue_stage.sv
@@ -74,7 +74,8 @@ module issue_stage import ariane_pkg::*; #(
     input  logic              [NR_COMMIT_PORTS-1:0]  commit_ack_i,
 
     // to verifier
-    output has_mem_access_o
+    output logic has_control_flow_o,
+    output logic has_mem_access_o
 );
     // ---------------------------------------------------
     // Scoreboard (SB) <-> Issue and Read Operands (IRO)
@@ -82,12 +83,15 @@ module issue_stage import ariane_pkg::*; #(
     fu_t  [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro;
     fu_t  [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro;
 
+    logic [REG_ADDR_SIZE-1:0]  rs1_iro_sb;
     riscv::xlen_t              rs1_sb_iro;
     logic                      rs1_valid_sb_iro;
 
+    logic [REG_ADDR_SIZE-1:0]  rs2_iro_sb;
     riscv::xlen_t              rs2_sb_iro;
     logic                      rs2_valid_iro_sb;
 
+    logic [REG_ADDR_SIZE-1:0]  rs3_iro_sb;
     logic [FLEN-1:0]           rs3_sb_iro;
     logic                      rs3_valid_iro_sb;
 
@@ -127,10 +131,13 @@ module issue_stage import ariane_pkg::*; #(
         .unresolved_branch_i   ( 1'b0                                      ),
         .rd_clobber_gpr_o      ( rd_clobber_gpr_sb_iro                     ),
         .rd_clobber_fpr_o      ( rd_clobber_fpr_sb_iro                     ),
+        .rs1_i                 ( rs1_iro_sb                                ),
         .rs1_o                 ( rs1_sb_iro                                ),
         .rs1_valid_o           ( rs1_valid_sb_iro                          ),
+        .rs2_i                 ( rs2_iro_sb                                ),
         .rs2_o                 ( rs2_sb_iro                                ),
         .rs2_valid_o           ( rs2_valid_iro_sb                          ),
+        .rs3_i                 ( rs3_iro_sb                                ),
         .rs3_o                 ( rs3_sb_iro                                ),
         .rs3_valid_o           ( rs3_valid_iro_sb                          ),
 
@@ -160,10 +167,13 @@ module issue_stage import ariane_pkg::*; #(
         .issue_ack_o         ( issue_ack_iro_sb                ),
         .fu_data_o           ( fu_data_o                       ),
         .flu_ready_i         ( flu_ready_i                     ),
+        .rs1_o               ( rs1_iro_sb                      ),
         .rs1_i               ( rs1_sb_iro                      ),
         .rs1_valid_i         ( rs1_valid_sb_iro                ),
+        .rs2_o               ( rs2_iro_sb                      ),
         .rs2_i               ( rs2_sb_iro                      ),
         .rs2_valid_i         ( rs2_valid_iro_sb                ),
+        .rs3_o               ( rs3_iro_sb                      ),
         .rs3_i               ( rs3_sb_iro                      ),
         .rs3_valid_i         ( rs3_valid_iro_sb                ),
         .rd_clobber_gpr_i    ( rd_clobber_gpr_sb_iro           ),
diff --git a/src/scoreboard.sv b/src/scoreboard.sv
index 9871991048ce8f0fb904bf00cfa5e5a50df6dde1..eb54630651b244a5a0550e5347606ea2e30adab2 100644
--- a/src/scoreboard.sv
+++ b/src/scoreboard.sv
@@ -28,12 +28,15 @@ module scoreboard #(
   output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0]    rd_clobber_fpr_o,
 
   // regfile like interface to operand read stage
+  input  logic [ariane_pkg::REG_ADDR_SIZE-1:0]                  rs1_i,
   output riscv::xlen_t                                          rs1_o,
   output logic                                                  rs1_valid_o,
 
+  input  logic [ariane_pkg::REG_ADDR_SIZE-1:0]                  rs2_i,
   output riscv::xlen_t                                          rs2_o,
   output logic                                                  rs2_valid_o,
 
+  input  logic [ariane_pkg::REG_ADDR_SIZE-1:0]                  rs3_i,
   output logic [ariane_pkg::FLEN-1:0]                           rs3_o,
   output logic                                                  rs3_valid_o,
 
@@ -60,6 +63,7 @@ module scoreboard #(
   input logic [NR_WB_PORTS-1:0]                                 wt_valid_i,  // data in is valid
 
   // to verifier
+  output logic has_control_flow_o,
   output logic has_mem_access_o
 );
   localparam int unsigned BITS_ENTRIES = $clog2(NR_ENTRIES);
@@ -73,7 +77,7 @@ module scoreboard #(
   } mem_q [NR_ENTRIES-1:0], mem_n [NR_ENTRIES-1:0];
 
   logic                    issue_full, issue_en;
-  logic [BITS_ENTRIES:0]   issue_cnt_n,      issue_cnt_q;
+  logic [BITS_ENTRIES-1:0] issue_cnt_n,      issue_cnt_q;
   logic [BITS_ENTRIES-1:0] write_pointer_n,  write_pointer_q;
   logic [BITS_ENTRIES-1:0] issue_pointer_n,  issue_pointer_q;
   logic [NR_COMMIT_PORTS-1:0][BITS_ENTRIES-1:0] commit_pointer_n, commit_pointer_q;
@@ -82,11 +86,11 @@ module scoreboard #(
   logic [NR_ENTRIES-1:0] flushed;
   logic [$clog2(NR_ENTRIES)-1:0] num_flush;
 
-  logic [NR_ENTRIES-1:0] has_mem_access_n, has_mem_access_q;
+  logic [NR_ENTRIES-1:0] is_cf, has_mem_access;
 
   // the issue queue is full don't issue any new instructions
   // works since aligned to power of 2
-  assign issue_full = (issue_cnt_q[BITS_ENTRIES] == 1'b1);
+  assign issue_full = &issue_cnt_q;
 
   assign sb_full_o = issue_full;
 
@@ -98,8 +102,18 @@ module scoreboard #(
     end
   end
 
-  // check instructions in the scoreboard for memory operations
-  assign has_mem_access_o = (|has_mem_access_q);
+  // flag, per scoreboard entry, pending control-flow and memory instructions
+  for (genvar i = 0; i < NR_ENTRIES; i++) begin
+      assign is_cf[i] = mem_q[i].issued && ~mem_q[i].sbe.valid &&  // entry issued, sbe.valid not (yet) set
+                        mem_q[i].sbe.fu == ariane_pkg::CTRL_FLOW &&
+                        mem_q[i].sbe.op != ariane_pkg::ADD;  // CTRL_FLOW entries whose op is ADD are not counted
+      assign has_mem_access[i] = (mem_q[i].issued && ~mem_q[i].sbe.valid &&  // LOAD/STORE issued, sbe.valid not (yet) set
+                                  mem_q[i].sbe.fu inside {ariane_pkg::LOAD, ariane_pkg::STORE}) ||
+                                 mem_q[i].sbe.valid && mem_q[i].sbe.fu == ariane_pkg::STORE;  // a STORE stays flagged while sbe.valid is set
+  end
+
+  assign has_control_flow_o = |is_cf;  // OR-reduction: set if any entry is flagged
+  assign has_mem_access_o = |has_mem_access;  // OR-reduction: set if any entry is flagged
 
   // maintain a FIFO with issued instructions
   // keep track of all issued instructions
@@ -107,8 +121,6 @@ module scoreboard #(
     // default assignment
     mem_n    = mem_q;
     issue_en = 1'b0;
-    has_mem_access_n = has_mem_access_q;
-    decoded_instr_ack_o = 1'b0;
     flushed = '0;
 
     decoded_instr_ack_o = decoded_instr_valid_i && ~issue_full;
@@ -118,14 +130,11 @@ module scoreboard #(
       // the decoded instruction we put in there is valid (1st bit)
       // increase the issue counter and advance issue pointer
       issue_en = 1'b1;
-      // decoded_instr_ack_o = 1'b1;
       mem_n[write_pointer_q] = {1'b1,
                                 1'b1,                                      // valid bit
-                                decoded_instr_i.use_rd_fpr,
-                                // ariane_pkg::is_rd_fpr(decoded_instr_i.op), // whether rd goes to the fpr
+                                ariane_pkg::is_rd_fpr(decoded_instr_i.op), // whether rd goes to the fpr
                                 decoded_instr_i                            // decoded instruction record
                                 };
-      has_mem_access_n[write_pointer_q] = decoded_instr_i.fu inside {ariane_pkg::LOAD, ariane_pkg::STORE};
     end
 
     issue_instr_o = mem_n[issue_pointer_q];
@@ -143,7 +152,6 @@ module scoreboard #(
           mem_n[i].issued = 1'b0;
           mem_n[i].sbe.valid = 1'b0;
           flushed[i] = 1'b1;
-          has_mem_access_n[i] = 1'b0;
         end
       end
     end else if (mem_q[issue_pointer_q].sbe.fu == ariane_pkg::NONE &&
@@ -173,9 +181,6 @@ module scoreboard #(
         // write the fflags back from the FPU (exception valid is never set), leave tval intact
         else if (mem_q[trans_id_i[i]].sbe.fu inside {ariane_pkg::FPU, ariane_pkg::FPU_VEC})
           mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause;
-
-        if (mem_n[trans_id_i[i]].sbe.fu != ariane_pkg::STORE)
-          has_mem_access_n[trans_id_i[i]] = 1'b0;
       end
     end
 
@@ -189,7 +194,6 @@ module scoreboard #(
         mem_n[commit_pointer_q[i]].pending   = 1'b0;
         mem_n[commit_pointer_q[i]].issued    = 1'b0;
         mem_n[commit_pointer_q[i]].sbe.valid = 1'b0;
-        has_mem_access_n[commit_pointer_q[i]] = 1'b0;
       end
     end
 
@@ -203,7 +207,6 @@ module scoreboard #(
         mem_n[i].issued       = 1'b0;
         mem_n[i].sbe.valid    = 1'b0;
         mem_n[i].sbe.ex.valid = 1'b0;
-        has_mem_access_n[i]   = 1'b0;
       end
     end
   end
@@ -287,22 +290,17 @@ module scoreboard #(
   logic [NR_ENTRIES+NR_WB_PORTS-1:0][riscv::XLEN-1:0] rs_data;
   logic rs1_valid, rs2_valid;
 
-  logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs1, rs2, rs3;
-  assign rs1 = issue_instr_o.rs1;
-  assign rs2 = issue_instr_o.rs2;
-  assign rs3 = issue_instr_o.result[ariane_pkg::REG_ADDR_SIZE-1:0];  // rs3 is encoded in imm field
-
   // WB ports have higher prio than entries
   for (genvar k = 0; unsigned'(k) < NR_WB_PORTS; k++) begin : gen_rs_wb
-    assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == issue_instr_o.use_rs1_fpr);
-    assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == issue_instr_o.use_rs2_fpr);
-    assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == issue_instr_o.use_imm_fpr);
+    assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == ariane_pkg::is_rs1_fpr(issue_instr_o.op));
+    assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == ariane_pkg::is_rs2_fpr(issue_instr_o.op));
+    assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == ariane_pkg::is_imm_fpr(issue_instr_o.op));
     assign rs_data[k]     = wbdata_i[k];
   end
   for (genvar k = 0; unsigned'(k) < NR_ENTRIES; k++) begin : gen_rs_entries
-    assign rs1_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs1) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == issue_instr_o.use_rs1_fpr);
-    assign rs2_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs2) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == issue_instr_o.use_rs2_fpr);
-    assign rs3_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs3) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == issue_instr_o.use_imm_fpr);
+    assign rs1_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs1_i) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == ariane_pkg::is_rs1_fpr(issue_instr_o.op));
+    assign rs2_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs2_i) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == ariane_pkg::is_rs2_fpr(issue_instr_o.op));
+    assign rs3_fwd_req[k+NR_WB_PORTS] = (mem_q[k].sbe.rd == rs3_i) & ~mem_q[k].pending & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == ariane_pkg::is_imm_fpr(issue_instr_o.op));
     assign rs_data[k+NR_WB_PORTS]     = mem_q[k].sbe.result;
   end
 
@@ -342,8 +340,8 @@ module scoreboard #(
   assign rs3_o = (rs3_valid_o) ? rs_data[rs3_fwd_idx] : '0;
 
   // check whether we are accessing GPR[0], rs3 is only used with the FPR!
-  assign rs1_valid_o = rs1_valid & ((|rs1) | issue_instr_o.use_rs1_fpr);
-  assign rs2_valid_o = rs2_valid & ((|rs2) | issue_instr_o.use_rs2_fpr);
+  assign rs1_valid_o = rs1_valid & ((|rs1_i) | ariane_pkg::is_rs1_fpr(issue_instr_o.op));
+  assign rs2_valid_o = rs2_valid & ((|rs2_i) | ariane_pkg::is_rs2_fpr(issue_instr_o.op));
 
   // sequential process
   always_ff @(posedge clk_i or negedge rst_ni) begin : regs
@@ -353,14 +351,12 @@ module scoreboard #(
       commit_pointer_q <= '0;
       issue_pointer_q  <= '0;
       write_pointer_q  <= '0;
-      has_mem_access_q <= '0;
     end else begin
       issue_cnt_q      <= issue_cnt_n;
       issue_pointer_q  <= issue_pointer_n;
       write_pointer_q  <= write_pointer_n;
       mem_q            <= mem_n;
       commit_pointer_q <= commit_pointer_n;
-      has_mem_access_q <= has_mem_access_n;
     end
   end
 
diff --git a/src/verifier.sv b/src/verifier.sv
index 9c3cb3cf35970eb8779f81cc386f34e3d751a2c1..ebe7f803f5d74531061cb6a627bb1205eb2d4142 100644
--- a/src/verifier.sv
+++ b/src/verifier.sv
@@ -11,12 +11,15 @@ module verifier #(
 
   // Frontend
   input logic                       if_has_mem_access_i,
+  input logic                       if_has_cf_i,
 
   // ID
   input logic                       id_has_mem_access_i,
+  input logic                       id_has_cf_i,
 
   // IS
   input logic                       is_has_mem_access_i,
+  input logic                       is_has_cf_i,
 
   // LSU
   input logic                       no_st_pending_commit_i,
@@ -33,8 +36,9 @@ module verifier #(
   // Bus accesses (I$ misses and memory instructions in the pipeline)
   logic                             has_mem_access;
   assign has_mem_access = if_has_mem_access_i | id_has_mem_access_i | is_has_mem_access_i | (~no_st_pending_commit_i);
+
   // assign should_lock_icache_o = has_mem_access & icache_miss_i;
-  assign should_lock_icache_o = has_mem_access;
+  assign should_lock_icache_o = has_mem_access | if_has_cf_i | id_has_cf_i | is_has_cf_i;
 
   //pragma translate off
   // CO