// File: instr_queue.sv
// Signed-off-by: Alban Gruin <alban.gruin@irit.fr>
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 26.10.2018
// Description: Instruction Queue, separates instruction front-end from processor
// back-end.
//
// This is an optimized instruction queue which supports the handling of
// compressed instructions (16 bit instructions). Internally it is organized as
// FETCH_ENTRY x 32 bit queues which are filled in a consecutive manner. Two pointers
// (`idx_is_q` and `idx_ds_q`) point to the fill port and the read port, respectively. The read port
// is designed so that it will easily allow for multiple issue implementation.
// The input supports arbitrary power of two instruction fetch widths.
//
// The queue supports handling of branch prediction and will take care of
// only saving a valid instruction stream.
//
// Furthermore it contains a replay interface in case the instruction queue
// is already full. As instructions are in general easily replayed this should
// increase the efficiency as I$ misses are potentially hidden. This stands in
// contrast to pessimistic actions (early stalling) or credit based approaches.
// Credit based systems might be difficult to implement with the current system
// as we do not exactly know how much space we are going to need in the fifos
// as each instruction can take either one or two slots.
//
// So the consumed/valid interface degenerates to an `information` interface. If the
// upstream circuit keeps pushing, the queue will discard the information
// and start replaying from the point where it could last manage to accept instructions.
//
// The instruction front-end will stop issuing instructions as soon as the
// fifo is full. This will gate the logic if the processor is, e.g., halted.
//
// TODO(zarubaf): The instruction queues can be reduced to 16 bit. Potentially
// the replay mechanism gets more complicated as it can be that a 32 bit instruction
// can not be pushed at once.
// Instruction queue between the fetch front-end and the processor back-end.
//
// Structure (as implemented below):
//   * INSTR_PER_FETCH parallel instruction-data FIFOs, filled round-robin starting at the
//     binary write pointer `idx_is_q` and drained round-robin by the one-hot read
//     pointer `idx_ds_q`.
//   * One address FIFO holding `predict_address_i` for every pushed packet that contains
//     an instruction whose `cf_type_i` is not `NoCF`.
//   * Two 1-bit side FIFOs per lane which track whether a memory instruction
//     (load/store/AMO opcode) or an `is_cf` instruction is still queued; they drive
//     `has_mem_access_o` and `has_cf_o`.
//   * A replay interface (`replay_o`/`replay_addr_o`) raised when a needed FIFO was full.
module instr_queue (
  input  logic                                                    clk_i,
  input  logic                                                    rst_ni,
  input  logic                                                    flush_i,
  input  logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0]            instr_i,
  input  logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i,
  input  logic [ariane_pkg::INSTR_PER_FETCH-1:0]                  is_cf_i,
  input  logic [ariane_pkg::INSTR_PER_FETCH-1:0]                  valid_i,
  output logic                                                    ready_o,
  output logic [ariane_pkg::INSTR_PER_FETCH-1:0]                  consumed_o,
  // we've encountered an exception, at this point the only possible exceptions are page-table faults
  input  ariane_pkg::frontend_exception_t                         exception_i,
  input  logic [riscv::VLEN-1:0]                                  exception_addr_i,
  // branch predict
  input  logic [riscv::VLEN-1:0]                                  predict_address_i,
  input  ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0]       cf_type_i,
  // replay instruction because one of the FIFO was already full
  output logic                                                    replay_o,
  output logic [riscv::VLEN-1:0]                                  replay_addr_o, // address at which to replay this instruction
  // to processor backend
  output ariane_pkg::fetch_entry_t                                fetch_entry_o,
  output logic                                                    fetch_entry_valid_o,
  input  logic                                                    fetch_entry_ready_i,
  // a memory instruction is arriving, queued, or currently presented at the output
  output logic                                                    has_mem_access_o,
  // an `is_cf` instruction is arriving, queued, or a JALR/branch is presented at the output
  output logic                                                    has_cf_o
);

  // payload stored per entry in each instruction-data FIFO
  typedef struct packed {
    logic [31:0]                     instr;    // instruction word
    ariane_pkg::cf_t                 cf;       // branch was taken
    logic                            is_cf;    // instruction is a cf that needs a prediction (ie. not a jump)
    ariane_pkg::frontend_exception_t ex;       // exception happened
    logic [riscv::VLEN-1:0]          ex_vaddr; // lower VLEN bits of tval for exception
  } instr_data_t;

  logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] branch_index;
  // instruction queues
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]
        [$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] instr_queue_usage;
  instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0]  instr_data_in, instr_data_out;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]         push_instr, push_instr_fifo;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]         pop_instr;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]         instr_queue_full;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]         instr_queue_empty;
  logic                                           instr_overflow;
  // address queue
  logic [$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] address_queue_usage;
  logic [riscv::VLEN-1:0]                          address_out;
  logic                                            pop_address;
  logic                                            push_address;
  logic                                            full_address;
  logic                                            empty_address;
  logic                                            address_overflow;
  // input stream counter
  logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] idx_is_d, idx_is_q;
  // Registers
  // output FIFO select, one-hot
  logic [ariane_pkg::INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q;
  logic [riscv::VLEN-1:0]                 pc_d, pc_q; // current PC
  logic reset_address_d, reset_address_q; // we need to re-set the address because of a flush

  logic [ariane_pkg::INSTR_PER_FETCH*2-2:0] branch_mask_extended;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]   branch_mask;
  logic                                     branch_empty;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]   taken;
  // shift amount, e.g.: instructions we want to retire
  logic [$clog2(ariane_pkg::INSTR_PER_FETCH):0]   popcount;
  logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]         valid;
  logic [ariane_pkg::INSTR_PER_FETCH*2-1:0]       consumed_extended;
  // FIFO mask
  logic [ariane_pkg::INSTR_PER_FETCH*2-1:0]       fifo_pos_extended;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]         fifo_pos;
  logic [ariane_pkg::INSTR_PER_FETCH*2-1:0][31:0] instr;
  ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH*2-1:0] cf;
  // replay interface
  logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_overflow_fifo;
  // memory access count
  logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] input_is_mem;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]   input_is_mem_in;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]   fifo_output_is_mem, fifo_has_no_mem;
  logic                                     output_is_mem;
  // cf count
  logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] input_is_cf;
  logic [ariane_pkg::INSTR_PER_FETCH-1:0]   fifo_has_no_cf;
  logic                                     output_is_cf;

  // ready only while none of the instruction FIFOs and not the address FIFO is full
  assign ready_o = ~(|instr_queue_full) & ~full_address;

  // an instruction counts as "taken" if its control-flow type is anything but NoCF
  for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_unpack_taken
    assign taken[i] = cf_type_i[i] != ariane_pkg::NoCF;
  end

  // calculate a branch mask, e.g.: get the first taken branch
  lzc #(
    .WIDTH ( ariane_pkg::INSTR_PER_FETCH ),
    .MODE  ( 0                           ) // count trailing zeros
  ) i_lzc_branch_index (
    .in_i    ( taken        ), // we want to count trailing zeros
    .cnt_o   ( branch_index ), // first branch on branch_index
    .empty_o ( branch_empty )
  );

  // the first index is for sure valid
  // for example (64 bit fetch):
  // taken mask: 0 1 1 0
  // trailing zero count = 1
  // 0 0 0 1, 1 1 1 << 1 = 0 0 1 1, 1 1 0
  // take the upper 4 bits: 0 0 1 1
  assign branch_mask_extended = {{{ariane_pkg::INSTR_PER_FETCH-1}{1'b0}}, {{ariane_pkg::INSTR_PER_FETCH}{1'b1}}} << branch_index;
  assign branch_mask = branch_mask_extended[ariane_pkg::INSTR_PER_FETCH * 2 - 2:ariane_pkg::INSTR_PER_FETCH - 1];

  // mask with taken branches to get the actual amount of instructions we want to push
  assign valid = valid_i & branch_mask;
  // rotate right again (undo the write-pointer rotation so that consumed_o is
  // indexed like the incoming packet, not like the FIFO lanes)
  assign consumed_extended = {push_instr_fifo, push_instr_fifo} >> idx_is_q;
  assign consumed_o = consumed_extended[ariane_pkg::INSTR_PER_FETCH-1:0];
  // count the numbers of valid instructions we've pushed from this package
  popcount #(
    .INPUT_WIDTH ( ariane_pkg::INSTR_PER_FETCH )
  ) i_popcount (
    .data_i     ( push_instr_fifo ),
    .popcount_o ( popcount        )
  );
  // drop the MSB of the count: the write pointer wraps modulo INSTR_PER_FETCH
  assign shamt = popcount[$bits(shamt)-1:0];

  // save the shift amount for next cycle
  assign idx_is_d = idx_is_q + shamt;

  // ----------------------
  // Input interface
  // ----------------------
  // rotate left by the current position
  assign fifo_pos_extended = { valid, valid } << idx_is_q;
  // we just care about the upper bits
  assign fifo_pos = fifo_pos_extended[ariane_pkg::INSTR_PER_FETCH*2-1:ariane_pkg::INSTR_PER_FETCH];
  // the fifo_position signal can directly be used to guide the push signal of each FIFO
  // make sure it is not full
  assign push_instr = fifo_pos & ~instr_queue_full;

  // ----------------------
  // Memory access detector
  // ----------------------
  // flag every valid incoming instruction whose major opcode is a load, store or AMO;
  // the vector is duplicated so it can be indexed with `i + idx_is_q` further down
  for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_input_is_mem
    assign input_is_mem[i] = valid_i[i] &
                             ((instr_i[i][6:0] == riscv::OpcodeLoad) |
                              (instr_i[i][6:0] == riscv::OpcodeLoadFp) |
                              (instr_i[i][6:0] == riscv::OpcodeStore) |
                              (instr_i[i][6:0] == riscv::OpcodeStoreFp) |
                              (instr_i[i][6:0] == riscv::OpcodeAmo));
    assign input_is_mem[i + ariane_pkg::INSTR_PER_FETCH] = input_is_mem[i];
  end

  // same opcode check on the instruction currently presented to the back-end
  assign output_is_mem = fetch_entry_valid_o &
                         ((fetch_entry_o.instruction[6:0] == riscv::OpcodeLoad) |
                          (fetch_entry_o.instruction[6:0] == riscv::OpcodeLoadFp) |
                          (fetch_entry_o.instruction[6:0] == riscv::OpcodeStore) |
                          (fetch_entry_o.instruction[6:0] == riscv::OpcodeStoreFp) |
                          (fetch_entry_o.instruction[6:0] == riscv::OpcodeAmo));

  // a memory access is pending if one is arriving, presented at the output, or any of
  // the per-lane memory-op FIFOs is non-empty
  assign has_mem_access_o = (|input_is_mem[ariane_pkg::INSTR_PER_FETCH-1:0]) | output_is_mem |
                            ~(&fifo_has_no_mem);

  // ----------------------
  // Control flow detector
  // ----------------------
  assign output_is_cf = fetch_entry_valid_o &
                        (fetch_entry_o.instruction[6:0] inside {riscv::OpcodeJalr, riscv::OpcodeBranch});

  // NOTE(review): unlike input_is_mem, input_is_cf is taken from is_cf_i without being
  // gated by valid_i - presumably is_cf_i is already qualified upstream; confirm.
  assign has_cf_o = (|input_is_cf) | output_is_cf |
                    ~(&fifo_has_no_cf);

  // duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0
  for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input
    assign instr[i] = instr_i[i];
    assign instr[i + ariane_pkg::INSTR_PER_FETCH] = instr_i[i];
    assign cf[i] = cf_type_i[i];
    assign cf[i + ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i];
    assign input_is_cf[i] = is_cf_i[i];
    assign input_is_cf[i + ariane_pkg::INSTR_PER_FETCH] = is_cf_i[i];
  end

  // shift the inputs
  for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_fifo_input_select
    /* verilator lint_off WIDTH */
    assign instr_data_in[i].instr = instr[i + idx_is_q];
    assign instr_data_in[i].cf = cf[i + idx_is_q];
    assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet
    assign instr_data_in[i].ex_vaddr = exception_addr_i;
    assign input_is_mem_in[i] = input_is_mem[i + idx_is_q];
    assign instr_data_in[i].is_cf = input_is_cf[i + idx_is_q];
    /* verilator lint_on WIDTH */
  end

  // ----------------------
  // Replay Logic
  // ----------------------
  // We need to replay a instruction fetch iff:
  // 1. One of the instruction data FIFOs was full and we needed it
  // (e.g.: we pushed and it was full)
  // 2. The address/branch predict FIFO was full
  // if one of the FIFOs was full we need to replay the faulting instruction
  assign instr_overflow_fifo = instr_queue_full & fifo_pos;
  assign instr_overflow = |instr_overflow_fifo; // at least one instruction overflowed
  assign address_overflow = full_address & push_address;
  assign replay_o = instr_overflow | address_overflow;

  // select the address, in the case of an address fifo overflow just
  // use the base of this package
  // if we successfully pushed some instructions we can output the next instruction
  // which we didn't manage to push
  assign replay_addr_o = (address_overflow) ? addr_i[0] : addr_i[shamt];

  // ----------------------
  // Downstream interface
  // ----------------------
  // as long as there is at least one queue which can take the value we have a valid instruction
  assign fetch_entry_valid_o = ~(&instr_queue_empty);

  // Output mux: build the fetch entry from the FIFO lane selected by idx_ds_q and
  // generate the per-lane pop as well as the next read pointer.
  always_comb begin
    idx_ds_d = idx_ds_q;

    pop_instr = '0;
    // assemble fetch entry
    fetch_entry_o.instruction = '0;
    fetch_entry_o.address = pc_q;
    fetch_entry_o.ex.valid = 1'b0;
    // default needed: without it `ex.cause` is only written when a select bit is set,
    // which would infer a latch inside this always_comb
    fetch_entry_o.ex.cause = '0;
    fetch_entry_o.ex.tval = '0;
    fetch_entry_o.branch_predict.predict_address = address_out;
    fetch_entry_o.branch_predict.cf = ariane_pkg::NoCF;
    // output mux select
    for (int unsigned i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
      if (idx_ds_q[i]) begin
        // the cause is driven regardless of ex.valid; it is only meaningful if ex.valid is set
        if (instr_data_out[i].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin
          fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT;
        end else begin
          fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT;
        end
        fetch_entry_o.instruction = instr_data_out[i].instr;
        fetch_entry_o.ex.valid = instr_data_out[i].ex != ariane_pkg::FE_NONE;
        fetch_entry_o.ex.tval  = {{64-riscv::VLEN{1'b0}}, instr_data_out[i].ex_vaddr};
        fetch_entry_o.branch_predict.cf = instr_data_out[i].cf;
        pop_instr[i] = fetch_entry_valid_o & fetch_entry_ready_i;
      end
    end
    // rotate the pointer left
    // NOTE(review): this advances on fetch_entry_ready_i alone (not qualified by
    // fetch_entry_valid_o) - presumably the consumer only raises ready while valid
    // is set; confirm against the instantiating stage.
    if (fetch_entry_ready_i) begin
      idx_ds_d = {idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1]};
    end
  end

  // TODO(zarubaf): This needs to change for dual-issue
  // if the handshaking is successful and we had a prediction pop one address entry
  assign pop_address = ((fetch_entry_o.branch_predict.cf != ariane_pkg::NoCF) & |pop_instr);

  // ----------------------
  // Calculate (Next) PC
  // ----------------------
  // Priority (lowest to highest, later assignments win):
  //   1. sequential advance by 2 or 4 bytes, 2. predicted target, 3. re-sync after flush
  always_comb begin
    pc_d = pc_q;
    reset_address_d = flush_i ? 1'b1 : reset_address_q;

    if (fetch_entry_ready_i) begin
      // TODO(zarubaf): This needs to change for a dual issue implementation
      // advance the PC (low two bits != 2'b11 marks a 16-bit compressed instruction)
      pc_d = pc_q + ((fetch_entry_o.instruction[1:0] != 2'b11) ? 'd2 : 'd4);
    end

    if (pop_address) pc_d = address_out;

    // we previously flushed so we need to reset the address
    if (valid_i[0] && reset_address_q) begin
      // this is the base of the first instruction
      pc_d = addr_i[0];
      reset_address_d = 1'b0;
    end
  end

  // FIFOs
  for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_instr_fifo
    // Make sure we don't save any instructions if we couldn't save the address
    assign push_instr_fifo[i] = push_instr[i] & ~address_overflow;
    // instruction payload FIFO of lane i
    fifo_v3 #(
      .DEPTH      ( ariane_pkg::FETCH_FIFO_DEPTH ),
      .dtype      ( instr_data_t                 )
    ) i_fifo_instr_data (
      .clk_i      ( clk_i                ),
      .rst_ni     ( rst_ni               ),
      .flush_i    ( flush_i              ),
      .testmode_i ( 1'b0                 ),
      .full_o     ( instr_queue_full[i]  ),
      .empty_o    ( instr_queue_empty[i] ),
      .usage_o    ( instr_queue_usage[i] ),
      .data_i     ( instr_data_in[i]     ),
      .push_i     ( push_instr_fifo[i]   ),
      .data_o     ( instr_data_out[i]    ),
      .pop_i      ( pop_instr[i]         )
    );

    // opcode check on the head entry of lane i, used to pop the memory-op tracker below
    assign fifo_output_is_mem[i] = ((instr_data_out[i].instr[6:0] == riscv::OpcodeLoad) |
                                    (instr_data_out[i].instr[6:0] == riscv::OpcodeLoadFp) |
                                    (instr_data_out[i].instr[6:0] == riscv::OpcodeStore) |
                                    (instr_data_out[i].instr[6:0] == riscv::OpcodeStoreFp) |
                                    (instr_data_out[i].instr[6:0] == riscv::OpcodeAmo));

    // tracker: one token per queued memory instruction of lane i; only empty_o is used
    fifo_v3 #(
      .DEPTH      ( ariane_pkg::FETCH_FIFO_DEPTH ),
      .dtype      ( logic                        )
    ) i_fifo_mem_ops (
      .clk_i      ( clk_i                                  ),
      .rst_ni     ( rst_ni                                 ),
      .flush_i    ( flush_i                                ),
      .testmode_i ( 1'b0                                   ),
      .full_o     (                                        ),
      .empty_o    ( fifo_has_no_mem[i]                     ),
      .usage_o    (                                        ),
      .data_i     ( 1'b1                                   ),
      .push_i     ( push_instr_fifo[i] & input_is_mem_in[i] ),
      .data_o     (                                        ),
      .pop_i      ( pop_instr[i] & fifo_output_is_mem[i]   )
    );

    // tracker: one token per queued `is_cf` instruction of lane i; only empty_o is used
    fifo_v3 #(
      .DEPTH      ( ariane_pkg::FETCH_FIFO_DEPTH ),
      .dtype      ( logic                        )
    ) i_fifo_cf (
      .clk_i      ( clk_i                                      ),
      .rst_ni     ( rst_ni                                     ),
      .flush_i    ( flush_i                                    ),
      .testmode_i ( 1'b0                                       ),
      .full_o     (                                            ),
      .empty_o    ( fifo_has_no_cf[i]                          ),
      .usage_o    (                                            ),
      .data_i     ( 1'b1                                       ),
      .push_i     ( push_instr_fifo[i] & instr_data_in[i].is_cf ),
      .data_o     (                                            ),
      .pop_i      ( pop_instr[i] & instr_data_out[i].is_cf     )
    );
  end

  // or reduce and check whether we are retiring a taken branch (might be that the corresponding)
  // fifo is full.
  // Uses push_instr (not push_instr_fifo): push_instr_fifo depends on address_overflow,
  // which in turn depends on push_address.
  always_comb begin
    push_address = 1'b0;
    // check if we are pushing a ctrl flow change, if so save the address
    for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
      push_address |= push_instr[i] & (instr_data_in[i].cf != ariane_pkg::NoCF);
    end
  end

  // predicted-target FIFO: one entry per pushed packet containing a control-flow change
  fifo_v3 #(
    .DEPTH      ( ariane_pkg::FETCH_FIFO_DEPTH ), // TODO(zarubaf): Fork out to separate param
    .DATA_WIDTH ( riscv::VLEN                  )
  ) i_fifo_address (
    .clk_i      ( clk_i                        ),
    .rst_ni     ( rst_ni                       ),
    .flush_i    ( flush_i                      ),
    .testmode_i ( 1'b0                         ),
    .full_o     ( full_address                 ),
    .empty_o    ( empty_address                ),
    .usage_o    ( address_queue_usage          ),
    .data_i     ( predict_address_i            ),
    .push_i     ( push_address & ~full_address ),
    .data_o     ( address_out                  ),
    .pop_i      ( pop_address                  )
  );

  // tie off signals which are intentionally not consumed
  unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage}));
  unread i_unread_branch_mask (.d_i(|branch_mask_extended));
  unread i_unread_lzc (.d_i(|{branch_empty}));
  unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals
  unread i_unread_instr_fifo (.d_i(|instr_queue_usage));

  // state registers; a flush re-initialises both pointers and requests a PC re-sync
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      idx_ds_q        <= 'b1;
      idx_is_q        <= '0;
      pc_q            <= '0;
      reset_address_q <= 1'b1;
    end else begin
      pc_q            <= pc_d;
      reset_address_q <= reset_address_d;
      if (flush_i) begin
        // one-hot encoded
        idx_ds_q        <= 'b1;
        // binary encoded
        idx_is_q        <= '0;
        reset_address_q <= 1'b1;
      end else begin
        idx_ds_q        <= idx_ds_d;
        idx_is_q        <= idx_is_d;
      end
    end
  end

  // pragma translate_off
  `ifndef VERILATOR
  replay_address_fifo: assert property (
    @(posedge clk_i) disable iff (!rst_ni) replay_o |-> !i_fifo_address.push_i
  ) else $fatal(1,"[instr_queue] Pushing address although replay asserted");

  // note: $onehot0 also accepts the all-zero value
  output_select_onehot: assert property (
    @(posedge clk_i) $onehot0(idx_ds_q)
  ) else begin $error("Output select should be one-hot encoded"); $stop(); end
  `endif
  // pragma translate_on
endmodule