@ -38,17 +38,27 @@ end entity register_file;
architecture behaviour of register_file is
type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0);
signal registers : regfile := (others => (others => '0'));
signal rd_port_b : std_ulogic_vector(63 downto 0);
signal dbg_data : std_ulogic_vector(63 downto 0);
signal dbg_ack : std_ulogic;
signal dbg_gpr_done : std_ulogic;
signal addr_1_reg : gspr_index_t;
signal addr_2_reg : gspr_index_t;
signal addr_3_reg : gspr_index_t;
signal rd_2 : std_ulogic;
signal fwd_1 : std_ulogic;
signal fwd_2 : std_ulogic;
signal fwd_3 : std_ulogic;
signal data_1 : std_ulogic_vector(63 downto 0);
signal data_2 : std_ulogic_vector(63 downto 0);
signal data_3 : std_ulogic_vector(63 downto 0);
signal prev_write_data : std_ulogic_vector(63 downto 0);
begin
-- synchronous writes
-- synchronous reads and writes
register_write_0: process(clk)
variable a_addr, b_addr, c_addr : gspr_index_t;
variable w_addr : gspr_index_t;
variable b_enable : std_ulogic;
begin
if rising_edge(clk) then
if w_in.write_enable = '1' then
@ -66,57 +76,94 @@ begin
a_addr := d1_in.reg_1_addr;
b_addr := d1_in.reg_2_addr;
c_addr := d1_in.reg_3_addr;
if stall = '0' then
b_enable := d1_in.read_2_enable;
if stall = '1' then
a_addr := addr_1_reg;
b_addr := addr_2_reg;
c_addr := addr_3_reg;
b_enable := rd_2;
else
addr_1_reg <= a_addr;
addr_2_reg <= b_addr;
addr_3_reg <= c_addr;
rd_2 <= b_enable;
end if;
fwd_1 <= '0';
fwd_2 <= '0';
fwd_3 <= '0';
if w_in.write_enable = '1' then
if w_addr = a_addr then
fwd_1 <= '1';
end if;
if w_addr = b_addr then
fwd_2 <= '1';
end if;
if w_addr = c_addr then
fwd_3 <= '1';
end if;
end if;
-- Do debug reads to GPRs and FPRs using the B port when it is not in use
if dbg_gpr_req = '1' then
if b_enable = '0' then
b_addr := dbg_gpr_addr(5 downto 0);
dbg_gpr_done <= '1';
end if;
else
dbg_gpr_done <= '0';
end if;
if not HAS_FPU then
-- Make it obvious that we only want 32 GSPRs for a no-FPU implementation
a_addr(5) := '0';
b_addr(5) := '0';
c_addr(5) := '0';
end if;
data_1 <= registers(to_integer(unsigned(a_addr)));
data_2 <= registers(to_integer(unsigned(b_addr)));
data_3 <= registers(to_integer(unsigned(c_addr)));
prev_write_data <= w_in.write_data;
assert (d_in.read1_enable = '0') or (d_in.read1_reg = addr_1_reg) severity failure;
assert (d_in.read2_enable = '0') or (d_in.read2_reg = addr_2_reg) severity failure;
assert (d_in.read3_enable = '0') or (d_in.read3_reg = addr_3_reg) severity failure;
end if;
end process register_write_0;
-- asynchronous reads
-- asynchronous forwarding of write data
register_read_0: process(all)
variable a_addr, b_addr, c_addr : gspr_index_t;
variable w_addr : gspr_index_t;
variable out_data_1 : std_ulogic_vector(63 downto 0);
variable out_data_2 : std_ulogic_vector(63 downto 0);
variable out_data_3 : std_ulogic_vector(63 downto 0);
begin
a_addr := d_in.read1_reg;
b_addr := d_in.read2_reg;
c_addr := d_in.read3_reg;
w_addr := w_in.write_reg;
if not HAS_FPU then
-- Make it obvious that we only want 32 GSPRs for a no-FPU implementation
a_addr(5) := '0';
b_addr(5) := '0';
c_addr(5) := '0';
w_addr(5) := '0';
out_data_1 := data_1;
out_data_2 := data_2;
out_data_3 := data_3;
if fwd_1 = '1' then
out_data_1 := prev_write_data;
end if;
if fwd_2 = '1' then
out_data_2 := prev_write_data;
end if;
if fwd_3 = '1' then
out_data_3 := prev_write_data;
end if;
if d_in.read1_enable = '1' then
report "Reading GPR " & to_hstring(a_addr) & " " & to_hstring(registers(to_integer(unsigned(a_addr))));
report "Reading GPR " & to_hstring(addr_1_reg) & " " & to_hstring(out_data_1);
end if;
if d_in.read2_enable = '1' then
report "Reading GPR " & to_hstring(b_addr) & " " & to_hstring(registers(to_integer(unsigned(b_addr))));
report "Reading GPR " & to_hstring(addr_2_reg) & " " & to_hstring(out_data_2);
end if;
if d_in.read3_enable = '1' then
report "Reading GPR " & to_hstring(c_addr) & " " & to_hstring(registers(to_integer(unsigned(c_addr))));
end if;
d_out.read1_data <= registers(to_integer(unsigned(a_addr)));
-- B read port is multiplexed with reads from the debug circuitry
if d_in.read2_enable = '0' and dbg_gpr_req = '1' and dbg_ack = '0' then
b_addr := dbg_gpr_addr;
if not HAS_FPU then
b_addr(5) := '0';
end if;
report "Reading GPR " & to_hstring(addr_3_reg) & " " & to_hstring(out_data_3);
end if;
rd_port_b <= registers(to_integer(unsigned(b_addr)));
d_out.read2_data <= rd_port_b;
d_out.read3_data <= registers(to_integer(unsigned(c_addr)));
-- Forwarding of written data is now done explicitly with a bypass path
-- from writeback to decode2.
d_out.read1_data <= out_data_1;
d_out.read2_data <= out_data_2;
d_out.read3_data <= out_data_3;
end process register_read_0;
-- Latch read data and ack if dbg read requested and B port not busy
@ -124,8 +171,8 @@ begin
begin
if rising_edge(clk) then
if dbg_gpr_req = '1' then
if d_in.read2_enable = '0' and dbg_ack = '0' then
dbg_data <= rd_port_b;
if dbg_ack = '0' and dbg_gpr_done = '1' then
dbg_data <= data_2;
dbg_ack <= '1';
end if;
else