Plumb insn_type through to loadstore1

In preparation for adding a TLB to the dcache, this plumbs the
insn_type from execute1 through to loadstore1, so that we can have
other operations besides loads and stores (e.g. tlbie) going to
loadstore1 and thence to the dcache.  This also plumbs the unit field
of the decode ROM from decode2 through to execute1 to simplify the
logic around which ops need to go to loadstore1.

The load and store data formatting are now not conditional on the
op being OP_LOAD or OP_STORE.  This eliminates the inferred latches
clocked by each of the bits of r.op that we were getting previously.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/166/head
Paul Mackerras 5 years ago
parent 74db071067
commit 167e37d667

@ -58,7 +58,7 @@ icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o
dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o
dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o
insn_helpers.o:
loadstore1.o: common.o helpers.o
loadstore1.o: common.o helpers.o decode_types.o
logical.o: decode_types.o
multiply_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o multiply.o
multiply.o: common.o decode_types.o

@ -118,6 +118,7 @@ package common is

type Decode2ToExecute1Type is record
valid: std_ulogic;
unit : unit_t;
insn_type: insn_type_t;
nia: std_ulogic_vector(63 downto 0);
write_reg: gspr_index_t;
@ -150,7 +151,7 @@ package common is
reserve : std_ulogic; -- set for larx/stcx
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
(valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
lr => '0', rc => '0', oe => '0', invert_a => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0',
@ -213,7 +214,7 @@ package common is

type Execute1ToLoadstore1Type is record
valid : std_ulogic;
load : std_ulogic; -- is this a load or store
op : insn_type_t; -- what ld/st op to do
addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
@ -228,7 +229,7 @@ package common is
reserve : std_ulogic; -- set for larx/stcx.
rc : std_ulogic; -- set for stcx.
end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', load => '0', ci => '0', byte_reverse => '0',
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
reserve => '0', rc => '0', others => (others => '0'));


@ -304,6 +304,7 @@ begin

-- execute unit
v.e.nia := d_in.nia;
v.e.unit := d_in.decode.unit;
v.e.insn_type := d_in.decode.insn_type;
v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_data1 := decoded_reg_a.data;

@ -464,7 +464,7 @@ begin
ctrl_tmp.srr1(63 - 45) <= '1';
report "privileged instruction";
elsif e_in.valid = '1' then
elsif e_in.valid = '1' and e_in.unit = ALU then

v.e.valid := '1';
v.e.write_reg := e_in.write_reg;
@ -844,11 +844,6 @@ begin
stall_out <= '1';
x_to_divider.valid <= '1';

when OP_LOAD | OP_STORE =>
-- loadstore/dcache has its own port to writeback
v.e.valid := '0';
lv.valid := '1';

when others =>
terminate_out <= '1';
report "illegal";
@ -874,6 +869,14 @@ begin
report "Delayed LR update to " & to_hstring(next_nia);
stall_out <= '1';
end if;

elsif e_in.valid = '1' then
-- instruction for other units, i.e. LDST
v.e.valid := '0';
if e_in.unit = LDST then
lv.valid := '1';
end if;

elsif r.lr_update = '1' then
result_en := '1';
result := r.next_lr;
@ -940,9 +943,7 @@ begin
v.e.write_enable := result_en;

-- Outputs to loadstore1 (async)
if e_in.insn_type = OP_LOAD then
lv.load := '1';
end if;
lv.op := e_in.insn_type;
lv.addr1 := a_in;
lv.addr2 := b_in;
lv.data := c_in;

@ -3,6 +3,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.decode_types.all;
use work.common.all;
use work.helpers.all;

@ -41,7 +42,7 @@ architecture behave of loadstore1 is

type reg_stage_t is record
-- latch most of the input request
load : std_ulogic;
load : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
store_data : std_ulogic_vector(63 downto 0);
load_data : std_ulogic_vector(63 downto 0);
@ -146,59 +147,60 @@ begin
two_dwords := or (r.second_bytes);

-- load data formatting
if r.load = '1' then
byte_offset := unsigned(r.addr(2 downto 0));
brev_lenm1 := "000";
if r.byte_reverse = '1' then
brev_lenm1 := unsigned(r.length(2 downto 0)) - 1;
end if;
byte_offset := unsigned(r.addr(2 downto 0));
brev_lenm1 := "000";
if r.byte_reverse = '1' then
brev_lenm1 := unsigned(r.length(2 downto 0)) - 1;
end if;

-- shift and byte-reverse data bytes
for i in 0 to 7 loop
kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
use_second(i) := kk(3);
j := to_integer(kk(2 downto 0)) * 8;
data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
end loop;

-- Work out the sign bit for sign extension.
-- Assumes we are not doing both sign extension and byte reversal,
-- in that for unaligned loads crossing two dwords we end up
-- using a bit from the second dword, whereas for a byte-reversed
-- (i.e. big-endian) load the sign bit would be in the first dword.
negative := (r.length(3) and data_permuted(63)) or
(r.length(2) and data_permuted(31)) or
(r.length(1) and data_permuted(15)) or
(r.length(0) and data_permuted(7));

-- trim and sign-extend
for i in 0 to 7 loop
if i < to_integer(unsigned(r.length)) then
if two_dwords = '1' then
trim_ctl(i) := '1' & not use_second(i);
else
trim_ctl(i) := not use_second(i) & '0';
end if;
-- shift and byte-reverse data bytes
for i in 0 to 7 loop
kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
use_second(i) := kk(3);
j := to_integer(kk(2 downto 0)) * 8;
data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
end loop;

-- Work out the sign bit for sign extension.
-- Assumes we are not doing both sign extension and byte reversal,
-- in that for unaligned loads crossing two dwords we end up
-- using a bit from the second dword, whereas for a byte-reversed
-- (i.e. big-endian) load the sign bit would be in the first dword.
negative := (r.length(3) and data_permuted(63)) or
(r.length(2) and data_permuted(31)) or
(r.length(1) and data_permuted(15)) or
(r.length(0) and data_permuted(7));

-- trim and sign-extend
for i in 0 to 7 loop
if i < to_integer(unsigned(r.length)) then
if two_dwords = '1' then
trim_ctl(i) := '1' & not use_second(i);
else
trim_ctl(i) := '0' & (negative and r.sign_extend);
trim_ctl(i) := not use_second(i) & '0';
end if;
case trim_ctl(i) is
when "11" =>
data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
when "10" =>
data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
when "01" =>
data_trimmed(i * 8 + 7 downto i * 8) := x"FF";
when others =>
data_trimmed(i * 8 + 7 downto i * 8) := x"00";
end case;
end loop;
end if;
else
trim_ctl(i) := '0' & (negative and r.sign_extend);
end if;
case trim_ctl(i) is
when "11" =>
data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
when "10" =>
data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
when "01" =>
data_trimmed(i * 8 + 7 downto i * 8) := x"FF";
when others =>
data_trimmed(i * 8 + 7 downto i * 8) := x"00";
end case;
end loop;

case r.state is
when IDLE =>
if l_in.valid = '1' then
v.load := l_in.load;
v.load := '0';
if l_in.op = OP_LOAD then
v.load := '1';
end if;
v.addr := lsu_sum;
v.write_reg := l_in.write_reg;
v.length := l_in.length;
@ -229,18 +231,16 @@ begin
v.addr := lsu_sum;

-- Do byte reversing and rotating for stores in the first cycle
if v.load = '0' then
byte_offset := unsigned(lsu_sum(2 downto 0));
brev_lenm1 := "000";
if l_in.byte_reverse = '1' then
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
end if;
for i in 0 to 7 loop
k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
j := to_integer(k) * 8;
v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
end loop;
byte_offset := unsigned(lsu_sum(2 downto 0));
brev_lenm1 := "000";
if l_in.byte_reverse = '1' then
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
end if;
for i in 0 to 7 loop
k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
j := to_integer(k) * 8;
v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
end loop;

req := '1';
stall := '1';

Loading…
Cancel
Save