Merge pull request #118 from antonblanchard/bus-pipeline

Bus pipeline
pull/123/head
Anton Blanchard 4 years ago committed by GitHub
commit 9b1394e236
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2,8 +2,8 @@ GHDL=ghdl
GHDLFLAGS=--std=08 -Psim-unisim
CFLAGS=-O2 -Wall

all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \
rotator_tb countzero_tb
all = core_tb soc_reset_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \
rotator_tb countzero_tb wishbone_bram_tb

# XXX
# loadstore_tb fetch_tb
@ -35,10 +35,14 @@ helpers.o:
cache_ram.o:
plru.o:
plru_tb.o: plru.o
icache.o: common.o wishbone_types.o plru.o cache_ram.o
icache_tb.o: common.o wishbone_types.o icache.o simple_ram_behavioural.o
dcache.o: common.o wishbone_types.o plru.o cache_ram.o
dcache_tb.o: common.o wishbone_types.o dcache.o simple_ram_behavioural.o
utils.o:
sim_bram.o: sim_bram_helpers.o utils.o
wishbone_bram_wrapper.o: wishbone_types.o sim_bram.o utils.o
wishbone_bram_tb.o: wishbone_bram_wrapper.o
icache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o
icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o
dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o
dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o
insn_helpers.o:
loadstore1.o: common.o helpers.o
logical.o: decode_types.o
@ -51,11 +55,8 @@ register_file.o: common.o
rotator.o: common.o
rotator_tb.o: common.o glibc_random.o ppc_fx_insns.o insn_helpers.o rotator.o
sim_console.o:
simple_ram_behavioural_helpers.o:
simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o
simple_ram_behavioural.o: wishbone_types.o simple_ram_behavioural_helpers.o
sim_uart.o: wishbone_types.o sim_console.o
soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o dmi_dtm_xilinx.o wishbone_debug_master.o
soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o wishbone_bram_wrapper.o dmi_dtm_xilinx.o wishbone_debug_master.o
wishbone_arbiter.o: wishbone_types.o
wishbone_types.o:
writeback.o: common.o crhelpers.o
@ -73,17 +74,17 @@ fpga/soc_reset_tb.o: fpga/soc_reset.o
soc_reset_tb: fpga/soc_reset_tb.o fpga/soc_reset.o
$(GHDL) -e $(GHDLFLAGS) soc_reset_tb

core_tb: core_tb.o simple_ram_behavioural_helpers_c.o sim_console_c.o sim_jtag_socket_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o -Wl,sim_console_c.o -Wl,sim_jtag_socket_c.o $@
core_tb: core_tb.o sim_bram_helpers_c.o sim_console_c.o sim_jtag_socket_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o -Wl,sim_console_c.o -Wl,sim_jtag_socket_c.o $@

fetch_tb: fetch_tb.o
$(GHDL) -e $(GHDLFLAGS) $@

icache_tb: icache_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@
$(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@

dcache_tb: dcache_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@
$(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@

plru_tb: plru_tb.o
$(GHDL) -e $(GHDLFLAGS) $@
@ -106,11 +107,11 @@ countzero_tb: countzero_tb.o
simple_ram_tb: simple_ram_tb.o
$(GHDL) -e $(GHDLFLAGS) $@

simple_ram_behavioural_tb: simple_ram_behavioural_helpers_c.o simple_ram_behavioural_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@
wishbone_bram_tb: sim_bram_helpers_c.o wishbone_bram_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@

dmi_dtm_tb: dmi_dtm_tb.o simple_ram_behavioural_helpers_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@
dmi_dtm_tb: dmi_dtm_tb.o sim_bram_helpers_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@

tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out)))


@ -39,7 +39,7 @@ make
- Link in the micropython image:

```
ln -s ../micropython/ports/powerpc/build/firmware.bin simple_ram_behavioural.bin
ln -s ../micropython/ports/powerpc/build/firmware.bin main_ram.bin
```

- Now run microwatt, sending debug output to /dev/null:

@ -8,7 +8,8 @@ use work.wishbone_types.all;

entity core is
generic (
SIM : boolean := false
SIM : boolean := false;
DISABLE_FLATTEN : boolean := false
);
port (
clk : in std_logic;
@ -93,6 +94,29 @@ architecture behave of core is
-- Debug status
signal dbg_core_is_stopped: std_ulogic;

function keep_h(disable : boolean) return string is
begin
if disable then
return "yes";
else
return "no";
end if;
end function;
attribute keep_hierarchy : string;
attribute keep_hierarchy of fetch1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of icache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of fetch2_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode2_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of multiply_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of debug_0 : label is keep_h(DISABLE_FLATTEN);
begin

core_rst <= dbg_core_rst or rst;

@ -20,7 +20,7 @@ begin
generic map(
SIM => true,
MEMORY_SIZE => 524288,
RAM_INIT_FILE => "simple_ram_behavioural.bin",
RAM_INIT_FILE => "main_ram.bin",
RESET_LOW => false
)
port map(

@ -16,6 +16,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.utils.all;
use work.common.all;
use work.helpers.all;
use work.wishbone_types.all;
@ -44,26 +45,6 @@ entity dcache is
end entity dcache;

architecture rtl of dcache is
function log2(i : natural) return integer is
variable tmp : integer := i;
variable ret : integer := 0;
begin
while tmp > 1 loop
ret := ret + 1;
tmp := tmp / 2;
end loop;
return ret;
end function;

function ispow2(i : integer) return boolean is
begin
if to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0 then
return true;
else
return false;
end if;
end function;

-- BRAM organisation: We never access more than wishbone_data_bits at
-- a time so to save resources we make the array only that wide, and
-- use consecutive indices for to make a cache "line"
@ -187,6 +168,7 @@ architecture rtl of dcache is
state : state_t;
wb : wishbone_master_out;
store_way : way_t;
store_row : row_t;
store_index : index_t;
end record;

@ -213,6 +195,7 @@ architecture rtl of dcache is
signal req_hit_way : way_t;
signal req_tag : cache_tag_t;
signal req_op : op_t;
signal req_laddr : std_ulogic_vector(63 downto 0);

-- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t;
@ -244,12 +227,21 @@ architecture rtl of dcache is
end;

-- Returns whether this is the last row of a line
function is_last_row(addr: wishbone_addr_type) return boolean is
function is_last_row_addr(addr: wishbone_addr_type) return boolean is
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin
return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones;
end;

-- Returns whether this is the last row of a line
function is_last_row(row: row_t) return boolean is
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
return row_v(ROW_LINEBITS-1 downto 0) = ones;
end;

-- Return the address of the next row in the current cache line
function next_row_addr(addr: wishbone_addr_type) return std_ulogic_vector is
variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
@ -263,6 +255,21 @@ architecture rtl of dcache is
return result;
end;

-- Return the next row in the current cache line. We use a dedicated
-- function in order to limit the size of the generated adder to be
-- only the bits within a cache line (3 bits with default settings)
--
function next_row(row: row_t) return row_t is
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
begin
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
row_idx := row_v(ROW_LINEBITS-1 downto 0);
row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1);
return to_integer(unsigned(row_v));
end;

-- Get the tag value from the address
function get_tag(addr: std_ulogic_vector(63 downto 0)) return cache_tag_t is
begin
@ -381,6 +388,12 @@ begin
req_row <= get_row(d_in.addr);
req_tag <= get_tag(d_in.addr);

-- Calculate address of beginning of cache line, will be
-- used for cache miss processing if needed
--
req_laddr <= d_in.addr(63 downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');

-- Test if pending request is a hit on any way
hit_way := 0;
is_hit := '0';
@ -573,7 +586,8 @@ begin
wr_data => wr_data
);
process(all)
variable tmp_adr : std_ulogic_vector(63 downto 0);
variable tmp_adr : std_ulogic_vector(63 downto 0);
variable reloading : boolean;
begin
-- Cache hit reads
do_read <= '1';
@ -596,17 +610,17 @@ begin
-- Otherwise, we might be doing a reload
wr_data <= wishbone_in.dat;
wr_sel <= (others => '1');
tmp_adr := (r1.wb.adr'left downto 0 => r1.wb.adr, others => '0');
wr_addr <= std_ulogic_vector(to_unsigned(get_row(tmp_adr), ROW_BITS));
wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS));
end if;

-- The two actual write cases here
do_write <= '0';
if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' and r1.store_way = i then
reloading := r1.state = RELOAD_WAIT_ACK;
if reloading and wishbone_in.ack = '1' and r1.store_way = i then
do_write <= '1';
end if;
if req_op = OP_STORE_HIT and req_hit_way = i then
assert r1.state /= RELOAD_WAIT_ACK report "Store hit while in state:" &
assert not reloading report "Store hit while in state:" &
state_t'image(r1.state)
severity FAILURE;
do_write <= '1';
@ -637,7 +651,7 @@ begin
-- single issue on load/stores so we are fine, later, we can generate
-- a stall output if necessary).

if d_in.valid = '1' then
if req_op /= OP_NONE then
r1.req <= d_in;

report "op:" & op_t'image(req_op) &
@ -672,7 +686,8 @@ begin
-- operates at stage 1.
--
dcache_slow : process(clk)
variable tagset : cache_tags_set_t;
variable tagset : cache_tags_set_t;
variable stbs_done : boolean;
begin
if rising_edge(clk) then
-- On reset, clear all valid bits to force misses
@ -731,16 +746,18 @@ begin
-- Keep track of our index and way for subsequent stores.
r1.store_index <= req_index;
r1.store_way <= replace_way;
r1.store_row <= get_row(req_laddr);

-- Prep for first wishbone read. We calculate the address of
-- the start of the cache line
-- the start of the cache line and start the WB cycle
--
r1.wb.adr <= d_in.addr(r1.wb.adr'left downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');
r1.wb.adr <= req_laddr(r1.wb.adr'left downto 0);
r1.wb.sel <= (others => '1');
r1.wb.we <= '0';
r1.wb.cyc <= '1';
r1.wb.stb <= '1';

-- Track that we had one request sent
r1.state <= RELOAD_WAIT_ACK;

when OP_LOAD_NC =>
@ -770,6 +787,25 @@ begin
end case;

when RELOAD_WAIT_ACK =>
-- Requests are all sent if stb is 0
stbs_done := r1.wb.stb = '0';

-- If we are still sending requests, was one accepted ?
if wishbone_in.stall = '0' and not stbs_done then
-- That was the last word ? We are done sending. Clear
-- stb and set stbs_done so we can handle an eventual last
-- ack on the same cycle.
--
if is_last_row_addr(r1.wb.adr) then
r1.wb.stb <= '0';
stbs_done := true;
end if;

-- Calculate the next row address
r1.wb.adr <= next_row_addr(r1.wb.adr);
end if;

-- Incoming acks processing
if wishbone_in.ack = '1' then
-- Is this the data we were looking for ? Latch it so
-- we can respond later. We don't currently complete the
@ -779,16 +815,17 @@ begin
-- not idle, which we don't currently know how to deal
-- with.
--
if r1.wb.adr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) =
r1.req.addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) then
if r1.store_row = get_row(r1.req.addr) then
r1.slow_data <= wishbone_in.dat;
end if;

-- That was the last word ? We are done
if is_last_row(r1.wb.adr) then
cache_valids(r1.store_index)(r1.store_way) <= '1';
-- Check for completion
if stbs_done and is_last_row(r1.store_row) then
-- Complete wishbone cycle
r1.wb.cyc <= '0';
r1.wb.stb <= '0';

-- Cache line is now valid
cache_valids(r1.store_index)(r1.store_way) <= '1';

-- Complete the load that missed. For load with update
-- we also need to do the deferred update cycle.
@ -801,10 +838,10 @@ begin
r1.state <= IDLE;
report "completing miss !";
end if;
else
-- Otherwise, calculate the next row address
r1.wb.adr <= next_row_addr(r1.wb.adr);
end if;

-- Increment store row counter
r1.store_row <= next_row(r1.store_row);
end if;

when LOAD_UPDATE =>
@ -816,7 +853,13 @@ begin
r1.state <= IDLE;

when STORE_WAIT_ACK | NC_LOAD_WAIT_ACK =>
if wishbone_in.ack = '1' then
-- Clear stb when slave accepted request
if wishbone_in.stall = '0' then
r1.wb.stb <= '0';
end if;

-- Got ack ? complete.
if wishbone_in.ack = '1' then
if r1.state = NC_LOAD_WAIT_ACK then
r1.slow_data <= wishbone_in.dat;
end if;

@ -35,9 +35,9 @@ begin
);

-- BRAM Memory slave
bram0: entity work.mw_soc_memory
bram0: entity work.wishbone_bram_wrapper
generic map(
MEMORY_SIZE => 128,
MEMORY_SIZE => 1024,
RAM_INIT_FILE => "icache_test.bin"
)
port map(
@ -121,7 +121,6 @@ begin
d_in.valid <= '1';
wait until rising_edge(clk);
d_in.valid <= '0';

wait until rising_edge(clk) and d_out.write_enable = '1';
assert d_out.valid = '1';
assert d_out.write_data = x"0000004100000040"
@ -130,7 +129,10 @@ begin
" expected 0000004100000040"
severity failure;

wait for clk_period*4;
wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);

assert false report "end of test" severity failure;
wait;

@ -2,92 +2,93 @@ library ieee;
use ieee.std_logic_1164.all;

package decode_types is
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
OP_CNTZ, OP_CRAND,
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_EXTS,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF,
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_SHL, OP_SHR,
OP_SYNC, OP_TD, OP_TDI, OP_TW,
OP_TWI, OP_XOR, OP_SIM_CONFIG);

type input_reg_a_t is (NONE, RA, RA_OR_ZERO);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32);
type input_reg_c_t is (NONE, RS);
type output_reg_a_t is (NONE, RT, RA);
type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, ONE);

constant SH_OFFSET : integer := 0;
constant MB_OFFSET : integer := 1;
constant ME_OFFSET : integer := 1;
constant SH32_OFFSET : integer := 0;
constant MB32_OFFSET : integer := 1;
constant ME32_OFFSET : integer := 2;

constant FXM_OFFSET : integer := 0;

constant BO_OFFSET : integer := 0;
constant BI_OFFSET : integer := 1;
constant BH_OFFSET : integer := 2;

constant BF_OFFSET : integer := 0;
constant L_OFFSET : integer := 1;

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST, MUL, DIV);
type length_t is (NONE, is1B, is2B, is4B, is8B);

type decode_rom_t is record
unit : unit_t;
insn_type : insn_type_t;
input_reg_a : input_reg_a_t;
input_reg_b : input_reg_b_t;
input_reg_c : input_reg_c_t;
output_reg_a : output_reg_a_t;

input_cr : std_ulogic;
output_cr : std_ulogic;

invert_a : std_ulogic;
invert_out : std_ulogic;
input_carry : carry_in_t;
output_carry : std_ulogic;

-- load/store signals
length : length_t;
byte_reverse : std_ulogic;
sign_extend : std_ulogic;
update : std_ulogic;
reserve : std_ulogic;

-- multiplier and ALU signals
is_32bit : std_ulogic;
is_signed : std_ulogic;

rc : rc_t;
lr : std_ulogic;

sgl_pipe : std_ulogic;
end record;
constant decode_rom_init : decode_rom_t := (unit => NONE,
insn_type => OP_ILLEGAL, input_reg_a => NONE,
input_reg_b => NONE, input_reg_c => NONE,
output_reg_a => NONE, input_cr => '0', output_cr => '0',
invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0',
length => NONE, byte_reverse => '0', sign_extend => '0',
update => '0', reserve => '0', is_32bit => '0',
is_signed => '0', rc => NONE, lr => '0', sgl_pipe => '0');
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
OP_CNTZ, OP_CRAND,
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_EXTS,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF,
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_SHL, OP_SHR,
OP_SYNC, OP_TD, OP_TDI, OP_TW,
OP_TWI, OP_XOR, OP_SIM_CONFIG
);

type input_reg_a_t is (NONE, RA, RA_OR_ZERO);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32);
type input_reg_c_t is (NONE, RS);
type output_reg_a_t is (NONE, RT, RA);
type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, ONE);

constant SH_OFFSET : integer := 0;
constant MB_OFFSET : integer := 1;
constant ME_OFFSET : integer := 1;
constant SH32_OFFSET : integer := 0;
constant MB32_OFFSET : integer := 1;
constant ME32_OFFSET : integer := 2;

constant FXM_OFFSET : integer := 0;

constant BO_OFFSET : integer := 0;
constant BI_OFFSET : integer := 1;
constant BH_OFFSET : integer := 2;

constant BF_OFFSET : integer := 0;
constant L_OFFSET : integer := 1;

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST, MUL, DIV);
type length_t is (NONE, is1B, is2B, is4B, is8B);

type decode_rom_t is record
unit : unit_t;
insn_type : insn_type_t;
input_reg_a : input_reg_a_t;
input_reg_b : input_reg_b_t;
input_reg_c : input_reg_c_t;
output_reg_a : output_reg_a_t;

input_cr : std_ulogic;
output_cr : std_ulogic;

invert_a : std_ulogic;
invert_out : std_ulogic;
input_carry : carry_in_t;
output_carry : std_ulogic;

-- load/store signals
length : length_t;
byte_reverse : std_ulogic;
sign_extend : std_ulogic;
update : std_ulogic;
reserve : std_ulogic;

-- multiplier and ALU signals
is_32bit : std_ulogic;
is_signed : std_ulogic;

rc : rc_t;
lr : std_ulogic;

sgl_pipe : std_ulogic;
end record;
constant decode_rom_init : decode_rom_t := (unit => NONE,
insn_type => OP_ILLEGAL, input_reg_a => NONE,
input_reg_b => NONE, input_reg_c => NONE,
output_reg_a => NONE, input_cr => '0', output_cr => '0',
invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0',
length => NONE, byte_reverse => '0', sign_extend => '0',
update => '0', reserve => '0', is_32bit => '0',
is_signed => '0', rc => NONE, lr => '0', sgl_pipe => '0');

end decode_types;


@ -50,8 +50,8 @@ begin
dmi_ack => dmi_ack
);

simple_ram_0: entity work.mw_soc_memory
generic map(RAM_INIT_FILE => "simple_ram_behavioural.bin",
simple_ram_0: entity work.wishbone_bram_wrapper
generic map(RAM_INIT_FILE => "main_ram.bin",
MEMORY_SIZE => 524288)
port map(clk => clk, rst => rst,
wishbone_in => wishbone_ram_out,

@ -0,0 +1,83 @@
-- Single port Block RAM with one cycle output buffer

library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;

entity main_bram is
generic(
WIDTH : natural := 64;
HEIGHT_BITS : natural := 1024;
MEMORY_SIZE : natural := 65536;
RAM_INIT_FILE : string
);
port(
clk : in std_logic;
addr : in std_logic_vector(HEIGHT_BITS - 1 downto 0) ;
di : in std_logic_vector(WIDTH-1 downto 0);
do : out std_logic_vector(WIDTH-1 downto 0);
sel : in std_logic_vector((WIDTH/8)-1 downto 0);
re : in std_ulogic;
we : in std_ulogic
);
end entity main_bram;

architecture behaviour of main_bram is

constant WIDTH_BYTES : natural := WIDTH / 8;

-- RAM type definition
type ram_t is array(0 to (MEMORY_SIZE / WIDTH_BYTES) - 1) of std_logic_vector(WIDTH-1 downto 0);

-- RAM loading
impure function init_ram(name : STRING) return ram_t is
file ram_file : text open read_mode is name;
variable ram_line : line;
variable temp_word : std_logic_vector(WIDTH-1 downto 0);
variable temp_ram : ram_t := (others => (others => '0'));
begin
for i in 0 to (MEMORY_SIZE / WIDTH_BYTES) - 1 loop
exit when endfile(ram_file);
readline(ram_file, ram_line);
hread(ram_line, temp_word);
temp_ram(i) := temp_word;
end loop;

return temp_ram;
end function;

-- RAM instance
signal memory : ram_t := init_ram(RAM_INIT_FILE);
attribute ram_style : string;
attribute ram_style of memory : signal is "block";
attribute ram_decomp : string;
attribute ram_decomp of memory : signal is "power";

-- Others
signal obuf : std_logic_vector(WIDTH-1 downto 0);
begin

-- Actual RAM template
memory_0: process(clk)
begin
if rising_edge(clk) then
if we = '1' then
for i in 0 to 7 loop
if sel(i) = '1' then
memory(conv_integer(addr))((i + 1) * 8 - 1 downto i * 8) <=
di((i + 1) * 8 - 1 downto i * 8);
end if;
end loop;
end if;
if re = '1' then
obuf <= memory(conv_integer(addr));
end if;
do <= obuf;
end if;
end process;

end architecture behaviour;

@ -1,106 +0,0 @@
-- Based on:
-- The Potato Processor - A simple processor for FPGAs
-- (c) Kristian Klomsten Skordal 2014 - 2015 <kristian.skordal@wafflemail.net>

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;
use work.wishbone_types.all;

use work.pp_utilities.all;

--! @brief Simple memory module for use in Wishbone-based systems.
entity mw_soc_memory is
generic(
MEMORY_SIZE : natural := 4096; --! Memory size in bytes.
RAM_INIT_FILE : string
);
port(
clk : in std_logic;
rst : in std_logic;

-- Wishbone interface:
wishbone_in : in wishbone_master_out;
wishbone_out : out wishbone_slave_out
);
end entity mw_soc_memory;

architecture behaviour of mw_soc_memory is
signal wb_adr_in : std_logic_vector(log2(MEMORY_SIZE) - 1 downto 0);
type ram_t is array(0 to (MEMORY_SIZE / 8) - 1) of std_logic_vector(63 downto 0);

impure function init_ram(name : STRING) return ram_t is
file ram_file : text open read_mode is name;
variable ram_line : line;
variable temp_word : std_logic_vector(63 downto 0);
variable temp_ram : ram_t := (others => (others => '0'));
begin
for i in 0 to (MEMORY_SIZE/8)-1 loop
exit when endfile(ram_file);
readline(ram_file, ram_line);
hread(ram_line, temp_word);
temp_ram(i) := temp_word;
end loop;

return temp_ram;
end function;

signal memory : ram_t := init_ram(RAM_INIT_FILE);

attribute ram_style : string;
attribute ram_style of memory : signal is "block";

attribute ram_decomp : string;
attribute ram_decomp of memory : signal is "power";

type state_type is (IDLE, ACK);
signal state : state_type;

signal read_ack : std_logic;

begin

wb_adr_in <= wishbone_in.adr(log2(MEMORY_SIZE) - 1 downto 0);

wishbone_out.ack <= read_ack and wishbone_in.stb;

memory_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
read_ack <= '0';
state <= IDLE;
else
if wishbone_in.cyc = '1' then
case state is
when IDLE =>
if wishbone_in.stb = '1' and wishbone_in.we = '1' then
for i in 0 to 7 loop
if wishbone_in.sel(i) = '1' then
memory(to_integer(unsigned(wb_adr_in(wb_adr_in'left downto 3))))(((i + 1) * 8) - 1 downto i * 8)
<= wishbone_in.dat(((i + 1) * 8) - 1 downto i * 8);
end if;
end loop;
read_ack <= '1';
state <= ACK;
elsif wishbone_in.stb = '1' then
wishbone_out.dat <= memory(to_integer(unsigned(wb_adr_in(wb_adr_in'left downto 3))));
read_ack <= '1';
state <= ACK;
end if;
when ACK =>
read_ack <= '0';
state <= IDLE;
end case;
else
state <= IDLE;
read_ack <= '0';
end if;
end if;
end if;
end process;

end architecture behaviour;

@ -34,351 +34,353 @@ use ieee.numeric_std.all;
--! - Bit 0: data received (receive buffer not empty)
--! - Bit 1: ready to send data (transmit buffer empty)
entity pp_soc_uart is
generic(
FIFO_DEPTH : natural := 64 --! Depth of the input and output FIFOs.
generic(
FIFO_DEPTH : natural := 64 --! Depth of the input and output FIFOs.
);
port(
clk : in std_logic;
reset : in std_logic;

-- UART ports:
txd : out std_logic;
rxd : in std_logic;

-- Interrupt signal:
irq : out std_logic;

-- Wishbone ports:
wb_adr_in : in std_logic_vector(11 downto 0);
wb_dat_in : in std_logic_vector( 7 downto 0);
wb_dat_out : out std_logic_vector( 7 downto 0);
wb_we_in : in std_logic;
wb_cyc_in : in std_logic;
wb_stb_in : in std_logic;
wb_ack_out : out std_logic
port(
clk : in std_logic;
reset : in std_logic;

-- UART ports:
txd : out std_logic;
rxd : in std_logic;

-- Interrupt signal:
irq : out std_logic;

-- Wishbone ports:
wb_adr_in : in std_logic_vector(11 downto 0);
wb_dat_in : in std_logic_vector( 7 downto 0);
wb_dat_out : out std_logic_vector( 7 downto 0);
wb_we_in : in std_logic;
wb_cyc_in : in std_logic;
wb_stb_in : in std_logic;
wb_ack_out : out std_logic
);
end entity pp_soc_uart;

architecture behaviour of pp_soc_uart is

subtype bitnumber is natural range 0 to 7; --! Type representing the index of a bit.
subtype bitnumber is natural range 0 to 7; --! Type representing the index of a bit.

-- UART sample clock signals:
signal sample_clk : std_logic;
signal sample_clk_divisor : std_logic_vector(7 downto 0);
signal sample_clk_counter : std_logic_vector(sample_clk_divisor'range);
-- UART sample clock signals:
signal sample_clk : std_logic;
signal sample_clk_divisor : std_logic_vector(7 downto 0);
signal sample_clk_counter : std_logic_vector(sample_clk_divisor'range);

-- UART receive process signals:
type rx_state_type is (IDLE, RECEIVE, STARTBIT, STOPBIT);
signal rx_state : rx_state_type;
signal rx_byte : std_logic_vector(7 downto 0);
signal rx_current_bit : bitnumber;
-- UART receive process signals:
type rx_state_type is (IDLE, RECEIVE, STARTBIT, STOPBIT);
signal rx_state : rx_state_type;
signal rx_byte : std_logic_vector(7 downto 0);
signal rx_current_bit : bitnumber;

subtype rx_sample_counter_type is natural range 0 to 15;
signal rx_sample_counter : rx_sample_counter_type;
signal rx_sample_value : rx_sample_counter_type;
subtype rx_sample_counter_type is natural range 0 to 15;
signal rx_sample_counter : rx_sample_counter_type;
signal rx_sample_value : rx_sample_counter_type;

subtype rx_sample_delay_type is natural range 0 to 7;
signal rx_sample_delay : rx_sample_delay_type;
subtype rx_sample_delay_type is natural range 0 to 7;
signal rx_sample_delay : rx_sample_delay_type;

-- UART transmit process signals:
type tx_state_type is (IDLE, TRANSMIT, STOPBIT);
signal tx_state : tx_state_type;
signal tx_byte : std_logic_vector(7 downto 0);
signal tx_current_bit : bitnumber;
-- UART transmit process signals:
type tx_state_type is (IDLE, TRANSMIT, STOPBIT);
signal tx_state : tx_state_type;
signal tx_byte : std_logic_vector(7 downto 0);
signal tx_current_bit : bitnumber;

-- UART transmit clock:
subtype uart_tx_counter_type is natural range 0 to 15;
signal uart_tx_counter : uart_tx_counter_type := 0;
signal uart_tx_clk : std_logic;
-- UART transmit clock:
subtype uart_tx_counter_type is natural range 0 to 15;
signal uart_tx_counter : uart_tx_counter_type := 0;
signal uart_tx_clk : std_logic;

-- Buffer signals:
signal send_buffer_full, send_buffer_empty : std_logic;
signal recv_buffer_full, recv_buffer_empty : std_logic;
signal send_buffer_input, send_buffer_output : std_logic_vector(7 downto 0);
signal recv_buffer_input, recv_buffer_output : std_logic_vector(7 downto 0);
signal send_buffer_push, send_buffer_pop : std_logic := '0';
signal recv_buffer_push, recv_buffer_pop : std_logic := '0';
-- Buffer signals:
signal send_buffer_full, send_buffer_empty : std_logic;
signal recv_buffer_full, recv_buffer_empty : std_logic;
signal send_buffer_input, send_buffer_output : std_logic_vector(7 downto 0);
signal recv_buffer_input, recv_buffer_output : std_logic_vector(7 downto 0);
signal send_buffer_push, send_buffer_pop : std_logic := '0';
signal recv_buffer_push, recv_buffer_pop : std_logic := '0';

-- IRQ enable signals:
signal irq_recv_enable, irq_tx_ready_enable : std_logic := '0';
-- IRQ enable signals:
signal irq_recv_enable, irq_tx_ready_enable : std_logic := '0';

-- Wishbone signals:
type wb_state_type is (IDLE, WRITE_ACK, READ_ACK);
signal wb_state : wb_state_type;
-- Wishbone signals:
type wb_state_type is (IDLE, WRITE_ACK, READ_ACK);
signal wb_state : wb_state_type;

signal wb_ack : std_logic; --! Wishbone acknowledge signal
signal wb_ack : std_logic; --! Wishbone acknowledge signal

begin

irq <= (irq_recv_enable and (not recv_buffer_empty))
or (irq_tx_ready_enable and send_buffer_empty);

---------- UART receive ----------

recv_buffer_input <= rx_byte;

uart_receive: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_state <= IDLE;
recv_buffer_push <= '0';
irq <= (irq_recv_enable and (not recv_buffer_empty))
or (irq_tx_ready_enable and send_buffer_empty);

---------- UART receive ----------

recv_buffer_input <= rx_byte;

uart_receive: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_state <= IDLE;
recv_buffer_push <= '0';
else
case rx_state is
when IDLE =>
if recv_buffer_push = '1' then
recv_buffer_push <= '0';
end if;

if sample_clk = '1' and rxd = '0' then
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
rx_current_bit <= 0;
rx_state <= STARTBIT;
end if;
when STARTBIT =>
if sample_clk = '1' then
if rx_sample_delay = 7 then
rx_state <= RECEIVE;
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
else
case rx_state is
when IDLE =>
if recv_buffer_push = '1' then
recv_buffer_push <= '0';
end if;

if sample_clk = '1' and rxd = '0' then
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
rx_current_bit <= 0;
rx_state <= STARTBIT;
end if;
when STARTBIT =>
if sample_clk = '1' then
if rx_sample_delay = 7 then
rx_state <= RECEIVE;
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
else
rx_sample_delay <= rx_sample_delay + 1;
end if;
end if;
when RECEIVE =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
if rx_current_bit /= 7 then
rx_byte(rx_current_bit) <= rxd;
rx_current_bit <= rx_current_bit + 1;
else
rx_byte(rx_current_bit) <= rxd;
rx_state <= STOPBIT;
end if;
end if;
when STOPBIT =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
rx_state <= IDLE;

if recv_buffer_full = '0' then
recv_buffer_push <= '1';
end if;
end if;
end case;
rx_sample_delay <= rx_sample_delay + 1;
end if;
end if;
end process uart_receive;

sample_counter: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_sample_counter <= 0;
elsif sample_clk = '1' then
if rx_sample_counter = 15 then
rx_sample_counter <= 0;
else
rx_sample_counter <= rx_sample_counter + 1;
end if;
end if;
when RECEIVE =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
if rx_current_bit /= 7 then
rx_byte(rx_current_bit) <= rxd;
rx_current_bit <= rx_current_bit + 1;
else
rx_byte(rx_current_bit) <= rxd;
rx_state <= STOPBIT;
end if;
end if;
end process sample_counter;

---------- UART transmit ----------
end if;
when STOPBIT =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
rx_state <= IDLE;

tx_byte <= send_buffer_output;

uart_transmit: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
txd <= '1';
tx_state <= IDLE;
send_buffer_pop <= '0';
tx_current_bit <= 0;
else
case tx_state is
when IDLE =>
if send_buffer_empty = '0' and uart_tx_clk = '1' then
txd <= '0';
send_buffer_pop <= '1';
tx_current_bit <= 0;
tx_state <= TRANSMIT;
elsif uart_tx_clk = '1' then
txd <= '1';
end if;
when TRANSMIT =>
if send_buffer_pop = '1' then
send_buffer_pop <= '0';
elsif uart_tx_clk = '1' and tx_current_bit = 7 then
txd <= tx_byte(tx_current_bit);
tx_state <= STOPBIT;
elsif uart_tx_clk = '1' then
txd <= tx_byte(tx_current_bit);
tx_current_bit <= tx_current_bit + 1;
end if;
when STOPBIT =>
if uart_tx_clk = '1' then
txd <= '1';
tx_state <= IDLE;
end if;
end case;
if recv_buffer_full = '0' then
recv_buffer_push <= '1';
end if;
end if;
end case;
end if;
end if;
end process uart_receive;

sample_counter: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_sample_counter <= 0;
elsif sample_clk = '1' then
if rx_sample_counter = 15 then
rx_sample_counter <= 0;
else
rx_sample_counter <= rx_sample_counter + 1;
end if;
end process uart_transmit;

uart_tx_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
uart_tx_counter <= 0;
uart_tx_clk <= '0';
else
if sample_clk = '1' then
if uart_tx_counter = 15 then
uart_tx_counter <= 0;
uart_tx_clk <= '1';
else
uart_tx_counter <= uart_tx_counter + 1;
uart_tx_clk <= '0';
end if;
else
uart_tx_clk <= '0';
end if;
end if;
end if;
end if;
end process sample_counter;

---------- UART transmit ----------

tx_byte <= send_buffer_output;

uart_transmit: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
txd <= '1';
tx_state <= IDLE;
send_buffer_pop <= '0';
tx_current_bit <= 0;
else
case tx_state is
when IDLE =>
if send_buffer_empty = '0' and uart_tx_clk = '1' then
txd <= '0';
send_buffer_pop <= '1';
tx_current_bit <= 0;
tx_state <= TRANSMIT;
elsif uart_tx_clk = '1' then
txd <= '1';
end if;
when TRANSMIT =>
if send_buffer_pop = '1' then
send_buffer_pop <= '0';
elsif uart_tx_clk = '1' and tx_current_bit = 7 then
txd <= tx_byte(tx_current_bit);
tx_state <= STOPBIT;
elsif uart_tx_clk = '1' then
txd <= tx_byte(tx_current_bit);
tx_current_bit <= tx_current_bit + 1;
end if;
when STOPBIT =>
if uart_tx_clk = '1' then
txd <= '1';
tx_state <= IDLE;
end if;
end case;
end if;
end if;
end process uart_transmit;

uart_tx_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
uart_tx_counter <= 0;
uart_tx_clk <= '0';
else
if sample_clk = '1' then
if uart_tx_counter = 15 then
uart_tx_counter <= 0;
uart_tx_clk <= '1';
else
uart_tx_counter <= uart_tx_counter + 1;
uart_tx_clk <= '0';
end if;
else
uart_tx_clk <= '0';
end if;
end process uart_tx_clock_generator;

---------- Sample clock generator ----------

sample_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
sample_clk_counter <= (others => '0');
sample_clk <= '0';
else
if sample_clk_divisor /= x"00" then
if sample_clk_counter = sample_clk_divisor then
sample_clk_counter <= (others => '0');
sample_clk <= '1';
else
sample_clk_counter <= std_logic_vector(unsigned(sample_clk_counter) + 1);
sample_clk <= '0';
end if;
end if;
end if;
end if;
end if;
end process uart_tx_clock_generator;

---------- Sample clock generator ----------

sample_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
sample_clk_counter <= (others => '0');
sample_clk <= '0';
else
if sample_clk_divisor /= x"00" then
if sample_clk_counter = sample_clk_divisor then
sample_clk_counter <= (others => '0');
sample_clk <= '1';
else
sample_clk_counter <= std_logic_vector(unsigned(sample_clk_counter) + 1);
sample_clk <= '0';
end if;
end if;
end process sample_clock_generator;

---------- Data Buffers ----------

send_buffer: entity work.pp_fifo
generic map(
DEPTH => FIFO_DEPTH,
WIDTH => 8
) port map(
clk => clk,
reset => reset,
full => send_buffer_full,
empty => send_buffer_empty,
data_in => send_buffer_input,
data_out => send_buffer_output,
push => send_buffer_push,
pop => send_buffer_pop
end if;
end if;
end process sample_clock_generator;

---------- Data Buffers ----------

send_buffer: entity work.pp_fifo
generic map(
DEPTH => FIFO_DEPTH,
WIDTH => 8
) port map(
clk => clk,
reset => reset,
full => send_buffer_full,
empty => send_buffer_empty,
data_in => send_buffer_input,
data_out => send_buffer_output,
push => send_buffer_push,
pop => send_buffer_pop
);

recv_buffer: entity work.pp_fifo
generic map(
DEPTH => FIFO_DEPTH,
WIDTH => 8
) port map(
clk => clk,
reset => reset,
full => recv_buffer_full,
empty => recv_buffer_empty,
data_in => recv_buffer_input,
data_out => recv_buffer_output,
push => recv_buffer_push,
pop => recv_buffer_pop
recv_buffer: entity work.pp_fifo
generic map(
DEPTH => FIFO_DEPTH,
WIDTH => 8
) port map(
clk => clk,
reset => reset,
full => recv_buffer_full,
empty => recv_buffer_empty,
data_in => recv_buffer_input,
data_out => recv_buffer_output,
push => recv_buffer_push,
pop => recv_buffer_pop
);

---------- Wishbone Interface ----------

wb_ack_out <= wb_ack and wb_cyc_in and wb_stb_in;

wishbone: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
wb_ack <= '0';
wb_state <= IDLE;
send_buffer_push <= '0';
recv_buffer_pop <= '0';
sample_clk_divisor <= (others => '0');
irq_recv_enable <= '0';
irq_tx_ready_enable <= '0';
else
case wb_state is
when IDLE =>
if wb_cyc_in = '1' and wb_stb_in = '1' then
if wb_we_in = '1' then -- Write to register
if wb_adr_in = x"000" then
send_buffer_input <= wb_dat_in;
send_buffer_push <= '1';
elsif wb_adr_in = x"018" then
sample_clk_divisor <= wb_dat_in;
elsif wb_adr_in = x"020" then
irq_recv_enable <= wb_dat_in(0);
irq_tx_ready_enable <= wb_dat_in(1);
end if;

-- Invalid writes are acked and ignored.

wb_ack <= '1';
wb_state <= WRITE_ACK;
else -- Read from register
if wb_adr_in = x"008" then
recv_buffer_pop <= '1';
elsif wb_adr_in = x"010" then
wb_dat_out <= x"0" & send_buffer_full & recv_buffer_full & send_buffer_empty & recv_buffer_empty;
wb_ack <= '1';
elsif wb_adr_in = x"018" then
wb_dat_out <= sample_clk_divisor;
wb_ack <= '1';
elsif wb_adr_in = x"020" then
wb_dat_out <= (0 => irq_recv_enable, 1 => irq_tx_ready_enable, others => '0');
wb_ack <= '1';
else
wb_dat_out <= (others => '0');
wb_ack <= '1';
end if;
wb_state <= READ_ACK;
end if;
end if;
when WRITE_ACK =>
send_buffer_push <= '0';

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
when READ_ACK =>
if recv_buffer_pop = '1' then
recv_buffer_pop <= '0';
else
wb_dat_out <= recv_buffer_output;
wb_ack <= '1';
end if;

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
end case;
---------- Wishbone Interface ----------

wb_ack_out <= wb_ack and wb_cyc_in and wb_stb_in;

wishbone: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
wb_ack <= '0';
wb_state <= IDLE;
send_buffer_push <= '0';
recv_buffer_pop <= '0';
sample_clk_divisor <= (others => '0');
irq_recv_enable <= '0';
irq_tx_ready_enable <= '0';
else
case wb_state is
when IDLE =>
if wb_cyc_in = '1' and wb_stb_in = '1' then
if wb_we_in = '1' then -- Write to register
if wb_adr_in = x"000" then
send_buffer_input <= wb_dat_in;
send_buffer_push <= '1';
elsif wb_adr_in = x"018" then
sample_clk_divisor <= wb_dat_in;
elsif wb_adr_in = x"020" then
irq_recv_enable <= wb_dat_in(0);
irq_tx_ready_enable <= wb_dat_in(1);
end if;

-- Invalid writes are acked and ignored.
wb_ack <= '1';
wb_state <= WRITE_ACK;
else -- Read from register
if wb_adr_in = x"008" then
recv_buffer_pop <= '1';
elsif wb_adr_in = x"010" then
wb_dat_out <= x"0" & send_buffer_full & recv_buffer_full &
send_buffer_empty & recv_buffer_empty;
wb_ack <= '1';
elsif wb_adr_in = x"018" then
wb_dat_out <= sample_clk_divisor;
wb_ack <= '1';
elsif wb_adr_in = x"020" then
wb_dat_out <= (0 => irq_recv_enable,
1 => irq_tx_ready_enable,
others => '0');
wb_ack <= '1';
else
wb_dat_out <= (others => '0');
wb_ack <= '1';
end if;
wb_state <= READ_ACK;
end if;
end if;
end process wishbone;
end if;
when WRITE_ACK =>
send_buffer_push <= '0';

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
when READ_ACK =>
if recv_buffer_pop = '1' then
recv_buffer_pop <= '0';
else
wb_dat_out <= recv_buffer_output;
wb_ack <= '1';
end if;

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
end case;
end if;
end if;
end process wishbone;

end architecture behaviour;

@ -7,7 +7,8 @@ entity toplevel is
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
CLK_INPUT : positive := 100000000;
CLK_FREQUENCY : positive := 100000000
CLK_FREQUENCY : positive := 100000000;
DISABLE_FLATTEN_CORE : boolean := false
);
port(
ext_clk : in std_ulogic;
@ -62,7 +63,8 @@ begin
MEMORY_SIZE => MEMORY_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE,
RESET_LOW => RESET_LOW,
SIM => false
SIM => false,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE
)
port map (
system_clk => system_clk,

@ -21,6 +21,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.utils.all;
use work.common.all;
use work.wishbone_types.all;

@ -51,26 +52,6 @@ entity icache is
end entity icache;

architecture rtl of icache is
function log2(i : natural) return integer is
variable tmp : integer := i;
variable ret : integer := 0;
begin
while tmp > 1 loop
ret := ret + 1;
tmp := tmp / 2;
end loop;
return ret;
end function;

function ispow2(i : integer) return boolean is
begin
if to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0 then
return true;
else
return false;
end if;
end function;

-- BRAM organisation: We never access more than wishbone_data_bits at
-- a time so to save resources we make the array only that wide, and
-- use consecutive indices for to make a cache "line"
@ -159,6 +140,7 @@ architecture rtl of icache is
wb : wishbone_master_out;
store_way : way_t;
store_index : index_t;
store_row : row_t;
end record;

signal r : reg_internal_t;
@ -170,6 +152,7 @@ architecture rtl of icache is
signal req_tag : cache_tag_t;
signal req_is_hit : std_ulogic;
signal req_is_miss : std_ulogic;
signal req_laddr : std_ulogic_vector(63 downto 0);

-- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t;
@ -193,12 +176,21 @@ architecture rtl of icache is
end;

-- Returns whether this is the last row of a line
function is_last_row(addr: wishbone_addr_type) return boolean is
function is_last_row_addr(addr: wishbone_addr_type) return boolean is
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin
return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones;
end;

-- Returns whether this is the last row of a line
function is_last_row(row: row_t) return boolean is
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
return row_v(ROW_LINEBITS-1 downto 0) = ones;
end;

-- Return the address of the next row in the current cache line
function next_row_addr(addr: wishbone_addr_type)
return std_ulogic_vector is
@ -213,6 +205,21 @@ architecture rtl of icache is
return result;
end;

-- Return the next row in the current cache line. We use a dedicated
-- function in order to limit the size of the generated adder to be
-- only the bits within a cache line (3 bits with default settings)
--
function next_row(row: row_t) return row_t is
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
begin
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
row_idx := row_v(ROW_LINEBITS-1 downto 0);
row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1);
return to_integer(unsigned(row_v));
end;

-- Read the instruction word for the given address in the current cache row
function read_insn_word(addr: std_ulogic_vector(63 downto 0);
data: cache_row_t) return std_ulogic_vector is
@ -298,7 +305,6 @@ begin
wr_data => wishbone_in.dat
);
process(all)
variable tmp_adr : std_ulogic_vector(63 downto 0);
begin
do_read <= '1';
do_write <= '0';
@ -307,8 +313,7 @@ begin
end if;
cache_out(i) <= dout;
rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
tmp_adr := (r.wb.adr'left downto 0 => r.wb.adr, others => '0');
wr_addr <= std_ulogic_vector(to_unsigned(get_row(tmp_adr), ROW_BITS));
wr_addr <= std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
end process;
end generate;
@ -358,6 +363,12 @@ begin
req_row <= get_row(i_in.nia);
req_tag <= get_tag(i_in.nia);

-- Calculate address of beginning of cache line, will be
-- used for cache miss processing if needed
--
req_laddr <= i_in.nia(63 downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');

-- Test if pending request is a hit on any way
hit_way := 0;
is_hit := '0';
@ -427,7 +438,8 @@ begin

-- Cache miss/reload synchronous machine
icache_miss : process(clk)
variable tagset : cache_tags_set_t;
variable tagset : cache_tags_set_t;
variable stbs_done : boolean;
begin
if rising_edge(clk) then
-- On reset, clear all valid bits to force misses
@ -473,29 +485,54 @@ begin
-- Keep track of our index and way for subsequent stores
r.store_index <= req_index;
r.store_way <= replace_way;
r.store_row <= get_row(req_laddr);

-- Prep for first wishbone read. We calculate the address of
-- the start of the cache line
-- the start of the cache line and start the WB cycle.
--
r.wb.adr <= i_in.nia(r.wb.adr'left downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');
r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
r.wb.cyc <= '1';
r.wb.stb <= '1';

-- Track that we had one request sent
r.state <= WAIT_ACK;
end if;

when WAIT_ACK =>
-- Requests are all sent if stb is 0
stbs_done := r.wb.stb = '0';

-- If we are still sending requests, was one accepted ?
if wishbone_in.stall = '0' and not stbs_done then
-- That was the last word ? We are done sending. Clear
-- stb and set stbs_done so we can handle an eventual last
-- ack on the same cycle.
--
if is_last_row_addr(r.wb.adr) then
r.wb.stb <= '0';