Merge pull request #69 from antonblanchard/debug-module

Merge debug module patches
pull/70/head
Anton Blanchard 5 years ago committed by GitHub
commit ba783fddd5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,8 +1,9 @@
GHDL=ghdl GHDL=ghdl
GHDLFLAGS=--std=08 GHDLFLAGS=--std=08 -Psim-unisim
CFLAGS=-O2 -Wall CFLAGS=-O2 -Wall


all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb multiply_tb all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb multiply_tb dmi_dtm_tb

# XXX # XXX
# loadstore_tb fetch_tb # loadstore_tb fetch_tb


@ -12,8 +13,10 @@ all: $(all)
$(GHDL) -a $(GHDLFLAGS) $< $(GHDL) -a $(GHDLFLAGS) $<


common.o: decode_types.o common.o: decode_types.o
core_tb.o: common.o core.o soc.o sim_jtag.o: sim_jtag_socket.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o core_tb.o: common.o core.o soc.o sim_jtag.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o core_debug.o
core_debug.o:
cr_file.o: common.o cr_file.o: common.o
crhelpers.o: common.o crhelpers.o: common.o
decode1.o: common.o decode_types.o decode1.o: common.o decode_types.o
@ -40,17 +43,26 @@ simple_ram_behavioural_helpers.o:
simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o
simple_ram_behavioural.o: wishbone_types.o simple_ram_behavioural_helpers.o simple_ram_behavioural.o: wishbone_types.o simple_ram_behavioural_helpers.o
sim_uart.o: wishbone_types.o sim_console.o sim_uart.o: wishbone_types.o sim_console.o
soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o dmi_dtm_xilinx.o wishbone_debug_master.o
wishbone_arbiter.o: wishbone_types.o wishbone_arbiter.o: wishbone_types.o
wishbone_types.o: wishbone_types.o:
writeback.o: common.o writeback.o: common.o
# Debug-module objects. Each prerequisite list mirrors the library units the
# corresponding VHDL file references, so a standalone ("make dmi_dtm_tb") or
# parallel build analyses them in a valid order instead of relying on
# whatever happened to be built earlier.
#
# dmi_dtm_tb.vhdl uses work.common and work.wishbone_types and instantiates
# work.mw_soc_memory (assumed to live in simple_ram_behavioural.vhdl -- confirm).
# dmi_dtm_xilinx.vhdl uses work.wishbone_types and the unisim library.
dmi_dtm_tb.o: common.o wishbone_types.o simple_ram_behavioural.o dmi_dtm_xilinx.o wishbone_debug_master.o
dmi_dtm_xilinx.o: wishbone_types.o sim-unisim/unisim_vcomponents.o
wishbone_debug_master.o: wishbone_types.o

# Simulation stand-ins for the Xilinx "unisim" primitives. All of the sources
# are analysed in one GHDL invocation into the separate "unisim" library,
# whose work directory is sim-unisim.
UNISIM_BITS = sim-unisim/unisim_vcomponents.vhdl \
              sim-unisim/BSCANE2.vhdl \
              sim-unisim/BUFG.vhdl
sim-unisim/unisim_vcomponents.o: $(UNISIM_BITS)
	$(GHDL) -a $(GHDLFLAGS) --work=unisim --workdir=sim-unisim $(UNISIM_BITS)


fpga/soc_reset_tb.o: fpga/soc_reset.o fpga/soc_reset_tb.o: fpga/soc_reset.o


soc_reset_tb: fpga/soc_reset_tb.o fpga/soc_reset.o soc_reset_tb: fpga/soc_reset_tb.o fpga/soc_reset.o
$(GHDL) -e $(GHDLFLAGS) soc_reset_tb $(GHDL) -e $(GHDLFLAGS) soc_reset_tb


core_tb: core_tb.o simple_ram_behavioural_helpers_c.o sim_console_c.o core_tb: core_tb.o simple_ram_behavioural_helpers_c.o sim_console_c.o sim_jtag_socket_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o -Wl,sim_console_c.o $@ $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o -Wl,sim_console_c.o -Wl,sim_jtag_socket_c.o $@


fetch_tb: fetch_tb.o fetch_tb: fetch_tb.o
$(GHDL) -e $(GHDLFLAGS) $@ $(GHDL) -e $(GHDLFLAGS) $@
@ -70,6 +82,9 @@ simple_ram_tb: simple_ram_tb.o
simple_ram_behavioural_tb: simple_ram_behavioural_helpers_c.o simple_ram_behavioural_tb.o simple_ram_behavioural_tb: simple_ram_behavioural_helpers_c.o simple_ram_behavioural_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@


# Elaborate the DMI/DTM testbench. The C helper object (the only *_c.o
# prerequisite) is handed to the linker through -Wl.
dmi_dtm_tb: dmi_dtm_tb.o simple_ram_behavioural_helpers_c.o
	$(GHDL) -e $(GHDLFLAGS) -Wl,$(filter %_c.o,$^) $@

tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out))) tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out)))


check: $(tests) test_micropython test_micropython_long check: $(tests) test_micropython test_micropython_long
@ -86,4 +101,8 @@ test_micropython_long: core_tb
@./scripts/test_micropython_long.py @./scripts/test_micropython_long.py


clean: clean:
rm -f *.o work-*cf $(all) rm -f *.o work-*cf unisim-*cf $(all)
rm -f sim-unisim/*.o sim-unisim/unisim-*cf

# Everything "clean" removes, plus editor backup files (*~) in the top-level
# directory and in fpga/. The fpga pattern needs its own '*': a bare "fpga/~"
# would only match a file literally named "~".
.PHONY: distclean
distclean: clean
	rm -f *~ fpga/*~

@ -13,23 +13,26 @@ package common is
end record; end record;


type Fetch1ToFetch2Type is record type Fetch1ToFetch2Type is record
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
pipe_stop : std_ulogic;
end record; end record;


type Fetch2ToDecode1Type is record type Fetch2ToDecode1Type is record
valid: std_ulogic; valid: std_ulogic;
stop_mark : std_ulogic;
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0); insn: std_ulogic_vector(31 downto 0);
end record; end record;
constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', others => (others => '0')); constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', stop_mark => '0', others => (others => '0'));


type Decode1ToDecode2Type is record type Decode1ToDecode2Type is record
valid: std_ulogic; valid: std_ulogic;
stop_mark : std_ulogic;
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0); insn: std_ulogic_vector(31 downto 0);
decode: decode_rom_t; decode: decode_rom_t;
end record; end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', decode => decode_rom_init, others => (others => '0')); constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', decode => decode_rom_init, others => (others => '0'));


type Fetch2ToIcacheType is record type Fetch2ToIcacheType is record
req: std_ulogic; req: std_ulogic;

@ -20,9 +20,14 @@ entity core is
wishbone_data_in : in wishbone_slave_out; wishbone_data_in : in wishbone_slave_out;
wishbone_data_out : out wishbone_master_out; wishbone_data_out : out wishbone_master_out;


-- Added for debug, ghdl doesn't support external names unfortunately dmi_addr : in std_ulogic_vector(3 downto 0);
registers : out regfile; dmi_din : in std_ulogic_vector(63 downto 0);
terminate_out : out std_ulogic dmi_dout : out std_ulogic_vector(63 downto 0);
dmi_req : in std_ulogic;
dmi_wr : in std_ulogic;
dmi_ack : out std_ulogic;

terminated_out : out std_logic
); );
end core; end core;


@ -73,11 +78,24 @@ architecture behave of core is
signal flush: std_ulogic; signal flush: std_ulogic;


signal complete: std_ulogic; signal complete: std_ulogic;

signal terminate: std_ulogic; signal terminate: std_ulogic;
signal core_rst: std_ulogic;
signal icache_rst: std_ulogic;

-- Debug actions
signal dbg_core_stop: std_ulogic;
signal dbg_core_rst: std_ulogic;
signal dbg_icache_rst: std_ulogic;

-- Debug status
signal dbg_core_is_stopped: std_ulogic;

-- For sim
signal registers: regfile;

begin begin


terminate_out <= terminate; core_rst <= dbg_core_rst or rst;


fetch1_0: entity work.fetch1 fetch1_0: entity work.fetch1
generic map ( generic map (
@ -85,7 +103,7 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
rst => rst, rst => core_rst,
stall_in => fetch1_stall_in, stall_in => fetch1_stall_in,
flush_in => flush, flush_in => flush,
e_in => execute1_to_fetch1, e_in => execute1_to_fetch1,
@ -97,12 +115,13 @@ begin
fetch2_0: entity work.fetch2 fetch2_0: entity work.fetch2
port map ( port map (
clk => clk, clk => clk,
rst => rst, rst => core_rst,
stall_in => fetch2_stall_in, stall_in => fetch2_stall_in,
stall_out => fetch2_stall_out, stall_out => fetch2_stall_out,
flush_in => flush, flush_in => flush,
i_in => icache_to_fetch2, i_in => icache_to_fetch2,
i_out => fetch2_to_icache, i_out => fetch2_to_icache,
stop_in => dbg_core_stop,
f_in => fetch1_to_fetch2, f_in => fetch1_to_fetch2,
f_out => fetch2_to_decode1 f_out => fetch2_to_decode1
); );
@ -116,17 +135,19 @@ begin
) )
port map( port map(
clk => clk, clk => clk,
rst => rst, rst => icache_rst,
i_in => fetch2_to_icache, i_in => fetch2_to_icache,
i_out => icache_to_fetch2, i_out => icache_to_fetch2,
wishbone_out => wishbone_insn_out, wishbone_out => wishbone_insn_out,
wishbone_in => wishbone_insn_in wishbone_in => wishbone_insn_in
); );


icache_rst <= rst or dbg_icache_rst;

decode1_0: entity work.decode1 decode1_0: entity work.decode1
port map ( port map (
clk => clk, clk => clk,
rst => rst, rst => core_rst,
stall_in => decode1_stall_in, stall_in => decode1_stall_in,
flush_in => flush, flush_in => flush,
f_in => fetch2_to_decode1, f_in => fetch2_to_decode1,
@ -138,10 +159,11 @@ begin
decode2_0: entity work.decode2 decode2_0: entity work.decode2
port map ( port map (
clk => clk, clk => clk,
rst => rst, rst => core_rst,
stall_out => decode2_stall_out, stall_out => decode2_stall_out,
flush_in => flush, flush_in => flush,
complete_in => complete, complete_in => complete,
stopped_out => dbg_core_is_stopped,
d_in => decode1_to_decode2, d_in => decode1_to_decode2,
e_out => decode2_to_execute1, e_out => decode2_to_execute1,
l_out => decode2_to_loadstore1, l_out => decode2_to_loadstore1,
@ -222,4 +244,36 @@ begin
complete_out => complete complete_out => complete
); );


debug_0: entity work.core_debug
port map (
clk => clk,
rst => rst,
dmi_addr => dmi_addr,
dmi_din => dmi_din,
dmi_dout => dmi_dout,
dmi_req => dmi_req,
dmi_wr => dmi_wr,
dmi_ack => dmi_ack,
core_stop => dbg_core_stop,
core_rst => dbg_core_rst,
icache_rst => dbg_icache_rst,
terminate => terminate,
core_stopped => dbg_core_is_stopped,
nia => fetch1_to_fetch2.nia,
terminated_out => terminated_out
);

-- Dump registers if core terminates
sim_terminate_test: if SIM generate
dump_registers: process(all)
begin
if terminate = '1' then
loop_0: for i in 0 to 31 loop
report "REG " & to_hstring(registers(i));
end loop loop_0;
assert false report "end of test" severity failure;
end if;
end process;
end generate;

end behave; end behave;

@ -0,0 +1,152 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

-- Debug module for the core: exposes a small register file (CTRL, STAT, NIA)
-- on a DMI slave port and turns CTRL writes into stop/step/reset actions.
entity core_debug is
port (
clk : in std_logic;
-- Synchronous reset: clears the "stopping" and "terminated" latches.
rst : in std_logic;

-- DMI slave port. dmi_ack is driven directly from dmi_req, so every access
-- completes in a single cycle. Only the first cycle of a request (rising
-- edge of dmi_req) triggers a write action.
dmi_addr : in std_ulogic_vector(3 downto 0);
dmi_din : in std_ulogic_vector(63 downto 0);
dmi_dout : out std_ulogic_vector(63 downto 0);
dmi_req : in std_ulogic;
dmi_wr : in std_ulogic;
dmi_ack : out std_ulogic;

-- Debug actions
-- core_stop is a level (held while stopping, dropped for one cycle on a
-- single step); core_rst and icache_rst are one-cycle pulses.
core_stop : out std_ulogic;
core_rst : out std_ulogic;
icache_rst : out std_ulogic;

-- Core status inputs
-- terminate latches both "stopping" and "terminated"; core_stopped and nia
-- are only passed through to the STAT and NIA read registers.
terminate : in std_ulogic;
core_stopped : in std_ulogic;
nia : in std_ulogic_vector(63 downto 0);

-- Misc
-- Latched copy of terminate; cleared by a CTRL reset, step or start write.
terminated_out : out std_ulogic
);
end core_debug;

-- Register-level implementation of the core debug module.
--
-- Reads are purely combinational (address mux onto dmi_dout). Writes are
-- acted upon once per request, on the rising edge of dmi_req.
architecture behave of core_debug is
    -- Previous-cycle value of dmi_req. DMI requests are levels; comparing
    -- against this turns each request into a single one-clock action.
    signal dmi_req_1: std_ulogic;

    -- CTRL register (direct actions, write 1 to act, read back 0)
    -- bit 0 : Core stop
    -- bit 1 : Core reset (doesn't clear stop)
    -- bit 2 : Icache reset
    -- bit 3 : Single step
    -- bit 4 : Core start
    constant DBG_CORE_CTRL         : std_ulogic_vector(3 downto 0) := "0000";
    constant DBG_CORE_CTRL_STOP    : integer := 0;
    constant DBG_CORE_CTRL_RESET   : integer := 1;
    constant DBG_CORE_CTRL_ICRESET : integer := 2;
    constant DBG_CORE_CTRL_STEP    : integer := 3;
    constant DBG_CORE_CTRL_START   : integer := 4;

    -- STAT register (read only)
    -- bit 0 : Core stopping (wait til bit 1 set)
    -- bit 1 : Core stopped
    -- bit 2 : Core terminated (clears with start or reset)
    constant DBG_CORE_STAT          : std_ulogic_vector(3 downto 0) := "0001";
    constant DBG_CORE_STAT_STOPPING : integer := 0;
    constant DBG_CORE_STAT_STOPPED  : integer := 1;
    constant DBG_CORE_STAT_TERM     : integer := 2;

    -- NIA register (read only for now)
    constant DBG_CORE_NIA : std_ulogic_vector(3 downto 0) := "0010";

    -- Some internal wires
    signal stat_reg : std_ulogic_vector(63 downto 0);

    -- Some internal latches
    signal stopping   : std_ulogic;
    signal do_step    : std_ulogic;
    signal do_reset   : std_ulogic;
    signal do_icreset : std_ulogic;
    signal terminated : std_ulogic;

begin
    -- Single cycle register accesses on DMI
    dmi_ack <= dmi_req;

    -- Status register read composition
    stat_reg <= (DBG_CORE_STAT_TERM     => terminated,
                 DBG_CORE_STAT_STOPPED  => core_stopped,
                 DBG_CORE_STAT_STOPPING => stopping,
                 others                 => '0');

    -- DMI read data mux. CTRL (and every unmapped address) reads back 0.
    with dmi_addr select dmi_dout <=
        stat_reg        when DBG_CORE_STAT,
        nia             when DBG_CORE_NIA,
        (others => '0') when others;

    -- DMI writes
    reg_write: process(clk)
    begin
        if rising_edge(clk) then
            -- The "do" strobes last one cycle, so they default to '0' on
            -- every clock. This is done outside the reset branch on purpose:
            -- otherwise they stay 'U' for the whole reset and core_rst /
            -- icache_rst / core_stop come out of reset undefined.
            do_step    <= '0';
            do_reset   <= '0';
            do_icreset <= '0';

            -- Track dmi_req on every clock (also during reset) so the edge
            -- detector below always compares against a defined value. A
            -- request already high when reset is released is not an edge.
            dmi_req_1 <= dmi_req;

            if rst = '1' then
                stopping   <= '0';
                terminated <= '0';
            else
                -- Edge detect on dmi_req for 1-shot pulses
                if dmi_req = '1' and dmi_req_1 = '0' then
                    if dmi_wr = '1' then
                        report("DMI write to " & to_hstring(dmi_addr));

                        -- Control register actions. Several bits may be set
                        -- in one write; the later assignments win, so START
                        -- overrides STOP when both are written.
                        if dmi_addr = DBG_CORE_CTRL then
                            if dmi_din(DBG_CORE_CTRL_RESET) = '1' then
                                do_reset   <= '1';
                                terminated <= '0';
                            end if;
                            if dmi_din(DBG_CORE_CTRL_STOP) = '1' then
                                stopping <= '1';
                            end if;
                            if dmi_din(DBG_CORE_CTRL_STEP) = '1' then
                                do_step    <= '1';
                                terminated <= '0';
                            end if;
                            if dmi_din(DBG_CORE_CTRL_ICRESET) = '1' then
                                do_icreset <= '1';
                            end if;
                            if dmi_din(DBG_CORE_CTRL_START) = '1' then
                                stopping   <= '0';
                                terminated <= '0';
                            end if;
                        end if;
                    else
                        report("DMI read from " & to_string(dmi_addr));
                    end if;
                end if;

                -- Set core stop on terminate. We'll be stopping some time *after*
                -- the offending instruction, at least until we can do back flushes
                -- that preserve NIA which we can't just yet.
                -- This comes last so it overrides a START written in the
                -- same cycle.
                if terminate = '1' then
                    stopping   <= '1';
                    terminated <= '1';
                end if;
            end if;
        end if;
    end process;

    -- Core control signals generated by the debug module.
    -- A single step releases core_stop for exactly the one cycle in which
    -- do_step is high.
    core_stop      <= stopping and not do_step;
    core_rst       <= do_reset;
    icache_rst     <= do_icreset;
    terminated_out <= terminated;
end behave;

@ -1,5 +1,6 @@
library ieee; library ieee;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;
use ieee.numeric_std.all;


library work; library work;
use work.common.all; use work.common.all;
@ -29,19 +30,21 @@ begin
uart0_txd => open uart0_txd => open
); );


clk_process: process clk_process: process
begin begin
clk <= '0'; clk <= '0';
wait for clk_period/2; wait for clk_period/2;
clk <= '1'; clk <= '1';
wait for clk_period/2; wait for clk_period/2;
end process; end process;


rst_process: process rst_process: process
begin begin
rst <= '1'; rst <= '1';
wait for 10*clk_period; wait for 10*clk_period;
rst <= '0'; rst <= '0';
wait; wait;
end process; end process;

jtag: entity work.sim_jtag;
end; end;

@ -248,6 +248,7 @@ begin
v.valid := f_in.valid; v.valid := f_in.valid;
v.nia := f_in.nia; v.nia := f_in.nia;
v.insn := f_in.insn; v.insn := f_in.insn;
v.stop_mark := f_in.stop_mark;


ppc_insn := PPC_ILLEGAL; ppc_insn := PPC_ILLEGAL;



@ -16,6 +16,8 @@ entity decode2 is
complete_in : in std_ulogic; complete_in : in std_ulogic;
stall_out : out std_ulogic; stall_out : out std_ulogic;


stopped_out : out std_ulogic;

flush_in: in std_ulogic; flush_in: in std_ulogic;


d_in : in Decode1ToDecode2Type; d_in : in Decode1ToDecode2Type;
@ -330,9 +332,16 @@ begin
-- through the pipeline. -- through the pipeline.
stall_out <= '0'; stall_out <= '0';
is_valid := d_in.valid; is_valid := d_in.valid;

-- Handle debugger stop
stopped_out <= '0';
if d_in.stop_mark = '1' and v_int.outstanding = 0 then
stopped_out <= '1';
end if;

case v_int.state is case v_int.state is
when IDLE => when IDLE =>
if (flush_in = '0') and (d_in.valid = '1') and (d_in.decode.sgl_pipe = '1') then if (flush_in = '0') and (is_valid = '1') and (d_in.decode.sgl_pipe = '1') then
if v_int.outstanding /= 0 then if v_int.outstanding /= 0 then
v_int.state := WAIT_FOR_PREV_TO_COMPLETE; v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
stall_out <= '1'; stall_out <= '1';

@ -0,0 +1,30 @@
-- Dummy/empty DMI interface to make toplevel happy on unsupported FPGAs

library ieee;
use ieee.std_logic_1164.all;

library work;
use work.wishbone_types.all;

-- Placeholder DMI master with the same entity name and port list as the
-- Xilinx JTAG implementation, so either file can supply "dmi_dtm".
entity dmi_dtm is
-- ABITS / DBITS: widths of the DMI address and data buses.
generic(ABITS : INTEGER:=8;
DBITS : INTEGER:=32);

-- The architecture in this file ties every output to zero and reads none of
-- the inputs.
port(sys_clk : in std_ulogic;
sys_reset : in std_ulogic;
dmi_addr : out std_ulogic_vector(ABITS - 1 downto 0);
dmi_din : in std_ulogic_vector(DBITS - 1 downto 0);
dmi_dout : out std_ulogic_vector(DBITS - 1 downto 0);
dmi_req : out std_ulogic;
dmi_wr : out std_ulogic;
dmi_ack : in std_ulogic
);
end entity dmi_dtm;

-- Inert DMI master: never raises a request, so the DMI bus stays idle.
architecture behaviour of dmi_dtm is
begin
    -- Constant-zero outputs; dmi_req = '0' means no access is ever started,
    -- which makes the values on addr/dout/wr irrelevant to the slave.
    dmi_addr <= (others => '0');
    dmi_dout <= (others => '0');
    dmi_req  <= '0';
    dmi_wr   <= '0';
end architecture behaviour;

@ -0,0 +1,250 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.wishbone_types.all;

library unisim;
use unisim.vcomponents.all;

entity dmi_dtm_tb is
end dmi_dtm_tb;

architecture behave of dmi_dtm_tb is
signal clk : std_ulogic;
signal rst : std_ulogic;
constant clk_period : time := 10 ns;
constant jclk_period : time := 30 ns;

-- DMI debug bus signals
signal dmi_addr : std_ulogic_vector(7 downto 0);
signal dmi_din : std_ulogic_vector(63 downto 0);
signal dmi_dout : std_ulogic_vector(63 downto 0);
signal dmi_req : std_ulogic;
signal dmi_wr : std_ulogic;
signal dmi_ack : std_ulogic;

-- Global JTAG signals (used by BSCANE2 inside dmi_dtm)
alias j : glob_jtag_t is glob_jtag;

-- Wishbone interfaces
signal wishbone_ram_in : wishbone_slave_out;
signal wishbone_ram_out : wishbone_master_out;

begin
dtm: entity work.dmi_dtm
generic map(
ABITS => 8,
DBITS => 64
)
port map(
sys_clk => clk,
sys_reset => rst,
dmi_addr => dmi_addr,
dmi_din => dmi_din,
dmi_dout => dmi_dout,
dmi_req => dmi_req,
dmi_wr => dmi_wr,
dmi_ack => dmi_ack
);

simple_ram_0: entity work.mw_soc_memory
generic map(RAM_INIT_FILE => "simple_ram_behavioural.bin",
MEMORY_SIZE => 524288)
port map(clk => clk, rst => rst,
wishbone_in => wishbone_ram_out,
wishbone_out => wishbone_ram_in);

wishbone_debug_0: entity work.wishbone_debug_master
port map(clk => clk, rst => rst,
dmi_addr => dmi_addr(1 downto 0),
dmi_dout => dmi_din,
dmi_din => dmi_dout,
dmi_wr => dmi_wr,
dmi_ack => dmi_ack,
dmi_req => dmi_req,
wb_in => wishbone_ram_in,
wb_out => wishbone_ram_out);

-- system clock
-- Free-running system clock generator: starts high, toggles every
-- clk_period / 2, and never stops (the process has no final wait).
sys_clk: process
begin
clk <= '1';
wait for clk_period / 2;
clk <= '0';
wait for clk_period / 2;
end process sys_clk;

-- system sim: just reset and wait
-- System-side stimulus: hold rst high for one clk_period from time zero,
-- release it, then suspend for the rest of the simulation.
sys_sim: process
begin
rst <= '1';
wait for clk_period;
rst <= '0';
-- Nothing else to drive on the system side; the JTAG process ends the run.
wait;
end process;

-- jtag sim process
sim_jtag: process
-- Emit `count` full TCK periods on the global JTAG bus. Each period is a low
-- half followed by a high half of jclk_period/2; a count of zero or less
-- produces no pulses and consumes no simulation time.
procedure clock(count: in INTEGER) is
    variable remaining : integer := count;
begin
    while remaining > 0 loop
        j.tck <= '0';
        wait for jclk_period/2;
        j.tck <= '1';
        wait for jclk_period/2;
        remaining := remaining - 1;
    end loop;
end procedure clock;

-- Drive `val` onto TDI one bit per TCK period, starting with index 0.
-- The indexing assumes the actual parameter has a (N-1 downto 0) range.
procedure shift_out(val: in std_ulogic_vector) is
    constant last_bit : integer := val'length - 1;
begin
    for bit_idx in 0 to last_bit loop
        -- Present the bit first, then clock it in.
        j.tdi <= val(bit_idx);
        clock(1);
    end loop;
end procedure shift_out;

-- Sample TDO once per TCK period for val'length periods. Each sample enters
-- at the top of `val` and earlier samples move down, so the first bit sampled
-- ends up at index 0. Assumes the actual has a (N-1 downto 0) range.
procedure shift_in(val: out std_ulogic_vector) is
    constant top : integer := val'length - 1;
begin
    for n in 0 to top loop
        -- Sample before clocking: TDO already shows the next bit.
        val := j.tdo & val(top downto 1);
        clock(1);
    end loop;
end procedure shift_in;

-- Run one capture / shift / update sequence on the global JTAG signals,
-- shifting out a request made of op, then data, then addr. Because bits go
-- out index 0 first, op is the first field on the wire and addr the last.
procedure send_command(
addr : in std_ulogic_vector(7 downto 0);
data : in std_ulogic_vector(63 downto 0);
op : in std_ulogic_vector(1 downto 0)) is
begin
-- Capture phase: one TCK with capture high, one with it low again.
j.capture <= '1';
clock(1);
j.capture <= '0';
clock(1);
-- Shift phase: 2 + 64 + 8 TCK periods with shift held high.
j.shift <= '1';
shift_out(op);
shift_out(data);
shift_out(addr);
j.shift <= '0';
-- Update phase: one TCK with update high, one with it low again.
j.update <= '1';
clock(1);
j.update <= '0';
clock(1);
end procedure send_command;

-- Run one capture / shift / update sequence and return what is shifted back
-- on TDO: first the 2-bit response op, then the 64-bit data. The trailing
-- 8 address bits are shifted in too but discarded.
procedure read_resp(
op : out std_ulogic_vector(1 downto 0);
data : out std_ulogic_vector(63 downto 0)) is

-- Scratch destination for the address field; never read afterwards.
variable addr : std_ulogic_vector(7 downto 0);
begin
-- Capture phase: one TCK with capture high, one with it low again.
j.capture <= '1';
clock(1);
j.capture <= '0';
clock(1);
-- Shift phase: TDI is left untouched by this procedure while TDO is sampled.
j.shift <= '1';
shift_in(op);
shift_in(data);
shift_in(addr);
j.shift <= '0';
-- Update phase: same pulse shape as in send_command.
j.update <= '1';
clock(1);
j.update <= '0';
clock(1);
end procedure read_resp;

-- Issue a DMI write (op "10") of `data` to `addr`, then poll the response
-- until it reports "00" (done).
--
-- A "11" response means the command is still in progress and is retried, up
-- to MAX_BUSY_POLLS times; after that the simulation is aborted with severity
-- failure instead of spinning forever. The poll counter is explicitly
-- initialised: an uninitialised integer starts at integer'low, which would
-- make any "timeout < limit" check pass for about 2**31 iterations.
procedure dmi_write(addr : in std_ulogic_vector(7 downto 0);
                    data : in std_ulogic_vector(63 downto 0)) is
    -- Each poll lasts a full JTAG scan, far longer than a DMI access takes
    -- in sys_clk cycles, so this bound is generous.
    constant MAX_BUSY_POLLS : integer := 100;
    variable resp_op   : std_ulogic_vector(1 downto 0);
    variable resp_data : std_ulogic_vector(63 downto 0);
    variable timeout   : integer := 0;
begin
    send_command(addr, data, "10");
    loop
        read_resp(resp_op, resp_data);
        case resp_op is
            when "00" =>
                return;
            when "11" =>
                timeout := timeout + 1;
                assert timeout < MAX_BUSY_POLLS
                    report "dmi_write timed out !" severity failure;
            when others =>
                -- Unexpected status: report it and keep polling.
                report "dmi_write got odd status: " &
                    to_hstring(resp_op) severity error;
        end case;
    end loop;
end procedure dmi_write;

-- Issue a DMI read (op "01") of `addr`, then poll the response until it
-- reports "00" (done); `data` then holds the value returned by that scan.
--
-- A "11" response means the command is still in progress and is retried, up
-- to MAX_BUSY_POLLS times; after that the simulation is aborted with severity
-- failure instead of spinning forever. The poll counter is explicitly
-- initialised: an uninitialised integer starts at integer'low, which would
-- make any "timeout < limit" check pass for about 2**31 iterations.
procedure dmi_read(addr : in std_ulogic_vector(7 downto 0);
                   data : out std_ulogic_vector(63 downto 0)) is
    -- Each poll lasts a full JTAG scan, far longer than a DMI access takes
    -- in sys_clk cycles, so this bound is generous.
    constant MAX_BUSY_POLLS : integer := 100;
    variable resp_op : std_ulogic_vector(1 downto 0);
    variable timeout : integer := 0;
begin
    send_command(addr, (others => '0'), "01");
    loop
        read_resp(resp_op, data);
        case resp_op is
            when "00" =>
                return;
            when "11" =>
                timeout := timeout + 1;
                assert timeout < MAX_BUSY_POLLS
                    report "dmi_read timed out !" severity failure;
            when others =>
                -- Unexpected status: report it and keep polling.
                report "dmi_read got odd status: " &
                    to_hstring(resp_op) severity error;
        end case;
    end loop;
end procedure dmi_read;

variable data : std_ulogic_vector(63 downto 0);
begin
-- init & reset
j.reset <= '1';
j.sel <= "0000";
j.capture <= '0';
j.update <= '0';
j.shift <= '0';
j.tdi <= '0';
j.tms <= '0';
j.runtest <= '0';
clock(5);
j.reset <= '0';
clock(5);

-- select chain 2
j.sel <= "0010";
clock(1);

-- send command
dmi_read(x"00", data);
report "Read addr reg:" & to_hstring(data);
report "Writing addr reg to all 1's";
dmi_write(x"00", (others => '1'));
dmi_read(x"00", data);
report "Read addr reg:" & to_hstring(data);

report "Writing ctrl reg to all 1's";
dmi_write(x"02", (others => '1'));
dmi_read(x"02", data);
report "Read ctrl reg:" & to_hstring(data);

report "Read memory at 0...\n";
dmi_write(x"00", x"0000000000000000");
dmi_write(x"02", x"00000000000007ff");
dmi_read(x"01", data);
report "00:" & to_hstring(data);
dmi_read(x"01", data);
report "08:" & to_hstring(data);
dmi_read(x"01", data);
report "10:" & to_hstring(data);
dmi_read(x"01", data);
report "18:" & to_hstring(data);
clock(10);
std.env.finish;
end process;
end behave;

@ -0,0 +1,276 @@
-- Xilinx internal JTAG to DMI interface
--
-- DMI bus
--
-- req : ____/------------\_____
-- addr: xxxx< >xxxxx
-- dout: xxxx< >xxxxx
-- wr : xxxx< >xxxxx
-- din : xxxxxxxxxxxx< >xxx
-- ack : ____________/------\___
--
-- * addr/dout set along with req, can be latched on same cycle by slave
-- * ack & din remain up until req is dropped by master, the slave must
-- provide a stable output on din on reads during that time.
-- * req remains low until at least one sysclk after ack is seen low.
--
-- JTAG (tck) DMI (sys_clk)
--
-- * jtag_req = 1
-- (jtag_req_0) *
-- (jtag_req_1) -> * dmi_req = 1 >
-- *.../...
-- * dmi_ack = 1 <
-- * (dmi_ack_0)
-- * <- (dmi_ack_1)
-- * jtag_req = 0 (and latch dmi_din)
-- (jtag_req_0) *
-- (jtag_req_1) -> * dmi_req = 0 >
-- * dmi_ack = 0 <
-- * (dmi_ack_0)
-- * <- (dmi_ack_1)
--
-- jtag_req can go back to 1 when dmi_ack_1 is 0
--
-- Questions/TODO:
-- - I use 2 flip-flops for sync, is that enough ?
-- - I treat the jtag_reset as an async reset, is that necessary ?
-- - Dbl check reset situation since we have two different resets
-- each only resetting part of the logic...
-- - Look at optionally removing the synchronizer on the ack path,
-- assuming JTAG is always slow enough that ack will have been
-- stable long enough by the time CAPTURE comes in.
-- - We could avoid the latched request by not shifting while a
-- request is in progress (and force TDO to 1 to return a busy
-- status).
--
-- WARNING: This isn't the real DMI JTAG protocol (at least not yet).
-- a command while busy will be ignored. A response of "11"
-- means the previous command is still going, try again.
-- As such we don't implement the DMI "error" status, and
-- we don't implement DTMCS yet... This may still all change
-- but for now it's easier that way as the real DMI protocol
-- requires for a command to work properly that enough TCK
-- are sent while IDLE and I'm having trouble getting that
-- working with UrJtag and the Xilinx BSCANE2 for now.

library ieee;
use ieee.std_logic_1164.all;
use ieee.math_real.all;

library work;
use work.wishbone_types.all;

library unisim;
use unisim.vcomponents.all;

-- JTAG-to-DMI bridge (DMI master side). The JTAG side has no ports here: it
-- is reached through the BSCANE2 primitive instantiated in the architecture.
entity dmi_dtm is
-- ABITS / DBITS: widths of the DMI address and data buses. The JTAG shift
-- register is ABITS + DBITS + 2 bits long (address, data, 2-bit op).
generic(ABITS : INTEGER:=8;
DBITS : INTEGER:=32);

-- sys_reset is synchronous to sys_clk and only resets the request
-- synchronizer; the JTAG-side registers are reset by the JTAG reset.
port(sys_clk : in std_ulogic;
sys_reset : in std_ulogic;
dmi_addr : out std_ulogic_vector(ABITS - 1 downto 0);
dmi_din : in std_ulogic_vector(DBITS - 1 downto 0);
dmi_dout : out std_ulogic_vector(DBITS - 1 downto 0);
dmi_req : out std_ulogic;
dmi_wr : out std_ulogic;
dmi_ack : in std_ulogic
-- dmi_err : in std_ulogic TODO: Add error response
);
end entity dmi_dtm;

architecture behaviour of dmi_dtm is

-- Signals coming out of the BSCANE2 block
signal jtag_reset : std_ulogic;
signal capture : std_ulogic;
signal update : std_ulogic;
signal drck : std_ulogic;
signal jtag_clk : std_ulogic;
signal sel : std_ulogic;
signal shift : std_ulogic;
signal tdi : std_ulogic;
signal tdo : std_ulogic;
signal tck : std_ulogic;

-- ** JTAG clock domain **

-- Shift register
signal shiftr : std_ulogic_vector(ABITS + DBITS + 1 downto 0);

-- Latched request
signal request : std_ulogic_vector(ABITS + DBITS + 1 downto 0);

-- A request is present
signal jtag_req : std_ulogic;

-- Synchronizer for dmi_ack (sys clk -> jtag_clk)
signal dmi_ack_0 : std_ulogic;
signal dmi_ack_1 : std_ulogic;

-- ** sys clock domain **

-- Synchronizer for jtag_req (jtag clk -> sys clk)
signal jtag_req_0 : std_ulogic;
signal jtag_req_1 : std_ulogic;

-- ** combination signals
signal jtag_bsy : std_ulogic;
signal op_valid : std_ulogic;
signal rsp_op : std_ulogic_vector(1 downto 0);

-- ** Constants **
constant DMI_REQ_NOP : std_ulogic_vector(1 downto 0) := "00";
constant DMI_REQ_RD : std_ulogic_vector(1 downto 0) := "01";
constant DMI_REQ_WR : std_ulogic_vector(1 downto 0) := "10";
constant DMI_RSP_OK : std_ulogic_vector(1 downto 0) := "00";
constant DMI_RSP_BSY : std_ulogic_vector(1 downto 0) := "11";

begin

-- Implement the Xilinx bscan2 for series 7 devices (TODO: use PoC to
-- wrap this if compatibility is required with older devices).
bscan : BSCANE2
generic map (
JTAG_CHAIN => 2
)
port map (
CAPTURE => capture,
DRCK => drck,
RESET => jtag_reset,
RUNTEST => open,
SEL => sel,
SHIFT => shift,
TCK => tck,
TDI => tdi,
TMS => open,
UPDATE => update,
TDO => tdo
);

-- Some examples out there suggest buffering the clock so it's
-- treated as a proper clock net. This is probably needed when using
-- drck (the gated clock) but I'm using the real tck here to avoid
-- missing the update phase so maybe not...
--
clkbuf : BUFG
port map (
-- I => drck,
I => tck,
O => jtag_clk
);


-- dmi_req synchronization
-- Two-stage synchronizer carrying jtag_req (JTAG clock domain) into the
-- sys_clk domain. sys_reset is synchronous and forces both stages low.
dmi_req_sync : process(sys_clk)
begin
    if rising_edge(sys_clk) then
        -- Normal operation: advance the request through both stages.
        jtag_req_0 <= jtag_req;
        jtag_req_1 <= jtag_req_0;

        -- The reset assignments come last so they take precedence.
        if sys_reset = '1' then
            jtag_req_0 <= '0';
            jtag_req_1 <= '0';
        end if;
    end if;
end process;
dmi_req <= jtag_req_1;

-- dmi_ack synchronization
-- Two-stage synchronizer carrying dmi_ack (sys_clk domain) into the JTAG
-- clock domain; dmi_ack_1 is the value the JTAG-side logic uses.
dmi_ack_sync: process(jtag_clk, jtag_reset)
begin
-- jtag_reset is async (see comments)
if jtag_reset = '1' then
dmi_ack_0 <= '0';
dmi_ack_1 <= '0';
elsif rising_edge(jtag_clk) then
-- Stage 0 samples the foreign-domain signal, stage 1 re-registers it.
dmi_ack_0 <= dmi_ack;
dmi_ack_1 <= dmi_ack_0;
end if;
end process;
-- jtag_bsy indicates whether we can start a new request, we can when
-- we aren't already processing one (jtag_req) and the synchronized ack
-- of the previous one is 0.
--
jtag_bsy <= jtag_req or dmi_ack_1;

-- decode request type in shift register
with shiftr(1 downto 0) select op_valid <=
'1' when DMI_REQ_RD,
'1' when DMI_REQ_WR,
'0' when others;

-- encode response op
rsp_op <= DMI_RSP_BSY when jtag_bsy = '1' else DMI_RSP_OK;

-- Some DMI out signals are directly driven from the request register
dmi_addr <= request(ABITS + DBITS + 1 downto DBITS + 2);
dmi_dout <= request(DBITS + 1 downto 2);
dmi_wr <= '1' when request(1 downto 0) = DMI_REQ_WR else '0';

-- TDO is wired to shift register bit 0
tdo <= shiftr(0);

-- Main state machine. Handles shift registers, request latch and
-- jtag_req latch. Could be split into 3 processes but it's probably
-- not worthwhile.
--
-- JTAG-domain state: the shift register (shiftr), the latched request and
-- the jtag_req flag. Everything is clocked by jtag_clk with jtag_reset as an
-- asynchronous reset, and nothing changes unless sel is '1'.
--
-- The statements below are order-sensitive: when several conditions are true
-- on the same clock, the last assignment to a signal wins. In particular the
-- capture load of shiftr at the end overrides the shift and update cases.
shifter: process(jtag_clk, jtag_reset)
begin
if jtag_reset = '1' then
shiftr <= (others => '0');
request <= (others => '0');
jtag_req <= '0';
elsif rising_edge(jtag_clk) then

-- Handle jtag "commands" when sel is 1
if sel = '1' then
-- Shift state, rotate the register
-- (TDI enters at the top bit; bit 0, which drives TDO, drops out.)
if shift = '1' then
shiftr <= tdi & shiftr(ABITS + DBITS + 1 downto 1);
end if;

-- Update state (trigger)
--
-- Latch the request if we aren't already processing one and
-- it has a valid command opcode.
--
if update = '1' and op_valid = '1' then
if jtag_bsy = '0' then
request <= shiftr;
jtag_req <= '1';
end if;
-- Set the shift register "op" to "busy". This will prevent
-- us from re-starting the command on the next update if
-- the command completes before that.
shiftr(1 downto 0) <= DMI_RSP_BSY;
end if;

-- Request completion.
--
-- Capture the response data for reads and clear request flag.
--
-- Note: We clear req (and thus dmi_req) here which relies on tck
-- ticking and sel set. This means we are stuck with dmi_req up if
-- the jtag interface stops. Slaves must be resilient to this.
--
-- This cannot coincide with the request latch above: latching needs
-- jtag_bsy = '0' (hence jtag_req = '0'), completion needs jtag_req = '1'.
if jtag_req = '1' and dmi_ack_1 = '1' then
jtag_req <= '0';
if request(1 downto 0) = DMI_REQ_RD then
request(DBITS + 1 downto 2) <= dmi_din;
end if;
end if;

-- Capture state, grab latch content with updated status
-- (address and data come from the request latch, the two op bits are
-- replaced by the current busy/ok status.)
if capture = '1' then
shiftr <= request(ABITS + DBITS + 1 downto 2) & rsp_op;
end if;

end if;
end if;
end process;
end architecture behaviour;

@ -68,6 +68,8 @@ begin


-- Update outputs -- Update outputs
f_out <= r; f_out <= r;

report "fetch1 R:" & std_ulogic'image(e_in.redirect) & " v.nia:" & to_hstring(v.nia) & " f_out.nia:" & to_hstring(f_out.nia);
end process; end process;


end architecture behaviour; end architecture behaviour;

@ -15,6 +15,7 @@ entity fetch2 is
stall_out : out std_ulogic; stall_out : out std_ulogic;


flush_in : in std_ulogic; flush_in : in std_ulogic;
stop_in : in std_ulogic;


i_in : in IcacheToFetch2Type; i_in : in IcacheToFetch2Type;
i_out : out Fetch2ToIcacheType; i_out : out Fetch2ToIcacheType;
@ -49,12 +50,12 @@ begin
v.valid := i_in.ack; v.valid := i_in.ack;
v.nia := f_in.nia; v.nia := f_in.nia;
v.insn := i_in.insn; v.insn := i_in.insn;
stall_out <= not i_in.ack; stall_out <= stop_in or not i_in.ack;



if flush_in = '1' or stop_in = '1' then
if flush_in = '1' then
v.valid := '0'; v.valid := '0';
end if; end if;
v.stop_mark := stop_in;


-- Update registers -- Update registers
rin <= v; rin <= v;

@ -25,13 +25,16 @@ filesets:
- multiply.vhdl - multiply.vhdl
- writeback.vhdl - writeback.vhdl
- insn_helpers.vhdl - insn_helpers.vhdl
- wishbone_debug_master.vhdl
- core.vhdl - core.vhdl
- icache.vhdl - icache.vhdl
- core_debug.vhdl
file_type : vhdlSource-2008 file_type : vhdlSource-2008


soc: soc:
files: files:
- wishbone_arbiter.vhdl - wishbone_arbiter.vhdl
- wishbone_debug_master.vhdl
- soc.vhdl - soc.vhdl
file_type : vhdlSource-2008 file_type : vhdlSource-2008


@ -46,6 +49,14 @@ filesets:
- fpga/firmware.hex : {copyto : firmware.hex, file_type : user} - fpga/firmware.hex : {copyto : firmware.hex, file_type : user}
file_type : vhdlSource-2008 file_type : vhdlSource-2008


debug_xilinx:
files:
- dmi_dtm_xilinx.vhdl : {file_type : vhdlSource-2008}

debug_dummy:
files:
- dmi_dtm_dummy.vhdl : {file_type : vhdlSource-2008}

nexys_a7: nexys_a7:
files: files:
- fpga/nexys_a7.xdc : {file_type : xdc} - fpga/nexys_a7.xdc : {file_type : xdc}
@ -69,7 +80,7 @@ filesets:
targets: targets:
nexys_a7: nexys_a7:
default_tool: vivado default_tool: vivado
filesets: [core, nexys_a7, soc, fpga] filesets: [core, nexys_a7, soc, fpga, debug_xilinx]
parameters : [memory_size, ram_init_file] parameters : [memory_size, ram_init_file]
tools: tools:
vivado: {part : xc7a100tcsg324-1} vivado: {part : xc7a100tcsg324-1}
@ -77,7 +88,7 @@ targets:


nexys_video: nexys_video:
default_tool: vivado default_tool: vivado
filesets: [core, nexys_video, soc, fpga] filesets: [core, nexys_video, soc, fpga, debug_xilinx]
parameters : [memory_size, ram_init_file] parameters : [memory_size, ram_init_file]
tools: tools:
vivado: {part : xc7a200tsbg484-1} vivado: {part : xc7a200tsbg484-1}
@ -85,7 +96,7 @@ targets:


arty_a7-35: arty_a7-35:
default_tool: vivado default_tool: vivado
filesets: [core, arty_a7-35, soc, fpga] filesets: [core, arty_a7-35, soc, fpga, debug_xilinx]
parameters : [memory_size, ram_init_file] parameters : [memory_size, ram_init_file]
tools: tools:
vivado: {part : xc7a35ticsg324-1L} vivado: {part : xc7a35ticsg324-1L}
@ -93,7 +104,7 @@ targets:


cmod_a7-35: cmod_a7-35:
default_tool: vivado default_tool: vivado
filesets: [core, cmod_a7-35, soc, fpga] filesets: [core, cmod_a7-35, soc, fpga, debug_xilinx]
parameters : [memory_size, ram_init_file, reset_low=false] parameters : [memory_size, ram_init_file, reset_low=false]
tools: tools:
vivado: {part : xc7a35tcpg236-1} vivado: {part : xc7a35tcpg236-1}

@ -0,0 +1,7 @@
# Flags for the host-side debug tool.  gnu99 is used instead of strict c99
# because mw_debug.c relies on POSIX/GNU declarations (strndup, getopt_long,
# h_addr) that glibc hides when __STRICT_ANSI__ is in effect.
CFLAGS = -O2 -g -Wall -std=gnu99
LDLIBS = -lurjtag

.PHONY: all
all: mw_debug

# CFLAGS was previously defined but never handed to the compiler.
mw_debug: mw_debug.c
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LDLIBS)

@ -0,0 +1,583 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <stdbool.h>
#include <errno.h>
#include <getopt.h>
#include <poll.h>
#include <signal.h>
#include <fcntl.h>
#include <netdb.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <urjtag/urjtag.h>

#define DBG_WB_ADDR 0x00
#define DBG_WB_DATA 0x01
#define DBG_WB_CTRL 0x02

#define DBG_CORE_CTRL 0x10
#define DBG_CORE_CTRL_STOP (1 << 0)
#define DBG_CORE_CTRL_RESET (1 << 1)
#define DBG_CORE_CTRL_ICRESET (1 << 2)
#define DBG_CORE_CTRL_STEP (1 << 3)
#define DBG_CORE_CTRL_START (1 << 4)

#define DBG_CORE_STAT 0x11
#define DBG_CORE_STAT_STOPPING (1 << 0)
#define DBG_CORE_STAT_STOPPED (1 << 1)
#define DBG_CORE_STAT_TERM (1 << 2)

#define DBG_CORE_NIA 0x12

static bool debug;

/*
 * Transport used to reach the DMI debug bus. Each backend (simulator
 * socket or JTAG cable) fills in these three entry points.
 */
struct backend {
/* Open the transport. target may be NULL, in which case the backend
 * substitutes its own default. Returns a negative value on failure. */
int (*init)(const char *target);
/* Reset hook. Not called anywhere in the code visible here. */
int (*reset)(void);
/* Perform one DMI exchange. The callers in this file use op 1 = read,
 * 2 = write and 0 = poll for the result. *data (if non-NULL) is sent and
 * then overwritten with the returned data. The return value is the 2-bit
 * status of the exchange (0 = done, 3 = keep polling) or negative on error. */
int (*command)(uint8_t op, uint8_t addr, uint64_t *data);
};
/* Backend selected with -b/--backend; NULL until one is chosen. */
static struct backend *b;

/*
 * Abort the program when an operation failed.
 *
 * r is the operation's return code; any negative value is treated as a
 * failure, reported as "Error <failstr>" on stderr, and followed by exit(1).
 * Non-negative values return silently.
 */
static void check(int r, const char *failstr)
{
	if (r < 0) {
		fprintf(stderr, "Error %s\n", failstr);
		exit(1);
	}
}

/* -------------- SIM backend -------------- */

/* Socket connected to the simulator's debug port, or -1 when not connected. */
static int sim_fd = -1;

/*
 * Connect the sim backend to the simulator's JTAG socket.
 *
 * target has the form "host[:port]". NULL selects "localhost:13245", and a
 * missing or empty port selects 13245.
 *
 * Returns 0 on success (sim_fd is then a connected socket) or -1 after
 * printing a diagnostic; on failure no descriptor is left open.
 */
static int sim_init(const char *target)
{
	struct sockaddr_in saddr;
	struct hostent *hp;
	const char *sep, *portstr = "13245";
	char *host, *end;
	unsigned long port;
	int rc, err;

	if (!target)
		target = "localhost:13245";

	/* Split "host:port". The separator is optional, so never do pointer
	 * arithmetic on a NULL strchr() result. */
	sep = strchr(target, ':');
	if (sep) {
		host = strndup(target, sep - target);
		if (sep[1] != '\0')
			portstr = sep + 1;
	} else {
		host = strdup(target);
	}
	if (!host) {
		fprintf(stderr, "Out of memory\n");
		return -1;
	}

	port = strtoul(portstr, &end, 10);
	if (*end != '\0' || port == 0 || port > 65535) {
		fprintf(stderr, "Invalid port '%s'\n", portstr);
		goto fail;
	}
	if (debug)
		printf("Opening sim backend host '%s' port %lu\n", host, port);

	sim_fd = socket(PF_INET, SOCK_STREAM, 0);
	if (sim_fd < 0) {
		fprintf(stderr, "Error opening socket: %s\n",
			strerror(errno));
		goto fail;
	}
	hp = gethostbyname(host);
	if (!hp) {
		fprintf(stderr,"Unknown host '%s'\n", host);
		goto fail;
	}
	/* Only IPv4 results fit in a sockaddr_in. */
	if (hp->h_addrtype != AF_INET ||
	    hp->h_length > (int)sizeof(saddr.sin_addr)) {
		fprintf(stderr,"Unsupported address type for host '%s'\n", host);
		goto fail;
	}
	memset(&saddr, 0, sizeof(saddr));
	memcpy(&saddr.sin_addr, hp->h_addr, hp->h_length);
	saddr.sin_port = htons((uint16_t)port);
	saddr.sin_family = AF_INET;
	rc = connect(sim_fd, (struct sockaddr *)&saddr, sizeof(saddr));
	if (rc < 0) {
		/* Save errno: close() in the failure path may overwrite it. */
		err = errno;
		fprintf(stderr,"Connection to '%s' failed: %s\n",
			host, strerror(err));
		goto fail;
	}
	free(host);
	return 0;

fail:
	if (sim_fd >= 0) {
		close(sim_fd);
		sim_fd = -1;
	}
	free(host);
	return -1;
}

/*
 * Reset hook of the sim backend. Nothing needs to be done for the
 * simulator, so this only reports success. (The original fell off the end
 * of a non-void function, leaving the return value undefined.)
 */
static int sim_reset(void)
{
	return 0;
}

/*
 * Append the c low-order bits of d, least significant first, to a bit
 * stream.
 *
 * *p is the byte currently being filled and *b the next free bit (0..7)
 * inside it; both are advanced past the written bits. Bits are only ORed
 * in, so the destination must start out zeroed.
 */
static void add_bits(uint8_t **p, int *b, uint64_t d, int c)
{
	int i;

	for (i = 0; i < c; i++) {
		/* A 64-bit source has no bits beyond index 63. */
		if (i < 64 && ((d >> i) & 1))
			**p |= (uint8_t)(1u << *b);

		/* Step to the next bit, rolling over into the next byte. */
		if (*b == 7) {
			*b = 0;
			(*p)++;
		} else {
			(*b)++;
		}
	}
}

/*
 * Extract c bits from a bit stream, least significant bit first, and
 * return them as an integer.
 *
 * *p is the byte currently being read and *b the next bit (0..7) inside
 * it; both are advanced past the consumed bits.
 */
static uint64_t read_bits(uint8_t **p, int *b, int c)
{
	uint64_t value = 0;
	int i;

	for (i = 0; i < c; i++) {
		/* Only 64 bits fit in the result; further bits are skipped. */
		if (i < 64 && (**p & (uint8_t)(1u << *b)))
			value |= (uint64_t)1 << i;

		/* Step to the next bit, rolling over into the next byte. */
		if (*b == 7) {
			*b = 0;
			(*p)++;
		} else {
			(*b)++;
		}
	}
	return value;
}

static int sim_command(uint8_t op, uint8_t addr, uint64_t *data)
{
uint8_t buf[16], *p;
uint64_t d = data ? *data : 0;
int r, s, b = 0;

memset(buf, 0, 16);
p = buf+1;
add_bits(&p, &b, op, 2);
add_bits(&p, &b, d, 64);
add_bits(&p, &b, addr, 8);
if (b)
p++;
buf[0] = 74;
if (0)
{
int i;

for (i=0; i<(p-buf); i++)
printf("%02x ", buf[i]);
printf("\n");
}
write(sim_fd, buf, p - buf);
r = read(sim_fd, buf, 127);
if (0 && r > 0) {
int i;

for (i=0; i<r; i++)
printf("%02x ", buf[i]);
printf("\n");
}
p = buf+1;
b = 0;
r = read_bits(&p, &b, 2);
if (data)
*data = read_bits(&p, &b, 64);
return r;
}

/* Backend that talks to a running simulation over the TCP socket opened by
 * sim_init(); selected with "-b sim". */
static struct backend sim_backend = {
.init = sim_init,
.reset = sim_reset,
.command = sim_command,
};

/* -------------- JTAG backend -------------- */

/* UrJTAG chain handle, allocated by jtag_init(). */
static urj_chain_t *jc;

/*
 * Open the JTAG cable and prepare the first part on the chain for DMI
 * access.
 *
 * target is the UrJTAG cable driver name; NULL selects "DigilentHS1".
 * A "name:params" form is rejected because cable parameters are not
 * supported yet.
 *
 * On success the part has the USER2 instruction (74-bit data register)
 * loaded, which is what jtag_command() shifts through. Returns 0 on
 * success, -1 after printing a diagnostic.
 */
static int jtag_init(const char *target)
{
	const char *cable;
	char *params[] = { NULL, };
	urj_part_t *p;
	uint32_t id;
	int rc;

	if (!target)
		target = "DigilentHS1";
	if (strchr(target, ':')) {
		fprintf(stderr, "jtag cable params not supported yet\n");
		return -1;
	}
	/* Without params the whole target is the cable name, so no copy is
	 * needed. (The original computed "sep - target" with sep == NULL.) */
	cable = target;
	if (debug)
		printf("Opening jtag backend cable '%s'\n", cable);

	jc = urj_tap_chain_alloc();
	if (!jc) {
		fprintf(stderr, "Failed to alloc JTAG\n");
		return -1;
	}
	jc->main_part = 0;

	rc = urj_tap_chain_connect(jc, cable, params);
	if (rc != URJ_STATUS_OK) {
		fprintf(stderr, "JTAG cable detect failed\n");
		return -1;
	}

	/* XXX Hard wire part 0, that might need to change (use params and detect !) */
	/* NOTE(review): 6 is presumably the instruction register length; it
	 * matches the 6-bit opcodes defined below - confirm against UrJTAG. */
	rc = urj_tap_manual_add(jc, 6);
	if (rc < 0) {
		fprintf(stderr, "JTAG failed to add part !\n");
		return -1;
	}
	if (jc->parts == NULL || jc->parts->len == 0) {
		fprintf(stderr, "JTAG Something's wrong after adding part !\n");
		return -1;
	}
	urj_part_parts_set_instruction(jc->parts, "BYPASS");

	jc->active_part = 0;

	p = urj_tap_chain_active_part(jc);
	if (!p) {
		fprintf(stderr, "Failed to get active JTAG part\n");
		return -1;
	}
	rc = urj_part_data_register_define(p, "IDCODE_REG", 32);
	if (rc != URJ_STATUS_OK) {
		fprintf(stderr, "JTAG failed to add IDCODE_REG register !\n");
		return -1;
	}
	if (urj_part_instruction_define(p, "IDCODE", "001001", "IDCODE_REG") == NULL) {
		fprintf(stderr, "JTAG failed to add IDCODE instruction !\n");
		return -1;
	}
	/* 74 bits = 2 (op) + 64 (data) + 8 (address), as used by jtag_command(). */
	rc = urj_part_data_register_define(p, "USER2_REG", 74);
	if (rc != URJ_STATUS_OK) {
		fprintf(stderr, "JTAG failed to add USER2_REG register !\n");
		return -1;
	}
	if (urj_part_instruction_define(p, "USER2", "000011", "USER2_REG") == NULL) {
		fprintf(stderr, "JTAG failed to add USER2 instruction !\n");
		return -1;
	}

	/* Read and report the device ID, then leave USER2 selected. */
	urj_part_set_instruction(p, "IDCODE");
	urj_tap_chain_shift_instructions(jc);
	urj_tap_chain_shift_data_registers(jc, 1);
	if (!p->active_instruction || !p->active_instruction->data_register) {
		fprintf(stderr, "JTAG failed to select IDCODE instruction !\n");
		return -1;
	}
	id = urj_tap_register_get_value(p->active_instruction->data_register->out);
	printf("Found device ID: 0x%08x\n", id);
	urj_part_set_instruction(p, "USER2");
	urj_tap_chain_shift_instructions(jc);

	return 0;
}

/*
 * Reset hook of the JTAG backend. No reset sequence is implemented, so
 * this only reports success. (The original fell off the end of a non-void
 * function, leaving the return value undefined.)
 */
static int jtag_reset(void)
{
	return 0;
}

/*
 * Perform one DMI exchange through the active part's current data
 * register.
 *
 * Bits 1..0 of the register carry op, bits 65..2 the data and bits 73..66
 * the address. data may be NULL; otherwise *data is shifted in and then
 * replaced with bits 65..2 of the value shifted out.
 *
 * Returns bits 1..0 of the value shifted out, or -1 if the part,
 * instruction or register is missing or a UrJTAG call fails.
 */
static int jtag_command(uint8_t op, uint8_t addr, uint64_t *data)
{
	urj_part_t *part = urj_tap_chain_active_part(jc);
	urj_data_register_t *reg;
	uint64_t wdata = data ? *data : 0;
	int status;

	if (!part || !part->active_instruction)
		return -1;
	reg = part->active_instruction->data_register;
	if (!reg)
		return -1;

	/* Fill in the three fields, then shift; stop at the first failure. */
	if (urj_tap_register_set_value_bit_range(reg->in, op, 1, 0) != URJ_STATUS_OK ||
	    urj_tap_register_set_value_bit_range(reg->in, wdata, 65, 2) != URJ_STATUS_OK ||
	    urj_tap_register_set_value_bit_range(reg->in, addr, 73, 66) != URJ_STATUS_OK ||
	    urj_tap_chain_shift_data_registers(jc, 1) != URJ_STATUS_OK)
		return -1;

	status = urj_tap_register_get_value_bit_range(reg->out, 1, 0);
	if (data)
		*data = urj_tap_register_get_value_bit_range(reg->out, 65, 2);
	return status;
}

/* Backend that reaches the target through a JTAG cable via UrJTAG;
 * selected with "-b jtag". */
static struct backend jtag_backend = {
.init = jtag_init,
.reset = jtag_reset,
.command = jtag_command,
};

/*
 * Read DMI register addr into *data.
 *
 * Issues the read (op 1) and then polls with op 0 until the backend
 * reports status 0. Status 3 means "poll again"; any other non-zero status
 * is reported on stderr and polling continues. Returns 0 on success or the
 * backend's negative error code.
 */
static int dmi_read(uint8_t addr, uint64_t *data)
{
	int rc = b->command(1, addr, data);

	while (rc >= 0) {
		rc = b->command(0, 0, data);
		if (rc == 0)
			return 0;
		if (rc > 0 && rc != 3)
			fprintf(stderr, "Unknown status code %d !\n", rc);
	}
	return rc;
}

/*
 * Write data to DMI register addr.
 *
 * Issues the write (op 2) and then polls with op 0 until the backend
 * reports status 0. Status 3 means "poll again"; any other non-zero status
 * is reported on stderr and polling continues. Returns 0 on success or the
 * backend's negative error code.
 */
static int dmi_write(uint8_t addr, uint64_t data)
{
	int rc = b->command(2, addr, &data);

	while (rc >= 0) {
		rc = b->command(0, 0, NULL);
		if (rc == 0)
			return 0;
		if (rc > 0 && rc != 3)
			fprintf(stderr, "Unknown status code %d !\n", rc);
	}
	return rc;
}

/*
 * Print the core's run state and next instruction address.
 *
 * Reads DBG_CORE_STAT and DBG_CORE_NIA (exiting through check() if either
 * read fails) and decodes the STOPPING / STOPPED / TERM bits into a
 * human-readable line.
 */
static void core_status(void)
{
	uint64_t stat, nia;
	const char *state = "running";
	const char *detail = "";
	bool stopping, stopped, terminated;

	check(dmi_read(DBG_CORE_STAT, &stat), "reading core status");
	check(dmi_read(DBG_CORE_NIA, &nia), "reading core NIA");

	if (debug)
		printf("Core status = 0x%llx\n", (unsigned long long)stat);

	stopping = (stat & DBG_CORE_STAT_STOPPING) != 0;
	stopped = (stat & DBG_CORE_STAT_STOPPED) != 0;
	terminated = (stat & DBG_CORE_STAT_TERM) != 0;

	if (stopped) {
		state = "stopped";
		/* STOPPED without STOPPING: the stop request is already gone. */
		if (!stopping)
			detail = " (restarting?)";
		else if (terminated)
			detail = " (terminated)";
	} else if (stopping) {
		state = "stopping";
	} else if (terminated) {
		state = "odd state (TERM but no STOP)";
	}

	printf("Core: %s%s\n", state, detail);
	printf(" NIA: %016llx\n", (unsigned long long)nia);
}

/* Request a core stop by writing the STOP bit to DBG_CORE_CTRL; exits
 * through check() if the write fails. */
static void core_stop(void)
{
	int rc = dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_STOP);

	check(rc, "stopping core");
}

/* Restart the core by writing the START bit to DBG_CORE_CTRL; exits
 * through check() if the write fails. */
static void core_start(void)
{
	int rc = dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_START);

	check(rc, "starting core");
}

static void core_reset(void)
{
check(dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_START), "resetting core");
}

/*
 * Single-step the core.
 *
 * The core must already be stopped; otherwise a message is printed and
 * nothing is written. DMI failures exit through check().
 */
static void core_step(void)
{
	uint64_t status;
	int rc;

	rc = dmi_read(DBG_CORE_STAT, &status);
	check(rc, "reading core status");

	if (status & DBG_CORE_STAT_STOPPED) {
		rc = dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_STEP);
		check(rc, "stepping core");
		return;
	}
	printf("Core not stopped !\n");
}

/* Reset the instruction cache by writing the ICRESET bit to
 * DBG_CORE_CTRL; exits through check() if the write fails. */
static void icache_reset(void)
{
	int rc = dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_ICRESET);

	check(rc, "resetting icache");
}

static void mem_read(uint64_t addr, uint64_t count)
{
uint64_t data;
int i, rc;

rc = dmi_write(2, 0x7ff);
if (rc < 0)
return;
rc = dmi_write(0, addr);
if (rc < 0)
return;
for (i = 0; i < count; i++) {
rc = dmi_read(1, &data);
if (rc < 0)
return;
printf("%016llx: %016llx\n",
(unsigned long long)addr,
(unsigned long long)data);
addr += 8;
}
}

static void load(const char *filename, uint64_t addr)
{
uint64_t data;
int fd, rc, count;

fd = open(filename, O_RDONLY);
if (fd < 0) {
fprintf(stderr, "Failed to open '%s': %s\n", filename, strerror(errno));
exit(1);
}
// XX dumb, do better
rc = dmi_write(2, 0x7ff);
if (rc < 0)
return;
rc = dmi_write(0, addr);
if (rc < 0)
return;
count = 0;
for (;;) {
data = 0;
rc = read(fd, &data, 8);
if (rc <= 0)
break;
// if (rc < 8) XXX fixup endian ?
dmi_write(1, data);
count += 8;
if (!(count % 1024))
printf("%x...\n", count);
}
printf("%x done.\n", count);
}

/* Print the one-line usage summary for program cmd on stderr and exit
 * with status 1. Does not return. */
static void usage(const char *cmd)
{
	static const char summary[] = "Usage: %s <command> <args>\n";

	fprintf(stderr, summary, cmd);
	exit(1);
}

/*
 * Entry point: parse options, open the selected backend, run the commands
 * given on the command line in order, then print the core status.
 *
 * Options: -b/--backend sim|jtag (required), -t/--target <target>,
 * -d/--debug, -h/--help.
 * Commands: dmiread <addr>, dmiwrite <addr> <data>, creset, stop, start,
 * step, icreset, quit, status, mr <addr> [count], load <file> [addr].
 * All numeric arguments are hexadecimal.
 */
int main(int argc, char *argv[])
{
	const char *progname = argv[0];
	const char *target = NULL;
	int rc, i;

	b = NULL;

	while(1) {
		int c, oindex;
		static struct option lopts[] = {
			{ "help",	no_argument,		0, 'h' },
			{ "backend",	required_argument,	0, 'b' },
			{ "target",	required_argument,	0, 't' },
			{ "debug",	no_argument,		0, 'd' },
			{ 0, 0, 0, 0 }
		};
		c = getopt_long(argc, argv, "dhb:t:", lopts, &oindex);
		if (c < 0)
			break;
		switch(c) {
		case 'h':
			usage(progname);
			break;
		case 'b':
			if (strcmp(optarg, "sim") == 0)
				b = &sim_backend;
			else if (strcmp(optarg, "jtag") == 0)
				b = &jtag_backend;
			else {
				fprintf(stderr, "Unknown backend %s\n", optarg);
				exit(1);
			}
			break;
		case 't':
			target = optarg;
			break;
		case 'd':
			debug = true;
			break;
		default:
			/* getopt_long() already reported the bad option. */
			usage(progname);
			break;
		}
	}

	if (b == NULL) {
		fprintf(stderr, "No backend selected\n");
		exit(1);
	}

	rc = b->init(target);
	if (rc < 0)
		exit(1);
	for (i = optind; i < argc; i++) {
		if (strcmp(argv[i], "dmiread") == 0) {
			uint8_t addr;
			uint64_t data = 0;

			if ((i+1) >= argc)
				usage(argv[0]);
			addr = strtoul(argv[++i], NULL, 16);
			check(dmi_read(addr, &data), "reading DMI register");
			printf("%02x: %016llx\n", addr, (unsigned long long)data);
		} else if (strcmp(argv[i], "dmiwrite") == 0) {
			uint8_t addr;
			uint64_t data;

			if ((i+2) >= argc)
				usage(argv[0]);
			addr = strtoul(argv[++i], NULL, 16);
			/* strtoull: unsigned long is only 32 bits on some hosts. */
			data = strtoull(argv[++i], NULL, 16);
			check(dmi_write(addr, data), "writing DMI register");
		} else if (strcmp(argv[i], "creset") == 0) {
			core_reset();
		} else if (strcmp(argv[i], "stop") == 0) {
			core_stop();
		} else if (strcmp(argv[i], "start") == 0) {
			core_start();
		} else if (strcmp(argv[i], "step") == 0) {
			core_step();
		} else if (strcmp(argv[i], "icreset") == 0) {
			icache_reset();
		} else if (strcmp(argv[i], "quit") == 0) {
			/* A write to DMI address 0xff makes the simulation
			 * exit, so the reply may never arrive and there is
			 * nothing left to query afterwards. */
			dmi_write(0xff, 0);
			return 0;
		} else if (strcmp(argv[i], "status") == 0) {
			/* do nothing, always done below */
		} else if (strcmp(argv[i], "mr") == 0) {
			uint64_t addr, count = 1;

			if ((i+1) >= argc)
				usage(argv[0]);
			addr = strtoull(argv[++i], NULL, 16);
			/* Only a leading decimal digit marks an optional
			 * argument: several command names start with a hex
			 * letter. */
			if (((i+1) < argc) && isdigit((unsigned char)argv[i+1][0]))
				count = strtoull(argv[++i], NULL, 16);
			mem_read(addr, count);
		} else if (strcmp(argv[i], "load") == 0) {
			const char *filename;
			uint64_t addr = 0;

			if ((i+1) >= argc)
				usage(argv[0]);
			filename = argv[++i];
			if (((i+1) < argc) && isdigit((unsigned char)argv[i+1][0]))
				addr = strtoull(argv[++i], NULL, 16);
			load(filename, addr);
		} else {
			fprintf(stderr, "Unknown command %s\n", argv[i]);
			exit(1);
		}
	}
	core_status();
	return 0;
}

@ -0,0 +1,39 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.ALL;

library unisim;
use unisim.vcomponents.all;

-- Simulation stand-in for the BSCANE2 boundary-scan primitive.
--
-- Instead of a real JTAG TAP, the ports are connected to the global
-- glob_jtag record from unisim.vcomponents, which the testbench drives.
-- jtag_chain selects which bit of glob_jtag.sel feeds the sel output.
entity BSCANE2 is
generic(jtag_chain: INTEGER);
port(capture : out std_logic;
drck : out std_logic;
reset : out std_logic;
runtest : out std_logic;
sel : out std_logic;
shift : out std_logic;
tck : out std_logic;
tdi : out std_logic;
tms : out std_logic;
update : out std_logic;
tdo : in std_logic
);
end BSCANE2;

architecture behaviour of BSCANE2 is
-- Short name for the global JTAG record.
alias j : glob_jtag_t is glob_jtag;
begin
sel <= j.sel(jtag_chain);
tck <= j.tck;
-- Gated data-register clock: tck while this chain is selected and a
-- capture or shift is in progress. It is derived from the output ports
-- (readable in VHDL-2008), not directly from the global record.
drck <= tck and sel and (capture or shift);
capture <= j.capture;
reset <= j.reset;
runtest <= j.runtest;
shift <= j.shift;
tdi <= j.tdi;
tms <= j.tms;
update <= j.update;
-- tdo is the only signal flowing back into the global record.
j.tdo <= tdo;
end architecture behaviour;

@ -0,0 +1,12 @@
library IEEE;
use IEEE.std_logic_1164.all;

-- Simulation stand-in for the BUFG clock buffer: the output simply
-- follows the input.
entity BUFG is
port(I : in std_logic;
O : out std_logic
);
end BUFG;
architecture behaviour of BUFG is
begin
O <= I;
end architecture behaviour;

@ -0,0 +1,45 @@
library IEEE;
use IEEE.std_logic_1164.all;

-- Minimal simulation replacement for the unisim vcomponents package. It
-- declares only the two components used here (BSCANE2 and BUFG) plus the
-- global JTAG record that links the testbench to BSCANE2.
package vcomponents is

-- Global JTAG signals. Xilinx implementation hooks that up to
-- their internal JTAG tap, we just expose them for the testbench
-- to use. These are used by our BSCANE2 block.
--
type glob_jtag_t is record
reset : std_logic;
tck : std_logic;
tdo : std_logic;
tdi : std_logic;
tms : std_logic;
-- One select line per chain, indexed 1 to 4.
sel : std_logic_vector(4 downto 1);
capture : std_logic;
shift : std_logic;
update : std_logic;
runtest : std_logic;
end record glob_jtag_t;
signal glob_jtag : glob_jtag_t;

-- Boundary-scan access block; jtag_chain picks the glob_jtag.sel bit.
component BSCANE2 is
generic(jtag_chain: integer);
port(capture : out std_logic;
drck : out std_logic;
reset : out std_logic;
runtest : out std_logic;
sel : out std_logic;
shift : out std_logic;
tck : out std_logic;
tdi : out std_logic;
tms : out std_logic;
update : out std_logic;
tdo : in std_logic
);
end component BSCANE2;
-- Clock buffer.
component BUFG is
port(I : in std_logic;
O : out std_logic
);
end component BUFG;
end package vcomponents;

@ -0,0 +1,105 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.sim_jtag_socket.all;

library unisim;
use unisim.vcomponents.all;

-- Testbench JTAG driver. It has no ports: it drives the global glob_jtag
-- record directly, using messages exchanged with a debug client through
-- the sim_jtag_socket foreign procedures.
entity sim_jtag is
end sim_jtag;

architecture behaviour of sim_jtag is
begin
jtag: process
-- Global JTAG signals (used by BSCANE2 inside dmi_dtm)
alias j : glob_jtag_t is glob_jtag;

-- Super fast JTAG clock for sim. For debugging the JTAG module,
-- change this to something much larger, for example 60ns, to reflect
-- more realistic conditions.
constant jclk_period : time := 1 ns;

-- Polling the socket... this could be made slower when nothing
-- is connected once we have that indication from the C code.
constant poll_period : time := 100 ns;

-- Number of dummy JTAG clocks to inject after a command. (I haven't
-- got that working with UrJtag but at least with sim, having the
-- right number here allows the synchronizers time to complete a
-- command on the first message exchange, thus avoiding the need
-- for two full shifts for a response.)
constant dummy_clocks : integer := 80;

-- Generate `count` full tck periods (low half first, then high half).
procedure clock(count: in INTEGER) is
begin
for i in 1 to count loop
j.tck <= '0';
wait for jclk_period/2;
j.tck <= '1';
wait for jclk_period/2;
end loop;
end procedure clock;

-- Run one data-register scan: pulse capture for one clock, shift
-- cmd'length bits while collecting tdo into rsp, then pulse update.
procedure clock_command(cmd: in std_ulogic_vector;
rsp: out std_ulogic_vector) is
begin
j.capture <= '1';
clock(1);
j.capture <= '0';
clock(1);
j.shift <= '1';
for i in 0 to cmd'length-1 loop
j.tdi <= cmd(i);
-- Shift rsp by one and append the current tdo. When called with
-- equal-length cmd and rsp (as below), the first sampled bit ends
-- up at the lowest index after the final shift.
rsp := rsp(1 to rsp'length-1) & j.tdo;
clock(1);
end loop;
j.shift <= '0';
j.update <= '1';
clock(1);
j.update <= '0';
clock(1);
end procedure clock_command;

-- Message buffers exchanged with the C side, and the message size in bits.
variable cmd : std_ulogic_vector(0 to 247);
variable rsp : std_ulogic_vector(0 to 247);
variable msize : std_ulogic_vector(7 downto 0);
variable size : integer;

begin

-- init & reset
j.reset <= '1';
j.sel <= "0000";
j.capture <= '0';
j.update <= '0';
j.shift <= '0';
j.tdi <= '0';
j.tms <= '0';
j.runtest <= '0';
clock(5);
j.reset <= '0';
clock(5);

-- select chain USER2
-- XXX TODO: Send that via protocol instead
-- XXX TODO: Also maybe have the C code tell us if connected or not
-- and clock when connected.
j.sel <= "0010";
clock(1);
rsp := (others => '0');
-- Poll the socket forever. A size of 0 means no message was received;
-- sizes of 248 and above are ignored.
while true loop
wait for poll_period;
sim_jtag_read_msg(cmd, msize);
size := to_integer(unsigned(msize));
if size /= 0 and size < 248 then
clock_command(cmd(0 to size-1),
rsp(0 to size-1));
sim_jtag_write_msg(rsp, msize);
clock(dummy_clocks);
end if;
end loop;
end process;
end;

@ -0,0 +1,24 @@
library ieee;
use ieee.std_logic_1164.all;

-- VHDL declarations for the C socket helpers used by sim_jtag. Both
-- procedures are bound through the "foreign" attribute (VHPIDIRECT) to C
-- functions of the same name.
package sim_jtag_socket is
-- Fetch a pending message: out_msg receives the bits, out_size the
-- number of bits.
procedure sim_jtag_read_msg(out_msg : out std_ulogic_vector(247 downto 0);
out_size : out std_ulogic_vector(7 downto 0));
attribute foreign of sim_jtag_read_msg : procedure is "VHPIDIRECT sim_jtag_read_msg";
-- Send a response of in_size bits taken from in_msg.
procedure sim_jtag_write_msg(in_msg : in std_ulogic_vector(247 downto 0);
in_size : in std_ulogic_vector(7 downto 0));
attribute foreign of sim_jtag_write_msg : procedure is "VHPIDIRECT sim_jtag_write_msg";
end sim_jtag_socket;

-- The VHDL bodies are placeholders that fail immediately if they are
-- ever executed in place of the foreign implementation.
package body sim_jtag_socket is
procedure sim_jtag_read_msg(out_msg : out std_ulogic_vector(247 downto 0);
out_size : out std_ulogic_vector(7 downto 0)) is
begin
assert false report "VHPI" severity failure;
end sim_jtag_read_msg;
procedure sim_jtag_write_msg(in_msg : in std_ulogic_vector(247 downto 0);
in_size : in std_ulogic_vector(7 downto 0)) is
begin
assert false report "VHPI" severity failure;
end sim_jtag_write_msg;
end sim_jtag_socket;

@ -0,0 +1,222 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <poll.h>
#include <signal.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>

/* XXX Make that some parameter */
#define TCP_PORT 13245
#define MAX_PACKET 32

/* Byte encoding of std_logic values as used in this file. */
#define vhpi0	2	/* forcing 0 */
#define vhpi1	3	/* forcing 1 */

/*
 * Encode the low len bits of val as std_logic bytes, most significant bit
 * first, into p[0..len-1].
 *
 * len must not exceed 64; a larger value is reported and the process
 * exits.
 */
static void to_std_logic_vector(unsigned long val, unsigned char *p,
				unsigned long len)
{
	unsigned long remaining;

	if (len > 64) {
		fprintf(stderr, "%s: invalid length %lu\n", __func__, len);
		exit(1);
	}

	/* Walk from bit len-1 down to bit 0. */
	for (remaining = len; remaining > 0; remaining--) {
		unsigned long bit = (val >> (remaining - 1)) & 1;

		*p++ = bit ? vhpi1 : vhpi0;
	}
}

/*
 * Decode len std_logic bytes (most significant bit first) from p into an
 * integer.
 *
 * Any byte that is neither vhpi0 nor vhpi1 is reported on stderr and
 * treated as 0. len must not exceed 64; a larger value is reported and
 * the process exits.
 */
static uint64_t from_std_logic_vector(unsigned char *p, unsigned long len)
{
	/* 64-bit accumulator: the function accepts up to 64 bits and returns
	 * uint64_t, but unsigned long is only 32 bits wide on some hosts. */
	uint64_t ret = 0;

	if (len > 64) {
		fprintf(stderr, "%s: invalid length %lu\n", __func__, len);
		exit(1);
	}

	for (unsigned long i = 0; i < len; i++) {
		unsigned char bit;

		if (*p == vhpi0) {
			bit = 0;
		} else if (*p == vhpi1) {
			bit = 1;
		} else {
			fprintf(stderr, "%s: bad bit %d\n", __func__, *p);
			bit = 0;
		}

		ret = (ret << 1) | bit;
		p++;
	}

	return ret;
}

/* Listening socket: -1 = not opened yet, -2 = setup failed, else the fd. */
static int fd = -1;
/* Connected client socket, or -1 when no client is connected. */
static int cfd = -1;

/*
 * Create the non-blocking listening socket on TCP_PORT.
 *
 * Runs at most once: it does nothing unless fd is still -1. On failure
 * the socket is closed and fd is set to -2 so that no retry happens.
 * SIGPIPE is ignored from here on.
 */
static void open_socket(void)
{
	struct sockaddr_in addr;
	const char *failure = NULL;
	int opt = 1;
	int flags, rc;

	if (fd != -1)
		return;

	signal(SIGPIPE, SIG_IGN);
	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Failed to open debug socket !\r\n");
		fd = -2;
		return;
	}

	/* Failing to switch to non-blocking mode is reported but not fatal. */
	flags = fcntl(fd, F_GETFL);
	rc = (flags >= 0) ? fcntl(fd, F_SETFL, flags | O_NONBLOCK) : 0;
	if (flags < 0 || rc < 0) {
		fprintf(stderr, "Failed to configure debug socket !\r\n");
	}

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(TCP_PORT);
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		failure = "Failed to bind debug socket !\r\n";
	else if (listen(fd, 1) < 0)
		failure = "Failed to listen to debug socket !\r\n";

	if (failure) {
		fputs(failure, stderr);
		close(fd);
		fd = -2;
		return;
	}
	fprintf(stderr, "Debug socket ready\r\n");
}

/*
 * Try to accept a client on the listening socket.
 *
 * Stores the result of accept() in cfd, so cfd stays negative when no
 * client could be accepted. A successful connection is announced on stderr.
 */
static void check_connection(void)
{
	struct sockaddr_in peer;
	socklen_t peer_len = sizeof(peer);

	cfd = accept(fd, (struct sockaddr *)&peer, &peer_len);
	if (cfd >= 0)
		fprintf(stderr, "Debug client connected !\r\n");
}

/*
 * Foreign implementation of the VHDL procedure sim_jtag_read_msg.
 *
 * Opens the listening socket on first use, accepts a client if none is
 * connected, and polls the client without blocking. When a packet is
 * available, its first byte is the message size in bits; the payload
 * bits are written to out_msg as std_logic bytes, taking the least
 * significant bit of each byte first.
 *
 * out_size always receives the size as an 8-bit std_logic vector: 0 when
 * nothing was received, 255 for the special "JTAG reset" size (out_msg is
 * then left untouched).
 */
void sim_jtag_read_msg(unsigned char *out_msg, unsigned char *out_size)
{
unsigned char data[MAX_PACKET];
unsigned char size = 0;
struct pollfd fdset[1];
int rc, i;

if (fd == -1)
open_socket();
if (fd < 0)
goto finish;
if (cfd < 0)
check_connection();
if (cfd < 0)
goto finish;

/* Zero timeout: only check whether data is already waiting. */
memset(fdset, 0, sizeof(fdset));
fdset[0].fd = cfd;
fdset[0].events = POLLIN;
rc = poll(fdset, 1, 0);
if (rc <= 0)
goto finish;
rc = read(cfd, data, MAX_PACKET);
if (rc < 0)
fprintf(stderr, "Debug read error, assuming client disconnected !\r\n");
if (rc == 0)
fprintf(stderr, "Debug client disconnected !\r\n");
/* Drop the client on error or EOF so a new one can be accepted later. */
if (rc <= 0) {
close(cfd);
cfd = -1;
goto finish;
}

#if 0
fprintf(stderr, "Got message:\n\r");
{
for (i=0; i<rc; i++)
fprintf(stderr, "%02x ", data[i]);
fprintf(stderr, "\n\r");
}
#endif
size = data[0]; /* Size in bits */

/* Special sizes */
if (size == 255) {
/* JTAG reset, message to translate */
goto finish;
}

/* Fewer payload bytes than the header promised: use what arrived. */
if (((rc - 1) * 8) < size) {
fprintf(stderr, "Debug short read: %d bytes for %d bits, truncating\r\n",
rc - 1, size);
size = (rc - 1) * 8;
}

/* Unpack the payload (which starts at data[1]) bit by bit. */
for (i = 0; i < size; i++) {
int byte = i >> 3;
int bit = 1 << (i & 7);
out_msg[i] = (data[byte+1] & bit) ? vhpi1 : vhpi0;
}
finish:
to_std_logic_vector(size, out_size, 8);
}

/*
 * Foreign implementation of the VHDL procedure sim_jtag_write_msg.
 *
 * Packs in_size bits from in_msg (std_logic bytes, filling the least
 * significant bit of each byte first) into a packet whose first byte is
 * the size in bits, and sends it to the connected client.
 *
 * Nothing is sent when no client is connected. A size that does not fit
 * in a packet is reported and truncated. Write errors are reported and
 * otherwise ignored.
 */
void sim_jtag_write_msg(unsigned char *in_msg, unsigned char *in_size)
{
	unsigned char data[MAX_PACKET];
	unsigned int size, len, i;
	int rc;

	if (cfd < 0)
		return;

	size = from_std_logic_vector(in_size, 8);
	/* One byte of the packet is the header; the rest holds payload. */
	if (size > (MAX_PACKET - 1) * 8) {
		fprintf(stderr, "Debug response too large: %u bits, truncating\r\n",
			size);
		size = (MAX_PACKET - 1) * 8;
	}

	/* Start from zero so padding bits in the last byte are defined. */
	memset(data, 0, sizeof(data));
	data[0] = size;
	for (i = 0; i < size; i++) {
		if (in_msg[i] == vhpi1)
			data[1 + (i >> 3)] |= 1 << (i & 7);
	}

	/* Header byte plus the payload rounded up to whole bytes. The original
	 * left the header out of the length and so dropped the last payload
	 * byte. */
	len = 1 + (size + 7) / 8;

	rc = write(cfd, data, len);
	if (rc < 0)
		fprintf(stderr, "Debug write error, ignoring\r\n");
}

@ -1,8 +1,9 @@
library ieee; library ieee;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all; use ieee.math_real.all;

use std.textio.all; use std.textio.all;
use std.env.stop;


library work; library work;
use work.common.all; use work.common.all;
@ -24,7 +25,10 @@ entity soc is


-- UART0 signals: -- UART0 signals:
uart0_txd : out std_ulogic; uart0_txd : out std_ulogic;
uart0_rxd : in std_ulogic uart0_rxd : in std_ulogic;

-- Misc (to use for things like LEDs)
core_terminated : out std_ulogic
); );
end entity soc; end entity soc;


@ -35,6 +39,8 @@ architecture behaviour of soc is
signal wishbone_dcore_out : wishbone_master_out; signal wishbone_dcore_out : wishbone_master_out;
signal wishbone_icore_in : wishbone_slave_out; signal wishbone_icore_in : wishbone_slave_out;
signal wishbone_icore_out : wishbone_master_out; signal wishbone_icore_out : wishbone_master_out;
signal wishbone_debug_in : wishbone_slave_out;
signal wishbone_debug_out : wishbone_master_out;


-- Wishbone master (output of arbiter): -- Wishbone master (output of arbiter):
signal wb_master_in : wishbone_slave_out; signal wb_master_in : wishbone_slave_out;
@ -50,10 +56,21 @@ architecture behaviour of soc is
signal wb_bram_out : wishbone_slave_out; signal wb_bram_out : wishbone_slave_out;
constant mem_adr_bits : positive := positive(ceil(log2(real(MEMORY_SIZE)))); constant mem_adr_bits : positive := positive(ceil(log2(real(MEMORY_SIZE))));


-- Debug signals (used in SIM only) -- DMI debug bus signals
signal registers : regfile; signal dmi_addr : std_ulogic_vector(7 downto 0);
signal terminate : std_ulogic; signal dmi_din : std_ulogic_vector(63 downto 0);
signal dmi_dout : std_ulogic_vector(63 downto 0);
signal dmi_req : std_ulogic;
signal dmi_wr : std_ulogic;
signal dmi_ack : std_ulogic;


-- Per slave DMI signals
signal dmi_wb_dout : std_ulogic_vector(63 downto 0);
signal dmi_wb_req : std_ulogic;
signal dmi_wb_ack : std_ulogic;
signal dmi_core_dout : std_ulogic_vector(63 downto 0);
signal dmi_core_req : std_ulogic;
signal dmi_core_ack : std_ulogic;
begin begin


-- Processor core -- Processor core
@ -68,21 +85,22 @@ begin
wishbone_insn_out => wishbone_icore_out, wishbone_insn_out => wishbone_icore_out,
wishbone_data_in => wishbone_dcore_in, wishbone_data_in => wishbone_dcore_in,
wishbone_data_out => wishbone_dcore_out, wishbone_data_out => wishbone_dcore_out,
registers => registers, dmi_addr => dmi_addr(3 downto 0),
terminate_out => terminate dmi_dout => dmi_core_dout,
dmi_din => dmi_dout,
dmi_wr => dmi_wr,
dmi_ack => dmi_core_ack,
dmi_req => dmi_core_req
); );


-- Wishbone bus master arbiter & mux -- Wishbone bus master arbiter & mux
wishbone_arbiter_0: entity work.wishbone_arbiter wishbone_arbiter_0: entity work.wishbone_arbiter
port map( port map(
clk => system_clk, clk => system_clk, rst => rst,
rst => rst, wb1_in => wishbone_dcore_out, wb1_out => wishbone_dcore_in,
wb1_in => wishbone_dcore_out, wb2_in => wishbone_icore_out, wb2_out => wishbone_icore_in,
wb1_out => wishbone_dcore_in, wb3_in => wishbone_debug_out, wb3_out => wishbone_debug_in,
wb2_in => wishbone_icore_out, wb_out => wb_master_out, wb_in => wb_master_in
wb2_out => wishbone_icore_in,
wb_out => wb_master_out,
wb_in => wb_master_in
); );


-- Wishbone slaves address decoder & mux -- Wishbone slaves address decoder & mux
@ -122,20 +140,6 @@ begin
end process slave_intercon; end process slave_intercon;


-- Simulated memory and UART -- Simulated memory and UART
sim_terminate_test: if SIM generate

-- Dump registers if core terminates
dump_registers: process(all)
begin
if terminate = '1' then
loop_0: for i in 0 to 31 loop
report "REG " & to_hstring(registers(i));
end loop loop_0;
assert false report "end of test" severity failure;
end if;
end process;

end generate;


-- UART0 wishbone slave -- UART0 wishbone slave
-- XXX FIXME: Need a proper wb64->wb8 adapter that -- XXX FIXME: Need a proper wb64->wb8 adapter that
@ -174,4 +178,81 @@ begin
wishbone_out => wb_bram_out wishbone_out => wb_bram_out
); );


-- DMI(debug bus) <-> JTAG bridge
dtm: entity work.dmi_dtm
generic map(
ABITS => 8,
DBITS => 64
)
port map(
sys_clk => system_clk,
sys_reset => rst,
dmi_addr => dmi_addr,
dmi_din => dmi_din,
dmi_dout => dmi_dout,
dmi_req => dmi_req,
dmi_wr => dmi_wr,
dmi_ack => dmi_ack
);

-- DMI interconnect
-- Combinational DMI address decoder and response mux. It routes the
-- request to the slave selected by dmi_addr and returns that slave's ack
-- and read data. Unmapped addresses are acked immediately and read as
-- all ones.
--
-- dmi_wr is in the sensitivity list because the SIM magic exit below
-- reads it; it was previously missing.
dmi_intercon: process(dmi_addr, dmi_req, dmi_wr,
                      dmi_wb_ack, dmi_wb_dout,
                      dmi_core_ack, dmi_core_dout)

    -- DMI address map (each address is a full 64-bit register)
    --
    -- Offset:   Size:    Slave:
    --  0         4       Wishbone
    -- 10        16       Core

    type slave_type is (SLAVE_WB,
                        SLAVE_CORE,
                        SLAVE_NONE);
    variable slave : slave_type;
begin
    -- Simple address decoder
    slave := SLAVE_NONE;
    if std_match(dmi_addr, "000000--") then
        slave := SLAVE_WB;
    elsif std_match(dmi_addr, "0001----") then
        slave := SLAVE_CORE;
    end if;

    -- DMI muxing
    dmi_wb_req <= '0';
    dmi_core_req <= '0';
    case slave is
    when SLAVE_WB =>
        dmi_wb_req <= dmi_req;
        dmi_ack <= dmi_wb_ack;
        dmi_din <= dmi_wb_dout;
    when SLAVE_CORE =>
        dmi_core_req <= dmi_req;
        dmi_ack <= dmi_core_ack;
        dmi_din <= dmi_core_dout;
    when others =>
        dmi_ack <= dmi_req;
        dmi_din <= (others => '1');
    end case;

    -- SIM magic exit: a write to address 0xff ends the simulation
    if SIM and dmi_req = '1' and dmi_addr = "11111111" and dmi_wr = '1' then
        stop;
    end if;
end process;

-- Wishbone debug master (TODO: Add a DMI address decoder)
wishbone_debug: entity work.wishbone_debug_master
port map(clk => system_clk, rst => rst,
dmi_addr => dmi_addr(1 downto 0),
dmi_dout => dmi_wb_dout,
dmi_din => dmi_dout,
dmi_wr => dmi_wr,
dmi_ack => dmi_wb_ack,
dmi_req => dmi_wb_req,
wb_in => wishbone_debug_in,
wb_out => wishbone_debug_out);


end architecture behaviour; end architecture behaviour;

@ -4,54 +4,76 @@ use ieee.std_logic_1164.all;
library work; library work;
use work.wishbone_types.all; use work.wishbone_types.all;


-- TODO: Use an array of master/slaves with parametric size
entity wishbone_arbiter is entity wishbone_arbiter is
port ( port (clk : in std_ulogic;
clk : in std_ulogic; rst : in std_ulogic;
rst : in std_ulogic;


wb1_in : in wishbone_master_out; wb1_in : in wishbone_master_out;
wb1_out : out wishbone_slave_out; wb1_out : out wishbone_slave_out;


wb2_in : in wishbone_master_out; wb2_in : in wishbone_master_out;
wb2_out : out wishbone_slave_out; wb2_out : out wishbone_slave_out;


wb_out : out wishbone_master_out; wb3_in : in wishbone_master_out;
wb_in : in wishbone_slave_out wb3_out : out wishbone_slave_out;
);
wb_out : out wishbone_master_out;
wb_in : in wishbone_slave_out
);
end wishbone_arbiter; end wishbone_arbiter;


architecture behave of wishbone_arbiter is architecture behave of wishbone_arbiter is
type wishbone_arbiter_state_t is (IDLE, WB1_BUSY, WB2_BUSY); type wishbone_arbiter_state_t is (IDLE, WB1_BUSY, WB2_BUSY, WB3_BUSY);
signal state : wishbone_arbiter_state_t := IDLE; signal state : wishbone_arbiter_state_t := IDLE;
begin begin
wb1_out <= wb_in when state = WB1_BUSY else wishbone_slave_out_init;
wb2_out <= wb_in when state = WB2_BUSY else wishbone_slave_out_init;


wb_out <= wb1_in when state = WB1_BUSY else wb2_in when state = WB2_BUSY else wishbone_master_out_init; wishbone_muxes: process(state, wb_in, wb1_in, wb2_in, wb3_in)
begin
-- Requests from masters are fully muxed
wb_out <= wb1_in when state = WB1_BUSY else
wb2_in when state = WB2_BUSY else
wb3_in when state = WB3_BUSY else
wishbone_master_out_init;

-- Responses from slave don't need to mux the data bus
wb1_out.dat <= wb_in.dat;
wb2_out.dat <= wb_in.dat;
wb3_out.dat <= wb_in.dat;
wb1_out.ack <= wb_in.ack when state = WB1_BUSY else '0';
wb2_out.ack <= wb_in.ack when state = WB2_BUSY else '0';
wb3_out.ack <= wb_in.ack when state = WB3_BUSY else '0';
end process;


wishbone_arbiter_process: process(clk) wishbone_arbiter_process: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if rst = '1' then if rst = '1' then
state <= IDLE; state <= IDLE;
else else
case state is case state is
when IDLE => when IDLE =>
if wb1_in.cyc = '1' then if wb1_in.cyc = '1' then
state <= WB1_BUSY; state <= WB1_BUSY;
elsif wb2_in.cyc = '1' then elsif wb2_in.cyc = '1' then
state <= WB2_BUSY; state <= WB2_BUSY;
end if; elsif wb3_in.cyc = '1' then
when WB1_BUSY => state <= WB3_BUSY;
if wb1_in.cyc = '0' then end if;
state <= IDLE; when WB1_BUSY =>
end if; if wb1_in.cyc = '0' then
when WB2_BUSY => state <= IDLE;
if wb2_in.cyc = '0' then end if;
state <= IDLE; when WB2_BUSY =>
end if; if wb2_in.cyc = '0' then
end case; state <= IDLE;
end if; end if;
end if; when WB3_BUSY =>
if wb3_in.cyc = '0' then
state <= IDLE;
end if;
end case;
end if;
end if;
end process; end process;
end behave; end behave;

@ -0,0 +1,167 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.wishbone_types.all;

-- Wishbone master driven from the DMI debug bus. It exposes four 64-bit
-- DMI registers, selected by the 2-bit dmi_addr, on one side and a
-- wishbone master port on the other.
entity wishbone_debug_master is
port(clk : in std_ulogic;
rst : in std_ulogic;

-- Debug bus interface
-- dmi_din carries write data into this block and dmi_dout carries read
-- data out of it; dmi_req/dmi_wr qualify the access and dmi_ack
-- answers it.
dmi_addr : in std_ulogic_vector(1 downto 0);
dmi_din : in std_ulogic_vector(63 downto 0);
dmi_dout : out std_ulogic_vector(63 downto 0);
dmi_req : in std_ulogic;
dmi_wr : in std_ulogic;
dmi_ack : out std_ulogic;

-- Wishbone master interface
wb_out : out wishbone_master_out;
wb_in : in wishbone_slave_out
);
end entity wishbone_debug_master;

architecture behaviour of wishbone_debug_master is

    -- Register slots as selected by dmi_addr. Every register is
    -- presented to the DMI as a 64-bit value.
    constant DBG_WB_ADDR : std_ulogic_vector(1 downto 0) := "00";
    constant DBG_WB_DATA : std_ulogic_vector(1 downto 0) := "01";
    constant DBG_WB_CTRL : std_ulogic_vector(1 downto 0) := "10";
    constant DBG_WB_RSVD : std_ulogic_vector(1 downto 0) := "11";

    -- CTRL register layout:
    --
    --   bits 7..0  : wishbone SEL (byte enables)
    --   bit  8     : address auto-increment enable
    --   bits 10..9 : auto-increment step:
    --                  00 -> +1
    --                  01 -> +2
    --                  10 -> +4
    --                  11 -> +8

    -- Address register, control register (plus its 64-bit read-back
    -- image) and the latched wishbone read data
    signal reg_addr     : std_ulogic_vector(63 downto 0);
    signal reg_ctrl_out : std_ulogic_vector(63 downto 0);
    signal reg_ctrl     : std_ulogic_vector(10 downto 0);
    signal data_latch   : std_ulogic_vector(63 downto 0);

    -- Command state machine:
    --   IDLE     - nothing in flight
    --   WB_CYCLE - wishbone cycle active, waiting for the slave's ack
    --   DMI_WAIT - wishbone done, waiting for dmi_req to drop
    type state_t is (IDLE, WB_CYCLE, DMI_WAIT);
    signal state : state_t;

begin

    -- CTRL read-back image: implemented bits in the low part, the
    -- unused upper bits are tied to 0.
    reg_ctrl_out(63 downto 11) <= (others => '0');
    reg_ctrl_out(10 downto 0)  <= reg_ctrl;

    -- DMI read data mux. Any address that is not one of the three
    -- implemented registers reads back as zero.
    dmi_dout <= reg_addr     when dmi_addr = DBG_WB_ADDR else
                data_latch   when dmi_addr = DBG_WB_DATA else
                reg_ctrl_out when dmi_addr = DBG_WB_CTRL else
                (others => '0');

    -- ADDR and CTRL register writes, plus address auto-increment
    reg_write : process(clk)
        subtype autoinc_inc_t is integer range 1 to 8;

        -- Turn the 2-bit step field of CTRL into an address increment.
        function decode_autoinc(c : std_ulogic_vector(1 downto 0))
            return autoinc_inc_t is
        begin
            if c = "00" then
                return 1;
            elsif c = "01" then
                return 2;
            elsif c = "10" then
                return 4;
            else
                -- "11", and any encoding that is not plain 0/1 bits
                return 8;
            end if;
        end function decode_autoinc;

        variable step : autoinc_inc_t;
    begin
        if rising_edge(clk) then
            if rst then
                reg_addr <= (others => '0');
                reg_ctrl <= (others => '0');
            else
                -- Standard register writes coming from the DMI
                if (dmi_req and dmi_wr) = '1' then
                    case dmi_addr is
                        when DBG_WB_ADDR =>
                            reg_addr <= dmi_din;
                        when DBG_WB_CTRL =>
                            reg_ctrl <= dmi_din(10 downto 0);
                        when others =>
                            null;
                    end case;
                end if;

                -- Address auto-increment on the wishbone ack. This is
                -- deliberately the last assignment to reg_addr so that it
                -- takes precedence over a register write in the same cycle.
                if state = WB_CYCLE then
                    if (wb_in.ack and reg_ctrl(8)) = '1' then
                        step := decode_autoinc(reg_ctrl(10 downto 9));
                        reg_addr <= std_ulogic_vector(unsigned(reg_addr) + step);
                    end if;
                end if;
            end if;
        end if;
    end process;

    -- ACK is hard wired to req for register accesses. For accesses to
    -- the data register (aka commands), it is only passed back once the
    -- state machine has seen the wishbone ack.
    --
    -- Note: ack is never driven to a constant 1; dmi_req is simply
    -- reflected back when acking. This fulfills two purposes:
    --
    --  * It avoids polluting the ack signal when another DMI slave is
    --    selected, so the decoder can just OR all the acks together
    --    rather than mux them.
    --
    --  * It makes ack drop in the same cycle as req drops, saving a
    --    clock cycle. This is safe because the state machine will have
    --    left DMI_WAIT by the next cycle, so the signal will not
    --    bounce back up.
    --
    dmi_ack <= '0' when (dmi_addr = DBG_WB_DATA and state /= DMI_WAIT)
               else dmi_req;

    -- Wishbone outputs. Address, data, byte enables and write enable are
    -- direct wires from the registers or from the DMI. cyc and stb always
    -- move together (no pipelining yet...) and are high exactly while the
    -- state machine is in WB_CYCLE.
    wb_drive : process(all)
    begin
        wb_out.adr <= reg_addr;
        wb_out.dat <= dmi_din;
        wb_out.sel <= reg_ctrl(7 downto 0);
        wb_out.we  <= dmi_wr;

        if state = WB_CYCLE then
            wb_out.cyc <= '1';
            wb_out.stb <= '1';
        else
            wb_out.cyc <= '0';
            wb_out.stb <= '0';
        end if;
    end process;

    -- Data latch. The wishbone slave takes the read data away as soon as
    -- the cycle terminates, but it has to stay visible on the DMI until
    -- req goes down, so it is captured here on the ack of a read.
    -- (Q: Should this latch move to dmi_dtm itself ?)
    latch_reads : process(clk)
    begin
        if rising_edge(clk) then
            if state = WB_CYCLE then
                if wb_in.ack = '1' and dmi_wr = '0' then
                    data_latch <= wb_in.dat;
                end if;
            end if;
        end if;
    end process;

    -- Command state machine (generates the wishbone cycle)
    wb_trigger : process(clk)
        variable nxt : state_t;
    begin
        if rising_edge(clk) then
            -- Default: stay where we are
            nxt := state;

            case state is
                when IDLE =>
                    -- Any request on the data register starts a cycle
                    if dmi_req = '1' and dmi_addr = DBG_WB_DATA then
                        nxt := WB_CYCLE;
                    end if;
                when WB_CYCLE =>
                    -- Wait for the slave to acknowledge
                    if wb_in.ack then
                        nxt := DMI_WAIT;
                    end if;
                when DMI_WAIT =>
                    -- Hold until the DMI request is withdrawn
                    if dmi_req = '0' then
                        nxt := IDLE;
                    end if;
            end case;

            -- Synchronous reset overrides any transition
            if rst then
                nxt := IDLE;
            end if;

            state <= nxt;
        end if;
    end process;
end architecture behaviour;
Loading…
Cancel
Save