Add core logging

This logs 256 bits of data per cycle to a ring buffer in BRAM.  The
data collected can be read out through 2 new SPRs or through the
debug interface.

The new SPRs are LOG_ADDR (724) and LOG_DATA (725).  LOG_ADDR contains
the buffer write pointer in the upper 32 bits (in units of entries,
i.e. 32 bytes) and the read pointer in the lower 32 bits (in units of
doublewords, i.e. 8 bytes).  Reading LOG_DATA gives the doubleword
from the buffer at the read pointer and increments the read pointer.
Setting bit 31 of LOG_ADDR inhibits the trace log system from writing
to the log buffer, so the contents are stable and can be read.

There are two new debug addresses which function similarly to the
LOG_ADDR and LOG_DATA SPRs.  The log is frozen while either or both of
the LOG_ADDR SPR bit 31 or the debug LOG_ADDR register bit 31 are set.

The buffer defaults to 2048 entries, i.e. 64kB.  The size is set by
the LOG_LENGTH generic on the core_debug module.  Software can
determine the length of the buffer because the length is ORed into the
buffer write pointer in the upper 32 bits of LOG_ADDR.  Hence the
length of the buffer can be calculated as 1 << (31 - clz(LOG_ADDR)).

There is a program to format the log entries in a somewhat readable
fashion in scripts/fmt_log/fmt_log.c.  The log_entry struct in that
file describes the layout of the bits in the log entries.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
jtag-port
Paul Mackerras 5 years ago
parent aab84acda8
commit 49a4d9f67a

@ -128,6 +128,12 @@ architecture behave of core is
-- Debug status
signal dbg_core_is_stopped: std_ulogic;

-- Logging signals
signal log_data : std_ulogic_vector(255 downto 0);
signal log_rd_addr : std_ulogic_vector(31 downto 0);
signal log_wr_addr : std_ulogic_vector(31 downto 0);
signal log_rd_data : std_ulogic_vector(63 downto 0);

function keep_h(disable : boolean) return string is
begin
if disable then
@ -183,7 +189,8 @@ begin
flush_in => flush,
stop_in => dbg_core_stop,
e_in => execute1_to_fetch1,
i_out => fetch1_to_icache
i_out => fetch1_to_icache,
log_out => log_data(42 downto 0)
);

fetch1_stall_in <= icache_stall_out or decode2_stall_out;
@ -205,7 +212,8 @@ begin
inval_in => dbg_icache_rst or ex1_icache_inval,
stall_out => icache_stall_out,
wishbone_out => wishbone_insn_out,
wishbone_in => wishbone_insn_in
wishbone_in => wishbone_insn_in,
log_out => log_data(96 downto 43)
);

fetch2_0: entity work.fetch2
@ -227,7 +235,8 @@ begin
stall_in => decode1_stall_in,
flush_in => flush,
f_in => fetch2_to_decode1,
d_out => decode1_to_decode2
d_out => decode1_to_decode2,
log_out => log_data(109 downto 97)
);

decode1_stall_in <= decode2_stall_out;
@ -249,7 +258,8 @@ begin
r_in => register_file_to_decode2,
r_out => decode2_to_register_file,
c_in => cr_file_to_decode2,
c_out => decode2_to_cr_file
c_out => decode2_to_cr_file,
log_out => log_data(119 downto 110)
);
decode2_stall_in <= ex1_stall_out or ls1_stall_out;

@ -267,7 +277,8 @@ begin
dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data,
sim_dump => terminate,
sim_dump_done => sim_cr_dump
sim_dump_done => sim_cr_dump,
log_out => log_data(255 downto 185)
);

cr_file_0: entity work.cr_file
@ -279,7 +290,8 @@ begin
d_in => decode2_to_cr_file,
d_out => cr_file_to_decode2,
w_in => writeback_to_cr_file,
sim_dump => sim_cr_dump
sim_dump => sim_cr_dump,
log_out => log_data(184 downto 172)
);

execute1_0: entity work.execute1
@ -299,7 +311,11 @@ begin
e_out => execute1_to_writeback,
icache_inval => ex1_icache_inval,
dbg_msr_out => msr,
terminate_out => terminate
terminate_out => terminate,
log_out => log_data(134 downto 120),
log_rd_addr => log_rd_addr,
log_rd_data => log_rd_data,
log_wr_addr => log_wr_addr
);

loadstore1_0: entity work.loadstore1
@ -314,7 +330,8 @@ begin
m_out => loadstore1_to_mmu,
m_in => mmu_to_loadstore1,
dc_stall => dcache_stall_out,
stall_out => ls1_stall_out
stall_out => ls1_stall_out,
log_out => log_data(149 downto 140)
);

mmu_0: entity work.mmu
@ -343,7 +360,8 @@ begin
m_out => dcache_to_mmu,
stall_out => dcache_stall_out,
wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out
wishbone_out => wishbone_data_out,
log_out => log_data(171 downto 152)
);

writeback_0: entity work.writeback
@ -356,6 +374,9 @@ begin
complete_out => complete
);

log_data(151 downto 150) <= "00";
log_data(139 downto 135) <= "00000";

debug_0: entity work.core_debug
port map (
clk => clk,
@ -377,6 +398,10 @@ begin
dbg_gpr_ack => dbg_gpr_ack,
dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data,
log_data => log_data,
log_read_addr => log_rd_addr,
log_read_data => log_rd_data,
log_write_addr => log_wr_addr,
terminated_out => terminated_out
);


@ -3,9 +3,14 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.utils.all;
use work.common.all;

entity core_debug is
generic (
-- Length of log buffer
LOG_LENGTH : positive := 2048
);
port (
clk : in std_logic;
rst : in std_logic;
@ -34,6 +39,12 @@ entity core_debug is
dbg_gpr_addr : out gspr_index_t;
dbg_gpr_data : in std_ulogic_vector(63 downto 0);

-- Core logging data
log_data : in std_ulogic_vector(255 downto 0);
log_read_addr : in std_ulogic_vector(31 downto 0);
log_read_data : out std_ulogic_vector(63 downto 0);
log_write_addr : out std_ulogic_vector(31 downto 0);

-- Misc
terminated_out : out std_ulogic
);
@ -77,6 +88,10 @@ architecture behave of core_debug is
-- GSPR register data
constant DBG_CORE_GSPR_DATA : std_ulogic_vector(3 downto 0) := "0101";

-- Log buffer address and data registers
constant DBG_CORE_LOG_ADDR : std_ulogic_vector(3 downto 0) := "0110";
constant DBG_CORE_LOG_DATA : std_ulogic_vector(3 downto 0) := "0111";

-- Some internal wires
signal stat_reg : std_ulogic_vector(63 downto 0);

@ -89,6 +104,38 @@ architecture behave of core_debug is
signal do_gspr_rd : std_ulogic;
signal gspr_index : gspr_index_t;

-- Logging RAM
constant LOG_INDEX_BITS : natural := log2(LOG_LENGTH);
subtype log_ptr_t is unsigned(LOG_INDEX_BITS - 1 downto 0);
type log_array_t is array(0 to LOG_LENGTH - 1) of std_ulogic_vector(255 downto 0);
signal log_array : log_array_t;
signal log_rd_ptr : log_ptr_t;
signal log_wr_ptr : log_ptr_t;
signal log_toggle : std_ulogic;
signal log_wr_enable : std_ulogic;
signal log_rd_ptr_latched : log_ptr_t;
signal log_rd : std_ulogic_vector(255 downto 0);
signal log_dmi_addr : std_ulogic_vector(31 downto 0);
signal log_dmi_data : std_ulogic_vector(63 downto 0);
signal do_dmi_log_rd : std_ulogic;
signal log_dmi_reading : std_ulogic;
signal log_dmi_read_done : std_ulogic;
signal dmi_read_log_data : std_ulogic;
signal dmi_read_log_data_1 : std_ulogic;

function select_dword(data : std_ulogic_vector(255 downto 0);
addr : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
variable firstbit : integer;
begin
firstbit := to_integer(unsigned(addr(1 downto 0))) * 64;
return data(firstbit + 63 downto firstbit);
end;

attribute ram_style : string;
attribute ram_style of log_array : signal is "block";
attribute ram_decomp : string;
attribute ram_decomp of log_array : signal is "power";

begin
-- Single cycle register accesses on DMI except for GSPR data
dmi_ack <= dmi_req when dmi_addr /= DBG_CORE_GSPR_DATA
@ -108,6 +155,8 @@ begin
nia when DBG_CORE_NIA,
msr when DBG_CORE_MSR,
dbg_gpr_data when DBG_CORE_GSPR_DATA,
log_write_addr & log_dmi_addr when DBG_CORE_LOG_ADDR,
log_dmi_data when DBG_CORE_LOG_DATA,
(others => '0') when others;

-- DMI writes
@ -118,6 +167,7 @@ begin
do_step <= '0';
do_reset <= '0';
do_icreset <= '0';
do_dmi_log_rd <= '0';

if (rst) then
stopping <= '0';
@ -151,10 +201,25 @@ begin
end if;
elsif dmi_addr = DBG_CORE_GSPR_INDEX then
gspr_index <= dmi_din(gspr_index_t'left downto 0);
elsif dmi_addr = DBG_CORE_LOG_ADDR then
log_dmi_addr <= dmi_din(31 downto 0);
do_dmi_log_rd <= '1';
end if;
else
report("DMI read from " & to_string(dmi_addr));
end if;

elsif dmi_read_log_data = '0' and dmi_read_log_data_1 = '1' then
-- Increment log_dmi_addr after the end of a read from DBG_CORE_LOG_DATA
log_dmi_addr(LOG_INDEX_BITS + 1 downto 0) <=
std_ulogic_vector(unsigned(log_dmi_addr(LOG_INDEX_BITS+1 downto 0)) + 1);
do_dmi_log_rd <= '1';
end if;
dmi_read_log_data_1 <= dmi_read_log_data;
if dmi_req = '1' and dmi_addr = DBG_CORE_LOG_DATA then
dmi_read_log_data <= '1';
else
dmi_read_log_data <= '0';
end if;

-- Set core stop on terminate. We'll be stopping some time *after*
@ -175,5 +240,51 @@ begin
core_rst <= do_reset;
icache_rst <= do_icreset;
terminated_out <= terminated;

-- Use MSB of read addresses to stop the logging
log_wr_enable <= not (log_read_addr(31) or log_dmi_addr(31));

log_ram: process(clk)
begin
if rising_edge(clk) then
if log_wr_enable = '1' then
log_array(to_integer(log_wr_ptr)) <= log_data;
end if;
log_rd <= log_array(to_integer(log_rd_ptr_latched));
end if;
end process;


log_buffer: process(clk)
variable b : integer;
variable data : std_ulogic_vector(255 downto 0);
begin
if rising_edge(clk) then
if rst = '1' then
log_wr_ptr <= (others => '0');
log_toggle <= '0';
elsif log_wr_enable = '1' then
if log_wr_ptr = to_unsigned(LOG_LENGTH - 1, LOG_INDEX_BITS) then
log_toggle <= not log_toggle;
end if;
log_wr_ptr <= log_wr_ptr + 1;
end if;
if do_dmi_log_rd = '1' then
log_rd_ptr_latched <= unsigned(log_dmi_addr(LOG_INDEX_BITS + 1 downto 2));
else
log_rd_ptr_latched <= unsigned(log_read_addr(LOG_INDEX_BITS + 1 downto 2));
end if;
if log_dmi_read_done = '1' then
log_dmi_data <= select_dword(log_rd, log_dmi_addr);
else
log_read_data <= select_dword(log_rd, log_read_addr);
end if;
log_dmi_read_done <= log_dmi_reading;
log_dmi_reading <= do_dmi_log_rd;
end if;
end process;
log_write_addr(LOG_INDEX_BITS - 1 downto 0) <= std_ulogic_vector(log_wr_ptr);
log_write_addr(LOG_INDEX_BITS) <= '1';
log_write_addr(31 downto LOG_INDEX_BITS + 1) <= (others => '0');
end behave;


@ -18,7 +18,9 @@ entity cr_file is
w_in : in WritebackToCrFileType;

-- debug
sim_dump : in std_ulogic
sim_dump : in std_ulogic;

log_out : out std_ulogic_vector(12 downto 0)
);
end entity cr_file;

@ -27,6 +29,7 @@ architecture behaviour of cr_file is
signal crs_updated : std_ulogic_vector(31 downto 0);
signal xerc : xer_common_t := xerc_init;
signal xerc_updated : xer_common_t;
signal log_data : std_ulogic_vector(12 downto 0);
begin
cr_create_0: process(all)
variable hi, lo : integer := 0;
@ -88,4 +91,14 @@ begin
end process;
end generate;

cr_log: process(clk)
begin
if rising_edge(clk) then
log_data <= w_in.write_cr_enable &
w_in.write_cr_data(31 downto 28) &
w_in.write_cr_mask;
end if;
end process;
log_out <= log_data;

end architecture behaviour;

@ -46,7 +46,9 @@ entity dcache is
stall_out : out std_ulogic;

wishbone_out : out wishbone_master_out;
wishbone_in : in wishbone_slave_out
wishbone_in : in wishbone_slave_out;

log_out : out std_ulogic_vector(19 downto 0)
);
end entity dcache;

@ -419,6 +421,8 @@ architecture rtl of dcache is
ptes(j + TLB_PTE_BITS - 1 downto j) := newpte;
end;

signal log_data : std_ulogic_vector(19 downto 0);

begin

assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE;
@ -1265,4 +1269,22 @@ begin
end if;
end if;
end process;

dcache_log: process(clk)
begin
if rising_edge(clk) then
log_data <= r1.wb.adr(5 downto 3) &
wishbone_in.stall &
wishbone_in.ack &
r1.wb.stb & r1.wb.cyc &
d_out.error &
d_out.valid &
std_ulogic_vector(to_unsigned(op_t'pos(req_op), 3)) &
stall_out &
std_ulogic_vector(to_unsigned(tlb_hit_way, 3)) &
valid_ra &
std_ulogic_vector(to_unsigned(state_t'pos(r1.state), 3));
end if;
end process;
log_out <= log_data;
end;

@ -15,7 +15,9 @@ entity decode1 is
flush_in : in std_ulogic;

f_in : in Fetch2ToDecode1Type;
d_out : out Decode1ToDecode2Type
d_out : out Decode1ToDecode2Type;

log_out : out std_ulogic_vector(12 downto 0)
);
end entity decode1;

@ -352,6 +354,8 @@ architecture behaviour of decode1 is
constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
constant fetch_fail_inst: decode_rom_t := (LDST, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');

signal log_data : std_ulogic_vector(12 downto 0);

begin
decode1_0: process(clk)
begin
@ -474,4 +478,16 @@ begin
-- Update outputs
d_out <= r;
end process;

dec1_log : process(clk)
begin
if rising_edge(clk) then
log_data <= std_ulogic_vector(to_unsigned(insn_type_t'pos(r.decode.insn_type), 6)) &
r.nia(5 downto 2) &
std_ulogic_vector(to_unsigned(unit_t'pos(r.decode.unit), 2)) &
r.valid;
end if;
end process;
log_out <= log_data;

end architecture behaviour;

@ -32,7 +32,9 @@ entity decode2 is
r_out : out Decode2ToRegisterFileType;

c_in : in CrFileToDecode2Type;
c_out : out Decode2ToCrFileType
c_out : out Decode2ToCrFileType;

log_out : out std_ulogic_vector(9 downto 0)
);
end entity decode2;

@ -43,6 +45,8 @@ architecture behaviour of decode2 is

signal r, rin : reg_type;

signal log_data : std_ulogic_vector(9 downto 0);

type decode_input_reg_t is record
reg_valid : std_ulogic;
reg : gspr_index_t;
@ -381,4 +385,19 @@ begin
-- Update outputs
e_out <= r.e;
end process;

dec2_log : process(clk)
begin
if rising_edge(clk) then
log_data <= r.e.nia(5 downto 2) &
r.e.valid &
stopped_out &
stall_out &
r.e.bypass_data3 &
r.e.bypass_data2 &
r.e.bypass_data1;
end if;
end process;
log_out <= log_data;

end architecture behaviour;

@ -36,7 +36,12 @@ entity execute1 is
dbg_msr_out : out std_ulogic_vector(63 downto 0);

icache_inval : out std_ulogic;
terminate_out : out std_ulogic
terminate_out : out std_ulogic;

log_out : out std_ulogic_vector(14 downto 0);
log_rd_addr : out std_ulogic_vector(31 downto 0);
log_rd_data : in std_ulogic_vector(63 downto 0);
log_wr_addr : in std_ulogic_vector(31 downto 0)
);
end entity execute1;

@ -53,6 +58,7 @@ architecture behaviour of execute1 is
slow_op_oe : std_ulogic;
slow_op_xerc : xer_common_t;
ldst_nia : std_ulogic_vector(63 downto 0);
log_addr_spr : std_ulogic_vector(31 downto 0);
end record;
constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit, lr_update => '0',
@ -83,6 +89,11 @@ architecture behaviour of execute1 is
signal x_to_divider: Execute1ToDividerType;
signal divider_to_x: DividerToExecute1Type;

-- signals for logging
signal exception_log : std_ulogic;
signal irq_valid_log : std_ulogic;
signal log_data : std_ulogic_vector(14 downto 0);

type privilege_level is (USER, SUPER);
type op_privilege_array is array(insn_type_t) of privilege_level;
constant op_privilege: op_privilege_array := (
@ -223,6 +234,7 @@ begin
);

dbg_msr_out <= ctrl.msr;
log_rd_addr <= r.log_addr_spr;

a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
@ -767,6 +779,11 @@ begin
result := ctrl.tb;
when SPR_DEC =>
result := ctrl.dec;
when 724 => -- LOG_ADDR SPR
result := log_wr_addr & r.log_addr_spr;
when 725 => -- LOG_DATA SPR
result := log_rd_data;
v.log_addr_spr := std_ulogic_vector(unsigned(r.log_addr_spr) + 1);
when others =>
-- mfspr from unimplemented SPRs should be a nop in
-- supervisor mode and a program interrupt for user mode
@ -840,6 +857,8 @@ begin
case decode_spr_num(e_in.insn) is
when SPR_DEC =>
ctrl_tmp.dec <= c_in;
when 724 => -- LOG_ADDR SPR
v.log_addr_spr := c_in(31 downto 0);
when others =>
-- mtspr to unimplemented SPRs should be a nop in
-- supervisor mode and a program interrupt for user mode
@ -1040,5 +1059,26 @@ begin
l_out <= lv;
e_out <= r.e;
flush_out <= f_out.redirect;

exception_log <= exception;
irq_valid_log <= irq_valid;
end process;

ex1_log : process(clk)
begin
if rising_edge(clk) then
log_data <= ctrl.msr(MSR_EE) & ctrl.msr(MSR_PR) &
ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) &
exception_log &
irq_valid_log &
std_ulogic_vector(to_unsigned(irq_state_t'pos(ctrl.irq_state), 1)) &
"000" &
r.e.write_enable &
r.e.valid &
f_out.redirect &
stall_out &
flush_out;
end if;
end process;
log_out <= log_data;
end architecture behaviour;

@ -24,7 +24,10 @@ entity fetch1 is
e_in : in Execute1ToFetch1Type;

-- Request to icache
i_out : out Fetch1ToIcacheType
i_out : out Fetch1ToIcacheType;

-- outputs to logger
log_out : out std_ulogic_vector(42 downto 0)
);
end entity fetch1;

@ -35,11 +38,13 @@ architecture behaviour of fetch1 is
end record;
signal r, r_next : Fetch1ToIcacheType;
signal r_int, r_next_int : reg_internal_t;
signal log_nia : std_ulogic_vector(42 downto 0);
begin

regs : process(clk)
begin
if rising_edge(clk) then
log_nia <= r.nia(63) & r.nia(43 downto 2);
if r /= r_next then
report "fetch1 rst:" & std_ulogic'image(rst) &
" IR:" & std_ulogic'image(e_in.virt_mode) &
@ -54,6 +59,7 @@ begin
r_int <= r_next_int;
end if;
end process;
log_out <= log_nia;

comb : process(all)
variable v : Fetch1ToIcacheType;

@ -57,7 +57,9 @@ entity icache is
inval_in : in std_ulogic;

wishbone_out : out wishbone_master_out;
wishbone_in : in wishbone_slave_out
wishbone_in : in wishbone_slave_out;

log_out : out std_ulogic_vector(53 downto 0)
);
end entity icache;

@ -198,6 +200,9 @@ architecture rtl of icache is
signal priv_fault : std_ulogic;
signal access_ok : std_ulogic;

-- Output data to logger
signal log_data : std_ulogic_vector(53 downto 0);

-- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t;
signal cache_out : cache_ram_out_t;
@ -674,4 +679,36 @@ begin
end if;
end if;
end process;

data_log: process(clk)
variable lway: way_t;
variable wstate: std_ulogic;
begin
if rising_edge(clk) then
if req_is_hit then
lway := req_hit_way;
else
lway := replace_way;
end if;
wstate := '0';
if r.state /= IDLE then
wstate := '1';
end if;
log_data <= i_out.valid &
i_out.insn &
wishbone_in.ack &
r.wb.adr(5 downto 3) &
r.wb.stb & r.wb.cyc &
wishbone_in.stall &
stall_out &
r.fetch_failed &
r.hit_nia(5 downto 2) &
wstate &
std_ulogic_vector(to_unsigned(lway, 3)) &
req_is_hit & req_is_miss &
access_ok &
ra_valid;
end if;
end process;
log_out <= log_data;
end;

@ -25,7 +25,9 @@ entity loadstore1 is
m_in : in MmuToLoadstore1Type;

dc_stall : in std_ulogic;
stall_out : out std_ulogic
stall_out : out std_ulogic;

log_out : out std_ulogic_vector(9 downto 0)
);
end loadstore1;

@ -80,6 +82,8 @@ architecture behave of loadstore1 is
signal r, rin : reg_stage_t;
signal lsu_sum : std_ulogic_vector(63 downto 0);

signal log_data : std_ulogic_vector(9 downto 0);

-- Generate byte enables from sizes
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin
@ -516,4 +520,18 @@ begin

end process;

ls1_log: process(clk)
begin
if rising_edge(clk) then
log_data <= stall_out &
e_out.exception &
l_out.valid &
m_out.valid &
d_out.valid &
m_in.done &
r.dwords_done &
std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
end if;
end process;
log_out <= log_data;
end;

@ -24,7 +24,9 @@ entity register_file is

-- debug
sim_dump : in std_ulogic;
sim_dump_done : out std_ulogic
sim_dump_done : out std_ulogic;

log_out : out std_ulogic_vector(70 downto 0)
);
end entity register_file;

@ -34,6 +36,7 @@ architecture behaviour of register_file is
signal rd_port_b : std_ulogic_vector(63 downto 0);
signal dbg_data : std_ulogic_vector(63 downto 0);
signal dbg_ack : std_ulogic;
signal log_data : std_ulogic_vector(70 downto 0);
begin
-- synchronous writes
register_write_0: process(clk)
@ -131,4 +134,13 @@ begin
sim_dump_done <= '0';
end generate;

reg_log: process(clk)
begin
if rising_edge(clk) then
log_data <= w_in.write_data &
w_in.write_enable &
w_in.write_reg;
end if;
end process;
log_out <= log_data;
end architecture behaviour;

@ -0,0 +1,12 @@
CFLAGS = -O2 -g -Wall -std=c99

all: fmt_log

fmt_log: fmt_log.c
$(CC) -o $@ $^ $(CFLAGS)

clean:
rm -f fmt_log
distclean:
rm -f *~

@ -0,0 +1,235 @@
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>

typedef unsigned long long u64;

struct log_entry {
u64 nia_lo: 42;
u64 nia_hi: 1;
u64 ic_ra_valid: 1;
u64 ic_access_ok: 1;
u64 ic_is_miss: 1;
u64 ic_is_hit: 1;
u64 ic_way: 3;
u64 ic_state: 1;
u64 ic_part_nia: 4;
u64 ic_fetch_failed: 1;
u64 ic_stall_out: 1;
u64 ic_wb_stall: 1;
u64 ic_wb_cyc: 1;
u64 ic_wb_stb: 1;
u64 ic_wb_adr: 3;
u64 ic_wb_ack: 1;

u64 ic_insn: 32;
u64 ic_valid: 1;
u64 d1_valid: 1;
u64 d1_unit: 2;
u64 d1_part_nia: 4;
u64 d1_insn_type: 6;
u64 d2_bypass_a: 1;
u64 d2_bypass_b: 1;
u64 d2_bypass_c: 1;
u64 d2_stall_out: 1;
u64 d2_stopped_out: 1;
u64 d2_valid: 1;
u64 d2_part_nia: 4;
u64 e1_flush_out: 1;
u64 e1_stall_out: 1;
u64 e1_redirect: 1;
u64 e1_valid: 1;
u64 e1_write_enable: 1;
u64 e1_unused: 3;

u64 e1_irq_state: 1;
u64 e1_irq: 1;
u64 e1_exception: 1;
u64 e1_msr_dr: 1;
u64 e1_msr_ir: 1;
u64 e1_msr_pr: 1;
u64 e1_msr_ee: 1;
u64 pad1: 5;
u64 ls_state: 3;
u64 ls_dw_done: 1;
u64 ls_min_done: 1;
u64 ls_do_valid: 1;
u64 ls_mo_valid: 1;
u64 ls_lo_valid: 1;
u64 ls_eo_except: 1;
u64 ls_stall_out: 1;
u64 pad2: 2;
u64 dc_state: 3;
u64 dc_ra_valid: 1;
u64 dc_tlb_way: 3;
u64 dc_stall_out: 1;
u64 dc_op: 3;
u64 dc_do_valid: 1;
u64 dc_do_error: 1;
u64 dc_wb_cyc: 1;
u64 dc_wb_stb: 1;
u64 dc_wb_ack: 1;
u64 dc_wb_stall: 1;
u64 dc_wb_adr: 3;
u64 cr_wr_mask: 8;
u64 cr_wr_data: 4;
u64 cr_wr_enable: 1;
u64 reg_wr_reg: 6;
u64 reg_wr_enable: 1;

u64 reg_wr_data;
};

#define FLAG(i, y) (log.i? y: ' ')
#define FLGA(i, y, z) (log.i? y: z)
#define PNIA(f) (full_nia[log.f] & 0xff)

const char *units[4] = { "--", "al", "ls", "?3" };
const char *ops[64] =
{
"illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ",
"bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ",
"dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ",
"extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "maddhd ",
"maddhdu", "maddld ", "mcrxr ", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ",
"mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ",
"prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ",
"shr ", "sync ", "tlbie ", "trap ", "xor ", "ffail ", "?62 ", "?63 "
};

const char *spr_names[13] =
{
"lr ", "ctr", "sr0", "sr1", "hr0", "hr1", "sg0", "sg1",
"sg2", "sg3", "hg0", "hg1", "xer"
};
int main(int ac, char **av)
{
struct log_entry log;
u64 full_nia[16];
long int lineno = 1;
FILE *f;
const char *filename;
int i;
long int ncompl = 0;

if (ac != 1 && ac != 2) {
fprintf(stderr, "Usage: %s [filename]\n", av[0]);
exit(1);
}
f = stdin;
if (ac == 2) {
filename = av[1];
f = fopen(filename, "rb");
if (f == NULL) {
perror(filename);
exit(1);
}
}

for (i = 0; i < 15; ++i)
full_nia[i] = i << 2;

while (fread(&log, sizeof(log), 1, f) == 1) {
full_nia[log.nia_lo & 0xf] = (log.nia_hi? 0xc000000000000000: 0) |
(log.nia_lo << 2);
if (lineno % 20 == 1) {
printf(" fetch1 NIA icache decode1 decode2 execute1 loadstore dcache CR GSPR\n");
printf(" ---------------- TAHW S -WB-- pN --insn-- pN un op pN byp FR IIE MSR WC SD MM CE SRTO DE -WB-- c ms reg val\n");
printf(" LdMy t csnSa IA IA it IA abc le srx EPID em tw rd mx tAwp vr csnSa 0 k\n");
}
printf("%4ld %c0000%.11llx %c ", lineno,
(log.nia_hi? 'c': '0'),
(unsigned long long)log.nia_lo << 2,
FLAG(ic_stall_out, '|'));
printf("%c%c%c%d %c %c%c%d%c%c %.2llx ",
FLGA(ic_ra_valid, ' ', 'T'),
FLGA(ic_access_ok, ' ', 'X'),
FLGA(ic_is_hit, 'H', FLGA(ic_is_miss, 'M', ' ')),
log.ic_way,
FLAG(ic_state, 'W'),
FLAG(ic_wb_cyc, 'c'),
FLAG(ic_wb_stb, 's'),
log.ic_wb_adr,
FLAG(ic_wb_stall, 'S'),
FLAG(ic_wb_ack, 'a'),
PNIA(ic_part_nia));
if (log.ic_valid)
printf("%.8x", log.ic_insn);
else if (log.ic_fetch_failed)
printf("!!!!!!!!");
else
printf("--------");
printf(" %c%c %.2llx ",
FLAG(ic_valid, '>'),
FLAG(d2_stall_out, '|'),
PNIA(d1_part_nia));
if (log.d1_valid)
printf("%s %s",
units[log.d1_unit],
ops[log.d1_insn_type]);
else
printf("-- -------");
printf(" %c%c ",
FLAG(d1_valid, '>'),
FLAG(d2_stall_out, '|'));
printf("%.2llx %c%c%c %c%c ",
PNIA(d2_part_nia),
FLAG(d2_bypass_a, 'a'),
FLAG(d2_bypass_b, 'b'),
FLAG(d2_bypass_c, 'c'),
FLAG(d2_valid, '>'),
FLAG(e1_stall_out, '|'));
printf("%c%c %c%c%c %c%c%c%c %c%c ",
FLAG(e1_flush_out, 'F'),
FLAG(e1_redirect, 'R'),
FLAG(e1_irq_state, 'w'),
FLAG(e1_irq, 'I'),
FLAG(e1_exception, 'X'),
FLAG(e1_msr_ee, 'E'),
FLGA(e1_msr_pr, 'u', 's'),
FLAG(e1_msr_ir, 'I'),
FLAG(e1_msr_dr, 'D'),
FLAG(e1_write_enable, 'W'),
FLAG(e1_valid, 'C'));
printf("%c %d%d %c%c %c%c %c ",
FLAG(ls_stall_out, '|'),
log.ls_state,
log.ls_dw_done,
FLAG(ls_mo_valid, 'M'),
FLAG(ls_min_done, 'm'),
FLAG(ls_lo_valid, 'C'),
FLAG(ls_eo_except, 'X'),
FLAG(ls_do_valid, '>'));
printf("%d%c%d%d %c%c %c%c%d%c%c ",
log.dc_state,
FLAG(dc_ra_valid, 'R'),
log.dc_tlb_way,
log.dc_op,
FLAG(dc_do_valid, 'V'),
FLAG(dc_do_error, 'E'),
FLAG(dc_wb_cyc, 'c'),
FLAG(dc_wb_stb, 's'),
log.dc_wb_adr,
FLAG(dc_wb_stall, 'S'),
FLAG(dc_wb_ack, 'a'));
if (log.cr_wr_enable)
printf("%x>%.2x ", log.cr_wr_data, log.cr_wr_mask);
else
printf(" ");
if (log.reg_wr_enable) {
if (log.reg_wr_reg < 32 || log.reg_wr_reg > 44)
printf("r%02d", log.reg_wr_reg);
else
printf("%s", spr_names[log.reg_wr_reg - 32]);
printf("=%.16llx", log.reg_wr_data);
}
printf("\n");
++lineno;
if (log.ls_lo_valid || log.e1_valid)
++ncompl;
}
printf("%ld instructions completed, %.2f CPI\n", ncompl,
(double)(lineno - 1) / ncompl);
exit(0);
}

@ -42,6 +42,9 @@
#define DBG_CORE_GSPR_INDEX 0x14
#define DBG_CORE_GSPR_DATA 0x15

#define DBG_LOG_ADDR 0x16
#define DBG_LOG_DATA 0x17

static bool debug;

struct backend {
@ -548,6 +551,73 @@ static void save(const char *filename, uint64_t addr, uint64_t size)
printf("%x done.\n", count);
}

#define LOG_STOP 0x80000000ull

static void log_start(void)
{
check(dmi_write(DBG_LOG_ADDR, 0), "writing LOG_ADDR");
}

static void log_stop(void)
{
uint64_t lsize, laddr, waddr;

check(dmi_write(DBG_LOG_ADDR, LOG_STOP), "writing LOG_ADDR");
check(dmi_read(DBG_LOG_ADDR, &laddr), "reading LOG_ADDR");
waddr = laddr >> 32;
for (lsize = 1; lsize; lsize <<= 1)
if ((waddr >> 1) < lsize)
break;
waddr &= ~lsize;
printf("Log size = %" PRIu64 " entries, ", lsize);
printf("write ptr = %" PRIx64 "\n", waddr);
}

static void log_dump(const char *filename)
{
FILE *f;
uint64_t lsize, laddr, waddr;
uint64_t orig_laddr;
uint64_t i, ldata;

f = fopen(filename, "w");
if (f == NULL) {
fprintf(stderr, "Failed to create '%s': %s\n", filename,
strerror(errno));
exit(1);
}

check(dmi_read(DBG_LOG_ADDR, &orig_laddr), "reading LOG_ADDR");
if (!(orig_laddr & LOG_STOP))
check(dmi_write(DBG_LOG_ADDR, LOG_STOP), "writing LOG_ADDR");

waddr = orig_laddr >> 32;
for (lsize = 1; lsize; lsize <<= 1)
if ((waddr >> 1) < lsize)
break;
waddr &= ~lsize;
printf("Log size = %" PRIu64 " entries\n", lsize);

laddr = LOG_STOP | (waddr << 2);
check(dmi_write(DBG_LOG_ADDR, laddr), "writing LOG_ADDR");

for (i = 0; i < lsize * 4; ++i) {
check(dmi_read(DBG_LOG_DATA, &ldata), "reading LOG_DATA");
if (fwrite(&ldata, sizeof(ldata), 1, f) != 1) {
fprintf(stderr, "Write error on %s\n", filename);
exit(1);
}
if (!(i % 128)) {
printf("%" PRIu64 "...\r", i * 8);
fflush(stdout);
}
}
fclose(f);
printf("%" PRIu64 " done\n", lsize * 32);

check(dmi_write(DBG_LOG_ADDR, orig_laddr), "writing LOG_ADDR");
}

static void usage(const char *cmd)
{
fprintf(stderr, "Usage: %s -b <jtag|sim> <command> <args>\n", cmd);
@ -572,6 +642,12 @@ static void usage(const char *cmd)
fprintf(stderr, " gpr <reg> [count]\n");
fprintf(stderr, " status\n");

fprintf(stderr, "\n");
fprintf(stderr, " Core logging:\n");
fprintf(stderr, " lstart start logging\n");
fprintf(stderr, " lstop stop logging\n");
fprintf(stderr, " ldump <file> dump log to file\n");

fprintf(stderr, "\n");
fprintf(stderr, " JTAG:\n");
fprintf(stderr, " dmiread <hex addr>\n");
@ -710,6 +786,17 @@ int main(int argc, char *argv[])
if (((i+1) < argc) && isdigit(argv[i+1][0]))
count = strtoul(argv[++i], NULL, 10);
gpr_read(reg, count);
} else if (strcmp(argv[i], "lstart") == 0) {
log_start();
} else if (strcmp(argv[i], "lstop") == 0) {
log_stop();
} else if (strcmp(argv[i], "ldump") == 0) {
const char *filename;

if ((i+1) >= argc)
usage(argv[0]);
filename = argv[++i];
log_dump(filename);
} else {
fprintf(stderr, "Unknown command %s\n", argv[i]);
exit(1);

Loading…
Cancel
Save