soc: Implement multiple CPU cores

This adds an 'NCPUS' generic parameter to the soc module, which then
includes that many CPU cores.

The cores have separate addresses on the DMI interconnect, meaning
that external JTAG debug tools can view and control the state of each
core individually.

The syscon module has a new 'cpu_ctrl' register, where byte 0 contains
individual enable bits for each core, and byte 1 indicates the number
of cores.  If a core's enable bit is clear, the core is held in reset.
On system reset, the enable byte is set to 0x01, so only core 0 is
active.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/436/head
Paul Mackerras 2 months ago
parent 23ff954059
commit 9a06b0c182

@ -65,7 +65,8 @@
#define SYS_REG_UART_IS_16550 (1ull << 32) #define SYS_REG_UART_IS_16550 (1ull << 32)
#define SYS_REG_GIT_INFO 0x50 #define SYS_REG_GIT_INFO 0x50
#define SYS_REG_GIT_IS_DIRTY (1ull << 63) #define SYS_REG_GIT_IS_DIRTY (1ull << 63)

#define SYS_REG_CPU_CTRL 0x58
#define SYS_REG_CPU_CTRL_ENABLE 0xff


/* /*
* Register definitions for the potato UART * Register definitions for the potato UART

@ -67,6 +67,7 @@ entity soc is
RAM_INIT_FILE : string; RAM_INIT_FILE : string;
CLK_FREQ : positive; CLK_FREQ : positive;
SIM : boolean; SIM : boolean;
NCPUS : positive := 1;
HAS_FPU : boolean := true; HAS_FPU : boolean := true;
HAS_BTC : boolean := true; HAS_BTC : boolean := true;
DISABLE_FLATTEN_CORE : boolean := false; DISABLE_FLATTEN_CORE : boolean := false;
@ -148,20 +149,18 @@ end entity soc;


architecture behaviour of soc is architecture behaviour of soc is


subtype cpu_index_t is natural range 0 to NCPUS-1;
type dword_percpu_array is array(cpu_index_t) of std_ulogic_vector(63 downto 0);

-- internal reset -- internal reset
signal soc_reset : std_ulogic; signal soc_reset : std_ulogic;


-- Wishbone master signals: -- Wishbone master signals:
signal wishbone_dcore_in : wishbone_slave_out;
signal wishbone_dcore_out : wishbone_master_out;
signal wishbone_icore_in : wishbone_slave_out;
signal wishbone_icore_out : wishbone_master_out;
signal wishbone_debug_in : wishbone_slave_out; signal wishbone_debug_in : wishbone_slave_out;
signal wishbone_debug_out : wishbone_master_out; signal wishbone_debug_out : wishbone_master_out;


-- Arbiter array (ghdl doesn't support assigning the array -- Arbiter array
-- elements in the entity instantiation) constant NUM_WB_MASTERS : positive := NCPUS * 2 + 2;
constant NUM_WB_MASTERS : positive := 4;
signal wb_masters_out : wishbone_master_out_vector(0 to NUM_WB_MASTERS-1); signal wb_masters_out : wishbone_master_out_vector(0 to NUM_WB_MASTERS-1);
signal wb_masters_in : wishbone_slave_out_vector(0 to NUM_WB_MASTERS-1); signal wb_masters_in : wishbone_slave_out_vector(0 to NUM_WB_MASTERS-1);


@ -180,7 +179,7 @@ architecture behaviour of soc is


-- Syscon signals -- Syscon signals
signal dram_at_0 : std_ulogic; signal dram_at_0 : std_ulogic;
signal do_core_reset : std_ulogic; signal do_core_reset : std_ulogic_vector(NCPUS-1 downto 0);
signal alt_reset : std_ulogic; signal alt_reset : std_ulogic;
signal wb_syscon_in : wb_io_master_out; signal wb_syscon_in : wb_io_master_out;
signal wb_syscon_out : wb_io_slave_out; signal wb_syscon_out : wb_io_slave_out;
@ -210,7 +209,7 @@ architecture behaviour of soc is
signal wb_xics_ics_out : wb_io_slave_out; signal wb_xics_ics_out : wb_io_slave_out;
signal int_level_in : std_ulogic_vector(15 downto 0); signal int_level_in : std_ulogic_vector(15 downto 0);
signal ics_to_icp : ics_to_icp_t; signal ics_to_icp : ics_to_icp_t;
signal core_ext_irq : std_ulogic; signal core_ext_irq : std_ulogic_vector(NCPUS-1 downto 0) := (others => '0');


-- GPIO signals: -- GPIO signals:
signal wb_gpio_in : wb_io_master_out; signal wb_gpio_in : wb_io_master_out;
@ -233,12 +232,12 @@ architecture behaviour of soc is
signal dmi_wb_dout : std_ulogic_vector(63 downto 0); signal dmi_wb_dout : std_ulogic_vector(63 downto 0);
signal dmi_wb_req : std_ulogic; signal dmi_wb_req : std_ulogic;
signal dmi_wb_ack : std_ulogic; signal dmi_wb_ack : std_ulogic;
signal dmi_core_dout : std_ulogic_vector(63 downto 0); signal dmi_core_dout : dword_percpu_array;
signal dmi_core_req : std_ulogic; signal dmi_core_req : std_ulogic_vector(NCPUS-1 downto 0);
signal dmi_core_ack : std_ulogic; signal dmi_core_ack : std_ulogic_vector(NCPUS-1 downto 0);


-- Delayed/latched resets and alt_reset -- Delayed/latched resets and alt_reset
signal rst_core : std_ulogic; signal rst_core : std_ulogic_vector(NCPUS-1 downto 0);
signal rst_uart : std_ulogic; signal rst_uart : std_ulogic;
signal rst_xics : std_ulogic; signal rst_xics : std_ulogic;
signal rst_spi : std_ulogic; signal rst_spi : std_ulogic;
@ -270,6 +269,8 @@ architecture behaviour of soc is
signal io_cycle_gpio : std_ulogic; signal io_cycle_gpio : std_ulogic;
signal io_cycle_external : std_ulogic; signal io_cycle_external : std_ulogic;


signal core_run_out : std_ulogic_vector(NCPUS-1 downto 0);

function wishbone_widen_data(wb : wb_io_master_out) return wishbone_master_out is function wishbone_widen_data(wb : wb_io_master_out) return wishbone_master_out is
variable wwb : wishbone_master_out; variable wwb : wishbone_master_out;
begin begin
@ -334,7 +335,9 @@ begin
resets: process(system_clk) resets: process(system_clk)
begin begin
if rising_edge(system_clk) then if rising_edge(system_clk) then
rst_core <= soc_reset or do_core_reset; for i in 0 to NCPUS-1 loop
rst_core(i) <= soc_reset or do_core_reset(i);
end loop;
rst_uart <= soc_reset; rst_uart <= soc_reset;
rst_spi <= soc_reset; rst_spi <= soc_reset;
rst_xics <= soc_reset; rst_xics <= soc_reset;
@ -347,11 +350,12 @@ begin
end if; end if;
end process; end process;


-- Processor core -- Processor cores
processor: entity work.core processors: for i in 0 to NCPUS-1 generate
core: entity work.core
generic map( generic map(
SIM => SIM, SIM => SIM,
CPU_INDEX => 0, CPU_INDEX => i,
HAS_FPU => HAS_FPU, HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC, HAS_BTC => HAS_BTC,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE, DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
@ -367,32 +371,31 @@ begin
) )
port map( port map(
clk => system_clk, clk => system_clk,
rst => rst_core, rst => rst_core(i),
alt_reset => alt_reset_d, alt_reset => alt_reset_d,
run_out => run_out, run_out => core_run_out(i),
wishbone_insn_in => wishbone_icore_in, wishbone_insn_in => wb_masters_in(i + NCPUS),
wishbone_insn_out => wishbone_icore_out, wishbone_insn_out => wb_masters_out(i + NCPUS),
wishbone_data_in => wishbone_dcore_in, wishbone_data_in => wb_masters_in(i),
wishbone_data_out => wishbone_dcore_out, wishbone_data_out => wb_masters_out(i),
wb_snoop_in => wb_snoop, wb_snoop_in => wb_snoop,
dmi_addr => dmi_addr(3 downto 0), dmi_addr => dmi_addr(3 downto 0),
dmi_dout => dmi_core_dout, dmi_dout => dmi_core_dout(i),
dmi_din => dmi_dout, dmi_din => dmi_dout,
dmi_wr => dmi_wr, dmi_wr => dmi_wr,
dmi_ack => dmi_core_ack, dmi_ack => dmi_core_ack(i),
dmi_req => dmi_core_req, dmi_req => dmi_core_req(i),
ext_irq => core_ext_irq ext_irq => core_ext_irq(i)
); );
end generate;

run_out <= or (core_run_out);


-- Wishbone bus master arbiter & mux -- Wishbone bus master arbiter & mux
wb_masters_out <= (0 => wishbone_dcore_out, wb_masters_out(2*NCPUS) <= wishbone_widen_data(wishbone_dma_out);
1 => wishbone_icore_out, wb_masters_out(2*NCPUS + 1) <= wishbone_debug_out;
2 => wishbone_widen_data(wishbone_dma_out), wishbone_dma_in <= wishbone_narrow_data(wb_masters_in(2*NCPUS), wishbone_dma_out.adr);
3 => wishbone_debug_out); wishbone_debug_in <= wb_masters_in(2*NCPUS + 1);
wishbone_dcore_in <= wb_masters_in(0);
wishbone_icore_in <= wb_masters_in(1);
wishbone_dma_in <= wishbone_narrow_data(wb_masters_in(2), wishbone_dma_out.adr);
wishbone_debug_in <= wb_masters_in(3);
wishbone_arbiter_0: entity work.wishbone_arbiter wishbone_arbiter_0: entity work.wishbone_arbiter
generic map( generic map(
NUM_MASTERS => NUM_WB_MASTERS NUM_MASTERS => NUM_WB_MASTERS
@ -780,6 +783,7 @@ begin
-- Syscon slave -- Syscon slave
syscon0: entity work.syscon syscon0: entity work.syscon
generic map( generic map(
NCPUS => NCPUS,
HAS_UART => true, HAS_UART => true,
HAS_DRAM => HAS_DRAM, HAS_DRAM => HAS_DRAM,
BRAM_SIZE => MEMORY_SIZE, BRAM_SIZE => MEMORY_SIZE,
@ -950,7 +954,7 @@ begin
wb_in => wb_xics_icp_in, wb_in => wb_xics_icp_in,
wb_out => wb_xics_icp_out, wb_out => wb_xics_icp_out,
ics_in => ics_to_icp, ics_in => ics_to_icp,
core_irq_out => core_ext_irq core_irq_out => core_ext_irq(0)
); );


xics_ics: entity work.xics_ics xics_ics: entity work.xics_ics
@ -1034,15 +1038,15 @@ begin
); );


-- DMI interconnect -- DMI interconnect
dmi_intercon: process(dmi_addr, dmi_req, dmi_intercon: process(all)
dmi_wb_ack, dmi_wb_dout,
dmi_core_ack, dmi_core_dout)


-- DMI address map (each address is a full 64-bit register) -- DMI address map (each address is a full 64-bit register)
-- --
-- Offset: Size: Slave: -- Offset: Size: Slave:
-- 0 4 Wishbone -- 0 4 Wishbone
-- 10 16 Core -- 10 16 Core 0
-- 20 16 Core 1
-- ... and so on for NCPUS cores


type slave_type is (SLAVE_WB, type slave_type is (SLAVE_WB,
SLAVE_CORE, SLAVE_CORE,
@ -1053,25 +1057,29 @@ begin
slave := SLAVE_NONE; slave := SLAVE_NONE;
if std_match(dmi_addr, "000000--") then if std_match(dmi_addr, "000000--") then
slave := SLAVE_WB; slave := SLAVE_WB;
elsif std_match(dmi_addr, "0001----") then elsif not is_X(dmi_addr) and to_integer(unsigned(dmi_addr(7 downto 4))) <= NCPUS then
slave := SLAVE_CORE; slave := SLAVE_CORE;
end if; end if;


-- DMI muxing -- DMI muxing
dmi_wb_req <= '0'; dmi_wb_req <= '0';
dmi_core_req <= '0'; dmi_core_req <= (others => '0');
dmi_din <= (others => '1');
dmi_ack <= dmi_req;
case slave is case slave is
when SLAVE_WB => when SLAVE_WB =>
dmi_wb_req <= dmi_req; dmi_wb_req <= dmi_req;
dmi_ack <= dmi_wb_ack; dmi_ack <= dmi_wb_ack;
dmi_din <= dmi_wb_dout; dmi_din <= dmi_wb_dout;
when SLAVE_CORE => when SLAVE_CORE =>
dmi_core_req <= dmi_req; for i in 0 to NCPUS-1 loop
dmi_ack <= dmi_core_ack; if not is_X(dmi_addr) and to_integer(unsigned(dmi_addr(7 downto 4))) = i + 1 then
dmi_din <= dmi_core_dout; dmi_core_req(i) <= dmi_req;
dmi_ack <= dmi_core_ack(i);
dmi_din <= dmi_core_dout(i);
end if;
end loop;
when others => when others =>
dmi_ack <= dmi_req;
dmi_din <= (others => '1');
end case; end case;


-- SIM magic exit -- SIM magic exit

@ -9,6 +9,7 @@ use work.wishbone_types.all;


entity syscon is entity syscon is
generic ( generic (
NCPUS : positive := 1;
SIG_VALUE : std_ulogic_vector(63 downto 0) := x"f00daa5500010001"; SIG_VALUE : std_ulogic_vector(63 downto 0) := x"f00daa5500010001";
CLK_FREQ : integer; CLK_FREQ : integer;
HAS_UART : boolean; HAS_UART : boolean;
@ -33,7 +34,7 @@ entity syscon is


-- System control ports -- System control ports
dram_at_0 : out std_ulogic; dram_at_0 : out std_ulogic;
core_reset : out std_ulogic; core_reset : out std_ulogic_vector(NCPUS-1 downto 0);
soc_reset : out std_ulogic; soc_reset : out std_ulogic;
alt_reset : out std_ulogic alt_reset : out std_ulogic
); );
@ -56,6 +57,7 @@ architecture behaviour of syscon is
constant SYS_REG_UART0_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001000"; constant SYS_REG_UART0_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001000";
constant SYS_REG_UART1_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001001"; constant SYS_REG_UART1_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001001";
constant SYS_REG_GIT_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001010"; constant SYS_REG_GIT_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001010";
constant SYS_REG_CPU_CTRL : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001011";


-- Muxed reg read signal -- Muxed reg read signal
signal reg_out : std_ulogic_vector(63 downto 0); signal reg_out : std_ulogic_vector(63 downto 0);
@ -116,6 +118,7 @@ architecture behaviour of syscon is
signal reg_uart0info : std_ulogic_vector(63 downto 0); signal reg_uart0info : std_ulogic_vector(63 downto 0);
signal reg_uart1info : std_ulogic_vector(63 downto 0); signal reg_uart1info : std_ulogic_vector(63 downto 0);
signal reg_gitinfo : std_ulogic_vector(63 downto 0); signal reg_gitinfo : std_ulogic_vector(63 downto 0);
signal reg_cpuctrl : std_ulogic_vector(63 downto 0);
signal info_has_dram : std_ulogic; signal info_has_dram : std_ulogic;
signal info_has_bram : std_ulogic; signal info_has_bram : std_ulogic;
signal info_has_uart : std_ulogic; signal info_has_uart : std_ulogic;
@ -134,7 +137,8 @@ begin
-- Generated output signals -- Generated output signals
dram_at_0 <= '1' when BRAM_SIZE = 0 else reg_ctrl(SYS_REG_CTRL_DRAM_AT_0); dram_at_0 <= '1' when BRAM_SIZE = 0 else reg_ctrl(SYS_REG_CTRL_DRAM_AT_0);
soc_reset <= reg_ctrl(SYS_REG_CTRL_SOC_RESET); soc_reset <= reg_ctrl(SYS_REG_CTRL_SOC_RESET);
core_reset <= reg_ctrl(SYS_REG_CTRL_CORE_RESET); core_reset <= not reg_cpuctrl(NCPUS-1 downto 0) when reg_ctrl(SYS_REG_CTRL_CORE_RESET) = '0'
else (others => '1');
alt_reset <= reg_ctrl(SYS_REG_CTRL_ALT_RESET); alt_reset <= reg_ctrl(SYS_REG_CTRL_ALT_RESET);




@ -187,6 +191,8 @@ begin
55 downto 0 => GIT_HASH, 55 downto 0 => GIT_HASH,
others => '0'); others => '0');


reg_cpuctrl(63 downto 8) <= std_ulogic_vector(to_unsigned(NCPUS, 56));

-- Wishbone response -- Wishbone response
wb_rsp.ack <= wishbone_in.cyc and wishbone_in.stb; wb_rsp.ack <= wishbone_in.cyc and wishbone_in.stb;
with wishbone_in.adr(SYS_REG_BITS downto 1) select reg_out <= with wishbone_in.adr(SYS_REG_BITS downto 1) select reg_out <=
@ -201,6 +207,7 @@ begin
reg_uart0info when SYS_REG_UART0_INFO, reg_uart0info when SYS_REG_UART0_INFO,
reg_uart1info when SYS_REG_UART1_INFO, reg_uart1info when SYS_REG_UART1_INFO,
reg_gitinfo when SYS_REG_GIT_INFO, reg_gitinfo when SYS_REG_GIT_INFO,
reg_cpuctrl when SYS_REG_CPU_CTRL,
(others => '0') when others; (others => '0') when others;
wb_rsp.dat <= reg_out(63 downto 32) when wishbone_in.adr(0) = '1' else wb_rsp.dat <= reg_out(63 downto 32) when wishbone_in.adr(0) = '1' else
reg_out(31 downto 0); reg_out(31 downto 0);
@ -225,6 +232,7 @@ begin
if (rst) then if (rst) then
reg_ctrl <= (SYS_REG_CTRL_ALT_RESET => ctrl_init_alt_reset, reg_ctrl <= (SYS_REG_CTRL_ALT_RESET => ctrl_init_alt_reset,
others => '0'); others => '0');
reg_cpuctrl(7 downto 0) <= x"01"; -- enable cpu 0 only
else else
if wishbone_in.cyc and wishbone_in.stb and wishbone_in.we then if wishbone_in.cyc and wishbone_in.stb and wishbone_in.we then
-- Change this if CTRL ever has more than 32 bits -- Change this if CTRL ever has more than 32 bits
@ -233,6 +241,10 @@ begin
reg_ctrl(SYS_REG_CTRL_BITS-1 downto 0) <= reg_ctrl(SYS_REG_CTRL_BITS-1 downto 0) <=
wishbone_in.dat(SYS_REG_CTRL_BITS-1 downto 0); wishbone_in.dat(SYS_REG_CTRL_BITS-1 downto 0);
end if; end if;
if wishbone_in.adr(SYS_REG_BITS downto 1) = SYS_REG_CPU_CTRL and
wishbone_in.adr(0) = '0' and wishbone_in.sel(0) = '1' then
reg_cpuctrl(7 downto 0) <= wishbone_in.dat(7 downto 0);
end if;
end if; end if;


-- Reset auto-clear -- Reset auto-clear

Loading…
Cancel
Save