microwatt/spi_flash_ctrl.vhdl

602 lines
23 KiB
VHDL

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.wishbone_types.all;
entity spi_flash_ctrl is
generic (
-- Default config for auto-mode
DEF_CLK_DIV : natural := 2; -- Clock divider SCK = CLK/((CLK_DIV+1)*2)
DEF_QUAD_READ : boolean := false; -- Use quad read with 8 clk dummy
-- Number of data lines (1=MISO/MOSI, otherwise 2 or 4)
DATA_LINES : positive := 1
);
port (
clk : in std_ulogic;
rst : in std_ulogic;
-- Wishbone ports:
wb_in : in wb_io_master_out;
wb_out : out wb_io_slave_out;
-- Wishbone extra selects
wb_sel_reg : in std_ulogic;
wb_sel_map : in std_ulogic;
-- SPI port
sck : out std_ulogic;
cs_n : out std_ulogic;
sdat_o : out std_ulogic_vector(DATA_LINES-1 downto 0);
sdat_oe : out std_ulogic_vector(DATA_LINES-1 downto 0);
sdat_i : in std_ulogic_vector(DATA_LINES-1 downto 0)
);
end entity spi_flash_ctrl;
architecture rtl of spi_flash_ctrl is
-- Register indices
constant SPI_REG_BITS : positive := 3;
-- Register addresses (matches wishbone addr downto 2, ie, 4 bytes per reg)
constant SPI_REG_DATA : std_ulogic_vector(SPI_REG_BITS-1 downto 0) := "000";
constant SPI_REG_CTRL : std_ulogic_vector(SPI_REG_BITS-1 downto 0) := "001";
constant SPI_REG_AUTO_CFG : std_ulogic_vector(SPI_REG_BITS-1 downto 0) := "010";
constant SPI_REG_INVALID : std_ulogic_vector(SPI_REG_BITS-1 downto 0) := "111";
-- Control register
signal ctrl_reg : std_ulogic_vector(15 downto 0) := (others => '0');
alias ctrl_reset : std_ulogic is ctrl_reg(0);
alias ctrl_cs : std_ulogic is ctrl_reg(1);
alias ctrl_rsrv1 : std_ulogic is ctrl_reg(2);
alias ctrl_rsrv2 : std_ulogic is ctrl_reg(3);
alias ctrl_div : std_ulogic_vector(7 downto 0) is ctrl_reg(15 downto 8);
-- Auto mode config register
signal auto_cfg_reg : std_ulogic_vector(29 downto 0) := (others => '0');
alias auto_cfg_cmd : std_ulogic_vector(7 downto 0) is auto_cfg_reg(7 downto 0);
alias auto_cfg_dummies : std_ulogic_vector(2 downto 0) is auto_cfg_reg(10 downto 8);
alias auto_cfg_mode : std_ulogic_vector(1 downto 0) is auto_cfg_reg(12 downto 11);
alias auto_cfg_addr4 : std_ulogic is auto_cfg_reg(13);
alias auto_cfg_rsrv1 : std_ulogic is auto_cfg_reg(14);
alias auto_cfg_rsrv2 : std_ulogic is auto_cfg_reg(15);
alias auto_cfg_div : std_ulogic_vector(7 downto 0) is auto_cfg_reg(23 downto 16);
alias auto_cfg_cstout : std_ulogic_vector(5 downto 0) is auto_cfg_reg(29 downto 24);
-- Constants below match top 2 bits of rxtx "mode"
constant SPI_AUTO_CFG_MODE_SINGLE : std_ulogic_vector(1 downto 0) := "00";
constant SPI_AUTO_CFG_MODE_DUAL : std_ulogic_vector(1 downto 0) := "10";
constant SPI_AUTO_CFG_MODE_QUAD : std_ulogic_vector(1 downto 0) := "11";
-- Signals to rxtx
signal cmd_valid : std_ulogic;
signal cmd_clk_div : natural range 0 to 255;
signal cmd_mode : std_ulogic_vector(2 downto 0);
signal cmd_ready : std_ulogic;
signal d_clks : std_ulogic_vector(2 downto 0);
signal d_rx : std_ulogic_vector(7 downto 0);
signal d_tx : std_ulogic_vector(7 downto 0);
signal d_ack : std_ulogic;
signal bus_idle : std_ulogic;
-- Latch to track that we have a pending read
signal pending_read : std_ulogic;
-- Wishbone latches
signal wb_req : wb_io_master_out;
signal wb_stash : wb_io_master_out;
signal wb_rsp : wb_io_slave_out;
-- Wishbone decode
signal wb_valid : std_ulogic;
signal wb_reg_valid : std_ulogic;
signal wb_reg_dat_v : std_ulogic;
signal wb_map_valid : std_ulogic;
signal wb_reg : std_ulogic_vector(SPI_REG_BITS-1 downto 0);
-- Auto mode clock counts XXX FIXME: Look at reasonable values based
-- on system clock maybe ? Or make them programmable.
constant CS_DELAY_ASSERT : integer := 1; -- CS low to cmd
constant CS_DELAY_RECOVERY : integer := 10; -- CS high to CS low
constant DEFAULT_CS_TIMEOUT : integer := 32;
-- Automatic mode state
type auto_state_t is (AUTO_IDLE, AUTO_CS_ON, AUTO_CMD,
AUTO_ADR0, AUTO_ADR1, AUTO_ADR2, AUTO_ADR3,
AUTO_DUMMY,
AUTO_DAT0, AUTO_DAT1, AUTO_DAT2, AUTO_DAT3,
AUTO_DAT0_DATA, AUTO_DAT1_DATA, AUTO_DAT2_DATA, AUTO_DAT3_DATA,
AUTO_SEND_ACK, AUTO_WAIT_REQ, AUTO_RECOVERY);
-- Automatic mode signals
signal auto_cs : std_ulogic;
signal auto_cmd_valid : std_ulogic;
signal auto_cmd_mode : std_ulogic_vector(2 downto 0);
signal auto_d_txd : std_ulogic_vector(7 downto 0);
signal auto_d_clks : std_ulogic_vector(2 downto 0);
signal auto_data_next : std_ulogic_vector(wb_out.dat'left downto 0);
signal auto_cnt_next : integer range 0 to 63;
signal auto_ack : std_ulogic;
signal auto_next : auto_state_t;
signal auto_lad_next : std_ulogic_vector(31 downto 0);
signal auto_latch_adr : std_ulogic;
-- Automatic mode latches
signal auto_data : std_ulogic_vector(wb_out.dat'left downto 0) := (others => '0');
signal auto_cnt : integer range 0 to 63 := 0;
signal auto_state : auto_state_t := AUTO_IDLE;
signal auto_last_addr : std_ulogic_vector(31 downto 0);
begin
-- Instanciate low level shifter
spi_rxtx: entity work.spi_rxtx
generic map (
DATA_LINES => DATA_LINES
)
port map(
rst => rst,
clk => clk,
clk_div_i => cmd_clk_div,
cmd_valid_i => cmd_valid,
cmd_ready_o => cmd_ready,
cmd_mode_i => cmd_mode,
cmd_clks_i => d_clks,
cmd_txd_i => d_tx,
d_rxd_o => d_rx,
d_ack_o => d_ack,
bus_idle_o => bus_idle,
sck => sck,
sdat_o => sdat_o,
sdat_oe => sdat_oe,
sdat_i => sdat_i
);
-- Valid wb command
wb_valid <= wb_req.stb and wb_req.cyc;
wb_reg_valid <= wb_valid and wb_sel_reg;
wb_map_valid <= wb_valid and wb_sel_map;
-- Register decode. For map accesses, make it look like "invalid"
wb_reg <= wb_req.adr(SPI_REG_BITS+1 downto 2) when wb_reg_valid else SPI_REG_INVALID;
-- Shortcut because we test that a lot: data register access
wb_reg_dat_v <= '1' when wb_reg = SPI_REG_DATA else '0';
-- Wishbone request -> SPI request
wb_request_sync: process(clk)
begin
if rising_edge(clk) then
-- We need to latch whether a read is in progress to block
-- a subsequent store, otherwise the acks will collide.
--
-- We are heavy handed and force a wait for an idle bus if
-- a store is behind a load. Shouldn't happen with flashes
-- in practice.
--
if cmd_valid = '1' and cmd_ready = '1' then
pending_read <= '1';
elsif bus_idle = '1' then
pending_read <= '0';
end if;
end if;
end process;
wb_request_comb: process(all)
begin
if ctrl_cs = '1' then
-- Data register access (see wb_request_sync)
cmd_valid <= wb_reg_dat_v and not (pending_read and wb_req.we);
-- Clock divider from control reg
cmd_clk_div <= to_integer(unsigned(ctrl_div));
-- Mode based on sel
if wb_req.sel = "0010" then
-- dual mode
cmd_mode <= "10" & wb_req.we;
d_clks <= "011";
elsif wb_req.sel = "0100" then
-- quad mode
cmd_mode <= "11" & wb_req.we;
d_clks <= "001";
else
-- single bit
cmd_mode <= "01" & wb_req.we;
d_clks <= "111";
end if;
d_tx <= wb_req.dat(7 downto 0);
cs_n <= not ctrl_cs;
else
cmd_valid <= auto_cmd_valid;
cmd_mode <= auto_cmd_mode;
cmd_clk_div <= to_integer(unsigned(auto_cfg_div));
d_tx <= auto_d_txd;
d_clks <= auto_d_clks;
cs_n <= not auto_cs;
end if;
end process;
-- Generate wishbone responses
--
-- Note: wb_out and wb_in should only appear in this synchronous process
--
-- Everything else should work on wb_req and wb_rsp
wb_response_sync: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
wb_out.ack <= '0';
wb_out.stall <= '0';
else
-- Latch wb responses as well for 1 cycle. Stall is updated
-- below
wb_out <= wb_rsp;
-- Implement a stash buffer. If we are stalled and stash is
-- free, fill it up. This will generate a WB stall on the
-- next cycle.
if wb_rsp.stall = '1' and wb_out.stall = '0' and
wb_in.cyc = '1' and wb_in.stb = '1' then
wb_stash <= wb_in;
wb_out.stall <= '1';
end if;
-- We aren't stalled, see what we can do
if wb_rsp.stall = '0' then
if wb_out.stall = '1' then
-- Something in stash ! use it and clear stash
wb_req <= wb_stash;
wb_out.stall <= '0';
else
-- Nothing in stash, grab request from WB
if wb_in.cyc = '1' then
wb_req <= wb_in;
else
wb_req.cyc <= wb_in.cyc;
wb_req.stb <= wb_in.stb;
end if;
end if;
end if;
end if;
end if;
end process;
wb_response_comb: process(all)
begin
-- Defaults
wb_rsp.ack <= '0';
wb_rsp.dat <= x"00" & d_rx & d_rx & d_rx;
wb_rsp.stall <= '0';
-- Depending on the access type...
if wb_map_valid = '1' then
-- Memory map access
wb_rsp.stall <= not auto_ack; -- XXX FIXME: Allow pipelining
wb_rsp.ack <= auto_ack;
wb_rsp.dat <= auto_data;
elsif ctrl_cs = '1' and wb_reg = SPI_REG_DATA then
-- Data register in manual mode
--
-- Stall stores if there's a pending read to avoid
-- acks colliding. Otherwise accept all accesses
-- immediately if rxtx is ready.
--
-- Note: This must match the logic setting cmd_valid
-- in wb_request_comb.
--
-- We also ack stores immediately when accepted. Loads
-- are handled separately further down.
--
if wb_req.we = '1' and pending_read = '1' then
wb_rsp.stall <= '1';
else
wb_rsp.ack <= wb_req.we and cmd_ready;
wb_rsp.stall <= not cmd_ready;
end if;
-- Note: loads acks are handled elsewhere
elsif wb_reg_valid = '1' then
-- Normal register access
--
-- Normally single cycle but ensure any auto-mode or manual
-- operation is complete first
--
if auto_state = AUTO_IDLE and bus_idle = '1' then
wb_rsp.ack <= '1';
wb_rsp.stall <= '0';
case wb_reg is
when SPI_REG_CTRL =>
wb_rsp.dat <= (ctrl_reg'range => ctrl_reg, others => '0');
when SPI_REG_AUTO_CFG =>
wb_rsp.dat <= (auto_cfg_reg'range => auto_cfg_reg, others => '0');
when others => null;
end case;
else
wb_rsp.stall <= '1';
end if;
end if;
-- For loads in manual mode, we've accepted the command early
-- so none of the above connditions might be true. We thus need
-- to send the ack whenever we are getting it from rxtx.
--
-- This shouldn't collide with any of the above acks because we hold
-- normal register accesses and stores when there is a pending
-- load or the bus is busy.
--
if ctrl_cs = '1' and d_ack = '1' then
assert pending_read = '1' report "d_ack without pending read !" severity failure;
wb_rsp.ack <= '1';
end if;
end process;
-- Automatic mode state machine
auto_sync: process(clk)
begin
if rising_edge(clk) then
auto_state <= auto_next;
auto_cnt <= auto_cnt_next;
auto_data <= auto_data_next;
if auto_latch_adr = '1' then
auto_last_addr <= auto_lad_next;
end if;
end if;
end process;
auto_comb: process(all)
variable addr : std_ulogic_vector(31 downto 0);
variable req_is_next : boolean;
function mode_to_clks(mode: std_ulogic_vector(1 downto 0)) return std_ulogic_vector is
begin
if mode = SPI_AUTO_CFG_MODE_QUAD then
return "001";
elsif mode = SPI_AUTO_CFG_MODE_DUAL then
return "011";
else
return "111";
end if;
end function;
begin
-- Default outputs
auto_ack <= '0';
auto_cs <= '0';
auto_cmd_valid <= '0';
auto_d_txd <= x"00";
auto_cmd_mode <= "001";
auto_d_clks <= "111";
auto_latch_adr <= '0';
-- Default next state
auto_next <= auto_state;
auto_cnt_next <= auto_cnt;
auto_data_next <= auto_data;
-- Convert wishbone address into a flash address. We mask
-- off the 4 top address bits to get rid of the "f" there.
addr := "00" & wb_req.adr(29 downto 2) & "00";
-- Calculate the next address for store & compare later
auto_lad_next <= std_ulogic_vector(unsigned(addr) + 4);
-- Match incoming request address with next address
req_is_next := addr = auto_last_addr;
-- XXX TODO:
-- - Support < 32-bit accesses
-- Reset
if rst = '1' or ctrl_reset = '1' then
auto_cs <= '0';
auto_cnt_next <= 0;
auto_next <= AUTO_IDLE;
else
-- Run counter
if auto_cnt /= 0 then
auto_cnt_next <= auto_cnt - 1;
end if;
-- Automatic CS is set whenever state isn't IDLE or RECOVERY
if auto_state /= AUTO_IDLE and
auto_state /= AUTO_RECOVERY then
auto_cs <= '1';
end if;
-- State machine
case auto_state is
when AUTO_IDLE =>
-- Access to the memory map only when manual CS isn't set
if wb_map_valid = '1' and ctrl_cs = '0' then
-- Ignore writes, we don't support them yet
if wb_req.we = '1' then
auto_ack <= '1';
else
-- Start machine with CS assertion delay
auto_next <= AUTO_CS_ON;
auto_cnt_next <= CS_DELAY_ASSERT;
end if;
end if;
when AUTO_CS_ON =>
if auto_cnt = 0 then
-- CS asserted long enough, send command
auto_next <= AUTO_CMD;
end if;
when AUTO_CMD =>
auto_d_txd <= auto_cfg_cmd;
auto_cmd_valid <= '1';
if cmd_ready = '1' then
if auto_cfg_addr4 = '1' then
auto_next <= AUTO_ADR3;
else
auto_next <= AUTO_ADR2;
end if;
end if;
when AUTO_ADR3 =>
auto_d_txd <= addr(31 downto 24);
auto_cmd_valid <= '1';
if cmd_ready = '1' then
auto_next <= AUTO_ADR2;
end if;
when AUTO_ADR2 =>
auto_d_txd <= addr(23 downto 16);
auto_cmd_valid <= '1';
if cmd_ready = '1' then
auto_next <= AUTO_ADR1;
end if;
when AUTO_ADR1 =>
auto_d_txd <= addr(15 downto 8);
auto_cmd_valid <= '1';
if cmd_ready = '1' then
auto_next <= AUTO_ADR0;
end if;
when AUTO_ADR0 =>
auto_d_txd <= addr(7 downto 0);
auto_cmd_valid <= '1';
if cmd_ready = '1' then
if auto_cfg_dummies = "000" then
auto_next <= AUTO_DAT0;
else
auto_next <= AUTO_DUMMY;
end if;
end if;
when AUTO_DUMMY =>
auto_cmd_valid <= '1';
auto_d_clks <= auto_cfg_dummies;
if cmd_ready = '1' then
auto_next <= AUTO_DAT0;
end if;
when AUTO_DAT0 =>
auto_cmd_valid <= '1';
auto_cmd_mode <= auto_cfg_mode & "0";
auto_d_clks <= mode_to_clks(auto_cfg_mode);
if cmd_ready = '1' then
auto_next <= AUTO_DAT0_DATA;
end if;
when AUTO_DAT0_DATA =>
if d_ack = '1' then
auto_data_next(7 downto 0) <= d_rx;
auto_next <= AUTO_DAT1;
end if;
when AUTO_DAT1 =>
auto_cmd_valid <= '1';
auto_cmd_mode <= auto_cfg_mode & "0";
auto_d_clks <= mode_to_clks(auto_cfg_mode);
if cmd_ready = '1' then
auto_next <= AUTO_DAT1_DATA;
end if;
when AUTO_DAT1_DATA =>
if d_ack = '1' then
auto_data_next(15 downto 8) <= d_rx;
auto_next <= AUTO_DAT2;
end if;
when AUTO_DAT2 =>
auto_cmd_valid <= '1';
auto_cmd_mode <= auto_cfg_mode & "0";
auto_d_clks <= mode_to_clks(auto_cfg_mode);
if cmd_ready = '1' then
auto_next <= AUTO_DAT2_DATA;
end if;
when AUTO_DAT2_DATA =>
if d_ack = '1' then
auto_data_next(23 downto 16) <= d_rx;
auto_next <= AUTO_DAT3;
end if;
when AUTO_DAT3 =>
auto_cmd_valid <= '1';
auto_cmd_mode <= auto_cfg_mode & "0";
auto_d_clks <= mode_to_clks(auto_cfg_mode);
if cmd_ready = '1' then
auto_next <= AUTO_DAT3_DATA;
end if;
when AUTO_DAT3_DATA =>
if d_ack = '1' then
auto_data_next(31 downto 24) <= d_rx;
auto_next <= AUTO_SEND_ACK;
auto_latch_adr <= '1';
end if;
when AUTO_SEND_ACK =>
auto_ack <= '1';
auto_cnt_next <= to_integer(unsigned(auto_cfg_cstout));
auto_next <= AUTO_WAIT_REQ;
when AUTO_WAIT_REQ =>
-- Incoming bus request we can take ? Otherwise do we need
-- to cancel the wait ?
if wb_map_valid = '1' and req_is_next and wb_req.we = '0' then
auto_next <= AUTO_DAT0;
elsif wb_map_valid = '1' or wb_reg_valid = '1' or auto_cnt = 0 then
-- This means we can drop the CS right on the next clock.
-- We make the assumption here that the two cycles min
-- spent in AUTO_SEND_ACK and AUTO_WAIT_REQ are long enough
-- to deassert CS. If that doesn't hold true in the future,
-- add another state.
auto_cnt_next <= CS_DELAY_RECOVERY;
auto_next <= AUTO_RECOVERY;
end if;
when AUTO_RECOVERY =>
if auto_cnt = 0 then
auto_next <= AUTO_IDLE;
end if;
end case;
end if;
end process;
-- Register write sync machine
reg_write: process(clk)
function reg_wr(r : in std_ulogic_vector;
w : in wb_io_master_out) return std_ulogic_vector is
variable b : natural range 0 to 31;
variable t : std_ulogic_vector(r'range);
begin
t := r;
for i in r'range loop
if w.sel(i/8) = '1' then
t(i) := w.dat(i);
end if;
end loop;
return t;
end function;
begin
if rising_edge(clk) then
-- Reset auto-clear
if rst = '1' or ctrl_reset = '1' then
ctrl_reset <= '0';
ctrl_cs <= '0';
ctrl_rsrv1 <= '0';
ctrl_rsrv2 <= '0';
ctrl_div <= std_ulogic_vector(to_unsigned(DEF_CLK_DIV, 8));
if DEF_QUAD_READ then
auto_cfg_cmd <= x"6b";
auto_cfg_dummies <= "111";
auto_cfg_mode <= SPI_AUTO_CFG_MODE_QUAD;
else
auto_cfg_cmd <= x"03";
auto_cfg_dummies <= "000";
auto_cfg_mode <= SPI_AUTO_CFG_MODE_SINGLE;
end if;
auto_cfg_addr4 <= '0';
auto_cfg_rsrv1 <= '0';
auto_cfg_rsrv2 <= '0';
auto_cfg_div <= std_ulogic_vector(to_unsigned(DEF_CLK_DIV, 8));
auto_cfg_cstout <= std_ulogic_vector(to_unsigned(DEFAULT_CS_TIMEOUT, 6));
end if;
if wb_reg_valid = '1' and wb_req.we = '1' and auto_state = AUTO_IDLE and bus_idle = '1' then
if wb_reg = SPI_REG_CTRL then
ctrl_reg <= reg_wr(ctrl_reg, wb_req);
end if;
if wb_reg = SPI_REG_AUTO_CFG then
auto_cfg_reg <= reg_wr(auto_cfg_reg, wb_req);
end if;
end if;
end if;
end process;
end architecture;