library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;

-- Single-clock memory with one read port and one byte-enabled write port.
--
-- Generics:
--   ROW_BITS : width of the read and write addresses, in bits.
--   WIDTH    : width of one row, in bits. wr_sel carries WIDTH/8 enables,
--              one per 8-bit lane, so WIDTH is expected to be a multiple
--              of 8 (integer division drops any remainder).
--   TRACE    : when true, reads and writes are logged with "report".
--   ADD_BUF  : when true, an extra register stage is placed on rd_data.
--
-- Ports:
--   clk      : clock; all state changes happen on its rising edge.
--   rd_en    : read enable, sampled on the rising edge of clk.
--   rd_addr  : row to read.
--   rd_data  : data read from the addressed row.
--   wr_sel   : per-byte write enables; bit i controls bits 8*i+7 .. 8*i.
--   wr_addr  : row to write.
--   wr_data  : data to write; only lanes enabled by wr_sel are stored.
entity cache_ram is
    generic (
        ROW_BITS : integer := 16;
        WIDTH    : integer := 64;
        TRACE    : boolean := false;
        ADD_BUF  : boolean := false
    );

    port (
        clk     : in  std_logic;
        rd_en   : in  std_logic;
        rd_addr : in  std_logic_vector(ROW_BITS - 1 downto 0);
        rd_data : out std_logic_vector(WIDTH - 1 downto 0);
        wr_sel  : in  std_logic_vector(WIDTH/8 - 1 downto 0);
        wr_addr : in  std_logic_vector(ROW_BITS - 1 downto 0);
        wr_data : in  std_logic_vector(WIDTH - 1 downto 0)
    );

end cache_ram;

-- RTL implementation of cache_ram.
--
-- The storage is an array of 2**ROW_BITS rows of WIDTH bits. Writes are
-- applied per 8-bit lane under control of wr_sel; reads are registered into
-- rd_data0 when rd_en is high. Depending on ADD_BUF, rd_data is either
-- rd_data0 directly or rd_data0 delayed by one more clock.
architecture rtl of cache_ram is
    -- Number of rows in the memory.
    constant SIZE : integer := 2**ROW_BITS;

    type ram_type is array (0 to SIZE - 1) of std_logic_vector(WIDTH - 1 downto 0);
    signal ram : ram_type;
    -- Synthesis hint asking for the array to be mapped onto block RAM
    -- (tool-specific attribute; ignored by tools that do not know it).
    attribute ram_style : string;
    attribute ram_style of ram : signal is "block";

    -- Registered read data, before the optional ADD_BUF output stage.
    signal rd_data0 : std_logic_vector(WIDTH - 1 downto 0);

begin

    -- Write and read ports, both clocked by clk.
    process(clk)
        variable lbit : integer range 0 to WIDTH - 1;
        variable mbit : integer range 0 to WIDTH - 1;
        variable widx : integer range 0 to SIZE - 1;
        -- All-zero select pattern, used to detect "no lane enabled".
        constant sel0 : std_logic_vector(WIDTH/8 - 1 downto 0)
            := (others => '0');
    begin
        if rising_edge(clk) then
            -- Only log writes that actually enable at least one byte lane.
            if TRACE then
                if wr_sel /= sel0 then
                    report "write a:" & to_hstring(wr_addr) &
                        " sel:" & to_hstring(wr_sel) &
                        " dat:" & to_hstring(wr_data);
                end if;
            end if;

            -- The write row is the same for every lane, so convert it once.
            widx := to_integer(unsigned(wr_addr));
            for i in 0 to WIDTH/8-1 loop
                -- Bit range of byte lane i within the row.
                lbit := i * 8;
                mbit := lbit + 7;
                if wr_sel(i) = '1' then
                    ram(widx)(mbit downto lbit) <= wr_data(mbit downto lbit);
                end if;
            end loop;

            -- ram is a signal, so this read sees the contents from before
            -- any write scheduled in this same clock edge.
            if rd_en = '1' then
                rd_data0 <= ram(to_integer(unsigned(rd_addr)));
                if TRACE then
                    report "read a:" & to_hstring(rd_addr) &
                        " dat:" & to_hstring(ram(to_integer(unsigned(rd_addr))));
                end if;
            end if;
        end if;
    end process;

    -- Optional extra register stage on the read data.
    buf: if ADD_BUF generate
    begin
        process(clk)
        begin
            if rising_edge(clk) then
                rd_data <= rd_data0;
            end if;
        end process;
    end generate;

    -- Without the extra stage, rd_data is driven straight from rd_data0.
    nobuf: if not ADD_BUF generate
    begin
        rd_data <= rd_data0;
    end generate;

end architecture rtl;
