execute1: Add a pipelined 33-bit signed multiplier
This adds a pipelined 33-bit by 33-bit signed multiplier with one cycle of latency to the execute pipeline, and uses it for the mullw, mulhw and mulhwu instructions. Because it has one cycle of latency, we can assume that its result is available in the second execute stage without needing to add busy logic to the second stage.

This adds both a generic version of the multiplier and a Xilinx-specific version using four DSP slices of the Artix-7.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/382/head
parent
58e799b350
commit
595a758400
@ -0,0 +1,55 @@
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.common.all;
|
||||
|
||||
-- multiply_32s (generic, technology-independent implementation)
--
-- Signed 33-bit x 33-bit multiplier with no addend input and a fixed
-- latency of one clock cycle.  The low 64 bits of the product are returned.
--
-- Ports:
--   clk   : clock for the single pipeline register.
--   stall : while '1' the pipeline register keeps its current contents.
--   m_in  : request record; the architecture reads m_in.valid and uses the
--           low 33 bits of m_in.data1 and m_in.data2 as signed operands.
--   m_out : response record; the architecture drives m_out.valid,
--           m_out.result and m_out.overflow.
entity multiply_32s is
    port (
        clk   : in  std_logic;
        stall : in  std_ulogic;

        m_in  : in  MultiplyInputType;
        m_out : out MultiplyOutputType
        );
end entity multiply_32s;
|
||||
|
||||
architecture behaviour of multiply_32s is
    -- Contents of the single pipeline stage: the valid flag that travels with
    -- the operation, and the full 66-bit signed product of the two 33-bit
    -- operands.
    type reg_type is record
        valid : std_ulogic;
        data  : signed(65 downto 0);
    end record;
    constant reg_type_init : reg_type := (valid => '0', data => (others => '0'));

    -- r is the registered stage; rin is the value loaded into it at the next
    -- enabled clock edge.
    signal r, rin : reg_type := reg_type_init;
begin
    -- Pipeline register.  It only advances on a rising clock edge when stall
    -- is '0'; otherwise the previous product and valid flag are retained.
    multiply_0: process(clk)
    begin
        if rising_edge(clk) then
            if stall = '0' then
                r <= rin;
            end if;
        end if;
    end process;

    -- Combinational logic on both sides of the register: formats the
    -- registered product for the output port and computes the next product
    -- from the current inputs.
    multiply_1: process(all)
        variable nxt     : reg_type;
        variable low64   : std_ulogic_vector(63 downto 0);
        variable any_set : std_ulogic;
        variable all_set : std_ulogic;
    begin
        -- Output side: low 64 bits of the registered product, zero-padded in
        -- the upper half of the result.
        low64 := std_ulogic_vector(r.data(63 downto 0));

        -- Bits 63..31 must all be equal (all 0s or all 1s) for the product to
        -- be representable as a signed 32-bit value; otherwise flag overflow.
        any_set := or low64(63 downto 31);
        all_set := and low64(63 downto 31);

        m_out.valid    <= r.valid;
        m_out.result   <= 64x"0" & low64;
        m_out.overflow <= any_set and not all_set;

        -- Input side: signed 33 x 33 -> 66-bit multiply of the low 33 bits of
        -- each operand, captured together with the incoming valid flag.
        nxt.valid := m_in.valid;
        nxt.data  := signed(m_in.data1(32 downto 0)) * signed(m_in.data2(32 downto 0));

        rin <= nxt;
    end process;
end architecture behaviour;
|
@ -0,0 +1,293 @@
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.common.all;
|
||||
|
||||
library unisim;
|
||||
use unisim.vcomponents.all;
|
||||
|
||||
-- multiply_32s (Xilinx implementation built from DSP48E1 primitives)
--
-- Signed 33-bit x 33-bit multiplier with no addend input.  The low 64 bits
-- of the product are returned; m_out.valid follows m_in.valid by one
-- non-stalled clock edge.
--
-- Ports:
--   clk   : clock for the DSP slices and the output registers.
--   stall : while '1' the registered state is held.
--   m_in  : request record; the architecture reads m_in.valid and uses the
--           low 33 bits of m_in.data1 and m_in.data2 as signed operands.
--   m_out : response record; the architecture drives m_out.valid,
--           m_out.result and m_out.overflow.
entity multiply_32s is
    port (
        clk   : in  std_logic;
        stall : in  std_ulogic;

        m_in  : in  MultiplyInputType;
        m_out : out MultiplyOutputType
        );
end entity multiply_32s;
|
||||
|
||||
architecture behaviour of multiply_32s is
    -- Clock enable for the registered half of the datapath: a valid request
    -- is present and the pipeline is not stalled.
    signal clocken : std_ulogic;

    -- Sign-extended operands.  data1 is split into a 23-bit low part and a
    -- 30-bit high part; data2 into a 17-bit low part and an 18-bit high part.
    signal data1 : std_ulogic_vector(52 downto 0);
    signal data2 : std_ulogic_vector(34 downto 0);

    -- Outputs of the two slices that consume the low part of data1.
    signal m00_p, m01_p : std_ulogic_vector(47 downto 0);
    signal m00_pc       : std_ulogic_vector(47 downto 0);

    -- Outputs of the two slices that consume the high part of data1.
    signal m10_p, m11_p : std_ulogic_vector(47 downto 0);
    signal m10_pc       : std_ulogic_vector(47 downto 0);

    -- Pattern-detector outputs of m10 (p0_*) and m11 (p1_*).
    signal p0_pat, p0_patb : std_ulogic;
    signal p1_pat, p1_patb : std_ulogic;

    -- Registered copy of product bits 22..0.
    signal product_lo : std_ulogic_vector(22 downto 0);

begin
    -- Sign-extend the 33-bit operands to the widths needed for the split.
    data1 <= std_ulogic_vector(resize(signed(m_in.data1(32 downto 0)), 53));
    data2 <= std_ulogic_vector(resize(signed(m_in.data2(32 downto 0)), 35));

    clocken <= m_in.valid and not stall;

    -- m00: low(data1) x low(data2).  Both partial operands are zero-padded so
    -- they are treated as non-negative.  Every *REG generic is 0 and every
    -- clock enable is '0'.
    -- NOTE(review): OPMODE "0110101" is understood to select multiplier + C
    -- (C is tied to zero here) - confirm against UG479.
    m00: DSP48E1
        generic map (
            -- pipeline register selection
            AREG          => 0,
            ACASCREG      => 0,
            BREG          => 0,
            BCASCREG      => 0,
            CREG          => 0,
            MREG          => 0,
            PREG          => 0,
            ALUMODEREG    => 0,
            CARRYINREG    => 0,
            CARRYINSELREG => 0,
            INMODEREG     => 0,
            OPMODEREG     => 0
            )
        port map (
            -- operands
            A             => "0000000" & data1(22 downto 0),
            B             => '0' & data2(16 downto 0),
            C             => (others => '0'),
            D             => (others => '0'),
            -- cascade and carry inputs
            ACIN          => (others => '0'),
            BCIN          => (others => '0'),
            PCIN          => (others => '0'),
            CARRYCASCIN   => '0',
            CARRYIN       => '0',
            MULTSIGNIN    => '0',
            -- operating mode
            ALUMODE       => "0000",
            CARRYINSEL    => "000",
            INMODE        => "00000",
            OPMODE        => "0110101",
            -- clock and clock enables
            CLK           => clk,
            CEA1          => '0',
            CEA2          => '0',
            CEAD          => '0',
            CEALUMODE     => '0',
            CEB1          => '0',
            CEB2          => '0',
            CEC           => '0',
            CECARRYIN     => '0',
            CECTRL        => '0',
            CED           => '0',
            CEINMODE      => '0',
            CEM           => '0',
            CEP           => '0',
            -- resets
            RSTA          => '0',
            RSTALLCARRYIN => '0',
            RSTALUMODE    => '0',
            RSTB          => '0',
            RSTC          => '0',
            RSTCTRL       => '0',
            RSTD          => '0',
            RSTINMODE     => '0',
            RSTM          => '0',
            RSTP          => '0',
            -- results
            P             => m00_p,
            PCOUT         => m00_pc
            );

    -- m01: low(data1) x high(data2), accumulating the cascade output of m00.
    -- Every *REG generic is 0 and every clock enable is '0'.
    -- NOTE(review): OPMODE "1010101" is understood to select multiplier +
    -- (PCIN shifted right by 17 bits) - confirm against UG479.
    m01: DSP48E1
        generic map (
            -- pipeline register selection
            AREG          => 0,
            ACASCREG      => 0,
            BREG          => 0,
            BCASCREG      => 0,
            CREG          => 0,
            MREG          => 0,
            PREG          => 0,
            ALUMODEREG    => 0,
            CARRYINREG    => 0,
            CARRYINSELREG => 0,
            INMODEREG     => 0,
            OPMODEREG     => 0
            )
        port map (
            -- operands
            A             => "0000000" & data1(22 downto 0),
            B             => data2(34 downto 17),
            C             => (others => '0'),
            D             => (others => '0'),
            -- cascade and carry inputs
            ACIN          => (others => '0'),
            BCIN          => (others => '0'),
            PCIN          => m00_pc,
            CARRYCASCIN   => '0',
            CARRYIN       => '0',
            MULTSIGNIN    => '0',
            -- operating mode
            ALUMODE       => "0000",
            CARRYINSEL    => "000",
            INMODE        => "00000",
            OPMODE        => "1010101",
            -- clock and clock enables
            CLK           => clk,
            CEA1          => '0',
            CEA2          => '0',
            CEAD          => '0',
            CEALUMODE     => '0',
            CEB1          => '0',
            CEB2          => '0',
            CEC           => '0',
            CECARRYIN     => '0',
            CECTRL        => '0',
            CED           => '0',
            CEINMODE      => '0',
            CEM           => '0',
            CEP           => '0',
            -- resets
            RSTA          => '0',
            RSTALLCARRYIN => '0',
            RSTALUMODE    => '0',
            RSTB          => '0',
            RSTC          => '0',
            RSTCTRL       => '0',
            RSTD          => '0',
            RSTINMODE     => '0',
            RSTM          => '0',
            RSTP          => '0',
            -- results
            P             => m01_p
            );

    -- m10: high(data1) x low(data2), plus bits 38..6 of m01's result brought
    -- in, sign-extended, through the C input.  CREG is 1 and both CEC and CEM
    -- are driven by clocken.
    -- NOTE(review): MREG is not set here, so the component default applies;
    -- presumably the M and C registers together form the one-cycle pipeline
    -- stage - confirm against the unisim DSP48E1 defaults.
    -- NOTE(review): with this MASK the pattern detector is understood to
    -- compare only P bits 16..8 (a mask bit of '1' excludes that bit) against
    -- the default pattern - confirm against UG479.
    m10: DSP48E1
        generic map (
            -- pipeline register selection
            AREG               => 0,
            ACASCREG           => 0,
            BREG               => 0,
            BCASCREG           => 0,
            CREG               => 1,
            PREG               => 0,
            ALUMODEREG         => 0,
            CARRYINREG         => 0,
            CARRYINSELREG      => 0,
            INMODEREG          => 0,
            OPMODEREG          => 0,
            -- pattern detector
            MASK               => x"fffffffe00ff",
            USE_PATTERN_DETECT => "PATDET"
            )
        port map (
            -- operands
            A             => data1(52 downto 23),
            B             => '0' & data2(16 downto 0),
            C             => std_ulogic_vector(resize(signed(m01_p(38 downto 6)), 48)),
            D             => (others => '0'),
            -- cascade and carry inputs
            ACIN          => (others => '0'),
            BCIN          => (others => '0'),
            PCIN          => (others => '0'),
            CARRYCASCIN   => '0',
            CARRYIN       => '0',
            MULTSIGNIN    => '0',
            -- operating mode
            ALUMODE       => "0000",
            CARRYINSEL    => "000",
            INMODE        => "00000",
            OPMODE        => "0110101",
            -- clock and clock enables
            CLK           => clk,
            CEA1          => '0',
            CEA2          => '0',
            CEAD          => '0',
            CEALUMODE     => '0',
            CEB1          => '0',
            CEB2          => '0',
            CEC           => clocken,
            CECARRYIN     => '0',
            CECTRL        => '0',
            CED           => '0',
            CEINMODE      => '0',
            CEM           => clocken,
            CEP           => '0',
            -- resets
            RSTA          => '0',
            RSTALLCARRYIN => '0',
            RSTALUMODE    => '0',
            RSTB          => '0',
            RSTC          => '0',
            RSTCTRL       => '0',
            RSTD          => '0',
            RSTINMODE     => '0',
            RSTM          => '0',
            RSTP          => '0',
            -- results
            P              => m10_p,
            PCOUT          => m10_pc,
            PATTERNDETECT  => p0_pat,
            PATTERNBDETECT => p0_patb
            );

    -- m11: high(data1) x high(data2), accumulating the cascade output of m10.
    -- CEM is driven by clocken; all other clock enables are '0'.
    -- NOTE(review): MREG is not set here, so the component default applies -
    -- confirm against the unisim DSP48E1 defaults.
    -- NOTE(review): with this MASK the pattern detector is understood to
    -- compare P bits 25..0 against the default pattern - confirm against
    -- UG479.
    m11: DSP48E1
        generic map (
            -- pipeline register selection
            AREG               => 0,
            ACASCREG           => 0,
            BREG               => 0,
            BCASCREG           => 0,
            CREG               => 0,
            PREG               => 0,
            ALUMODEREG         => 0,
            CARRYINREG         => 0,
            CARRYINSELREG      => 0,
            INMODEREG          => 0,
            OPMODEREG          => 0,
            -- pattern detector
            MASK               => x"fffffc000000",
            USE_PATTERN_DETECT => "PATDET"
            )
        port map (
            -- operands
            A             => data1(52 downto 23),
            B             => data2(34 downto 17),
            C             => (others => '0'),
            D             => (others => '0'),
            -- cascade and carry inputs
            ACIN          => (others => '0'),
            BCIN          => (others => '0'),
            PCIN          => m10_pc,
            CARRYCASCIN   => '0',
            CARRYIN       => '0',
            MULTSIGNIN    => '0',
            -- operating mode
            ALUMODE       => "0000",
            CARRYINSEL    => "000",
            INMODE        => "00000",
            OPMODE        => "1010101",
            -- clock and clock enables
            CLK           => clk,
            CEA1          => '0',
            CEA2          => '0',
            CEAD          => '0',
            CEALUMODE     => '0',
            CEB1          => '0',
            CEB2          => '0',
            CEC           => '0',
            CECARRYIN     => '0',
            CECTRL        => '0',
            CED           => '0',
            CEINMODE      => '0',
            CEM           => clocken,
            CEP           => '0',
            -- resets
            RSTA          => '0',
            RSTALLCARRYIN => '0',
            RSTALUMODE    => '0',
            RSTB          => '0',
            RSTC          => '0',
            RSTCTRL       => '0',
            RSTD          => '0',
            RSTINMODE     => '0',
            RSTM          => '0',
            RSTP          => '0',
            -- results
            P              => m11_p,
            PATTERNDETECT  => p1_pat,
            PATTERNBDETECT => p1_patb
            );

    -- Assemble the 64-bit product from the three pieces; the upper half of
    -- the 128-bit result field is always zero.
    m_out.result(127 downto 64) <= (others => '0');
    m_out.result(63 downto 40)  <= m11_p(23 downto 0);
    m_out.result(39 downto 23)  <= m10_p(16 downto 0);
    m_out.result(22 downto 0)   <= product_lo;

    -- No overflow only when both detectors report a pattern match, or both
    -- report a complemented-pattern match.
    -- NOTE(review): this presumably means the upper product bits are all 0s
    -- or all 1s, i.e. the result fits in a signed 32-bit value - confirm.
    m_out.overflow <= not ((p0_pat and p1_pat) or (p0_patb and p1_patb));

    -- Registers outside the DSP slices: the valid flag and the low 23 product
    -- bits, both held while stall is asserted.
    process(clk)
    begin
        if rising_edge(clk) then
            if stall = '0' then
                product_lo  <= m01_p(5 downto 0) & m00_p(16 downto 0);
                m_out.valid <= m_in.valid;
            end if;
        end if;
    end process;

end architecture behaviour;
|
Loading…
Reference in New Issue