Implement cfuged, pdepd and pextd
This implements the cfuged, pdepd and pextd instructions in a new unit called bit_sorter (so called because cfuged and pextd can be viewed as sorting the bits of the mask). The cnt* instructions and the popcnt* instructions now use the same OP_COUNTB insn_type so as to free up an insn_type value to use for the new instructions. The new instructions are implemented using a slow and simple algorithm that takes 64 cycles to compute the result. The ex1 stage is stalled while this happens, as for a 64-bit multiply, or for a divide when there is no FPU.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/434/head
parent
d7d7a3afd4
commit
fa9df33f7e
@ -0,0 +1,102 @@
|
|||||||
|
-- Implements instructions that involve sorting bits,
|
||||||
|
-- that is, cfuged, pextd and pdepd.
|
||||||
|
--
|
||||||
|
-- cfuged: Sort the bits in the mask in RB into 0s at the left, 1s at the right
|
||||||
|
-- and move the bits in RS in the same fashion to give the result
|
||||||
|
-- pextd: Like cfuged but only use the bits of RS where the
|
||||||
|
-- corresponding bit in RB is 1
|
||||||
|
-- pdepd: Inverse of pextd; take the low-order bits of RS and spread them out
|
||||||
|
-- to the bit positions which have a 1 in RB
|
||||||
|
|
||||||
|
-- NB opc is bits 7-6 of the instruction:
|
||||||
|
-- 00 = pdepd, 01 = pextd, 10 = cfuged
|
||||||
|
|
||||||
|
library ieee;
|
||||||
|
use ieee.std_logic_1164.all;
|
||||||
|
use ieee.numeric_std.all;
|
||||||
|
|
||||||
|
library work;
|
||||||
|
use work.helpers.all;
|
||||||
|
|
||||||
|
-- Port interface of the bit sorter unit (cfuged / pextd / pdepd).
-- An operation is started by asserting go; completion is signalled on done.
entity bit_sorter is
    port (
        -- Clock, and reset (sampled on the rising edge of clk).
        clk, rst : in  std_ulogic;

        -- Operands: rs is the source value, rb is the bit mask.
        rs, rb   : in  std_ulogic_vector(63 downto 0);

        -- Assert to latch rs, rb and opc and begin a new operation.
        go       : in  std_ulogic;

        -- Operation select, bits 7-6 of the instruction:
        -- 00 = pdepd, 01 = pextd, 10 = cfuged.
        opc      : in  std_ulogic_vector(1 downto 0);

        -- Completion flag for the operation started by go.
        done     : out std_ulogic;

        -- Computed value of the operation.
        result   : out std_ulogic_vector(63 downto 0)
    );
end entity bit_sorter;
|
||||||
|
|
||||||
|
-- Bit-serial implementation of cfuged, pextd and pdepd.
--
-- After go is sampled, the unit iterates for 64 clock cycles.  On every
-- cycle it examines one mask bit from the left end (used by cfuged only)
-- and one mask bit from the right end, moves at most one source bit per
-- end into the accumulator, and then shifts all four shift registers by
-- one position.  done is pulsed for a single cycle once the 64th
-- iteration has been performed; result then holds the final value.
architecture behaviour of bit_sorter is

    -- opc value selecting pdepd (pextd is "01", cfuged is "10").
    constant OP_PDEPD  : std_ulogic_vector(1 downto 0) := "00";
    -- Step counter value during the last of the 64 iterations.
    constant LAST_STEP : unsigned(5 downto 0) := to_unsigned(63, 6);

    -- Control state.
    signal busy   : std_ulogic;                     -- '1' while iterating
    signal done_r : std_ulogic;                     -- registered done pulse
    signal op_r   : std_ulogic_vector(1 downto 0);  -- opc latched at go

    -- Result accumulator, cleared at the start of every operation.
    signal acc    : std_ulogic_vector(63 downto 0);

    -- Iteration counter and destination bit indices.
    signal step   : unsigned(5 downto 0);  -- iteration number, 0 to 63
    signal dst_hi : unsigned(5 downto 0);  -- next free slot from the top (cfuged)
    signal dst_lo : unsigned(5 downto 0);  -- next free slot from the bottom

    -- Shift registers: the mask and source are each consumed from both ends.
    signal mask_l : std_ulogic_vector(63 downto 0);  -- mask, shifted left
    signal mask_r : std_ulogic_vector(63 downto 0);  -- mask, shifted right
    signal src_l  : std_ulogic_vector(63 downto 0);  -- source, shifted left
    signal src_r  : std_ulogic_vector(63 downto 0);  -- source, shifted right

begin

    bsort_r: process(clk)
        variable is_pdepd  : boolean;  -- latched operation is pdepd
        variable take_low  : boolean;  -- current right-end mask bit is 1
        variable take_high : boolean;  -- cfuged and current left-end mask bit is 0
    begin
        if rising_edge(clk) then
            -- Decode the current state once; every signal read here sees
            -- the value it held before this clock edge.
            is_pdepd  := op_r = OP_PDEPD;
            take_low  := mask_r(0) = '1';
            take_high := op_r(1) = '1' and mask_l(63) = '0';

            -- done is a pulse: low unless the final iteration sets it below.
            done_r <= '0';

            if rst = '1' then
                busy <= '0';
                op_r <= "00";
                acc  <= (others => '0');

            elsif go = '1' then
                -- Latch the operands and restart, even if an operation
                -- was already in progress.
                op_r   <= opc;
                mask_l <= rb;
                mask_r <= rb;
                src_l  <= rs;
                src_r  <= rs;
                acc    <= (others => '0');
                step   <= (others => '0');
                dst_hi <= (others => '1');   -- 63
                dst_lo <= (others => '0');
                busy   <= '1';

            elsif busy = '1' then
                -- cfuged: a source bit whose mask bit is 0 is packed
                -- downwards from bit 63.
                if take_high then
                    acc(to_integer(dst_hi)) <= src_l(63);
                    dst_hi <= dst_hi - 1;
                end if;

                -- A 1 in the mask at the right end moves one source bit.
                if take_low then
                    if is_pdepd then
                        -- pdepd: deposit at the mask bit's own position.
                        acc(to_integer(step)) <= src_r(0);
                    else
                        -- cfuged or pextd: pack upwards from bit 0.
                        acc(to_integer(dst_lo)) <= src_r(0);
                    end if;
                    dst_lo <= dst_lo + 1;
                end if;

                -- Advance the shift registers.  For pdepd the right-shifted
                -- source only advances when one of its bits was consumed.
                mask_l <= mask_l(62 downto 0) & '0';
                mask_r <= '0' & mask_r(63 downto 1);
                src_l  <= src_l(62 downto 0) & '0';
                if take_low or not is_pdepd then
                    src_r <= '0' & src_r(63 downto 1);
                end if;

                -- Count the iteration and finish after the 64th one.
                step <= step + 1;
                if step = LAST_STEP then
                    busy   <= '0';
                    done_r <= '1';
                end if;
            end if;
        end if;
    end process;

    done   <= done_r;
    result <= acc;

end behaviour;
|
Loading…
Reference in New Issue