Implement cfuged, pdepd and pextd

This implements the cfuged, pdepd and pextd instructions in a new unit
called bit_sorter (so called because cfuged and pextd can be viewed as
sorting the bits of the mask).

The cnt* instructions and the popcnt* instructions now use the same
OP_COUNTB insn_type so as to free up an insn_type value to use for the
new instructions.

The new instructions are implemented using a slow and simple algorithm
that takes 64 cycles to compute the result.  The ex1 stage is stalled
while this happens, as for a 64-bit multiply, or for a divide when
there is no FPU.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/434/head
Paul Mackerras 1 year ago
parent d7d7a3afd4
commit fa9df33f7e

@ -74,7 +74,7 @@ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \ cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countbits.vhdl multiply.vhdl multiply-32s.vhdl divider.vhdl \ logical.vhdl countbits.vhdl multiply.vhdl multiply-32s.vhdl divider.vhdl \
execute1.vhdl loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl \ execute1.vhdl loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl \
core_debug.vhdl core.vhdl fpu.vhdl pmu.vhdl core_debug.vhdl core.vhdl fpu.vhdl pmu.vhdl bitsort.vhdl


soc_files = wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \ soc_files = wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl gpio.vhdl soc.vhdl \ wishbone_debug_master.vhdl xics.vhdl syscon.vhdl gpio.vhdl soc.vhdl \

@ -0,0 +1,102 @@
-- Implements instructions that involve sorting bits,
-- that is, cfuged, pextd and pdepd.
--
-- cfuged: Sort the bits in the mask in RB into 0s at the left, 1s at the right
-- and move the bits in RS in the same fashion to give the result
-- pextd: Like cfuged but only use the bits of RS where the
-- corresponding bit in RB is 1
-- pdepd: Inverse of pextd; take the low-order bits of RS and spread them out
-- to the bit positions which have a 1 in RB

-- NB opc is bits 7-6 of the instruction:
-- 00 = pdepd, 01 = pextd, 10 = cfuged

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.helpers.all;

-- bit_sorter: multi-cycle unit that computes cfuged, pextd and pdepd.
-- An operation is started by asserting 'go' for a clock edge; the operands
-- and opcode are captured on that edge.  'done' is then pulsed for one cycle
-- once all 64 bit positions have been processed, at which point 'result'
-- holds the final value.
entity bit_sorter is
port (
-- Clock; all state in this unit updates on the rising edge.
clk : in std_ulogic;
-- Synchronous reset; takes priority over 'go' and stops any operation
-- in progress, clearing the result.
rst : in std_ulogic;
-- Data operand (RS), captured when go = '1'.
rs : in std_ulogic_vector(63 downto 0);
-- Mask operand (RB), captured when go = '1'.
rb : in std_ulogic_vector(63 downto 0);
-- Start request.  Asserting it while an operation is already running
-- restarts the unit with the newly presented operands.
go : in std_ulogic;
-- Operation select (instruction bits 7-6), captured when go = '1':
-- 00 = pdepd, 01 = pextd, 10 = cfuged.
opc : in std_ulogic_vector(1 downto 0);
-- High for a single cycle following the last processing step.
done : out std_ulogic;
-- Result accumulator; cleared on reset and on go, complete when done = '1'.
result : out std_ulogic_vector(63 downto 0)
);
end entity bit_sorter;

-- Bit-serial implementation: one mask bit is examined from each end of RB
-- per clock, so an operation takes 64 processing steps after 'go'.
--
--  * Left side (cfuged only): the mask and data are scanned from bit 63
--    downwards; data bits whose mask bit is 0 are packed into the result
--    from bit 63 downwards.
--  * Right side (all ops): the mask is scanned from bit 0 upwards.
--      - cfuged/pextd: data bits whose mask bit is 1 are packed into the
--        result from bit 0 upwards.
--      - pdepd: each mask bit that is 1 consumes the next low-order data
--        bit and deposits it at the mask bit's own position.
architecture behaviour of bit_sorter is

    -- opc encoding that selects pdepd; any other value uses the
    -- extract-style (pextd/cfuged) right-hand datapath.
    constant OPC_PDEPD : std_ulogic_vector(1 downto 0) := "00";

    signal acc      : std_ulogic_vector(63 downto 0);  -- result being built
    signal running  : std_ulogic;                      -- operation in progress
    signal finished : std_ulogic;                      -- one-cycle completion pulse
    signal op_r     : std_ulogic_vector(1 downto 0);   -- latched opc
    signal step     : unsigned(5 downto 0);            -- current step / mask bit position (0..63)
    signal top_idx  : unsigned(5 downto 0);            -- next result slot filled from the left
    signal bot_idx  : unsigned(5 downto 0);            -- next result slot filled from the right
    signal mask_l   : std_ulogic_vector(63 downto 0);  -- mask, shifted left each step
    signal mask_r   : std_ulogic_vector(63 downto 0);  -- mask, shifted right each step
    signal data_l   : std_ulogic_vector(63 downto 0);  -- data, shifted left each step
    signal data_r   : std_ulogic_vector(63 downto 0);  -- data, shifted right as bits are consumed

begin

    done   <= finished;
    result <= acc;

    bsort_r: process(clk)
    begin
        if rising_edge(clk) then
            -- Completion pulse defaults to inactive; only the final step
            -- below overrides this.
            finished <= '0';

            if rst = '1' then
                running <= '0';
                op_r    <= "00";
                acc     <= (others => '0');

            elsif go = '1' then
                -- Capture operands and opcode, and reset all counters.
                op_r    <= opc;
                mask_l  <= rb;
                mask_r  <= rb;
                data_l  <= rs;
                data_r  <= rs;
                acc     <= (others => '0');
                step    <= to_unsigned(0, 6);
                top_idx <= to_unsigned(63, 6);
                bot_idx <= to_unsigned(0, 6);
                running <= '1';

            elsif running = '1' then
                -- The 64th step also retires the operation.
                if step = 63 then
                    running  <= '0';
                    finished <= '1';
                end if;
                step <= step + 1;

                -- Left side: only cfuged (opc bit 1 set) gathers the data
                -- bits that sit under a 0 in the mask.
                if op_r(1) = '1' then
                    if mask_l(63) = '0' then
                        acc(to_integer(top_idx)) <= data_l(63);
                        top_idx <= top_idx - 1;
                    end if;
                end if;

                -- Right side: handle the data bit paired with the current
                -- low-order mask bit.  This write is issued after the
                -- left-side write, as in the step ordering above.
                if mask_r(0) = '1' then
                    if op_r = OPC_PDEPD then
                        -- pdepd: deposit at the mask bit's own position
                        acc(to_integer(step)) <= data_r(0);
                    else
                        -- cfuged or pextd: pack towards bit 0
                        acc(to_integer(bot_idx)) <= data_r(0);
                    end if;
                    bot_idx <= bot_idx + 1;
                    data_r  <= std_ulogic_vector(shift_right(unsigned(data_r), 1));
                elsif op_r /= OPC_PDEPD then
                    -- cfuged/pextd step through the data on every cycle;
                    -- pdepd only consumes a data bit when the mask bit is 1.
                    data_r <= std_ulogic_vector(shift_right(unsigned(data_r), 1));
                end if;

                -- Advance the remaining scan registers by one position.
                data_l <= std_ulogic_vector(shift_left(unsigned(data_l), 1));
                mask_l <= std_ulogic_vector(shift_left(unsigned(mask_l), 1));
                mask_r <= std_ulogic_vector(shift_right(unsigned(mask_r), 1));
            end if;
        end if;
    end process;

end behaviour;

@ -106,6 +106,7 @@ architecture behaviour of decode1 is
INSN_brd => (ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_brd => (ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cbcdtd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cbcdtd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cdtbcd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cdtbcd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cfuged => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmp => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), INSN_cmp => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_cmpb => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cmpb => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmpeqb => (ALU, NONE, OP_CMPEQB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cmpeqb => (ALU, NONE, OP_CMPEQB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -113,10 +114,10 @@ architecture behaviour of decode1 is
INSN_cmpl => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cmpl => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmpli => (ALU, NONE, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cmpli => (ALU, NONE, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmprb => (ALU, NONE, OP_CMPRB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cmprb => (ALU, NONE, OP_CMPRB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cntlzd => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), INSN_cntlzd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_cntlzw => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), INSN_cntlzw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_cnttzd => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), INSN_cnttzd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_cnttzw => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), INSN_cnttzw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_crand => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_crand => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_crandc => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_crandc => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_creqv => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_creqv => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -281,6 +282,8 @@ architecture behaviour of decode1 is
INSN_ori => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), INSN_ori => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_oris => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), INSN_oris => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_paddi => (ALU, NONE, OP_ADD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_paddi => (ALU, NONE, OP_ADD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pdepd => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pextd => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plbz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_plbz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pld => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_pld => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plfd => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_plfd => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -296,9 +299,9 @@ architecture behaviour of decode1 is
INSN_pstfs => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), INSN_pstfs => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
INSN_psth => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_psth => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pstw => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_pstw => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntb => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_popcntb => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntd => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_popcntd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntw => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_popcntw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_prtyd => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_prtyd => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_prtyw => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_prtyw => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_rfid => (ALU, NONE, OP_RFID, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_rfid => (ALU, NONE, OP_RFID, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),

@ -232,12 +232,13 @@ architecture behaviour of decode2 is
); );


constant subresult_select : mux_select_array_t := ( constant subresult_select : mux_select_array_t := (
OP_MUL_L64 => "000", -- muldiv_result OP_MUL_L64 => "000", -- multicyc_result
OP_MUL_H64 => "001", OP_MUL_H64 => "010",
OP_MUL_H32 => "010", OP_MUL_H32 => "001",
OP_DIV => "011", OP_DIV => "101",
OP_DIVE => "011", OP_DIVE => "101",
OP_MOD => "011", OP_MOD => "101",
OP_BSORT => "100",
OP_ADDG6S => "001", -- misc_result OP_ADDG6S => "001", -- misc_result
OP_ISEL => "010", OP_ISEL => "010",
OP_DARN => "011", OP_DARN => "011",

@ -6,7 +6,7 @@ package decode_types is
OP_ATTN, OP_B, OP_BC, OP_BCREG, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BCD, OP_BPERM, OP_BREV, OP_BCD, OP_BPERM, OP_BREV,
OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CROP, OP_COUNTB, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_XCBT, OP_DCBTST, OP_DARN, OP_DCBF, OP_DCBST, OP_XCBT, OP_DCBTST,
OP_DCBZ, OP_ICBI, OP_DCBZ, OP_ICBI,
OP_FP_CMP, OP_FP_ARITH, OP_FP_MOVE, OP_FP_MISC, OP_FP_CMP, OP_FP_ARITH, OP_FP_MOVE, OP_FP_MISC,
@ -18,7 +18,8 @@ package decode_types is
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR,
OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64, OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_MUL_H64, OP_MUL_H32,
OP_POPCNT, OP_PRTY, OP_RFID, OP_BSORT,
OP_PRTY, OP_RFID,
OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB, OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB,
OP_SHL, OP_SHR, OP_SHL, OP_SHR,
OP_SYNC, OP_TLBIE, OP_TRAP, OP_SYNC, OP_TLBIE, OP_TRAP,
@ -179,11 +180,12 @@ package decode_types is
INSN_and, INSN_and,
INSN_andc, INSN_andc,
INSN_bperm, INSN_bperm,
INSN_cfuged,
INSN_cmp, INSN_cmp,
INSN_cmpb, INSN_cmpb,
INSN_cmpeqb, INSN_cmpeqb,
INSN_cmpl, INSN_cmpl, -- 140
INSN_cmprb, -- 140 INSN_cmprb,
INSN_dcbf, INSN_dcbf,
INSN_dcbst, INSN_dcbst,
INSN_dcbt, INSN_dcbt,
@ -192,8 +194,8 @@ package decode_types is
INSN_divd, INSN_divd,
INSN_divdu, INSN_divdu,
INSN_divde, INSN_divde,
INSN_divdeu, INSN_divdeu, -- 150
INSN_divw, -- 150 INSN_divw,
INSN_divwu, INSN_divwu,
INSN_divwe, INSN_divwe,
INSN_divweu, INSN_divweu,
@ -202,8 +204,8 @@ package decode_types is
INSN_icbt, INSN_icbt,
INSN_isel, INSN_isel,
INSN_lbarx, INSN_lbarx,
INSN_lbzcix, INSN_lbzcix, -- 160
INSN_lbzux, -- 160 INSN_lbzux,
INSN_lbzx, INSN_lbzx,
INSN_ldarx, INSN_ldarx,
INSN_ldbrx, INSN_ldbrx,
@ -212,8 +214,8 @@ package decode_types is
INSN_ldux, INSN_ldux,
INSN_lharx, INSN_lharx,
INSN_lhax, INSN_lhax,
INSN_lhaux, INSN_lhaux, -- 170
INSN_lhbrx, -- 170 INSN_lhbrx,
INSN_lhzcix, INSN_lhzcix,
INSN_lhzx, INSN_lhzx,
INSN_lhzux, INSN_lhzux,
@ -222,8 +224,8 @@ package decode_types is
INSN_lwaux, INSN_lwaux,
INSN_lwbrx, INSN_lwbrx,
INSN_lwzcix, INSN_lwzcix,
INSN_lwzx, INSN_lwzx, -- 180
INSN_lwzux, -- 180 INSN_lwzux,
INSN_modsd, INSN_modsd,
INSN_modsw, INSN_modsw,
INSN_moduw, INSN_moduw,
@ -232,51 +234,54 @@ package decode_types is
INSN_mulhwu, INSN_mulhwu,
INSN_mulhd, INSN_mulhd,
INSN_mulhdu, INSN_mulhdu,
INSN_mullw, INSN_mullw, -- 190
INSN_mulld, -- 190 INSN_mulld,
INSN_nand, INSN_nand,
INSN_nor, INSN_nor,
INSN_or, INSN_or,
INSN_orc, INSN_orc,
INSN_pdepd,
INSN_pextd,
INSN_rldcl, INSN_rldcl,
INSN_rldcr, INSN_rldcr,
INSN_rlwnm, INSN_rlwnm, -- 200
INSN_slw, INSN_slw,
INSN_sld, INSN_sld,
INSN_sraw, -- 200 INSN_sraw,
INSN_srad, INSN_srad,
INSN_srw, INSN_srw,
INSN_srd, INSN_srd,
INSN_stbcix, INSN_stbcix,
INSN_stbcx, INSN_stbcx,
INSN_stbx, INSN_stbx,
INSN_stbux, INSN_stbux, -- 210
INSN_stdbrx, INSN_stdbrx,
INSN_stdcix, INSN_stdcix,
INSN_stdcx, -- 210 INSN_stdcx,
INSN_stdx, INSN_stdx,
INSN_stdux, INSN_stdux,
INSN_sthbrx, INSN_sthbrx,
INSN_sthcix, INSN_sthcix,
INSN_sthcx, INSN_sthcx,
INSN_sthx, INSN_sthx,
INSN_sthux, INSN_sthux, -- 220
INSN_stwbrx, INSN_stwbrx,
INSN_stwcix, INSN_stwcix,
INSN_stwcx, -- 220 INSN_stwcx,
INSN_stwx, INSN_stwx,
INSN_stwux, INSN_stwux,
INSN_subf, INSN_subf,
INSN_subfc, INSN_subfc,
INSN_subfe, INSN_subfe,
INSN_td, INSN_td,
INSN_tlbie, INSN_tlbie, -- 230
INSN_tlbiel, INSN_tlbiel,
INSN_tw, INSN_tw,
INSN_xor, -- 230 INSN_xor,


-- pad to 232 to simplify comparison logic -- pad to 240 to simplify comparison logic
INSN_231, INSN_234, INSN_235,
INSN_236, INSN_237, INSN_238, INSN_239,


-- The following instructions have a third input addressed by RC -- The following instructions have a third input addressed by RC
INSN_maddld, INSN_maddld,
@ -284,9 +289,7 @@ package decode_types is
INSN_maddhdu, INSN_maddhdu,


-- pad to 256 to simplify comparison logic -- pad to 256 to simplify comparison logic
INSN_235, INSN_243,
INSN_236, INSN_237, INSN_238, INSN_239,
INSN_240, INSN_241, INSN_242, INSN_243,
INSN_244, INSN_245, INSN_246, INSN_247, INSN_244, INSN_245, INSN_246, INSN_247,
INSN_248, INSN_249, INSN_250, INSN_251, INSN_248, INSN_249, INSN_250, INSN_251,
INSN_252, INSN_253, INSN_254, INSN_255, INSN_252, INSN_253, INSN_254, INSN_255,

@ -113,6 +113,7 @@ architecture behaviour of execute1 is
direct_branch : std_ulogic; direct_branch : std_ulogic;
start_mul : std_ulogic; start_mul : std_ulogic;
start_div : std_ulogic; start_div : std_ulogic;
start_bsort : std_ulogic;
do_trace : std_ulogic; do_trace : std_ulogic;
fp_intr : std_ulogic; fp_intr : std_ulogic;
res2_sel : std_ulogic_vector(1 downto 0); res2_sel : std_ulogic_vector(1 downto 0);
@ -134,7 +135,7 @@ architecture behaviour of execute1 is
prev_op : insn_type_t; prev_op : insn_type_t;
prev_prefixed : std_ulogic; prev_prefixed : std_ulogic;
oe : std_ulogic; oe : std_ulogic;
mul_select : std_ulogic_vector(1 downto 0); mul_select : std_ulogic_vector(2 downto 0);
res2_sel : std_ulogic_vector(1 downto 0); res2_sel : std_ulogic_vector(1 downto 0);
spr_select : spr_id; spr_select : spr_id;
pmu_spr_num : std_ulogic_vector(4 downto 0); pmu_spr_num : std_ulogic_vector(4 downto 0);
@ -144,6 +145,7 @@ architecture behaviour of execute1 is
mul_in_progress : std_ulogic; mul_in_progress : std_ulogic;
mul_finish : std_ulogic; mul_finish : std_ulogic;
div_in_progress : std_ulogic; div_in_progress : std_ulogic;
bsort_in_progress : std_ulogic;
no_instr_avail : std_ulogic; no_instr_avail : std_ulogic;
instr_dispatch : std_ulogic; instr_dispatch : std_ulogic;
ext_interrupt : std_ulogic; ext_interrupt : std_ulogic;
@ -164,10 +166,11 @@ architecture behaviour of execute1 is
busy => '0', busy => '0',
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
prev_prefixed => '0', prev_prefixed => '0',
oe => '0', mul_select => "00", res2_sel => "00", oe => '0', mul_select => "000", res2_sel => "00",
spr_select => spr_id_init, pmu_spr_num => 5x"0", spr_select => spr_id_init, pmu_spr_num => 5x"0",
redir_to_next => '0', advance_nia => '0', lr_from_next => '0', redir_to_next => '0', advance_nia => '0', lr_from_next => '0',
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
bsort_in_progress => '0',
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0', no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
taken_branch_event => '0', br_mispredict => '0', taken_branch_event => '0', br_mispredict => '0',
msr => 64x"0", msr => 64x"0",
@ -209,7 +212,8 @@ architecture behaviour of execute1 is
signal alu_result: std_ulogic_vector(63 downto 0); signal alu_result: std_ulogic_vector(63 downto 0);
signal adder_result: std_ulogic_vector(63 downto 0); signal adder_result: std_ulogic_vector(63 downto 0);
signal misc_result: std_ulogic_vector(63 downto 0); signal misc_result: std_ulogic_vector(63 downto 0);
signal muldiv_result: std_ulogic_vector(63 downto 0); signal multicyc_result: std_ulogic_vector(63 downto 0);
signal bsort_result: std_ulogic_vector(63 downto 0);
signal spr_result: std_ulogic_vector(63 downto 0); signal spr_result: std_ulogic_vector(63 downto 0);
signal next_nia : std_ulogic_vector(63 downto 0); signal next_nia : std_ulogic_vector(63 downto 0);
signal s1_sel : std_ulogic_vector(2 downto 0); signal s1_sel : std_ulogic_vector(2 downto 0);
@ -234,6 +238,10 @@ architecture behaviour of execute1 is
signal x_to_divider: Execute1ToDividerType; signal x_to_divider: Execute1ToDividerType;
signal divider_to_x: DividerToExecute1Type := DividerToExecute1Init; signal divider_to_x: DividerToExecute1Type := DividerToExecute1Init;


-- bit-sort unit signals
signal bsort_start : std_ulogic;
signal bsort_done : std_ulogic;

-- random number generator signals -- random number generator signals
signal random_raw : std_ulogic_vector(63 downto 0); signal random_raw : std_ulogic_vector(63 downto 0);
signal random_cond : std_ulogic_vector(63 downto 0); signal random_cond : std_ulogic_vector(63 downto 0);
@ -493,6 +501,18 @@ begin
); );
end generate; end generate;


bsort_0: entity work.bit_sorter
port map (
clk => clk,
rst => rst,
rs => c_in,
rb => b_in,
go => bsort_start,
opc => e_in.insn(7 downto 6),
done => bsort_done,
result => bsort_result
);

random_0: entity work.random random_0: entity work.random
port map ( port map (
clk => clk, clk => clk,
@ -664,7 +684,7 @@ begin
adder_result when "000", adder_result when "000",
logical_result when "001", logical_result when "001",
rotator_result when "010", rotator_result when "010",
muldiv_result when "100", multicyc_result when "100",
ramspr_result when "101", ramspr_result when "101",
misc_result when others; misc_result when others;


@ -845,17 +865,21 @@ begin
x_to_mult_32s.subtract <= '0'; x_to_mult_32s.subtract <= '0';
x_to_mult_32s.addend <= (others => '0'); x_to_mult_32s.addend <= (others => '0');


case ex1.mul_select is if ex1.mul_select(2) = '0' then
case ex1.mul_select(1 downto 0) is
when "00" => when "00" =>
muldiv_result <= multiply_to_x.result(63 downto 0); multicyc_result <= multiply_to_x.result(63 downto 0);
when "01" => when "01" =>
muldiv_result <= multiply_to_x.result(127 downto 64); multicyc_result <= multiply_to_x.result(63 downto 32) &
when "10" =>
muldiv_result <= multiply_to_x.result(63 downto 32) &
multiply_to_x.result(63 downto 32); multiply_to_x.result(63 downto 32);
when others => when others =>
muldiv_result <= divider_to_x.write_reg_data; multicyc_result <= multiply_to_x.result(127 downto 64);
end case; end case;
elsif ex1.mul_select(0) = '1' and not HAS_FPU then
multicyc_result <= divider_to_x.write_reg_data;
else
multicyc_result <= bsort_result;
end if;


-- Compute misc_result -- Compute misc_result
case e_in.sub_select is case e_in.sub_select is
@ -1266,7 +1290,7 @@ begin
end if; end if;
v.do_trace := '0'; v.do_trace := '0';


when OP_CNTZ | OP_POPCNT => when OP_COUNTB =>
v.res2_sel := "01"; v.res2_sel := "01";
slow_op := '1'; slow_op := '1';
when OP_ISEL => when OP_ISEL =>
@ -1388,6 +1412,11 @@ begin
when OP_ICBI => when OP_ICBI =>
v.se.icache_inval := '1'; v.se.icache_inval := '1';


when OP_BSORT =>
v.start_bsort := '1';
slow_op := '1';
owait := '1';

when OP_MUL_L64 => when OP_MUL_L64 =>
if e_in.is_32bit = '1' then if e_in.is_32bit = '1' then
v.se.mult_32s := '1'; v.se.mult_32s := '1';
@ -1565,7 +1594,7 @@ begin
v.oe := e_in.oe; v.oe := e_in.oe;
v.spr_select := e_in.spr_select; v.spr_select := e_in.spr_select;
v.pmu_spr_num := e_in.insn(20 downto 16); v.pmu_spr_num := e_in.insn(20 downto 16);
v.mul_select := e_in.sub_select(1 downto 0); v.mul_select := e_in.sub_select;
v.se := side_effect_init; v.se := side_effect_init;
v.ramspr_wraddr := e_in.ramspr_wraddr; v.ramspr_wraddr := e_in.ramspr_wraddr;
v.lr_from_next := e_in.lr; v.lr_from_next := e_in.lr;
@ -1596,7 +1625,7 @@ begin
rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0'; rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';
rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0'; rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0';


do_popcnt <= '1' when e_in.insn_type = OP_POPCNT else '0'; do_popcnt <= '1' when e_in.insn_type = OP_COUNTB and e_in.insn(7 downto 6) = "11" else '0';


if valid_in = '1' then if valid_in = '1' then
v.prev_op := e_in.insn_type; v.prev_op := e_in.insn_type;
@ -1671,6 +1700,7 @@ begin
v.mul_in_progress := actions.start_mul; v.mul_in_progress := actions.start_mul;
x_to_divider.valid <= actions.start_div; x_to_divider.valid <= actions.start_div;
v.div_in_progress := actions.start_div; v.div_in_progress := actions.start_div;
v.bsort_in_progress := actions.start_bsort;
v.br_mispredict := v.e.redirect and actions.direct_branch; v.br_mispredict := v.e.redirect and actions.direct_branch;
v.advance_nia := actions.advance_nia; v.advance_nia := actions.advance_nia;
v.redir_to_next := actions.redir_to_next; v.redir_to_next := actions.redir_to_next;
@ -1681,7 +1711,7 @@ begin
-- multiply is happening in order to stop following -- multiply is happening in order to stop following
-- instructions from using the wrong XER value -- instructions from using the wrong XER value
-- (and for simplicity in the OE=0 case). -- (and for simplicity in the OE=0 case).
v.busy := actions.start_div or actions.start_mul; v.busy := actions.start_div or actions.start_mul or actions.start_bsort;


-- instruction for other units, i.e. LDST -- instruction for other units, i.e. LDST
if e_in.unit = LDST then if e_in.unit = LDST then
@ -1692,6 +1722,7 @@ begin
end if; end if;
end if; end if;
is_scv := go and actions.se.scv_trap; is_scv := go and actions.se.scv_trap;
bsort_start <= go and actions.start_bsort;


if not HAS_FPU and ex1.div_in_progress = '1' then if not HAS_FPU and ex1.div_in_progress = '1' then
v.div_in_progress := not divider_to_x.valid; v.div_in_progress := not divider_to_x.valid;
@ -1724,6 +1755,13 @@ begin
end if; end if;
v.e.valid := '1'; v.e.valid := '1';
end if; end if;
if ex1.bsort_in_progress = '1' then
v.bsort_in_progress := not bsort_done;
v.e.valid := bsort_done;
v.busy := not bsort_done;
v.e.write_data := alu_result;
bypass_valid := bsort_done;
end if;


if v.e.write_xerc_enable = '1' and v.e.valid = '1' then if v.e.write_xerc_enable = '1' and v.e.valid = '1' then
v.xerc := v.e.xerc; v.xerc := v.e.xerc;

@ -20,6 +20,7 @@ filesets:
- sim_console.vhdl - sim_console.vhdl
- logical.vhdl - logical.vhdl
- countbits.vhdl - countbits.vhdl
- bitsort.vhdl
- control.vhdl - control.vhdl
- execute1.vhdl - execute1.vhdl
- fpu.vhdl - fpu.vhdl

@ -219,6 +219,7 @@ architecture behaviour of predecoder is
2#0_00101_11011# => INSN_brd, 2#0_00101_11011# => INSN_brd,
2#0_01001_11010# => INSN_cbcdtd, 2#0_01001_11010# => INSN_cbcdtd,
2#0_01000_11010# => INSN_cdtbcd, 2#0_01000_11010# => INSN_cdtbcd,
2#0_00110_11100# => INSN_cfuged,
2#0_00000_00000# => INSN_cmp, 2#0_00000_00000# => INSN_cmp,
2#0_01111_11100# => INSN_cmpb, 2#0_01111_11100# => INSN_cmpb,
2#0_00111_00000# => INSN_cmpeqb, 2#0_00111_00000# => INSN_cmpeqb,
@ -363,6 +364,8 @@ architecture behaviour of predecoder is
2#0_00011_11100# => INSN_nor, 2#0_00011_11100# => INSN_nor,
2#0_01101_11100# => INSN_or, 2#0_01101_11100# => INSN_or,
2#0_01100_11100# => INSN_orc, 2#0_01100_11100# => INSN_orc,
2#0_00100_11100# => INSN_pdepd,
2#0_00101_11100# => INSN_pextd,
2#0_00011_11010# => INSN_popcntb, 2#0_00011_11010# => INSN_popcntb,
2#0_01111_11010# => INSN_popcntd, 2#0_01111_11010# => INSN_popcntd,
2#0_01011_11010# => INSN_popcntw, 2#0_01011_11010# => INSN_popcntw,

@ -87,11 +87,11 @@ const char *units[4] = { "al", "ls", "fp", "3?" };
const char *ops[64] = const char *ops[64] =
{ {
"illegal", "nop ", "add ", "attn ", "b ", "bc ", "bcreg ", "bcd ", "illegal", "nop ", "add ", "attn ", "b ", "bc ", "bcreg ", "bcd ",
"bperm ", "brev ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "bperm ", "brev ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "countb ", "crop ",
"darn ", "dcbf ", "dcbst ", "xcbt ", "dcbtst ", "dcbz ", "icbi ", "fpcmp ", "darn ", "dcbf ", "dcbst ", "xcbt ", "dcbtst ", "dcbz ", "icbi ", "fpcmp ",
"fparith", "fpmove ", "fpmisc ", "div ", "dive ", "mod ", "exts ", "extswsl", "fparith", "fpmove ", "fpmisc ", "div ", "dive ", "mod ", "exts ", "extswsl",
"isel ", "isync ", "logic ", "ld ", "st ", "mcrxrx ", "mfcr ", "mfmsr ", "isel ", "isync ", "logic ", "ld ", "st ", "mcrxrx ", "mfcr ", "mfmsr ",
"mfspr ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "popcnt ", "mfspr ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "bsort ",
"prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ", "prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ",
"shr ", "sync ", "tlbie ", "trap ", "xor ", "addg6s ", "wait ", "ffail ", "shr ", "sync ", "tlbie ", "trap ", "xor ", "addg6s ", "wait ", "ffail ",
}; };

Loading…
Cancel
Save