execute: Move popcnt and prty instructions into the logical unit

This implements logic in the logical entity to calculate the results
of the popcnt* and prty* instructions.  We now have one insn_type_t
value for the 3 popcnt variants and one for the two prty variants,
using the length field of the decode_rom_t to distinguish between
them.  The implementations in logical.vhdl using recursive
algorithms rather than the simple functions in ppc_fx_insns.vhdl.

This gives a saving of about 140 slice LUTs on the A7-100 and
improves timing slightly.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/134/head
Paul Mackerras 5 years ago
parent d2ca625b3b
commit 0c714f1be6

@ -263,11 +263,11 @@ architecture behaviour of decode1 is
2#0001111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nor
2#0110111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- or
2#0110011100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- orc
2#0001111010# => (ALU, OP_POPCNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntb
2#0111111010# => (ALU, OP_POPCNTD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntd
2#0101111010# => (ALU, OP_POPCNTW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntw
2#0010111010# => (ALU, OP_PRTYD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyd
2#0010011010# => (ALU, OP_PRTYW, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyw
2#0001111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntb
2#0111111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntd
2#0101111010# => (ALU, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- popcntw
2#0010111010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyd
2#0010011010# => (ALU, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- prtyw
-- 2#0010000000# setb
2#0000011011# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- sld
2#0000011000# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- slw

@ -14,8 +14,8 @@ package decode_types is
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_POPCNT, OP_PRTY,
OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_SHL, OP_SHR,
OP_SYNC, OP_TD, OP_TDI, OP_TW,
OP_TWI, OP_XOR, OP_SIM_CONFIG

@ -54,6 +54,8 @@ architecture behaviour of execute1 is
signal rotator_carry: std_ulogic;
signal logical_result: std_ulogic_vector(63 downto 0);
signal countzero_result: std_ulogic_vector(63 downto 0);
signal popcnt_result: std_ulogic_vector(63 downto 0);
signal parity_result: std_ulogic_vector(63 downto 0);

-- multiply signals
signal x_to_multiply: Execute1ToMultiplyType;
@ -127,7 +129,10 @@ begin
op => e_in.insn_type,
invert_in => e_in.invert_a,
invert_out => e_in.invert_out,
result => logical_result
result => logical_result,
datalen => e_in.data_len,
popcnt => popcnt_result,
parity => parity_result
);

countzero_0: entity work.zero_counter
@ -612,20 +617,11 @@ begin
-- when others =>
-- end case;
end if;
when OP_POPCNTB =>
result := ppc_popcntb(e_in.read_data3);
when OP_POPCNT =>
result := popcnt_result;
result_en := '1';
when OP_POPCNTW =>
result := ppc_popcntw(e_in.read_data3);
result_en := '1';
when OP_POPCNTD =>
result := ppc_popcntd(e_in.read_data3);
result_en := '1';
when OP_PRTYD =>
result := ppc_prtyd(e_in.read_data3);
result_en := '1';
when OP_PRTYW =>
result := ppc_prtyw(e_in.read_data3);
when OP_PRTY =>
result := parity_result;
result_en := '1';
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
result := rotator_result;

@ -12,11 +12,29 @@ entity logical is
op : in insn_type_t;
invert_in : in std_ulogic;
invert_out : in std_ulogic;
result : out std_ulogic_vector(63 downto 0)
result : out std_ulogic_vector(63 downto 0);
datalen : in std_logic_vector(3 downto 0);
popcnt : out std_ulogic_vector(63 downto 0);
parity : out std_ulogic_vector(63 downto 0)
);
end entity logical;

architecture behaviour of logical is

subtype twobit is unsigned(1 downto 0);
type twobit32 is array(0 to 31) of twobit;
signal pc2 : twobit32;
subtype threebit is unsigned(2 downto 0);
type threebit16 is array(0 to 15) of threebit;
signal pc4 : threebit16;
subtype fourbit is unsigned(3 downto 0);
type fourbit8 is array(0 to 7) of fourbit;
signal pc8 : fourbit8;
subtype sixbit is unsigned(5 downto 0);
type sixbit2 is array(0 to 1) of sixbit;
signal pc32 : sixbit2;
signal par0, par1 : std_ulogic;

begin
logical_0: process(all)
variable rb_adj, tmp : std_ulogic_vector(63 downto 0);
@ -40,5 +58,45 @@ begin
result <= not tmp;
end if;

-- population counts
for i in 0 to 31 loop
pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1));
end loop;
for i in 0 to 15 loop
pc4(i) <= ('0' & pc2(i * 2)) + ('0' & pc2(i * 2 + 1));
end loop;
for i in 0 to 7 loop
pc8(i) <= ('0' & pc4(i * 2)) + ('0' & pc4(i * 2 + 1));
end loop;
for i in 0 to 1 loop
pc32(i) <= ("00" & pc8(i * 4)) + ("00" & pc8(i * 4 + 1)) +
("00" & pc8(i * 4 + 2)) + ("00" & pc8(i * 4 + 3));
end loop;
popcnt <= (others => '0');
if datalen(3 downto 2) = "00" then
-- popcntb
for i in 0 to 7 loop
popcnt(i * 8 + 3 downto i * 8) <= std_ulogic_vector(pc8(i));
end loop;
elsif datalen(3) = '0' then
-- popcntw
for i in 0 to 1 loop
popcnt(i * 32 + 5 downto i * 32) <= std_ulogic_vector(pc32(i));
end loop;
else
popcnt(6 downto 0) <= std_ulogic_vector(('0' & pc32(0)) + ('0' & pc32(1)));
end if;

-- parity calculations
par0 <= rs(0) xor rs(8) xor rs(16) xor rs(24);
par1 <= rs(32) xor rs(40) xor rs(48) xor rs(56);
parity <= (others => '0');
if datalen(3) = '1' then
parity(0) <= par0 xor par1;
else
parity(0) <= par0;
parity(32) <= par1;
end if;

end process;
end behaviour;

Loading…
Cancel
Save