You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
microwatt/execute1.vhdl

1504 lines
55 KiB
VHDL

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.decode_types.all;
use work.common.all;
use work.helpers.all;
use work.crhelpers.all;
use work.insn_helpers.all;
use work.ppc_fx_insns.all;
entity execute1 is
generic (
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
HAS_SHORT_MULT : boolean := false;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
port (
clk : in std_ulogic;
rst : in std_ulogic;
-- asynchronous
flush_in : in std_ulogic;
busy_out : out std_ulogic;
e_in : in Decode2ToExecute1Type;
l_in : in Loadstore1ToExecute1Type;
fp_in : in FPUToExecute1Type;
ext_irq_in : std_ulogic;
interrupt_in : std_ulogic;
-- asynchronous
l_out : out Execute1ToLoadstore1Type;
fp_out : out Execute1ToFPUType;
e_out : out Execute1ToWritebackType;
bypass_data : out bypass_data_t;
bypass_cr_data : out cr_bypass_data_t;
dbg_ctrl_out : out ctrl_t;
icache_inval : out std_ulogic;
terminate_out : out std_ulogic;
-- PMU event buses
wb_events : in WritebackEventType;
ls_events : in Loadstore1EventType;
dc_events : in DcacheEventType;
ic_events : in IcacheEventType;
log_out : out std_ulogic_vector(14 downto 0);
log_rd_addr : out std_ulogic_vector(31 downto 0);
log_rd_data : in std_ulogic_vector(63 downto 0);
log_wr_addr : in std_ulogic_vector(31 downto 0)
);
end entity execute1;
architecture behaviour of execute1 is
type reg_type is record
e : Execute1ToWritebackType;
busy: std_ulogic;
terminate: std_ulogic;
intr_pending : std_ulogic;
fp_exception_next : std_ulogic;
trace_next : std_ulogic;
prev_op : insn_type_t;
br_taken : std_ulogic;
oe : std_ulogic;
mul_select : std_ulogic_vector(1 downto 0);
mul_in_progress : std_ulogic;
mul_finish : std_ulogic;
div_in_progress : std_ulogic;
cntz_in_progress : std_ulogic;
no_instr_avail : std_ulogic;
instr_dispatch : std_ulogic;
ext_interrupt : std_ulogic;
taken_branch_event : std_ulogic;
br_mispredict : std_ulogic;
log_addr_spr : std_ulogic_vector(31 downto 0);
end record;
constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit,
busy => '0', terminate => '0', intr_pending => '0',
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, br_taken => '0',
oe => '0', mul_select => "00",
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
taken_branch_event => '0', br_mispredict => '0',
others => (others => '0'));
type actions_type is record
e : Execute1ToWritebackType;
complete : std_ulogic;
exception : std_ulogic;
trap : std_ulogic;
terminate : std_ulogic;
write_msr : std_ulogic;
new_msr : std_ulogic_vector(63 downto 0);
write_xerlow : std_ulogic;
write_pmuspr : std_ulogic;
write_dec : std_ulogic;
write_loga : std_ulogic;
inc_loga : std_ulogic;
write_cfar : std_ulogic;
take_branch : std_ulogic;
direct_branch : std_ulogic;
start_mul : std_ulogic;
start_div : std_ulogic;
start_cntz : std_ulogic;
do_trace : std_ulogic;
fp_intr : std_ulogic;
icache_inval : std_ulogic;
end record;
constant actions_type_init : actions_type :=
(e => Execute1ToWritebackInit, new_msr => (others => '0'), others => '0');
signal r, rin : reg_type;
signal actions : actions_type;
signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
signal cr_in : std_ulogic_vector(31 downto 0);
signal xerc_in : xer_common_t;
signal mshort_p : std_ulogic_vector(31 downto 0) := (others => '0');
signal valid_in : std_ulogic;
signal ctrl: ctrl_t := ctrl_t_init;
signal ctrl_tmp: ctrl_t := ctrl_t_init;
signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
signal rot_sign_ext: std_ulogic;
signal rotator_result: std_ulogic_vector(63 downto 0);
signal rotator_carry: std_ulogic;
signal logical_result: std_ulogic_vector(63 downto 0);
signal do_popcnt: std_ulogic;
signal countbits_result: std_ulogic_vector(63 downto 0);
signal alu_result: std_ulogic_vector(63 downto 0);
signal adder_result: std_ulogic_vector(63 downto 0);
signal misc_result: std_ulogic_vector(63 downto 0);
signal muldiv_result: std_ulogic_vector(63 downto 0);
signal shortmul_result: std_ulogic_vector(63 downto 0);
signal spr_result: std_ulogic_vector(63 downto 0);
signal next_nia : std_ulogic_vector(63 downto 0);
signal carry_32 : std_ulogic;
signal carry_64 : std_ulogic;
signal overflow_32 : std_ulogic;
signal overflow_64 : std_ulogic;
signal trapval : std_ulogic_vector(4 downto 0);
signal write_cr_mask : std_ulogic_vector(7 downto 0);
signal write_cr_data : std_ulogic_vector(31 downto 0);
-- multiply signals
signal x_to_multiply: MultiplyInputType;
signal multiply_to_x: MultiplyOutputType;
-- divider signals
signal x_to_divider: Execute1ToDividerType;
signal divider_to_x: DividerToExecute1Type;
-- random number generator signals
signal random_raw : std_ulogic_vector(63 downto 0);
signal random_cond : std_ulogic_vector(63 downto 0);
signal random_err : std_ulogic;
-- PMU signals
signal x_to_pmu : Execute1ToPMUType;
signal pmu_to_x : PMUToExecute1Type;
-- signals for logging
signal exception_log : std_ulogic;
signal irq_valid_log : std_ulogic;
type privilege_level is (USER, SUPER);
type op_privilege_array is array(insn_type_t) of privilege_level;
constant op_privilege: op_privilege_array := (
OP_ATTN => SUPER,
OP_MFMSR => SUPER,
OP_MTMSRD => SUPER,
OP_RFID => SUPER,
dcache: Implement data TLB This adds a TLB to dcache, providing the ability to translate addresses for loads and stores. No protection mechanism has been implemented yet. The MSR_DR bit controls whether addresses are translated through the TLB. The TLB is a fixed-pagesize, set-associative cache. Currently the page size is 4kB and the TLB is 2-way set associative with 64 entries per set. This implements the tlbie instruction. RB bits 10 and 11 control whether the whole TLB is invalidated (if either bit is 1) or just a single entry corresponding to the effective page number in bits 12-63 of RB. As an extension until we get a hardware page table walk, a tlbie instruction with RB bits 9-11 set to 001 will load an entry into the TLB. The TLB entry value is in RS in the format of a radix PTE. Currently there is no proper handling of TLB misses. The load or store will not be performed but no interrupt is generated. In order to make timing at 100MHz on the Arty A7-100, we compare the real address from each way of the TLB with the tag from each way of the cache in parallel (requiring # TLB ways * # cache ways comparators). Then the result is selected based on which way hit in the TLB. That avoids a timing path going through the TLB EA comparators, the multiplexer that selects the RA, and the cache tag comparators. The hack where addresses of the form 0xc------- are marked as cache-inhibited is kept for now but restricted to real-mode accesses. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
OP_TLBIE => SUPER,
others => USER
);
function instr_is_privileged(op: insn_type_t; insn: std_ulogic_vector(31 downto 0))
return boolean is
begin
if op_privilege(op) = SUPER then
return true;
elsif op = OP_MFSPR or op = OP_MTSPR then
return insn(20) = '1';
else
return false;
end if;
end;
Add basic XER support The carry is currently internal to execute1. We don't handle any of the other XER fields. This creates type called "xer_common_t" that contains the commonly used XER bits (CA, CA32, SO, OV, OV32). The value is stored in the CR file (though it could be a separate module). The rest of the bits will be implemented as a separate SPR and the two parts reconciled in mfspr/mtspr in latter commits. We always read XER in decode2 (there is little point not to) and send it down all pipeline branches as it will be needed in writeback for all type of instructions when CR0:SO needs to be updated (such forms exist for all pipeline branches even if we don't yet implement them). To avoid having to track XER hazards, we forward it back in EX1. This assumes that other pipeline branches that can modify it (mult and div) are running single issue for now. One additional hazard to beware of is an XER:SO modifying instruction in EX1 followed immediately by a store conditional. Due to our writeback latency, the store will go down the LSU with the previous XER value, thus the stcx. will set CR0:SO using an obsolete SO value. I doubt there exist any code relying on this behaviour being correct but we should account for it regardless, possibly by ensuring that stcx. remain single issue initially, or later by adding some minimal tracking or moving the LSU into the same pipeline as execute. Missing some obscure XER affecting instructions like addex or mcrxrx. [paulus@ozlabs.org - fix CA32 and OV32 for OP_ADD, fix order of arguments to set_ov] Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
procedure set_carry(e: inout Execute1ToWritebackType;
carry32 : in std_ulogic;
carry : in std_ulogic) is
begin
Add basic XER support The carry is currently internal to execute1. We don't handle any of the other XER fields. This creates type called "xer_common_t" that contains the commonly used XER bits (CA, CA32, SO, OV, OV32). The value is stored in the CR file (though it could be a separate module). The rest of the bits will be implemented as a separate SPR and the two parts reconciled in mfspr/mtspr in latter commits. We always read XER in decode2 (there is little point not to) and send it down all pipeline branches as it will be needed in writeback for all type of instructions when CR0:SO needs to be updated (such forms exist for all pipeline branches even if we don't yet implement them). To avoid having to track XER hazards, we forward it back in EX1. This assumes that other pipeline branches that can modify it (mult and div) are running single issue for now. One additional hazard to beware of is an XER:SO modifying instruction in EX1 followed immediately by a store conditional. Due to our writeback latency, the store will go down the LSU with the previous XER value, thus the stcx. will set CR0:SO using an obsolete SO value. I doubt there exist any code relying on this behaviour being correct but we should account for it regardless, possibly by ensuring that stcx. remain single issue initially, or later by adding some minimal tracking or moving the LSU into the same pipeline as execute. Missing some obscure XER affecting instructions like addex or mcrxrx. [paulus@ozlabs.org - fix CA32 and OV32 for OP_ADD, fix order of arguments to set_ov] Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
e.xerc.ca32 := carry32;
e.xerc.ca := carry;
end;
procedure set_ov(e: inout Execute1ToWritebackType;
ov : in std_ulogic;
ov32 : in std_ulogic) is
begin
e.xerc.ov32 := ov32;
e.xerc.ov := ov;
if ov = '1' then
e.xerc.so := '1';
end if;
end;
function calc_ov(msb_a : std_ulogic; msb_b: std_ulogic;
ca: std_ulogic; msb_r: std_ulogic) return std_ulogic is
begin
return (ca xor msb_r) and not (msb_a xor msb_b);
end;
function decode_input_carry(ic : carry_in_t;
xerc : xer_common_t) return std_ulogic is
begin
case ic is
when ZERO =>
return '0';
when CA =>
Add basic XER support The carry is currently internal to execute1. We don't handle any of the other XER fields. This creates type called "xer_common_t" that contains the commonly used XER bits (CA, CA32, SO, OV, OV32). The value is stored in the CR file (though it could be a separate module). The rest of the bits will be implemented as a separate SPR and the two parts reconciled in mfspr/mtspr in latter commits. We always read XER in decode2 (there is little point not to) and send it down all pipeline branches as it will be needed in writeback for all type of instructions when CR0:SO needs to be updated (such forms exist for all pipeline branches even if we don't yet implement them). To avoid having to track XER hazards, we forward it back in EX1. This assumes that other pipeline branches that can modify it (mult and div) are running single issue for now. One additional hazard to beware of is an XER:SO modifying instruction in EX1 followed immediately by a store conditional. Due to our writeback latency, the store will go down the LSU with the previous XER value, thus the stcx. will set CR0:SO using an obsolete SO value. I doubt there exist any code relying on this behaviour being correct but we should account for it regardless, possibly by ensuring that stcx. remain single issue initially, or later by adding some minimal tracking or moving the LSU into the same pipeline as execute. Missing some obscure XER affecting instructions like addex or mcrxrx. [paulus@ozlabs.org - fix CA32 and OV32 for OP_ADD, fix order of arguments to set_ov] Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
return xerc.ca;
when OV =>
return xerc.ov;
when ONE =>
return '1';
end case;
end;
Add basic XER support The carry is currently internal to execute1. We don't handle any of the other XER fields. This creates type called "xer_common_t" that contains the commonly used XER bits (CA, CA32, SO, OV, OV32). The value is stored in the CR file (though it could be a separate module). The rest of the bits will be implemented as a separate SPR and the two parts reconciled in mfspr/mtspr in latter commits. We always read XER in decode2 (there is little point not to) and send it down all pipeline branches as it will be needed in writeback for all type of instructions when CR0:SO needs to be updated (such forms exist for all pipeline branches even if we don't yet implement them). To avoid having to track XER hazards, we forward it back in EX1. This assumes that other pipeline branches that can modify it (mult and div) are running single issue for now. One additional hazard to beware of is an XER:SO modifying instruction in EX1 followed immediately by a store conditional. Due to our writeback latency, the store will go down the LSU with the previous XER value, thus the stcx. will set CR0:SO using an obsolete SO value. I doubt there exist any code relying on this behaviour being correct but we should account for it regardless, possibly by ensuring that stcx. remain single issue initially, or later by adding some minimal tracking or moving the LSU into the same pipeline as execute. Missing some obscure XER affecting instructions like addex or mcrxrx. [paulus@ozlabs.org - fix CA32 and OV32 for OP_ADD, fix order of arguments to set_ov] Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
function msr_copy(msr: std_ulogic_vector(63 downto 0))
return std_ulogic_vector is
variable msr_out: std_ulogic_vector(63 downto 0);
begin
-- ISA says this:
-- Defined MSR bits are classified as either full func-
-- tion or partial function. Full function MSR bits are
-- saved in SRR1 or HSRR1 when an interrupt other
-- than a System Call Vectored interrupt occurs and
-- restored by rfscv, rfid, or hrfid, while partial func-
-- tion MSR bits are not saved or restored.
-- Full function MSR bits lie in the range 0:32, 37:41, and
-- 48:63, and partial function MSR bits lie in the range
-- 33:36 and 42:47. (Note this is IBM bit numbering).
msr_out := (others => '0');
msr_out(63 downto 31) := msr(63 downto 31);
msr_out(26 downto 22) := msr(26 downto 22);
msr_out(15 downto 0) := msr(15 downto 0);
return msr_out;
end;
-- Work out whether a signed value fits into n bits,
-- that is, see if it is in the range -2^(n-1) .. 2^(n-1) - 1
function fits_in_n_bits(val: std_ulogic_vector; n: integer) return boolean is
variable x, xp1: std_ulogic_vector(val'left downto val'right);
begin
x := val;
if val(val'left) = '0' then
x := not val;
end if;
xp1 := bit_reverse(std_ulogic_vector(unsigned(bit_reverse(x)) + 1));
x := x and not xp1;
-- For positive inputs, x has ones at the positions
-- to the left of the leftmost 1 bit in val.
-- For negative inputs, x has ones to the left of
-- the leftmost 0 bit in val.
return x(n - 1) = '1';
end;
function assemble_xer(xerc: xer_common_t; xer_low: std_ulogic_vector)
return std_ulogic_vector is
begin
return 32x"0" & xerc.so & xerc.ov & xerc.ca & "000000000" &
xerc.ov32 & xerc.ca32 & xer_low(17 downto 0);
end;
-- Tell vivado to keep the hierarchy for the random module so that the
-- net names in the xdc file match.
attribute keep_hierarchy : string;
attribute keep_hierarchy of random_0 : label is "yes";
begin
rotator_0: entity work.rotator
port map (
rs => c_in,
ra => a_in,
shift => b_in(6 downto 0),
insn => e_in.insn,
is_32bit => e_in.is_32bit,
right_shift => right_shift,
arith => e_in.is_signed,
clear_left => rot_clear_left,
clear_right => rot_clear_right,
sign_ext_rs => rot_sign_ext,
result => rotator_result,
carry_out => rotator_carry
);
logical_0: entity work.logical
port map (
rs => c_in,
rb => b_in,
op => e_in.insn_type,
invert_in => e_in.invert_a,
invert_out => e_in.invert_out,
result => logical_result,
datalen => e_in.data_len
);
countbits_0: entity work.bit_counter
port map (
clk => clk,
rs => c_in,
count_right => e_in.insn(10),
is_32bit => e_in.is_32bit,
do_popcnt => do_popcnt,
datalen => e_in.data_len,
result => countbits_result
);
multiply_0: entity work.multiply
port map (
clk => clk,
m_in => x_to_multiply,
m_out => multiply_to_x
);
divider_0: entity work.divider
port map (
clk => clk,
rst => rst,
d_in => x_to_divider,
d_out => divider_to_x
);
random_0: entity work.random
port map (
clk => clk,
data => random_cond,
raw => random_raw,
err => random_err
);
pmu_0: entity work.pmu
port map (
clk => clk,
rst => rst,
p_in => x_to_pmu,
p_out => pmu_to_x
);
short_mult_0: if HAS_SHORT_MULT generate
begin
short_mult: entity work.short_multiply
port map (
clk => clk,
a_in => a_in(15 downto 0),
b_in => b_in(15 downto 0),
m_out => mshort_p
);
end generate;
dbg_ctrl_out <= ctrl;
log_rd_addr <= r.log_addr_spr;
a_in <= e_in.read_data1;
b_in <= e_in.read_data2;
c_in <= e_in.read_data3;
cr_in <= e_in.cr;
x_to_pmu.occur <= (instr_complete => wb_events.instr_complete,
fp_complete => wb_events.fp_complete,
ld_complete => ls_events.load_complete,
st_complete => ls_events.store_complete,
itlb_miss => ls_events.itlb_miss,
dc_load_miss => dc_events.load_miss,
dc_ld_miss_resolved => dc_events.dcache_refill,
dc_store_miss => dc_events.store_miss,
dtlb_miss => dc_events.dtlb_miss,
dtlb_miss_resolved => dc_events.dtlb_miss_resolved,
icache_miss => ic_events.icache_miss,
itlb_miss_resolved => ic_events.itlb_miss_resolved,
no_instr_avail => r.no_instr_avail,
dispatch => r.instr_dispatch,
ext_interrupt => r.ext_interrupt,
br_taken_complete => r.taken_branch_event,
br_mispredict => r.br_mispredict,
others => '0');
x_to_pmu.nia <= e_in.nia;
x_to_pmu.addr <= (others => '0');
x_to_pmu.addr_v <= '0';
x_to_pmu.spr_num <= e_in.insn(20 downto 16);
x_to_pmu.spr_val <= c_in;
x_to_pmu.run <= '1';
-- XER forwarding. To avoid having to track XER hazards, we use
-- the previously latched value. Since the XER common bits
-- (SO, OV[32] and CA[32]) are only modified by instructions that are
-- handled here, we can just forward the result being sent to
-- writeback.
xerc_in <= r.e.xerc when (r.e.write_xerc_enable and r.e.valid) = '1' else e_in.xerc;
with e_in.unit select busy_out <=
l_in.busy or r.busy or fp_in.busy when LDST,
l_in.busy or l_in.in_progress or r.busy or fp_in.busy when others;
valid_in <= e_in.valid and not busy_out and not flush_in;
terminate_out <= r.terminate;
-- Slow SPR read mux
with e_in.spr_select.sel select spr_result <=
ctrl.tb when SPRSEL_TB,
32x"0" & ctrl.tb(63 downto 32) when SPRSEL_TBU,
ctrl.dec when SPRSEL_DEC,
32x"0" & PVR_MICROWATT when SPRSEL_PVR,
log_wr_addr & r.log_addr_spr when SPRSEL_LOGA,
log_rd_data when SPRSEL_LOGD,
ctrl.cfar when SPRSEL_CFAR,
assemble_xer(xerc_in, ctrl.xer_low) when others;
-- Result mux
with e_in.result_sel select alu_result <=
adder_result when "000",
logical_result when "001",
rotator_result when "010",
shortmul_result when "011",
pmu_to_x.spr_val when "100",
spr_result when "101",
next_nia when "110",
misc_result when others;
execute1_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
r <= reg_type_init;
ctrl <= ctrl_t_init;
ctrl.msr <= (MSR_SF => '1', MSR_LE => '1', others => '0');
else
r <= rin;
ctrl <= ctrl_tmp;
if valid_in = '1' then
report "execute " & to_hstring(e_in.nia) & " op=" & insn_type_t'image(e_in.insn_type) &
" wr=" & to_hstring(rin.e.write_reg) & " we=" & std_ulogic'image(rin.e.write_enable) &
" tag=" & integer'image(rin.e.instr_tag.tag) & std_ulogic'image(rin.e.instr_tag.valid);
end if;
end if;
end if;
end process;
-- Data path for integer instructions
execute1_dp: process(all)
variable a_inv : std_ulogic_vector(63 downto 0);
variable b_or_m1 : std_ulogic_vector(63 downto 0);
variable sum_with_carry : std_ulogic_vector(64 downto 0);
variable sign1, sign2 : std_ulogic;
variable abs1, abs2 : signed(63 downto 0);
variable addend : std_ulogic_vector(127 downto 0);
variable addg6s : std_ulogic_vector(63 downto 0);
variable crbit : integer range 0 to 31;
variable isel_result : std_ulogic_vector(63 downto 0);
variable darn : std_ulogic_vector(63 downto 0);
variable setb_result : std_ulogic_vector(63 downto 0);
variable mfcr_result : std_ulogic_vector(63 downto 0);
variable lo, hi : integer;
variable l : std_ulogic;
variable zerohi, zerolo : std_ulogic;
variable msb_a, msb_b : std_ulogic;
variable a_lt : std_ulogic;
variable a_lt_lo : std_ulogic;
variable a_lt_hi : std_ulogic;
variable newcrf : std_ulogic_vector(3 downto 0);
variable bf, bfa : std_ulogic_vector(2 downto 0);
variable crnum : crnum_t;
variable scrnum : crnum_t;
variable cr_operands : std_ulogic_vector(1 downto 0);
variable crresult : std_ulogic;
variable bt, ba, bb : std_ulogic_vector(4 downto 0);
variable btnum : integer range 0 to 3;
variable banum, bbnum : integer range 0 to 31;
variable j : integer;
begin
-- Main adder
if e_in.invert_a = '0' then
a_inv := a_in;
else
a_inv := not a_in;
end if;
if e_in.addm1 = '0' then
b_or_m1 := b_in;
else
b_or_m1 := (others => '1');
end if;
sum_with_carry := ppc_adde(a_inv, b_or_m1,
decode_input_carry(e_in.input_carry, xerc_in));
adder_result <= sum_with_carry(63 downto 0);
carry_32 <= sum_with_carry(32) xor a_inv(32) xor b_in(32);
carry_64 <= sum_with_carry(64);
overflow_32 <= calc_ov(a_inv(31), b_in(31), carry_32, sum_with_carry(31));
overflow_64 <= calc_ov(a_inv(63), b_in(63), carry_64, sum_with_carry(63));
-- signals to multiply and divide units
sign1 := '0';
sign2 := '0';
if e_in.is_signed = '1' then
if e_in.is_32bit = '1' then
sign1 := a_in(31);
sign2 := b_in(31);
else
sign1 := a_in(63);
sign2 := b_in(63);
end if;
end if;
-- take absolute values
if sign1 = '0' then
abs1 := signed(a_in);
else
abs1 := - signed(a_in);
end if;
if sign2 = '0' then
abs2 := signed(b_in);
else
abs2 := - signed(b_in);
end if;
-- Interface to multiply and divide units
x_to_divider.is_signed <= e_in.is_signed;
x_to_divider.is_32bit <= e_in.is_32bit;
x_to_divider.is_extended <= '0';
x_to_divider.is_modulus <= '0';
if e_in.insn_type = OP_MOD then
x_to_divider.is_modulus <= '1';
end if;
addend := (others => '0');
if e_in.insn(26) = '0' then
-- integer multiply-add, major op 4 (if it is a multiply)
addend(63 downto 0) := c_in;
if e_in.is_signed = '1' then
addend(127 downto 64) := (others => c_in(63));
end if;
end if;
if (sign1 xor sign2) = '1' then
addend := not addend;
end if;
x_to_multiply.is_32bit <= e_in.is_32bit;
x_to_multiply.not_result <= sign1 xor sign2;
x_to_multiply.addend <= addend;
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
if e_in.is_32bit = '0' then
-- 64-bit forms
x_to_multiply.data1 <= std_ulogic_vector(abs1);
x_to_multiply.data2 <= std_ulogic_vector(abs2)