|
|
|
library ieee;
|
|
|
|
use ieee.std_logic_1164.all;
|
|
|
|
use ieee.numeric_std.all;
|
|
|
|
|
|
|
|
library work;
|
|
|
|
use work.decode_types.all;
|
|
|
|
use work.common.all;
|
|
|
|
use work.helpers.all;
|
|
|
|
use work.crhelpers.all;
|
|
|
|
use work.insn_helpers.all;
|
|
|
|
use work.ppc_fx_insns.all;
|
|
|
|
|
|
|
|
-- Interface of the execute1 stage.
-- Decoded instructions arrive on e_in (Decode2ToExecute1Type) and results
-- leave on e_out (Execute1ToWritebackType); l_in/l_out and fp_in/fp_out are
-- the channels to loadstore1 and the FPU as indicated by their record types.
entity execute1 is
    generic (
        EX1_BYPASS     : boolean := true;
        HAS_FPU        : boolean := true;
        -- When true, the short_multiply unit is instantiated
        HAS_SHORT_MULT : boolean := false;
        -- Non-zero to enable log data collection
        LOG_LENGTH     : natural := 0
        );
    port (
        clk            : in  std_ulogic;
        rst            : in  std_ulogic;

        -- asynchronous
        flush_in       : in  std_ulogic;
        busy_out       : out std_ulogic;

        e_in           : in  Decode2ToExecute1Type;
        l_in           : in  Loadstore1ToExecute1Type;
        fp_in          : in  FPUToExecute1Type;

        -- (mode "in" is the VHDL default; it is spelled out here for clarity)
        ext_irq_in     : in  std_ulogic;
        interrupt_in   : in  std_ulogic;

        -- asynchronous
        l_out          : out Execute1ToLoadstore1Type;
        fp_out         : out Execute1ToFPUType;

        e_out          : out Execute1ToWritebackType;
        bypass_data    : out bypass_data_t;
        bypass_cr_data : out cr_bypass_data_t;

        dbg_ctrl_out   : out ctrl_t;

        icache_inval   : out std_ulogic;
        terminate_out  : out std_ulogic;

        -- PMU event buses
        wb_events      : in  WritebackEventType;
        ls_events      : in  Loadstore1EventType;
        dc_events      : in  DcacheEventType;
        ic_events      : in  IcacheEventType;

        log_out        : out std_ulogic_vector(14 downto 0);
        log_rd_addr    : out std_ulogic_vector(31 downto 0);
        log_rd_data    : in  std_ulogic_vector(63 downto 0);
        log_wr_addr    : in  std_ulogic_vector(31 downto 0)
        );
end entity execute1;
|
|
|
|
|
|
|
|
architecture behaviour of execute1 is
|
|
|
|
-- State registered by this stage on every clock edge (signals r / rin).
type reg_type is record
    -- Output record towards writeback (type Execute1ToWritebackType)
    e                 : Execute1ToWritebackType;
    -- Instruction used as "current" while busy is set
    cur_instr         : Decode2ToExecute1Type;
    -- busy feeds busy_out; terminate drives terminate_out
    busy, terminate   : std_ulogic;
    intr_pending      : std_ulogic;
    fp_exception_next : std_ulogic;
    trace_next        : std_ulogic;
    prev_op           : insn_type_t;
    br_taken          : std_ulogic;
    -- Progress flags; presumably for the multi-cycle units -- TODO confirm
    mul_in_progress, mul_finish : std_ulogic;
    div_in_progress   : std_ulogic;
    cntz_in_progress  : std_ulogic;
    -- Event flags forwarded to the PMU through x_to_pmu.occur
    no_instr_avail, instr_dispatch, ext_interrupt : std_ulogic;
    taken_branch_event, br_mispredict : std_ulogic;
    -- Drives log_rd_addr
    log_addr_spr      : std_ulogic_vector(31 downto 0);
end record;
|
execute1: Improve architecture compliance of MSR and related instructions
This makes our treatment of the MSR conform better with the ISA.
- On reset, initialize the MSR to have the SF and LE bits set and
all the others reset. For good measure initialize r properly too.
- Fix the bit numbering in msr_copy (the code was using big-endian
bit numbers, not little-endian).
- Use constants like MSR_EE to index MSR bits instead of expressions
like '63 - 48', for readability.
- Set MSR[SF, LE] and clear MSR[PR, IR, DR, RI] on interrupts.
- Copy the relevant fields for rfid instead of using msr_copy, because
the partial function fields of the MSR should be left unchanged,
not zeroed. Our implementation of rfid is like the architecture
description of hrfid, because we don't implement hypervisor mode.
- Return the whole MSR for mfmsr.
- Implement the L field for mtmsrd (L=1 copies just EE and RI).
- For mtmsrd with L=0, leave out the HV, ME and LE bits as per the arch.
- For mtmsrd and rfid, if PR ends up set, then also set EE, IR and DR
as per the arch.
- A few other minor tidyups (no semantic change).
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- Reset value of the stage registers: the two embedded records take their
-- own init constants, prev_op is OP_ILLEGAL and everything else is zero.
constant reg_type_init : reg_type := (
    e                  => Execute1ToWritebackInit,
    cur_instr          => Decode2ToExecute1Init,
    busy               => '0',
    terminate          => '0',
    intr_pending       => '0',
    fp_exception_next  => '0',
    trace_next         => '0',
    prev_op            => OP_ILLEGAL,
    br_taken           => '0',
    mul_in_progress    => '0',
    mul_finish         => '0',
    div_in_progress    => '0',
    cntz_in_progress   => '0',
    no_instr_avail     => '0',
    instr_dispatch     => '0',
    ext_interrupt      => '0',
    taken_branch_event => '0',
    br_mispredict      => '0',
    log_addr_spr       => (others => '0')
    );
|
|
|
|
|
|
|
|
signal r, rin : reg_type;
|
|
|
|
|
|
|
|
signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
|
|
|
|
signal cr_in : std_ulogic_vector(31 downto 0);
|
|
|
|
signal xerc_in : xer_common_t;
|
|
|
|
signal mshort_p : std_ulogic_vector(31 downto 0) := (others => '0');
|
|
|
|
|
|
|
|
signal valid_in : std_ulogic;
|
|
|
|
signal ctrl: ctrl_t := ctrl_t_init;
|
|
|
|
signal ctrl_tmp: ctrl_t := ctrl_t_init;
|
|
|
|
signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
|
|
|
|
signal rot_sign_ext: std_ulogic;
|
|
|
|
signal rotator_result: std_ulogic_vector(63 downto 0);
|
|
|
|
signal rotator_carry: std_ulogic;
|
|
|
|
signal logical_result: std_ulogic_vector(63 downto 0);
|
|
|
|
signal do_popcnt: std_ulogic;
|
|
|
|
signal countbits_result: std_ulogic_vector(63 downto 0);
|
|
|
|
signal alu_result: std_ulogic_vector(63 downto 0);
|
|
|
|
signal adder_result: std_ulogic_vector(63 downto 0);
|
|
|
|
signal misc_result: std_ulogic_vector(63 downto 0);
|
|
|
|
signal muldiv_result: std_ulogic_vector(63 downto 0);
|
|
|
|
signal spr_result: std_ulogic_vector(63 downto 0);
|
|
|
|
signal next_nia : std_ulogic_vector(63 downto 0);
|
|
|
|
signal current: Decode2ToExecute1Type;
|
Add a rotate/mask/shift unit and use it in execute1
This adds a new entity 'rotator' which contains combinatorial logic
for rotating and masking 64-bit values. It implements the operations
of the rlwinm, rlwnm, rlwimi, rldicl, rldicr, rldic, rldimi, rldcl,
rldcr, sld, slw, srd, srw, srad, sradi, sraw and srawi instructions.
It consists of a 3-stage 64-bit rotator using 4:1 multiplexors at
each stage, two mask generators, output logic and control logic.
The insn_type_t values used for these instructions have been reduced
to just 5: OP_RLC, OP_RLCL and OP_RLCR for the rotate and mask
instructions (clear both left and right, clear left, clear right
variants), OP_SHL for left shifts, and OP_SHR for right shifts.
The control signals for the rotator are derived from the opcode
and from the is_32bit and is_signed fields of the decode_rom_t.
The rotator is instantiated as an entity in execute1 so that we can
be sure we only have one of it.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
|
|
|
|
signal carry_32 : std_ulogic;
|
|
|
|
signal carry_64 : std_ulogic;
|
|
|
|
signal overflow_32 : std_ulogic;
|
|
|
|
signal overflow_64 : std_ulogic;
|
|
|
|
|
|
|
|
signal trapval : std_ulogic_vector(4 downto 0);
|
|
|
|
|
|
|
|
signal write_cr_mask : std_ulogic_vector(7 downto 0);
|
|
|
|
signal write_cr_data : std_ulogic_vector(31 downto 0);
|
|
|
|
|
|
|
|
-- multiply signals
|
|
|
|
signal x_to_multiply: MultiplyInputType;
|
|
|
|
signal multiply_to_x: MultiplyOutputType;
|
|
|
|
|
|
|
|
-- divider signals
|
|
|
|
signal x_to_divider: Execute1ToDividerType;
|
|
|
|
signal divider_to_x: DividerToExecute1Type;
|
|
|
|
|
|
|
|
-- random number generator signals
|
|
|
|
signal random_raw : std_ulogic_vector(63 downto 0);
|
|
|
|
signal random_cond : std_ulogic_vector(63 downto 0);
|
|
|
|
signal random_err : std_ulogic;
|
|
|
|
|
|
|
|
-- PMU signals
|
|
|
|
signal x_to_pmu : Execute1ToPMUType;
|
|
|
|
signal pmu_to_x : PMUToExecute1Type;
|
|
|
|
|
|
|
|
-- signals for logging
|
|
|
|
signal exception_log : std_ulogic;
|
|
|
|
signal irq_valid_log : std_ulogic;
|
|
|
|
|
|
|
|
-- Privilege classification of each operation type.
type privilege_level is (USER, SUPER);
type op_privilege_array is array(insn_type_t) of privilege_level;

-- Operations listed here are always supervisor-only; every other
-- operation type defaults to USER.
constant op_privilege: op_privilege_array := (
    OP_ATTN | OP_MFMSR | OP_MTMSRD | OP_RFID | OP_TLBIE => SUPER,
    others                                              => USER
    );
|
|
|
|
|
|
|
|
-- Report whether an instruction is privileged.
--   op   : decoded operation type
--   insn : raw 32-bit instruction word
-- Returns true when op is marked SUPER in op_privilege, or when op is an
-- SPR move (OP_MFSPR / OP_MTSPR) and bit 20 of the instruction word is set
-- (presumably the SPR-number bit that marks privileged SPRs -- TODO confirm).
function instr_is_privileged(op: insn_type_t; insn: std_ulogic_vector(31 downto 0))
    return boolean is
    variable privileged : boolean;
begin
    privileged := op_privilege(op) = SUPER;
    if not privileged and (op = OP_MFSPR or op = OP_MTSPR) then
        privileged := insn(20) = '1';
    end if;
    return privileged;
end;
|
|
|
|
|
Add basic XER support
The carry is currently internal to execute1. We don't handle any of
the other XER fields.
This creates a type called "xer_common_t" that contains the commonly
used XER bits (CA, CA32, SO, OV, OV32).
The value is stored in the CR file (though it could be a separate
module). The rest of the bits will be implemented as a separate
SPR and the two parts reconciled in mfspr/mtspr in later commits.
We always read XER in decode2 (there is little point not to)
and send it down all pipeline branches as it will be needed in
writeback for all types of instructions when CR0:SO needs to be
updated (such forms exist for all pipeline branches even if we don't
yet implement them).
To avoid having to track XER hazards, we forward it back in EX1. This
assumes that other pipeline branches that can modify it (mult and div)
are running single issue for now.
One additional hazard to beware of is an XER:SO modifying instruction
in EX1 followed immediately by a store conditional. Due to our writeback
latency, the store will go down the LSU with the previous XER value,
thus the stcx. will set CR0:SO using an obsolete SO value.
I doubt there exists any code relying on this behaviour being correct
but we should account for it regardless, possibly by ensuring that
stcx. remains single issue initially, or later by adding some minimal
tracking or moving the LSU into the same pipeline as execute.
Missing some obscure XER affecting instructions like addex or mcrxrx.
[paulus@ozlabs.org - fix CA32 and OV32 for OP_ADD, fix order of
arguments to set_ov]
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- Store the carry outputs of an operation into the XER common bits of e.
--   carry32 : value written to e.xerc.ca32
--   carry   : value written to e.xerc.ca
procedure set_carry(e: inout Execute1ToWritebackType;
                    carry32 : in std_ulogic;
                    carry   : in std_ulogic) is
begin
    e.xerc.ca   := carry;
    e.xerc.ca32 := carry32;
end;
|
|
|
|
|
|
|
|
-- Store the overflow outputs of an operation into the XER common bits of e.
--   ov   : value written to e.xerc.ov
--   ov32 : value written to e.xerc.ov32
-- e.xerc.so is sticky here: it is set when ov is '1' and otherwise left
-- at whatever value it already had.
procedure set_ov(e: inout Execute1ToWritebackType;
                 ov   : in std_ulogic;
                 ov32 : in std_ulogic) is
begin
    e.xerc.ov   := ov;
    e.xerc.ov32 := ov32;
    if ov = '1' then
        e.xerc.so := '1';
    end if;
end;
|
|
|
|
|
|
|
|
-- Signed-overflow indication for an addition.
--   msb_a, msb_b : top bits of the two addends
--   ca           : carry input to the comparison (the callers visible here
--                  pass carry_32 / carry_64)
--   msb_r        : top bit of the sum
-- Returns '1' when both addends have the same top bit and ca differs
-- from the top bit of the result.
function calc_ov(msb_a : std_ulogic; msb_b: std_ulogic;
                 ca: std_ulogic; msb_r: std_ulogic) return std_ulogic is
    variable carry_differs : std_ulogic;
    variable same_sign     : std_ulogic;
begin
    carry_differs := ca xor msb_r;
    same_sign     := msb_a xnor msb_b;
    return carry_differs and same_sign;
end;
|
|
|
|
|
|
|
|
-- Select the carry-in bit for the adder.
--   ic   : which source to use (ZERO, CA, OV or ONE)
--   xerc : current XER common bits, source of the CA and OV choices
-- Returns the constant '0' / '1' for ZERO / ONE, otherwise xerc.ca or xerc.ov.
function decode_input_carry(ic : carry_in_t;
                            xerc : xer_common_t) return std_ulogic is
    variable cin : std_ulogic;
begin
    case ic is
        when ZERO => cin := '0';
        when ONE  => cin := '1';
        when CA   => cin := xerc.ca;
        when OV   => cin := xerc.ov;
    end case;
    return cin;
end;
|
Add basic XER support
The carry is currently internal to execute1. We don't handle any of
the other XER fields.
This creates a type called "xer_common_t" that contains the commonly
used XER bits (CA, CA32, SO, OV, OV32).
The value is stored in the CR file (though it could be a separate
module). The rest of the bits will be implemented as a separate
SPR and the two parts reconciled in mfspr/mtspr in later commits.
We always read XER in decode2 (there is little point not to)
and send it down all pipeline branches as it will be needed in
writeback for all types of instructions when CR0:SO needs to be
updated (such forms exist for all pipeline branches even if we don't
yet implement them).
To avoid having to track XER hazards, we forward it back in EX1. This
assumes that other pipeline branches that can modify it (mult and div)
are running single issue for now.
One additional hazard to beware of is an XER:SO modifying instruction
in EX1 followed immediately by a store conditional. Due to our writeback
latency, the store will go down the LSU with the previous XER value,
thus the stcx. will set CR0:SO using an obsolete SO value.
I doubt there exists any code relying on this behaviour being correct
but we should account for it regardless, possibly by ensuring that
stcx. remains single issue initially, or later by adding some minimal
tracking or moving the LSU into the same pipeline as execute.
Missing some obscure XER affecting instructions like addex or mcrxrx.
[paulus@ozlabs.org - fix CA32 and OV32 for OP_ADD, fix order of
arguments to set_ov]
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
|
|
|
|
-- Return a copy of msr with only the "full function" bits kept; every
-- other bit of the result is '0'.
--
-- ISA says this:
--  Defined MSR bits are classified as either full function or partial
--  function. Full function MSR bits are saved in SRR1 or HSRR1 when an
--  interrupt other than a System Call Vectored interrupt occurs and
--  restored by rfscv, rfid, or hrfid, while partial function MSR bits
--  are not saved or restored.
--  Full function MSR bits lie in the range 0:32, 37:41, and 48:63, and
--  partial function MSR bits lie in the range 33:36 and 42:47.
--  (Note this is IBM bit numbering.)
--
-- In the little-endian numbering used by the vector, the kept bits are
-- 63..31, 26..22 and 15..0.
function msr_copy(msr: std_ulogic_vector(63 downto 0))
    return std_ulogic_vector is
    variable msr_out: std_ulogic_vector(63 downto 0);
begin
    for i in msr_out'range loop
        if i >= 31 or (i >= 22 and i <= 26) or i <= 15 then
            msr_out(i) := msr(i);
        else
            msr_out(i) := '0';
        end if;
    end loop;
    return msr_out;
end;
|
|
|
|
|
|
|
|
-- Work out whether a signed value fits into n bits,
-- that is, see if it is in the range -2^(n-1) .. 2^(n-1) - 1
--   val : value to test, interpreted as signed with val'left as sign bit
--   n   : width to test against; bit n-1 of the mask is examined
function fits_in_n_bits(val: std_ulogic_vector; n: integer) return boolean is
    variable lead, lead_inc: std_ulogic_vector(val'left downto val'right);
begin
    -- Make the bits that merely repeat the sign read as leading ones.
    if val(val'left) = '0' then
        lead := not val;
    else
        lead := val;
    end if;
    -- Adding 1 to the bit-reversed vector clears the run of ones that
    -- starts at the original left end and sets the first zero after it.
    lead_inc := bit_reverse(std_ulogic_vector(unsigned(bit_reverse(lead)) + 1));
    lead := lead and not lead_inc;
    -- For positive inputs, lead has ones at the positions
    -- to the left of the leftmost 1 bit in val.
    -- For negative inputs, lead has ones to the left of
    -- the leftmost 0 bit in val.
    return lead(n - 1) = '1';
end;
|
|
|
|
|
|
|
|
-- Build the 64-bit XER image from the common bits and the low part.
--   xerc    : SO, OV, CA, OV32 and CA32 bits
--   xer_low : vector whose bits 17..0 supply the low 18 bits of the result
-- Layout, most significant first: 32 zero bits, SO, OV, CA, 9 zero bits,
-- OV32, CA32, xer_low(17 downto 0).
function assemble_xer(xerc: xer_common_t; xer_low: std_ulogic_vector)
    return std_ulogic_vector is
    constant UPPER_ZEROS : std_ulogic_vector(31 downto 0) := (others => '0');
    constant GAP_ZEROS   : std_ulogic_vector(8 downto 0)  := (others => '0');
begin
    return UPPER_ZEROS &
        xerc.so & xerc.ov & xerc.ca &
        GAP_ZEROS &
        xerc.ov32 & xerc.ca32 &
        xer_low(17 downto 0);
end;
|
|
|
|
|
|
|
|
-- Tell vivado to keep the hierarchy for the random module so that the
|
|
|
|
-- net names in the xdc file match.
|
|
|
|
attribute keep_hierarchy : string;
|
|
|
|
attribute keep_hierarchy of random_0 : label is "yes";
|
|
|
|
|
|
|
|
begin
|
Add a rotate/mask/shift unit and use it in execute1
This adds a new entity 'rotator' which contains combinatorial logic
for rotating and masking 64-bit values. It implements the operations
of the rlwinm, rlwnm, rlwimi, rldicl, rldicr, rldic, rldimi, rldcl,
rldcr, sld, slw, srd, srw, srad, sradi, sraw and srawi instructions.
It consists of a 3-stage 64-bit rotator using 4:1 multiplexors at
each stage, two mask generators, output logic and control logic.
The insn_type_t values used for these instructions have been reduced
to just 5: OP_RLC, OP_RLCL and OP_RLCR for the rotate and mask
instructions (clear both left and right, clear left, clear right
variants), OP_SHL for left shifts, and OP_SHR for right shifts.
The control signals for the rotator are derived from the opcode
and from the is_32bit and is_signed fields of the decode_rom_t.
The rotator is instantiated as an entity in execute1 so that we can
be sure we only have one of it.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
|
|
|
|
-- Rotate/mask/shift unit. Operands come from c_in (rs) and a_in (ra), the
-- shift count from the low 7 bits of b_in; control inputs come from e_in
-- fields and from signals of this architecture.
rotator_0: entity work.rotator
    port map (
        -- data inputs
        rs          => c_in,
        ra          => a_in,
        shift       => b_in(6 downto 0),
        insn        => e_in.insn,
        -- control
        is_32bit    => e_in.is_32bit,
        arith       => e_in.is_signed,
        right_shift => right_shift,
        clear_left  => rot_clear_left,
        clear_right => rot_clear_right,
        sign_ext_rs => rot_sign_ext,
        -- outputs
        result      => rotator_result,
        carry_out   => rotator_carry
        );
|
Add a rotate/mask/shift unit and use it in execute1
This adds a new entity 'rotator' which contains combinatorial logic
for rotating and masking 64-bit values. It implements the operations
of the rlwinm, rlwnm, rlwimi, rldicl, rldicr, rldic, rldimi, rldcl,
rldcr, sld, slw, srd, srw, srad, sradi, sraw and srawi instructions.
It consists of a 3-stage 64-bit rotator using 4:1 multiplexors at
each stage, two mask generators, output logic and control logic.
The insn_type_t values used for these instructions have been reduced
to just 5: OP_RLC, OP_RLCL and OP_RLCR for the rotate and mask
instructions (clear both left and right, clear left, clear right
variants), OP_SHL for left shifts, and OP_SHR for right shifts.
The control signals for the rotator are derived from the opcode
and from the is_32bit and is_signed fields of the decode_rom_t.
The rotator is instantiated as an entity in execute1 so that we can
be sure we only have one of it.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
|
|
|
|
-- Logical operations unit (combinational: no clock port is connected)
logical_0: entity work.logical
    port map (
        rs         => c_in,
        rb         => b_in,
        op         => e_in.insn_type,
        invert_in  => e_in.invert_a,
        invert_out => e_in.invert_out,
        datalen    => e_in.data_len,
        result     => logical_result
        );

-- Bit counting unit; count_right is taken from instruction bit 10
countbits_0: entity work.bit_counter
    port map (
        clk         => clk,
        rs          => c_in,
        count_right => e_in.insn(10),
        is_32bit    => e_in.is_32bit,
        do_popcnt   => do_popcnt,
        datalen     => e_in.data_len,
        result      => countbits_result
        );

-- Multiplier
multiply_0: entity work.multiply
    port map (
        clk   => clk,
        m_in  => x_to_multiply,
        m_out => multiply_to_x
        );

-- Divider
divider_0: entity work.divider
    port map (
        clk   => clk,
        rst   => rst,
        d_in  => x_to_divider,
        d_out => divider_to_x
        );

-- Random number source
random_0: entity work.random
    port map (
        clk  => clk,
        data => random_cond,
        raw  => random_raw,
        err  => random_err
        );

-- Performance monitor unit
pmu_0: entity work.pmu
    port map (
        clk   => clk,
        rst   => rst,
        p_in  => x_to_pmu,
        p_out => pmu_to_x
        );

-- Optional 16x16 multiplier, present only when HAS_SHORT_MULT is true;
-- otherwise mshort_p keeps its declared initial value.
short_mult_0: if HAS_SHORT_MULT generate
begin
    short_mult: entity work.short_multiply
        port map (
            clk   => clk,
            a_in  => a_in(15 downto 0),
            b_in  => b_in(15 downto 0),
            m_out => mshort_p
            );
end generate;
|
|
|
|
|
|
|
|
-- Outputs driven directly from registered state
dbg_ctrl_out <= ctrl;
log_rd_addr  <= r.log_addr_spr;

-- Operand aliases for the incoming instruction
a_in  <= e_in.read_data1;
b_in  <= e_in.read_data2;
c_in  <= e_in.read_data3;
cr_in <= e_in.cr;

-- Event inputs to the PMU, gathered from the other units' event buses and
-- from flags registered in this stage; all remaining events are tied to '0'.
x_to_pmu.occur <= (
    instr_complete      => wb_events.instr_complete,
    fp_complete         => wb_events.fp_complete,
    ld_complete         => ls_events.load_complete,
    st_complete         => ls_events.store_complete,
    itlb_miss           => ls_events.itlb_miss,
    dc_load_miss        => dc_events.load_miss,
    dc_ld_miss_resolved => dc_events.dcache_refill,
    dc_store_miss       => dc_events.store_miss,
    dtlb_miss           => dc_events.dtlb_miss,
    dtlb_miss_resolved  => dc_events.dtlb_miss_resolved,
    icache_miss         => ic_events.icache_miss,
    itlb_miss_resolved  => ic_events.itlb_miss_resolved,
    no_instr_avail      => r.no_instr_avail,
    dispatch            => r.instr_dispatch,
    ext_interrupt       => r.ext_interrupt,
    br_taken_complete   => r.taken_branch_event,
    br_mispredict       => r.br_mispredict,
    others              => '0'
    );
x_to_pmu.nia     <= current.nia;
x_to_pmu.addr    <= (others => '0');
x_to_pmu.addr_v  <= '0';
x_to_pmu.spr_num <= e_in.insn(20 downto 16);
x_to_pmu.spr_val <= c_in;
x_to_pmu.run     <= '1';

-- XER forwarding.  To avoid having to track XER hazards, we use
-- the previously latched value.  Since the XER common bits
-- (SO, OV[32] and CA[32]) are only modified by instructions that are
-- handled here, we can just forward the result being sent to
-- writeback.
xerc_in <= r.e.xerc when r.e.write_xerc_enable = '1' or r.busy = '1' else e_in.xerc;

-- Busy indication: l_in.in_progress is taken into account for every
-- unit except LDST.
busy_out <= l_in.busy or r.busy or fp_in.busy when e_in.unit = LDST else
            l_in.busy or l_in.in_progress or r.busy or fp_in.busy;

-- An incoming instruction is accepted only when not busy and not flushed
valid_in <= e_in.valid and not busy_out and not flush_in;

terminate_out <= r.terminate;

-- While busy, keep working on the latched instruction
current <= e_in when r.busy = '0' else r.cur_instr;

-- Result mux
with current.result_sel select alu_result <=
    adder_result     when "000",
    logical_result   when "001",
    rotator_result   when "010",
    muldiv_result    when "011",
    countbits_result when "100",
    spr_result       when "101",
    next_nia         when "110",
    misc_result      when others;
|
|
|
|
|
|
|
|
-- Register update. On a rising clock edge with rst asserted, r and ctrl
-- take their init constants, with ctrl.msr then overridden so that only
-- the SF and LE bits are set. Otherwise r and ctrl take the next-state
-- values rin / ctrl_tmp, and each accepted instruction is reported
-- (simulation trace).
execute1_0: process(clk)
begin
    if rising_edge(clk) then
        if rst = '1' then
            r    <= reg_type_init;
            ctrl <= ctrl_t_init;
            -- Later assignment wins for the msr element
            ctrl.msr <= (MSR_SF => '1', MSR_LE => '1', others => '0');
        else
            r    <= rin;
            ctrl <= ctrl_tmp;
            if valid_in = '1' then
                report "execute " & to_hstring(e_in.nia) &
                    " op=" & insn_type_t'image(e_in.insn_type) &
                    " wr=" & to_hstring(rin.e.write_reg) &
                    " we=" & std_ulogic'image(rin.e.write_enable) &
                    " tag=" & integer'image(rin.e.instr_tag.tag) &
                    std_ulogic'image(rin.e.instr_tag.valid);
            end if;
        end if;
    end if;
end process;
|
|
|
|
|
|
|
|
-- Data path for integer instructions
|
|
|
|
execute1_dp: process(all)
|
|
|
|
variable a_inv : std_ulogic_vector(63 downto 0);
|
|
|
|
variable b_or_m1 : std_ulogic_vector(63 downto 0);
|
|
|
|
variable sum_with_carry : std_ulogic_vector(64 downto 0);
|
|
|
|
variable sign1, sign2 : std_ulogic;
|
|
|
|
variable abs1, abs2 : signed(63 downto 0);
|
|
|
|
variable addend : std_ulogic_vector(127 downto 0);
|
|
|
|
variable addg6s : std_ulogic_vector(63 downto 0);
|
|
|
|
variable crbit : integer range 0 to 31;
|
|
|
|
variable isel_result : std_ulogic_vector(63 downto 0);
|
|
|
|
variable darn : std_ulogic_vector(63 downto 0);
|
|
|
|
variable setb_result : std_ulogic_vector(63 downto 0);
|
|
|
|
variable mfcr_result : std_ulogic_vector(63 downto 0);
|
|
|
|
variable lo, hi : integer;
|
|
|
|
variable l : std_ulogic;
|
|
|
|
variable zerohi, zerolo : std_ulogic;
|
|
|
|
variable msb_a, msb_b : std_ulogic;
|
|
|
|
variable a_lt : std_ulogic;
|
|
|
|
variable a_lt_lo : std_ulogic;
|
|
|
|
variable a_lt_hi : std_ulogic;
|
|
|
|
variable newcrf : std_ulogic_vector(3 downto 0);
|
|
|
|
variable bf, bfa : std_ulogic_vector(2 downto 0);
|
|
|
|
variable crnum : crnum_t;
|
|
|
|
variable scrnum : crnum_t;
|
|
|
|
variable cr_operands : std_ulogic_vector(1 downto 0);
|
|
|
|
variable crresult : std_ulogic;
|
|
|
|
variable bt, ba, bb : std_ulogic_vector(4 downto 0);
|
|
|
|
variable btnum : integer range 0 to 3;
|
|
|
|
variable banum, bbnum : integer range 0 to 31;
|
|
|
|
variable j : integer;
|
|
|
|
begin
|
|
|
|
-- Main adder
|
|
|
|
if e_in.invert_a = '0' then
|
|
|
|
a_inv := a_in;
|
|
|
|
else
|
|
|
|
a_inv := not a_in;
|
|
|
|
end if;
|
|
|
|
if e_in.addm1 = '0' then
|
|
|
|
b_or_m1 := b_in;
|
|
|
|
else
|
|
|
|
b_or_m1 := (others => '1');
|
|
|
|
end if;
|
|
|
|
sum_with_carry := ppc_adde(a_inv, b_or_m1,
|
|
|
|
decode_input_carry(e_in.input_carry, xerc_in));
|
|
|
|
adder_result <= sum_with_carry(63 downto 0);
|
|
|
|
carry_32 <= sum_with_carry(32) xor a_inv(32) xor b_in(32);
|
|
|
|
carry_64 <= sum_with_carry(64);
|
|
|
|
overflow_32 <= calc_ov(a_inv(31), b_in(31), carry_32, sum_with_carry(31));
|
|
|
|
overflow_64 <= calc_ov(a_inv(63), b_in(63), carry_64, sum_with_carry(63));
|
|
|
|
|
|
|
|
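-- Operand signs for the multiply and divide units: '0' for unsigned
-- operations, otherwise bit 31 (32-bit ops) or bit 63 (64-bit ops) of A and B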
|
|
|
|
sign1 := '0';
|
|
|
|
sign2 := '0';
|
|
|
|
if e_in.is_signed = '1' then
|
|
|
|
if e_in.is_32bit = '1' then
|
|
|
|
sign1 := a_in(31);
|
|
|
|
sign2 := b_in(31);
|
|
|
|
else
|
|
|
|
sign1 := a_in(63);
|
|
|
|
sign2 := b_in(63);
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
-- take absolute values
|
|
|
|
if sign1 = '0' then
|
|
|
|
abs1 := signed(a_in);
|
|
|
|
else
|
|
|
|
abs1 := - signed(a_in);
|
|
|
|
end if;
|
|
|
|
if sign2 = '0' then
|
|
|
|
abs2 := signed(b_in);
|
|
|
|
else
|
|
|
|
abs2 := - signed(b_in);
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- Interface to multiply and divide units
|
|
|
|
x_to_divider.is_signed <= e_in.is_signed;
|
|
|
|
x_to_divider.is_32bit <= e_in.is_32bit;
|
|
|
|
x_to_divider.is_extended <= '0';
|
|
|
|
x_to_divider.is_modulus <= '0';
|
|
|
|
if e_in.insn_type = OP_MOD then
|
|
|
|
x_to_divider.is_modulus <= '1';
|
|
|
|
end if;
|
|
|
|
|
|
|
|
addend := (others => '0');
|
|
|
|
if e_in.insn(26) = '0' then
|
|
|
|
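-- insn(26) is the low bit of the primary opcode: it is '0' for major op 4
-- (integer multiply-add) and '1' for the other multiplies (major ops 7 and 31).
-- This test is only meaningful when the instruction is a multiply.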
|
|
|
|
addend(63 downto 0) := c_in;
|
|
|
|
if e_in.is_signed = '1' then
|
|
|
|
addend(127 downto 64) := (others => c_in(63));
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
if (sign1 xor sign2) = '1' then
|
|
|
|
addend := not addend;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
x_to_multiply.is_32bit <= e_in.is_32bit;
|
|
|
|
x_to_multiply.not_result <= sign1 xor sign2;
|
|
|
|
x_to_multiply.addend <= addend;
|
|
|
|
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
|
|
|
|
if e_in.is_32bit = '0' then
|
|
|
|
-- 64-bit forms
|
|
|
|
x_to_multiply.data1 <= std_ulogic_vector(abs1);
|
|
|
|
x_to_multiply.data2 <= std_ulogic_vector(abs2);
|
|
|
|
if e_in.insn_type = OP_DIVE then
|
|
|
|
x_to_divider.is_extended <= '1';
|
|
|
|
end if;
|
|
|
|
x_to_divider.dividend <= std_ulogic_vector(abs1);
|
|
|
|
x_to_divider.divisor <= std_ulogic_vector(abs2);
|
|
|
|
else
|
|
|
|
-- 32-bit forms
|
|
|
|
x_to_multiply.data1 <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
|
|
|
|
x_to_multiply.data2 <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
|
|
|
|
x_to_divider.is_extended <= '0';
|
|
|
|
if e_in.insn_type = OP_DIVE then -- extended forms
|
|
|
|
x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
|
|
|
|
else
|
|
|
|
x_to_divider.dividend <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
|
|
|
|
end if;
|
|
|
|
x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
|
|
|
|
end if;
|
|
|
|
|
|
|
|
case current.sub_select(1 downto 0) is
|
|
|
|
when "00" =>
|
|
|
|
if HAS_SHORT_MULT and r.mul_in_progress = '0' then
|
|
|
|
muldiv_result <= std_ulogic_vector(resize(signed(mshort_p), 64));
|
|
|
|
else
|
|
|
|
muldiv_result <= multiply_to_x.result(63 downto 0);
|
|
|
|
end if;
|
|
|
|
when "01" =>
|
|
|
|
muldiv_result <= multiply_to_x.result(127 downto 64);
|
|
|
|
when "10" =>
|
|
|
|
muldiv_result <= multiply_to_x.result(63 downto 32) &
|
|
|
|
multiply_to_x.result(63 downto 32);
|
|
|
|
when others =>
|
|
|
|
|