// Verilog source listing: 2226 lines, 84 KiB.
// © IBM Corp. 2020
|
|
// Licensed under the Apache License, Version 2.0 (the "License"), as modified by
|
|
// the terms below; you may not use the files in this repository except in
|
|
// compliance with the License as modified.
|
|
// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Modified Terms:
|
|
//
|
|
// 1) For the purpose of the patent license granted to you in Section 3 of the
|
|
// License, the "Work" hereby includes implementations of the work of authorship
|
|
// in physical form.
|
|
//
|
|
// 2) Notwithstanding any terms to the contrary in the License, any licenses
|
|
// necessary for implementation of the Work that are available from OpenPOWER
|
|
// via the Power ISA End User License Agreement (EULA) are explicitly excluded
|
|
// hereunder, and may be obtained from OpenPOWER under the terms and conditions
|
|
// of the EULA.
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, the reference design
|
|
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
|
|
// for the specific language governing permissions and limitations under the License.
|
|
//
|
|
// Additional rights, including the ability to physically implement a softcore that
|
|
// is compliant with the required sections of the Power ISA Specification, are
|
|
// available at no cost under the terms of the OpenPOWER Power ISA EULA, which can be
|
|
// obtained (along with the Power ISA) here: https://openpowerfoundation.org.
|
|
|
|
`timescale 1 ns / 1 ns
|
|
|
|
//********************************************************************
|
|
//*
|
|
//* TITLE:
|
|
//*
|
|
//* NAME: iuq_ic_miss.v
|
|
//*
|
|
//*********************************************************************
|
|
|
|
`include "tri_a2o.vh"
|
|
|
|
|
|
// Port list of iuq_ic_miss (non-ANSI style): only the port names appear here,
// the direction and width of each port are declared after the header.
// ics_icm_iu0_t1_ifar and ic_perf_t1_event exist only when THREADS1 is not defined.
module iuq_ic_miss(
   vdd,
   gnd,
   nclk,
   pc_iu_func_sl_thold_0_b,
   pc_iu_sg_0,
   force_t,
   d_mode,
   delay_lclkr,
   mpw1_b,
   mpw2_b,
   scan_in,
   scan_out,
   iu_flush,
   br_iu_redirect,
   bp_ic_iu4_redirect,
   ic_bp_iu3_ecc_err,
   // ics_icm_* inputs / icm_ics_* outputs
   ics_icm_iu0_t0_ifar,
`ifndef THREADS1
   ics_icm_iu0_t1_ifar,
`endif
   ics_icm_iu0_inval,
   ics_icm_iu0_inval_addr,
   ics_icm_iu2_flush,
   icm_ics_hold_thread,
   icm_ics_hold_iu0,
   icm_ics_iu3_miss_match,
   icm_ics_iu3_ecc_fp_cancel,
   icm_ics_iu0_preload_val,
   icm_ics_iu0_preload_ifar,
   icm_ics_prefetch_req,
   icm_ics_prefetch_sm_idle,
   // icm_icd_* outputs
   icm_icd_lru_addr,
   icm_icd_dir_inval,
   icm_icd_dir_val,
   icm_icd_data_write,
   icm_icd_reload_addr,
   icm_icd_reload_data,
   icm_icd_reload_way,
   icm_icd_load,
   icm_icd_load_addr,
   icm_icd_load_2ucode,
   icm_icd_load_2ucode_type,
   icm_icd_dir_write,
   icm_icd_dir_write_addr,
   icm_icd_dir_write_endian,
   icm_icd_dir_write_way,
   icm_icd_lru_write,
   icm_icd_lru_write_addr,
   icm_icd_lru_write_way,
   icm_icd_ecc_inval,
   icm_icd_ecc_addr,
   icm_icd_ecc_way,
   icm_icd_iu3_ecc_fp_cancel,
   icm_icd_any_reld_r2,
   // icd_icm_* inputs
   icd_icm_miss,
   icd_icm_prefetch,
   icd_icm_tid,
   icd_icm_addr_real,
   icd_icm_addr_eff,
   icd_icm_wimge,
   icd_icm_userdef,
   icd_icm_2ucode,
   icd_icm_2ucode_type,
   icd_icm_iu2_inval,
   icd_icm_any_iu2_valid,
   icd_icm_row_lru,
   icd_icm_row_val,
   ic_perf_t0_event,
`ifndef THREADS1
   ic_perf_t1_event,
`endif
   cp_async_block,
   iu_mm_lmq_empty,
   iu_xu_icache_quiesce,
   iu_pc_icache_quiesce,
   // an_ac_reld_* reload inputs
   an_ac_reld_data_vld,
   an_ac_reld_core_tag,
   an_ac_reld_qw,
   an_ac_reld_data,
   an_ac_reld_ecc_err,
   an_ac_reld_ecc_err_ue,
   spr_ic_cls,
   spr_ic_bp_config,
   // iu_lq_* request outputs
   iu_lq_request,
   iu_lq_ctag,
   iu_lq_ra,
   iu_lq_wimge,
   iu_lq_userdef,
   event_bus_enable
);
|
|
|
|
// Direction and width of every port named in the module header.
// Vectors are declared with ascending ranges ([low:high]).
inout                              vdd;

inout                              gnd;

(* pin_data ="PIN_FUNCTION=/G_CLK/" *)
input [0:`NCLK_WIDTH-1]            nclk;
input                              pc_iu_func_sl_thold_0_b;
input                              pc_iu_sg_0;
input                              force_t;
input                              d_mode;
input                              delay_lclkr;
input                              mpw1_b;
input                              mpw2_b;
input                              scan_in;
output                             scan_out;

input [0:`THREADS-1]               iu_flush;
input [0:`THREADS-1]               br_iu_redirect;
input [0:`THREADS-1]               bp_ic_iu4_redirect;

output                             ic_bp_iu3_ecc_err;

input [46:52]                      ics_icm_iu0_t0_ifar;
`ifndef THREADS1
input [46:52]                      ics_icm_iu0_t1_ifar;
`endif

input                              ics_icm_iu0_inval;
input [51:57]                      ics_icm_iu0_inval_addr;

input [0:`THREADS-1]               ics_icm_iu2_flush;
output [0:`THREADS-1]              icm_ics_hold_thread;
output                             icm_ics_hold_iu0;
output                             icm_ics_iu3_miss_match;
output [0:`THREADS-1]              icm_ics_iu3_ecc_fp_cancel;

output [0:`THREADS-1]              icm_ics_iu0_preload_val;
output [50:59]                     icm_ics_iu0_preload_ifar;
output [0:`THREADS-1]              icm_ics_prefetch_req;
output [0:`THREADS-1]              icm_ics_prefetch_sm_idle;

output [51:57]                     icm_icd_lru_addr;
output                             icm_icd_dir_inval;
output                             icm_icd_dir_val;
output                             icm_icd_data_write;
output [51:59]                     icm_icd_reload_addr;
output [0:143]                     icm_icd_reload_data;
output [0:3]                       icm_icd_reload_way;
output [0:`THREADS-1]              icm_icd_load;
output [62-`EFF_IFAR_WIDTH:61]     icm_icd_load_addr;
output                             icm_icd_load_2ucode;
output                             icm_icd_load_2ucode_type;
output                             icm_icd_dir_write;
output [64-`REAL_IFAR_WIDTH:57]    icm_icd_dir_write_addr;
output                             icm_icd_dir_write_endian;
output [0:3]                       icm_icd_dir_write_way;
output                             icm_icd_lru_write;
output [51:57]                     icm_icd_lru_write_addr;
output [0:3]                       icm_icd_lru_write_way;
output                             icm_icd_ecc_inval;
output [51:57]                     icm_icd_ecc_addr;
output [0:3]                       icm_icd_ecc_way;
output                             icm_icd_iu3_ecc_fp_cancel;
output                             icm_icd_any_reld_r2;

input                              icd_icm_miss; // These signals, except icd_icm_miss, come off a latch
input                              icd_icm_prefetch;
input [0:`THREADS-1]               icd_icm_tid;
input [64-`REAL_IFAR_WIDTH:61]     icd_icm_addr_real;
input [62-`EFF_IFAR_WIDTH:51]      icd_icm_addr_eff; // Shares bits 52:61 with real_ifar
input [0:4]                        icd_icm_wimge; // (1): CI, (4): Endian
input [0:3]                        icd_icm_userdef;
input                              icd_icm_2ucode;
input                              icd_icm_2ucode_type;
input                              icd_icm_iu2_inval;
input                              icd_icm_any_iu2_valid;

input [0:2]                        icd_icm_row_lru; // valid same cycle as read_lru
input [0:3]                        icd_icm_row_val; // valid same cycle as read_lru

output [0:2]                       ic_perf_t0_event;
`ifndef THREADS1
output [0:2]                       ic_perf_t1_event;
`endif

input [0:`THREADS-1]               cp_async_block;
output                             iu_mm_lmq_empty;
output [0:`THREADS-1]              iu_xu_icache_quiesce;
output [0:`THREADS-1]              iu_pc_icache_quiesce;

input                              an_ac_reld_data_vld; // This comes back two cycles before the data
input [0:4]                        an_ac_reld_core_tag; // This signal comes active two cycles before the data
input [58:59]                      an_ac_reld_qw; // This signal comes active two cycles before the data
input [0:127]                      an_ac_reld_data; // This signal comes active two cycles after the valid
input                              an_ac_reld_ecc_err; // This signal comes active one cycle after data
input                              an_ac_reld_ecc_err_ue; // This signal comes active one cycle after data

input                              spr_ic_cls; // (0): 64B cacheline, (1): 128B cacheline
input [0:3]                        spr_ic_bp_config; // (0): bc, (1): bclr, (2): bcctr, (3): sw

output [0:`THREADS-1]              iu_lq_request;
output [0:1]                       iu_lq_ctag; // (0): thread ID, (1): prefetch
output [64-`REAL_IFAR_WIDTH:59]    iu_lq_ra;
output [0:4]                       iu_lq_wimge;
output [0:3]                       iu_lq_userdef;

input                              event_bus_enable;
|
|
|
|
localparam [0:31] value_1 = 32'h00000001;
localparam [0:31] value_2 = 32'h00000002;

parameter SM_MAX = 4; // max # of state machines (# of tables)
parameter TAGS_USED = `THREADS * 2;   // two tags (state machines) per thread

// Bit offsets of each latched field. Every offset is the previous offset plus the
// width of the previous field; scan_right is the index of the last bit and is the
// upper bound of the siv/sov vectors.
parameter spr_ic_cls_offset = 0;
parameter bp_config_offset = spr_ic_cls_offset + 1;
parameter an_ac_reld_data_vld_offset = bp_config_offset + 4;
parameter an_ac_reld_core_tag_offset = an_ac_reld_data_vld_offset + 1;
parameter an_ac_reld_qw_offset = an_ac_reld_core_tag_offset + 5;
parameter reld_data_offset = an_ac_reld_qw_offset + 2;
parameter an_ac_reld_ecc_err_offset = reld_data_offset + 128;
parameter an_ac_reld_ecc_err_ue_offset = an_ac_reld_ecc_err_offset + 1;
parameter reld_r1_val_offset = an_ac_reld_ecc_err_ue_offset + 1;
parameter reld_r1_qw_offset = reld_r1_val_offset + TAGS_USED;
parameter reld_r2_val_offset = reld_r1_qw_offset + 2;
parameter reld_r2_qw_offset = reld_r2_val_offset + TAGS_USED;
parameter r2_crit_qw_offset = reld_r2_qw_offset + 2;
parameter reld_r3_val_offset = r2_crit_qw_offset + 1;
parameter r3_loaded_offset = reld_r3_val_offset + TAGS_USED;
parameter request_offset = r3_loaded_offset + 1;
parameter req_ctag_offset = request_offset + `THREADS;
parameter req_ra_offset = req_ctag_offset + 2;
parameter req_wimge_offset = req_ra_offset + `REAL_IFAR_WIDTH - 4;   // req_ra is [64-REAL_IFAR_WIDTH:59]
parameter req_userdef_offset = req_wimge_offset + 5;
parameter iu3_miss_match_offset = req_userdef_offset + 4;

// Per-tag fields (TAGS_USED copies of each)
parameter miss_tid_sm_offset = iu3_miss_match_offset + 1;
parameter miss_count_offset = miss_tid_sm_offset + TAGS_USED * 6;
parameter miss_flush_occurred_offset = miss_count_offset + TAGS_USED * 3;
parameter miss_flushed_offset = miss_flush_occurred_offset + TAGS_USED;
parameter miss_inval_offset = miss_flushed_offset + TAGS_USED;
parameter miss_block_fp_offset = miss_inval_offset + TAGS_USED;
parameter miss_ecc_err_offset = miss_block_fp_offset + TAGS_USED;
parameter miss_ecc_err_ue_offset = miss_ecc_err_offset + TAGS_USED;
parameter miss_wrote_dir_offset = miss_ecc_err_ue_offset + TAGS_USED;
parameter miss_need_hold_offset = miss_wrote_dir_offset + TAGS_USED;
parameter miss_addr_real_offset = miss_need_hold_offset + TAGS_USED;
parameter miss_addr_eff_offset = miss_addr_real_offset + TAGS_USED * (`REAL_IFAR_WIDTH - 2);
parameter miss_ci_offset = miss_addr_eff_offset + TAGS_USED * (`EFF_IFAR_WIDTH - 10);
parameter miss_endian_offset = miss_ci_offset + TAGS_USED;
parameter miss_2ucode_offset = miss_endian_offset + TAGS_USED;
parameter miss_2ucode_type_offset = miss_2ucode_offset + TAGS_USED;
parameter miss_way_offset = miss_2ucode_type_offset + TAGS_USED;
parameter lru_write_next_cycle_offset = miss_way_offset + 4 * TAGS_USED;
parameter lru_write_offset = lru_write_next_cycle_offset + TAGS_USED;
parameter miss_prefetch_perf_offset = lru_write_offset + TAGS_USED;
parameter perf_event_offset = miss_prefetch_perf_offset + `THREADS;
parameter scan_right = perf_event_offset + `THREADS * 3 - 1;

// Bit positions of the states inside each 6-bit miss_tid_sm vector
parameter IDLE = 0;
parameter WAITMISS = 1;
parameter WAITSTATE = 2;
parameter DATA = 3;
parameter CI = 4;
parameter CHECK_ECC = 5;
|
|
// Internal signal declarations. Latched values come in pairs: <name>_d is the
// latch input and <name>_l2 the latch output. Vectors sized SM_MAX (rather than
// TAGS_USED) are the ones connected to or tied off for all SM_MAX state machine tables.
wire [1:24]                        select_lru_way_pt;

// Latch definition begin
wire [0:TAGS_USED-1]               reld_r1_val_d;
wire                               spr_ic_cls_d;
wire [0:3]                         bp_config_d;
wire                               an_ac_reld_data_vld_d;
wire [0:4]                         an_ac_reld_core_tag_d;
wire [58:59]                       an_ac_reld_qw_d;
wire [0:127]                       reld_data_d;
wire                               an_ac_reld_ecc_err_d;
wire                               an_ac_reld_ecc_err_ue_d;
wire [0:1]                         reld_r1_qw_d;
wire [0:TAGS_USED-1]               reld_r2_val_d;
wire [0:1]                         reld_r2_qw_d;
wire                               r2_crit_qw_d;
wire [0:TAGS_USED-1]               reld_r3_val_d;
wire                               r3_loaded_d;
wire [0:`THREADS-1]                request_d;
wire [0:1]                         req_ctag_d;
wire [64-`REAL_IFAR_WIDTH:59]      req_ra_d;
wire [0:4]                         req_wimge_d;
wire [0:3]                         req_userdef_d;
wire                               iu3_miss_match_d;
wire [0:5]                         miss_tid_sm_d[0:SM_MAX-1];
wire [0:2]                         miss_count_d[0:TAGS_USED-1];
wire [64-`REAL_IFAR_WIDTH:61]      miss_addr_real_d[0:TAGS_USED-1];
wire [62-`EFF_IFAR_WIDTH:51]       miss_addr_eff_d[0:TAGS_USED-1];
wire [0:3]                         miss_way_d[0:TAGS_USED-1];
wire [0:TAGS_USED-1]               miss_flush_occurred_d;
wire [0:SM_MAX-1]                  miss_flushed_d;
wire [0:SM_MAX-1]                  miss_inval_d;
wire [0:TAGS_USED-1]               miss_block_fp_d;
wire [0:TAGS_USED-1]               miss_ecc_err_d;
wire [0:TAGS_USED-1]               miss_ecc_err_ue_d;
wire [0:TAGS_USED-1]               miss_wrote_dir_d;
wire [0:TAGS_USED-1]               miss_need_hold_d;
wire [0:SM_MAX-1]                  miss_ci_d;
wire [0:TAGS_USED-1]               miss_endian_d;
wire [0:TAGS_USED-1]               miss_2ucode_d;
wire [0:TAGS_USED-1]               miss_2ucode_type_d;
wire [0:TAGS_USED-1]               lru_write_next_cycle_d;
wire [0:TAGS_USED-1]               lru_write_d;
wire [0:`THREADS-1]                miss_prefetch_perf_d;
wire [0:2]                         perf_event_d[0:`THREADS-1];

wire [0:SM_MAX-1]                  reld_r1_val_l2;
wire                               spr_ic_cls_l2;
wire [0:3]                         bp_config_l2;
wire                               an_ac_reld_data_vld_l2;
wire [0:4]                         an_ac_reld_core_tag_l2;
wire [58:59]                       an_ac_reld_qw_l2;
wire [0:127]                       reld_data_l2;
wire                               an_ac_reld_ecc_err_l2;
wire                               an_ac_reld_ecc_err_ue_l2;
wire [0:1]                         reld_r1_qw_l2;
wire [0:TAGS_USED-1]               reld_r2_val_l2;
wire [0:1]                         reld_r2_qw_l2;
wire                               r2_crit_qw_l2;
wire [0:TAGS_USED-1]               reld_r3_val_l2;
wire                               r3_loaded_l2;
wire [0:`THREADS-1]                request_l2;
wire [0:1]                         req_ctag_l2;
wire [64-`REAL_IFAR_WIDTH:59]      req_ra_l2;
wire [0:4]                         req_wimge_l2;
wire [0:3]                         req_userdef_l2;
wire                               iu3_miss_match_l2;
wire [0:5]                         miss_tid_sm_l2[0:SM_MAX-1]; //state machine for each tag
wire [0:2]                         miss_count_l2[0:TAGS_USED-1];
wire [64-`REAL_IFAR_WIDTH:61]      miss_addr_real_l2[0:TAGS_USED-1];
wire [62-`EFF_IFAR_WIDTH:51]       miss_addr_eff_l2[0:TAGS_USED-1];
wire [0:3]                         miss_way_l2[0:TAGS_USED-1];
wire [0:TAGS_USED-1]               miss_flush_occurred_l2;
wire [0:SM_MAX-1]                  miss_flushed_l2;
wire [0:SM_MAX-1]                  miss_inval_l2;
wire [0:TAGS_USED-1]               miss_block_fp_l2; //block fastpath
wire [0:TAGS_USED-1]               miss_ecc_err_l2;
wire [0:TAGS_USED-1]               miss_ecc_err_ue_l2;
wire [0:TAGS_USED-1]               miss_wrote_dir_l2;
wire [0:TAGS_USED-1]               miss_need_hold_l2;
wire [0:SM_MAX-1]                  miss_ci_l2;
wire [0:TAGS_USED-1]               miss_endian_l2;
wire [0:TAGS_USED-1]               miss_2ucode_l2;
wire [0:TAGS_USED-1]               miss_2ucode_type_l2;
wire [0:TAGS_USED-1]               lru_write_next_cycle_l2;
wire [0:TAGS_USED-1]               lru_write_l2;
wire [0:`THREADS-1]                miss_prefetch_perf_l2;
wire [0:2]                         perf_event_l2[0:`THREADS-1];
// Latch definition end

wire [46:52]                       iu0_ifar[0:TAGS_USED-1];

// Act control; only needed for power reduction
wire [0:TAGS_USED-1]               default_reld_act_v;
wire                               default_reld_act;
wire                               miss_or_default_act;
wire                               reld_r2_act;
wire [0:TAGS_USED-1]               miss_act;

// reload pipeline
wire                               reld_r0_vld;
wire [0:TAGS_USED-1]               reld_r0_tag;
wire [0:`THREADS-1]                reld_r3_tid;

wire [0:`THREADS-1]                iu_xu_icache_quiesce_int;

wire [0:SM_MAX-1]                  iu2_flush;
wire [0:SM_MAX-1]                  new_miss;
wire [0:SM_MAX-1]                  last_data;
wire [0:TAGS_USED-1]               no_data;
wire [0:TAGS_USED-1]               set_flush_occurred;
wire [0:TAGS_USED-1]               flush_addr_outside_range;

wire [0:TAGS_USED-1]               set_flushed;
wire [0:TAGS_USED-1]               inval_equal;
wire [0:TAGS_USED-1]               set_invalidated;
wire [0:SM_MAX-1]                  reset_state;
wire [0:TAGS_USED-1]               sent_fp;
wire [0:TAGS_USED-1]               set_block_fp;

// this signal will check incoming addr against current valid addresses
wire [0:TAGS_USED-1]               addr_equal;
wire [0:TAGS_USED-1]               addr_match_tag;
wire                               addr_match;
wire                               miss_thread_has_idle;

wire                               release_sm;
wire [0:SM_MAX-1]                  release_sm_hold /*verilator split_var*/;

// IU0 inval
wire [0:TAGS_USED-1]               iu0_inval_match;

// OR these together to get iu_lq_request
wire [0:SM_MAX-1]                  request_tag;

// fastpath
wire [0:TAGS_USED-1]               preload_r0_tag;
wire [0:`THREADS-1]                preload_r0_tid;
wire [0:`THREADS-1]                preload_hold_iu0;
reg [50:59]                        r0_addr;
wire [0:SM_MAX-1]                  load_tag;
reg [62-`EFF_IFAR_WIDTH:61]        load_addr;
wire                               load_2ucode;
wire                               load_2ucode_type;
wire [0:TAGS_USED-1]               load_tag_no_block;
wire [0:`THREADS-1]                load_tid_no_block;

// this signal indicates critical quadword is in r0, r1
wire [0:TAGS_USED-1]               r0_crit_qw;
wire [0:TAGS_USED-1]               r1_crit_qw;

// lru
wire                               lru_write_hit;
wire [0:2]                         hit_lru;
wire [0:2]                         row_lru;
wire [0:TAGS_USED-1]               select_lru;
reg [51:57]                        lru_addr;
wire [0:TAGS_USED-1]               lru_valid;
wire [0:TAGS_USED-1]               row_match;
reg [0:3]                          row_match_way;
wire [0:3]                         val_or_match;
wire [0:3]                         next_lru_way;
wire [0:3]                         next_way;

// this signal is set by each state machine; OR bits together for final holds
wire [0:SM_MAX-1]                  hold_tid;
wire                               hold_iu0;

// OR these together to get icm_icd_*
wire [0:SM_MAX-1]                  write_dir_inval;
wire [0:SM_MAX-1]                  write_dir_val;
wire [0:SM_MAX-1]                  data_write;
wire [0:SM_MAX-1]                  dir_write;
wire [0:TAGS_USED-1]               dir_write_no_block;

reg [64-`REAL_IFAR_WIDTH:57]       reload_addr;
reg [0:3]                          reload_way;
wire                               reload_endian;
wire                               reld_r1_endian;
wire [0:127]                       swap_endian_data;

wire [0:3]                         branch_decode0;
wire [0:3]                         branch_decode1;
wire [0:3]                         branch_decode2;
wire [0:3]                         branch_decode3;

wire [0:143]                       instr_data;

wire [0:TAGS_USED-1]               lru_write;
reg [51:57]                        lru_write_addr;
reg [0:3]                          lru_write_way;

// ECC Error handling
wire [0:TAGS_USED-1]               new_ecc_err;
wire [0:TAGS_USED-1]               new_ecc_err_ue;
wire [0:SM_MAX-1]                  ecc_err;
wire [0:SM_MAX-1]                  ecc_err_ue;
wire [0:TAGS_USED-1]               ecc_inval;
wire [0:TAGS_USED-1]               ecc_block_iu0;
wire                               ecc_fp;
reg [51:57]                        r3_addr;
reg [0:3]                          r3_way;

wire [0:SM_MAX-1]                  active_l1_miss;
wire [0:scan_right]                siv;
wire [0:scan_right]                sov;

wire [0:31]                        tidn32;


(* analysis_not_referenced="true" *)

wire                               miss_unused;
|
|
//@@ START OF EXECUTABLE CODE FOR IUQ_IC_MISS

assign tidn32 = 32'b0;

// Tie-offs for the state machine slots that exist (SM_MAX) but are not used
// (index >= TAGS_USED). When TAGS_USED < SM_MAX the unused outputs are collected
// into miss_unused, the unused inputs/latch outputs are driven to 0, and the unused
// state vectors are forced to bit 0 set (the IDLE bit) with all other bits 0.
// When every slot is used, miss_unused is simply tied to 0.
generate
   begin : xhdl1
      if (TAGS_USED < SM_MAX)
      begin : gen_unused_t1
         assign miss_unused = | {load_tag[TAGS_USED:SM_MAX - 1], reset_state[TAGS_USED:SM_MAX - 1], request_tag[TAGS_USED:SM_MAX - 1], write_dir_val[TAGS_USED:SM_MAX - 1], hold_tid[TAGS_USED:SM_MAX - 1], dir_write[TAGS_USED:SM_MAX - 1], miss_ci_d[TAGS_USED:SM_MAX - 1], miss_flushed_d[TAGS_USED:SM_MAX - 1], miss_inval_d[TAGS_USED:SM_MAX - 1], active_l1_miss[TAGS_USED:SM_MAX-1], miss_tid_sm_d[TAGS_USED], miss_tid_sm_d[SM_MAX - 1]}; // ??? tid_sm isn't covered for (sm_max-tags_used > 2)

         // sourceless unused
         assign iu2_flush[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};
         assign new_miss[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};
         assign last_data[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};
         assign ecc_err[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};
         assign ecc_err_ue[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};

         // Latches
         assign reld_r1_val_l2[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};

         assign miss_flushed_d[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};
         assign miss_inval_d[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};
         assign miss_ci_d[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};

         assign miss_flushed_l2[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};
         assign miss_inval_l2[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};
         assign miss_ci_l2[TAGS_USED:SM_MAX - 1] = {SM_MAX-TAGS_USED{1'b0}};

         genvar i;
         for (i = TAGS_USED; i < SM_MAX; i = i + 1)
         begin : gen_sm_t1
            // unused state machine: bit 0 (IDLE) = 1, bits 1..CHECK_ECC = 0
            assign miss_tid_sm_l2[i][0] = 1'b1;
            assign miss_tid_sm_l2[i][1:CHECK_ECC] = {CHECK_ECC{1'b0}};
         end
      end

      if (TAGS_USED >= SM_MAX)
      begin : gen_unused_t2
         assign miss_unused = 1'b0;
      end

   end
endgenerate
|
|
// Per-tag copy of the IU0 instruction fetch address: tags 0/1 take the thread 0
// address, tags 2/3 take the thread 1 address (only when THREADS1 is not defined).
assign iu0_ifar[0] = ics_icm_iu0_t0_ifar;
assign iu0_ifar[1] = ics_icm_iu0_t0_ifar;

`ifndef THREADS1
assign iu0_ifar[2] = ics_icm_iu0_t1_ifar;
assign iu0_ifar[3] = ics_icm_iu0_t1_ifar;
`endif
|
|
//---------------------------------------------------------------------
// Latch Inputs, Reload pipeline
//---------------------------------------------------------------------
// default_reld_act_v[i] is high whenever state machine i is not in the IDLE state.
generate
   begin : xhdl2
      genvar i;
      for (i = 0; i < TAGS_USED; i = i + 1)
      begin : gen_default_reld_act
         assign default_reld_act_v[i] = (~miss_tid_sm_l2[i][IDLE]);
      end
   end
endgenerate
|
|
// Act signals: any state machine non-idle, any miss_act bit, any r1 reload valid.
assign default_reld_act = |(default_reld_act_v);
assign miss_or_default_act = default_reld_act | (|(miss_act));
assign reld_r2_act = |(reld_r1_val_l2);

assign bp_config_d = spr_ic_bp_config; // ??? Do I need to latch these? How far away is spr?
assign spr_ic_cls_d = spr_ic_cls;

// d-2 (r0)
assign an_ac_reld_data_vld_d = an_ac_reld_data_vld;
assign an_ac_reld_core_tag_d = an_ac_reld_core_tag;
assign an_ac_reld_qw_d = an_ac_reld_qw;

// d-1 (r1)
// Core_tag(0:2) specifies unit (IU is '010'); Core_tag(3:4) is encoded Thread ID
assign reld_r0_vld = an_ac_reld_data_vld_l2 & (an_ac_reld_core_tag_l2[0:2] == 3'b010);
|
|
// Decode latched core tag bits 3:4 into one bit per tag:
// reld_r0_tag[i] is high when the 2-bit field equals i.
generate
   begin : xhdl3
      genvar i;
      for (i = 0; i < TAGS_USED; i = i + 1)
      begin : gen_reld_tag
         wire [0:1] index = i;
         assign reld_r0_tag[i] = (an_ac_reld_core_tag_l2[3:4] == index);
      end
   end
endgenerate
|
|
// r1 valid per tag = reload valid for the IU, qualified by the decoded tag
assign reld_r1_val_d = {TAGS_USED{reld_r0_vld}} & reld_r0_tag;
assign reld_r1_qw_d = an_ac_reld_qw_l2;

// d (r2)
// Use reld_r1_vld as act to gate clock
assign reld_r2_val_d = reld_r1_val_l2[0:TAGS_USED - 1];
assign reld_r2_qw_d = reld_r1_qw_l2;

// d+1 (r3)
assign reld_r3_val_d = reld_r2_val_l2;
assign an_ac_reld_ecc_err_d = an_ac_reld_ecc_err;
assign an_ac_reld_ecc_err_ue_d = an_ac_reld_ecc_err_ue;
|
|
// Per-thread views of per-tag signals (thread i owns tags 2*i and 2*i+1):
//   reld_r3_tid[i] - either of the thread's tags has a valid reload in r3
//   iu2_flush      - the thread's iu2 flush is copied to both of its tags
generate
   begin : xhdl4
      genvar i;
      for (i = 0; i < `THREADS; i = i + 1)
      begin : gen_reld_r3_tid
         assign reld_r3_tid[i] = reld_r3_val_l2[2 * i] | reld_r3_val_l2[2 * i + 1];
         assign iu2_flush[2 * i] = ics_icm_iu2_flush[i];
         assign iu2_flush[2 * i + 1] = ics_icm_iu2_flush[i];
      end
   end
endgenerate
|
|
//---------------------------------------------------------------------
// State Machine
//---------------------------------------------------------------------
// Example State Ordering for cacheable reloads
// OLD:
// 64B Cacheline, No Gaps : (1)(3)(4)(5)(6)(11) - Wait 0, Data0, Data1, Data2, Data3, CheckECC
// 64B Cacheline, Always Gaps: (1)(3)(8)(4)(9)(5)(10)(6)(11) - Wait 0, Data0, Wait1, Data1, Wait2, Data2, Wait3, Data3, CheckECC
// 128B Cacheline, No Gaps : (1)(3)(4)(5)(12)(13)(14)(15)(6)(11) - Wait 0, Data0, Data1, Data2, Data3_128B, Data4_128B, Data5_128B, Data6_128B, Data3/7, CheckECC
// 128B Cacheline, Always Gaps: (1)(3)(8)(4)(9)(5)(16)(12)(17)(13)(18)(14)(19)(15)(10)(6)(11)
// - Wait 0, Data0, Wait1, Data1, Wait2, Data2, Wait3_128B, Data3_128B, Wait4_128B, Data4_128B, Wait5_128B, Data5_128B, Wait6_128B, Data6_128B, Data3/7, CheckECC
//
// New:
// 64B Cacheline, No Gaps : (2)(3)(3)(3)(3)(5) - Wait, Data, Data, Data, Data, CheckECC
// 64B Cacheline, Always Gaps : (2)(3)(2)(3)(2)(3)(2)(3)(5) - Wait, Data, Wait, Data, Wait, Data, Wait, Data, CheckECC
// similar pattern for 128B
//
// For now, always generating 4 tables, even if only 1 thread. Can't generate based on a generic, and don't want to include config file. Extra tables should optimize out when not needed.
//
// One iuq_ic_miss_table instance per slot 0..SM_MAX-1. Per-slot signals are indexed
// by i; r2_crit_qw_l2, addr_match and release_sm are shared by all instances.
generate
   begin
      genvar i;
      for (i = 0; i < SM_MAX; i = i + 1)
      begin : miss_sm_loop
         iuq_ic_miss_table miss_sm(
            .new_miss(new_miss[i]),
            .miss_ci_l2(miss_ci_l2[i]),
            .reld_r1_val_l2(reld_r1_val_l2[i]),
            .r2_crit_qw_l2(r2_crit_qw_l2),
            .ecc_err(ecc_err[i]),
            .ecc_err_ue(ecc_err_ue[i]),
            .addr_match(addr_match),
            .iu2_flush(iu2_flush[i]),
            .release_sm(release_sm),
            .miss_flushed_l2(miss_flushed_l2[i]),
            .miss_inval_l2(miss_inval_l2[i]),
            .miss_tid_sm_l2(miss_tid_sm_l2[i]),
            .last_data(last_data[i]),
            .miss_tid_sm_d(miss_tid_sm_d[i]),
            .reset_state(reset_state[i]),
            .request_tag(request_tag[i]),
            .write_dir_inval(write_dir_inval[i]),
            .write_dir_val(write_dir_val[i]),
            .hold_tid(hold_tid[i]),
            .data_write(data_write[i]),
            .dir_write(dir_write[i]),
            .load_tag(load_tag[i]),
            .release_sm_hold(release_sm_hold[i])
         );
      end
   end
endgenerate
|
|
//---------------------------------------------------------------------
|
|
|
|
// lmq_empty: every thread quiesced and no cp_async_block bit set.
// Both quiesce outputs are driven from the same internal per-thread vector.
assign iu_mm_lmq_empty = &(iu_xu_icache_quiesce_int) & (~(|(cp_async_block)));
assign iu_xu_icache_quiesce = iu_xu_icache_quiesce_int;
assign iu_pc_icache_quiesce = iu_xu_icache_quiesce_int;
|
|
// SM0 is only for non-prefetches, SM1 is for prefetches, or for new IFetches if SM1 is free and SM0 is busy (e.g. sometimes after flush)
//
// xhdl5 builds the per-thread and per-tag bookkeeping around the state machines:
// new-miss steering, beat counters, captured miss attributes, flush / invalidate /
// fastpath-block / ECC sticky bits, address matching and the need-hold bits.
// Thread i owns tags 2*i (SM0) and 2*i+1 (SM1); tag t belongs to thread t/2.
// The sticky bits all follow the same priority: clear (reset_state or the listed
// reset condition) wins over set, otherwise the latched value is held.
generate
   begin : xhdl5
      genvar i;
      for (i = 0; i < `THREADS; i = i + 1)
      begin : gen_new_miss
         assign new_miss[2*i] = icd_icm_miss & icd_icm_tid[i] & (~icd_icm_prefetch);
         assign new_miss[2*i+1] = icd_icm_miss & icd_icm_tid[i] & ((icd_icm_prefetch & (~icd_icm_wimge[1]) & (~icd_icm_wimge[3])) | (~miss_tid_sm_l2[2*i][IDLE]));

         // Only active when performance enabled
         assign miss_prefetch_perf_d[i] = (icd_icm_miss & icd_icm_tid[i] & miss_tid_sm_l2[2*i+1][IDLE]) ?
                                          (icd_icm_prefetch & (~icd_icm_wimge[1]) & (~icd_icm_wimge[3])) :
                                          miss_prefetch_perf_l2[i];

         // quiesced when both of the thread's state machines are idle
         assign iu_xu_icache_quiesce_int[i] = miss_tid_sm_l2[2*i][IDLE] & miss_tid_sm_l2[2*i+1][IDLE];
         assign icm_ics_prefetch_req[i] = icd_icm_miss & icd_icm_tid[i] & (~icd_icm_prefetch) & (~icd_icm_wimge[1]) & (~icd_icm_wimge[3]) & (~addr_match) & (miss_tid_sm_l2[2*i][IDLE] | miss_tid_sm_l2[2*i+1][IDLE]) & (~ics_icm_iu2_flush[i]);
         assign icm_ics_prefetch_sm_idle[i] = miss_tid_sm_l2[2*i+1][IDLE];
      end

      //genvar i;
      for (i = 0; i < TAGS_USED; i = i + 1)
      begin : gen_miss
         // Count down from 3 (if 64B) or 7 (if 128B)
         assign miss_count_d[i] = ((request_tag[i] | (miss_tid_sm_l2[i][CHECK_ECC] & ecc_err[i])) == 1'b1) ? {spr_ic_cls_l2, 2'b11} :
                                  (miss_tid_sm_l2[i][DATA] == 1'b1) ? miss_count_l2[i] - 3'b001 :
                                  miss_count_l2[i];

         assign last_data[i] = miss_count_l2[i] == 3'b000;
         assign no_data[i] = miss_count_l2[i] == {spr_ic_cls_l2, 2'b11};   // counter still at its start value

         assign miss_act[i] = miss_tid_sm_l2[i][IDLE] & icd_icm_any_iu2_valid & icd_icm_tid[i/2]; // Idle state and processing this thread
         assign miss_addr_real_d[i] = icd_icm_addr_real; // uses miss_act
         assign miss_addr_eff_d[i] = icd_icm_addr_eff; // uses miss_act
         assign miss_ci_d[i] = icd_icm_wimge[1]; // uses miss_act
         assign miss_endian_d[i] = icd_icm_wimge[4]; // uses miss_act
         assign miss_2ucode_d[i] = icd_icm_2ucode; // uses miss_act
         assign miss_2ucode_type_d[i] = icd_icm_2ucode_type; // uses miss_act

         // State-related latches
         assign set_flush_occurred[i] = (iu_flush[i/2] | br_iu_redirect[i/2] | bp_ic_iu4_redirect[i/2]) & (~miss_tid_sm_l2[i][IDLE]) & (~miss_tid_sm_l2[i][WAITMISS]);
         assign miss_flush_occurred_d[i] = (reset_state[i] == 1'b1) ? 1'b0 : // reset when going back to idle state
                                           (set_flush_occurred[i] == 1'b1) ? 1'b1 : // set when new flush
                                           miss_flush_occurred_l2[i];

         // Flushed before entering Data0 - don't load ICache if flushed outside range
         assign flush_addr_outside_range[i] = iu0_ifar[i] != {miss_addr_eff_l2[i][46:51], miss_addr_real_l2[i][52]}; // eff address shares lower bits with real addr

         assign set_flushed[i] = miss_flush_occurred_l2[i] & flush_addr_outside_range[i] & reld_r1_val_l2[i] &
                                 ((miss_tid_sm_l2[i][WAITSTATE] & no_data[i]) | miss_tid_sm_l2[i][CHECK_ECC]);

         assign miss_flushed_d[i] = (reset_state[i] == 1'b1) ? 1'b0 : // reset when going back to idle state
                                    (set_flushed[i] == 1'b1) ? 1'b1 : // set when new flush
                                    miss_flushed_l2[i];
      end

      assign inval_equal = {TAGS_USED{icd_icm_iu2_inval}} & addr_equal;

      //genvar i;
      for (i = 0; i < TAGS_USED; i = i + 1)
      begin : gen_miss_inval
         assign set_invalidated[i] = inval_equal[i] & (~miss_tid_sm_l2[i][IDLE]) & (~miss_tid_sm_l2[i][WAITMISS]) & (~miss_ci_l2[i]);
         assign miss_inval_d[i] = (reset_state[i] == 1'b1) ? 1'b0 : // reset when going back to idle state
                                  (set_invalidated[i] == 1'b1) ? 1'b1 : // set when new back_inv
                                  miss_inval_l2[i];
      end

      //genvar i;
      for (i = 0; i < TAGS_USED; i = i + 1)
      begin : gen_miss_block_fp
         assign sent_fp[i] = (r3_loaded_l2 & (~(an_ac_reld_ecc_err_l2))) & reld_r3_val_l2[i]; // sent critical qw last cycle (unless it was blocked)
         assign set_block_fp[i] = sent_fp[i] | // sent critical qw last cycle and not ecc err
                                  (iu2_flush[i] & (~(miss_tid_sm_l2[i][IDLE] | miss_tid_sm_l2[i][WAITMISS]))) |
                                  (icd_icm_prefetch & new_miss[i] & miss_tid_sm_l2[i][IDLE] & miss_tid_sm_d[i][WAITSTATE]);
         assign miss_block_fp_d[i] = (reset_state[i] == 1'b1) ? 1'b0 : // reset when going back to idle state
                                     (set_block_fp[i] == 1'b1) ? 1'b1 : // set when new block condition
                                     miss_block_fp_l2[i];
      end

      //genvar i;
      for (i = 0; i < TAGS_USED; i = i + 1)
      begin : gen_miss_ecc_err
         assign miss_ecc_err_d[i] = ((miss_tid_sm_l2[i][WAITSTATE] & no_data[i]) == 1'b1) ? 1'b0 : // reset before starting or resending data
                                    (new_ecc_err[i] == 1'b1) ? 1'b1 :
                                    miss_ecc_err_l2[i];

         assign miss_ecc_err_ue_d[i] = ((miss_tid_sm_l2[i][WAITSTATE] & no_data[i]) == 1'b1) ? 1'b0 : // reset before starting or resending data
                                       (new_ecc_err_ue[i] == 1'b1) ? an_ac_reld_ecc_err_ue_l2 :
                                       miss_ecc_err_ue_l2[i];

         // Compare incoming real address with the tag's captured address;
         // bit 57 is ignored when spr_ic_cls_l2 is set
         assign addr_equal[i] = (icd_icm_addr_real[64 - `REAL_IFAR_WIDTH:56] == miss_addr_real_l2[i][64 - `REAL_IFAR_WIDTH:56]) &
                                (spr_ic_cls_l2 | (icd_icm_addr_real[57] == miss_addr_real_l2[i][57]));

         assign addr_match_tag[i] = (addr_equal[i] & (~miss_tid_sm_l2[i][IDLE]));
      end

      assign addr_match = |(addr_match_tag);

      if (`THREADS == 1)
      begin : gen_is_idle_t1
         assign miss_thread_has_idle = miss_tid_sm_l2[0][IDLE] | miss_tid_sm_l2[1][IDLE];
      end
      if (`THREADS == 2)
      begin : gen_is_idle_t2
         assign miss_thread_has_idle = ((miss_tid_sm_l2[0][IDLE] | miss_tid_sm_l2[1][IDLE]) & icd_icm_tid[0]) |
                                       ((miss_tid_sm_l2[2][IDLE] | miss_tid_sm_l2[3][IDLE]) & icd_icm_tid[1]);
      end

      assign iu3_miss_match_d = (miss_thread_has_idle == 1'b1) ? addr_match : // new miss matches other reload
                                1'b1; //(not miss_thread_has_idle) --2nd (or 3rd) miss for thread - SM's full;
      assign icm_ics_iu3_miss_match = iu3_miss_match_l2;

      assign release_sm = |(release_sm_hold);

      //genvar i;
      for (i = 0; i < TAGS_USED; i = i + 1)
      begin : gen_miss_wrote
         // Detect write through collision with invalidate array read
         assign iu0_inval_match[i] = ics_icm_iu0_inval & (ics_icm_iu0_inval_addr[51:56] == miss_addr_real_l2[i][51:56]) &
                                     (spr_ic_cls_l2 | (ics_icm_iu0_inval_addr[57] == miss_addr_real_l2[i][57]));

         assign miss_wrote_dir_d[i] = (reset_state[i] == 1'b1) ? 1'b0 : // reset when going back to idle state
                                      (dir_write_no_block[i] | miss_wrote_dir_l2[i]);
      end

      //genvar i;
      for (i = 0; i < `THREADS; i = i + 1)
      begin : gen_need_hold
         // Hold if new miss to SM0, or if new miss and no SMs available
         assign miss_need_hold_d[2*i] = (iu2_flush[2*i] == 1'b1) ? 1'b0 :
                                        ((new_miss[2*i] &
                                          (miss_tid_sm_l2[2*i][IDLE] | ((~miss_tid_sm_l2[2*i][IDLE]) & (~miss_tid_sm_l2[2*i+1][IDLE])))) == 1'b1) ? 1'b1 :
                                        miss_need_hold_l2[2*i];

         // Hold if new miss to SM1
         assign miss_need_hold_d[2*i+1] = ((iu2_flush[2*i+1] | reset_state[2*i+1]) == 1'b1) ? 1'b0 :
                                          ((new_miss[2*i] & // -- yes, I meant new_miss(2*i) - this is miss and tid and not prefetch
                                            miss_tid_sm_l2[2*i+1][IDLE] & (~miss_tid_sm_l2[2*i][IDLE])) == 1'b1) ? 1'b1 :
                                          miss_need_hold_l2[2 * i + 1];
      end
   end
endgenerate
|
|
//---------------------------------------------------------------------
|
|
// Send request
|
|
//---------------------------------------------------------------------
|
|
generate
begin : xhdl12
  genvar i;

  // One request bit per thread: either of the thread's two tags is requesting.
  for (i = 0; i < `THREADS; i = i + 1)
  begin : gen_request
    assign request_d[i] = request_tag[2*i+1] | request_tag[2*i];
  end

  // Core-tag bit 0: constant 0 with a single thread, otherwise taken from
  // icd_icm_tid[1].
  if (`THREADS == 1)
  begin : gen_ctag_t1
    assign req_ctag_d[0] = 1'b0;
  end
  if (`THREADS == 2)
  begin : gen_ctag_t2
    assign req_ctag_d[0] = icd_icm_tid[1];
  end
end
endgenerate
|
|
|
|
// Core-tag bit 1 is set for a new miss on tag 1 or on the last tag in use
// (original note: "prefetch or extra IFetch").
assign req_ctag_d[1]  = new_miss[1] | new_miss[TAGS_USED - 1];

// Request payload captured from the IU2 miss.
assign req_ra_d       = icd_icm_addr_real[64 - `REAL_IFAR_WIDTH:59];
assign req_wimge_d    = icd_icm_wimge;
assign req_userdef_d  = icd_icm_userdef;

// Latched request is driven out on the iu_lq_* interface.
assign iu_lq_request  = request_l2;
assign iu_lq_ctag     = req_ctag_l2;
assign iu_lq_ra       = req_ra_l2;
assign iu_lq_wimge    = req_wimge_l2;
assign iu_lq_userdef  = req_userdef_l2;
|
|
|
|
//---------------------------------------------------------------------
|
|
// address muxing
|
|
//---------------------------------------------------------------------
|
|
|
|
// always @(reld_r0_tag or reld_r1_val_l2 or reld_r2_val_l2 or lru_write_l2 or reld_r3_val_l2 or row_match or miss_addr_eff_l2 or miss_addr_real_l2 or miss_way_l2)
|
|
always @(*)
begin: addr_mux_proc
  // AND-OR multiplexers over all tags in use.  Every accumulator starts at
  // zero and ORs in tag N's address/way, masked by that tag's select bit.
  reg [50:59]                   r0_addr_acc;
  reg [51:57]                   lru_addr_acc;
  reg [62-`EFF_IFAR_WIDTH:61]   load_addr_acc;
  reg [64-`REAL_IFAR_WIDTH:57]  reload_addr_acc;
  reg [0:3]                     reload_way_acc;
  reg [51:57]                   lru_write_addr_acc;
  reg [0:3]                     lru_write_way_acc;
  reg [51:57]                   r3_addr_acc;
  reg [0:3]                     r3_way_acc;
  reg [0:3]                     row_match_way_acc;
  //(* analysis_not_referenced="true" *)
  integer                       tag;

  r0_addr_acc        = 10'b0;
  lru_addr_acc       = 7'b0;
  load_addr_acc      = {`EFF_IFAR_WIDTH{1'b0}};
  reload_addr_acc    = {`REAL_IFAR_WIDTH-6{1'b0}};
  reload_way_acc     = 4'b0;
  lru_write_addr_acc = 7'b0;
  lru_write_way_acc  = 4'b0;
  r3_addr_acc        = 7'b0;
  r3_way_acc         = 4'b0;
  row_match_way_acc  = 4'b0;

  for (tag = 0; tag < TAGS_USED; tag = tag + 1)
  begin
    // selected by reld_r0_tag
    r0_addr_acc        = r0_addr_acc |
                         ({10{reld_r0_tag[tag]}} & {miss_addr_eff_l2[tag][50:51], miss_addr_real_l2[tag][52:59]});
    // selected by reld_r1_val_l2
    lru_addr_acc       = lru_addr_acc |
                         ({7{reld_r1_val_l2[tag]}} & miss_addr_real_l2[tag][51:57]);
    // selected by reld_r2_val_l2
    load_addr_acc      = load_addr_acc |
                         ({`EFF_IFAR_WIDTH{reld_r2_val_l2[tag]}} & {miss_addr_eff_l2[tag], miss_addr_real_l2[tag][52:61]});
    reload_addr_acc    = reload_addr_acc |
                         ({`REAL_IFAR_WIDTH-6{reld_r2_val_l2[tag]}} & miss_addr_real_l2[tag][64 - `REAL_IFAR_WIDTH:57]);
    reload_way_acc     = reload_way_acc |
                         ({4{reld_r2_val_l2[tag]}} & miss_way_l2[tag]);
    // selected by lru_write_l2
    lru_write_addr_acc = lru_write_addr_acc |
                         ({7{lru_write_l2[tag]}} & miss_addr_real_l2[tag][51:57]);
    lru_write_way_acc  = lru_write_way_acc |
                         ({4{lru_write_l2[tag]}} & miss_way_l2[tag]);
    // selected by reld_r3_val_l2
    r3_addr_acc        = r3_addr_acc |
                         ({7{reld_r3_val_l2[tag]}} & miss_addr_real_l2[tag][51:57]);
    r3_way_acc         = r3_way_acc |
                         ({4{reld_r3_val_l2[tag]}} & miss_way_l2[tag]);
    // selected by row_match: union of ways held by tags matching the LRU row
    row_match_way_acc  = row_match_way_acc |
                         ({4{row_match[tag]}} & miss_way_l2[tag]);
  end

  // Drive the module-level results from the accumulated values.
  r0_addr        = r0_addr_acc;
  lru_addr       = lru_addr_acc;
  load_addr      = load_addr_acc;
  reload_addr    = reload_addr_acc;
  reload_way     = reload_way_acc;
  lru_write_addr = lru_write_addr_acc;
  lru_write_way  = lru_write_way_acc;
  r3_addr        = r3_addr_acc;
  r3_way         = r3_way_acc;
  row_match_way  = row_match_way_acc;
end
|
|
|
|
//---------------------------------------------------------------------
|
|
// fastpath-related signals
|
|
//---------------------------------------------------------------------
|
|
// First beat of reload data: open a hole in IU0 so the data can be
// fast-pathed into IU2.  A tag qualifies when the r0 beat is its critical
// quadword and it is neither fastpath-blocked nor flushed.
assign preload_r0_tag = reld_r0_tag & r0_crit_qw & (~miss_block_fp_l2) & (~miss_flushed_l2[0:TAGS_USED - 1]);

generate
begin : xhdl13
  genvar i;
  // Collapse the two tags of each thread into one per-thread bit.
  for (i = 0; i < `THREADS; i = i + 1)
  begin : gen_preload_r0_tid
    assign preload_r0_tid[i] = preload_r0_tag[2*i+1] | preload_r0_tag[2*i];
  end
end
endgenerate

// Qualify with reld_r0_vld and send the per-thread preload plus its address.
assign preload_hold_iu0        = preload_r0_tid & {`THREADS{reld_r0_vld}};

assign icm_ics_iu0_preload_val  = preload_hold_iu0;
assign icm_ics_iu0_preload_ifar = r0_addr;
|
|
|
|
// ucode attributes of whichever tag is valid in reload stage r2.
assign load_2ucode       = |(miss_2ucode_l2 & reld_r2_val_l2);
assign load_2ucode_type  = |(miss_2ucode_type_l2 & reld_r2_val_l2);

// Per-tag load, suppressed while fastpath is blocked for that tag.
assign load_tag_no_block = (~miss_block_fp_l2[0:TAGS_USED - 1]) & load_tag[0:TAGS_USED - 1];

generate
begin : xhdl14
  genvar i;
  // Collapse the two tags of each thread into one per-thread load bit.
  for (i = 0; i < `THREADS; i = i + 1)
  begin : gen_load_tid
    assign load_tid_no_block[i] = load_tag_no_block[2*i+1] | load_tag_no_block[2*i];
  end
end
endgenerate

// Outputs toward icd.
assign icm_icd_load             = load_tid_no_block;
assign icm_icd_load_addr        = load_addr;
assign icm_icd_load_2ucode      = load_2ucode;
assign icm_icd_load_2ucode_type = load_2ucode_type;

// Latched as r3_loaded_l2: some thread was loaded this cycle.
assign r3_loaded_d = |load_tid_no_block;
|
|
|
|
//---------------------------------------------------------------------
|
|
// Critical Quadword
|
|
//---------------------------------------------------------------------
|
|
// Note (from the original): the reld_crit_qw signal from L2 could be latched
// instead, but address bits 60:61 are needed anyway, so the whole address is
// kept and the critical quadword is derived by comparing bits 58:59.
generate
begin : xhdl15
  genvar i;
  for (i = 0; i < TAGS_USED; i = i + 1)
  begin : gen_crit_qw
    // r0: incoming quadword number equals the tag's missed quadword.
    assign r0_crit_qw[i] = (miss_addr_real_l2[i][58:59] == an_ac_reld_qw_l2[58:59]);
    // r1: same compare against the quadword number latched for stage r1.
    assign r1_crit_qw[i] = (miss_addr_real_l2[i][58:59] == reld_r1_qw_l2);
  end
end
endgenerate

// Critical-quadword indication for the tag valid in r1; latched for r2.
assign r2_crit_qw_d = |(reld_r1_val_l2[0:TAGS_USED - 1] & r1_crit_qw);
|
|
|
|
//---------------------------------------------------------------------
|
|
// Get LRU
|
|
//---------------------------------------------------------------------
|
|
// ??? Might have to read in r0
|
|
|
|
// An LRU write is in flight to the same row that is being read for LRU
// selection (bits 51:56, plus bit 57 unless spr_ic_cls_l2 is set).
assign lru_write_hit = (|lru_write) &
                       (lru_write_addr[51:56] == lru_addr[51:56]) &
                       ((lru_write_addr[57] == lru_addr[57]) | spr_ic_cls_l2);

// LRU value the in-flight write will produce, selected by the way being
// written; the untouched LRU bit is carried over from icd_icm_row_lru.
assign hit_lru = ({3{lru_write_way[0]}} & {2'b11, icd_icm_row_lru[2]}) |
                 ({3{lru_write_way[1]}} & {2'b10, icd_icm_row_lru[2]}) |
                 ({3{lru_write_way[2]}} & {1'b0, icd_icm_row_lru[1], 1'b1}) |
                 ({3{lru_write_way[3]}} & {1'b0, icd_icm_row_lru[1], 1'b0});

// Bypass: use the forwarded value on a hit, the array value otherwise.
assign row_lru = lru_write_hit ? hit_lru : icd_icm_row_lru;
|
|
|
|
generate
begin : xhdl16
  genvar i;
  for (i = 0; i < TAGS_USED; i = i + 1)
  begin : gen_lru
    // Way selection happens in r1: tag is valid in r1, sits in WAITSTATE with
    // no data yet, and is not cache-inhibited, flushed or invalidated.
    assign select_lru[i] = reld_r1_val_l2[i] & (miss_tid_sm_l2[i][WAITSTATE] & no_data[i]) &
                           (~miss_ci_l2[i]) & (~miss_flushed_l2[i]) & (~miss_inval_l2[i]);

    // The tag's stored way is meaningful (original note: "valid in Data0-3,
    // Wait1-3, CheckECC"): not IDLE, not WAITMISS, not WAITSTATE-without-data,
    // and not flushed / invalidated / cache-inhibited.
    assign lru_valid[i] = (~miss_tid_sm_l2[i][IDLE]) & (~miss_tid_sm_l2[i][WAITMISS]) &
                          (~(miss_tid_sm_l2[i][WAITSTATE] & no_data[i])) &
                          (~miss_flushed_l2[i]) & (~miss_inval_l2[i]) & (~miss_ci_l2[i]);

    // Another tag is already writing into the row being selected.
    assign row_match[i] = lru_valid[i] &
                          (miss_addr_real_l2[i][51:56] == lru_addr[51:56]) &
                          ((miss_addr_real_l2[i][57] == lru_addr[57]) | spr_ic_cls_l2);
  end
end
endgenerate

// Ways that are either valid in the array or claimed by a matching tag.
assign val_or_match = row_match_way | icd_icm_row_val;
|
|
|
|
// Old: Use if can never hit more than one entry, since only two reloads are in data mode at a time
|
|
//?TABLE select_lru_way LISTING(final) OPTIMIZE PARMS(ON-SET, OFF-SET);
|
|
//*INPUTS*=================*OUTPUTS*======*
|
|
//| | |
|
|
//| row_match_way | |
|
|
//| | row_lru | |
|
|
//| | | | next_lru_way |
|
|
//| | | | | |
|
|
//| | | | | |
|
|
//| 0123 012 | 0123 |
|
|
//*TYPE*===================+==============+
|
|
//| PPPP PPP | PPPP |
|
|
//*TERMS*==================+==============+
|
|
//| 0--- 00- | 1000 |
|
|
//| 1--- 000 | 0010 |
|
|
//| 1--- 001 | 0001 |
|
|
//| | |
|
|
//| -0-- 01- | 0100 |
|
|
//| -1-- 010 | 0010 |
|
|
//| -1-- 011 | 0001 |
|
|
//| | |
|
|
//| --0- 1-0 | 0010 |
|
|
//| --1- 100 | 1000 |
|
|
//| --1- 110 | 0100 |
|
|
//| | |
|
|
//| ---0 1-1 | 0001 |
|
|
//| ---1 101 | 1000 |
|
|
//| ---1 111 | 0100 |
|
|
//*END*====================+==============+
|
|
//?TABLE END select_lru_way;
|
|
|
|
// Could have all 4 tags going to same row
|
|
/*
|
|
//table_start
|
|
?TABLE select_lru_way LISTING(final) OPTIMIZE PARMS(ON-SET, OFF-SET);
|
|
*INPUTS*=================*OUTPUTS*======*
|
|
| | |
|
|
| row_lru | |
|
|
| | row_match_way | |
|
|
| | | | next_lru_way |
|
|
| | | | | |
|
|
| | | | | |
|
|
| 012 0123 | 0123 |
|
|
*TYPE*===================+==============+
|
|
| PPP PPPP | PPPP |
|
|
*TERMS*==================+==============+
|
|
| 00- 0--- | 1000 |
|
|
| 000 1-0- | 0010 |
|
|
| 000 101- | 0100 |
|
|
| 000 111- | 0001 |
|
|
| 001 1--0 | 0001 |
|
|
| 001 10-1 | 0100 |
|
|
| 001 11-1 | 0010 |
|
|
| | |
|
|
| 01- -0-- | 0100 |
|
|
| 010 -10- | 0010 |
|
|
| 010 011- | 1000 |
|
|
| 010 111- | 0001 |
|
|
| 011 -1-0 | 0001 |
|
|
| 011 01-1 | 1000 |
|
|
| 011 11-1 | 0010 |
|
|
| | |
|
|
| 1-0 --0- | 0010 |
|
|
| 100 0-1- | 1000 |
|
|
| 100 1-10 | 0001 |
|
|
| 100 1-11 | 0100 |
|
|
| 110 -01- | 0100 |
|
|
| 110 -110 | 0001 |
|
|
| 110 -111 | 1000 |
|
|
| | |
|
|
| 1-1 ---0 | 0001 |
|
|
| 101 0--1 | 1000 |
|
|
| 101 1-01 | 0010 |
|
|
| 101 1-11 | 0100 |
|
|
| 111 -0-1 | 0100 |
|
|
| 111 -101 | 0010 |
|
|
| 111 -111 | 1000 |
|
|
*END*====================+==============+
|
|
?TABLE END select_lru_way;
|
|
//table_end
|
|
*/
|
|
|
|
//assign_start
|
|
|
|
// Product terms of the select_lru_way table (inputs row_lru[0:2] and
// row_match_way[0:3]).  Each term is a case-equality (===) compare of the
// listed input bits against a constant; terms are OR-ed into the one-hot
// next_lru_way below.
assign select_lru_way_pt[1]  = ({row_lru[0], row_lru[2], row_match_way[0], row_match_way[1], row_match_way[3]} === 5'b01011);
assign select_lru_way_pt[2]  = ({row_lru[0], row_lru[2], row_match_way[0], row_match_way[1], row_match_way[2]} === 5'b00011);
assign select_lru_way_pt[3]  = ({row_lru[0], row_lru[2], row_match_way[0], row_match_way[1], row_match_way[3]} === 5'b01101);
assign select_lru_way_pt[4]  = ({row_lru[0], row_lru[2], row_match_way[0], row_match_way[1], row_match_way[2]} === 5'b00101);
assign select_lru_way_pt[5]  = ({row_lru[0], row_lru[1], row_match_way[1], row_match_way[2], row_match_way[3]} === 5'b11101);
assign select_lru_way_pt[6]  = ({row_lru[0], row_lru[1], row_match_way[0], row_match_way[2], row_match_way[3]} === 5'b10101);
assign select_lru_way_pt[7]  = ({row_lru[0], row_lru[1], row_match_way[1], row_match_way[2], row_match_way[3]} === 5'b11110);
assign select_lru_way_pt[8]  = ({row_lru[0], row_lru[1], row_match_way[0], row_match_way[2], row_match_way[3]} === 5'b10110);
assign select_lru_way_pt[9]  = ({row_lru[0], row_lru[1], row_match_way[1], row_match_way[2], row_match_way[3]} === 5'b11111);
assign select_lru_way_pt[10] = ({row_lru[0], row_lru[1], row_match_way[0], row_match_way[2], row_match_way[3]} === 5'b10111);
assign select_lru_way_pt[11] = ({row_lru[0], row_lru[2], row_match_way[0], row_match_way[1], row_match_way[3]} === 5'b01111);
assign select_lru_way_pt[12] = ({row_lru[0], row_lru[2], row_match_way[0], row_match_way[1], row_match_way[2]} === 5'b00111);
assign select_lru_way_pt[13] = ({row_lru[1], row_lru[2], row_match_way[0], row_match_way[3]} === 4'b0101);
assign select_lru_way_pt[14] = ({row_lru[1], row_lru[2], row_match_way[0], row_match_way[2]} === 4'b0001);
assign select_lru_way_pt[15] = ({row_lru[1], row_lru[2], row_match_way[1], row_match_way[3]} === 4'b1101);
assign select_lru_way_pt[16] = ({row_lru[1], row_lru[2], row_match_way[1], row_match_way[2]} === 4'b1001);
assign select_lru_way_pt[17] = ({row_lru[1], row_lru[2], row_match_way[1], row_match_way[2]} === 4'b1010);
assign select_lru_way_pt[18] = ({row_lru[1], row_lru[2], row_match_way[0], row_match_way[2]} === 4'b0010);
assign select_lru_way_pt[19] = ({row_lru[1], row_lru[2], row_match_way[1], row_match_way[3]} === 4'b1110);
assign select_lru_way_pt[20] = ({row_lru[1], row_lru[2], row_match_way[0], row_match_way[3]} === 4'b0110);
assign select_lru_way_pt[21] = ({row_lru[0], row_lru[1], row_match_way[0]} === 3'b000);
assign select_lru_way_pt[22] = ({row_lru[0], row_lru[1], row_match_way[1]} === 3'b010);
assign select_lru_way_pt[23] = ({row_lru[0], row_lru[2], row_match_way[2]} === 3'b100);
assign select_lru_way_pt[24] = ({row_lru[0], row_lru[2], row_match_way[3]} === 3'b110);

// Sum of products: one output bit per way.
assign next_lru_way[0] = (select_lru_way_pt[1]  | select_lru_way_pt[2]  | select_lru_way_pt[9] |
                          select_lru_way_pt[13] | select_lru_way_pt[14] | select_lru_way_pt[21]);
assign next_lru_way[1] = (select_lru_way_pt[3]  | select_lru_way_pt[4]  | select_lru_way_pt[10] |
                          select_lru_way_pt[15] | select_lru_way_pt[16] | select_lru_way_pt[22]);
assign next_lru_way[2] = (select_lru_way_pt[5]  | select_lru_way_pt[6]  | select_lru_way_pt[11] |
                          select_lru_way_pt[17] | select_lru_way_pt[18] | select_lru_way_pt[23]);
assign next_lru_way[3] = (select_lru_way_pt[7]  | select_lru_way_pt[8]  | select_lru_way_pt[12] |
                          select_lru_way_pt[19] | select_lru_way_pt[20] | select_lru_way_pt[24]);
|
|
|
|
//assign_end
|
|
|
|
// Way to fill: the lowest-numbered way that is neither valid nor claimed by
// a matching tag; if all four are taken, fall back to the LRU-table choice.
assign next_way = (~val_or_match[0]) ? 4'b1000 :
                  (~val_or_match[1]) ? 4'b0100 :
                  (~val_or_match[2]) ? 4'b0010 :
                  (~val_or_match[3]) ? 4'b0001 :
                                       next_lru_way;

generate
begin : xhdl17
  genvar i;
  // Each tag captures next_way when select_lru[i] is set, else holds its way.
  for (i = 0; i < TAGS_USED; i = i + 1)
  begin : gen_miss_way
    assign miss_way_d[i] = select_lru[i] ? next_way : miss_way_l2[i];
  end
end
endgenerate
|
|
|
|
//---------------------------------------------------------------------
|
|
// setting output signals
|
|
//---------------------------------------------------------------------
|
|
generate
begin : xhdl18
  genvar i;
  // Per-thread hold: either of the thread's two tags asks for a hold
  // (hold_tid or ecc_block_iu0) while its need_hold latch is set.
  for (i = 0; i < `THREADS; i = i + 1)
  begin : gen_hold_thread
    assign icm_ics_hold_thread[i] =
        (miss_need_hold_l2[2*i]   & (hold_tid[2*i]   | ecc_block_iu0[2*i])) |
        (miss_need_hold_l2[2*i+1] & (hold_tid[2*i+1] | ecc_block_iu0[2*i+1]));
  end
end
endgenerate
|
|
|
|
// Hold IU0 when any tag writes the data array this cycle, or when a fastpath
// preload is pending (original note: "fastpath 2 cycles from now").
// Original design notes:
//  - if data_write timing is bad, can switch back to using hold_all_tids,
//    but use reld_r2;
//  - for a reload in Wait0, flush is not checked, for timing reasons.
assign hold_iu0 = (|data_write) | (|preload_hold_iu0);

assign icm_ics_hold_iu0   = hold_iu0;
assign icm_icd_lru_addr   = lru_addr;
assign icm_icd_data_write = |data_write;
assign icm_icd_dir_inval  = |write_dir_inval;

// Directory valid write: only for tags that actually wrote the directory and
// were not hit by an invalidate.  (Original open question: move inval_equal
// for timing?)
assign icm_icd_dir_val = |(write_dir_val[0:TAGS_USED - 1] & miss_wrote_dir_l2 & (~inval_equal));

// Reload address = row index bits 51:57 followed by the r2 quadword number.
assign icm_icd_reload_addr = {reload_addr[51:57], reld_r2_qw_l2};
assign icm_icd_reload_way  = reload_way;
|
|
|
|
// Endian attribute of the tag valid in r2 (used for the dir write) and of
// the tag valid in r1 (used to steer the incoming data).
assign reload_endian  = |(miss_endian_l2 & reld_r2_val_l2);
assign reld_r1_endian = |(miss_endian_l2 & reld_r1_val_l2[0:TAGS_USED - 1]);

// Byte-reverse each of the four 32-bit words of the reload data.
assign swap_endian_data = {
    an_ac_reld_data[24:31],   an_ac_reld_data[16:23],   an_ac_reld_data[8:15],    an_ac_reld_data[0:7],
    an_ac_reld_data[56:63],   an_ac_reld_data[48:55],   an_ac_reld_data[40:47],   an_ac_reld_data[32:39],
    an_ac_reld_data[88:95],   an_ac_reld_data[80:87],   an_ac_reld_data[72:79],   an_ac_reld_data[64:71],
    an_ac_reld_data[120:127], an_ac_reld_data[112:119], an_ac_reld_data[104:111], an_ac_reld_data[96:103]};

// Latch the swapped data when reld_r1_endian is set, raw data otherwise.
assign reld_data_d = reld_r1_endian ? swap_endian_data : an_ac_reld_data;
|
|
|
|
// Branch decode: one iuq_bd instance per 32-bit word of the latched reload
// data.  Each instance also receives the following word as instruction_next;
// the last one gets tidn32.  All four share the bp_config_l2 enables.
iuq_bd br_decode0(
   .instruction(reld_data_l2[0:31]),
   .instruction_next(reld_data_l2[32:63]),
   .bp_bc_en(bp_config_l2[0]),
   .bp_bclr_en(bp_config_l2[1]),
   .bp_bcctr_en(bp_config_l2[2]),
   .bp_sw_en(bp_config_l2[3]),
   .branch_decode(branch_decode0[0:3])
);

iuq_bd br_decode1(
   .instruction(reld_data_l2[32:63]),
   .instruction_next(reld_data_l2[64:95]),
   .bp_bc_en(bp_config_l2[0]),
   .bp_bclr_en(bp_config_l2[1]),
   .bp_bcctr_en(bp_config_l2[2]),
   .bp_sw_en(bp_config_l2[3]),
   .branch_decode(branch_decode1[0:3])
);

iuq_bd br_decode2(
   .instruction(reld_data_l2[64:95]),
   .instruction_next(reld_data_l2[96:127]),
   .bp_bc_en(bp_config_l2[0]),
   .bp_bclr_en(bp_config_l2[1]),
   .bp_bcctr_en(bp_config_l2[2]),
   .bp_sw_en(bp_config_l2[3]),
   .branch_decode(branch_decode2[0:3])
);

iuq_bd br_decode3(
   .instruction(reld_data_l2[96:127]),
   .instruction_next(tidn32[0:31]),
   .bp_bc_en(bp_config_l2[0]),
   .bp_bclr_en(bp_config_l2[1]),
   .bp_bcctr_en(bp_config_l2[2]),
   .bp_sw_en(bp_config_l2[3]),
   .branch_decode(branch_decode3[0:3])
);

// Array write data: each 32-bit instruction followed by its 4 decode bits.
assign instr_data = {reld_data_l2[0:31],   branch_decode0[0:3],
                     reld_data_l2[32:63],  branch_decode1[0:3],
                     reld_data_l2[64:95],  branch_decode2[0:3],
                     reld_data_l2[96:127], branch_decode3[0:3]};

assign icm_icd_reload_data = instr_data;
|
|
|
|
// Directory write (performed in r2).  A tag's write is dropped when it
// collides with an IU0 invalidate to the same row (iu0_inval_match).
assign dir_write_no_block       = (~iu0_inval_match) & dir_write[0:TAGS_USED - 1];

assign icm_icd_dir_write        = |dir_write_no_block;
assign icm_icd_dir_write_addr   = reload_addr;
assign icm_icd_dir_write_endian = reload_endian;
assign icm_icd_dir_write_way    = reload_way;
|
|
|
|
// LRU write.  Original timing note: occurs 2 cycles after the Data2
// data_write (64B mode) or Data6 (128B mode).
generate
begin : xhdl19
  genvar i;
  for (i = 0; i < TAGS_USED; i = i + 1)
  begin : gen_lru_write
    // Arm the write when the tag writes data in DATA state with count == 1.
    assign lru_write_next_cycle_d[i] = miss_tid_sm_l2[i][DATA] & (miss_count_l2[i] == 3'b001) & data_write[i];
    // Fire the latched write unless the tag has been invalidated meanwhile.
    assign lru_write[i] = (~miss_inval_l2[i]) & lru_write_l2[i];
  end
end
endgenerate

// Second pipeline stage of the LRU-write request.
assign lru_write_d = lru_write_next_cycle_l2;

assign icm_icd_lru_write      = |lru_write;
assign icm_icd_lru_write_addr = lru_write_addr;
assign icm_icd_lru_write_way  = lru_write_way;

// For act's in idir: some tag is valid in reload stage r2.
assign icm_icd_any_reld_r2 = |reld_r2_val_l2;
|
|
|
|
//---------------------------------------------------------------------
|
|
// ECC Error handling
|
|
//---------------------------------------------------------------------
|
|
// ECC status arriving with the beat in r3, attributed to the tag valid there.
assign new_ecc_err    = reld_r3_val_l2 & {TAGS_USED{an_ac_reld_ecc_err_l2}};
assign new_ecc_err_ue = reld_r3_val_l2 & {TAGS_USED{an_ac_reld_ecc_err_ue_l2}};

// Accumulated per-tag status: this beat's error or one latched earlier.
assign ecc_err[0:TAGS_USED - 1]    = miss_ecc_err_l2    | new_ecc_err;
assign ecc_err_ue[0:TAGS_USED - 1] = miss_ecc_err_ue_l2 | new_ecc_err_ue;

generate
begin : xhdl20
  genvar i;
  for (i = 0; i < TAGS_USED; i = i + 1)
  begin : gen_ecc_inval
    // In CHECK_ECC, invalidate the line on an ECC error (correctable or UE)
    // or an invalidate match, unless the tag is CI, flushed or already
    // invalidated.
    assign ecc_inval[i] = miss_tid_sm_l2[i][CHECK_ECC] &
                          (an_ac_reld_ecc_err_l2 | an_ac_reld_ecc_err_ue_l2 | inval_equal[i]) &
                          (~miss_ci_l2[i]) & (~miss_flushed_l2[i]) & (~miss_inval_l2[i]);

    // Block IU0 on an ECC error in CHECK_ECC or on the last DATA beat.  The
    // last-data check was moved here from hold_tid for timing; need_hold is
    // checked in the hold_thread logic.
    assign ecc_block_iu0[i] = ecc_err[i] &
                              ((miss_tid_sm_l2[i][DATA] & last_data[i]) | miss_tid_sm_l2[i][CHECK_ECC]);
  end
end
endgenerate
|
|
|
|
// CheckECC stage, non-CI: if the last beat of data has bad ECC, invalidate
// the cache line and flush IU1.  Also covers a back-invalidate seen in the
// Check ECC state.
assign icm_icd_ecc_inval = |ecc_inval;   //or back_inval_check_ecc;
assign icm_icd_ecc_addr  = r3_addr[51:57];
assign icm_icd_ecc_way   = r3_way;

// CI / critical quadword: data was already fast-pathed (r3_loaded_l2), so
// cancel it in IU3 on an ECC error, or flag an error on an uncorrectable one.
assign ecc_fp                    = an_ac_reld_ecc_err_l2 & r3_loaded_l2;
assign icm_icd_iu3_ecc_fp_cancel = ecc_fp;
assign icm_ics_iu3_ecc_fp_cancel = reld_r3_tid & {`THREADS{ecc_fp}};
assign ic_bp_iu3_ecc_err         = an_ac_reld_ecc_err_ue_l2 & r3_loaded_l2;
|
|
|
|
//---------------------------------------------------------------------
|
|
// Performance Events
|
|
//---------------------------------------------------------------------
|
|
generate
begin : xhdl11
  genvar i;
  for (i = 0; i < SM_MAX; i = i + 1)
  begin : g11
    // State machine counts as an active L1 miss when it is not CI, not Idle,
    // not WaitMiss, and not (CheckECC with no ECC error, i.e. done).
    assign active_l1_miss[i] = (~miss_ci_l2[i]) &
                               (~miss_tid_sm_l2[i][IDLE]) &
                               (~miss_tid_sm_l2[i][WAITMISS]) &
                               (~(miss_tid_sm_l2[i][CHECK_ECC] & (~ecc_err[i])));
  end

  genvar t;
  for (t = 0; t < `THREADS; t = t + 1)
  begin : gen_perf
    // Event 0 - IL1 miss cycles: SM0 active, or SM1 active for a
    // non-prefetch.  (Event mode/edge should not count multiple times if
    // flushed and recycled.)
    assign perf_event_d[t][0] = active_l1_miss[2*t] |
                                (active_l1_miss[2*t+1] & (~miss_prefetch_perf_l2[t]));

    // Event 1 - IL1 reload dropped: non-CI, flushed, and finishing CheckECC
    // without an ECC error (returning to Idle); includes prefetches.
    assign perf_event_d[t][1] =
        ((~miss_ci_l2[2*t])   & miss_flushed_l2[2*t]   & (miss_tid_sm_l2[2*t][CHECK_ECC]   & (~ecc_err[2*t]))) |
        ((~miss_ci_l2[2*t+1]) & miss_flushed_l2[2*t+1] & (miss_tid_sm_l2[2*t+1][CHECK_ECC] & (~ecc_err[2*t+1])));

    // Event 2 - prefetch cycles: SM1 active on a prefetch.
    assign perf_event_d[t][2] = miss_prefetch_perf_l2[t] & active_l1_miss[2*t+1];
  end
end
endgenerate

// Latched events go out per thread; thread 1 exists only without THREADS1.
assign ic_perf_t0_event = perf_event_l2[0];
`ifndef THREADS1
assign ic_perf_t1_event = perf_event_l2[1];
`endif
|
|
|
|
//---------------------------------------------------------------------
|
|
// Latches
|
|
//---------------------------------------------------------------------
|
|
|
|
// 1-bit latch: spr_ic_cls_d -> spr_ic_cls_l2, act = default_reld_act.
tri_rlmlatch_p #(.INIT(0)) spr_ic_cls_latch(
   .din(spr_ic_cls_d),
   .dout(spr_ic_cls_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[spr_ic_cls_offset]),
   .scout(sov[spr_ic_cls_offset])
);
|
|
|
|
// 4-bit register: bp_config_d -> bp_config_l2 (the four iuq_bd enables),
// act = default_reld_act.
tri_rlmreg_p #(.WIDTH(4), .INIT(0)) bp_config_latch(
   .din(bp_config_d),
   .dout(bp_config_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[bp_config_offset:bp_config_offset + 4 - 1]),
   .scout(sov[bp_config_offset:bp_config_offset + 4 - 1])
);
|
|
|
|
// 1-bit latch: an_ac_reld_data_vld_d -> an_ac_reld_data_vld_l2,
// act = default_reld_act.
tri_rlmlatch_p #(.INIT(0)) an_ac_reld_data_vld_latch(
   .din(an_ac_reld_data_vld_d),
   .dout(an_ac_reld_data_vld_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[an_ac_reld_data_vld_offset]),
   .scout(sov[an_ac_reld_data_vld_offset])
);
|
|
|
|
// 5-bit register: an_ac_reld_core_tag_d -> an_ac_reld_core_tag_l2,
// act = default_reld_act.
tri_rlmreg_p #(.WIDTH(5), .INIT(0)) an_ac_reld_core_tag_latch(
   .din(an_ac_reld_core_tag_d),
   .dout(an_ac_reld_core_tag_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[an_ac_reld_core_tag_offset:an_ac_reld_core_tag_offset + 5 - 1]),
   .scout(sov[an_ac_reld_core_tag_offset:an_ac_reld_core_tag_offset + 5 - 1])
);
|
|
|
|
// 2-bit register: an_ac_reld_qw_d -> an_ac_reld_qw_l2 (reload quadword
// number), act = default_reld_act.
tri_rlmreg_p #(.WIDTH(2), .INIT(0)) an_ac_reld_qw_latch(
   .din(an_ac_reld_qw_d),
   .dout(an_ac_reld_qw_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[an_ac_reld_qw_offset:an_ac_reld_qw_offset + 2 - 1]),
   .scout(sov[an_ac_reld_qw_offset:an_ac_reld_qw_offset + 2 - 1])
);
|
|
|
|
// Per-tag valid for reload stage r1: reld_r1_val_d -> reld_r1_val_l2,
// act = default_reld_act.
tri_rlmreg_p #(.WIDTH(TAGS_USED), .INIT(0)) reld_r1_val_latch(
   .din(reld_r1_val_d),
   .dout(reld_r1_val_l2[0:TAGS_USED - 1]),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[reld_r1_val_offset:reld_r1_val_offset + TAGS_USED - 1]),
   .scout(sov[reld_r1_val_offset:reld_r1_val_offset + TAGS_USED - 1])
);
|
|
|
|
// 2-bit quadword number for reload stage r1: reld_r1_qw_d -> reld_r1_qw_l2,
// act = default_reld_act.
tri_rlmreg_p #(.WIDTH(2), .INIT(0)) reld_r1_qw_latch(
   .din(reld_r1_qw_d),
   .dout(reld_r1_qw_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[reld_r1_qw_offset:reld_r1_qw_offset + 2 - 1]),
   .scout(sov[reld_r1_qw_offset:reld_r1_qw_offset + 2 - 1])
);
|
|
|
|
// 128-bit reload data register: reld_data_d (endian-adjusted) ->
// reld_data_l2.  Unlike most latches here, act = reld_r2_act.
tri_rlmreg_p #(.WIDTH(128), .INIT(0)) reld_data_latch(
   .din(reld_data_d),
   .dout(reld_data_l2),
   .act(reld_r2_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[reld_data_offset:reld_data_offset + 128 - 1]),
   .scout(sov[reld_data_offset:reld_data_offset + 128 - 1])
);
|
|
|
|
// Per-tag valid for reload stage r2: reld_r2_val_d -> reld_r2_val_l2,
// act = default_reld_act.
tri_rlmreg_p #(.WIDTH(TAGS_USED), .INIT(0)) reld_r2_val_latch(
   .din(reld_r2_val_d),
   .dout(reld_r2_val_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[reld_r2_val_offset:reld_r2_val_offset + TAGS_USED - 1]),
   .scout(sov[reld_r2_val_offset:reld_r2_val_offset + TAGS_USED - 1])
);
|
|
|
|
// 2-bit quadword number for reload stage r2: reld_r2_qw_d -> reld_r2_qw_l2,
// act = reld_r2_act.
tri_rlmreg_p #(.WIDTH(2), .INIT(0)) reld_r2_qw_latch(
   .din(reld_r2_qw_d),
   .dout(reld_r2_qw_l2),
   .act(reld_r2_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[reld_r2_qw_offset:reld_r2_qw_offset + 2 - 1]),
   .scout(sov[reld_r2_qw_offset:reld_r2_qw_offset + 2 - 1])
);
|
|
|
|
// 1-bit latch: r2_crit_qw_d -> r2_crit_qw_l2 (critical-quadword indication
// carried into r2), act = default_reld_act.
tri_rlmlatch_p #(.INIT(0)) r2_crit_qw_latch(
   .din(r2_crit_qw_d),
   .dout(r2_crit_qw_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[r2_crit_qw_offset]),
   .scout(sov[r2_crit_qw_offset])
);
|
|
|
|
// Per-tag valid for reload stage r3: reld_r3_val_d -> reld_r3_val_l2,
// act = default_reld_act.
tri_rlmreg_p #(.WIDTH(TAGS_USED), .INIT(0)) reld_r3_val_latch(
   .din(reld_r3_val_d),
   .dout(reld_r3_val_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[reld_r3_val_offset:reld_r3_val_offset + TAGS_USED - 1]),
   .scout(sov[reld_r3_val_offset:reld_r3_val_offset + TAGS_USED - 1])
);
|
|
|
|
// 1-bit latch: r3_loaded_d -> r3_loaded_l2 (a thread was loaded in the
// previous cycle), act = default_reld_act.
tri_rlmlatch_p #(.INIT(0)) r3_loaded_latch(
   .din(r3_loaded_d),
   .dout(r3_loaded_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[r3_loaded_offset]),
   .scout(sov[r3_loaded_offset])
);
|
|
|
|
// 1-bit latch: an_ac_reld_ecc_err_d -> an_ac_reld_ecc_err_l2,
// act = default_reld_act.
tri_rlmlatch_p #(.INIT(0)) an_ac_reld_ecc_err_latch(
   .din(an_ac_reld_ecc_err_d),
   .dout(an_ac_reld_ecc_err_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[an_ac_reld_ecc_err_offset]),
   .scout(sov[an_ac_reld_ecc_err_offset])
);
|
|
|
|
// 1-bit latch: an_ac_reld_ecc_err_ue_d -> an_ac_reld_ecc_err_ue_l2
// (uncorrectable ECC error), act = default_reld_act.
tri_rlmlatch_p #(.INIT(0)) an_ac_reld_ecc_err_ue_latch(
   .din(an_ac_reld_ecc_err_ue_d),
   .dout(an_ac_reld_ecc_err_ue_l2),
   .act(default_reld_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[an_ac_reld_ecc_err_ue_offset]),
   .scout(sov[an_ac_reld_ecc_err_ue_offset])
);
|
|
|
|
// Per-thread request register: request_d -> request_l2 (drives
// iu_lq_request), act = miss_or_default_act.
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0)) request_latch(
   .din(request_d),
   .dout(request_l2),
   .act(miss_or_default_act),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[request_offset:request_offset + `THREADS - 1]),
   .scout(sov[request_offset:request_offset + `THREADS - 1])
);
|
|
|
|
// 2-bit request core tag: req_ctag_d -> req_ctag_l2 (drives iu_lq_ctag),
// act = icd_icm_any_iu2_valid.
tri_rlmreg_p #(.WIDTH(2), .INIT(0)) req_ctag_latch(
   .din(req_ctag_d),
   .dout(req_ctag_l2),
   .act(icd_icm_any_iu2_valid),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[req_ctag_offset:req_ctag_offset + 2 - 1]),
   .scout(sov[req_ctag_offset:req_ctag_offset + 2 - 1])
);
|
|
|
|
// Request real address register (REAL_IFAR_WIDTH-4 bits): req_ra_d ->
// req_ra_l2 (drives iu_lq_ra), act = icd_icm_any_iu2_valid.
tri_rlmreg_p #(.WIDTH(`REAL_IFAR_WIDTH-4), .INIT(0)) req_ra_latch(
   .din(req_ra_d),
   .dout(req_ra_l2),
   .act(icd_icm_any_iu2_valid),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[req_ra_offset:req_ra_offset + (`REAL_IFAR_WIDTH-4) - 1]),
   .scout(sov[req_ra_offset:req_ra_offset + (`REAL_IFAR_WIDTH-4) - 1])
);
|
|
|
|
// 5-bit request WIMGE attributes: req_wimge_d -> req_wimge_l2 (drives
// iu_lq_wimge), act = icd_icm_any_iu2_valid.
tri_rlmreg_p #(.WIDTH(5), .INIT(0)) req_wimge_latch(
   .din(req_wimge_d),
   .dout(req_wimge_l2),
   .act(icd_icm_any_iu2_valid),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[req_wimge_offset:req_wimge_offset + 5 - 1]),
   .scout(sov[req_wimge_offset:req_wimge_offset + 5 - 1])
);
|
|
|
|
// 4-bit request user-defined bits: req_userdef_d -> req_userdef_l2 (drives
// iu_lq_userdef), act = icd_icm_any_iu2_valid.
tri_rlmreg_p #(.WIDTH(4), .INIT(0)) req_userdef_latch(
   .din(req_userdef_d),
   .dout(req_userdef_l2),
   .act(icd_icm_any_iu2_valid),
   .nclk(nclk),
   .vd(vdd), .gd(gnd),
   .thold_b(pc_iu_func_sl_thold_0_b), .sg(pc_iu_sg_0), .force_t(force_t),
   .delay_lclkr(delay_lclkr), .mpw1_b(mpw1_b), .mpw2_b(mpw2_b), .d_mode(d_mode),
   .scin(siv[req_userdef_offset:req_userdef_offset + 4 - 1]),
   .scout(sov[req_userdef_offset:req_userdef_offset + 4 - 1])
);
|
|
|
|
// iu3_miss_match: single-bit tri_rlmlatch_p instance, INIT = 0.
//   din / dout : iu3_miss_match_d / iu3_miss_match_l2
//   act        : icd_icm_any_iu2_valid
//   scan       : siv/sov bit iu3_miss_match_offset
tri_rlmlatch_p #(.INIT(0)) iu3_miss_match_latch (
   // data in / out
   .din         (iu3_miss_match_d),
   .dout        (iu3_miss_match_l2),
   // act input
   .act         (icd_icm_any_iu2_valid),
   // scan in / out bit
   .scin        (siv[iu3_miss_match_offset]),
   .scout       (sov[iu3_miss_match_offset]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
// Per-tag registers, replicated TAGS_USED times (generate index i):
//   miss_tid_sm_latch : (CHECK_ECC+1)-bit register, INIT has only the leftmost bit set,
//                       miss_tid_sm_d[i] / miss_tid_sm_l2[i]
//   miss_count_latch  : 3-bit register, INIT = 0,
//                       miss_count_d[i] / miss_count_l2[i]
// Both use miss_or_default_act on .act; each copy takes its own slice of siv/sov,
// stepping by the register width per index.
generate
begin : xhdl21
   genvar i;
   for (i = 0; i < TAGS_USED; i = i + 1)
   begin : gen_sm

      tri_rlmreg_p #(.INIT({1'b1, {CHECK_ECC{1'b0}} }), .WIDTH(CHECK_ECC+1)) miss_tid_sm_latch (
         // data in / out for tag i
         .din         (miss_tid_sm_d[i]),
         .dout        (miss_tid_sm_l2[i]),
         // act input
         .act         (miss_or_default_act),
         // scan slice for tag i, (CHECK_ECC+1) bits wide
         .scin        (siv[miss_tid_sm_offset + i * (CHECK_ECC+1):miss_tid_sm_offset + (i + 1) * (CHECK_ECC+1) - 1]),
         .scout       (sov[miss_tid_sm_offset + i * (CHECK_ECC+1):miss_tid_sm_offset + (i + 1) * (CHECK_ECC+1) - 1]),
         // nclk and shared control nets
         .nclk        (nclk),
         .thold_b     (pc_iu_func_sl_thold_0_b),
         .sg          (pc_iu_sg_0),
         .force_t     (force_t),
         .delay_lclkr (delay_lclkr),
         .mpw1_b      (mpw1_b),
         .mpw2_b      (mpw2_b),
         .d_mode      (d_mode),
         // vd / gd
         .vd          (vdd),
         .gd          (gnd)
      );

      tri_rlmreg_p #(.INIT(0), .WIDTH(3)) miss_count_latch (
         // data in / out for tag i
         .din         (miss_count_d[i]),
         .dout        (miss_count_l2[i]),
         // act input
         .act         (miss_or_default_act),
         // scan slice for tag i, 3 bits wide
         .scin        (siv[miss_count_offset + i * 3:miss_count_offset + (i + 1) * 3 - 1]),
         .scout       (sov[miss_count_offset + i * 3:miss_count_offset + (i + 1) * 3 - 1]),
         // nclk and shared control nets
         .nclk        (nclk),
         .thold_b     (pc_iu_func_sl_thold_0_b),
         .sg          (pc_iu_sg_0),
         .force_t     (force_t),
         .delay_lclkr (delay_lclkr),
         .mpw1_b      (mpw1_b),
         .mpw2_b      (mpw2_b),
         .d_mode      (d_mode),
         // vd / gd
         .vd          (vdd),
         .gd          (gnd)
      );

   end
end
endgenerate
|
|
|
|
// miss_flush_occurred register: TAGS_USED-bit tri_rlmreg_p instance, INIT = 0.
//   din / dout : miss_flush_occurred_d / miss_flush_occurred_l2
//   act        : default_reld_act
//   scan       : siv/sov bits miss_flush_occurred_offset .. + TAGS_USED - 1
tri_rlmreg_p #(.INIT(0), .WIDTH(TAGS_USED)) miss_flush_occurred_latch (
   // data in / out
   .din         (miss_flush_occurred_d),
   .dout        (miss_flush_occurred_l2),
   // act input
   .act         (default_reld_act),
   // scan in / out slice
   .scin        (siv[miss_flush_occurred_offset:miss_flush_occurred_offset + TAGS_USED - 1]),
   .scout       (sov[miss_flush_occurred_offset:miss_flush_occurred_offset + TAGS_USED - 1]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
// miss_flushed register: TAGS_USED-bit tri_rlmreg_p instance, INIT = 0.
//   din / dout : miss_flushed_d[0:TAGS_USED - 1] / miss_flushed_l2[0:TAGS_USED - 1]
//   act        : default_reld_act
//   scan       : siv/sov bits miss_flushed_offset .. + TAGS_USED - 1
tri_rlmreg_p #(.INIT(0), .WIDTH(TAGS_USED)) miss_flushed_latch (
   // data in / out (explicit part-selects of the _d / _l2 vectors)
   .din         (miss_flushed_d[0:TAGS_USED - 1]),
   .dout        (miss_flushed_l2[0:TAGS_USED - 1]),
   // act input
   .act         (default_reld_act),
   // scan in / out slice
   .scin        (siv[miss_flushed_offset:miss_flushed_offset + TAGS_USED - 1]),
   .scout       (sov[miss_flushed_offset:miss_flushed_offset + TAGS_USED - 1]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
// miss_inval register: TAGS_USED-bit tri_rlmreg_p instance, INIT = 0.
//   din / dout : miss_inval_d[0:TAGS_USED - 1] / miss_inval_l2[0:TAGS_USED - 1]
//   act        : default_reld_act
//   scan       : siv/sov bits miss_inval_offset .. + TAGS_USED - 1
tri_rlmreg_p #(.INIT(0), .WIDTH(TAGS_USED)) miss_inval_latch (
   // data in / out (explicit part-selects of the _d / _l2 vectors)
   .din         (miss_inval_d[0:TAGS_USED - 1]),
   .dout        (miss_inval_l2[0:TAGS_USED - 1]),
   // act input
   .act         (default_reld_act),
   // scan in / out slice
   .scin        (siv[miss_inval_offset:miss_inval_offset + TAGS_USED - 1]),
   .scout       (sov[miss_inval_offset:miss_inval_offset + TAGS_USED - 1]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
// miss_block_fp register: TAGS_USED-bit tri_rlmreg_p instance, INIT = 0.
//   din / dout : miss_block_fp_d / miss_block_fp_l2
//   act        : miss_or_default_act
//   scan       : siv/sov bits miss_block_fp_offset .. + TAGS_USED - 1
tri_rlmreg_p #(.INIT(0), .WIDTH(TAGS_USED)) miss_block_fp_latch (
   // data in / out
   .din         (miss_block_fp_d),
   .dout        (miss_block_fp_l2),
   // act input
   .act         (miss_or_default_act),
   // scan in / out slice
   .scin        (siv[miss_block_fp_offset:miss_block_fp_offset + TAGS_USED - 1]),
   .scout       (sov[miss_block_fp_offset:miss_block_fp_offset + TAGS_USED - 1]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
// miss_ecc_err register: TAGS_USED-bit tri_rlmreg_p instance, INIT = 0.
//   din / dout : miss_ecc_err_d / miss_ecc_err_l2
//   act        : default_reld_act
//   scan       : siv/sov bits miss_ecc_err_offset .. + TAGS_USED - 1
tri_rlmreg_p #(.INIT(0), .WIDTH(TAGS_USED)) miss_ecc_err_latch (
   // data in / out
   .din         (miss_ecc_err_d),
   .dout        (miss_ecc_err_l2),
   // act input
   .act         (default_reld_act),
   // scan in / out slice
   .scin        (siv[miss_ecc_err_offset:miss_ecc_err_offset + TAGS_USED - 1]),
   .scout       (sov[miss_ecc_err_offset:miss_ecc_err_offset + TAGS_USED - 1]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
// miss_ecc_err_ue register: TAGS_USED-bit tri_rlmreg_p instance, INIT = 0.
//   din / dout : miss_ecc_err_ue_d / miss_ecc_err_ue_l2
//   act        : default_reld_act
//   scan       : siv/sov bits miss_ecc_err_ue_offset .. + TAGS_USED - 1
tri_rlmreg_p #(.INIT(0), .WIDTH(TAGS_USED)) miss_ecc_err_ue_latch (
   // data in / out
   .din         (miss_ecc_err_ue_d),
   .dout        (miss_ecc_err_ue_l2),
   // act input
   .act         (default_reld_act),
   // scan in / out slice
   .scin        (siv[miss_ecc_err_ue_offset:miss_ecc_err_ue_offset + TAGS_USED - 1]),
   .scout       (sov[miss_ecc_err_ue_offset:miss_ecc_err_ue_offset + TAGS_USED - 1]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
// miss_wrote_dir register: TAGS_USED-bit tri_rlmreg_p instance, INIT = 0,
// instantiated with NEEDS_SRESET = 1.
//   din / dout : miss_wrote_dir_d / miss_wrote_dir_l2
//   act        : default_reld_act
//   scan       : siv/sov bits miss_wrote_dir_offset .. + TAGS_USED - 1
tri_rlmreg_p #(.NEEDS_SRESET(1), .INIT(0), .WIDTH(TAGS_USED)) miss_wrote_dir_latch (
   // data in / out
   .din         (miss_wrote_dir_d),
   .dout        (miss_wrote_dir_l2),
   // act input
   .act         (default_reld_act),
   // scan in / out slice
   .scin        (siv[miss_wrote_dir_offset:miss_wrote_dir_offset + TAGS_USED - 1]),
   .scout       (sov[miss_wrote_dir_offset:miss_wrote_dir_offset + TAGS_USED - 1]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
// miss_need_hold register: TAGS_USED-bit tri_rlmreg_p instance, INIT = 0,
// instantiated with NEEDS_SRESET = 1.
//   din / dout : miss_need_hold_d / miss_need_hold_l2
//   act        : miss_or_default_act
//   scan       : siv/sov bits miss_need_hold_offset .. + TAGS_USED - 1
tri_rlmreg_p #(.NEEDS_SRESET(1), .INIT(0), .WIDTH(TAGS_USED)) miss_need_hold_latch (
   // data in / out
   .din         (miss_need_hold_d),
   .dout        (miss_need_hold_l2),
   // act input
   .act         (miss_or_default_act),
   // scan in / out slice
   .scin        (siv[miss_need_hold_offset:miss_need_hold_offset + TAGS_USED - 1]),
   .scout       (sov[miss_need_hold_offset:miss_need_hold_offset + TAGS_USED - 1]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
// Per-tag latches, replicated TAGS_USED times (generate index i).
// For each tag i this block instantiates:
//   miss_addr_real_latch   : (`REAL_IFAR_WIDTH - 2)-bit register, act = miss_act[i]
//   miss_addr_eff_latch    : (`EFF_IFAR_WIDTH - 10)-bit register, act = miss_act[i]
//   miss_ci_latch          : 1-bit latch,                         act = miss_act[i]
//   miss_endian_latch      : 1-bit latch,                         act = miss_act[i]
//   miss_2ucode_latch      : 1-bit latch,                         act = miss_act[i]
//   miss_2ucode_type_latch : 1-bit latch,                         act = miss_act[i]
//   miss_way_latch         : 4-bit register,                      act = reld_r2_act
// Every instance has INIT = 0, connects <name>_d[i] to .din and <name>_l2[i] to .dout,
// and takes its own siv/sov slice at <name>_offset + i * width.
generate
begin : xhdl22
   genvar i;
   for (i = 0; i < TAGS_USED; i = i + 1)
   begin : gen

      tri_rlmreg_p #(.INIT(0), .WIDTH(`REAL_IFAR_WIDTH - 2)) miss_addr_real_latch (
         // data in / out for tag i
         .din         (miss_addr_real_d[i]),
         .dout        (miss_addr_real_l2[i]),
         // act input, per tag
         .act         (miss_act[i]),
         // scan slice for tag i
         .scin        (siv[miss_addr_real_offset + i * (`REAL_IFAR_WIDTH - 2):miss_addr_real_offset + (i + 1) * (`REAL_IFAR_WIDTH - 2) - 1]),
         .scout       (sov[miss_addr_real_offset + i * (`REAL_IFAR_WIDTH - 2):miss_addr_real_offset + (i + 1) * (`REAL_IFAR_WIDTH - 2) - 1]),
         // nclk and shared control nets
         .nclk        (nclk),
         .thold_b     (pc_iu_func_sl_thold_0_b),
         .sg          (pc_iu_sg_0),
         .force_t     (force_t),
         .delay_lclkr (delay_lclkr),
         .mpw1_b      (mpw1_b),
         .mpw2_b      (mpw2_b),
         .d_mode      (d_mode),
         // vd / gd
         .vd          (vdd),
         .gd          (gnd)
      );

      tri_rlmreg_p #(.INIT(0), .WIDTH(`EFF_IFAR_WIDTH - 10)) miss_addr_eff_latch (
         // data in / out for tag i
         .din         (miss_addr_eff_d[i]),
         .dout        (miss_addr_eff_l2[i]),
         // act input, per tag
         .act         (miss_act[i]),
         // scan slice for tag i
         .scin        (siv[miss_addr_eff_offset + i * (`EFF_IFAR_WIDTH - 10):miss_addr_eff_offset + (i + 1) * (`EFF_IFAR_WIDTH - 10) - 1]),
         .scout       (sov[miss_addr_eff_offset + i * (`EFF_IFAR_WIDTH - 10):miss_addr_eff_offset + (i + 1) * (`EFF_IFAR_WIDTH - 10) - 1]),
         // nclk and shared control nets
         .nclk        (nclk),
         .thold_b     (pc_iu_func_sl_thold_0_b),
         .sg          (pc_iu_sg_0),
         .force_t     (force_t),
         .delay_lclkr (delay_lclkr),
         .mpw1_b      (mpw1_b),
         .mpw2_b      (mpw2_b),
         .d_mode      (d_mode),
         // vd / gd
         .vd          (vdd),
         .gd          (gnd)
      );

      tri_rlmlatch_p #(.INIT(0)) miss_ci_latch (
         // data in / out for tag i
         .din         (miss_ci_d[i]),
         .dout        (miss_ci_l2[i]),
         // act input, per tag
         .act         (miss_act[i]),
         // scan bit for tag i
         .scin        (siv[miss_ci_offset + i]),
         .scout       (sov[miss_ci_offset + i]),
         // nclk and shared control nets
         .nclk        (nclk),
         .thold_b     (pc_iu_func_sl_thold_0_b),
         .sg          (pc_iu_sg_0),
         .force_t     (force_t),
         .delay_lclkr (delay_lclkr),
         .mpw1_b      (mpw1_b),
         .mpw2_b      (mpw2_b),
         .d_mode      (d_mode),
         // vd / gd
         .vd          (vdd),
         .gd          (gnd)
      );

      tri_rlmlatch_p #(.INIT(0)) miss_endian_latch (
         // data in / out for tag i
         .din         (miss_endian_d[i]),
         .dout        (miss_endian_l2[i]),
         // act input, per tag
         .act         (miss_act[i]),
         // scan bit for tag i
         .scin        (siv[miss_endian_offset + i]),
         .scout       (sov[miss_endian_offset + i]),
         // nclk and shared control nets
         .nclk        (nclk),
         .thold_b     (pc_iu_func_sl_thold_0_b),
         .sg          (pc_iu_sg_0),
         .force_t     (force_t),
         .delay_lclkr (delay_lclkr),
         .mpw1_b      (mpw1_b),
         .mpw2_b      (mpw2_b),
         .d_mode      (d_mode),
         // vd / gd
         .vd          (vdd),
         .gd          (gnd)
      );

      tri_rlmlatch_p #(.INIT(0)) miss_2ucode_latch (
         // data in / out for tag i
         .din         (miss_2ucode_d[i]),
         .dout        (miss_2ucode_l2[i]),
         // act input, per tag
         .act         (miss_act[i]),
         // scan bit for tag i
         .scin        (siv[miss_2ucode_offset + i]),
         .scout       (sov[miss_2ucode_offset + i]),
         // nclk and shared control nets
         .nclk        (nclk),
         .thold_b     (pc_iu_func_sl_thold_0_b),
         .sg          (pc_iu_sg_0),
         .force_t     (force_t),
         .delay_lclkr (delay_lclkr),
         .mpw1_b      (mpw1_b),
         .mpw2_b      (mpw2_b),
         .d_mode      (d_mode),
         // vd / gd
         .vd          (vdd),
         .gd          (gnd)
      );

      tri_rlmlatch_p #(.INIT(0)) miss_2ucode_type_latch (
         // data in / out for tag i
         .din         (miss_2ucode_type_d[i]),
         .dout        (miss_2ucode_type_l2[i]),
         // act input, per tag
         .act         (miss_act[i]),
         // scan bit for tag i
         .scin        (siv[miss_2ucode_type_offset + i]),
         .scout       (sov[miss_2ucode_type_offset + i]),
         // nclk and shared control nets
         .nclk        (nclk),
         .thold_b     (pc_iu_func_sl_thold_0_b),
         .sg          (pc_iu_sg_0),
         .force_t     (force_t),
         .delay_lclkr (delay_lclkr),
         .mpw1_b      (mpw1_b),
         .mpw2_b      (mpw2_b),
         .d_mode      (d_mode),
         // vd / gd
         .vd          (vdd),
         .gd          (gnd)
      );

      tri_rlmreg_p #(.INIT(0), .WIDTH(4)) miss_way_latch (
         // data in / out for tag i
         .din         (miss_way_d[i]),
         .dout        (miss_way_l2[i]),
         // act input: reld_r2_act here, unlike the miss_act[i] instances above
         .act         (reld_r2_act),
         // scan slice for tag i, 4 bits wide
         .scin        (siv[miss_way_offset + i * 4:miss_way_offset + (i + 1) * 4 - 1]),
         .scout       (sov[miss_way_offset + i * 4:miss_way_offset + (i + 1) * 4 - 1]),
         // nclk and shared control nets
         .nclk        (nclk),
         .thold_b     (pc_iu_func_sl_thold_0_b),
         .sg          (pc_iu_sg_0),
         .force_t     (force_t),
         .delay_lclkr (delay_lclkr),
         .mpw1_b      (mpw1_b),
         .mpw2_b      (mpw2_b),
         .d_mode      (d_mode),
         // vd / gd
         .vd          (vdd),
         .gd          (gnd)
      );

   end
end
endgenerate
|
|
|
|
// lru_write_next_cycle register: TAGS_USED-bit tri_rlmreg_p instance, INIT = 0.
//   din / dout : lru_write_next_cycle_d / lru_write_next_cycle_l2
//   act        : default_reld_act
//   scan       : siv/sov bits lru_write_next_cycle_offset .. + TAGS_USED - 1
tri_rlmreg_p #(.INIT(0), .WIDTH(TAGS_USED)) lru_write_next_cycle_latch (
   // data in / out
   .din         (lru_write_next_cycle_d),
   .dout        (lru_write_next_cycle_l2),
   // act input
   .act         (default_reld_act),
   // scan in / out slice
   .scin        (siv[lru_write_next_cycle_offset:lru_write_next_cycle_offset + TAGS_USED - 1]),
   .scout       (sov[lru_write_next_cycle_offset:lru_write_next_cycle_offset + TAGS_USED - 1]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
// lru_write register: TAGS_USED-bit tri_rlmreg_p instance, INIT = 0.
//   din / dout : lru_write_d / lru_write_l2
//   act        : default_reld_act
//   scan       : siv/sov bits lru_write_offset .. + TAGS_USED - 1
tri_rlmreg_p #(.INIT(0), .WIDTH(TAGS_USED)) lru_write_latch (
   // data in / out
   .din         (lru_write_d),
   .dout        (lru_write_l2),
   // act input
   .act         (default_reld_act),
   // scan in / out slice
   .scin        (siv[lru_write_offset:lru_write_offset + TAGS_USED - 1]),
   .scout       (sov[lru_write_offset:lru_write_offset + TAGS_USED - 1]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
// Per-thread perf_event register, replicated `THREADS times (generate index i).
// Each copy is a 3-bit tri_rlmreg_p instance (INIT = 0) with
// perf_event_d[i] on .din, perf_event_l2[i] on .dout, event_bus_enable on .act,
// and scan bits perf_event_offset + i * 3 .. perf_event_offset + (i+1) * 3 - 1.
generate
begin : xhdl23
   genvar i;
   for (i = 0; i < `THREADS; i = i + 1)
   begin : t

      tri_rlmreg_p #(.INIT(0), .WIDTH(3)) perf_event_latch (
         // data in / out for thread i
         .din         (perf_event_d[i]),
         .dout        (perf_event_l2[i]),
         // act input
         .act         (event_bus_enable),
         // scan slice for thread i, 3 bits wide
         .scin        (siv[perf_event_offset + i * 3:perf_event_offset + (i+1) * 3 - 1]),
         .scout       (sov[perf_event_offset + i * 3:perf_event_offset + (i+1) * 3 - 1]),
         // nclk and shared control nets
         .nclk        (nclk),
         .thold_b     (pc_iu_func_sl_thold_0_b),
         .sg          (pc_iu_sg_0),
         .force_t     (force_t),
         .delay_lclkr (delay_lclkr),
         .mpw1_b      (mpw1_b),
         .mpw2_b      (mpw2_b),
         .d_mode      (d_mode),
         // vd / gd
         .vd          (vdd),
         .gd          (gnd)
      );

   end
end
endgenerate
|
|
|
|
// miss_prefetch_perf register: `THREADS-bit tri_rlmreg_p instance, INIT = 0.
//   din / dout : miss_prefetch_perf_d / miss_prefetch_perf_l2
//   act        : event_bus_enable
//   scan       : siv/sov bits miss_prefetch_perf_offset .. + `THREADS - 1
tri_rlmreg_p #(.INIT(0), .WIDTH(`THREADS)) miss_prefetch_perf_latch (
   // data in / out
   .din         (miss_prefetch_perf_d),
   .dout        (miss_prefetch_perf_l2),
   // act input
   .act         (event_bus_enable),
   // scan in / out slice
   .scin        (siv[miss_prefetch_perf_offset:miss_prefetch_perf_offset + `THREADS - 1]),
   .scout       (sov[miss_prefetch_perf_offset:miss_prefetch_perf_offset + `THREADS - 1]),
   // nclk and the control nets shared by the latch instances in this file section
   .nclk        (nclk),
   .thold_b     (pc_iu_func_sl_thold_0_b),
   .sg          (pc_iu_sg_0),
   .force_t     (force_t),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .d_mode      (d_mode),
   // vd / gd
   .vd          (vdd),
   .gd          (gnd)
);
|
|
|
|
//---------------------------------------------------------------------
// Scan
//---------------------------------------------------------------------
// Scan ring hookup:
//   - scan_out is taken from sov[0];
//   - scan_in drives siv[scan_right];
//   - every other siv[k] is driven by sov[k + 1].
assign scan_out = sov[0];
assign siv[0:scan_right] = {sov[1:scan_right], scan_in};
|
|
|
|
endmodule
|