You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2890 lines
122 KiB
Verilog
2890 lines
122 KiB
Verilog
// © IBM Corp. 2020
|
|
// Licensed under the Apache License, Version 2.0 (the "License"), as modified by
|
|
// the terms below; you may not use the files in this repository except in
|
|
// compliance with the License as modified.
|
|
// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Modified Terms:
|
|
//
|
|
// 1) For the purpose of the patent license granted to you in Section 3 of the
|
|
// License, the "Work" hereby includes implementations of the work of authorship
|
|
// in physical form.
|
|
//
|
|
// 2) Notwithstanding any terms to the contrary in the License, any licenses
|
|
// necessary for implementation of the Work that are available from OpenPOWER
|
|
// via the Power ISA End User License Agreement (EULA) are explicitly excluded
|
|
// hereunder, and may be obtained from OpenPOWER under the terms and conditions
|
|
// of the EULA.
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, the reference design
|
|
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
|
|
// for the specific language governing permissions and limitations under the License.
|
|
//
|
|
// Additional rights, including the ability to physically implement a softcore that
|
|
// is compliant with the required sections of the Power ISA Specification, are
|
|
// available at no cost under the terms of the OpenPOWER Power ISA EULA, which can be
|
|
// obtained (along with the Power ISA) here: https://openpowerfoundation.org.
|
|
|
|
`timescale 1 ns / 1 ns
|
|
|
|
//
|
|
// Description: XU LSU Data Prefetcher
|
|
//
|
|
//*****************************************************************************
|
|
|
|
`include "tri_a2o.vh"
|
|
|
|
// parameter EXPAND_TYPE = 2;
|
|
// parameter GPR_WIDTH_ENC = 6; // 5 = 32bit mode, 6 = 64bit mode
|
|
// parameter CL_SIZE = 6; // 6 => 64B CLINE, 7 => 128B CLINE
|
|
// parameter THREADS = 2; // Number of Threads in the system
|
|
// parameter REAL_IFAR_WIDTH = 42; // width of the read address
|
|
// parameter ITAG_SIZE_ENC = 7;
|
|
// parameter LDSTQ_ENTRIES = 16; // Order Queue Size
|
|
// `define PF_IFAR_WIDTH 12 // number of IAR bits used by prefetch
|
|
// `define PFETCH_INITIAL_DEPTH 0 // the initial value for the SPR that determines how many lines to prefetch
|
|
// `define PFETCH_Q_SIZE_ENC 3 // number of bits to address queue size (3 => 8 entries, 4 => 16 entries)
|
|
// `define PFETCH_Q_SIZE 8 // number of entries in prefetch queue
|
|
|
|
module lq_pfetch(
|
|
rv_lq_rv1_i0_vld,
|
|
rv_lq_rv1_i0_rte_lq,
|
|
rv_lq_rv1_i0_isLoad,
|
|
rv_lq_rv1_i0_ifar,
|
|
rv_lq_rv1_i0_itag,
|
|
rv_lq_rv1_i1_vld,
|
|
rv_lq_rv1_i1_rte_lq,
|
|
rv_lq_rv1_i1_isLoad,
|
|
rv_lq_rv1_i1_ifar,
|
|
rv_lq_rv1_i1_itag,
|
|
iu_lq_cp_flush,
|
|
ctl_pf_clear_queue,
|
|
odq_pf_report_tid,
|
|
odq_pf_report_itag,
|
|
odq_pf_resolved,
|
|
dcc_pf_ex5_eff_addr,
|
|
dcc_pf_ex5_req_val_4pf,
|
|
dcc_pf_ex5_act,
|
|
dcc_pf_ex5_loadmiss,
|
|
dcc_pf_ex5_thrd_id,
|
|
dcc_pf_ex5_itag,
|
|
spr_pf_spr_dscr_lsd,
|
|
spr_pf_spr_dscr_snse,
|
|
spr_pf_spr_dscr_sse,
|
|
spr_pf_spr_dscr_dpfd,
|
|
spr_pf_spr_pesr,
|
|
pf_dec_req_addr,
|
|
pf_dec_req_thrd,
|
|
pf_dec_req_val,
|
|
dec_pf_ack,
|
|
pf_empty,
|
|
pc_lq_inj_prefetcher_parity,
|
|
lq_pc_err_prefetcher_parity,
|
|
vdd,
|
|
gnd,
|
|
vcs,
|
|
nclk,
|
|
sg_0,
|
|
func_sl_thold_0_b,
|
|
func_sl_force,
|
|
d_mode_dc,
|
|
delay_lclkr_dc,
|
|
clkoff_dc_b,
|
|
mpw1_dc_b,
|
|
mpw2_dc_b,
|
|
scan_in,
|
|
scan_out,
|
|
abst_sl_thold_0,
|
|
ary_nsl_thold_0,
|
|
time_sl_thold_0,
|
|
repr_sl_thold_0,
|
|
g8t_clkoff_dc_b,
|
|
pc_lq_ccflush_dc,
|
|
an_ac_scan_dis_dc_b,
|
|
an_ac_scan_diag_dc,
|
|
g8t_d_mode_dc,
|
|
g8t_mpw1_dc_b,
|
|
g8t_mpw2_dc_b,
|
|
g8t_delay_lclkr_dc,
|
|
pc_xu_abist_g8t_wenb_q,
|
|
pc_xu_abist_g8t1p_renb_0_q,
|
|
pc_xu_abist_di_0_q,
|
|
pc_xu_abist_g8t_bw_1_q,
|
|
pc_xu_abist_g8t_bw_0_q,
|
|
pc_xu_abist_waddr_0_q,
|
|
pc_xu_abist_raddr_0_q,
|
|
an_ac_lbist_ary_wrt_thru_dc,
|
|
pc_xu_abist_ena_dc,
|
|
pc_xu_abist_wl64_comp_ena_q,
|
|
pc_xu_abist_raw_dc_b,
|
|
pc_xu_abist_g8t_dcomp_q,
|
|
abst_scan_in,
|
|
time_scan_in,
|
|
repr_scan_in,
|
|
abst_scan_out,
|
|
time_scan_out,
|
|
repr_scan_out,
|
|
bolt_sl_thold_0,
|
|
pc_bo_enable_2,
|
|
pc_xu_bo_reset,
|
|
pc_xu_bo_unload,
|
|
pc_xu_bo_repair,
|
|
pc_xu_bo_shdata,
|
|
pc_xu_bo_select,
|
|
xu_pc_bo_fail,
|
|
xu_pc_bo_diagout
|
|
);
|
|
|
|
// iar and itag of the load instruction from dispatch
|
|
input [0:`THREADS-1] rv_lq_rv1_i0_vld;
|
|
input rv_lq_rv1_i0_rte_lq;
|
|
input rv_lq_rv1_i0_isLoad;
|
|
input [61-`PF_IFAR_WIDTH+1:61] rv_lq_rv1_i0_ifar;
|
|
input [0:`ITAG_SIZE_ENC-1] rv_lq_rv1_i0_itag;
|
|
input [0:`THREADS-1] rv_lq_rv1_i1_vld;
|
|
input rv_lq_rv1_i1_rte_lq;
|
|
input rv_lq_rv1_i1_isLoad;
|
|
input [61-`PF_IFAR_WIDTH+1:61] rv_lq_rv1_i1_ifar;
|
|
input [0:`ITAG_SIZE_ENC-1] rv_lq_rv1_i1_itag;
|
|
|
|
// flush interface
|
|
input [0:`THREADS-1] iu_lq_cp_flush;
|
|
|
|
input ctl_pf_clear_queue;
|
|
|
|
// release itag to pfetch
|
|
input [0:`THREADS-1] odq_pf_report_tid;
|
|
input [0:`ITAG_SIZE_ENC-1] odq_pf_report_itag;
|
|
input odq_pf_resolved;
|
|
|
|
// EA of load miss that is valid for pre-fetching
|
|
input [64-(2**`GPR_WIDTH_ENC):59] dcc_pf_ex5_eff_addr;
|
|
input dcc_pf_ex5_req_val_4pf;
|
|
input dcc_pf_ex5_act;
|
|
input dcc_pf_ex5_loadmiss;
|
|
input [0:`THREADS-1] dcc_pf_ex5_thrd_id;
|
|
input [0:`ITAG_SIZE_ENC-1] dcc_pf_ex5_itag;
|
|
|
|
input [0:`THREADS-1] spr_pf_spr_dscr_lsd;
|
|
input [0:`THREADS-1] spr_pf_spr_dscr_snse;
|
|
input [0:`THREADS-1] spr_pf_spr_dscr_sse;
|
|
input [0:3*`THREADS-1] spr_pf_spr_dscr_dpfd;
|
|
input [0:31] spr_pf_spr_pesr;
|
|
|
|
// EA of prefetch request
|
|
output [64-(2**`GPR_WIDTH_ENC):63-`CL_SIZE] pf_dec_req_addr;
|
|
output [0:`THREADS-1] pf_dec_req_thrd;
|
|
output pf_dec_req_val;
|
|
input dec_pf_ack;
|
|
|
|
output [0:`THREADS-1] pf_empty;
|
|
|
|
// parity error signals
|
|
input pc_lq_inj_prefetcher_parity;
|
|
output lq_pc_err_prefetcher_parity;
|
|
|
|
// Pervasive
|
|
|
|
|
|
inout vcs;
|
|
|
|
|
|
inout vdd;
|
|
|
|
|
|
inout gnd;
|
|
|
|
(* pin_data="PIN_FUNCTION=/G_CLK/CAP_LIMIT=/99999/" *)
|
|
|
|
input [0:`NCLK_WIDTH-1] nclk;
|
|
input sg_0;
|
|
input func_sl_thold_0_b;
|
|
input func_sl_force;
|
|
input d_mode_dc;
|
|
input delay_lclkr_dc;
|
|
input clkoff_dc_b;
|
|
input mpw1_dc_b;
|
|
input mpw2_dc_b;
|
|
|
|
(* pin_data="PIN_FUNCTION=/SCAN_IN/" *)
|
|
|
|
input scan_in;
|
|
|
|
(* pin_data="PIN_FUNCTION=/SCAN_OUT/" *)
|
|
|
|
output scan_out;
|
|
|
|
// array pervasive
|
|
input abst_sl_thold_0;
|
|
input ary_nsl_thold_0;
|
|
input time_sl_thold_0;
|
|
input repr_sl_thold_0;
|
|
input g8t_clkoff_dc_b;
|
|
input pc_lq_ccflush_dc;
|
|
input an_ac_scan_dis_dc_b;
|
|
input an_ac_scan_diag_dc;
|
|
input g8t_d_mode_dc;
|
|
input [0:4] g8t_mpw1_dc_b;
|
|
input g8t_mpw2_dc_b;
|
|
input [0:4] g8t_delay_lclkr_dc;
|
|
// ABIST
|
|
input pc_xu_abist_g8t_wenb_q;
|
|
input pc_xu_abist_g8t1p_renb_0_q;
|
|
input [0:3] pc_xu_abist_di_0_q;
|
|
input pc_xu_abist_g8t_bw_1_q;
|
|
input pc_xu_abist_g8t_bw_0_q;
|
|
input [0:4] pc_xu_abist_waddr_0_q;
|
|
input [0:4] pc_xu_abist_raddr_0_q;
|
|
input an_ac_lbist_ary_wrt_thru_dc;
|
|
input pc_xu_abist_ena_dc;
|
|
input pc_xu_abist_wl64_comp_ena_q;
|
|
input pc_xu_abist_raw_dc_b;
|
|
input [0:3] pc_xu_abist_g8t_dcomp_q;
|
|
// Scan
|
|
|
|
(* pin_data="PIN_FUNCTION=/SCAN_IN/" *)
|
|
|
|
input [0:1] abst_scan_in;
|
|
|
|
(* pin_data="PIN_FUNCTION=/SCAN_IN/" *)
|
|
|
|
input time_scan_in;
|
|
|
|
(* pin_data="PIN_FUNCTION=/SCAN_IN/" *)
|
|
|
|
input repr_scan_in;
|
|
|
|
(* pin_data="PIN_FUNCTION=/SCAN_OUT/" *)
|
|
|
|
output [0:1] abst_scan_out;
|
|
|
|
(* pin_data="PIN_FUNCTION=/SCAN_OUT/" *)
|
|
|
|
output time_scan_out;
|
|
|
|
(* pin_data="PIN_FUNCTION=/SCAN_OUT/" *)
|
|
|
|
output repr_scan_out;
|
|
// BOLT-ON
|
|
input bolt_sl_thold_0;
|
|
input pc_bo_enable_2; // general bolt-on enable
|
|
input pc_xu_bo_reset; // reset
|
|
input pc_xu_bo_unload; // unload sticky bits
|
|
input pc_xu_bo_repair; // execute sticky bit decode
|
|
input pc_xu_bo_shdata; // shift data for timing write and diag loop
|
|
input [0:1] pc_xu_bo_select; // select for mask and hier writes
|
|
output [0:1] xu_pc_bo_fail; // fail/no-fix reg
|
|
|
|
output [0:1] xu_pc_bo_diagout;
|
|
|
|
//--------------------------
|
|
// signals
|
|
//--------------------------
|
|
wire [0:`THREADS-1] pfetch_dis_thrd;
|
|
reg pf1_disable;
|
|
reg ex6_pf_disable;
|
|
|
|
wire [58:63] pf_dscr_reg[0:`THREADS-1];
|
|
reg [58:63] pf1_dscr;
|
|
wire [0:`THREADS-1] rv_i0_vld_d;
|
|
wire [0:`THREADS-1] rv_i0_vld_q;
|
|
wire rv_i0_rte_lq_q;
|
|
wire rv_i0_isLoad_q;
|
|
wire [61-`PF_IFAR_WIDTH+1:61] rv_i0_ifar_q;
|
|
wire [0:`ITAG_SIZE_ENC-1] rv_i0_itag_q;
|
|
wire [0:`THREADS-1] rv_i1_vld_d;
|
|
wire [0:`THREADS-1] rv_i1_vld_q;
|
|
wire rv_i1_rte_lq_q;
|
|
wire rv_i1_isLoad_q;
|
|
wire [61-`PF_IFAR_WIDTH+1:61] rv_i1_ifar_q;
|
|
wire [0:`ITAG_SIZE_ENC-1] rv_i1_itag_q;
|
|
wire [0:`THREADS-1] cp_flush_q;
|
|
wire [0:`THREADS-1] cp_flush2_q;
|
|
wire [0:`THREADS-1] cp_flush3_q;
|
|
wire [0:`THREADS-1] cp_flush4_q;
|
|
|
|
wire new_itag_i0_val;
|
|
wire new_itag_i1_val;
|
|
wire [0:`LDSTQ_ENTRIES-1] pf_iar_i0_wen;
|
|
wire [0:`LDSTQ_ENTRIES-1] pf_iar_val_for_i1;
|
|
wire [0:`LDSTQ_ENTRIES-1] pf_iar_i1_wen;
|
|
wire [61-`PF_IFAR_WIDTH+1:61] pf_iar_tbl_d[0:`LDSTQ_ENTRIES-1];
|
|
wire [61-`PF_IFAR_WIDTH+1:61] pf_iar_tbl_q[0:`LDSTQ_ENTRIES-1];
|
|
wire [0:`ITAG_SIZE_ENC-1] pf_itag_tbl_d[0:`LDSTQ_ENTRIES-1];
|
|
wire [0:`ITAG_SIZE_ENC-1] pf_itag_tbl_q[0:`LDSTQ_ENTRIES-1];
|
|
wire [0:`THREADS-1] pf_tid_tbl_d[0:`LDSTQ_ENTRIES-1];
|
|
wire [0:`THREADS-1] pf_tid_tbl_q[0:`LDSTQ_ENTRIES-1];
|
|
wire [0:`LDSTQ_ENTRIES-1] pf_iar_tbl_val_d;
|
|
wire [0:`LDSTQ_ENTRIES-1] pf_iar_tbl_val_q;
|
|
wire [0:`LDSTQ_ENTRIES-1] ex5_itag_match;
|
|
reg [61-`PF_IFAR_WIDTH+1:61] ex5_iar;
|
|
wire [61-`PF_IFAR_WIDTH+1:61] ex6_iar_q;
|
|
wire [61-`PF_IFAR_WIDTH+1:61] ex7_iar_q;
|
|
wire [61-`PF_IFAR_WIDTH+1:61] ex8_iar_q;
|
|
wire [0:`LDSTQ_ENTRIES-1] pf_iar_tbl_reset;
|
|
wire odq_resolved_q;
|
|
wire [0:`ITAG_SIZE_ENC-1] odq_report_itag_q;
|
|
wire [0:`THREADS-1] odq_report_tid_q;
|
|
|
|
wire [0:21] pfq_stride_d[0:`PFETCH_Q_SIZE-1];
|
|
wire [0:21] pfq_stride_q[0:`PFETCH_Q_SIZE-1];
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] pfq_data_ea_d[0:`PFETCH_Q_SIZE-1];
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] pfq_data_ea_q[0:`PFETCH_Q_SIZE-1];
|
|
wire [0:`PFETCH_Q_SIZE-1] pfq_dup_flag_d;
|
|
wire [0:`PFETCH_Q_SIZE-1] pfq_dup_flag_q;
|
|
wire [0:`THREADS-1] pfq_thrd_d[0:`PFETCH_Q_SIZE-1];
|
|
wire [0:`THREADS-1] pfq_thrd_q[0:`PFETCH_Q_SIZE-1];
|
|
wire [61:63] pfq_dscr_d[0:`PFETCH_Q_SIZE-1];
|
|
wire [61:63] pfq_dscr_q[0:`PFETCH_Q_SIZE-1];
|
|
wire [0:`PFETCH_Q_SIZE-1] pfq_wen;
|
|
wire [0:`PFETCH_Q_SIZE_ENC-1] pfq_wrt_ptr_plus1;
|
|
wire [0:`PFETCH_Q_SIZE_ENC-1] pfq_wrt_ptr_d;
|
|
wire [0:`PFETCH_Q_SIZE_ENC-1] pfq_wrt_ptr_q;
|
|
wire [0:`PFETCH_Q_SIZE_ENC-1] pfq_rd_ptr_d;
|
|
wire [0:`PFETCH_Q_SIZE_ENC-1] pfq_rd_ptr_q;
|
|
wire pfq_full_d;
|
|
wire pfq_full_q;
|
|
wire pfq_wrt_val;
|
|
reg [0:21] pf3_stride_d;
|
|
wire [0:21] pf3_stride_q;
|
|
reg [64-(2**`GPR_WIDTH_ENC):59] pfq_rd_data_ea;
|
|
reg pfq_rd_dup_flag;
|
|
reg [0:`THREADS-1] pfq_rd_thrd;
|
|
reg [0:`THREADS-1] pfq_thrd_v;
|
|
reg [61:63] pfq_rd_dscr;
|
|
wire pf_rd_val;
|
|
wire pf_idle;
|
|
wire pf_gen;
|
|
wire pf_send;
|
|
wire pf_next;
|
|
wire pf_done;
|
|
reg pf_nxt_idle;
|
|
reg pf_nxt_gen;
|
|
reg pf_nxt_send;
|
|
reg pf_nxt_next;
|
|
reg pf_nxt_done;
|
|
wire [0:4] pf_nxt_state;
|
|
wire [0:4] pf_state_q;
|
|
wire [0:2] pf_count_d;
|
|
wire [0:2] pf_count_q;
|
|
wire [0:21] pf1_new_stride_d;
|
|
wire [0:21] pf1_new_stride_q;
|
|
wire [0:21] pf1_rpt_stride_q;
|
|
wire stride_match;
|
|
wire generate_pfetch;
|
|
wire [0:2] nxt_state_cntrl;
|
|
wire [0:1] burst_cnt_inc;
|
|
wire [0:21] pf2_next_stride_d;
|
|
wire [0:21] pf2_stride_q;
|
|
wire [61-`PF_IFAR_WIDTH+1:61] pf1_iar_d;
|
|
wire [61-`PF_IFAR_WIDTH+1:61] pf1_iar_q;
|
|
wire [61-`PF_IFAR_WIDTH+1:61] pf2_iar_q;
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] pf1_data_ea_d;
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] pf1_data_ea_q;
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] pf1_new_data_ea;
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] pf2_data_ea_d;
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] pf2_data_ea_q;
|
|
wire [0:1] pf1_pf_state_d;
|
|
wire [0:1] pf1_pf_state_q;
|
|
wire [0:1] pf2_next_state_d;
|
|
wire [0:1] pf1_update_state;
|
|
wire [0:1] pf2_pf_state_q;
|
|
wire [0:1] pf1_burst_cnt_d;
|
|
wire [0:1] pf1_burst_cnt_q;
|
|
wire [0:1] pf2_burst_cnt_d;
|
|
wire [0:1] pf2_burst_cnt_q;
|
|
wire pf1_dup_flag_d;
|
|
wire pf1_dup_flag_q;
|
|
wire [0:2] ex8_pf_hits_d;
|
|
wire [0:2] ex8_pf_hits_q;
|
|
wire [0:1] ex8_rpt_pe_d;
|
|
wire [0:1] ex8_rpt_pe_q;
|
|
wire [38:59] ex8_last_dat_addr_q;
|
|
wire [0:21] ex8_stride_q;
|
|
wire [0:1] ex8_pf_state_q;
|
|
wire [0:1] ex8_burst_cnt_q;
|
|
wire ex8_dup_flag_q;
|
|
wire [0:2] pf1_hits_d;
|
|
wire [0:2] pf1_hits_q;
|
|
wire [0:1] pf1_rpt_pe_q;
|
|
wire [0:2] pf2_hits_q;
|
|
wire [0:1] pf2_rpt_pe_q;
|
|
wire [0:`THREADS-1] pf1_thrd_d;
|
|
wire [0:`THREADS-1] pf1_thrd_q;
|
|
wire [0:`THREADS-1] pf2_thrd_q;
|
|
wire [0:`THREADS-1] pf3_thrd_d;
|
|
wire [0:`THREADS-1] pf3_thrd_q;
|
|
wire pf2_gen_pfetch_q;
|
|
wire pf2_valid;
|
|
reg old_rpt_lru;
|
|
wire new_rpt_lru;
|
|
wire [0:31] rpt_lru_d;
|
|
wire [0:31] rpt_lru_q;
|
|
|
|
wire [0:1] rpt_wen;
|
|
wire [0:1] rpt_rd_act;
|
|
wire [0:1] rpt_byp_val;
|
|
wire [0:4] rpt_wrt_addr;
|
|
wire [0:69] rpt_data_in /*verilator split_var*/;
|
|
wire [0:4] rpt_rd_addr;
|
|
wire [0:139] rpt_data_out;
|
|
wire [0:69] rpt_byp_dat_d;
|
|
wire [0:69] rpt_byp_dat_q;
|
|
wire [0:69] rpt_byp_dat1_d;
|
|
wire [0:69] rpt_byp_dat1_q;
|
|
wire [0:1] byp_rpt_ary_d;
|
|
wire [0:1] byp_rpt_ary_q;
|
|
wire [0:1] byp1_rpt_ary_d;
|
|
wire [0:1] byp1_rpt_ary_q;
|
|
wire [0:69] ex7_rpt_entry0;
|
|
wire [0:69] ex7_rpt_entry1;
|
|
wire entry0_hit;
|
|
wire entry1_hit;
|
|
wire [0:56] ex7_rpt_entry_mux;
|
|
wire [0:56] new_rpt_entry;
|
|
wire [0:21] new_stride_prelim;
|
|
wire same_cline;
|
|
wire pf1_stride_too_small_q;
|
|
wire pf1_same_cline_q;
|
|
wire stride_too_small;
|
|
wire stride_lessthan_cline_pos;
|
|
wire stride_lessthan_cline_neg;
|
|
|
|
wire [0:`THREADS-1] ex6_thrd_q;
|
|
wire [0:`THREADS-1] ex7_thrd_q;
|
|
wire [0:`THREADS-1] ex8_thrd_q;
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] ex6_eff_addr_q;
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] ex7_eff_addr_q;
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] ex8_eff_addr_q;
|
|
wire ex6_req_val_4pf_q;
|
|
wire ex7_req_val_4pf_d;
|
|
wire ex7_req_val_4pf_q;
|
|
wire ex8_req_val_4pf_q;
|
|
wire pf1_req_val_4pf_q;
|
|
wire pf2_req_val_4pf_d;
|
|
wire pf2_req_val_4pf_q;
|
|
wire ex5_valid_loadmiss;
|
|
wire ex6_loadmiss_q;
|
|
wire ex7_loadmiss_q;
|
|
wire pf3_req_val_d;
|
|
wire pf3_req_val_q;
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] pf3_req_addr_d;
|
|
wire [64-(2**`GPR_WIDTH_ENC):59] pf3_req_addr_q;
|
|
wire block_dup_pfetch;
|
|
wire inj_pfetch_parity_q;
|
|
wire ex7_rpt_entry0_pe;
|
|
wire ex7_rpt_entry1_pe;
|
|
wire ex8_pfetch_pe_d;
|
|
wire ex8_pfetch_pe_q;
|
|
wire [57+`THREADS:64+`THREADS] ex7_rpt_entry0_par;
|
|
wire [57+`THREADS:64+`THREADS] ex7_rpt_entry1_par;
|
|
wire [0:`LDSTQ_ENTRIES-1] pf_itag_tbl_act;
|
|
wire ex6_pf_act;
|
|
wire ex7_pf_act;
|
|
wire ex8_pf_act;
|
|
wire pf1_act;
|
|
wire pf2_act;
|
|
wire pf3_act;
|
|
wire byp_act;
|
|
wire byp1_act;
|
|
wire rpt_func_scan_in;
|
|
wire rpt_func_scan_out;
|
|
|
|
//--------------------------
|
|
// constants
|
|
//--------------------------
|
|
|
|
parameter rv_i0_vld_offset = 0;
|
|
|
|
parameter rv_i0_isLoad_offset = rv_i0_vld_offset + `THREADS;
|
|
parameter rv_i0_rte_lq_offset = rv_i0_isLoad_offset + 1;
|
|
parameter rv_i0_ifar_offset = rv_i0_rte_lq_offset + 1;
|
|
parameter rv_i0_itag_offset = rv_i0_ifar_offset + `PF_IFAR_WIDTH;
|
|
parameter rv_i1_vld_offset = rv_i0_itag_offset + `ITAG_SIZE_ENC;
|
|
parameter rv_i1_isLoad_offset = rv_i1_vld_offset + `THREADS;
|
|
parameter rv_i1_rte_lq_offset = rv_i1_isLoad_offset + 1;
|
|
parameter rv_i1_ifar_offset = rv_i1_rte_lq_offset + 1;
|
|
parameter rv_i1_itag_offset = rv_i1_ifar_offset + `PF_IFAR_WIDTH;
|
|
parameter cp_flush_offset = rv_i1_itag_offset + `ITAG_SIZE_ENC;
|
|
parameter cp_flush2_offset = cp_flush_offset + `THREADS;
|
|
parameter cp_flush3_offset = cp_flush2_offset + `THREADS;
|
|
parameter cp_flush4_offset = cp_flush3_offset + `THREADS;
|
|
parameter inj_pfetch_parity_offset = cp_flush4_offset + `THREADS;
|
|
parameter odq_resolved_offset = inj_pfetch_parity_offset + 1;
|
|
parameter odq_report_itag_offset = odq_resolved_offset + 1;
|
|
parameter odq_report_tid_offset = odq_report_itag_offset + `ITAG_SIZE_ENC;
|
|
parameter pf_iar_tbl_offset = odq_report_tid_offset + `THREADS;
|
|
parameter pf_itag_tbl_offset = pf_iar_tbl_offset + `PF_IFAR_WIDTH * `LDSTQ_ENTRIES;
|
|
parameter pf_tid_tbl_offset = pf_itag_tbl_offset + `ITAG_SIZE_ENC * `LDSTQ_ENTRIES;
|
|
parameter pf_iar_tbl_val_offset = pf_tid_tbl_offset + `THREADS * `LDSTQ_ENTRIES;
|
|
parameter ex6_iar_offset = pf_iar_tbl_val_offset + `LDSTQ_ENTRIES;
|
|
parameter ex7_iar_offset = ex6_iar_offset + `PF_IFAR_WIDTH;
|
|
parameter ex8_iar_offset = ex7_iar_offset + `PF_IFAR_WIDTH;
|
|
parameter ex6_thrd_offset = ex8_iar_offset + `PF_IFAR_WIDTH;
|
|
parameter ex7_thrd_offset = ex6_thrd_offset + `THREADS;
|
|
parameter ex8_thrd_offset = ex7_thrd_offset + `THREADS;
|
|
parameter ex6_eff_addr_offset = ex8_thrd_offset + `THREADS;
|
|
parameter ex7_eff_addr_offset = ex6_eff_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1);
|
|
parameter ex8_eff_addr_offset = ex7_eff_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1);
|
|
parameter ex6_req_val_4pf_offset = ex8_eff_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1);
|
|
parameter ex7_req_val_4pf_offset = ex6_req_val_4pf_offset + 1;
|
|
parameter ex8_req_val_4pf_offset = ex7_req_val_4pf_offset + 1;
|
|
parameter pf1_req_val_4pf_offset = ex8_req_val_4pf_offset + 1;
|
|
parameter pf2_req_val_4pf_offset = pf1_req_val_4pf_offset + 1;
|
|
parameter ex6_loadmiss_offset = pf2_req_val_4pf_offset + 1;
|
|
parameter ex7_loadmiss_offset = ex6_loadmiss_offset + 1;
|
|
parameter byp_rpt_ary_offset = ex7_loadmiss_offset + 1;
|
|
parameter byp1_rpt_ary_offset = byp_rpt_ary_offset + 2;
|
|
parameter rpt_byp_dat_offset = byp1_rpt_ary_offset + 2;
|
|
parameter rpt_byp_dat1_offset = rpt_byp_dat_offset + 70;
|
|
parameter ex8_last_dat_addr_offset = rpt_byp_dat1_offset + 70;
|
|
parameter ex8_stride_offset = ex8_last_dat_addr_offset + 22;
|
|
parameter ex8_pf_state_offset = ex8_stride_offset + 22;
|
|
parameter ex8_burst_cnt_offset = ex8_pf_state_offset + 2;
|
|
parameter ex8_dup_flag_offset = ex8_burst_cnt_offset + 2;
|
|
parameter ex8_pf_hits_offset = ex8_dup_flag_offset + 1;
|
|
parameter ex8_rpt_pe_offset = ex8_pf_hits_offset + 3;
|
|
parameter ex8_pfetch_pe_offset = ex8_rpt_pe_offset + 2;
|
|
parameter pfq_stride_offset = ex8_pfetch_pe_offset + 1;
|
|
parameter pfq_data_ea_offset = pfq_stride_offset + 22 * `PFETCH_Q_SIZE;
|
|
parameter pfq_thrd_offset = pfq_data_ea_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) * `PFETCH_Q_SIZE;
|
|
parameter pfq_dscr_offset = pfq_thrd_offset + `THREADS * `PFETCH_Q_SIZE;
|
|
parameter pfq_dup_flag_offset = pfq_dscr_offset + 3 * `PFETCH_Q_SIZE;
|
|
parameter pfq_full_offset = pfq_dup_flag_offset + `PFETCH_Q_SIZE;
|
|
parameter pfq_wrt_ptr_offset = pfq_full_offset + 1;
|
|
parameter pf_state_offset = pfq_wrt_ptr_offset + `PFETCH_Q_SIZE_ENC;
|
|
parameter pf_count_offset = pf_state_offset + 5;
|
|
parameter pf1_new_stride_offset = pf_count_offset + 3;
|
|
parameter pf1_rpt_stride_offset = pf1_new_stride_offset + 22;
|
|
parameter pf1_data_ea_offset = pf1_rpt_stride_offset + 22;
|
|
parameter pf1_iar_offset = pf1_data_ea_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1);
|
|
parameter pf1_pf_state_offset = pf1_iar_offset + `PF_IFAR_WIDTH;
|
|
parameter pf1_burst_cnt_offset = pf1_pf_state_offset + 2;
|
|
parameter pf1_dup_flag_offset = pf1_burst_cnt_offset + 2;
|
|
parameter pf1_hits_offset = pf1_dup_flag_offset + 1;
|
|
parameter pf1_rpt_pe_offset = pf1_hits_offset + 3;
|
|
parameter pf1_thrd_offset = pf1_rpt_pe_offset + 2;
|
|
parameter pf1_same_cline_offset = pf1_thrd_offset + `THREADS;
|
|
parameter pf1_stride_too_small_offset = pf1_same_cline_offset + 1;
|
|
parameter pf2_gen_pfetch_offset = pf1_stride_too_small_offset + 1;
|
|
parameter pf2_rpt_stride_offset = pf2_gen_pfetch_offset + 1;
|
|
parameter pf2_data_ea_offset = pf2_rpt_stride_offset + 22;
|
|
parameter pf2_iar_offset = pf2_data_ea_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1);
|
|
parameter pf2_pf_state_offset = pf2_iar_offset + `PF_IFAR_WIDTH;
|
|
parameter pf2_burst_cnt_offset = pf2_pf_state_offset + 2;
|
|
parameter pf2_hits_offset = pf2_burst_cnt_offset + 2;
|
|
parameter pf2_rpt_pe_offset = pf2_hits_offset + 3;
|
|
parameter pf2_thrd_offset = pf2_rpt_pe_offset + 2;
|
|
parameter rpt_lru_offset = pf2_thrd_offset + `THREADS;
|
|
parameter pfq_rd_ptr_offset = rpt_lru_offset + 32;
|
|
parameter pf3_stride_offset = pfq_rd_ptr_offset + `PFETCH_Q_SIZE_ENC;
|
|
parameter pf3_req_addr_offset = pf3_stride_offset + 22;
|
|
parameter pf3_req_val_offset = pf3_req_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1);
|
|
parameter pf3_thrd_offset = pf3_req_val_offset + 1;
|
|
|
|
parameter scan_right = pf3_thrd_offset + `THREADS - 1;
|
|
|
|
wire tiup;
|
|
wire tidn;
|
|
wire [0:scan_right] siv;
|
|
wire [0:scan_right] sov;
|
|
wire [0:31] value1;
|
|
wire [0:31] value2;
|
|
|
|
//!! Bugspray Include: lq_pfetch
|
|
|
|
assign tiup = 1'b1;
|
|
assign tidn = 1'b0;
|
|
assign value1 = 32'h00000001;
|
|
assign value2 = 32'h00000002;
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// SPR for prefetch depth
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// Build one 6-bit control word per thread, indexed [58:63]:
//   [58] = spr_pf_spr_dscr_lsd, [59] = spr_pf_spr_dscr_snse,
//   [60] = spr_pf_spr_dscr_sse, [61:63] = that thread's 3-bit slice of spr_pf_spr_dscr_dpfd.
// A thread's prefetch-disable flag is raised when bit 58 is set or when
// bits 61:62 (the two upper bits of the dpfd slice) are both zero.
generate
   begin : xhdl0
      genvar t;
      for (t = 0; t < `THREADS; t = t + 1)
      begin : sprThrd
         assign pf_dscr_reg[t] = { spr_pf_spr_dscr_lsd[t],
                                   spr_pf_spr_dscr_snse[t],
                                   spr_pf_spr_dscr_sse[t],
                                   spr_pf_spr_dscr_dpfd[(3 * t):(3 * t) + 2] };

         assign pfetch_dis_thrd[t] = (pf_dscr_reg[t][61:62] == 2'b00) | pf_dscr_reg[t][58];
      end
   end
endgenerate
|
|
|
|
|
|
// AND-OR selection across threads:
//   ex6_pf_disable : OR of pfetch_dis_thrd[t] for every thread flagged in ex6_thrd_q
//   pf1_disable    : OR of pfetch_dis_thrd[t] for every thread flagged in pf1_thrd_q
//   pf1_dscr       : OR of pf_dscr_reg[t]     for every thread flagged in pf1_thrd_q
// NOTE(review): the thread vectors are presumably one-hot, which would make this a mux - confirm.
always @(*)
begin: tid_pd_dis_p
   integer     t;
   reg [58:63] dscr_acc;
   reg         dis_ex6_acc;
   reg         dis_pf1_acc;

   // Start every accumulator from zero so each output is fully assigned on every evaluation.
   dscr_acc    = 6'b000000;
   dis_pf1_acc = 1'b0;
   dis_ex6_acc = 1'b0;

   for (t = 0; t < `THREADS; t = t + 1)
   begin
      dis_ex6_acc = dis_ex6_acc | (ex6_thrd_q[t] & pfetch_dis_thrd[t]);
      dis_pf1_acc = dis_pf1_acc | (pf1_thrd_q[t] & pfetch_dis_thrd[t]);
      dscr_acc    = dscr_acc    | ({6{pf1_thrd_q[t]}} & pf_dscr_reg[t]);
   end

   // Drive the module-level regs once, from the finished accumulators.
   pf1_dscr       = dscr_acc;
   pf1_disable    = dis_pf1_acc;
   ex6_pf_disable = dis_ex6_acc;
end
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// latch iu signals before using
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// Dispatch valid vectors are passed straight through to the register inputs.
assign rv_i1_vld_d = rv_lq_rv1_i1_vld;
assign rv_i0_vld_d = rv_lq_rv1_i0_vld;

// ---- i0 dispatch registers -------------------------------------------------
// NOTE(review): tri_rlmreg_p is defined outside this file; presumably a scannable
// register (din -> dout). Every instance below has act tied to tiup (constant 1'b1)
// and shares the same func_sl pervasive controls; only width, scan slice and data differ.

// i0 valid, one bit per thread
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) rv_i0_vld_latch(
   .din(rv_i0_vld_d),
   .dout(rv_i0_vld_q),
   .scin(siv[rv_i0_vld_offset:rv_i0_vld_offset + `THREADS - 1]),
   .scout(sov[rv_i0_vld_offset:rv_i0_vld_offset + `THREADS - 1]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);

// i0 isLoad qualifier
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) rv_i0_isLoad_latch(
   .din(rv_lq_rv1_i0_isLoad),
   .dout(rv_i0_isLoad_q),
   .scin(siv[rv_i0_isLoad_offset:rv_i0_isLoad_offset]),
   .scout(sov[rv_i0_isLoad_offset:rv_i0_isLoad_offset]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);

// i0 rte_lq qualifier
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) rv_i0_rte_lq_latch(
   .din(rv_lq_rv1_i0_rte_lq),
   .dout(rv_i0_rte_lq_q),
   .scin(siv[rv_i0_rte_lq_offset:rv_i0_rte_lq_offset]),
   .scout(sov[rv_i0_rte_lq_offset:rv_i0_rte_lq_offset]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);

// i0 instruction address bits, `PF_IFAR_WIDTH wide
tri_rlmreg_p #(.WIDTH(`PF_IFAR_WIDTH), .INIT(0), .NEEDS_SRESET(1)) rv_i0_ifar_latch(
   .din(rv_lq_rv1_i0_ifar),
   .dout(rv_i0_ifar_q),
   .scin(siv[rv_i0_ifar_offset:rv_i0_ifar_offset + `PF_IFAR_WIDTH - 1]),
   .scout(sov[rv_i0_ifar_offset:rv_i0_ifar_offset + `PF_IFAR_WIDTH - 1]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);

// i0 itag, `ITAG_SIZE_ENC wide
tri_rlmreg_p #(.WIDTH(`ITAG_SIZE_ENC), .INIT(0), .NEEDS_SRESET(1)) rv_i0_itag_latch(
   .din(rv_lq_rv1_i0_itag),
   .dout(rv_i0_itag_q),
   .scin(siv[rv_i0_itag_offset:rv_i0_itag_offset + `ITAG_SIZE_ENC - 1]),
   .scout(sov[rv_i0_itag_offset:rv_i0_itag_offset + `ITAG_SIZE_ENC - 1]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ---- i1 dispatch registers -------------------------------------------------
// Same structure as the i0 registers: act tied to tiup, shared func_sl pervasive
// controls; only width, scan slice and data connections differ per instance.
// NOTE(review): tri_rlmreg_p is defined outside this file; presumably a scannable register.

// i1 valid, one bit per thread
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) rv_i1_vld_latch(
   .din(rv_i1_vld_d),
   .dout(rv_i1_vld_q),
   .scin(siv[rv_i1_vld_offset:rv_i1_vld_offset + `THREADS - 1]),
   .scout(sov[rv_i1_vld_offset:rv_i1_vld_offset + `THREADS - 1]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);

// i1 isLoad qualifier
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) rv_i1_isLoad_latch(
   .din(rv_lq_rv1_i1_isLoad),
   .dout(rv_i1_isLoad_q),
   .scin(siv[rv_i1_isLoad_offset:rv_i1_isLoad_offset]),
   .scout(sov[rv_i1_isLoad_offset:rv_i1_isLoad_offset]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);

// i1 rte_lq qualifier
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) rv_i1_rte_lq_latch(
   .din(rv_lq_rv1_i1_rte_lq),
   .dout(rv_i1_rte_lq_q),
   .scin(siv[rv_i1_rte_lq_offset:rv_i1_rte_lq_offset]),
   .scout(sov[rv_i1_rte_lq_offset:rv_i1_rte_lq_offset]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);

// i1 instruction address bits, `PF_IFAR_WIDTH wide
tri_rlmreg_p #(.WIDTH(`PF_IFAR_WIDTH), .INIT(0), .NEEDS_SRESET(1)) rv_i1_ifar_latch(
   .din(rv_lq_rv1_i1_ifar),
   .dout(rv_i1_ifar_q),
   .scin(siv[rv_i1_ifar_offset:rv_i1_ifar_offset + `PF_IFAR_WIDTH - 1]),
   .scout(sov[rv_i1_ifar_offset:rv_i1_ifar_offset + `PF_IFAR_WIDTH - 1]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);

// i1 itag, `ITAG_SIZE_ENC wide
tri_rlmreg_p #(.WIDTH(`ITAG_SIZE_ENC), .INIT(0), .NEEDS_SRESET(1)) rv_i1_itag_latch(
   .din(rv_lq_rv1_i1_itag),
   .dout(rv_i1_itag_q),
   .scin(siv[rv_i1_itag_offset:rv_i1_itag_offset + `ITAG_SIZE_ENC - 1]),
   .scout(sov[rv_i1_itag_offset:rv_i1_itag_offset + `ITAG_SIZE_ENC - 1]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ---- cp_flush staging chain ------------------------------------------------
// Four back-to-back per-thread registers:
//   iu_lq_cp_flush -> cp_flush_q -> cp_flush2_q -> cp_flush3_q -> cp_flush4_q
// All four stage outputs are OR-ed together when qualifying new dispatch itags.
// NOTE(review): tri_rlmreg_p is defined outside this file; presumably a scannable register.

// stage 1
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) cp_flush_latch(
   .din(iu_lq_cp_flush),
   .dout(cp_flush_q),
   .scin(siv[cp_flush_offset:cp_flush_offset + `THREADS - 1]),
   .scout(sov[cp_flush_offset:cp_flush_offset + `THREADS - 1]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);

// stage 2
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) cp_flush2_latch(
   .din(cp_flush_q),
   .dout(cp_flush2_q),
   .scin(siv[cp_flush2_offset:cp_flush2_offset + `THREADS - 1]),
   .scout(sov[cp_flush2_offset:cp_flush2_offset + `THREADS - 1]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);

// stage 3
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) cp_flush3_latch(
   .din(cp_flush2_q),
   .dout(cp_flush3_q),
   .scin(siv[cp_flush3_offset:cp_flush3_offset + `THREADS - 1]),
   .scout(sov[cp_flush3_offset:cp_flush3_offset + `THREADS - 1]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);

// stage 4
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) cp_flush4_latch(
   .din(cp_flush3_q),
   .dout(cp_flush4_q),
   .scin(siv[cp_flush4_offset:cp_flush4_offset + `THREADS - 1]),
   .scout(sov[cp_flush4_offset:cp_flush4_offset + `THREADS - 1]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// Single-bit register for the parity-error injection input
// (pc_lq_inj_prefetcher_parity -> inj_pfetch_parity_q); act is tied to tiup.
// NOTE(review): tri_rlmreg_p is defined outside this file; presumably a scannable register.
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) inj_pfetch_parity_latch(
   .din(pc_lq_inj_prefetcher_parity),
   .dout(inj_pfetch_parity_q),
   .scin(siv[inj_pfetch_parity_offset:inj_pfetch_parity_offset]),
   .scout(sov[inj_pfetch_parity_offset:inj_pfetch_parity_offset]),
   .nclk(nclk),
   .act(tiup),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .force_t(func_sl_force),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// Save iar and itag from dispatch
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// A dispatch slot supplies a new itag when it is routed to LQ, is a load, and has
// at least one valid thread bit that is not set in any of the four cp_flush stages.
assign new_itag_i0_val = rv_i0_isLoad_q & rv_i0_rte_lq_q &
                         (|(rv_i0_vld_q & ~cp_flush_q & ~cp_flush2_q & ~cp_flush3_q & ~cp_flush4_q));
assign new_itag_i1_val = rv_i1_isLoad_q & rv_i1_rte_lq_q &
                         (|(rv_i1_vld_q & ~cp_flush_q & ~cp_flush2_q & ~cp_flush3_q & ~cp_flush4_q));
|
|
|
|
// Slot i0 write enables: select the lowest-numbered table entry whose valid bit is
// clear, i.e. all entries below it are valid and the entry itself is not.
assign pf_iar_i0_wen[0] = new_itag_i0_val & ~pf_iar_tbl_val_q[0];
assign pf_iar_i0_wen[1] = new_itag_i0_val & pf_iar_tbl_val_q[0] & ~pf_iar_tbl_val_q[1];

generate
   begin : xhdl1
      genvar e;
      for (e = 2; e < `LDSTQ_ENTRIES; e = e + 1)
      begin : pf_iar_i0_wen_gen
         assign pf_iar_i0_wen[e] = new_itag_i0_val & ~pf_iar_tbl_val_q[e] & (&(pf_iar_tbl_val_q[0:e - 1]));
      end
   end
endgenerate
|
|
|
|
// Slot i1 sees the table valids with the entry claimed by slot i0 already marked,
// then uses the same first-free-entry selection as slot i0.
assign pf_iar_val_for_i1 = pf_iar_i0_wen | pf_iar_tbl_val_q;

assign pf_iar_i1_wen[0] = new_itag_i1_val & ~pf_iar_val_for_i1[0];
assign pf_iar_i1_wen[1] = new_itag_i1_val & pf_iar_val_for_i1[0] & ~pf_iar_val_for_i1[1];

generate
   begin : xhdl2
      genvar e;
      for (e = 2; e < `LDSTQ_ENTRIES; e = e + 1)
      begin : pf_iar_i1_wen_gen
         assign pf_iar_i1_wen[e] = new_itag_i1_val & ~pf_iar_val_for_i1[e] & (&(pf_iar_val_for_i1[0:e - 1]));
      end
   end
endgenerate
|
|
|
|
// latch itag report from odq
|
|
|
|
|
|
// Single-bit latch: odq_pf_resolved -> odq_resolved_q.
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) odq_resolved_latch(
   // data
   .din(odq_pf_resolved),
   .dout(odq_resolved_q),
   // clock and activation (act is tied to tiup)
   .nclk(nclk),
   .act(tiup),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[odq_resolved_offset:odq_resolved_offset]),
   .scout(sov[odq_resolved_offset:odq_resolved_offset]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// Latches odq_pf_report_itag into odq_report_itag_q (`ITAG_SIZE_ENC bits wide).
tri_rlmreg_p #(.WIDTH(`ITAG_SIZE_ENC), .INIT(0), .NEEDS_SRESET(1)) odq_report_itag_latch(
   // data
   .din(odq_pf_report_itag),
   .dout(odq_report_itag_q),
   // clock and activation (act is tied to tiup)
   .nclk(nclk),
   .act(tiup),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[odq_report_itag_offset:odq_report_itag_offset + `ITAG_SIZE_ENC - 1]),
   .scout(sov[odq_report_itag_offset:odq_report_itag_offset + `ITAG_SIZE_ENC - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// Latches odq_pf_report_tid into odq_report_tid_q (`THREADS bits wide).
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) odq_report_tid_latch(
   // data
   .din(odq_pf_report_tid),
   .dout(odq_report_tid_q),
   // clock and activation (act is tied to tiup)
   .nclk(nclk),
   .act(tiup),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[odq_report_tid_offset:odq_report_tid_offset + `THREADS - 1]),
   .scout(sov[odq_report_tid_offset:odq_report_tid_offset + `THREADS - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// Per-entry reset: a valid table entry is cleared when the latched ODQ report is
// resolved and matches the entry on both itag and (at least one) thread bit.
generate
   begin : xhdl3
      genvar e;
      for (e = 0; e < `LDSTQ_ENTRIES; e = e + 1)
      begin : done_itag_match_gen
         assign pf_iar_tbl_reset[e] = pf_iar_tbl_val_q[e] & odq_resolved_q &
                                      (|(pf_tid_tbl_q[e] & odq_report_tid_q)) &
                                      (pf_itag_tbl_q[e] == odq_report_itag_q);
      end
   end
endgenerate
|
|
|
|
// itag -> iar table: one iar / itag / tid latch set per entry, plus the next-state
// logic for the per-entry valid bit (the valid latch itself is outside this generate).
generate
   begin : xhdl4
      genvar i;
      for (i = 0; i <= `LDSTQ_ENTRIES - 1; i = i + 1)
      begin : pf_iar_table

         // Entry latches are enabled on a write from either slot, on an entry reset,
         // or whenever any cp_flush_q bit is set.
         assign pf_itag_tbl_act[i] = (|cp_flush_q) | pf_iar_tbl_reset[i] | pf_iar_i1_wen[i] | pf_iar_i0_wen[i];

         // Write data: slot i0 has priority over slot i1; otherwise hold.
         assign pf_iar_tbl_d[i]  = pf_iar_i0_wen[i] ? rv_i0_ifar_q :
                                   pf_iar_i1_wen[i] ? rv_i1_ifar_q :
                                                      pf_iar_tbl_q[i];
         assign pf_itag_tbl_d[i] = pf_iar_i0_wen[i] ? rv_i0_itag_q :
                                   pf_iar_i1_wen[i] ? rv_i1_itag_q :
                                                      pf_itag_tbl_q[i];
         assign pf_tid_tbl_d[i]  = pf_iar_i0_wen[i] ? rv_i0_vld_q :
                                   pf_iar_i1_wen[i] ? rv_i1_vld_q :
                                                      pf_tid_tbl_q[i];

         // Valid is set by a write; an existing valid survives unless the entry is
         // reset or its thread is flushed by cp_flush_q.
         assign pf_iar_tbl_val_d[i] = pf_iar_i0_wen[i] | pf_iar_i1_wen[i] |
                                      (pf_iar_tbl_val_q[i] & ~pf_iar_tbl_reset[i] & ~(|(pf_tid_tbl_q[i] & cp_flush_q)));

         tri_rlmreg_p #(.WIDTH(`PF_IFAR_WIDTH), .INIT(0), .NEEDS_SRESET(1)) pf_iar_tbl_latch(
            // data
            .din(pf_iar_tbl_d[i]),
            .dout(pf_iar_tbl_q[i]),
            // clock and activation
            .nclk(nclk),
            .act(pf_itag_tbl_act[i]),
            // common control pins
            .force_t(func_sl_force),
            .thold_b(func_sl_thold_0_b),
            .sg(sg_0),
            .d_mode(d_mode_dc),
            .delay_lclkr(delay_lclkr_dc),
            .mpw1_b(mpw1_dc_b),
            .mpw2_b(mpw2_dc_b),
            // scan slice
            .scin(siv[pf_iar_tbl_offset + `PF_IFAR_WIDTH * i:pf_iar_tbl_offset + `PF_IFAR_WIDTH * (i + 1) - 1]),
            .scout(sov[pf_iar_tbl_offset + `PF_IFAR_WIDTH * i:pf_iar_tbl_offset + `PF_IFAR_WIDTH * (i + 1) - 1]),
            // supplies
            .vd(vdd),
            .gd(gnd)
         );

         tri_rlmreg_p #(.WIDTH(`ITAG_SIZE_ENC), .INIT(0), .NEEDS_SRESET(1)) pf_itag_tbl_latch(
            // data
            .din(pf_itag_tbl_d[i]),
            .dout(pf_itag_tbl_q[i]),
            // clock and activation
            .nclk(nclk),
            .act(pf_itag_tbl_act[i]),
            // common control pins
            .force_t(func_sl_force),
            .thold_b(func_sl_thold_0_b),
            .sg(sg_0),
            .d_mode(d_mode_dc),
            .delay_lclkr(delay_lclkr_dc),
            .mpw1_b(mpw1_dc_b),
            .mpw2_b(mpw2_dc_b),
            // scan slice
            .scin(siv[pf_itag_tbl_offset + `ITAG_SIZE_ENC * i:pf_itag_tbl_offset + `ITAG_SIZE_ENC * (i + 1) - 1]),
            .scout(sov[pf_itag_tbl_offset + `ITAG_SIZE_ENC * i:pf_itag_tbl_offset + `ITAG_SIZE_ENC * (i + 1) - 1]),
            // supplies
            .vd(vdd),
            .gd(gnd)
         );

         tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) pf_tid_tbl_latch(
            // data
            .din(pf_tid_tbl_d[i]),
            .dout(pf_tid_tbl_q[i]),
            // clock and activation
            .nclk(nclk),
            .act(pf_itag_tbl_act[i]),
            // common control pins
            .force_t(func_sl_force),
            .thold_b(func_sl_thold_0_b),
            .sg(sg_0),
            .d_mode(d_mode_dc),
            .delay_lclkr(delay_lclkr_dc),
            .mpw1_b(mpw1_dc_b),
            .mpw2_b(mpw2_dc_b),
            // scan slice
            .scin(siv[pf_tid_tbl_offset + `THREADS * i:pf_tid_tbl_offset + `THREADS * (i + 1) - 1]),
            .scout(sov[pf_tid_tbl_offset + `THREADS * i:pf_tid_tbl_offset + `THREADS * (i + 1) - 1]),
            // supplies
            .vd(vdd),
            .gd(gnd)
         );
      end
   end
endgenerate
|
|
|
|
|
|
// Valid bits of the itag -> iar table: pf_iar_tbl_val_d -> pf_iar_tbl_val_q
// (`LDSTQ_ENTRIES bits wide).
// NOTE(review): unlike most latches nearby, NEEDS_SRESET is not overridden here,
// so this instance relies on the tri_rlmreg_p default - confirm that is intended.
tri_rlmreg_p #(.WIDTH(`LDSTQ_ENTRIES), .INIT(0)) latch_pf_iar_tbl_val(
   // data
   .din(pf_iar_tbl_val_d),
   .dout(pf_iar_tbl_val_q),
   // clock and activation (act is tied to tiup)
   .nclk(nclk),
   .act(tiup),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[pf_iar_tbl_val_offset:pf_iar_tbl_val_offset + `LDSTQ_ENTRIES - 1]),
   .scout(sov[pf_iar_tbl_val_offset:pf_iar_tbl_val_offset + `LDSTQ_ENTRIES - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// lookup iar from itag-iar table
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// Per-entry lookup match: a valid entry matches the ex5 request when its itag is
// equal and it shares at least one thread bit with dcc_pf_ex5_thrd_id.
generate
   begin : xhdl5
      genvar e;
      for (e = 0; e < `LDSTQ_ENTRIES; e = e + 1)
      begin : new_itag_match_gen
         assign ex5_itag_match[e] = pf_iar_tbl_val_q[e] &
                                    (|(pf_tid_tbl_q[e] & dcc_pf_ex5_thrd_id)) &
                                    (pf_itag_tbl_q[e] == dcc_pf_ex5_itag);
      end
   end
endgenerate
|
|
|
|
// AND-OR mux: ORs together the stored iar of every table entry whose
// ex5_itag_match bit is set; the result is all zeros when nothing matches.
always @(*)
begin: ex5_iar_proc
   reg [61-`PF_IFAR_WIDTH+1:61] sel_iar;
   integer                      ent;

   sel_iar = {`PF_IFAR_WIDTH{1'b0}};
   for (ent = 0; ent < `LDSTQ_ENTRIES; ent = ent + 1)
      sel_iar = sel_iar | (pf_iar_tbl_q[ent] & {`PF_IFAR_WIDTH{ex5_itag_match[ent]}});

   ex5_iar = sel_iar;
end
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// stage out signals to ex7
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// Stage enables: each stage's latches are active when the previous stage holds a
// valid request (to capture it) or the stage itself does (to clear it).
assign ex6_pf_act = ex6_req_val_4pf_q | dcc_pf_ex5_act;
assign ex7_pf_act = ex7_req_val_4pf_q | (ex6_req_val_4pf_q & ~ex6_pf_disable);
assign ex8_pf_act = ex8_req_val_4pf_q | ex7_req_val_4pf_q;
assign pf1_act    = pf1_req_val_4pf_q | ex8_req_val_4pf_q;
assign pf2_act    = pf2_req_val_4pf_q | pf1_req_val_4pf_q;
|
|
|
|
|
|
// ex5 -> ex6 staging of the looked-up iar: ex5_iar -> ex6_iar_q.
tri_rlmreg_p #(.WIDTH(`PF_IFAR_WIDTH), .INIT(0), .NEEDS_SRESET(1)) ex6_iar_latch(
   // data
   .din(ex5_iar),
   .dout(ex6_iar_q),
   // clock and activation
   .nclk(nclk),
   .act(ex6_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex6_iar_offset:ex6_iar_offset + `PF_IFAR_WIDTH - 1]),
   .scout(sov[ex6_iar_offset:ex6_iar_offset + `PF_IFAR_WIDTH - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex6 -> ex7 staging of the iar: ex6_iar_q -> ex7_iar_q.
tri_rlmreg_p #(.WIDTH(`PF_IFAR_WIDTH), .INIT(0), .NEEDS_SRESET(1)) ex7_iar_latch(
   // data
   .din(ex6_iar_q),
   .dout(ex7_iar_q),
   // clock and activation
   .nclk(nclk),
   .act(ex7_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex7_iar_offset:ex7_iar_offset + `PF_IFAR_WIDTH - 1]),
   .scout(sov[ex7_iar_offset:ex7_iar_offset + `PF_IFAR_WIDTH - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex7 -> ex8 staging of the iar: ex7_iar_q -> ex8_iar_q.
tri_rlmreg_p #(.WIDTH(`PF_IFAR_WIDTH), .INIT(0), .NEEDS_SRESET(1)) ex8_iar_latch(
   // data
   .din(ex7_iar_q),
   .dout(ex8_iar_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_iar_offset:ex8_iar_offset + `PF_IFAR_WIDTH - 1]),
   .scout(sov[ex8_iar_offset:ex8_iar_offset + `PF_IFAR_WIDTH - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex5 -> ex6 staging of the thread id: dcc_pf_ex5_thrd_id -> ex6_thrd_q.
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) ex6_thrd_latch(
   // data
   .din(dcc_pf_ex5_thrd_id),
   .dout(ex6_thrd_q),
   // clock and activation
   .nclk(nclk),
   .act(ex6_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex6_thrd_offset:ex6_thrd_offset + `THREADS - 1]),
   .scout(sov[ex6_thrd_offset:ex6_thrd_offset + `THREADS - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex6 -> ex7 staging of the thread id: ex6_thrd_q -> ex7_thrd_q.
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) ex7_thrd_latch(
   // data
   .din(ex6_thrd_q),
   .dout(ex7_thrd_q),
   // clock and activation
   .nclk(nclk),
   .act(ex7_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex7_thrd_offset:ex7_thrd_offset + `THREADS - 1]),
   .scout(sov[ex7_thrd_offset:ex7_thrd_offset + `THREADS - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex7 -> ex8 staging of the thread id: ex7_thrd_q -> ex8_thrd_q.
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) ex8_thrd_latch(
   // data
   .din(ex7_thrd_q),
   .dout(ex8_thrd_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_thrd_offset:ex8_thrd_offset + `THREADS - 1]),
   .scout(sov[ex8_thrd_offset:ex8_thrd_offset + `THREADS - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex5 -> ex6 staging of the effective address: dcc_pf_ex5_eff_addr -> ex6_eff_addr_q.
tri_rlmreg_p #(.WIDTH((59-(64-(2**`GPR_WIDTH_ENC))+1)), .INIT(0), .NEEDS_SRESET(1)) ex6_eff_addr_latch(
   // data
   .din(dcc_pf_ex5_eff_addr),
   .dout(ex6_eff_addr_q),
   // clock and activation
   .nclk(nclk),
   .act(ex6_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex6_eff_addr_offset:ex6_eff_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   .scout(sov[ex6_eff_addr_offset:ex6_eff_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex6 -> ex7 staging of the effective address: ex6_eff_addr_q -> ex7_eff_addr_q.
tri_rlmreg_p #(.WIDTH((59-(64-(2**`GPR_WIDTH_ENC))+1)), .INIT(0), .NEEDS_SRESET(1)) ex7_eff_addr_latch(
   // data
   .din(ex6_eff_addr_q),
   .dout(ex7_eff_addr_q),
   // clock and activation
   .nclk(nclk),
   .act(ex7_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex7_eff_addr_offset:ex7_eff_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   .scout(sov[ex7_eff_addr_offset:ex7_eff_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex7 -> ex8 staging of the effective address: ex7_eff_addr_q -> ex8_eff_addr_q.
tri_rlmreg_p #(.WIDTH((59-(64-(2**`GPR_WIDTH_ENC))+1)), .INIT(0), .NEEDS_SRESET(1)) ex8_eff_addr_latch(
   // data
   .din(ex7_eff_addr_q),
   .dout(ex8_eff_addr_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_eff_addr_offset:ex8_eff_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   .scout(sov[ex8_eff_addr_offset:ex8_eff_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex5 -> ex6 staging of the request valid: dcc_pf_ex5_req_val_4pf -> ex6_req_val_4pf_q.
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) ex6_req_val_4pf_latch(
   // data
   .din(dcc_pf_ex5_req_val_4pf),
   .dout(ex6_req_val_4pf_q),
   // clock and activation
   .nclk(nclk),
   .act(ex6_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex6_req_val_4pf_offset:ex6_req_val_4pf_offset]),
   .scout(sov[ex6_req_val_4pf_offset:ex6_req_val_4pf_offset]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// The ex6 request only advances to ex7 when ex6_pf_disable is not asserted.
assign ex7_req_val_4pf_d = ~ex6_pf_disable & ex6_req_val_4pf_q;

// ex6 -> ex7 staging of the request valid: ex7_req_val_4pf_d -> ex7_req_val_4pf_q.
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) ex7_req_val_4pf_latch(
   // data
   .din(ex7_req_val_4pf_d),
   .dout(ex7_req_val_4pf_q),
   // clock and activation
   .nclk(nclk),
   .act(ex7_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex7_req_val_4pf_offset:ex7_req_val_4pf_offset]),
   .scout(sov[ex7_req_val_4pf_offset:ex7_req_val_4pf_offset]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex7 -> ex8 staging of the request valid: ex7_req_val_4pf_q -> ex8_req_val_4pf_q.
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) ex8_req_val_4pf_latch(
   // data
   .din(ex7_req_val_4pf_q),
   .dout(ex8_req_val_4pf_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_req_val_4pf_offset:ex8_req_val_4pf_offset]),
   .scout(sov[ex8_req_val_4pf_offset:ex8_req_val_4pf_offset]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex8 -> pf1 staging of the request valid: ex8_req_val_4pf_q -> pf1_req_val_4pf_q.
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) pf1_req_val_4pf_latch(
   // data
   .din(ex8_req_val_4pf_q),
   .dout(pf1_req_val_4pf_q),
   // clock and activation
   .nclk(nclk),
   .act(pf1_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[pf1_req_val_4pf_offset:pf1_req_val_4pf_offset]),
   .scout(sov[pf1_req_val_4pf_offset:pf1_req_val_4pf_offset]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// The pf1 request only advances to pf2 when pf1_stride_too_small_q is not set.
assign pf2_req_val_4pf_d = ~pf1_stride_too_small_q & pf1_req_val_4pf_q;

// pf1 -> pf2 staging of the request valid: pf2_req_val_4pf_d -> pf2_req_val_4pf_q.
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) pf2_req_val_4pf_latch(
   // data
   .din(pf2_req_val_4pf_d),
   .dout(pf2_req_val_4pf_q),
   // clock and activation
   .nclk(nclk),
   .act(pf2_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[pf2_req_val_4pf_offset:pf2_req_val_4pf_offset]),
   .scout(sov[pf2_req_val_4pf_offset:pf2_req_val_4pf_offset]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// A load miss is only recorded when the ex5 request is valid for the prefetcher.
assign ex5_valid_loadmiss = dcc_pf_ex5_req_val_4pf & dcc_pf_ex5_loadmiss;

// ex5 -> ex6 staging of the qualified load miss: ex5_valid_loadmiss -> ex6_loadmiss_q.
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) ex6_loadmiss_latch(
   // data
   .din(ex5_valid_loadmiss),
   .dout(ex6_loadmiss_q),
   // clock and activation
   .nclk(nclk),
   .act(ex6_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex6_loadmiss_offset:ex6_loadmiss_offset]),
   .scout(sov[ex6_loadmiss_offset:ex6_loadmiss_offset]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex6 -> ex7 staging of the load miss: ex6_loadmiss_q -> ex7_loadmiss_q.
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) ex7_loadmiss_latch(
   // data
   .din(ex6_loadmiss_q),
   .dout(ex7_loadmiss_q),
   // clock and activation
   .nclk(nclk),
   .act(ex7_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex7_loadmiss_offset:ex7_loadmiss_offset]),
   .scout(sov[ex7_loadmiss_offset:ex7_loadmiss_offset]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// lookup entry in RPT (Reference Predictor Table)
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// The RPT is indexed by iar bits 57:61 of the ex5 lookup result.
assign rpt_rd_addr = ex5_iar[57:61];

// RPT array instance: one 70-bit write port steered by a 2-bit way enable, and a
// read port returning 140 bits (both 70-bit ways) for the addressed row.
tri_32x70_2w_1r1w rpt(
   // Read port
   .rd_act(rpt_rd_act[0:1]),
   .rd_addr(rpt_rd_addr[0:4]),
   .data_out(rpt_data_out[0:139]),
   // Write port (rpt_wen drives both the write act and the way select)
   .wr_act(rpt_wen[0:1]),
   .wr_way(rpt_wen[0:1]),
   .wr_addr(rpt_wrt_addr[0:4]),
   .data_in(rpt_data_in[0:69]),
   // POWER PINS
   .gnd(gnd),
   .vdd(vdd),
   .vcs(vcs),
   // CLOCK and CLOCKCONTROL ports
   .nclk(nclk),
   .sg_0(sg_0),
   .abst_sl_thold_0(abst_sl_thold_0),
   .ary_nsl_thold_0(ary_nsl_thold_0),
   .time_sl_thold_0(time_sl_thold_0),
   .repr_sl_thold_0(repr_sl_thold_0),
   .func_sl_force(func_sl_force),
   .func_sl_thold_0_b(func_sl_thold_0_b),
   .g8t_clkoff_dc_b(g8t_clkoff_dc_b),
   .ccflush_dc(pc_lq_ccflush_dc),
   .scan_dis_dc_b(an_ac_scan_dis_dc_b),
   .scan_diag_dc(an_ac_scan_diag_dc),
   .g8t_d_mode_dc(g8t_d_mode_dc),
   .g8t_mpw1_dc_b(g8t_mpw1_dc_b[0:4]),
   .g8t_mpw2_dc_b(g8t_mpw2_dc_b),
   .g8t_delay_lclkr_dc(g8t_delay_lclkr_dc[0:4]),
   .d_mode_dc(d_mode_dc),
   .mpw1_dc_b(mpw1_dc_b),
   .mpw2_dc_b(mpw2_dc_b),
   .delay_lclkr_dc(delay_lclkr_dc),
   // ABIST
   .wr_abst_act(pc_xu_abist_g8t_wenb_q),
   .rd0_abst_act(pc_xu_abist_g8t1p_renb_0_q),
   .abist_di(pc_xu_abist_di_0_q[0:3]),
   .abist_bw_odd(pc_xu_abist_g8t_bw_1_q),
   .abist_bw_even(pc_xu_abist_g8t_bw_0_q),
   .abist_wr_adr(pc_xu_abist_waddr_0_q[0:4]),
   .abist_rd0_adr(pc_xu_abist_raddr_0_q[0:4]),
   .tc_lbist_ary_wrt_thru_dc(an_ac_lbist_ary_wrt_thru_dc),
   .abist_ena_1(pc_xu_abist_ena_dc),
   .abist_g8t_rd0_comp_ena(pc_xu_abist_wl64_comp_ena_q),
   .abist_raw_dc_b(pc_xu_abist_raw_dc_b),
   .obs0_abist_cmp(pc_xu_abist_g8t_dcomp_q[0:3]),
   // Scan
   .abst_scan_in(abst_scan_in[0:1]),
   .abst_scan_out(abst_scan_out[0:1]),
   .time_scan_in(time_scan_in),
   .time_scan_out(time_scan_out),
   .repr_scan_in(repr_scan_in),
   .repr_scan_out(repr_scan_out),
   .func_scan_in(rpt_func_scan_in),
   .func_scan_out(rpt_func_scan_out),
   // BOLT-ON
   .lcb_bolt_sl_thold_0(bolt_sl_thold_0),
   .pc_bo_enable_2(pc_bo_enable_2),            // general bolt-on enable
   .pc_bo_reset(pc_xu_bo_reset),               // reset
   .pc_bo_unload(pc_xu_bo_unload),             // unload sticky bits
   .pc_bo_repair(pc_xu_bo_repair),             // execute sticky bit decode
   .pc_bo_shdata(pc_xu_bo_shdata),             // shift data for timing write and diag loop
   .pc_bo_select(pc_xu_bo_select[0:1]),        // select for mask and hier writes
   .bo_pc_failout(xu_pc_bo_fail[0:1]),         // fail/no-fix reg
   .bo_pc_diagloop(xu_pc_bo_diagout[0:1]),
   .tri_lcb_mpw1_dc_b(mpw1_dc_b),
   .tri_lcb_mpw2_dc_b(mpw2_dc_b),
   .tri_lcb_delay_lclkr_dc(delay_lclkr_dc),
   .tri_lcb_clkoff_dc_b(clkoff_dc_b),
   .tri_lcb_act_dis_dc(tidn)
);
|
|
|
|
// Write-to-read bypass: when a way is being written at the address being read, the
// array read for that way is turned off and the write data is forwarded instead.
assign rpt_byp_val[0] = rpt_wen[0] & (rpt_wrt_addr == rpt_rd_addr);
assign rpt_byp_val[1] = rpt_wen[1] & (rpt_wrt_addr == rpt_rd_addr);

// The array is read only for an active ex5 request that is not bypassed, and only
// while not every bit of pfetch_dis_thrd is set.
assign rpt_rd_act[0] = dcc_pf_ex5_act & ~(rpt_byp_val[0] | (&pfetch_dis_thrd));
assign rpt_rd_act[1] = dcc_pf_ex5_act & ~(rpt_byp_val[1] | (&pfetch_dis_thrd));

// Activation of the two bypass-data stages.
assign byp_act  = |rpt_wen[0:1];
assign byp1_act = |byp_rpt_ary_q;

// Per-way "use bypass data" flag (set whenever the array read is not active),
// staged twice to line up with the entry select below.
assign byp_rpt_ary_d  = ~rpt_rd_act;
assign byp1_rpt_ary_d = byp_rpt_ary_q;

// Write data staged alongside the flags.
assign rpt_byp_dat_d  = rpt_data_in[0:69];
assign rpt_byp_dat1_d = rpt_byp_dat_q[0:69];

// ex7 view of each way: staged write data when bypassing, array output otherwise.
assign ex7_rpt_entry0 = byp1_rpt_ary_q[0] ? rpt_byp_dat1_q[0:69] : rpt_data_out[0:69];
assign ex7_rpt_entry1 = byp1_rpt_ary_q[1] ? rpt_byp_dat1_q[0:69] : rpt_data_out[70:139];
|
|
|
|
// Regenerate parity over each ex7 RPT entry: one XOR bit per 8-bit group of bits
// 0:55, plus a final bit covering bit 56 and the `THREADS-wide field that follows.
// Way 0
assign ex7_rpt_entry0_par[57 + `THREADS] = ^ex7_rpt_entry0[0:7];
assign ex7_rpt_entry0_par[58 + `THREADS] = ^ex7_rpt_entry0[8:15];
assign ex7_rpt_entry0_par[59 + `THREADS] = ^ex7_rpt_entry0[16:23];
assign ex7_rpt_entry0_par[60 + `THREADS] = ^ex7_rpt_entry0[24:31];
assign ex7_rpt_entry0_par[61 + `THREADS] = ^ex7_rpt_entry0[32:39];
assign ex7_rpt_entry0_par[62 + `THREADS] = ^ex7_rpt_entry0[40:47];
assign ex7_rpt_entry0_par[63 + `THREADS] = ^ex7_rpt_entry0[48:55];
assign ex7_rpt_entry0_par[64 + `THREADS] = ^ex7_rpt_entry0[56:57 + `THREADS - 1];

// Way 1
assign ex7_rpt_entry1_par[57 + `THREADS] = ^ex7_rpt_entry1[0:7];
assign ex7_rpt_entry1_par[58 + `THREADS] = ^ex7_rpt_entry1[8:15];
assign ex7_rpt_entry1_par[59 + `THREADS] = ^ex7_rpt_entry1[16:23];
assign ex7_rpt_entry1_par[60 + `THREADS] = ^ex7_rpt_entry1[24:31];
assign ex7_rpt_entry1_par[61 + `THREADS] = ^ex7_rpt_entry1[32:39];
assign ex7_rpt_entry1_par[62 + `THREADS] = ^ex7_rpt_entry1[40:47];
assign ex7_rpt_entry1_par[63 + `THREADS] = ^ex7_rpt_entry1[48:55];
assign ex7_rpt_entry1_par[64 + `THREADS] = ^ex7_rpt_entry1[56:57 + `THREADS - 1];
|
|
|
|
|
|
// First stage of the per-way bypass flags: byp_rpt_ary_d -> byp_rpt_ary_q.
tri_rlmreg_p #(.WIDTH(2), .INIT(0), .NEEDS_SRESET(1)) byp_rpt_ary_latch(
   // data
   .din(byp_rpt_ary_d),
   .dout(byp_rpt_ary_q),
   // clock and activation (act is tied to tiup)
   .nclk(nclk),
   .act(tiup),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[byp_rpt_ary_offset:byp_rpt_ary_offset + 2 - 1]),
   .scout(sov[byp_rpt_ary_offset:byp_rpt_ary_offset + 2 - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// Second stage of the per-way bypass flags: byp1_rpt_ary_d -> byp1_rpt_ary_q.
tri_rlmreg_p #(.WIDTH(2), .INIT(0), .NEEDS_SRESET(1)) byp1_rpt_ary_latch(
   // data
   .din(byp1_rpt_ary_d),
   .dout(byp1_rpt_ary_q),
   // clock and activation (act is tied to tiup)
   .nclk(nclk),
   .act(tiup),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[byp1_rpt_ary_offset:byp1_rpt_ary_offset + 2 - 1]),
   .scout(sov[byp1_rpt_ary_offset:byp1_rpt_ary_offset + 2 - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// First stage of the bypass data (70 bits): rpt_byp_dat_d -> rpt_byp_dat_q,
// enabled by byp_act.
tri_rlmreg_p #(.WIDTH(70), .INIT(0), .NEEDS_SRESET(1)) rpt_byp_dat_latch(
   // data
   .din(rpt_byp_dat_d),
   .dout(rpt_byp_dat_q),
   // clock and activation
   .nclk(nclk),
   .act(byp_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[rpt_byp_dat_offset:rpt_byp_dat_offset + 70 - 1]),
   .scout(sov[rpt_byp_dat_offset:rpt_byp_dat_offset + 70 - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// Second stage of the bypass data (70 bits): rpt_byp_dat1_d -> rpt_byp_dat1_q,
// enabled by byp1_act.
tri_rlmreg_p #(.WIDTH(70), .INIT(0), .NEEDS_SRESET(1)) rpt_byp_dat1_latch(
   // data
   .din(rpt_byp_dat1_d),
   .dout(rpt_byp_dat1_q),
   // clock and activation
   .nclk(nclk),
   .act(byp1_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[rpt_byp_dat1_offset:rpt_byp_dat1_offset + 70 - 1]),
   .scout(sov[rpt_byp_dat1_offset:rpt_byp_dat1_offset + 70 - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
|
|
// Parity error per way: any mismatch between the 8 parity bits stored in the entry
// and the parity regenerated from the entry contents.
assign ex7_rpt_entry0_pe = |(ex7_rpt_entry0[57 + `THREADS:57 + `THREADS + 7] ^ ex7_rpt_entry0_par);
assign ex7_rpt_entry1_pe = |(ex7_rpt_entry1[57 + `THREADS:57 + `THREADS + 7] ^ ex7_rpt_entry1_par);
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// Check entry hit/miss and create new entry
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// A way hits when the ex7 request is valid, the entry's valid bit is set, it has no
// parity error, and both its iar tag and its thread field match the request.
assign entry0_hit = ex7_req_val_4pf_q & ex7_rpt_entry0[0] & ~ex7_rpt_entry0_pe &
                    (ex7_rpt_entry0[45:51] == ex7_iar_q[50:56]) &
                    (ex7_rpt_entry0[57:57 + `THREADS - 1] == ex7_thrd_q);

assign entry1_hit = ex7_req_val_4pf_q & ex7_rpt_entry1[0] & ~ex7_rpt_entry1_pe &
                    (ex7_rpt_entry1[45:51] == ex7_iar_q[50:56]) &
                    (ex7_rpt_entry1[57:57 + `THREADS - 1] == ex7_thrd_q);

// Entry image used when neither way hits.
assign new_rpt_entry[0]     = 1'b1;                    // valid bit
assign new_rpt_entry[1:22]  = ex7_eff_addr_q[38:59];   // last data address
assign new_rpt_entry[23:44] = 22'b0;                   // stride
assign new_rpt_entry[45:51] = ex7_iar_q[50:56];        // iar tag
assign new_rpt_entry[52:53] = 2'b01;                   // prefetch state
assign new_rpt_entry[54:55] = 2'b00;                   // burst counter
assign new_rpt_entry[56]    = 1'b0;                    // duplicate flag

// Way 0 has priority over way 1; the new-entry image is the fallback.
assign ex7_rpt_entry_mux = entry0_hit ? ex7_rpt_entry0[0:56] :
                           entry1_hit ? ex7_rpt_entry1[0:56] :
                                        new_rpt_entry;

// Values carried into ex8.
assign ex8_pf_hits_d   = {entry0_hit, entry1_hit, ex7_loadmiss_q};
assign ex8_rpt_pe_d    = {ex7_rpt_entry0_pe, ex7_rpt_entry1_pe};
assign ex8_pfetch_pe_d = (ex7_rpt_entry0_pe | ex7_rpt_entry1_pe) & ex7_req_val_4pf_q;
|
|
|
|
|
|
// ex7 -> ex8: last-data-address field (bits 1:22) of the selected RPT entry.
tri_rlmreg_p #(.WIDTH(22), .INIT(0), .NEEDS_SRESET(1)) ex8_last_dat_addr_latch(
   // data
   .din(ex7_rpt_entry_mux[1:22]),
   .dout(ex8_last_dat_addr_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_last_dat_addr_offset:ex8_last_dat_addr_offset + 22 - 1]),
   .scout(sov[ex8_last_dat_addr_offset:ex8_last_dat_addr_offset + 22 - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex7 -> ex8: stride field (bits 23:44) of the selected RPT entry.
tri_rlmreg_p #(.WIDTH(22), .INIT(0), .NEEDS_SRESET(1)) ex8_stride_latch(
   // data
   .din(ex7_rpt_entry_mux[23:44]),
   .dout(ex8_stride_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_stride_offset:ex8_stride_offset + 22 - 1]),
   .scout(sov[ex8_stride_offset:ex8_stride_offset + 22 - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex7 -> ex8: prefetch-state field (bits 52:53) of the selected RPT entry.
tri_rlmreg_p #(.WIDTH(2), .INIT(0), .NEEDS_SRESET(1)) ex8_pf_state_latch(
   // data
   .din(ex7_rpt_entry_mux[52:53]),
   .dout(ex8_pf_state_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_pf_state_offset:ex8_pf_state_offset + 2 - 1]),
   .scout(sov[ex8_pf_state_offset:ex8_pf_state_offset + 2 - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex7 -> ex8: burst-counter field (bits 54:55) of the selected RPT entry.
tri_rlmreg_p #(.WIDTH(2), .INIT(0), .NEEDS_SRESET(1)) ex8_burst_cnt_latch(
   // data
   .din(ex7_rpt_entry_mux[54:55]),
   .dout(ex8_burst_cnt_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_burst_cnt_offset:ex8_burst_cnt_offset + 2 - 1]),
   .scout(sov[ex8_burst_cnt_offset:ex8_burst_cnt_offset + 2 - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex7 -> ex8: duplicate-flag bit (bit 56) of the selected RPT entry.
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) ex8_dup_flag_latch(
   // data
   .din(ex7_rpt_entry_mux[56]),
   .dout(ex8_dup_flag_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_dup_flag_offset:ex8_dup_flag_offset]),
   .scout(sov[ex8_dup_flag_offset:ex8_dup_flag_offset]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex7 -> ex8: {entry0_hit, entry1_hit, load miss} packed in ex8_pf_hits_d.
tri_rlmreg_p #(.WIDTH(3), .INIT(0), .NEEDS_SRESET(1)) ex8_pf_hits_latch(
   // data
   .din(ex8_pf_hits_d),
   .dout(ex8_pf_hits_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_pf_hits_offset:ex8_pf_hits_offset + 3 - 1]),
   .scout(sov[ex8_pf_hits_offset:ex8_pf_hits_offset + 3 - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// ex7 -> ex8: per-way parity-error bits, ex8_rpt_pe_d -> ex8_rpt_pe_q.
tri_rlmreg_p #(.WIDTH(2), .INIT(0), .NEEDS_SRESET(1)) ex8_rpt_pe_latch(
   // data
   .din(ex8_rpt_pe_d),
   .dout(ex8_rpt_pe_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_rpt_pe_offset:ex8_rpt_pe_offset + 2 - 1]),
   .scout(sov[ex8_rpt_pe_offset:ex8_rpt_pe_offset + 2 - 1]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// ex7 -> ex8: combined parity-error indication, ex8_pfetch_pe_d -> ex8_pfetch_pe_q.
tri_rlmreg_p #(.WIDTH(1), .INIT(0), .NEEDS_SRESET(1)) ex8_pfetch_pe_latch(
   // data
   .din(ex8_pfetch_pe_d),
   .dout(ex8_pfetch_pe_q),
   // clock and activation
   .nclk(nclk),
   .act(ex8_pf_act),
   // common control pins
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan slice
   .scin(siv[ex8_pfetch_pe_offset:ex8_pfetch_pe_offset]),
   .scout(sov[ex8_pfetch_pe_offset:ex8_pfetch_pe_offset]),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// Error-report macro: passes the latched prefetcher parity error
// (ex8_pfetch_pe_q) out on lq_pc_err_prefetcher_parity.
tri_direct_err_rpt #(.WIDTH(1)) pfetch_err_rpt(
   // error in / out
   .err_in(ex8_pfetch_pe_q),
   .err_out(lq_pc_err_prefetcher_parity),
   // supplies
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// Compute new Stride
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
assign new_stride_prelim = ex8_eff_addr_q[38:59] - ex8_last_dat_addr_q[38:59]; // Data EA - last address
|
|
|
|
assign same_cline = ex8_eff_addr_q[38:57] == ex8_last_dat_addr_q[38:57];
|
|
|
|
// transaction dropped if stride is not at least half a cache line (stride of 0, 1 or -1)
|
|
assign stride_too_small = ((new_stride_prelim == {20'h00000, 2'b00}) | // 0
|
|
(new_stride_prelim == {20'h00000, 2'b01}) | // +1 (+16 bytes)
|
|
(new_stride_prelim == {20'hFFFFF, 2'b11})) & // -1 (-16 bytes)
|
|
(ex8_pf_hits_q[0] | ex8_pf_hits_q[1]); // hit on either entry 0 or 1
|
|
|
|
// If the magnitude of the stride is less than a cache line (2 or 3 units), round it up
// to +/-4 units; strides of any other value pass through unchanged.
assign stride_lessthan_cline_pos = (new_stride_prelim == 22'h000002) |    // +2 (+32 bytes)
                                   (new_stride_prelim == 22'h000003);     // +3 (+48 bytes)

assign stride_lessthan_cline_neg = (new_stride_prelim == 22'h3FFFFE) |    // -2 (-32 bytes)
                                   (new_stride_prelim == 22'h3FFFFD);     // -3 (-48 bytes)

assign pf1_new_stride_d = stride_lessthan_cline_pos ? 22'h000004 :        // +4 (+64 bytes)
                          stride_lessthan_cline_neg ? 22'h3FFFFC :        // -4 (-64 bytes)
                                                      new_stride_prelim;
|
|
|
|
// Inputs of the pf1 stage registers that are taken unmodified from the ex8 stage registers.
assign pf1_thrd_d      = ex8_thrd_q;         // thread id
assign pf1_iar_d       = ex8_iar_q;          // instruction address
assign pf1_data_ea_d   = ex8_eff_addr_q;     // data effective address
assign pf1_hits_d      = ex8_pf_hits_q;      // hit vector
assign pf1_pf_state_d  = ex8_pf_state_q;     // prefetch state
assign pf1_burst_cnt_d = ex8_burst_cnt_q;    // burst counter
assign pf1_dup_flag_d  = ex8_dup_flag_q;     // duplicate flag
|
|
|
|
|
|
// pf1 stage: 22-bit register holding the newly computed stride (pf1_new_stride_d -> pf1_new_stride_q).
tri_rlmreg_p #(.WIDTH(22), .INIT(0)) latch_pf1_new_stride(
   // data
   .din(pf1_new_stride_d),
   .dout(pf1_new_stride_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_new_stride_offset:pf1_new_stride_offset + 22 - 1]),
   .scout(sov[pf1_new_stride_offset:pf1_new_stride_offset + 22 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf1 stage: 22-bit register for the stride carried from the ex8 stage (ex8_stride_q -> pf1_rpt_stride_q).
tri_rlmreg_p #(.WIDTH(22), .INIT(0)) latch_pf1_rpt_stride(
   // data
   .din(ex8_stride_q),
   .dout(pf1_rpt_stride_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_rpt_stride_offset:pf1_rpt_stride_offset + 22 - 1]),
   .scout(sov[pf1_rpt_stride_offset:pf1_rpt_stride_offset + 22 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf1 stage: data effective address register (pf1_data_ea_d -> pf1_data_ea_q).
// Width is derived from `GPR_WIDTH_ENC so that the register spans address bits up to bit 59.
tri_rlmreg_p #(.WIDTH((59-(64-(2**`GPR_WIDTH_ENC))+1)), .INIT(0)) latch_pf1_data_ea(
   // data
   .din(pf1_data_ea_d),
   .dout(pf1_data_ea_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_data_ea_offset:pf1_data_ea_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   .scout(sov[pf1_data_ea_offset:pf1_data_ea_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf1 stage: instruction address register, `PF_IFAR_WIDTH bits (pf1_iar_d -> pf1_iar_q).
tri_rlmreg_p #(.WIDTH(`PF_IFAR_WIDTH), .INIT(0)) latch_pf1_iar(
   // data
   .din(pf1_iar_d),
   .dout(pf1_iar_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_iar_offset:pf1_iar_offset + `PF_IFAR_WIDTH - 1]),
   .scout(sov[pf1_iar_offset:pf1_iar_offset + `PF_IFAR_WIDTH - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf1 stage: 2-bit prefetch state register (pf1_pf_state_d -> pf1_pf_state_q).
tri_rlmreg_p #(.WIDTH(2), .INIT(0)) latch_pf1_pf_state(
   // data
   .din(pf1_pf_state_d),
   .dout(pf1_pf_state_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_pf_state_offset:pf1_pf_state_offset + 2 - 1]),
   .scout(sov[pf1_pf_state_offset:pf1_pf_state_offset + 2 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf1 stage: 2-bit burst counter register (pf1_burst_cnt_d -> pf1_burst_cnt_q).
tri_rlmreg_p #(.WIDTH(2), .INIT(0)) latch_pf1_burst_cnt(
   // data
   .din(pf1_burst_cnt_d),
   .dout(pf1_burst_cnt_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_burst_cnt_offset:pf1_burst_cnt_offset + 2 - 1]),
   .scout(sov[pf1_burst_cnt_offset:pf1_burst_cnt_offset + 2 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf1 stage: 1-bit duplicate flag register (pf1_dup_flag_d -> pf1_dup_flag_q).
tri_rlmreg_p #(.WIDTH(1), .INIT(0)) latch_pf1_dup_flag(
   // data
   .din(pf1_dup_flag_d),
   .dout(pf1_dup_flag_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_dup_flag_offset:pf1_dup_flag_offset]),
   .scout(sov[pf1_dup_flag_offset:pf1_dup_flag_offset]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf1 stage: 3-bit hit vector register (pf1_hits_d -> pf1_hits_q).
tri_rlmreg_p #(.WIDTH(3), .INIT(0)) latch_pf1_hits(
   // data
   .din(pf1_hits_d),
   .dout(pf1_hits_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_hits_offset:pf1_hits_offset + 3 - 1]),
   .scout(sov[pf1_hits_offset:pf1_hits_offset + 3 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf1 stage: 2-bit RPT parity-error register, staged from ex8 (ex8_rpt_pe_q -> pf1_rpt_pe_q).
tri_rlmreg_p #(.WIDTH(2), .INIT(0), .NEEDS_SRESET(1)) pf1_rpt_pe_latch(
   // data
   .din(ex8_rpt_pe_q),
   .dout(pf1_rpt_pe_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_rpt_pe_offset:pf1_rpt_pe_offset + 2 - 1]),
   .scout(sov[pf1_rpt_pe_offset:pf1_rpt_pe_offset + 2 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// pf1 stage: thread id register, `THREADS bits (pf1_thrd_d -> pf1_thrd_q).
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0)) latch_pf1_thrd(
   // data
   .din(pf1_thrd_d),
   .dout(pf1_thrd_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_thrd_offset:pf1_thrd_offset + `THREADS - 1]),
   .scout(sov[pf1_thrd_offset:pf1_thrd_offset + `THREADS - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf1 stage: registers the same-cache-line compare result (same_cline -> pf1_same_cline_q).
tri_rlmreg_p #(.WIDTH(1), .INIT(0)) latch_pf1_same_cline(
   // data
   .din(same_cline),
   .dout(pf1_same_cline_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_same_cline_offset:pf1_same_cline_offset]),
   .scout(sov[pf1_same_cline_offset:pf1_same_cline_offset]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf1 stage: registers the stride-too-small indication (stride_too_small -> pf1_stride_too_small_q).
tri_rlmreg_p #(.WIDTH(1), .INIT(0)) latch_pf1_stride_too_small(
   // data
   .din(stride_too_small),
   .dout(pf1_stride_too_small_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf1_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf1_stride_too_small_offset:pf1_stride_too_small_offset]),
   .scout(sov[pf1_stride_too_small_offset:pf1_stride_too_small_offset]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// Stride Compare
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// The newly computed stride equals the stride carried with the request.
assign stride_match = (pf1_rpt_stride_q == pf1_new_stride_q);

// A prefetch is generated when all of the following hold:
//   - the prefetch state is 0, 1 or 2 (not 2'b11)
//   - the stride matched, or the burst count is 3 while the state is 2'b00
//   - the access hit entry 0 or 1 (i.e. not a new RPT entry)
//   - the stride was not flagged as too small
// NOTE(review): no request-valid term appears in this expression; presumably the
// qualification happens elsewhere (e.g. pf1_disable) - confirm.
assign generate_pfetch = (pf1_pf_state_q != 2'b11) &
                         (stride_match | ((pf1_burst_cnt_q == 2'b11) & (pf1_pf_state_q == 2'b00))) &
                         (pf1_hits_q[0:1] != 2'b00) &
                         ~pf1_stride_too_small_q;
|
|
|
|
// Next prefetch state for an existing entry, selected by {stride_match, current state}:
//   stride matched    : 00 -> 00, 01 -> 00, 10 -> 00, 11 -> 10
//   stride mismatched : 00 -> 01, 01 -> 10, 10 -> 11, 11 -> 11
assign nxt_state_cntrl = {stride_match, pf1_pf_state_q};

assign pf1_update_state = (nxt_state_cntrl == 3'b100) ? 2'b00 :
                          (nxt_state_cntrl == 3'b101) ? 2'b00 :
                          (nxt_state_cntrl == 3'b110) ? 2'b00 :
                          (nxt_state_cntrl == 3'b111) ? 2'b10 :
                          (nxt_state_cntrl == 3'b000) ? 2'b01 :
                          (nxt_state_cntrl == 3'b001) ? 2'b10 :
                          (nxt_state_cntrl == 3'b010) ? 2'b11 :
                                                        2'b11;

// A new entry (no hit on entry 0 or 1) always starts in state 01.
assign pf2_next_state_d = (pf1_hits_q[0:1] == 2'b00) ? 2'b01 :
                                                       pf1_update_state;
|
|
|
|
// Stride written forward to pf2: the new stride replaces the carried stride only when
// the strides mismatch and the current state is not 2'b00; otherwise the carried stride is kept.
assign pf2_next_stride_d = (~stride_match & (pf1_pf_state_q != 2'b00)) ? pf1_new_stride_q :
                                                                        pf1_rpt_stride_q;
|
|
|
|
// Burst counter incremented by one, saturating at 3.
assign burst_cnt_inc = (pf1_burst_cnt_q == 2'b00) ? 2'b01 :
                       (pf1_burst_cnt_q == 2'b01) ? 2'b10 :
                                                    2'b11;

// Burst counter update happens only in state 01:
//   stride matched    -> saturating increment
//   stride mismatched -> cleared to 0
// In every other state the counter is held.
assign pf2_burst_cnt_d = ((pf1_pf_state_q == 2'b01) &  stride_match) ? burst_cnt_inc :
                         ((pf1_pf_state_q == 2'b01) & ~stride_match) ? 2'b00 :
                                                                       pf1_burst_cnt_q;

// Data EA is passed from pf1 to pf2 unchanged.
assign pf2_data_ea_d = pf1_data_ea_q;
|
|
|
|
|
|
// pf2 stage: registers the generate-prefetch decision (generate_pfetch -> pf2_gen_pfetch_q).
tri_rlmreg_p #(.WIDTH(1), .INIT(0)) latch_pf2_gen_pfetch(
   // data
   .din(generate_pfetch),
   .dout(pf2_gen_pfetch_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf2_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf2_gen_pfetch_offset:pf2_gen_pfetch_offset]),
   .scout(sov[pf2_gen_pfetch_offset:pf2_gen_pfetch_offset]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf2 stage: 22-bit register for the stride to be written to the RPT (pf2_next_stride_d -> pf2_stride_q).
tri_rlmreg_p #(.WIDTH(22), .INIT(0)) latch_pf2_rpt_stride(
   // data
   .din(pf2_next_stride_d),
   .dout(pf2_stride_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf2_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf2_rpt_stride_offset:pf2_rpt_stride_offset + 22 - 1]),
   .scout(sov[pf2_rpt_stride_offset:pf2_rpt_stride_offset + 22 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf2 stage: data effective address register (pf2_data_ea_d -> pf2_data_ea_q).
tri_rlmreg_p #(.WIDTH((59-(64-(2**`GPR_WIDTH_ENC))+1)), .INIT(0)) latch_pf2_data_ea(
   // data
   .din(pf2_data_ea_d),
   .dout(pf2_data_ea_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf2_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf2_data_ea_offset:pf2_data_ea_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   .scout(sov[pf2_data_ea_offset:pf2_data_ea_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf2 stage: instruction address register, staged from pf1 (pf1_iar_q -> pf2_iar_q).
tri_rlmreg_p #(.WIDTH(`PF_IFAR_WIDTH), .INIT(0)) latch_pf2_iar(
   // data
   .din(pf1_iar_q),
   .dout(pf2_iar_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf2_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf2_iar_offset:pf2_iar_offset + `PF_IFAR_WIDTH - 1]),
   .scout(sov[pf2_iar_offset:pf2_iar_offset + `PF_IFAR_WIDTH - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf2 stage: 2-bit register for the next prefetch state (pf2_next_state_d -> pf2_pf_state_q).
tri_rlmreg_p #(.WIDTH(2), .INIT(0)) latch_pf2_pf_state(
   // data
   .din(pf2_next_state_d),
   .dout(pf2_pf_state_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf2_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf2_pf_state_offset:pf2_pf_state_offset + 2 - 1]),
   .scout(sov[pf2_pf_state_offset:pf2_pf_state_offset + 2 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf2 stage: 2-bit register for the updated burst counter (pf2_burst_cnt_d -> pf2_burst_cnt_q).
tri_rlmreg_p #(.WIDTH(2), .INIT(0)) latch_pf2_burst_cnt(
   // data
   .din(pf2_burst_cnt_d),
   .dout(pf2_burst_cnt_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf2_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf2_burst_cnt_offset:pf2_burst_cnt_offset + 2 - 1]),
   .scout(sov[pf2_burst_cnt_offset:pf2_burst_cnt_offset + 2 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf2 stage: 3-bit hit vector register, staged from pf1 (pf1_hits_q -> pf2_hits_q).
tri_rlmreg_p #(.WIDTH(3), .INIT(0)) latch_pf2_hits(
   // data
   .din(pf1_hits_q),
   .dout(pf2_hits_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf2_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf2_hits_offset:pf2_hits_offset + 3 - 1]),
   .scout(sov[pf2_hits_offset:pf2_hits_offset + 3 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// pf2 stage: 2-bit RPT parity-error register, staged from pf1 (pf1_rpt_pe_q -> pf2_rpt_pe_q).
tri_rlmreg_p #(.WIDTH(2), .INIT(0), .NEEDS_SRESET(1)) pf2_rpt_pe_latch(
   // data
   .din(pf1_rpt_pe_q),
   .dout(pf2_rpt_pe_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf2_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf2_rpt_pe_offset:pf2_rpt_pe_offset + 2 - 1]),
   .scout(sov[pf2_rpt_pe_offset:pf2_rpt_pe_offset + 2 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// pf2 stage: thread id register, staged from pf1 (pf1_thrd_q -> pf2_thrd_q).
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0)) latch_pf2_thrd(
   // data
   .din(pf1_thrd_q),
   .dout(pf2_thrd_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf2_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf2_thrd_offset:pf2_thrd_offset + `THREADS - 1]),
   .scout(sov[pf2_thrd_offset:pf2_thrd_offset + `THREADS - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// RPT update
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// pf2 request valid qualifier used for the RPT/LRU update.
assign pf2_valid = pf2_req_val_4pf_q;

// RPT write data, field by field.
assign rpt_data_in[0]                    = ~(|pf2_rpt_pe_q);      // valid bit (cleared when either parity-error bit is set)
assign rpt_data_in[1:22]                 = pf2_data_ea_q[38:59];  // last data address
assign rpt_data_in[23:44]                = pf2_stride_q;          // stride
assign rpt_data_in[45:51]                = pf2_iar_q[50:56];      // iar tag
assign rpt_data_in[52:53]                = pf2_pf_state_q;        // prefetch state
assign rpt_data_in[54:55]                = pf2_burst_cnt_q;       // burst counter
assign rpt_data_in[56]                   = pf2_gen_pfetch_q;      // duplicate flag (driven by the generate-prefetch decision)
assign rpt_data_in[57:57 + `THREADS - 1] = pf2_thrd_q;            // thread id
|
|
|
|
// Parity bits, one per 8-bit group of the RPT write data (XOR reduction of each group).
// The parity for group 0 additionally folds in inj_pfetch_parity_q.
assign rpt_data_in[57 + `THREADS] = ^{rpt_data_in[0:7], inj_pfetch_parity_q};
assign rpt_data_in[58 + `THREADS] = ^rpt_data_in[8:15];
assign rpt_data_in[59 + `THREADS] = ^rpt_data_in[16:23];
assign rpt_data_in[60 + `THREADS] = ^rpt_data_in[24:31];
assign rpt_data_in[61 + `THREADS] = ^rpt_data_in[32:39];
assign rpt_data_in[62 + `THREADS] = ^rpt_data_in[40:47];
assign rpt_data_in[63 + `THREADS] = ^rpt_data_in[48:55];
assign rpt_data_in[64 + `THREADS] = ^rpt_data_in[56:57 + `THREADS - 1];   // duplicate flag + thread id

// Remaining bits up to bit 69 are unused and tied to zero.
assign rpt_data_in[65 + `THREADS:69] = 0;

// RPT write address comes from instruction address bits 57:61.
assign rpt_wrt_addr = pf2_iar_q[57:61];
|
|
|
|
|
|
// Selects the current LRU bit of the RPT row addressed by pf2_iar_q[57:61]
// by OR-ing together the per-row LRU bits gated with an address compare.
always @(*)
begin: old_lru_proc
   reg     lru_sel;

   //(* analysis_not_referenced="true" *)
   integer row;

   lru_sel = 1'b0;
   for (row = 0; row <= 31; row = row + 1)
      lru_sel = lru_sel | ((pf2_iar_q[57:61] == row[4:0]) & rpt_lru_q[row]);

   old_rpt_lru = lru_sel;
end
|
|
|
|
// New LRU value for the addressed row:
//   hits[0:1] == 01 -> 0
//   hits[0:1] == 10 -> 1
//   otherwise       -> inverse of the row's current LRU bit
assign new_rpt_lru = (pf2_hits_q[0:1] == 2'b01) ? 1'b0 :
                     (pf2_hits_q[0:1] == 2'b10) ? 1'b1 :
                                                  ~old_rpt_lru;
|
|
|
|
// Per-row LRU next-state: the row addressed by pf2_iar_q[57:61] takes new_rpt_lru,
// every other row holds its current value.
generate
   begin : xhdl6
      genvar i;
      for (i = 0; i <= 31; i = i + 1)
      begin : rpt_lru_gen
         wire [0:4] row_idx = i;   // genvar as a 5-bit value for the address compare
         assign rpt_lru_d[i] = (pf2_iar_q[57:61] == row_idx) ? new_rpt_lru :
                                                               rpt_lru_q[i];
      end
   end
endgenerate
|
|
|
|
// RPT write enables:
//   - if either parity-error bit is set, the parity-error vector itself is used as the write enable
//   - otherwise, on a valid pf2 request, new_rpt_lru selects the enable (0 -> 2'b01, 1 -> 2'b10)
//   - otherwise no write
assign rpt_wen[0:1] = (|pf2_rpt_pe_q)             ? pf2_rpt_pe_q :
                      (pf2_valid & ~new_rpt_lru)  ? 2'b01 :
                      (pf2_valid &  new_rpt_lru)  ? 2'b10 :
                                                    2'b00;
|
|
|
|
|
|
// 32-bit LRU register, one bit per RPT row (rpt_lru_d -> rpt_lru_q); enabled by pf2_valid.
tri_rlmreg_p #(.WIDTH(32), .INIT(0)) latch_rpt_lru(
   // data
   .din(rpt_lru_d),
   .dout(rpt_lru_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf2_valid),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[rpt_lru_offset:rpt_lru_offset + 32 - 1]),
   .scout(sov[rpt_lru_offset:rpt_lru_offset + 32 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// Create new prefetches based on the current load request and store them into the queue
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// latch new EA, stride, and dup flag into the prefetch queue
|
|
|
|
// Write pointer + 1 (wraps within `PFETCH_Q_SIZE_ENC bits).
assign pfq_wrt_ptr_plus1 = pfq_wrt_ptr_q + value1[32-`PFETCH_Q_SIZE_ENC:31];

// A queue entry is written when a prefetch is generated, it is not a duplicate access
// to the same cache line, the queue is not full and prefetching is not disabled.
assign pfq_wrt_val = generate_pfetch &
                     ~(pf1_dup_flag_q & pf1_same_cline_q) &
                     ~pfq_full_q &
                     ~pf1_disable;

// Queue full: set when a write advances the write pointer onto the read pointer;
// held while (write pointer + 1) differs from the read pointer; cleared by ctl_pf_clear_queue.
assign pfq_full_d = ~ctl_pf_clear_queue &
                    (((pfq_wrt_ptr_plus1 == pfq_rd_ptr_q) & pfq_wrt_val) |
                     ((pfq_wrt_ptr_plus1 != pfq_rd_ptr_q) & pfq_full_q));

// Write pointer: reset on queue clear, advanced on each write, otherwise held.
assign pfq_wrt_ptr_d = ctl_pf_clear_queue ? {`PFETCH_Q_SIZE_ENC{1'b0}} :
                       pfq_wrt_val        ? pfq_wrt_ptr_plus1 :
                                            pfq_wrt_ptr_q;

// First prefetch address = data EA + stride, with the stride sign-extended
// (bit 0 replicated) to the width of the EA.
assign pf1_new_data_ea = pf1_data_ea_q +
                         ({ {59-21-1-(64-(2**`GPR_WIDTH_ENC))+1{pf1_rpt_stride_q[0]}}, pf1_rpt_stride_q });
|
|
|
|
// Prefetch queue storage: one set of stride / data EA / thread / DSCR registers per queue entry.
// Entry i is written when pfq_wrt_val is active and the write pointer equals i.
generate
   begin : xhdl7
      genvar i;
      for (i = 0; i <= `PFETCH_Q_SIZE - 1; i = i + 1)
      begin : pfq_gen
         wire [0:`PFETCH_Q_SIZE_ENC-1] slot_idx = i;   // genvar as a pointer-width value

         // write enable for this entry
         assign pfq_wen[i] = (pfq_wrt_ptr_q == slot_idx) & pfq_wrt_val;

         // stride, first prefetch address, duplicate flag and DSCR bits: load on write, else hold
         assign pfq_stride_d[i]   = pfq_wen[i] ? pf1_rpt_stride_q : pfq_stride_q[i];
         assign pfq_data_ea_d[i]  = pfq_wen[i] ? pf1_new_data_ea  : pfq_data_ea_q[i];
         assign pfq_dup_flag_d[i] = pfq_wen[i] ? pf1_dup_flag_q   : pfq_dup_flag_q[i];
         assign pfq_dscr_d[i]     = pfq_wen[i] ? pf1_dscr[61:63]  : pfq_dscr_q[i];

         // thread vector: cleared on queue clear, loaded on write,
         // cleared again when this entry is the one being completed (pf_done), else held
         assign pfq_thrd_d[i] = ctl_pf_clear_queue                   ? {`THREADS{1'b0}} :
                                pfq_wen[i]                           ? pf1_thrd_q :
                                (pf_done & (pfq_rd_ptr_q == slot_idx)) ? {`THREADS{1'b0}} :
                                                                       pfq_thrd_q[i];

         // stride register of entry i
         tri_rlmreg_p #(.WIDTH(22), .INIT(0), .NEEDS_SRESET(1)) pfq_stride_latch(
            .din(pfq_stride_d[i]),
            .dout(pfq_stride_q[i]),
            .nclk(nclk),
            .act(pf2_act),
            .force_t(func_sl_force),
            .thold_b(func_sl_thold_0_b),
            .sg(sg_0),
            .d_mode(d_mode_dc),
            .delay_lclkr(delay_lclkr_dc),
            .mpw1_b(mpw1_dc_b),
            .mpw2_b(mpw2_dc_b),
            .scin(siv[pfq_stride_offset + 22 * i:pfq_stride_offset + 22 * (i + 1) - 1]),
            .scout(sov[pfq_stride_offset + 22 * i:pfq_stride_offset + 22 * (i + 1) - 1]),
            .vd(vdd),
            .gd(gnd)
         );

         // data EA register of entry i
         tri_rlmreg_p #(.WIDTH((59-(64-(2**`GPR_WIDTH_ENC))+1)), .INIT(0), .NEEDS_SRESET(1)) pfq_data_ea_latch(
            .din(pfq_data_ea_d[i]),
            .dout(pfq_data_ea_q[i]),
            .nclk(nclk),
            .act(pf2_act),
            .force_t(func_sl_force),
            .thold_b(func_sl_thold_0_b),
            .sg(sg_0),
            .d_mode(d_mode_dc),
            .delay_lclkr(delay_lclkr_dc),
            .mpw1_b(mpw1_dc_b),
            .mpw2_b(mpw2_dc_b),
            .scin(siv[pfq_data_ea_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) * i:pfq_data_ea_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) * (i + 1) - 1]),
            .scout(sov[pfq_data_ea_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) * i:pfq_data_ea_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) * (i + 1) - 1]),
            .vd(vdd),
            .gd(gnd)
         );

         // thread vector register of entry i (always clocked: act tied to tiup)
         tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0), .NEEDS_SRESET(1)) pfq_thrd_latch(
            .din(pfq_thrd_d[i]),
            .dout(pfq_thrd_q[i]),
            .nclk(nclk),
            .act(tiup),
            .force_t(func_sl_force),
            .thold_b(func_sl_thold_0_b),
            .sg(sg_0),
            .d_mode(d_mode_dc),
            .delay_lclkr(delay_lclkr_dc),
            .mpw1_b(mpw1_dc_b),
            .mpw2_b(mpw2_dc_b),
            .scin(siv[pfq_thrd_offset + `THREADS * i:pfq_thrd_offset + `THREADS * (i + 1) - 1]),
            .scout(sov[pfq_thrd_offset + `THREADS * i:pfq_thrd_offset + `THREADS * (i + 1) - 1]),
            .vd(vdd),
            .gd(gnd)
         );

         // DSCR bits (61:63) register of entry i
         tri_rlmreg_p #(.WIDTH(3), .INIT(0), .NEEDS_SRESET(1)) pfq_dscr_latch(
            .din(pfq_dscr_d[i]),
            .dout(pfq_dscr_q[i]),
            .nclk(nclk),
            .act(pf2_act),
            .force_t(func_sl_force),
            .thold_b(func_sl_thold_0_b),
            .sg(sg_0),
            .d_mode(d_mode_dc),
            .delay_lclkr(delay_lclkr_dc),
            .mpw1_b(mpw1_dc_b),
            .mpw2_b(mpw2_dc_b),
            .scin(siv[pfq_dscr_offset + 3 * i:pfq_dscr_offset + 3 * (i + 1) - 1]),
            .scout(sov[pfq_dscr_offset + 3 * i:pfq_dscr_offset + 3 * (i + 1) - 1]),
            .vd(vdd),
            .gd(gnd)
         );
      end
   end
endgenerate
|
|
|
|
|
|
// Duplicate flags of all queue entries, one bit per entry (pfq_dup_flag_d -> pfq_dup_flag_q).
// NOTE(review): INIT(1) here differs from the INIT(0) used by the other prefetch queue
// latches in this region - confirm that a non-zero reset value is intended.
tri_rlmreg_p #(.WIDTH(`PFETCH_Q_SIZE), .INIT(1)) latch_pfq_dup_flag(
   // data
   .din(pfq_dup_flag_d),
   .dout(pfq_dup_flag_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf2_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pfq_dup_flag_offset:pfq_dup_flag_offset + `PFETCH_Q_SIZE - 1]),
   .scout(sov[pfq_dup_flag_offset:pfq_dup_flag_offset + `PFETCH_Q_SIZE - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// Queue-full flag (pfq_full_d -> pfq_full_q); always clocked (act tied to tiup).
tri_rlmlatch_p #(.INIT(0), .NEEDS_SRESET(1)) pfq_full_latch(
   // data
   .din(pfq_full_d),
   .dout(pfq_full_q),
   // clock and activity enable
   .nclk(nclk),
   .act(tiup),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain bit
   .scin(siv[pfq_full_offset]),
   .scout(sov[pfq_full_offset]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
|
|
// Queue write pointer (pfq_wrt_ptr_d -> pfq_wrt_ptr_q); always clocked (act tied to tiup).
tri_rlmreg_p #(.WIDTH(`PFETCH_Q_SIZE_ENC), .INIT(0), .NEEDS_SRESET(1)) pfq_wrt_ptr_latch(
   // data
   .din(pfq_wrt_ptr_d),
   .dout(pfq_wrt_ptr_q),
   // clock and activity enable
   .nclk(nclk),
   .act(tiup),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pfq_wrt_ptr_offset:pfq_wrt_ptr_offset + `PFETCH_Q_SIZE_ENC - 1]),
   .scout(sov[pfq_wrt_ptr_offset:pfq_wrt_ptr_offset + `PFETCH_Q_SIZE_ENC - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// State Machine to read the prefetch queue
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// The queue has an entry to read when the pointers differ or the queue is full,
// unless the queue is being cleared.
assign pf_rd_val = ((pfq_wrt_ptr_q != pfq_rd_ptr_q) | pfq_full_q) & ~ctl_pf_clear_queue;

// Per-thread empty indication: a thread is empty when it owns no queue entry and has
// no valid request in the ex6, ex7, ex8, pf1 or pf2 stages.
assign pf_empty = ~(pfq_thrd_v |
                    ({`THREADS{ex6_req_val_4pf_q}} & ex6_thrd_q) |
                    ({`THREADS{ex7_req_val_4pf_q}} & ex7_thrd_q) |
                    ({`THREADS{ex8_req_val_4pf_q}} & ex8_thrd_q) |
                    ({`THREADS{pf1_req_val_4pf_q}} & pf1_thrd_q) |
                    ({`THREADS{pf2_req_val_4pf_q}} & pf2_thrd_q));
|
|
|
|
// Decode of the state register, one bit per state.
assign pf_gen  = pf_state_q[0];   // generate first prefetch of an entry
assign pf_send = pf_state_q[1];   // request outstanding, waiting for dec_pf_ack
assign pf_next = pf_state_q[2];   // step to the next prefetch of the entry
assign pf_done = pf_state_q[3];   // entry finished
assign pf_idle = pf_state_q[4];   // idle
|
|
|
|
|
|
// Next-state logic of the prefetch queue read state machine.
// Each current-state bit is examined independently and sets exactly one next-state request.
always @(*)
begin: pf_state_mach
   // defaults: no next state requested
   pf_nxt_idle = 1'b0;
   pf_nxt_gen  = 1'b0;
   pf_nxt_send = 1'b0;
   pf_nxt_next = 1'b0;
   pf_nxt_done = 1'b0;

   // idle: start on a readable queue entry, otherwise stay idle
   if (pf_idle)
   begin
      if (pf_rd_val)
         pf_nxt_gen = 1'b1;
      else
         pf_nxt_idle = 1'b1;
   end

   // gen: send the request unless it is blocked as a duplicate
   if (pf_gen)
   begin
      if (!block_dup_pfetch)
         pf_nxt_send = 1'b1;
      else
         pf_nxt_next = 1'b1;
   end

   // send: wait here until the request is acknowledged
   if (pf_send)
   begin
      if (dec_pf_ack)
         pf_nxt_next = 1'b1;
      else
         pf_nxt_send = 1'b1;
   end

   // next: finish when the count reached zero, else send or skip the next prefetch
   if (pf_next)
   begin
      if (pf_count_q == 3'b000)
         pf_nxt_done = 1'b1;
      else if (!block_dup_pfetch)
         pf_nxt_send = 1'b1;
      else
         pf_nxt_next = 1'b1;
   end

   // done: always return to idle
   if (pf_done)
      pf_nxt_idle = 1'b1;
end
|
|
|
|
// Next-state vector: a queue clear forces idle (bit 4) and suppresses every other state bit.
assign pf_nxt_state[0] = pf_nxt_gen  & ~ctl_pf_clear_queue;
assign pf_nxt_state[1] = pf_nxt_send & ~ctl_pf_clear_queue;
assign pf_nxt_state[2] = pf_nxt_next & ~ctl_pf_clear_queue;
assign pf_nxt_state[3] = pf_nxt_done & ~ctl_pf_clear_queue;
assign pf_nxt_state[4] = pf_nxt_idle |  ctl_pf_clear_queue;
|
|
|
|
|
|
// 5-bit state register of the queue read state machine (pf_nxt_state -> pf_state_q); always clocked.
// NOTE(review): INIT(1) presumably sets bit 4 (idle) at reset, given pf_idle = pf_state_q[4] -
// confirm against the tri_rlmreg_p INIT bit ordering.
tri_rlmreg_p #(.WIDTH(5), .INIT(1)) latch_pf_state(
   // data
   .din(pf_nxt_state),
   .dout(pf_state_q),
   // clock and activity enable
   .nclk(nclk),
   .act(tiup),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf_state_offset:pf_state_offset + 5 - 1]),
   .scout(sov[pf_state_offset:pf_state_offset + 5 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// Count of prefetches still to issue for the current queue entry:
//   gen state  : loaded with the entry's DSCR bits 61:63 minus value2[29:31]
//   next state : decremented by value1[29:31]
//   otherwise  : held
assign pf_count_d = pf_gen  ? (pfq_rd_dscr[61:63] - value2[29:31]) :
                    pf_next ? (pf_count_q - value1[29:31]) :
                              pf_count_q;
|
|
|
|
|
|
// 3-bit prefetch count register (pf_count_d -> pf_count_q); always clocked (act tied to tiup).
tri_rlmreg_p #(.WIDTH(3), .INIT(0)) latch_pf_count(
   // data
   .din(pf_count_d),
   .dout(pf_count_q),
   // clock and activity enable
   .nclk(nclk),
   .act(tiup),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf_count_offset:pf_count_offset + 3 - 1]),
   .scout(sov[pf_count_offset:pf_count_offset + 3 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// Read pointer: reset on queue clear, incremented when the prefetches for the
// current entry are done, otherwise held.
assign pfq_rd_ptr_d = ctl_pf_clear_queue ? {`PFETCH_Q_SIZE_ENC{1'b0}} :
                      pf_done            ? (pfq_rd_ptr_q + value1[32-`PFETCH_Q_SIZE_ENC:31]) :
                                           pfq_rd_ptr_q;
|
|
|
|
|
|
// Queue read pointer (pfq_rd_ptr_d -> pfq_rd_ptr_q); always clocked (act tied to tiup).
tri_rlmreg_p #(.WIDTH(`PFETCH_Q_SIZE_ENC), .INIT(0), .NEEDS_SRESET(1)) pfq_rd_ptr_latch(
   // data
   .din(pfq_rd_ptr_d),
   .dout(pfq_rd_ptr_q),
   // clock and activity enable
   .nclk(nclk),
   .act(tiup),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pfq_rd_ptr_offset:pfq_rd_ptr_offset + `PFETCH_Q_SIZE_ENC - 1]),
   .scout(sov[pfq_rd_ptr_offset:pfq_rd_ptr_offset + `PFETCH_Q_SIZE_ENC - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// mux next address from prefetch queue
|
|
|
|
// Read mux of the prefetch queue: AND-OR selection of the entry addressed by pfq_rd_ptr_q.
// Also ORs the thread vectors of all entries into pfq_thrd_v.
always @(*)
begin: pfq_rd_data_proc
   reg [0:21]                         mux_stride;
   reg [64-(2**`GPR_WIDTH_ENC):59]    mux_data_ea;
   reg                                mux_dup_flag;
   reg [0:`THREADS-1]                 mux_thrd;
   reg [61:63]                        mux_dscr;
   reg [0:`THREADS-1]                 any_thrd;
   reg                                sel;
   reg [0:31]                         slot;

   mux_stride   = {22{1'b0}};
   mux_data_ea  = {59-(64-(2**`GPR_WIDTH_ENC))+1{1'b0}};
   mux_dup_flag = 1'b0;
   mux_thrd     = {`THREADS{1'b0}};
   mux_dscr     = {3{1'b0}};
   any_thrd     = {`THREADS{1'b0}};

   for (slot = 0; slot <= `PFETCH_Q_SIZE - 1; slot = slot + 1)
   begin
      // this entry is the one addressed by the read pointer
      sel = (pfq_rd_ptr_q == slot[32-`PFETCH_Q_SIZE_ENC:31]);

      mux_stride   = mux_stride   | ({22{sel}} & pfq_stride_q[slot]);
      mux_data_ea  = mux_data_ea  | ({59-(64-(2**`GPR_WIDTH_ENC))+1{sel}} & pfq_data_ea_q[slot]);
      mux_dup_flag = mux_dup_flag | (sel & pfq_dup_flag_q[slot]);
      mux_thrd     = mux_thrd     | ({`THREADS{sel}} & pfq_thrd_q[slot]);
      mux_dscr     = mux_dscr     | ({3{sel}} & pfq_dscr_q[slot]);

      // threads owning any queue entry, regardless of the read pointer
      any_thrd     = any_thrd | pfq_thrd_q[slot];
   end

   pf3_stride_d    = mux_stride;
   pfq_rd_data_ea  = mux_data_ea;
   pfq_rd_dup_flag = mux_dup_flag;
   pfq_rd_thrd     = mux_thrd;
   pfq_rd_dscr     = mux_dscr;
   pfq_thrd_v      = any_thrd;
end
|
|
|
|
// The pf3 registers are enabled whenever the read state machine is not idle.
assign pf3_act = ~pf_idle;

// pf3 stage: 22-bit stride of the queue entry being processed (pf3_stride_d -> pf3_stride_q).
tri_rlmreg_p #(.WIDTH(22), .INIT(0), .NEEDS_SRESET(1)) pf3_stride_latch(
   // data
   .din(pf3_stride_d),
   .dout(pf3_stride_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf3_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf3_stride_offset:pf3_stride_offset + 22 - 1]),
   .scout(sov[pf3_stride_offset:pf3_stride_offset + 22 - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);
|
|
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
// Prefetch Generation
|
|
// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
|
|
|
// After the 1st set of N prefetches, only the last of N is prefetched:
// a duplicate entry is blocked unless the next count value is zero.
assign block_dup_pfetch = pfq_rd_dup_flag & (pf_count_d != 3'b000);

// Request valid:
//   gen, or next with a non-zero count : raise if the queue is readable and not blocked
//   send without acknowledge           : hold, unless the queue is being cleared
//   otherwise                          : drop
assign pf3_req_val_d = (pf_gen | (pf_next & (pf_count_q != 3'b000))) ? (pf_rd_val & ~block_dup_pfetch) :
                       (pf_send & ~dec_pf_ack)                       ? (pf3_req_val_q & ~ctl_pf_clear_queue) :
                                                                       1'b0;

// Thread of the request: captured from the queue in the gen state, otherwise held.
assign pf3_thrd_d = pf_gen ? pfq_rd_thrd :
                             pf3_thrd_q;

// Request address: loaded from the queue in the gen state, advanced by the
// sign-extended stride in the next state, otherwise held.
assign pf3_req_addr_d = pf_gen  ? pfq_rd_data_ea :
                        pf_next ? (pf3_req_addr_q + ({ {59-21-1-(64-(2**`GPR_WIDTH_ENC))+1{pf3_stride_q[0]}}, pf3_stride_q })) :
                                  pf3_req_addr_q;
|
|
|
|
|
|
// pf3 stage: prefetch request address register (pf3_req_addr_d -> pf3_req_addr_q).
tri_rlmreg_p #(.WIDTH((59-(64-(2**`GPR_WIDTH_ENC))+1)), .INIT(0)) latch_pf3_req_addr(
   // data
   .din(pf3_req_addr_d),
   .dout(pf3_req_addr_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf3_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf3_req_addr_offset:pf3_req_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   .scout(sov[pf3_req_addr_offset:pf3_req_addr_offset + (59-(64-(2**`GPR_WIDTH_ENC))+1) - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);

// Address sent with the request: the registered address truncated at bit 63 - `CL_SIZE.
assign pf_dec_req_addr = pf3_req_addr_q[64 - (2 ** `GPR_WIDTH_ENC):63 - `CL_SIZE];
|
|
|
|
|
|
// pf3 stage: prefetch request valid register (pf3_req_val_d -> pf3_req_val_q).
tri_rlmreg_p #(.WIDTH(1), .INIT(0)) latch_pf3_req_val(
   // data
   .din(pf3_req_val_d),
   .dout(pf3_req_val_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf3_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf3_req_val_offset:pf3_req_val_offset]),
   .scout(sov[pf3_req_val_offset:pf3_req_val_offset]),
   // power
   .vd(vdd),
   .gd(gnd)
);

// Outgoing request valid is suppressed while the queue is being cleared.
assign pf_dec_req_val = pf3_req_val_q & ~ctl_pf_clear_queue;
|
|
|
|
|
|
// pf3 stage: thread id of the prefetch request (pf3_thrd_d -> pf3_thrd_q).
tri_rlmreg_p #(.WIDTH(`THREADS), .INIT(0)) latch_pf3_thrd(
   // data
   .din(pf3_thrd_d),
   .dout(pf3_thrd_q),
   // clock and activity enable
   .nclk(nclk),
   .act(pf3_act),
   // clock/test controls
   .force_t(func_sl_force),
   .thold_b(func_sl_thold_0_b),
   .sg(sg_0),
   .d_mode(d_mode_dc),
   .delay_lclkr(delay_lclkr_dc),
   .mpw1_b(mpw1_dc_b),
   .mpw2_b(mpw2_dc_b),
   // scan chain segment
   .scin(siv[pf3_thrd_offset:pf3_thrd_offset + `THREADS - 1]),
   .scout(sov[pf3_thrd_offset:pf3_thrd_offset + `THREADS - 1]),
   // power
   .vd(vdd),
   .gd(gnd)
);

// Thread id sent with the request.
assign pf_dec_req_thrd = pf3_thrd_q;
|
|
|
|
// Scan chain hookup: scan_in enters through rpt_func_scan_in; rpt_func_scan_out feeds
// the last position of the local chain (siv[scan_right]); each scan-in position k is
// driven by scan-out position k+1; the chain leaves the module at sov[0].
assign rpt_func_scan_in  = scan_in;
assign siv[0:scan_right] = {sov[1:scan_right], rpt_func_scan_out};
assign scan_out          = sov[0];
|
|
|
|
endmodule
|