You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3280 lines
140 KiB
Verilog

// © IBM Corp. 2020
// Licensed under the Apache License, Version 2.0 (the "License"), as modified by
// the terms below; you may not use the files in this repository except in
// compliance with the License as modified.
// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
//
// Modified Terms:
//
// 1) For the purpose of the patent license granted to you in Section 3 of the
// License, the "Work" hereby includes implementations of the work of authorship
// in physical form.
//
// 2) Notwithstanding any terms to the contrary in the License, any licenses
// necessary for implementation of the Work that are available from OpenPOWER
// via the Power ISA End User License Agreement (EULA) are explicitly excluded
// hereunder, and may be obtained from OpenPOWER under the terms and conditions
// of the EULA.
//
// Unless required by applicable law or agreed to in writing, the reference design
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
// for the specific language governing permissions and limitations under the License.
//
// Additional rights, including the ability to physically implement a softcore that
// is compliant with the required sections of the Power ISA Specification, are
// available at no cost under the terms of the OpenPOWER Power ISA EULA, which can be
// obtained (along with the Power ISA) here: https://openpowerfoundation.org.
`timescale 1 ns / 1 ns
//==##########################################################################
//==### FU_DIVSQRT.VHDL #########
//==### #########
//==##########################################################################
`include "tri_a2o.vh"
module fu_divsqrt(
vdd,
gnd,
clk,
rst,
clkoff_b,
act_dis,
flush,
delay_lclkr,
mpw1_b,
mpw2_b,
sg_1,
thold_1,
fpu_enable,
f_dsq_si,
f_dsq_so,
ex0_act_b,
f_dcd_ex0_div,
f_dcd_ex0_divs,
f_dcd_ex0_sqrt,
f_dcd_ex0_sqrts,
f_dcd_ex0_record_v,
f_dcd_ex2_divsqrt_hole_v,
f_dcd_flush,
f_dcd_ex1_itag,
f_dcd_ex1_fpscr_addr,
f_dcd_ex1_instr_frt,
f_dcd_ex1_instr_tid,
f_dcd_ex1_divsqrt_cr_bf,
f_dcd_axucr0_deno,
f_scr_ex6_fpscr_rm_thr0,
f_scr_ex6_fpscr_ee_thr0,
f_scr_ex6_fpscr_rm_thr1,
f_scr_ex6_fpscr_ee_thr1,
f_fmt_ex2_a_sign_div,
f_fmt_ex2_a_expo_div_b,
f_fmt_ex2_a_frac_div,
f_fmt_ex2_b_sign_div,
f_fmt_ex2_b_expo_div_b,
f_fmt_ex2_b_frac_div,
f_fmt_ex2_a_zero,
f_fmt_ex2_a_zero_dsq,
f_fmt_ex2_a_expo_max,
f_fmt_ex2_a_expo_max_dsq,
f_fmt_ex2_a_frac_zero,
f_fmt_ex2_b_zero,
f_fmt_ex2_b_zero_dsq,
f_fmt_ex2_b_expo_max,
f_fmt_ex2_b_expo_max_dsq,
f_fmt_ex2_b_frac_zero,
f_dsq_ex3_hangcounter_trigger,
f_dsq_ex5_divsqrt_v,
f_dsq_ex6_divsqrt_v,
f_dsq_ex6_divsqrt_record_v,
f_dsq_ex6_divsqrt_v_suppress,
f_dsq_ex5_divsqrt_itag,
f_dsq_ex6_divsqrt_fpscr_addr,
f_dsq_ex6_divsqrt_instr_frt,
f_dsq_ex6_divsqrt_instr_tid,
f_dsq_ex6_divsqrt_cr_bf,
f_dsq_ex6_divsqrt_sign,
f_dsq_ex6_divsqrt_exp,
f_dsq_ex6_divsqrt_fract,
f_dsq_ex6_divsqrt_flag_fpscr,
f_dsq_debug
);
inout vdd;
inout gnd;
input clk;
input rst;
input clkoff_b; // tiup
input act_dis; // ??tidn??
input flush; // ??tidn??
input delay_lclkr; // tidn,
input mpw1_b; // tidn,
input mpw2_b; // tidn,
input sg_1;
input thold_1;
input fpu_enable; //dc_act
//--------------------------------------------------------------------------
input f_dsq_si; //perv scan
output f_dsq_so; //perv scan
input ex0_act_b;
//--------------------------------------------------------------------------
input f_dcd_ex0_div;
input f_dcd_ex0_divs;
input f_dcd_ex0_sqrt;
input f_dcd_ex0_sqrts;
input f_dcd_ex0_record_v;
input f_dcd_ex2_divsqrt_hole_v;
//--------------------------------------------------------------------------
input [0:1] f_dcd_flush;
input [0:6] f_dcd_ex1_itag;
input [0:5] f_dcd_ex1_fpscr_addr;
input [0:5] f_dcd_ex1_instr_frt;
input [0:3] f_dcd_ex1_instr_tid;
input [0:4] f_dcd_ex1_divsqrt_cr_bf;
input f_dcd_axucr0_deno;
input [0:1] f_scr_ex6_fpscr_rm_thr0;
input [0:4] f_scr_ex6_fpscr_ee_thr0; // FPSCR VE,OE,UE,ZE,XE
input [0:1] f_scr_ex6_fpscr_rm_thr1;
input [0:4] f_scr_ex6_fpscr_ee_thr1; // FPSCR VE,OE,UE,ZE,XE
//--------------------------------------------------------------------------
input f_fmt_ex2_a_sign_div; // these operands are actually ex2
input [01:13] f_fmt_ex2_a_expo_div_b;
input [01:52] f_fmt_ex2_a_frac_div;
input f_fmt_ex2_b_sign_div;
input [01:13] f_fmt_ex2_b_expo_div_b;
input [01:52] f_fmt_ex2_b_frac_div;
input f_fmt_ex2_a_zero;
input f_fmt_ex2_a_zero_dsq;
input f_fmt_ex2_a_expo_max;
input f_fmt_ex2_a_expo_max_dsq;
input f_fmt_ex2_a_frac_zero;
input f_fmt_ex2_b_zero;
input f_fmt_ex2_b_zero_dsq;
input f_fmt_ex2_b_expo_max;
input f_fmt_ex2_b_expo_max_dsq;
input f_fmt_ex2_b_frac_zero;
output f_dsq_ex3_hangcounter_trigger;
//--------------------------------------------------------------------------
output [0:1] f_dsq_ex5_divsqrt_v;
output [0:1] f_dsq_ex6_divsqrt_v;
output f_dsq_ex6_divsqrt_record_v;
output f_dsq_ex6_divsqrt_v_suppress;
output [0:6] f_dsq_ex5_divsqrt_itag;
output [0:5] f_dsq_ex6_divsqrt_fpscr_addr;
output [0:5] f_dsq_ex6_divsqrt_instr_frt;
output [0:3] f_dsq_ex6_divsqrt_instr_tid;
output [0:4] f_dsq_ex6_divsqrt_cr_bf;
output f_dsq_ex6_divsqrt_sign; // needs to be right off of a latch
output [01:13] f_dsq_ex6_divsqrt_exp; // needs to be right off of a latch
output [00:52] f_dsq_ex6_divsqrt_fract; // needs to be right off of a latch
output [00:15] f_dsq_ex6_divsqrt_flag_fpscr;
output [00:63] f_dsq_debug;
//--------------------------------------------------------------------------
//==################################################
parameter tiup = 1'b1;
parameter tidn = 1'b0;
wire sg_0;
wire thold_0_b;
wire thold_0;
wire force_t;
//----------------------------------------------------------------------
// todo items:
//----------------------------------------------------------------------
wire [00:56] zeros;
wire [00:27] ones;
wire ex0_act;
wire ex1_act;
wire ex2_act;
wire ex3_act;
wire ex4_act;
wire [0:7] act_so;
wire [0:7] act_si;
wire [0:14] ex1_div_instr_lat_scin;
wire [0:14] ex1_div_instr_lat_scout;
wire [0:5] ex2_div_instr_lat_scin;
wire [0:5] ex2_div_instr_lat_scout;
wire [0:8] ex2_itag_lat_scin;
wire [0:8] ex2_itag_lat_scout;
wire [0:27] ex2_fpscr_addr_lat_scin;
wire [0:27] ex2_fpscr_addr_lat_scout;
wire [0:18] ex1_div_ctr_lat_scin;
wire [0:18] ex1_div_ctr_lat_scout;
wire [0:95] ex5_div_result_lat_scin;
wire [0:95] ex5_div_result_lat_scout;
wire [0:65] ex6_div_result_lat_scin;
wire [0:65] ex6_div_result_lat_scout;
wire [0:162] ex5_special_case_lat_scin;
wire [0:162] ex5_special_case_lat_scout;
wire [0:3] ex5_div_done_lat_scout;
wire [0:3] ex5_div_done_lat_scin;
wire HW165073_bits;
wire HW165073_hit;
wire ex1_divsqrt_running_d;
wire exx_divsqrt_running_q;
wire ex1_divsqrt_done;
wire ex2_divsqrt_done;
wire ex2_divsqrt_done_din;
wire ex2_waiting_for_hole;
wire ex2_divsqrt_zero;
wire ex3_divsqrt_done_din;
wire ex4_divsqrt_done_din;
wire ex5_divsqrt_done_din;
wire ex3_divsqrt_done;
wire ex4_divsqrt_done;
wire ex4_divsqrt_done_q;
wire ex5_divsqrt_done;
wire ex6_divsqrt_done;
wire ex4_start_a_denorm_result;
wire ex4_start_denorm_result;
wire ex4_denormalizing_result;
wire ex4_denormalizing_result_done;
wire ex4_denormalizing_result_done_din;
wire ex5_denormalizing_result_done;
wire ex4_denormalizing_result_shifting;
wire ex4_divsqrt_denorm_hold;
wire ex4_denormalizing_result_rounding;
wire ex4_start_sp_denorm_result;
wire exp_eq_369;
wire exp_eq_380;
wire exp_eq_368;
wire exp_eq_367;
wire exp_eq_367to9;
wire ex4_force_36A;
wire ex4_force;
wire ex4_dnr_roundup_incexp;
wire ex4_roundup_incexp;
wire ex4_x_roundup_incexp;
wire ex5_x_roundup_incexp;
wire [0:70] ex2_div_a_stage_lat_scout;
wire [0:70] ex2_div_a_stage_lat_scin;
wire [0:70] ex2_div_b_stage_lat_scout;
wire [0:70] ex2_div_b_stage_lat_scin;
wire [0:113] ex3_div_PR_sumcarry_lat_scout;
wire [0:113] ex3_div_PR_sumcarry_lat_scin;
wire [0:7] ex3_div_PR_sum4carry4_lat_scout;
wire [0:7] ex3_div_PR_sum4carry4_lat_scin;
wire [0:113] ex3_div_Q_QM_lat_scin;
wire [0:113] ex3_div_Q_QM_lat_scout;
wire [0:113] ex3_div_bQ_QM_lat_scin;
wire [0:113] ex3_div_bQ_QM_lat_scout;
wire [0:167] ex3_sqrt_bitmask_lat_scin;
wire [0:167] ex3_sqrt_bitmask_lat_scout;
wire [0:51] ex2_div_exp_lat_scout;
wire [0:51] ex2_div_exp_lat_scin;
wire [0:55] ex3_denom_lat_scout;
wire [0:55] ex3_denom_lat_scin;
wire [0:26] exx_div_denorm_lat_scout;
wire [0:26] exx_div_denorm_lat_scin;
wire ex4_deno_force_zero;
wire exx_running_act_d;
wire exx_running_act_q;
(* analysis_not_referenced="TRUE" *)
wire [0:3] act_spare_unused;
(* analysis_not_referenced="TRUE" *)
wire [0:880] spare_unused;
wire ex0_record_v;
wire ex1_record_v;
wire ex2_record_v;
wire exx_record_v_din;
wire exx_record_v_q;
wire ex0_div;
wire ex0_divs;
wire ex0_sqrt;
wire ex0_sqrts;
wire ex1_div;
wire ex1_divs;
wire ex1_sqrt;
wire ex1_sqrts;
wire ex1_div_dout;
wire ex1_divs_dout;
wire ex1_sqrt_dout;
wire ex1_sqrts_dout;
wire ex2_div;
wire ex2_divs;
wire ex2_sqrt;
wire ex2_sqrts;
wire ex2_sp;
wire ex1_instr_v;
wire ex2_div_or_divs;
wire ex2_sqrt_or_sqrts;
wire ex0_anydivsqrt;
wire ex1_anydivsqrt;
wire ex2_anydivsqrt;
wire ex3_anydivsqrt;
wire ex4_anydivsqrt;
wire ex5_anydivsqrt;
wire ex6_anydivsqrt;
wire [0:6] ex1_itag_din;
wire [0:6] exx_itag_q;
wire [0:5] ex1_fpscr_addr_din;
wire [0:5] exx_fpscr_addr_q;
wire [0:5] ex1_instr_frt_din;
wire [0:5] exx_instr_frt_q;
wire [0:3] ex1_instr_tid_din;
wire [0:3] exx_instr_tid_q;
wire [0:1] tid_init;
wire [0:1] tid_hold;
wire [0:1] tid_clear;
wire [0:4] ex1_cr_bf_din;
wire [0:4] exx_cr_bf_q;
wire [0:7] ex0_op_cyc_count_din;
wire [0:7] ex1_op_cyc_count;
wire [0:7] ex2_hangcounter_din;
wire [0:7] ex3_hangcounter_q;
wire [0:7] ex3_div_hangcounter_lat_scout;
wire [0:7] ex3_div_hangcounter_lat_scin;
wire [0:63] f_dsq_debug_din;
wire [0:63] f_dsq_debug_q;
wire [0:63] f_dsq_debug_lat_scin;
wire [0:63] f_dsq_debug_lat_scout;
wire ex2_hangcounter_clear;
wire ex2_hangcounter_incr;
wire ex3_hangcounter_trigger;
wire ex4_sp;
wire ex4_dp;
wire exx_sp;
wire exx_dp;
wire ex1_cycles_init;
wire ex1_cycles_decr;
wire ex1_cycles_hold;
wire ex1_cycles_clear;
wire exx_single_precision_d;
wire exx_single_precision_q;
wire ex2_a_zero;
wire ex2_a_SPunderflow_zero;
wire ex2_a_expo_max;
wire ex2_a_SPoverflow_expo_max;
wire ex2_b_SPoverflow_expo_max;
wire ex2_a_frac_zero;
wire ex2_b_zero;
wire ex2_b_SPunderflow_zero;
wire ex2_b_expo_max;
wire ex2_b_frac_zero;
wire exx_a_zero_d;
wire exx_a_expo_max_d;
wire exx_a_frac_zero_d;
wire exx_b_zero_d;
wire exx_a_SPunderflow_zero_d;
wire exx_b_SPunderflow_zero_d;
wire exx_a_SPoverflow_expo_max_d;
wire exx_b_SPoverflow_expo_max_d;
wire exx_a_SPoverflow_expo_max_q;
wire exx_b_SPoverflow_expo_max_q;
wire exx_b_expo_max_d;
wire exx_b_frac_zero_d;
wire exx_a_zero_q;
wire exx_b_SPunderflow_zero_q;
wire exx_a_SPunderflow_zero_q;
wire exx_a_expo_max_q;
wire exx_a_frac_zero_q;
wire exx_b_zero_q;
wire exx_b_expo_max_q;
wire exx_b_frac_zero_q;
wire exx_a_NAN;
wire exx_b_NAN;
wire exx_a_INF;
wire exx_b_INF;
wire exx_a_SPoverflowINF;
wire exx_b_SPoverflowINF;
wire exx_b_ZER;
wire exx_a_ZER;
wire exx_b_SPunderflowZER;
wire exx_a_SPunderflowZER;
wire ex4_a_snan;
wire ex4_b_snan;
wire ex4_snan;
wire exx_hard_spec_case;
wire ex4_div_by_zero_zx;
wire ex4_zero_div_zero;
wire ex4_inf_div_inf;
wire ex4_sqrt_neg;
wire ex4_pass_a_nan;
wire ex4_pass_b_nan;
wire ex4_pass_nan;
wire ex4_pass_a_nan_sp;
wire ex4_pass_b_nan_sp;
wire ex4_pass_a_nan_dp;
wire ex4_pass_b_nan_dp;
wire exx_divsqrt_v_suppress_d;
wire exx_divsqrt_v_suppress_q;
wire ex4_force_zero;
wire ex4_force_zeroone;
wire ex4_force_inf;
wire ex5_force_inf;
wire ex4_force_maxnorm;
wire ex4_force_maxnorm_sp;
wire ex4_force_maxnorm_dp;
wire ex4_force_qnan;
wire ex4_div_special_case;
wire ex5_div_special_case;
wire exx_sqrt_d;
wire exx_div_d;
wire [00:03] exx_div_q;
wire [00:03] exx_sqrt_q;
wire [00:06] exx_fpscr_din;
wire [00:06] exx_fpscr_q;
wire [00:52] ex4_divsqrt_fract;
wire [00:56] ex4_divsqrt_fract_cur;
wire [00:56] ex4_divsqrt_fract_shifted;
wire [00:56] ex4_divsqrt_fract_shifted_dp;
wire [00:56] ex4_divsqrt_fract_shifted_spmasked;
wire [00:56] ex4_divsqrt_fract_stickymask;
wire [00:53] ex4_divsqrt_fract_dnr;
wire dn_lv1sh00;
wire dn_lv1sh01;
wire dn_lv1sh10;
wire dn_lv1sh11;
wire dn_lv2sh00;
wire dn_lv2sh01;
wire dn_lv2sh10;
wire dn_lv2sh11;
wire dn_lv3sh00;
wire dn_lv3sh01;
wire dn_lv3sh10;
wire dn_lv3sh11;
wire dnsp_lv1sh00;
wire dnsp_lv1sh01;
wire dnsp_lv1sh10;
wire dnsp_lv1sh11;
wire dnsp_lv2sh00;
wire dnsp_lv2sh01;
wire dnsp_lv2sh10;
wire dnsp_lv2sh11;
wire dnsp_lv3sh00;
wire dnsp_lv3sh01;
wire dnsp_lv3sh10;
wire dnsp_lv3sh11;
wire [00:59] ex4_divsqrt_fract_shifted_00to03;
wire [00:71] ex4_divsqrt_fract_shifted_00to12;
wire [00:119] ex4_divsqrt_fract_shifted_00to48;
wire [00:56] ex4_spdenorm_mask;
wire [00:59] ex4_spdenorm_mask_shifted_00to03;
wire [00:71] ex4_spdenorm_mask_shifted_00to12;
wire [00:119] ex4_spdenorm_mask_shifted_00to48;
wire [00:56] ex4_spdenorm_mask_lsb;
wire [00:59] ex4_spdenorm_mask_lsb_shifted_00to03;
wire [00:71] ex4_spdenorm_mask_lsb_shifted_00to12;
wire [00:119] ex4_spdenorm_mask_lsb_shifted_00to48;
wire [00:56] ex4_spdenorm_mask_guard;
wire [00:59] ex4_spdenorm_mask_guard_shifted_00to03;
wire [00:71] ex4_spdenorm_mask_guard_shifted_00to12;
wire [00:119] ex4_spdenorm_mask_guard_shifted_00to48;
wire [00:56] ex4_spdenorm_mask_round;
wire [00:59] ex4_spdenorm_mask_round_shifted_00to03;
wire [00:71] ex4_spdenorm_mask_round_shifted_00to12;
wire [00:119] ex4_spdenorm_mask_round_shifted_00to48;
wire [00:52] ex4_divsqrt_fract_special;
wire [00:52] ex5_divsqrt_fract_special;
wire [00:52] ex5_divsqrt_fract_d;
wire [00:52] ex6_divsqrt_fract_q;
wire [01:13] ex4_divsqrt_exp;
wire [01:13] ex4_divsqrt_exp_special;
wire [01:13] ex5_divsqrt_exp_special;
wire [01:13] ex5_divsqrt_exp_d;
wire [01:13] ex6_divsqrt_exp_q;
wire ex4_maxnorm_sign;
wire ex4_divsqrt_sign;
wire ex4_divsqrt_sign_special;
wire [1:52] ex2_b_fract;
wire [1:52] ex2_a_fract;
wire exx_a_sign_d;
wire [1:13] exx_a_biased_13exp_d;
wire [1:52] exx_a_fract_d;
wire exx_b_sign_d;
wire [1:13] exx_b_biased_13exp_d;
wire [1:52] exx_b_fract_d;
wire exx_a_sign_q;
wire [1:13] exx_a_biased_13exp_q;
wire [1:52] exx_a_fract_q;
wire exx_b_sign_q;
wire [1:13] exx_b_biased_13exp_q;
wire [1:52] exx_b_fract_q;
wire [1:13] exx_exp_ux_adj;
wire [1:13] exx_exp_ux_adj_dp;
wire [1:13] exx_exp_ux_adj_sp;
wire [1:13] exx_exp_ox_adj;
wire [1:13] exx_exp_ox_adj_dp;
wire [1:13] exx_exp_ox_adj_sp;
wire exx_invalid_mixed_precision;
wire [1:13] exx_b_ubexp;
wire [1:13] exy_b_ubexp;
wire [1:13] exx_exp_adj;
wire [1:13] exx_exp_adj_p1;
wire [0:12] exz_exp_addres_x0;
wire [0:12] exx_exp_addres_ux;
wire [0:12] exx_exp_addres_ox;
wire [0:12] exx_exp_addres;
wire [0:12] exx_exp_addres_div_x0;
wire [0:12] exx_exp_addres_sqrt_x0;
wire [0:12] exy_exp_addres_div_x0;
wire [0:12] exy_exp_addres_div_x0_m1;
wire [0:12] exz_exp_addres_div_x0_m1;
wire [0:12] exz_exp_addres_div_x0_adj;
wire [0:12] exy_exp_addres_sqrt_x0;
wire [0:12] exx_exp_addres_x0_p1;
wire [0:12] exx_exp_addres_ux_p1;
wire [0:12] exx_exp_addres_ox_p1;
wire [0:12] exy_exp_addres_x0_p1;
wire [0:12] exy_exp_addres_ux_p1;
wire [0:12] exy_exp_addres_ox_p1;
wire [0:12] exy_exp_addres_p1;
wire [0:12] exx_exp_addres_div_x0_p1;
wire [0:12] exx_exp_addres_sqrt_x0_p1;
wire ex4_expresult_zero;
wire [7:12] denorm_count_start;
wire [0:5] denorm_shift_amt;
wire [0:5] denorm_shift_amt_din;
wire [0:5] denorm_shift_amt_q;
wire [0:5] sp_denorm_shift_amt;
wire [0:5] sp_denorm_shift_amt_din;
wire [0:5] sp_denorm_shift_amt_q;
wire ex2_divsqrt_hole_v_b;
wire overflow;
wire underflow;
wire ueux;
wire oeox;
wire zezx;
wire vevx;
wire not_ueux_or_oeox;
wire exy_not_ueux_or_oeox;
wire exy_oeox;
wire exy_ueux;
wire overflow_sp;
wire sp_overflow_brink_x47E;
wire ex4_incexp_to_sp_overflow;
wire dp_overflow_brink_x7FE;
wire ex4_incexp_to_dp_overflow;
wire ex4_incexp_to_overflow;
wire underflow_sp;
wire overflow_dp;
wire underflow_dp;
wire underflow_denorm;
wire underflow_denorm_dp;
wire underflow_denorm_sp;
wire underflow_force_zero;
wire underflow_force_36A;
wire underflow_force_zeroone;
wire overflow_force_inf;
wire special_force_zero;
wire special_force_inf;
wire overflow_force_maxnorm;
wire underflow_sp_denorm;
wire sp_denorm_0x369roundup;
wire sp_denorm_underflow_zero;
wire sp_denorm_0x380roundup;
wire exx_q_bit0;
wire exx_q_bit0_cin;
wire exx_q_bit1;
wire exx_q_bit1_div;
wire exx_q_bit1_sqrt;
wire exx_q_bit1_cin_div;
wire exx_q_bit1_cin_sqrt;
wire exx_q_bit2;
wire exx_q_bit2_cin;
wire exx_q_bit3_div;
wire exx_q_bit3_cin_div;
wire exx_q_bit3_sqrt;
wire exx_q_bit3_cin_sqrt;
wire exx_q_bit3;
wire exx_nq_bit3;
wire [0:1] exx_q_bit22_sel;
wire exx_nq_bit0;
wire exx_nq_bit1;
wire exx_nq_bit1_div;
wire exx_nq_bit1_sqrt;
wire exx_nq_bit2;
wire exx_nq_bit3_div;
wire exx_nq_bit3_sqrt;
wire exx_notqornq_bit1_sqrt;
wire exx_notqornq_bit2;
wire exx_notqornq_bit3_sqrt;
wire exx_notqornq_bit1_div;
wire exx_notqornq_bit3_div;
wire exx_q_bit22;
wire exx_nq_bit22;
wire exx_q_bit22_div;
wire exx_nq_bit22_div;
wire exx_q_bit22_sqrt;
wire exx_nq_bit22_sqrt;
wire exx_notqornq_bit22_sqrt;
wire exx_notqornq_bit22_div;
wire exx_q_bit0_b;
wire exx_nq_bit0_b;
wire exx_q_bit0_prebuf;
wire exx_nq_bit0_prebuf;
wire [0:56] exx_Q_q;
wire [0:56] exx_Q_d;
wire [0:56] exx_QM_q;
wire [0:56] exx_QM_d;
wire [0:56] exx_bQ_q;
wire [0:56] exx_bQ_d;
wire [0:56] exx_bQM_q;
wire [0:56] exx_bQM_d;
wire [0:56] exx_lev0_csaout_sum;
wire [0:56] exx_lev0_csaout_carry;
wire [0:56] exx_lev0_csaoutsh_sum;
wire [0:56] exx_lev0_csaoutsh_carry;
wire exx_lev0_selD;
wire exx_lev0_selnD;
wire exx_lev0_selneg;
wire exx_lev0_selD_b;
wire exx_lev0_selnD_b;
wire exx_lev0_selQ;
wire exx_lev0_selMQ;
wire exx_lev0_selQ_b;
wire exx_lev0_selMQ_b;
wire exx_lev22_selD;
wire exx_lev22_selnD;
wire exx_lev22_selneg;
wire exx_lev22_selQ;
wire exx_lev22_selMQ;
wire [0:56] exx_lev0_csaout_carryout;
wire [0:56] exx_lev0_divsqrt_csaout_xor;
wire [0:56] exx_lev1_divsqrt_csaout_xor;
wire [0:56] exx_lev3_divsqrt_csaout_xor;
wire [0:56] exx_lev1_div_oper;
wire [0:56] exx_lev1_sqrt_oper;
wire [0:56] exx_lev3_div_oper;
wire [0:56] exx_lev3_sqrt_oper;
wire [0:56] exx_lev1_div_csaout_sum;
wire [0:56] exx_lev1_div_csaout_carry;
wire [0:56] exx_lev1_sqrt_csaout_sum;
wire [0:56] exx_lev1_sqrt_csaout_carry;
wire [0:56] exx_lev2_csaout_sum;
wire [0:56] exx_lev2_csaout_carry;
wire [0:56] exx_lev3_div_csaout_sum;
wire [0:56] exx_lev3_div_csaout_carry;
wire [0:56] exx_lev3_sqrt_csaout_sum;
wire [0:56] exx_lev3_sqrt_csaout_carry;
wire [0:56] exx_lev1_div_csaout_carryout;
wire [0:56] exx_lev1_sqrt_csaout_carryout;
wire [0:56] exx_lev2_csaout_carryout;
wire [0:56] exx_lev3_div_csaout_carryout;
wire [0:56] exx_lev3_sqrt_csaout_carryout;
wire [0:56] exx_lev22_csaout_carryout_div;
wire [0:56] exx_lev22_csaout_carryout_sqrt;
wire [0:56] exx_lev22_csaout_sum_sqrt;
wire [0:56] exx_lev22_csaout_carry_sqrt;
wire [0:56] exx_lev22_csaout_sum_div;
wire [0:56] exx_lev22_csaout_carry_div;
wire [0:56] exx_lev22_csaout_sum_xor;
wire [0:56] exx_PR_sum_d;
wire [0:56] exx_PR_sum_q;
wire [0:56] exx_PR_sum_d_late;
wire [0:56] exx_PR_sum_d_early;
wire [0:3] exx_PR_sum4_q;
wire [0:56] ex3_divsqrt_remainder;
wire [0:56] ex4_divsqrt_remainder;
wire ex3_rem_neg;
wire ex3_rem_neg_b;
wire [0:3] ex4_rem_neg;
wire [0:3] ex4_rem_neg_b;
wire [0:56] ex4_rem_neg_buf;
wire [0:56] ex4_rem_neg_buf_b;
wire ex4_rem_nonzero;
wire ex4_rem_nonzero_fi;
wire underflow_fi;
wire ex4_round_up;
wire ex4_round_up_underflow;
wire ex4_round_up_dnr;
wire ex3_norm_shl1;
wire ex3_norm_shl1_dp;
wire ex3_norm_shl1_sp;
wire ex4_norm_shl1;
wire ex4_norm_shl1_q;
wire ex4_norm_shl1_d;
wire ex4_norm_shl1_test;
wire [0:56] exx_PR_carry_d;
wire [0:56] exx_PR_carry_q;
wire [0:3] exx_PR_carry4_q;
wire [0:56] exx_PR_sum_shift;
wire [0:56] exx_PR_sum_final;
wire [0:56] exx_PR_carry_shift;
wire [0:56] exx_PR_carry_final;
wire [0:56] exx_PR_sum_q_shifted;
wire [0:56] exx_PR_carry_q_shifted;
wire [0:56] exx_Qin_lev0;
wire [0:56] exx_QMin_lev0;
wire [0:56] exx_Qin_lev1_sqrt;
wire [0:56] exx_QMin_lev1_sqrt;
wire [0:56] exx_Qin_lev1_div;
wire [0:56] exx_QMin_lev1_div;
wire [0:56] exx_bQin_lev1_sqrt;
wire [0:56] exx_bQMin_lev1_sqrt;
wire [0:56] exx_bQ_q_t;
wire [0:56] exx_bQM_q_t;
wire exx_Qin_lev0_sel0;
wire exx_Qin_lev0_sel1;
wire exx_QMin_lev0_sel0;
wire exx_QMin_lev0_sel1;
wire exx_QMin_lev0_sel2;
wire exx_Qin_lev1_sel0_sqrt;
wire exx_Qin_lev1_sel1_sqrt;
wire exx_Qin_lev1_sel0_div;
wire exx_Qin_lev1_sel1_div;
wire exx_QMin_lev1_sel0_div;
wire exx_QMin_lev1_sel1_div;
wire exx_QMin_lev1_sel2_div;
wire exx_QMin_lev1_sel0_sqrt;
wire exx_QMin_lev1_sel1_sqrt;
wire exx_QMin_lev1_sel2_sqrt;
wire [0:3] exx_sum4;
wire [0:3] exx_sum4_lev1_div;
wire [0:3] exx_sum4_lev1_sqrt;
wire [0:3] exx_sum4_lev2;
wire [0:3] exx_sum4_lev3_div;
wire [0:3] exx_sum4_lev3_sqrt;
wire [0:55] exx_denom_d;
wire [0:55] exx_denom_q;
wire [0:55] exx_denomQ_lev0;
wire [0:55] exx_denomQ_lev22_div;
wire [0:55] exx_denomQ_lev22_sqrt;
wire [0:55] exx_denomQ_lev0_nD_b;
wire [0:55] exx_denomQ_lev0_D_b;
wire [0:55] exx_denomQ_lev0_Q_b;
wire [0:55] exx_denomQ_lev0_MQ_b;
wire [0:55] exx_sqrtlev0_Q;
wire [0:55] exx_sqrtlev0_MQ;
wire [0:55] exx_sqrt_newbitmask_din;
wire [0:55] exx_sqrt_newbitmask_q;
wire [0:55] exx_sqrt_Qbitmask_din;
wire [0:55] exx_sqrt_Qbitmask_q;
wire [0:55] exx_sqrt_QMbitmask_din;
wire [0:55] exx_sqrt_QMbitmask_q;
wire [0:55] exx_sqrt_Qmaskvec;
wire [0:55] exx_sqrt_QMmaskvec;
wire wQ;
wire wMQ;
wire [0:55] exx_sqrtlev22_Q;
wire [0:55] exx_sqrtlev22_MQ;
wire [0:55] exx_bQin_lev0;
wire [0:55] exx_bQMin_lev0;
wire [0:55] exx_bQin_lev0_t;
wire [0:55] exx_bQMin_lev0_t;
wire ex2_PR_sum_sel0;
wire ex2_PR_sum_sel1;
wire ex2_PR_sum_sel2;
wire ex2_PR_sum_sel3;
wire ex2_PR_sum_sel4;
wire ex2_PR_sum_sel_late;
wire ex2_PR_sum_sel_early;
wire ex2_PR_carry_sel0;
wire ex2_PR_carry_sel1;
wire ex2_PR_carry_sel2;
wire [00:56] ex4_divsqrt_fract_preround;
wire [00:56] ex4_divsqrt_fract_preround_prenorm;
wire [00:53] ex4_divsqrt_fract_p0;
wire [00:53] ex4_divsqrt_fract_p1;
wire [00:53] ex5_divsqrt_fract_p1;
wire ex5_round_up;
wire [00:53] ex4_divsqrt_fract_dnr_p0;
wire [00:53] ex4_divsqrt_fract_dnr_p1;
wire [00:53] ex4_divsqrt_fract_dnr_sp_p0;
wire [00:53] ex4_divsqrt_fract_dnr_sp_p1;
wire [00:53] ex4_divsqrt_fract_dnr_dp;
wire [00:53] ex4_divsqrt_fract_dnr_sp;
wire [00:53] ex4_divsqrt_fract_dnr_sp_prem;
wire denorm_res_shiftoff_exp;
wire denorm_res_shiftoff_din;
wire denorm_res_shiftoff_q;
wire ex4_denorm_res_shiftoff_zero;
wire [00:53] ex4_divsqrt_fract_p0_sp;
wire [00:53] ex4_divsqrt_fract_p1_sp;
wire [00:53] ex4_divsqrt_fract_p0_dp;
wire [00:53] ex4_divsqrt_fract_p1_dp;
wire [00:53] ex4_divsqrt_fract_rounded;
wire exx_divsqrt_sign_d;
wire [01:13] exx_divsqrt_exp_d;
wire [00:56] exx_divsqrt_fract_d;
wire [00:15] exx_divsqrt_flag_fpscr_d;
wire exx_divsqrt_sign_q;
wire [01:13] exx_divsqrt_exp_q;
wire [00:56] exx_divsqrt_fract_q;
wire [00:15] exx_divsqrt_flag_fpscr_q;
wire n_flush_d;
wire n_flush;
wire [01:13] f_fmt_ex2_b_expo_div;
wire [01:13] f_fmt_ex2_a_expo_div;
wire [0:7] ex1_cycles_d;
wire [0:7] ex2_cycles_q;
wire lsb;
wire guard;
wire round;
wire sticky;
wire sticky_w_underflow;
wire denorm_sticky;
wire denorm_sticky_q;
wire denorm_sticky_din;
wire sign;
wire denorm_sticky_sp;
wire denorm_sticky_sp_q;
wire denorm_sticky_sp_din;
wire lsb_dnr;
wire guard_dnr;
wire round_dnr;
wire sticky_dnr;
wire lsb_dnr_sp;
wire guard_dnr_sp;
wire round_dnr_sp;
wire sticky_dnr_sp;
wire ex4_round_up_dnr_sp;
wire RNEmode;
wire RTZmode;
wire RPImode;
wire RNImode;
wire ex4_sp_inexact_roundbits;
wire ex4_denorm_result_det;
wire exp_gt_cap;
wire ex4_sp_denorm_result_det;
wire ex4_exp_le_896;
wire [00:13] denorm_exp_addres;
wire [00:13] denorm_exp_addres_sp;
wire [00:05] denorm_count_din;
wire [00:05] denorm_count_q;
wire VE; // FPSCR VE,OE,UE,ZE,XE
wire OE;
wire UE;
wire ZE;
wire XE;
//==##########################################
//# pervasive
//==##########################################
// Pervasive control staging.
// thold_reg_0: 1-bit tri_plat instance taking thold_1 and producing thold_0.
tri_plat #(
   .WIDTH(1)
) thold_reg_0 (
   .clk   (clk),
   .rst   (rst),
   .vd    (vdd),
   .gd    (gnd),
   .flush (flush),
   .din   (thold_1),
   .q     (thold_0)
);

// sg_reg_0: 1-bit tri_plat instance taking sg_1 and producing sg_0.
tri_plat #(
   .WIDTH(1)
) sg_reg_0 (
   .clk   (clk),
   .rst   (rst),
   .vd    (vdd),
   .gd    (gnd),
   .flush (flush),
   .din   (sg_1),
   .q     (sg_0)
);

// lcbor_0: combines clkoff_b, thold_0, sg_0 and act_dis; its force_t and
// thold_b connections (force_t, thold_0_b) are wired to every register
// instance below.
// NOTE(review): the internals of tri_lcbor are not visible here -- confirm the
// exact gating function against the tri library.
tri_lcbor lcbor_0 (
   .clkoff_b (clkoff_b),
   .act_dis  (act_dis),
   .thold    (thold_0),
   .sg       (sg_0),
   .thold_b  (thold_0_b),
   .force_t  (force_t)
);
//==##########################################
// ex0_act is the inverted form of the incoming ex0_act_b.
assign ex0_act = ~ex0_act_b;

// Flush hit: a flush on thread t (f_dcd_flush[t]) while the held thread-id
// bit exx_instr_tid_q[t] is set, for t = 0..1.
assign n_flush_d = |(f_dcd_flush[0:1] & exx_instr_tid_q[0:1]);

// Running indicator: set by a new EX0 div/sqrt op or held from the previous
// cycle, and dropped when ex4_divsqrt_done or the registered flush is active.
assign exx_running_act_d = (exx_running_act_q | ex0_anydivsqrt) &
                           (~ex4_divsqrt_done) &
                           (~n_flush);

// act_lat: 8-bit register (act tied to tiup).
//   bit 0    exx_running_act_d -> exx_running_act_q
//   bit 1    spare (driven with tidn)
//   bit 2-5  ex0_act -> ex1_act -> ex2_act -> ex3_act -> ex4_act pipeline
//   bit 6    n_flush_d -> n_flush
//   bit 7    spare (driven with tidn)
tri_rlmreg_p #(
   .INIT(0),
   .WIDTH(8),
   .NEEDS_SRESET(0)
) act_lat (
   .vd          (vdd),
   .gd          (gnd),
   .clk         (clk),
   .rst         (rst),
   .act         (tiup),
   .force_t     (force_t),
   .thold_b     (thold_0_b),
   .sg          (sg_0),
   .d_mode      (tiup),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .scin        (act_si),
   .scout       (act_so),
   .din         ({exx_running_act_d,
                  tidn,
                  ex0_act,
                  ex1_act,
                  ex2_act,
                  ex3_act,
                  n_flush_d,
                  tidn}),
   .dout        ({exx_running_act_q,
                  act_spare_unused[1],
                  ex1_act,
                  ex2_act,
                  ex3_act,
                  ex4_act,
                  n_flush,
                  act_spare_unused[3]})
);

//==##########################################
// Constant helper nets: 57 zero bits and 28 one bits.
assign zeros = 57'b0;
assign ones  = 28'hFFFFFFF;

// Spare bits that are not register outputs are tied to tidn.
assign act_spare_unused[0] = tidn;
assign act_spare_unused[2] = tidn;
//----------------------------------------------------------------------
//----------------------------------------------------------------------
// Algorithm
//
//
// cyc xx ex1_divsqrt_done=1, final cycle that the fract path is functioning for the main fract bits
// cyc xx ex2_divsqrt_done=1, extra 2 rounding bits generated, initial normalize (possible SHL by 1)
// cyc xx ex3_divsqrt_done=1, round
// cyc xx ex4_divsqrt_done=1, renormalize after rounding, compute the final exponent (+expadj)
// cyc xx ex5_divsqrt_done=1, final result is on the bus, directly off of the latch
// cyc xx
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------
// EX0 decode inputs, renamed locally.
assign ex0_div      = f_dcd_ex0_div;
assign ex0_divs     = f_dcd_ex0_divs;
assign ex0_sqrt     = f_dcd_ex0_sqrt;
assign ex0_sqrts    = f_dcd_ex0_sqrts;
assign ex0_record_v = f_dcd_ex0_record_v;

// Any of the four divide / square-root opcodes present in EX0.
assign ex0_anydivsqrt = |{ex0_div, ex0_divs, ex0_sqrt, ex0_sqrts};

// Per-opcode cycle-count value, selected by AND-OR on the EX0 opcode:
//   div = 30, divs = 16, sqrt = 29, sqrts = 15.
assign ex0_op_cyc_count_din[0:7] = ({8{ex0_div}}   & 8'd30) |
                                   ({8{ex0_divs}}  & 8'd16) |
                                   ({8{ex0_sqrt}}  & 8'd29) |
                                   ({8{ex0_sqrts}} & 8'd15);

// ex1_div_instr_lat: 15-bit register (act tied to tiup).
//   bits 0-3   EX0 opcode strobes      -> ex1_*_dout
//   bit  4     ex0_record_v            -> ex1_record_v
//   bits 5-12  ex0_op_cyc_count_din    -> ex1_op_cyc_count
//   bit  13    ex4_anydivsqrt          -> ex5_anydivsqrt
//   bit  14    ex5_anydivsqrt          -> ex6_anydivsqrt
tri_rlmreg_p #(
   .INIT(0),
   .WIDTH(15),
   .NEEDS_SRESET(0)
) ex1_div_instr_lat (
   .vd          (vdd),
   .gd          (gnd),
   .clk         (clk),
   .rst         (rst),
   .act         (tiup),
   .force_t     (force_t),
   .thold_b     (thold_0_b),
   .sg          (sg_0),
   .d_mode      (tiup),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .scin        (ex1_div_instr_lat_scin),
   .scout       (ex1_div_instr_lat_scout),
   .din         ({ex0_div,
                  ex0_divs,
                  ex0_sqrt,
                  ex0_sqrts,
                  ex0_record_v,
                  ex0_op_cyc_count_din,
                  ex4_anydivsqrt,
                  ex5_anydivsqrt}),
   .dout        ({ex1_div_dout,
                  ex1_divs_dout,
                  ex1_sqrt_dout,
                  ex1_sqrts_dout,
                  ex1_record_v,
                  ex1_op_cyc_count,
                  ex5_anydivsqrt,
                  ex6_anydivsqrt})
);
// The EX1 instruction is valid when any bit of the EX1 thread-id vector is set.
assign ex1_instr_v = |f_dcd_ex1_instr_tid;

// Registered EX1 opcode strobes are qualified with the valid indication.
assign {ex1_div, ex1_divs, ex1_sqrt, ex1_sqrts} =
          {4{ex1_instr_v}} & {ex1_div_dout, ex1_divs_dout, ex1_sqrt_dout, ex1_sqrts_dout};

assign ex1_anydivsqrt = |{ex1_div, ex1_divs, ex1_sqrt, ex1_sqrts};

// ex2_div_instr_lat: 6-bit register (act tied to tiup) moving the qualified
// EX1 opcode strobes, record bit and any-op strobe into EX2.
tri_rlmreg_p #(
   .INIT(0),
   .WIDTH(6),
   .NEEDS_SRESET(0)
) ex2_div_instr_lat (
   .vd          (vdd),
   .gd          (gnd),
   .clk         (clk),
   .rst         (rst),
   .act         (tiup),
   .force_t     (force_t),
   .thold_b     (thold_0_b),
   .sg          (sg_0),
   .d_mode      (tiup),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .scin        (ex2_div_instr_lat_scin),
   .scout       (ex2_div_instr_lat_scout),
   .din         ({ex1_div,
                  ex1_divs,
                  ex1_sqrt,
                  ex1_sqrts,
                  ex1_record_v,
                  ex1_anydivsqrt}),
   .dout        ({ex2_div,
                  ex2_divs,
                  ex2_sqrt,
                  ex2_sqrts,
                  ex2_record_v,
                  ex2_anydivsqrt})
);

// EX2 operation classes: divide vs. square root, and the "s" opcode forms.
assign ex2_div_or_divs   = ex2_divs | ex2_div;
assign ex2_sqrt_or_sqrts = ex2_sqrts | ex2_sqrt;
assign ex2_sp            = ex2_sqrts | ex2_divs;
//----------------------------------------------------------------------
// itag hold mux: take the decode itag when a div/sqrt op is in EX1,
// otherwise recirculate the registered value.
assign ex1_itag_din = ({7{ ex1_anydivsqrt}} & f_dcd_ex1_itag) |
                      ({7{~ex1_anydivsqrt}} & exx_itag_q);

// ex2_div_itag_lat: 9-bit register (act tied to tiup).
//   bits 0-6  ex1_itag_din   -> exx_itag_q
//   bit  7    ex2_anydivsqrt -> ex3_anydivsqrt
//   bit  8    ex3_anydivsqrt -> ex4_anydivsqrt
tri_rlmreg_p #(
   .INIT(0),
   .WIDTH(9),
   .NEEDS_SRESET(0)
) ex2_div_itag_lat (
   .vd          (vdd),
   .gd          (gnd),
   .clk         (clk),
   .rst         (rst),
   .act         (tiup),
   .force_t     (force_t),
   .thold_b     (thold_0_b),
   .sg          (sg_0),
   .d_mode      (tiup),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .scin        (ex2_itag_lat_scin),
   .scout       (ex2_itag_lat_scout),
   .din         ({ex1_itag_din,
                  ex2_anydivsqrt,
                  ex3_anydivsqrt}),
   .dout        ({exx_itag_q,
                  ex3_anydivsqrt,
                  ex4_anydivsqrt})
);
// Hold muxes for per-instruction side-band fields: load from decode when a
// div/sqrt op is in EX1, otherwise recirculate the registered value.
assign ex1_fpscr_addr_din = ({6{ ex1_anydivsqrt}} & f_dcd_ex1_fpscr_addr) |
                            ({6{~ex1_anydivsqrt}} & exx_fpscr_addr_q);

assign ex1_instr_frt_din  = ({6{ ex1_anydivsqrt}} & f_dcd_ex1_instr_frt) |
                            ({6{~ex1_anydivsqrt}} & exx_instr_frt_q);

assign ex1_cr_bf_din      = ({5{ ex1_anydivsqrt}} & f_dcd_ex1_divsqrt_cr_bf) |
                            ({5{~ex1_anydivsqrt}} & exx_cr_bf_q);

// FPSCR snapshot, 7 bits = {ee[0:4], rm[0:1]}. Loaded while ex6_anydivsqrt is
// set from the thread whose bit is set in exx_instr_tid_q[0:1]; held otherwise.
assign exx_fpscr_din = ({7{ex6_anydivsqrt & exx_instr_tid_q[0]}} & {f_scr_ex6_fpscr_ee_thr0, f_scr_ex6_fpscr_rm_thr0}) |
                       ({7{ex6_anydivsqrt & exx_instr_tid_q[1]}} & {f_scr_ex6_fpscr_ee_thr1, f_scr_ex6_fpscr_rm_thr1}) |
                       ({7{~ex6_anydivsqrt}}                    & exx_fpscr_q);

// Thread-id control, one select bit per thread:
//   init  - op in EX1 and that thread is not being flushed
//           (a new op can be starting in ex1 while ex6 is finishing)
//   hold  - no op in EX1, ex6_divsqrt_done not set, thread not flushed
//   clear - no op in EX1 with ex6_divsqrt_done set, or the thread is flushed
assign tid_init  = (~f_dcd_flush[0:1]) & {2{ex1_anydivsqrt}};
assign tid_hold  = (~f_dcd_flush[0:1]) & {2{(~ex1_anydivsqrt) & (~ex6_divsqrt_done)}};
assign tid_clear = f_dcd_flush[0:1] | {2{(~ex1_anydivsqrt) & ex6_divsqrt_done}};

// The clear leg contributes zeros; it is kept in the equation so that all
// three selects appear explicitly.
assign ex1_instr_tid_din[0:1] = (tid_init  & f_dcd_ex1_instr_tid[0:1]) |
                                (tid_hold  & exx_instr_tid_q[0:1]) |
                                (tid_clear & 2'b00);
// Thread-id bits 2:3 are always registered as zero.
assign ex1_instr_tid_din[2:3] = 2'b00;

// ex2_div_fpscr_addr_cr_bf_lat: 28-bit register (act tied to tiup,
// NEEDS_SRESET = 1) = fpscr_addr(6) + cr_bf(5) + instr_frt(6) +
// instr_tid(4) + fpscr snapshot(7).
tri_rlmreg_p #(
   .INIT(0),
   .WIDTH(28),
   .NEEDS_SRESET(1)
) ex2_div_fpscr_addr_cr_bf_lat (
   .vd          (vdd),
   .gd          (gnd),
   .clk         (clk),
   .rst         (rst),
   .act         (tiup),
   .force_t     (force_t),
   .thold_b     (thold_0_b),
   .sg          (sg_0),
   .d_mode      (tiup),
   .delay_lclkr (delay_lclkr),
   .mpw1_b      (mpw1_b),
   .mpw2_b      (mpw2_b),
   .scin        (ex2_fpscr_addr_lat_scin),
   .scout       (ex2_fpscr_addr_lat_scout),
   .din         ({ex1_fpscr_addr_din,
                  ex1_cr_bf_din,
                  ex1_instr_frt_din,
                  ex1_instr_tid_din,
                  exx_fpscr_din}),
   .dout        ({exx_fpscr_addr_q,
                  exx_cr_bf_q,
                  exx_instr_frt_q,
                  exx_instr_tid_q,
                  exx_fpscr_q})
);
//----------------------------------------------------------------------
assign f_fmt_ex2_a_expo_div = (~f_fmt_ex2_a_expo_div_b);
assign f_fmt_ex2_b_expo_div = (~f_fmt_ex2_b_expo_div_b);
assign exx_a_sign_d = (f_fmt_ex2_a_sign_div & ex2_anydivsqrt) | (exx_a_sign_q & (~ex2_anydivsqrt));
assign exx_a_biased_13exp_d = (f_fmt_ex2_a_expo_div & {13{ex2_anydivsqrt}}) |
(exx_a_biased_13exp_q & {13{(~ex2_anydivsqrt)}});
assign exx_a_fract_d = (f_fmt_ex2_a_frac_div & {52{ex2_anydivsqrt}}) |
(exx_a_fract_q & {52{(~ex2_anydivsqrt)}});
assign ex2_a_zero = f_fmt_ex2_a_zero;
assign ex2_a_SPunderflow_zero = (f_fmt_ex2_a_zero_dsq & ex2_sp) & (~f_fmt_ex2_a_zero);
assign ex2_a_expo_max = f_fmt_ex2_a_expo_max;
assign ex2_a_SPoverflow_expo_max = (f_fmt_ex2_a_expo_max_dsq & ex2_sp) & (~ex2_a_expo_max);
assign ex2_a_frac_zero = f_fmt_ex2_a_frac_zero;
assign exx_a_zero_d = (ex2_a_zero & ex2_anydivsqrt) | (exx_a_zero_q & (~ex2_anydivsqrt));
assign exx_a_SPunderflow_zero_d = (ex2_a_SPunderflow_zero & ex2_anydivsqrt) | (exx_a_SPunderflow_zero_q & (~ex2_anydivsqrt));
assign exx_a_expo_max_d = (ex2_a_expo_max & ex2_anydivsqrt) | (exx_a_expo_max_q & (~ex2_anydivsqrt));
assign exx_a_SPoverflow_expo_max_d = (ex2_a_SPoverflow_expo_max & ex2_anydivsqrt) | (exx_a_SPoverflow_expo_max_q & (~ex2_anydivsqrt));
assign exx_a_frac_zero_d = (ex2_a_frac_zero & ex2_anydivsqrt) | (exx_a_frac_zero_q & (~ex2_anydivsqrt));
assign VE = exx_fpscr_q[0];
assign OE = exx_fpscr_q[1];
assign UE = exx_fpscr_q[2];
assign ZE = exx_fpscr_q[3];
assign XE = exx_fpscr_q[4];
assign spare_unused[0] = XE;
//---------------------------------------------------------------------
// Operand A stage register, 71 bits:
//   sign(1) + exponent(13) + fraction(52) + five 1-bit flags
//---------------------------------------------------------------------
tri_rlmreg_p #(.INIT(0), .WIDTH(71), .NEEDS_SRESET(0)) ex2_div_a_stage_lat(
    // supplies, clock, reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock-control pins
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // act pin
    .act(exx_running_act_q),
    // scan ports
    .scin(ex2_div_a_stage_lat_scin),
    .scout(ex2_div_a_stage_lat_scout),
    // data in (field order must match dout)
    .din({exx_a_sign_d,
          exx_a_biased_13exp_d,
          exx_a_fract_d,
          exx_a_zero_d,
          exx_a_expo_max_d,
          exx_a_frac_zero_d,
          exx_a_SPunderflow_zero_d,
          exx_a_SPoverflow_expo_max_d}),
    // data out
    .dout({exx_a_sign_q,
           exx_a_biased_13exp_q,
           exx_a_fract_q,
           exx_a_zero_q,
           exx_a_expo_max_q,
           exx_a_frac_zero_q,
           exx_a_SPunderflow_zero_q,
           exx_a_SPoverflow_expo_max_q})
);
// Slice [1:52] of the incoming A fraction.
assign ex2_a_fract = f_fmt_ex2_a_frac_div[1:52];

//---------------------------------------------------------------------
// Operand B capture -- same load/hold scheme as operand A:
// ex2_anydivsqrt selects the f_fmt_ex2_b_* inputs, otherwise the *_q
// latch output is recirculated.
//---------------------------------------------------------------------
// ex2 special-value flags, passed straight through
assign ex2_b_zero      = f_fmt_ex2_b_zero;
assign ex2_b_expo_max  = f_fmt_ex2_b_expo_max;
assign ex2_b_frac_zero = f_fmt_ex2_b_frac_zero;

// the *_dsq variants are qualified with ex2_sp and are suppressed when
// the plain flag is already raised
assign ex2_b_SPunderflow_zero    = f_fmt_ex2_b_zero_dsq & ex2_sp & ~f_fmt_ex2_b_zero;
assign ex2_b_SPoverflow_expo_max = f_fmt_ex2_b_expo_max_dsq & ex2_sp & ~ex2_b_expo_max;

// load (ex2_anydivsqrt) / hold (~ex2_anydivsqrt) selection
assign exx_b_sign_d =
    (ex2_anydivsqrt & f_fmt_ex2_b_sign_div) |
    (~ex2_anydivsqrt & exx_b_sign_q);
assign exx_b_biased_13exp_d =
    ({13{ex2_anydivsqrt}} & f_fmt_ex2_b_expo_div) |
    ({13{~ex2_anydivsqrt}} & exx_b_biased_13exp_q);
assign exx_b_fract_d =
    ({52{ex2_anydivsqrt}} & f_fmt_ex2_b_frac_div) |
    ({52{~ex2_anydivsqrt}} & exx_b_fract_q);
assign exx_b_zero_d =
    (ex2_anydivsqrt & ex2_b_zero) |
    (~ex2_anydivsqrt & exx_b_zero_q);
assign exx_b_SPunderflow_zero_d =
    (ex2_anydivsqrt & ex2_b_SPunderflow_zero) |
    (~ex2_anydivsqrt & exx_b_SPunderflow_zero_q);
assign exx_b_expo_max_d =
    (ex2_anydivsqrt & ex2_b_expo_max) |
    (~ex2_anydivsqrt & exx_b_expo_max_q);
assign exx_b_SPoverflow_expo_max_d =
    (ex2_anydivsqrt & ex2_b_SPoverflow_expo_max) |
    (~ex2_anydivsqrt & exx_b_SPoverflow_expo_max_q);
assign exx_b_frac_zero_d =
    (ex2_anydivsqrt & ex2_b_frac_zero) |
    (~ex2_anydivsqrt & exx_b_frac_zero_q);
//---------------------------------------------------------------------
// Operand B stage register, 71 bits:
//   sign(1) + exponent(13) + fraction(52) + five 1-bit flags
//---------------------------------------------------------------------
tri_rlmreg_p #(.INIT(0), .WIDTH(71), .NEEDS_SRESET(0)) ex2_div_b_stage_lat(
    // supplies, clock, reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock-control pins
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // act pin
    .act(exx_running_act_q),
    // scan ports
    .scin(ex2_div_b_stage_lat_scin),
    .scout(ex2_div_b_stage_lat_scout),
    // data in (field order must match dout)
    .din({exx_b_sign_d,
          exx_b_biased_13exp_d,
          exx_b_fract_d,
          exx_b_zero_d,
          exx_b_expo_max_d,
          exx_b_frac_zero_d,
          exx_b_SPunderflow_zero_d,
          exx_b_SPoverflow_expo_max_d}),
    // data out
    .dout({exx_b_sign_q,
           exx_b_biased_13exp_q,
           exx_b_fract_q,
           exx_b_zero_q,
           exx_b_expo_max_q,
           exx_b_frac_zero_q,
           exx_b_SPunderflow_zero_q,
           exx_b_SPoverflow_expo_max_q})
);
// Slice [1:52] of the incoming B fraction.
assign ex2_b_fract = f_fmt_ex2_b_frac_div[1:52];
//------------------------------------------------------------------------------
// Remove the exponent bias from operand B
//------------------------------------------------------------------------------
// The bias is the double-precision one (1023).  Subtraction is done by adding
// the 13-bit two's complement of 1023: 13'h1C01 == 13'b1_1100_0000_0001.
assign exx_b_ubexp = exx_b_biased_13exp_q[1:13] + 13'h1C01;
// Exponent pipeline register, 52 bits wide: four fields latched side by side.
// Note that the last field is fed from an exy_* net and drives an exz_* net.
tri_rlmreg_p #(.INIT(0), .WIDTH(52), .NEEDS_SRESET(0)) ex2_div_exp_lat(
    // supplies, clock, reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock-control pins
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // act pin
    .act(exx_running_act_q),
    // scan ports
    .scin(ex2_div_exp_lat_scin),
    .scout(ex2_div_exp_lat_scout),
    // data in (field order must match dout)
    .din({exx_b_ubexp,
          exx_exp_addres_div_x0,
          exx_exp_addres_sqrt_x0,
          exy_exp_addres_div_x0_m1}),
    // data out
    .dout({exy_b_ubexp,
           exy_exp_addres_div_x0,
           exy_exp_addres_sqrt_x0,
           exz_exp_addres_div_x0_m1})
);
//------------------------------------------------------------------------------
// Cycle counter / run-state control
//
// ex2_cycles_q is loaded from ex1_op_cyc_count when a new divide or square
// root arrives (init), counts down by one while the operation is running
// (decr), is held when it has reached zero or when it sits at one while
// ex2_divsqrt_hole_v_b is high (hold), and is zeroed by n_flush (clear).
//------------------------------------------------------------------------------
assign ex2_divsqrt_hole_v_b = ~f_dcd_ex2_divsqrt_hole_v;

// decodes of the current count
assign ex2_divsqrt_zero = (ex2_cycles_q == 8'h00);
assign ex1_divsqrt_done = (ex2_cycles_q == 8'h02);
assign ex2_divsqrt_done = (ex2_cycles_q == 8'h01);

// counter command decode; n_flush overrides init, hold and decr
assign ex1_cycles_init  = (ex1_div | ex1_divs | ex1_sqrt | ex1_sqrts) & ~n_flush;
assign ex1_cycles_hold  = (ex2_divsqrt_zero | (ex2_divsqrt_done & ex2_divsqrt_hole_v_b)) &
                          ~ex1_cycles_init & ~n_flush;
assign ex1_cycles_decr  = exx_divsqrt_running_q & ~ex1_cycles_hold & ~ex1_cycles_init & ~n_flush;
assign ex1_cycles_clear = n_flush;

// next count value (AND-OR mux over the four commands)
assign ex1_cycles_d =
    ({8{ex1_cycles_init}}  & ex1_op_cyc_count) |
    ({8{ex1_cycles_hold}}  & ex2_cycles_q) |
    ({8{ex1_cycles_clear}} & 8'h00) |
    ({8{ex1_cycles_decr}}  & (ex2_cycles_q - 8'h01));

// completion is only signalled when the count is one and hole_v_b is low
assign ex2_divsqrt_done_din = ex2_divsqrt_done & ~ex2_divsqrt_hole_v_b & ~n_flush;
assign ex2_waiting_for_hole = ex2_divsqrt_done & ex2_divsqrt_hole_v_b & ~ex1_cycles_init & ~n_flush;

// Hang counter: increments while waiting for a hole and fires when it reads
// 8'h20.  Its next value is zero in every cycle in which incr is low.
assign ex3_hangcounter_trigger       = (ex3_hangcounter_q == 8'h20);
assign f_dsq_ex3_hangcounter_trigger = ex3_hangcounter_trigger;
assign ex2_hangcounter_incr  = ex2_waiting_for_hole & ~ex3_hangcounter_trigger;
assign ex2_hangcounter_clear = (ex2_divsqrt_done & ~ex2_divsqrt_hole_v_b) |
                               ex1_cycles_init | ex3_hangcounter_trigger | n_flush;
assign ex2_hangcounter_din =
    ({8{ex2_hangcounter_clear}} & 8'h00) |
    ({8{ex2_hangcounter_incr}}  & (ex3_hangcounter_q + 8'h01));

// "running" is set by a new op and kept until completion or flush
assign ex1_divsqrt_running_d =
    (ex1_div | ex1_divs | ex1_sqrt | ex1_sqrts | exx_divsqrt_running_q) &
    ~(ex2_divsqrt_done_din | n_flush);

// per-operation attributes: captured with a new op, held otherwise,
// and cleared by n_flush
assign exx_single_precision_d =
    (ex1_divs | ex1_sqrts | (exx_single_precision_q & ~ex1_anydivsqrt)) & ~n_flush;
assign exx_record_v_din =
    ((ex1_record_v & ex1_anydivsqrt) | (exx_record_v_q & ~ex1_anydivsqrt)) & ~n_flush;
assign exx_sqrt_d =
    (ex1_sqrt | ex1_sqrts | (exx_sqrt_q[0] & ~ex1_anydivsqrt)) & ~n_flush;
assign exx_div_d =
    (ex1_div | ex1_divs | (exx_div_q[0] & ~ex1_anydivsqrt)) & ~n_flush;

// precision aliases
assign ex4_sp = exx_single_precision_q;
assign exx_sp = exx_single_precision_q;
assign ex4_dp = ~exx_single_precision_q;
assign exx_dp = ~exx_single_precision_q;
// 8-bit hang counter register (NEEDS_SRESET = 1, act tied to tiup).
tri_rlmreg_p #(.INIT(0), .WIDTH(8), .NEEDS_SRESET(1)) ex3_div_hangcounter_lat(
    // supplies, clock, reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock-control pins
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // act pin
    .act(tiup),
    // scan ports
    .scin(ex3_div_hangcounter_lat_scin),
    .scout(ex3_div_hangcounter_lat_scout),
    // data
    .din({ex2_hangcounter_din}),
    .dout({ex3_hangcounter_q})
);
// Control register, 19 bits:
//   cycle count(8) + running(1) + single precision(1) +
//   sqrt flag x4 + div flag x4 + record_v(1)
// The sqrt and div flags are each stored in four identical latch bits
// (exx_sqrt_q[0:3], exx_div_q[0:3]), all loaded from the same *_d net.
tri_rlmreg_p #(.INIT(0), .WIDTH(19), .NEEDS_SRESET(1)) ex1_div_ctr_lat(
    // supplies, clock, reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock-control pins
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // act pin
    .act(tiup),
    // scan ports
    .scin(ex1_div_ctr_lat_scin),
    .scout(ex1_div_ctr_lat_scout),
    // data in (field order must match dout)
    .din({ex1_cycles_d,
          ex1_divsqrt_running_d,
          exx_single_precision_d,
          {4{exx_sqrt_d}},
          {4{exx_div_d}},
          exx_record_v_din}),
    // data out
    .dout({ex2_cycles_q,
           exx_divsqrt_running_q,
           exx_single_precision_q,
           exx_sqrt_q[0:3],
           exx_div_q[0:3],
           exx_record_v_q})
);
//------------------------------------------------------------------------------
// fraction path
//------------------------------------------------------------------------------
//-------------------------------------------------------------------
// Initial 4-bit add and quotient select
//-------------------------------------------------------------------
// Denominator register input (56 bits): loads {1, B fraction, 000} when
// ex2_anydivsqrt is high, recirculates while running without a new op, and is
// zero when neither term is selected.
assign exx_denom_d = (exx_denom_q & {56{(exx_divsqrt_running_q & (~ex2_anydivsqrt))}}) |
({{({1'b1, ex2_b_fract, 3'b000})}} & {56{(ex2_anydivsqrt)}});
//------------------------------------------------------------------------------------------------------------------------------------------------
// Partial-remainder sum register input (57 bits), built as an AND-OR mux:
//   sel0..sel2 : initial value for a new operation
//   sel3       : normal iteration, takes exx_PR_sum_final
//   sel4       : hold while the count is one and ex2_divsqrt_hole_v_b is high
// The two sqrt initial values differ by a one-bit left shift of the B fraction.
assign exx_PR_sum_shift = exx_PR_sum_final;
assign ex2_PR_sum_sel0 = ex2_div_or_divs; // initialize div
assign ex2_PR_sum_sel1 = ex2_sqrt_or_sqrts & (~f_fmt_ex2_b_expo_div_b[13]); // initialize sqrt, even exponent
assign ex2_PR_sum_sel2 = ex2_sqrt_or_sqrts & f_fmt_ex2_b_expo_div_b[13]; // initialize sqrt, odd exponent
assign ex2_PR_sum_sel3 = (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b));
assign ex2_PR_sum_sel4 = ex2_divsqrt_done & ex2_divsqrt_hole_v_b;
assign ex2_PR_sum_sel_late = ex2_PR_sum_sel3;
assign ex2_PR_sum_sel_early = ex2_PR_sum_sel0 | ex2_PR_sum_sel1 | ex2_PR_sum_sel2 | ex2_PR_sum_sel4;
// "early" value: everything that does not depend on this cycle's CSA result
assign exx_PR_sum_d_early = (({4'b0001, ex2_a_fract[1:52], 1'b0}) & {57{ex2_PR_sum_sel0}}) | // div
(({4'b0001, ex2_b_fract[1:52], 1'b0}) & {57{ex2_PR_sum_sel1}}) | // sqrt even exponent
(({3'b001, ex2_b_fract[1:52], 2'b00}) & {57{ex2_PR_sum_sel2}}) | // sqrt odd exponent
(exx_PR_sum_q & {57{ex2_PR_sum_sel4}}); // hold
// "late" value: the freshly computed partial-remainder sum
assign exx_PR_sum_d_late = exx_PR_sum_shift;
assign exx_PR_sum_d = (exx_PR_sum_d_late & {57{ex2_PR_sum_sel_late}}) |
(exx_PR_sum_d_early & {57{ex2_PR_sum_sel_early}});
// Partial-remainder carry register input (57 bits): zero on a new operation,
// exx_PR_carry_final while iterating, recirculated while holding.
assign exx_PR_carry_shift = exx_PR_carry_final;
assign ex2_PR_carry_sel0 = ex2_anydivsqrt;
assign ex2_PR_carry_sel1 = (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b));
assign ex2_PR_carry_sel2 = ex2_divsqrt_done & ex2_divsqrt_hole_v_b; // hold
assign exx_PR_carry_d = ({57{1'b0}} & {57{ex2_PR_carry_sel0}}) |
(exx_PR_carry_shift & {57{ex2_PR_carry_sel1}}) |
(exx_PR_carry_q & {57{ex2_PR_carry_sel2}});
// Partial-remainder register: 57-bit sum followed by 57-bit carry.
tri_rlmreg_p #(.INIT(0), .WIDTH(114), .NEEDS_SRESET(0)) ex3_div_PR_sumcarry_lat(
    // supplies, clock, reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock-control pins
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // act pin
    .act(exx_running_act_q),
    // scan ports
    .scin(ex3_div_PR_sumcarry_lat_scin),
    .scout(ex3_div_PR_sumcarry_lat_scout),
    // data
    .din({exx_PR_sum_d, exx_PR_carry_d}),
    .dout({exx_PR_sum_q, exx_PR_carry_q})
);
// Separate 8-bit register holding bits [0:3] of the partial-remainder sum and
// carry inputs.  Unlike the full-width register its act pin is tied to tiup.
tri_rlmreg_p #(.INIT(0), .WIDTH(8), .NEEDS_SRESET(0)) ex3_div_PR_sum4carry4_lat(
    // supplies, clock, reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock-control pins
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // act pin
    .act(tiup),
    // scan ports
    .scin(ex3_div_PR_sum4carry4_lat_scin),
    .scout(ex3_div_PR_sum4carry4_lat_scout),
    // data
    .din({exx_PR_sum_d[0:3], exx_PR_carry_d[0:3]}),
    .dout({exx_PR_sum4_q, exx_PR_carry4_q})
);
// Quotient registers: 57-bit Q followed by 57-bit QM.
tri_rlmreg_p #(.INIT(0), .WIDTH(114), .NEEDS_SRESET(0)) ex3_div_Q_QM_lat(
    // supplies, clock, reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock-control pins
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // act pin
    .act(exx_running_act_q),
    // scan ports
    .scin(ex3_div_Q_QM_lat_scin),
    .scout(ex3_div_Q_QM_lat_scout),
    // data
    .din({exx_Q_d, exx_QM_d}),
    .dout({exx_Q_q, exx_QM_q})
);
// bQ / bQM registers (the Q/QM copies used by the square-root path):
// 57-bit bQ followed by 57-bit bQM.
tri_rlmreg_p #(.INIT(0), .WIDTH(114), .NEEDS_SRESET(0)) ex3_div_bQ_QM_lat(
    // supplies, clock, reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock-control pins
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // act pin
    .act(exx_running_act_q),
    // scan ports
    .scin(ex3_div_bQ_QM_lat_scin),
    .scout(ex3_div_bQ_QM_lat_scout),
    // data
    .din({exx_bQ_d, exx_bQM_d}),
    .dout({exx_bQ_q, exx_bQM_q})
);
// Square-root bit-mask registers: three 56-bit masks
// (newbitmask, Qbitmask, QMbitmask) latched side by side.
tri_rlmreg_p #(.INIT(0), .WIDTH(168), .NEEDS_SRESET(0)) ex3_sqrt_bitmask_lat(
    // supplies, clock, reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock-control pins
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // act pin
    .act(exx_running_act_q),
    // scan ports
    .scin(ex3_sqrt_bitmask_lat_scin),
    .scout(ex3_sqrt_bitmask_lat_scout),
    // data in (field order must match dout)
    .din({exx_sqrt_newbitmask_din,
          exx_sqrt_Qbitmask_din,
          exx_sqrt_QMbitmask_din}),
    // data out
    .dout({exx_sqrt_newbitmask_q,
           exx_sqrt_Qbitmask_q,
           exx_sqrt_QMbitmask_q})
);
// 56-bit denominator register.
// Note: the scan nets are named ex3_denom_lat_* (without the "div_" infix).
tri_rlmreg_p #(.INIT(0), .WIDTH(56), .NEEDS_SRESET(0)) ex3_div_denom_lat(
    // supplies, clock, reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock-control pins
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // act pin
    .act(exx_running_act_q),
    // scan ports
    .scin(ex3_denom_lat_scin),
    .scout(ex3_denom_lat_scout),
    // data
    .din(exx_denom_d),
    .dout(exx_denom_q)
);
//------------------------------------------------------------------------------
// Level 0 quotient-digit selection
//
// The latched top four sum/carry bits are added, and the 4-bit result indexes
// the q / nq tables.  The q table additionally takes a carry-in hint formed
// from bit 5 of the full-width sum and carry registers.
//------------------------------------------------------------------------------
fu_divsqrt_add4 lev0_add4(
    .x(exx_PR_sum4_q[0:3]),
    .y(exx_PR_carry4_q[0:3]),
    .s(exx_sum4)
);

assign exx_q_bit0_cin = exx_PR_sum_q[5] | exx_PR_carry_q[5];

fu_divsqrt_q_table lev0_div_q_table(
    .x(exx_sum4[0:3]),
    .cin(exx_q_bit0_cin),
    .q(exx_q_bit0_prebuf)
);

fu_divsqrt_nq_table lev0_div_nq_table(
    .x(exx_sum4[0:3]),
    .nq(exx_nq_bit0_prebuf)
);

// two inversions in series per digit wire: *_prebuf -> *_b -> true polarity
assign exx_q_bit0_b  = ~exx_q_bit0_prebuf;
assign exx_q_bit0    = ~exx_q_bit0_b;
assign exx_nq_bit0_b = ~exx_nq_bit0_prebuf;
assign exx_nq_bit0   = ~exx_nq_bit0_b;
//----------------------------------------------------------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------
// on-the-fly quotient digit conversion logic for level 0
//-------------------------------------------------------------------
// The level-0 digit arrives on two wires: exx_q_bit0 (digit > 0) and
// exx_nq_bit0 (digit < 0); both low means digit = 0.
// In the formulas below "&" is concatenation: the chosen register is shifted
// left one place (bits [1:56]) and the new bit is appended on the right.
// Qin=(Q & q) if q >= 0. Qin=(QM & 1) if q < 0
assign exx_Qin_lev0_sel0 = exx_q_bit0 | ((~exx_nq_bit0));
assign exx_Qin_lev0_sel1 = exx_nq_bit0;
assign exx_Qin_lev0[0:56] = (({exx_Q_q[1:56], exx_q_bit0}) & {57{exx_Qin_lev0_sel0}}) |
(({exx_QM_q[1:56], 1'b1}) & {57{exx_Qin_lev0_sel1}});
// QMin=(Q & 0) if q > 0. QMin=(QM & 0) if q < 0. QMin=(QM & 1) if q = 0
assign exx_QMin_lev0_sel0 = exx_q_bit0;
assign exx_QMin_lev0_sel1 = exx_nq_bit0;
assign exx_QMin_lev0_sel2 = (~(exx_nq_bit0 | exx_q_bit0));
assign exx_QMin_lev0[0:56] = (({exx_Q_q[1:56], 1'b0}) & {57{exx_QMin_lev0_sel0}}) |
(({exx_QM_q[1:56], 1'b0}) & {57{exx_QMin_lev0_sel1}}) |
(({exx_QM_q[1:56], 1'b1}) & {57{exx_QMin_lev0_sel2}});
// massage Q and QM for use with square root
// sel_denom_pre1 = ~(((Q << 2) | 1) << 29-i);
// sel_denom_pre3 = (((QM << 2) | 3) << 29-i);
// The square-root operands for level 0 are bits [0:55] of the masked
// bQ / bQM values (exx_bQ_q_t / exx_bQM_q_t, assigned further down).
assign exx_sqrtlev0_Q[0:55] = exx_bQ_q_t[0:55];
assign exx_sqrtlev0_MQ[0:55] = exx_bQM_q_t[0:55];
//-------------------------------------------------------------------
// Initial Denominator mux and 3:2 CSA
//-------------------------------------------------------------------
// Both partial-remainder words are shifted left by one before the add.
assign exx_PR_sum_q_shifted = {exx_PR_sum_q[1:56], 1'b0};
assign exx_PR_carry_q_shifted = {exx_PR_carry_q[1:56], 1'b0};
// selneg is high only for a positive digit (q set, nq clear); it becomes the
// top bit of the CSA operand and is also fed in as the carry word's LSB.
assign exx_lev0_selneg = exx_q_bit0 & (~exx_nq_bit0);
// Operand select, qualified by the latched div / sqrt mode:
//   selnD : positive digit, divide -> inverted denominator
//   selD  : negative digit, divide -> denominator
//   selQ  : positive digit, sqrt   -> inverted exx_sqrtlev0_Q
//   selMQ : negative digit, sqrt   -> exx_sqrtlev0_MQ
// Each select is formed as an active-low NAND and then re-inverted.
assign exx_lev0_selD_b = (~(exx_nq_bit0 & exx_div_q[0]));
assign exx_lev0_selnD_b = (~(exx_q_bit0 & exx_div_q[0]));
assign exx_lev0_selD = (~exx_lev0_selD_b);
assign exx_lev0_selnD = (~exx_lev0_selnD_b);
assign exx_lev0_selQ_b = (~(exx_q_bit0 & exx_sqrt_q[0]));
assign exx_lev0_selMQ_b = (~(exx_nq_bit0 & exx_sqrt_q[0]));
assign exx_lev0_selQ = (~exx_lev0_selQ_b);
assign exx_lev0_selMQ = (~exx_lev0_selMQ_b);
// NAND-NAND form of the 4-way AND-OR mux; the result is all zeros when no
// select is active (digit = 0).
assign exx_denomQ_lev0_nD_b = (~((~exx_denom_q) & {56{exx_lev0_selnD}}));
assign exx_denomQ_lev0_D_b = (~(exx_denom_q & {56{exx_lev0_selD}}));
assign exx_denomQ_lev0_Q_b = (~((~exx_sqrtlev0_Q) & {56{exx_lev0_selQ}}));
assign exx_denomQ_lev0_MQ_b = (~(exx_sqrtlev0_MQ & {56{exx_lev0_selMQ}}));
assign exx_denomQ_lev0 = (~(exx_denomQ_lev0_nD_b & exx_denomQ_lev0_D_b & exx_denomQ_lev0_Q_b & exx_denomQ_lev0_MQ_b));
// 3:2 carry-save add of {selneg, operand}, shifted sum and shifted carry.
// Sum word = operand XOR (sum XOR carry).
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev0_div_csaout_sum(exx_lev0_csaoutsh_sum,
{exx_lev0_selneg, exx_denomQ_lev0},
exx_lev0_divsqrt_csaout_xor);
// Carry word = bitwise majority of the three inputs ...
assign exx_lev0_csaout_carryout = (({exx_lev0_selneg, exx_denomQ_lev0}) & exx_PR_sum_q_shifted) |
(({exx_lev0_selneg, exx_denomQ_lev0}) & exx_PR_carry_q_shifted) |
(exx_PR_sum_q_shifted & exx_PR_carry_q_shifted);
// ... moved one place left, with selneg inserted as the new LSB
// (presumably the +1 that completes the two's-complement negation of the
// inverted operand -- confirm).
assign exx_lev0_csaoutsh_carry[0:56] = {exx_lev0_csaout_carryout[1:56], exx_lev0_selneg};
// Three identical XORs of the shifted sum and carry words, one each for the
// lev0, lev1 and lev3 paths (presumably duplicated to split the load).
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev0_div_csaout_xor(exx_lev0_divsqrt_csaout_xor,
exx_PR_sum_q_shifted,
exx_PR_carry_q_shifted);
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev1_div_csaout_xor(exx_lev1_divsqrt_csaout_xor,
exx_PR_sum_q_shifted,
exx_PR_carry_q_shifted);
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev3_div_csaout_xor(exx_lev3_divsqrt_csaout_xor,
exx_PR_sum_q_shifted,
exx_PR_carry_q_shifted);
//-------------------------------------------------------------------
// Pick -d, 0, +d
//-------------------------------------------------------------------
// The candidate next digit is computed for each of three assumed level-0
// digits (lev1: positive, lev2: zero, lev3: negative); the candidate that
// matches the actual level-0 digit is picked by the mux further down.
// lev1: neg d, +q ========================================================
// Operand = {1, inverted denominator} for divide, {1, inverted Q} for sqrt.
assign exx_lev1_div_oper = ({1'b1, (~exx_denom_q)});
assign exx_lev1_sqrt_oper = ({1'b1, (~exx_sqrtlev0_Q)});
// CSA sum word = operand XOR (shifted sum XOR shifted carry)
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev1_div_csaout_sum(exx_lev1_div_csaout_sum,
exx_lev1_div_oper,
exx_lev1_divsqrt_csaout_xor);
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev1_sqrt_csaout_sum(exx_lev1_sqrt_csaout_sum,
exx_lev1_sqrt_oper,
exx_lev1_divsqrt_csaout_xor);
// CSA carry word = bitwise majority, moved one place left with a 1 as the new LSB
assign exx_lev1_div_csaout_carryout = (exx_lev1_div_oper & exx_PR_sum_q_shifted) | (exx_lev1_div_oper & exx_PR_carry_q_shifted) | (exx_PR_sum_q_shifted & exx_PR_carry_q_shifted);
assign exx_lev1_sqrt_csaout_carryout = (exx_lev1_sqrt_oper & exx_PR_sum_q_shifted) | (exx_lev1_sqrt_oper & exx_PR_carry_q_shifted) | (exx_PR_sum_q_shifted & exx_PR_carry_q_shifted);
assign exx_lev1_div_csaout_carry[0:56] = {exx_lev1_div_csaout_carryout[1:56], 1'b1};
assign exx_lev1_sqrt_csaout_carry[0:56] = {exx_lev1_sqrt_csaout_carryout[1:56], 1'b1};
// 4-bit add of the top bits [0:3] of the sum and carry words
fu_divsqrt_add4 lev1_div_add4(
.x(exx_lev1_div_csaout_sum[0:3]),
.y(exx_lev1_div_csaout_carry[0:3]),
//------------------------------------------------------
.s(exx_sum4_lev1_div)
);
fu_divsqrt_add4 lev1_sqrt_add4(
.x(exx_lev1_sqrt_csaout_sum[0:3]),
.y(exx_lev1_sqrt_csaout_carry[0:3]),
//------------------------------------------------------
.s(exx_sum4_lev1_sqrt)
);
// carry-in hint for the q table: OR of bit 5 of the sum and carry words
assign exx_q_bit1_cin_div = exx_lev1_div_csaout_sum[5] | exx_lev1_div_csaout_carry[5];
assign exx_q_bit1_cin_sqrt = exx_lev1_sqrt_csaout_sum[5] | exx_lev1_sqrt_csaout_carry[5];
// digit-selection tables, separate copies for divide and sqrt
fu_divsqrt_q_table lev1_div_q_table(
.x(exx_sum4_lev1_div[0:3]),
.cin(exx_q_bit1_cin_div),
//------------------------------------------------------
.q(exx_q_bit1_div)
);
fu_divsqrt_q_table lev1_sqrt_q_table(
.x(exx_sum4_lev1_sqrt[0:3]),
.cin(exx_q_bit1_cin_sqrt ),
//------------------------------------------------------
.q(exx_q_bit1_sqrt)
);
fu_divsqrt_nq_table lev1_div_nq_table(
.x(exx_sum4_lev1_div[0:3]),
//------------------------------------------------------
.nq(exx_nq_bit1_div )
);
fu_divsqrt_nq_table lev1_sqrt_nq_table(
.x(exx_sum4_lev1_sqrt[0:3]),
//------------------------------------------------------
.nq(exx_nq_bit1_sqrt )
);
// Directly decoded flag: 4-bit sum is 0000 with no carry-in hint, or is 1111.
// It is used later as the "digit = 0" select of the QM update
// (presumably equal to ~(q | nq) of the tables -- confirm against the tables).
assign exx_notqornq_bit1_sqrt = ((exx_sum4_lev1_sqrt == 4'b0000) & (~exx_q_bit1_cin_sqrt)) |
(exx_sum4_lev1_sqrt == 4'b1111) ;
assign exx_notqornq_bit1_div = ((exx_sum4_lev1_div == 4'b0000) & (~exx_q_bit1_cin_div)) |
(exx_sum4_lev1_div == 4'b1111) ;
// merge the divide and sqrt candidates using copy [1] of the mode latches
assign exx_q_bit1 = (exx_q_bit1_div & exx_div_q[1]) | (exx_q_bit1_sqrt & exx_sqrt_q[1]);
assign exx_nq_bit1 = (exx_nq_bit1_div & exx_div_q[1]) | (exx_nq_bit1_sqrt & exx_sqrt_q[1]);
// lev2: candidate digit for a level-0 digit of zero ======================
// With no operand to add, the 3:2 CSA collapses to a half adder on the
// shifted sum and carry words; the carry word gets a 0 as its new LSB.
assign exx_lev2_csaout_sum        = exx_PR_sum_q_shifted ^ exx_PR_carry_q_shifted;
assign exx_lev2_csaout_carryout   = exx_PR_sum_q_shifted & exx_PR_carry_q_shifted;
assign exx_lev2_csaout_carry[0:56] = {exx_lev2_csaout_carryout[1:56], 1'b0};

// 4-bit add of the top bits [0:3]
fu_divsqrt_add4 lev2_add4(
    .x(exx_lev2_csaout_sum[0:3]),
    .y(exx_lev2_csaout_carry[0:3]),
    .s(exx_sum4_lev2)
);

// carry-in hint for the q table: OR of bit 5 of the sum and carry words
assign exx_q_bit2_cin = exx_lev2_csaout_sum[5] | exx_lev2_csaout_carry[5];

// digit-selection tables (shared by divide and sqrt at this level)
fu_divsqrt_q_table lev2_div_q_table(
    .x(exx_sum4_lev2[0:3]),
    .cin(exx_q_bit2_cin),
    .q(exx_q_bit2)
);
fu_divsqrt_nq_table lev2_nq_table(
    .x(exx_sum4_lev2[0:3]),
    .nq(exx_nq_bit2)
);

// flag: 4-bit sum is 0 with no carry-in hint, or is all ones
assign exx_notqornq_bit2 = ((exx_sum4_lev2 == 4'h0) & ~exx_q_bit2_cin) |
                           (exx_sum4_lev2 == 4'hF);
// pos d, -q: lev3 =======================================================
// Candidate digit for a negative level-0 digit.
// Operand = {0, denominator} for divide, {0, MQ} for sqrt.
assign exx_lev3_div_oper = ({1'b0, exx_denom_q});
assign exx_lev3_sqrt_oper = ({1'b0, exx_sqrtlev0_MQ});
// CSA sum word = operand XOR (shifted sum XOR shifted carry)
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev3_div_csaout_sum(exx_lev3_div_csaout_sum,
exx_lev3_div_oper,
exx_lev3_divsqrt_csaout_xor);
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev3_sqrt_csaout_sum(exx_lev3_sqrt_csaout_sum,
exx_lev3_sqrt_oper,
exx_lev3_divsqrt_csaout_xor);
// CSA carry word = bitwise majority, moved one place left with a 0 as the new LSB
assign exx_lev3_div_csaout_carryout = (exx_lev3_div_oper & exx_PR_sum_q_shifted) | (exx_lev3_div_oper & exx_PR_carry_q_shifted) | (exx_PR_sum_q_shifted & exx_PR_carry_q_shifted);
assign exx_lev3_sqrt_csaout_carryout = (exx_lev3_sqrt_oper & exx_PR_sum_q_shifted) | (exx_lev3_sqrt_oper & exx_PR_carry_q_shifted) | (exx_PR_sum_q_shifted & exx_PR_carry_q_shifted);
assign exx_lev3_div_csaout_carry[0:56] = {exx_lev3_div_csaout_carryout[1:56], 1'b0};
assign exx_lev3_sqrt_csaout_carry[0:56] = {exx_lev3_sqrt_csaout_carryout[1:56], 1'b0};
// 4-bit add of the top bits [0:3] of the sum and carry words
fu_divsqrt_add4 lev3_div_add4(
.x(exx_lev3_div_csaout_sum[0:3]),
.y(exx_lev3_div_csaout_carry[0:3]),
//------------------------------------------------------
.s(exx_sum4_lev3_div)
);
fu_divsqrt_add4 lev3_sqrt_add4(
.x(exx_lev3_sqrt_csaout_sum[0:3]),
.y(exx_lev3_sqrt_csaout_carry[0:3]),
//------------------------------------------------------
.s(exx_sum4_lev3_sqrt)
);
// carry-in hint for the q table: OR of bit 5 of the sum and carry words
assign exx_q_bit3_cin_div = exx_lev3_div_csaout_sum[5] | exx_lev3_div_csaout_carry[5];
assign exx_q_bit3_cin_sqrt = exx_lev3_sqrt_csaout_sum[5] | exx_lev3_sqrt_csaout_carry[5];
// digit-selection tables, separate copies for divide and sqrt
fu_divsqrt_q_table lev3_div_q_table(
.x(exx_sum4_lev3_div[0:3]),
.cin(exx_q_bit3_cin_div),
//------------------------------------------------------
.q(exx_q_bit3_div)
);
fu_divsqrt_q_table lev3_sqrt_q_table(
.x(exx_sum4_lev3_sqrt[0:3]),
.cin(exx_q_bit3_cin_sqrt),
//------------------------------------------------------
.q(exx_q_bit3_sqrt)
);
fu_divsqrt_nq_table lev3_div_nq_table(
.x(exx_sum4_lev3_div[0:3]),
//------------------------------------------------------
.nq(exx_nq_bit3_div )
);
fu_divsqrt_nq_table lev3_sqrt_nq_table(
.x(exx_sum4_lev3_sqrt[0:3]),
//------------------------------------------------------
.nq(exx_nq_bit3_sqrt )
);
// flag: 4-bit sum is 0000 with no carry-in hint, or is 1111
assign exx_notqornq_bit3_sqrt = ((exx_sum4_lev3_sqrt == 4'b0000) & (~exx_q_bit3_cin_sqrt)) |
(exx_sum4_lev3_sqrt == 4'b1111) ;
assign exx_notqornq_bit3_div = ((exx_sum4_lev3_div == 4'b0000) & (~exx_q_bit3_cin_div)) |
(exx_sum4_lev3_div == 4'b1111) ;
// merge the divide and sqrt candidates using copy [2] of the mode latches
// (all four copies are loaded from the same *_d net)
assign exx_q_bit3 = (exx_q_bit3_div & exx_div_q[2]) | (exx_q_bit3_sqrt & exx_sqrt_q[2]);
assign exx_nq_bit3 = (exx_nq_bit3_div & exx_div_q[2]) | (exx_nq_bit3_sqrt & exx_sqrt_q[2]);
//-------------------------------------------------------------------
// Mux between these three to get the next quotient bit
//-------------------------------------------------------------------
// The select is the level-0 digit {q, nq}:
//   2'b10 (positive) -> lev1 candidate
//   2'b00 (zero)     -> lev2 candidate
//   2'b01 (negative) -> lev3 candidate
//   2'b11            -> forced to 0
// Divide and sqrt each get their own copy of the q, nq and notqornq muxes.
assign exx_q_bit22_sel = {exx_q_bit0, exx_nq_bit0};
assign exx_q_bit22_sqrt = (exx_q_bit22_sel == 2'b10) ? exx_q_bit1_sqrt :
(exx_q_bit22_sel == 2'b00) ? exx_q_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_q_bit3_sqrt :
1'b0;
assign exx_nq_bit22_sqrt = (exx_q_bit22_sel == 2'b10) ? exx_nq_bit1_sqrt :
(exx_q_bit22_sel == 2'b00) ? exx_nq_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_nq_bit3_sqrt :
1'b0;
assign exx_notqornq_bit22_sqrt = (exx_q_bit22_sel == 2'b10) ? exx_notqornq_bit1_sqrt :
(exx_q_bit22_sel == 2'b00) ? exx_notqornq_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_notqornq_bit3_sqrt :
1'b0;
assign exx_q_bit22_div = (exx_q_bit22_sel == 2'b10) ? exx_q_bit1_div :
(exx_q_bit22_sel == 2'b00) ? exx_q_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_q_bit3_div :
1'b0;
assign exx_nq_bit22_div = (exx_q_bit22_sel == 2'b10) ? exx_nq_bit1_div :
(exx_q_bit22_sel == 2'b00) ? exx_nq_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_nq_bit3_div :
1'b0;
assign exx_notqornq_bit22_div = (exx_q_bit22_sel == 2'b10) ? exx_notqornq_bit1_div :
(exx_q_bit22_sel == 2'b00) ? exx_notqornq_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_notqornq_bit3_div :
1'b0;
// mode-merged second digit, using copy [2] of the div / sqrt mode latches
assign exx_q_bit22 = (exx_q_bit22_div & exx_div_q[2]) | (exx_q_bit22_sqrt & exx_sqrt_q[2]);
assign exx_nq_bit22 = (exx_nq_bit22_div & exx_div_q[2]) | (exx_nq_bit22_sqrt & exx_sqrt_q[2]);
// massage Q and QM for use with square root
// sel_denom_pre1 = ~(((Q << 2) | 1) << 29-i);
// sel_denom_pre3 = (((QM << 2) | 3) << 29-i);
// sel_denom_1 = ~(((Q << 2) | 1) << 28-i);
// sel_denom_3 = (((QM << 2) | 3) << 28-i);
// bQ / bQM after the level-0 digit, chosen with the same selects as the
// level-0 Q / QM update.  Note that bQMin uses only sel0 and its complement.
assign exx_bQin_lev0[0:55] = ((exx_bQ_q[0:55]) & {56{exx_Qin_lev0_sel0}}) |
((exx_bQM_q[0:55]) & {56{exx_Qin_lev0_sel1}});
assign exx_bQMin_lev0[0:55] = ((exx_bQ_q[0:55]) & {56{exx_QMin_lev0_sel0}}) |
((exx_bQM_q[0:55]) & {56{(~exx_QMin_lev0_sel0)}});
// OR in the Q / QM bit masks moved one place left
assign exx_bQin_lev0_t[0:55] = exx_bQin_lev0 | ({exx_sqrt_Qbitmask_q[1:55], 1'b0});
assign exx_bQMin_lev0_t[0:55] = exx_bQMin_lev0 | ({exx_sqrt_QMbitmask_q[1:55], 1'b0});
// Second-level sqrt operands, assembled per bit position from three sources:
//   - where newbitmask is set : the newest Q / QM bit (wQ / wMQ)
//   - where the Q / QM bitmask is set : forced to 1
//   - elsewhere (outside newbitmask and QMbitmask) : the masked bQ / bQM value
assign exx_sqrtlev22_Q[0:55] = (exx_sqrt_Qmaskvec[0:55] & exx_sqrt_newbitmask_q[0:55]) |
({56{1'b1}} & exx_sqrt_Qbitmask_q[0:55]) |
(exx_bQin_lev0_t[0:55] & (~(exx_sqrt_newbitmask_q[0:55] | exx_sqrt_QMbitmask_q[0:55]))); // need QM for 3 bit mask
assign exx_sqrtlev22_MQ[0:55] = (exx_sqrt_QMmaskvec[0:55] & exx_sqrt_newbitmask_q[0:55]) |
({56{1'b1}} & exx_sqrt_QMbitmask_q[0:55]) |
(exx_bQMin_lev0_t[0:55] & (~(exx_sqrt_newbitmask_q[0:55] | exx_sqrt_QMbitmask_q[0:55])));
// wQ / wMQ are the last (bit 56) bits of the level-0 Q / QM values,
// replicated across all 56 positions so the mask can pick the right column.
assign exx_sqrt_Qmaskvec[0:55] = {56{wQ}};
assign exx_sqrt_QMmaskvec[0:55] = {56{wMQ}};
assign wQ = exx_Qin_lev0[56];
assign wMQ = exx_QMin_lev0[56];
//-------------------------------------------------------------------
// Final Denominator mux and 3:2 CSA
//-------------------------------------------------------------------
// Second carry-save step of the cycle: takes the level-0 CSA outputs, shifts
// them left again and adds the operand chosen by the second digit (bit22).
// shift left by 1 again
assign exx_lev0_csaout_sum[0:56] = {exx_lev0_csaoutsh_sum[1:56], 1'b0};
assign exx_lev0_csaout_carry[0:56] = {exx_lev0_csaoutsh_carry[1:56], 1'b0};
// Operand selects come straight from the muxed digit wires; the trailing
// comments show the fuller qualification that is not applied here.
assign exx_lev22_selneg = exx_q_bit22; //exx_q_bit22 & (~exx_nq_bit22);
assign exx_lev22_selD = exx_nq_bit22_div; // and not exx_q_bit22_div and exx_div_q(0);
assign exx_lev22_selnD = exx_q_bit22_div; // and not exx_nq_bit22_div and exx_div_q(0);
assign exx_lev22_selQ = exx_q_bit22_sqrt; // and not exx_nq_bit22_sqrt and exx_sqrt_q(0);
assign exx_lev22_selMQ = exx_nq_bit22_sqrt; // and not exx_q_bit22_sqrt and exx_sqrt_q(0);
// divide operand: inverted or true denominator; sqrt operand: inverted Q or MQ
assign exx_denomQ_lev22_div = ((~exx_denom_q) & {56{exx_lev22_selnD}}) |
(exx_denom_q & {56{exx_lev22_selD}});
assign exx_denomQ_lev22_sqrt = ((~exx_sqrtlev22_Q) & {56{exx_lev22_selQ}}) |
(exx_sqrtlev22_MQ & {56{exx_lev22_selMQ}});
// shared XOR of the two partial-remainder words, then one XOR per mode
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev22_csaout_sum_xor(exx_lev22_csaout_sum_xor,
exx_lev0_csaout_sum,
exx_lev0_csaout_carry );
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev22_csaout_sum_div(exx_lev22_csaout_sum_div,
{exx_lev22_selneg, exx_denomQ_lev22_div},
exx_lev22_csaout_sum_xor );
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev22_csaout_sum_sqrt(exx_lev22_csaout_sum_sqrt,
{exx_lev22_selneg, exx_denomQ_lev22_sqrt},
exx_lev22_csaout_sum_xor );
// carry words: bitwise majority of operand, sum and carry
assign exx_lev22_csaout_carryout_div = (({exx_lev22_selneg, exx_denomQ_lev22_div}) & exx_lev0_csaout_sum) |
(({exx_lev22_selneg, exx_denomQ_lev22_div}) & exx_lev0_csaout_carry) |
(exx_lev0_csaout_sum & exx_lev0_csaout_carry);
assign exx_lev22_csaout_carryout_sqrt = (({exx_lev22_selneg, exx_denomQ_lev22_sqrt}) & exx_lev0_csaout_sum) |
(({exx_lev22_selneg, exx_denomQ_lev22_sqrt}) & exx_lev0_csaout_carry) |
(exx_lev0_csaout_sum & exx_lev0_csaout_carry);
// moved one place left, with selneg inserted as the new LSB
assign exx_lev22_csaout_carry_div[0:56] = {exx_lev22_csaout_carryout_div[1:56], exx_lev22_selneg};
assign exx_lev22_csaout_carry_sqrt[0:56] = {exx_lev22_csaout_carryout_sqrt[1:56], exx_lev22_selneg};
// final partial remainder for this cycle, picked by the latched mode bit [0];
// these feed the PR sum / carry register inputs
assign exx_PR_sum_final = (exx_lev22_csaout_sum_div & {57{exx_div_q[0]}}) |
(exx_lev22_csaout_sum_sqrt & {57{exx_sqrt_q[0]}});
assign exx_PR_carry_final = (exx_lev22_csaout_carry_div & {57{exx_div_q[0]}}) |
(exx_lev22_csaout_carry_sqrt & {57{exx_sqrt_q[0]}});
//-------------------------------------------------------------------
// on-the-fly quotient digit conversion logic
//-------------------------------------------------------------------
// Second conversion step of the cycle: the level-0 results (exx_Qin_lev0 /
// exx_QMin_lev0) are shifted left once more and the second digit is appended.
// As above, "&" in the formulas denotes concatenation.
// Qin=(Q & q) if q >= 0. Qin=(QM & 1) if q < 0
//timing: split out separate sqrt Q latch?
assign exx_Qin_lev1_sel0_div = (~exx_nq_bit22_div); // (exx_q_bit22_div | ((~exx_nq_bit22_div)));this combination will never be 11
assign exx_Qin_lev1_sel1_div = exx_nq_bit22_div;
assign exx_Qin_lev1_sel0_sqrt = (~exx_nq_bit22_sqrt); // (exx_q_bit22_sqrt | ((~exx_nq_bit22_sqrt))); this combination will never be 11
assign exx_Qin_lev1_sel1_sqrt = exx_nq_bit22_sqrt;
assign exx_Qin_lev1_div[0:56] = (({exx_Qin_lev0[1:56], exx_q_bit22_div}) & {57{exx_Qin_lev1_sel0_div}}) |
(({exx_QMin_lev0[1:56], 1'b1}) & {57{exx_Qin_lev1_sel1_div}});
assign exx_Qin_lev1_sqrt[0:56] = (({exx_Qin_lev0[1:56], exx_q_bit22_sqrt}) & {57{exx_Qin_lev1_sel0_sqrt}}) |
(({exx_QMin_lev0[1:56], 1'b1}) & {57{exx_Qin_lev1_sel1_sqrt}});
// QMin=(Q & 0) if q > 0. QMin=(QM & 0) if q < 0. QMin=(QM & 1) if q = 0
// The "q = 0" select uses the separately decoded notqornq flag instead of
// ~(nq | q); the original expression is kept in the trailing comment.
assign exx_QMin_lev1_sel0_div = exx_q_bit22_div;
assign exx_QMin_lev1_sel1_div = exx_nq_bit22_div;
assign exx_QMin_lev1_sel2_div = exx_notqornq_bit22_div; //((~(exx_nq_bit22_div | exx_q_bit22_div)));
assign exx_QMin_lev1_sel0_sqrt = exx_q_bit22_sqrt;
assign exx_QMin_lev1_sel1_sqrt = exx_nq_bit22_sqrt;
assign exx_QMin_lev1_sel2_sqrt = exx_notqornq_bit22_sqrt;
assign exx_QMin_lev1_div[0:56] = (({exx_Qin_lev0[1:56], 1'b0}) & {57{exx_QMin_lev1_sel0_div}}) |
(({exx_QMin_lev0[1:56], 1'b0}) & {57{exx_QMin_lev1_sel1_div}}) |
(({exx_QMin_lev0[1:56], 1'b1}) & {57{exx_QMin_lev1_sel2_div}});
assign exx_QMin_lev1_sqrt[0:56] = (({exx_Qin_lev0[1:56], 1'b0}) & {57{exx_QMin_lev1_sel0_sqrt}}) |
(({exx_QMin_lev0[1:56], 1'b0}) & {57{exx_QMin_lev1_sel1_sqrt}}) |
(({exx_QMin_lev0[1:56], 1'b1}) & {57{exx_QMin_lev1_sel2_sqrt}});
// Q register input: AND-OR mux over running / hold / hold-for-rounding / init.
// A new operation (ex2_anydivsqrt) clears Q to all zeros.
assign exx_Q_d = (exx_Qin_lev1_div & {57{(exx_div_q[0] & exx_divsqrt_running_q & (~ex3_divsqrt_done) & (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)))}}) | // normal running mode
(exx_Qin_lev1_sqrt & {57{(exx_sqrt_q[0] & exx_divsqrt_running_q & (~ex3_divsqrt_done) & (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)))}}) | // normal running mode
(exx_Q_q & {57{(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)}}) | // hold
(exx_Q_q & {57{(ex3_divsqrt_done & (~ex2_anydivsqrt))}}) | // hold for rounding
({57{1'b0}} & {57{ex2_anydivsqrt}}); // init
// QM register input: same term structure as exx_Q_d, except that a new
// operation sets QM to all ones.
assign exx_QM_d = (exx_QMin_lev1_div & {57{(exx_div_q[0] & exx_divsqrt_running_q & (~ex3_divsqrt_done) & (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)))}}) | // normal running mode
(exx_QMin_lev1_sqrt & {57{(exx_sqrt_q[0] & exx_divsqrt_running_q & (~ex3_divsqrt_done) & (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)))}}) | // normal running mode
(exx_QM_q & {57{(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)}}) | // hold
(exx_QM_q & {57{(ex3_divsqrt_done & (~ex2_anydivsqrt))}}) | // hold for rounding
({57{1'b1}} & {57{ex2_anydivsqrt}}); // init (all ones)
//-------------------------------------------------------------------------------------------------------------
// Massage Q and QM for use with square root:
//   sel_denom_pre1 = ~(((Q  << 2) | 1) << 29-i);
//   sel_denom_pre3 =  (((QM << 2) | 3) << 29-i);
//   sel_denom_1    = ~(((Q  << 2) | 1) << 28-i);
//   sel_denom_3    =   ((QM << 2) | 3) << 28-i;
// Left justify Q, QM and append 01, 11 for use in square root.
//-------------------------------------------------------------------------------------------------------------
// OR bits 1:55 of the Q / QM bit masks (padded with two low-order zeros) into the
// left-justified Q / QM registers.
assign exx_bQ_q_t  = exx_bQ_q[0:56]  | {exx_sqrt_Qbitmask_q[1:55],  2'b00};
assign exx_bQM_q_t = exx_bQM_q[0:56] | {exx_sqrt_QMbitmask_q[1:55], 2'b00};

// Level-1 left-justified Q: picks the level-0 bQ or bQM value (bits 0:55, low bit cleared).
assign exx_bQin_lev1_sqrt[0:56] =
    ({57{exx_Qin_lev1_sel0_sqrt}} & {exx_bQin_lev0[0:55],  1'b0}) |
    ({57{exx_Qin_lev1_sel1_sqrt}} & {exx_bQMin_lev0[0:55], 1'b0});

// Level-1 left-justified QM: sel1 and sel2 both choose the same bQM-based value
// (no '1' is appended here, unlike exx_QMin_lev1_sqrt), so they share one term.
assign exx_bQMin_lev1_sqrt[0:56] =
    ({57{exx_QMin_lev1_sel0_sqrt}} & {exx_bQin_lev0[0:55], 1'b0}) |
    ({57{exx_QMin_lev1_sel1_sqrt | exx_QMin_lev1_sel2_sqrt}} & {exx_bQMin_lev0[0:55], 1'b0});
// Next value of the left-justified Q used by square root.
// (The original notes tag the terms of this expression "lev0" / "lev1".)
assign exx_bQ_d[0:56] =
    ({57{exx_Qin_lev1_sqrt[55]}} & {exx_sqrt_newbitmask_q[0:55], 1'b0}) |     // new-bit mask in positions 0:55, gated by bit 55 of the updated Q
    ({57{exx_Qin_lev1_sqrt[56]}} & {1'b0, exx_sqrt_newbitmask_q[0:55]}) |     // new-bit mask in positions 1:56, gated by bit 56 of the updated Q
    ({57{exx_divsqrt_running_q & (~ex2_anydivsqrt)}} & exx_bQin_lev1_sqrt);   // level-1 value while running and no new op is starting

// Next value of the left-justified QM; same shape, driven by the updated QM.
assign exx_bQM_d[0:56] =
    ({57{exx_QMin_lev1_sqrt[55]}} & {exx_sqrt_newbitmask_q[0:55], 1'b0}) |
    ({57{exx_QMin_lev1_sqrt[56]}} & {1'b0, exx_sqrt_newbitmask_q[0:55]}) |
    ({57{exx_divsqrt_running_q & (~ex2_anydivsqrt)}} & exx_bQMin_lev1_sqrt);
// Square-root bit masks (56 bits each).
//   running, no new op : mask moves two positions toward the low-order end (two zeros enter at the top)
//   new op (ex2_anydivsqrt) : mask is re-seeded with the pattern shown on each line
// `zeros` is presumably a constant all-zero vector -- confirm at its declaration.
assign exx_sqrt_newbitmask_din[0:55] =
    ({56{exx_divsqrt_running_q & (~ex2_anydivsqrt)}} & {2'b00, exx_sqrt_newbitmask_q[0:53]}) |
    ({56{ex2_anydivsqrt}}                            & {1'b1, zeros[1:55]});                    // seed: 1 followed by zeros[1:55]

assign exx_sqrt_Qbitmask_din[0:55] =
    ({56{exx_divsqrt_running_q & (~ex2_anydivsqrt)}} & {2'b00, exx_sqrt_Qbitmask_q[0:53]}) |
    ({56{ex2_anydivsqrt}}                            & {3'b001, zeros[3:55]});                  // seed: 001 followed by zeros[3:55]

assign exx_sqrt_QMbitmask_din[0:55] =
    ({56{exx_divsqrt_running_q & (~ex2_anydivsqrt)}} & {2'b00, exx_sqrt_QMbitmask_q[0:53]}) |
    ({56{ex2_anydivsqrt}}                            & {3'b011, zeros[3:55]});                  // seed: 011 followed by zeros[3:55]
// todo: probably don't need both newbitmask and Qbitmask
//-------------------------------------------------------------------
//
//-------------------------------------------------------------------
//-------------------------------------------------------------------
// exponent logic
//-------------------------------------------------------------------
// Exponent adjust: all ones (i.e. -1 in 13-bit two's complement) when the
// normalize-shift flag ex4_norm_shl1_d is set, otherwise zero.
assign exx_exp_adj[1:13] = {13{ex4_norm_shl1_d}};

// NOTE(review): several exy_/exz_ prefixed names read below are not driven in this
// section; presumably they are staged or buffered copies of the matching exx_/exy_
// signals defined elsewhere in the module -- confirm.

// Divide: biased exponent of A minus unbiased exponent of B, and that value minus one.
assign exx_exp_addres_div_x0    = exx_a_biased_13exp_q - exy_b_ubexp[1:13];
assign exy_exp_addres_div_x0_m1 = exy_exp_addres_div_x0 - 13'd1;
// Pick the "minus one" form when the normalize shift applies.
assign exz_exp_addres_div_x0_adj =
    ({13{ ex4_norm_shl1_d}} & exz_exp_addres_div_x0_m1) |
    ({13{~ex4_norm_shl1_d}} & exy_exp_addres_div_x0);

// Square root: unbiased exponent of B shifted right one place with its top bit
// replicated, plus 1023.
assign exx_exp_addres_sqrt_x0 = {exy_b_ubexp[1], exy_b_ubexp[1:12]} + 13'd1023;

// Result exponent with no exception-enable adjustment: divide or square-root path.
assign exz_exp_addres_x0 =
    ({13{exx_div_q[0]}}  & exz_exp_addres_div_x0_adj) |
    ({13{exx_sqrt_q[0]}} & exy_exp_addres_sqrt_x0);

// Same A - B + adjust sum with the underflow-enabled / overflow-enabled offsets added.
assign exx_exp_addres_ux = exx_a_biased_13exp_q - exy_b_ubexp[1:13] + exx_exp_adj[1:13] + exx_exp_ux_adj;
assign exx_exp_addres_ox = exx_a_biased_13exp_q - exy_b_ubexp[1:13] + exx_exp_adj[1:13] + exx_exp_ox_adj;
// "+1" variants of the exponent sums.
// Adjust is 0 when the normalize-shift flag is set and +1 when it is not.
assign exx_exp_adj_p1[1:13] = {12'b0, ~ex4_norm_shl1_d};

// Divide: A - B plus the "+1" adjust.
assign exx_exp_addres_div_x0_p1  = exx_a_biased_13exp_q - exy_b_ubexp[1:13] + exx_exp_adj_p1[1:13];
// Square root: B exponent shifted right one place (top bit replicated) plus 1024.
assign exx_exp_addres_sqrt_x0_p1 = {exy_b_ubexp[1], exy_b_ubexp[1:12]} + 13'd1024;

// Divide / square-root selection of the "+1" exponent.
assign exx_exp_addres_x0_p1 =
    ({13{exx_div_q[0]}}  & exx_exp_addres_div_x0_p1) |
    ({13{exx_sqrt_q[0]}} & exx_exp_addres_sqrt_x0_p1);

// "+1" exponents with the underflow-enabled / overflow-enabled offsets added.
assign exx_exp_addres_ux_p1 = exx_a_biased_13exp_q - exy_b_ubexp[1:13] + exx_exp_adj_p1[1:13] + exx_exp_ux_adj;
assign exx_exp_addres_ox_p1 = exx_a_biased_13exp_q - exy_b_ubexp[1:13] + exx_exp_adj_p1[1:13] + exx_exp_ox_adj;
// Exception condition qualified by its enable bit (UE / OE / ZE / VE).
assign ueux = UE & underflow & (~special_force_zero);
assign oeox = OE & overflow  & (~exx_hard_spec_case);
assign zezx = ZE & ex4_div_by_zero_zx;
assign vevx = VE & (ex4_zero_div_zero | ex4_inf_div_inf | ex4_sqrt_neg | ex4_snan);

// Neither enabled underflow nor enabled overflow.
assign not_ueux_or_oeox = (~ueux) & (~oeox);

// Result exponent: unadjusted value unless an enabled underflow / overflow
// selects the corresponding adjusted value.
assign exx_exp_addres =
    ({13{(~ueux) & (~oeox)}} & exz_exp_addres_x0) |
    ({13{ueux}}              & exx_exp_addres_ux) |
    ({13{oeox}}              & exx_exp_addres_ox);

// High when every bit of the unadjusted result exponent is zero.
assign ex4_expresult_zero = ~(|exz_exp_addres_x0);
//
// Exponent offsets applied on enabled underflow (ux) / overflow (ox),
// as 13-bit two's-complement constants.
assign exx_exp_ux_adj_dp = 13'h0600;   //  1536  (0b0011000000000)
assign exx_exp_ux_adj_sp = 13'h00C0;   //   192  (0b0000011000000)
assign exx_exp_ox_adj_dp = 13'h1A00;   // -1536  (0b1101000000000)
assign exx_exp_ox_adj_sp = 13'h1F40;   //  -192  (0b1111101000000)

// Pick the double- or single-precision offset.
assign exx_exp_ux_adj = ({13{exx_dp}} & exx_exp_ux_adj_dp) |
                        ({13{exx_sp}} & exx_exp_ux_adj_sp);
assign exx_exp_ox_adj = ({13{exx_dp}} & exx_exp_ox_adj_dp) |
                        ({13{exx_sp}} & exx_exp_ox_adj_sp);
// underflow
// Double precision: unadjusted result exponent has bit 0 set (negative) or is all zero.
assign underflow_dp = exz_exp_addres_x0[0] | ex4_expresult_zero;

// Single precision: OR of four exponent ranges.
assign underflow_sp =
    exz_exp_addres_x0[0] |                                          // neg
    ((~|exz_exp_addres_x0[0:5]) & (|exz_exp_addres_x0[6:12])) |     // < -127+1023   0b000000xxxxxxx, low seven bits not all zero
    ((~|exz_exp_addres_x0[0:2]) &
     (|exz_exp_addres_x0[3:5]) & (~&exz_exp_addres_x0[3:5])) |      // < -127+1023   0b000xxxXXXXXXX, xxx neither 000 nor 111
    (exz_exp_addres_x0[0:12] == 13'b0001110000000);                 //   -127+1023   0b0001110000000

// Inexact contribution once the result has been denormalized.
assign underflow_denorm_dp = denorm_sticky | exx_divsqrt_fract_q[53];   // guard bit also
assign underflow_denorm_sp = denorm_sticky_sp | guard_dnr_sp | round_dnr_sp;
assign underflow_denorm    = (exx_dp & underflow_denorm_dp) | (exx_sp & underflow_denorm_sp);

// Before denormalization is done use the raw underflow; afterwards use the denormalized inexact bits.
assign underflow_fi = ((~ex4_denormalizing_result_done) & underflow) |
                      (ex4_denormalizing_result_done & underflow_denorm);
// overflow
// Exponent is one below the single-precision overflow point.
assign sp_overflow_brink_x47E = (exz_exp_addres_x0[0:12] == 13'b0010001111110);   // 0b0010001111110 128+1023-1
// rounded up past the implicit bit (which is bit 1 here) and into sp overflow
assign ex4_incexp_to_sp_overflow = ex4_divsqrt_fract_rounded[0] & sp_overflow_brink_x47E & exx_sp;

// Exponent is one below the double-precision overflow point.
assign dp_overflow_brink_x7FE = (exz_exp_addres_x0[0:12] == 13'b0011111111110);   // 0b0011111111110 1024+1023-1
assign ex4_incexp_to_dp_overflow = ex4_divsqrt_fract_rounded[0] & dp_overflow_brink_x7FE & exx_dp;

assign ex4_incexp_to_overflow = ex4_incexp_to_sp_overflow | ex4_incexp_to_dp_overflow;

// Double-precision overflow: rounding carried into overflow, or the exponent is already there.
assign overflow_dp =
    ex4_incexp_to_dp_overflow |
    ((~exz_exp_addres_x0[0]) & exz_exp_addres_x0[1]) |              // 0b01XXXXXXXXXXX > 1024+1023
    (exz_exp_addres_x0[0:12] == 13'b0011111111111);                 // 0b0011111111111   1024+1023

// Single-precision overflow: same idea with the single-precision limit.
assign overflow_sp =
    ex4_incexp_to_sp_overflow |
    ((~|exz_exp_addres_x0[0:1]) & exz_exp_addres_x0[2] &
     (|exz_exp_addres_x0[3:5])) |                                   // 0b001xxxXXXXXXX > 128+1023
    ((~exz_exp_addres_x0[0]) & exz_exp_addres_x0[1]) |              // 0b01xxxxXXXXXXX > 128+1023
    (exz_exp_addres_x0[0:12] == 13'b0010001111111);                 // 0b0010001111111   128+1023

// Precision-qualified overflow / underflow.
assign overflow  = (exx_sp & overflow_sp)  | (exx_dp & overflow_dp);
assign underflow = (exx_sp & underflow_sp) | (exx_dp & underflow_dp);
//-------------------------------------------------------------------
// result staging latch
//-------------------------------------------------------------------
// "Done" indication for stages ex3..ex5, each cleared by n_flush before being latched.
// ex2_divsqrt_done_din (first latch input below) is driven outside this section.
assign ex3_divsqrt_done_din = (~n_flush) & ex3_divsqrt_done;
assign ex4_divsqrt_done_din = (~n_flush) & ex4_divsqrt_done;
assign ex5_divsqrt_done_din = (~n_flush) & ex5_divsqrt_done;

// 4-bit staging register: each "done" bit moves to the next stage name.
// NOTE(review): the instance is named ex4_* while its scan nets are named ex5_*,
// and the second output is ex4_divsqrt_done_q whereas ex4_divsqrt_done is what is
// read above -- confirm both against the rest of the module.
tri_rlmreg_p #(
    .WIDTH(4),
    .INIT(0),
    .NEEDS_SRESET(0)
) ex4_div_done_lat (
    // power, clock and reset
    .vd(vdd),
    .gd(gnd),
    .clk(clk),
    .rst(rst),
    // clock / test controls
    .force_t(force_t),
    .d_mode(tiup),
    .delay_lclkr(delay_lclkr),
    .mpw1_b(mpw1_b),
    .mpw2_b(mpw2_b),
    .thold_b(thold_0_b),
    .sg(sg_0),
    // always active
    .act(tiup),
    // scan chain
    .scout(ex5_div_done_lat_scout),
    .scin(ex5_div_done_lat_scin),
    // data in: stage N "done"
    .din({ex2_divsqrt_done_din,
          ex3_divsqrt_done_din,
          ex4_divsqrt_done_din,
          ex5_divsqrt_done_din}),
    // data out: stage N+1 "done"
    .dout({ex3_divsqrt_done,
           ex4_divsqrt_done_q,
           ex5_divsqrt_done,
           ex6_divsqrt_done})
);
//------------------------------------------------------------------------------------------------------------------------------------
// final fixup stages: normalize, round, final staging
// generate the remainder
// The partial remainder is held as separate sum and carry vectors; adding them gives the 57-bit remainder.
assign ex3_divsqrt_remainder[00:56] = exx_PR_sum_q[0:56] + exx_PR_carry_q[0:56];
//-----------------------------------------------------------------------
// The ex4 remainder is read back from exx_divsqrt_fract_q (presumably the latched copy of the ex3 value -- confirm).
assign ex4_divsqrt_remainder[00:56] = exx_divsqrt_fract_q[00:56];
// Fan the four copies of the remainder-negative flag (and of its complement) out over
// bit slices 0:14, 15:28, 29:42 and 43:56 to form 57-bit select masks.
// ex4_rem_neg / ex4_rem_neg_b themselves are driven outside this section.
assign ex4_rem_neg_buf[00:14] = {15{ex4_rem_neg[0]}};
assign ex4_rem_neg_buf[15:28] = {14{ex4_rem_neg[1]}};
assign ex4_rem_neg_buf[29:42] = {14{ex4_rem_neg[2]}};
assign ex4_rem_neg_buf[43:56] = {14{ex4_rem_neg[3]}};
assign ex4_rem_neg_buf_b[00:14] = {15{ex4_rem_neg_b[0]}};
assign ex4_rem_neg_buf_b[15:28] = {14{ex4_rem_neg_b[1]}};
assign ex4_rem_neg_buf_b[29:42] = {14{ex4_rem_neg_b[2]}};
assign ex4_rem_neg_buf_b[43:56] = {14{ex4_rem_neg_b[3]}};
//assign ex4_rem_neg = ex4_divsqrt_remainder[00];
// Any remainder bit set.
assign ex4_rem_nonzero = |(ex4_divsqrt_remainder[00:56]); // or_reduce
// Inexact indication: non-zero remainder or single-precision round bits, suppressed once denormalization is done.
assign ex4_rem_nonzero_fi = (ex4_rem_nonzero | ex4_sp_inexact_roundbits) & (~ex4_denormalizing_result_done);
// Un-normalized, un-rounded fraction: Q where the complement mask is set, QM where the remainder-negative mask is set.
assign ex4_divsqrt_fract_preround_prenorm[00:56] = (exx_Q_q[00:56] & ex4_rem_neg_buf_b ) |
(exx_QM_q[00:56] & ex4_rem_neg_buf );
// Normalize-shift decision.
// ex4_norm_shl1_test: leading fraction bit (bit 0 for dp, bit 28 for sp) of the pre-normalized result is zero.
assign ex4_norm_shl1_test = (((~ex4_divsqrt_fract_preround_prenorm[00])) & exx_dp) | (((~ex4_divsqrt_fract_preround_prenorm[28])) & exx_sp); // normalize
// ex3 version of the same test, taken from the next-state Q / QM values and selected by bit 0 of the ex3 remainder
// (Q when that bit is 0, QM when it is 1).
assign ex3_norm_shl1_dp = (exx_Q_d[0] & (~ex3_divsqrt_remainder[0])) | (exx_QM_d[0] & ex3_divsqrt_remainder[0]);
assign ex3_norm_shl1_sp = (exx_Q_d[28] & (~ex3_divsqrt_remainder[0])) | (exx_QM_d[28] & ex3_divsqrt_remainder[0]);
// Shift is needed when the selected leading bit is zero.
assign ex3_norm_shl1 = (~((ex3_norm_shl1_dp & exx_dp) | (ex3_norm_shl1_sp & exx_sp)));
// Sticky copy of the shift flag: set from ex4_norm_shl1 when ex4_divsqrt_done_q, held through ex4_norm_shl1_q,
// cleared by n_flush, a new operation (ex2_anydivsqrt) or ex6_divsqrt_done.
// ex4_norm_shl1 is not driven in this section (presumably the latched ex3_norm_shl1 -- confirm).
assign ex4_norm_shl1_d = ((ex4_norm_shl1 & ex4_divsqrt_done_q) | ex4_norm_shl1_q) & (~(n_flush | ex2_anydivsqrt | ex6_divsqrt_done));
// Normalized fraction: unchanged, or shifted one place toward bit 0 with a zero entering at bit 56.
assign ex4_divsqrt_fract_preround[00:56] = (ex4_divsqrt_fract_preround_prenorm[00:56] & {57{(~ex4_norm_shl1)}}) |
({ex4_divsqrt_fract_preround_prenorm[01:56], 1'b0} & {57{ex4_norm_shl1}});
// Candidate rounded fractions (54 bits, with a leading zero to catch the carry-out):
//   p0 = truncated value, p1 = truncated value plus one unit in the last kept place.
// Double precision keeps fraction bits 0:52.
assign ex4_divsqrt_fract_p0_dp = {1'b0, ex4_divsqrt_fract_preround[0:52]};
assign ex4_divsqrt_fract_p1_dp = {1'b0, ex4_divsqrt_fract_preround[0:52]} + 54'd1;
// Single precision keeps fraction bits 28:51, padded with 29 low-order zeros;
// its increment is a one aligned with bit 51.
assign ex4_divsqrt_fract_p0_sp = {1'b0, ex4_divsqrt_fract_preround[28:51], 29'b0};
assign ex4_divsqrt_fract_p1_sp = {1'b0, ex4_divsqrt_fract_preround[28:51], 29'b0} +
                                 {24'b0, 1'b1, 29'b0};

// HW165073 detection: pre-normalized bits 52:56 equal 10000 on a single-precision
// operation that is done and needs the normalize shift. The hit only drives a spare net.
assign HW165073_bits   = (ex4_divsqrt_fract_preround_prenorm[52:56] == 5'b10000);
assign HW165073_hit    = HW165073_bits & exx_sp & ex4_divsqrt_done & ex4_norm_shl1;
assign spare_unused[1] = HW165073_hit;

// Single precision: any of fraction bits 52:56 set.
assign ex4_sp_inexact_roundbits = (|ex4_divsqrt_fract_preround[52:56]) & ex4_sp;

// Precision select for the two candidates.
assign ex4_divsqrt_fract_p0 = ({54{exx_sp}} & ex4_divsqrt_fract_p0_sp) |
                              ({54{exx_dp}} & ex4_divsqrt_fract_p0_dp);
assign ex4_divsqrt_fract_p1 = ({54{exx_sp}} & ex4_divsqrt_fract_p1_sp) |
                              ({54{exx_dp}} & ex4_divsqrt_fract_p1_dp);
// Rounding inputs.
assign sign   = ex4_divsqrt_sign;     //exx_divsqrt_sign_d;
assign sticky = ex4_rem_nonzero;
// lsb / guard / round bit positions depend on precision (dp: 52/53/54, sp: 51/52/53);
// round also absorbs sticky.
assign lsb   = (ex4_dp & ex4_divsqrt_fract_preround[52]) | (ex4_sp & ex4_divsqrt_fract_preround[51]);
assign guard = (ex4_dp & ex4_divsqrt_fract_preround[53]) | (ex4_sp & ex4_divsqrt_fract_preround[52]);
assign round = sticky | (ex4_dp & ex4_divsqrt_fract_preround[54]) | (ex4_sp & ex4_divsqrt_fract_preround[53]);
// Sticky widened with a disabled (UE = 0) underflow that is not a hard special case.
assign sticky_w_underflow = ex4_rem_nonzero | (underflow & (~exx_hard_spec_case) & (~UE));

// Rounding mode decode from exx_fpscr_q[5:6].
assign RNEmode = (~exx_fpscr_q[5]) & (~exx_fpscr_q[6]);   // 00
assign RTZmode = (~exx_fpscr_q[5]) &   exx_fpscr_q[6];    // 01
assign RPImode =   exx_fpscr_q[5]  & (~exx_fpscr_q[6]);   // 10
assign RNImode =   exx_fpscr_q[5]  &   exx_fpscr_q[6];    // 11

// Increment decision per mode; RTZmode contributes nothing (it never rounds up).
assign ex4_round_up =
    (RNEmode & guard & (lsb | round)) |          // round to nearest mode
    (RPImode & (guard | round) & (~sign)) |
    (RNImode & (guard | round) &   sign);
// Increment decision used with sticky_w_underflow; only the RPI / RNI modes contribute.
assign ex4_round_up_underflow =
    (RPImode & sticky_w_underflow & (~sign)) |
    (RNImode & sticky_w_underflow &   sign);

//timing todo: don't need this whole vector
assign ex4_divsqrt_fract_rounded = ({54{~ex4_round_up}} & ex4_divsqrt_fract_p0) |
                                   ({54{ ex4_round_up}} & ex4_divsqrt_fract_p1);

// rounded up past the implicit bit (which is bit 1 here)
assign ex4_roundup_incexp   = ex4_divsqrt_fract_rounded[0] & (~ex4_start_a_denorm_result) &
                              (~exx_hard_spec_case) & (~ex4_force);
assign ex4_x_roundup_incexp = ex4_dnr_roundup_incexp | ex4_roundup_incexp;
//-----------------------------------------------------------------------
// Denormal result handling
// Notes carried over from the original (VHDL) description:
//   exx_exp_addres <= std_ulogic_vector(unsigned((exx_a_biased_13exp_q) -
//                     (exx_b_ubexp(1) & exx_b_ubexp(1) & exx_b_ubexp(1 to 11)) +
//                     (exx_exp_adj(1) & exx_exp_adj(1) & exx_exp_adj(1 to 11))));
//   underflow
//   underflow <= exx_exp_addres(0);
//   ex4_divsqrt_denorm_hold
//   exp_gt_cap <= (exx_exp_addres(0 to 12) < "1111111001011"); -- < -53
//   result is too small to denormalize = exp_gt_cap

// Unadjusted result exponent, widened to 14 bits by repeating bit 0, plus a precision-specific offset.
assign denorm_exp_addres    = {exz_exp_addres_x0[0], exz_exp_addres_x0[0:12]} + 14'd53;
assign denorm_exp_addres_sp = {exz_exp_addres_x0[0], exz_exp_addres_x0[0:12]} + 14'h3C99;   // 0b11110010011001 -(896-25)=-871
//denorm_exp_addres_sp_lsb <= std_ulogic_vector(((exx_exp_addres_x0(0) & exx_exp_addres_x0(0 to 12))) + ("11110010010111")); -- -(896-23)=-873

// denormal result shiftoff zero case
// Upper 13 bits of the offset exponent are zero, i.e. the value is
// 0 or 1: implicit bit shifted to Guard or Round position
assign denorm_res_shiftoff_exp = (denorm_exp_addres[0:12] == 13'd0);
// Set when a denormal result starts in that case, held via denorm_res_shiftoff_q, cleared by a new operation.
assign denorm_res_shiftoff_din = (~ex2_anydivsqrt) &
                                 (denorm_res_shiftoff_q | (denorm_res_shiftoff_exp & ex4_start_denorm_result));

// Offset exponent has bit 0 set (negative) for the active precision.
assign exp_gt_cap = (ex4_dp & denorm_exp_addres[0]) | (ex4_sp & denorm_exp_addres_sp[0]);

// Double precision: exponent negative or zero, and exp_gt_cap not set.
assign ex4_denorm_result_det    = exx_dp & (~exp_gt_cap) & (exz_exp_addres_x0[0] | ex4_expresult_zero);
// if the exponent is in the range [871 to 896] [0x367 to 0x380] 0x369 puts the lsb one to the left of the implicit bit
assign ex4_sp_denorm_result_det = exx_sp & (~exp_gt_cap) & ex4_exp_le_896;
// Single-precision "exponent at or below 896" detect, as two terms.
// Exponents whose bits 0:5 are all zero (0..127) and negative exponents (bit 0 set)
// are not matched by this expression.
assign ex4_exp_le_896 =
    (exz_exp_addres_x0[0:12] == 13'b0001110000000) |                // =0b0001110000000
    ((~|exz_exp_addres_x0[0:2]) &
     (|exz_exp_addres_x0[3:5]) & (~&exz_exp_addres_x0[3:5]));       // less than 0b0001110000000: bits 0:2 clear, bits 3:5 neither 000 nor 111
// Unadjusted result exponent equals 0x369 (0b0001101101001).
assign exp_eq_369 = (exz_exp_addres_x0[0:12] == 13'h0369);
assign exp_eq_368 = (exz_exp_addres_x0[0:12] == 13'b0001101101000) ? 1'b1 :