// ========== Copyright Header Begin ==========================================
//
// OpenSPARC T1 Processor File: sparc_ifu_dcl.v
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
//
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
//
// The above named program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
//
// ========== Copyright Header End ============================================
////////////////////////////////////////////////////////////////////////
/*
// Module Name: sparc_ifu_dcl
// Description:
// The decode control logic block does branch condition evaluation,
// delay slot management, and appropriate condition code
// selection. It also executes the tcc instruction and kills the E
// stage instruction if a move did not succeed. The DCL block is
// also responsible for generating the correct select signals to
// choose the branch offset and immediate operand.
//
*/
////////////////////////////////////////////////////////////////////////
// Bit positions of the N, Z, V and C flags inside a 4-bit condition-code
// group. They are used further down to index cc_e.
`define CC_N 3
`define CC_Z 2
`define CC_V 1
`define CC_C 0
// The FP_* and FSR_FCC* macros below are not referenced anywhere inside
// this module.
// NOTE(review): presumably FP_* name the four floating-point compare
// outcomes and FSR_FCC*_HI/LO the bit positions of the fcc fields in the
// FSR -- confirm against the users of these macros before relying on that.
`define FP_U 3
`define FP_G 2
`define FP_L 1
`define FP_E 0
`define FSR_FCC0_HI 11
`define FSR_FCC0_LO 10
`define FSR_FCC1_HI 33
`define FSR_FCC1_LO 32
`define FSR_FCC2_HI 35
`define FSR_FCC2_LO 34
`define FSR_FCC3_HI 37
`define FSR_FCC3_LO 36
// IFU decode control logic: decodes the instruction class from the staged
// op/op3 fields, evaluates branch / conditional-move / tcc conditions,
// keeps a per-thread delay-slot annul bit, holds a local copy of the FCCs,
// and drives the immediate-data and branch-offset selects.
//
// Naming used by the ports: the trailing _s/_d/_e/_m/_w/_w2 letter is the
// pipeline stage of the signal; a trailing _l marks an active-low signal.
module sparc_ifu_dcl
(/*AUTOARG*/
// Outputs
ifu_exu_kill_e, ifu_exu_dontmv_regz0_e, ifu_exu_dontmv_regz1_e,
ifu_exu_tcc_e, ifu_exu_dbrinst_d, ifu_ffu_mvcnd_m,
dcl_fcl_bcregz0_e, dcl_fcl_bcregz1_e, dtu_inst_anull_e,
dcl_swl_tcc_done_m, dcl_imd_immdata_sel_simm13_d_l,
dcl_imd_immdata_sel_movcc_d_l, dcl_imd_immdata_sel_sethi_d_l,
dcl_imd_immdata_sel_movr_d_l, dcl_imd_broff_sel_call_d_l,
dcl_imd_broff_sel_br_d_l, dcl_imd_broff_sel_bcc_d_l,
dcl_imd_broff_sel_bpcc_d_l, dcl_imd_immbr_sel_br_d, so,
// Inputs
rclk, se, si, dtu_reset, exu_ifu_cc_d, fcl_dcl_regz_e,
exu_ifu_regn_e, ffu_ifu_cc_w2, ffu_ifu_cc_vld_w2,
tlu_ifu_flush_pipe_w, swl_dcl_thr_d, swl_dcl_thr_w2,
imd_dcl_brcond_d, imd_dcl_mvcond_d, fdp_dcl_op_s, fdp_dcl_op3_s,
imd_dcl_abit_d, dec_dcl_cctype_d, dtu_dcl_opf2_d,
fcl_dtu_inst_vld_e, fcl_dtu_intr_vld_e, ifu_tlu_flush_w
);
// rclk      : clock; assigned to the internal net clk used by every flop
// se        : scan enable, wired to the .se pin of every flop
// si        : scan input; not connected to any logic inside this module
// dtu_reset : reset of the dffr flops (annul state and the FCC copies)
input rclk,
se,
si,
dtu_reset;
input [7:0] exu_ifu_cc_d; // latest CCs from EXU ([7:4] = xcc, [3:0] = icc)
input fcl_dcl_regz_e, // rs1=0
exu_ifu_regn_e; // rs1<0
input [7:0] ffu_ifu_cc_w2; // four 2-bit FCC fields from the FFU, loaded into the local copy
input [3:0] ffu_ifu_cc_vld_w2; // per-thread load enable for ffu_ifu_cc_w2
input tlu_ifu_flush_pipe_w; // ORed with ifu_tlu_flush_w; the result clears annul state
input [3:0] swl_dcl_thr_d, // per-thread select bits for the D-stage instruction
swl_dcl_thr_w2; // per-thread select bits that steer the (registered) flush
input [3:0] imd_dcl_brcond_d; // branch condition type
input [7:0] imd_dcl_mvcond_d; // mov condition type
input [1:0] fdp_dcl_op_s; // registered here into op_d
input [5:0] fdp_dcl_op3_s; // registered here into op3_d
input imd_dcl_abit_d; // anull bit for cond branch
input [2:0] dec_dcl_cctype_d; // which cond codes to use
input dtu_dcl_opf2_d; // qualifies the fmovr term of cc_mvbr_d
input fcl_dtu_inst_vld_e; // gates the tcc results and the setting/clearing of annul bits
input fcl_dtu_intr_vld_e; // like inst_vld_e, also clears the E thread's annul bit
input ifu_tlu_flush_w; // ORed with tlu_ifu_flush_pipe_w (see above)
// ifu_exu_kill_e          : copy of dtu_inst_anull_e
// ifu_exu_dontmv_regz0/1_e: mov instruction whose condition is false,
//                           precomputed for the regz=0 / regz=1 case
// ifu_exu_tcc_e           : valid, non-annulled tcc whose condition is true
output ifu_exu_kill_e,
ifu_exu_dontmv_regz0_e,
ifu_exu_dontmv_regz1_e,
ifu_exu_tcc_e;
output ifu_exu_dbrinst_d; // driven by ~op_d[1]
output ifu_ffu_mvcnd_m; // cond_brtaken_e delayed by one flop
// branch resolution precomputed for the regz=0 / regz=1 case
output dcl_fcl_bcregz0_e,
dcl_fcl_bcregz1_e;
output dtu_inst_anull_e; // the instruction in E is annulled
output dcl_swl_tcc_done_m; // valid, non-annulled tcc whose condition was false (one flop after E)
output dcl_imd_immdata_sel_simm13_d_l, // imm data select
dcl_imd_immdata_sel_movcc_d_l,
dcl_imd_immdata_sel_sethi_d_l,
dcl_imd_immdata_sel_movr_d_l;
output dcl_imd_broff_sel_call_d_l, // dir branch offset select
dcl_imd_broff_sel_br_d_l,
dcl_imd_broff_sel_bcc_d_l,
dcl_imd_broff_sel_bpcc_d_l;
output dcl_imd_immbr_sel_br_d; // copy of dbr_inst_d: pick branch offset over immediate
output so; // scan output; never driven inside this module
//----------------------------------------------------------------------
// Declarations
//----------------------------------------------------------------------
// clock
wire        clk;

// opcode fields staged into D
wire [1:0]  op_d;
wire [5:0]  op3_d;

// instruction-class decodes (D) and their E-stage copies
wire        call_inst_d, call_inst_e;
wire        dbr_inst_d,  dbr_inst_e;
wire        ibr_inst_d,  ibr_inst_e;
wire        mov_inst_d,  mov_inst_e;
wire        tcc_inst_d,  tcc_inst_e;
wire        tcc_done_e;

// choice of the condition field and of the condition source
wire        mvbr_sel_br_d;
wire        sel_movcc;
wire        sel_movr;
wire        cc_mvbr_d,     cc_mvbr_e;
wire        fpcond_mvbr_d, fpcond_mvbr_e;
wire [3:0]  br_cond_d,     br_cond_e;
wire [3:0]  ls_brcond_d,   ls_brcond_e;

// integer condition codes
wire        use_xcc_d;
wire [3:0]  cc_e;
wire        ltz_e;
wire [7:0]  cc_breval_e;
wire        cc_eval0;
wire        cc_eval1;

// floating-point condition codes (local copy and evaluation)
wire        use_fcc0_d;
wire        use_fcc1_d;
wire        use_fcc2_d;
wire        use_fcc3_d;
wire [1:0]  curr_fcc_d;
wire [7:0]  fcc_d;
wire [7:0]  t0_fcc_d,   t1_fcc_d,   t2_fcc_d,   t3_fcc_d;
wire [7:0]  t0_fcc_nxt, t1_fcc_nxt, t2_fcc_nxt, t3_fcc_nxt;
wire [7:0]  fp_breval_d;
wire        fp_eval0_d;
wire        fp_eval1_d;
wire        fp_eval_d;
wire        fp_eval_e;
// (the decoded fcc_dec_d / fcc_dec_e nets are no longer used)

// merged branch result
wire [1:0]  ccfp_sel;
wire        ccfp_eval;
wire        ccbr_taken_e;
wire        br_always_e;
wire        r_eval0;
wire        r_eval1;
wire        cond_brtaken_e;

// thread selects
wire [3:0]  thr_e;
wire [3:0]  thr_dec_d;
wire [3:0]  thr_vld_e;

// delay-slot annul state
wire        abit_e;
wire        anull_all;
wire        anull_ubr;
wire        anull_cbr;
wire [3:0]  anull_next_e;
wire [3:0]  anull_e;
wire [3:0]  thr_anull_d;
wire        inst_anull_d;
wire        inst_anull_e;
wire        all_flush_w;
wire        all_flush_w2;
wire [3:0]  flush_abit;
//----------------------------------------------------------------------
// Code start here
//----------------------------------------------------------------------
// Every flop in this module is clocked from the incoming rclk.
assign clk = rclk;

// Opcode fields: S stage -> D stage
dff #(2) opreg (
    .clk (clk),
    .din (fdp_dcl_op_s),
    .q   (op_d),
    .se  (se), .si (), .so ());

dff #(6) op3_reg (
    .clk (clk),
    .din (fdp_dcl_op3_s),
    .q   (op3_d),
    .se  (se), .si (), .so ());

// Annul bit of the branch: D stage -> E stage
// (instantiated with the dff default width, as in the original code)
dff abite_reg (
    .clk (clk),
    .din (imd_dcl_abit_d),
    .q   (abit_e),
    .se  (se), .si (), .so ());

// Thread select bits: D stage -> E stage
// (original note kept: need to protect from scan contention)
dff #(4) thre_reg (
    .clk (clk),
    .din (swl_dcl_thr_d),
    .q   (thr_e),
    .se  (se), .si (), .so ());
//------------------------------
// Choose correct immediate data
//------------------------------
// The four selects are active low and exactly one of them is low at a time.

// movcc (op3 = 101100): only op[1], op3[5:3] and op3[0] are examined.
wire immsel_movcc_d = op_d[1] & op3_d[5] & ~op3_d[4] & op3_d[3] & ~op3_d[0];

// movr (op3 = 101111).
// The decode was deliberately trimmed to op[1], op3[5], op3[3], op3[1] and
// op3[0] to ease a timing path (v1.29 -> v1.30). A side effect is that a
// FLUSH instruction also matches and therefore gets the MOVR immediate,
// which is the WRONG immediate data for the exu. This is acceptable
// because the architecture ignores the flush address entirely
// (confirmed with Sanjay, 03/31/03).
wire immsel_movr_d  = op_d[1] & op3_d[5] & op3_d[3] & op3_d[1] & op3_d[0];

// sethi format: selected for every instruction with op[1] = 0.
wire immsel_sethi_d = ~op_d[1];

assign dcl_imd_immdata_sel_movcc_d_l = ~immsel_movcc_d;
assign dcl_imd_immdata_sel_movr_d_l  = ~immsel_movr_d;
assign dcl_imd_immdata_sel_sethi_d_l = ~immsel_sethi_d;

// simm13 is the default: selected (low) when none of the others is.
assign dcl_imd_immdata_sel_simm13_d_l = immsel_movcc_d |
                                        immsel_movr_d  |
                                        immsel_sethi_d;
//------------------------------
// Choose correct branch offset
//------------------------------
// Active-low selects, written as the OR of the conditions that deselect
// each offset. Exactly one select is low for any op[0], op3[4:3] value.

// call or ld/store: selected whenever op[0] = 1
assign dcl_imd_broff_sel_call_d_l = ~op_d[0];

// branch on register: op[0] = 0 and op3[4:3] = 11
assign dcl_imd_broff_sel_br_d_l   = op_d[0] | ~op3_d[4] | ~op3_d[3];

// branch w/o prediction: op[0] = 0 and op3[4:3] = 10
assign dcl_imd_broff_sel_bcc_d_l  = op_d[0] | ~op3_d[4] | op3_d[3];

// everything else: op[0] = 0 and op3[4] = 0
assign dcl_imd_broff_sel_bpcc_d_l = op_d[0] | op3_d[4];
//-------------------------------------
// mark branch/conditional instructions
//-------------------------------------
// Terms shared by several of the decodes below.
wire mk_op0x_d     = ~op_d[1];                          // op = 0x
wire mk_op10_d     = op_d[1] & ~op_d[0];                // op = 10
wire mk_op3_111x_d = op3_d[5] & op3_d[4] & op3_d[3];    // op3[5:3] = 111
wire mk_op3_101x_d = op3_d[5] & ~op3_d[4] & op3_d[3];   // op3[5:3] = 101

// call: op = 01
assign call_inst_d = mk_op0x_d & op_d[0];

// call or branch but not nop/sethi (op = 00 needs op3[4] or op3[3] set)
assign dbr_inst_d = mk_op0x_d & (op_d[0] | op3_d[4] | op3_d[3]);

// Choose between branch offset and immediate operand
assign dcl_imd_immbr_sel_br_d = dbr_inst_d;

// tell exu to use pc instead of rs1 (asserted for every op = 0x)
assign ifu_exu_dbrinst_d = mk_op0x_d;

// jmpl + return: op3 = 11100x
assign ibr_inst_d = mk_op10_d & mk_op3_111x_d & ~op3_d[2] & ~op3_d[1];

// mov: op3 = 101100 or 101111 (op3[1] and op3[0] equal)
assign mov_inst_d = mk_op10_d & mk_op3_101x_d & op3_d[2] &
                    (op3_d[1] ~^ op3_d[0]);

// tcc: op3 = 111010
assign tcc_inst_d = mk_op10_d & mk_op3_111x_d &
                    ~op3_d[2] & op3_d[1] & ~op3_d[0];

// D -> E staging of the class bits that are needed in E
dff #(1) call_inste_reg (
    .clk (clk),
    .din (call_inst_d),
    .q   (call_inst_e),
    .se  (se), .si (), .so ());

dff #(1) dbr_inste_reg (
    .clk (clk),
    .din (dbr_inst_d),
    .q   (dbr_inst_e),
    .se  (se), .si (), .so ());

dff #(1) ibr_inste_reg (
    .clk (clk),
    .din (ibr_inst_d),
    .q   (ibr_inst_e),
    .se  (se), .si (), .so ());

dff #(1) mov_inste_reg (
    .clk (clk),
    .din (mov_inst_d),
    .q   (mov_inst_e),
    .se  (se), .si (), .so ());

dff #(1) tcc_inste_reg (
    .clk (clk),
    .din (tcc_inst_d),
    .q   (tcc_inst_e),
    .se  (se), .si (), .so ());
// Terms shared by the two decodes below.
wire mv_op00_d = ~op_d[1] & ~op_d[0];
wire mv_op10_d = op_d[1] & ~op_d[0];

// Take the condition from the branch "cond" field for a branch (op = 00)
// or for the tcc pattern op3[3:0] = 1010. The tcc term is not qualified
// by op or by op3[5:4].
assign mvbr_sel_br_d = mv_op00_d |                                     // br
                       (op3_d[3] & ~op3_d[2] & op3_d[1] & ~op3_d[0]);  // tcc

// Instructions whose condition is tested on a register value rather than
// on condition codes.
wire mv_bpr_d   = mv_op00_d & op3_d[4] & op3_d[3];            // bpr
wire mv_movr_d  = mv_op10_d &
                  op3_d[5] & ~op3_d[4] & op3_d[3] &
                  op3_d[2] & op3_d[1] & op3_d[0];             // movr
wire mv_fmovr_d = mv_op10_d &
                  op3_d[5] & op3_d[4] & ~op3_d[3] &
                  op3_d[2] & ~op3_d[1] & op3_d[0] &
                  dtu_dcl_opf2_d;                             // fmovr

// cc_mvbr_d = 1: condition comes from condition codes (none of the above)
assign cc_mvbr_d = ~(mv_bpr_d | mv_movr_d | mv_fmovr_d);
//---------------------------
// FCC Logic
//--------------------------
// Decode the low two cctype bits into one select per fcc field and pick
// that 2-bit field out of the selected thread's 8-bit FCC copy (fcc_d).
assign use_fcc0_d = ~(dec_dcl_cctype_d[1] | dec_dcl_cctype_d[0]);   // 00
assign use_fcc1_d = dec_dcl_cctype_d[0] & ~dec_dcl_cctype_d[1];     // 01
assign use_fcc2_d = dec_dcl_cctype_d[1] & ~dec_dcl_cctype_d[0];     // 10
assign use_fcc3_d = dec_dcl_cctype_d[1] & dec_dcl_cctype_d[0];      // 11

mux4ds #(2) fcc_mux (
    .sel0 (use_fcc0_d), .in0 (fcc_d[1:0]),
    .sel1 (use_fcc1_d), .in1 (fcc_d[3:2]),
    .sel2 (use_fcc2_d), .in2 (fcc_d[5:4]),
    .sel3 (use_fcc3_d), .in3 (fcc_d[7:6]),
    .dout (curr_fcc_d[1:0]));

// Retired alternative, kept for reference: decode the fcc value in D and
// stage the decoded form to E.
//   assign fcc_dec_d[0] = ~curr_fcc_d[1] & ~curr_fcc_d[0];
//   assign fcc_dec_d[1] = ~curr_fcc_d[1] & curr_fcc_d[0];
//   assign fcc_dec_d[2] = curr_fcc_d[1] & ~curr_fcc_d[0];
//   assign fcc_dec_d[3] = curr_fcc_d[1] & curr_fcc_d[0];
//   dff #(4) fcce_reg(.din (fcc_dec_d),
//                     .q   (fcc_dec_e),
//                     .clk (clk),
//                     .se  (se), .si(), .so());
//------------------
// CC Logic for BCC
//------------------
// Choose appropriate CCs
//
// dec_cctype is 3 bits
// 10X icc
// 11X xcc
// 000 fcc0
// 001 fcc1
// 010 fcc2
// 011 fcc3
// assign use_xcc_d = (dec_dcl_cctype_d[2] | op3_d[3]) & dec_dcl_cctype_d[1];
// Only cctype[1] is examined, so use_xcc_d is also 1 for the fcc2/fcc3
// encodings. cc_e feeds ccfp_eval only while fpcond_mvbr_e is 0.
assign use_xcc_d = dec_dcl_cctype_d[1];
// Condition comes from the FP condition codes: cctype[2] = 0 and the
// instruction is not a tcc.
assign fpcond_mvbr_d = ~dec_dcl_cctype_d[2] & ~tcc_inst_d;
// D -> E staging of the FP-condition flag
dff fpbr_reg(.din (fpcond_mvbr_d),
.clk (clk),
.q (fpcond_mvbr_e),
.se (se), .si(), .so());
// mux between xcc and icc
// The commented-out RTL below is the reference behaviour; it is implemented
// by four hand-instantiated library cells, one per condition-code bit.
// NOTE(review): bw_u1_soffm2_4x is presumably a scan flop with a built-in
// 2:1 input mux (s = 1 selects d1) -- confirm against the cell library.
// assign cc_d = use_xcc_d ? exu_ifu_cc_d[7:4] : // xcc
// exu_ifu_cc_d[3:0]; // icc
// dff #(4) ccreg_e(.din (cc_d),
// .clk (clk),
// .q (cc_e),
// .se (se), .si(), .so());
// bit 0 of the selected group -> cc_e[0]
bw_u1_soffm2_4x UZsize_ccreg0_e(.d0 (exu_ifu_cc_d[0]),
.d1 (exu_ifu_cc_d[4]),
.s (use_xcc_d),
.q (cc_e[0]),
.ck (clk), .se(se), .sd(), .so());
// bit 1 of the selected group -> cc_e[1]
bw_u1_soffm2_4x UZsize_ccreg1_e(.d0 (exu_ifu_cc_d[1]),
.d1 (exu_ifu_cc_d[5]),
.s (use_xcc_d),
.q (cc_e[1]),
.ck (clk), .se(se), .sd(), .so());
// bit 2 of the selected group -> cc_e[2]
bw_u1_soffm2_4x UZsize_ccreg2_e(.d0 (exu_ifu_cc_d[2]),
.d1 (exu_ifu_cc_d[6]),
.s (use_xcc_d),
.q (cc_e[2]),
.ck (clk), .se(se), .sd(), .so());
// bit 3 of the selected group -> cc_e[3]
bw_u1_soffm2_4x UZsize_ccreg3_e(.d0 (exu_ifu_cc_d[3]),
.d1 (exu_ifu_cc_d[7]),
.s (use_xcc_d),
.q (cc_e[3]),
.ck (clk), .se(se), .sd(), .so());
//------------------------------
// Evaluate Branch
//------------------------------
// Pick the 4-bit condition field according to the instruction kind.
// Exactly one of mvbr_sel_br_d / sel_movcc / sel_movr is high.
assign sel_movcc = cc_mvbr_d & ~mvbr_sel_br_d;
assign sel_movr  = ~(mvbr_sel_br_d | cc_mvbr_d);

// br_cond is the same as the "cond" field = inst[28:25] for bcc
mux3ds #(4) brcond_mux (
    .sel0 (mvbr_sel_br_d), .in0 (imd_dcl_brcond_d),       // br or tcc
    .sel1 (sel_movcc),     .in1 (imd_dcl_mvcond_d[7:4]),  // movcc
    .sel2 (sel_movr),      .in2 (imd_dcl_mvcond_d[3:0]),  // movr
    .dout (br_cond_d));

dff #(4) brcond_e_reg (
    .clk (clk),
    .din (br_cond_d),
    .q   (br_cond_e),
    .se  (se), .si (), .so ());

// Branch Type Decode: one select bit per value of br_cond_d[1:0]
assign ls_brcond_d = { br_cond_d[1] &  br_cond_d[0],    // [3] : 11
                       br_cond_d[1] & ~br_cond_d[0],    // [2] : 10
                      ~br_cond_d[1] &  br_cond_d[0],    // [1] : 01
                      ~br_cond_d[1] & ~br_cond_d[0]};   // [0] : 00

dff #(4) lsbrc_e_reg (
    .clk (clk),
    .din (ls_brcond_d),
    .q   (ls_brcond_e),
    .se  (se), .si (), .so ());
// Evaluate potential integer CC branches.
// cc_breval_e[i] is the outcome of the non-inverted condition whose low
// three cond bits equal i.
assign ltz_e = cc_e[`CC_V] ^ cc_e[`CC_N];       // signed "less than"

assign cc_breval_e = { cc_e[`CC_V],                 // [7] BPVS
                       cc_e[`CC_N],                 // [6] BPNEG
                       cc_e[`CC_C],                 // [5] BPCS
                       cc_e[`CC_Z] | cc_e[`CC_C],   // [4] BPLEU
                       ltz_e,                       // [3] BPL
                       cc_e[`CC_Z] | ltz_e,         // [2] BPLE
                       cc_e[`CC_Z],                 // [1] BPE
                       1'b0 };                      // [0] BPN

// mux to choose right condition: AND each half with the decoded low two
// cond bits and OR the four products together.
assign cc_eval0 = |(cc_breval_e[3:0] & ls_brcond_e);    // used when cond[2] = 0
assign cc_eval1 = |(cc_breval_e[7:4] & ls_brcond_e);    // used when cond[2] = 1
// Evaluate FP CC branches in D stage.
// fp_breval_d[i] is the outcome of the condition whose low three cond bits
// equal i. Each comment gives that condition followed by the name of its
// inverted form.
assign fp_breval_d = { curr_fcc_d[1] &  curr_fcc_d[0],  // [7] FBU  / O
                       curr_fcc_d[1] & ~curr_fcc_d[0],  // [6] FBG  / ULE
                       curr_fcc_d[1],                   // [5] FBUG / LE
                      ~curr_fcc_d[1] &  curr_fcc_d[0],  // [4] FBL  / UGE
                       curr_fcc_d[0],                   // [3] FBUL / GE
                       curr_fcc_d[1] ^  curr_fcc_d[0],  // [2] FBLG / UE
                       curr_fcc_d[1] |  curr_fcc_d[0],  // [1] FBNE / E
                       1'b0 };                          // [0] FBN  / A

// AND each half with the decoded low two cond bits, OR the products.
assign fp_eval0_d = |(fp_breval_d[3:0] & ls_brcond_d);
assign fp_eval1_d = |(fp_breval_d[7:4] & ls_brcond_d);

// cond[2] picks the half
assign fp_eval_d = br_cond_d[2] ? fp_eval1_d : fp_eval0_d;

// FP result: D stage -> E stage
dff #(1) fpev_ff (
    .clk (clk),
    .din (fp_eval_d),
    .q   (fp_eval_e),
    .se  (se), .si (), .so ());
// merge eval0, eval1 and fp condition codes
// Integer path: ccfp_sel[0]/[1] pick cc_eval0/cc_eval1 by cond[2], and both
// are off when the condition comes from the FP condition codes. In that
// case fp_eval_e is used (it already includes the cond[2] selection done
// in D).
assign ccfp_sel[0] = ~fpcond_mvbr_e & ~br_cond_e[2];
assign ccfp_sel[1] = ~fpcond_mvbr_e & br_cond_e[2];
// assign ccfp_sel[2] = fpcond_mvbr_e & ~br_cond_e[2];
// assign ccfp_sel[3] = fpcond_mvbr_e & br_cond_e[2];
assign ccfp_eval = ccfp_sel[0] & cc_eval0 |
ccfp_sel[1] & cc_eval1 |
fpcond_mvbr_e & fp_eval_e;
// invert branch condition if this is an inverted br type
// cond[3] selects the inverted condition. Written as a mux on ccfp_eval
// instead of the XOR form shown in the comment below; the result is forced
// to 0 for register-based conditions (cc_mvbr_e = 0).
// assign ccbr_taken_e = (ccfp_eval ^ br_cond_e[3]) & cc_mvbr_e;
assign ccbr_taken_e = ccfp_eval ? (cc_mvbr_e & ~br_cond_e[3]) :
(cc_mvbr_e & br_cond_e[3]);
// cond = 1000 on a condition-code instruction: the "always" condition
assign br_always_e = (~br_cond_e[0] & ~br_cond_e[1] & ~br_cond_e[2] &
br_cond_e[3] & cc_mvbr_e);
//--------------
// For BRZ
// -------------
// Calculate Cond Assuming Z=1 And Z=0. Then Mux
// Both results are computed before the rs1=0 indication is used; the final
// mux further down picks between them with fcl_dcl_regz_e. Each is written
// as a mux on exu_ifu_regn_e (rs1<0); the single-expression equivalents are
// kept in the comments. Both are forced to 0 for condition-code
// instructions (cc_mvbr_e = 1).
// r_eval1: result assuming rs1 = 0
// assign r_eval1 = ((exu_ifu_regn_e | ~br_cond_e[1] | ~br_cond_e[0]) ^
// br_cond_e[2]) & ~cc_mvbr_e;
assign r_eval1 = exu_ifu_regn_e ? (~br_cond_e[2] & ~cc_mvbr_e) :
(((br_cond_e[1] & br_cond_e[0]) ^
~br_cond_e[2]) & ~cc_mvbr_e);
// r_eval0: result assuming rs1 != 0
// assign r_eval0 = ((exu_ifu_regn_e & br_cond_e[1]) ^
// br_cond_e[2]) & ~cc_mvbr_e;
assign r_eval0 = exu_ifu_regn_e ? ((br_cond_e[1] ^ br_cond_e[2]) &
~cc_mvbr_e) :
(br_cond_e[2] & ~cc_mvbr_e);
// D -> E staging of the "condition comes from condition codes" flag
dff #(1) regcc_ff(.din (cc_mvbr_d),
.clk (clk),
.q (cc_mvbr_e),
.se (se), .si(), .so());
// Evaluate Final Branch condition
// ccbr_taken_e and r_eval0/r_eval1 are never active together (one is gated
// by cc_mvbr_e, the others by ~cc_mvbr_e), so the 3:1 mux below can be
// reduced to a 2:1 mux on the rs1=0 indication with the condition-code
// result ORed into both legs.
// 3:1 mux
// assign cond_brtaken_e = cc_mvbr_e ? ccbr_taken_e :
// exu_ifu_regz_e ? r_eval1 :
// r_eval0;
// 2:1 mux
// assign cond_brtaken_e = exu_ifu_regz_e ? (r_eval1 | ccbr_taken_e) :
// (r_eval0 | ccbr_taken_e);
//////// Chandra ////////
// temp0 / temp1: condition result for the regz=0 / regz=1 case
wire temp0, temp1, cond_brtaken_e_l;
// limit loading on this signal
// wire regz_buf_e;
// bw_u1_buf_5x UZfix_regz_bf(.a (exu_ifu_regz_e),
// .z (regz_buf_e));
assign temp0 = (r_eval0 | ccbr_taken_e);
assign temp1 = (r_eval1 | ccbr_taken_e);
// NOTE(review): bw_u1_muxi21_6x is presumably an inverting 2:1 mux
// (z = ~(s ? d1 : d0)), with the inverter after it restoring the polarity;
// this matches the commented-out 2:1 mux above -- confirm against the cell
// library.
bw_u1_muxi21_6x UZsize_cbtmux(.z(cond_brtaken_e_l),
.d0(temp0),
.d1(temp1),
.s(fcl_dcl_regz_e));
bw_u1_inv_20x UZsize_cbtinv(.z(cond_brtaken_e),
.a(cond_brtaken_e_l));
////////////////////////
// Branch resolution for the regz=0 / regz=1 case, left unmuxed here:
// a direct branch whose condition holds, or any jmpl/return or call,
// provided the instruction is not annulled.
assign dcl_fcl_bcregz0_e = (temp0 & dbr_inst_e | ibr_inst_e |
call_inst_e) & ~dtu_inst_anull_e;
assign dcl_fcl_bcregz1_e = (temp1 & dbr_inst_e | ibr_inst_e |
call_inst_e) & ~dtu_inst_anull_e;
// A mov whose condition is false, again for the regz=0 / regz=1 case.
// assign ifu_exu_dontmove_e = mov_inst_e & ~cond_brtaken_e;
assign ifu_exu_dontmv_regz0_e = ~temp0 & mov_inst_e;
assign ifu_exu_dontmv_regz1_e = ~temp1 & mov_inst_e;
// branch condition to FPU
// (the fully muxed condition, delayed by one flop)
dff #(1) fpcond_ff(.din (cond_brtaken_e),
.q (ifu_ffu_mvcnd_m),
.clk (clk),
.se (se), .si(), .so());
// branch / move completion and anull signals
// Retired, kept for reference:
//   assign dtu_fcl_brtaken_e = ~dtu_inst_anull_e &
//                              (ibr_inst_e | call_inst_e |
//                               dbr_inst_e & cond_brtaken_e);

// Kill the E-stage instruction when it is annulled.
// The instruction-valid term was dropped from this equation:
//   assign ifu_exu_kill_e = dtu_inst_anull_e |
//                           ~fcl_dtu_inst_vld_e; // don't need this anymore
assign ifu_exu_kill_e = dtu_inst_anull_e;

// A tcc that is valid in E and has not been annulled
wire tcc_live_e = tcc_inst_e & fcl_dtu_inst_vld_e & ~dtu_inst_anull_e;

// signal trap if tcc succeeds
assign ifu_exu_tcc_e = tcc_live_e & ccbr_taken_e;

// tcc whose condition failed; reported one flop later
assign tcc_done_e = tcc_live_e & ~ccbr_taken_e;

dff #(1) tccm_ff (
    .clk (clk),
    .din (tcc_done_e),
    .q   (dcl_swl_tcc_done_m),
    .se  (se), .si (), .so ());
// logic to anull delay slot, if this branch itself is not anulled
// anull_cbr: direct (non-call) branch with the a-bit set and a condition
//            other than "always"
// anull_ubr: direct (non-call) branch with the a-bit set and the "always"
//            condition
// The delay slot is annulled for anull_ubr unconditionally, and for
// anull_cbr only when the branch is not taken.
assign anull_cbr = abit_e & dbr_inst_e & ~br_always_e & ~call_inst_e;
assign anull_ubr = abit_e & dbr_inst_e & br_always_e & ~call_inst_e;
assign anull_all = anull_ubr | anull_cbr & ~cond_brtaken_e;
// check which thread to anull
// thr_vld_e: thread bits of the E-stage instruction, only if it is valid
assign thr_vld_e = thr_e & {4{fcl_dtu_inst_vld_e}};
// Either flush source, delayed by one flop and steered by the W2 thread
// bits, clears that thread's annul bit.
assign all_flush_w = tlu_ifu_flush_pipe_w | ifu_tlu_flush_w;
dff #(1) flshw2_ff(.din (all_flush_w),
.q (all_flush_w2),
.clk (clk), .se(se), .si(), .so());
assign flush_abit = swl_dcl_thr_w2 & {4{all_flush_w2}};
// Next annul state, one bit per thread:
//   set  - the bit is clear and the valid E-stage instruction of that
//          thread requests an annul (anull_all)
//   hold - the bit is set and that thread has neither a valid instruction
//          nor a valid interrupt in E; otherwise the bit is cleared
//   a flush of the thread clears the bit in every case
assign anull_next_e = ((~anull_e & {4{anull_all}} & thr_vld_e) |
(anull_e & ~(thr_e & {4{fcl_dtu_inst_vld_e |
fcl_dtu_intr_vld_e}}))) &
~flush_abit;
// anull_e needs to be per thread
dffr #(4) anull_ff(.din (anull_next_e),
.clk (clk),
.rst (dtu_reset),
.q (anull_e),
.se (se), .si(), .so());
//
// assign thr_dec_e[0] = swl_dcl_thr_e[0] | rst_tri_enable;
// assign thr_dec_e[3:1] = swl_dcl_thr_e[3:1] & {3{~rst_tri_enable}};
// The annul indication for the instruction now in D is taken from the
// next-state value of its thread's bit and registered, so that it lines up
// with that instruction when it reaches E.
assign thr_anull_d = swl_dcl_thr_d & anull_next_e;
assign inst_anull_d = (|thr_anull_d[3:0]);
dff #(1) ina_ff(.din (inst_anull_d),
.q (inst_anull_e),
.clk (clk), .se (se), .si(), .so());
assign dtu_inst_anull_e = inst_anull_e;
// Retired alternative: select the annul bit in E with a per-thread mux.
// mux4ds dcla_mux(.dout (this_inst_anull_e),
// .in0 (anull_e[0]),
// .in1 (anull_e[1]),
// .in2 (anull_e[2]),
// .in3 (anull_e[3]),
// .sel0 (thr_dec_e[0]),
// .sel1 (thr_dec_e[1]),
// .sel2 (thr_dec_e[2]),
// .sel3 (thr_dec_e[3]));
// assign dtu_inst_anull_e = this_inst_anull_e & fcl_dtu_inst_vld_e;
//--------------------
// Copy of FCC
//--------------------
// The FCCs themselves live in the ffu. This block keeps one 8-bit copy
// per thread (four 2-bit fcc fields) so that FP branches can be evaluated
// here. A thread's copy is reloaded from ffu_ifu_cc_w2 when its bit in
// ffu_ifu_cc_vld_w2 is set and otherwise holds its value; dtu_reset resets
// the copies.

// thread 0 fcc copy
mux2ds #(8) t0_fcc_mux (
    .sel0 (~ffu_ifu_cc_vld_w2[0]), .in0 (t0_fcc_d[7:0]),        // hold
    .sel1 (ffu_ifu_cc_vld_w2[0]),  .in1 (ffu_ifu_cc_w2[7:0]),   // load from FFU
    .dout (t0_fcc_nxt[7:0]));

dffr #(8) t0_fcc_reg (
    .clk (clk),
    .rst (dtu_reset),
    .din (t0_fcc_nxt[7:0]),
    .q   (t0_fcc_d[7:0]),
    .se  (se), .si (), .so ());

`ifdef FPGA_SYN_1THREAD
// single-thread build: thread 0 is the only source
assign fcc_d[7:0] = t0_fcc_d[7:0];
`else
// thread 1 fcc copy
mux2ds #(8) t1_fcc_mux (
    .sel0 (~ffu_ifu_cc_vld_w2[1]), .in0 (t1_fcc_d[7:0]),
    .sel1 (ffu_ifu_cc_vld_w2[1]),  .in1 (ffu_ifu_cc_w2[7:0]),
    .dout (t1_fcc_nxt[7:0]));

dffr #(8) t1_fcc_reg (
    .clk (clk),
    .rst (dtu_reset),
    .din (t1_fcc_nxt[7:0]),
    .q   (t1_fcc_d[7:0]),
    .se  (se), .si (), .so ());

// thread 2 fcc copy
mux2ds #(8) t2_fcc_mux (
    .sel0 (~ffu_ifu_cc_vld_w2[2]), .in0 (t2_fcc_d[7:0]),
    .sel1 (ffu_ifu_cc_vld_w2[2]),  .in1 (ffu_ifu_cc_w2[7:0]),
    .dout (t2_fcc_nxt[7:0]));

dffr #(8) t2_fcc_reg (
    .clk (clk),
    .rst (dtu_reset),
    .din (t2_fcc_nxt[7:0]),
    .q   (t2_fcc_d[7:0]),
    .se  (se), .si (), .so ());

// thread 3 fcc copy
mux2ds #(8) t3_fcc_mux (
    .sel0 (~ffu_ifu_cc_vld_w2[3]), .in0 (t3_fcc_d[7:0]),
    .sel1 (ffu_ifu_cc_vld_w2[3]),  .in1 (ffu_ifu_cc_w2[7:0]),
    .dout (t3_fcc_nxt[7:0]));

dffr #(8) t3_fcc_reg (
    .clk (clk),
    .rst (dtu_reset),
    .din (t3_fcc_nxt[7:0]),
    .q   (t3_fcc_d[7:0]),
    .se  (se), .si (), .so ());

// choose thread: the D-stage thread bits select whose copy is evaluated
assign thr_dec_d = swl_dcl_thr_d;

mux4ds #(8) fcc0d_mx (
    .sel0 (thr_dec_d[0]), .in0 (t0_fcc_d[7:0]),
    .sel1 (thr_dec_d[1]), .in1 (t1_fcc_d[7:0]),
    .sel2 (thr_dec_d[2]), .in2 (t2_fcc_d[7:0]),
    .sel3 (thr_dec_d[3]), .in3 (t3_fcc_d[7:0]),
    .dout (fcc_d[7:0]));
`endif // !`ifdef FPGA_SYN_1THREAD
endmodule // sparc_ifu_dcl