// ========== Copyright Header Begin ==========================================
//
// OpenSPARC T1 Processor File: sparc_ifu_fdp.v
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
//
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
//
// The above named program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
//
// ========== Copyright Header End ============================================
////////////////////////////////////////////////////////////////////////
/*
// Module Name: sparc_ifu_fdp
// Description:
// The fdp contains the PCs for all four threads and the PC and
// nPC registers for all pipestages. The fetcher also contains two
// adders for doing PC + br_offset and PC + 4.
// The fdp also holds the last fetched icache data for each thread
// and the next instruction register, which has the top half of the
// double instruction bundle which is fetched from the icache.
*/
////////////////////////////////////////////////////////////////////////
// Local header file includes / local defines
////////////////////////////////////////////////////////////////////////
`include "ifu.h"
// NOP: 32-bit instruction word that instout_mux drives (with a zero
// switch bit appended) when its nop select is active.
`define NOP 32'h01000000
// PO_RESET_PC: 48-bit constant. It is not referenced by the active code
// visible in this file; it is only mentioned in a PC datapath comment.
`define PO_RESET_PC 48'hfffff0000020
// The VER_* constants are fields of the 64-bit version word that ver_mux
// presents on its in1 input (together with const_maskid).
`define VER_MANUF 16'h003e
`define VER_IMPL 16'h0023
`define VER_MAXGL 8'h03
`define VER_MAXWIN 8'h07
`define VER_MAXTL 8'h06
// Older, disabled forms of the version word are kept below for reference.
//`define VER_MAXTL {5'b0, fcl_fdp_hprivmode_e, 2'b10}
//`define VER_IMPL_MASK 24'h002301
//`define VERSION_REG_HPV {`VER_MANUF, `VER_IMPL_MASK, `VER_MAXGL, 5'b0, fcl_fdp_hprivmode_e, 2'b10, `VER_MAXWIN}
//`define VERSION_REG {`VER_MANUF, `VER_IMPL_MASK, `VER_MAXGL, 8'h06, `VER_MAXWIN}
//FPGA_SYN enables all FPGA related modifications
// In this file FPGA_SYN_CLK_EN removes the bw_u1_ckenbuf_6x clock gaters and
// FPGA_SYN_CLK_DFF swaps the gated-clock NIR flops for dffe enable flops.
`ifdef FPGA_SYN
`define FPGA_SYN_CLK_EN
`define FPGA_SYN_CLK_DFF
`endif
// sparc_ifu_fdp: IFU fetch datapath. Purely structural: per-thread PC,
// trap-nPC, instruction and NIR registers plus the muxes that steer them.
// All mux selects arrive from outside on the fcl_fdp_* inputs; names ending
// in _l connect to the sel*_l pins of the dp_mux cells (presumably active
// low -- confirm against the dp_mux library).
// NOTE(review): si is not used and so is not driven anywhere in this file;
// every flop is instantiated with .si() and .so() left open.
module sparc_ifu_fdp
(/*AUTOARG*/
// Outputs
so, fdp_itlb_ctxt_bf, fdp_icd_vaddr_bf, fdp_icv_index_bf,
fdp_erb_pc_f, fdp_dtu_inst_s, ifu_exu_pc_d, ifu_exu_rs1_s,
ifu_exu_rs2_s, ifu_exu_rs3_s, ifu_tlu_pc_m, ifu_tlu_npc_m,
ifu_tlu_pc_oor_e, ifu_exu_pcver_e, fdp_fcl_swc_s2,
fdp_fcl_pc_oor_vec_f, fdp_fcl_pc_oor_e, fdp_fcl_op_s,
fdp_fcl_op3_s, fdp_fcl_ibit_s,
// Inputs
rclk, se, si, const_maskid, lsu_t0_pctxt_state,
lsu_t1_pctxt_state, lsu_t2_pctxt_state, lsu_t3_pctxt_state,
exu_ifu_brpc_e, tlu_ifu_trappc_w2, tlu_ifu_trapnpc_w2,
tlu_itlb_dmp_nctxt_g, tlu_itlb_dmp_actxt_g, tlu_itlb_tte_tag_w2,
dtu_fdp_thrconf_e, icd_fdp_fetdata_s1, icd_fdp_topdata_s1,
ifq_fdp_fill_inst, fcl_fdp_oddwin_s, fcl_fdp_pcoor_vec_f,
fcl_fdp_pcoor_f, fcl_fdp_mask32b_f, fcl_fdp_addr_mask_d,
fcl_fdp_tctxt_sel_prim, fcl_fdp_usenir_sel_nir_s1,
fcl_fdp_rbinst_sel_inste_s, fcl_fdp_thrtnpc_sel_tnpc_l,
fcl_fdp_thrtnpc_sel_npcw_l, fcl_fdp_thrtnpc_sel_pcf_l,
fcl_fdp_thrtnpc_sel_old_l, fcl_fdp_thr_s1_l,
fcl_fdp_next_thr_bf_l, fcl_fdp_next_ctxt_bf_l, fcl_fdp_thr_s2_l,
fcl_fdp_nirthr_s1_l, fcl_fdp_tpcbf_sel_pcp4_bf_l,
fcl_fdp_tpcbf_sel_brpc_bf_l, fcl_fdp_tpcbf_sel_trap_bf_l,
fcl_fdp_tpcbf_sel_old_bf_l, fcl_fdp_pcbf_sel_swpc_bf_l,
fcl_fdp_pcbf_sel_nosw_bf_l, fcl_fdp_pcbf_sel_br_bf_l,
fcl_fdp_trrbpc_sel_trap_bf_l, fcl_fdp_trrbpc_sel_rb_bf_l,
fcl_fdp_trrbpc_sel_err_bf_l, fcl_fdp_trrbpc_sel_pcs_bf_l,
fcl_fdp_noswpc_sel_tnpc_l_bf, fcl_fdp_noswpc_sel_old_l_bf,
fcl_fdp_noswpc_sel_inc_l_bf, fcl_fdp_nextpcs_sel_pce_f_l,
fcl_fdp_nextpcs_sel_pcd_f_l, fcl_fdp_nextpcs_sel_pcs_f_l,
fcl_fdp_nextpcs_sel_pcf_f_l, fcl_fdp_rdsr_sel_pc_e_l,
fcl_fdp_rdsr_sel_ver_e_l, fcl_fdp_rdsr_sel_thr_e_l,
fcl_fdp_inst_sel_curr_s_l, fcl_fdp_inst_sel_switch_s_l,
fcl_fdp_inst_sel_nir_s_l, fcl_fdp_inst_sel_nop_s_l,
fcl_fdp_tinst_sel_curr_s_l, fcl_fdp_tinst_sel_rb_s_l,
fcl_fdp_tinst_sel_old_s_l, fcl_fdp_tinst_sel_ifq_s_l,
fcl_fdp_dmpthr_l, fcl_fdp_ctxt_sel_dmp_bf_l,
fcl_fdp_ctxt_sel_sw_bf_l, fcl_fdp_ctxt_sel_curr_bf_l
);
input rclk,
se,
si;
input [7:0] const_maskid; // mask id field of the version word
input [12:0] lsu_t0_pctxt_state, // primary context
lsu_t1_pctxt_state,
lsu_t2_pctxt_state,
lsu_t3_pctxt_state;
// input exu_ifu_va_oor_e;
input [47:0] exu_ifu_brpc_e; // br address for dir branch
input [48:0] tlu_ifu_trappc_w2, // trap/exception PC
tlu_ifu_trapnpc_w2; // next trap PC
input tlu_itlb_dmp_nctxt_g, // either dmp_*ctxt_g zeroes the thread ctxt
tlu_itlb_dmp_actxt_g; // selects the tte tag as the demap context
input [12:0] tlu_itlb_tte_tag_w2;
// input [`IC_IDX_HI:4] ifq_fdp_icindex_bf; // index + 1 bit for 16B write
input [40:0] dtu_fdp_thrconf_e; // fields spread into ver_mux in2
input [32:0] icd_fdp_fetdata_s1, // fetched instruction word (33 bits)
icd_fdp_topdata_s1; // next instruction
input [32:0] ifq_fdp_fill_inst; // icache miss return
input fcl_fdp_oddwin_s; // conditionally flips rs1/rs2/rs3 bit 4
input [3:0] fcl_fdp_pcoor_vec_f; // per-thread bit 48 paired with tNpc_f
input fcl_fdp_pcoor_f; // bit 48 paired with pc_f
input fcl_fdp_mask32b_f; // suppresses the incrementer's bit-48 term
input fcl_fdp_addr_mask_d; // clears bits 48:32 of pc_d and npc_d
input [3:0] fcl_fdp_tctxt_sel_prim; // per-thread AND mask on the context
// 2:1 mux selects
input fcl_fdp_usenir_sel_nir_s1; // same as usenir_d2
input [3:0] fcl_fdp_rbinst_sel_inste_s; // rollback 1 or 2
input [3:0] fcl_fdp_thrtnpc_sel_tnpc_l, // load npc
fcl_fdp_thrtnpc_sel_npcw_l,
fcl_fdp_thrtnpc_sel_pcf_l,
fcl_fdp_thrtnpc_sel_old_l;
input [3:0] fcl_fdp_thr_s1_l; // active-low per-thread NIR reg enables
// other mux selects
input [3:0] fcl_fdp_next_thr_bf_l; // for thrpc output mux
input [3:0] fcl_fdp_next_ctxt_bf_l; // for ctxt output mux
input [3:0] fcl_fdp_thr_s2_l; // s2 thr (64*5 muxes)
input [3:0] fcl_fdp_nirthr_s1_l; // same as thr_s1, but protected
input [3:0] fcl_fdp_tpcbf_sel_pcp4_bf_l, // selects for thread PC muxes
fcl_fdp_tpcbf_sel_brpc_bf_l,
fcl_fdp_tpcbf_sel_trap_bf_l,
fcl_fdp_tpcbf_sel_old_bf_l;
input fcl_fdp_pcbf_sel_swpc_bf_l, // selects for pcbf_mux
fcl_fdp_pcbf_sel_nosw_bf_l,
fcl_fdp_pcbf_sel_br_bf_l;
input [3:0] fcl_fdp_trrbpc_sel_trap_bf_l, // selects for trap_pcN_mux
fcl_fdp_trrbpc_sel_rb_bf_l,
fcl_fdp_trrbpc_sel_err_bf_l,
fcl_fdp_trrbpc_sel_pcs_bf_l;
input fcl_fdp_noswpc_sel_tnpc_l_bf, // pcp4_mux: thread trap npc,
fcl_fdp_noswpc_sel_old_l_bf, // old pc_f,
fcl_fdp_noswpc_sel_inc_l_bf; // or incremented pc_f
input [3:0] fcl_fdp_nextpcs_sel_pce_f_l, // selects for tNpcf_mux
fcl_fdp_nextpcs_sel_pcd_f_l,
fcl_fdp_nextpcs_sel_pcs_f_l,
fcl_fdp_nextpcs_sel_pcf_f_l;
input fcl_fdp_rdsr_sel_pc_e_l, // selects for ver_mux
fcl_fdp_rdsr_sel_ver_e_l,
fcl_fdp_rdsr_sel_thr_e_l;
input fcl_fdp_inst_sel_curr_s_l, // selects for inst_s2
fcl_fdp_inst_sel_switch_s_l,
fcl_fdp_inst_sel_nir_s_l,
fcl_fdp_inst_sel_nop_s_l;
input [3:0] fcl_fdp_tinst_sel_curr_s_l, // selects for tinst regs
fcl_fdp_tinst_sel_rb_s_l,
fcl_fdp_tinst_sel_old_s_l,
fcl_fdp_tinst_sel_ifq_s_l;
input [3:0] fcl_fdp_dmpthr_l; // thread select for dmp_ctxt_mux
input fcl_fdp_ctxt_sel_dmp_bf_l, // selects for ctxt_mux
fcl_fdp_ctxt_sel_sw_bf_l,
fcl_fdp_ctxt_sel_curr_bf_l;
output so;
output [12:0] fdp_itlb_ctxt_bf;
output [47:2] fdp_icd_vaddr_bf; // 11:2 is index to ic
output [11:5] fdp_icv_index_bf;
output [47:0] fdp_erb_pc_f;
output [31:0] fdp_dtu_inst_s; // 32b inst = fdp_inst_s[32:1]; the switch
// bit goes out on fdp_fcl_swc_s2
output [47:0] ifu_exu_pc_d; // PC for rel branch
output [4:0] ifu_exu_rs1_s, // reg file read address
ifu_exu_rs2_s,
ifu_exu_rs3_s;
output [48:0] ifu_tlu_pc_m,
ifu_tlu_npc_m;
output ifu_tlu_pc_oor_e; // pc_e[48]
output [63:0] ifu_exu_pcver_e; // PCs to different dests.
output fdp_fcl_swc_s2; // tells whether to switch or not
output [3:0] fdp_fcl_pc_oor_vec_f; // PC va hole check
output fdp_fcl_pc_oor_e; // pc_e[48]
output [1:0] fdp_fcl_op_s;
output [5:2] fdp_fcl_op3_s;
output fdp_fcl_ibit_s;
//----------------------------------------------------------------------
// Declarations
//----------------------------------------------------------------------
// Width conventions used below:
// - 49-bit PC buses: bits 47:0 are the address; bit 48 is exported as the
// out-of-range flag (fdp_fcl_pc_oor_e, ifu_tlu_pc_oor_e, fdp_fcl_pc_oor_vec_f).
// - 33-bit instruction buses: bits 32:1 are the instruction sent on
// fdp_dtu_inst_s; bit 0 is the switch bit sent on fdp_fcl_swc_s2.
// local signals
// Contexts
wire [12:0] curr_ctxt,
sw_ctxt,
dmp_ctxt,
dmp_ctxt_unq,
dmp_ctxt1,
dmp_ctxt2,
t0_ctxt_bf,
t1_ctxt_bf,
t2_ctxt_bf,
t3_ctxt_bf;
// PCs
wire [48:0] t0pc_f, t1pc_f, t2pc_f, t3pc_f, // F stage thread PC
t0pc_s, t1pc_s, t2pc_s, t3pc_s, // S stage thr pc
t0_next_pcs_f, t1_next_pcs_f, t2_next_pcs_f, t3_next_pcs_f,
t0npc_bf, t1npc_bf, t2npc_bf, t3npc_bf, // Next PC in
// BF stage
pc_s, pc_d, pc_e, pc_m, pc_w,
npc_s, npc_d, npc_e, npc_m, npc_w,
pc_d_adj, npc_d_adj;
wire [47:0] pc_bf,
swpc_bf, // PC of next thread if not branch
pc_f;
wire [48:0] nextpc_nosw_bf, // next pc if no switch
am_mask;
// trap PCs and rollback PCs
wire [48:0] t0_trap_rb_pc_bf,
t1_trap_rb_pc_bf,
t2_trap_rb_pc_bf,
t3_trap_rb_pc_bf;
wire [48:0] thr_trappc_bf,
t0_trapnpc_f,
t1_trapnpc_f,
t2_trapnpc_f,
t3_trapnpc_f,
trapnpc0_bf,
trapnpc1_bf,
trapnpc2_bf,
trapnpc3_bf;
// Branch PCs
wire [48:0] pcinc_f; // incr output
// Instruction Words
wire [32:0] inst_s2, // instruction to switch to in S
fdp_inst_s, // instruction to be sent to D
t0inst_s1, // input to thr inst reg in S
t1inst_s1,
t2inst_s1,
t3inst_s1,
t0inst_s2, // thr inst reg output
t1inst_s2,
t2inst_s2,
t3inst_s2;
wire [32:0] inst_s1; // fetched instruction in S
wire [32:0] inst_s1_bf1; // buf version of inst_s1
wire [32:0] rb_inst0_s, // instruction to rollback to
rb_inst1_s, // instruction to rollback to
rb_inst2_s, // instruction to rollback to
rb_inst3_s, // instruction to rollback to
inst_d, // rollback 1
inst_e; // rollback 2
// Next instruction word
wire [32:0] nirdata_s1, // next inst reg contents
t0nir, // thread NIR reg output
t1nir,
t2nir,
t3nir;
wire clk;
//
// Code start here
//
// clk is a plain alias of rclk and clocks every flop except the
// gated-clock NIR registers.
assign clk = rclk;
//----------------------------------------------------------------------
// Context Reg
//----------------------------------------------------------------------
// Produces the 13-bit context sent to the ITLB (fdp_itlb_ctxt_bf) from one
// of three sources: the S2 thread's context, the next (switch) thread's
// context, or the demap context.
// Each thread's context is forced to zero unless its
// fcl_fdp_tctxt_sel_prim bit is set.
assign t0_ctxt_bf = lsu_t0_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[0]}};
`ifdef FPGA_SYN_1THREAD
// Single-thread build: thread 0 supplies every context directly.
assign sw_ctxt = t0_ctxt_bf;
assign curr_ctxt = t0_ctxt_bf;
assign dmp_ctxt_unq = lsu_t0_pctxt_state;
`else
assign t1_ctxt_bf = lsu_t1_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[1]}};
assign t2_ctxt_bf = lsu_t2_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[2]}};
assign t3_ctxt_bf = lsu_t3_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[3]}};
// Context of the thread selected by fcl_fdp_next_ctxt_bf_l.
dp_mux4ds #(13) sw_ctxt_mux(.dout (sw_ctxt),
.in0 (t0_ctxt_bf),
.in1 (t1_ctxt_bf),
.in2 (t2_ctxt_bf),
.in3 (t3_ctxt_bf),
.sel0_l (fcl_fdp_next_ctxt_bf_l[0]),
.sel1_l (fcl_fdp_next_ctxt_bf_l[1]),
.sel2_l (fcl_fdp_next_ctxt_bf_l[2]),
.sel3_l (fcl_fdp_next_ctxt_bf_l[3]));
// Context of the thread selected by fcl_fdp_thr_s2_l.
dp_mux4ds #(13) curr_ctxt_mux(.dout (curr_ctxt),
.in0 (t0_ctxt_bf),
.in1 (t1_ctxt_bf),
.in2 (t2_ctxt_bf),
.in3 (t3_ctxt_bf),
.sel0_l (fcl_fdp_thr_s2_l[0]),
.sel1_l (fcl_fdp_thr_s2_l[1]),
.sel2_l (fcl_fdp_thr_s2_l[2]),
.sel3_l (fcl_fdp_thr_s2_l[3]));
// Demap path uses the raw per-thread context (no tctxt_sel_prim masking),
// selected by fcl_fdp_dmpthr_l.
dp_mux4ds #(13) dmp_ctxt_mux(.dout (dmp_ctxt_unq),
.in0 (lsu_t0_pctxt_state),
.in1 (lsu_t1_pctxt_state),
.in2 (lsu_t2_pctxt_state),
.in3 (lsu_t3_pctxt_state),
.sel0_l (fcl_fdp_dmpthr_l[0]),
.sel1_l (fcl_fdp_dmpthr_l[1]),
.sel2_l (fcl_fdp_dmpthr_l[2]),
.sel3_l (fcl_fdp_dmpthr_l[3]));
`endif // !`ifdef FPGA_SYN_1THREAD
// dmp_ctxt1: the thread context, zeroed when either dmp_nctxt_g or
// dmp_actxt_g is set.
assign dmp_ctxt1 = dmp_ctxt_unq & {13{~(tlu_itlb_dmp_nctxt_g |
tlu_itlb_dmp_actxt_g)}};
// dmp_ctxt2: the 13-bit tte tag, passed only when dmp_actxt_g is set.
// The concatenation is simply tlu_itlb_tte_tag_w2[12:0].
//`ifdef SPARC_HPV_EN
assign dmp_ctxt2 = {tlu_itlb_tte_tag_w2[12:7],tlu_itlb_tte_tag_w2[6:0]} &
{13{tlu_itlb_dmp_actxt_g}};
//`else
// assign dmp_ctxt2 = {tlu_itlb_tte_tag_w2[13:8],tlu_itlb_tte_tag_w2[6:0]} &
// {13{tlu_itlb_dmp_actxt_g}};
//`endif
// At most one of dmp_ctxt1/dmp_ctxt2 is non-zero when dmp_actxt_g is set,
// so the OR acts as a select; with only dmp_nctxt_g set the result is 0.
assign dmp_ctxt = dmp_ctxt1 | dmp_ctxt2;
dp_mux3ds #(13) ctxt_mux (.dout (fdp_itlb_ctxt_bf),
.in0 (curr_ctxt),
.in1 (sw_ctxt),
.in2 (dmp_ctxt),
.sel0_l (fcl_fdp_ctxt_sel_curr_bf_l),
.sel1_l (fcl_fdp_ctxt_sel_sw_bf_l),
.sel2_l (fcl_fdp_ctxt_sel_dmp_bf_l));
// ----------------------------------------------------------------------
// PC datapath
// ----------------------------------------------------------------------
// pc/thr to exu for rdsr instruction
// this is the only 64 bit cell in the IFU
// Inputs, each exactly 64 bits wide:
// in0: pc_e[47:0] sign-extended from bit 47 (the bit-48 flag is dropped)
// in1: version word = manuf(16) impl(16) maskid(8) maxgl(8) maxtl(8) maxwin(8)
// in2: dtu_fdp_thrconf_e fields with zero padding:
// 12'b0, [40:29], 4'b0, [28:9], 2'b0, [8:3], 5'b0, [2:0]
dp_mux3ds #(64) ver_mux(.dout (ifu_exu_pcver_e[63:0]),
.in0 ({{16{pc_e[47]}}, pc_e[47:0]}),
.in1 ({`VER_MANUF,
`VER_IMPL,
const_maskid[7:0],
`VER_MAXGL,
`VER_MAXTL,
`VER_MAXWIN}),
.in2 ({12'b0,
dtu_fdp_thrconf_e[40:29],
4'b0,
dtu_fdp_thrconf_e[28:9],
2'b0,
dtu_fdp_thrconf_e[8:3],
5'b0,
dtu_fdp_thrconf_e[2:0]}),
.sel0_l (fcl_fdp_rdsr_sel_pc_e_l),
.sel1_l (fcl_fdp_rdsr_sel_ver_e_l),
.sel2_l (fcl_fdp_rdsr_sel_thr_e_l));
// Select the next thread pc (for F stage)
// Each thread's 49-bit next PC comes from one of four sources:
// in0 (old) : hold -- current tNpc_f[47:0] with bit 48 refreshed from
// fcl_fdp_pcoor_vec_f[N]
// in1 (pcp4): nextpc_nosw_bf, the shared no-switch next PC
// in2 (trap): the thread's trap/rollback PC
// in3 (brpc): branch target from the EXU, with bit 48 forced to 0
dp_mux4ds #(49) t0_pcbf_mux(.dout (t0npc_bf),
.in0 ({fcl_fdp_pcoor_vec_f[0], t0pc_f[47:0]}),
.in1 (nextpc_nosw_bf),
.in2 (t0_trap_rb_pc_bf),
.in3 ({1'b0, exu_ifu_brpc_e}),
.sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[0]),
.sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[0]),
.sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[0]),
.sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[0]));
`ifdef FPGA_SYN_1THREAD
`else
dp_mux4ds #(49) t1_pcbf_mux(.dout (t1npc_bf),
.in0 ({fcl_fdp_pcoor_vec_f[1], t1pc_f[47:0]}),
.in1 (nextpc_nosw_bf),
.in2 (t1_trap_rb_pc_bf),
.in3 ({1'b0, exu_ifu_brpc_e}),
.sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[1]),
.sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[1]),
.sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[1]),
.sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[1]));
dp_mux4ds #(49) t2_pcbf_mux(.dout (t2npc_bf),
.in0 ({fcl_fdp_pcoor_vec_f[2], t2pc_f[47:0]}),
.in1 (nextpc_nosw_bf),
.in2 (t2_trap_rb_pc_bf),
.in3 ({1'b0, exu_ifu_brpc_e}),
.sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[2]),
.sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[2]),
.sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[2]),
.sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[2]));
dp_mux4ds #(49) t3_pcbf_mux(.dout (t3npc_bf),
.in0 ({fcl_fdp_pcoor_vec_f[3], t3pc_f[47:0]}),
.in1 (nextpc_nosw_bf),
.in2 (t3_trap_rb_pc_bf),
.in3 ({1'b0, exu_ifu_brpc_e}),
.sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[3]),
.sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[3]),
.sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[3]),
.sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[3]));
`endif
// F stage thread PC regs; use low power thr flop
dff #(49) t0_pcf_reg(.din (t0npc_bf),
.clk (clk),
.q (t0pc_f),
.se (se), .si(), .so());
`ifdef FPGA_SYN_1THREAD
// Single-thread build: only thread 0's flag and PC exist.
assign fdp_fcl_pc_oor_vec_f = {3'b0, t0pc_f[48]};
assign swpc_bf = t0pc_f[47:0];
`else
dff #(49) t1_pcf_reg(.din (t1npc_bf),
.clk (clk),
.q (t1pc_f),
.se (se), .si(), .so());
dff #(49) t2_pcf_reg(.din (t2npc_bf),
.clk (clk),
.q (t2pc_f),
.se (se), .si(), .so());
dff #(49) t3_pcf_reg(.din (t3npc_bf),
.clk (clk),
.q (t3pc_f),
.se (se), .si(), .so());
// Bit 48 of each registered thread PC is reported as that thread's
// out-of-range flag.
assign fdp_fcl_pc_oor_vec_f = {t3pc_f[48], t2pc_f[48],
t1pc_f[48], t0pc_f[48]};
// select the pc to be used on a switch -- need to protect
dp_mux4ds #(48) swpc_mux(.dout (swpc_bf),
.in0 (t0pc_f[47:0]),
.in1 (t1pc_f[47:0]),
.in2 (t2pc_f[47:0]),
.in3 (t3pc_f[47:0]),
.sel0_l (fcl_fdp_next_thr_bf_l[0]),
.sel1_l (fcl_fdp_next_thr_bf_l[1]),
.sel2_l (fcl_fdp_next_thr_bf_l[2]),
.sel3_l (fcl_fdp_next_thr_bf_l[3]));
`endif
// NOTE: the two commented-out muxes below are earlier versions that also
// merged the I$ write (ifq) index into the address; they are kept for
// reference only. The live address path is pcbf_mux.
// choose between I$ write address and read address
// need mux only for lower 11 bits (2+3 + ICINDEX_SIZE)
// dp_mux2es #(48) ifqfdp_mux(.dout (icaddr_nosw_bf[47:0]),
// .in0 (nextpc_nosw_bf[47:0]),
// .in1 ({{37{1'b0}}, ifq_fdp_icindex_bf, 4'b0}),
// .sel (fcl_fdp_ifqfdp_sel_ifq_bf)); // 1=ifq
// implements switch and branch
// can we cut this down to 11 bits? No! tlb needs all 48
// dp_mux4ds #(48) nxt_icaddr_mux(.dout (icaddr_bf),
// .in0 (swpc_bf[47:0]),
// .in1 (nextpc_nosw_bf[47:0]),
// .in2 ({8'b0, {`IC_TAG_SZ{1'b0}},
// ifq_fdp_icindex_bf, 4'b0}),
// .in3 (exu_ifu_brpc_e[47:0]),
// .sel0_l (fcl_fdp_icaddr_sel_swpc_bf_l),
// .sel1_l (fcl_fdp_icaddr_sel_curr_bf_l),
// .sel2_l (fcl_fdp_icaddr_sel_ifq_bf_l),
// .sel3_l (fcl_fdp_icaddr_sel_br_bf_l));
// assign fdp_icd_vaddr_bf = icaddr_bf[47:0];
// this goes to the itlb, icd and ict on top of fdp
// this is !!very critical!!
// Word address: the two low bits of pc_bf are not sent.
assign fdp_icd_vaddr_bf = pc_bf[47:2];
// create separate output for the icv to the left
assign fdp_icv_index_bf = pc_bf[11:5];
// Place this mux as close to the top (itlb) as possible
// Fetch address for this cycle: the switch thread's PC, the no-switch
// next PC, or the branch target. Only the 48 address bits are muxed here.
dp_mux3ds #(48) pcbf_mux(.dout (pc_bf[47:0]),
.in0 (swpc_bf[47:0]),
.in1 (nextpc_nosw_bf[47:0]),
.in2 (exu_ifu_brpc_e[47:0]),
.sel0_l (fcl_fdp_pcbf_sel_swpc_bf_l),
.sel1_l (fcl_fdp_pcbf_sel_nosw_bf_l),
.sel2_l (fcl_fdp_pcbf_sel_br_bf_l));
// F stage copy of the fetch address.
dff #(48) pcf_reg(.din (pc_bf),
.clk (clk),
.q (pc_f),
.se (se), .si(), .so());
assign fdp_erb_pc_f = pc_f[47:0];
// trappc mux (choose trap pc vs rollback/uTrap pc)
// One mux per thread; the data inputs are shared and only the selects
// (and the thread's own S stage PC on in2) differ:
// in0 (trap): tlu_ifu_trappc_w2
// in1 (rb) : pc_d_adj
// in2 (pcs) : tNpc_s
// in3 (err) : pc_w
// Output feeds in2 of the matching tN_pcbf_mux.
dp_mux4ds #(49) trap_pc0_mux(.dout (t0_trap_rb_pc_bf),
.in0 (tlu_ifu_trappc_w2),
.in1 (pc_d_adj),
.in2 (t0pc_s),
.in3 (pc_w),
.sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[0]),
.sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[0]),
.sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[0]),
.sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[0]));
`ifdef FPGA_SYN_1THREAD
`else
dp_mux4ds #(49) trap_pc1_mux(.dout (t1_trap_rb_pc_bf),
.in0 (tlu_ifu_trappc_w2),
.in1 (pc_d_adj),
.in2 (t1pc_s),
.in3 (pc_w),
.sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[1]),
.sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[1]),
.sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[1]),
.sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[1]));
dp_mux4ds #(49) trap_pc2_mux(.dout (t2_trap_rb_pc_bf),
.in0 (tlu_ifu_trappc_w2),
.in1 (pc_d_adj),
.in2 (t2pc_s),
.in3 (pc_w),
.sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[2]),
.sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[2]),
.sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[2]),
.sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[2]));
dp_mux4ds #(49) trap_pc3_mux(.dout (t3_trap_rb_pc_bf),
.in0 (tlu_ifu_trappc_w2),
.in1 (pc_d_adj),
.in2 (t3pc_s),
.in3 (pc_w),
.sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[3]),
.sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[3]),
.sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[3]),
.sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[3]));
`endif
// can reduce this to a 2:1 mux since reset pc is not used any more and
// pc_f is not needed.
// No-switch next PC, shared by all threads:
// in0 (inc) : incremented pc_f from the PC incrementer
// in1 (tnpc): trap nPC of the thread selected by nxttpc_mux
// in2 (old) : hold pc_f, with bit 48 taken from fcl_fdp_pcoor_f
dp_mux3ds #(49) pcp4_mux(.dout (nextpc_nosw_bf),
.in0 (pcinc_f),
.in1 (thr_trappc_bf),
.in2 ({fcl_fdp_pcoor_f, pc_f[47:0]}),
.sel0_l (fcl_fdp_noswpc_sel_inc_l_bf),
.sel1_l (fcl_fdp_noswpc_sel_tnpc_l_bf),
.sel2_l (fcl_fdp_noswpc_sel_old_l_bf));
// next S stage thread pc mux per thread
// Use advtpcs signal which works for stall (Aug '01)
// Merged pc_e/pc_d into the eqn to allow for rollback
// Per-thread sources for the next S stage PC:
// in0 (pcs): hold tNpc_s
// in1 (pcf): advance from tNpc_f[47:0], bit 48 from fcl_fdp_pcoor_vec_f[N]
// in2 (pcd): pc_d_adj
// in3 (pce): pc_e
dp_mux4ds #(49) t0pcf_mux(.dout (t0_next_pcs_f),
.in0 (t0pc_s),
.in1 ({fcl_fdp_pcoor_vec_f[0], t0pc_f[47:0]}),
.in2 (pc_d_adj),
.in3 (pc_e),
.sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[0]),
.sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[0]),
.sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[0]),
.sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[0]));
`ifdef FPGA_SYN_1THREAD
`else
dp_mux4ds #(49) t1pcf_mux(.dout (t1_next_pcs_f),
.in0 (t1pc_s),
.in1 ({fcl_fdp_pcoor_vec_f[1], t1pc_f[47:0]}),
.in2 (pc_d_adj),
.in3 (pc_e),
.sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[1]),
.sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[1]),
.sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[1]),
.sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[1]));
dp_mux4ds #(49) t2pcf_mux(.dout (t2_next_pcs_f),
.in0 (t2pc_s),
.in1 ({fcl_fdp_pcoor_vec_f[2], t2pc_f[47:0]}),
// .in1 ({fcl_fdp_pcoor_f, pc_f[47:0]}),
.in2 (pc_d_adj),
.in3 (pc_e),
.sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[2]),
.sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[2]),
.sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[2]),
.sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[2]));
dp_mux4ds #(49) t3pcf_mux(.dout (t3_next_pcs_f),
.in0 (t3pc_s),
.in1 ({fcl_fdp_pcoor_vec_f[3], t3pc_f[47:0]}),
// .in1 ({fcl_fdp_pcoor_f, pc_f[47:0]}),
.in2 (pc_d_adj),
.in3 (pc_e),
.sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[3]),
.sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[3]),
.sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[3]),
.sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[3]));
`endif
// S stage thread PC regs; use low power thr flop
dff #(49) t0pcs_reg(.din (t0_next_pcs_f),
.q (t0pc_s),
.clk (clk), .se(se), .si(), .so());
`ifdef FPGA_SYN_1THREAD
// Single-thread build: thread 0 is always the S stage thread.
assign pc_s = t0pc_s;
assign npc_s = t0_next_pcs_f;
`else
dff #(49) t1pcs_reg(.din (t1_next_pcs_f),
.q (t1pc_s),
.clk (clk), .se(se), .si(), .so());
dff #(49) t2pcs_reg(.din (t2_next_pcs_f),
.q (t2pc_s),
.clk (clk), .se(se), .si(), .so());
dff #(49) t3pcs_reg(.din (t3_next_pcs_f),
.q (t3pc_s),
.clk (clk), .se(se), .si(), .so());
// S stage PC mux -- need to protect
dp_mux4ds #(49) pcs_mux(.dout (pc_s),
.in0 (t0pc_s),
.in1 (t1pc_s),
.in2 (t2pc_s),
.in3 (t3pc_s),
.sel0_l (fcl_fdp_thr_s2_l[0]),
.sel1_l (fcl_fdp_thr_s2_l[1]),
.sel2_l (fcl_fdp_thr_s2_l[2]),
.sel3_l (fcl_fdp_thr_s2_l[3]));
// S stage next PC mux -- need to protect
// npc_s is taken from the inputs of the S stage PC regs (the value each
// thread's tNpc_s will hold next cycle), not from a separate nPC register.
dp_mux4ds #(49) npcs_mux(.dout (npc_s),
.in0 (t0_next_pcs_f),
.in1 (t1_next_pcs_f),
.in2 (t2_next_pcs_f),
.in3 (t3_next_pcs_f),
.sel0_l (fcl_fdp_thr_s2_l[0]),
.sel1_l (fcl_fdp_thr_s2_l[1]),
.sel2_l (fcl_fdp_thr_s2_l[2]),
.sel3_l (fcl_fdp_thr_s2_l[3]));
`endif
// PC / nPC pipeline: S -> D -> E -> M -> W, one 49-bit flop pair per stage.
// D stage PC and nPC
dff #(49) pcd_reg(.din (pc_s),
.q (pc_d),
.clk (clk), .se(se), .si(), .so());
dff #(49) npcd_reg(.din (npc_s),
.q (npc_d),
.clk (clk), .se(se), .si(), .so());
// Address mask: when fcl_fdp_addr_mask_d is set, bits 48:32 (including the
// bit-48 flag) are cleared and bits 31:0 pass through unchanged.
assign am_mask = {{17{~fcl_fdp_addr_mask_d}}, 32'hffffffff};
// nand2
assign pc_d_adj = pc_d & am_mask;
assign npc_d_adj = npc_d & am_mask;
assign ifu_exu_pc_d = pc_d_adj[47:0];
// E stage PC and nPC
// The E stage and everything downstream see the masked D stage values.
dff #(49) pce_reg(.din (pc_d_adj),
.q (pc_e),
.clk (clk), .se(se), .si(), .so());
dff #(49) npce_reg(.din (npc_d_adj),
.q (npc_e),
.clk (clk), .se(se), .si(), .so());
// Bit 48 of the E stage PC is the out-of-range flag sent to fcl and tlu.
assign fdp_fcl_pc_oor_e = pc_e[48];
assign ifu_tlu_pc_oor_e = pc_e[48];
// M stage PC and nPC
dff #(49) pcm_reg(.din (pc_e),
.q (pc_m),
.clk (clk), .se(se), .si(), .so());
dff #(49) npcm_reg(.din (npc_e),
.q (npc_m),
.clk (clk), .se(se), .si(), .so());
assign ifu_tlu_pc_m = pc_m[48:0];
assign ifu_tlu_npc_m = npc_m[48:0];
// W stage PC and nPC
// pc_w feeds in3 of the trap_pcN muxes; npc_w feeds in1 of the tNtnpc muxes.
dff #(49) pcw_reg(.din (pc_m),
.q (pc_w),
.clk (clk), .se(se), .si(), .so());
dff #(49) npcw_reg(.din (npc_m),
.q (npc_w),
.clk (clk), .se(se), .si(), .so());
// assign ifu_tlu_pc_w = pc_w;
// assign ifu_tlu_npc_w = npc_w;
// PC incrementer
// can we fit the ofl logic on the side of the incrementer?
// Only the word address pc_f[47:2] goes through the incrementer; the two
// low bits are copied through unchanged.
assign pcinc_f[1:0] = pc_f[1:0];
sparc_ifu_incr46 pc_inc(.a (pc_f[47:2]),
.a_inc (pcinc_f[47:2]),
.ofl ()); // ofl output not needed
// assign pcinc_f[48] = inc_ofl & ~fcl_fdp_mask32b_f | fcl_fdp_pcoor_f;
// Bit 48 of the incremented PC is set when bit 47 goes from 0 to 1 across
// the increment (unless fcl_fdp_mask32b_f is set), or when fcl_fdp_pcoor_f
// is already set.
assign pcinc_f[48] = ~pc_f[47] & pcinc_f[47] & ~fcl_fdp_mask32b_f |
fcl_fdp_pcoor_f;
// Enable for thr trapnpc reg
// Each thread's trap nPC register is loaded through a 4:1 mux:
// in0 (tnpc): tlu_ifu_trapnpc_w2
// in1 (npcw): npc_w
// in2 (pcf) : the thread's F stage PC, all 49 bits
// in3 (old) : hold the current register value
dp_mux4ds #(49) t0tnpc_mux(.dout (trapnpc0_bf),
.in0 (tlu_ifu_trapnpc_w2),
.in1 (npc_w),
.in2 (t0pc_f),
.in3 (t0_trapnpc_f),
.sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[0]),
.sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[0]),
.sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[0]),
.sel3_l (fcl_fdp_thrtnpc_sel_old_l[0]));
`ifdef FPGA_SYN_1THREAD
`else
dp_mux4ds #(49) t1tnpc_mux(.dout (trapnpc1_bf),
.in0 (tlu_ifu_trapnpc_w2),
.in1 (npc_w),
.in2 (t1pc_f),
.in3 (t1_trapnpc_f),
.sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[1]),
.sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[1]),
.sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[1]),
.sel3_l (fcl_fdp_thrtnpc_sel_old_l[1]));
dp_mux4ds #(49) t2tnpc_mux(.dout (trapnpc2_bf),
.in0 (tlu_ifu_trapnpc_w2),
.in1 (npc_w),
.in2 (t2pc_f),
.in3 (t2_trapnpc_f),
.sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[2]),
.sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[2]),
.sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[2]),
.sel3_l (fcl_fdp_thrtnpc_sel_old_l[2]));
dp_mux4ds #(49) t3tnpc_mux(.dout (trapnpc3_bf),
.in0 (tlu_ifu_trapnpc_w2),
.in1 (npc_w),
.in2 (t3pc_f),
.in3 (t3_trapnpc_f),
.sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[3]),
.sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[3]),
.sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[3]),
.sel3_l (fcl_fdp_thrtnpc_sel_old_l[3]));
`endif
// thread next trap pc reg
dff #(49) t0tnpcf_reg(.din (trapnpc0_bf),
.q (t0_trapnpc_f),
.clk (clk), .se(se), .si(), .so());
`ifdef FPGA_SYN_1THREAD
assign thr_trappc_bf = t0_trapnpc_f;
`else
dff #(49) t1tnpcf_reg(.din (trapnpc1_bf),
.q (t1_trapnpc_f),
.clk (clk), .se(se), .si(), .so());
dff #(49) t2tnpcf_reg(.din (trapnpc2_bf),
.q (t2_trapnpc_f),
.clk (clk), .se(se), .si(), .so());
dff #(49) t3tnpcf_reg(.din (trapnpc3_bf),
.q (t3_trapnpc_f),
.clk (clk), .se(se), .si(), .so());
// Trap nPC of the selected thread; this is in1 of pcp4_mux.
dp_mux4ds #(49) nxttpc_mux(.dout (thr_trappc_bf),
.in0 (t0_trapnpc_f),
.in1 (t1_trapnpc_f),
.in2 (t2_trapnpc_f),
.in3 (t3_trapnpc_f),
.sel0_l (fcl_fdp_thr_s2_l[0]), // thr_s2 = thr_f
.sel1_l (fcl_fdp_thr_s2_l[1]),
.sel2_l (fcl_fdp_thr_s2_l[2]),
.sel3_l (fcl_fdp_thr_s2_l[3]));
`endif
// NOTE(review): the reset description below names PO_RESET_PC and an
// icaddr_f register, neither of which is used by the active code in this
// file; it appears to describe an earlier design -- confirm before relying
// on it.
// During rst nextpc_nosw_bf = PO_RESET_PC. All thread PC_f registers,
// the icaddr_f register and the nextpc register should be loaded
// with nextpc_nosw_bf during reset.
// Eventually, we will load the reset_pc from the trap logic unit,
// which will arrive on the trap_pc bus.
// TBD in PC datapath:
// 1. Add useNIR bit to PCs -- DONE
// 2. Add support for ifq request grant -- DONE
// 3. Generate icache read signal (from fcl?) -- DONE
// 4. Rollback functionality -- DONE
// 5. PC range checks -- DONE
// 6. Change PC to 48 bit value -- DONE
//----------------------------------------------------------------------
// Fetched Instruction Datapath
//----------------------------------------------------------------------
// This is logically 33 bits wide. The NIR and IR datapaths are laid
// side by side, making this a 66bit datapath. The NIR path is
// potentially a little longer.
// choose between NIR data and fetched data
// inst_s1 is the value captured by the thread instruction registers
// (through inst_s1_bf1); it is not the word sent to decode.
dp_mux2es #(33) usenir_mux(.dout (inst_s1),
.in0 (icd_fdp_fetdata_s1[32:0]),
.in1 (nirdata_s1),
.sel (fcl_fdp_usenir_sel_nir_s1)); // 1=nir
// Instruction Output Mux
// CHANGE: now 4:1
// Word sent to decode: fresh fetch data, the stored instruction of the
// selected thread (inst_s2), a NOP with a zero switch bit, or the NIR data.
dp_mux4ds #(33) instout_mux(.dout (fdp_inst_s),
.in0 (icd_fdp_fetdata_s1[32:0]),
.in1 (inst_s2),
.in2 ({`NOP, 1'b0}),
.in3 (nirdata_s1[32:0]),
.sel0_l (fcl_fdp_inst_sel_curr_s_l),
.sel1_l (fcl_fdp_inst_sel_switch_s_l),
.sel2_l (fcl_fdp_inst_sel_nop_s_l),
.sel3_l (fcl_fdp_inst_sel_nir_s_l));
// fdp_inst_s[0] is the switch bit, so instruction bit i sits at
// fdp_inst_s[i+1]. The taps below are therefore instruction bits
// 31:30 (op), 24:21 (op3[5:2]) and 13 (i bit).
assign fdp_fcl_swc_s2 = fdp_inst_s[0];
assign fdp_fcl_op_s = fdp_inst_s[32:31];
assign fdp_fcl_op3_s = fdp_inst_s[25:22];
assign fdp_fcl_ibit_s = fdp_inst_s[14];
assign fdp_dtu_inst_s = fdp_inst_s[32:1];
// CHANGE: Random logic to fix timing paths
// output pin on RHS, as close to IRF as possible
// 16x drivers
// nand2-xor-invert
// Register read addresses. The low four bits are taken directly from the
// instruction fields (instruction bits 18:14, 4:0 and 29:25 for rs1, rs2
// and rs3). Bit 4 is inverted when bit 3 of the field is set and
// fcl_fdp_oddwin_s is asserted.
assign ifu_exu_rs1_s[4] = fdp_inst_s[19] ^
(fdp_inst_s[18] & fcl_fdp_oddwin_s);
assign ifu_exu_rs1_s[3:0] = fdp_inst_s[18:15];
assign ifu_exu_rs2_s[4] = (fdp_inst_s[5] ^
(fdp_inst_s[4] & fcl_fdp_oddwin_s));
assign ifu_exu_rs2_s[3:0] = fdp_inst_s[4:1];
assign ifu_exu_rs3_s[4] = (fdp_inst_s[30] ^
(fdp_inst_s[29] & fcl_fdp_oddwin_s));
assign ifu_exu_rs3_s[3:0] = fdp_inst_s[29:26];
// Buffered copy of inst_s1 that fans out to the four thread instruction
// muxes.
dp_buffer #(33) insts1_buf(inst_s1_bf1, inst_s1[32:0]);
// Thread instruction muxes
// Per-thread sources for the thread instruction register:
// in0 (ifq) : ifq_fdp_fill_inst (icache miss return)
// in1 (curr): buffered inst_s1 (fetch data or NIR data)
// in2 (old) : hold the current register value
// in3 (rb) : the thread's rollback instruction
dp_mux4ds #(33) t0inst_mux(.dout (t0inst_s1),
.in0 (ifq_fdp_fill_inst),
.in1 (inst_s1_bf1),
.in2 (t0inst_s2),
.in3 (rb_inst0_s),
.sel0_l (fcl_fdp_tinst_sel_ifq_s_l[0]),
.sel1_l (fcl_fdp_tinst_sel_curr_s_l[0]),
.sel2_l (fcl_fdp_tinst_sel_old_s_l[0]),
.sel3_l (fcl_fdp_tinst_sel_rb_s_l[0]));
`ifdef FPGA_SYN_1THREAD
`else
dp_mux4ds #(33) t1inst_mux(.dout (t1inst_s1),
.in0 (ifq_fdp_fill_inst),
.in1 (inst_s1_bf1),
.in2 (t1inst_s2),
.in3 (rb_inst1_s),
.sel0_l (fcl_fdp_tinst_sel_ifq_s_l[1]),
.sel1_l (fcl_fdp_tinst_sel_curr_s_l[1]),
.sel2_l (fcl_fdp_tinst_sel_old_s_l[1]),
.sel3_l (fcl_fdp_tinst_sel_rb_s_l[1]));
dp_mux4ds #(33) t2inst_mux(.dout (t2inst_s1),
.in0 (ifq_fdp_fill_inst),
.in1 (inst_s1_bf1),
.in2 (t2inst_s2),
.in3 (rb_inst2_s),
.sel0_l (fcl_fdp_tinst_sel_ifq_s_l[2]),
.sel1_l (fcl_fdp_tinst_sel_curr_s_l[2]),
.sel2_l (fcl_fdp_tinst_sel_old_s_l[2]),
.sel3_l (fcl_fdp_tinst_sel_rb_s_l[2]));
dp_mux4ds #(33) t3inst_mux(.dout (t3inst_s1),
.in0 (ifq_fdp_fill_inst),
.in1 (inst_s1_bf1),
.in2 (t3inst_s2),
.in3 (rb_inst3_s),
.sel0_l (fcl_fdp_tinst_sel_ifq_s_l[3]),
.sel1_l (fcl_fdp_tinst_sel_curr_s_l[3]),
.sel2_l (fcl_fdp_tinst_sel_old_s_l[3]),
.sel3_l (fcl_fdp_tinst_sel_rb_s_l[3]));
`endif
// Thread Instruction Register
dff #(33) t0_inst_reg(.din (t0inst_s1),
.q (t0inst_s2),
.clk (clk), .se(se), .si(), .so());
`ifdef FPGA_SYN_1THREAD
assign inst_s2 = t0inst_s2;
`else
dff #(33) t1_inst_reg(.din (t1inst_s1),
.q (t1inst_s2),
.clk (clk), .se(se), .si(), .so());
dff #(33) t2_inst_reg(.din (t2inst_s1),
.q (t2inst_s2),
.clk (clk), .se(se), .si(), .so());
dff #(33) t3_inst_reg(.din (t3inst_s1),
.q (t3inst_s2),
.clk (clk), .se(se), .si(), .so());
// switch instruction mux -- choose the instruction to switch to
// fcl keep track of which t*inst_s2 is valid
dp_mux4ds #(33) swinst_mux(.dout (inst_s2),
.in0 (t0inst_s2),
.in1 (t1inst_s2),
.in2 (t2inst_s2),
.in3 (t3inst_s2),
.sel0_l (fcl_fdp_thr_s2_l[0]),
.sel1_l (fcl_fdp_thr_s2_l[1]),
.sel2_l (fcl_fdp_thr_s2_l[2]),
.sel3_l (fcl_fdp_thr_s2_l[3]));
`endif
// Rollback instruction
// The word sent to decode is delayed by one flop (inst_d) and two flops
// (inst_e). Each thread then picks one of the two with its
// fcl_fdp_rbinst_sel_inste_s bit (0 = inst_d, 1 = inst_e); the result is
// in3 of that thread's instruction mux.
dff #(33) rbinst_d_reg(.din (fdp_inst_s[32:0]),
.q (inst_d),
.clk (clk),
.se (se), .si(), .so());
dff #(33) rbinst_e_reg(.din (inst_d),
.q (inst_e),
.clk (clk),
.se (se), .si(), .so());
dp_mux2es #(33) rbinst0_mux(.dout (rb_inst0_s),
.in0 (inst_d),
.in1 (inst_e),
.sel (fcl_fdp_rbinst_sel_inste_s[0]));
`ifdef FPGA_SYN_1THREAD
`else
dp_mux2es #(33) rbinst1_mux(.dout (rb_inst1_s),
.in0 (inst_d),
.in1 (inst_e),
.sel (fcl_fdp_rbinst_sel_inste_s[1]));
dp_mux2es #(33) rbinst2_mux(.dout (rb_inst2_s),
.in0 (inst_d),
.in1 (inst_e),
.sel (fcl_fdp_rbinst_sel_inste_s[2]));
dp_mux2es #(33) rbinst3_mux(.dout (rb_inst3_s),
.in0 (inst_d),
.in1 (inst_e),
.sel (fcl_fdp_rbinst_sel_inste_s[3]));
`endif
//----------------------------------------------------------------------
// Next Instruction Datapath
//----------------------------------------------------------------------
// Each thread has a 33-bit NIR register that captures icd_fdp_topdata_s1.
// The registers have no input mux; instead each is updated only when its
// fcl_fdp_thr_s1_l bit allows it:
// - default build: a bw_u1_ckenbuf_6x gates rclk per thread (en_l driven
// by fcl_fdp_thr_s1_l[N], tm_l by ~se) and a plain dff uses that clock.
// - FPGA_SYN_CLK_DFF build: a dffe on rclk with en = ~fcl_fdp_thr_s1_l[N].
// The commented-out 2:1 muxes below are the earlier hold-mux form.
// Thread next instruction muxes
// dp_mux2es #(33) t0nir_mux(.dout (t0nir_in),
// .in0 (icd_fdp_topdata_s1[32:0]),
// .in1 (t0nir),
// .sel (fcl_fdp_thr_s1_l[0])); // 0=new
// dp_mux2es #(33) t1nir_mux(.dout (t1nir_in),
// .in0 (icd_fdp_topdata_s1[32:0]),
// .in1 (t1nir),
// .sel (fcl_fdp_thr_s1_l[1]));
// dp_mux2es #(33) t2nir_mux(.dout (t2nir_in),
// .in0 (icd_fdp_topdata_s1[32:0]),
// .in1 (t2nir),
// .sel (fcl_fdp_thr_s1_l[2]));
// dp_mux2es #(33) t3nir_mux(.dout (t3nir_in),
// .in0 (icd_fdp_topdata_s1[32:0]),
// .in1 (t3nir),
// .sel (fcl_fdp_thr_s1_l[3]));
// Thread Next Instruction Register
// NOTE(review): clk_nirN is driven only when FPGA_SYN_CLK_EN is undefined
// and used only when FPGA_SYN_CLK_DFF is undefined. The two macros are
// defined together under FPGA_SYN in this file; defining just one of them
// leaves clk_nirN either unused or undriven.
wire clk_nir0;
`ifdef FPGA_SYN_CLK_EN
`else
bw_u1_ckenbuf_6x ckennir0(.rclk (rclk),
.clk (clk_nir0),
.en_l (fcl_fdp_thr_s1_l[0]),
.tm_l (~se));
`endif
`ifdef FPGA_SYN_CLK_DFF
dffe #(33) t0nir_reg(.din (icd_fdp_topdata_s1[32:0]),
.q (t0nir),
.en (~(fcl_fdp_thr_s1_l[0])), .clk(rclk), .se(se), .si(), .so());
`else
dff #(33) t0nir_reg(.din (icd_fdp_topdata_s1[32:0]),
.q (t0nir),
.clk (clk_nir0), .se(se), .si(), .so());
`endif
`ifdef FPGA_SYN_1THREAD
// Single-thread build: thread 0's NIR is the only NIR.
assign nirdata_s1 = t0nir;
`else
wire clk_nir1;
`ifdef FPGA_SYN_CLK_EN
`else
bw_u1_ckenbuf_6x ckennir1(.rclk (rclk),
.clk (clk_nir1),
.en_l (fcl_fdp_thr_s1_l[1]),
.tm_l (~se));
`endif
`ifdef FPGA_SYN_CLK_DFF
dffe #(33) t1nir_reg(.din (icd_fdp_topdata_s1[32:0]),
.q (t1nir),
.en (~(fcl_fdp_thr_s1_l[1])), .clk (rclk), .se(se), .si(), .so());
`else
dff #(33) t1nir_reg(.din (icd_fdp_topdata_s1[32:0]),
.q (t1nir),
.clk (clk_nir1), .se(se), .si(), .so());
`endif
wire clk_nir2;
`ifdef FPGA_SYN_CLK_EN
`else
bw_u1_ckenbuf_6x ckennir2(.rclk (rclk),
.clk (clk_nir2),
.en_l (fcl_fdp_thr_s1_l[2]),
.tm_l (~se));
`endif
`ifdef FPGA_SYN_CLK_DFF
dffe #(33) t2nir_reg(.din (icd_fdp_topdata_s1[32:0]),
.q (t2nir),
.en (~(fcl_fdp_thr_s1_l[2])), .clk (rclk), .se(se), .si(), .so());
`else
dff #(33) t2nir_reg(.din (icd_fdp_topdata_s1[32:0]),
.q (t2nir),
.clk (clk_nir2), .se(se), .si(), .so());
`endif
wire clk_nir3;
`ifdef FPGA_SYN_CLK_EN
`else
bw_u1_ckenbuf_6x ckennir3(.rclk (rclk),
.clk (clk_nir3),
.en_l (fcl_fdp_thr_s1_l[3]),
.tm_l (~se));
`endif
`ifdef FPGA_SYN_CLK_DFF
dffe #(33) t3nir_reg(.din (icd_fdp_topdata_s1[32:0]),
.q (t3nir),
.en (~(fcl_fdp_thr_s1_l[3])), .clk (rclk), .se(se), .si(), .so());
`else
dff #(33) t3nir_reg(.din (icd_fdp_topdata_s1[32:0]),
.q (t3nir),
.clk (clk_nir3), .se(se), .si(), .so());
`endif
// Next thread NIR mux (nir output mux)
// nirdata_s1 feeds usenir_mux in1 and instout_mux in3.
dp_mux4ds #(33) nextnir_mux(.dout (nirdata_s1),
.in0 (t0nir),
.in1 (t1nir),
.in2 (t2nir),
.in3 (t3nir),
.sel0_l (fcl_fdp_nirthr_s1_l[0]),
.sel1_l (fcl_fdp_nirthr_s1_l[1]),
.sel2_l (fcl_fdp_nirthr_s1_l[2]),
.sel3_l (fcl_fdp_nirthr_s1_l[3]));
`endif
// TBD in fetched instruction DP:
// 1. Rollback -- DONE
// 2. Icache parity check (increase fet data and top data to 34 bits)
endmodule // sparc_ifu_fdp