// ========== Copyright Header Begin ==========================================
//
// OpenSPARC T1 Processor File: sparc_ifu_fdp.v
// Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
//
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
//
// The above named program is distributed in the hope that it will be
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
//
// ========== Copyright Header End ============================================
////////////////////////////////////////////////////////////////////////
/*
//  Module Name: sparc_ifu_fdp
//  Description:
//  The fdp contains the pc's for all four threads and the PC and
//  nPC for all pipestages register.  The fetcher also contains two
//  adders for doing PC + br_offset and PC + 4.
//  The fdp also holds the last fetched icache data for each thread
//  and the next instruction register, which has the top half of the
//  double instruction bundle which is fetched from the icache.
//
//  NOTE(review): the only arithmetic cell instantiated in this file is
//  the PC incrementer (sparc_ifu_incr46); the branch target arrives
//  precomputed on exu_ifu_brpc_e.  The "two adders" sentence above
//  looks stale -- confirm against the current floorplan/spec.
//
//  Conventions visible in this file:
//    - PC buses are 49 bits wide.  Bits [47:0] are the address; bit 48
//      is exported as the "pc out of range" flag (fdp_fcl_pc_oor_*,
//      ifu_tlu_pc_oor_e).
//    - Instruction buses are 33 bits wide.  Bits [32:1] are the
//      instruction word, bit 0 is exported as the switch bit
//      (fdp_fcl_swc_s2).
//    - Signal suffixes (_bf, _f, _s, _d, _e, _m, _w) name the pipe
//      stage the value belongs to; PCs are staged S->D->E->M->W by the
//      flops below.
//    - Mux select ports are named *_l; presumably active low, see the
//      dp_mux*ds cell definitions to confirm.
*/

////////////////////////////////////////////////////////////////////////
// Local header file includes / local defines
////////////////////////////////////////////////////////////////////////

`include "ifu.h"

`define NOP           32'h01000000
// NOTE(review): PO_RESET_PC is not referenced by any live code in this
// file (only by a comment further down).
`define PO_RESET_PC   48'hfffff0000020
// Fields of the version word assembled in ver_mux below.
`define VER_MANUF     16'h003e
`define VER_IMPL      16'h0023
`define VER_MAXGL     8'h03
`define VER_MAXWIN    8'h07
`define VER_MAXTL     8'h06
//`define VER_MAXTL    {5'b0, fcl_fdp_hprivmode_e, 2'b10}
//`define VER_IMPL_MASK 24'h002301
//`define VERSION_REG_HPV  {`VER_MANUF, `VER_IMPL_MASK, `VER_MAXGL, 5'b0, fcl_fdp_hprivmode_e, 2'b10, `VER_MAXWIN}
//`define VERSION_REG      {`VER_MANUF, `VER_IMPL_MASK, `VER_MAXGL, 8'h06, `VER_MAXWIN}

//FPGA_SYN enables all FPGA related modifications
`ifdef FPGA_SYN
`define FPGA_SYN_CLK_EN
`define FPGA_SYN_CLK_DFF
`endif

module sparc_ifu_fdp(/*AUTOARG*/
   // Outputs
   so, fdp_itlb_ctxt_bf, fdp_icd_vaddr_bf, fdp_icv_index_bf,
   fdp_erb_pc_f, fdp_dtu_inst_s, ifu_exu_pc_d, ifu_exu_rs1_s,
   ifu_exu_rs2_s, ifu_exu_rs3_s, ifu_tlu_pc_m, ifu_tlu_npc_m,
   ifu_tlu_pc_oor_e, ifu_exu_pcver_e, fdp_fcl_swc_s2,
   fdp_fcl_pc_oor_vec_f, fdp_fcl_pc_oor_e, fdp_fcl_op_s,
   fdp_fcl_op3_s, fdp_fcl_ibit_s,
   // Inputs
   rclk, se, si, const_maskid, lsu_t0_pctxt_state,
   lsu_t1_pctxt_state, lsu_t2_pctxt_state, lsu_t3_pctxt_state,
   exu_ifu_brpc_e, tlu_ifu_trappc_w2, tlu_ifu_trapnpc_w2,
   tlu_itlb_dmp_nctxt_g, tlu_itlb_dmp_actxt_g, tlu_itlb_tte_tag_w2,
   dtu_fdp_thrconf_e, icd_fdp_fetdata_s1, icd_fdp_topdata_s1,
   ifq_fdp_fill_inst, fcl_fdp_oddwin_s, fcl_fdp_pcoor_vec_f,
   fcl_fdp_pcoor_f, fcl_fdp_mask32b_f, fcl_fdp_addr_mask_d,
   fcl_fdp_tctxt_sel_prim, fcl_fdp_usenir_sel_nir_s1,
   fcl_fdp_rbinst_sel_inste_s, fcl_fdp_thrtnpc_sel_tnpc_l,
   fcl_fdp_thrtnpc_sel_npcw_l, fcl_fdp_thrtnpc_sel_pcf_l,
   fcl_fdp_thrtnpc_sel_old_l, fcl_fdp_thr_s1_l,
   fcl_fdp_next_thr_bf_l, fcl_fdp_next_ctxt_bf_l, fcl_fdp_thr_s2_l,
   fcl_fdp_nirthr_s1_l, fcl_fdp_tpcbf_sel_pcp4_bf_l,
   fcl_fdp_tpcbf_sel_brpc_bf_l, fcl_fdp_tpcbf_sel_trap_bf_l,
   fcl_fdp_tpcbf_sel_old_bf_l, fcl_fdp_pcbf_sel_swpc_bf_l,
   fcl_fdp_pcbf_sel_nosw_bf_l, fcl_fdp_pcbf_sel_br_bf_l,
   fcl_fdp_trrbpc_sel_trap_bf_l, fcl_fdp_trrbpc_sel_rb_bf_l,
   fcl_fdp_trrbpc_sel_err_bf_l, fcl_fdp_trrbpc_sel_pcs_bf_l,
   fcl_fdp_noswpc_sel_tnpc_l_bf, fcl_fdp_noswpc_sel_old_l_bf,
   fcl_fdp_noswpc_sel_inc_l_bf, fcl_fdp_nextpcs_sel_pce_f_l,
   fcl_fdp_nextpcs_sel_pcd_f_l, fcl_fdp_nextpcs_sel_pcs_f_l,
   fcl_fdp_nextpcs_sel_pcf_f_l, fcl_fdp_rdsr_sel_pc_e_l,
   fcl_fdp_rdsr_sel_ver_e_l, fcl_fdp_rdsr_sel_thr_e_l,
   fcl_fdp_inst_sel_curr_s_l, fcl_fdp_inst_sel_switch_s_l,
   fcl_fdp_inst_sel_nir_s_l, fcl_fdp_inst_sel_nop_s_l,
   fcl_fdp_tinst_sel_curr_s_l, fcl_fdp_tinst_sel_rb_s_l,
   fcl_fdp_tinst_sel_old_s_l, fcl_fdp_tinst_sel_ifq_s_l,
   fcl_fdp_dmpthr_l, fcl_fdp_ctxt_sel_dmp_bf_l,
   fcl_fdp_ctxt_sel_sw_bf_l, fcl_fdp_ctxt_sel_curr_bf_l
   );

   // Clock and scan.
   // NOTE(review): si is never read in this file, and every flop below
   // leaves its .si/.so pins unconnected (scan is presumably stitched
   // by a later flow -- confirm).
   input         rclk,
                 se,
                 si;

   input [7:0]   const_maskid;          // goes into the version word (ver_mux)
   input [12:0]  lsu_t0_pctxt_state,    // primary context
                 lsu_t1_pctxt_state,
                 lsu_t2_pctxt_state,
                 lsu_t3_pctxt_state;

//   input         exu_ifu_va_oor_e;
   input [47:0]  exu_ifu_brpc_e;        // br address for dir branch

   input [48:0]  tlu_ifu_trappc_w2,     // trap/exception PC
                 tlu_ifu_trapnpc_w2;    // next trap PC
   input         tlu_itlb_dmp_nctxt_g,  // demap context qualifiers, see dmp_ctxt1/2
                 tlu_itlb_dmp_actxt_g;
   input [12:0]  tlu_itlb_tte_tag_w2;   // context used when dmp_actxt_g is set

//   input [`IC_IDX_HI:4] ifq_fdp_icindex_bf; // index + 1 bit for 16B write

   input [40:0]  dtu_fdp_thrconf_e;     // thread config, zero-padded in ver_mux

   input [32:0]  icd_fdp_fetdata_s1,    // fetched word: [32:1] inst, [0] switch bit
                 icd_fdp_topdata_s1;    // next instruction

   input [32:0]  ifq_fdp_fill_inst;     // icache miss return

   input         fcl_fdp_oddwin_s;      // folded into bit 4 of rs1/rs2/rs3
   input [3:0]   fcl_fdp_pcoor_vec_f;   // per-thread bit 48 for the "old" PC paths
   input         fcl_fdp_pcoor_f;       // bit 48 for pc_f hold / increment paths
   input         fcl_fdp_mask32b_f;     // suppresses the incrementer oor detect
   input         fcl_fdp_addr_mask_d;   // 1 = clear PC/nPC bits [48:32] in D

   input [3:0]   fcl_fdp_tctxt_sel_prim; // per thread: 1 = pass primary ctxt, 0 = zero

   // 2:1 mux selects
   input         fcl_fdp_usenir_sel_nir_s1;   // same as usenir_d2
   input [3:0]   fcl_fdp_rbinst_sel_inste_s;  // rollback 1 or 2

   input [3:0]   fcl_fdp_thrtnpc_sel_tnpc_l,  // load npc
                 fcl_fdp_thrtnpc_sel_npcw_l,
                 fcl_fdp_thrtnpc_sel_pcf_l,
                 fcl_fdp_thrtnpc_sel_old_l;

   // Per-thread load enable (_l) for the NIR registers: drives en_l of
   // ckennir* or, in the FPGA_SYN_CLK_DFF build, ~en of t*nir_reg.
   input [3:0]   fcl_fdp_thr_s1_l;

   // other mux selects
   input [3:0]   fcl_fdp_next_thr_bf_l;  // for thrpc output mux
   input [3:0]   fcl_fdp_next_ctxt_bf_l; // for ctxt output mux

   input [3:0]   fcl_fdp_thr_s2_l;       // s2 thr (64*5 muxes)
   input [3:0]   fcl_fdp_nirthr_s1_l;    // same as thr_s1, but protected

   input [3:0]   fcl_fdp_tpcbf_sel_pcp4_bf_l, // selects for thread PC muxes
                 fcl_fdp_tpcbf_sel_brpc_bf_l,
                 fcl_fdp_tpcbf_sel_trap_bf_l,
                 fcl_fdp_tpcbf_sel_old_bf_l;

   input         fcl_fdp_pcbf_sel_swpc_bf_l,  // selects for pcbf_mux
                 fcl_fdp_pcbf_sel_nosw_bf_l,
                 fcl_fdp_pcbf_sel_br_bf_l;

   input [3:0]   fcl_fdp_trrbpc_sel_trap_bf_l, // selects for trap_pc*_mux
                 fcl_fdp_trrbpc_sel_rb_bf_l,
                 fcl_fdp_trrbpc_sel_err_bf_l,
                 fcl_fdp_trrbpc_sel_pcs_bf_l;

   // selects for pcp4_mux: thread trap nPC, held pc_f, or pc_f + 4
   input         fcl_fdp_noswpc_sel_tnpc_l_bf,
                 fcl_fdp_noswpc_sel_old_l_bf,
                 fcl_fdp_noswpc_sel_inc_l_bf;

   input [3:0]   fcl_fdp_nextpcs_sel_pce_f_l, // selects for t*pcf_mux
                 fcl_fdp_nextpcs_sel_pcd_f_l,
                 fcl_fdp_nextpcs_sel_pcs_f_l,
                 fcl_fdp_nextpcs_sel_pcf_f_l;

   input         fcl_fdp_rdsr_sel_pc_e_l,     // selects for ver_mux
                 fcl_fdp_rdsr_sel_ver_e_l,
                 fcl_fdp_rdsr_sel_thr_e_l;

   input         fcl_fdp_inst_sel_curr_s_l,   // selects for inst_s2
                 fcl_fdp_inst_sel_switch_s_l,
                 fcl_fdp_inst_sel_nir_s_l,
                 fcl_fdp_inst_sel_nop_s_l;

   input [3:0]   fcl_fdp_tinst_sel_curr_s_l,  // selects for tinst regs
                 fcl_fdp_tinst_sel_rb_s_l,
                 fcl_fdp_tinst_sel_old_s_l,
                 fcl_fdp_tinst_sel_ifq_s_l;

   input [3:0]   fcl_fdp_dmpthr_l;            // thread select for dmp_ctxt_mux

   input         fcl_fdp_ctxt_sel_dmp_bf_l,   // selects for ctxt_mux
                 fcl_fdp_ctxt_sel_sw_bf_l,
                 fcl_fdp_ctxt_sel_curr_bf_l;

   // NOTE(review): so has no driver anywhere in this file.
   output        so;

   output [12:0] fdp_itlb_ctxt_bf;
   output [47:2] fdp_icd_vaddr_bf;      // 11:2 is index to ic
   output [11:5] fdp_icv_index_bf;
   output [47:0] fdp_erb_pc_f;

   output [31:0] fdp_dtu_inst_s;        // 32b inst = fdp_inst_s[32:1] (no switch bit)

   output [47:0] ifu_exu_pc_d;          // PC for rel branch
   output [4:0]  ifu_exu_rs1_s,         // reg file read address
                 ifu_exu_rs2_s,
                 ifu_exu_rs3_s;

   output [48:0] ifu_tlu_pc_m,
                 ifu_tlu_npc_m;
   output        ifu_tlu_pc_oor_e;
   output [63:0] ifu_exu_pcver_e;       // rdsr data: PC / version / thread config

   output        fdp_fcl_swc_s2;        // tells whether to switch or not
   output [3:0]  fdp_fcl_pc_oor_vec_f;  // PC va hole check
   output        fdp_fcl_pc_oor_e;
   output [1:0]  fdp_fcl_op_s;
   output [5:2]  fdp_fcl_op3_s;
   output        fdp_fcl_ibit_s;

   //----------------------------------------------------------------------
   // Declarations
   //----------------------------------------------------------------------
   // local signals

   // Contexts
   wire [12:0]   curr_ctxt,
                 sw_ctxt,
                 dmp_ctxt,
                 dmp_ctxt_unq,
                 dmp_ctxt1,
                 dmp_ctxt2,
                 t0_ctxt_bf,
                 t1_ctxt_bf,
                 t2_ctxt_bf,
                 t3_ctxt_bf;

   // PCs
   wire [48:0]   t0pc_f, t1pc_f, t2pc_f, t3pc_f,         // F stage thread PC
                 t0pc_s, t1pc_s, t2pc_s, t3pc_s,         // S stage thr pc
                 t0_next_pcs_f, t1_next_pcs_f,
                 t2_next_pcs_f, t3_next_pcs_f,
                 t0npc_bf, t1npc_bf, t2npc_bf, t3npc_bf, // Next PC in
                                                         // BF stage
                 pc_s, pc_d, pc_e, pc_m, pc_w,
                 npc_s, npc_d, npc_e, npc_m, npc_w,
                 pc_d_adj, npc_d_adj;

   wire [47:0]   pc_bf,
                 swpc_bf,                // PC of next thread if not branch
                 pc_f;

   wire [48:0]   nextpc_nosw_bf,         // next pc if no switch
                 am_mask;

   // trap PCs and rollback PCs
   wire [48:0]   t0_trap_rb_pc_bf,
                 t1_trap_rb_pc_bf,
                 t2_trap_rb_pc_bf,
                 t3_trap_rb_pc_bf;

   wire [48:0]   thr_trappc_bf,
                 t0_trapnpc_f,
                 t1_trapnpc_f,
                 t2_trapnpc_f,
                 t3_trapnpc_f,
                 trapnpc0_bf,
                 trapnpc1_bf,
                 trapnpc2_bf,
                 trapnpc3_bf;

   // Branch PCs
   wire [48:0]   pcinc_f;                // incr output

   // Instruction Words
   wire [32:0]   inst_s2,                // instruction to switch to in S
                 fdp_inst_s,             // instruction to be sent to D
                 t0inst_s1,              // input to thr inst reg in S
                 t1inst_s1,
                 t2inst_s1,
                 t3inst_s1,
                 t0inst_s2,              // thr inst reg output
                 t1inst_s2,
                 t2inst_s2,
                 t3inst_s2;

   wire [32:0]   inst_s1;                // fetched instruction in S
   wire [32:0]   inst_s1_bf1;            // buf version of inst_s1

   wire [32:0]   rb_inst0_s,             // instruction to rollback to
                 rb_inst1_s,             // instruction to rollback to
                 rb_inst2_s,             // instruction to rollback to
                 rb_inst3_s,             // instruction to rollback to
                 inst_d,                 // rollback 1
                 inst_e;                 // rollback 2

   // Next instruction word
   wire [32:0]   nirdata_s1,             // next inst reg contents
                 t0nir,                  // thread NIR reg output
                 t1nir,
                 t2nir,
                 t3nir;

   wire          clk;

//
// Code start here
//
   assign        clk = rclk;

//----------------------------------------------------------------------
// Context Reg
//----------------------------------------------------------------------
   // Per-thread context: the primary context is passed through when the
   // thread's tctxt_sel_prim bit is 1 and forced to all zeros otherwise.
   assign t0_ctxt_bf = lsu_t0_pctxt_state &
                         {13{fcl_fdp_tctxt_sel_prim[0]}};
`ifdef FPGA_SYN_1THREAD
   // Single-thread build: thread 0 feeds every context path directly.
   assign sw_ctxt = t0_ctxt_bf;
   assign curr_ctxt = t0_ctxt_bf;
   assign dmp_ctxt_unq = lsu_t0_pctxt_state;
`else

   assign t1_ctxt_bf = lsu_t1_pctxt_state &
                         {13{fcl_fdp_tctxt_sel_prim[1]}};
   assign t2_ctxt_bf = lsu_t2_pctxt_state &
                         {13{fcl_fdp_tctxt_sel_prim[2]}};
   assign t3_ctxt_bf = lsu_t3_pctxt_state &
                         {13{fcl_fdp_tctxt_sel_prim[3]}};

   // context of the thread chosen by next_ctxt_bf
   dp_mux4ds #(13) sw_ctxt_mux(.dout (sw_ctxt),
                               .in0  (t0_ctxt_bf),
                               .in1  (t1_ctxt_bf),
                               .in2  (t2_ctxt_bf),
                               .in3  (t3_ctxt_bf),
                               .sel0_l (fcl_fdp_next_ctxt_bf_l[0]),
                               .sel1_l (fcl_fdp_next_ctxt_bf_l[1]),
                               .sel2_l (fcl_fdp_next_ctxt_bf_l[2]),
                               .sel3_l (fcl_fdp_next_ctxt_bf_l[3]));

   // context of the thread chosen by thr_s2
   dp_mux4ds #(13) curr_ctxt_mux(.dout (curr_ctxt),
                                 .in0  (t0_ctxt_bf),
                                 .in1  (t1_ctxt_bf),
                                 .in2  (t2_ctxt_bf),
                                 .in3  (t3_ctxt_bf),
                                 .sel0_l (fcl_fdp_thr_s2_l[0]),
                                 .sel1_l (fcl_fdp_thr_s2_l[1]),
                                 .sel2_l (fcl_fdp_thr_s2_l[2]),
                                 .sel3_l (fcl_fdp_thr_s2_l[3]));

   // demap context: raw (unqualified) primary context of the demap thread
   dp_mux4ds #(13) dmp_ctxt_mux(.dout (dmp_ctxt_unq),
                                .in0  (lsu_t0_pctxt_state),
                                .in1  (lsu_t1_pctxt_state),
                                .in2  (lsu_t2_pctxt_state),
                                .in3  (lsu_t3_pctxt_state),
                                .sel0_l (fcl_fdp_dmpthr_l[0]),
                                .sel1_l (fcl_fdp_dmpthr_l[1]),
                                .sel2_l (fcl_fdp_dmpthr_l[2]),
                                .sel3_l (fcl_fdp_dmpthr_l[3]));
`endif // !`ifdef FPGA_SYN_1THREAD

   // Demap context is an AND-OR of two qualified sources:
   //   neither nctxt nor actxt set -> thread primary context
   //   actxt set                   -> tlu_itlb_tte_tag_w2
   //   nctxt set, actxt clear      -> all zeros
   assign dmp_ctxt1 = dmp_ctxt_unq &
                        {13{~(tlu_itlb_dmp_nctxt_g | tlu_itlb_dmp_actxt_g)}};
//`ifdef SPARC_HPV_EN
   assign dmp_ctxt2 = {tlu_itlb_tte_tag_w2[12:7],tlu_itlb_tte_tag_w2[6:0]} &
                        {13{tlu_itlb_dmp_actxt_g}};
//`else
//   assign dmp_ctxt2 = {tlu_itlb_tte_tag_w2[13:8],tlu_itlb_tte_tag_w2[6:0]} &
//                        {13{tlu_itlb_dmp_actxt_g}};
//`endif

   assign dmp_ctxt = dmp_ctxt1 | dmp_ctxt2;

   // context sent to the itlb: current thread, switch thread, or demap
   dp_mux3ds #(13) ctxt_mux (.dout (fdp_itlb_ctxt_bf),
                             .in0  (curr_ctxt),
                             .in1  (sw_ctxt),
                             .in2  (dmp_ctxt),
                             .sel0_l (fcl_fdp_ctxt_sel_curr_bf_l),
                             .sel1_l (fcl_fdp_ctxt_sel_sw_bf_l),
                             .sel2_l (fcl_fdp_ctxt_sel_dmp_bf_l));

// ----------------------------------------------------------------------
// PC datapath
// ----------------------------------------------------------------------

   // pc/thr to exu for rdsr instruction
   // this is the only 64 bit cell in the IFU
   //   in0: pc_e[47:0] sign-extended from bit 47 to 64 bits
   //   in1: version word, 16+16+8+8+8+8 = 64 bits
   //   in2: dtu_fdp_thrconf_e spread over 64 bits with zero padding
   dp_mux3ds #(64) ver_mux(.dout (ifu_exu_pcver_e[63:0]),
                           .in0  ({{16{pc_e[47]}}, pc_e[47:0]}),
                           .in1  ({`VER_MANUF,
                                   `VER_IMPL,
                                   const_maskid[7:0],
                                   `VER_MAXGL,
                                   `VER_MAXTL,
                                   `VER_MAXWIN}),
                           .in2  ({12'b0,
                                   dtu_fdp_thrconf_e[40:29],
                                   4'b0,
                                   dtu_fdp_thrconf_e[28:9],
                                   2'b0,
                                   dtu_fdp_thrconf_e[8:3],
                                   5'b0,
                                   dtu_fdp_thrconf_e[2:0]}),
                           .sel0_l (fcl_fdp_rdsr_sel_pc_e_l),
                           .sel1_l (fcl_fdp_rdsr_sel_ver_e_l),
                           .sel2_l (fcl_fdp_rdsr_sel_thr_e_l));

   // Select the next thread pc (for F stage)
   //   in0: hold (old F pc, bit 48 refreshed from fcl_fdp_pcoor_vec_f)
   //   in1: no-switch next pc     in2: trap/rollback pc
   //   in3: branch pc from exu (bit 48 forced to 0)
   dp_mux4ds #(49) t0_pcbf_mux(.dout (t0npc_bf),
                               .in0  ({fcl_fdp_pcoor_vec_f[0], t0pc_f[47:0]}),
                               .in1  (nextpc_nosw_bf),
                               .in2  (t0_trap_rb_pc_bf),
                               .in3  ({1'b0, exu_ifu_brpc_e}),
                               .sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[0]),
                               .sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[0]),
                               .sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[0]),
                               .sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[0]));

`ifdef FPGA_SYN_1THREAD
`else

   dp_mux4ds #(49) t1_pcbf_mux(.dout (t1npc_bf),
                               .in0  ({fcl_fdp_pcoor_vec_f[1], t1pc_f[47:0]}),
                               .in1  (nextpc_nosw_bf),
                               .in2  (t1_trap_rb_pc_bf),
                               .in3  ({1'b0, exu_ifu_brpc_e}),
                               .sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[1]),
                               .sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[1]),
                               .sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[1]),
                               .sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[1]));

   dp_mux4ds #(49) t2_pcbf_mux(.dout (t2npc_bf),
                               .in0  ({fcl_fdp_pcoor_vec_f[2], t2pc_f[47:0]}),
                               .in1  (nextpc_nosw_bf),
                               .in2  (t2_trap_rb_pc_bf),
                               .in3  ({1'b0, exu_ifu_brpc_e}),
                               .sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[2]),
                               .sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[2]),
                               .sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[2]),
                               .sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[2]));

   dp_mux4ds #(49) t3_pcbf_mux(.dout (t3npc_bf),
                               .in0  ({fcl_fdp_pcoor_vec_f[3], t3pc_f[47:0]}),
                               .in1  (nextpc_nosw_bf),
                               .in2  (t3_trap_rb_pc_bf),
                               .in3  ({1'b0, exu_ifu_brpc_e}),
                               .sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[3]),
                               .sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[3]),
                               .sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[3]),
                               .sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[3]));
`endif

   // F stage thread PC regs; use low power thr flop
   dff_s #(49) t0_pcf_reg(.din (t0npc_bf),
                          .clk (clk),
                          .q   (t0pc_f),
                          .se  (se), .si(), .so());
`ifdef FPGA_SYN_1THREAD
   assign fdp_fcl_pc_oor_vec_f = {3'b0, t0pc_f[48]};
   assign swpc_bf = t0pc_f[47:0];
`else

   dff_s #(49) t1_pcf_reg(.din (t1npc_bf),
                          .clk (clk),
                          .q   (t1pc_f),
                          .se  (se), .si(), .so());
   dff_s #(49) t2_pcf_reg(.din (t2npc_bf),
                          .clk (clk),
                          .q   (t2pc_f),
                          .se  (se), .si(), .so());
   dff_s #(49) t3_pcf_reg(.din (t3npc_bf),
                          .clk (clk),
                          .q   (t3pc_f),
                          .se  (se), .si(), .so());

   // bit 48 of each thread's F stage PC
   assign fdp_fcl_pc_oor_vec_f = {t3pc_f[48], t2pc_f[48],
                                  t1pc_f[48], t0pc_f[48]};

   // select the pc to be used on a switch -- need to protect
   dp_mux4ds #(48) swpc_mux(.dout (swpc_bf),
                            .in0  (t0pc_f[47:0]),
                            .in1  (t1pc_f[47:0]),
                            .in2  (t2pc_f[47:0]),
                            .in3  (t3pc_f[47:0]),
                            .sel0_l (fcl_fdp_next_thr_bf_l[0]),
                            .sel1_l (fcl_fdp_next_thr_bf_l[1]),
                            .sel2_l (fcl_fdp_next_thr_bf_l[2]),
                            .sel3_l (fcl_fdp_next_thr_bf_l[3]));
`endif

   // choose between I$ write address and read address
   // need mux only for lower 11 bits (2+3 + ICINDEX_SIZE)
//   dp_mux2es #(48) ifqfdp_mux(.dout (icaddr_nosw_bf[47:0]),
//                              .in0  (nextpc_nosw_bf[47:0]),
//                              .in1  ({{37{1'b0}}, ifq_fdp_icindex_bf, 4'b0}),
//                              .sel  (fcl_fdp_ifqfdp_sel_ifq_bf));  // 1=ifq

   // implements switch and branch
   // can we cut this down to 11 bits?  No! tlb needs all 48
//   dp_mux4ds #(48) nxt_icaddr_mux(.dout (icaddr_bf),
//                                  .in0  (swpc_bf[47:0]),
//                                  .in1  (nextpc_nosw_bf[47:0]),
//                                  .in2  ({8'b0, {`IC_TAG_SZ{1'b0}},
//                                          ifq_fdp_icindex_bf, 4'b0}),
//                                  .in3  (exu_ifu_brpc_e[47:0]),
//                                  .sel0_l (fcl_fdp_icaddr_sel_swpc_bf_l),
//                                  .sel1_l (fcl_fdp_icaddr_sel_curr_bf_l),
//                                  .sel2_l (fcl_fdp_icaddr_sel_ifq_bf_l),
//                                  .sel3_l (fcl_fdp_icaddr_sel_br_bf_l));
//   assign fdp_icd_vaddr_bf = icaddr_bf[47:0];

   // this goes to the itlb, icd and ict on top of fdp
   // this is !!very critical!!
   assign fdp_icd_vaddr_bf = pc_bf[47:2];
   // create separate output for the icv to the left
   assign fdp_icv_index_bf = pc_bf[11:5];

   // Place this mux as close to the top (itlb) as possible
   // Fetch address: switch-thread pc, no-switch next pc, or branch pc.
   dp_mux3ds #(48) pcbf_mux(.dout (pc_bf[47:0]),
                            .in0  (swpc_bf[47:0]),
                            .in1  (nextpc_nosw_bf[47:0]),
                            .in2  (exu_ifu_brpc_e[47:0]),
                            .sel0_l (fcl_fdp_pcbf_sel_swpc_bf_l),
                            .sel1_l (fcl_fdp_pcbf_sel_nosw_bf_l),
                            .sel2_l (fcl_fdp_pcbf_sel_br_bf_l));

   dff_s #(48) pcf_reg(.din (pc_bf),
                       .clk (clk),
                       .q   (pc_f),
                       .se  (se), .si(), .so());

   assign fdp_erb_pc_f = pc_f[47:0];

   // trappc mux (choose trap pc vs rollback/uTrap pc)
   dp_mux4ds #(49) trap_pc0_mux(.dout (t0_trap_rb_pc_bf),
                                .in0  (tlu_ifu_trappc_w2),
                                .in1  (pc_d_adj),
                                .in2  (t0pc_s),
                                .in3  (pc_w),
                                .sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[0]),
                                .sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[0]),
                                .sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[0]),
                                .sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[0]));
`ifdef FPGA_SYN_1THREAD
`else

   dp_mux4ds #(49) trap_pc1_mux(.dout (t1_trap_rb_pc_bf),
                                .in0  (tlu_ifu_trappc_w2),
                                .in1  (pc_d_adj),
                                .in2  (t1pc_s),
                                .in3  (pc_w),
                                .sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[1]),
                                .sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[1]),
                                .sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[1]),
                                .sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[1]));

   dp_mux4ds #(49) trap_pc2_mux(.dout (t2_trap_rb_pc_bf),
                                .in0  (tlu_ifu_trappc_w2),
                                .in1  (pc_d_adj),
                                .in2  (t2pc_s),
                                .in3  (pc_w),
                                .sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[2]),
                                .sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[2]),
                                .sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[2]),
                                .sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[2]));

   dp_mux4ds #(49) trap_pc3_mux(.dout (t3_trap_rb_pc_bf),
                                .in0  (tlu_ifu_trappc_w2),
                                .in1  (pc_d_adj),
                                .in2  (t3pc_s),
                                .in3  (pc_w),
                                .sel0_l (fcl_fdp_trrbpc_sel_trap_bf_l[3]),
                                .sel1_l (fcl_fdp_trrbpc_sel_rb_bf_l[3]),
                                .sel2_l (fcl_fdp_trrbpc_sel_pcs_bf_l[3]),
                                .sel3_l (fcl_fdp_trrbpc_sel_err_bf_l[3]));
`endif

   // can reduce this to a 2:1 mux since reset pc is not used any more and
   // pc_f is not needed.
   dp_mux3ds #(49) pcp4_mux(.dout (nextpc_nosw_bf),
                            .in0  (pcinc_f),
                            .in1  (thr_trappc_bf),
                            .in2  ({fcl_fdp_pcoor_f, pc_f[47:0]}),
                            .sel0_l (fcl_fdp_noswpc_sel_inc_l_bf),
                            .sel1_l (fcl_fdp_noswpc_sel_tnpc_l_bf),
                            .sel2_l (fcl_fdp_noswpc_sel_old_l_bf));

   // next S stage thread pc mux per thread
   // Use advtpcs signal which works for stall (Aug '01)
   // Merged pc_e/pc_d into the eqn to allow for rollback
   dp_mux4ds #(49) t0pcf_mux(.dout (t0_next_pcs_f),
                             .in0  (t0pc_s),
                             .in1  ({fcl_fdp_pcoor_vec_f[0], t0pc_f[47:0]}),
                             .in2  (pc_d_adj),
                             .in3  (pc_e),
                             .sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[0]),
                             .sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[0]),
                             .sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[0]),
                             .sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[0]));
`ifdef FPGA_SYN_1THREAD
`else

   dp_mux4ds #(49) t1pcf_mux(.dout (t1_next_pcs_f),
                             .in0  (t1pc_s),
                             .in1  ({fcl_fdp_pcoor_vec_f[1], t1pc_f[47:0]}),
                             .in2  (pc_d_adj),
                             .in3  (pc_e),
                             .sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[1]),
                             .sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[1]),
                             .sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[1]),
                             .sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[1]));

   dp_mux4ds #(49) t2pcf_mux(.dout (t2_next_pcs_f),
                             .in0  (t2pc_s),
                             .in1  ({fcl_fdp_pcoor_vec_f[2], t2pc_f[47:0]}),
//                             .in1  ({fcl_fdp_pcoor_f, pc_f[47:0]}),
                             .in2  (pc_d_adj),
                             .in3  (pc_e),
                             .sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[2]),
                             .sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[2]),
                             .sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[2]),
                             .sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[2]));

   dp_mux4ds #(49) t3pcf_mux(.dout (t3_next_pcs_f),
                             .in0  (t3pc_s),
                             .in1  ({fcl_fdp_pcoor_vec_f[3], t3pc_f[47:0]}),
//                             .in1  ({fcl_fdp_pcoor_f, pc_f[47:0]}),
                             .in2  (pc_d_adj),
                             .in3  (pc_e),
                             .sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[3]),
                             .sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[3]),
                             .sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[3]),
                             .sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[3]));
`endif

   // S stage thread PC regs; use low power thr flop
   dff_s #(49) t0pcs_reg(.din (t0_next_pcs_f),
                         .q   (t0pc_s),
                         .clk (clk), .se(se), .si(), .so());
`ifdef FPGA_SYN_1THREAD
   assign pc_s = t0pc_s;
   assign npc_s = t0_next_pcs_f;
`else

   dff_s #(49) t1pcs_reg(.din (t1_next_pcs_f),
                         .q   (t1pc_s),
                         .clk (clk), .se(se), .si(), .so());
   dff_s #(49) t2pcs_reg(.din (t2_next_pcs_f),
                         .q   (t2pc_s),
                         .clk (clk), .se(se), .si(), .so());
   dff_s #(49) t3pcs_reg(.din (t3_next_pcs_f),
                         .q   (t3pc_s),
                         .clk (clk), .se(se), .si(), .so());

   // S stage PC mux -- need to protect
   dp_mux4ds #(49) pcs_mux(.dout (pc_s),
                           .in0  (t0pc_s),
                           .in1  (t1pc_s),
                           .in2  (t2pc_s),
                           .in3  (t3pc_s),
                           .sel0_l (fcl_fdp_thr_s2_l[0]),
                           .sel1_l (fcl_fdp_thr_s2_l[1]),
                           .sel2_l (fcl_fdp_thr_s2_l[2]),
                           .sel3_l (fcl_fdp_thr_s2_l[3]));

   // S stage next PC mux -- need to protect
   // (taken from the inputs of the S stage PC flops, not their outputs)
   dp_mux4ds #(49) npcs_mux(.dout (npc_s),
                            .in0  (t0_next_pcs_f),
                            .in1  (t1_next_pcs_f),
                            .in2  (t2_next_pcs_f),
                            .in3  (t3_next_pcs_f),
                            .sel0_l (fcl_fdp_thr_s2_l[0]),
                            .sel1_l (fcl_fdp_thr_s2_l[1]),
                            .sel2_l (fcl_fdp_thr_s2_l[2]),
                            .sel3_l (fcl_fdp_thr_s2_l[3]));
`endif

   // D stage PC and nPC
   dff_s #(49) pcd_reg(.din (pc_s),
                       .q   (pc_d),
                       .clk (clk), .se(se), .si(), .so());
   dff_s #(49) npcd_reg(.din (npc_s),
                        .q   (npc_d),
                        .clk (clk), .se(se), .si(), .so());

   // Address mask: bits [31:0] always pass; bits [48:32] are cleared
   // when fcl_fdp_addr_mask_d is 1.
   assign am_mask = {{17{~fcl_fdp_addr_mask_d}}, 32'hffffffff};

   // nand2
   assign pc_d_adj = pc_d & am_mask;
   assign npc_d_adj = npc_d & am_mask;

   assign ifu_exu_pc_d = pc_d_adj[47:0];

   // E stage PC and nPC
   dff_s #(49) pce_reg(.din (pc_d_adj),
                       .q   (pc_e),
                       .clk (clk), .se(se), .si(), .so());
   dff_s #(49) npce_reg(.din (npc_d_adj),
                        .q   (npc_e),
                        .clk (clk), .se(se), .si(), .so());

   // bit 48 of the E stage PC is the out-of-range flag for fcl and tlu
   assign fdp_fcl_pc_oor_e = pc_e[48];
   assign ifu_tlu_pc_oor_e = pc_e[48];

   // M stage PC and nPC
   dff_s #(49) pcm_reg(.din (pc_e),
                       .q   (pc_m),
                       .clk (clk), .se(se), .si(), .so());
   dff_s #(49) npcm_reg(.din (npc_e),
                        .q   (npc_m),
                        .clk (clk), .se(se), .si(), .so());
   assign ifu_tlu_pc_m = pc_m[48:0];
   assign ifu_tlu_npc_m = npc_m[48:0];

   // W stage PC and nPC
   dff_s #(49) pcw_reg(.din (pc_m),
                       .q   (pc_w),
                       .clk (clk), .se(se), .si(), .so());
   dff_s #(49) npcw_reg(.din (npc_m),
                        .q   (npc_w),
                        .clk (clk), .se(se), .si(), .so());
//   assign ifu_tlu_pc_w = pc_w;
//   assign ifu_tlu_npc_w = npc_w;

   // PC incrementer
   // can we fit the ofl logic on the side of the incrementer?
   // The incrementer works on pc_f[47:2]; bits [1:0] pass straight through.
   assign pcinc_f[1:0] = pc_f[1:0];
   sparc_ifu_incr46 pc_inc(.a     (pc_f[47:2]),
                           .a_inc (pcinc_f[47:2]),
                           .ofl   ());  // ofl output not needed

//   assign pcinc_f[48] = inc_ofl & ~fcl_fdp_mask32b_f | fcl_fdp_pcoor_f;
   // Bit 48 of the incremented PC is set when the increment flips bit 47
   // from 0 to 1 while fcl_fdp_mask32b_f is 0, or when fcl_fdp_pcoor_f
   // is already set.
   assign pcinc_f[48] = ~pc_f[47] & pcinc_f[47] & ~fcl_fdp_mask32b_f |
                        fcl_fdp_pcoor_f;

   // Enable for thr trapnpc reg
   //   in0: trap nPC from tlu   in1: npc_w   in2: thread F pc   in3: hold
   dp_mux4ds #(49) t0tnpc_mux(.dout (trapnpc0_bf),
                              .in0  (tlu_ifu_trapnpc_w2),
                              .in1  (npc_w),
                              .in2  (t0pc_f),
                              .in3  (t0_trapnpc_f),
                              .sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[0]),
                              .sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[0]),
                              .sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[0]),
                              .sel3_l (fcl_fdp_thrtnpc_sel_old_l[0]));
`ifdef FPGA_SYN_1THREAD
`else

   dp_mux4ds #(49) t1tnpc_mux(.dout (trapnpc1_bf),
                              .in0  (tlu_ifu_trapnpc_w2),
                              .in1  (npc_w),
                              .in2  (t1pc_f),
                              .in3  (t1_trapnpc_f),
                              .sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[1]),
                              .sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[1]),
                              .sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[1]),
                              .sel3_l (fcl_fdp_thrtnpc_sel_old_l[1]));

   dp_mux4ds #(49) t2tnpc_mux(.dout (trapnpc2_bf),
                              .in0  (tlu_ifu_trapnpc_w2),
                              .in1  (npc_w),
                              .in2  (t2pc_f),
                              .in3  (t2_trapnpc_f),
                              .sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[2]),
                              .sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[2]),
                              .sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[2]),
                              .sel3_l (fcl_fdp_thrtnpc_sel_old_l[2]));

   dp_mux4ds #(49) t3tnpc_mux(.dout (trapnpc3_bf),
                              .in0  (tlu_ifu_trapnpc_w2),
                              .in1  (npc_w),
                              .in2  (t3pc_f),
                              .in3  (t3_trapnpc_f),
                              .sel0_l (fcl_fdp_thrtnpc_sel_tnpc_l[3]),
                              .sel1_l (fcl_fdp_thrtnpc_sel_npcw_l[3]),
                              .sel2_l (fcl_fdp_thrtnpc_sel_pcf_l[3]),
                              .sel3_l (fcl_fdp_thrtnpc_sel_old_l[3]));
`endif

   // thread next trap pc reg
   dff_s #(49) t0tnpcf_reg(.din (trapnpc0_bf),
                           .q   (t0_trapnpc_f),
                           .clk (clk), .se(se), .si(), .so());
`ifdef FPGA_SYN_1THREAD
   assign thr_trappc_bf = t0_trapnpc_f;
`else

   dff_s #(49) t1tnpcf_reg(.din (trapnpc1_bf),
                           .q   (t1_trapnpc_f),
                           .clk (clk), .se(se), .si(), .so());
   dff_s #(49) t2tnpcf_reg(.din (trapnpc2_bf),
                           .q   (t2_trapnpc_f),
                           .clk (clk), .se(se), .si(), .so());
   dff_s #(49) t3tnpcf_reg(.din (trapnpc3_bf),
                           .q   (t3_trapnpc_f),
                           .clk (clk), .se(se), .si(), .so());

   dp_mux4ds #(49) nxttpc_mux(.dout (thr_trappc_bf),
                              .in0  (t0_trapnpc_f),
                              .in1  (t1_trapnpc_f),
                              .in2  (t2_trapnpc_f),
                              .in3  (t3_trapnpc_f),
                              .sel0_l (fcl_fdp_thr_s2_l[0]),  // thr_s2 = thr_f
                              .sel1_l (fcl_fdp_thr_s2_l[1]),
                              .sel2_l (fcl_fdp_thr_s2_l[2]),
                              .sel3_l (fcl_fdp_thr_s2_l[3]));
`endif

   // During rst nextpc_nosw_bf = PO_RESET_PC.  All thread PC_f registers,
   // the icaddr_f register and the nextpc register should be loaded
   // with nextpc_nosw_bf during reset.
   // Eventually, we will load the reset_pc from the trap logic unit,
   // which will arrive on the trap_pc bus.
   // NOTE(review): the paragraph above does not match the live code --
   // `PO_RESET_PC is not used and there is no reset input here.

   // TBD in PC datapath:
   // 1.  Add useNIR bit to PCs -- DONE
   // 2.  Add support for ifq request grant -- DONE
   // 3.  Generate icache read signal (from fcl?) -- DONE
   // 4.  Rollback functionality -- DONE
   // 5.  PC range checks -- DONE
   // 6.  Change PC to 48 bit value -- DONE

//----------------------------------------------------------------------
// Fetched Instruction Datapath
//----------------------------------------------------------------------

   // This is logically 33 bits wide.  The NIR and IR datapaths are laid
   // side by side, making this a 66bit datapath.  The NIR path is
   // potentially a little longer.

   // choose between NIR data and fetched data
   dp_mux2es #(33) usenir_mux(.dout (inst_s1),
                              .in0  (icd_fdp_fetdata_s1[32:0]),
                              .in1  (nirdata_s1),
                              .sel  (fcl_fdp_usenir_sel_nir_s1));  // 1=nir

   // Instruction Output Mux
   // CHANGE: now 4:1
   //   in0: icache data   in1: thread instruction register (switch)
   //   in2: NOP with switch bit 0   in3: NIR
   dp_mux4ds #(33) instout_mux(.dout (fdp_inst_s),
                               .in0  (icd_fdp_fetdata_s1[32:0]),
                               .in1  (inst_s2),
                               .in2  ({`NOP, 1'b0}),
                               .in3  (nirdata_s1[32:0]),
                               .sel0_l (fcl_fdp_inst_sel_curr_s_l),
                               .sel1_l (fcl_fdp_inst_sel_switch_s_l),
                               .sel2_l (fcl_fdp_inst_sel_nop_s_l),
                               .sel3_l (fcl_fdp_inst_sel_nir_s_l));

   // Decode taps.  fdp_inst_s is the instruction shifted up by one bit
   // (bit 0 is the switch bit), so instruction bit n is fdp_inst_s[n+1].
   assign fdp_fcl_swc_s2 = fdp_inst_s[0];
   assign fdp_fcl_op_s = fdp_inst_s[32:31];
   assign fdp_fcl_op3_s = fdp_inst_s[25:22];
   assign fdp_fcl_ibit_s = fdp_inst_s[14];

   assign fdp_dtu_inst_s = fdp_inst_s[32:1];

   // CHANGE: Random logic to fix timing paths
   // output pin on RHS, as close to IRF as possible
   // 16x drivers
   // nand2-xor-invert
   // Bit 4 of each register address is inverted when bit 3 of that
   // address is 1 and fcl_fdp_oddwin_s is 1.
   assign ifu_exu_rs1_s[4] = fdp_inst_s[19] ^
                             (fdp_inst_s[18] & fcl_fdp_oddwin_s);
   assign ifu_exu_rs1_s[3:0] = fdp_inst_s[18:15];

   assign ifu_exu_rs2_s[4] = (fdp_inst_s[5] ^
                              (fdp_inst_s[4] & fcl_fdp_oddwin_s));
   assign ifu_exu_rs2_s[3:0] = fdp_inst_s[4:1];

   assign ifu_exu_rs3_s[4] = (fdp_inst_s[30] ^
                              (fdp_inst_s[29] & fcl_fdp_oddwin_s));
   assign ifu_exu_rs3_s[3:0] = fdp_inst_s[29:26];

   dp_buffer #(33) insts1_buf(inst_s1_bf1, inst_s1[32:0]);

   // Thread instruction muxes
   //   in0: icache miss fill   in1: current fetch (buffered)
   //   in2: hold               in3: rollback instruction
   dp_mux4ds #(33) t0inst_mux(.dout (t0inst_s1),
                              .in0  (ifq_fdp_fill_inst),
                              .in1  (inst_s1_bf1),
                              .in2  (t0inst_s2),
                              .in3  (rb_inst0_s),
                              .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[0]),
                              .sel1_l (fcl_fdp_tinst_sel_curr_s_l[0]),
                              .sel2_l (fcl_fdp_tinst_sel_old_s_l[0]),
                              .sel3_l (fcl_fdp_tinst_sel_rb_s_l[0]));
`ifdef FPGA_SYN_1THREAD
`else

   dp_mux4ds #(33) t1inst_mux(.dout (t1inst_s1),
                              .in0  (ifq_fdp_fill_inst),
                              .in1  (inst_s1_bf1),
                              .in2  (t1inst_s2),
                              .in3  (rb_inst1_s),
                              .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[1]),
                              .sel1_l (fcl_fdp_tinst_sel_curr_s_l[1]),
                              .sel2_l (fcl_fdp_tinst_sel_old_s_l[1]),
                              .sel3_l (fcl_fdp_tinst_sel_rb_s_l[1]));

   dp_mux4ds #(33) t2inst_mux(.dout (t2inst_s1),
                              .in0  (ifq_fdp_fill_inst),
                              .in1  (inst_s1_bf1),
                              .in2  (t2inst_s2),
                              .in3  (rb_inst2_s),
                              .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[2]),
                              .sel1_l (fcl_fdp_tinst_sel_curr_s_l[2]),
                              .sel2_l (fcl_fdp_tinst_sel_old_s_l[2]),
                              .sel3_l (fcl_fdp_tinst_sel_rb_s_l[2]));

   dp_mux4ds #(33) t3inst_mux(.dout (t3inst_s1),
                              .in0  (ifq_fdp_fill_inst),
                              .in1  (inst_s1_bf1),
                              .in2  (t3inst_s2),
                              .in3  (rb_inst3_s),
                              .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[3]),
                              .sel1_l (fcl_fdp_tinst_sel_curr_s_l[3]),
                              .sel2_l (fcl_fdp_tinst_sel_old_s_l[3]),
                              .sel3_l (fcl_fdp_tinst_sel_rb_s_l[3]));
`endif

   // Thread Instruction Register
   dff_s #(33) t0_inst_reg(.din (t0inst_s1),
                           .q   (t0inst_s2),
                           .clk (clk), .se(se), .si(), .so());
`ifdef FPGA_SYN_1THREAD
   assign inst_s2 = t0inst_s2;
`else

   dff_s #(33) t1_inst_reg(.din (t1inst_s1),
                           .q   (t1inst_s2),
                           .clk (clk), .se(se), .si(), .so());
   dff_s #(33) t2_inst_reg(.din (t2inst_s1),
                           .q   (t2inst_s2),
                           .clk (clk), .se(se), .si(), .so());
   dff_s #(33) t3_inst_reg(.din (t3inst_s1),
                           .q   (t3inst_s2),
                           .clk (clk), .se(se), .si(), .so());

   // switch instruction mux -- choose the instruction to switch to
   // fcl keep track of which t*inst_s2 is valid
   dp_mux4ds #(33) swinst_mux(.dout (inst_s2),
                              .in0  (t0inst_s2),
                              .in1  (t1inst_s2),
                              .in2  (t2inst_s2),
                              .in3  (t3inst_s2),
                              .sel0_l (fcl_fdp_thr_s2_l[0]),
                              .sel1_l (fcl_fdp_thr_s2_l[1]),
                              .sel2_l (fcl_fdp_thr_s2_l[2]),
                              .sel3_l (fcl_fdp_thr_s2_l[3]));
`endif

   // Rollback instruction
   // inst_d / inst_e are the issued instruction delayed by one and two
   // clocks; each thread picks one of them as its rollback instruction.
   dff_s #(33) rbinst_d_reg(.din (fdp_inst_s[32:0]),
                            .q   (inst_d),
                            .clk (clk),
                            .se  (se), .si(), .so());

   dff_s #(33) rbinst_e_reg(.din (inst_d),
                            .q   (inst_e),
                            .clk (clk),
                            .se  (se), .si(), .so());

   dp_mux2es #(33) rbinst0_mux(.dout (rb_inst0_s),
                               .in0  (inst_d),
                               .in1  (inst_e),
                               .sel  (fcl_fdp_rbinst_sel_inste_s[0]));
`ifdef FPGA_SYN_1THREAD
`else

   dp_mux2es #(33) rbinst1_mux(.dout (rb_inst1_s),
                               .in0  (inst_d),
                               .in1  (inst_e),
                               .sel  (fcl_fdp_rbinst_sel_inste_s[1]));
   dp_mux2es #(33) rbinst2_mux(.dout (rb_inst2_s),
                               .in0  (inst_d),
                               .in1  (inst_e),
                               .sel  (fcl_fdp_rbinst_sel_inste_s[2]));
   dp_mux2es #(33) rbinst3_mux(.dout (rb_inst3_s),
                               .in0  (inst_d),
                               .in1  (inst_e),
                               .sel  (fcl_fdp_rbinst_sel_inste_s[3]));
`endif

//----------------------------------------------------------------------
// Next Instruction Datapath
//----------------------------------------------------------------------

   // Thread next instruction muxes
//   dp_mux2es #(33) t0nir_mux(.dout (t0nir_in),
//                             .in0  (icd_fdp_topdata_s1[32:0]),
//                             .in1  (t0nir),
//                             .sel  (fcl_fdp_thr_s1_l[0]));  // 0=new
//   dp_mux2es #(33) t1nir_mux(.dout (t1nir_in),
//                             .in0  (icd_fdp_topdata_s1[32:0]),
//                             .in1  (t1nir),
//                             .sel  (fcl_fdp_thr_s1_l[1]));
//   dp_mux2es #(33) t2nir_mux(.dout (t2nir_in),
//                             .in0  (icd_fdp_topdata_s1[32:0]),
//                             .in1  (t2nir),
//                             .sel  (fcl_fdp_thr_s1_l[2]));
//   dp_mux2es #(33) t3nir_mux(.dout (t3nir_in),
//                             .in0  (icd_fdp_topdata_s1[32:0]),
//                             .in1  (t3nir),
//                             .sel  (fcl_fdp_thr_s1_l[3]));

   // Thread Next Instruction Register
   // Each thread's NIR loads icd_fdp_topdata_s1 under control of
   // fcl_fdp_thr_s1_l[n]: through a clock-enable buffer in the default
   // build, or through an enabled flop (dffe_s) when FPGA_SYN_CLK_DFF
   // is defined.
   // NOTE(review): the buffer is removed by FPGA_SYN_CLK_EN but the
   // flop is swapped by FPGA_SYN_CLK_DFF.  Defining only
   // FPGA_SYN_CLK_EN leaves clk_nirN without a driver; in this file
   // the two are only ever defined together (under FPGA_SYN).
   wire   clk_nir0;
`ifdef FPGA_SYN_CLK_EN
`else
   bw_u1_ckenbuf_6x ckennir0(.rclk (rclk),
                             .clk  (clk_nir0),
                             .en_l (fcl_fdp_thr_s1_l[0]),
                             .tm_l (~se));
`endif
`ifdef FPGA_SYN_CLK_DFF
   dffe_s #(33) t0nir_reg(.din (icd_fdp_topdata_s1[32:0]),
                          .q   (t0nir),
                          .en  (~(fcl_fdp_thr_s1_l[0])), .clk(rclk), .se(se), .si(), .so());
`else
   dff_s #(33) t0nir_reg(.din (icd_fdp_topdata_s1[32:0]),
                         .q   (t0nir),
                         .clk (clk_nir0), .se(se), .si(), .so());
`endif

`ifdef FPGA_SYN_1THREAD
   assign nirdata_s1 = t0nir;
`else

   wire   clk_nir1;
`ifdef FPGA_SYN_CLK_EN
`else
   bw_u1_ckenbuf_6x ckennir1(.rclk (rclk),
                             .clk  (clk_nir1),
                             .en_l (fcl_fdp_thr_s1_l[1]),
                             .tm_l (~se));
`endif
`ifdef FPGA_SYN_CLK_DFF
   dffe_s #(33) t1nir_reg(.din (icd_fdp_topdata_s1[32:0]),
                          .q   (t1nir),
                          .en  (~(fcl_fdp_thr_s1_l[1])), .clk (rclk), .se(se), .si(), .so());
`else
   dff_s #(33) t1nir_reg(.din (icd_fdp_topdata_s1[32:0]),
                         .q   (t1nir),
                         .clk (clk_nir1), .se(se), .si(), .so());
`endif

   wire   clk_nir2;
`ifdef FPGA_SYN_CLK_EN
`else
   bw_u1_ckenbuf_6x ckennir2(.rclk (rclk),
                             .clk  (clk_nir2),
                             .en_l (fcl_fdp_thr_s1_l[2]),
                             .tm_l (~se));
`endif
`ifdef FPGA_SYN_CLK_DFF
   dffe_s #(33) t2nir_reg(.din (icd_fdp_topdata_s1[32:0]),
                          .q   (t2nir),
                          .en  (~(fcl_fdp_thr_s1_l[2])), .clk (rclk), .se(se), .si(), .so());
`else
   dff_s #(33) t2nir_reg(.din (icd_fdp_topdata_s1[32:0]),
                         .q   (t2nir),
                         .clk (clk_nir2), .se(se), .si(), .so());
`endif

   wire   clk_nir3;
`ifdef FPGA_SYN_CLK_EN
`else
   bw_u1_ckenbuf_6x ckennir3(.rclk (rclk),
                             .clk  (clk_nir3),
                             .en_l (fcl_fdp_thr_s1_l[3]),
                             .tm_l (~se));
`endif
`ifdef FPGA_SYN_CLK_DFF
   dffe_s #(33) t3nir_reg(.din (icd_fdp_topdata_s1[32:0]),
                          .q   (t3nir),
                          .en  (~(fcl_fdp_thr_s1_l[3])), .clk (rclk), .se(se), .si(), .so());
`else
   dff_s #(33) t3nir_reg(.din (icd_fdp_topdata_s1[32:0]),
                         .q   (t3nir),
                         .clk (clk_nir3), .se(se), .si(), .so());
`endif

   // Next thread NIR mux (nir output mux)
   dp_mux4ds #(33) nextnir_mux(.dout (nirdata_s1),
                               .in0  (t0nir),
                               .in1  (t1nir),
                               .in2  (t2nir),
                               .in3  (t3nir),
                               .sel0_l (fcl_fdp_nirthr_s1_l[0]),
                               .sel1_l (fcl_fdp_nirthr_s1_l[1]),
                               .sel2_l (fcl_fdp_nirthr_s1_l[2]),
                               .sel3_l (fcl_fdp_nirthr_s1_l[3]));
`endif

   // TBD in fetched instruction DP:
   // 1.  Rollback -- DONE
   // 2.  Icache parity check (increase fet data and top data to 34 bits)

endmodule // sparc_ifu_fdp